git.apps.os.sepia.ceph.com Git - ceph.git/commitdiff
crimson/osd: add support for historic op tracking.
author Radosław Zarzyński <rzarzyns@redhat.com>
Thu, 14 Apr 2022 11:17:17 +0000 (13:17 +0200)
committer Radosław Zarzyński <rzarzyns@redhat.com>
Mon, 30 May 2022 14:37:19 +0000 (16:37 +0200)
Signed-off-by: Radosław Zarzyński <rzarzyns@redhat.com>
src/crimson/admin/osd_admin.cc
src/crimson/admin/osd_admin.h
src/crimson/common/operation.h
src/crimson/osd/CMakeLists.txt
src/crimson/osd/osd.cc
src/crimson/osd/osd_operation.cc
src/crimson/osd/osd_operation.h
src/crimson/osd/osd_operation_external_tracking.cc [new file with mode: 0644]
src/crimson/osd/osd_operation_external_tracking.h
src/crimson/osd/osd_operations/client_request.h

index 970010c970c9d94ab1f8ef2a94d20d93ee2d2e11..eba4c4c1769c14c002a1091f0230ac2631ab7129 100644 (file)
@@ -459,4 +459,28 @@ private:
 template std::unique_ptr<AdminSocketHook>
 make_asok_hook<DumpInFlightOpsHook>(const crimson::osd::OSDOperationRegistry& op_registry);
 
+
+/**
+ * Admin socket hook registered under the "dump_historic_ops" command.
+ *
+ * Dumps the client requests kept in the historic registry of the given
+ * OSDOperationRegistry, followed by the number of ops that were dumped.
+ */
+class DumpHistoricOpsHook : public AdminSocketHook {
+public:
+  explicit DumpHistoricOpsHook(const crimson::osd::OSDOperationRegistry& op_registry) :
+    AdminSocketHook{"dump_historic_ops", "", "show recent ops"},
+    op_registry(op_registry)
+  {}
+  /// Render the historic ops through a Formatter created for `format`
+  /// and hand the formatter back as the command's result.
+  seastar::future<tell_result_t> call(const cmdmap_t&,
+                                     std::string_view format,
+                                     ceph::bufferlist&& input) const final
+  {
+    unique_ptr<Formatter> f{Formatter::create(format, "json-pretty", "json-pretty")};
+    f->open_object_section("historic_ops");
+    // dump_historic_client_requests() returns how many ops it emitted;
+    // report that instead of a hard-coded 0, and do it while the
+    // enclosing object section is still open so the value stays keyed
+    // inside the top-level object.
+    const size_t num_ops = op_registry.dump_historic_client_requests(f.get());
+    f->dump_int("num_ops", num_ops);
+    f->close_section();
+    return seastar::make_ready_future<tell_result_t>(std::move(f));
+  }
+private:
+  // registry whose historic ops are dumped; not owned by the hook
+  const crimson::osd::OSDOperationRegistry& op_registry;
+};
+template std::unique_ptr<AdminSocketHook>
+make_asok_hook<DumpHistoricOpsHook>(const crimson::osd::OSDOperationRegistry& op_registry);
+
 } // namespace crimson::admin
index 273d17ec23c564cc5263982fc956e3dc5be61589..5ae6187c8e1dd1c0c6f280a25cabeb0a90de14d4 100644 (file)
@@ -18,6 +18,7 @@ class InjectMDataErrorHook;
 class OsdStatusHook;
 class SendBeaconHook;
 class DumpInFlightOpsHook;
+class DumpHistoricOpsHook;
 
 template<class Hook, class... Args>
 std::unique_ptr<AdminSocketHook> make_asok_hook(Args&&... args);
index 4191a0ec13cf0b7b822e61a632ea4b809b9ca890..90e7d157fa8ef044dfb5b87de0f5505f0713a1c5 100644 (file)
@@ -352,6 +352,7 @@ class OperationRegistryI {
 protected:
   virtual void do_register(Operation *op) = 0;
   virtual bool registries_empty() const = 0;
+  virtual void do_stop() = 0;
 
 public:
   using op_list = boost::intrusive::list<
@@ -367,6 +368,7 @@ public:
   }
 
   seastar::future<> stop() {
+    do_stop();
     shutdown_timer.set_callback([this] {
       if (registries_empty()) {
        shutdown.set_value();
@@ -413,6 +415,13 @@ public:
       REGISTRY_INDEX < std::tuple_size<decltype(registries)>::value);
     return registries[REGISTRY_INDEX];
   }
+
+  /// Mutable access to the op list stored at the compile-time index
+  /// REGISTRY_INDEX; an out-of-range index is rejected at compile time.
+  template <size_t REGISTRY_INDEX>
+  op_list& get_registry() {
+    constexpr auto registry_count =
+      std::tuple_size<decltype(registries)>::value;
+    static_assert(REGISTRY_INDEX < registry_count);
+    return registries[REGISTRY_INDEX];
+  }
 };
 
 class PipelineExitBarrierI {
index b215c12e3a95b200db254b08729f2142c9ae6323..4313075a8d6d09435cdeec094b442914b52ba93f 100644 (file)
@@ -13,6 +13,7 @@ add_executable(crimson-osd
   object_context.cc
   ops_executer.cc
   osd_operation.cc
+  osd_operation_external_tracking.cc
   osd_operations/client_request.cc
   osd_operations/client_request_common.cc
   osd_operations/compound_peering_request.cc
index 34c894dd64fda82822143e2b2f3b85e3a0d55ee2..fe16c203195df864d57c8ef898588e16faf574a0 100644 (file)
@@ -549,6 +549,8 @@ seastar::future<> OSD::start_asok_admin()
     // ops commands
     asok->register_command(make_asok_hook<DumpInFlightOpsHook>(
       std::as_const(get_shard_services().registry)));
+    asok->register_command(make_asok_hook<DumpHistoricOpsHook>(
+      std::as_const(get_shard_services().registry)));
   });
 }
 
index f3a0964700d23a4db54fb1ab2edcda76e5e4f8b7..17f5d34478b24f44be6668c2f2117b71234cf27f 100644 (file)
@@ -14,6 +14,27 @@ namespace {
 
 namespace crimson::osd {
 
+/**
+ * Stop hook of the registry: let go of every op that is still linked
+ * into the historic client request registry.
+ */
+void OSDOperationRegistry::do_stop()
+{
+  // Visiting the registry has to be decoupled from destructing the ops:
+  // Operation relies on the auto-unlink feature of boost::intrusive and
+  // the list must not change while it is being iterated because of the
+  // constraints on iterator validity. Hence every reference is first
+  // adopted into a vector and only released once the walk is over.
+  constexpr auto historic_index =
+    static_cast<size_t>(OperationTypeCode::historic_client_request);
+  std::vector<ClientRequest::ICRef> adopted_refs;
+  for (const Operation& op : get_registry<historic_index>()) {
+    // add_ref=false: take over an already existing reference instead
+    // of acquiring a new one.
+    adopted_refs.emplace_back(
+      static_cast<const ClientRequest*>(&op), /* add_ref= */ false);
+  }
+  // adopted_refs goes out of scope here, which drops the references
+}
+
 size_t OSDOperationRegistry::dump_client_requests(ceph::Formatter* f) const
 {
   const auto& client_registry =
@@ -25,6 +46,27 @@ size_t OSDOperationRegistry::dump_client_requests(ceph::Formatter* f) const
   return std::size(client_registry);
 }
 
+/**
+ * Dump the historic client requests into an "op_history" section of f.
+ *
+ * @param f formatter receiving the "size" field and the "ops" array
+ * @return the number of ops that have been dumped
+ */
+size_t OSDOperationRegistry::dump_historic_client_requests(ceph::Formatter* f) const
+{
+  constexpr auto historic_index =
+    static_cast<size_t>(OperationTypeCode::historic_client_request);
+  const auto& historic_ops = get_registry<historic_index>();
+
+  f->open_object_section("op_history");
+  f->dump_int("size", historic_ops.size());
+  // TODO: f->dump_int("duration", history_duration.load());
+  f->open_array_section("ops");
+  // the intrusive list is configured to not store the size, so the
+  // ops are counted while they are being dumped
+  size_t dumped = 0;
+  for (const auto& op : historic_ops) {
+    op.dump(f);
+    ++dumped;
+  }
+  f->close_section();  // ops
+  f->close_section();  // op_history
+  return dumped;
+}
+
 OperationThrottler::OperationThrottler(ConfigProxy &conf)
   : scheduler(crimson::osd::scheduler::make_scheduler(conf))
 {
index e768503c02b3666434d29784603d5719408e8b5e..acee7af3c7b2b49f66559897320e992b89407236 100644 (file)
@@ -43,6 +43,7 @@ enum class OperationTypeCode {
   background_recovery,
   background_recovery_sub,
   internal_client_request,
+  historic_client_request,
   last_op
 };
 
@@ -56,6 +57,7 @@ static constexpr const char* const OP_NAMES[] = {
   "background_recovery",
   "background_recovery_sub",
   "internal_client_request",
+  "historic_client_request",
 };
 
 // prevent the addition of OperationTypeCode-s with no matching OP_NAMES entry:
@@ -77,6 +79,7 @@ template <typename T>
 struct OperationT : InterruptibleOperation {
   static constexpr const char *type_name = OP_NAMES[static_cast<int>(T::type)];
   using IRef = boost::intrusive_ptr<T>;
+  using ICRef = boost::intrusive_ptr<const T>;
 
   unsigned get_type() const final {
     return static_cast<unsigned>(T::type);
@@ -167,7 +170,10 @@ protected:
 struct OSDOperationRegistry : OperationRegistryT<
   static_cast<size_t>(OperationTypeCode::last_op)
 > {
+  void do_stop() override;
   size_t dump_client_requests(ceph::Formatter* f) const;
+  size_t dump_historic_client_requests(ceph::Formatter* f) const;
+
 };
 /**
  * Throttles set of currently running operations
diff --git a/src/crimson/osd/osd_operation_external_tracking.cc b/src/crimson/osd/osd_operation_external_tracking.cc
new file mode 100644 (file)
index 0000000..5202768
--- /dev/null
@@ -0,0 +1,60 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "common/config.h"
+#include "crimson/osd/osd.h"
+#include "crimson/osd/osd_operation_external_tracking.h"
+
+namespace {
+  // File-local accessor for the OSD subsystem logger. Nothing in this
+  // translation unit calls it yet, hence [[maybe_unused]] to avoid an
+  // unused-function warning until the first caller shows up.
+  [[maybe_unused]] seastar::logger& logger() {
+    return crimson::get_logger(ceph_subsys_osd);
+  }
+}
+
+namespace crimson::osd {
+
+/**
+ * Completion handler: re-link a finished client request from the client
+ * request registry into the historic one and keep it alive there.
+ *
+ * Does nothing when osd_op_history_size is 0. Once the historic registry
+ * grows beyond that limit, the reference held on the oldest entry is
+ * dropped.
+ */
+void HistoricBackend::handle(ClientRequest::CompletionEvent&,
+                             const Operation& op)
+{
+  using crimson::common::local_conf;
+  const auto history_limit = local_conf()->osd_op_history_size;
+  if (!history_limit) {
+    // the history is disabled -- nothing to track
+    return;
+  }
+
+  // the downcast is checked only when assertions are enabled
+#ifdef NDEBUG
+  const auto& request = static_cast<const ClientRequest&>(op);
+#else
+  const auto& request = dynamic_cast<const ClientRequest&>(op);
+#endif
+  auto& registry = request.osd.get_shard_services().registry;
+
+  constexpr auto client_index =
+    static_cast<size_t>(OperationTypeCode::client_request);
+  constexpr auto historic_index =
+    static_cast<size_t>(OperationTypeCode::historic_client_request);
+  auto& client_ops = registry.get_registry<client_index>();
+  auto& historic_ops = registry.get_registry<historic_index>();
+
+  // re-link procedure: unlink the op from the client request registry
+  // and append it to the historic one.
+  historic_ops.splice(std::end(historic_ops),
+                      client_ops,
+                      client_ops.iterator_to(request));
+  // yes, "leak" a reference for now! it is what keeps the op alive
+  // while it sits in the historic registry.
+  ClientRequest::ICRef(&request, /* add_ref= */true).detach();
+
+  // purge the oldest op if the history size limit got exceeded.
+  // NOTE: Operation uses the auto-unlink feature of boost::intrusive.
+  // NOTE: the cleaning happens in OSDOperationRegistry::do_stop()
+  if (historic_ops.size() > history_limit) {
+    const auto& oldest =
+      static_cast<const ClientRequest&>(historic_ops.front());
+    // adopt, and immediately release, the previously "leaked" reference
+    ClientRequest::ICRef(&oldest, /* add_ref= */false);
+  }
+}
+
+} // namespace crimson::osd
index 1e946db95fb4020921e3497753ff5b6ac083a3f9..0bf2dbdd3f1db4f29ed194d803bb4688f32ce53c 100644 (file)
@@ -134,6 +134,105 @@ struct LttngBackendCompoundPeering
               const Operation&) override {}
 };
 
+/**
+ * Event backend used for tracking historic client requests.
+ *
+ * Every event except ClientRequest::CompletionEvent is deliberately
+ * ignored (empty handler); the completion handler is only declared
+ * here and defined out-of-line.
+ */
+struct HistoricBackend
+  : ClientRequest::StartEvent::Backend,
+    ConnectionPipeline::AwaitActive::BlockingEvent::Backend,
+    ConnectionPipeline::AwaitMap::BlockingEvent::Backend,
+    ConnectionPipeline::GetPG::BlockingEvent::Backend,
+    OSD_OSDMapGate::OSDMapBlocker::BlockingEvent::Backend,
+    PGMap::PGCreationBlockingEvent::Backend,
+    ClientRequest::PGPipeline::AwaitMap::BlockingEvent::Backend,
+    PG_OSDMapGate::OSDMapBlocker::BlockingEvent::Backend,
+    ClientRequest::PGPipeline::WaitForActive::BlockingEvent::Backend,
+    PGActivationBlocker::BlockingEvent::Backend,
+    ClientRequest::PGPipeline::RecoverMissing::BlockingEvent::Backend,
+    ClientRequest::PGPipeline::GetOBC::BlockingEvent::Backend,
+    ClientRequest::PGPipeline::Process::BlockingEvent::Backend,
+    ClientRequest::PGPipeline::WaitRepop::BlockingEvent::Backend,
+    ClientRequest::PGPipeline::WaitRepop::BlockingEvent::ExitBarrierEvent::Backend,
+    ClientRequest::PGPipeline::SendReply::BlockingEvent::Backend,
+    ClientRequest::CompletionEvent::Backend
+{
+  // --- ignored events: the handlers below are intentionally empty ---
+
+  void handle(ClientRequest::StartEvent&,
+              const Operation&) override {}
+
+  void handle(ConnectionPipeline::AwaitActive::BlockingEvent&,
+              const Operation&,
+              const ConnectionPipeline::AwaitActive&) override {}
+
+  void handle(ConnectionPipeline::AwaitMap::BlockingEvent&,
+              const Operation&,
+              const ConnectionPipeline::AwaitMap&) override {}
+
+  void handle(OSD_OSDMapGate::OSDMapBlocker::BlockingEvent&,
+              const Operation&,
+              const OSD_OSDMapGate::OSDMapBlocker&) override {}
+
+  void handle(ConnectionPipeline::GetPG::BlockingEvent&,
+              const Operation&,
+              const ConnectionPipeline::GetPG&) override {}
+
+  void handle(PGMap::PGCreationBlockingEvent&,
+              const Operation&,
+              const PGMap::PGCreationBlocker&) override {}
+
+  void handle(ClientRequest::PGPipeline::AwaitMap::BlockingEvent&,
+              const Operation&,
+              const ClientRequest::PGPipeline::AwaitMap&) override {}
+
+  void handle(PG_OSDMapGate::OSDMapBlocker::BlockingEvent&,
+              const Operation&,
+              const PG_OSDMapGate::OSDMapBlocker&) override {}
+
+  void handle(ClientRequest::PGPipeline::WaitForActive::BlockingEvent&,
+              const Operation&,
+              const ClientRequest::PGPipeline::WaitForActive&) override {}
+
+  void handle(PGActivationBlocker::BlockingEvent&,
+              const Operation&,
+              const PGActivationBlocker&) override {}
+
+  void handle(ClientRequest::PGPipeline::RecoverMissing::BlockingEvent&,
+              const Operation&,
+              const ClientRequest::PGPipeline::RecoverMissing&) override {}
+
+  void handle(ClientRequest::PGPipeline::GetOBC::BlockingEvent&,
+              const Operation&,
+              const ClientRequest::PGPipeline::GetOBC&) override {}
+
+  void handle(ClientRequest::PGPipeline::Process::BlockingEvent&,
+              const Operation&,
+              const ClientRequest::PGPipeline::Process&) override {}
+
+  void handle(ClientRequest::PGPipeline::WaitRepop::BlockingEvent&,
+              const Operation&,
+              const ClientRequest::PGPipeline::WaitRepop&) override {}
+
+  void handle(ClientRequest::PGPipeline::WaitRepop::BlockingEvent::ExitBarrierEvent&,
+              const Operation&) override {}
+
+  void handle(ClientRequest::PGPipeline::SendReply::BlockingEvent&,
+              const Operation&,
+              const ClientRequest::PGPipeline::SendReply&) override {}
+
+  // --- the only event acted upon; implemented out-of-line ---
+
+  void handle(ClientRequest::CompletionEvent&,
+              const Operation&) override;
+};
 
 } // namespace crimson::osd
 
@@ -141,8 +240,8 @@ namespace crimson {
 
 template <>
 struct EventBackendRegistry<osd::ClientRequest> {
-  static std::tuple<osd::LttngBackend/*, HistoricBackend*/> get_backends() {
-    return { {} };
+  static std::tuple<osd::LttngBackend, osd::HistoricBackend> get_backends() {
+    return { {}, {} };
   }
 };
 
index a222ea6a03af04c0565163a746b4adb6f96cc791..15827ad9ebd8cabea08d34ed677e8709eb9731cd 100644 (file)
@@ -47,6 +47,7 @@ public:
     } send_reply;
     friend class ClientRequest;
     friend class LttngBackend;
+    friend class HistoricBackend;
   };
 
   using ordering_hook_t = boost::intrusive::list_member_hook<>;