From 6f5737e4f5a8ccc27cd19cb7dafafe0cfbbc02c7 Mon Sep 17 00:00:00 2001 From: =?utf8?q?Rados=C5=82aw=20Zarzy=C5=84ski?= Date: Thu, 14 Apr 2022 13:17:17 +0200 Subject: [PATCH] crimson/osd: add support for historic op tracking. MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit Signed-off-by: Radosław Zarzyński --- src/crimson/admin/osd_admin.cc | 24 ++++ src/crimson/admin/osd_admin.h | 1 + src/crimson/common/operation.h | 9 ++ src/crimson/osd/CMakeLists.txt | 1 + src/crimson/osd/osd.cc | 2 + src/crimson/osd/osd_operation.cc | 42 +++++++ src/crimson/osd/osd_operation.h | 6 + .../osd/osd_operation_external_tracking.cc | 60 ++++++++++ .../osd/osd_operation_external_tracking.h | 103 +++++++++++++++++- .../osd/osd_operations/client_request.h | 1 + 10 files changed, 247 insertions(+), 2 deletions(-) create mode 100644 src/crimson/osd/osd_operation_external_tracking.cc diff --git a/src/crimson/admin/osd_admin.cc b/src/crimson/admin/osd_admin.cc index 970010c970c9d..eba4c4c1769c1 100644 --- a/src/crimson/admin/osd_admin.cc +++ b/src/crimson/admin/osd_admin.cc @@ -459,4 +459,28 @@ private: template std::unique_ptr make_asok_hook(const crimson::osd::OSDOperationRegistry& op_registry); + +class DumpHistoricOpsHook : public AdminSocketHook { +public: + explicit DumpHistoricOpsHook(const crimson::osd::OSDOperationRegistry& op_registry) : + AdminSocketHook{"dump_historic_ops", "", "show recent ops"}, + op_registry(op_registry) + {} + seastar::future call(const cmdmap_t&, + std::string_view format, + ceph::bufferlist&& input) const final + { + unique_ptr f{Formatter::create(format, "json-pretty", "json-pretty")}; + f->open_object_section("historic_ops"); + op_registry.dump_historic_client_requests(f.get()); + f->close_section(); + f->dump_int("num_ops", 0); + return seastar::make_ready_future(std::move(f)); + } +private: + const crimson::osd::OSDOperationRegistry& op_registry; +}; +template std::unique_ptr +make_asok_hook(const crimson::osd::OSDOperationRegistry& op_registry); + } // namespace crimson::admin diff --git a/src/crimson/admin/osd_admin.h b/src/crimson/admin/osd_admin.h index 273d17ec23c56..5ae6187c8e1dd 100644 --- a/src/crimson/admin/osd_admin.h +++ b/src/crimson/admin/osd_admin.h @@ -18,6 +18,7 @@ class InjectMDataErrorHook; class OsdStatusHook; class SendBeaconHook; class DumpInFlightOpsHook; +class DumpHistoricOpsHook; template std::unique_ptr make_asok_hook(Args&&... args); diff --git a/src/crimson/common/operation.h b/src/crimson/common/operation.h index 4191a0ec13cf0..90e7d157fa8ef 100644 --- a/src/crimson/common/operation.h +++ b/src/crimson/common/operation.h @@ -352,6 +352,7 @@ class OperationRegistryI { protected: virtual void do_register(Operation *op) = 0; virtual bool registries_empty() const = 0; + virtual void do_stop() = 0; public: using op_list = boost::intrusive::list< @@ -367,6 +368,7 @@ public: } seastar::future<> stop() { + do_stop(); shutdown_timer.set_callback([this] { if (registries_empty()) { shutdown.set_value(); @@ -413,6 +415,13 @@ public: REGISTRY_INDEX < std::tuple_size::value); return registries[REGISTRY_INDEX]; } + + template + op_list& get_registry() { + static_assert( + REGISTRY_INDEX < std::tuple_size::value); + return registries[REGISTRY_INDEX]; + } }; class PipelineExitBarrierI { diff --git a/src/crimson/osd/CMakeLists.txt b/src/crimson/osd/CMakeLists.txt index b215c12e3a95b..4313075a8d6d0 100644 --- a/src/crimson/osd/CMakeLists.txt +++ b/src/crimson/osd/CMakeLists.txt @@ -13,6 +13,7 @@ add_executable(crimson-osd object_context.cc ops_executer.cc osd_operation.cc + osd_operation_external_tracking.cc osd_operations/client_request.cc osd_operations/client_request_common.cc osd_operations/compound_peering_request.cc diff --git a/src/crimson/osd/osd.cc b/src/crimson/osd/osd.cc index 34c894dd64fda..fe16c203195df 100644 --- a/src/crimson/osd/osd.cc +++ b/src/crimson/osd/osd.cc @@ -549,6 +549,8 @@ seastar::future<> OSD::start_asok_admin() // ops commands asok->register_command(make_asok_hook( std::as_const(get_shard_services().registry))); + asok->register_command(make_asok_hook( + std::as_const(get_shard_services().registry))); }); } diff --git a/src/crimson/osd/osd_operation.cc b/src/crimson/osd/osd_operation.cc index f3a0964700d23..17f5d34478b24 100644 --- a/src/crimson/osd/osd_operation.cc +++ b/src/crimson/osd/osd_operation.cc @@ -14,6 +14,27 @@ namespace { namespace crimson::osd { +void OSDOperationRegistry::do_stop() +{ + // we need to decouple visiting the registry from destructing + // ops because of the auto-unlink feature of boost::intrusive. + // the list shouldn't change while iterating due to constrains + // on iterator's validity. + constexpr auto historic_reg_index = + static_cast(OperationTypeCode::historic_client_request); + auto& historic_registry = get_registry(); + std::vector to_ref_down; + std::transform(std::begin(historic_registry), std::end(historic_registry), + std::back_inserter(to_ref_down), + [] (const Operation& op) { + return ClientRequest::ICRef{ + static_cast(&op), + /* add_ref= */ false + }; + }); + // to_ref_down is going off +} + size_t OSDOperationRegistry::dump_client_requests(ceph::Formatter* f) const { const auto& client_registry = @@ -25,6 +46,27 @@ size_t OSDOperationRegistry::dump_client_requests(ceph::Formatter* f) const return std::size(client_registry); } +size_t OSDOperationRegistry::dump_historic_client_requests(ceph::Formatter* f) const +{ + const auto& historic_client_registry = + get_registry(OperationTypeCode::historic_client_request)>(); //ClientRequest::type)>(); + f->open_object_section("op_history"); + f->dump_int("size", historic_client_registry.size()); + // TODO: f->dump_int("duration", history_duration.load()); + // the intrusive list is configured to not store the size + size_t ops_count = 0; + { + f->open_array_section("ops"); + for (const auto& op : historic_client_registry) { + op.dump(f); + ++ops_count; + } + f->close_section(); + } + f->close_section(); + return ops_count; +} + OperationThrottler::OperationThrottler(ConfigProxy &conf) : scheduler(crimson::osd::scheduler::make_scheduler(conf)) { diff --git a/src/crimson/osd/osd_operation.h b/src/crimson/osd/osd_operation.h index e768503c02b36..acee7af3c7b2b 100644 --- a/src/crimson/osd/osd_operation.h +++ b/src/crimson/osd/osd_operation.h @@ -43,6 +43,7 @@ enum class OperationTypeCode { background_recovery, background_recovery_sub, internal_client_request, + historic_client_request, last_op }; @@ -56,6 +57,7 @@ static constexpr const char* const OP_NAMES[] = { "background_recovery", "background_recovery_sub", "internal_client_request", + "historic_client_request", }; // prevent the addition of OperationTypeCode-s with no matching OP_NAMES entry: @@ -77,6 +79,7 @@ template struct OperationT : InterruptibleOperation { static constexpr const char *type_name = OP_NAMES[static_cast(T::type)]; using IRef = boost::intrusive_ptr; + using ICRef = boost::intrusive_ptr; unsigned get_type() const final { return static_cast(T::type); @@ -167,7 +170,10 @@ protected: struct OSDOperationRegistry : OperationRegistryT< static_cast(OperationTypeCode::last_op) > { + void do_stop() override; size_t dump_client_requests(ceph::Formatter* f) const; + size_t dump_historic_client_requests(ceph::Formatter* f) const; + }; /** * Throttles set of currently running operations diff --git a/src/crimson/osd/osd_operation_external_tracking.cc b/src/crimson/osd/osd_operation_external_tracking.cc new file mode 100644 index 0000000000000..520276832c1ce --- /dev/null +++ b/src/crimson/osd/osd_operation_external_tracking.cc @@ -0,0 +1,60 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "common/config.h" +#include "crimson/osd/osd.h" +#include "crimson/osd/osd_operation_external_tracking.h" + +namespace { + seastar::logger& logger() { + return crimson::get_logger(ceph_subsys_osd); + } +} + +namespace crimson::osd { + +void HistoricBackend::handle(ClientRequest::CompletionEvent&, + const Operation& op) +{ + // early exit if the history is disabled + using crimson::common::local_conf; + if (!local_conf()->osd_op_history_size) { + return; + } + +#ifdef NDEBUG + const auto& client_request = static_cast(op); +#else + const auto& client_request = dynamic_cast(op); +#endif + auto& main_registry = client_request.osd.get_shard_services().registry; + + // unlink the op from the client request registry. this is a part of + // the re-link procedure. finally it will be in historic registry. + constexpr auto client_reg_index = + static_cast(OperationTypeCode::client_request); + constexpr auto historic_reg_index = + static_cast(OperationTypeCode::historic_client_request); + auto& client_registry = main_registry.get_registry(); + auto& historic_registry = main_registry.get_registry(); + + historic_registry.splice(std::end(historic_registry), + client_registry, + client_registry.iterator_to(client_request)); + ClientRequest::ICRef( + &client_request, /* add_ref= */true + ).detach(); // yes, "leak" it for now! + + // check whether the history size limit is not exceeded; if so, then + // purge the oldest op. + // NOTE: Operation uses the auto-unlink feature of boost::intrusive. + // NOTE: the cleaning happens in OSDOperationRegistry::do_stop() + if (historic_registry.size() > local_conf()->osd_op_history_size) { + const auto& oldest_historic_op = + static_cast(historic_registry.front()); + // clear a previously "leaked" op + ClientRequest::ICRef(&oldest_historic_op, /* add_ref= */false); + } +} + +} // namespace crimson::osd diff --git a/src/crimson/osd/osd_operation_external_tracking.h b/src/crimson/osd/osd_operation_external_tracking.h index 1e946db95fb40..0bf2dbdd3f1db 100644 --- a/src/crimson/osd/osd_operation_external_tracking.h +++ b/src/crimson/osd/osd_operation_external_tracking.h @@ -134,6 +134,105 @@ struct LttngBackendCompoundPeering const Operation&) override {} }; +struct HistoricBackend + : ClientRequest::StartEvent::Backend, + ConnectionPipeline::AwaitActive::BlockingEvent::Backend, + ConnectionPipeline::AwaitMap::BlockingEvent::Backend, + ConnectionPipeline::GetPG::BlockingEvent::Backend, + OSD_OSDMapGate::OSDMapBlocker::BlockingEvent::Backend, + PGMap::PGCreationBlockingEvent::Backend, + ClientRequest::PGPipeline::AwaitMap::BlockingEvent::Backend, + PG_OSDMapGate::OSDMapBlocker::BlockingEvent::Backend, + ClientRequest::PGPipeline::WaitForActive::BlockingEvent::Backend, + PGActivationBlocker::BlockingEvent::Backend, + ClientRequest::PGPipeline::RecoverMissing::BlockingEvent::Backend, + ClientRequest::PGPipeline::GetOBC::BlockingEvent::Backend, + ClientRequest::PGPipeline::Process::BlockingEvent::Backend, + ClientRequest::PGPipeline::WaitRepop::BlockingEvent::Backend, + ClientRequest::PGPipeline::WaitRepop::BlockingEvent::ExitBarrierEvent::Backend, + ClientRequest::PGPipeline::SendReply::BlockingEvent::Backend, + ClientRequest::CompletionEvent::Backend +{ + void handle(ClientRequest::StartEvent&, + const Operation&) override {} + + void handle(ConnectionPipeline::AwaitActive::BlockingEvent& ev, + const Operation& op, + const ConnectionPipeline::AwaitActive& blocker) override { + } + + void handle(ConnectionPipeline::AwaitMap::BlockingEvent& ev, + const Operation& op, + const ConnectionPipeline::AwaitMap& blocker) override { + } + + void handle(OSD_OSDMapGate::OSDMapBlocker::BlockingEvent&, + const Operation&, + const OSD_OSDMapGate::OSDMapBlocker&) override { + } + + void handle(ConnectionPipeline::GetPG::BlockingEvent& ev, + const Operation& op, + const ConnectionPipeline::GetPG& blocker) override { + } + + void handle(PGMap::PGCreationBlockingEvent&, + const Operation&, + const PGMap::PGCreationBlocker&) override { + } + + void handle(ClientRequest::PGPipeline::AwaitMap::BlockingEvent& ev, + const Operation& op, + const ClientRequest::PGPipeline::AwaitMap& blocker) override { + } + + void handle(PG_OSDMapGate::OSDMapBlocker::BlockingEvent&, + const Operation&, + const PG_OSDMapGate::OSDMapBlocker&) override { + } + + void handle(ClientRequest::PGPipeline::WaitForActive::BlockingEvent& ev, + const Operation& op, + const ClientRequest::PGPipeline::WaitForActive& blocker) override { + } + + void handle(PGActivationBlocker::BlockingEvent& ev, + const Operation& op, + const PGActivationBlocker& blocker) override { + } + + void handle(ClientRequest::PGPipeline::RecoverMissing::BlockingEvent& ev, + const Operation& op, + const ClientRequest::PGPipeline::RecoverMissing& blocker) override { + } + + void handle(ClientRequest::PGPipeline::GetOBC::BlockingEvent& ev, + const Operation& op, + const ClientRequest::PGPipeline::GetOBC& blocker) override { + } + + void handle(ClientRequest::PGPipeline::Process::BlockingEvent& ev, + const Operation& op, + const ClientRequest::PGPipeline::Process& blocker) override { + } + + void handle(ClientRequest::PGPipeline::WaitRepop::BlockingEvent& ev, + const Operation& op, + const ClientRequest::PGPipeline::WaitRepop& blocker) override { + } + + void handle(ClientRequest::PGPipeline::WaitRepop::BlockingEvent::ExitBarrierEvent& ev, + const Operation& op) override { + } + + void handle(ClientRequest::PGPipeline::SendReply::BlockingEvent& ev, + const Operation& op, + const ClientRequest::PGPipeline::SendReply& blocker) override { + } + + void handle(ClientRequest::CompletionEvent&, + const Operation&) override; +}; } // namespace crimson::osd @@ -141,8 +240,8 @@ namespace crimson { template <> struct EventBackendRegistry { - static std::tuple get_backends() { - return { {} }; + static std::tuple get_backends() { + return { {}, {} }; } }; diff --git a/src/crimson/osd/osd_operations/client_request.h b/src/crimson/osd/osd_operations/client_request.h index a222ea6a03af0..15827ad9ebd8c 100644 --- a/src/crimson/osd/osd_operations/client_request.h +++ b/src/crimson/osd/osd_operations/client_request.h @@ -47,6 +47,7 @@ public: } send_reply; friend class ClientRequest; friend class LttngBackend; + friend class HistoricBackend; }; using ordering_hook_t = boost::intrusive::list_member_hook<>; -- 2.47.3