From d6fb51fc6b193d4b195dc740eafe087ddad48dc5 Mon Sep 17 00:00:00 2001 From: Matan Breizman Date: Mon, 24 Jun 2024 08:07:42 +0000 Subject: [PATCH] Revert "crimson/osd/osd_operation: fix dump_historic_slow_ops command works" This reverts commit 834ab99efc6453f91183a47849f56617cf73c112. Signed-off-by: Matan Breizman --- src/crimson/osd/osd_operation.cc | 105 +++++++++++++++++++------------ src/crimson/osd/osd_operation.h | 3 +- 2 files changed, 66 insertions(+), 42 deletions(-) diff --git a/src/crimson/osd/osd_operation.cc b/src/crimson/osd/osd_operation.cc index 6cf80e60c6b41..920fdc1148046 100644 --- a/src/crimson/osd/osd_operation.cc +++ b/src/crimson/osd/osd_operation.cc @@ -33,11 +33,18 @@ void OSDOperationRegistry::do_stop() /* add_ref= */ false }; }); + last_of_recents = std::end(historic_registry); // to_ref_down is going off } OSDOperationRegistry::OSDOperationRegistry() - : OperationRegistryT(seastar::this_shard_id()) {} + : OperationRegistryT(seastar::this_shard_id()) +{ + constexpr auto historic_reg_index = + static_cast(OperationTypeCode::historic_client_request); + auto& historic_registry = get_registry(); + last_of_recents = std::begin(historic_registry); +} static auto get_duration(const ClientRequest& client_request) { @@ -48,46 +55,51 @@ static auto get_duration(const ClientRequest& client_request) void OSDOperationRegistry::put_historic(const ClientRequest& op) { - using crimson::common::local_conf; // unlink the op from the client request registry. this is a part of // the re-link procedure. finally it will be in historic registry. constexpr auto client_reg_index = static_cast(OperationTypeCode::client_request); constexpr auto historic_reg_index = static_cast(OperationTypeCode::historic_client_request); - constexpr auto slow_historic_reg_index = - static_cast(OperationTypeCode::historic_slow_client_request); - auto& client_registry = get_registry(); - // slow ops should put into historic_slow_client_request list - // we only save the newest op - if (get_duration(op) > local_conf()->osd_op_complaint_time) { - auto& slow_historic_registry = get_registry(); - slow_historic_registry.splice(std::end(slow_historic_registry), - client_registry, - client_registry.iterator_to(op)); - - if (num_slow_ops >= local_conf()->osd_op_history_slow_op_size) { - slow_historic_registry.pop_front(); - } else { - ++num_slow_ops; - } - } else { - auto& historic_registry = get_registry(); - historic_registry.splice(std::end(historic_registry), - client_registry, - client_registry.iterator_to(op)); - - if (num_recent_ops >= local_conf()->osd_op_history_size) { - historic_registry.pop_front(); - } else { - ++num_recent_ops; - } - } - + auto& historic_registry = get_registry(); + historic_registry.splice(std::end(historic_registry), + client_registry, + client_registry.iterator_to(op)); ClientRequest::ICRef( &op, /* add_ref= */true ).detach(); // yes, "leak" it for now! + + // check whether the history size limit is not exceeded; if so, then + // purge the oldest op. + // NOTE: Operation uses the auto-unlink feature of boost::intrusive. + // NOTE: the cleaning happens in OSDOperationRegistry::do_stop() + using crimson::common::local_conf; + if (num_recent_ops >= local_conf()->osd_op_history_size) { + ++last_of_recents; + ++num_slow_ops; + } else { + ++num_recent_ops; + } + if (num_slow_ops > local_conf()->osd_op_history_slow_op_size) { + // we're interested in keeping slowest ops. if the slow op history + // is disabled, the list will have only one element, so the full-blown + // search will boil down into `.front()`. + const auto fastest_historic_iter = std::min_element( + std::cbegin(historic_registry), last_of_recents, + [] (const auto& lop, const auto& rop) { + const auto& lclient_request = static_cast(lop); + const auto& rclient_request = static_cast(rop); + return get_duration(lclient_request) < get_duration(rclient_request); + }); + assert(fastest_historic_iter != std::end(historic_registry)); + const auto& fastest_historic_op = + static_cast(*fastest_historic_iter); + historic_registry.erase(fastest_historic_iter); + // clear a previously "leaked" op + ClientRequest::ICRef(&fastest_historic_op, /* add_ref= */false); + --num_slow_ops; + } } size_t OSDOperationRegistry::dump_historic_client_requests(ceph::Formatter* f) const @@ -113,20 +125,33 @@ size_t OSDOperationRegistry::dump_historic_client_requests(ceph::Formatter* f) c size_t OSDOperationRegistry::dump_slowest_historic_client_requests(ceph::Formatter* f) const { - const auto& slow_historic_client_registry = - get_registry(OperationTypeCode::historic_slow_client_request)>(); //ClientRequest::type)>(); + const auto& historic_client_registry = + get_registry(OperationTypeCode::historic_client_request)>(); //ClientRequest::type)>(); f->open_object_section("op_history"); - f->dump_int("size", slow_historic_client_registry.size()); + f->dump_int("size", historic_client_registry.size()); // TODO: f->dump_int("duration", history_duration.load()); // the intrusive list is configured to not store the size + std::multimap> sorted_slowest_ops; + // iterating over the entire registry as a slow op could be also + // in the "recently added" part. + std::transform(std::begin(historic_client_registry), + std::end(historic_client_registry), + std::inserter(sorted_slowest_ops, std::end(sorted_slowest_ops)), + [] (const Operation& op) { + const auto& cop = static_cast(op); + return std::make_pair(get_duration(cop), &cop); + }); + f->open_array_section("ops"); + using crimson::common::local_conf; size_t ops_count = 0; + for (auto it = std::begin(sorted_slowest_ops); + ops_count < local_conf()->osd_op_history_slow_op_size + && it != std::end(sorted_slowest_ops); + ++it, ++ops_count) { - f->open_array_section("ops"); - for (const auto& op : slow_historic_client_registry) { - op.dump(f); - ++ops_count; - } - f->close_section(); + it->second->dump(f); } f->close_section(); return ops_count; diff --git a/src/crimson/osd/osd_operation.h b/src/crimson/osd/osd_operation.h index b80945e258d2e..1064a5c8e03e1 100644 --- a/src/crimson/osd/osd_operation.h +++ b/src/crimson/osd/osd_operation.h @@ -50,7 +50,6 @@ enum class OperationTypeCode { background_recovery_sub, internal_client_request, historic_client_request, - historic_slow_client_request, logmissing_request, logmissing_request_reply, snaptrim_event, @@ -73,7 +72,6 @@ static constexpr const char* const OP_NAMES[] = { "background_recovery_sub", "internal_client_request", "historic_client_request", - "historic_slow_client_request", "logmissing_request", "logmissing_request_reply", "snaptrim_event", @@ -232,6 +230,7 @@ struct OSDOperationRegistry : OperationRegistryT< size_t dump_slowest_historic_client_requests(ceph::Formatter* f) const; private: + op_list::const_iterator last_of_recents; size_t num_recent_ops = 0; size_t num_slow_ops = 0; }; -- 2.39.5