]> git.apps.os.sepia.ceph.com Git - ceph.git/commitdiff
Revert "crimson/osd/osd_operation: fix dump_historic_slow_ops command works" 58223/head
authorMatan Breizman <mbreizma@redhat.com>
Mon, 24 Jun 2024 08:07:42 +0000 (08:07 +0000)
committerMatan Breizman <mbreizma@redhat.com>
Mon, 24 Jun 2024 08:07:42 +0000 (08:07 +0000)
This reverts commit 834ab99efc6453f91183a47849f56617cf73c112.

Signed-off-by: Matan Breizman <mbreizma@redhat.com>
src/crimson/osd/osd_operation.cc
src/crimson/osd/osd_operation.h

index 6cf80e60c6b415c42d386e916447525521ee61ec..920fdc11480469ab0a27e8dba62b708105923c14 100644 (file)
@@ -33,11 +33,18 @@ void OSDOperationRegistry::do_stop()
                     /* add_ref= */ false
                   };
                 });
+  last_of_recents = std::end(historic_registry);
   // to_ref_down is going off
 }
 
 OSDOperationRegistry::OSDOperationRegistry()
-  : OperationRegistryT(seastar::this_shard_id()) {}
+  : OperationRegistryT(seastar::this_shard_id())
+{
+  constexpr auto historic_reg_index =
+    static_cast<size_t>(OperationTypeCode::historic_client_request);
+  auto& historic_registry = get_registry<historic_reg_index>();
+  last_of_recents = std::begin(historic_registry);
+}
 
 static auto get_duration(const ClientRequest& client_request)
 {
@@ -48,46 +55,51 @@ static auto get_duration(const ClientRequest& client_request)
 
 void OSDOperationRegistry::put_historic(const ClientRequest& op)
 {
-  using crimson::common::local_conf;
   // unlink the op from the client request registry. this is a part of
   // the re-link procedure. finally it will be in historic registry.
   constexpr auto client_reg_index =
     static_cast<size_t>(OperationTypeCode::client_request);
   constexpr auto historic_reg_index =
     static_cast<size_t>(OperationTypeCode::historic_client_request);
-  constexpr auto slow_historic_reg_index = 
-    static_cast<size_t>(OperationTypeCode::historic_slow_client_request);
-
   auto& client_registry = get_registry<client_reg_index>();
-  // slow ops should put into historic_slow_client_request list
-  // we only save the newest op
-  if (get_duration(op) > local_conf()->osd_op_complaint_time) {
-    auto& slow_historic_registry = get_registry<slow_historic_reg_index>();
-    slow_historic_registry.splice(std::end(slow_historic_registry),
-          client_registry,
-          client_registry.iterator_to(op));
-
-    if (num_slow_ops >= local_conf()->osd_op_history_slow_op_size) {
-      slow_historic_registry.pop_front();
-    } else {
-      ++num_slow_ops;
-    }
-  } else {
-    auto& historic_registry = get_registry<historic_reg_index>();
-    historic_registry.splice(std::end(historic_registry),
-          client_registry,
-          client_registry.iterator_to(op));
-
-    if (num_recent_ops >= local_conf()->osd_op_history_size) {
-      historic_registry.pop_front();
-    } else {
-      ++num_recent_ops;
-    }
-  }
-
+  auto& historic_registry = get_registry<historic_reg_index>();
+  historic_registry.splice(std::end(historic_registry),
+                          client_registry,
+                          client_registry.iterator_to(op));
   ClientRequest::ICRef(
     &op, /* add_ref= */true
   ).detach(); // yes, "leak" it for now!
+
+  // check whether the history size limit is not exceeded; if so, then
+  // purge the oldest op.
+  // NOTE: Operation uses the auto-unlink feature of boost::intrusive.
+  // NOTE: the cleaning happens in OSDOperationRegistry::do_stop()
+  using crimson::common::local_conf;
+  if (num_recent_ops >= local_conf()->osd_op_history_size) {
+    ++last_of_recents;
+    ++num_slow_ops;
+  } else {
+    ++num_recent_ops;
+  }
+  if (num_slow_ops > local_conf()->osd_op_history_slow_op_size) {
+    // we're interested in keeping slowest ops. if the slow op history
+    // is disabled, the list will have only one element, so the full-blown
+    // search will boil down into `.front()`.
+    const auto fastest_historic_iter = std::min_element(
+      std::cbegin(historic_registry), last_of_recents,
+      [] (const auto& lop, const auto& rop) {
+        const auto& lclient_request = static_cast<const ClientRequest&>(lop);
+        const auto& rclient_request = static_cast<const ClientRequest&>(rop);
+       return get_duration(lclient_request) < get_duration(rclient_request);
+    });
+    assert(fastest_historic_iter != std::end(historic_registry));
+    const auto& fastest_historic_op =
+      static_cast<const ClientRequest&>(*fastest_historic_iter);
+    historic_registry.erase(fastest_historic_iter);
+    // clear a previously "leaked" op
+    ClientRequest::ICRef(&fastest_historic_op, /* add_ref= */false);
+    --num_slow_ops;
+  }
 }
 
 size_t OSDOperationRegistry::dump_historic_client_requests(ceph::Formatter* f) const
@@ -113,20 +125,33 @@ size_t OSDOperationRegistry::dump_historic_client_requests(ceph::Formatter* f) c
 
 size_t OSDOperationRegistry::dump_slowest_historic_client_requests(ceph::Formatter* f) const
 {
-  const auto& slow_historic_client_registry =
-    get_registry<static_cast<size_t>(OperationTypeCode::historic_slow_client_request)>(); //ClientRequest::type)>();
+  const auto& historic_client_registry =
+    get_registry<static_cast<size_t>(OperationTypeCode::historic_client_request)>(); //ClientRequest::type)>();
   f->open_object_section("op_history");
-  f->dump_int("size", slow_historic_client_registry.size());
+  f->dump_int("size", historic_client_registry.size());
   // TODO: f->dump_int("duration", history_duration.load());
   // the intrusive list is configured to not store the size
+  std::multimap<utime_t,
+               const ClientRequest*,
+               std::greater<utime_t>> sorted_slowest_ops;
+  // iterating over the entire registry as a slow op could be also
+  // in the "recently added" part.
+  std::transform(std::begin(historic_client_registry),
+                std::end(historic_client_registry),
+                std::inserter(sorted_slowest_ops, std::end(sorted_slowest_ops)),
+                [] (const Operation& op) {
+                  const auto& cop = static_cast<const ClientRequest&>(op);
+                  return std::make_pair(get_duration(cop), &cop);
+                });
+  f->open_array_section("ops");
+  using crimson::common::local_conf;
   size_t ops_count = 0;
+  for (auto it = std::begin(sorted_slowest_ops);
+       ops_count < local_conf()->osd_op_history_slow_op_size
+          && it != std::end(sorted_slowest_ops);
+       ++it, ++ops_count)
   {
-    f->open_array_section("ops");
-    for (const auto& op : slow_historic_client_registry) {
-      op.dump(f);
-      ++ops_count;
-    }
-    f->close_section();
+    it->second->dump(f);
   }
   f->close_section();
   return ops_count;
index b80945e258d2e6468afe7b2d6582a4e81c6ccffb..1064a5c8e03e1cfef1a19691b64d08378527708b 100644 (file)
@@ -50,7 +50,6 @@ enum class OperationTypeCode {
   background_recovery_sub,
   internal_client_request,
   historic_client_request,
-  historic_slow_client_request,
   logmissing_request,
   logmissing_request_reply,
   snaptrim_event,
@@ -73,7 +72,6 @@ static constexpr const char* const OP_NAMES[] = {
   "background_recovery_sub",
   "internal_client_request",
   "historic_client_request",
-  "historic_slow_client_request",
   "logmissing_request",
   "logmissing_request_reply",
   "snaptrim_event",
@@ -232,6 +230,7 @@ struct OSDOperationRegistry : OperationRegistryT<
   size_t dump_slowest_historic_client_requests(ceph::Formatter* f) const;
 
 private:
+  op_list::const_iterator last_of_recents;
   size_t num_recent_ops = 0;
   size_t num_slow_ops = 0;
 };