git.apps.os.sepia.ceph.com Git - ceph.git/commitdiff
osd/osd_types: use appropriate cost value for PullOp
author: Sridhar Seshasayee <sseshasa@redhat.com>
Fri, 3 Feb 2023 05:36:06 +0000 (11:06 +0530)
committer: Sridhar Seshasayee <sseshasa@redhat.com>
Thu, 27 Apr 2023 13:10:43 +0000 (18:40 +0530)
See included comments -- previous values did not account for object
size.  This causes problems for mclock, which is much stricter
in how it interprets costs.

Fixes: https://tracker.ceph.com/issues/58607
Signed-off-by: Samuel Just <sjust@redhat.com>
Signed-off-by: Sridhar Seshasayee <sseshasa@redhat.com>
src/osd/osd_types.cc
src/osd/osd_types.h

index e78500ba370807c90a15b92e9dad9843962a1a30..14694de195b62da27b80b0e67367915c0454ec58 100644 (file)
@@ -15,6 +15,7 @@
  *
  */
 
+#include <algorithm>
 #include <list>
 #include <map>
 #include <ostream>
@@ -6807,8 +6808,20 @@ ostream& operator<<(ostream& out, const PullOp &op)
 
 uint64_t PullOp::cost(CephContext *cct) const
 {
-  return cct->_conf->osd_push_per_object_cost +
-    cct->_conf->osd_recovery_max_chunk;
+  if (cct->_conf->osd_op_queue == "mclock_scheduler") {
+    return std::clamp<uint64_t>(
+      recovery_progress.estimate_remaining_data_to_recover(recovery_info),
+      1,
+      cct->_conf->osd_recovery_max_chunk);
+  } else {
+    /* Legacy, size-independent cost (osd_push_per_object_cost +
+     * osd_recovery_max_chunk), retained for WeightedPriorityQueue, which
+     * seems to require very large costs for several messages in order to do
+     * any meaningful amount of throttling.  Remove this branch after Reef.
+     */
+    return cct->_conf->osd_push_per_object_cost +
+      cct->_conf->osd_recovery_max_chunk;
+  }
 }
 
 // -- PushOp --
index ce370c0b7dcb81166803f1a3c1c6f316c250375b..afed5fa835103075390aca1331aa3dc311e307dd 100644 (file)
@@ -6029,6 +6029,11 @@ struct ObjectRecoveryProgress {
       omap_complete;
   }
 
+  uint64_t estimate_remaining_data_to_recover(const ObjectRecoveryInfo& info) const {
+    // Overestimates for clones (copy_subset is not traversed); 0 if already at or past info.size.
+    return info.size > data_recovered_to ? info.size - data_recovered_to : 0;
+  }
+
   static void generate_test_instances(std::list<ObjectRecoveryProgress*>& o);
   void encode(ceph::buffer::list &bl) const;
   void decode(ceph::buffer::list::const_iterator &bl);