osd/osd_types: use appropriate cost value for PushReplyOp
author    Sridhar Seshasayee <sseshasa@redhat.com>
          Wed, 25 Jan 2023 08:19:59 +0000 (13:49 +0530)
committer Sridhar Seshasayee <sseshasa@redhat.com>
          Thu, 27 Apr 2023 13:09:41 +0000 (18:39 +0530)
See included comments -- the previous values did not account for object
size. This causes problems for mclock, which is much stricter in how it
interprets costs.

Fixes: https://tracker.ceph.com/issues/58529
Signed-off-by: Samuel Just <sjust@redhat.com>
Signed-off-by: Sridhar Seshasayee <sseshasa@redhat.com>
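
For context, here is a minimal standalone sketch of the branching this
commit introduces. It is not the Ceph sources: the Conf struct is a
stand-in for ceph.conf access, and the default values assumed for
osd_push_per_object_cost (8192) and osd_recovery_max_chunk (8 MiB, per
the commit's own comment) are illustrative.

    #include <cstdint>
    #include <iostream>
    #include <string>

    // Hypothetical stand-in for the relevant ceph.conf options.
    struct Conf {
      std::string osd_op_queue = "mclock_scheduler";
      uint64_t osd_push_per_object_cost = 8192;    // assumed default
      uint64_t osd_recovery_max_chunk = 8u << 20;  // 8 MiB, per the comment
    };

    uint64_t push_reply_cost(const Conf& conf) {
      if (conf.osd_op_queue == "mclock_scheduler") {
        // mclock interprets costs strictly; a PushReplyOp performs no
        // further IO of its own, so it is charged a nominal cost of 1.
        return 1;
      }
      // Legacy WeightedPriorityQueue behavior: large synthetic costs are
      // needed for the queue to throttle meaningfully.
      return conf.osd_push_per_object_cost + conf.osd_recovery_max_chunk;
    }

    int main() {
      Conf mclock;
      Conf wpq;
      wpq.osd_op_queue = "wpq";
      std::cout << "mclock cost: " << push_reply_cost(mclock) << "\n"  // 1
                << "wpq cost:    " << push_reply_cost(wpq) << "\n";    // 8396800
      return 0;
    }
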
src/osd/osd_types.cc

index 40b46a63df9905199829b9457d24267112fcf0ee..e78500ba370807c90a15b92e9dad9843962a1a30 100644 (file)
@@ -6716,9 +6716,34 @@ ostream& operator<<(ostream& out, const PushReplyOp &op)
 
 uint64_t PushReplyOp::cost(CephContext *cct) const
 {
-
-  return cct->_conf->osd_push_per_object_cost +
-    cct->_conf->osd_recovery_max_chunk;
+  if (cct->_conf->osd_op_queue == "mclock_scheduler") {
+    /* In general, we really never want to throttle PushReplyOp messages.
+     * As long as the object is smaller than osd_recovery_max_chunk (8M at
+     * time of writing this comment, so this is basically always true),
+     * processing the PushReplyOp costs no further IO and simply allows
+     * the object to be written to once more.
+     *
+     * In the unlikely event that the object is larger than
+     * osd_recovery_max_chunk (again, 8M at the moment, so never for common
+     * configurations of rbd and virtually never for cephfs and rgw),
+     * we *still* want to push out the next portion immediately so that we can
+     * release the object for IO.
+     *
+     * The throttling for this operation on the primary occurs at the point
+     * where we queue the PGRecoveryContext which calls into recover_missing
+     * and recover_backfill to initiate pushes.
+     * See OSD::queue_recovery_context.
+     */
+    return 1;
+  } else {
+    /* We retain this legacy behavior for WeightedPriorityQueue. It seems to
+     * require very large costs for several messages in order to do any
+     * meaningful amount of throttling.  This branch should be removed after
+     * Reef.
+     */
+    return cct->_conf->osd_push_per_object_cost +
+      cct->_conf->osd_recovery_max_chunk;
+  }
 }
 
 // -- PullOp --
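
To illustrate why the inflated legacy cost is a problem for a strict
cost-based scheduler, here is a hypothetical toy queue. It is not
Ceph's mClockScheduler; the Op struct, the refill rate, and the queued
costs are all assumptions for illustration. It replenishes a fixed
cost budget per tick and dispatches ops whose cost fits the budget: an
op carrying the legacy ~8M cost holds up everything queued behind it
for many ticks, while a cost of 1 is dispatched immediately.

    #include <cstdint>
    #include <deque>
    #include <iostream>

    // Hypothetical op with a scheduler-visible cost.
    struct Op { const char* name; uint64_t cost; };

    int main() {
      std::deque<Op> queue = {
        {"PushReplyOp (mclock cost)", 1},
        {"client write", 4096},
        {"PushReplyOp (legacy cost)", 8396800},
        {"client write", 4096},
      };
      uint64_t budget = 0;
      const uint64_t refill_per_tick = 1u << 20;  // 1 MiB of cost per tick (assumed)
      int tick = 0;
      while (!queue.empty()) {
        budget += refill_per_tick;
        ++tick;
        // Dispatch ops while the accumulated budget covers their cost; an op
        // with an inflated cost blocks everything behind it until enough
        // ticks have passed.
        while (!queue.empty() && queue.front().cost <= budget) {
          budget -= queue.front().cost;
          std::cout << "tick " << tick << ": dispatched "
                    << queue.front().name << "\n";
          queue.pop_front();
        }
      }
      return 0;
    }

In this sketch the second client write sits behind the legacy-cost
PushReplyOp for eight extra ticks, even though the PushReplyOp itself
does no IO -- the head-of-line delay the nominal cost of 1 avoids.
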