uint64_t PushReplyOp::cost(CephContext *cct) const
{
-
- return cct->_conf->osd_push_per_object_cost +
- cct->_conf->osd_recovery_max_chunk;
+ if (cct->_conf->osd_op_queue == "mclock_scheduler") {
+ /* In general, we really never want to throttle PushReplyOp messages.
+ * As long as the object is smaller than osd_recovery_max_chunk (8M at
+ * the time of writing, so this is basically always true), processing
+ * the PushReplyOp incurs no further IO and simply permits the object
+ * to be written to once more.
+ *
+ * In the unlikely event that the object is larger than
+ * osd_recovery_max_chunk (again, 8M at the moment, so never for common
+ * configurations of rbd and virtually never for cephfs and rgw),
+ * we *still* want to push out the next portion immediately so that we can
+ * release the object for IO.
+ *
+ * The throttling for this operation on the primary occurs at the point
+ * where we queue the PGRecoveryContext, which calls into recover_missing
+ * and recover_backfill to initiate pushes.
+ * See OSD::queue_recovery_context.
+ */
+ return 1;
+ } else {
+ /* We retain this legacy behavior for WeightedPriorityQueue, which seems
+ * to require very large per-message costs in order to do any meaningful
+ * amount of throttling. This branch should be removed after Reef.
+ */
+ return cct->_conf->osd_push_per_object_cost +
+ cct->_conf->osd_recovery_max_chunk;
+ }
}
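
To put the two branches in perspective, here is a minimal standalone sketch, not Ceph code: the 8 KiB and 8 MiB figures are assumed defaults for osd_push_per_object_cost and osd_recovery_max_chunk (the latter matching the "8M" cited in the comment above), and the lambda is a hypothetical stand-in for the cost() method. It contrasts the unit cost mclock_scheduler sees with the roughly 8 MiB cost WeightedPriorityQueue sees per PushReplyOp:

#include <cstdint>
#include <iostream>
#include <string>

int main() {
  // Assumed defaults; hypothetical stand-ins for the Ceph config options
  // of the same names, not values read from a live cluster.
  const uint64_t osd_push_per_object_cost = 8ull << 10; // 8 KiB
  const uint64_t osd_recovery_max_chunk   = 8ull << 20; // 8 MiB

  auto cost = [&](const std::string &queue) -> uint64_t {
    if (queue == "mclock_scheduler")
      return 1; // unit cost: PushReplyOp is effectively never throttled
    return osd_push_per_object_cost + osd_recovery_max_chunk; // legacy WPQ cost
  };

  std::cout << "mclock_scheduler cost: " << cost("mclock_scheduler") << "\n"  // 1
            << "wpq cost:              " << cost("wpq") << "\n";              // 8396800
  return 0;
}

The six-orders-of-magnitude gap is the point of the change: mclock reasons about costs directly, so an artificially inflated cost would needlessly delay releasing the object for IO, while WPQ appears to need costs of this scale to throttle at all.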
// -- PullOp --