uint64_t PushReplyOp::cost(CephContext *cct) const
{
-
- return cct->_conf->osd_push_per_object_cost +
- cct->_conf->osd_recovery_max_chunk;
+ if (cct->_conf->osd_op_queue == "mclock_scheduler") {
+ /* In general, we really never want to throttle PushReplyOp messages.
+ * As long as the object is smaller than osd_recovery_max_chunk (8M at
+ * the time of writing, so this is basically always true), processing
+ * the PushReplyOp incurs no further IO and simply permits the object
+ * to be written to once more.
+ *
+ * In the unlikely event that the object is larger than
+ * osd_recovery_max_chunk (again, 8M at the moment, so never for common
+ * configurations of rbd and virtually never for cephfs and rgw),
+ * we *still* want to push out the next portion immediately so that we can
+ * release the object for IO.
+ *
+ * The throttling for this operation on the primary occurs at the point
+ * where we queue the PGRecoveryContext, which calls into recover_missing
+ * and recover_backfill to initiate pushes.
+ * See OSD::queue_recovery_context.
+ */
+ return 1;
+ } else {
+ /* We retain this legacy behavior for WeightedPriorityQueue, which seems
+ * to require very large per-message costs in order to do any meaningful
+ * amount of throttling. This branch should be removed after Reef.
+ */
+ return cct->_conf->osd_push_per_object_cost +
+ cct->_conf->osd_recovery_max_chunk;
+ }
}
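
To put the two branches in perspective, here is a minimal standalone sketch, not Ceph code: the 8 KiB and 8 MiB figures are assumed defaults for osd_push_per_object_cost and osd_recovery_max_chunk (the latter matching the "8M" cited in the comment above), and the lambda is a hypothetical stand-in for the cost() method. It contrasts the unit cost mclock_scheduler sees with the roughly 8 MiB cost WeightedPriorityQueue sees per PushReplyOp:

#include <cstdint>
#include <iostream>
#include <string>

int main() {
  // Assumed defaults; hypothetical stand-ins for the Ceph config options
  // of the same names, not values read from a live cluster.
  const uint64_t osd_push_per_object_cost = 8ull << 10; // 8 KiB
  const uint64_t osd_recovery_max_chunk   = 8ull << 20; // 8 MiB

  auto cost = [&](const std::string &queue) -> uint64_t {
    if (queue == "mclock_scheduler")
      return 1; // unit cost: PushReplyOp is effectively never throttled
    return osd_push_per_object_cost + osd_recovery_max_chunk; // legacy WPQ cost
  };

  std::cout << "mclock_scheduler cost: " << cost("mclock_scheduler") << "\n"  // 1
            << "wpq cost:              " << cost("wpq") << "\n";              // 8396800
  return 0;
}

The six-orders-of-magnitude gap is the point of the change: mclock reasons about costs directly, so an artificially inflated cost would needlessly delay releasing the object for IO, while WPQ appears to need costs of this scale to throttle at all.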
// -- PullOp --