From ef4f0d5369e85209da5b18a3ad5ee0177590018d Mon Sep 17 00:00:00 2001 From: Samuel Just Date: Thu, 13 Feb 2025 04:16:47 +0000 Subject: [PATCH] dmclock/.../dmclock_server: do not clean clients with requests PriorityQueueBase::do_clean() shouldn't remove ClientRec instances which still have queued requests. Otherwise, very low priority clients might end up having requests actually lost, which shouldn't be possible. In the OSD, this resulted in PGRecovery items being lost if queued with background_best_effort while expanding a cluster. Such items can legitimately sit in the queue for a long period of time, as they represent background data migration which is allowed to be starved by an aggressive client workload. Dropping the items broke an assumption in the OSD that all items enqueued would eventually be dequeued, resulting in resources being leaked. Fixes: https://tracker.ceph.com/issues/61594 Signed-off-by: Samuel Just (cherry picked from commit 35cf0f6c1355ff420c431340ac36d9a3c029d6c2) --- src/dmclock/src/dmclock_server.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/dmclock/src/dmclock_server.h b/src/dmclock/src/dmclock_server.h index ee9f66f582893..cee26f68343e7 100644 --- a/src/dmclock/src/dmclock_server.h +++ b/src/dmclock/src/dmclock_server.h @@ -1241,7 +1241,8 @@ namespace crimson { if (erase_point > 0 || idle_point > 0) { for (auto i = client_map.begin(); i != client_map.end(); /* empty */) { auto i2 = i++; - if (erase_point && + if (!(i2->second->has_request()) && + erase_point && erased_num < erase_max && i2->second->last_tick <= erase_point) { delete_from_heaps(i2->second); -- 2.39.5