From 573dc5f4aba16790a80989ba252ef17fbcb6e29a Mon Sep 17 00:00:00 2001 From: Shilpa Jagannath Date: Mon, 23 Mar 2026 16:38:34 +0000 Subject: [PATCH] rgw/multisite: use aio_operate for RGWSimpleRadosLockCR/UnlockCR RGWContinuousLeaseCR renews the sync lock every interval/2 by calling RGWSimpleRadosLockCR, which previously queued an RGWAsyncLockSystemObj request via async_rados->queue(). At a large scale, when the async thread pool is fully saturated, the cr thread can block, stalling lock renewal for extended periods of time until the lock eventually expires. Fix this by allowing RGWSimpleRadosLockCR and RGWSimpleRadosUnlockCR to use aio_operate directly, so that they no longer have to queue behind other async requests. Signed-off-by: Shilpa Jagannath --- src/rgw/driver/rados/rgw_cr_rados.cc | 64 +++++++++++++++++----------- src/rgw/driver/rados/rgw_cr_rados.h | 8 ++-- 2 files changed, 44 insertions(+), 28 deletions(-) diff --git a/src/rgw/driver/rados/rgw_cr_rados.cc b/src/rgw/driver/rados/rgw_cr_rados.cc index fe44103a649..c1a3f1c763c 100644 --- a/src/rgw/driver/rados/rgw_cr_rados.cc +++ b/src/rgw/driver/rados/rgw_cr_rados.cc @@ -522,33 +522,43 @@ RGWSimpleRadosLockCR::RGWSimpleRadosLockCR(RGWAsyncRadosProcessor *_async_rados, lock_name(_lock_name), cookie(_cookie), duration(_duration), - obj(_obj), - req(nullptr) + obj(_obj) { set_description() << "rados lock dest=" << obj << " lock=" << lock_name << " cookie=" << cookie << " duration=" << duration; } void RGWSimpleRadosLockCR::request_cleanup() { - if (req) { - req->finish(); - req = NULL; - } } int RGWSimpleRadosLockCR::send_request(const DoutPrefixProvider *dpp) { + int r = store->getRados()->get_raw_obj_ref(dpp, obj, &ref); + if (r < 0) { + ldpp_dout(dpp, -1) << "ERROR: failed to get ref for (" << obj << ") ret=" << r << dendl; + return r; + } + set_status() << "sending request"; - req = new RGWAsyncLockSystemObj(this, stack->create_completion_notifier(), - store, NULL, obj, lock_name, cookie, duration); - async_rados->queue(req); - return 0; 
+ rados::cls::lock::Lock l(lock_name); + utime_t dur(duration, 0); + l.set_duration(dur); + l.set_cookie(cookie); + l.set_may_renew(true); + + librados::ObjectWriteOperation op; + l.lock_exclusive(&op); + + cn = stack->create_completion_notifier(); + return ref.ioctx.aio_operate(ref.obj.oid, cn->completion(), &op); } int RGWSimpleRadosLockCR::request_complete() { - set_status() << "request complete; ret=" << req->get_ret_status(); - return req->get_ret_status(); + int r = cn->completion()->get_return_value(); + set_status() << "request complete; ret=" << r; + return r; } RGWSimpleRadosUnlockCR::RGWSimpleRadosUnlockCR(RGWAsyncRadosProcessor *_async_rados, rgw::sal::RadosStore* _store, @@ -559,34 +569,40 @@ RGWSimpleRadosUnlockCR::RGWSimpleRadosUnlockCR(RGWAsyncRadosProcessor *_async_ra store(_store), lock_name(_lock_name), cookie(_cookie), - obj(_obj), - req(NULL) + obj(_obj) { set_description() << "rados unlock dest=" << obj << " lock=" << lock_name << " cookie=" << cookie; } void RGWSimpleRadosUnlockCR::request_cleanup() { - if (req) { - req->finish(); - req = NULL; - } } int RGWSimpleRadosUnlockCR::send_request(const DoutPrefixProvider *dpp) { + int r = store->getRados()->get_raw_obj_ref(dpp, obj, &ref); + if (r < 0) { + ldpp_dout(dpp, -1) << "ERROR: failed to get ref for (" << obj << ") ret=" << r << dendl; + return r; + } + set_status() << "sending request"; - req = new RGWAsyncUnlockSystemObj(this, stack->create_completion_notifier(), - store, NULL, obj, lock_name, cookie); - async_rados->queue(req); - return 0; + rados::cls::lock::Lock l(lock_name); + l.set_cookie(cookie); + + librados::ObjectWriteOperation op; + l.unlock(&op); + + cn = stack->create_completion_notifier(); + return ref.ioctx.aio_operate(ref.obj.oid, cn->completion(), &op); } int RGWSimpleRadosUnlockCR::request_complete() { - set_status() << "request complete; ret=" << req->get_ret_status(); - return req->get_ret_status(); + int r = cn->completion()->get_return_value(); + set_status() << 
"request complete; ret=" << r; + return r; } int RGWOmapAppend::operate(const DoutPrefixProvider *dpp) { diff --git a/src/rgw/driver/rados/rgw_cr_rados.h b/src/rgw/driver/rados/rgw_cr_rados.h index df40db5dbc4..f49523c1164 100644 --- a/src/rgw/driver/rados/rgw_cr_rados.h +++ b/src/rgw/driver/rados/rgw_cr_rados.h @@ -784,8 +784,8 @@ class RGWSimpleRadosLockCR : public RGWSimpleCoroutine { uint32_t duration; rgw_raw_obj obj; - - RGWAsyncLockSystemObj *req; + rgw_rados_ref ref; + boost::intrusive_ptr<RGWAioCompletionNotifier> cn; public: RGWSimpleRadosLockCR(RGWAsyncRadosProcessor *_async_rados, rgw::sal::RadosStore* _store, @@ -816,8 +816,8 @@ class RGWSimpleRadosUnlockCR : public RGWSimpleCoroutine { std::string cookie; rgw_raw_obj obj; - - RGWAsyncUnlockSystemObj *req; + rgw_rados_ref ref; + boost::intrusive_ptr<RGWAioCompletionNotifier> cn; public: RGWSimpleRadosUnlockCR(RGWAsyncRadosProcessor *_async_rados, rgw::sal::RadosStore* _store, -- 2.47.3