From: Adam C. Emerson Date: Wed, 12 Mar 2025 15:53:43 +0000 (-0400) Subject: rgw: Try to handle unwatch errors sensibly X-Git-Tag: testing/wip-khiremat-testing-20250422.120708-squid-debug~34^2 X-Git-Url: http://git.apps.os.sepia.ceph.com/?a=commitdiff_plain;h=ff248d7ed94cc441a2e7f3254cb8c0d53d3997d1;p=ceph-ci.git rgw: Try to handle unwatch errors sensibly If we get `-ENOENT` from unwatch, just stop trying to renew. Otherwise, schedule retry on watch failure, but add a maximum retry count. Fixes: https://tracker.ceph.com/issues/70422 Signed-off-by: Adam C. Emerson (cherry picked from commit 34366f0f0d8e13eb8fef1e253794303b699e08b2) Fixes: https://tracker.ceph.com/issues/70526 Signed-off-by: Adam C. Emerson --- diff --git a/src/rgw/services/svc_notify.cc b/src/rgw/services/svc_notify.cc index 5593dee9ae2..baa3512c1b7 100644 --- a/src/rgw/services/svc_notify.cc +++ b/src/rgw/services/svc_notify.cc @@ -33,6 +33,7 @@ class RGWWatcher : public DoutPrefixProvider , public librados::WatchCtx2 { int register_ret{0}; bool unregister_done{false}; librados::AioCompletion *register_completion{nullptr}; + uint64_t retries = 0; class C_ReinitWatch : public Context { RGWWatcher *watcher; @@ -86,15 +87,28 @@ public: } void reinit() { + if (retries > 100) { + lderr(cct) << "ERROR: Looping in attempt to reinit watch. Halting." + << dendl; + abort(); + } if(!unregister_done) { int ret = unregister_watch(); if (ret < 0) { ldout(cct, 0) << "ERROR: unregister_watch() returned ret=" << ret << dendl; + if (-2 == ret) { + // Going down there is no such watch. + return; + } else { + ++retries; + svc->schedule_context(new C_ReinitWatch(this)); + } } } int ret = register_watch(); if (ret < 0) { ldout(cct, 0) << "ERROR: register_watch() returned ret=" << ret << dendl; + ++retries; svc->schedule_context(new C_ReinitWatch(this)); return; }