From: Adam C. Emerson Date: Wed, 12 Mar 2025 15:53:43 +0000 (-0400) Subject: rgw: Try to handle unwatch errors sensibly X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=c95ea88269dfdc4d6a5550786ebe223cf9d465c8;p=ceph.git rgw: Try to handle unwatch errors sensibly If we get `-ENOENT` from unwatch just stop trying to renew. Otherwise, schedule retry on watch failure, but add a maximum retry timeout. Fixes: https://tracker.ceph.com/issues/70422 Signed-off-by: Adam C. Emerson (cherry picked from commit 34366f0f0d8e13eb8fef1e253794303b699e08b2) Fixes: https://tracker.ceph.com/issues/70527 Signed-off-by: Adam C. Emerson --- diff --git a/src/rgw/services/svc_notify.cc b/src/rgw/services/svc_notify.cc index 43f84ed0a4f7..1c0a3b1e2f3d 100644 --- a/src/rgw/services/svc_notify.cc +++ b/src/rgw/services/svc_notify.cc @@ -34,6 +34,7 @@ class RGWWatcher : public DoutPrefixProvider , public librados::WatchCtx2 { int register_ret{0}; bool unregister_done{false}; librados::AioCompletion *register_completion{nullptr}; + uint64_t retries = 0; class C_ReinitWatch : public Context { RGWWatcher *watcher; @@ -86,15 +87,28 @@ public: } void reinit() { + if (retries > 100) { + lderr(cct) << "ERROR: Looping in attempt to reinit watch. Halting." + << dendl; + abort(); + } if(!unregister_done) { int ret = unregister_watch(); if (ret < 0) { ldout(cct, 0) << "ERROR: unregister_watch() returned ret=" << ret << dendl; + if (-2 == ret) { + // Going down there is no such watch. + return; + } else { + ++retries; + svc->schedule_context(new C_ReinitWatch(this)); + } } } int ret = register_watch(); if (ret < 0) { ldout(cct, 0) << "ERROR: register_watch() returned ret=" << ret << dendl; + ++retries; svc->schedule_context(new C_ReinitWatch(this)); return; }