From ff248d7ed94cc441a2e7f3254cb8c0d53d3997d1 Mon Sep 17 00:00:00 2001 From: "Adam C. Emerson" Date: Wed, 12 Mar 2025 11:53:43 -0400 Subject: [PATCH] rgw: Try to handle unwatch errors sensibly If we get `-ENOENT` from unwatch, just stop trying to renew. Otherwise, schedule retry on watch failure, but add a maximum retry count. Fixes: https://tracker.ceph.com/issues/70422 Signed-off-by: Adam C. Emerson (cherry picked from commit 34366f0f0d8e13eb8fef1e253794303b699e08b2) Fixes: https://tracker.ceph.com/issues/70526 Signed-off-by: Adam C. Emerson --- src/rgw/services/svc_notify.cc | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/src/rgw/services/svc_notify.cc b/src/rgw/services/svc_notify.cc index 5593dee9ae2..baa3512c1b7 100644 --- a/src/rgw/services/svc_notify.cc +++ b/src/rgw/services/svc_notify.cc @@ -33,6 +33,7 @@ class RGWWatcher : public DoutPrefixProvider , public librados::WatchCtx2 { int register_ret{0}; bool unregister_done{false}; librados::AioCompletion *register_completion{nullptr}; + uint64_t retries = 0; class C_ReinitWatch : public Context { RGWWatcher *watcher; @@ -86,15 +87,28 @@ public: } void reinit() { + if (retries > 100) { + lderr(cct) << "ERROR: Looping in attempt to reinit watch. Halting." + << dendl; + abort(); + } if(!unregister_done) { int ret = unregister_watch(); if (ret < 0) { ldout(cct, 0) << "ERROR: unregister_watch() returned ret=" << ret << dendl; + if (-2 == ret) { + // Going down there is no such watch. + return; + } else { + ++retries; + svc->schedule_context(new C_ReinitWatch(this)); + } } } int ret = register_watch(); if (ret < 0) { ldout(cct, 0) << "ERROR: register_watch() returned ret=" << ret << dendl; + ++retries; svc->schedule_context(new C_ReinitWatch(this)); return; } -- 2.39.5