git-server-git.apps.pok.os.sepia.ceph.com Git - ceph-ci.git/commitdiff
rgw: Try to handle unwatch errors sensibly
author: Adam C. Emerson <aemerson@redhat.com>
Wed, 12 Mar 2025 15:53:43 +0000 (11:53 -0400)
committer: Adam C. Emerson <aemerson@redhat.com>
Wed, 12 Mar 2025 17:09:42 +0000 (13:09 -0400)
If we get `-ENOENT` from unwatch, just stop trying to renew.

Otherwise, schedule a retry on watch failure, but add a maximum retry
count.

Fixes: https://tracker.ceph.com/issues/70422
Signed-off-by: Adam C. Emerson <aemerson@redhat.com>
src/rgw/services/svc_notify.cc

index dcd1668b17e7a3e6bd38e6bb7b1100ec502f2831..866ea7fd4ac119b854f504d428195fc53fee1c6f 100644 (file)
@@ -35,6 +35,7 @@ class RGWWatcher : public DoutPrefixProvider , public librados::WatchCtx2 {
   int register_ret{0};
   bool unregister_done{false};
   librados::AioCompletion *register_completion{nullptr};
+  uint64_t retries = 0;
 
   class C_ReinitWatch : public Context {
     RGWWatcher *watcher;
@@ -88,15 +89,28 @@ public:
   }
 
   void reinit() {
+    if (retries > 100) {
+      lderr(cct) << "ERROR: Looping in attempt to reinit watch. Halting."
+                << dendl;
+      abort();
+    }
     if(!unregister_done) {
       int ret = unregister_watch();
       if (ret < 0) {
         ldout(cct, 0) << "ERROR: unregister_watch() returned ret=" << ret << dendl;
+       if (-2 == ret) {
+         // Going down there is no such watch.
+         return;
+       } else {
+         ++retries;
+         svc->schedule_context(new C_ReinitWatch(this));
+       }
       }
     }
     int ret = register_watch();
     if (ret < 0) {
       ldout(cct, 0) << "ERROR: register_watch() returned ret=" << ret << dendl;
+      ++retries;
       svc->schedule_context(new C_ReinitWatch(this));
       return;
     }