]> git.apps.os.sepia.ceph.com Git - ceph-ci.git/commitdiff
rgw: Try to handle unwatch errors sensibly
author Adam C. Emerson <aemerson@redhat.com>
Wed, 12 Mar 2025 15:53:43 +0000 (11:53 -0400)
committer Adam C. Emerson <aemerson@redhat.com>
Wed, 19 Mar 2025 19:50:18 +0000 (15:50 -0400)
If we get `-ENOENT` from unwatch, just stop trying to renew.

Otherwise, schedule a retry on watch failure, but add a maximum retry
count.

Fixes: https://tracker.ceph.com/issues/70422
Signed-off-by: Adam C. Emerson <aemerson@redhat.com>
(cherry picked from commit 34366f0f0d8e13eb8fef1e253794303b699e08b2)

Fixes: https://tracker.ceph.com/issues/70526
Signed-off-by: Adam C. Emerson <aemerson@redhat.com>
src/rgw/services/svc_notify.cc

index 5593dee9ae291f0d124cbe10037eaa58388006a6..baa3512c1b768789432e19d1c0a10cbf9f2cdfc6 100644 (file)
@@ -33,6 +33,7 @@ class RGWWatcher : public DoutPrefixProvider , public librados::WatchCtx2 {
   int register_ret{0};
   bool unregister_done{false};
   librados::AioCompletion *register_completion{nullptr};
+  uint64_t retries = 0;
 
   class C_ReinitWatch : public Context {
     RGWWatcher *watcher;
@@ -86,15 +87,28 @@ public:
   }
 
   void reinit() {
+    if (retries > 100) {
+      lderr(cct) << "ERROR: Looping in attempt to reinit watch. Halting."
+                << dendl;
+      abort();
+    }
     if(!unregister_done) {
       int ret = unregister_watch();
       if (ret < 0) {
         ldout(cct, 0) << "ERROR: unregister_watch() returned ret=" << ret << dendl;
+       if (-2 == ret) {
+         // Going down there is no such watch.
+         return;
+       } else {
+         ++retries;
+         svc->schedule_context(new C_ReinitWatch(this));
+       }
       }
     }
     int ret = register_watch();
     if (ret < 0) {
       ldout(cct, 0) << "ERROR: register_watch() returned ret=" << ret << dendl;
+      ++retries;
       svc->schedule_context(new C_ReinitWatch(this));
       return;
     }