git.apps.os.sepia.ceph.com Git - ceph.git/commitdiff
rgw: auto-clean reshard queue entries for non-existent buckets 33300/head
author J. Eric Ivancich <ivancich@redhat.com>
Fri, 1 Nov 2019 18:17:53 +0000 (14:17 -0400)
committer Nathan Cutler <ncutler@suse.com>
Fri, 14 Feb 2020 09:30:58 +0000 (10:30 +0100)
It is possible for a bucket to be added to the reshard queue and then
to be removed before its entry in the reshard queue is processed.
Currently, when this is encountered, processing of the reshard queue
errors out.

This fix recognizes when the reshard queue entry refers to a
non-existent bucket and removes the entry from the reshard queue,
allowing processing of the queue to continue.

Signed-off-by: J. Eric Ivancich <ivancich@redhat.com>
(cherry picked from commit 304ad8803292e92911e86b84c39a6c2ff0d0121b)

Conflicts:
src/rgw/rgw_reshard.cc
- omitted cosmetic change in get_bucket_info arg list

src/rgw/rgw_reshard.cc

index c01e391bf8554548e792e99df7643068837b22f1..bbe45d936fe5ebc6165825e8dacc0d3294c7449a 100644 (file)
@@ -1063,9 +1063,31 @@ int RGWReshard::process_single_logshard(int logshard_num)
        ret = store->get_bucket_info(obj_ctx, entry.tenant, entry.bucket_name,
                                     bucket_info, nullptr, &attrs);
        if (ret < 0) {
-         ldout(cct, 0) <<  __func__ << ": Error in get_bucket_info: " <<
-           cpp_strerror(-ret) << dendl;
-         return -ret;
+         ldout(cct, 0) <<  __func__ <<
+           ": Error in get_bucket_info for bucket " << entry.bucket_name <<
+           ": " << cpp_strerror(-ret) << dendl;
+         if (ret != -ENOENT) {
+           // any error other than ENOENT will abort
+           return ret;
+         }
+
+         // we've encountered a reshard queue entry for an apparently
+         // non-existent bucket; let's try to recover by cleaning up
+         ldout(cct, 0) <<  __func__ <<
+           ": removing reshard queue entry for non-existent bucket " <<
+           entry.bucket_name << dendl;
+
+         ret = remove(entry);
+         if (ret < 0) {
+           ldout(cct, 0) << __func__ <<
+             ": Error removing non-existent bucket " <<
+             entry.bucket_name << " from resharding queue: " <<
+             cpp_strerror(-ret) << dendl;
+           return ret;
+         }
+
+         // we cleaned up, move on to the next entry
+         goto finished_entry;
        }
 
        RGWBucketReshard br(store, bucket_info, attrs, nullptr);
@@ -1075,23 +1097,26 @@ int RGWReshard::process_single_logshard(int logshard_num)
        ret = br.execute(entry.new_num_shards, max_entries, true, nullptr,
                         formatter, this);
        if (ret < 0) {
-         ldout (store->ctx(), 0) <<  __func__ <<
-           "ERROR in reshard_bucket " << entry.bucket_name << ":" <<
+         ldout(store->ctx(), 0) <<  __func__ <<
+           ": Error during resharding bucket " << entry.bucket_name << ":" <<
            cpp_strerror(-ret)<< dendl;
          return ret;
        }
 
-       ldout (store->ctx(), 20) <<  " removing entry" << entry.bucket_name <<
+       ldout(store->ctx(), 20) << __func__ <<
+         " removing reshard queue entry for bucket " << entry.bucket_name <<
          dendl;
 
        ret = remove(entry);
        if (ret < 0) {
-         ldout(cct, 0)<< __func__ << ":Error removing bucket " <<
-           entry.bucket_name << " for resharding queue: " <<
+         ldout(cct, 0) << __func__ << ": Error removing bucket " <<
+           entry.bucket_name << " from resharding queue: " <<
            cpp_strerror(-ret) << dendl;
          return ret;
        }
-      }
+      } // if new instance id is empty
+
+    finished_entry:
 
       Clock::time_point now = Clock::now();
       if (logshard_lock.should_renew(now)) {
@@ -1102,7 +1127,7 @@ int RGWReshard::process_single_logshard(int logshard_num)
       }
 
       entry.get_key(&marker);
-    }
+    } // entry for loop
   } while (truncated);
 
   logshard_lock.unlock();