git.apps.os.sepia.ceph.com Git - ceph.git/commitdiff
rgw: reshard: skip stale bucket id entries from reshard queue 34734/head
author Abhishek Lekshmanan <abhishek@suse.com>
Fri, 17 Apr 2020 15:11:01 +0000 (17:11 +0200)
committer Nathan Cutler <ncutler@suse.com>
Fri, 24 Apr 2020 13:03:24 +0000 (15:03 +0200)
If we encounter a reshard queue entry whose bucket ID is older than the
bucket's current ID, it means that some other process or a manual reshard has
already processed this entry, so skip processing the entry this time. An
alternative is to verify that the num_shards we have in the queue is >= the
current number of shards, but this would mean that we may reshard a recently
manually resharded bucket again, which might not be intended.

Fixes: https://tracker.ceph.com/issues/45134
Signed-off-by: Abhishek Lekshmanan <abhishek@suse.com>
(cherry picked from commit 02664fc091674e28233559cd1c42f954d5776d86)

src/rgw/rgw_reshard.cc

index e20d8a392388da2a8d48dc2e76499d891f422b53..e55b0d1f5a680671afd97d29f50342e5e5a4dd5d 100644 (file)
@@ -641,7 +641,6 @@ int RGWBucketReshard::do_reshard(int num_shards,
            return ret;
          }
        }
-
        if (verbose_json_out) {
          formatter->close_section();
          formatter->flush(*out);
@@ -1021,19 +1020,25 @@ int RGWReshard::process_single_logshard(int logshard_num)
                                                 entry.tenant, entry.bucket_name,
                                                 bucket_info, nullptr,
                                                 null_yield, &attrs);
-       if (ret < 0) {
-         ldout(cct, 0) <<  __func__ <<
-           ": Error in get_bucket_info for bucket " << entry.bucket_name <<
-           ": " << cpp_strerror(-ret) << dendl;
-         if (ret != -ENOENT) {
-           // any error other than ENOENT will abort
-           return ret;
+       if (ret < 0 || bucket_info.bucket.bucket_id != entry.bucket_id) {
+         if (ret < 0) {
+           ldout(cct, 0) <<  __func__ <<
+             ": Error in get_bucket_info for bucket " << entry.bucket_name <<
+             ": " << cpp_strerror(-ret) << dendl;
+           if (ret != -ENOENT) {
+             // any error other than ENOENT will abort
+             return ret;
+           }
+         } else {
+           ldout(cct,0) << __func__ <<
+             ": Bucket: " << entry.bucket_name <<
+             " already resharded by someone, skipping " << dendl;
          }
 
          // we've encountered a reshard queue entry for an apparently
          // non-existent bucket; let's try to recover by cleaning up
          ldout(cct, 0) <<  __func__ <<
-           ": removing reshard queue entry for non-existent bucket " <<
+           ": removing reshard queue entry for a resharded or non-existent bucket" <<
            entry.bucket_name << dendl;
 
          ret = remove(entry);