From b5b73e184c1eb9b796aeded7dcfbbab36c03f1de Mon Sep 17 00:00:00 2001 From: Abhishek Lekshmanan Date: Fri, 17 Apr 2020 17:11:01 +0200 Subject: [PATCH] rgw: reshard: skip stale bucket id entries from reshard queue If we encounter a reshard queue entry that has an older ID compared to the bucket's current ID, it means that some other process or a manual reshard has already processed this entry, so we skip processing the entry this time. An alternative is to verify that the num_shards we have in the queue is >= the current number of shards, but this would mean that we may reshard a recently manually resharded bucket again, which might not be intended. Fixes: https://tracker.ceph.com/issues/45134 Signed-off-by: Abhishek Lekshmanan (cherry picked from commit 02664fc091674e28233559cd1c42f954d5776d86) Conflicts: src/rgw/rgw_reshard.cc - immediately preceding "get_bucket_info" call looks different in nautilus --- src/rgw/rgw_reshard.cc | 23 ++++++++++++++--------- 1 file changed, 14 insertions(+), 9 deletions(-) diff --git a/src/rgw/rgw_reshard.cc b/src/rgw/rgw_reshard.cc index cfbada535cfc..eb86b220b881 100644 --- a/src/rgw/rgw_reshard.cc +++ b/src/rgw/rgw_reshard.cc @@ -608,7 +608,6 @@ int RGWBucketReshard::do_reshard(int num_shards, return ret; } } - if (verbose_json_out) { formatter->close_section(); formatter->flush(*out); @@ -1011,19 +1010,25 @@ int RGWReshard::process_single_logshard(int logshard_num) ret = store->get_bucket_info(obj_ctx, entry.tenant, entry.bucket_name, bucket_info, nullptr, &attrs); - if (ret < 0) { - ldout(cct, 0) << __func__ << - ": Error in get_bucket_info for bucket " << entry.bucket_name << - ": " << cpp_strerror(-ret) << dendl; - if (ret != -ENOENT) { - // any error other than ENOENT will abort - return ret; + if (ret < 0 || bucket_info.bucket.bucket_id != entry.bucket_id) { + if (ret < 0) { + ldout(cct, 0) << __func__ << + ": Error in get_bucket_info for bucket " << entry.bucket_name << + ": " << cpp_strerror(-ret) << dendl; + if (ret != -ENOENT) { + // any error
other than ENOENT will abort + return ret; + } + } else { + ldout(cct,0) << __func__ << + ": Bucket: " << entry.bucket_name << + " already resharded by someone, skipping " << dendl; } // we've encountered a reshard queue entry for an apparently // non-existent bucket; let's try to recover by cleaning up ldout(cct, 0) << __func__ << - ": removing reshard queue entry for non-existent bucket " << + ": removing reshard queue entry for a resharded or non-existent bucket" << entry.bucket_name << dendl; ret = remove(entry); -- 2.47.3