From 2fe14a7568cb6c63d185812ba6a79ba4aebce78d Mon Sep 17 00:00:00 2001 From: Abhishek Lekshmanan Date: Fri, 17 Apr 2020 17:11:01 +0200 Subject: [PATCH] rgw: reshard: skip stale bucket id entries from reshard queue If we encounter a reshard queue entry that has an older ID compared to the bucket's current ID, it means that some other process or a manual reshard has already processed this entry, so skip processing the entry this time. An alternative is to verify that the num_shards we have in the queue is >= the current number of shards, but this would mean that we may reshard a recently manually resharded bucket again, which might not be intended. Fixes: https://tracker.ceph.com/issues/45134 Signed-off-by: Abhishek Lekshmanan (cherry picked from commit 02664fc091674e28233559cd1c42f954d5776d86) --- src/rgw/rgw_reshard.cc | 23 ++++++++++++++--------- 1 file changed, 14 insertions(+), 9 deletions(-) diff --git a/src/rgw/rgw_reshard.cc b/src/rgw/rgw_reshard.cc index e20d8a392388d..e55b0d1f5a680 100644 --- a/src/rgw/rgw_reshard.cc +++ b/src/rgw/rgw_reshard.cc @@ -641,7 +641,6 @@ int RGWBucketReshard::do_reshard(int num_shards, return ret; } } - if (verbose_json_out) { formatter->close_section(); formatter->flush(*out); @@ -1021,19 +1020,25 @@ int RGWReshard::process_single_logshard(int logshard_num) entry.tenant, entry.bucket_name, bucket_info, nullptr, null_yield, &attrs); - if (ret < 0) { - ldout(cct, 0) << __func__ << - ": Error in get_bucket_info for bucket " << entry.bucket_name << - ": " << cpp_strerror(-ret) << dendl; - if (ret != -ENOENT) { - // any error other than ENOENT will abort - return ret; + if (ret < 0 || bucket_info.bucket.bucket_id != entry.bucket_id) { + if (ret < 0) { + ldout(cct, 0) << __func__ << + ": Error in get_bucket_info for bucket " << entry.bucket_name << + ": " << cpp_strerror(-ret) << dendl; + if (ret != -ENOENT) { + // any error other than ENOENT will abort + return ret; + } + } else { + ldout(cct,0) << __func__ << + ": Bucket: " << entry.bucket_name << + " 
already resharded by someone, skipping " << dendl; } // we've encountered a reshard queue entry for an apparently // non-existent bucket; let's try to recover by cleaning up ldout(cct, 0) << __func__ << - ": removing reshard queue entry for non-existent bucket " << + ": removing reshard queue entry for a resharded or non-existent bucket" << entry.bucket_name << dendl; ret = remove(entry); -- 2.39.5