From 06f227a972950542f4ad1e8548373699907c83e1 Mon Sep 17 00:00:00 2001 From: "J. Eric Ivancich" Date: Fri, 1 Nov 2019 14:17:53 -0400 Subject: [PATCH] rgw: auto-clean reshard queue entries for non-existent buckets It is possible for a bucket to be added to the reshard queue and then to be removed before its entry in the reshard queue is processed. Currently, when this is encountered, processing of the reshard queue errors out. This fix recognizes when a reshard queue entry refers to a non-existent bucket and removes the entry from the reshard queue, allowing processing of the queue to continue. Signed-off-by: J. Eric Ivancich (cherry picked from commit 304ad8803292e92911e86b84c39a6c2ff0d0121b) Conflicts: src/rgw/rgw_reshard.cc - omitted cosmetic change in get_bucket_info arg list --- src/rgw/rgw_reshard.cc | 45 ++++++++++++++++++++++++++++++++---------- 1 file changed, 35 insertions(+), 10 deletions(-) diff --git a/src/rgw/rgw_reshard.cc b/src/rgw/rgw_reshard.cc index c01e391bf8554..bbe45d936fe5e 100644 --- a/src/rgw/rgw_reshard.cc +++ b/src/rgw/rgw_reshard.cc @@ -1063,9 +1063,31 @@ int RGWReshard::process_single_logshard(int logshard_num) ret = store->get_bucket_info(obj_ctx, entry.tenant, entry.bucket_name, bucket_info, nullptr, &attrs); if (ret < 0) { - ldout(cct, 0) << __func__ << ": Error in get_bucket_info: " << - cpp_strerror(-ret) << dendl; - return -ret; + ldout(cct, 0) << __func__ << + ": Error in get_bucket_info for bucket " << entry.bucket_name << + ": " << cpp_strerror(-ret) << dendl; + if (ret != -ENOENT) { + // any error other than ENOENT will abort + return ret; + } + + // we've encountered a reshard queue entry for an apparently + // non-existent bucket; let's try to recover by cleaning up + ldout(cct, 0) << __func__ << + ": removing reshard queue entry for non-existent bucket " << + entry.bucket_name << dendl; + + ret = remove(entry); + if (ret < 0) { + ldout(cct, 0) << __func__ << + ": Error removing non-existent bucket " << + entry.bucket_name << " 
from resharding queue: " << + cpp_strerror(-ret) << dendl; + return ret; + } + + // we cleaned up, move on to the next entry + goto finished_entry; } RGWBucketReshard br(store, bucket_info, attrs, nullptr); @@ -1075,23 +1097,26 @@ int RGWReshard::process_single_logshard(int logshard_num) ret = br.execute(entry.new_num_shards, max_entries, true, nullptr, formatter, this); if (ret < 0) { - ldout (store->ctx(), 0) << __func__ << - "ERROR in reshard_bucket " << entry.bucket_name << ":" << + ldout(store->ctx(), 0) << __func__ << + ": Error during resharding bucket " << entry.bucket_name << ":" << cpp_strerror(-ret)<< dendl; return ret; } - ldout (store->ctx(), 20) << " removing entry" << entry.bucket_name << + ldout(store->ctx(), 20) << __func__ << + " removing reshard queue entry for bucket " << entry.bucket_name << dendl; ret = remove(entry); if (ret < 0) { - ldout(cct, 0)<< __func__ << ":Error removing bucket " << - entry.bucket_name << " for resharding queue: " << + ldout(cct, 0) << __func__ << ": Error removing bucket " << + entry.bucket_name << " from resharding queue: " << cpp_strerror(-ret) << dendl; return ret; } - } + } // if new instance id is empty + + finished_entry: Clock::time_point now = Clock::now(); if (logshard_lock.should_renew(now)) { @@ -1102,7 +1127,7 @@ int RGWReshard::process_single_logshard(int logshard_num) } entry.get_key(&marker); - } + } // entry for loop } while (truncated); logshard_lock.unlock(); -- 2.39.5