From 66d63176a80f0f5a86427f5effdef345fcea8110 Mon Sep 17 00:00:00 2001 From: "J. Eric Ivancich" Date: Fri, 1 Nov 2019 14:17:53 -0400 Subject: [PATCH] rgw: auto-clean reshard queue entries for non-existent buckets It is possible for a bucket to be added to the reshard queue and then to be removed before its entry in the reshard queue is processed. Currently, when this is encountered, processing of the reshard queue errors out. This fix recognizes when the reshard queue entry refers to a non-existent bucket and removes the entry from the reshard queue, allowing processing of the queue to continue. Signed-off-by: J. Eric Ivancich (cherry picked from commit 304ad8803292e92911e86b84c39a6c2ff0d0121b) Conflicts: src/rgw/rgw_reshard.cc - omit white-space change in get_bucket_info method call --- src/rgw/rgw_reshard.cc | 45 ++++++++++++++++++++++++++++++++---------- 1 file changed, 35 insertions(+), 10 deletions(-) diff --git a/src/rgw/rgw_reshard.cc b/src/rgw/rgw_reshard.cc index 12ba93bd9d86..356179462aad 100644 --- a/src/rgw/rgw_reshard.cc +++ b/src/rgw/rgw_reshard.cc @@ -1011,9 +1011,31 @@ int RGWReshard::process_single_logshard(int logshard_num) ret = store->get_bucket_info(obj_ctx, entry.tenant, entry.bucket_name, bucket_info, nullptr, &attrs); if (ret < 0) { - ldout(cct, 0) << __func__ << ": Error in get_bucket_info: " << - cpp_strerror(-ret) << dendl; - return -ret; + ldout(cct, 0) << __func__ << + ": Error in get_bucket_info for bucket " << entry.bucket_name << + ": " << cpp_strerror(-ret) << dendl; + if (ret != -ENOENT) { + // any error other than ENOENT will abort + return ret; + } + + // we've encountered a reshard queue entry for an apparently + // non-existent bucket; let's try to recover by cleaning up + ldout(cct, 0) << __func__ << + ": removing reshard queue entry for non-existent bucket " << + entry.bucket_name << dendl; + + ret = remove(entry); + if (ret < 0) { + ldout(cct, 0) << __func__ << + ": Error removing non-existent bucket " << + entry.bucket_name << " 
from resharding queue: " << + cpp_strerror(-ret) << dendl; + return ret; + } + + // we cleaned up, move on to the next entry + goto finished_entry; } RGWBucketReshard br(store, bucket_info, attrs, nullptr); @@ -1023,23 +1045,26 @@ int RGWReshard::process_single_logshard(int logshard_num) ret = br.execute(entry.new_num_shards, max_entries, true, nullptr, formatter, this); if (ret < 0) { - ldout (store->ctx(), 0) << __func__ << - "ERROR in reshard_bucket " << entry.bucket_name << ":" << + ldout(store->ctx(), 0) << __func__ << + ": Error during resharding bucket " << entry.bucket_name << ":" << cpp_strerror(-ret)<< dendl; return ret; } - ldout (store->ctx(), 20) << " removing entry" << entry.bucket_name << + ldout(store->ctx(), 20) << __func__ << + " removing reshard queue entry for bucket " << entry.bucket_name << dendl; ret = remove(entry); if (ret < 0) { - ldout(cct, 0)<< __func__ << ":Error removing bucket " << - entry.bucket_name << " for resharding queue: " << + ldout(cct, 0) << __func__ << ": Error removing bucket " << + entry.bucket_name << " from resharding queue: " << cpp_strerror(-ret) << dendl; return ret; } - } + } // if new instance id is empty + + finished_entry: Clock::time_point now = Clock::now(); if (logshard_lock.should_renew(now)) { @@ -1050,7 +1075,7 @@ int RGWReshard::process_single_logshard(int logshard_num) } entry.get_key(&marker); - } + } // entry for loop } while (truncated); logshard_lock.unlock(); -- 2.47.3