From: Yuval Lifshitz Date: Wed, 8 Dec 2021 19:35:25 +0000 (+0200) Subject: rgw: fix reshard cancelling race condition X-Git-Url: http://git.apps.os.sepia.ceph.com/?a=commitdiff_plain;h=10a64bc6db456e34d587b1525087452e4a352b03;p=ceph.git rgw: fix reshard cancelling race condition This happens when resharding while objects are being uploaded. Test steps are here: https://gist.github.com/yuvalif/060f66f03511bff881e952287df3087b Signed-off-by: Yuval Lifshitz --- diff --git a/src/rgw/rgw_rados.cc b/src/rgw/rgw_rados.cc index 0c072d869ced4..c5cdd43098fed 100644 --- a/src/rgw/rgw_rados.cc +++ b/src/rgw/rgw_rados.cc @@ -6912,6 +6912,15 @@ int RGWRados::block_while_resharding(RGWRados::BucketShard *bs, ldpp_dout(dpp, 10) << __PRETTY_FUNCTION__ << ": was able to take reshard lock for bucket " << bucket_id << dendl; + // the reshard may have finished, so call clear_resharding() with its current bucket info + ret = fetch_new_bucket_info("trying_to_clear_resharding"); + if (ret < 0) { + reshard_lock.unlock(); + ldpp_dout(dpp, 0) << __func__ << + " ERROR: failed to update bucket info before clear resharding for bucket " << + bucket_id << dendl; + continue; // try again + } ret = RGWBucketReshard::clear_resharding(this->store, bucket_info, dpp); if (ret < 0) { reshard_lock.unlock(); diff --git a/src/rgw/rgw_reshard.cc b/src/rgw/rgw_reshard.cc index db37d000b8429..596421bbc99c0 100644 --- a/src/rgw/rgw_reshard.cc +++ b/src/rgw/rgw_reshard.cc @@ -463,7 +463,11 @@ static int cancel_reshard(rgw::sal::RadosStore* store, ret = 0; // non-fatal error } - return revert_target_layout(store, bucket_info, fault, dpp); + if (bucket_info.layout.target_index) { + return revert_target_layout(store, bucket_info, fault, dpp); + } + // there is nothing to revert + return 0; } // cancel_reshard static int commit_reshard(rgw::sal::RadosStore* store,