git.apps.os.sepia.ceph.com Git - ceph.git/commitdiff
rgw: fix reshard cancelling race condition
author Yuval Lifshitz <ylifshit@redhat.com>
Wed, 8 Dec 2021 19:35:25 +0000 (21:35 +0200)
committer Adam C. Emerson <aemerson@redhat.com>
Tue, 1 Feb 2022 23:22:28 +0000 (18:22 -0500)
This happens when resharding while objects are being uploaded.
Test steps are here:
https://gist.github.com/yuvalif/060f66f03511bff881e952287df3087b

Signed-off-by: Yuval Lifshitz <ylifshit@redhat.com>
src/rgw/rgw_rados.cc
src/rgw/rgw_reshard.cc

index 0c072d869ced46ee441becb84e97bb78e7af7685..c5cdd43098fed1689e9b39e52f662c3f73b2be59 100644 (file)
@@ -6912,6 +6912,15 @@ int RGWRados::block_while_resharding(RGWRados::BucketShard *bs,
        ldpp_dout(dpp, 10) << __PRETTY_FUNCTION__ <<
          ": was able to take reshard lock for bucket " <<
          bucket_id << dendl;
+        // the reshard may have finished, so call clear_resharding() with its current bucket info
+        ret = fetch_new_bucket_info("trying_to_clear_resharding");
+        if (ret < 0) {
+         reshard_lock.unlock();
+         ldpp_dout(dpp, 0) << __func__ <<
+           " ERROR: failed to update bucket info before clear resharding for bucket " <<
+           bucket_id << dendl;
+          continue; // try again
+        }
        ret = RGWBucketReshard::clear_resharding(this->store, bucket_info, dpp);
        if (ret < 0) {
          reshard_lock.unlock();
index db37d000b84292db64d98b6d9d834b11ff4ae651..596421bbc99c05775c5fcbb5f96711aa86470a5b 100644 (file)
@@ -463,7 +463,11 @@ static int cancel_reshard(rgw::sal::RadosStore* store,
     ret = 0; // non-fatal error
   }
 
-  return revert_target_layout(store, bucket_info, fault, dpp);
+  if (bucket_info.layout.target_index) {
+    return revert_target_layout(store, bucket_info, fault, dpp);
+  }
+  // there is nothing to revert
+  return 0;
 } // cancel_reshard
 
 static int commit_reshard(rgw::sal::RadosStore* store,