git.apps.os.sepia.ceph.com Git - ceph.git/commitdiff
rgw: fix reshard cancelling race condition
author Yuval Lifshitz <ylifshit@redhat.com>
Wed, 8 Dec 2021 19:35:25 +0000 (21:35 +0200)
committer Casey Bodley <cbodley@redhat.com>
Fri, 27 May 2022 19:47:33 +0000 (15:47 -0400)
This race happens when a bucket is resharded while objects are being uploaded.
The steps to reproduce it are here:
https://gist.github.com/yuvalif/060f66f03511bff881e952287df3087b

Signed-off-by: Yuval Lifshitz <ylifshit@redhat.com>
src/rgw/rgw_rados.cc
src/rgw/rgw_reshard.cc

index 5623f589242d41853ba25bf8908d1c473955e0aa..0c0c8a651f753abe5613acbd551ce9c069b01c80 100644 (file)
@@ -6895,6 +6895,15 @@ int RGWRados::block_while_resharding(RGWRados::BucketShard *bs,
        ldpp_dout(dpp, 10) << __PRETTY_FUNCTION__ <<
          ": was able to take reshard lock for bucket " <<
          bucket_id << dendl;
+        // the reshard may have finished, so call clear_resharding() with its current bucket info
+        ret = fetch_new_bucket_info("trying_to_clear_resharding");
+        if (ret < 0) {
+         reshard_lock.unlock();
+         ldpp_dout(dpp, 0) << __func__ <<
+           " ERROR: failed to update bucket info before clear resharding for bucket " <<
+           bucket_id << dendl;
+          continue; // try again
+        }
        ret = RGWBucketReshard::clear_resharding(this->store, bucket_info, dpp);
        if (ret < 0) {
          reshard_lock.unlock();
index db37d000b84292db64d98b6d9d834b11ff4ae651..596421bbc99c05775c5fcbb5f96711aa86470a5b 100644 (file)
@@ -463,7 +463,11 @@ static int cancel_reshard(rgw::sal::RadosStore* store,
     ret = 0; // non-fatal error
   }
 
-  return revert_target_layout(store, bucket_info, fault, dpp);
+  if (bucket_info.layout.target_index) {
+    return revert_target_layout(store, bucket_info, fault, dpp);
+  }
+  // there is nothing to revert
+  return 0;
 } // cancel_reshard
 
 static int commit_reshard(rgw::sal::RadosStore* store,