git.apps.os.sepia.ceph.com Git - ceph-ci.git/commitdiff
rgw: Retry -ECANCELED in reshard commit and cancel
author: Adam C. Emerson <aemerson@redhat.com>
Mon, 7 Feb 2022 20:23:57 +0000 (15:23 -0500)
committer: Casey Bodley <cbodley@redhat.com>
Fri, 27 May 2022 19:47:33 +0000 (15:47 -0400)
Signed-off-by: Adam C. Emerson <aemerson@redhat.com>
src/rgw/rgw_reshard.cc

index ffc176c591e8fe5f219479580e46cfbab1d78f2e..7a0030642452694fbc25e2aaefd7132b0c0337cb 100644 (file)
@@ -460,9 +460,18 @@ static int cancel_reshard(rgw::sal::RadosStore* store,
                           const ReshardFaultInjector& fault,
                           const DoutPrefixProvider *dpp)
 {
+  static constexpr auto max_retries = 10;
   // unblock writes to the current index shard objects
-  int ret = set_resharding_status(dpp, static_cast<rgw::sal::RadosStore*>(store), bucket_info,
-                                  cls_rgw_reshard_status::NOT_RESHARDING);
+  int ret = 0;
+  int tries = 0;
+  do {
+    ret = set_resharding_status(dpp, static_cast<rgw::sal::RadosStore*>(store), bucket_info,
+                               cls_rgw_reshard_status::NOT_RESHARDING);
+    ++tries;
+    ldpp_dout(dpp, 1) << "WARNING: " << __func__
+                     << " set_resharding_status got -ECANCELED. Retrying."
+                     << dendl;
+  } while (ret == -ECANCELED && tries < max_retries);
   if (ret < 0) {
     ldpp_dout(dpp, 1) << "WARNING: " << __func__ << " failed to unblock "
         "writes to current index objects: " << cpp_strerror(ret) << dendl;
@@ -470,7 +479,15 @@ static int cancel_reshard(rgw::sal::RadosStore* store,
   }
 
   if (bucket_info.layout.target_index) {
-    return revert_target_layout(store, bucket_info, bucket_attrs, fault, dpp);
+    tries = 0;
+    do {
+      ret = revert_target_layout(store, bucket_info, bucket_attrs, fault, dpp);
+      ++tries;
+      ldpp_dout(dpp, 1) << "WARNING: " << __func__
+                       << " revert_target_layout got -ECANCELED. Retrying."
+                       << dendl;
+    } while (ret == -ECANCELED && tries < max_retries);
+    return ret;
   }
   // there is nothing to revert
   return 0;
@@ -482,6 +499,7 @@ static int commit_reshard(rgw::sal::RadosStore* store,
                           const ReshardFaultInjector& fault,
                           const DoutPrefixProvider *dpp)
 {
+  static constexpr auto max_retries = 10;
   auto& layout = bucket_info.layout;
   auto prev = layout; // make a copy for cleanup
   const auto next_log_gen = layout.logs.empty() ? 1 :
@@ -514,10 +532,14 @@ static int commit_reshard(rgw::sal::RadosStore* store,
 
   int ret = fault.check("commit_target_layout");
   if (ret == 0) { // no fault injected, write the bucket instance metadata
-    ret =
-      store->getRados()->put_bucket_instance_info(bucket_info, false,
-                                                 real_time(),
-                                                 &bucket_attrs, dpp);
+    int tries = 0;
+    do {
+      ret =
+       store->getRados()->put_bucket_instance_info(bucket_info, false,
+                                                   real_time(),
+                                                   &bucket_attrs, dpp);
+      ++tries;
+    } while (ret == -ECANCELED && tries < max_retries);
   }
 
   if (ret < 0) {
@@ -527,8 +549,16 @@ static int commit_reshard(rgw::sal::RadosStore* store,
     bucket_info.layout = std::move(prev); // restore in-memory layout
 
     // unblock writes to the current index shard objects
-    int ret2 = set_resharding_status(dpp, store, bucket_info,
-                                     cls_rgw_reshard_status::NOT_RESHARDING);
+    int tries = 0;
+    int ret2 = 0;
+    do {
+      ret2 = set_resharding_status(dpp, store, bucket_info,
+                                  cls_rgw_reshard_status::NOT_RESHARDING);
+      ++tries;
+      ldpp_dout(dpp, 1) << "WARNING: " << __func__
+                       << " set_resharding_status got -ECANCELED. Retrying."
+                       << dendl;
+    } while (ret2 == -ECANCELED && tries < max_retries);
     if (ret2 < 0) {
       ldpp_dout(dpp, 1) << "WARNING: " << __func__ << " failed to unblock "
           "writes to current index objects: " << cpp_strerror(ret2) << dendl;