]> git.apps.os.sepia.ceph.com Git - ceph.git/commitdiff
rgw: save bucket instance xattrs when resharding cancelled
authorJ. Eric Ivancich <ivancich@redhat.com>
Fri, 7 Jan 2022 19:43:05 +0000 (14:43 -0500)
committerAdam C. Emerson <aemerson@redhat.com>
Wed, 2 Feb 2022 00:09:28 +0000 (19:09 -0500)
There appears to be a long-standing bug in RGW such that when
resharding is cancelled and the bucket instance is updated to reflect
the new resharding status, the xattrs were lost. The xattrs are used
to store metadata such as ACLs and LifeCycle policies.

This commit makes sure that all call paths that lead to a cancelled
reshard provide the xattrs, so they can be included when the bucket
instance info is updated.

Signed-off-by: J. Eric Ivancich <ivancich@redhat.com>
src/rgw/rgw_rados.cc
src/rgw/rgw_reshard.cc
src/rgw/rgw_reshard.h

index 4013ab4deaf609dfe10ac306cbc1611f1f116662..f1fc710e1f3ade6d1ded9abd12703ba0a2dbf585 100644 (file)
@@ -6850,14 +6850,18 @@ int RGWRados::block_while_resharding(RGWRados::BucketShard *bs,
   int ret = 0;
   cls_rgw_bucket_instance_entry entry;
 
+  // gets loaded by fetch_new_bucket_info; can be used by
+  // clear_resharding
+  std::map<std::string, bufferlist> bucket_attrs;
+
   // since we want to run this recovery code from two distinct places,
   // let's just put it in a lambda so we can easily re-use; if the
   // lambda successfully fetches a new bucket id, it sets
   // new_bucket_id and returns 0, otherwise it returns a negative
   // error code
   auto fetch_new_bucket_info =
-    [this, &bucket_info, dpp](const std::string& log_tag) -> int {
-      int ret = try_refresh_bucket_info(bucket_info, nullptr, dpp);
+    [this, &bucket_info, &bucket_attrs, dpp](const std::string& log_tag) -> int {
+      int ret = try_refresh_bucket_info(bucket_info, nullptr, dpp, &bucket_attrs);
       if (ret < 0) {
        ldpp_dout(dpp, 0) << __func__ <<
          " ERROR: failed to refresh bucket info after reshard at " <<
@@ -6912,7 +6916,9 @@ int RGWRados::block_while_resharding(RGWRados::BucketShard *bs,
        ldpp_dout(dpp, 10) << __PRETTY_FUNCTION__ <<
          ": was able to take reshard lock for bucket " <<
          bucket_id << dendl;
-        // the reshard may have finished, so call clear_resharding() with its current bucket info
+        // the reshard may have finished, so call clear_resharding()
+        // with its current bucket info; ALSO this will load
+        // bucket_attrs for call to clear_resharding below
         ret = fetch_new_bucket_info("trying_to_clear_resharding");
         if (ret < 0) {
          reshard_lock.unlock();
@@ -6921,7 +6927,7 @@ int RGWRados::block_while_resharding(RGWRados::BucketShard *bs,
            bucket_id << dendl;
           continue; // try again
         }
-       ret = RGWBucketReshard::clear_resharding(this->store, bucket_info, dpp);
+       ret = RGWBucketReshard::clear_resharding(this->store, bucket_info, bucket_attrs, dpp);
        if (ret < 0) {
          reshard_lock.unlock();
          ldpp_dout(dpp, 0) << __PRETTY_FUNCTION__ <<
index 55233b81b80b453978d9bc43a62519169aabbf43..cd8a246ffaedea33c4194f0b1b2cfd07327dfb11 100644 (file)
@@ -390,6 +390,7 @@ static int init_target_layout(rgw::sal::RadosStore* store,
 // it from the bucket instance metadata
 static int revert_target_layout(rgw::sal::RadosStore* store,
                                 RGWBucketInfo& bucket_info,
+                               std::map<std::string, bufferlist>& bucket_attrs,
                                 const ReshardFaultInjector& fault,
                                 const DoutPrefixProvider* dpp)
 {
@@ -411,7 +412,8 @@ static int revert_target_layout(rgw::sal::RadosStore* store,
   if (ret = fault.check("revert_target_layout");
       ret == 0) { // no fault injected, revert the bucket instance metadata
     ret = store->getRados()->put_bucket_instance_info(bucket_info, false,
-                                                      real_time(), nullptr, dpp);
+                                                      real_time(),
+                                                     &bucket_attrs, dpp);
   }
 
   if (ret < 0) {
@@ -446,7 +448,7 @@ static int init_reshard(rgw::sal::RadosStore* store,
     ldpp_dout(dpp, 0) << "ERROR: " << __func__ << " failed to pause "
         "writes to the current index: " << cpp_strerror(ret) << dendl;
     // clean up the target layout (ignore errors)
-    revert_target_layout(store, bucket_info, fault, dpp);
+    revert_target_layout(store, bucket_info, bucket_attrs, fault, dpp);
     return ret;
   }
   return 0;
@@ -454,6 +456,7 @@ static int init_reshard(rgw::sal::RadosStore* store,
 
 static int cancel_reshard(rgw::sal::RadosStore* store,
                           RGWBucketInfo& bucket_info,
+                         std::map<std::string, bufferlist>& bucket_attrs,
                           const ReshardFaultInjector& fault,
                           const DoutPrefixProvider *dpp)
 {
@@ -467,7 +470,7 @@ static int cancel_reshard(rgw::sal::RadosStore* store,
   }
 
   if (bucket_info.layout.target_index) {
-    return revert_target_layout(store, bucket_info, fault, dpp);
+    return revert_target_layout(store, bucket_info, bucket_attrs, fault, dpp);
   }
   // there is nothing to revert
   return 0;
@@ -553,10 +556,11 @@ static int commit_reshard(rgw::sal::RadosStore* store,
 
 int RGWBucketReshard::clear_resharding(rgw::sal::RadosStore* store,
                                        RGWBucketInfo& bucket_info,
+                                      std::map<std::string, bufferlist>& bucket_attrs,
                                        const DoutPrefixProvider* dpp)
 {
   constexpr ReshardFaultInjector no_fault;
-  return cancel_reshard(store, bucket_info, no_fault, dpp);
+  return cancel_reshard(store, bucket_info, bucket_attrs, no_fault, dpp);
 }
 
 int RGWBucketReshard::cancel(const DoutPrefixProvider* dpp)
@@ -570,7 +574,7 @@ int RGWBucketReshard::cancel(const DoutPrefixProvider* dpp)
     ldpp_dout(dpp, -1) << "ERROR: bucket is not resharding" << dendl;
     ret = -EINVAL;
   } else {
-    ret = clear_resharding(store, bucket_info, dpp);
+    ret = clear_resharding(store, bucket_info, bucket_attrs, dpp);
   }
 
   reshard_lock.unlock();
@@ -837,7 +841,7 @@ int RGWBucketReshard::execute(int num_shards,
   }
 
   if (ret < 0) {
-    cancel_reshard(store, bucket_info, fault, dpp);
+    cancel_reshard(store, bucket_info, bucket_attrs, fault, dpp);
 
     ldpp_dout(dpp, 1) << __func__ << " INFO: reshard of bucket \""
         << bucket_info.bucket.name << "\" canceled due to errors" << dendl;
index cb9264dfd7bf0206e6de41d6261dffc5c38fadb0..a9b5e2c985b8aceae34048d0db10b035aaaf1cc0 100644 (file)
@@ -108,6 +108,7 @@ public:
 
   static int clear_resharding(rgw::sal::RadosStore* store,
                              RGWBucketInfo& bucket_info,
+                             std::map<std::string, bufferlist>& bucket_attrs,
                               const DoutPrefixProvider* dpp);
 
   static uint32_t get_max_prime_shards() {