From: J. Eric Ivancich Date: Fri, 7 Jan 2022 19:43:05 +0000 (-0500) Subject: rgw: save bucket instance xattrs when resharding cancelled X-Git-Tag: v18.0.0~787^2~50 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=f0080c24b3e3e642ed35a9039ea45c67b07c11ca;p=ceph.git rgw: save bucket instance xattrs when resharding cancelled There appears to be a long-standing bug in RGW such that when resharding is cancelled and the bucket instance is updated to reflect the new resharding status, the xattrs were lost. The xattrs are used to store metadata such as ACLs and LifeCycle policies. This commit makes sure that all call paths that lead to a cancelled reshard provide the xattrs, so they can be included when the bucket instance info is updated. Signed-off-by: J. Eric Ivancich --- diff --git a/src/rgw/rgw_rados.cc b/src/rgw/rgw_rados.cc index 0c0c8a651f75..f91debd2cea8 100644 --- a/src/rgw/rgw_rados.cc +++ b/src/rgw/rgw_rados.cc @@ -6833,14 +6833,18 @@ int RGWRados::block_while_resharding(RGWRados::BucketShard *bs, int ret = 0; cls_rgw_bucket_instance_entry entry; + // gets loaded by fetch_new_bucket_info; can be used by + // clear_resharding + std::map bucket_attrs; + // since we want to run this recovery code from two distinct places, // let's just put it in a lambda so we can easily re-use; if the // lambda successfully fetches a new bucket id, it sets // new_bucket_id and returns 0, otherwise it returns a negative // error code auto fetch_new_bucket_info = - [this, &bucket_info, dpp](const std::string& log_tag) -> int { - int ret = try_refresh_bucket_info(bucket_info, nullptr, dpp); + [this, &bucket_info, &bucket_attrs, dpp](const std::string& log_tag) -> int { + int ret = try_refresh_bucket_info(bucket_info, nullptr, dpp, &bucket_attrs); if (ret < 0) { ldpp_dout(dpp, 0) << __func__ << " ERROR: failed to refresh bucket info after reshard at " << @@ -6895,7 +6899,9 @@ int RGWRados::block_while_resharding(RGWRados::BucketShard *bs, ldpp_dout(dpp, 10) << __PRETTY_FUNCTION__ << ": was able to take reshard lock for bucket " << bucket_id << dendl; - // the reshard may have finished, so call clear_resharding() with its current bucket info + // the reshard may have finished, so call clear_resharding() + // with its current bucket info; ALSO this will load + // bucket_attrs for call to clear_resharding below ret = fetch_new_bucket_info("trying_to_clear_resharding"); if (ret < 0) { reshard_lock.unlock(); @@ -6904,7 +6910,7 @@ int RGWRados::block_while_resharding(RGWRados::BucketShard *bs, bucket_id << dendl; continue; // try again } - ret = RGWBucketReshard::clear_resharding(this->store, bucket_info, dpp); + ret = RGWBucketReshard::clear_resharding(this->store, bucket_info, bucket_attrs, dpp); if (ret < 0) { reshard_lock.unlock(); ldpp_dout(dpp, 0) << __PRETTY_FUNCTION__ << diff --git a/src/rgw/rgw_reshard.cc b/src/rgw/rgw_reshard.cc index 55233b81b80b..cd8a246ffaed 100644 --- a/src/rgw/rgw_reshard.cc +++ b/src/rgw/rgw_reshard.cc @@ -390,6 +390,7 @@ static int init_target_layout(rgw::sal::RadosStore* store, // it from the bucket instance metadata static int revert_target_layout(rgw::sal::RadosStore* store, RGWBucketInfo& bucket_info, + std::map& bucket_attrs, const ReshardFaultInjector& fault, const DoutPrefixProvider* dpp) { @@ -411,7 +412,8 @@ static int revert_target_layout(rgw::sal::RadosStore* store, if (ret = fault.check("revert_target_layout"); ret == 0) { // no fault injected, revert the bucket instance metadata ret = store->getRados()->put_bucket_instance_info(bucket_info, false, - real_time(), nullptr, dpp); + real_time(), + &bucket_attrs, dpp); } if (ret < 0) { @@ -446,7 +448,7 @@ static int init_reshard(rgw::sal::RadosStore* store, ldpp_dout(dpp, 0) << "ERROR: " << __func__ << " failed to pause " "writes to the current index: " << cpp_strerror(ret) << dendl; // clean up the target layout (ignore errors) - revert_target_layout(store, bucket_info, fault, dpp); + revert_target_layout(store, bucket_info, bucket_attrs, fault, dpp); return ret; } return 0; @@ -454,6 +456,7 @@ static int init_reshard(rgw::sal::RadosStore* store, static int cancel_reshard(rgw::sal::RadosStore* store, RGWBucketInfo& bucket_info, + std::map& bucket_attrs, const ReshardFaultInjector& fault, const DoutPrefixProvider *dpp) { @@ -467,7 +470,7 @@ static int cancel_reshard(rgw::sal::RadosStore* store, } if (bucket_info.layout.target_index) { - return revert_target_layout(store, bucket_info, fault, dpp); + return revert_target_layout(store, bucket_info, bucket_attrs, fault, dpp); } // there is nothing to revert return 0; @@ -553,10 +556,11 @@ static int commit_reshard(rgw::sal::RadosStore* store, int RGWBucketReshard::clear_resharding(rgw::sal::RadosStore* store, RGWBucketInfo& bucket_info, + std::map& bucket_attrs, const DoutPrefixProvider* dpp) { constexpr ReshardFaultInjector no_fault; - return cancel_reshard(store, bucket_info, no_fault, dpp); + return cancel_reshard(store, bucket_info, bucket_attrs, no_fault, dpp); } int RGWBucketReshard::cancel(const DoutPrefixProvider* dpp) @@ -570,7 +574,7 @@ int RGWBucketReshard::cancel(const DoutPrefixProvider* dpp) ldpp_dout(dpp, -1) << "ERROR: bucket is not resharding" << dendl; ret = -EINVAL; } else { - ret = clear_resharding(store, bucket_info, dpp); + ret = clear_resharding(store, bucket_info, bucket_attrs, dpp); } reshard_lock.unlock(); @@ -837,7 +841,7 @@ int RGWBucketReshard::execute(int num_shards, } if (ret < 0) { - cancel_reshard(store, bucket_info, fault, dpp); + cancel_reshard(store, bucket_info, bucket_attrs, fault, dpp); ldpp_dout(dpp, 1) << __func__ << " INFO: reshard of bucket \"" << bucket_info.bucket.name << "\" canceled due to errors" << dendl; diff --git a/src/rgw/rgw_reshard.h b/src/rgw/rgw_reshard.h index cb9264dfd7bf..a9b5e2c985b8 100644 --- a/src/rgw/rgw_reshard.h +++ b/src/rgw/rgw_reshard.h @@ -108,6 +108,7 @@ public: static int clear_resharding(rgw::sal::RadosStore* store, RGWBucketInfo& bucket_info, + std::map& bucket_attrs, const DoutPrefixProvider* dpp); static uint32_t get_max_prime_shards() {