From 221bbf46176c7b376c2f8a9feb842df8b0121d45 Mon Sep 17 00:00:00 2001 From: Shilpa Jagannath Date: Tue, 14 Jul 2020 17:56:05 +0530 Subject: [PATCH] rgw: failing to reshard, restore old indexes Signed-off-by: Shilpa Jagannath --- src/rgw/rgw_reshard.cc | 42 ++++++++++++++++++++++++++++-------------- src/rgw/rgw_reshard.h | 1 + 2 files changed, 29 insertions(+), 14 deletions(-) diff --git a/src/rgw/rgw_reshard.cc b/src/rgw/rgw_reshard.cc index 111b32b502662..7547b96e29af8 100644 --- a/src/rgw/rgw_reshard.cc +++ b/src/rgw/rgw_reshard.cc @@ -644,7 +644,6 @@ int RGWBucketReshard::do_reshard(int num_shards, } //overwrite current_index for the next reshard process - const auto prev_index = bucket_info.layout.current_index; bucket_info.layout.current_index = *bucket_info.layout.target_index; bucket_info.layout.target_index = std::nullopt; // target_layout doesn't need to exist after reshard bucket_info.layout.resharding = rgw::BucketReshardState::None; @@ -659,18 +658,6 @@ int RGWBucketReshard::do_reshard(int num_shards, return ret; } - // resharding successful, so remove old bucket index shards; use - // best effort and don't report out an error; the lock isn't needed - // at this point since all we're using a best effor to to remove old - // shard objects - - ret = store->svc()->bi->clean_index(bucket_info, prev_index); - if (ret < 0) { - lderr(store->ctx()) << "Error: " << __func__ << - " failed to clean up old shards; " << - "RGWRados::clean_bucket_index returned " << ret << dendl; -} - return 0; // NB: some error clean-up is done by ~BucketInfoReshardUpdate } // RGWBucketReshard::do_reshard @@ -703,7 +690,9 @@ int RGWBucketReshard::execute(int num_shards, int max_op_entries, goto error_out; } } - + + // keep a copy of old index layout + prev_index = bucket_info.layout.current_index; ret = do_reshard(num_shards, max_op_entries, @@ -717,6 +706,18 @@ int RGWBucketReshard::execute(int num_shards, int max_op_entries, reshard_lock.unlock(); + // resharding successful, so 
remove old bucket index shards; use + // best effort and don't report out an error; the lock isn't needed + // at this point since we're only making a best effort to remove old + // shard objects + + ret = store->svc()->bi->clean_index(bucket_info, prev_index); + if (ret < 0) { + lderr(store->ctx()) << "Error: " << __func__ << + " failed to clean up old shards; " << + "RGWRados::clean_bucket_index returned " << ret << dendl; +} + ldout(store->ctx(), 1) << __func__ << " INFO: reshard of bucket \"" << bucket_info.bucket.name << "\" completed successfully" << dendl; @@ -738,6 +739,19 @@ error_out: "RGWRados::clean_bucket_index returned " << ret2 << dendl; } + // restore old index if reshard fails + bucket_info.layout.current_index = prev_index; + ret = store->getRados()->put_bucket_instance_info(bucket_info, false, real_time(), &bucket_attrs, dpp); + if (ret < 0) { + lderr(store->ctx()) << "ERROR: failed writing bucket instance info: " << dendl; + return ret; + } + + ret = store->svc()->bi->init_index(bucket_info, bucket_info.layout.current_index); + if (ret < 0) { + return ret; + } + return ret; } // execute diff --git a/src/rgw/rgw_reshard.h b/src/rgw/rgw_reshard.h index 02a1e7d464fa2..b372ed81941cb 100644 --- a/src/rgw/rgw_reshard.h +++ b/src/rgw/rgw_reshard.h @@ -76,6 +76,7 @@ private: rgw::sal::RGWRadosStore *store; RGWBucketInfo bucket_info; std::map bucket_attrs; + rgw::bucket_index_layout_generation prev_index; RGWBucketReshardLock reshard_lock; RGWBucketReshardLock* outer_reshard_lock; -- 2.39.5