From 9c9534ee328a0a1359eb7ddbc18ed1ed699c274a Mon Sep 17 00:00:00 2001 From: "J. Eric Ivancich" Date: Fri, 12 Oct 2018 18:07:24 -0400 Subject: [PATCH] rgw: failed resharding clears resharding status from shard heads Previously, when resharding failed, we restored the shard status on the bucket info object. However the status on each of the shards was left indicating a reshard was underway. This prevented some write operations from taking place, as they would wait for resharding to complete. This adds the missing functionality. It also makes the functionality available to other classes via static functions in RGWBucketReshard. Signed-off-by: J. Eric Ivancich (cherry picked from commit 4577801271454b147cb3b1f3169d268d1c14948b) --- src/cls/rgw/cls_rgw.cc | 2 +- src/cls/rgw/cls_rgw_client.cc | 1 - src/cls/rgw/cls_rgw_client.h | 2 +- src/cls/rgw/cls_rgw_types.h | 3 ++ src/rgw/rgw_reshard.cc | 61 +++++++++++++++++++++++++++++++---- src/rgw/rgw_reshard.h | 16 ++++++++- 6 files changed, 74 insertions(+), 11 deletions(-) diff --git a/src/cls/rgw/cls_rgw.cc b/src/cls/rgw/cls_rgw.cc index 9dfa477ab9992..01c04259d29ef 100644 --- a/src/cls/rgw/cls_rgw.cc +++ b/src/cls/rgw/cls_rgw.cc @@ -3766,7 +3766,7 @@ static int rgw_set_bucket_resharding(cls_method_context_t hctx, bufferlist *in, static int rgw_clear_bucket_resharding(cls_method_context_t hctx, bufferlist *in, bufferlist *out) { - cls_rgw_set_bucket_resharding_op op; + cls_rgw_clear_bucket_resharding_op op; bufferlist::iterator in_iter = in->begin(); try { diff --git a/src/cls/rgw/cls_rgw_client.cc b/src/cls/rgw/cls_rgw_client.cc index a0560b2b36f48..4d446dccc8153 100644 --- a/src/cls/rgw/cls_rgw_client.cc +++ b/src/cls/rgw/cls_rgw_client.cc @@ -960,4 +960,3 @@ int CLSRGWIssueSetBucketResharding::issue_op(int shard_id, const string& oid) { return issue_set_bucket_resharding(io_ctx, oid, entry, &manager); } - diff --git a/src/cls/rgw/cls_rgw_client.h b/src/cls/rgw/cls_rgw_client.h index 86fc3a4f1f8d5..1f3c1687f0aab 100644 --- 
a/src/cls/rgw/cls_rgw_client.h +++ b/src/cls/rgw/cls_rgw_client.h @@ -537,7 +537,7 @@ int cls_rgw_reshard_get(librados::IoCtx& io_ctx, const string& oid, cls_rgw_resh int cls_rgw_reshard_get_head(librados::IoCtx& io_ctx, const string& oid, cls_rgw_reshard_entry& entry); void cls_rgw_reshard_remove(librados::ObjectWriteOperation& op, const cls_rgw_reshard_entry& entry); -/* resharding attribute */ +/* resharding attribute on bucket index shard headers */ int cls_rgw_set_bucket_resharding(librados::IoCtx& io_ctx, const string& oid, const cls_rgw_bucket_instance_entry& entry); int cls_rgw_clear_bucket_resharding(librados::IoCtx& io_ctx, const string& oid); diff --git a/src/cls/rgw/cls_rgw_types.h b/src/cls/rgw/cls_rgw_types.h index e1cfdb645fb04..8cf24dcbcd739 100644 --- a/src/cls/rgw/cls_rgw_types.h +++ b/src/cls/rgw/cls_rgw_types.h @@ -1,3 +1,6 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + #ifndef CEPH_CLS_RGW_TYPES_H #define CEPH_CLS_RGW_TYPES_H diff --git a/src/rgw/rgw_reshard.cc b/src/rgw/rgw_reshard.cc index fab5522595fba..367bdb12a670e 100644 --- a/src/rgw/rgw_reshard.cc +++ b/src/rgw/rgw_reshard.cc @@ -1,6 +1,8 @@ // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- // vim: ts=8 sw=2 smarttab +#include <limits> + #include "rgw_rados.h" #include "rgw_bucket.h" #include "rgw_reshard.h" @@ -22,6 +24,7 @@ const string bucket_instance_lock_name = "bucket_instance_lock"; #define RESHARD_SHARD_WINDOW 64 #define RESHARD_MAX_AIO 128 + class BucketReshardShard { RGWRados *store; const RGWBucketInfo& bucket_info; @@ -266,6 +269,7 @@ int RGWBucketReshard::renew_lock_bucket(const Clock::time_point& now) reshard_oid << " with " << cpp_strerror(-ret) << dendl; return ret; } + reshard_lock.set_must_renew(false); lock_start_time = now; lock_renew_thresh = lock_start_time + lock_duration / 2; ldout(store->ctx(), 20) << __func__ << "(): successfully renewed lock on " << @@ -274,7 +278,11 @@ int 
RGWBucketReshard::renew_lock_bucket(const Clock::time_point& now) return 0; } -int RGWBucketReshard::set_resharding_status(const string& new_instance_id, int32_t num_shards, cls_rgw_reshard_status status) +int RGWBucketReshard::set_resharding_status(RGWRados* store, + RGWBucketInfo& bucket_info, + const string& new_instance_id, + int32_t num_shards, + cls_rgw_reshard_status status) { if (new_instance_id.empty()) { ldout(store->ctx(), 0) << __func__ << " missing new bucket instance id" << dendl; @@ -293,16 +301,47 @@ int RGWBucketReshard::set_resharding_status(const string& new_instance_id, int32 return 0; } +// reshard lock assumes lock is held int RGWBucketReshard::clear_resharding() { - cls_rgw_bucket_instance_entry instance_entry; + int ret = clear_index_shard_reshard_status(); + if (ret < 0) { + ldout(store->ctx(), 0) << "RGWBucketReshard::" << __func__ << + " ERROR: error clearing reshard status from index shard " << + cpp_strerror(-ret) << dendl; + return ret; + } - int ret = store->bucket_set_reshard(bucket_info, instance_entry); + cls_rgw_bucket_instance_entry instance_entry; + ret = store->bucket_set_reshard(bucket_info, instance_entry); if (ret < 0) { - ldout(store->ctx(), 0) << "RGWReshard::" << __func__ << " ERROR: error setting bucket resharding flag on bucket index: " - << cpp_strerror(-ret) << dendl; + ldout(store->ctx(), 0) << "RGWReshard::" << __func__ << + " ERROR: error setting bucket resharding flag on bucket index: " << + cpp_strerror(-ret) << dendl; return ret; } + + return 0; +} + +int RGWBucketReshard::clear_index_shard_reshard_status(RGWRados* store, + RGWBucketInfo& bucket_info) +{ + uint32_t num_shards = bucket_info.num_shards; + + if (num_shards < std::numeric_limits<uint32_t>::max()) { + int ret = set_resharding_status(store, bucket_info, + bucket_info.bucket.bucket_id, + (num_shards < 1 ? 
1 : num_shards), + CLS_RGW_RESHARD_NONE); + if (ret < 0) { + ldout(store->ctx(), 0) << "RGWBucketReshard::" << __func__ << + " ERROR: error clearing reshard status from index shard " << + cpp_strerror(-ret) << dendl; + return ret; + } + } + return 0; } @@ -354,7 +393,7 @@ int RGWBucketReshard::cancel() ret = clear_resharding(); unlock_bucket(); - return 0; + return ret; } class BucketInfoReshardUpdate @@ -389,8 +428,14 @@ public: ~BucketInfoReshardUpdate() { if (in_progress) { + int ret = + RGWBucketReshard::clear_index_shard_reshard_status(store, bucket_info); + if (ret < 0) { + lderr(store->ctx()) << "Error: " << __func__ << + " clear_index_shard_status returned " << ret << dendl; + } bucket_info.new_bucket_instance_id.clear(); - set_status(CLS_RGW_RESHARD_NONE); + set_status(CLS_RGW_RESHARD_NONE); // saves new_bucket_instance as well } } @@ -441,6 +486,8 @@ int RGWBucketReshard::do_reshard(int num_shards, return -EINVAL; } + // NB: destructor cleans up sharding state if reshard does not + // complete successfully BucketInfoReshardUpdate bucket_info_updater(store, bucket_info, bucket_attrs, new_bucket_info.bucket.bucket_id); ret = bucket_info_updater.start(); diff --git a/src/rgw/rgw_reshard.h b/src/rgw/rgw_reshard.h index f8f77abbffdc1..bec457c64d8db 100644 --- a/src/rgw/rgw_reshard.h +++ b/src/rgw/rgw_reshard.h @@ -44,7 +44,6 @@ private: int lock_bucket(); void unlock_bucket(); int renew_lock_bucket(const Clock::time_point&); - int set_resharding_status(const string& new_instance_id, int32_t num_shards, cls_rgw_reshard_status status); int clear_resharding(); int create_new_bucket_instance(int new_num_shards, RGWBucketInfo& new_bucket_info); @@ -66,6 +65,21 @@ public: RGWReshard *reshard_log = nullptr); int get_status(std::list<cls_rgw_bucket_instance_entry> *status); int cancel(); + static int clear_index_shard_reshard_status(RGWRados* store, + RGWBucketInfo& bucket_info); + int clear_index_shard_reshard_status() { + return clear_index_shard_reshard_status(store, bucket_info); + } + static 
int set_resharding_status(RGWRados* store, RGWBucketInfo& bucket_info, + const string& new_instance_id, + int32_t num_shards, + cls_rgw_reshard_status status); + int set_resharding_status(const string& new_instance_id, + int32_t num_shards, + cls_rgw_reshard_status status) { + return set_resharding_status(store, bucket_info, + new_instance_id, num_shards, status); + } }; // RGWBucketReshard class RGWReshard { -- 2.39.5