From: J. Eric Ivancich Date: Fri, 12 Oct 2018 22:07:24 +0000 (-0400) Subject: rgw: failed resharding clears resharding status from shard heads X-Git-Tag: v13.2.3~42^2~2 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=9c9534ee328a0a1359eb7ddbc18ed1ed699c274a;p=ceph.git rgw: failed resharding clears resharding status from shard heads Previously, when resharding failed, we restored the shard status on the bucket info object. However the status on each of the shards was left indicating a reshard was underway. This prevented some write operations from taking place, as they would wait for resharding to complete. This adds the missing functionality. It also makes the functionality available to other classes via static functions in RGWBucketReshard. Signed-off-by: J. Eric Ivancich (cherry picked from commit 4577801271454b147cb3b1f3169d268d1c14948b) --- diff --git a/src/cls/rgw/cls_rgw.cc b/src/cls/rgw/cls_rgw.cc index 9dfa477ab99..01c04259d29 100644 --- a/src/cls/rgw/cls_rgw.cc +++ b/src/cls/rgw/cls_rgw.cc @@ -3766,7 +3766,7 @@ static int rgw_set_bucket_resharding(cls_method_context_t hctx, bufferlist *in, static int rgw_clear_bucket_resharding(cls_method_context_t hctx, bufferlist *in, bufferlist *out) { - cls_rgw_set_bucket_resharding_op op; + cls_rgw_clear_bucket_resharding_op op; bufferlist::iterator in_iter = in->begin(); try { diff --git a/src/cls/rgw/cls_rgw_client.cc b/src/cls/rgw/cls_rgw_client.cc index a0560b2b36f..4d446dccc81 100644 --- a/src/cls/rgw/cls_rgw_client.cc +++ b/src/cls/rgw/cls_rgw_client.cc @@ -960,4 +960,3 @@ int CLSRGWIssueSetBucketResharding::issue_op(int shard_id, const string& oid) { return issue_set_bucket_resharding(io_ctx, oid, entry, &manager); } - diff --git a/src/cls/rgw/cls_rgw_client.h b/src/cls/rgw/cls_rgw_client.h index 86fc3a4f1f8..1f3c1687f0a 100644 --- a/src/cls/rgw/cls_rgw_client.h +++ b/src/cls/rgw/cls_rgw_client.h @@ -537,7 +537,7 @@ int cls_rgw_reshard_get(librados::IoCtx& io_ctx, const string& 
oid, cls_rgw_resh int cls_rgw_reshard_get_head(librados::IoCtx& io_ctx, const string& oid, cls_rgw_reshard_entry& entry); void cls_rgw_reshard_remove(librados::ObjectWriteOperation& op, const cls_rgw_reshard_entry& entry); -/* resharding attribute */ +/* resharding attribute on bucket index shard headers */ int cls_rgw_set_bucket_resharding(librados::IoCtx& io_ctx, const string& oid, const cls_rgw_bucket_instance_entry& entry); int cls_rgw_clear_bucket_resharding(librados::IoCtx& io_ctx, const string& oid); diff --git a/src/cls/rgw/cls_rgw_types.h b/src/cls/rgw/cls_rgw_types.h index e1cfdb645fb..8cf24dcbcd7 100644 --- a/src/cls/rgw/cls_rgw_types.h +++ b/src/cls/rgw/cls_rgw_types.h @@ -1,3 +1,6 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + #ifndef CEPH_CLS_RGW_TYPES_H #define CEPH_CLS_RGW_TYPES_H diff --git a/src/rgw/rgw_reshard.cc b/src/rgw/rgw_reshard.cc index fab5522595f..367bdb12a67 100644 --- a/src/rgw/rgw_reshard.cc +++ b/src/rgw/rgw_reshard.cc @@ -1,6 +1,8 @@ // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- // vim: ts=8 sw=2 smarttab +#include <limits> + #include "rgw_rados.h" #include "rgw_bucket.h" #include "rgw_reshard.h" @@ -22,6 +24,7 @@ const string bucket_instance_lock_name = "bucket_instance_lock"; #define RESHARD_SHARD_WINDOW 64 #define RESHARD_MAX_AIO 128 + class BucketReshardShard { RGWRados *store; const RGWBucketInfo& bucket_info; @@ -266,6 +269,7 @@ int RGWBucketReshard::renew_lock_bucket(const Clock::time_point& now) reshard_oid << " with " << cpp_strerror(-ret) << dendl; return ret; } + reshard_lock.set_must_renew(false); lock_start_time = now; lock_renew_thresh = lock_start_time + lock_duration / 2; ldout(store->ctx(), 20) << __func__ << "(): successfully renewed lock on " << @@ -274,7 +278,11 @@ int RGWBucketReshard::renew_lock_bucket(const Clock::time_point& now) return 0; } -int RGWBucketReshard::set_resharding_status(const string& new_instance_id, int32_t num_shards, 
cls_rgw_reshard_status status) +int RGWBucketReshard::set_resharding_status(RGWRados* store, + RGWBucketInfo& bucket_info, + const string& new_instance_id, + int32_t num_shards, + cls_rgw_reshard_status status) { if (new_instance_id.empty()) { ldout(store->ctx(), 0) << __func__ << " missing new bucket instance id" << dendl; @@ -293,16 +301,47 @@ int RGWBucketReshard::set_resharding_status(const string& new_instance_id, int32 return 0; } +// reshard lock assumes lock is held int RGWBucketReshard::clear_resharding() { - cls_rgw_bucket_instance_entry instance_entry; + int ret = clear_index_shard_reshard_status(); + if (ret < 0) { + ldout(store->ctx(), 0) << "RGWBucketReshard::" << __func__ << + " ERROR: error clearing reshard status from index shard " << + cpp_strerror(-ret) << dendl; + return ret; + } - int ret = store->bucket_set_reshard(bucket_info, instance_entry); + cls_rgw_bucket_instance_entry instance_entry; + ret = store->bucket_set_reshard(bucket_info, instance_entry); if (ret < 0) { - ldout(store->ctx(), 0) << "RGWReshard::" << __func__ << " ERROR: error setting bucket resharding flag on bucket index: " - << cpp_strerror(-ret) << dendl; + ldout(store->ctx(), 0) << "RGWReshard::" << __func__ << + " ERROR: error setting bucket resharding flag on bucket index: " << + cpp_strerror(-ret) << dendl; return ret; } + + return 0; +} + +int RGWBucketReshard::clear_index_shard_reshard_status(RGWRados* store, + RGWBucketInfo& bucket_info) +{ + uint32_t num_shards = bucket_info.num_shards; + + if (num_shards < std::numeric_limits<uint32_t>::max()) { + int ret = set_resharding_status(store, bucket_info, + bucket_info.bucket.bucket_id, + (num_shards < 1 ? 
1 : num_shards), + CLS_RGW_RESHARD_NONE); + if (ret < 0) { + ldout(store->ctx(), 0) << "RGWBucketReshard::" << __func__ << + " ERROR: error clearing reshard status from index shard " << + cpp_strerror(-ret) << dendl; + return ret; + } + } + return 0; } @@ -354,7 +393,7 @@ int RGWBucketReshard::cancel() ret = clear_resharding(); unlock_bucket(); - return 0; + return ret; } class BucketInfoReshardUpdate @@ -389,8 +428,14 @@ public: ~BucketInfoReshardUpdate() { if (in_progress) { + int ret = + RGWBucketReshard::clear_index_shard_reshard_status(store, bucket_info); + if (ret < 0) { + lderr(store->ctx()) << "Error: " << __func__ << + " clear_index_shard_status returned " << ret << dendl; + } bucket_info.new_bucket_instance_id.clear(); - set_status(CLS_RGW_RESHARD_NONE); + set_status(CLS_RGW_RESHARD_NONE); // saves new_bucket_instance as well } } @@ -441,6 +486,8 @@ int RGWBucketReshard::do_reshard(int num_shards, return -EINVAL; } + // NB: destructor cleans up sharding state if reshard does not + // complete successfully BucketInfoReshardUpdate bucket_info_updater(store, bucket_info, bucket_attrs, new_bucket_info.bucket.bucket_id); ret = bucket_info_updater.start(); diff --git a/src/rgw/rgw_reshard.h b/src/rgw/rgw_reshard.h index f8f77abbffd..bec457c64d8 100644 --- a/src/rgw/rgw_reshard.h +++ b/src/rgw/rgw_reshard.h @@ -44,7 +44,6 @@ private: int lock_bucket(); void unlock_bucket(); int renew_lock_bucket(const Clock::time_point&); - int set_resharding_status(const string& new_instance_id, int32_t num_shards, cls_rgw_reshard_status status); int clear_resharding(); int create_new_bucket_instance(int new_num_shards, RGWBucketInfo& new_bucket_info); @@ -66,6 +65,21 @@ public: RGWReshard *reshard_log = nullptr); int get_status(std::list<cls_rgw_bucket_instance_entry> *status); int cancel(); + static int clear_index_shard_reshard_status(RGWRados* store, + RGWBucketInfo& bucket_info); + int clear_index_shard_reshard_status() { + return clear_index_shard_reshard_status(store, bucket_info); + } + static int 
set_resharding_status(RGWRados* store, RGWBucketInfo& bucket_info, + const string& new_instance_id, + int32_t num_shards, + cls_rgw_reshard_status status); + int set_resharding_status(const string& new_instance_id, + int32_t num_shards, + cls_rgw_reshard_status status) { + return set_resharding_status(store, bucket_info, + new_instance_id, num_shards, status); + } }; // RGWBucketReshard class RGWReshard {