From: Casey Bodley Date: Wed, 4 Sep 2024 20:25:51 +0000 (-0400) Subject: cls/rgw: duplicate reshard checks in all cls_rgw write operations X-Git-Tag: testing/wip-pdonnell-testing-20240920.212106-debug~16^2 X-Git-Url: http://git.apps.os.sepia.ceph.com/?a=commitdiff_plain;h=d011c522bb171e70fc5782f1e7a71ce5f37e573f;p=ceph-ci.git cls/rgw: duplicate reshard checks in all cls_rgw write operations with the addition of the reshard log, all write ops now have to read the omap header themselves. this read duplicates the one in cls_rgw_guard_bucket_resharding(), leading to a minor performance regression this commit prepares a long-term fix for this regression by duplicating the the reshard check done by cls_rgw_guard_bucket_resharding() in each write operation. this will allow the cls_rgw_guard_bucket_resharding() calls to be removed from rgw two releases later when we can guarantee that all OSDs are performing this check for all write operations Signed-off-by: Casey Bodley --- diff --git a/src/cls/rgw/cls_rgw.cc b/src/cls/rgw/cls_rgw.cc index ce2002f2a5b..2b73fb3b7aa 100644 --- a/src/cls/rgw/cls_rgw.cc +++ b/src/cls/rgw/cls_rgw.cc @@ -886,6 +886,23 @@ static int write_header_while_logrecord(cls_method_context_t hctx, return 0; } +static int guard_bucket_resharding(cls_method_context_t hctx, + const rgw_bucket_dir_header& header, + int error_code = -CLS_RGW_ERR_BUSY_RESHARDING) +{ + const ConfigProxy& conf = cls_get_config(hctx); + const uint32_t reshardlog_threshold = conf->rgw_reshardlog_threshold; + + if (header.resharding_in_progress() || + (header.resharding_in_logrecord() && header.reshardlog_entries >= reshardlog_threshold)) { + CLS_LOG(4, "ERROR: writes are blocked while bucket is " + "resharding, returning %d", error_code); + return error_code; + } + + return 0; +} + int rgw_bucket_prepare_op(cls_method_context_t hctx, bufferlist *in, bufferlist *out) { const ConfigProxy& conf = cls_get_config(hctx); @@ -925,6 +942,11 @@ int rgw_bucket_prepare_op(cls_method_context_t hctx, bufferlist *in, bufferlist return rc; } + rc = guard_bucket_resharding(hctx, header); + if (rc < 0) { + return rc; + } + // get on-disk state std::string idx; @@ -1131,6 +1153,11 @@ int rgw_bucket_complete_op(cls_method_context_t hctx, bufferlist *in, bufferlist return -EINVAL; } + rc = guard_bucket_resharding(hctx, header); + if (rc < 0) { + return rc; + } + rgw_bucket_dir_entry entry; bool ondisk = true; @@ -1757,6 +1784,11 @@ static int rgw_bucket_link_olh(cls_method_context_t hctx, bufferlist *in, buffer return rc; } + rc = guard_bucket_resharding(hctx, header); + if (rc < 0) { + return rc; + } + /* read instance entry */ BIVerObjEntry obj(hctx, op.key); int ret = obj.init(op.delete_marker); @@ -1998,6 +2030,11 @@ static int rgw_bucket_unlink_instance(cls_method_context_t hctx, bufferlist *in, return ret; } + ret = guard_bucket_resharding(hctx, header); + if (ret < 0) { + return ret; + } + BIVerObjEntry obj(hctx, dest_key); BIOLHEntry olh(hctx, dest_key); @@ -2230,6 +2267,11 @@ static int rgw_bucket_trim_olh_log(cls_method_context_t hctx, bufferlist *in, bu return rc; } + rc = guard_bucket_resharding(hctx, header); + if (rc < 0) { + return rc; + } + /* write the olh data entry */ ret = write_entry(hctx, olh_data_entry, olh_data_key, header); if (ret < 0) { @@ -2265,6 +2307,11 @@ static int rgw_bucket_clear_olh(cls_method_context_t hctx, bufferlist *in, buffe return rc; } + rc = guard_bucket_resharding(hctx, header); + if (rc < 0) { + return rc; + } + /* read olh entry */ rgw_bucket_olh_entry olh_data_entry; string olh_data_key, olh_sub_ver; @@ -2336,6 +2383,11 @@ int rgw_dir_suggest_changes(cls_method_context_t hctx, return rc; } + rc = guard_bucket_resharding(hctx, header); + if (rc < 0) { + return rc; + } + const uint64_t config_op_expiration = conf->rgw_pending_bucket_index_op_expiration; @@ -2840,6 +2892,11 @@ static int rgw_bi_put_entries(cls_method_context_t hctx, bufferlist *in, bufferl return r; } + r = guard_bucket_resharding(hctx, header); + if (r < 0) { + return r; + } + if (op.check_existing) { // fetch any existing keys and decrement their stats before overwriting std::set keys; @@ -3308,18 +3365,12 @@ static int reshard_log_list_entries(cls_method_context_t hctx, const string& mar } static int check_index(cls_method_context_t hctx, - rgw_bucket_dir_header *existing_header, + const rgw_bucket_dir_header& existing_header, rgw_bucket_dir_header *calc_header) { - int rc = read_bucket_header(hctx, existing_header); - if (rc < 0) { - CLS_LOG(1, "ERROR: check_index(): failed to read header\n"); - return rc; - } - - calc_header->tag_timeout = existing_header->tag_timeout; - calc_header->ver = existing_header->ver; - calc_header->syncstopped = existing_header->syncstopped; + calc_header->tag_timeout = existing_header.tag_timeout; + calc_header->ver = existing_header.ver; + calc_header->syncstopped = existing_header.syncstopped; std::list entries; string start_obj; @@ -3329,7 +3380,7 @@ static int check_index(cls_method_context_t hctx, bool more; do { - rc = list_plain_entries(hctx, filter_prefix, start_obj, CHECK_CHUNK_SIZE, &entries, &more); + int rc = list_plain_entries(hctx, filter_prefix, start_obj, CHECK_CHUNK_SIZE, &entries, &more); if (rc < 0) { return rc; } @@ -3358,7 +3409,7 @@ static int check_index(cls_method_context_t hctx, start_obj = ""; do { - rc = list_instance_entries(hctx, filter_prefix, start_obj, CHECK_CHUNK_SIZE, &entries, &more); + int rc = list_instance_entries(hctx, filter_prefix, start_obj, CHECK_CHUNK_SIZE, &entries, &more); if (rc < 0) { return rc; } @@ -3391,9 +3442,21 @@ static int check_index(cls_method_context_t hctx, int rgw_bucket_rebuild_index(cls_method_context_t hctx, bufferlist *in, bufferlist *out) { CLS_LOG(10, "entered %s", __func__); + rgw_bucket_dir_header existing_header; + int rc = read_bucket_header(hctx, &existing_header); + if (rc < 0) { + CLS_LOG(1, "ERROR: check_index(): failed to read header\n"); + return rc; + } + + rc = guard_bucket_resharding(hctx, existing_header); + if (rc < 0) { + return rc; + } + rgw_bucket_dir_header calc_header; - int rc = check_index(hctx, &existing_header, &calc_header); + rc = check_index(hctx, existing_header, &calc_header); if (rc < 0) return rc; @@ -3405,8 +3468,13 @@ int rgw_bucket_check_index(cls_method_context_t hctx, bufferlist *in, bufferlist { CLS_LOG(10, "entered %s", __func__); rgw_cls_check_index_ret ret; + int rc = read_bucket_header(hctx, &ret.existing_header); + if (rc < 0) { + CLS_LOG(1, "ERROR: check_index(): failed to read header\n"); + return rc; + } - int rc = check_index(hctx, &ret.existing_header, &ret.calculated_header); + rc = check_index(hctx, ret.existing_header, &ret.calculated_header); if (rc < 0) return rc; @@ -4934,13 +5002,9 @@ static int rgw_guard_bucket_resharding(cls_method_context_t hctx, bufferlist *in { CLS_LOG(10, "entered %s", __func__); - const ConfigProxy& conf = cls_get_config(hctx); - const uint32_t reshardlog_threshold = conf->rgw_reshardlog_threshold; - cls_rgw_guard_bucket_resharding_op op; - - auto in_iter = in->cbegin(); try { + auto in_iter = in->cbegin(); decode(op, in_iter); } catch (ceph::buffer::error& err) { CLS_LOG(1, "ERROR: %s: failed to decode entry", __func__); @@ -4954,12 +5018,7 @@ static int rgw_guard_bucket_resharding(cls_method_context_t hctx, bufferlist *in return rc; } - if (header.resharding_in_progress() || - (header.resharding_in_logrecord() && header.reshardlog_entries >= reshardlog_threshold)) { - return op.ret_err; - } - - return 0; + return guard_bucket_resharding(hctx, header, op.ret_err); } static int rgw_get_bucket_resharding(cls_method_context_t hctx, diff --git a/src/cls/rgw/cls_rgw_client.h b/src/cls/rgw/cls_rgw_client.h index d7d6e2754ac..f14380b2919 100644 --- a/src/cls/rgw/cls_rgw_client.h +++ b/src/cls/rgw/cls_rgw_client.h @@ -675,8 +675,16 @@ int cls_rgw_reshard_list(librados::IoCtx& io_ctx, const std::string& oid, std::s int cls_rgw_reshard_get(librados::IoCtx& io_ctx, const std::string& oid, cls_rgw_reshard_entry& entry); #endif -/* resharding attribute on bucket index shard headers */ +// If writes to the bucket index should be blocked during resharding, fail with +// the given error code. RGWRados::guard_reshard() calls this in a loop to retry +// the write until the reshard completes. +// +// As of the T release, all index write ops in cls_rgw perform this check +// themselves. RGW can stop issuing this call in the T+2 (V) release once it +// knows that OSDs are running T at least. The call can be safely removed from +// cls_rgw in the T+4 (X) release. void cls_rgw_guard_bucket_resharding(librados::ObjectOperation& op, int ret_err); + // these overloads which call io_ctx.operate() should not be called in the rgw. // rgw_rados_operate() should be called after the overloads w/o calls to io_ctx.operate() #ifndef CLS_CLIENT_HIDE_IOCTX diff --git a/src/cls/rgw/cls_rgw_types.h b/src/cls/rgw/cls_rgw_types.h index 6b8d8a7abea..e6afe47ffc7 100644 --- a/src/cls/rgw/cls_rgw_types.h +++ b/src/cls/rgw/cls_rgw_types.h @@ -18,6 +18,8 @@ #define CEPH_RGW_DIR_SUGGEST_LOG_OP 0x80 #define CEPH_RGW_DIR_SUGGEST_OP_MASK 0x7f +#define CLS_RGW_ERR_BUSY_RESHARDING 2300 // also in rgw_common.h, don't change! + constexpr uint64_t CEPH_RGW_DEFAULT_TAG_TIMEOUT = 120; // in seconds class JSONObj; diff --git a/src/rgw/rgw_common.h b/src/rgw/rgw_common.h index c4fdd83d7fa..b9e969a06fa 100644 --- a/src/rgw/rgw_common.h +++ b/src/rgw/rgw_common.h @@ -331,7 +331,7 @@ inline constexpr const char* RGW_REST_STS_XMLNS = #define ERR_PRESIGNED_URL_DISABLED 2224 #define ERR_AUTHORIZATION 2225 // SNS 403 AuthorizationError -#define ERR_BUSY_RESHARDING 2300 +#define ERR_BUSY_RESHARDING 2300 // also in cls_rgw_types.h, don't change! #define ERR_NO_SUCH_ENTITY 2301 #define ERR_LIMIT_EXCEEDED 2302