From: kchheda3 Date: Fri, 19 Apr 2024 14:59:13 +0000 (-0400) Subject: rgw/multisite-notification: retry storing bucket notification attrs for ECANCELED... X-Git-Tag: v19.1.0~18^2 X-Git-Url: http://git.apps.os.sepia.ceph.com/?a=commitdiff_plain;h=a5699661e76022b6c7306aebd5380ee6f67a1ddc;p=ceph.git rgw/multisite-notification: retry storing bucket notification attrs for ECANCELED(ConcurrentModification) errors. An ECANCELED error coming from our write of the bucket instance metadat is a common error for metadata writes on secondary zones, because secondary write races with metadata sync from the write that is forwarded to the master zone Signed-off-by: kchheda3 (cherry picked from commit 9ca76770434e4dd796afe2c88446f94279919026) --- diff --git a/src/rgw/rgw_op.cc b/src/rgw/rgw_op.cc index 481e14b541647..ef9a88c06b6eb 100644 --- a/src/rgw/rgw_op.cc +++ b/src/rgw/rgw_op.cc @@ -951,36 +951,6 @@ void rgw_bucket_object_pre_exec(req_state *s) dump_bucket_from_state(s); } -// So! Now and then when we try to update bucket information, the -// bucket has changed during the course of the operation. (Or we have -// a cache consistency problem that Watch/Notify isn't ruling out -// completely.) -// -// When this happens, we need to update the bucket info and try -// again. We have, however, to try the right *part* again. We can't -// simply re-send, since that will obliterate the previous update. -// -// Thus, callers of this function should include everything that -// merges information to be changed into the bucket information as -// well as the call to set it. -// -// The called function must return an integer, negative on error. In -// general, they should just return op_ret. -namespace { -template -int retry_raced_bucket_write(const DoutPrefixProvider *dpp, rgw::sal::Bucket* b, const F& f, optional_yield y) { - auto r = f(); - for (auto i = 0u; i < 15u && r == -ECANCELED; ++i) { - r = b->try_refresh_info(dpp, nullptr, y); - if (r >= 0) { - r = f(); - } - } - return r; -} -} - - int RGWGetObj::verify_permission(optional_yield y) { s->object->set_atomic(); diff --git a/src/rgw/rgw_op.h b/src/rgw/rgw_op.h index 5f947fd53c5e2..c43ce02483590 100644 --- a/src/rgw/rgw_op.h +++ b/src/rgw/rgw_op.h @@ -165,6 +165,36 @@ int rgw_rest_get_json_input(CephContext *cct, req_state *s, T& out, return 0; } +// So! Now and then when we try to update bucket information, the +// bucket has changed during the course of the operation. (Or we have +// a cache consistency problem that Watch/Notify isn't ruling out +// completely.) +// +// When this happens, we need to update the bucket info and try +// again. We have, however, to try the right *part* again. We can't +// simply re-send, since that will obliterate the previous update. +// +// Thus, callers of this function should include everything that +// merges information to be changed into the bucket information as +// well as the call to set it. +// +// The called function must return an integer, negative on error. In +// general, they should just return op_ret. +template +int retry_raced_bucket_write(const DoutPrefixProvider *dpp, + rgw::sal::Bucket *b, + const F &f, + optional_yield y) { + auto r = f(); + for (auto i = 0u; i < 15u && r == -ECANCELED; ++i) { + r = b->try_refresh_info(dpp, nullptr, y); + if (r >= 0) { + r = f(); + } + } + return r; +} + /** * Provide the base class for all ops. */ diff --git a/src/rgw/rgw_rest_pubsub.cc b/src/rgw/rgw_rest_pubsub.cc index bf72baac13ea1..e83443e036253 100644 --- a/src/rgw/rgw_rest_pubsub.cc +++ b/src/rgw/rgw_rest_pubsub.cc @@ -1318,48 +1318,49 @@ void RGWPSCreateNotifOp::execute_v2(optional_yield y) { op_ret = -ERR_SERVICE_UNAVAILABLE; return; } - - if (configurations.list.empty()) { - op_ret = remove_notification_v2(this, driver, s->bucket.get(), - /*delete all notif=true*/ "", y); - return; - } - rgw_pubsub_bucket_topics bucket_topics; - op_ret = get_bucket_notifications(this, s->bucket.get(), bucket_topics); - if (op_ret < 0) { - ldpp_dout(this, 1) - << "failed to load existing bucket notification on bucket: " - << s->bucket << ", ret = " << op_ret << dendl; - return; - } - for (const auto& c : configurations.list) { - const auto& notif_name = c.id; - - const auto arn = rgw::ARN::parse(c.topic_arn); - if (!arn) { // already validated above - continue; + op_ret = retry_raced_bucket_write(this, s->bucket.get(), [this, y] { + if (configurations.list.empty()) { + return remove_notification_v2(this, driver, s->bucket.get(), + /*delete all notif=true*/"", y); } - const auto& topic_name = arn->resource; - - auto t = topics.find(*arn); - if (t == topics.end()) { - continue; + rgw_pubsub_bucket_topics bucket_topics; + int ret = get_bucket_notifications(this, s->bucket.get(), bucket_topics); + if (ret < 0) { + ldpp_dout(this, 1) + << "failed to load existing bucket notification on bucket: " + << s->bucket << ", ret = " << ret << dendl; + return ret; } - auto& topic_info = t->second; + for (const auto &c : configurations.list) { + const auto ¬if_name = c.id; - auto& topic_filter = + const auto arn = rgw::ARN::parse(c.topic_arn); + if (!arn) { // already validated above + continue; + } + const auto &topic_name = arn->resource; + + auto t = topics.find(*arn); + if (t == topics.end()) { + continue; + } + auto &topic_info = t->second; + + auto &topic_filter = bucket_topics.topics[topic_to_unique(topic_name, notif_name)]; - topic_filter.topic = topic_info; - topic_filter.events = c.events; - topic_filter.s3_id = notif_name; - topic_filter.s3_filter = c.filter; - } - // finally store all the bucket notifications as attr. - bufferlist bl; - bucket_topics.encode(bl); - rgw::sal::Attrs& attrs = s->bucket->get_attrs(); - attrs[RGW_ATTR_BUCKET_NOTIFICATION] = std::move(bl); - op_ret = s->bucket->merge_and_store_attrs(this, attrs, y); + topic_filter.topic = topic_info; + topic_filter.events = c.events; + topic_filter.s3_id = notif_name; + topic_filter.s3_filter = c.filter; + } + // finally store all the bucket notifications as attr. + bufferlist bl; + bucket_topics.encode(bl); + rgw::sal::Attrs &attrs = s->bucket->get_attrs(); + attrs[RGW_ATTR_BUCKET_NOTIFICATION] = std::move(bl); + return s->bucket->merge_and_store_attrs(this, attrs, y); + }, y); + if (op_ret < 0) { ldpp_dout(this, 4) << "Failed to store RGW_ATTR_BUCKET_NOTIFICATION on bucket="