From f97693bdcc114cfd5287a6f2e06c8a13c011e479 Mon Sep 17 00:00:00 2001 From: Seena Fallah Date: Wed, 9 Apr 2025 01:51:42 +0200 Subject: [PATCH] rgw: dont store replication attrs on remote copy obj When an object is copied from a remote bucket in a different zonegroup, a replication trace attribute (RGW_ATTR_OBJ_REPLICATION_TRACE) is added to the object's metadata. If the trace includes the same zone as the current one, subsequent attempts to serve the object may incorrectly return -ERR_NOT_MODIFIED, preventing access to the object content. Making it optional would let us not store anything replication related when calling fetch_remote_obj() from copy object. Fixes: https://tracker.ceph.com/issues/70839 Signed-off-by: Seena Fallah --- src/rgw/driver/rados/rgw_rados.cc | 60 +++++++++++++++++-------------- src/rgw/driver/rados/rgw_rados.h | 2 +- src/test/rgw/rgw_multi/tests.py | 38 ++++++++++++++++++++ 3 files changed, 72 insertions(+), 28 deletions(-) diff --git a/src/rgw/driver/rados/rgw_rados.cc b/src/rgw/driver/rados/rgw_rados.cc index a2016dddf6c..c76325b64d9 100644 --- a/src/rgw/driver/rados/rgw_rados.cc +++ b/src/rgw/driver/rados/rgw_rados.cc @@ -4317,7 +4317,7 @@ int RGWRados::fetch_remote_obj(RGWObjectCtx& dest_obj_ctx, RGWFetchObjFilter *filter, bool stat_follow_olh, const rgw_obj& stat_dest_obj, - const rgw_zone_set_entry& source_trace_entry, + std::optional source_trace_entry, rgw_zone_set *zones_trace, std::optional* bytes_transferred) { @@ -4559,32 +4559,39 @@ int RGWRados::fetch_remote_obj(RGWObjectCtx& dest_obj_ctx, //erase the append attr cb.get_attrs().erase(RGW_ATTR_APPEND_PART_NUM); - { // add x-amz-replication-status=REPLICA - auto& bl = cb.get_attrs()[RGW_ATTR_OBJ_REPLICATION_STATUS]; - bl.clear(); // overwrite source's status - bl.append("REPLICA"); - } - { // update replication trace - std::vector trace; - if (auto i = cb.get_attrs().find(RGW_ATTR_OBJ_REPLICATION_TRACE); - i != cb.get_attrs().end()) { - try { - decode(trace, i->second); - } catch (const buffer::error&) {} + if (source_trace_entry) { // replication attrs only if we are replicating + { // add x-amz-replication-status=REPLICA + auto& bl = cb.get_attrs()[RGW_ATTR_OBJ_REPLICATION_STATUS]; + bl.clear(); // overwrite source's status + bl.append("REPLICA"); } - // add the source entry to the end - trace.push_back(source_trace_entry); + { // update replication trace + std::vector trace; + if (auto i = cb.get_attrs().find(RGW_ATTR_OBJ_REPLICATION_TRACE); + i != cb.get_attrs().end()) { + try { + decode(trace, i->second); + } catch (const buffer::error&) {} + } + // add the source entry to the end + trace.push_back(*source_trace_entry); - bufferlist bl; - encode(trace, bl); - cb.get_attrs()[RGW_ATTR_OBJ_REPLICATION_TRACE] = std::move(bl); - } - { - // add x-amz-replicated-at - bufferlist bl; - ceph::real_time timestamp = real_clock::now(); - encode(timestamp, bl); - cb.get_attrs()[RGW_ATTR_OBJ_REPLICATION_TIMESTAMP] = std::move(bl); + bufferlist bl; + encode(trace, bl); + cb.get_attrs()[RGW_ATTR_OBJ_REPLICATION_TRACE] = std::move(bl); + } + { + // add x-amz-replicated-at + bufferlist bl; + ceph::real_time timestamp = real_clock::now(); + encode(timestamp, bl); + cb.get_attrs()[RGW_ATTR_OBJ_REPLICATION_TIMESTAMP] = std::move(bl); + } + } else { + // remove replication attrs + cb.get_attrs().erase(RGW_ATTR_OBJ_REPLICATION_STATUS); + cb.get_attrs().erase(RGW_ATTR_OBJ_REPLICATION_TRACE); + cb.get_attrs().erase(RGW_ATTR_OBJ_REPLICATION_TIMESTAMP); } if (source_zone.empty()) { @@ -4814,7 +4821,6 @@ int RGWRados::copy_obj(RGWObjectCtx& src_obj_ctx, ldpp_dout(dpp, 5) << "Copy object " << src_obj.bucket << ":" << src_obj.get_oid() << " => " << dest_obj.bucket << ":" << dest_obj.get_oid() << dendl; if (remote_src || !source_zone.empty()) { - rgw_zone_set_entry source_trace_entry{source_zone.id, std::nullopt}; // null_yield resolves a crash when calling progress_cb(), because the beast // frontend tried to use this same yield context to write the progress // response to the frontend socket. call fetch_remote_obj() synchronously so @@ -4826,7 +4832,7 @@ int RGWRados::copy_obj(RGWObjectCtx& src_obj_ctx, unmod_ptr, high_precision_time, if_match, if_nomatch, attrs_mod, copy_if_newer, attrs, category, olh_epoch, delete_at, ptag, petag, progress_cb, progress_data, rctx, - nullptr /* filter */, stat_follow_olh, stat_dest_obj, source_trace_entry); + nullptr /* filter */, stat_follow_olh, stat_dest_obj, std::nullopt); } map src_attrs; diff --git a/src/rgw/driver/rados/rgw_rados.h b/src/rgw/driver/rados/rgw_rados.h index c70a9b6415d..a084083c577 100644 --- a/src/rgw/driver/rados/rgw_rados.h +++ b/src/rgw/driver/rados/rgw_rados.h @@ -1177,7 +1177,7 @@ public: RGWFetchObjFilter *filter, bool stat_follow_olh, const rgw_obj& stat_dest_obj, - const rgw_zone_set_entry& source_trace_entry, + std::optional source_trace_entry, rgw_zone_set *zones_trace = nullptr, std::optional* bytes_transferred = 0); /** diff --git a/src/test/rgw/rgw_multi/tests.py b/src/test/rgw/rgw_multi/tests.py index 17d2cc4f80e..77161da9601 100644 --- a/src/test/rgw/rgw_multi/tests.py +++ b/src/test/rgw/rgw_multi/tests.py @@ -3905,6 +3905,13 @@ def test_bucket_create_location_constraint(): CreateBucketConfiguration={'LocationConstraint': zg.name}) assert e.response['ResponseMetadata']['HTTPStatusCode'] == 400 +def run_per_zonegroup(func): + def wrapper(*args, **kwargs): + for zonegroup in realm.current_period.zonegroups: + func(zonegroup, *args, **kwargs) + + return wrapper + def allow_bucket_replication(function): def wrapper(*args, **kwargs): zonegroup = realm.master_zonegroup() @@ -4865,3 +4872,34 @@ def test_bucket_delete_with_sync_policy_object_prefix(): remove_sync_policy_group(c1, "sync-group") return + +@run_per_zonegroup +def test_copy_obj_between_zonegroups(zonegroup): + if len(realm.current_period.zonegroups) < 2: + raise SkipTest('need at least 2 zonegroups to run this test') + + source_zone = ZonegroupConns(zonegroup).rw_zones[0] + source_bucket = source_zone.create_bucket(gen_bucket_name()) + + objname = 'dummy' + k = new_key(source_zone, source_bucket.name, objname) + k.set_contents_from_string('foo') + + for zg in realm.current_period.zonegroups: + if zg.name == zonegroup.name: + continue + + dest_zone = ZonegroupConns(zg).rw_zones[0] + dest_bucket = dest_zone.create_bucket(gen_bucket_name()) + realm_meta_checkpoint(realm) + + # copy object + dest_zone.s3_client.copy_object( + Bucket=dest_bucket.name, + CopySource=f'{source_bucket.name}/{objname}', + Key=objname + ) + + # check that object exists in destination bucket + k = get_key(dest_zone, dest_bucket, objname) + assert_equal(k.get_contents_as_string().decode('utf-8'), 'foo') -- 2.47.3