]> git.apps.os.sepia.ceph.com Git - ceph.git/commitdiff
rgw: dont store replication attrs on remote copy obj 62738/head
authorSeena Fallah <seenafallah@gmail.com>
Tue, 8 Apr 2025 23:51:42 +0000 (01:51 +0200)
committerSeena Fallah <seenafallah@gmail.com>
Sat, 19 Apr 2025 20:36:58 +0000 (22:36 +0200)
When an object is copied from a remote bucket in a different zonegroup,
a replication trace attribute (RGW_ATTR_OBJ_REPLICATION_TRACE) is added
to the object's metadata. If the trace includes the same zone as the
current one, subsequent attempts to serve the object may incorrectly
return -ERR_NOT_MODIFIED, preventing access to the object content.

Making it optional would let us not store anything replication related
when calling fetch_remote_obj() from copy object.

Fixes: https://tracker.ceph.com/issues/70839
Signed-off-by: Seena Fallah <seenafallah@gmail.com>
src/rgw/driver/rados/rgw_rados.cc
src/rgw/driver/rados/rgw_rados.h
src/test/rgw/rgw_multi/tests.py

index a2016dddf6c836efb500a50f0ba24851ae810dec..c76325b64d9636a12dfd95d8b5f5ef428caef485 100644 (file)
@@ -4317,7 +4317,7 @@ int RGWRados::fetch_remote_obj(RGWObjectCtx& dest_obj_ctx,
                RGWFetchObjFilter *filter,
                bool stat_follow_olh,
                const rgw_obj& stat_dest_obj,
-               const rgw_zone_set_entry& source_trace_entry,
+               std::optional<rgw_zone_set_entry> source_trace_entry,
                rgw_zone_set *zones_trace,
                std::optional<uint64_t>* bytes_transferred)
 {
@@ -4559,32 +4559,39 @@ int RGWRados::fetch_remote_obj(RGWObjectCtx& dest_obj_ctx,
   //erase the append attr
   cb.get_attrs().erase(RGW_ATTR_APPEND_PART_NUM);
 
-  { // add x-amz-replication-status=REPLICA
-    auto& bl = cb.get_attrs()[RGW_ATTR_OBJ_REPLICATION_STATUS];
-    bl.clear(); // overwrite source's status
-    bl.append("REPLICA");
-  }
-  { // update replication trace
-    std::vector<rgw_zone_set_entry> trace;
-    if (auto i = cb.get_attrs().find(RGW_ATTR_OBJ_REPLICATION_TRACE);
-        i != cb.get_attrs().end()) {
-      try {
-        decode(trace, i->second);
-      } catch (const buffer::error&) {}
+  if (source_trace_entry) { // replication attrs only if we are replicating
+    { // add x-amz-replication-status=REPLICA
+      auto& bl = cb.get_attrs()[RGW_ATTR_OBJ_REPLICATION_STATUS];
+      bl.clear(); // overwrite source's status
+      bl.append("REPLICA");
     }
-    // add the source entry to the end
-    trace.push_back(source_trace_entry);
+    { // update replication trace
+      std::vector<rgw_zone_set_entry> trace;
+      if (auto i = cb.get_attrs().find(RGW_ATTR_OBJ_REPLICATION_TRACE);
+          i != cb.get_attrs().end()) {
+        try {
+          decode(trace, i->second);
+        } catch (const buffer::error&) {}
+      }
+      // add the source entry to the end
+      trace.push_back(*source_trace_entry);
 
-    bufferlist bl;
-    encode(trace, bl);
-    cb.get_attrs()[RGW_ATTR_OBJ_REPLICATION_TRACE] = std::move(bl);
-  }
-  {
-    // add x-amz-replicated-at
-    bufferlist bl;
-    ceph::real_time timestamp = real_clock::now();
-    encode(timestamp, bl);
-    cb.get_attrs()[RGW_ATTR_OBJ_REPLICATION_TIMESTAMP] = std::move(bl);
+      bufferlist bl;
+      encode(trace, bl);
+      cb.get_attrs()[RGW_ATTR_OBJ_REPLICATION_TRACE] = std::move(bl);
+    }
+    {
+      // add x-amz-replicated-at
+      bufferlist bl;
+      ceph::real_time timestamp = real_clock::now();
+      encode(timestamp, bl);
+      cb.get_attrs()[RGW_ATTR_OBJ_REPLICATION_TIMESTAMP] = std::move(bl);
+    }
+  } else {
+    // remove replication attrs
+    cb.get_attrs().erase(RGW_ATTR_OBJ_REPLICATION_STATUS);
+    cb.get_attrs().erase(RGW_ATTR_OBJ_REPLICATION_TRACE);
+    cb.get_attrs().erase(RGW_ATTR_OBJ_REPLICATION_TIMESTAMP);
   }
 
   if (source_zone.empty()) {
@@ -4814,7 +4821,6 @@ int RGWRados::copy_obj(RGWObjectCtx& src_obj_ctx,
   ldpp_dout(dpp, 5) << "Copy object " << src_obj.bucket << ":" << src_obj.get_oid() << " => " << dest_obj.bucket << ":" << dest_obj.get_oid() << dendl;
 
   if (remote_src || !source_zone.empty()) {
-    rgw_zone_set_entry source_trace_entry{source_zone.id, std::nullopt};
     // null_yield resolves a crash when calling progress_cb(), because the beast
     // frontend tried to use this same yield context to write the progress
     // response to the frontend socket. call fetch_remote_obj() synchronously so
@@ -4826,7 +4832,7 @@ int RGWRados::copy_obj(RGWObjectCtx& src_obj_ctx,
                unmod_ptr, high_precision_time,
                if_match, if_nomatch, attrs_mod, copy_if_newer, attrs, category,
                olh_epoch, delete_at, ptag, petag, progress_cb, progress_data, rctx,
-               nullptr /* filter */, stat_follow_olh, stat_dest_obj, source_trace_entry);
+               nullptr /* filter */, stat_follow_olh, stat_dest_obj, std::nullopt);
   }
 
   map<string, bufferlist> src_attrs;
index c70a9b6415d965927ba154be502024058f615e17..a084083c5774bff9d36823eb8512849121d453c1 100644 (file)
@@ -1177,7 +1177,7 @@ public:
                        RGWFetchObjFilter *filter,
                        bool stat_follow_olh,
                        const rgw_obj& stat_dest_obj,
-                       const rgw_zone_set_entry& source_trace_entry,
+                       std::optional<rgw_zone_set_entry> source_trace_entry,
                        rgw_zone_set *zones_trace = nullptr,
                        std::optional<uint64_t>* bytes_transferred = 0);
   /**
index 17d2cc4f80e5bddc3be4da85a5de043340fe2f51..77161da96013fead63abebfe86e115c3cf2943ee 100644 (file)
@@ -3905,6 +3905,13 @@ def test_bucket_create_location_constraint():
                                     CreateBucketConfiguration={'LocationConstraint': zg.name})
                 assert e.response['ResponseMetadata']['HTTPStatusCode'] == 400
 
+def run_per_zonegroup(func):
+    def wrapper(*args, **kwargs):
+        for zonegroup in realm.current_period.zonegroups:
+            func(zonegroup, *args, **kwargs)
+
+    return wrapper
+
 def allow_bucket_replication(function):
     def wrapper(*args, **kwargs):
         zonegroup = realm.master_zonegroup()
@@ -4865,3 +4872,34 @@ def test_bucket_delete_with_sync_policy_object_prefix():
     remove_sync_policy_group(c1, "sync-group")
 
     return
+
+@run_per_zonegroup
+def test_copy_obj_between_zonegroups(zonegroup):
+    if len(realm.current_period.zonegroups) < 2:
+        raise SkipTest('need at least 2 zonegroups to run this test')
+
+    source_zone = ZonegroupConns(zonegroup).rw_zones[0]
+    source_bucket = source_zone.create_bucket(gen_bucket_name())
+
+    objname = 'dummy'
+    k = new_key(source_zone, source_bucket.name, objname)
+    k.set_contents_from_string('foo')
+
+    for zg in realm.current_period.zonegroups:
+        if zg.name == zonegroup.name:
+            continue
+
+        dest_zone = ZonegroupConns(zg).rw_zones[0]
+        dest_bucket = dest_zone.create_bucket(gen_bucket_name())
+        realm_meta_checkpoint(realm)
+
+        # copy object
+        dest_zone.s3_client.copy_object(
+            Bucket=dest_bucket.name,
+            CopySource=f'{source_bucket.name}/{objname}',
+            Key=objname
+        )
+
+        # check that object exists in destination bucket
+        k = get_key(dest_zone, dest_bucket, objname)
+        assert_equal(k.get_contents_as_string().decode('utf-8'), 'foo')