]> git-server-git.apps.pok.os.sepia.ceph.com Git - ceph-ci.git/commitdiff
rgw/dedup split-head split_head_simple_final_2
authorbenhanokh <gbenhano@redhat.com>
Mon, 23 Feb 2026 09:26:17 +0000 (11:26 +0200)
committerbenhanokh <gbenhano@redhat.com>
Tue, 24 Feb 2026 14:11:17 +0000 (16:11 +0200)
Simplified check for shared-tail-objects.
Added test for copy after dedup
Use tail-ioctx when removing newly created tail-head

Signed-off-by: benhanokh <gbenhano@redhat.com>
src/rgw/driver/rados/rgw_dedup.cc
src/rgw/driver/rados/rgw_dedup.h
src/test/rgw/dedup/test_dedup.py

index 19792969b8b84c9aa34fcf42adc2e366c5dc2f3b..330fb7e5015adb693a0abe46f29a5894aac71c7a 100644 (file)
@@ -605,13 +605,56 @@ namespace rgw::dedup {
   }
 
   //---------------------------------------------------------------------------
-  static void remove_created_tail_object(const DoutPrefixProvider *dpp,
-                                         librados::IoCtx &ioctx,
-                                         const std::string &tail_oid,
-                                         md5_stats_t *p_stats)
+  static inline bool invalid_tail_placement(const rgw_bucket_placement& tail_placement)
+  {
+    return (tail_placement.bucket.name.empty() || tail_placement.placement_rule.name.empty());
+  }
+
+  //---------------------------------------------------------------------------
+  int Background::get_tail_ioctx(const disk_record_t *p_rec,
+                                 const RGWObjManifest &manifest,
+                                 md5_stats_t *p_stats /*IN-OUT*/,
+                                 librados::IoCtx *p_ioctx /*OUT*/,
+                                 std::string *p_tail_name /*OUT*/,
+                                 std::string *p_oid /*OUT*/)
+  {
+    const rgw_bucket_placement &tail_placement = manifest.get_tail_placement();
+    // Tail placement_rule was fixed before committed to SLAB, if looks bad -> abort
+    if (unlikely(invalid_tail_placement(tail_placement))) {
+      p_stats->split_head_no_tail_placement++;
+      ldpp_dout(dpp, 1) << __func__ << "::invalid_tail_placement -> abort" << dendl;
+      return -EINVAL;
+    }
+
+    const rgw_bucket& bucket = tail_placement.bucket;
+    *p_tail_name = generate_split_head_tail_name(manifest);
+    // tail objects might be on another storage_class/pool, need another ioctx
+    int ret = get_ioctx_internal(dpp, driver, store, *p_tail_name, p_rec->instance,
+                                 bucket, p_ioctx, p_oid);
+    if (unlikely(ret != 0)) {
+      ldpp_dout(dpp, 1) << __func__ << "::ERR: failed get_ioctx_internal()" << dendl;
+      return ret;
+    }
+
+    return 0;
+  }
+
+  //---------------------------------------------------------------------------
+  void Background::remove_created_tail_object(const disk_record_t *p_rec,
+                                              const RGWObjManifest &manifest,
+                                              md5_stats_t *p_stats /*IN-OUT*/)
   {
+    librados::IoCtx tail_ioctx;
+    std::string tail_oid;
+    std::string tail_name;
+    int ret = get_tail_ioctx(p_rec, manifest, p_stats, &tail_ioctx, &tail_name,
+                             &tail_oid);
+    if (unlikely(ret != 0)) {
+      return;
+    }
+
     p_stats->rollback_tail_obj++;
-    int ret = ioctx.remove(tail_oid);
+    ret = tail_ioctx.remove(tail_oid);
     if (ret == 0) {
       ldpp_dout(dpp, 20) << __func__ << "::" << tail_oid
                          << " was successfully removed" << dendl;
@@ -923,8 +966,7 @@ namespace rgw::dedup {
                                const RGWObjManifest         &src_manifest,
                                const RGWObjManifest         &tgt_manifest,
                                md5_stats_t                  *p_stats,
-                               const dedup_table_t::value_t *p_src_val,
-                               const std::string            &tail_oid)
+                               const dedup_table_t::value_t *p_src_val)
   {
     const uint64_t src_head_size = src_manifest.get_head_size();
     const uint64_t tgt_head_size = tgt_manifest.get_head_size();
@@ -948,7 +990,7 @@ namespace rgw::dedup {
     if (unlikely(ret != 0)) {
       ldpp_dout(dpp, 1) << __func__ << "::ERR: failed TGT get_ioctx()" << dendl;
       if (p_src_rec->s.flags.is_split_head()) {
-        remove_created_tail_object(dpp, src_ioctx, tail_oid, p_stats);
+        remove_created_tail_object(p_src_rec, src_manifest, p_stats);
       }
       return ret;
     }
@@ -959,7 +1001,7 @@ namespace rgw::dedup {
       ldpp_dout(dpp, 5) << __func__ << "::abort! src_head_size=" << src_head_size
                         << "::tgt_head_size=" << tgt_head_size << dendl;
       if (p_src_rec->s.flags.is_split_head()) {
-        remove_created_tail_object(dpp, src_ioctx, tail_oid, p_stats);
+        remove_created_tail_object(p_src_rec, src_manifest, p_stats);
       }
       // TBD: can we create a test case (requires control over head-object-size)??
       return -ECANCELED;
@@ -971,7 +1013,7 @@ namespace rgw::dedup {
     ret = inc_ref_count_by_manifest(ref_tag, src_oid, src_manifest);
     if (unlikely(ret != 0)) {
       if (p_src_rec->s.flags.is_split_head()) {
-        remove_created_tail_object(dpp, src_ioctx, tail_oid, p_stats);
+        remove_created_tail_object(p_src_rec, src_manifest, p_stats);
       }
       return ret;
     }
@@ -1011,7 +1053,7 @@ namespace rgw::dedup {
                           << src_oid << "), err is " << cpp_strerror(-ret)<<dendl;
         rollback_ref_by_manifest(ref_tag, src_oid, src_manifest);
         if (p_src_rec->s.flags.is_split_head()) {
-          remove_created_tail_object(dpp, src_ioctx, tail_oid, p_stats);
+          remove_created_tail_object(p_src_rec, src_manifest, p_stats);
         }
         return ret;
       }
@@ -1126,12 +1168,6 @@ namespace rgw::dedup {
                        << p_tgt_rec->s.md5_low << std::dec << dendl;
   }
 
-  //---------------------------------------------------------------------------
-  static inline bool invalid_tail_placement(const rgw_bucket_placement& tail_placement)
-  {
-    return (tail_placement.bucket.name.empty() || tail_placement.placement_rule.name.empty());
-  }
-
   //---------------------------------------------------------------------------
   static void set_explicit_tail_placement(const DoutPrefixProvider* dpp,
                                           RGWObjManifest *p_manifest,// IN-OUT PARAM
@@ -1636,7 +1672,6 @@ namespace rgw::dedup {
   int Background::split_head_object(disk_record_t *p_src_rec, // IN-OUT PARAM
                                     RGWObjManifest &src_manifest, // IN/OUT PARAM
                                     const disk_record_t *p_tgt_rec,
-                                    std::string &tail_oid, // OUT PARAM
                                     md5_stats_t *p_stats)
   {
     ldpp_dout(dpp, 20) << __func__ << "::" << p_src_rec->obj_name << "::"
@@ -1683,22 +1718,11 @@ namespace rgw::dedup {
       }
     }
 
-    std::string tail_name = generate_split_head_tail_name(src_manifest);
-    const rgw_bucket_placement &tail_placement = src_manifest.get_tail_placement();
-    // Tail placement_rule was fixed before committed to SLAB, if looks bad -> abort
-    if (unlikely(invalid_tail_placement(tail_placement))) {
-      p_stats->split_head_no_tail_placement++;
-      ldpp_dout(dpp, 1) << __func__ << "::invalid_tail_placement -> abort" << dendl;
-      return -EINVAL;
-    }
-
-    const rgw_bucket *p_bucket = &tail_placement.bucket;
-    // tail objects might be on another storage_class/pool, need another ioctx
     librados::IoCtx tail_ioctx;
-    ret = get_ioctx_internal(dpp, driver, store, tail_name, p_src_rec->instance,
-                             *p_bucket, &tail_ioctx, &tail_oid);
+    std::string tail_oid, tail_name;
+    ret = get_tail_ioctx(p_src_rec, src_manifest, p_stats, &tail_ioctx, &tail_name,
+                         &tail_oid);
     if (unlikely(ret != 0)) {
-      ldpp_dout(dpp, 1) << __func__ << "::ERR: failed get_ioctx_internal()" << dendl;
       return ret;
     }
 
@@ -1736,7 +1760,7 @@ namespace rgw::dedup {
       ldpp_dout(dpp, 20) << __func__ << "::wrote tail obj:" << tail_oid << "::ret="
                          << ret << dendl;
     }
-
+    const rgw_bucket *p_bucket = &(src_manifest.get_tail_placement().bucket);
     build_and_set_explicit_manifest(dpp, p_bucket, tail_name, &src_manifest);
 
     bufferlist manifest_bl;
@@ -1753,7 +1777,6 @@ namespace rgw::dedup {
                                              RGWObjManifest &src_manifest,
                                              const RGWObjManifest &tgt_manifest,
                                              const dedup_table_t::value_t *p_src_val,
-                                             std::string &tail_oid, // OUT PARAM
                                              md5_stats_t *p_stats)
   {
     int ret = 0;
@@ -1798,7 +1821,7 @@ namespace rgw::dedup {
     // in the obj-attributes
     uint64_t head_size = src_manifest.get_head_size();
     if (should_split_head(head_size, src_manifest.get_obj_size())) {
-      ret = split_head_object(p_src_rec, src_manifest, p_tgt_rec, tail_oid, p_stats);
+      ret = split_head_object(p_src_rec, src_manifest, p_tgt_rec, p_stats);
       // compare_strong_hash() is called internally by split_head_object()
       return (ret == 0);
     }
@@ -2027,10 +2050,8 @@ namespace rgw::dedup {
       return 0;
     }
 
-
-    std::string tail_oid;
     bool success = check_and_set_strong_hash(p_src_rec, p_tgt_rec, src_manifest,
-                                             tgt_manifest, &src_val, tail_oid, p_stats);
+                                             tgt_manifest, &src_val, p_stats);
     if (unlikely(!success)) {
       if (p_src_rec->s.flags.hash_calculated() && !src_val.has_valid_hash()) {
         // set hash attributes on head objects to save calc next time
@@ -2049,7 +2070,7 @@ namespace rgw::dedup {
     }
 
     ret = dedup_object(p_src_rec, p_tgt_rec, src_manifest, tgt_manifest, p_stats,
-                       &src_val, tail_oid);
+                       &src_val);
     if (ret == 0) {
       ldpp_dout(dpp, 20) << __func__ << "::dedup success " << p_src_rec->obj_name << dendl;
       p_stats->deduped_objects++;
index adca55efebc5c747b2811564e87caf2ac81b1379..99af451cd4354178e51c89583df4bbc18da1c245 100644 (file)
@@ -99,6 +99,15 @@ namespace rgw::dedup {
 
     inline uint64_t __calc_deduped_bytes(uint16_t num_parts, uint64_t size_bytes);
     inline bool should_split_head(uint64_t head_size, uint64_t obj_size);
+    int get_tail_ioctx(const disk_record_t *p_rec,
+                       const RGWObjManifest &manifest,
+                       md5_stats_t *p_stats /*IN-OUT*/,
+                       librados::IoCtx *p_ioctx /*OUT*/,
+                       std::string *p_tail_name /*OUT*/,
+                       std::string *p_oid /*OUT*/);
+    void remove_created_tail_object(const disk_record_t *p_rec,
+                                    const RGWObjManifest &manifest,
+                                    md5_stats_t *p_stats /*IN-OUT*/);
     void run();
     int  setup(struct dedup_epoch_t*);
     void work_shards_barrier(work_shard_t num_work_shards);
@@ -191,7 +200,6 @@ namespace rgw::dedup {
     int split_head_object(disk_record_t *p_src_rec,     // IN/OUT PARAM
                           RGWObjManifest &src_manifest, // IN/OUT PARAM
                           const disk_record_t *p_tgt_rec,
-                          std::string &tail_oid,        // OUT PARAM
                           md5_stats_t *p_stats);
 
     int add_obj_attrs_to_record(disk_record_t         *p_rec,
@@ -211,7 +219,6 @@ namespace rgw::dedup {
                                    RGWObjManifest &src_manifest,
                                    const RGWObjManifest &tgt_manifest,
                                    const dedup_table_t::value_t *p_src_val,
-                                   std::string &tail_oid,    // OUT PARAM
                                    md5_stats_t *p_stats);
     int try_deduping_record(dedup_table_t   *p_table,
                             disk_record_t   *p_rec,
@@ -234,8 +241,7 @@ namespace rgw::dedup {
                      const RGWObjManifest         &src_manifest,
                      const RGWObjManifest         &tgt_manifest,
                      md5_stats_t                  *p_stats,
-                     const dedup_table_t::value_t *p_src_val,
-                     const std::string            &tail_oid);
+                     const dedup_table_t::value_t *p_src_val);
 #endif
     int  remove_slabs(unsigned worker_id, unsigned md5_shard, uint32_t slab_count);
     int  init_rados_access_handles(bool init_pool);
index 15b746a537a6294e39faf9d8b75c6192c27dea29..6541726b0f231acc6dedb8512f5eb70ba4ed7321 100644 (file)
@@ -1137,6 +1137,28 @@ def verify_objects(bucket_name, files, conn, expected_results, config, delete_du
 
     log.debug("verify_objects::completed successfully!!")
 
+#-------------------------------------------------------------------------------
+def verify_objects_copy(bucket_name, files, conn, expected_results, config):
+    if expected_results:
+        assert expected_results == count_object_parts_in_all_buckets(True)
+
+    tmpfile = OUT_DIR + "temp"
+    for f in files:
+        filename=f[0]
+        obj_size=f[1]
+        num_copies=f[2]
+        log.debug("comparing copy =%s, size=%d, copies=%d", filename, obj_size, num_copies)
+
+        for i in range(0, num_copies):
+            filecmp.clear_cache()
+            key = gen_object_name(filename, i) + "_cp"
+            conn.download_file(bucket_name, key, tmpfile, Config=config)
+            equal = filecmp.cmp(tmpfile, OUT_DIR + filename, shallow=False)
+            assert equal ,"Files %s and %s differ!!" % (key, tmpfile)
+            os.remove(tmpfile)
+
+    log.debug("verify_objects_copy::completed successfully!!")
+
 #-------------------------------------------------------------------------------
 def verify_objects_multi(files, conns, bucket_names, expected_results, config, delete_dups):
     if expected_results:
@@ -2288,16 +2310,19 @@ def test_copy_after_dedup():
 
     prepare_test()
     log.debug("test_copy_after_dedup: connect to AWS ...")
+    config=default_config
     max_copies_count=3
     num_files=8
     files=[]
     min_size=8*MB
 
+    # create files in range [1MB, 4MB] to force split-head
+    # This will verify server-side-copy with split-head generated tails
+    gen_files_in_range(files, num_files, 1*MB, 4*MB)
+
     # create files in range [8MB, 32MB] aligned on RADOS_OBJ_SIZE
-    gen_files_in_range(files, num_files, min_size, min_size*4)
+    gen_files_in_range(files, num_files, 8*MB, 32*MB)
 
-    # add file with excatly MULTIPART_SIZE
-    write_random(files, MULTIPART_SIZE, 2, 2)
     bucket_cp= gen_bucket_name()
     bucket_names=[]
     try:
@@ -2305,11 +2330,14 @@ def test_copy_after_dedup():
         conn.create_bucket(Bucket=bucket_cp)
         bucket_names=create_buckets(conn, max_copies_count)
         conns=[conn] * max_copies_count
-        dry_run=False
-        ret = simple_dedup_with_tenants(files, conns, bucket_names, default_config,
-                                        dry_run)
+        indices=[0] * len(files)
+        ret=upload_objects_multi(files, conns, bucket_names, indices, config)
         expected_results = ret[0]
         dedup_stats = ret[1]
+        dry_run=False
+        exec_dedup(dedup_stats, dry_run)
+        verify_objects_multi(files, conns, bucket_names, expected_results, config,
+                             False)
 
         cp_head_count=0
         for f in files:
@@ -2325,7 +2353,23 @@ def test_copy_after_dedup():
                 conn.copy_object(CopySource=base_obj, Bucket=bucket_cp, Key=key_cp)
                 cp_head_count += 1
 
+        # Make sure that server-side-copy behaved as expected copying only the head
+        # object and linking to the existing tail-objects
         assert (expected_results + cp_head_count) == count_object_parts_in_all_buckets(False, 0)
+        # delete the original objects and verify server-side-copy objects are valid
+        for (bucket_name, conn) in zip(bucket_names, conns):
+            delete_bucket_with_all_objects(bucket_name, conn)
+
+        result = admin(['gc', 'process', '--include-all'])
+        assert result[1] == 0
+        bucket_names.clear()
+        conns.clear()
+
+        # At this point the original obejcts are all removed
+        # Objects created by server-side-copy should keep the tail in place
+        # because of teh refcount
+        verify_objects_copy(bucket_cp, files, conn, expected_results, config)
+
     finally:
         # cleanup must be executed even after a failure
         delete_bucket_with_all_objects(bucket_cp, conn)