]> git-server-git.apps.pok.os.sepia.ceph.com Git - ceph.git/commitdiff
rgw/dedup: dedup-filter - add empty-file validation, fix dump default param, fix... 68575/head
authorbenhanokh <gbenhano@redhat.com>
Mon, 18 May 2026 06:38:35 +0000 (09:38 +0300)
committerbenhanokh <gbenhano@redhat.com>
Tue, 19 May 2026 03:50:55 +0000 (06:50 +0300)
Signed-off-by: Gabriel BenHanokh <gbenhano@redhat.com>
doc/radosgw/s3_objects_dedup.rst
src/rgw/driver/rados/rgw_dedup.cc
src/rgw/driver/rados/rgw_dedup_filter.cc
src/rgw/driver/rados/rgw_dedup_utils.cc
src/rgw/driver/rados/rgw_dedup_utils.h
src/test/rgw/dedup/test_dedup.py

index 0108af23025b6191eef5600705c8a786ae05603f..249f611e065a85307a226f8398067a3d6c3d8cad 100644 (file)
@@ -54,7 +54,8 @@ The ``dedup estimate`` and ``dedup exec`` commands also accept filter options:
    Mutually exclusive with ``--allow-storage-class-list``.
 
 **File format:** One name per line. Lines starting with or containing ``#``
-are treated as comments. Whitespace is ignored.
+are treated as comments. Whitespace is ignored. The file must contain at least
+one valid name; an empty or all-comment file is rejected.
 
 
 Skipped Objects
index 23f7fd90acc2bf1a79cc4b63a91e23b5f4d27fa7..260a8315f5e40f0445d8f546cc31d106d12934c3 100644 (file)
@@ -3128,20 +3128,15 @@ namespace rgw::dedup {
       break;
     case URGENT_MSG_RESTART:
       if (!d_ctl.dedup_exec) {
-        // Decode optional filter (may not be present for older senders)
+        // Decode optional filter
         {
           bool has_filter = false;
-          try {
-            ceph::decode(has_filter, bl_iter);
-            if (has_filter) {
-              decode(d_filter, bl_iter);
-              ldpp_dout(dpp, 5) << __func__ << "::RESTART with filter" << dendl;
-            }
-            else {
-              d_filter = dedup_filter_t{};
-            }
-          } catch (buffer::error&) {
-            // older sender without filter - reset to no-filter
+          ceph::decode(has_filter, bl_iter);
+          if (has_filter) {
+            decode(d_filter, bl_iter);
+            ldpp_dout(dpp, 5) << __func__ << "::RESTART with filter" << dendl;
+          }
+          else {
             d_filter = dedup_filter_t{};
           }
         }
index f7e9dd669b70cdca45ef81ef4656bbcf6972b404..3102d579b3c7c5fb4f1347d706e68209cdc23dd6 100644 (file)
@@ -83,7 +83,7 @@ namespace rgw::dedup {
   {
     std::ifstream f(path);
     if (!f.is_open()) {
-      ldpp_dout(dpp, 1) << __func__ << ":: failed to open filter file: " << path << dendl;
+      ldpp_dout(dpp, 1) << __func__ << "::Failed to open filter file: " << path << dendl;
       return -ENOENT;
     }
 
@@ -121,6 +121,12 @@ namespace rgw::dedup {
       }
     }
 
+    if (name_set.empty()) {
+      // we should have at least one valid filter
+      ldpp_dout(dpp, 1) << __func__ << "::No valid filter in file: " << path << dendl;
+      return -ENODATA;
+    }
+
     return 0;
   }
 
index fcea57fe9c6437cf095c16c68618c2c8916c26ca..b061b213c159bc9423990dfd74db9e38d2992ffe 100644 (file)
@@ -393,7 +393,9 @@ namespace rgw::dedup {
     // main section
     {
       Formatter::ObjectSection main(*f, "main");
-      f->dump_unsigned("Num Work-Shards", num_shards);
+      if (num_shards) {
+        f->dump_unsigned("Num Work-Shards", num_shards);
+      }
       f->dump_unsigned("Ingress Objs count", this->ingress_obj);
       f->dump_unsigned("Accum byte size Ingress Objs", this->ingress_obj_bytes);
       f->dump_unsigned("Egress Records count", this->egress_records);
@@ -442,10 +444,13 @@ namespace rgw::dedup {
                          this->ingress_skip_too_small_bytes);
       }
 
-      if (this->ingress_skip_filtered_bucket && num_shards) {
-        // buckets are scanned once per worker-shard
-        f->dump_unsigned("Ingress skip: filtered bucket",
-                         this->ingress_skip_filtered_bucket/num_shards);
+      if (this->ingress_skip_filtered_bucket) {
+        auto skip_filtered_count = this->ingress_skip_filtered_bucket;
+        if (num_shards) {
+          // buckets are scanned once per worker-shard
+          skip_filtered_count /= num_shards;
+        }
+        f->dump_unsigned("Ingress skip: filtered bucket", skip_filtered_count);
       }
       if (this->ingress_skip_filtered_storage_class) {
         f->dump_unsigned("Ingress skipped filtered storage class, num objects skipped",
@@ -468,7 +473,7 @@ namespace rgw::dedup {
   std::ostream& operator<<(std::ostream &out, const worker_stats_t &s)
   {
     JSONFormatter formatter(false);
-    s.dump(&formatter, 1);
+    s.dump(&formatter);
     std::stringstream sstream;
     formatter.flush(sstream);
     out << sstream.str();
index 2c2fe4b9c8aba998b2fab72e1b4cc09e157bb7f3..1159c130c6055e73596ea2e8f86c66a04640e30e 100644 (file)
@@ -172,7 +172,7 @@ namespace rgw::dedup {
 
   struct worker_stats_t {
     worker_stats_t& operator +=(const worker_stats_t& other);
-    void dump(Formatter *f, unsigned num_shards) const;
+    void dump(Formatter *f, unsigned num_shards = 0) const;
 
     uint64_t ingress_obj = 0;
     uint64_t ingress_obj_bytes = 0;
index 84f15516e1b147c8b677f3c7b32f6941740714fd..b09d5f58f15e926c5f6e8df3d77e829291316d90 100644 (file)
@@ -3699,6 +3699,17 @@ def test_dedup_filter_bucket_list_parsing():
         result = admin(['dedup', 'estimate', '--allow-bucket-list',
                         '/nonexistent/bucket_list.txt'])
         assert result[1] != 0, "Expected failure for non-existent filter file"
+
+        # 3. Empty file
+        empty_file = OUT_DIR + "empty_buckets.txt"
+        with open(empty_file, "w") as f:
+            pass
+
+        result = admin(['dedup', 'estimate', '--allow-bucket-list', empty_file])
+        assert result[1] != 0, "Expected failure for empty filter file"
+
+        result = admin(['dedup', 'estimate', '--deny-bucket-list', empty_file])
+        assert result[1] != 0, "Expected failure for empty filter file"
     finally:
         cleanup_local()
 
@@ -3712,8 +3723,8 @@ def test_dedup_filter_storage_class_list_parsing():
     prepare_test()
     try:
         # 1. Mutual exclusivity: --allow-storage-class-list and --deny-storage-class-list together.
-        allow_file = OUT_DIR + "allow_storage_classs.txt"
-        deny_file  = OUT_DIR + "deny_storage_classs.txt"
+        allow_file = OUT_DIR + "allow_storage_class.txt"
+        deny_file  = OUT_DIR + "deny_storage_class.txt"
         write_filter_list_file(allow_file, ['STORAGECLASS1'])
         write_filter_list_file(deny_file,  ['STORAGECLASS2'])
         result = admin(['dedup', 'estimate',
@@ -3727,6 +3738,17 @@ def test_dedup_filter_storage_class_list_parsing():
         result = admin(['dedup', 'estimate', '--allow-storage-class-list',
                         '/nonexistent/storage_class_list.txt'])
         assert result[1] != 0, "Expected failure for non-existent filter file"
+
+        # 3. Empty file
+        empty_file = OUT_DIR + "empty_storage_class.txt"
+        with open(empty_file, "w") as f:
+            pass
+
+        result = admin(['dedup', 'estimate', '--allow-storage-class-list', empty_file])
+        assert result[1] != 0, "Expected failure for empty filter file"
+
+        result = admin(['dedup', 'estimate', '--deny-storage-class-list', empty_file])
+        assert result[1] != 0, "Expected failure for empty filter file"
     finally:
         cleanup_local()
 
@@ -3786,7 +3808,7 @@ def dedup_filter_allow_deny_storage_class_common(dry_run, filter_mode_allow):
     """
     prepare_test()
     config=default_config
-    filter_file = OUT_DIR + "deny_storage_classs.txt"
+    filter_file = OUT_DIR + "deny_storage_class.txt"
     bucket_name = gen_bucket_name()
     conn=get_single_connection()
     files=[]