From 59d705b3ab0fb172c906c68a54ddb49c7e08615b Mon Sep 17 00:00:00 2001 From: benhanokh Date: Mon, 18 May 2026 09:38:35 +0300 Subject: [PATCH] rgw/dedup: dedup-filter - add empty-file validation, fix dump default param, fix typos Signed-off-by: Gabriel BenHanokh --- doc/radosgw/s3_objects_dedup.rst | 3 ++- src/rgw/driver/rados/rgw_dedup.cc | 19 ++++++---------- src/rgw/driver/rados/rgw_dedup_filter.cc | 8 ++++++- src/rgw/driver/rados/rgw_dedup_utils.cc | 17 +++++++++----- src/rgw/driver/rados/rgw_dedup_utils.h | 2 +- src/test/rgw/dedup/test_dedup.py | 28 +++++++++++++++++++++--- 6 files changed, 53 insertions(+), 24 deletions(-) diff --git a/doc/radosgw/s3_objects_dedup.rst b/doc/radosgw/s3_objects_dedup.rst index 0108af23025b..249f611e065a 100644 --- a/doc/radosgw/s3_objects_dedup.rst +++ b/doc/radosgw/s3_objects_dedup.rst @@ -54,7 +54,8 @@ The ``dedup estimate`` and ``dedup exec`` commands also accept filter options: Mutually exclusive with ``--allow-storage-class-list``. **File format:** One name per line. Lines starting with or containing ``#`` -are treated as comments. Whitespace is ignored. +are treated as comments. Whitespace is ignored. The file must contain at least +one valid name; an empty or all-comment file is rejected. Skipped Objects diff --git a/src/rgw/driver/rados/rgw_dedup.cc b/src/rgw/driver/rados/rgw_dedup.cc index 23f7fd90acc2..260a8315f5e4 100644 --- a/src/rgw/driver/rados/rgw_dedup.cc +++ b/src/rgw/driver/rados/rgw_dedup.cc @@ -3128,20 +3128,15 @@ namespace rgw::dedup { break; case URGENT_MSG_RESTART: if (!d_ctl.dedup_exec) { - // Decode optional filter (may not be present for older senders) + // Decode optional filter { bool has_filter = false; - try { - ceph::decode(has_filter, bl_iter); - if (has_filter) { - decode(d_filter, bl_iter); - ldpp_dout(dpp, 5) << __func__ << "::RESTART with filter" << dendl; - } - else { - d_filter = dedup_filter_t{}; - } - } catch (buffer::error&) { - // older sender without filter - reset to no-filter + ceph::decode(has_filter, bl_iter); + if (has_filter) { + decode(d_filter, bl_iter); + ldpp_dout(dpp, 5) << __func__ << "::RESTART with filter" << dendl; + } + else { d_filter = dedup_filter_t{}; } } diff --git a/src/rgw/driver/rados/rgw_dedup_filter.cc b/src/rgw/driver/rados/rgw_dedup_filter.cc index f7e9dd669b70..3102d579b3c7 100644 --- a/src/rgw/driver/rados/rgw_dedup_filter.cc +++ b/src/rgw/driver/rados/rgw_dedup_filter.cc @@ -83,7 +83,7 @@ namespace rgw::dedup { { std::ifstream f(path); if (!f.is_open()) { - ldpp_dout(dpp, 1) << __func__ << ":: failed to open filter file: " << path << dendl; + ldpp_dout(dpp, 1) << __func__ << "::Failed to open filter file: " << path << dendl; return -ENOENT; } @@ -121,6 +121,12 @@ namespace rgw::dedup { } } + if (name_set.empty()) { + // we should have at least one valid filter + ldpp_dout(dpp, 1) << __func__ << "::No valid filter in file: " << path << dendl; + return -ENODATA; + } + return 0; } diff --git a/src/rgw/driver/rados/rgw_dedup_utils.cc b/src/rgw/driver/rados/rgw_dedup_utils.cc index fcea57fe9c64..b061b213c159 100644 --- a/src/rgw/driver/rados/rgw_dedup_utils.cc +++ b/src/rgw/driver/rados/rgw_dedup_utils.cc @@ -393,7 +393,9 @@ namespace rgw::dedup { // main section { Formatter::ObjectSection main(*f, "main"); - f->dump_unsigned("Num Work-Shards", num_shards); + if (num_shards) { + f->dump_unsigned("Num Work-Shards", num_shards); + } f->dump_unsigned("Ingress Objs count", this->ingress_obj); f->dump_unsigned("Accum byte size Ingress Objs", this->ingress_obj_bytes); f->dump_unsigned("Egress Records count", this->egress_records); @@ -442,10 +444,13 @@ namespace rgw::dedup { this->ingress_skip_too_small_bytes); } - if (this->ingress_skip_filtered_bucket && num_shards) { - // buckets are scanned once per worker-shard - f->dump_unsigned("Ingress skip: filtered bucket", - this->ingress_skip_filtered_bucket/num_shards); + if (this->ingress_skip_filtered_bucket) { + auto skip_filtered_count = this->ingress_skip_filtered_bucket; + if (num_shards) { + // buckets are scanned once per worker-shard + skip_filtered_count /= num_shards; + } + f->dump_unsigned("Ingress skip: filtered bucket", skip_filtered_count); } if (this->ingress_skip_filtered_storage_class) { f->dump_unsigned("Ingress skipped filtered storage class, num objects skipped", @@ -468,7 +473,7 @@ namespace rgw::dedup { std::ostream& operator<<(std::ostream &out, const worker_stats_t &s) { JSONFormatter formatter(false); - s.dump(&formatter, 1); + s.dump(&formatter); std::stringstream sstream; formatter.flush(sstream); out << sstream.str(); diff --git a/src/rgw/driver/rados/rgw_dedup_utils.h b/src/rgw/driver/rados/rgw_dedup_utils.h index 2c2fe4b9c8ab..1159c130c605 100644 --- a/src/rgw/driver/rados/rgw_dedup_utils.h +++ b/src/rgw/driver/rados/rgw_dedup_utils.h @@ -172,7 +172,7 @@ namespace rgw::dedup { struct worker_stats_t { worker_stats_t& operator +=(const worker_stats_t& other); - void dump(Formatter *f, unsigned num_shards) const; + void dump(Formatter *f, unsigned num_shards = 0) const; uint64_t ingress_obj = 0; uint64_t ingress_obj_bytes = 0; diff --git a/src/test/rgw/dedup/test_dedup.py b/src/test/rgw/dedup/test_dedup.py index 84f15516e1b1..b09d5f58f15e 100644 --- a/src/test/rgw/dedup/test_dedup.py +++ b/src/test/rgw/dedup/test_dedup.py @@ -3699,6 +3699,17 @@ def test_dedup_filter_bucket_list_parsing(): result = admin(['dedup', 'estimate', '--allow-bucket-list', '/nonexistent/bucket_list.txt']) assert result[1] != 0, "Expected failure for non-existent filter file" + + # 3. Empty file + empty_file = OUT_DIR + "empty_buckets.txt" + with open(empty_file, "w") as f: + pass + + result = admin(['dedup', 'estimate', '--allow-bucket-list', empty_file]) + assert result[1] != 0, "Expected failure for empty filter file" + + result = admin(['dedup', 'estimate', '--deny-bucket-list', empty_file]) + assert result[1] != 0, "Expected failure for empty filter file" finally: cleanup_local() @@ -3712,8 +3723,8 @@ def test_dedup_filter_storage_class_list_parsing(): prepare_test() try: # 1. Mutual exclusivity: --allow-storage-class-list and --deny-storage-class-list together. - allow_file = OUT_DIR + "allow_storage_classs.txt" - deny_file = OUT_DIR + "deny_storage_classs.txt" + allow_file = OUT_DIR + "allow_storage_class.txt" + deny_file = OUT_DIR + "deny_storage_class.txt" write_filter_list_file(allow_file, ['STORAGECLASS1']) write_filter_list_file(deny_file, ['STORAGECLASS2']) result = admin(['dedup', 'estimate', @@ -3727,6 +3738,17 @@ def test_dedup_filter_storage_class_list_parsing(): result = admin(['dedup', 'estimate', '--allow-storage-class-list', '/nonexistent/storage_class_list.txt']) assert result[1] != 0, "Expected failure for non-existent filter file" + + # 3. Empty file + empty_file = OUT_DIR + "empty_storage_class.txt" + with open(empty_file, "w") as f: + pass + + result = admin(['dedup', 'estimate', '--allow-storage-class-list', empty_file]) + assert result[1] != 0, "Expected failure for empty filter file" + + result = admin(['dedup', 'estimate', '--deny-storage-class-list', empty_file]) + assert result[1] != 0, "Expected failure for empty filter file" finally: cleanup_local() @@ -3786,7 +3808,7 @@ def dedup_filter_allow_deny_storage_class_common(dry_run, filter_mode_allow): """ prepare_test() config=default_config - filter_file = OUT_DIR + "deny_storage_classs.txt" + filter_file = OUT_DIR + "deny_storage_class.txt" bucket_name = gen_bucket_name() conn=get_single_connection() files=[] -- 2.47.3