From: Gabriel BenHanokh Date: Wed, 2 Jul 2025 09:44:47 +0000 (+0000) Subject: Replace SHA256 with faster hash calculation using BLAKE3 X-Git-Tag: v21.0.0~50^2~37^2 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=b5cbc46fecc5c30b9680ed671e85505018dd3e1b;p=ceph.git Replace SHA256 with faster hash calculation using BLAKE3 Signed-off-by: Gabriel BenHanokh --- diff --git a/doc/radosgw/index.rst b/doc/radosgw/index.rst index 2b97ce75c3d0..4c05563bd1fc 100644 --- a/doc/radosgw/index.rst +++ b/doc/radosgw/index.rst @@ -91,4 +91,4 @@ Cluster with one API and then retrieve that data with the other API. Metrics UADK Acceleration for Compression Bucket Logging - + Full Object Deduplication diff --git a/doc/radosgw/s3_objects_dedup.rst b/doc/radosgw/s3_objects_dedup.rst index e8e351a96350..2c848113fa6a 100644 --- a/doc/radosgw/s3_objects_dedup.rst +++ b/doc/radosgw/s3_objects_dedup.rst @@ -1,25 +1,28 @@ ===================== Full RGW Object Dedup ===================== -Adds a ``radosgw-admin`` command to collect and report deduplication stats. - -.. note:: This utility doesn't perform dedup and doesn't make any - change to the existing system and will only collect - statistics and report them. +Adds ``radosgw-admin`` commands to remove duplicated RGW tail-objects and to collect and report deduplication stats. ************** Admin commands ************** -- ``radosgw-admin dedup stats``: - Collects & displays last dedup statistics. +- ``radosgw-admin dedup estimate``: + Starts a new dedup estimate session (aborting first existing session if exists). + + It doesn't make any change to the existing system and will only collect statistics and report them. +- ``radosgw-admin dedup restart --yes-i-really-mean-it``: + Starts a new dedup session (aborting first existing session if exists). + It will perform a full dedup, finding duplicated tail-objects and removing them.
+ + This command can lead to **data-loss** and should not be used on production data!! - ``radosgw-admin dedup pause``: Pauses an active dedup session (dedup resources are not released). - ``radosgw-admin dedup resume``: Resumes a paused dedup session. - ``radosgw-admin dedup abort``: Aborts an active dedup session and release all resources used by it. -- ``radosgw-admin dedup estimate``: - Starts a new dedup estimate session (aborting first existing session if exists). +- ``radosgw-admin dedup stats``: + Collects & displays last dedup statistics. *************** Skipped Objects @@ -31,10 +34,7 @@ Dedup Estimate process skips the following objects: - Objects with different pools. - Objects with different storage classes. -The dedup process itself (which will be released in a later Ceph release) will also skip -**compressed** and **user-encrypted** objects, but the estimate -process will accept them (since we don't have access to that -information during the estimate process). +The full dedup process skips all the above and it also skips **compressed** and **user-encrypted** objects. ******************* Estimate Processing @@ -54,6 +54,24 @@ the underlying media storing the objects (SSD/HDD) since the bucket indices are virtually always stored on a fast medium (SSD with heavy memory caching). +********************* +Full Dedup Processing +********************* +The Full Dedup process begins by constructing a dedup table from the bucket indices, similar to the estimate process above. + +This table is then scanned linearly to purge objects without duplicates, leaving only dedup candidates. + +Next, we iterate through these dedup candidate objects, reading their complete information from the object metadata (a per-object RADOS operation). +During this step, we filter out **compressed** and **user-encrypted** objects. + +Following this, we calculate a strong-hash of the object data, which involves a full-object read and is a resource-intensive operation. 
+This strong-hash ensures that the dedup candidates are indeed perfect matches. +If they are, we proceed with the deduplication: + +- incrementing the reference count on the source tail-objects one by one. +- copying the manifest from the source to the target. +- removing all tail-objects on the target. + ************ Memory Usage ************ diff --git a/src/rgw/radosgw-admin/radosgw-admin.cc b/src/rgw/radosgw-admin/radosgw-admin.cc index 54246e68b8df..c0e6ba7ca961 100644 --- a/src/rgw/radosgw-admin/radosgw-admin.cc +++ b/src/rgw/radosgw-admin/radosgw-admin.cc @@ -152,7 +152,7 @@ void usage() cout << " caps rm remove user capabilities\n"; cout << " dedup stats Display dedup statistics from the last run\n"; cout << " dedup estimate Runs dedup in estimate mode (no changes will be made)\n"; - cout << " dedup restart Restart dedup\n"; + cout << " dedup restart Restart dedup; must include --yes-i-really-mean-it to activate\n"; cout << " dedup abort Abort dedup\n"; cout << " dedup pause Pause dedup\n"; cout << " dedup resume Resume paused dedup\n"; @@ -3652,7 +3652,6 @@ int main(int argc, const char **argv) int skip_zero_entries = false; // log show int purge_keys = false; int yes_i_really_mean_it = false; - int tech_preview = false; int delete_child_objects = false; int fix = false; int remove_bad = false; @@ -4113,8 +4112,6 @@ int main(int argc, const char **argv) // do nothing } else if (ceph_argparse_binary_flag(args, i, &yes_i_really_mean_it, NULL, "--yes-i-really-mean-it", (char*)NULL)) { // do nothing - } else if (ceph_argparse_binary_flag(args, i, &tech_preview, NULL, "--tech-preview", (char*)NULL)) { - // do nothing } else if (ceph_argparse_binary_flag(args, i, &fix, NULL, "--fix", (char*)NULL)) { // do nothing } else if (ceph_argparse_binary_flag(args, i, &remove_bad, NULL, "--remove-bad", (char*)NULL)) { @@ -9246,12 +9243,6 @@ next: << std::endl; return EINVAL; } - if (!tech_preview) { - cerr << "Full Dedup is supplied as a tech-preview only and should not 
be used on production systems!\n" - << "Please acknowledge that you understand this is a tech preview (requires --tech-preview)" - << std::endl; - return EINVAL; - } dedup_type = dedup_req_type_t::DEDUP_TYPE_FULL; #ifndef FULL_DEDUP_SUPPORT std::cerr << "Only dedup estimate is supported!" << std::endl; diff --git a/src/rgw/rgw_common.h b/src/rgw/rgw_common.h index 05ea592be945..67365e388170 100644 --- a/src/rgw/rgw_common.h +++ b/src/rgw/rgw_common.h @@ -89,6 +89,7 @@ using ceph::crypto::MD5; #define RGW_ATTR_ETAG RGW_ATTR_PREFIX "etag" #define RGW_ATTR_CKSUM RGW_ATTR_PREFIX "cksum" #define RGW_ATTR_SHA256 RGW_ATTR_PREFIX "x-amz-content-sha256" +#define RGW_ATTR_BLAKE3 RGW_ATTR_PREFIX "blake3" #define RGW_ATTR_BUCKETS RGW_ATTR_PREFIX "buckets" #define RGW_ATTR_META_PREFIX RGW_ATTR_PREFIX RGW_AMZ_META_PREFIX #define RGW_ATTR_CONTENT_TYPE RGW_ATTR_PREFIX "content_type" diff --git a/src/rgw/rgw_dedup.cc b/src/rgw/rgw_dedup.cc index 3374d00c494a..7c00ddf6f2a7 100644 --- a/src/rgw/rgw_dedup.cc +++ b/src/rgw/rgw_dedup.cc @@ -704,7 +704,7 @@ namespace rgw::dedup { //--------------------------------------------------------------------------- static void init_cmp_pairs(const disk_record_t *p_rec, const bufferlist &etag_bl, - bufferlist &sha256_bl, // OUT PARAM + bufferlist &hash_bl, // OUT PARAM librados::ObjectWriteOperation *p_op) { p_op->cmpxattr(RGW_ATTR_ETAG, CEPH_OSD_CMPXATTR_OP_EQ, etag_bl); @@ -712,15 +712,15 @@ namespace rgw::dedup { // Can replace it with something cheaper like size/version? 
p_op->cmpxattr(RGW_ATTR_MANIFEST, CEPH_OSD_CMPXATTR_OP_EQ, p_rec->manifest_bl); - // SHA has 256 bit splitted into multiple 64bit units + // BLAKE3 hash has 256 bit splitted into multiple 64bit units const unsigned units = (256 / (sizeof(uint64_t)*8)); static_assert(units == 4); for (unsigned i = 0; i < units; i++) { - ceph::encode(p_rec->s.sha256[i], sha256_bl); + ceph::encode(p_rec->s.hash[i], hash_bl); } - if (!p_rec->s.flags.sha256_calculated()) { - p_op->cmpxattr(RGW_ATTR_SHA256, CEPH_OSD_CMPXATTR_OP_EQ, sha256_bl); + if (!p_rec->s.flags.hash_calculated()) { + p_op->cmpxattr(RGW_ATTR_BLAKE3, CEPH_OSD_CMPXATTR_OP_EQ, hash_bl); } } @@ -755,17 +755,17 @@ namespace rgw::dedup { ldpp_dout(dpp, 20) << __func__ << "::num_parts=" << p_tgt_rec->s.num_parts << "::ETAG=" << etag_bl.to_str() << dendl; - bufferlist hash_bl, manifest_hash_bl, tgt_sha256_bl; + bufferlist hash_bl, manifest_hash_bl, tgt_hash_bl; crypto::digest(p_src_rec->manifest_bl).encode(hash_bl); // Use a shorter hash (64bit instead of 160bit) hash_bl.splice(0, 8, &manifest_hash_bl); librados::ObjectWriteOperation tgt_op; - init_cmp_pairs(p_tgt_rec, etag_bl, tgt_sha256_bl, &tgt_op); + init_cmp_pairs(p_tgt_rec, etag_bl, tgt_hash_bl, &tgt_op); tgt_op.setxattr(RGW_ATTR_SHARE_MANIFEST, manifest_hash_bl); tgt_op.setxattr(RGW_ATTR_MANIFEST, p_src_rec->manifest_bl); - if (p_tgt_rec->s.flags.sha256_calculated()) { - tgt_op.setxattr(RGW_ATTR_SHA256, tgt_sha256_bl); - p_stats->set_sha256_attrs++; + if (p_tgt_rec->s.flags.hash_calculated()) { + tgt_op.setxattr(RGW_ATTR_BLAKE3, tgt_hash_bl); + p_stats->set_hash_attrs++; } std::string src_oid, tgt_oid; @@ -800,13 +800,13 @@ namespace rgw::dedup { // disk-record (as require an expensive random-disk-write). 
// When deduping C we can trust the shared_manifest state in the table and // skip a redundant update to SRC object attribute - bufferlist src_sha256_bl; + bufferlist src_hash_bl; librados::ObjectWriteOperation src_op; - init_cmp_pairs(p_src_rec, etag_bl, src_sha256_bl, &src_op); + init_cmp_pairs(p_src_rec, etag_bl, src_hash_bl, &src_op); src_op.setxattr(RGW_ATTR_SHARE_MANIFEST, manifest_hash_bl); - if (p_src_rec->s.flags.sha256_calculated()) { - src_op.setxattr(RGW_ATTR_SHA256, src_sha256_bl); - p_stats->set_sha256_attrs++; + if (p_src_rec->s.flags.hash_calculated()) { + src_op.setxattr(RGW_ATTR_BLAKE3, src_hash_bl); + p_stats->set_hash_attrs++; } ldpp_dout(dpp, 20) << __func__ <<"::send SRC CLS (Shared_Manifest)"<< dendl; @@ -824,57 +824,49 @@ namespace rgw::dedup { return ret; } - using ceph::crypto::SHA256; //--------------------------------------------------------------------------- - int Background::calc_object_sha256(const disk_record_t *p_rec, uint8_t *p_sha256) + int Background::calc_object_blake3(const disk_record_t *p_rec, uint8_t *p_hash) { - ldpp_dout(dpp, 20) << __func__ << "::p_rec->obj_name=" << p_rec->obj_name << dendl; - // Open questions - - // 1) do we need the secret if so what is the correct one to use? - // 2) are we passing the head/tail objects in the correct order? 
+ ldpp_dout(dpp, 20) << __func__ << "::obj_name=" << p_rec->obj_name << dendl; RGWObjManifest manifest; try { auto bl_iter = p_rec->manifest_bl.cbegin(); decode(manifest, bl_iter); } catch (buffer::error& err) { - ldpp_dout(dpp, 1) << __func__ << "::ERROR: bad src manifest" << dendl; + ldpp_dout(dpp, 1) << __func__ << "::ERROR: bad src manifest for: " + << p_rec->obj_name << dendl; return -EINVAL; } - std::string oid; - build_oid(p_rec->bucket_id, p_rec->obj_name, &oid); - librados::IoCtx head_ioctx; - const char *secret = "0555b35654ad1656d804f1b017cd26e9"; - TOPNSPC::crypto::HMACSHA256 hmac((const uint8_t*)secret, strlen(secret)); + + blake3_hasher hmac; + blake3_hasher_init(&hmac); for (auto p = manifest.obj_begin(dpp); p != manifest.obj_end(dpp); ++p) { rgw_raw_obj raw_obj = p.get_location().get_raw_obj(rados); rgw_rados_ref obj; int ret = rgw_get_rados_ref(dpp, rados_handle, raw_obj, &obj); if (ret < 0) { - ldpp_dout(dpp, 1) << __func__ << "::failed rgw_get_rados_ref() for raw_obj=" - << raw_obj << dendl; + ldpp_dout(dpp, 1) << __func__ << "::failed rgw_get_rados_ref() for oid: " + << raw_obj.oid << ", err is " << cpp_strerror(-ret) << dendl; return ret; } - if (oid == raw_obj.oid) { - ldpp_dout(dpp, 20) << __func__ << "::manifest: head object=" << oid << dendl; - head_ioctx = obj.ioctx; - } bufferlist bl; librados::IoCtx ioctx = obj.ioctx; // read full object ret = ioctx.read(raw_obj.oid, bl, 0, 0); if (ret > 0) { for (const auto& bptr : bl.buffers()) { - hmac.Update((const unsigned char *)bptr.c_str(), bptr.length()); + blake3_hasher_update(&hmac, (const unsigned char *)bptr.c_str(), bptr.length()); } } else { - ldpp_dout(dpp, 1) << __func__ << "::ERR: failed to read " << oid + ldpp_dout(dpp, 1) << __func__ << "::ERR: failed to read " << raw_obj.oid << ", error is " << cpp_strerror(-ret) << dendl; return ret; } } - hmac.Final(p_sha256); + + blake3_hasher_finalize(&hmac, p_hash, BLAKE3_OUT_LEN); return 0; } @@ -977,33 +969,33 @@ namespace rgw::dedup { 
memset(&p_rec->s.shared_manifest, 0, sizeof(p_rec->s.shared_manifest)); } - itr = attrs.find(RGW_ATTR_SHA256); + itr = attrs.find(RGW_ATTR_BLAKE3); if (itr != attrs.end()) { try { auto bl_iter = itr->second.cbegin(); - // SHA has 256 bit splitted into multiple 64bit units + // BLAKE3 hash 256 bit splitted into multiple 64bit units const unsigned units = (256 / (sizeof(uint64_t)*8)); static_assert(units == 4); for (unsigned i = 0; i < units; i++) { uint64_t val; ceph::decode(val, bl_iter); - p_rec->s.sha256[i] = val; + p_rec->s.hash[i] = val; } - p_stats->valid_sha256_attrs++; + p_stats->valid_hash_attrs++; return 0; } catch (buffer::error& err) { - ldpp_dout(dpp, 1) << __func__ << "::ERR: failed SHA256 decode" << dendl; + ldpp_dout(dpp, 1) << __func__ << "::ERR: failed HASH decode" << dendl; return -EINVAL; } } - p_stats->invalid_sha256_attrs++; + p_stats->invalid_hash_attrs++; // TBD: redundant memset... - memset(p_rec->s.sha256, 0, sizeof(p_rec->s.sha256)); - // CEPH_CRYPTO_HMACSHA256_DIGESTSIZE is 32 Bytes (32*8=256) - int ret = calc_object_sha256(p_rec, (uint8_t*)p_rec->s.sha256); + memset(p_rec->s.hash, 0, sizeof(p_rec->s.hash)); + // BLAKE3_OUT_LEN is 32 Bytes + int ret = calc_object_blake3(p_rec, (uint8_t*)p_rec->s.hash); if (ret == 0) { - p_rec->s.flags.set_sha256_calculated(); + p_rec->s.flags.set_hash_calculated(); } return ret; @@ -1177,18 +1169,18 @@ namespace rgw::dedup { } //--------------------------------------------------------------------------- - static int write_sha256_object_attribute(const DoutPrefixProvider* const dpp, + static int write_blake3_object_attribute(const DoutPrefixProvider* const dpp, rgw::sal::Driver* driver, RGWRados* rados, const disk_record_t *p_rec) { bufferlist etag_bl; - bufferlist sha256_bl; + bufferlist hash_bl; librados::ObjectWriteOperation op; etag_to_bufferlist(p_rec->s.md5_high, p_rec->s.md5_low, p_rec->s.num_parts, &etag_bl); - init_cmp_pairs(p_rec, etag_bl, sha256_bl /*OUT PARAM*/, &op); - 
op.setxattr(RGW_ATTR_SHA256, sha256_bl); + init_cmp_pairs(p_rec, etag_bl, hash_bl /*OUT PARAM*/, &op); + op.setxattr(RGW_ATTR_BLAKE3, hash_bl); std::string oid; librados::IoCtx ioctx; @@ -1304,17 +1296,17 @@ namespace rgw::dedup { return 0; } - if (memcmp(src_rec.s.sha256, p_tgt_rec->s.sha256, sizeof(src_rec.s.sha256)) != 0) { - p_stats->sha256_mismatch++; - ldpp_dout(dpp, 10) << __func__ << "::SHA256 mismatch" << dendl; - // TBD: set sha256 attributes on head objects to save calc next time - if (src_rec.s.flags.sha256_calculated()) { - write_sha256_object_attribute(dpp, driver, rados, &src_rec); - p_stats->set_sha256_attrs++; + if (memcmp(src_rec.s.hash, p_tgt_rec->s.hash, sizeof(src_rec.s.hash)) != 0) { + p_stats->hash_mismatch++; + ldpp_dout(dpp, 10) << __func__ << "::HASH mismatch" << dendl; + // TBD: set hash attributes on head objects to save calc next time + if (src_rec.s.flags.hash_calculated()) { + write_blake3_object_attribute(dpp, driver, rados, &src_rec); + p_stats->set_hash_attrs++; } - if (p_tgt_rec->s.flags.sha256_calculated()) { - write_sha256_object_attribute(dpp, driver, rados, p_tgt_rec); - p_stats->set_sha256_attrs++; + if (p_tgt_rec->s.flags.hash_calculated()) { + write_blake3_object_attribute(dpp, driver, rados, p_tgt_rec); + p_stats->set_hash_attrs++; } return 0; } diff --git a/src/rgw/rgw_dedup.h b/src/rgw/rgw_dedup.h index 57ed0e824de5..48dafe38cb1e 100644 --- a/src/rgw/rgw_dedup.h +++ b/src/rgw/rgw_dedup.h @@ -179,7 +179,7 @@ namespace rgw::dedup { remapper_t *remapper); #ifdef FULL_DEDUP_SUPPORT - int calc_object_sha256(const disk_record_t *p_rec, uint8_t *p_sha256); + int calc_object_blake3(const disk_record_t *p_rec, uint8_t *p_hash); int add_obj_attrs_to_record(rgw_bucket *p_rb, disk_record_t *p_rec, const rgw::sal::Attrs &attrs, diff --git a/src/rgw/rgw_dedup_store.cc b/src/rgw/rgw_dedup_store.cc index 18898bbba952..fd15bbc372d8 100644 --- a/src/rgw/rgw_dedup_store.cc +++ b/src/rgw/rgw_dedup_store.cc @@ -63,7 +63,7 @@ namespace 
rgw::dedup { this->s.manifest_len = 0; this->s.shared_manifest = 0; - memset(this->s.sha256, 0, sizeof(this->s.sha256)); + memset(this->s.hash, 0, sizeof(this->s.hash)); this->ref_tag = ""; this->manifest_bl.clear(); } @@ -117,8 +117,11 @@ namespace rgw::dedup { } else { this->s.shared_manifest = CEPHTOH_64(p_rec->s.shared_manifest); - for (int i = 0; i < 4; i++) { - this->s.sha256[i] = CEPHTOH_64(p_rec->s.sha256[i]); + // BLAKE3 hash has 256 bit splitted into multiple 64bit units + const unsigned units = (256 / (sizeof(uint64_t)*8)); + static_assert(units == 4); + for (unsigned i = 0; i < units; i++) { + this->s.hash[i] = CEPHTOH_64(p_rec->s.hash[i]); } this->ref_tag = std::string(p, this->s.ref_tag_len); p += p_rec->s.ref_tag_len; @@ -176,8 +179,11 @@ namespace rgw::dedup { } else { p_rec->s.shared_manifest = HTOCEPH_64(this->s.shared_manifest); - for (int i = 0; i < 4; i++) { - p_rec->s.sha256[i] = HTOCEPH_64(this->s.sha256[i]); + // BLAKE3 hash has 256 bit splitted into multiple 64bit units + const unsigned units = (256 / (sizeof(uint64_t)*8)); + static_assert(units == 4); + for (unsigned i = 0; i < units; i++) { + p_rec->s.hash[i] = HTOCEPH_64(this->s.hash[i]); } len = this->ref_tag.length(); std::memcpy(p, this->ref_tag.data(), len); @@ -251,9 +257,12 @@ namespace rgw::dedup { stream << "num_parts = " << rec.s.num_parts << "\n"; stream << "obj_size = " << rec.s.obj_bytes_size/1024 <<" KiB" << "\n"; stream << "MD5 = " << std::hex << rec.s.md5_high << rec.s.md5_low << "\n"; - stream << "SHA256 = "; - for (int i =0; i < 4; i++) { - stream << rec.s.sha256[i]; + stream << "HASH = "; + // BLAKE3 hash has 256 bit splitted into multiple 64bit units + const unsigned units = (256 / (sizeof(uint64_t)*8)); + static_assert(units == 4); + for (unsigned i = 0; i < units; i++) { + stream << rec.s.hash[i]; } stream << "\n"; diff --git a/src/rgw/rgw_dedup_store.h b/src/rgw/rgw_dedup_store.h index d01a5c757ba0..a89abb134206 100644 --- a/src/rgw/rgw_dedup_store.h +++ 
b/src/rgw/rgw_dedup_store.h @@ -27,6 +27,7 @@ #include "include/rados/buffer.h" #include "include/rados/librados.hpp" #include "rgw_dedup_utils.h" +#include "BLAKE3/c/blake3.h" namespace rgw::dedup { struct key_t; @@ -171,7 +172,7 @@ namespace rgw::dedup { uint8_t pad[6]; uint64_t shared_manifest; // 64bit hash of the SRC object manifest - uint64_t sha256[4]; // 4 * 8 Bytes of SHA256 + uint64_t hash[4]; // 4 * 8 Bytes of BLAKE3 }s; std::string obj_name; // TBD: find pool name making it easier to get ioctx @@ -182,7 +183,7 @@ namespace rgw::dedup { std::string stor_class; bufferlist manifest_bl; }; - + static_assert(BLAKE3_OUT_LEN == sizeof(disk_record_t::packed_rec_t::hash)); std::ostream &operator<<(std::ostream &stream, const disk_record_t & rec); static constexpr unsigned BLOCK_MAGIC = 0xFACE; diff --git a/src/rgw/rgw_dedup_utils.cc b/src/rgw/rgw_dedup_utils.cc index c380bff842c6..baadee5aeef5 100644 --- a/src/rgw/rgw_dedup_utils.cc +++ b/src/rgw/rgw_dedup_utils.cc @@ -432,15 +432,15 @@ namespace rgw::dedup { this->skipped_source_record += other.skipped_source_record; this->duplicate_records += other.duplicate_records; this->size_mismatch += other.size_mismatch; - this->sha256_mismatch += other.sha256_mismatch; + this->hash_mismatch += other.hash_mismatch; this->failed_src_load += other.failed_src_load; this->failed_rec_load += other.failed_rec_load; this->failed_block_load += other.failed_block_load; - this->valid_sha256_attrs += other.valid_sha256_attrs; - this->invalid_sha256_attrs += other.invalid_sha256_attrs; - this->set_sha256_attrs += other.set_sha256_attrs; - this->skip_sha256_cmp += other.skip_sha256_cmp; + this->valid_hash_attrs += other.valid_hash_attrs; + this->invalid_hash_attrs += other.invalid_hash_attrs; + this->set_hash_attrs += other.set_hash_attrs; + this->skip_hash_cmp += other.skip_hash_cmp; this->set_shared_manifest_src += other.set_shared_manifest_src; this->loaded_objects += other.loaded_objects; @@ -514,15 +514,15 @@ namespace 
rgw::dedup { f->dump_unsigned("Failed Remap Overflow", this->failed_map_overflow); } - f->dump_unsigned("Valid SHA256 attrs", this->valid_sha256_attrs); - f->dump_unsigned("Invalid SHA256 attrs", this->invalid_sha256_attrs); + f->dump_unsigned("Valid HASH attrs", this->valid_hash_attrs); + f->dump_unsigned("Invalid HASH attrs", this->invalid_hash_attrs); - if (this->set_sha256_attrs) { - f->dump_unsigned("Set SHA256", this->set_sha256_attrs); + if (this->set_hash_attrs) { + f->dump_unsigned("Set HASH", this->set_hash_attrs); } - if (this->skip_sha256_cmp) { - f->dump_unsigned("Can't run SHA256 compare", this->skip_sha256_cmp); + if (this->skip_hash_cmp) { + f->dump_unsigned("Can't run HASH compare", this->skip_hash_cmp); } } @@ -582,8 +582,8 @@ namespace rgw::dedup { { Formatter::ObjectSection logical_failures(*f, "logical failures"); - if (this->sha256_mismatch) { - f->dump_unsigned("SHA256 mismatch", this->sha256_mismatch); + if (this->hash_mismatch) { + f->dump_unsigned("HASH mismatch", this->hash_mismatch); } if (this->duplicate_records) { f->dump_unsigned("Duplicate SRC/TGT", this->duplicate_records); @@ -620,15 +620,15 @@ namespace rgw::dedup { encode(m.skipped_source_record, bl); encode(m.duplicate_records, bl); encode(m.size_mismatch, bl); - encode(m.sha256_mismatch, bl); + encode(m.hash_mismatch, bl); encode(m.failed_src_load, bl); encode(m.failed_rec_load, bl); encode(m.failed_block_load, bl); - encode(m.valid_sha256_attrs, bl); - encode(m.invalid_sha256_attrs, bl); - encode(m.set_sha256_attrs, bl); - encode(m.skip_sha256_cmp, bl); + encode(m.valid_hash_attrs, bl); + encode(m.invalid_hash_attrs, bl); + encode(m.set_hash_attrs, bl); + encode(m.skip_hash_cmp, bl); encode(m.set_shared_manifest_src, bl); encode(m.loaded_objects, bl); @@ -670,15 +670,15 @@ namespace rgw::dedup { decode(m.skipped_source_record, bl); decode(m.duplicate_records, bl); decode(m.size_mismatch, bl); - decode(m.sha256_mismatch, bl); + decode(m.hash_mismatch, bl); 
decode(m.failed_src_load, bl); decode(m.failed_rec_load, bl); decode(m.failed_block_load, bl); - decode(m.valid_sha256_attrs, bl); - decode(m.invalid_sha256_attrs, bl); - decode(m.set_sha256_attrs, bl); - decode(m.skip_sha256_cmp, bl); + decode(m.valid_hash_attrs, bl); + decode(m.invalid_hash_attrs, bl); + decode(m.set_hash_attrs, bl); + decode(m.skip_hash_cmp, bl); decode(m.set_shared_manifest_src, bl); decode(m.loaded_objects, bl); diff --git a/src/rgw/rgw_dedup_utils.h b/src/rgw/rgw_dedup_utils.h index 859724acdf71..f008fcaba38b 100644 --- a/src/rgw/rgw_dedup_utils.h +++ b/src/rgw/rgw_dedup_utils.h @@ -63,7 +63,7 @@ namespace rgw::dedup { std::ostream& operator<<(std::ostream &out, const dedup_req_type_t& dedup_type); struct __attribute__ ((packed)) dedup_flags_t { private: - static constexpr uint8_t RGW_DEDUP_FLAG_SHA256_CALCULATED = 0x01; // REC + static constexpr uint8_t RGW_DEDUP_FLAG_HASH_CALCULATED = 0x01; // REC static constexpr uint8_t RGW_DEDUP_FLAG_SHARED_MANIFEST = 0x02; // REC + TAB static constexpr uint8_t RGW_DEDUP_FLAG_OCCUPIED = 0x04; // TAB static constexpr uint8_t RGW_DEDUP_FLAG_FASTLANE = 0x08; // REC @@ -72,8 +72,8 @@ namespace rgw::dedup { dedup_flags_t() : flags(0) {} dedup_flags_t(uint8_t _flags) : flags(_flags) {} inline void clear() { this->flags = 0; } - inline bool sha256_calculated() const { return ((flags & RGW_DEDUP_FLAG_SHA256_CALCULATED) != 0); } - inline void set_sha256_calculated() { flags |= RGW_DEDUP_FLAG_SHA256_CALCULATED; } + inline bool hash_calculated() const { return ((flags & RGW_DEDUP_FLAG_HASH_CALCULATED) != 0); } + inline void set_hash_calculated() { flags |= RGW_DEDUP_FLAG_HASH_CALCULATED; } inline bool has_shared_manifest() const { return ((flags & RGW_DEDUP_FLAG_SHARED_MANIFEST) != 0); } inline void set_shared_manifest() { flags |= RGW_DEDUP_FLAG_SHARED_MANIFEST; } inline bool is_occupied() const {return ((this->flags & RGW_DEDUP_FLAG_OCCUPIED) != 0); } @@ -158,15 +158,15 @@ namespace rgw::dedup { uint64_t 
skipped_source_record = 0; uint64_t duplicate_records = 0; uint64_t size_mismatch = 0; - uint64_t sha256_mismatch = 0; + uint64_t hash_mismatch = 0; uint64_t failed_src_load = 0; uint64_t failed_rec_load = 0; uint64_t failed_block_load = 0; - uint64_t valid_sha256_attrs = 0; - uint64_t invalid_sha256_attrs = 0; - uint64_t set_sha256_attrs = 0; - uint64_t skip_sha256_cmp = 0; + uint64_t valid_hash_attrs = 0; + uint64_t invalid_hash_attrs = 0; + uint64_t set_hash_attrs = 0; + uint64_t skip_hash_cmp = 0; uint64_t set_shared_manifest_src = 0; uint64_t loaded_objects = 0; diff --git a/src/test/cli/radosgw-admin/help.t b/src/test/cli/radosgw-admin/help.t index 2146de6ed8a9..06fb72fd07ea 100644 --- a/src/test/cli/radosgw-admin/help.t +++ b/src/test/cli/radosgw-admin/help.t @@ -18,7 +18,7 @@ caps rm remove user capabilities dedup stats Display dedup statistics from the last run dedup estimate Runs dedup in estimate mode (no changes will be made) - dedup restart Restart dedup + dedup restart Restart dedup; must include --yes-i-really-mean-it to activate dedup abort Abort dedup dedup pause Pause dedup dedup resume Resume paused dedup diff --git a/src/test/rgw/dedup/test_dedup.py b/src/test/rgw/dedup/test_dedup.py index d8b3540e2bea..4b7e21fe1644 100644 --- a/src/test/rgw/dedup/test_dedup.py +++ b/src/test/rgw/dedup/test_dedup.py @@ -35,10 +35,10 @@ class Dedup_Stats: skip_src_record: int = 0 skip_changed_object: int = 0 corrupted_etag: int = 0 - sha256_mismatch: int = 0 - valid_sha256: int = 0 - invalid_sha256: int = 0 - set_sha256: int = 0 + hash_mismatch: int = 0 + valid_hash: int = 0 + invalid_hash: int = 0 + set_hash: int = 0 total_processed_objects: int = 0 size_before_dedup: int = 0 #loaded_objects: int = 0 @@ -594,8 +594,8 @@ def calc_expected_stats(dedup_stats, obj_size, num_copies, config): else: dedup_stats.skip_src_record += 1 dedup_stats.set_shared_manifest_src += 1 - dedup_stats.set_sha256 += num_copies - dedup_stats.invalid_sha256 += num_copies + 
dedup_stats.set_hash += num_copies + dedup_stats.invalid_hash += num_copies dedup_stats.unique_obj += 1 dedup_stats.duplicate_obj += dups_count dedup_stats.deduped_obj += dups_count @@ -939,10 +939,10 @@ def reset_full_dedup_stats(dedup_stats): dedup_stats.skip_singleton_bytes = 0 dedup_stats.skip_changed_object = 0 dedup_stats.corrupted_etag = 0 - dedup_stats.sha256_mismatch = 0 - dedup_stats.valid_sha256 = 0 - dedup_stats.invalid_sha256 = 0 - dedup_stats.set_sha256 = 0 + dedup_stats.hash_mismatch = 0 + dedup_stats.valid_hash = 0 + dedup_stats.invalid_hash = 0 + dedup_stats.set_hash = 0 #------------------------------------------------------------------------------- @@ -964,11 +964,11 @@ def read_full_dedup_stats(dedup_stats, md5_stats): dedup_stats.skip_changed_object = skipped[key] notify=md5_stats['notify'] - dedup_stats.valid_sha256 = notify['Valid SHA256 attrs'] - dedup_stats.invalid_sha256 = notify['Invalid SHA256 attrs'] - key='Set SHA256' + dedup_stats.valid_hash = notify['Valid HASH attrs'] + dedup_stats.invalid_hash = notify['Invalid HASH attrs'] + key='Set HASH' if key in notify: - dedup_stats.set_sha256 = notify[key] + dedup_stats.set_hash = notify[key] sys_failures = md5_stats['system failures'] key='Corrupted ETAG' @@ -976,9 +976,9 @@ def read_full_dedup_stats(dedup_stats, md5_stats): dedup_stats.corrupted_etag = sys_failures[key] log_failures = md5_stats['logical failures'] - key='SHA256 mismatch' + key='HASH mismatch' if key in log_failures: - dedup_stats.sha256_mismatch = log_failures[key] + dedup_stats.hash_mismatch = log_failures[key] #------------------------------------------------------------------------------- @@ -1076,7 +1076,7 @@ def exec_dedup_internal(expected_dedup_stats, dry_run, max_dedup_time): result = admin(['dedup', 'estimate']) reset_full_dedup_stats(expected_dedup_stats) else: - result = admin(['dedup', 'restart', '--yes-i-really-mean-it', '--tech-preview']) + result = admin(['dedup', 'restart', '--yes-i-really-mean-it']) assert 
result[1] == 0 log.debug("wait for dedup to complete") @@ -1121,7 +1121,7 @@ def exec_dedup(expected_dedup_stats, dry_run, verify_stats=True): log.debug("potential_unique_obj= %d / %d ", dedup_stats.potential_unique_obj, expected_dedup_stats.potential_unique_obj) - #dedup_stats.set_sha256 = dedup_stats.invalid_sha256 + #dedup_stats.set_hash = dedup_stats.invalid_hash if dedup_stats != expected_dedup_stats: log.debug("==================================================") print_dedup_stats_diff(dedup_stats, expected_dedup_stats) @@ -1308,7 +1308,7 @@ def check_full_dedup_state(): global full_dedup_state_was_checked global full_dedup_state_disabled log.debug("check_full_dedup_state:: sending FULL Dedup request") - result = admin(['dedup', 'restart', '--yes-i-really-mean-it', '--tech-preview']) + result = admin(['dedup', 'restart', '--yes-i-really-mean-it']) if result[1] == 0: log.info("full dedup is enabled!") full_dedup_state_disabled = False @@ -1455,9 +1455,9 @@ def test_dedup_etag_corruption(): dedup_ratio_actual=ret[3] if corruption == "no corruption": - expected_dedup_stats.valid_sha256=1 - expected_dedup_stats.invalid_sha256=0 - expected_dedup_stats.set_sha256=0 + expected_dedup_stats.valid_hash=1 + expected_dedup_stats.invalid_hash=0 + expected_dedup_stats.set_hash=0 s3_bytes_before=expected_dedup_stats.size_before_dedup expected_ratio_actual=Dedup_Ratio() @@ -1527,10 +1527,10 @@ def test_md5_collisions(): dedup_stats.size_before_dedup=2*BLOCK_SIZE # the md5 collision confuses the estimate dedup_stats.dedup_bytes_estimate=BLOCK_SIZE - # SHA256 check will expose the problem - dedup_stats.invalid_sha256=dedup_stats.total_processed_objects - dedup_stats.set_sha256=dedup_stats.total_processed_objects - dedup_stats.sha256_mismatch=1 + # HASH check will expose the problem + dedup_stats.invalid_hash=dedup_stats.total_processed_objects + dedup_stats.set_hash=dedup_stats.total_processed_objects + dedup_stats.hash_mismatch=1 s3_bytes_before=dedup_stats.size_before_dedup 
expected_ratio_actual=Dedup_Ratio() expected_ratio_actual.s3_bytes_before=s3_bytes_before @@ -1544,9 +1544,9 @@ def test_md5_collisions(): assert expected_ratio_actual == dedup_ratio_actual - dedup_stats.valid_sha256=dedup_stats.total_processed_objects - dedup_stats.invalid_sha256=0 - dedup_stats.set_sha256=0 + dedup_stats.valid_hash=dedup_stats.total_processed_objects + dedup_stats.invalid_hash=0 + dedup_stats.set_hash=0 log.debug("test_md5_collisions: second call to exec_dedup") ret=exec_dedup(dedup_stats, dry_run) @@ -1657,9 +1657,9 @@ def test_dedup_inc_0_with_tenants(): dedup_stats2.set_shared_manifest_src=0 dedup_stats2.deduped_obj=0 dedup_stats2.deduped_obj_bytes=0 - dedup_stats2.valid_sha256=dedup_stats.invalid_sha256 - dedup_stats2.invalid_sha256=0 - dedup_stats2.set_sha256=0 + dedup_stats2.valid_hash=dedup_stats.invalid_hash + dedup_stats2.invalid_hash=0 + dedup_stats2.set_hash=0 log.debug("test_dedup_inc_0_with_tenants: incremental dedup:") # run dedup again and make sure nothing has changed @@ -1706,9 +1706,9 @@ def test_dedup_inc_0(): dedup_stats2.set_shared_manifest_src=0 dedup_stats2.deduped_obj=0 dedup_stats2.deduped_obj_bytes=0 - dedup_stats2.valid_sha256=dedup_stats.invalid_sha256 - dedup_stats2.invalid_sha256=0 - dedup_stats2.set_sha256=0 + dedup_stats2.valid_hash=dedup_stats.invalid_hash + dedup_stats2.invalid_hash=0 + dedup_stats2.set_hash=0 log.debug("test_dedup_inc_0: incremental dedup:") # run dedup again and make sure nothing has changed @@ -1775,9 +1775,9 @@ def test_dedup_inc_1_with_tenants(): stats_combined.deduped_obj -= stats_base.deduped_obj stats_combined.deduped_obj_bytes -= stats_base.deduped_obj_bytes - stats_combined.valid_sha256 = stats_base.set_sha256 - stats_combined.invalid_sha256 -= stats_base.set_sha256 - stats_combined.set_sha256 -= stats_base.set_sha256 + stats_combined.valid_hash = stats_base.set_hash + stats_combined.invalid_hash -= stats_base.set_hash + stats_combined.set_hash -= stats_base.set_hash 
log.debug("test_dedup_inc_1_with_tenants: incremental dedup:") # run dedup again @@ -1840,9 +1840,9 @@ def test_dedup_inc_1(): stats_combined.deduped_obj -= stats_base.deduped_obj stats_combined.deduped_obj_bytes -= stats_base.deduped_obj_bytes - stats_combined.valid_sha256 = stats_base.set_sha256 - stats_combined.invalid_sha256 -= stats_base.set_sha256 - stats_combined.set_sha256 -= stats_base.set_sha256 + stats_combined.valid_hash = stats_base.set_hash + stats_combined.invalid_hash -= stats_base.set_hash + stats_combined.set_hash -= stats_base.set_hash log.debug("test_dedup_inc_1: incremental dedup:") # run dedup again @@ -1918,9 +1918,9 @@ def test_dedup_inc_2_with_tenants(): stats_combined.deduped_obj -= stats_base.deduped_obj stats_combined.deduped_obj_bytes -= stats_base.deduped_obj_bytes - stats_combined.valid_sha256 = stats_base.set_sha256 - stats_combined.invalid_sha256 -= stats_base.set_sha256 - stats_combined.set_sha256 -= stats_base.set_sha256 + stats_combined.valid_hash = stats_base.set_hash + stats_combined.invalid_hash -= stats_base.set_hash + stats_combined.set_hash -= stats_base.set_hash log.debug("test_dedup_inc_2_with_tenants: incremental dedup:") # run dedup again @@ -1991,9 +1991,9 @@ def test_dedup_inc_2(): stats_combined.deduped_obj -= stats_base.deduped_obj stats_combined.deduped_obj_bytes -= stats_base.deduped_obj_bytes - stats_combined.valid_sha256 = stats_base.set_sha256 - stats_combined.invalid_sha256 -= stats_base.set_sha256 - stats_combined.set_sha256 -= stats_base.set_sha256 + stats_combined.valid_hash = stats_base.set_hash + stats_combined.invalid_hash -= stats_base.set_hash + stats_combined.set_hash -= stats_base.set_hash log.debug("test_dedup_inc_2: incremental dedup:") # run dedup again @@ -2039,7 +2039,7 @@ def test_dedup_inc_with_remove_multi_tenants(): # REMOVE some objects and update stats/expected src_record=0 shared_manifest=0 - valid_sha=0 + valid_hash=0 object_keys=[] files_sub=[] dedup_stats = Dedup_Stats() @@ -2052,7 
+2052,7 @@ def test_dedup_inc_with_remove_multi_tenants(): log.debug("objects::%s::size=%d, num_copies=%d", filename, obj_size, num_copies_2); if num_copies_2: if num_copies_2 > 1 and obj_size > RADOS_OBJ_SIZE: - valid_sha += num_copies_2 + valid_hash += num_copies_2 src_record += 1 shared_manifest += (num_copies_2 - 1) @@ -2076,9 +2076,9 @@ def test_dedup_inc_with_remove_multi_tenants(): dedup_stats.deduped_obj_bytes=0 dedup_stats.skip_src_record=src_record dedup_stats.skip_shared_manifest=shared_manifest - dedup_stats.valid_sha256=valid_sha - dedup_stats.invalid_sha256=0 - dedup_stats.set_sha256=0 + dedup_stats.valid_hash=valid_hash + dedup_stats.invalid_hash=0 + dedup_stats.set_hash=0 log.debug("test_dedup_inc_with_remove: incremental dedup:") dry_run=False @@ -2119,7 +2119,7 @@ def test_dedup_inc_with_remove(): # REMOVE some objects and update stats/expected src_record=0 shared_manifest=0 - valid_sha=0 + valid_hash=0 object_keys=[] files_sub=[] dedup_stats = Dedup_Stats() @@ -2132,7 +2132,7 @@ def test_dedup_inc_with_remove(): log.debug("objects::%s::size=%d, num_copies=%d", filename, obj_size, num_copies_2); if num_copies_2: if num_copies_2 > 1 and obj_size > RADOS_OBJ_SIZE: - valid_sha += num_copies_2 + valid_hash += num_copies_2 src_record += 1 shared_manifest += (num_copies_2 - 1) @@ -2163,9 +2163,9 @@ def test_dedup_inc_with_remove(): dedup_stats.deduped_obj_bytes=0 dedup_stats.skip_src_record=src_record dedup_stats.skip_shared_manifest=shared_manifest - dedup_stats.valid_sha256=valid_sha - dedup_stats.invalid_sha256=0 - dedup_stats.set_sha256=0 + dedup_stats.valid_hash=valid_hash + dedup_stats.invalid_hash=0 + dedup_stats.set_hash=0 log.debug("test_dedup_inc_with_remove: incremental dedup:") log.debug("stats_base.size_before_dedup=%d", stats_base.size_before_dedup) @@ -2322,7 +2322,7 @@ def test_dedup_small_multipart(): #------------------------------------------------------------------------------- @pytest.mark.basic_test def 
test_dedup_large_scale_with_tenants(): - return + #return if full_dedup_is_disabled(): return @@ -2342,7 +2342,7 @@ def test_dedup_large_scale_with_tenants(): #------------------------------------------------------------------------------- @pytest.mark.basic_test def test_dedup_large_scale(): - return + #return if full_dedup_is_disabled(): return @@ -2362,7 +2362,7 @@ def test_dedup_large_scale(): #------------------------------------------------------------------------------- @pytest.mark.basic_test def test_empty_bucket(): - return + #return if full_dedup_is_disabled(): return @@ -2426,9 +2426,9 @@ def inc_step_with_tenants(stats_base, files, conns, bucket_names, config): stats_combined.deduped_obj -= stats_base.deduped_obj stats_combined.deduped_obj_bytes -= stats_base.deduped_obj_bytes - stats_combined.valid_sha256 = stats_base.set_sha256 - stats_combined.invalid_sha256 -= stats_base.set_sha256 - stats_combined.set_sha256 -= stats_base.set_sha256 + stats_combined.valid_hash = stats_base.set_hash + stats_combined.invalid_hash -= stats_base.set_hash + stats_combined.set_hash -= stats_base.set_hash log.debug("test_dedup_inc_2_with_tenants: incremental dedup:") # run dedup again @@ -2470,10 +2470,10 @@ def test_dedup_inc_loop_with_tenants(): files=ret[0] stats_last=ret[1] stats_base.set_shared_manifest_src += stats_last.set_shared_manifest_src - stats_base.dup_head_size += stats_last.dup_head_size - stats_base.deduped_obj += stats_last.deduped_obj - stats_base.deduped_obj_bytes += stats_last.deduped_obj_bytes - stats_base.set_sha256 += stats_last.set_sha256 + stats_base.dup_head_size += stats_last.dup_head_size + stats_base.deduped_obj += stats_last.deduped_obj + stats_base.deduped_obj_bytes += stats_last.deduped_obj_bytes + stats_base.set_hash += stats_last.set_hash finally: # cleanup must be executed even after a failure cleanup_all_buckets(bucket_names, conns)