Remove the split-head size restriction: previously, split-head was limited to objects whose entire data fit in the head (≤4 MiB); it now applies to objects of any size.
This change also migrates the split-head manifest representation from the legacy explicit-objs format to the prefix+index rules-based format.
Refactored should_split_head():
Now performs a larger set of eligibility checks:
* d_split_head flag is set
* single-part object only
* non-empty head
* not a legacy manifest
* not an Alibaba Cloud OSS AppendObject
Explicit skips for unsupported manifest types:
— old-style explicit-objs manifests
— OSS AppendObject manifests (detected via non-empty override_prefix)
New config option: rgw_dedup_split_obj_head:
Default is true (split-head enabled).
Setting to false disables split-head entirely.
Tail object lookup via manifest iterator:
Replaces the old get_tail_ioctx() which manually constructed the tail OID via generate_split_head_tail_name().
The new function simply calls manifest.obj_begin() and resolves the first tail object location through the standard manifest iterator.
Stats cleanup:
Removed the "Potential Dedup" stats section (small_objs_stat, dup_head_bytes, dup_head_bytes_estimate, ingress_skip_too_small_64KB*)
which tracked 64KB–4MB objects as potential-but-skipped candidates.
Since split-head now covers all sizes, this distinction is no longer meaningful. calc_deduped_bytes() is simplified accordingly.
Signed-off-by: benhanokh <gbenhano@redhat.com>
- Copy the manifest from the source to the target.
- Remove all tail objects on the target.
-
Split Head Mode
===============
-Dedup code can split the head object into 2 objects
+The dedup code can split a head object into 2 objects:
-- one with attributes and no data and
+- one with attributes and no data, and
- a new tail object with only data.
-The new tail object will be deduped, unlike the head objects, which cannot
+The new tail object will be deduped, unlike head objects, which cannot
be deduplicated.
-This feature is only enabled for RGW objects without existing tail objects
-(in other words, objects sized 4 MB or less).
+
+The :confval:`rgw_dedup_split_obj_head` option (default: ``true``) controls
+this feature; setting it to ``false`` disables split-head entirely.
+
+.. confval:: rgw_dedup_split_obj_head
Memory Usage
default: false
services:
- rgw
+- name: rgw_dedup_split_obj_head
+ type: bool
+ level: advanced
+ desc: Enables the split-head functionality
+ long_desc: The dedup code can split a head object into two objects -
+ one with attributes and no data, and a new tail object with only data.
+ The new tail object can be deduped (unlike head objects, which
+ cannot be deduplicated).
+ default: true
+ services:
+ - rgw
+ with_legacy: true
- name: rgw_dedup_min_obj_size_for_dedup
type: size
level: advanced
{
d_head_object_size = cct->_conf->rgw_max_chunk_size;
d_min_obj_size_for_dedup = cct->_conf->rgw_dedup_min_obj_size_for_dedup;
-
- // limit split head to objects without tail
- d_max_obj_size_for_split = d_head_object_size;
+ d_split_head = cct->_conf->rgw_dedup_split_obj_head;
ldpp_dout(dpp, 10) << "Config Vals::d_head_object_size=" << d_head_object_size
<< "::d_min_obj_size_for_dedup=" << d_min_obj_size_for_dedup
- << "::d_max_obj_size_for_split=" << d_max_obj_size_for_split
+ << "::d_split_head=" << d_split_head
<< dendl;
int ret = init_rados_access_handles(false);
//------------------------------------------------------------------------------
uint64_t Background::__calc_deduped_bytes(uint16_t num_parts, uint64_t size_bytes)
{
- return calc_deduped_bytes(d_head_object_size,
- d_min_obj_size_for_dedup,
- d_max_obj_size_for_split,
- num_parts,
- size_bytes);
+ return calc_deduped_bytes(d_head_object_size, d_min_obj_size_for_dedup,
+ d_split_head, num_parts, size_bytes);
}
//---------------------------------------------------------------------------
<< p_rec->s.md5_low << std::dec << dendl;
int ret = p_table->add_entry(&key, block_id, rec_id, has_shared_manifest,
- &p_stats->small_objs_stat, &p_stats->big_objs_stat,
- &p_stats->dup_head_bytes_estimate);
+ &p_stats->big_objs_stat);
if (ret == 0) {
p_stats->loaded_objects ++;
ldpp_dout(dpp, 20) << __func__ << "::" << p_rec->bucket_name << "/"
}
//---------------------------------------------------------------------------
- static int get_ioctx_internal(const DoutPrefixProvider* const dpp,
- rgw::sal::Driver* driver,
- rgw::sal::RadosStore* store,
- const std::string &obj_name,
- const std::string &instance,
- const rgw_bucket &rb,
- librados::IoCtx *p_ioctx /*OUT*/,
- std::string *p_oid /*OUT*/)
+ static inline int get_ioctx(const DoutPrefixProvider* const dpp,
+ rgw::sal::Driver* driver,
+ rgw::sal::RadosStore* store,
+ const disk_record_t *p_rec,
+ librados::IoCtx *p_ioctx /*OUT*/,
+ std::string *p_oid /*OUT*/)
{
+ rgw_bucket rb{p_rec->tenant_name, p_rec->bucket_name, p_rec->bucket_id};
unique_ptr<rgw::sal::Bucket> bucket;
{
int ret = driver->load_bucket(dpp, rb, &bucket, null_yield);
}
string dummy_locator;
- const rgw_obj_index_key key(obj_name, instance);
+ const rgw_obj_index_key key(p_rec->obj_name, p_rec->instance);
rgw_obj obj(bucket->get_key(), key);
get_obj_bucket_and_oid_loc(obj, *p_oid, dummy_locator);
RGWBucketInfo& bucket_info = bucket->get_info();
return store->get_obj_head_ioctx(dpp, bucket_info, obj, p_ioctx);
- }
- //---------------------------------------------------------------------------
- static inline int get_ioctx(const DoutPrefixProvider* const dpp,
- rgw::sal::Driver* driver,
- rgw::sal::RadosStore* store,
- const disk_record_t *p_rec,
- librados::IoCtx *p_ioctx /*OUT*/,
- std::string *p_oid /*OUT*/)
- {
- rgw_bucket b{p_rec->tenant_name, p_rec->bucket_name, p_rec->bucket_id};
- return get_ioctx_internal(dpp, driver, store, p_rec->obj_name, p_rec->instance,
- b, p_ioctx, p_oid);
- }
-
- //---------------------------------------------------------------------------
- static inline std::string generate_split_head_tail_name(const RGWObjManifest &manifest)
- {
- static constexpr std::string_view shadow_string(RGW_OBJ_NS_SHADOW);
- std::string_view suffix = "0";
- const std::string &prefix = manifest.get_prefix();
-
- std::string tail_name;
- tail_name.reserve(shadow_string.size() + prefix.size() + suffix.size() + 1);
- // TBD:
- // it is unclear when RGW code pads with "_" before the shadow string
- // It won't change correctness, but might look weird
- //tail_name.append("_");
- tail_name.append(shadow_string);
- tail_name.append("_");
- tail_name.append(prefix);
- tail_name.append(suffix);
- return tail_name;
}
//---------------------------------------------------------------------------
}
//---------------------------------------------------------------------------
- int Background::get_tail_ioctx(const disk_record_t *p_rec,
- const RGWObjManifest &manifest,
- const std::string &tail_name,
- md5_stats_t *p_stats /*IN-OUT*/,
- librados::IoCtx *p_ioctx /*OUT*/,
- std::string *p_oid /*OUT*/)
+ static int get_first_tail_obj_params(const DoutPrefixProvider *dpp,
+ RGWRados *rados,
+ const RGWObjManifest &manifest,
+ librados::IoCtx *p_tail_ioctx, /*OUT*/
+ std::string *p_tail_oid /*OUT*/)
{
- const rgw_bucket_placement &tail_placement = manifest.get_tail_placement();
- // Tail placement_rule was fixed before committed to SLAB, if looks bad -> abort
- if (unlikely(invalid_tail_placement(tail_placement))) {
- p_stats->split_head_no_tail_placement++;
- ldpp_dout(dpp, 1) << __func__ << "::invalid_tail_placement -> abort" << dendl;
- return -EINVAL;
- }
-
- const rgw_bucket& bucket = tail_placement.bucket;
- // tail objects might be on another storage_class/pool, need another ioctx
- int ret = get_ioctx_internal(dpp, driver, store, tail_name, p_rec->instance,
- bucket, p_ioctx, p_oid);
- if (unlikely(ret != 0)) {
- ldpp_dout(dpp, 1) << __func__ << "::ERR: failed get_ioctx_internal()" << dendl;
+ auto p = manifest.obj_begin(dpp);
+ const rgw_obj_select& os = p.get_location();
+ rgw_raw_obj raw_obj = os.get_raw_obj(rados);
+ rgw_rados_ref obj;
+ int ret = rgw_get_rados_ref(dpp, rados->get_rados_handle(), raw_obj, &obj);
+ if (ret < 0) {
+ ldpp_dout(dpp, 1) << __func__ << "::failed rgw_get_rados_ref() for oid="
+ << raw_obj.oid << ", err is " << cpp_strerror(-ret) << dendl;
return ret;
}
+ *p_tail_ioctx = obj.ioctx;
+ *p_tail_oid = raw_obj.oid;
+
return 0;
}
//---------------------------------------------------------------------------
void Background::remove_created_tail_object(const disk_record_t *p_rec,
const RGWObjManifest &manifest,
- const std::string &tail_name,
md5_stats_t *p_stats /*IN-OUT*/)
{
librados::IoCtx tail_ioctx;
std::string tail_oid;
- int ret = get_tail_ioctx(p_rec, manifest, tail_name, p_stats, &tail_ioctx,
- &tail_oid);
+ int ret = get_first_tail_obj_params(dpp, rados, manifest, &tail_ioctx, &tail_oid);
if (unlikely(ret != 0)) {
return;
}
}
//---------------------------------------------------------------------------
- inline bool Background::should_split_head(uint64_t head_size, uint64_t obj_size)
+ inline bool Background::should_split_head(const RGWObjManifest& manifest)
{
- // Don't split RGW objects with existing tail-objects
- return (head_size > 0 && head_size == obj_size);
+ // Split-head is only applicable for single-part objects with a non-empty head.
+ // To avoid issues with manifests created via append (specifically for Alibaba Cloud OSS),
+ // we should disable split-head whenever the manifest contains an override_prefix in the rules.
+ // We also reject manifests with multiple rules which are exclusively an
+ // artifact of the OSS AppendObject operation.
+ // The head_size should either represent the full object or be equal to the stripe_max_size.
+
+ bool success = false;
+ uint64_t head_size = manifest.get_head_size();
+ uint64_t obj_size = manifest.get_obj_size();
+ RGWObjManifestRule rule;
+ if (manifest.get_rule(0, &rule)) {
+ success = (d_split_head && // split-head was not disabled from yaml
+ rule.part_size == 0 && // not a multi-part object
+ head_size > 0 && // non-empty head
+ !manifest.has_explicit_objs() && // not an explicit manifest
+ rule.override_prefix.empty() && // not Alibaba Cloud OSS
+ manifest.get_rules().size() == 1 && // not Alibaba Cloud OSS
+ (head_size == rule.stripe_max_size || head_size == obj_size));
+
+ if (unlikely(!success)) {
+ ldpp_dout(dpp, 20) << __func__ << "::ERR::d_split_head=" << d_split_head
+ << "::obj_size=" << obj_size
+ << "::head_size=" << head_size
+ << "::rule.part_size=" << rule.part_size
+ << "::rule.stripe_max_size=" << rule.stripe_max_size
+ << "::rule.override_prefix=" << rule.override_prefix
+ << "::rule.override_prefix.empty()=" << rule.override_prefix.empty()
+ << dendl;
+ }
+ } // don't split head if can't get rule
+
+ return success;
}
//---------------------------------------------------------------------------
cls_refcount_get(op, ref_tag, true);
d_ctl.metadata_access_throttle.acquire();
ldpp_dout(dpp, 20) << __func__ << "::inc ref-count on tail object: "
- << raw_obj.oid << "::" << obj.obj.oid << dendl;
+ << obj.obj.oid << "::" << raw_obj.to_str() << dendl;
rgw::AioResultList completed = aio->get(obj.obj,
rgw::Aio::librados_op(obj.ioctx, std::move(op), null_yield),
cost, id);
ret = rgw::check_for_errors(completed);
all_results.splice(all_results.end(), completed);
if (ret < 0) {
- ldpp_dout(dpp, 1) << __func__ << "::ERROR: failed to copy obj=" << obj
+ ldpp_dout(dpp, 1) << __func__ << "::ERR: failed refcount_get() obj=" << obj
<< ", ret=" << ret << " err is " << cpp_strerror(-ret) << dendl;
break;
}
const RGWObjManifest &src_manifest,
const RGWObjManifest &tgt_manifest,
md5_stats_t *p_stats,
- const std::string &tail_name,
const dedup_table_t::value_t *p_src_val)
{
const uint64_t src_head_size = src_manifest.get_head_size();
if (unlikely(ret != 0)) {
ldpp_dout(dpp, 1) << __func__ << "::ERR: failed TGT get_ioctx()" << dendl;
if (p_src_rec->s.flags.is_split_head()) {
- remove_created_tail_object(p_src_rec, src_manifest, tail_name, p_stats);
+ remove_created_tail_object(p_src_rec, src_manifest, p_stats);
}
return ret;
}
ldpp_dout(dpp, 5) << __func__ << "::abort! src_head_size=" << src_head_size
<< "::tgt_head_size=" << tgt_head_size << dendl;
if (p_src_rec->s.flags.is_split_head()) {
- remove_created_tail_object(p_src_rec, src_manifest, tail_name, p_stats);
+ remove_created_tail_object(p_src_rec, src_manifest, p_stats);
}
// TBD: can we create a test case (requires control over head-object-size)??
return -ECANCELED;
ret = inc_ref_count_by_manifest(ref_tag, src_oid, src_manifest);
if (unlikely(ret != 0)) {
if (p_src_rec->s.flags.is_split_head()) {
- remove_created_tail_object(p_src_rec, src_manifest, tail_name, p_stats);
+ remove_created_tail_object(p_src_rec, src_manifest, p_stats);
}
return ret;
}
<< src_oid << "), err is " << cpp_strerror(-ret)<<dendl;
rollback_ref_by_manifest(ref_tag, src_oid, src_manifest);
if (p_src_rec->s.flags.is_split_head()) {
- remove_created_tail_object(p_src_rec, src_manifest, tail_name, p_stats);
+ remove_created_tail_object(p_src_rec, src_manifest, p_stats);
}
return ret;
}
<< "::ERROR: unable to decode manifest" << dendl;
return -EINVAL;
}
- need_to_split_head = should_split_head(manifest.get_head_size(),
- p_rec->s.obj_bytes_size);
+
+ if (unlikely(manifest.has_explicit_objs())) {
+ // we don't support dedup of explicit_objs manifest
+ p_stats->ingress_skip_explicit_objs++;
+ ldpp_dout(dpp, 20) << __func__ << "::explicit_objs can't be dedup" << dendl;
+ return -ENOTSUP;
+ }
+
+ RGWObjManifestRule rule;
+ if (!manifest.get_rule(0, &rule) ||
+ // if not a multi-part must have exactly 1 rule
+ (rule.part_size == 0 && manifest.get_rules().size() != 1) ||
+ !rule.override_prefix.empty()) {
+ // we don't support dedup of Alibaba Cloud OSS using AppendObject API
+ p_stats->ingress_skip_alibaba++;
+ ldpp_dout(dpp, 10) << __func__ << "::Alibaba Cloud OSS can't be dedup"
+ << "::rules.size()=" << manifest.get_rules().size()
+ << "::get_rule ret=" << manifest.get_rule(0, &rule)
+ << "::override_prefix=" << rule.override_prefix << dendl;
+ return -ENOTSUP;
+ }
+
+ need_to_split_head = should_split_head(manifest);
// force explicit tail_placement as the dedup could be on another bucket
const rgw_bucket_placement& tail_placement = manifest.get_tail_placement();
p_rec->s.flags.clear();
ret = add_obj_attrs_to_record(p_rec, attrs, p_stats);
if (unlikely(ret != 0)) {
+ // don't trace errors for unsupported manifests
+ if (ret == -ENOTSUP) {
+ return 0;
+ }
+
ldpp_dout(dpp, 5) << __func__ << "::ERR: failed add_obj_attrs_to_record() ret="
<< ret << "::" << cpp_strerror(-ret) << dendl;
return ret;
}
//---------------------------------------------------------------------------
- static void build_and_set_explicit_manifest(const DoutPrefixProvider *dpp,
- const rgw_bucket *p_bucket,
- const std::string &tail_name,
- RGWObjManifest *p_manifest)
+ static int set_manifest_for_split_head(const DoutPrefixProvider *const dpp,
+ RGWObjManifest *p_manifest /*IN-OUT*/)
{
- uint64_t obj_size = p_manifest->get_obj_size();
- ceph_assert(obj_size == p_manifest->get_head_size());
-
- const rgw_obj &head_obj = p_manifest->get_obj();
- const rgw_obj_key &head_key = head_obj.key;
- rgw_obj_key tail_key(tail_name, head_key.instance, head_key.ns);
- rgw_obj tail_obj(*p_bucket, tail_key);
-
- RGWObjManifestPart tail_part;
- tail_part.loc = tail_obj;
- tail_part.loc_ofs = 0;
- tail_part.size = obj_size;
-
- std::map<uint64_t, RGWObjManifestPart> objs_map;
- objs_map[0] = tail_part;
-
+ // Set the head size to zero in the manifest, with start_ofs also zero.
+ // This means no data is stored in the head object and the first tail
+ // object holds the first data byte.
+ uint64_t tail_ofs = 0;
p_manifest->set_head_size(0);
p_manifest->set_max_head_size(0);
- p_manifest->set_prefix("");
- p_manifest->clear_rules();
- p_manifest->set_explicit(obj_size, objs_map);
+ RGWObjManifestRule rule;
+ if (p_manifest->get_rule(0, &rule)) {
+ ldpp_dout(dpp, 20) << "OLD Rule::start_part_num="<< rule.start_part_num
+ << "::start_ofs=" << rule.start_ofs
+ << "::part_size=" << rule.part_size
+ << "::stripe_max_size=" << rule.stripe_max_size
+ << "::override_prefix=" << rule.override_prefix << dendl;
+ p_manifest->clear_rules();
+ p_manifest->set_trivial_rule(tail_ofs, rule.stripe_max_size);
+ return 0;
+ }
+ else {
+ // No Rules, probably explicit_objs - should never happen (was checked before)
+ return -ENOENT;
+ }
}
//---------------------------------------------------------------------------
int Background::split_head_object(disk_record_t *p_src_rec, // IN-OUT PARAM
RGWObjManifest &src_manifest, // IN/OUT PARAM
const disk_record_t *p_tgt_rec,
- std::string *p_tail_name /*OUT*/,
md5_stats_t *p_stats /* IN-OUT */)
{
ldpp_dout(dpp, 20) << __func__ << "::" << p_src_rec->obj_name << "::"
}
}
- *p_tail_name = generate_split_head_tail_name(src_manifest);
+ ret = set_manifest_for_split_head(dpp, &src_manifest);
+ if (unlikely(ret != 0)) {
+ return ret;
+ }
+
librados::IoCtx tail_ioctx;
std::string tail_oid;
- ret = get_tail_ioctx(p_src_rec, src_manifest, *p_tail_name, p_stats,
- &tail_ioctx, &tail_oid);
+ ret = get_first_tail_obj_params(dpp, rados, src_manifest, &tail_ioctx, &tail_oid);
if (unlikely(ret != 0)) {
return ret;
}
ldpp_dout(dpp, 20) << __func__ << "::wrote tail obj:" << tail_oid << "::ret="
<< ret << dendl;
}
- const rgw_bucket *p_bucket = &(src_manifest.get_tail_placement().bucket);
- build_and_set_explicit_manifest(dpp, p_bucket, *p_tail_name, &src_manifest);
bufferlist manifest_bl;
encode(src_manifest, manifest_bl);
RGWObjManifest &src_manifest,
const RGWObjManifest &tgt_manifest,
const dedup_table_t::value_t *p_src_val,
- std::string *p_tail_name /*OUT*/,
md5_stats_t *p_stats)
{
int ret = 0;
// we might still need to split-head here when hash is valid
// can happen if we failed compare before (md5-collison) and stored the src hash
// in the obj-attributes
- uint64_t head_size = src_manifest.get_head_size();
- if (should_split_head(head_size, src_manifest.get_obj_size())) {
- ret = split_head_object(p_src_rec, src_manifest, p_tgt_rec, p_tail_name, p_stats);
+ if (should_split_head(src_manifest)) {
+ ret = split_head_object(p_src_rec, src_manifest, p_tgt_rec, p_stats);
// compare_strong_hash() is called internally by split_head_object()
return (ret == 0);
}
if (unlikely(has_shared_tail_objects(dpp, rados, p_src_rec, p_tgt_rec, src_manifest, tgt_manifest, p_stats))) {
return 0;
}
- std::string tail_name;
+
bool success = check_and_set_strong_hash(p_src_rec, p_tgt_rec, src_manifest,
- tgt_manifest, &src_val, &tail_name, p_stats);
+ tgt_manifest, &src_val, p_stats);
if (unlikely(!success)) {
if (p_src_rec->s.flags.hash_calculated() && !src_val.has_valid_hash()) {
// set hash attributes on head objects to save calc next time
}
ret = dedup_object(p_src_rec, p_tgt_rec, src_manifest, tgt_manifest, p_stats,
- tail_name, &src_val);
+ &src_val);
if (ret == 0) {
ldpp_dout(dpp, 20) << __func__ << "::dedup success " << p_src_rec->obj_name << dendl;
p_stats->deduped_objects++;
<< ondisk_byte_size << dendl;
p_stats->split_head_dedup_bytes += ondisk_byte_size;
}
- else if (p_tgt_rec->s.num_parts == 0 &&
- // if we don't split head it will be duplicated
- p_tgt_rec->s.obj_bytes_size > d_head_object_size) {
- // single part objects duplicate the head object when dedup is used
- p_stats->dup_head_bytes += d_head_object_size;
- }
// mark the SRC object as a providor of a shared manifest
if (!src_val.has_shared_manifest()) {
// ceph store full blocks so need to round up and multiply by block_size
uint64_t ondisk_byte_size = calc_on_disk_byte_size(entry.meta.size);
- // count all objects including too small and non default storage_class objs
p_worker_stats->ingress_obj++;
p_worker_stats->ingress_obj_bytes += ondisk_byte_size;
if (ondisk_byte_size < d_min_obj_size_for_dedup) {
if (parsed_etag.num_parts == 0) {
- // dedup only useful for objects bigger than 4MB
+ // dedup is only applied to objects larger than the configured minimum size
+ // `rgw_dedup_min_obj_size_for_dedup`
p_worker_stats->ingress_skip_too_small++;
p_worker_stats->ingress_skip_too_small_bytes += ondisk_byte_size;
-
- if (ondisk_byte_size >= 64*1024) {
- p_worker_stats->ingress_skip_too_small_64KB++;
- p_worker_stats->ingress_skip_too_small_64KB_bytes += ondisk_byte_size;
- }
- else {
- return 0;
- }
+ return 0;
}
else {
// multipart objects are always good candidates for dedup
<< "::total_count=" << obj_count_in_shard
<< "::loaded_objects=" << p_stats->loaded_objects
<< p_stats->big_objs_stat << dendl;
- ldpp_dout(dpp, 10) << __func__ << "::small objs::"
- << p_stats->small_objs_stat << dendl;
}
//---------------------------------------------------------------------------
return -ECANCELED;
}
}
- p_table->count_duplicates(&p_stats->small_objs_stat, &p_stats->big_objs_stat);
+ p_table->count_duplicates(&p_stats->big_objs_stat);
display_table_stat_counters(dpp, p_stats);
ldpp_dout(dpp, 10) << __func__ << "::MD5 Loop::" << d_ctl.dedup_type << dendl;
md5_stats_t md5_stats;
//DEDUP_DYN_ALLOC
dedup_table_t table(dpp, d_head_object_size, d_min_obj_size_for_dedup,
- d_max_obj_size_for_split, raw_mem, raw_mem_size);
+ d_split_head, raw_mem, raw_mem_size);
int ret = objects_dedup_single_md5_shard(&table, md5_shard, &md5_stats, num_work_shards);
if (ret == 0) {
md5_stats.duration = ceph_clock_now() - start_time;
};
inline uint64_t __calc_deduped_bytes(uint16_t num_parts, uint64_t size_bytes);
- inline bool should_split_head(uint64_t head_size, uint64_t obj_size);
- int get_tail_ioctx(const disk_record_t *p_rec,
- const RGWObjManifest &manifest,
- const std::string &tail_name,
- md5_stats_t *p_stats /*IN-OUT*/,
- librados::IoCtx *p_ioctx /*OUT*/,
- std::string *p_oid /*OUT*/);
+ inline bool should_split_head(const RGWObjManifest &manifest);
void remove_created_tail_object(const disk_record_t *p_rec,
const RGWObjManifest &manifest,
- const std::string &tail_name,
md5_stats_t *p_stats /*IN-OUT*/);
void run();
int setup(struct dedup_epoch_t*);
int split_head_object(disk_record_t *p_src_rec, // IN/OUT PARAM
RGWObjManifest &src_manifest, // IN/OUT PARAM
const disk_record_t *p_tgt_rec,
- std::string *p_tail_name /*OUT*/,
md5_stats_t *p_stats /* IN-OUT */);
int add_obj_attrs_to_record(disk_record_t *p_rec,
RGWObjManifest &src_manifest,
const RGWObjManifest &tgt_manifest,
const dedup_table_t::value_t *p_src_val,
- std::string *p_tail_name /*OUT*/,
md5_stats_t *p_stats /* IN-OUT */);
int try_deduping_record(dedup_table_t *p_table,
disk_record_t *p_rec,
const RGWObjManifest &src_manifest,
const RGWObjManifest &tgt_manifest,
md5_stats_t *p_stats,
- const std::string &tail_name,
const dedup_table_t::value_t *p_src_val);
#endif
int remove_slabs(unsigned worker_id, unsigned md5_shard, uint32_t slab_count);
uint64_t d_all_buckets_obj_size = 0;
uint32_t d_min_obj_size_for_dedup = (64ULL * 1024);
- uint32_t d_max_obj_size_for_split = (16ULL * 1024 * 1024);
+ bool d_split_head = true;
uint32_t d_head_object_size = (4ULL * 1024 * 1024);
control_t d_ctl;
uint64_t d_watch_handle = 0;
Formatter::ObjectSection section{*fmt, "dedup_ratio_estimate"};
fmt->dump_unsigned("s3_bytes_before", s3_bytes_before);
fmt->dump_unsigned("s3_bytes_after", s3_bytes_after);
- fmt->dump_unsigned("dup_head_bytes", md5_stats_sum.dup_head_bytes_estimate);
if (s3_bytes_before > s3_bytes_after && s3_bytes_after) {
double dedup_ratio = (double)s3_bytes_before/s3_bytes_after;
Formatter::ObjectSection section{*fmt, "dedup_ratio_actual"};
fmt->dump_unsigned("s3_bytes_before", s3_bytes_before);
fmt->dump_unsigned("s3_bytes_after", s3_bytes_after);
- fmt->dump_unsigned("dup_head_bytes", md5_stats_sum.dup_head_bytes);
if (s3_bytes_before > s3_bytes_after && s3_bytes_after) {
double dedup_ratio = (double)s3_bytes_before/s3_bytes_after;
fmt->dump_float("dedup_ratio", dedup_ratio);
dedup_table_t::dedup_table_t(const DoutPrefixProvider* _dpp,
uint32_t _head_object_size,
uint32_t _min_obj_size_for_dedup,
- uint32_t _max_obj_size_for_split,
+ bool _split_head,
uint8_t *p_slab,
uint64_t slab_size)
{
dpp = _dpp;
head_object_size = _head_object_size;
min_obj_size_for_dedup = _min_obj_size_for_dedup;
- max_obj_size_for_split = _max_obj_size_for_split;
+ split_head = _split_head;
memset(p_slab, 0, slab_size);
hash_tab = (table_entry_t*)p_slab;
entries_count = slab_size/sizeof(table_entry_t);
}
//---------------------------------------------------------------------------
+ // find_entry() assumes that entries are not removed during operation.
+ // remove_entry() is only called from remove_singletons_and_redistribute_keys(),
+ // which does a linear pass over the array.
uint32_t dedup_table_t::find_entry(const key_t *p_key) const
{
uint32_t idx = p_key->hash() % entries_count;
//---------------------------------------------------------------------------
void dedup_table_t::inc_counters(const key_t *p_key,
- dedup_stats_t *p_small_objs,
- dedup_stats_t *p_big_objs,
- uint64_t *p_duplicate_head_bytes)
+ dedup_stats_t *p_dedup_stats)
{
// This is an approximation only since size is stored in 4KB resolution
uint64_t byte_size_approx = disk_blocks_to_byte_size(p_key->size_4k_units);
- // skip small single part objects which we can't dedup
- if (!dedupable_object(p_key->multipart_object(), min_obj_size_for_dedup, byte_size_approx)) {
- p_small_objs->duplicate_count ++;
- p_small_objs->dedup_bytes_estimate += byte_size_approx;
- return;
- }
- else {
- uint64_t dup_bytes_approx = calc_deduped_bytes(head_object_size,
- min_obj_size_for_dedup,
- max_obj_size_for_split,
- p_key->num_parts,
- byte_size_approx);
- p_big_objs->duplicate_count ++;
- p_big_objs->dedup_bytes_estimate += dup_bytes_approx;
-
- // object smaller than max_obj_size_for_split will split their head
- // and won't dup it
- if (!p_key->multipart_object() && byte_size_approx > max_obj_size_for_split) {
- // single part objects duplicate the head object when dedup is used
- *p_duplicate_head_bytes += head_object_size;
- }
+ uint64_t dup_bytes_approx = calc_deduped_bytes(head_object_size,
+ min_obj_size_for_dedup,
+ split_head,
+ p_key->num_parts,
+ byte_size_approx);
+ if (dup_bytes_approx) {
+ p_dedup_stats->duplicate_count ++;
+ p_dedup_stats->dedup_bytes_estimate += dup_bytes_approx;
}
}
disk_block_id_t block_id,
record_id_t rec_id,
bool shared_manifest,
- dedup_stats_t *p_small_objs,
- dedup_stats_t *p_big_objs,
- uint64_t *p_duplicate_head_bytes)
+ dedup_stats_t *p_dedup_stats)
{
value_t new_val(block_id, rec_id, shared_manifest);
uint32_t idx = find_entry(p_key);
else {
ceph_assert(hash_tab[idx].key == *p_key);
if (val.count <= MAX_COPIES_PER_OBJ) {
- inc_counters(p_key, p_small_objs, p_big_objs, p_duplicate_head_bytes);
+ inc_counters(p_key, p_dedup_stats);
}
if (val.count < std::numeric_limits<std::uint16_t>::max()) {
val.count ++;
}
//---------------------------------------------------------------------------
- void dedup_table_t::count_duplicates(dedup_stats_t *p_small_objs,
- dedup_stats_t *p_big_objs)
+ void dedup_table_t::count_duplicates(dedup_stats_t *p_dedup_stats)
{
for (uint32_t tab_idx = 0; tab_idx < entries_count; tab_idx++) {
if (!hash_tab[tab_idx].val.is_occupied()) {
continue;
}
- const key_t &key = hash_tab[tab_idx].key;
- // This is an approximation only since size is stored in 4KB resolution
- uint64_t byte_size_approx = disk_blocks_to_byte_size(key.size_4k_units);
-
- // skip small single part objects which we can't dedup
- if (!dedupable_object(key.multipart_object(), min_obj_size_for_dedup, byte_size_approx)) {
- if (hash_tab[tab_idx].val.is_singleton()) {
- p_small_objs->singleton_count++;
- }
- else {
- p_small_objs->unique_count ++;
- }
+ if (hash_tab[tab_idx].val.is_singleton()) {
+ p_dedup_stats->singleton_count++;
}
else {
- if (hash_tab[tab_idx].val.is_singleton()) {
- p_big_objs->singleton_count++;
- }
- else {
- ceph_assert(hash_tab[tab_idx].val.count > 1);
- p_big_objs->unique_count ++;
- }
+ ceph_assert(hash_tab[tab_idx].val.count > 1);
+ p_dedup_stats->unique_count ++;
}
}
}
dedup_table_t(const DoutPrefixProvider* _dpp,
uint32_t _head_object_size,
uint32_t _min_obj_size_for_dedup,
- uint32_t _max_obj_size_for_split,
+ bool _split_head,
uint8_t *p_slab,
uint64_t slab_size);
int add_entry(key_t *p_key,
disk_block_id_t block_id,
record_id_t rec_id,
bool shared_manifest,
- dedup_stats_t *p_small_objs_stat,
- dedup_stats_t *p_big_objs_stat,
- uint64_t *p_duplicate_head_bytes);
+ dedup_stats_t *p_dedup_stats);
void update_entry(key_t *p_key, disk_block_id_t block_id, record_id_t rec_id,
bool shared_manifest);
bool set_shared_manifest_src,
bool set_has_valid_hash_src);
- void count_duplicates(dedup_stats_t *p_small_objs_stat,
- dedup_stats_t *p_big_objs_stat);
-
+ void count_duplicates(dedup_stats_t *p_dedup_stats);
void remove_singletons_and_redistribute_keys();
private:
// 32 Bytes unified entries
uint32_t find_entry(const key_t *p_key) const;
void inc_counters(const key_t *p_key,
- dedup_stats_t *p_small_objs,
- dedup_stats_t *p_big_objs,
- uint64_t *p_duplicate_head_bytes);
+ dedup_stats_t *p_dedup_stats);
uint32_t entries_count = 0;
uint32_t occupied_count = 0;
uint32_t head_object_size;
uint32_t min_obj_size_for_dedup;
- uint32_t max_obj_size_for_split;
+ bool split_head;
table_entry_t *hash_tab = nullptr;
// stat counters
this->ingress_corrupted_etag += other.ingress_corrupted_etag;
this->ingress_skip_too_small_bytes += other.ingress_skip_too_small_bytes;
this->ingress_skip_too_small += other.ingress_skip_too_small;
- this->ingress_skip_too_small_64KB_bytes += other.ingress_skip_too_small_64KB_bytes;
- this->ingress_skip_too_small_64KB += other.ingress_skip_too_small_64KB;
return *this;
}
this->ingress_skip_too_small);
f->dump_unsigned("Ingress skip: too small bytes",
this->ingress_skip_too_small_bytes);
-
- if(this->ingress_skip_too_small_64KB) {
- f->dump_unsigned("Ingress skip: 64KB<=size<=4MB Obj",
- this->ingress_skip_too_small_64KB);
- f->dump_unsigned("Ingress skip: 64KB<=size<=4MB Bytes",
- this->ingress_skip_too_small_64KB_bytes);
- }
}
}
encode(w.ingress_skip_too_small_bytes, bl);
encode(w.ingress_skip_too_small, bl);
- encode(w.ingress_skip_too_small_64KB_bytes, bl);
- encode(w.ingress_skip_too_small_64KB, bl);
-
encode(w.duration, bl);
ENCODE_FINISH(bl);
}
decode(w.ingress_corrupted_etag, bl);
decode(w.ingress_skip_too_small_bytes, bl);
decode(w.ingress_skip_too_small, bl);
- decode(w.ingress_skip_too_small_64KB_bytes, bl);
- decode(w.ingress_skip_too_small_64KB, bl);
decode(w.duration, bl);
DECODE_FINISH(bl);
//---------------------------------------------------------------------------
md5_stats_t& md5_stats_t::operator+=(const md5_stats_t& other)
{
- this->small_objs_stat += other.small_objs_stat;
this->big_objs_stat += other.big_objs_stat;
this->ingress_slabs += other.ingress_slabs;
this->ingress_failed_load_bucket += other.ingress_failed_load_bucket;
this->ingress_skip_compressed += other.ingress_skip_compressed;
this->ingress_skip_compressed_bytes += other.ingress_skip_compressed_bytes;
this->ingress_skip_changed_objs += other.ingress_skip_changed_objs;
+ this->ingress_skip_explicit_objs += other.ingress_skip_explicit_objs;
+ this->ingress_skip_alibaba += other.ingress_skip_alibaba;
this->shared_manifest_dedup_bytes += other.shared_manifest_dedup_bytes;
this->skipped_shared_manifest += other.skipped_shared_manifest;
this->set_shared_manifest_src += other.set_shared_manifest_src;
this->loaded_objects += other.loaded_objects;
this->processed_objects += other.processed_objects;
- this->dup_head_bytes_estimate += other.dup_head_bytes_estimate;
this->deduped_objects += other.deduped_objects;
this->deduped_objects_bytes += other.deduped_objects_bytes;
- this->dup_head_bytes += other.dup_head_bytes;
this->failed_dedup += other.failed_dedup;
this->md_throttle_sleep_events += other.md_throttle_sleep_events;
f->dump_unsigned("Set Shared-Manifest SRC", this->set_shared_manifest_src);
f->dump_unsigned("Deduped Obj (this cycle)", this->deduped_objects);
f->dump_unsigned("Deduped Bytes(this cycle)", this->deduped_objects_bytes);
- f->dump_unsigned("Dup head bytes (not dedup)", this->dup_head_bytes);
f->dump_unsigned("Already Deduped bytes (prev cycles)",
this->shared_manifest_dedup_bytes);
f->dump_unsigned("Dedup Bytes Estimate", ds.dedup_bytes_estimate);
}
- // Potential Dedup Section:
- // What could be gained by allowing dedup for smaller objects (64KB-4MB)
- // Space wasted because of duplicated head-object (4MB)
- {
- Formatter::ObjectSection potential(*f, "Potential Dedup");
- const dedup_stats_t &ds = this->small_objs_stat;
- f->dump_unsigned("Singleton Obj (64KB-4MB)", ds.singleton_count);
- f->dump_unsigned("Unique Obj (64KB-4MB)", ds.unique_count);
- f->dump_unsigned("Duplicate Obj (64KB-4MB)", ds.duplicate_count);
- f->dump_unsigned("Dedup Bytes Estimate (64KB-4MB)", ds.dedup_bytes_estimate);
- f->dump_unsigned("Duplicated Head Bytes Estimate",
- this->dup_head_bytes_estimate);
- f->dump_unsigned("Duplicated Head Bytes", this->dup_head_bytes);
- }
-
{
Formatter::ObjectSection notify(*f, "notify");
if (this->md_throttle_sleep_events) {
if (this->ingress_skip_changed_objs) {
f->dump_unsigned("Skipped Changed Object", this->ingress_skip_changed_objs);
}
+ if (this->ingress_skip_explicit_objs) {
+ f->dump_unsigned("Skipped Explicit Objs", this->ingress_skip_explicit_objs);
+ }
+ if (this->ingress_skip_alibaba) {
+ f->dump_unsigned("Skipped Alibaba Cloud OSS", this->ingress_skip_alibaba);
+ }
}
{
{
ENCODE_START(1, 1, bl);
- encode(m.small_objs_stat, bl);
encode(m.big_objs_stat, bl);
encode(m.ingress_slabs, bl);
encode(m.ingress_failed_load_bucket, bl);
encode(m.ingress_skip_compressed, bl);
encode(m.ingress_skip_compressed_bytes, bl);
encode(m.ingress_skip_changed_objs, bl);
+ encode(m.ingress_skip_explicit_objs, bl);
+ encode(m.ingress_skip_alibaba, bl);
encode(m.shared_manifest_dedup_bytes, bl);
encode(m.skipped_shared_manifest, bl);
encode(m.loaded_objects, bl);
encode(m.processed_objects, bl);
- encode(m.dup_head_bytes_estimate, bl);
encode(m.deduped_objects, bl);
encode(m.deduped_objects_bytes, bl);
- encode(m.dup_head_bytes, bl);
encode(m.failed_dedup, bl);
encode(m.md_throttle_sleep_events, bl);
encode(m.md_throttle_sleep_time_usec, bl);
void decode(md5_stats_t& m, ceph::bufferlist::const_iterator& bl)
{
DECODE_START(1, bl);
- decode(m.small_objs_stat, bl);
decode(m.big_objs_stat, bl);
decode(m.ingress_slabs, bl);
decode(m.ingress_failed_load_bucket, bl);
decode(m.ingress_skip_compressed, bl);
decode(m.ingress_skip_compressed_bytes, bl);
decode(m.ingress_skip_changed_objs, bl);
+ decode(m.ingress_skip_explicit_objs, bl);
+ decode(m.ingress_skip_alibaba, bl);
decode(m.shared_manifest_dedup_bytes, bl);
decode(m.skipped_shared_manifest, bl);
decode(m.loaded_objects, bl);
decode(m.processed_objects, bl);
- decode(m.dup_head_bytes_estimate, bl);
decode(m.deduped_objects, bl);
decode(m.deduped_objects_bytes, bl);
- decode(m.dup_head_bytes, bl);
decode(m.failed_dedup, bl);
decode(m.md_throttle_sleep_events, bl);
decode(m.md_throttle_sleep_time_usec, bl);
uint64_t ingress_skip_too_small_bytes = 0;
uint64_t ingress_skip_too_small = 0;
- uint64_t ingress_skip_too_small_64KB_bytes = 0;
- uint64_t ingress_skip_too_small_64KB = 0;
-
utime_t duration = {0, 0};
};
std::ostream& operator<<(std::ostream &out, const worker_stats_t &s);
md5_stats_t& operator +=(const md5_stats_t& other);
void dump(Formatter *f) const;
- dedup_stats_t small_objs_stat;
dedup_stats_t big_objs_stat;
uint64_t ingress_slabs = 0;
uint64_t ingress_failed_load_bucket = 0;
uint64_t ingress_skip_compressed = 0;
uint64_t ingress_skip_compressed_bytes = 0;
uint64_t ingress_skip_changed_objs = 0;
+ uint64_t ingress_skip_explicit_objs = 0;
+ uint64_t ingress_skip_alibaba = 0;
uint64_t shared_manifest_dedup_bytes = 0;
uint64_t skipped_shared_manifest = 0;
uint64_t loaded_objects = 0;
uint64_t processed_objects = 0;
// counter is using on-disk size affected by block-size
- uint64_t dup_head_bytes_estimate = 0; //duplicate_head_bytes
uint64_t deduped_objects = 0;
// counter is using s3 byte size disregarding the on-disk size affected by block-size
uint64_t deduped_objects_bytes = 0;
- uint64_t dup_head_bytes = 0;
uint64_t failed_dedup = 0;
uint64_t md_throttle_sleep_events = 0;
uint64_t md_throttle_sleep_time_usec = 0;
//---------------------------------------------------------------------------
static inline uint64_t calc_deduped_bytes(uint32_t head_obj_size,
uint32_t min_obj_size_for_dedup,
- uint32_t max_obj_size_for_split,
+ bool split_head,
uint16_t num_parts,
uint64_t size_bytes)
{
// multipart objects with an empty head i.e. we achive full dedup
return size_bytes;
}
+ else if (size_bytes < min_obj_size_for_dedup) {
+ return 0;
+ }
+ else if (split_head) {
+ // Head is split into an empty obj and a new tail enabling a full dedup
+ return size_bytes;
+ }
else {
- // reduce the head size
- if (size_bytes > max_obj_size_for_split) {
- return size_bytes - head_obj_size;
- }
- else if (size_bytes >= min_obj_size_for_dedup) {
- // Head is splitted into an empty obj and a new tail enabling a full dedup
- return size_bytes;
- }
- else {
- return 0;
- }
+ // reduce by the head size, which is not deduped
+ return size_bytes - std::min(size_bytes, (uint64_t)head_obj_size);
}
}
return 0;
}
-bool RGWObjManifest::get_rule(uint64_t ofs, RGWObjManifestRule *rule)
+bool RGWObjManifest::get_rule(uint64_t ofs, RGWObjManifestRule *rule) const
{
if (rules.empty()) {
return false;
}
- map<uint64_t, RGWObjManifestRule>::iterator iter = rules.upper_bound(ofs);
+ map<uint64_t, RGWObjManifestRule>::const_iterator iter = rules.upper_bound(ofs);
if (iter != rules.begin()) {
--iter;
}
}
void get_implicit_location(uint64_t cur_part_id, uint64_t cur_stripe, uint64_t ofs,
- std::string *override_prefix, rgw_obj_select *location) const;
+ const std::string *override_prefix, rgw_obj_select *location) const;
+
+ const std::map<uint64_t, RGWObjManifestRule>& get_rules() const {
+ return rules;
+ }
void clear_rules() {
rules.clear();
int append(const DoutPrefixProvider *dpp, RGWObjManifest& m, const RGWZoneGroup& zonegroup,
const RGWZoneParams& zone_params);
- bool get_rule(uint64_t ofs, RGWObjManifestRule *rule);
+ bool get_rule(uint64_t ofs, RGWObjManifestRule *rule) const;
bool empty() const {
if (explicit_objs)
}
void RGWObjManifest::get_implicit_location(uint64_t cur_part_id, uint64_t cur_stripe,
- uint64_t ofs, string *override_prefix, rgw_obj_select *location) const
+ uint64_t ofs, const string *override_prefix, rgw_obj_select *location) const
{
rgw_obj loc;
duplicate_obj : int = 0
deduped_obj_bytes : int = 0
non_default_storage_class_objs_bytes : int = 0
- potential_singleton_obj : int = 0
- potential_unique_obj : int = 0
- potential_duplicate_obj : int = 0
- potential_dedup_space : int = 0
@dataclass
class Dedup_Ratio:
OUT_DIR="/tmp/dedup/"
KB=(1024)
MB=(1024*KB)
-POTENTIAL_OBJ_SIZE=(64*KB)
DEDUP_MIN_OBJ_SIZE=(64*KB)
-SPLIT_HEAD_SIZE=(4*MB)
+SPLIT_HEAD=True
RADOS_OBJ_SIZE=(4*MB)
# The default multipart threshold size for S3cmd is 15 MB.
MULTIPART_SIZE=(15*MB)
def calc_dedupable_space(obj_size, config):
on_disk_byte_size = calc_on_disk_byte_size(obj_size)
- threshold = config.multipart_threshold
# Objects with size bigger than MULTIPART_SIZE are uploaded as multi-part
# multi-part objects got a zero size Head objects
- if obj_size >= threshold:
+ if obj_size >= config.multipart_threshold:
dedupable_space = on_disk_byte_size
- elif obj_size > SPLIT_HEAD_SIZE:
- dedupable_space = on_disk_byte_size - RADOS_OBJ_SIZE
- elif obj_size >= DEDUP_MIN_OBJ_SIZE:
+ elif obj_size < DEDUP_MIN_OBJ_SIZE:
+ dedupable_space = 0
+ elif SPLIT_HEAD:
dedupable_space = on_disk_byte_size
else:
- dedupable_space = 0
+ dedupable_space = (on_disk_byte_size - min(on_disk_byte_size, RADOS_OBJ_SIZE))
log.debug("obj_size=%.2f MiB, dedupable_space=%.2f MiB",
float(obj_size)/MB, float(dedupable_space)/MB)
threshold = config.multipart_threshold
on_disk_byte_size = calc_on_disk_byte_size(obj_size)
- if num_copies < 2 or on_disk_byte_size > SPLIT_HEAD_SIZE or obj_size >= threshold:
+ if num_copies < 2 or not SPLIT_HEAD or obj_size >= threshold:
return 0
if on_disk_byte_size < DEDUP_MIN_OBJ_SIZE:
if on_disk_byte_size < DEDUP_MIN_OBJ_SIZE and threshold > DEDUP_MIN_OBJ_SIZE:
dedup_stats.skip_too_small += num_copies
dedup_stats.skip_too_small_bytes += (on_disk_byte_size * num_copies)
-
- if on_disk_byte_size >= POTENTIAL_OBJ_SIZE:
- if num_copies == 1:
- dedup_stats.potential_singleton_obj += 1
- else:
- dedup_stats.potential_unique_obj += 1
- dedup_stats.potential_duplicate_obj += dups_count
- dedup_stats.potential_dedup_space += (on_disk_byte_size * dups_count)
-
return
dedup_stats.total_processed_objects += num_copies
dedup_stats.duplicate_obj = main['Duplicate Obj']
dedup_stats.dedup_bytes_estimate = main['Dedup Bytes Estimate']
- potential = md5_stats['Potential Dedup']
- dedup_stats.potential_singleton_obj = potential['Singleton Obj (64KB-4MB)']
- dedup_stats.potential_unique_obj = potential['Unique Obj (64KB-4MB)']
- dedup_stats.potential_duplicate_obj = potential['Duplicate Obj (64KB-4MB)']
- dedup_stats.potential_dedup_space = potential['Dedup Bytes Estimate (64KB-4MB)']
-
dedup_work_was_completed=jstats['completed']
if dedup_work_was_completed:
dedup_ratio_estimate=read_dedup_ratio(jstats, 'dedup_ratio_estimate')
if verify_stats == False:
return ret
- if dedup_stats.potential_unique_obj or expected_dedup_stats.potential_unique_obj:
- log.debug("potential_unique_obj= %d / %d ", dedup_stats.potential_unique_obj,
- expected_dedup_stats.potential_unique_obj)
-
-
#dedup_stats.set_hash = dedup_stats.invalid_hash
if dedup_stats != expected_dedup_stats:
log.debug("==================================================")
os.mkdir(OUT_DIR)
-#-------------------------------------------------------------------------------
-def copy_potential_stats(new_dedup_stats, dedup_stats):
- new_dedup_stats.potential_singleton_obj = dedup_stats.potential_singleton_obj
- new_dedup_stats.potential_unique_obj = dedup_stats.potential_unique_obj
- new_dedup_stats.potential_duplicate_obj = dedup_stats.potential_duplicate_obj
- new_dedup_stats.potential_dedup_space = dedup_stats.potential_dedup_space
-
-
#-------------------------------------------------------------------------------
def small_single_part_objs_dedup(conn, bucket_name, dry_run):
# 1) generate small random files and store them on disk
# expected stats for small objects - all zeros except for skip_too_small
small_objs_dedup_stats = Dedup_Stats()
- #small_objs_dedup_stats.loaded_objects=dedup_stats.loaded_objects
- copy_potential_stats(small_objs_dedup_stats, dedup_stats)
small_objs_dedup_stats.size_before_dedup = dedup_stats.size_before_dedup
small_objs_dedup_stats.skip_too_small_bytes=dedup_stats.size_before_dedup
small_objs_dedup_stats.skip_too_small = s3_objects_total
min_size=1*KB
max_size=MULTIPART_SIZE*2
success=False
+ # Declare the variable with a type hint
+ conn: BaseClient
try:
conn=get_single_connection()
conn.create_bucket(Bucket=bucket_name)
# expected stats for small objects - all zeros except for skip_too_small
small_objs_dedup_stats = Dedup_Stats()
- #small_objs_dedup_stats.loaded_objects=dedup_stats.loaded_objects
- copy_potential_stats(small_objs_dedup_stats, dedup_stats)
small_objs_dedup_stats.size_before_dedup=dedup_stats.size_before_dedup
small_objs_dedup_stats.skip_too_small_bytes=dedup_stats.size_before_dedup
small_objs_dedup_stats.skip_too_small=s3_objects_total
# expected stats for small objects - all zeros except for skip_too_small
small_objs_dedup_stats = Dedup_Stats()
- copy_potential_stats(small_objs_dedup_stats, dedup_stats)
small_objs_dedup_stats.size_before_dedup=dedup_stats.size_before_dedup
small_objs_dedup_stats.skip_too_small_bytes=dedup_stats.size_before_dedup
small_objs_dedup_stats.skip_too_small=s3_objects_total
force_clean=True
log.info("test_dedup_identical_copies_multipart:full test")
__test_dedup_identical_copies(files, config, dry_run, verify, force_clean)
+