From ae4da729282fad6f554dac3091f3d0ccb92f6eba Mon Sep 17 00:00:00 2001 From: Alex Ainscow Date: Wed, 11 Jun 2025 16:24:12 +0100 Subject: [PATCH] osd: Remove all references to hinfo from optimized EC Legacy EC used hinfo to store two things: 1. Shard size 2. CRCs of the shards However: * Optimized EC stores different object sizes on each shard * Optimized EC scrub calculates the correct sizes of shards and checks them, so shard size checks are not needed in hinfo. * Bluestore checks the CRC. * Seastore checks the CRC. As such, the hinfo object is redundant. As such we remove it in optimized EC: 1. Remove all references/upgrades to hinfo. 2. Delete hinfo attribute if found on recovery/backfill. 3. Redirect all scrub references for hinfo to legacy EC. Signed-off-by: Alex Ainscow --- src/crimson/osd/scrub/scrub_validator.cc | 13 +- src/erasure-code/consistency/ECEncoder.cc | 2 +- src/osd/ECBackend.cc | 167 ++---------------- src/osd/ECBackend.h | 12 +- src/osd/ECCommon.cc | 66 +------ src/osd/ECCommon.h | 26 --- src/osd/ECTransaction.cc | 50 +----- src/osd/ECTransaction.h | 9 +- src/osd/ECUtil.cc | 100 +---------- src/osd/ECUtil.h | 59 +------ src/osd/scrubber/scrub_backend.cc | 9 +- src/test/osd/TestECBackend.cc | 2 +- src/test/osd/test_ec_transaction.cc | 22 --- src/tools/CMakeLists.txt | 2 +- src/tools/ceph-dencoder/osd_types.h | 4 +- src/tools/ceph_objectstore_tool.cc | 6 +- .../erasure-code/ceph-erasure-code-tool.cc | 2 +- src/tools/rados/rados.cc | 6 +- 18 files changed, 52 insertions(+), 505 deletions(-) diff --git a/src/crimson/osd/scrub/scrub_validator.cc b/src/crimson/osd/scrub/scrub_validator.cc index 2274a2df14df8..ec8e72928add8 100644 --- a/src/crimson/osd/scrub/scrub_validator.cc +++ b/src/crimson/osd/scrub/scrub_validator.cc @@ -8,6 +8,7 @@ #include "crimson/common/log.h" #include "crimson/osd/scrub/scrub_validator.h" #include "osd/ECUtil.h" +#include "osd/ECUtilL.h" SET_SUBSYS(osd); @@ -31,7 +32,7 @@ struct shard_evaluation_t { std::optional object_info; std::optional snapset; - std::optional hinfo; + std::optional hinfo; size_t omap_keys{0}; size_t omap_bytes{0}; @@ -132,11 +133,11 @@ shard_evaluation_t evaluate_object_shard( } if (policy.is_ec()) { - auto xiter = obj.attrs.find(ECUtil::get_hinfo_key()); + auto xiter = obj.attrs.find(ECLegacy::ECUtilL::get_hinfo_key()); if (xiter == obj.attrs.end()) { ret.shard_info.set_hinfo_missing(); } else { - ret.hinfo = ECUtil::HashInfo{}; + ret.hinfo = ECLegacy::ECUtilL::HashInfo{}; try { auto bliter = xiter->second.cbegin(); decode(*(ret.hinfo), bliter); @@ -210,10 +211,10 @@ librados::obj_err_t compare_candidate_to_authoritative( } if (policy.is_ec()) { - auto aiter = auth_si.attrs.find(ECUtil::get_hinfo_key()); + auto aiter = auth_si.attrs.find(ECLegacy::ECUtilL::get_hinfo_key()); ceph_assert(aiter != auth_si.attrs.end()); - auto citer = cand_si.attrs.find(ECUtil::get_hinfo_key()); + auto citer = cand_si.attrs.find(ECLegacy::ECUtilL::get_hinfo_key()); if (citer == cand_si.attrs.end() || !aiter->second.contents_equal(citer->second)) { ret.errors |= obj_err_t::HINFO_INCONSISTENCY; @@ -226,7 +227,7 @@ librados::obj_err_t compare_candidate_to_authoritative( auto is_sys_attr = [&policy](const auto &str) { return str == OI_ATTR || str == SS_ATTR || - (policy.is_ec() && str == ECUtil::get_hinfo_key()); + (policy.is_ec() && str == ECLegacy::ECUtilL::get_hinfo_key()); }; for (auto aiter = auth_si.attrs.begin(); aiter != auth_si.attrs.end(); ++aiter) { if (is_sys_attr(aiter->first)) continue; diff --git a/src/erasure-code/consistency/ECEncoder.cc b/src/erasure-code/consistency/ECEncoder.cc index 1b737a9f2a2c0..504d6d80e2e1c 100644 --- a/src/erasure-code/consistency/ECEncoder.cc +++ b/src/erasure-code/consistency/ECEncoder.cc @@ -106,7 +106,7 @@ std::optional ECEncoder::do_encode(ceph::buff sinfo.ro_range_to_shard_extent_map(0, inbl.length(), inbl, encoded_data); encoded_data.insert_parity_buffers(); - int r = encoded_data.encode(ec_impl, nullptr, encoded_data.get_ro_end()); + int r = encoded_data.encode(ec_impl); if (r < 0) { std::cerr << "Failed to encode: " << cpp_strerror(r) << std::endl; return {}; diff --git a/src/osd/ECBackend.cc b/src/osd/ECBackend.cc index 6dfb10cacca01..ce6fe25246ec6 100644 --- a/src/osd/ECBackend.cc +++ b/src/osd/ECBackend.cc @@ -90,10 +90,9 @@ ECBackend::ECBackend( rmw_pipeline(cct, ec_impl, this->sinfo, get_parent()->get_eclistener(), *this, ec_extent_cache_lru), recovery_backend(cct, switcher->coll, ec_impl, this->sinfo, read_pipeline, - unstable_hashinfo_registry, get_parent(), this), + get_parent(), this), ec_impl(ec_impl), - sinfo(ec_impl, &(get_parent()->get_pool()), stripe_width), - unstable_hashinfo_registry(cct, ec_impl) { + sinfo(ec_impl, &(get_parent()->get_pool()), stripe_width) { /* EC makes some assumptions about how the plugin organises the *data* shards: * - The chunk size is constant for a particular profile. @@ -113,7 +112,6 @@ ECBackend::RecoveryBackend::RecoveryBackend( ceph::ErasureCodeInterfaceRef ec_impl, const ECUtil::stripe_info_t &sinfo, ReadPipeline &read_pipeline, - UnstableHashInfoRegistry &unstable_hashinfo_registry, ECListener *parent, ECBackend *ecbackend) : cct(cct), @@ -121,7 +119,6 @@ ECBackend::RecoveryBackend::RecoveryBackend( ec_impl(std::move(ec_impl)), sinfo(sinfo), read_pipeline(read_pipeline), - unstable_hashinfo_registry(unstable_hashinfo_registry), parent(parent), ecbackend(ecbackend) {} @@ -335,17 +332,6 @@ void ECBackend::RecoveryBackend::handle_recovery_read_complete( op.recovery_info.size = op.obc->obs.oi.size; op.recovery_info.oi = op.obc->obs.oi; } - - if (sinfo.get_is_hinfo_required()) { - ECUtil::HashInfo hinfo(sinfo.get_k_plus_m()); - if (op.obc->obs.oi.size > 0) { - ceph_assert(op.xattrs.count(ECUtil::get_hinfo_key())); - auto bp = op.xattrs[ECUtil::get_hinfo_key()].cbegin(); - decode(hinfo, bp); - } - op.hinfo = unstable_hashinfo_registry.maybe_put_hash_info( - hoid, std::move(hinfo)); - } } ceph_assert(op.xattrs.size()); ceph_assert(op.obc); @@ -561,29 +547,6 @@ void ECBackend::RecoveryBackend::continue_recovery_op( if (op.recovery_progress.first && op.obc) { op.xattrs = op.obc->attr_cache; - if (sinfo.get_is_hinfo_required()) { - if (auto [r, attrs, size] = ecbackend->get_attrs_n_size_from_disk( - op.hoid); - r >= 0 || r == -ENOENT) { - op.hinfo = unstable_hashinfo_registry.get_hash_info( - op.hoid, false, attrs, size); - } else { - derr << __func__ << ": can't stat-or-getattr on " << op.hoid << - dendl; - } - if (!op.hinfo) { - derr << __func__ << ": " << op.hoid << " has inconsistent hinfo" - << dendl; - ceph_assert(recovery_ops.count(op.hoid)); - eversion_t v = recovery_ops[op.hoid].v; - recovery_ops.erase(op.hoid); - // TODO: not in crimson yet - get_parent()->on_failed_pull({get_parent()->whoami_shard()}, - op.hoid, v); - return; - } - encode(*(op.hinfo), op.xattrs[ECUtil::get_hinfo_key()]); - } } read_request_t read_request(std::move(want), @@ -675,6 +638,12 @@ void ECBackend::RecoveryBackend::continue_recovery_op( dout(10) << __func__ << ": push all attrs (not nonprimary)" << dendl; pop.attrset = op.xattrs; } + + // Following an upgrade, or turning of overwrites, we can take this + // opportunity to clean up hinfo. + if (pop.attrset.contains(ECUtil::get_hinfo_key())) { + pop.attrset.erase(ECUtil::get_hinfo_key()); + } } pop.recovery_info = op.recovery_info; pop.before_progress = op.recovery_progress; @@ -1100,53 +1069,6 @@ void ECBackend::handle_sub_read( << bl.length() << dendl; reply->buffers_read[hoid].push_back(make_pair(offset, bl)); } - - if (!sinfo.supports_ec_overwrites()) { - // This shows that we still need deep scrub because large enough files - // are read in sections, so the digest check here won't be done here. - // Do NOT check osd_read_eio_on_bad_digest here. We need to report - // the state of our chunk in case other chunks could substitute. - ECUtil::HashInfoRef hinfo; - map> attrs; - struct stat st; - int r = object_stat(hoid, &st); - if (r >= 0) { - dout(10) << __func__ << ": found on disk, size " << st.st_size << dendl; - r = switcher->objects_get_attrs_with_hinfo(hoid, &attrs); - } - if (r >= 0) { - hinfo = unstable_hashinfo_registry.get_hash_info( - hoid, false, attrs, st.st_size); - } else { - derr << __func__ << ": access (attrs) on " << hoid << " failed: " - << cpp_strerror(r) << dendl; - } - if (!hinfo) { - r = -EIO; - get_parent()->clog_error() << "Corruption detected: object " - << hoid << " is missing hash_info"; - dout(5) << __func__ << ": No hinfo for " << hoid << dendl; - goto error; - } - ceph_assert(hinfo->has_chunk_hash()); - if ((bl.length() == hinfo->get_total_chunk_size()) && - (offset == 0)) { - dout(20) << __func__ << ": Checking hash of " << hoid << dendl; - bufferhash h(-1); - h << bl; - if (h.digest() != hinfo->get_chunk_hash(shard)) { - get_parent()->clog_error() << "Bad hash for " << hoid << - " digest 0x" - << hex << h.digest() << " expected 0x" << hinfo-> - get_chunk_hash(shard) << dec; - dout(5) << __func__ << ": Bad hash for " << hoid << " digest 0x" - << hex << h.digest() << " expected 0x" - << hinfo->get_chunk_hash(shard) << dec << dendl; - r = -EIO; - goto error; - } - } - } } continue; error: @@ -1229,9 +1151,8 @@ void ECBackend::handle_sub_read_reply( ++i) { if (ECInject::test_read_error0( ghobject_t(i->first, ghobject_t::NO_GEN, op.from.shard))) { - dout(0) << __func__ << " Error inject - EIO error for shard " << op.from - .shard - << dendl; + dout(0) << __func__ << " Error inject - EIO error for shard " + << op.from.shard << dendl; op.buffers_read.erase(i->first); op.attrs_read.erase(i->first); op.errors[i->first] = -EIO; @@ -1513,14 +1434,6 @@ std::tuple< return {0, real_attrs, st.st_size}; } -ECUtil::HashInfoRef ECBackend::get_hinfo_from_disk(hobject_t oid) { - auto [r, attrs, size] = get_attrs_n_size_from_disk(oid); - ceph_assert(r >= 0 || r == -ENOENT); - ECUtil::HashInfoRef hinfo = unstable_hashinfo_registry.get_hash_info( - oid, true, attrs, size); - return hinfo; -} - std::optional ECBackend::get_object_info_from_obc( ObjectContextRef &obc) { std::optional ret; @@ -1578,21 +1491,12 @@ void ECBackend::submit_transaction( op->t->safe_create_traverse( [&](std::pair &i) { const auto &[oid, inner_op] = i; - ECUtil::HashInfoRef shinfo; auto &obc = obc_map.at(oid); object_info_t oi = obc->obs.oi; std::optional soi; - ECUtil::HashInfoRef hinfo; - - if (!sinfo.supports_ec_overwrites()) { - hinfo = get_hinfo_from_disk(oid); - } hobject_t source; if (inner_op.has_source(&source)) { - if (!sinfo.supports_ec_overwrites()) { - shinfo = get_hinfo_from_disk(source); - } if (!inner_op.is_rename()) { soi = get_object_info_from_obc(obc_map.at(source)); } @@ -1619,8 +1523,7 @@ void ECBackend::submit_transaction( ECTransaction::WritePlanObj plan(oid, inner_op, sinfo, readable_shards, writable_shards, object_in_cache, old_object_size, - oi, soi, std::move(hinfo), - std::move(shinfo), + oi, soi, rmw_pipeline.ec_pdw_write_mode); if (plan.to_read) plans.want_read = true; @@ -1857,53 +1760,7 @@ int ECBackend::be_deep_scrub( return -EINPROGRESS; } - if (!sinfo.get_is_hinfo_required()) { - o.digest = 0; - o.digest_present = true; - o.omap_digest = -1; - o.omap_digest_present = true; - return 0; - } - - ECUtil::HashInfoRef hinfo = unstable_hashinfo_registry.get_hash_info( - poid, false, o.attrs, o.size); - if (!hinfo) { - dout(0) << "_scan_list " << poid << " could not retrieve hash info" << dendl; - o.read_error = true; - o.digest_present = false; - return 0; - } - if (!hinfo->has_chunk_hash()) { - dout(0) << "_scan_list " << poid << " got invalid hash info" << dendl; - o.ec_size_mismatch = true; - return 0; - } - if (hinfo->get_total_chunk_size() != (unsigned)pos.data_pos) { - dout(0) << "_scan_list " << poid << " got incorrect size on read 0x" - << std::hex << pos - << " expected 0x" << hinfo->get_total_chunk_size() << std::dec - << dendl; - o.ec_size_mismatch = true; - return 0; - } - - if (hinfo->get_chunk_hash(get_parent()->whoami_shard().shard) != - pos.data_hash.digest()) { - dout(0) << "_scan_list " << poid << " got incorrect hash on read 0x" - << std::hex << pos.data_hash.digest() << " != expected 0x" - << hinfo->get_chunk_hash(get_parent()->whoami_shard().shard) - << std::dec << dendl; - o.ec_hash_mismatch = true; - return 0; - } - - /* We checked above that we match our own stored hash. We cannot - * send a hash of the actual object, so instead we simply send - * our locally stored hash of shard 0 on the assumption that if - * we match our chunk hash and our recollection of the hash for - * chunk 0 matches that of our peers, there is likely no corruption. - */ - o.digest = hinfo->get_chunk_hash(shard_id_t(0)); + o.digest = 0; o.digest_present = true; o.omap_digest = -1; o.omap_digest_present = true; diff --git a/src/osd/ECBackend.h b/src/osd/ECBackend.h index bee0cd5afab9d..d85defc9e5987 100644 --- a/src/osd/ECBackend.h +++ b/src/osd/ECBackend.h @@ -217,7 +217,6 @@ class ECBackend : public ECCommon { ceph::ErasureCodeInterfaceRef ec_impl; const ECUtil::stripe_info_t &sinfo; ReadPipeline &read_pipeline; - UnstableHashInfoRegistry &unstable_hashinfo_registry; // TODO: lay an interface down here ECListener *parent; ECBackend *ecbackend; @@ -244,7 +243,6 @@ class ECBackend : public ECCommon { ceph::ErasureCodeInterfaceRef ec_impl, const ECUtil::stripe_info_t &sinfo, ReadPipeline &read_pipeline, - UnstableHashInfoRegistry &unstable_hashinfo_registry, ECListener *parent, ECBackend *ecbackend); @@ -278,7 +276,6 @@ class ECBackend : public ECCommon { // must be filled if state == WRITING std::optional returned_data; std::map> xattrs; - ECUtil::HashInfoRef hinfo; ObjectContextRef obc; std::set waiting_on_pushes; @@ -352,12 +349,10 @@ class ECBackend : public ECCommon { ceph::ErasureCodeInterfaceRef ec_impl, const ECUtil::stripe_info_t &sinfo, ReadPipeline &read_pipeline, - UnstableHashInfoRegistry &unstable_hashinfo_registry, PGBackend::Listener *parent, ECBackend *ecbackend) : RecoveryBackend(cct, coll, std::move(ec_impl), sinfo, read_pipeline, - unstable_hashinfo_registry, parent->get_eclistener(), - ecbackend), + parent->get_eclistener(), ecbackend), parent(parent) {} void commit_txn_send_replies( @@ -481,17 +476,12 @@ class ECBackend : public ECCommon { const ECUtil::stripe_info_t sinfo; - ECCommon::UnstableHashInfoRegistry unstable_hashinfo_registry; - - std::tuple< int, std::map>, size_t > get_attrs_n_size_from_disk(const hobject_t &hoid); - ECUtil::HashInfoRef get_hinfo_from_disk(hobject_t oid); - std::optional get_object_info_from_obc( ObjectContextRef &obc_map ); diff --git a/src/osd/ECCommon.cc b/src/osd/ECCommon.cc index 06d53bcb8da0b..1987d51adeeb5 100644 --- a/src/osd/ECCommon.cc +++ b/src/osd/ECCommon.cc @@ -62,12 +62,6 @@ static ostream &_prefix(std::ostream *_dout, return read_pipeline->get_parent()->gen_dbg_prefix(*_dout); } -static ostream &_prefix(std::ostream *_dout, - ECCommon::UnstableHashInfoRegistry * - unstable_hash_info_registry) { - return *_dout; -} - static ostream &_prefix(std::ostream *_dout, struct ClientReadCompleter const *read_completer ); @@ -747,16 +741,6 @@ void ECCommon::RMWPipeline::cache_ready(Op &op) { dout(20) << __func__ << ": written: " << written << ", op: " << op << dendl; - if (!sinfo.supports_ec_overwrites()) { - for (auto &&i: op.log_entries) { - if (i.requires_kraken()) { - derr << __func__ << ": log entry " << i << " requires kraken" - << " but overwrites are not enabled!" << dendl; - ceph_abort(); - } - } - } - ObjectStore::Transaction empty; bool should_write_local = false; ECSubWrite local_write_op; @@ -967,52 +951,4 @@ void ECCommon::RMWPipeline::on_change2() { void ECCommon::RMWPipeline::call_write_ordered(std::function &&cb) { extent_cache.add_on_write(std::move(cb)); -} - -ECUtil::HashInfoRef ECCommon::UnstableHashInfoRegistry::maybe_put_hash_info( - const hobject_t &hoid, - ECUtil::HashInfo &&hinfo) { - return registry.lookup_or_create(hoid, hinfo); -} - -ECUtil::HashInfoRef ECCommon::UnstableHashInfoRegistry::get_hash_info( - const hobject_t &hoid, - bool create, - const map> &attrs, - uint64_t size) { - dout(10) << __func__ << ": Getting attr on " << hoid << dendl; - auto ref = registry.lookup(hoid); - if (!ref) { - dout(10) << __func__ << ": not in cache " << hoid << dendl; - ECUtil::HashInfo hinfo(ec_impl->get_chunk_count()); - bufferlist bl; - if (attrs.contains(ECUtil::get_hinfo_key())) { - bl = attrs.at(ECUtil::get_hinfo_key()); - } else { - dout(30) << __func__ << " " << hoid << " missing hinfo attr" << dendl; - } - if (bl.length() > 0) { - auto bp = bl.cbegin(); - try { - decode(hinfo, bp); - } - catch (...) { - dout(0) << __func__ << ": Can't decode hinfo for " << hoid << dendl; - return ECUtil::HashInfoRef(); - } - if (hinfo.get_total_chunk_size() != size) { - dout(0) << __func__ << ": Mismatch of total_chunk_size " - << hinfo.get_total_chunk_size() << dendl; - return ECUtil::HashInfoRef(); - } - create = true; - } else if (size == 0) { - // If empty object and no hinfo, create it - create = true; - } - if (create) { - ref = registry.lookup_or_create(hoid, hinfo); - } - } - return ref; -} +} \ No newline at end of file diff --git a/src/osd/ECCommon.h b/src/osd/ECCommon.h index 9f000cfb491db..499aa86019f52 100644 --- a/src/osd/ECCommon.h +++ b/src/osd/ECCommon.h @@ -32,8 +32,6 @@ struct ECTransaction { bool invalidates_cache = false; // Yes, both are possible std::map to_read; std::map will_write; - - std::map hash_infos; }; }; @@ -636,30 +634,6 @@ struct ECCommon { extent_cache(*this, ec_extent_cache_lru, sinfo, cct), ec_pdw_write_mode(cct->_conf.get_val("ec_pdw_write_mode")) {} }; - - class UnstableHashInfoRegistry { - CephContext *cct; - ceph::ErasureCodeInterfaceRef ec_impl; - /// If modified, ensure that the ref is held until the update is applied - SharedPtrRegistry registry; - - public: - UnstableHashInfoRegistry( - CephContext *cct, - ceph::ErasureCodeInterfaceRef ec_impl) - : cct(cct), - ec_impl(std::move(ec_impl)) {} - - ECUtil::HashInfoRef maybe_put_hash_info( - const hobject_t &hoid, - ECUtil::HashInfo &&hinfo); - - ECUtil::HashInfoRef get_hash_info( - const hobject_t &hoid, - bool create, - const std::map> &attrs, - uint64_t size); - }; }; template diff --git a/src/osd/ECTransaction.cc b/src/osd/ECTransaction.cc index d430543b4465e..996a37f03d806 100644 --- a/src/osd/ECTransaction.cc +++ b/src/osd/ECTransaction.cc @@ -70,7 +70,7 @@ void ECTransaction::Generate::encode_and_write() { to_write.pad_with_other(plan.will_write, *read_sem); r = to_write.encode_parity_delta(ec_impl, *read_sem); } else { - r = to_write.encode(ec_impl, plan.hinfo, plan.orig_size); + r = to_write.encode(ec_impl); } ceph_assert(r == 0); // Remove any unnecessary writes. @@ -124,14 +124,10 @@ ECTransaction::WritePlanObj::WritePlanObj( uint64_t orig_size, const std::optional &oi, const std::optional &soi, - const ECUtil::HashInfoRef &&hinfo, - const ECUtil::HashInfoRef &&shinfo, - const unsigned pdw_write_mode + unsigned pdw_write_mode ) : hoid(hoid), will_write(sinfo.get_k_plus_m()), - hinfo(hinfo), - shinfo(shinfo), orig_size(orig_size), // On-disk object sizes are rounded up to the next page. projected_size(soi?soi->size:(oi?oi->size:0)) { @@ -369,8 +365,6 @@ void ECTransaction::Generate::delete_first() { ghobject_t(oid, ghobject_t::NO_GEN, shard)); } } - if (plan.hinfo) - plan.hinfo->clear(); } void ECTransaction::Generate::process_init() { @@ -400,10 +394,6 @@ void ECTransaction::Generate::process_init() { ghobject_t(oid, ghobject_t::NO_GEN, shard)); } - if (plan.hinfo && plan.shinfo) { - plan.hinfo->update_to(*plan.shinfo); - } - if (obc) { auto cobciter = t.obc_map.find(cop.source); ceph_assert(cobciter != t.obc_map.end()); @@ -420,9 +410,7 @@ void ECTransaction::Generate::process_init() { coll_t(spg_t(pgid, shard)), ghobject_t(oid, ghobject_t::NO_GEN, shard)); } - if (plan.hinfo && plan.shinfo) { - plan.hinfo->update_to(*plan.shinfo); - } + if (obc) { auto cobciter = t.obc_map.find(rop.source); ceph_assert(cobciter == t.obc_map.end()); @@ -519,12 +507,6 @@ ECTransaction::Generate::Generate(PGTransaction &t, entry->mod_desc.update_snaps(op.updated_snaps->first); } - bufferlist old_hinfo; - if (plan.hinfo) { - encode(*(plan.hinfo), old_hinfo); - xattr_rollback[ECUtil::get_hinfo_key()] = old_hinfo; - } - if (op.is_none() && op.truncate && op.truncate->first == 0) { zero_truncate_to_delete(); } @@ -571,19 +553,10 @@ ECTransaction::Generate::Generate(PGTransaction &t, written_map->emplace(oid, std::move(to_write)); - if (entry && plan.hinfo) { - plan.hinfo->set_total_chunk_size_clear_hash( - sinfo.ro_offset_to_next_stripe_ro_offset(plan.projected_size)); - } - if (entry && plan.orig_size < plan.projected_size) { entry->mod_desc.append(ECUtil::align_next(plan.orig_size)); } - if (op.is_delete()) { - handle_deletes(); - } - // On a size change, we want to update OI on all shards if (plan.orig_size != plan.projected_size) { all_shards_written(); @@ -996,23 +969,6 @@ void ECTransaction::Generate::attr_updates() { ceph_assert(!xattr_rollback.empty()); } -void ECTransaction::Generate::handle_deletes() { - bufferlist hbuf; - if (plan.hinfo) { - encode(*plan.hinfo, hbuf); - for (auto &&[shard, t]: transactions) { - if (!sinfo.is_nonprimary_shard(shard)) { - shard_written(shard); - t.setattr( - coll_t(spg_t(pgid, shard)), - ghobject_t(oid, ghobject_t::NO_GEN, shard), - ECUtil::get_hinfo_key(), - hbuf); - } - } - } -} - void ECTransaction::generate_transactions( PGTransaction *_t, WritePlan &plans, diff --git a/src/osd/ECTransaction.h b/src/osd/ECTransaction.h index 4f4059e414c13..65c040b5a9537 100644 --- a/src/osd/ECTransaction.h +++ b/src/osd/ECTransaction.h @@ -26,8 +26,6 @@ class WritePlanObj { const hobject_t hoid; std::optional to_read; ECUtil::shard_extent_set_t will_write; - const ECUtil::HashInfoRef hinfo; - const ECUtil::HashInfoRef shinfo; const uint64_t orig_size; const uint64_t projected_size; bool invalidates_cache; @@ -43,16 +41,12 @@ class WritePlanObj { uint64_t orig_size, const std::optional &oi, const std::optional &soi, - const ECUtil::HashInfoRef &&hinfo, - const ECUtil::HashInfoRef &&shinfo, - const unsigned pdw_write_mode); + unsigned pdw_write_mode); void print(std::ostream &os) const { os << "{hoid: " << hoid << " to_read: " << to_read << " will_write: " << will_write - << " hinfo: " << hinfo - << " shinfo: " << shinfo << " orig_size: " << orig_size << " projected_size: " << projected_size << " invalidates_cache: " << invalidates_cache @@ -113,7 +107,6 @@ class Generate { void appends_and_clone_ranges(); void written_and_present_shards(); void attr_updates(); - void handle_deletes(); public: Generate(PGTransaction &t, diff --git a/src/osd/ECUtil.cc b/src/osd/ECUtil.cc index 51a00b7c27ccc..b1522a7fbd8c7 100644 --- a/src/osd/ECUtil.cc +++ b/src/osd/ECUtil.cc @@ -512,27 +512,8 @@ int shard_extent_map_t::_encode(const ErasureCodeInterfaceRef &ec_impl) { /* Encode parity chunks, using the encode_chunks interface into the * erasure coding. This generates all parity using full stripe writes. */ -int shard_extent_map_t::encode(const ErasureCodeInterfaceRef &ec_impl, - const HashInfoRef &hinfo, - uint64_t before_ro_size) { - int r = _encode(ec_impl); - - if (!r && hinfo && ro_start >= before_ro_size) { - /* NEEDS REVIEW: The following calculates the new hinfo CRCs. This is - * currently considering ALL the buffers, including the - * parity buffers. Is this really right? - * Also, does this really belong here? Its convenient - * because have just built the buffer list... - */ - shard_id_set full_set; - full_set.insert_range(shard_id_t(0), sinfo->get_k_plus_m()); - for (auto iter = begin_slice_iterator(full_set); !iter.is_end(); ++iter) { - ceph_assert(ro_start == before_ro_size); - hinfo->append(iter.get_offset(), iter.get_in_bufferptrs()); - } - } - - return r; +int shard_extent_map_t::encode(const ErasureCodeInterfaceRef &ec_impl) { + return _encode(ec_impl); } /* Encode parity chunks, using the parity delta write interfaces on plugins @@ -1089,57 +1070,8 @@ void shard_extent_set_t::insert(const shard_extent_set_t &other) { } } -void ECUtil::HashInfo::append(uint64_t old_size, - shard_id_map &to_append) { - ceph_assert(old_size == total_chunk_size); - uint64_t size_to_append = to_append.begin()->second.length(); - if (has_chunk_hash()) { - ceph_assert(to_append.size() == cumulative_shard_hashes.size()); - for (auto &&[shard, ptr] : to_append) { - ceph_assert(size_to_append == ptr.length()); - ceph_assert(shard < static_cast(cumulative_shard_hashes.size())); - cumulative_shard_hashes[int(shard)] = - ceph_crc32c(cumulative_shard_hashes[int(shard)], - (unsigned char*)ptr.c_str(), ptr.length()); - } - } - total_chunk_size += size_to_append; -} - -void ECUtil::HashInfo::encode(bufferlist &bl) const { - ENCODE_START(1, 1, bl); - encode(total_chunk_size, bl); - encode(cumulative_shard_hashes, bl); - ENCODE_FINISH(bl); -} - -void ECUtil::HashInfo::decode(bufferlist::const_iterator &bl) { - DECODE_START(1, bl); - decode(total_chunk_size, bl); - decode(cumulative_shard_hashes, bl); - DECODE_FINISH(bl); -} - -void ECUtil::HashInfo::dump(Formatter *f) const { - f->dump_unsigned("total_chunk_size", total_chunk_size); - f->open_array_section("cumulative_shard_hashes"); - for (unsigned i = 0; i != cumulative_shard_hashes.size(); ++i) { - f->open_object_section("hash"); - f->dump_unsigned("shard", i); - f->dump_unsigned("hash", cumulative_shard_hashes[i]); - f->close_section(); - } - f->close_section(); -} namespace ECUtil { -std::ostream &operator<<(std::ostream &out, const HashInfo &hi) { - ostringstream hashes; - for (auto hash : hi.cumulative_shard_hashes) { - hashes << " " << hex << hash; - } - return out << "tcs=" << hi.total_chunk_size << hashes.str(); -} std::ostream &operator<<(std::ostream &out, const shard_extent_map_t &rhs) { // sinfo not thought to be needed for debug, as it is constant. @@ -1172,34 +1104,16 @@ std::ostream &operator<<(std::ostream &out, const log_entry_t &rhs) { } return out << "[" << rhs.shard << "]->" << rhs.io << "\n"; } -} - -void ECUtil::HashInfo::generate_test_instances(list &o) { - o.push_back(new HashInfo(3)); - { - bufferlist bl; - bl.append_zero(20); - - bufferptr bp = bl.begin().get_current_ptr(); - - // We don't have the k+m here, but this is not critical performance, so - // create an oversized map. - shard_id_map buffers(128); - buffers[shard_id_t(0)] = bp; - buffers[shard_id_t(1)] = bp; - buffers[shard_id_t(2)] = bp; - o.back()->append(0, buffers); - o.back()->append(20, buffers); - } - o.push_back(new HashInfo(4)); -} +// Upgraded pools can still have keys, so there are a few functions that still +// require these keys. const string HINFO_KEY = "hinfo_key"; -bool ECUtil::is_hinfo_key_string(const string &key) { +bool is_hinfo_key_string(const string &key) { return key == HINFO_KEY; } -const string &ECUtil::get_hinfo_key() { +const string &get_hinfo_key() { return HINFO_KEY; } +} diff --git a/src/osd/ECUtil.h b/src/osd/ECUtil.h index 93c5e8b98499b..04ac579d69948 100644 --- a/src/osd/ECUtil.h +++ b/src/osd/ECUtil.h @@ -450,7 +450,8 @@ public: chunk_mapping(complete_chunk_mapping(std::vector(), k + m)), chunk_mapping_reverse(reverse_chunk_mapping(chunk_mapping)), data_shards(calc_shards(raw_shard_id_t(), k, chunk_mapping)), - parity_shards(calc_shards(raw_shard_id_t(k), m, chunk_mapping)) { + parity_shards(calc_shards(raw_shard_id_t(k), m, chunk_mapping)), + all_shards(calc_all_shards(k + m)) { ceph_assert(stripe_width != 0); ceph_assert(stripe_width % k == 0); } @@ -743,57 +744,6 @@ public: ECUtil::shard_extent_set_t &shard_extent_set) const; }; -class HashInfo { - uint64_t total_chunk_size = 0; - std::vector cumulative_shard_hashes; - -public: - HashInfo() {} - - explicit HashInfo(unsigned num_chunks) : - cumulative_shard_hashes(num_chunks, -1) {} - - void append(uint64_t old_size, shard_id_map &to_append); - - void clear() { - total_chunk_size = 0; - cumulative_shard_hashes = std::vector( - cumulative_shard_hashes.size(), - -1); - } - - void encode(ceph::buffer::list &bl) const; - void decode(ceph::buffer::list::const_iterator &bl); - void dump(ceph::Formatter *f) const; - static void generate_test_instances(std::list &o); - - uint32_t get_chunk_hash(shard_id_t shard) const { - ceph_assert(shard < cumulative_shard_hashes.size()); - return cumulative_shard_hashes[int(shard)]; - } - - uint64_t get_total_chunk_size() const { - return total_chunk_size; - } - - void set_total_chunk_size_clear_hash(uint64_t new_chunk_size) { - cumulative_shard_hashes.clear(); - total_chunk_size = new_chunk_size; - } - - bool has_chunk_hash() const { - return !cumulative_shard_hashes.empty(); - } - - void update_to(const HashInfo &rhs) { - *this = rhs; - } - - friend std::ostream &operator<<(std::ostream &out, const HashInfo &hi); -}; - -typedef std::shared_ptr HashInfoRef; - class shard_extent_map_t { static const uint64_t invalid_offset = std::numeric_limits::max(); @@ -940,8 +890,7 @@ public: void append_zeros_to_ro_offset(uint64_t ro_offset); void insert_ro_extent_map(const extent_map &host_extent_map); extent_set get_extent_superset() const; - int encode(const ErasureCodeInterfaceRef &ec_impl, const HashInfoRef &hinfo, - uint64_t before_ro_size); + int encode(const ErasureCodeInterfaceRef &ec_impl); int _encode(const ErasureCodeInterfaceRef &ec_impl); int encode_parity_delta(const ErasureCodeInterfaceRef &ec_impl, shard_extent_map_t &old_sem); @@ -1082,7 +1031,5 @@ struct log_entry_t { bool is_hinfo_key_string(const std::string &key); const std::string &get_hinfo_key(); - -WRITE_CLASS_ENCODER(ECUtil::HashInfo) } diff --git a/src/osd/scrubber/scrub_backend.cc b/src/osd/scrubber/scrub_backend.cc index ea1eeeda6f738..2dccc1bb92eab 100644 --- a/src/osd/scrubber/scrub_backend.cc +++ b/src/osd/scrubber/scrub_backend.cc @@ -13,6 +13,7 @@ #include "include/utime_fmt.h" #include "messages/MOSDRepScrubMap.h" #include "osd/ECUtil.h" +#include "osd/ECUtilL.h" #include "osd/OSD.h" #include "osd/PG.h" #include "osd/PrimaryLogPG.h" @@ -664,7 +665,7 @@ shard_as_auth_t ScrubBackend::possible_auth_shard(const hobject_t& obj, } if (m_pg.get_is_hinfo_required()) { - auto k = smap_obj.attrs.find(ECUtil::get_hinfo_key()); + auto k = smap_obj.attrs.find(ECLegacy::ECUtilL::get_hinfo_key()); if (dup_error_cond(err, false, (k == smap_obj.attrs.end()), @@ -673,7 +674,7 @@ shard_as_auth_t ScrubBackend::possible_auth_shard(const hobject_t& obj, "candidate had a missing hinfo key"sv, errstream)) { const bufferlist& hk_bl = k->second; - ECUtil::HashInfo hi; + ECLegacy::ECUtilL::HashInfo hi; try { auto bliter = hk_bl.cbegin(); decode(hi, bliter); @@ -1310,11 +1311,11 @@ bool ScrubBackend::compare_obj_details(pg_shard_t auth_shard, if (!shard_result.has_hinfo_missing() && !shard_result.has_hinfo_corrupted()) { - auto can_hi = candidate.attrs.find(ECUtil::get_hinfo_key()); + auto can_hi = candidate.attrs.find(ECLegacy::ECUtilL::get_hinfo_key()); ceph_assert(can_hi != candidate.attrs.end()); const bufferlist& can_bl = can_hi->second; - auto auth_hi = auth.attrs.find(ECUtil::get_hinfo_key()); + auto auth_hi = auth.attrs.find(ECLegacy::ECUtilL::get_hinfo_key()); ceph_assert(auth_hi != auth.attrs.end()); const bufferlist& auth_bl = auth_hi->second; diff --git a/src/test/osd/TestECBackend.cc b/src/test/osd/TestECBackend.cc index efcdf6f39f90a..a84648d02996c 100644 --- a/src/test/osd/TestECBackend.cc +++ b/src/test/osd/TestECBackend.cc @@ -1262,7 +1262,7 @@ TEST(ECCommon, encode) bl.append_zero(i>=k?4096:2048); semap.insert_in_shard(i, 12*1024, bl); } - semap.encode(ec_impl, nullptr, 0); + semap.encode(ec_impl); } TEST(ECCommon, decode) diff --git a/src/test/osd/test_ec_transaction.cc b/src/test/osd/test_ec_transaction.cc index 65e944887c072..44984a4444c7f 100644 --- a/src/test/osd/test_ec_transaction.cc +++ b/src/test/osd/test_ec_transaction.cc @@ -57,8 +57,6 @@ TEST(ectransaction, two_writes_separated_append) 0, std::nullopt, std::nullopt, - ECUtil::HashInfoRef(new ECUtil::HashInfo(1)), - nullptr, 0); generic_derr << "plan " << plan << dendl; @@ -95,8 +93,6 @@ TEST(ectransaction, two_writes_separated_misaligned_overwrite) oi.size, oi, std::nullopt, - ECUtil::HashInfoRef(new ECUtil::HashInfo(1)), - nullptr, 0); generic_derr << "plan " << plan << dendl; @@ -135,8 +131,6 @@ TEST(ectransaction, partial_write) 0, oi, std::nullopt, - ECUtil::HashInfoRef(new ECUtil::HashInfo(1)), - nullptr, 0); generic_derr << "plan " << plan << dendl; @@ -177,8 +171,6 @@ TEST(ectransaction, overlapping_write_non_aligned) 8, oi, std::nullopt, - ECUtil::HashInfoRef(new ECUtil::HashInfo(1)), - nullptr, 0); generic_derr << "plan " << plan << dendl; @@ -220,8 +212,6 @@ TEST(ectransaction, test_appending_write_non_aligned) 8, oi, std::nullopt, - ECUtil::HashInfoRef(new ECUtil::HashInfo(1)), - nullptr, 0); generic_derr << "plan " << plan << dendl; @@ -263,8 +253,6 @@ TEST(ectransaction, append_with_large_hole) 4096, oi, std::nullopt, - ECUtil::HashInfoRef(new ECUtil::HashInfo(1)), - nullptr, 0); generic_derr << "plan " << plan << dendl; @@ -306,8 +294,6 @@ TEST(ectransaction, test_append_not_page_aligned_with_large_hole) EC_ALIGN_SIZE, oi, std::nullopt, - ECUtil::HashInfoRef(new ECUtil::HashInfo(1)), - nullptr, 0); generic_derr << "plan " << plan << dendl; @@ -351,8 +337,6 @@ TEST(ectransaction, test_overwrite_with_missing) 42*(EC_ALIGN_SIZE / 4), oi, std::nullopt, - ECUtil::HashInfoRef(new ECUtil::HashInfo(1)), - nullptr, 0); generic_derr << "plan " << plan << dendl; @@ -392,8 +376,6 @@ TEST(ectransaction, truncate_to_bigger_without_write) 4096, std::nullopt, std::nullopt, - ECUtil::HashInfoRef(new ECUtil::HashInfo(1)), - nullptr, 0); generic_derr << "plan " << plan << dendl; @@ -423,8 +405,6 @@ TEST(ectransaction, truncate_to_smalelr_without_write) { 16*EC_ALIGN_SIZE, std::nullopt, std::nullopt, - ECUtil::HashInfoRef(new ECUtil::HashInfo(1)), - nullptr, 0); generic_derr << "plan " << plan << dendl; @@ -470,8 +450,6 @@ TEST(ectransaction, delete_and_write_misaligned) { 16*EC_ALIGN_SIZE, std::nullopt, std::nullopt, - ECUtil::HashInfoRef(new ECUtil::HashInfo(1)), - nullptr, 0); generic_derr << "plan " << plan << dendl; diff --git a/src/tools/CMakeLists.txt b/src/tools/CMakeLists.txt index fd9135b44737d..ee8ff3f24bc88 100644 --- a/src/tools/CMakeLists.txt +++ b/src/tools/CMakeLists.txt @@ -5,7 +5,7 @@ set(rados_srcs rados/PoolDump.cc ${PROJECT_SOURCE_DIR}/src/common/util.cc ${PROJECT_SOURCE_DIR}/src/common/obj_bencher.cc - ${PROJECT_SOURCE_DIR}/src/osd/ECUtil.cc) + ${PROJECT_SOURCE_DIR}/src/osd/ECUtilL.cc) if(WIN32) list(APPEND rados_srcs ../common/win32/code_page.rc) endif() diff --git a/src/tools/ceph-dencoder/osd_types.h b/src/tools/ceph-dencoder/osd_types.h index c49173a90b6d6..082aa463addb5 100644 --- a/src/tools/ceph-dencoder/osd_types.h +++ b/src/tools/ceph-dencoder/osd_types.h @@ -72,9 +72,9 @@ TYPE(eversion_t) //TYPE(compact_interval_t) declared in .cc //TYPE(pg_missing_t::item) -#include "osd/ECUtil.h" +#include "osd/ECUtilL.h" // TYPE(stripe_info_t) non-standard encoding/decoding functions -TYPE(ECUtil::HashInfo) +TYPE(ECLegacy::ECUtilL::HashInfo) #include "osd/ECMsgTypes.h" TYPE_NOCOPY(ECSubWrite) diff --git a/src/tools/ceph_objectstore_tool.cc b/src/tools/ceph_objectstore_tool.cc index ad4d709488ef5..299421e726928 100644 --- a/src/tools/ceph_objectstore_tool.cc +++ b/src/tools/ceph_objectstore_tool.cc @@ -38,7 +38,7 @@ #include "osd/PGLog.h" #include "osd/OSD.h" #include "osd/PG.h" -#include "osd/ECUtil.h" +#include "osd/ECUtilL.h" #include "json_spirit/json_spirit_value.h" #include "json_spirit/json_spirit_reader.h" @@ -2899,9 +2899,9 @@ int print_obj_info(ObjectStore *store, coll_t coll, ghobject_t &ghobj, Formatter } } bufferlist hattr; - gr = store->getattr(ch, ghobj, ECUtil::get_hinfo_key(), hattr); + gr = store->getattr(ch, ghobj, ECLegacy::ECUtilL::get_hinfo_key(), hattr); if (gr == 0) { - ECUtil::HashInfo hinfo; + ECLegacy::ECUtilL::HashInfo hinfo; auto hp = hattr.cbegin(); try { decode(hinfo, hp); diff --git a/src/tools/erasure-code/ceph-erasure-code-tool.cc b/src/tools/erasure-code/ceph-erasure-code-tool.cc index 82c2ba94eba60..78f29f57a81b3 100644 --- a/src/tools/erasure-code/ceph-erasure-code-tool.cc +++ b/src/tools/erasure-code/ceph-erasure-code-tool.cc @@ -216,7 +216,7 @@ int do_encode(const std::vector &args) { sinfo->ro_range_to_shard_extent_map(0, input_data.length(), input_data, encoded_data); encoded_data.insert_parity_buffers(); - r = encoded_data.encode(ec_impl, nullptr, encoded_data.get_ro_end()); + r = encoded_data.encode(ec_impl); if (r < 0) { std::cerr << "failed to encode: " << cpp_strerror(r) << std::endl; return 1; diff --git a/src/tools/rados/rados.cc b/src/tools/rados/rados.cc index 5e685edaff422..9781a174d0f17 100644 --- a/src/tools/rados/rados.cc +++ b/src/tools/rados/rados.cc @@ -60,7 +60,7 @@ #include "PoolDump.h" #include "RadosImport.h" -#include "osd/ECUtil.h" +#include "osd/ECUtilL.h" // For hinfo in legacy EC #include "objclass/objclass.h" #include "cls/refcount/cls_refcount_ops.h" @@ -1643,10 +1643,10 @@ static void dump_shard(const shard_info_t& shard, || inc.union_shards.has_hinfo_corrupted() || inc.has_hinfo_inconsistency()) && !shard.has_hinfo_missing()) { - map::iterator k = (const_cast(shard)).attrs.find(ECUtil::get_hinfo_key()); + map::iterator k = (const_cast(shard)).attrs.find(ECLegacy::ECUtilL::get_hinfo_key()); ceph_assert(k != shard.attrs.end()); // Can't be missing if (!shard.has_hinfo_corrupted()) { - ECUtil::HashInfo hi; + ECLegacy::ECUtilL::HashInfo hi; bufferlist bl; auto bliter = k->second.cbegin(); decode(hi, bliter); // Can't be corrupted -- 2.39.5