From d02bf3c404811bb682c1e95b4c909cc94385bc19 Mon Sep 17 00:00:00 2001 From: Alex Ainscow Date: Fri, 20 Jun 2025 21:47:32 +0100 Subject: [PATCH] osd: Correct truncate logic for new EC The clone logic in the truncate was only cloning from the truncate to the end of the pre-truncate object. If the next shard was being truncated to a shorter length (which is common), then this shard has a larger clone. The rollback, however, can only be given a single range, so it was given a range which covers all clones. The problem is that if shard 0 is rolled back, then some empty space from the clone was copied to shard 0. Fix is easy - calculate the full clone length and apply to all shards, so it matches the rollback. Signed-off-by: Alex Ainscow (cherry picked from commit 5d7588c051b31098c9970877ab6a784967ff94c8) --- src/osd/ECTransaction.cc | 39 ++++++++++++++++++++++++--------------- 1 file changed, 24 insertions(+), 15 deletions(-) diff --git a/src/osd/ECTransaction.cc b/src/osd/ECTransaction.cc index 6486e89521d..2c5b0fe3ea0 100644 --- a/src/osd/ECTransaction.cc +++ b/src/osd/ECTransaction.cc @@ -37,12 +37,14 @@ using ceph::ErasureCodeInterfaceRef; void debug(const hobject_t &oid, const std::string &str, const ECUtil::shard_extent_map_t &map, DoutPrefixProvider *dpp) { ldpp_dout(dpp, 20) - << " generate_transactions: " << "oid: " << oid << str << map << dendl; + << " generate_transactions: " << "oid: " << oid << " " << str << " " << map << dendl; ldpp_dout(dpp, 20) << "EC_DEBUG_BUFFERS: " << map.debug_string(2048, 0) << dendl; } void ECTransaction::Generate::encode_and_write() { + ldpp_dout(dpp, 20)<< __func__ << dendl; + // For PDW, we already have necessary parity buffers. if (!plan.do_parity_delta_write) { to_write.insert_parity_buffers(); @@ -625,17 +627,29 @@ void ECTransaction::Generate::truncate() { uint64_t clone_start = std::numeric_limits::max(); uint64_t clone_end = 0; - shard_id_set clone_shards; // intentionally left blank! + shard_id_set clone_shards; for (auto &&[shard, eset]: truncate_eset) { clone_shards.insert(shard); + uint64_t start = eset.range_start(); + uint64_t start_align_prev = ECUtil::align_prev(start); + uint64_t end = eset.range_end(); + + if (clone_start > start_align_prev) { + clone_start = start_align_prev; + } + if (clone_end < end) { + clone_end = end; + } + } + + for (auto &&[shard, eset]: truncate_eset) { if (!transactions.contains(shard)) { continue; } auto &t = transactions.at(shard); uint64_t start = eset.range_start(); - uint64_t start_align_prev = ECUtil::align_prev(start); uint64_t start_align_next = ECUtil::align_next(start); uint64_t end = eset.range_end(); t.touch( @@ -645,9 +659,9 @@ void ECTransaction::Generate::truncate() { coll_t(spg_t(pgid, shard)), ghobject_t(oid, ghobject_t::NO_GEN, shard), ghobject_t(oid, entry->version.version, shard), - start_align_prev, - end - start_align_prev, - start_align_prev); + clone_start, + end - clone_start, + clone_start); // First truncate to exactly the right size. t.truncate( @@ -664,21 +678,15 @@ void ECTransaction::Generate::truncate() { ghobject_t(oid, ghobject_t::NO_GEN, shard), start_align_next); } - - if (clone_start > start_align_prev) { - clone_start = start_align_prev; - } - if (clone_end < end) { - clone_end = end; - } } - shards_written(clone_shards); - rollback_extents.emplace_back(make_pair(clone_start, clone_end)); + rollback_extents.emplace_back(make_pair(clone_start, clone_end - clone_start)); rollback_shards.emplace_back(clone_shards); } } void ECTransaction::Generate::overlay_writes() { + ldpp_dout(dpp, 20) << __func__ << " start " << dendl; + for (auto &&extent: op.buffer_updates) { using BufferUpdate = PGTransaction::ObjectOperation::BufferUpdate; bufferlist bl; @@ -708,6 +716,7 @@ void ECTransaction::Generate::appends_and_clone_ranges() { extent_set clone_ranges = plan.will_write.get_extent_superset(); uint64_t clone_max = ECUtil::align_next(plan.orig_size); + ldpp_dout(dpp, 20) << __func__ << dendl; if (op.delete_first) { clone_max = 0; -- 2.39.5