From dafe15ec4d7fd6c7deb77cfa00a3e737c71545dc Mon Sep 17 00:00:00 2001 From: Alex Ainscow Date: Wed, 7 May 2025 10:33:24 +0100 Subject: [PATCH] osd: Do not do a read-modify-write if op.delete_first is set Some client OPs are able to generate transactions which delete an object and then write it again. This is used by the copy-from ops. If such a write is not 4k aligned, then the new EC code was incorrectly doing a read-modify write on the misaligned 4k. This causes some garbage to be written to the backend OSD, off the end of the object. This is only a problem if the object is later extended without the end being written. Problematic sequence is: 1. Create two objects (A and B) of size X and Y where: X > Y, (Y % 4096) != 0 2. copy_from OP B -> A 3. Extend B without writing offset Y+1 This will result in a corrupt data buffer at Y+1 without this fix. Signed-off-by: Alex Ainscow --- src/osd/ECTransaction.cc | 12 +++++--- src/test/osd/test_ec_transaction.cc | 46 +++++++++++++++++++++++++++++ 2 files changed, 54 insertions(+), 4 deletions(-) diff --git a/src/osd/ECTransaction.cc b/src/osd/ECTransaction.cc index cc1abcbebc62b..d430543b4465e 100644 --- a/src/osd/ECTransaction.cc +++ b/src/osd/ECTransaction.cc @@ -241,7 +241,12 @@ ECTransaction::WritePlanObj::WritePlanObj( */ } - if (!reads.empty()) { + /* If the read is not empty AND the object is not going tbe deleted as + * part of the op, then record that we need to do the reads. Potentially + * some performance could be gained by not calculating reads at all if + * delete_first is set, but this is not a performance-critical code path. + */ + if (!reads.empty() && !op.delete_first) { to_read = std::move(reads); } } @@ -333,14 +338,13 @@ void ECTransaction::Generate::delete_first() { /* We also want to remove the std::nullopt entries since * the keys already won't exist */ for (auto j = op.attr_updates.begin(); - j != op.attr_updates.end(); - ) { + j != op.attr_updates.end();) { if (j->second) { ++j; } else { j = op.attr_updates.erase(j); } - } + } /* Fill in all current entries for xattr rollback */ if (obc) { xattr_rollback.insert( diff --git a/src/test/osd/test_ec_transaction.cc b/src/test/osd/test_ec_transaction.cc index 42e78bcb7e920..65e944887c072 100644 --- a/src/test/osd/test_ec_transaction.cc +++ b/src/test/osd/test_ec_transaction.cc @@ -441,3 +441,49 @@ TEST(ectransaction, truncate_to_smalelr_without_write) { ASSERT_EQ(ref_write, plan.will_write); } +TEST(ectransaction, delete_and_write_misaligned) { + hobject_t h; + PGTransaction::ObjectOperation op; + bufferlist a; + uint64_t new_size = 14 * (EC_ALIGN_SIZE / 4); + + // We have a 4k write quite a way after the current limit of a 4k object + a.append_zero(new_size); + op.buffer_updates.insert(0, new_size, PGTransaction::ObjectOperation::BufferUpdate::Write{a, 0}); + op.delete_first = true; + + pg_pool_t pool; + pool.set_flag(pg_pool_t::FLAG_EC_OPTIMIZATIONS); + ECUtil::stripe_info_t sinfo(2, 1, 2 * EC_ALIGN_SIZE, &pool, std::vector(0)); + object_info_t oi; + oi.size = new_size; + shard_id_set shards; + shards.insert_range(shard_id_t(0), 3); + + ECTransaction::WritePlanObj plan( + h, + op, + sinfo, + shards, + shards, + false, + 16*EC_ALIGN_SIZE, + std::nullopt, + std::nullopt, + ECUtil::HashInfoRef(new ECUtil::HashInfo(1)), + nullptr, + 0); + + generic_derr << "plan " << plan << dendl; + + /* We are going to delete the object before writing it. Best not write anything + * from the old object... */ + ASSERT_FALSE(plan.to_read); + + // Writes should cover parity only. + ECUtil::shard_extent_set_t ref_write(sinfo.get_k_plus_m()); + ref_write[shard_id_t(0)].insert(0, 2*EC_ALIGN_SIZE); + ref_write[shard_id_t(1)].insert(0, 2*EC_ALIGN_SIZE); + ref_write[shard_id_t(2)].insert(0, 2*EC_ALIGN_SIZE); + ASSERT_EQ(ref_write, plan.will_write); +} \ No newline at end of file -- 2.39.5