From b45c09b93482d6b94505d44b8bd4aae9513bc023 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Fri, 16 Jun 2017 13:56:14 -0400 Subject: [PATCH] osd/ECTransaction: only read partial stripes when below *original* object size If we touch a partial stripe that is before the stripe-aligned object size we need to read it in to do a read/modify/write that overalys the new data onto the old. However, if we have two writes, both touching the same stripe but not completely covering it, and that stripe is past the original end of file, there is no old data to read. The previous code was using the projected size to decide whether to do a stripe read, and would end up trying to read data that wasn't there. Fix by basing the stripe read decision on the original object size instead of the projected_size. Fixes: http://tracker.ceph.com/issues/19882 Signed-off-by: Sage Weil --- src/osd/ECTransaction.h | 18 +++++++++---- src/test/osd/test_ec_transaction.cc | 41 +++++++++++++++++++++++++++++ 2 files changed, 54 insertions(+), 5 deletions(-) diff --git a/src/osd/ECTransaction.h b/src/osd/ECTransaction.h index 346dcd146f3b..036c8ca8d3d2 100644 --- a/src/osd/ECTransaction.h +++ b/src/osd/ECTransaction.h @@ -98,6 +98,7 @@ namespace ECTransaction { raw_write_set.insert(extent.get_off(), extent.get_len()); } + auto orig_size = projected_size; for (auto extent = raw_write_set.begin(); extent != raw_write_set.end(); ++extent) { @@ -109,9 +110,12 @@ namespace ECTransaction { head_start = projected_size; } if (head_start != head_finish && - head_start < projected_size) { - assert(head_finish <= projected_size); + head_start < orig_size) { + assert(head_finish <= orig_size); assert(head_finish - head_start == sinfo.get_stripe_width()); + ldpp_dout(dpp, 20) << __func__ << ": reading partial head stripe " + << head_start << "~" << sinfo.get_stripe_width() + << dendl; plan.to_read[i.first].union_insert( head_start, sinfo.get_stripe_width()); } @@ -124,9 +128,12 @@ namespace ECTransaction { extent.get_start() + extent.get_len()); if (tail_start != tail_finish && (head_start == head_finish || tail_start != head_start) && - tail_start < projected_size) { - assert(tail_finish <= projected_size); + tail_start < orig_size) { + assert(tail_finish <= orig_size); assert(tail_finish - tail_start == sinfo.get_stripe_width()); + ldpp_dout(dpp, 20) << __func__ << ": reading partial tail stripe " + << tail_start << "~" << sinfo.get_stripe_width() + << dendl; plan.to_read[i.first].union_insert( tail_start, sinfo.get_stripe_width()); } @@ -152,7 +159,8 @@ namespace ECTransaction { ldpp_dout(dpp, 20) << __func__ << ": truncating out to " << truncating_to << dendl; - will_write.union_insert(projected_size, truncating_to - projected_size); + will_write.union_insert(projected_size, + truncating_to - projected_size); projected_size = truncating_to; } diff --git a/src/test/osd/test_ec_transaction.cc b/src/test/osd/test_ec_transaction.cc index 0818cf36b947..1ef2d5d832c1 100644 --- a/src/test/osd/test_ec_transaction.cc +++ b/src/test/osd/test_ec_transaction.cc @@ -81,3 +81,44 @@ TEST(ectransaction, two_writes_nearby) ASSERT_EQ(0u, plan.to_read.size()); ASSERT_EQ(1u, plan.will_write.size()); } + +TEST(ectransaction, many_writes) +{ + hobject_t h; + PGTransactionUPtr t(new PGTransaction); + bufferlist a, b; + a.append_zero(512); + b.append_zero(4096); + t->create(h); + + ECUtil::stripe_info_t sinfo(2, 8192); + // write 2801664~512 + // write 2802176~512 + // write 2802688~512 + // write 2803200~512 + t->write(h, 2801664, a.length(), a, 0); + t->write(h, 2802176, a.length(), a, 0); + t->write(h, 2802688, a.length(), a, 0); + t->write(h, 2803200, a.length(), a, 0); + + // write 2805760~4096 + // write 2809856~4096 + // write 2813952~4096 + t->write(h, 2805760, b.length(), b, 0); + t->write(h, 2809856, b.length(), b, 0); + t->write(h, 2813952, b.length(), b, 0); + + auto plan = ECTransaction::get_write_plan( + sinfo, + std::move(t), + [&](const hobject_t &i) { + ECUtil::HashInfoRef ref(new ECUtil::HashInfo(1)); + return ref; + }, + &dpp); + generic_derr << "to_read " << plan.to_read << dendl; + generic_derr << "will_write " << plan.will_write << dendl; + + ASSERT_EQ(0u, plan.to_read.size()); + ASSERT_EQ(1u, plan.will_write.size()); +} -- 2.47.3