From: Sage Weil Date: Fri, 16 Jun 2017 17:56:14 +0000 (-0400) Subject: osd/ECTransaction: only read partial stripes when below *original* object size X-Git-Tag: v12.1.0~69^2 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=refs%2Fpull%2F15712%2Fhead;p=ceph.git osd/ECTransaction: only read partial stripes when below *original* object size If we touch a partial stripe that is before the stripe-aligned object size we need to read it in to do a read/modify/write that overalys the new data onto the old. However, if we have two writes, both touching the same stripe but not completely covering it, and that stripe is past the original end of file, there is no old data to read. The previous code was using the projected size to decide whether to do a stripe read, and would end up trying to read data that wasn't there. Fix by basing the stripe read decision on the original object size instead of the projected_size. Fixes: http://tracker.ceph.com/issues/19882 Signed-off-by: Sage Weil --- diff --git a/src/osd/ECTransaction.h b/src/osd/ECTransaction.h index 346dcd146f3..036c8ca8d3d 100644 --- a/src/osd/ECTransaction.h +++ b/src/osd/ECTransaction.h @@ -98,6 +98,7 @@ namespace ECTransaction { raw_write_set.insert(extent.get_off(), extent.get_len()); } + auto orig_size = projected_size; for (auto extent = raw_write_set.begin(); extent != raw_write_set.end(); ++extent) { @@ -109,9 +110,12 @@ namespace ECTransaction { head_start = projected_size; } if (head_start != head_finish && - head_start < projected_size) { - assert(head_finish <= projected_size); + head_start < orig_size) { + assert(head_finish <= orig_size); assert(head_finish - head_start == sinfo.get_stripe_width()); + ldpp_dout(dpp, 20) << __func__ << ": reading partial head stripe " + << head_start << "~" << sinfo.get_stripe_width() + << dendl; plan.to_read[i.first].union_insert( head_start, sinfo.get_stripe_width()); } @@ -124,9 +128,12 @@ namespace ECTransaction { extent.get_start() + extent.get_len()); if (tail_start != tail_finish && (head_start == head_finish || tail_start != head_start) && - tail_start < projected_size) { - assert(tail_finish <= projected_size); + tail_start < orig_size) { + assert(tail_finish <= orig_size); assert(tail_finish - tail_start == sinfo.get_stripe_width()); + ldpp_dout(dpp, 20) << __func__ << ": reading partial tail stripe " + << tail_start << "~" << sinfo.get_stripe_width() + << dendl; plan.to_read[i.first].union_insert( tail_start, sinfo.get_stripe_width()); } @@ -152,7 +159,8 @@ namespace ECTransaction { ldpp_dout(dpp, 20) << __func__ << ": truncating out to " << truncating_to << dendl; - will_write.union_insert(projected_size, truncating_to - projected_size); + will_write.union_insert(projected_size, + truncating_to - projected_size); projected_size = truncating_to; } diff --git a/src/test/osd/test_ec_transaction.cc b/src/test/osd/test_ec_transaction.cc index 0818cf36b94..1ef2d5d832c 100644 --- a/src/test/osd/test_ec_transaction.cc +++ b/src/test/osd/test_ec_transaction.cc @@ -81,3 +81,44 @@ TEST(ectransaction, two_writes_nearby) ASSERT_EQ(0u, plan.to_read.size()); ASSERT_EQ(1u, plan.will_write.size()); } + +TEST(ectransaction, many_writes) +{ + hobject_t h; + PGTransactionUPtr t(new PGTransaction); + bufferlist a, b; + a.append_zero(512); + b.append_zero(4096); + t->create(h); + + ECUtil::stripe_info_t sinfo(2, 8192); + // write 2801664~512 + // write 2802176~512 + // write 2802688~512 + // write 2803200~512 + t->write(h, 2801664, a.length(), a, 0); + t->write(h, 2802176, a.length(), a, 0); + t->write(h, 2802688, a.length(), a, 0); + t->write(h, 2803200, a.length(), a, 0); + + // write 2805760~4096 + // write 2809856~4096 + // write 2813952~4096 + t->write(h, 2805760, b.length(), b, 0); + t->write(h, 2809856, b.length(), b, 0); + t->write(h, 2813952, b.length(), b, 0); + + auto plan = ECTransaction::get_write_plan( + sinfo, + std::move(t), + [&](const hobject_t &i) { + ECUtil::HashInfoRef ref(new ECUtil::HashInfo(1)); + return ref; + }, + &dpp); + generic_derr << "to_read " << plan.to_read << dendl; + generic_derr << "will_write " << plan.will_write << dendl; + + ASSERT_EQ(0u, plan.to_read.size()); + ASSERT_EQ(1u, plan.will_write.size()); +}