]> git-server-git.apps.pok.os.sepia.ceph.com Git - ceph.git/commitdiff
osd/ECTransaction: only read partial stripes when below *original* object size 15712/head
authorSage Weil <sage@redhat.com>
Fri, 16 Jun 2017 17:56:14 +0000 (13:56 -0400)
committerSage Weil <sage@redhat.com>
Fri, 16 Jun 2017 17:56:14 +0000 (13:56 -0400)
If we touch a partial stripe that is before the stripe-aligned object size
we need to read it in to do a read/modify/write that overalys the new data
onto the old.  However, if we have two writes, both touching the same
stripe but not completely covering it, and that stripe is past the original
end of file, there is no old data to read.  The previous code was using
the projected size to decide whether to do a stripe read, and would end
up trying to read data that wasn't there.

Fix by basing the stripe read decision on the original object size instead
of the projected_size.

Fixes: http://tracker.ceph.com/issues/19882
Signed-off-by: Sage Weil <sage@redhat.com>
src/osd/ECTransaction.h
src/test/osd/test_ec_transaction.cc

index 346dcd146f3b040cc57197daf7f148c5ffa82cf0..036c8ca8d3d26da60fdbf3868b0aa07d921d7589 100644 (file)
@@ -98,6 +98,7 @@ namespace ECTransaction {
          raw_write_set.insert(extent.get_off(), extent.get_len());
        }
 
+       auto orig_size = projected_size;
        for (auto extent = raw_write_set.begin();
             extent != raw_write_set.end();
             ++extent) {
@@ -109,9 +110,12 @@ namespace ECTransaction {
            head_start = projected_size;
          }
          if (head_start != head_finish &&
-             head_start < projected_size) {
-           assert(head_finish <= projected_size);
+             head_start < orig_size) {
+           assert(head_finish <= orig_size);
            assert(head_finish - head_start == sinfo.get_stripe_width());
+           ldpp_dout(dpp, 20) << __func__ << ": reading partial head stripe "
+                              << head_start << "~" << sinfo.get_stripe_width()
+                              << dendl;
            plan.to_read[i.first].union_insert(
              head_start, sinfo.get_stripe_width());
          }
@@ -124,9 +128,12 @@ namespace ECTransaction {
              extent.get_start() + extent.get_len());
          if (tail_start != tail_finish &&
              (head_start == head_finish || tail_start != head_start) &&
-             tail_start < projected_size) {
-           assert(tail_finish <= projected_size);
+             tail_start < orig_size) {
+           assert(tail_finish <= orig_size);
            assert(tail_finish - tail_start == sinfo.get_stripe_width());
+           ldpp_dout(dpp, 20) << __func__ << ": reading partial tail stripe "
+                              << tail_start << "~" << sinfo.get_stripe_width()
+                              << dendl;
            plan.to_read[i.first].union_insert(
              tail_start, sinfo.get_stripe_width());
          }
@@ -152,7 +159,8 @@ namespace ECTransaction {
          ldpp_dout(dpp, 20) << __func__ << ": truncating out to "
                             <<  truncating_to
                             << dendl;
-         will_write.union_insert(projected_size, truncating_to - projected_size);
+         will_write.union_insert(projected_size,
+                                 truncating_to - projected_size);
          projected_size = truncating_to;
        }
 
index 0818cf36b947be42dd44a8bb470be636c4bcc36b..1ef2d5d832c136292988b46a67ecd785de3ded5c 100644 (file)
@@ -81,3 +81,44 @@ TEST(ectransaction, two_writes_nearby)
   ASSERT_EQ(0u, plan.to_read.size());
   ASSERT_EQ(1u, plan.will_write.size());
 }
+
+TEST(ectransaction, many_writes)
+{
+  hobject_t h;
+  PGTransactionUPtr t(new PGTransaction);
+  bufferlist a, b;
+  a.append_zero(512);
+  b.append_zero(4096);
+  t->create(h);
+
+  ECUtil::stripe_info_t sinfo(2, 8192);
+  // write 2801664~512
+  // write 2802176~512
+  // write 2802688~512
+  // write 2803200~512
+  t->write(h, 2801664, a.length(), a, 0);
+  t->write(h, 2802176, a.length(), a, 0);
+  t->write(h, 2802688, a.length(), a, 0);
+  t->write(h, 2803200, a.length(), a, 0);
+
+  // write 2805760~4096
+  // write 2809856~4096
+  // write 2813952~4096
+  t->write(h, 2805760, b.length(), b, 0);
+  t->write(h, 2809856, b.length(), b, 0);
+  t->write(h, 2813952, b.length(), b, 0);
+
+  auto plan = ECTransaction::get_write_plan(
+    sinfo,
+    std::move(t),
+    [&](const hobject_t &i) {
+      ECUtil::HashInfoRef ref(new ECUtil::HashInfo(1));
+      return ref;
+    },
+    &dpp);
+  generic_derr << "to_read " << plan.to_read << dendl;
+  generic_derr << "will_write " << plan.will_write << dendl;
+
+  ASSERT_EQ(0u, plan.to_read.size());
+  ASSERT_EQ(1u, plan.will_write.size());
+}