]> git.apps.os.sepia.ceph.com Git - ceph.git/commitdiff
os/bluestore: defer csum calculations sometimes
author Sage Weil <sage@redhat.com>
Thu, 19 May 2016 10:25:23 +0000 (06:25 -0400)
committer Sage Weil <sage@redhat.com>
Wed, 1 Jun 2016 15:38:50 +0000 (11:38 -0400)
When we are doing a partial chunk overwrite, we need to defer the csum_data
update.  Otherwise, another write in the same transaction might need to
read part of the chunk, not find the data in the buffer cache, read it
from disk, and fail the csum check.

This patch defers the calculation until after we've built the transaction
and are about to commit to the kv store.

Signed-off-by: Sage Weil <sage@redhat.com>
src/os/bluestore/BlueStore.cc
src/os/bluestore/BlueStore.h

index c9f2047cb5faddd2bd483dad72dfdbacbe883916..20b1372ebfcb2f06d2a35f80cebf8a16dcb5ce24 100644 (file)
@@ -4529,6 +4529,16 @@ int BlueStore::queue_transactions(
     _txc_add_transaction(txc, &(*p));
   }
 
+  // delayed csum calculation?
+  for (auto& d : txc->deferred_csum) {
+    bluestore_blob_t *b = d.onode->get_blob_ptr(d.blob);
+    dout(20) << __func__ << "  deferred csum calc blob " << d.blob
+            << " b_off 0x" << std::hex << d.b_off << std::dec
+            << " on " << d.onode->oid << dendl;
+    checksummer->calculate(b->csum_type, b->get_csum_block_size(),
+                          d.b_off, d.data.length(), d.data, &b->csum_data);
+  }
+
   _txc_write_nodes(txc, txc->t);
 
   // journal wal items
@@ -5503,8 +5513,7 @@ void BlueStore::_do_write_small(
               op->extents.emplace_back(bluestore_pextent_t(offset, length));
             });
       if (b->csum_type) {
-       checksummer->calculate(b->csum_type, b->get_csum_block_size(),
-                              b_off, padded.length(), padded, &b->csum_data);
+       txc->add_deferred_csum(o, blob, b_off, padded);
       }
       op->data.claim(padded);
       dout(20) << __func__ << "  wal write 0x" << std::hex << b_off << "~0x"
index 425ba97e3145937a8eff96a9e71bcfa59f14f435..133247f3e272adc42b28e0bd949934c4aadf9f1f 100644 (file)
@@ -627,6 +627,18 @@ public:
     uint64_t seq = 0;
     utime_t start;
 
+    struct DeferredCsum {
+      OnodeRef onode;
+      int64_t blob;
+      uint64_t b_off;
+      bufferlist data;
+
+      DeferredCsum(OnodeRef& o, int64_t b, uint64_t bo, bufferlist& bl)
+       : onode(o), blob(b), b_off(bo), data(bl) {}
+    };
+
+    list<DeferredCsum> deferred_csum;
+
     explicit TransContext(OpSequencer *o)
       : state(STATE_PREPARE),
        osr(o),
@@ -651,6 +663,10 @@ public:
     void write_bnode(BnodeRef &e) {
       bnodes.insert(e);
     }
+
+    void add_deferred_csum(OnodeRef& o, int64_t b, uint64_t bo, bufferlist& bl) {
+      deferred_csum.emplace_back(TransContext::DeferredCsum(o, b, bo, bl));
+    }
   };
 
   class OpSequencer : public Sequencer_impl {