From: Sage Weil Date: Thu, 19 May 2016 10:25:23 +0000 (-0400) Subject: os/bluestore: defer csum calculations sometimes X-Git-Tag: v11.0.0~359^2~66 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=d4f4fa0312d943dd0ce3c27f5fc56c7a753bb471;p=ceph.git os/bluestore: defer csum calculations sometimes When we are doing a partial chunk overwrite, we need to defer the csum_data update. Otherwise, another write in the same transaction might need to read part of the chunk, not find the data in the buffer cache, read it from disk, and fail the csum check. This patch defers the calculation until after we've built the transaction and are about to commit to the kv store. Signed-off-by: Sage Weil --- diff --git a/src/os/bluestore/BlueStore.cc b/src/os/bluestore/BlueStore.cc index c9f2047cb5fa..20b1372ebfcb 100644 --- a/src/os/bluestore/BlueStore.cc +++ b/src/os/bluestore/BlueStore.cc @@ -4529,6 +4529,16 @@ int BlueStore::queue_transactions( _txc_add_transaction(txc, &(*p)); } + // delayed csum calculation? 
+ for (auto& d : txc->deferred_csum) { + bluestore_blob_t *b = d.onode->get_blob_ptr(d.blob); + dout(20) << __func__ << " deferred csum calc blob " << d.blob + << " b_off 0x" << std::hex << d.b_off << std::dec + << " on " << d.onode->oid << dendl; + checksummer->calculate(b->csum_type, b->get_csum_block_size(), + d.b_off, d.data.length(), d.data, &b->csum_data); + } + _txc_write_nodes(txc, txc->t); // journal wal items @@ -5503,8 +5513,7 @@ void BlueStore::_do_write_small( op->extents.emplace_back(bluestore_pextent_t(offset, length)); }); if (b->csum_type) { - checksummer->calculate(b->csum_type, b->get_csum_block_size(), - b_off, padded.length(), padded, &b->csum_data); + txc->add_deferred_csum(o, blob, b_off, padded); } op->data.claim(padded); dout(20) << __func__ << " wal write 0x" << std::hex << b_off << "~0x" diff --git a/src/os/bluestore/BlueStore.h b/src/os/bluestore/BlueStore.h index 425ba97e3145..133247f3e272 100644 --- a/src/os/bluestore/BlueStore.h +++ b/src/os/bluestore/BlueStore.h @@ -627,6 +627,18 @@ public: uint64_t seq = 0; utime_t start; + struct DeferredCsum { + OnodeRef onode; + int64_t blob; + uint64_t b_off; + bufferlist data; + + DeferredCsum(OnodeRef& o, int64_t b, uint64_t bo, bufferlist& bl) + : onode(o), blob(b), b_off(bo), data(bl) {} + }; + + list deferred_csum; + explicit TransContext(OpSequencer *o) : state(STATE_PREPARE), osr(o), @@ -651,6 +663,10 @@ public: void write_bnode(BnodeRef &e) { bnodes.insert(e); } + + void add_deferred_csum(OnodeRef& o, int64_t b, uint64_t bo, bufferlist& bl) { + deferred_csum.emplace_back(TransContext::DeferredCsum(o, b, bo, bl)); + } }; class OpSequencer : public Sequencer_impl {