From: Igor Fedotov Date: Thu, 7 Jul 2016 17:50:56 +0000 (+0300) Subject: os/bluestore: collect 'dirty' blobs on per-transaction basis to avoid onode::blob_map... X-Git-Tag: ses5-milestone5~146^2 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=refs%2Fpull%2F10215%2Fhead;p=ceph.git os/bluestore: collect 'dirty' blobs on per-transaction basis to avoid onode::blob_map enumeration Signed-off-by: Igor Fedotov --- diff --git a/src/os/bluestore/BlueStore.cc b/src/os/bluestore/BlueStore.cc index 2ffbbf189cb0..6e7c137e7063 100644 --- a/src/os/bluestore/BlueStore.cc +++ b/src/os/bluestore/BlueStore.cc @@ -4727,20 +4727,10 @@ void BlueStore::_txc_state_proc(TransContext *txc) //assert(txc->osr->qlock.is_locked()); // see _txc_finish_io txc->log_state_latency(logger, l_bluestore_state_io_done_lat); txc->state = TransContext::STATE_KV_QUEUED; - // FIXME: use a per-txc dirty blob list? - - if (txc->first_collection) { - (txc->first_collection)->lock.get_read(); + for (auto& b : txc->blobs) { + b->bc.finish_write(txc->seq); } - for (auto& o : txc->onodes) { - for (auto& p : o->blob_map.blob_map) { - p.bc.finish_write(txc->seq); - } - } - if (txc->first_collection) { - (txc->first_collection)->lock.put_read(); - } - + txc->blobs.clear(); if (!g_conf->bluestore_sync_transaction) { if (g_conf->bluestore_sync_submit_transaction) { _txc_finalize_kv(txc, txc->t); @@ -5939,8 +5929,8 @@ void BlueStore::_do_write_small( << " pad 0x" << head_pad << " + 0x" << tail_pad << std::dec << " of mutable " << blob << ": " << b << dendl; assert(b->blob.is_unreferenced(b_off, b_len)); - b->bc.write(txc->seq, b_off, padded, - wctx->buffered ? 0 : Buffer::FLAG_NOCACHE); + _buffer_cache_write(txc, b, b_off, padded, wctx->buffered ? 
0 : Buffer::FLAG_NOCACHE); + b->blob.map_bl( b_off, padded, [&](uint64_t offset, uint64_t length, bufferlist& t) { @@ -6007,8 +5997,8 @@ void BlueStore::_do_write_small( b->blob.is_allocated(b_off, b_len)) { bluestore_wal_op_t *op = _get_wal_op(txc, o); op->op = bluestore_wal_op_t::OP_WRITE; - b->bc.write(txc->seq, b_off, padded, - wctx->buffered ? 0 : Buffer::FLAG_NOCACHE); + _buffer_cache_write(txc, b, b_off, padded, wctx->buffered ? 0 : Buffer::FLAG_NOCACHE); + b->blob.map( b_off, b_len, [&](uint64_t offset, uint64_t length) { @@ -6038,7 +6028,7 @@ void BlueStore::_do_write_small( b = o->blob_map.new_blob(c->cache); unsigned alloc_len = min_alloc_size; uint64_t b_off = P2PHASE(offset, alloc_len); - b->bc.write(txc->seq, b_off, bl, wctx->buffered ? 0 : Buffer::FLAG_NOCACHE); + _buffer_cache_write(txc, b, b_off, bl, wctx->buffered ? 0 : Buffer::FLAG_NOCACHE); _pad_zeros(&bl, &b_off, block_size); bluestore_lextent_t lex(b->id, P2PHASE(offset, alloc_len), length); o->onode.set_lextent(offset, lex, &b->blob, &wctx->lex_old); @@ -6071,7 +6061,7 @@ void BlueStore::_do_write_big( auto l = MIN(max_blob_len, length); bufferlist t; blp.copy(l, t); - b->bc.write(txc->seq, 0, t, wctx->buffered ? 0 : Buffer::FLAG_NOCACHE); + _buffer_cache_write(txc, b, 0, t, wctx->buffered ? 
0 : Buffer::FLAG_NOCACHE); wctx->write(b, l, 0, t, false); bluestore_lextent_t lex(b->id, 0, l); o->onode.set_lextent(offset, lex, &b->blob, &wctx->lex_old); diff --git a/src/os/bluestore/BlueStore.h b/src/os/bluestore/BlueStore.h index c51f29b6d42d..91457ceeef95 100644 --- a/src/os/bluestore/BlueStore.h +++ b/src/os/bluestore/BlueStore.h @@ -820,6 +820,8 @@ public: set<OnodeRef> onodes; ///< these onodes need to be updated/written set<BnodeRef> bnodes; ///< these bnodes need to be updated/written + set<BlobRef> blobs; ///< these blobs need to be updated on io completion + KeyValueDB::Transaction t; ///< then we will commit this Context *oncommit; ///< signal on commit Context *onreadable; ///< signal on readable @@ -1272,6 +1274,15 @@ private: boost::dynamic_bitset<> &used_blocks, store_statfs_t& expected_statfs); + void _buffer_cache_write( + TransContext *txc, + BlobRef b, + uint64_t offset, + bufferlist& bl, + unsigned flags) { + b->bc.write(txc->seq, offset, bl, flags); + txc->blobs.insert(b); + } public: BlueStore(CephContext *cct, const string& path); ~BlueStore();