From 645b535042273d88a0efe05d158565d17b5b8e9b Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Thu, 24 Dec 2015 13:39:43 -0500 Subject: [PATCH] os/kstore: cache in-flight stripe updates in memory The write process may do a read/modify/write on a stripe. In order to allow multiple writes to coexist within the same transaction, we need to be able to "see" our writes. Clear the "cached" stripe values when the last TransContext touching an onode is finished. In theory we could pin memory with a constant stream of updates to an object; we may need to address that later. Signed-off-by: Sage Weil --- src/os/kstore/KStore.cc | 64 +++++++++++++++++++++++++++-------------- src/os/kstore/KStore.h | 10 +++++++ 2 files changed, 53 insertions(+), 21 deletions(-) diff --git a/src/os/kstore/KStore.cc b/src/os/kstore/KStore.cc index d474343157939..039f06981e951 100644 --- a/src/os/kstore/KStore.cc +++ b/src/os/kstore/KStore.cc @@ -1548,10 +1548,8 @@ int KStore::_do_read( stripe_off = offset % stripe_size; while (length > 0) { - string key; - get_data_key(o->onode.nid, offset - stripe_off, &key); bufferlist stripe; - db->get(PREFIX_DATA, key, &stripe); + _do_read_stripe(o, offset - stripe_off, &stripe); dout(30) << __func__ << " stripe " << offset - stripe_off << " got " << stripe.length() << dendl; unsigned swant = MIN(stripe_size - stripe_off, length); @@ -2297,8 +2295,10 @@ void KStore::_txc_finish(TransContext *txc) << dendl; assert((*p)->flush_txns.count(txc)); (*p)->flush_txns.erase(txc); - if ((*p)->flush_txns.empty()) + if ((*p)->flush_txns.empty()) { (*p)->flush_cond.Signal(); + (*p)->clear_pending_stripes(); + } } // clear out refs @@ -2860,6 +2860,36 @@ void KStore::_pad_zeros( *_dout << dendl; } +void KStore::_do_read_stripe(OnodeRef o, uint64_t offset, bufferlist *pbl) +{ + map<uint64_t,bufferlist>::iterator p = o->pending_stripes.find(offset); + if (p == o->pending_stripes.end()) { + string key; + get_data_key(o->onode.nid, offset, &key); + db->get(PREFIX_DATA, key, pbl); + 
o->pending_stripes[offset] = *pbl; + } else { + *pbl = p->second; + } +} + +void KStore::_do_write_stripe(TransContext *txc, OnodeRef o, + uint64_t offset, bufferlist& bl) +{ + o->pending_stripes[offset] = bl; + string key; + get_data_key(o->onode.nid, offset, &key); + txc->t->set(PREFIX_DATA, key, bl); +} + +void KStore::_do_remove_stripe(TransContext *txc, OnodeRef o, uint64_t offset) +{ + o->pending_stripes.erase(offset); + string key; + get_data_key(o->onode.nid, offset, &key); + txc->t->rmkey(PREFIX_DATA, key); +} + int KStore::_do_write(TransContext *txc, OnodeRef o, uint64_t offset, uint64_t length, @@ -2892,20 +2922,16 @@ int KStore::_do_write(TransContext *txc, if (offset_rem == 0 && end_rem == 0) { bufferlist bl; bl.substr_of(orig_bl, bl_off, stripe_size); - string key; - get_data_key(o->onode.nid, offset, &key); dout(30) << __func__ << " full stripe " << offset << dendl; - txc->t->set(PREFIX_DATA, key, bl); + _do_write_stripe(txc, o, offset, bl); offset += stripe_size; length -= stripe_size; bl_off += stripe_size; continue; } uint64_t stripe_off = offset - offset_rem; - string key; - get_data_key(o->onode.nid, stripe_off, &key); bufferlist prev; - db->get(PREFIX_DATA, key, &prev); + _do_read_stripe(o, stripe_off, &prev); dout(20) << __func__ << " read previous stripe " << stripe_off << ", got " << prev.length() << dendl; bufferlist bl; @@ -2942,7 +2968,7 @@ int KStore::_do_write(TransContext *txc, dout(30) << " writing:\n"; bl.hexdump(*_dout); *_dout << dendl; - txc->t->set(PREFIX_DATA, key, bl); + _do_write_stripe(txc, o, stripe_off, bl); offset += use; length -= use; } @@ -2998,11 +3024,9 @@ int KStore::_zero(TransContext *txc, uint64_t pos = offset; uint64_t stripe_off = pos % stripe_size; while (pos < offset + length) { - string key; - get_data_key(o->onode.nid, pos - stripe_off, &key); if (stripe_off || end - pos < stripe_size) { bufferlist stripe; - db->get(PREFIX_DATA, key, &stripe); + _do_read_stripe(o, pos - stripe_off, &stripe); dout(30) << 
__func__ << " stripe " << pos - stripe_off << " got " << stripe.length() << dendl; bufferlist bl; @@ -3024,12 +3048,12 @@ int KStore::_zero(TransContext *txc, bl.claim_append(t); } } - txc->t->set(PREFIX_DATA, key, bl); + _do_write_stripe(txc, o, pos - stripe_off, bl); pos += stripe_size - stripe_off; stripe_off = 0; } else { dout(20) << __func__ << " rm stripe " << pos << dendl; - txc->t->rmkey(PREFIX_DATA, key); + _do_remove_stripe(txc, o, pos - stripe_off); pos += stripe_size; } } @@ -3056,23 +3080,21 @@ int KStore::_do_truncate(TransContext *txc, OnodeRef o, uint64_t offset) uint64_t pos = offset; uint64_t stripe_off = pos % stripe_size; while (pos < o->onode.size) { - string key; - get_data_key(o->onode.nid, pos - stripe_off, &key); if (stripe_off) { bufferlist stripe; - db->get(PREFIX_DATA, key, &stripe); + _do_read_stripe(o, pos - stripe_off, &stripe); dout(30) << __func__ << " stripe " << pos - stripe_off << " got " << stripe.length() << dendl; bufferlist t; t.substr_of(stripe, 0, MIN(stripe_off, stripe.length())); - txc->t->set(PREFIX_DATA, key, t); + _do_write_stripe(txc, o, pos - stripe_off, t); dout(20) << __func__ << " truncated stripe " << pos - stripe_off << " to " << t.length() << dendl; pos += stripe_size - stripe_off; stripe_off = 0; } else { dout(20) << __func__ << " rm stripe " << pos << dendl; - txc->t->rmkey(PREFIX_DATA, key); + _do_remove_stripe(txc, o, pos - stripe_off); pos += stripe_size; } } diff --git a/src/os/kstore/KStore.h b/src/os/kstore/KStore.h index 5be447331683f..d8cbdc2ac0b62 100644 --- a/src/os/kstore/KStore.h +++ b/src/os/kstore/KStore.h @@ -59,6 +59,8 @@ public: uint64_t tail_offset; bufferlist tail_bl; + map<uint64_t,bufferlist> pending_stripes; ///< unwritten stripes + Onode(const ghobject_t& o, const string& k); void flush(); @@ -74,6 +76,9 @@ public: tail_offset = 0; tail_bl.clear(); } + void clear_pending_stripes() { + pending_stripes.clear(); + } }; typedef boost::intrusive_ptr<Onode> OnodeRef; @@ -357,6 +362,11 @@ private: kv_stop = false; } + 
void _do_read_stripe(OnodeRef o, uint64_t offset, bufferlist *pbl); + void _do_write_stripe(TransContext *txc, OnodeRef o, + uint64_t offset, bufferlist& bl); + void _do_remove_stripe(TransContext *txc, OnodeRef o, uint64_t offset); + public: KStore(CephContext *cct, const string& path); ~KStore(); -- 2.39.5