From: Sage Weil Date: Sun, 27 Jul 2014 04:19:34 +0000 (-0700) Subject: Revert "Merge pull request #2129 from ceph/wip-librbd-oc" X-Git-Tag: v0.84~56 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=288908b3316bc975a2b3f75aea5131d7c1cba57f;p=ceph.git Revert "Merge pull request #2129 from ceph/wip-librbd-oc" This reverts commit 74b386f03e4ca9970256db72c575589aea077534, reversing changes made to 36265d0db0d7c0eb31d25a0f77ac233b3fd198f8. The dirty_or_tx list is used by flush_set, which means we can resubmit new IOs for writes that are already in progress. This has a compounding effect that overwhelms the OSDs with dup IOs and stalls out the client. See, for example, teh failues in this run: /a/sage-2014-07-25_17:14:20-fs-wip-msgr-testing-basic-plana The fix is probably pretty simple, but reverting for now to make the tests pass. Signed-off-by: Sage Weil --- diff --git a/src/common/config_opts.h b/src/common/config_opts.h index fa11c21ad5a5..3b95e39043d6 100644 --- a/src/common/config_opts.h +++ b/src/common/config_opts.h @@ -728,7 +728,6 @@ OPTION(rbd_cache_size, OPT_LONGLONG, 32<<20) // cache size in bytes OPTION(rbd_cache_max_dirty, OPT_LONGLONG, 24<<20) // dirty limit in bytes - set to 0 for write-through caching OPTION(rbd_cache_target_dirty, OPT_LONGLONG, 16<<20) // target dirty limit in bytes OPTION(rbd_cache_max_dirty_age, OPT_FLOAT, 1.0) // seconds in cache before writeback starts -OPTION(rbd_cache_max_dirty_object, OPT_INT, 0) // dirty limit for objects - set to 0 for auto calculate from rbd_cache_size OPTION(rbd_cache_block_writes_upfront, OPT_BOOL, false) // whether to block writes to the cache before the aio_write call completes (true), or block before the aio completion is called (false) OPTION(rbd_concurrent_management_ops, OPT_INT, 10) // how many operations can be in flight for a management operation like deleting or resizing an image OPTION(rbd_balance_snap_reads, OPT_BOOL, false) diff --git a/src/librbd/ImageCtx.cc b/src/librbd/ImageCtx.cc index b5c2db6e7895..d2f0a4a96b19 100644 --- a/src/librbd/ImageCtx.cc +++ b/src/librbd/ImageCtx.cc @@ -185,14 +185,10 @@ namespace librbd { // size object cache appropriately if (object_cacher) { - uint64_t obj = cct->_conf->rbd_cache_max_dirty_object; - if (!obj) { - obj = cct->_conf->rbd_cache_size / (1ull << order); - obj = obj * 4 + 10; - } + uint64_t obj = cct->_conf->rbd_cache_size / (1ull << order); ldout(cct, 10) << " cache bytes " << cct->_conf->rbd_cache_size << " order " << (int)order << " -> about " << obj << " objects" << dendl; - object_cacher->set_max_objects(obj); + object_cacher->set_max_objects(obj * 4 + 10); } ldout(cct, 10) << "init_layout stripe_unit " << stripe_unit @@ -591,7 +587,7 @@ namespace librbd { cache_lock.Unlock(); if (unclean) { lderr(cct) << "could not release all objects from cache: " - << unclean << " bytes remain" << dendl; + << unclean << " bytes remain" << dendl; return -EBUSY; } return r; diff --git a/src/osdc/ObjectCacher.cc b/src/osdc/ObjectCacher.cc index ed18adc7e854..d3626884fcd2 100644 --- a/src/osdc/ObjectCacher.cc +++ b/src/osdc/ObjectCacher.cc @@ -530,7 +530,7 @@ ObjectCacher::~ObjectCacher() assert(bh_lru_rest.lru_get_size() == 0); assert(bh_lru_dirty.lru_get_size() == 0); assert(ob_lru.lru_get_size() == 0); - assert(dirty_or_tx_bh.empty()); + assert(dirty_bh.empty()); } void ObjectCacher::perf_start() @@ -1613,28 +1613,22 @@ bool ObjectCacher::flush_set(ObjectSet *oset, Context *onfinish) // we'll need to wait for all objects to flush! C_GatherBuilder gather(cct); - set waitfor_commit; - - set::iterator next, it; - next = it = dirty_or_tx_bh.begin(); - while (it != dirty_or_tx_bh.end()) { - next++; - BufferHead *bh = *it; - waitfor_commit.insert(bh->ob); - bh_write(bh); - it = next; - } - for (set::iterator i = waitfor_commit.begin(); - i != waitfor_commit.end(); ++i) { + for (xlist::iterator i = oset->objects.begin(); + !i.end(); ++i) { Object *ob = *i; - // we'll need to gather... - ldout(cct, 10) << "flush_set " << oset << " will wait for ack tid " - << ob->last_write_tid - << " on " << *ob - << dendl; - ob->waitfor_commit[ob->last_write_tid].push_back(gather.new_sub()); + if (ob->dirty_or_tx == 0) + continue; + + if (!flush(ob, 0, 0)) { + // we'll need to gather... + ldout(cct, 10) << "flush_set " << oset << " will wait for ack tid " + << ob->last_write_tid + << " on " << *ob + << dendl; + ob->waitfor_commit[ob->last_write_tid].push_back(gather.new_sub()); + } } return _flush_set_finish(&gather, onfinish); @@ -2003,28 +1997,17 @@ void ObjectCacher::bh_stat_sub(BufferHead *bh) void ObjectCacher::bh_set_state(BufferHead *bh, int s) { assert(lock.is_locked()); - int state = bh->get_state(); // move between lru lists? - if (s == BufferHead::STATE_DIRTY && state != BufferHead::STATE_DIRTY) { + if (s == BufferHead::STATE_DIRTY && bh->get_state() != BufferHead::STATE_DIRTY) { bh_lru_rest.lru_remove(bh); bh_lru_dirty.lru_insert_top(bh); - } else if (s != BufferHead::STATE_DIRTY && state == BufferHead::STATE_DIRTY) { + dirty_bh.insert(bh); + } + if (s != BufferHead::STATE_DIRTY && bh->get_state() == BufferHead::STATE_DIRTY) { bh_lru_dirty.lru_remove(bh); bh_lru_rest.lru_insert_top(bh); + dirty_bh.erase(bh); } - - if ((s == BufferHead::STATE_TX || - s == BufferHead::STATE_DIRTY) && - state != BufferHead::STATE_TX && - state != BufferHead::STATE_DIRTY) { - dirty_or_tx_bh.insert(bh); - } else if ((state == BufferHead::STATE_TX || - state == BufferHead::STATE_DIRTY) && - s != BufferHead::STATE_TX && - s != BufferHead::STATE_DIRTY) { - dirty_or_tx_bh.erase(bh); - } - if (s != BufferHead::STATE_ERROR && bh->get_state() == BufferHead::STATE_ERROR) { bh->error = 0; } @@ -2042,14 +2025,10 @@ void ObjectCacher::bh_add(Object *ob, BufferHead *bh) ob->add_bh(bh); if (bh->is_dirty()) { bh_lru_dirty.lru_insert_top(bh); - dirty_or_tx_bh.insert(bh); + dirty_bh.insert(bh); } else { bh_lru_rest.lru_insert_top(bh); } - - if (bh->is_tx()) { - dirty_or_tx_bh.insert(bh); - } bh_stat_add(bh); } @@ -2060,14 +2039,10 @@ void ObjectCacher::bh_remove(Object *ob, BufferHead *bh) ob->remove_bh(bh); if (bh->is_dirty()) { bh_lru_dirty.lru_remove(bh); - dirty_or_tx_bh.erase(bh); + dirty_bh.erase(bh); } else { bh_lru_rest.lru_remove(bh); } - - if (bh->is_tx()) { - dirty_or_tx_bh.erase(bh); - } bh_stat_sub(bh); } diff --git a/src/osdc/ObjectCacher.h b/src/osdc/ObjectCacher.h index ca16138fa2d0..9d9ab3fe90c9 100644 --- a/src/osdc/ObjectCacher.h +++ b/src/osdc/ObjectCacher.h @@ -343,7 +343,7 @@ class ObjectCacher { ceph_tid_t last_read_tid; - set dirty_or_tx_bh; + set dirty_bh; LRU bh_lru_dirty, bh_lru_rest; LRU ob_lru;