From: Josh Durgin Date: Wed, 13 Mar 2013 16:42:43 +0000 (-0700) Subject: librbd: make aio_writes to the cache always non-blocking by default X-Git-Tag: v0.60~3^2~11 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=03ac01fa6a94fa7a66ede057e9267e0a562c3cdb;p=ceph.git librbd: make aio_writes to the cache always non-blocking by default When the ObjectCacher's writex blocks, it affects the thread requesting the aio, which can cause starvation for other I/O when used by QEMU. Preserve the old behavior via a config option in case this has any bad side-effects, like too much memory usage under heavy write loads. Fixes: #4091 Signed-off-by: Josh Durgin --- diff --git a/src/common/config_opts.h b/src/common/config_opts.h index 3ae286cd3f28..5ab3f13b9615 100644 --- a/src/common/config_opts.h +++ b/src/common/config_opts.h @@ -502,6 +502,7 @@ OPTION(rbd_cache_size, OPT_LONGLONG, 32<<20) // cache size in bytes OPTION(rbd_cache_max_dirty, OPT_LONGLONG, 24<<20) // dirty limit in bytes - set to 0 for write-through caching OPTION(rbd_cache_target_dirty, OPT_LONGLONG, 16<<20) // target dirty limit in bytes OPTION(rbd_cache_max_dirty_age, OPT_FLOAT, 1.0) // seconds in cache before writeback starts +OPTION(rbd_cache_block_writes_upfront, OPT_BOOL, false) // whether to block writes to the cache before the aio_write call completes (true), or block before the aio completion is called (false) OPTION(nss_db_path, OPT_STR, "") // path to nss db diff --git a/src/librbd/ImageCtx.cc b/src/librbd/ImageCtx.cc index 0423190fd6fe..cddd0d44ac67 100644 --- a/src/librbd/ImageCtx.cc +++ b/src/librbd/ImageCtx.cc @@ -90,7 +90,7 @@ namespace librbd { init_max_dirty, cct->_conf->rbd_cache_target_dirty, cct->_conf->rbd_cache_max_dirty_age, - true); + cct->_conf->rbd_cache_block_writes_upfront); object_set = new ObjectCacher::ObjectSet(NULL, data_ctx.get_id(), 0); object_set->return_enoent = true; object_cacher->start(); @@ -473,7 +473,7 @@ namespace librbd { } void 
ImageCtx::write_to_cache(object_t o, bufferlist& bl, size_t len, - uint64_t off) { + uint64_t off, Context *onfinish) { snap_lock.get_read(); ObjectCacher::OSDWrite *wr = object_cacher->prepare_write(snapc, bl, utime_t(), 0); @@ -484,7 +484,7 @@ namespace librbd { wr->extents.push_back(extent); { Mutex::Locker l(cache_lock); - object_cacher->writex(wr, object_set, cache_lock, NULL); + object_cacher->writex(wr, object_set, cache_lock, onfinish); } } diff --git a/src/librbd/ImageCtx.h b/src/librbd/ImageCtx.h index 9bb1f936271b..550d747524be 100644 --- a/src/librbd/ImageCtx.h +++ b/src/librbd/ImageCtx.h @@ -128,7 +128,8 @@ namespace librbd { uint64_t *overlap) const; void aio_read_from_cache(object_t o, bufferlist *bl, size_t len, uint64_t off, Context *onfinish); - void write_to_cache(object_t o, bufferlist& bl, size_t len, uint64_t off); + void write_to_cache(object_t o, bufferlist& bl, size_t len, uint64_t off, + Context *onfinish); int read_from_cache(object_t o, bufferlist *bl, size_t len, uint64_t off); void user_flushed(); int flush_cache(); diff --git a/src/librbd/internal.cc b/src/librbd/internal.cc index 24a3444a8594..fcefdc43161e 100644 --- a/src/librbd/internal.cc +++ b/src/librbd/internal.cc @@ -2544,9 +2544,10 @@ reprotect_and_return_err: bl.append(buf + q->first, q->second); } + C_AioWrite *req_comp = new C_AioWrite(cct, c); if (ictx->object_cacher) { - // may block - ictx->write_to_cache(p->oid, bl, p->length, p->offset); + c->add_request(); + ictx->write_to_cache(p->oid, bl, p->length, p->offset, req_comp); } else { // reverse map this object extent onto the parent vector<pair<uint64_t,uint64_t> > objectx; @@ -2555,7 +2556,6 @@ reprotect_and_return_err: objectx); uint64_t object_overlap = ictx->prune_parent_extents(objectx, overlap); - C_AioWrite *req_comp = new C_AioWrite(cct, c); AioWrite *req = new AioWrite(ictx, p->oid.name, p->objectno, p->offset, objectx, object_overlap, bl, snapc, snap_id, req_comp);