From: Josh Durgin Date: Wed, 13 Mar 2013 16:42:43 +0000 (-0700) Subject: librbd: make aio_writes to the cache always non-blocking by default X-Git-Tag: v0.56.5~12^2~19 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=3b0c565dceb41dcaf53965321ef420641bcb699b;p=ceph.git librbd: make aio_writes to the cache always non-blocking by default When the ObjectCacher's writex blocks, it affects the thread requesting the aio, which can cause starvation for other I/O when used by QEMU. Preserve the old behavior via a config option in case this has any bad side-effects, like too much memory usage under heavy write loads. Fixes: #4091 Signed-off-by: Josh Durgin (cherry picked from commit 03ac01fa6a94fa7a66ede057e9267e0a562c3cdb) --- diff --git a/src/common/config_opts.h b/src/common/config_opts.h index 464c2a2cf02c..4987fa481da8 100644 --- a/src/common/config_opts.h +++ b/src/common/config_opts.h @@ -446,6 +446,7 @@ OPTION(rbd_cache_size, OPT_LONGLONG, 32<<20) // cache size in bytes OPTION(rbd_cache_max_dirty, OPT_LONGLONG, 24<<20) // dirty limit in bytes - set to 0 for write-through caching OPTION(rbd_cache_target_dirty, OPT_LONGLONG, 16<<20) // target dirty limit in bytes OPTION(rbd_cache_max_dirty_age, OPT_FLOAT, 1.0) // seconds in cache before writeback starts +OPTION(rbd_cache_block_writes_upfront, OPT_BOOL, false) // whether to block writes to the cache before the aio_write call completes (true), or block before the aio completion is called (false) OPTION(nss_db_path, OPT_STR, "") // path to nss db diff --git a/src/librbd/ImageCtx.cc b/src/librbd/ImageCtx.cc index 2f57a21c8757..40b92a4d876e 100644 --- a/src/librbd/ImageCtx.cc +++ b/src/librbd/ImageCtx.cc @@ -90,7 +90,7 @@ namespace librbd { init_max_dirty, cct->_conf->rbd_cache_target_dirty, cct->_conf->rbd_cache_max_dirty_age, - true); + cct->_conf->rbd_cache_block_writes_upfront); object_set = new ObjectCacher::ObjectSet(NULL, data_ctx.get_id(), 0); object_set->return_enoent = true; 
object_cacher->start(); @@ -483,7 +483,7 @@ namespace librbd { } void ImageCtx::write_to_cache(object_t o, bufferlist& bl, size_t len, - uint64_t off) { + uint64_t off, Context *onfinish) { snap_lock.get_read(); ObjectCacher::OSDWrite *wr = object_cacher->prepare_write(snapc, bl, utime_t(), 0); @@ -494,7 +494,7 @@ namespace librbd { wr->extents.push_back(extent); { Mutex::Locker l(cache_lock); - object_cacher->writex(wr, object_set, cache_lock, NULL); + object_cacher->writex(wr, object_set, cache_lock, onfinish); } } diff --git a/src/librbd/ImageCtx.h b/src/librbd/ImageCtx.h index f185f8a4fc7c..ea03ec08d13c 100644 --- a/src/librbd/ImageCtx.h +++ b/src/librbd/ImageCtx.h @@ -129,7 +129,8 @@ namespace librbd { uint64_t *overlap) const; void aio_read_from_cache(object_t o, bufferlist *bl, size_t len, uint64_t off, Context *onfinish); - void write_to_cache(object_t o, bufferlist& bl, size_t len, uint64_t off); + void write_to_cache(object_t o, bufferlist& bl, size_t len, uint64_t off, + Context *onfinish); int read_from_cache(object_t o, bufferlist *bl, size_t len, uint64_t off); void user_flushed(); int flush_cache(); diff --git a/src/librbd/internal.cc b/src/librbd/internal.cc index 0307762d0f59..06836ad8b552 100644 --- a/src/librbd/internal.cc +++ b/src/librbd/internal.cc @@ -2549,9 +2549,10 @@ reprotect_and_return_err: bl.append(buf + q->first, q->second); } + C_AioWrite *req_comp = new C_AioWrite(cct, c); if (ictx->object_cacher) { - // may block - ictx->write_to_cache(p->oid, bl, p->length, p->offset); + c->add_request(); + ictx->write_to_cache(p->oid, bl, p->length, p->offset, req_comp); } else { // reverse map this object extent onto the parent vector<pair<uint64_t,uint64_t> > objectx; @@ -2560,7 +2561,6 @@ reprotect_and_return_err: objectx); uint64_t object_overlap = ictx->prune_parent_extents(objectx, overlap); - C_AioWrite *req_comp = new C_AioWrite(cct, c); AioWrite *req = new AioWrite(ictx, p->oid.name, p->objectno, p->offset, objectx, object_overlap, bl, snapc, snap_id, req_comp);