git.apps.os.sepia.ceph.com Git - ceph.git/commitdiff
librbd: make aio_writes to the cache always non-blocking by default
author: Josh Durgin <josh.durgin@inktank.com>
Wed, 13 Mar 2013 16:42:43 +0000 (09:42 -0700)
committer: Josh Durgin <josh.durgin@inktank.com>
Thu, 28 Mar 2013 17:46:58 +0000 (10:46 -0700)
When the ObjectCacher's writex blocks, it affects the thread requesting
the aio, which can cause starvation for other I/O when used by QEMU.

Preserve the old behavior via a config option in case this has any
bad side-effects, like too much memory usage under heavy write loads.

Fixes: #4091
Signed-off-by: Josh Durgin <josh.durgin@inktank.com>
src/common/config_opts.h
src/librbd/ImageCtx.cc
src/librbd/ImageCtx.h
src/librbd/internal.cc

index 3ae286cd3f2838ec6591e0537b8b80dee7de309b..5ab3f13b9615e3e72bec707884002bb5058b27e7 100644 (file)
@@ -502,6 +502,7 @@ OPTION(rbd_cache_size, OPT_LONGLONG, 32<<20)         // cache size in bytes
 OPTION(rbd_cache_max_dirty, OPT_LONGLONG, 24<<20)    // dirty limit in bytes - set to 0 for write-through caching
 OPTION(rbd_cache_target_dirty, OPT_LONGLONG, 16<<20) // target dirty limit in bytes
 OPTION(rbd_cache_max_dirty_age, OPT_FLOAT, 1.0)      // seconds in cache before writeback starts
+OPTION(rbd_cache_block_writes_upfront, OPT_BOOL, false) // whether to block writes to the cache before the aio_write call completes (true), or block before the aio completion is called (false)
 
 OPTION(nss_db_path, OPT_STR, "") // path to nss db
 
index 0423190fd6fef5be2f297d83856a18ead4897d48..cddd0d44ac67cc1325a7cfff4c2bec1f492f0bf9 100644 (file)
@@ -90,7 +90,7 @@ namespace librbd {
                                       init_max_dirty,
                                       cct->_conf->rbd_cache_target_dirty,
                                       cct->_conf->rbd_cache_max_dirty_age,
-                                      true);
+                                      cct->_conf->rbd_cache_block_writes_upfront);
       object_set = new ObjectCacher::ObjectSet(NULL, data_ctx.get_id(), 0);
       object_set->return_enoent = true;
       object_cacher->start();
@@ -473,7 +473,7 @@ namespace librbd {
   }
 
   void ImageCtx::write_to_cache(object_t o, bufferlist& bl, size_t len,
-                               uint64_t off) {
+                               uint64_t off, Context *onfinish) {
     snap_lock.get_read();
     ObjectCacher::OSDWrite *wr = object_cacher->prepare_write(snapc, bl,
                                                              utime_t(), 0);
@@ -484,7 +484,7 @@ namespace librbd {
     wr->extents.push_back(extent);
     {
       Mutex::Locker l(cache_lock);
-      object_cacher->writex(wr, object_set, cache_lock, NULL);
+      object_cacher->writex(wr, object_set, cache_lock, onfinish);
     }
   }
 
index 9bb1f936271bc4ad7bde5e7b2c935adf6d6a230b..550d747524be1343c0cea9a55307a52c30eef54e 100644 (file)
@@ -128,7 +128,8 @@ namespace librbd {
                           uint64_t *overlap) const;
     void aio_read_from_cache(object_t o, bufferlist *bl, size_t len,
                             uint64_t off, Context *onfinish);
-    void write_to_cache(object_t o, bufferlist& bl, size_t len, uint64_t off);
+    void write_to_cache(object_t o, bufferlist& bl, size_t len, uint64_t off,
+                       Context *onfinish);
     int read_from_cache(object_t o, bufferlist *bl, size_t len, uint64_t off);
     void user_flushed();
     int flush_cache();
index 24a3444a859468e793d260d50652eb81ebb4a96e..fcefdc43161e77d5ffc88757465257b1eac7c6fa 100644 (file)
@@ -2544,9 +2544,10 @@ reprotect_and_return_err:
        bl.append(buf + q->first, q->second);
       }
 
+      C_AioWrite *req_comp = new C_AioWrite(cct, c);
       if (ictx->object_cacher) {
-       // may block
-       ictx->write_to_cache(p->oid, bl, p->length, p->offset);
+       c->add_request();
+       ictx->write_to_cache(p->oid, bl, p->length, p->offset, req_comp);
       } else {
        // reverse map this object extent onto the parent
        vector<pair<uint64_t,uint64_t> > objectx;
@@ -2555,7 +2556,6 @@ reprotect_and_return_err:
                              objectx);
        uint64_t object_overlap = ictx->prune_parent_extents(objectx, overlap);
 
-       C_AioWrite *req_comp = new C_AioWrite(cct, c);
        AioWrite *req = new AioWrite(ictx, p->oid.name, p->objectno, p->offset,
                                     objectx, object_overlap,
                                     bl, snapc, snap_id, req_comp);