git-server-git.apps.pok.os.sepia.ceph.com Git - ceph.git/commitdiff
librbd: make aio_writes to the cache always non-blocking by default
author Josh Durgin <josh.durgin@inktank.com>
Wed, 13 Mar 2013 16:42:43 +0000 (09:42 -0700)
committer Josh Durgin <josh.durgin@inktank.com>
Tue, 23 Apr 2013 18:33:17 +0000 (11:33 -0700)
When the ObjectCacher's writex blocks, it affects the thread requesting
the aio, which can cause starvation for other I/O when used by QEMU.

Preserve the old behavior via a config option in case this has any
bad side-effects, like too much memory usage under heavy write loads.

Fixes: #4091
Signed-off-by: Josh Durgin <josh.durgin@inktank.com>
(cherry picked from commit 03ac01fa6a94fa7a66ede057e9267e0a562c3cdb)

src/common/config_opts.h
src/librbd/ImageCtx.cc
src/librbd/ImageCtx.h
src/librbd/internal.cc

index 464c2a2cf02c27a733395f9984082c76de250a43..4987fa481da8122866e785ceb12f615f58c3a9fd 100644 (file)
@@ -446,6 +446,7 @@ OPTION(rbd_cache_size, OPT_LONGLONG, 32<<20)         // cache size in bytes
 OPTION(rbd_cache_max_dirty, OPT_LONGLONG, 24<<20)    // dirty limit in bytes - set to 0 for write-through caching
 OPTION(rbd_cache_target_dirty, OPT_LONGLONG, 16<<20) // target dirty limit in bytes
 OPTION(rbd_cache_max_dirty_age, OPT_FLOAT, 1.0)      // seconds in cache before writeback starts
+OPTION(rbd_cache_block_writes_upfront, OPT_BOOL, false) // whether to block writes to the cache before the aio_write call completes (true), or block before the aio completion is called (false)
 
 OPTION(nss_db_path, OPT_STR, "") // path to nss db
 
index 2f57a21c8757816b59559fbd00fac19489dbc9f7..40b92a4d876ed714fd9742c068d05914b40781c9 100644 (file)
@@ -90,7 +90,7 @@ namespace librbd {
                                       init_max_dirty,
                                       cct->_conf->rbd_cache_target_dirty,
                                       cct->_conf->rbd_cache_max_dirty_age,
-                                      true);
+                                      cct->_conf->rbd_cache_block_writes_upfront);
       object_set = new ObjectCacher::ObjectSet(NULL, data_ctx.get_id(), 0);
       object_set->return_enoent = true;
       object_cacher->start();
@@ -483,7 +483,7 @@ namespace librbd {
   }
 
   void ImageCtx::write_to_cache(object_t o, bufferlist& bl, size_t len,
-                               uint64_t off) {
+                               uint64_t off, Context *onfinish) {
     snap_lock.get_read();
     ObjectCacher::OSDWrite *wr = object_cacher->prepare_write(snapc, bl,
                                                              utime_t(), 0);
@@ -494,7 +494,7 @@ namespace librbd {
     wr->extents.push_back(extent);
     {
       Mutex::Locker l(cache_lock);
-      object_cacher->writex(wr, object_set, cache_lock, NULL);
+      object_cacher->writex(wr, object_set, cache_lock, onfinish);
     }
   }
 
index f185f8a4fc7cf748b961b93806fb2e950efe3757..ea03ec08d13c00a2bbd97edd1e184a45133b84f2 100644 (file)
@@ -129,7 +129,8 @@ namespace librbd {
                           uint64_t *overlap) const;
     void aio_read_from_cache(object_t o, bufferlist *bl, size_t len,
                             uint64_t off, Context *onfinish);
-    void write_to_cache(object_t o, bufferlist& bl, size_t len, uint64_t off);
+    void write_to_cache(object_t o, bufferlist& bl, size_t len, uint64_t off,
+                       Context *onfinish);
     int read_from_cache(object_t o, bufferlist *bl, size_t len, uint64_t off);
     void user_flushed();
     int flush_cache();
index 0307762d0f59db8ba17dea28bb36af8d300278dc..06836ad8b552f1bc071774ec837b37d60526d4f9 100644 (file)
@@ -2549,9 +2549,10 @@ reprotect_and_return_err:
        bl.append(buf + q->first, q->second);
       }
 
+      C_AioWrite *req_comp = new C_AioWrite(cct, c);
       if (ictx->object_cacher) {
-       // may block
-       ictx->write_to_cache(p->oid, bl, p->length, p->offset);
+       c->add_request();
+       ictx->write_to_cache(p->oid, bl, p->length, p->offset, req_comp);
       } else {
        // reverse map this object extent onto the parent
        vector<pair<uint64_t,uint64_t> > objectx;
@@ -2560,7 +2561,6 @@ reprotect_and_return_err:
                              objectx);
        uint64_t object_overlap = ictx->prune_parent_extents(objectx, overlap);
 
-       C_AioWrite *req_comp = new C_AioWrite(cct, c);
        AioWrite *req = new AioWrite(ictx, p->oid.name, p->objectno, p->offset,
                                     objectx, object_overlap,
                                     bl, snapc, snap_id, req_comp);