git-server-git.apps.pok.os.sepia.ceph.com Git - ceph.git/commitdiff
librbd: optionally wait for a flush before enabling writeback
author Josh Durgin <josh.durgin@inktank.com>
Sat, 16 Mar 2013 00:28:13 +0000 (17:28 -0700)
committer Josh Durgin <josh.durgin@inktank.com>
Tue, 23 Apr 2013 18:33:17 +0000 (11:33 -0700)
Older guests may not send flushes properly (i.e. never), so if this is
enabled, rbd_cache=true is safe for them transparently.

Disable by default, since it will unnecessarily slow down newer guest
boot, and prevent writeback caching for things that don't need to send
flushes, like the command line tool.

Refs: #3817
Signed-off-by: Josh Durgin <josh.durgin@inktank.com>
Reviewed-by: Sage Weil <sage@inktank.com>
(cherry picked from commit 1597b3e3a1d776b56e05c57d7c3de396f4f2b5b2)

doc/rbd/rbd-config-ref.rst
src/common/config_opts.h
src/librbd/ImageCtx.cc
src/librbd/ImageCtx.h
src/librbd/internal.cc

index dde3dedf257a3bdb2be3c2fb28b89d2c270a60b1..f57477d1fce835a2a0d5549ef1b91c0f750034b6 100644 (file)
@@ -86,4 +86,13 @@ section of your configuration file. The settings include:
 :Required: No
 :Default: ``1.0``
 
-.. _Block Device: ../../rbd/rbd/
\ No newline at end of file
+.. versionadded:: 0.60
+
+``rbd cache writethrough until flush``
+
+:Description: Start out in write-through mode, and switch to write-back after the first flush request is received. Enabling this is a conservative but safe setting in case VMs running on rbd are too old to send flushes, like the virtio driver in Linux before 2.6.32.
+:Type: Boolean
+:Required: No
+:Default: ``false``
+
+.. _Block Device: ../../rbd/rbd/
index d5f1e24cf553879d53adea1865665120e235d773..464c2a2cf02c27a733395f9984082c76de250a43 100644 (file)
@@ -441,6 +441,7 @@ OPTION(journal_align_min_size, OPT_INT, 64 << 10)  // align data payloads >= thi
 OPTION(journal_replay_from, OPT_INT, 0)
 OPTION(journal_zero_on_create, OPT_BOOL, false)
 OPTION(rbd_cache, OPT_BOOL, false) // whether to enable caching (writeback unless rbd_cache_max_dirty is 0)
+OPTION(rbd_cache_writethrough_until_flush, OPT_BOOL, false) // whether to make writeback caching writethrough until flush is called, to be sure the user of librbd will send flushes so that writeback is safe
 OPTION(rbd_cache_size, OPT_LONGLONG, 32<<20)         // cache size in bytes
 OPTION(rbd_cache_max_dirty, OPT_LONGLONG, 24<<20)    // dirty limit in bytes - set to 0 for write-through caching
 OPTION(rbd_cache_target_dirty, OPT_LONGLONG, 16<<20) // target dirty limit in bytes
index db185c2dc0e2aabdd680abaf0d5a44d9f230c772..12b81ef0906e25f5b029350dae144f25fcaa5c9c 100644 (file)
@@ -34,6 +34,7 @@ namespace librbd {
       snap_id(CEPH_NOSNAP),
       snap_exists(true),
       read_only(ro),
+      flush_encountered(false),
       exclusive_locked(false),
       name(image_name),
       wctx(NULL),
@@ -68,13 +69,25 @@ namespace librbd {
 
     if (cct->_conf->rbd_cache) {
       Mutex::Locker l(cache_lock);
-      ldout(cct, 20) << "enabling writeback caching..." << dendl;
+      ldout(cct, 20) << "enabling caching..." << dendl;
       writeback_handler = new LibrbdWriteback(this, cache_lock);
+
+      uint64_t init_max_dirty = cct->_conf->rbd_cache_max_dirty;
+      if (cct->_conf->rbd_cache_writethrough_until_flush)
+       init_max_dirty = 0;
+      ldout(cct, 20) << "Initial cache settings:"
+                    << " size=" << cct->_conf->rbd_cache_size
+                    << " num_objects=" << 10
+                    << " max_dirty=" << init_max_dirty
+                    << " target_dirty=" << cct->_conf->rbd_cache_target_dirty
+                    << " max_dirty_age="
+                    << cct->_conf->rbd_cache_max_dirty_age << dendl;
+
       object_cacher = new ObjectCacher(cct, pname, *writeback_handler, cache_lock,
                                       NULL, NULL,
                                       cct->_conf->rbd_cache_size,
                                       10,  /* reset this in init */
-                                      cct->_conf->rbd_cache_max_dirty,
+                                      init_max_dirty,
                                       cct->_conf->rbd_cache_target_dirty,
                                       cct->_conf->rbd_cache_max_dirty_age);
       object_set = new ObjectCacher::ObjectSet(NULL, data_ctx.get_id(), 0);
@@ -499,6 +512,25 @@ namespace librbd {
     return r;
   }
 
+  void ImageCtx::user_flushed() {  // records a user flush; the first one raises the cache dirty limit to the configured value
+    if (object_cacher && cct->_conf->rbd_cache_writethrough_until_flush) {  // nothing to do without a cache or with the option off
+      md_lock.get_read();
+      bool flushed_before = flush_encountered;  // snapshot the flag under the read lock only
+      md_lock.put_read();
+
+      uint64_t max_dirty = cct->_conf->rbd_cache_max_dirty;
+      if (!flushed_before && max_dirty > 0) {  // max_dirty == 0 means the limit is never raised here
+       md_lock.get_write();  // NOTE(review): read lock was dropped above, so two concurrent first flushes can both get here; looks harmless since both apply the same value - confirm
+       flush_encountered = true;
+       md_lock.put_write();
+
+       ldout(cct, 10) << "saw first user flush, enabling writeback" << dendl;
+       Mutex::Locker l(cache_lock);  // cache_lock is held for the set_max_dirty call and released at scope exit
+       object_cacher->set_max_dirty(max_dirty);  // switch the cacher to the configured dirty limit
+      }
+    }
+  }
+
   int ImageCtx::flush_cache() {
     int r = 0;
     Mutex mylock("librbd::ImageCtx::flush_cache");
index 90d815e69ea6b09053e55ce58680e5f130bff8da..f185f8a4fc7cf748b961b93806fb2e950efe3757 100644 (file)
@@ -43,6 +43,7 @@ namespace librbd {
     bool snap_exists; // false if our snap_id was deleted
     // whether the image was opened read-only. cannot be changed after opening
     bool read_only;
+    bool flush_encountered;
 
     std::map<rados::cls::lock::locker_id_t,
             rados::cls::lock::locker_info_t> lockers;
@@ -130,6 +131,7 @@ namespace librbd {
                             uint64_t off, Context *onfinish);
     void write_to_cache(object_t o, bufferlist& bl, size_t len, uint64_t off);
     int read_from_cache(object_t o, bufferlist *bl, size_t len, uint64_t off);
+    void user_flushed();
     int flush_cache();
     void shutdown_cache();
     void invalidate_cache();
index fb4114e94f3918a626898ced097daf48954d1c47..87bc7a3ee163e60013e76028cee688aef0259145 100644 (file)
@@ -2469,6 +2469,7 @@ reprotect_and_return_err:
     if (r < 0)
       return r;
 
+    ictx->user_flushed();
     return _flush(ictx);
   }