git.apps.os.sepia.ceph.com Git - ceph.git/commitdiff
librbd: optionally wait for a flush before enabling writeback
author Josh Durgin <josh.durgin@inktank.com>
Sat, 16 Mar 2013 00:28:13 +0000 (17:28 -0700)
committer Sage Weil <sage@inktank.com>
Tue, 19 Mar 2013 18:42:27 +0000 (11:42 -0700)
Older guests may not send flushes properly (i.e. never), so if this is
enabled, rbd_cache=true is safe for them transparently.

Disable by default, since it will unnecessarily slow down newer guest
boot, and prevent writeback caching for things that don't need to send
flushes, like the command line tool.

Refs: #3817
Signed-off-by: Josh Durgin <josh.durgin@inktank.com>
Reviewed-by: Sage Weil <sage@inktank.com>
doc/rbd/rbd-config-ref.rst
src/common/config_opts.h
src/librbd/ImageCtx.cc
src/librbd/ImageCtx.h
src/librbd/internal.cc

index dde3dedf257a3bdb2be3c2fb28b89d2c270a60b1..f57477d1fce835a2a0d5549ef1b91c0f750034b6 100644 (file)
@@ -86,4 +86,13 @@ section of your configuration file. The settings include:
 :Required: No
 :Default: ``1.0``
 
-.. _Block Device: ../../rbd/rbd/
\ No newline at end of file
+.. versionadded:: 0.60
+
+``rbd cache writethrough until flush``
+
+:Description: Start out in write-through mode, and switch to write-back after the first flush request is received. Enabling this is a conservative but safe setting in case VMs running on rbd are too old to send flushes, like the virtio driver in Linux before 2.6.32.
+:Type: Boolean
+:Required: No
+:Default: ``false``
+
+.. _Block Device: ../../rbd/rbd/
index 9e832be2db893a3a6a88abc295d2991c4568dfaf..52880f59feba1b7bbf6e3d4bb4ca00a9c364b1ac 100644 (file)
@@ -497,6 +497,7 @@ OPTION(journal_replay_from, OPT_INT, 0)
 OPTION(journal_zero_on_create, OPT_BOOL, false)
 OPTION(journal_ignore_corruption, OPT_BOOL, false) // assume journal is not corrupt
 OPTION(rbd_cache, OPT_BOOL, false) // whether to enable caching (writeback unless rbd_cache_max_dirty is 0)
+OPTION(rbd_cache_writethrough_until_flush, OPT_BOOL, false) // whether to make writeback caching writethrough until flush is called, to be sure the user of librbd will send flushes so that writeback is safe
 OPTION(rbd_cache_size, OPT_LONGLONG, 32<<20)         // cache size in bytes
 OPTION(rbd_cache_max_dirty, OPT_LONGLONG, 24<<20)    // dirty limit in bytes - set to 0 for write-through caching
 OPTION(rbd_cache_target_dirty, OPT_LONGLONG, 16<<20) // target dirty limit in bytes
index 7342e2b4bb9a73dda5a3aefc92e92f5a4cb448ef..add9a98ac4310ff29cd31928a42eb5b67bd646da 100644 (file)
@@ -34,6 +34,7 @@ namespace librbd {
       snap_id(CEPH_NOSNAP),
       snap_exists(true),
       read_only(ro),
+      flush_encountered(false),
       exclusive_locked(false),
       name(image_name),
       wctx(NULL),
@@ -68,13 +69,25 @@ namespace librbd {
 
     if (cct->_conf->rbd_cache) {
       Mutex::Locker l(cache_lock);
-      ldout(cct, 20) << "enabling writeback caching..." << dendl;
+      ldout(cct, 20) << "enabling caching..." << dendl;
       writeback_handler = new LibrbdWriteback(this, cache_lock);
+
+      uint64_t init_max_dirty = cct->_conf->rbd_cache_max_dirty;
+      if (cct->_conf->rbd_cache_writethrough_until_flush)
+       init_max_dirty = 0;
+      ldout(cct, 20) << "Initial cache settings:"
+                    << " size=" << cct->_conf->rbd_cache_size
+                    << " num_objects=" << 10
+                    << " max_dirty=" << init_max_dirty
+                    << " target_dirty=" << cct->_conf->rbd_cache_target_dirty
+                    << " max_dirty_age="
+                    << cct->_conf->rbd_cache_max_dirty_age << dendl;
+
       object_cacher = new ObjectCacher(cct, pname, *writeback_handler, cache_lock,
                                       NULL, NULL,
                                       cct->_conf->rbd_cache_size,
                                       10,  /* reset this in init */
-                                      cct->_conf->rbd_cache_max_dirty,
+                                      init_max_dirty,
                                       cct->_conf->rbd_cache_target_dirty,
                                       cct->_conf->rbd_cache_max_dirty_age);
       object_set = new ObjectCacher::ObjectSet(NULL, data_ctx.get_id(), 0);
@@ -489,6 +502,25 @@ namespace librbd {
     return r;
   }
 
+  void ImageCtx::user_flushed() {  // Note a user flush; on the first one, switch the cache from write-through to writeback.
+    if (object_cacher && cct->_conf->rbd_cache_writethrough_until_flush) {  // no-op unless caching is on and the option is set
+      md_lock.get_read();
+      bool flushed_before = flush_encountered;  // snapshot the flag under the md_lock read lock
+      md_lock.put_read();
+
+      uint64_t max_dirty = cct->_conf->rbd_cache_max_dirty;  // configured dirty limit; 0 means write-through caching
+      if (!flushed_before && max_dirty > 0) {  // first flush seen, and writeback is actually configured
+       md_lock.get_write();  // NOTE(review): check above and set here use separate lock acquisitions; concurrent first flushes may both reach set_max_dirty with the same value -- confirm that is harmless
+       flush_encountered = true;
+       md_lock.put_write();
+
+       ldout(cct, 10) << "saw first user flush, enabling writeback" << dendl;
+       Mutex::Locker l(cache_lock);  // hold cache_lock while changing the cacher's limit
+       object_cacher->set_max_dirty(max_dirty);  // replaces the initial limit of 0 chosen at cache creation when this option is set
+      }
+    }
+  }
+
   int ImageCtx::flush_cache() {
     int r = 0;
     Mutex mylock("librbd::ImageCtx::flush_cache");
index 42910c86364c29305cc80382805efe5078475803..9bb1f936271bc4ad7bde5e7b2c935adf6d6a230b 100644 (file)
@@ -43,6 +43,7 @@ namespace librbd {
     bool snap_exists; // false if our snap_id was deleted
     // whether the image was opened read-only. cannot be changed after opening
     bool read_only;
+    bool flush_encountered;
 
     std::map<rados::cls::lock::locker_id_t,
             rados::cls::lock::locker_info_t> lockers;
@@ -129,6 +130,7 @@ namespace librbd {
                             uint64_t off, Context *onfinish);
     void write_to_cache(object_t o, bufferlist& bl, size_t len, uint64_t off);
     int read_from_cache(object_t o, bufferlist *bl, size_t len, uint64_t off);
+    void user_flushed();
     int flush_cache();
     void shutdown_cache();
     void invalidate_cache();
index 0afeb0a3ca1db8313e88ac003384fe073004e7c7..24a3444a859468e793d260d50652eb81ebb4a96e 100644 (file)
@@ -2472,6 +2472,7 @@ reprotect_and_return_err:
     if (r < 0)
       return r;
 
+    ictx->user_flushed();
     return _flush(ictx);
   }