From: Josh Durgin Date: Sat, 16 Mar 2013 00:28:13 +0000 (-0700) Subject: librbd: optionally wait for a flush before enabling writeback X-Git-Tag: v0.56.5~12^2~22 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=9facdcac7bb8ab92f1ab486eaabee681e6b6519d;p=ceph.git librbd: optionally wait for a flush before enabling writeback Older guests may not send flushes properly (i.e. never), so if this is enabled, rbd_cache=true is safe for them transparently. Disable by default, since it will unnecessarily slow down newer guest boot, and prevent writeback caching for things that don't need to send flushes, like the command line tool. Refs: #3817 Signed-off-by: Josh Durgin Reviewed-by: Sage Weil (cherry picked from commit 1597b3e3a1d776b56e05c57d7c3de396f4f2b5b2) --- diff --git a/doc/rbd/rbd-config-ref.rst b/doc/rbd/rbd-config-ref.rst index dde3dedf257a..f57477d1fce8 100644 --- a/doc/rbd/rbd-config-ref.rst +++ b/doc/rbd/rbd-config-ref.rst @@ -86,4 +86,13 @@ section of your configuration file. The settings include: :Required: No :Default: ``1.0`` -.. _Block Device: ../../rbd/rbd/ \ No newline at end of file +.. versionadded:: 0.60 + +``rbd cache writethrough until flush`` + +:Description: Start out in write-through mode, and switch to write-back after the first flush request is received. Enabling this is a conservative but safe setting in case VMs running on rbd are too old to send flushes, like the virtio driver in Linux before 2.6.32. +:Type: Boolean +:Required: No +:Default: ``false`` + +.. 
_Block Device: ../../rbd/rbd/ diff --git a/src/common/config_opts.h b/src/common/config_opts.h index d5f1e24cf553..464c2a2cf02c 100644 --- a/src/common/config_opts.h +++ b/src/common/config_opts.h @@ -441,6 +441,7 @@ OPTION(journal_align_min_size, OPT_INT, 64 << 10) // align data payloads >= thi OPTION(journal_replay_from, OPT_INT, 0) OPTION(journal_zero_on_create, OPT_BOOL, false) OPTION(rbd_cache, OPT_BOOL, false) // whether to enable caching (writeback unless rbd_cache_max_dirty is 0) +OPTION(rbd_cache_writethrough_until_flush, OPT_BOOL, false) // whether to make writeback caching writethrough until flush is called, to be sure the user of librbd will send flushes so that writeback is safe OPTION(rbd_cache_size, OPT_LONGLONG, 32<<20) // cache size in bytes OPTION(rbd_cache_max_dirty, OPT_LONGLONG, 24<<20) // dirty limit in bytes - set to 0 for write-through caching OPTION(rbd_cache_target_dirty, OPT_LONGLONG, 16<<20) // target dirty limit in bytes diff --git a/src/librbd/ImageCtx.cc b/src/librbd/ImageCtx.cc index db185c2dc0e2..12b81ef0906e 100644 --- a/src/librbd/ImageCtx.cc +++ b/src/librbd/ImageCtx.cc @@ -34,6 +34,7 @@ namespace librbd { snap_id(CEPH_NOSNAP), snap_exists(true), read_only(ro), + flush_encountered(false), exclusive_locked(false), name(image_name), wctx(NULL), @@ -68,13 +69,25 @@ namespace librbd { if (cct->_conf->rbd_cache) { Mutex::Locker l(cache_lock); - ldout(cct, 20) << "enabling writeback caching..." << dendl; + ldout(cct, 20) << "enabling caching..." 
<< dendl; writeback_handler = new LibrbdWriteback(this, cache_lock); + + uint64_t init_max_dirty = cct->_conf->rbd_cache_max_dirty; + if (cct->_conf->rbd_cache_writethrough_until_flush) + init_max_dirty = 0; + ldout(cct, 20) << "Initial cache settings:" + << " size=" << cct->_conf->rbd_cache_size + << " num_objects=" << 10 + << " max_dirty=" << init_max_dirty + << " target_dirty=" << cct->_conf->rbd_cache_target_dirty + << " max_dirty_age=" + << cct->_conf->rbd_cache_max_dirty_age << dendl; + object_cacher = new ObjectCacher(cct, pname, *writeback_handler, cache_lock, NULL, NULL, cct->_conf->rbd_cache_size, 10, /* reset this in init */ - cct->_conf->rbd_cache_max_dirty, + init_max_dirty, cct->_conf->rbd_cache_target_dirty, cct->_conf->rbd_cache_max_dirty_age); object_set = new ObjectCacher::ObjectSet(NULL, data_ctx.get_id(), 0); @@ -499,6 +512,25 @@ namespace librbd { return r; } + void ImageCtx::user_flushed() { + if (object_cacher && cct->_conf->rbd_cache_writethrough_until_flush) { + md_lock.get_read(); + bool flushed_before = flush_encountered; + md_lock.put_read(); + + uint64_t max_dirty = cct->_conf->rbd_cache_max_dirty; + if (!flushed_before && max_dirty > 0) { + md_lock.get_write(); + flush_encountered = true; + md_lock.put_write(); + + ldout(cct, 10) << "saw first user flush, enabling writeback" << dendl; + Mutex::Locker l(cache_lock); + object_cacher->set_max_dirty(max_dirty); + } + } + } + int ImageCtx::flush_cache() { int r = 0; Mutex mylock("librbd::ImageCtx::flush_cache"); diff --git a/src/librbd/ImageCtx.h b/src/librbd/ImageCtx.h index 90d815e69ea6..f185f8a4fc7c 100644 --- a/src/librbd/ImageCtx.h +++ b/src/librbd/ImageCtx.h @@ -43,6 +43,7 @@ namespace librbd { bool snap_exists; // false if our snap_id was deleted // whether the image was opened read-only. 
cannot be changed after opening bool read_only; + bool flush_encountered; std::map lockers; @@ -130,6 +131,7 @@ namespace librbd { uint64_t off, Context *onfinish); void write_to_cache(object_t o, bufferlist& bl, size_t len, uint64_t off); int read_from_cache(object_t o, bufferlist *bl, size_t len, uint64_t off); + void user_flushed(); int flush_cache(); void shutdown_cache(); void invalidate_cache(); diff --git a/src/librbd/internal.cc b/src/librbd/internal.cc index fb4114e94f39..87bc7a3ee163 100644 --- a/src/librbd/internal.cc +++ b/src/librbd/internal.cc @@ -2469,6 +2469,7 @@ reprotect_and_return_err: if (r < 0) return r; + ictx->user_flushed(); return _flush(ictx); }