From: Josh Durgin Date: Sat, 16 Mar 2013 00:28:13 +0000 (-0700) Subject: librbd: optionally wait for a flush before enabling writeback X-Git-Tag: v0.60~34 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=1597b3e3a1d776b56e05c57d7c3de396f4f2b5b2;p=ceph.git librbd: optionally wait for a flush before enabling writeback Older guests may not send flushes properly (i.e. never), so if this is enabled, rbd_cache=true is safe for them transparently. Disable by default, since it will unnecessarily slow down newer guest boot, and prevent writeback caching for things that don't need to send flushes, like the command line tool. Refs: #3817 Signed-off-by: Josh Durgin Reviewed-by: Sage Weil --- diff --git a/doc/rbd/rbd-config-ref.rst b/doc/rbd/rbd-config-ref.rst index dde3dedf257..f57477d1fce 100644 --- a/doc/rbd/rbd-config-ref.rst +++ b/doc/rbd/rbd-config-ref.rst @@ -86,4 +86,13 @@ section of your configuration file. The settings include: :Required: No :Default: ``1.0`` -.. _Block Device: ../../rbd/rbd/ \ No newline at end of file +.. versionadded:: 0.60 + +``rbd cache writethrough until flush`` + +:Description: Start out in write-through mode, and switch to write-back after the first flush request is received. Enabling this is a conservative but safe setting in case VMs running on rbd are too old to send flushes, like the virtio driver in Linux before 2.6.32. +:Type: Boolean +:Required: No +:Default: ``false`` + +.. 
_Block Device: ../../rbd/rbd/ diff --git a/src/common/config_opts.h b/src/common/config_opts.h index 9e832be2db8..52880f59feb 100644 --- a/src/common/config_opts.h +++ b/src/common/config_opts.h @@ -497,6 +497,7 @@ OPTION(journal_replay_from, OPT_INT, 0) OPTION(journal_zero_on_create, OPT_BOOL, false) OPTION(journal_ignore_corruption, OPT_BOOL, false) // assume journal is not corrupt OPTION(rbd_cache, OPT_BOOL, false) // whether to enable caching (writeback unless rbd_cache_max_dirty is 0) +OPTION(rbd_cache_writethrough_until_flush, OPT_BOOL, false) // whether to make writeback caching writethrough until flush is called, to be sure the user of librbd will send flushes so that writeback is safe OPTION(rbd_cache_size, OPT_LONGLONG, 32<<20) // cache size in bytes OPTION(rbd_cache_max_dirty, OPT_LONGLONG, 24<<20) // dirty limit in bytes - set to 0 for write-through caching OPTION(rbd_cache_target_dirty, OPT_LONGLONG, 16<<20) // target dirty limit in bytes diff --git a/src/librbd/ImageCtx.cc b/src/librbd/ImageCtx.cc index 7342e2b4bb9..add9a98ac43 100644 --- a/src/librbd/ImageCtx.cc +++ b/src/librbd/ImageCtx.cc @@ -34,6 +34,7 @@ namespace librbd { snap_id(CEPH_NOSNAP), snap_exists(true), read_only(ro), + flush_encountered(false), exclusive_locked(false), name(image_name), wctx(NULL), @@ -68,13 +69,25 @@ namespace librbd { if (cct->_conf->rbd_cache) { Mutex::Locker l(cache_lock); - ldout(cct, 20) << "enabling writeback caching..." << dendl; + ldout(cct, 20) << "enabling caching..." 
<< dendl; writeback_handler = new LibrbdWriteback(this, cache_lock); + + uint64_t init_max_dirty = cct->_conf->rbd_cache_max_dirty; + if (cct->_conf->rbd_cache_writethrough_until_flush) + init_max_dirty = 0; + ldout(cct, 20) << "Initial cache settings:" + << " size=" << cct->_conf->rbd_cache_size + << " num_objects=" << 10 + << " max_dirty=" << init_max_dirty + << " target_dirty=" << cct->_conf->rbd_cache_target_dirty + << " max_dirty_age=" + << cct->_conf->rbd_cache_max_dirty_age << dendl; + object_cacher = new ObjectCacher(cct, pname, *writeback_handler, cache_lock, NULL, NULL, cct->_conf->rbd_cache_size, 10, /* reset this in init */ - cct->_conf->rbd_cache_max_dirty, + init_max_dirty, cct->_conf->rbd_cache_target_dirty, cct->_conf->rbd_cache_max_dirty_age); object_set = new ObjectCacher::ObjectSet(NULL, data_ctx.get_id(), 0); @@ -489,6 +502,25 @@ namespace librbd { return r; } + void ImageCtx::user_flushed() { + if (object_cacher && cct->_conf->rbd_cache_writethrough_until_flush) { + md_lock.get_read(); + bool flushed_before = flush_encountered; + md_lock.put_read(); + + uint64_t max_dirty = cct->_conf->rbd_cache_max_dirty; + if (!flushed_before && max_dirty > 0) { + md_lock.get_write(); + flush_encountered = true; + md_lock.put_write(); + + ldout(cct, 10) << "saw first user flush, enabling writeback" << dendl; + Mutex::Locker l(cache_lock); + object_cacher->set_max_dirty(max_dirty); + } + } + } + int ImageCtx::flush_cache() { int r = 0; Mutex mylock("librbd::ImageCtx::flush_cache"); diff --git a/src/librbd/ImageCtx.h b/src/librbd/ImageCtx.h index 42910c86364..9bb1f936271 100644 --- a/src/librbd/ImageCtx.h +++ b/src/librbd/ImageCtx.h @@ -43,6 +43,7 @@ namespace librbd { bool snap_exists; // false if our snap_id was deleted // whether the image was opened read-only. 
cannot be changed after opening bool read_only; + bool flush_encountered; std::map lockers; @@ -129,6 +130,7 @@ namespace librbd { uint64_t off, Context *onfinish); void write_to_cache(object_t o, bufferlist& bl, size_t len, uint64_t off); int read_from_cache(object_t o, bufferlist *bl, size_t len, uint64_t off); + void user_flushed(); int flush_cache(); void shutdown_cache(); void invalidate_cache(); diff --git a/src/librbd/internal.cc b/src/librbd/internal.cc index 0afeb0a3ca1..24a3444a859 100644 --- a/src/librbd/internal.cc +++ b/src/librbd/internal.cc @@ -2472,6 +2472,7 @@ reprotect_and_return_err: if (r < 0) return r; + ictx->user_flushed(); return _flush(ictx); }