From: John Spray Date: Wed, 1 Mar 2017 12:33:05 +0000 (+0000) Subject: mds: flush PQ even when not consuming X-Git-Tag: v12.0.1~140^2~3 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=5df00222fe7069cf30ae0544d47c450b973b2f6a;p=ceph.git mds: flush PQ even when not consuming In normal operation we generate flushes from _consume when we read from the journaler. However, we should also have a fallback flush mechanism for situations where can_consume() is false fo a long time. This comes up in testing when we set throttle to zero to prevent progress, but would also come up in real life if we were busy purging a few very large files, or if purging was stuck due to bad PGs in the data pool -- we don't want that to stop us completing appends to the PQ. Signed-off-by: John Spray --- diff --git a/src/common/config_opts.h b/src/common/config_opts.h index 8e7e02d15c31..0292749904a7 100644 --- a/src/common/config_opts.h +++ b/src/common/config_opts.h @@ -601,6 +601,8 @@ OPTION(mds_max_purge_ops, OPT_U32, 8192) // Maximum number of concurrent RADOS ops to issue in purging, scaled by PG count OPTION(mds_max_purge_ops_per_pg, OPT_FLOAT, 0.5) +OPTION(mds_purge_queue_busy_flush_period, OPT_FLOAT, 1.0) + OPTION(mds_root_ino_uid, OPT_INT, 0) // The UID of / on new filesystems OPTION(mds_root_ino_gid, OPT_INT, 0) // The GID of / on new filesystems diff --git a/src/mds/PurgeQueue.cc b/src/mds/PurgeQueue.cc index b8a59798ba07..af2ca3a2ad65 100644 --- a/src/mds/PurgeQueue.cc +++ b/src/mds/PurgeQueue.cc @@ -78,7 +78,8 @@ PurgeQueue::PurgeQueue( ops_in_flight(0), max_purge_ops(0), drain_initial(0), - draining(false) + draining(false), + delayed_flush(nullptr) { assert(cct != nullptr); assert(on_error != nullptr); @@ -175,11 +176,25 @@ void PurgeQueue::push(const PurgeItem &pi, Context *completion) journaler.append_entry(bl); journaler.wait_for_flush(completion); - // It is not necessary to explicitly flush here, because the reader - // will get flushes generated inside Journaler::is_readable - // Maybe go ahead and do something with it right away - _consume(); + bool could_consume = _consume(); + if (!could_consume) { + // Usually, it is not necessary to explicitly flush here, because the reader + // will get flushes generated inside Journaler::is_readable. However, + // if we remain in a can_consume()==false state for a long period then + // we should flush in order to allow MDCache to drop its strays rather + // than having them wait for purgequeue to progress. + if (!delayed_flush) { + delayed_flush = new FunctionContext([this](int r){ + delayed_flush = nullptr; + journaler.flush(); + }); + + timer.add_event_after( + g_conf->mds_purge_queue_busy_flush_period, + delayed_flush); + } + } } uint32_t PurgeQueue::_calculate_ops(const PurgeItem &item) const @@ -241,11 +256,22 @@ bool PurgeQueue::can_consume() } } -void PurgeQueue::_consume() +bool PurgeQueue::_consume() { assert(lock.is_locked_by_me()); + bool could_consume = false; while(can_consume()) { + could_consume = true; + + if (delayed_flush) { + // We are now going to read from the journal, so any proactive + // flush is no longer necessary. This is not functionally necessary + // but it can avoid generating extra fragmented flush IOs. + timer.cancel_event(delayed_flush); + delayed_flush = nullptr; + } + if (!journaler.is_readable()) { dout(10) << " not readable right now" << dendl; // Because we are the writer and the reader of the journal @@ -259,7 +285,7 @@ void PurgeQueue::_consume() })); } - return; + return could_consume; } // The journaler is readable: consume an entry @@ -283,6 +309,8 @@ void PurgeQueue::_consume() } dout(10) << " cannot consume right now" << dendl; + + return could_consume; } void PurgeQueue::_execute_item( diff --git a/src/mds/PurgeQueue.h b/src/mds/PurgeQueue.h index e572cb4fab8a..a0b034613784 100644 --- a/src/mds/PurgeQueue.h +++ b/src/mds/PurgeQueue.h @@ -112,7 +112,14 @@ protected: // Has drain() ever been called on this instance? bool draining; - void _consume(); + /** + * @return true if we were in a position to try and consume something: + * does not mean we necessarily did. + */ + bool _consume(); + + // Do we currently have a flush timer event waiting? + Context *delayed_flush; void _execute_item( const PurgeItem &item,