From: John Spray <john.spray@redhat.com>
Date: Wed, 1 Mar 2017 12:33:05 +0000 (+0000)
Subject: mds: flush PQ even when not consuming
X-Git-Tag: v12.0.1~140^2~3
X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=5df00222fe7069cf30ae0544d47c450b973b2f6a;p=ceph.git

mds: flush PQ even when not consuming

In normal operation we generate flushes from
_consume when we read from the journaler.  However,
we should also have a fallback flush mechanism for
situations where can_consume() is false fo a long time.

This comes up in testing when we set throttle to zero to
prevent progress, but would also come up in real life if
we were busy purging a few very large files, or if purging
was stuck due to bad PGs in the data pool -- we don't want
that to stop us completing appends to the PQ.

Signed-off-by: John Spray <john.spray@redhat.com>
---

diff --git a/src/common/config_opts.h b/src/common/config_opts.h
index 8e7e02d15c31..0292749904a7 100644
--- a/src/common/config_opts.h
+++ b/src/common/config_opts.h
@@ -601,6 +601,8 @@ OPTION(mds_max_purge_ops, OPT_U32, 8192)
 // Maximum number of concurrent RADOS ops to issue in purging, scaled by PG count
 OPTION(mds_max_purge_ops_per_pg, OPT_FLOAT, 0.5)
 
+OPTION(mds_purge_queue_busy_flush_period, OPT_FLOAT, 1.0)
+
 OPTION(mds_root_ino_uid, OPT_INT, 0) // The UID of / on new filesystems
 OPTION(mds_root_ino_gid, OPT_INT, 0) // The GID of / on new filesystems
 
diff --git a/src/mds/PurgeQueue.cc b/src/mds/PurgeQueue.cc
index b8a59798ba07..af2ca3a2ad65 100644
--- a/src/mds/PurgeQueue.cc
+++ b/src/mds/PurgeQueue.cc
@@ -78,7 +78,8 @@ PurgeQueue::PurgeQueue(
     ops_in_flight(0),
     max_purge_ops(0),
     drain_initial(0),
-    draining(false)
+    draining(false),
+    delayed_flush(nullptr)
 {
   assert(cct != nullptr);
   assert(on_error != nullptr);
@@ -175,11 +176,25 @@ void PurgeQueue::push(const PurgeItem &pi, Context *completion)
   journaler.append_entry(bl);
   journaler.wait_for_flush(completion);
 
-  // It is not necessary to explicitly flush here, because the reader
-  // will get flushes generated inside Journaler::is_readable
-
   // Maybe go ahead and do something with it right away
-  _consume();
+  bool could_consume = _consume();
+  if (!could_consume) {
+    // Usually, it is not necessary to explicitly flush here, because the reader
+    // will get flushes generated inside Journaler::is_readable.  However,
+    // if we remain in a can_consume()==false state for a long period then
+    // we should flush in order to allow MDCache to drop its strays rather
+    // than having them wait for purgequeue to progress.
+    if (!delayed_flush) {
+      delayed_flush = new FunctionContext([this](int r){
+            delayed_flush = nullptr;
+            journaler.flush();
+          });
+
+      timer.add_event_after(
+          g_conf->mds_purge_queue_busy_flush_period,
+          delayed_flush);
+    }
+  }
 }
 
 uint32_t PurgeQueue::_calculate_ops(const PurgeItem &item) const
@@ -241,11 +256,22 @@ bool PurgeQueue::can_consume()
   }
 }
 
-void PurgeQueue::_consume()
+bool PurgeQueue::_consume()
 {
   assert(lock.is_locked_by_me());
 
+  bool could_consume = false;
   while(can_consume()) {
+    could_consume = true;
+
+    if (delayed_flush) {
+      // We are now going to read from the journal, so any proactive
+      // flush is no longer necessary.  This is not functionally necessary
+      // but it can avoid generating extra fragmented flush IOs.
+      timer.cancel_event(delayed_flush);
+      delayed_flush = nullptr;
+    }
+
     if (!journaler.is_readable()) {
       dout(10) << " not readable right now" << dendl;
       // Because we are the writer and the reader of the journal
@@ -259,7 +285,7 @@ void PurgeQueue::_consume()
         }));
       }
 
-      return;
+      return could_consume;
     }
 
     // The journaler is readable: consume an entry
@@ -283,6 +309,8 @@ void PurgeQueue::_consume()
   }
 
   dout(10) << " cannot consume right now" << dendl;
+
+  return could_consume;
 }
 
 void PurgeQueue::_execute_item(
diff --git a/src/mds/PurgeQueue.h b/src/mds/PurgeQueue.h
index e572cb4fab8a..a0b034613784 100644
--- a/src/mds/PurgeQueue.h
+++ b/src/mds/PurgeQueue.h
@@ -112,7 +112,14 @@ protected:
   // Has drain() ever been called on this instance?
   bool draining;
 
-  void _consume();
+  /**
+   * @return true if we were in a position to try and consume something:
+   *         does not mean we necessarily did.
+   */
+  bool _consume();
+
+  // Do we currently have a flush timer event waiting?
+  Context *delayed_flush;
 
   void _execute_item(
       const PurgeItem &item,