git.apps.os.sepia.ceph.com Git - ceph.git/commitdiff
osd: Implement asynchronous scrub sleep
author: Brad Hubbard <bhubbard@redhat.com>
Mon, 24 Apr 2017 04:10:47 +0000 (14:10 +1000)
committer: Brad Hubbard <bhubbard@redhat.com>
Tue, 25 Jul 2017 06:00:28 +0000 (16:00 +1000)
Rather than blocking the main op queue, just do an async sleep.

Fixes: http://tracker.ceph.com/issues/19497
Signed-off-by: Brad Hubbard <bhubbard@redhat.com>
(cherry picked from commit 7af3e86c2e4992db35637864b83832535c94d0e6)

src/osd/PG.cc
src/osd/PG.h

index a29576aa5cea1f1f67653d43dfb7b7094615abdf..bf71f8e1a80a0762697d49cee1b0fd069b9cf546 100644 (file)
@@ -254,7 +254,9 @@ PG::PG(OSDService *o, OSDMapRef curmap,
   acting_features(CEPH_FEATURES_SUPPORTED_DEFAULT),
   upacting_features(CEPH_FEATURES_SUPPORTED_DEFAULT),
   do_sort_bitwise(false),
-  last_epoch(0)
+  last_epoch(0),
+  scrub_sleep_lock("PG::scrub_sleep_lock"),
+  scrub_sleep_timer(o->cct, scrub_sleep_lock, false /* relax locking */)
 {
 #ifdef PG_DEBUG_REFS
   osd->add_pgid(p, this);
@@ -264,6 +266,8 @@ PG::PG(OSDService *o, OSDMapRef curmap,
 
 PG::~PG()
 {
+  Mutex::Locker l(scrub_sleep_lock);
+  scrub_sleep_timer.shutdown();
 #ifdef PG_DEBUG_REFS
   osd->remove_pgid(info.pgid, this);
 #endif
@@ -2816,6 +2820,8 @@ void PG::init(
   dirty_info = true;
   dirty_big_info = true;
   write_if_dirty(*t);
+
+  scrub_sleep_timer.init();
 }
 
 #pragma GCC diagnostic ignored "-Wpragmas"
@@ -4065,22 +4071,34 @@ void PG::scrub(epoch_t queued, ThreadPool::TPHandle &handle)
 {
   if (g_conf->osd_scrub_sleep > 0 &&
       (scrubber.state == PG::Scrubber::NEW_CHUNK ||
-       scrubber.state == PG::Scrubber::INACTIVE)) {
+       scrubber.state == PG::Scrubber::INACTIVE) && scrubber.needs_sleep) {
+    ceph_assert(!scrubber.sleeping);
     dout(20) << __func__ << " state is INACTIVE|NEW_CHUNK, sleeping" << dendl;
-    unlock();
-    utime_t t;
-    t.set_from_double(g_conf->osd_scrub_sleep);
-    handle.suspend_tp_timeout();
-    t.sleep();
-    handle.reset_tp_timeout();
-    lock();
-    dout(20) << __func__ << " slept for " << t << dendl;
+    // Do an async sleep so we don't block the op queue
+    auto scrub_requeue_callback = new FunctionContext([this](int r) {
+      lock();
+      scrubber.sleeping = false;
+      scrubber.needs_sleep = false;
+      dout(20) << __func__ << " slept for "
+               << ceph_clock_now() - scrubber.sleep_start
+               << ", re-queuing scrub" << dendl;
+      scrub_queued = false;
+      requeue_scrub();
+      scrubber.sleep_start = utime_t();
+      unlock();
+    });
+    Mutex::Locker l(scrub_sleep_lock);
+    scrub_sleep_timer.add_event_after(cct->_conf->osd_scrub_sleep, scrub_requeue_callback);
+    scrubber.sleeping = true;
+    scrubber.sleep_start = ceph_clock_now();
+    return;
   }
   if (pg_has_reset_since(queued)) {
     return;
   }
   assert(scrub_queued);
   scrub_queued = false;
+  scrubber.needs_sleep = true;
 
   if (!is_primary() || !is_active() || !is_clean() || !is_scrubbing()) {
     dout(10) << "scrub -- not primary or active or not clean" << dendl;
index 76327121b8d971d49128821aeaf308967f5840d4..862a98ec988cd3e1e24ae3151578cbd0c14670af 100644 (file)
@@ -34,6 +34,7 @@
 #include "osd_types.h"
 #include "include/xlist.h"
 #include "SnapMapper.h"
+#include "common/Timer.h"
 
 #include "PGLog.h"
 #include "OSDMap.h"
@@ -1101,6 +1102,11 @@ public:
     OpRequestRef active_rep_scrub;
     utime_t scrub_reg_stamp;  // stamp we registered for
 
+    // For async sleep
+    bool sleeping = false;
+    bool needs_sleep = true;
+    utime_t sleep_start;
+
     // flags to indicate explicitly requested scrubs (by admin)
     bool must_scrub, must_deep_scrub, must_repair;
 
@@ -1219,6 +1225,9 @@ public:
       authoritative.clear();
       num_digest_updates_pending = 0;
       cleaned_meta_map = ScrubMap();
+      sleeping = false;
+      needs_sleep = true;
+      sleep_start = utime_t();
     }
 
     void create_results(const hobject_t& obj);
@@ -2079,6 +2088,9 @@ public:
   bool do_sort_bitwise;
   epoch_t last_epoch;
 
+  Mutex scrub_sleep_lock;
+  SafeTimer scrub_sleep_timer;
+
  public:
   const spg_t&      get_pgid() const { return pg_id; }