git.apps.os.sepia.ceph.com Git - ceph.git/commitdiff
osd: Move scrub sleep timer to osdservice 15529/head
author Brad Hubbard <bhubbard@redhat.com>
Mon, 22 May 2017 03:21:25 +0000 (13:21 +1000)
committer Brad Hubbard <bhubbard@redhat.com>
Wed, 7 Jun 2017 00:58:57 +0000 (10:58 +1000)
PR 14886 erroneously creates a scrub sleep timer for every PG, resulting
in a proliferation of threads. Move the timer to the OSD service so
there can be only one.

Fixes: http://tracker.ceph.com/issues/19986
Signed-off-by: Brad Hubbard <bhubbard@redhat.com>
(cherry picked from commit f110a82437df79dc20207d296e8229fc0e9ce18b)

Conflicts:
        src/osd/PG.cc - ceph_clock_now requires a CephContext argument
        in Jewel

src/osd/OSD.cc
src/osd/OSD.h
src/osd/PG.cc
src/osd/PG.h

index c1794ea581dd69a50626e2745ed310c4f82e0deb..f5cfda3b686a50cb1c3df1a0907bda30754a8003 100644 (file)
@@ -257,6 +257,9 @@ OSDService::OSDService(OSD *osd) :
   snap_sleep_lock("OSDService::snap_sleep_lock"),
   snap_sleep_timer(
     osd->client_messenger->cct, snap_sleep_lock, false /* relax locking */),
+  scrub_sleep_lock("OSDService::scrub_sleep_lock"),
+  scrub_sleep_timer(
+    osd->client_messenger->cct, scrub_sleep_lock, false /* relax locking */),
   snap_reserver(&reserver_finisher,
                cct->_conf->osd_max_trimming_pgs),
   map_cache_lock("OSDService::map_lock"),
@@ -493,6 +496,11 @@ void OSDService::shutdown()
     snap_sleep_timer.shutdown();
   }
 
+  {
+    Mutex::Locker l(scrub_sleep_lock);
+    scrub_sleep_timer.shutdown();
+  }
+
   osdmap = OSDMapRef();
   next_osdmap = OSDMapRef();
 }
@@ -505,6 +513,7 @@ void OSDService::init()
   watch_timer.init();
   agent_timer.init();
   snap_sleep_timer.init();
+  scrub_sleep_timer.init();
 
   agent_thread.create("osd_srv_agent");
 }
@@ -3116,6 +3125,11 @@ PG *OSD::_lookup_lock_pg(spg_t pgid)
   return pg;
 }
 
+PG *OSD::lookup_lock_pg(spg_t pgid)
+{
+  return _lookup_lock_pg(pgid);
+}
+
 
 PG *OSD::_lookup_pg(spg_t pgid)
 {
index d5e0d3898e07cdc557ddab032f204500cfc27bcf..0d1b2268557dec52e7ecdbe918eae6499cd3e43f 100644 (file)
@@ -864,6 +864,10 @@ public:
 
   Mutex snap_sleep_lock;
   SafeTimer snap_sleep_timer;
+
+  Mutex scrub_sleep_lock;
+  SafeTimer scrub_sleep_timer;
+
   AsyncReserver<spg_t> snap_reserver;
 
   void queue_for_scrub(PG *pg) {
@@ -1954,6 +1958,10 @@ protected:
   bool  _have_pg(spg_t pgid);
   PG   *_lookup_lock_pg_with_map_lock_held(spg_t pgid);
   PG   *_lookup_lock_pg(spg_t pgid);
+public:
+  PG   *lookup_lock_pg(spg_t pgid);
+
+protected:
   PG   *_lookup_pg(spg_t pgid);
   PG   *_open_lock_pg(OSDMapRef createmap,
                      spg_t pg, bool no_lockdep_check=false);
index d79d45a098cec03fa217978903a9b479ab644ae2..1cf86a6299b0686c8c216864e2583243670e73e7 100644 (file)
@@ -249,9 +249,7 @@ PG::PG(OSDService *o, OSDMapRef curmap,
   acting_features(CEPH_FEATURES_SUPPORTED_DEFAULT),
   upacting_features(CEPH_FEATURES_SUPPORTED_DEFAULT),
   do_sort_bitwise(false),
-  last_epoch(0),
-  scrub_sleep_lock("PG::scrub_sleep_lock"),
-  scrub_sleep_timer(o->cct, scrub_sleep_lock, false /* relax locking */)
+  last_epoch(0)
 {
 #ifdef PG_DEBUG_REFS
   osd->add_pgid(p, this);
@@ -260,8 +258,6 @@ PG::PG(OSDService *o, OSDMapRef curmap,
 
 PG::~PG()
 {
-  Mutex::Locker l(scrub_sleep_lock);
-  scrub_sleep_timer.shutdown();
 #ifdef PG_DEBUG_REFS
   osd->remove_pgid(info.pgid, this);
 #endif
@@ -2798,8 +2794,6 @@ void PG::init(
   dirty_info = true;
   dirty_big_info = true;
   write_if_dirty(*t);
-
-  scrub_sleep_timer.init();
 }
 
 #pragma GCC diagnostic ignored "-Wpragmas"
@@ -4035,26 +4029,41 @@ void PG::scrub(epoch_t queued, ThreadPool::TPHandle &handle)
 {
   if (g_conf->osd_scrub_sleep > 0 &&
       (scrubber.state == PG::Scrubber::NEW_CHUNK ||
-       scrubber.state == PG::Scrubber::INACTIVE) && scrubber.needs_sleep) {
+       scrubber.state == PG::Scrubber::INACTIVE) &&
+       scrubber.needs_sleep) {
     ceph_assert(!scrubber.sleeping);
     dout(20) << __func__ << " state is INACTIVE|NEW_CHUNK, sleeping" << dendl;
+
     // Do an async sleep so we don't block the op queue
-    auto scrub_requeue_callback = new FunctionContext([this](int r) {
-      lock();
-      scrubber.sleeping = false;
-      scrubber.needs_sleep = false;
-      dout(20) << __func__ << " slept for "
-               << ceph_clock_now() - scrubber.sleep_start
-               << ", re-queuing scrub" << dendl;
-      scrub_queued = false;
-      requeue_scrub();
-      scrubber.sleep_start = utime_t();
-      unlock();
-    });
-    Mutex::Locker l(scrub_sleep_lock);
-    scrub_sleep_timer.add_event_after(cct->_conf->osd_scrub_sleep, scrub_requeue_callback);
+    OSDService *osds = osd;
+    spg_t pgid = get_pgid();
+    int state = scrubber.state;
+    auto scrub_requeue_callback =
+        new FunctionContext([osds, pgid, state](int r) {
+          PG *pg = osds->osd->lookup_lock_pg(pgid);
+          if (pg == nullptr) {
+            lgeneric_dout(osds->osd->cct, 20)
+                << "scrub_requeue_callback: Could not find "
+                << "PG " << pgid << " can't complete scrub requeue after sleep"
+                << dendl;
+            return;
+          }
+          pg->scrubber.sleeping = false;
+          pg->scrubber.needs_sleep = false;
+          lgeneric_dout(pg->cct, 20)
+              << "scrub_requeue_callback: slept for "
+              << ceph_clock_now(pg->cct) - pg->scrubber.sleep_start
+              << ", re-queuing scrub with state " << state << dendl;
+          pg->scrub_queued = false;
+          pg->requeue_scrub();
+          pg->scrubber.sleep_start = utime_t();
+          pg->unlock();
+        });
+    Mutex::Locker l(osd->scrub_sleep_lock);
+    osd->scrub_sleep_timer.add_event_after(cct->_conf->osd_scrub_sleep,
+                                           scrub_requeue_callback);
     scrubber.sleeping = true;
-    scrubber.sleep_start = ceph_clock_now();
+    scrubber.sleep_start = ceph_clock_now(cct);
     return;
   }
   if (pg_has_reset_since(queued)) {
index 32b409c325db736af3328a29d3876c2cf4849572..3ed3c5238e5e2395e86f3f8c2f056d3797f77dc4 100644 (file)
@@ -873,7 +873,6 @@ protected:
 public:
   void clear_primary_state();
 
- public:
   bool is_actingbackfill(pg_shard_t osd) const {
     return actingbackfill.count(osd);
   }
@@ -2150,9 +2149,6 @@ public:
   bool do_sort_bitwise;
   epoch_t last_epoch;
 
-  Mutex scrub_sleep_lock;
-  SafeTimer scrub_sleep_timer;
-
  public:
   const spg_t&      get_pgid() const { return pg_id; }