]> git-server-git.apps.pok.os.sepia.ceph.com Git - ceph.git/commitdiff
osd: Move scrub sleep timer to osdservice 15217/head
authorBrad Hubbard <bhubbard@redhat.com>
Mon, 22 May 2017 03:21:25 +0000 (13:21 +1000)
committerBrad Hubbard <bhubbard@redhat.com>
Tue, 30 May 2017 23:29:49 +0000 (09:29 +1000)
PR 14886 erroneously creates a scrub sleep timer for every pg resulting
in a proliferation of threads. Move the timer to the osd service so
there can be only one.

Fixes: http://tracker.ceph.com/issues/19986
Signed-off-by: Brad Hubbard <bhubbard@redhat.com>
src/osd/OSD.cc
src/osd/OSD.h
src/osd/PG.cc
src/osd/PG.h

index 87177a3d7596074ef0fdd744c1af570025310d17..c644afb0b80670acf5cc4af2ad408714adcc950c 100644 (file)
@@ -265,6 +265,9 @@ OSDService::OSDService(OSD *osd) :
   snap_sleep_lock("OSDService::snap_sleep_lock"),
   snap_sleep_timer(
     osd->client_messenger->cct, snap_sleep_lock, false /* relax locking */),
+  scrub_sleep_lock("OSDService::scrub_sleep_lock"),
+  scrub_sleep_timer(
+    osd->client_messenger->cct, scrub_sleep_lock, false /* relax locking */),
   snap_reserver(&reserver_finisher,
                cct->_conf->osd_max_trimming_pgs),
   recovery_lock("OSDService::recovery_lock"),
@@ -503,6 +506,11 @@ void OSDService::shutdown()
     snap_sleep_timer.shutdown();
   }
 
+  {
+    Mutex::Locker l(scrub_sleep_lock);
+    scrub_sleep_timer.shutdown();
+  }
+
   osdmap = OSDMapRef();
   next_osdmap = OSDMapRef();
 }
@@ -519,6 +527,7 @@ void OSDService::init()
   watch_timer.init();
   agent_timer.init();
   snap_sleep_timer.init();
+  scrub_sleep_timer.init();
 
   agent_thread.create("osd_srv_agent");
 
@@ -3644,6 +3653,11 @@ PG *OSD::_lookup_lock_pg(spg_t pgid)
   return pg;
 }
 
+PG *OSD::lookup_lock_pg(spg_t pgid)
+{
+  return _lookup_lock_pg(pgid);
+}
+
 PG *OSD::_lookup_lock_pg_with_map_lock_held(spg_t pgid)
 {
   assert(pg_map.count(pgid));
index cad51ed9264357feab9b6804030baa0ea594bb21..f78ca7a636d62531566561846d933034d940d7b2 100644 (file)
@@ -954,6 +954,9 @@ public:
   Mutex snap_sleep_lock;
   SafeTimer snap_sleep_timer;
 
+  Mutex scrub_sleep_lock;
+  SafeTimer scrub_sleep_timer;
+
   AsyncReserver<spg_t> snap_reserver;
   void queue_for_snap_trim(PG *pg);
 
@@ -2033,6 +2036,11 @@ protected:
 
   PG   *_lookup_lock_pg_with_map_lock_held(spg_t pgid);
   PG   *_lookup_lock_pg(spg_t pgid);
+
+public:
+  PG   *lookup_lock_pg(spg_t pgid);
+
+protected:
   PG   *_open_lock_pg(OSDMapRef createmap,
                      spg_t pg, bool no_lockdep_check=false);
   enum res_result {
index fe248331e7c6139176caa903ad2fed37d649df65..414a603efa77e5ff920dbc0f20d970f638b1a5d8 100644 (file)
@@ -326,9 +326,7 @@ PG::PG(OSDService *o, OSDMapRef curmap,
   peer_features(CEPH_FEATURES_SUPPORTED_DEFAULT),
   acting_features(CEPH_FEATURES_SUPPORTED_DEFAULT),
   upacting_features(CEPH_FEATURES_SUPPORTED_DEFAULT),
-  last_epoch(0),
-  scrub_sleep_lock("PG::scrub_sleep_lock"),
-  scrub_sleep_timer(o->cct, scrub_sleep_lock, false /* relax locking */)
+  last_epoch(0)
 {
 #ifdef PG_DEBUG_REFS
   osd->add_pgid(p, this);
@@ -344,8 +342,6 @@ PG::PG(OSDService *o, OSDMapRef curmap,
 PG::~PG()
 {
   pgstate_history.set_pg_in_destructor();
-  Mutex::Locker l(scrub_sleep_lock);
-  scrub_sleep_timer.shutdown();
 #ifdef PG_DEBUG_REFS
   osd->remove_pgid(info.pgid, this);
 #endif
@@ -2799,8 +2795,6 @@ void PG::init(
   dirty_info = true;
   dirty_big_info = true;
   write_if_dirty(*t);
-
-  scrub_sleep_timer.init();
 }
 
 #pragma GCC diagnostic ignored "-Wpragmas"
@@ -4207,24 +4201,39 @@ void PG::scrub(epoch_t queued, ThreadPool::TPHandle &handle)
 {
   if (cct->_conf->osd_scrub_sleep > 0 &&
       (scrubber.state == PG::Scrubber::NEW_CHUNK ||
-       scrubber.state == PG::Scrubber::INACTIVE) && scrubber.needs_sleep) {
+       scrubber.state == PG::Scrubber::INACTIVE) &&
+       scrubber.needs_sleep) {
     ceph_assert(!scrubber.sleeping);
     dout(20) << __func__ << " state is INACTIVE|NEW_CHUNK, sleeping" << dendl;
+
     // Do an async sleep so we don't block the op queue
-    auto scrub_requeue_callback = new FunctionContext([this](int r) {
-      lock();
-      scrubber.sleeping = false;
-      scrubber.needs_sleep = false;
-      dout(20) << __func__ << " slept for "
-               << ceph_clock_now() - scrubber.sleep_start
-               << ", re-queuing scrub" << dendl;
-      scrub_queued = false;
-      requeue_scrub();
-      scrubber.sleep_start = utime_t();
-      unlock();
-    });
-    Mutex::Locker l(scrub_sleep_lock);
-    scrub_sleep_timer.add_event_after(cct->_conf->osd_scrub_sleep, scrub_requeue_callback);
+    OSDService *osds = osd;
+    spg_t pgid = get_pgid();
+    int state = scrubber.state;
+    auto scrub_requeue_callback =
+        new FunctionContext([osds, pgid, state](int r) {
+          PG *pg = osds->osd->lookup_lock_pg(pgid);
+          if (pg == nullptr) {
+            lgeneric_dout(osds->osd->cct, 20)
+                << "scrub_requeue_callback: Could not find "
+                << "PG " << pgid << " can't complete scrub requeue after sleep"
+                << dendl;
+            return;
+          }
+          pg->scrubber.sleeping = false;
+          pg->scrubber.needs_sleep = false;
+          lgeneric_dout(pg->cct, 20)
+              << "scrub_requeue_callback: slept for "
+              << ceph_clock_now() - pg->scrubber.sleep_start
+              << ", re-queuing scrub with state " << state << dendl;
+          pg->scrub_queued = false;
+          pg->requeue_scrub();
+          pg->scrubber.sleep_start = utime_t();
+          pg->unlock();
+        });
+    Mutex::Locker l(osd->scrub_sleep_lock);
+    osd->scrub_sleep_timer.add_event_after(cct->_conf->osd_scrub_sleep,
+                                           scrub_requeue_callback);
     scrubber.sleeping = true;
     scrubber.sleep_start = ceph_clock_now();
     return;
index 0f9282a44a44588c0360ad2dded1fabad3ed1f7c..c452fbb0c95398a3b43c6b7bab65ed80dd27aa65 100644 (file)
@@ -942,7 +942,6 @@ protected:
 public:
   void clear_primary_state();
 
- public:
   bool is_actingbackfill(pg_shard_t osd) const {
     return actingbackfill.count(osd);
   }
@@ -2197,9 +2196,6 @@ public:
 
   epoch_t last_epoch;
 
-  Mutex scrub_sleep_lock;
-  SafeTimer scrub_sleep_timer;
-
  public:
   const spg_t&      get_pgid() const { return pg_id; }