From: Brad Hubbard
Date: Mon, 22 May 2017 03:21:25 +0000 (+1000)
Subject: osd: Move scrub sleep timer to osdservice
X-Git-Tag: ses5-milestone6~9^2~5^2
X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=f110a82437df79dc20207d296e8229fc0e9ce18b;p=ceph.git

osd: Move scrub sleep timer to osdservice

PR 14886 erroneously creates a scrub sleep timer for every pg resulting in a proliferation of threads. Move the timer to the osd service so there can be only one.

Fixes: http://tracker.ceph.com/issues/19986

Signed-off-by: Brad Hubbard
---
diff --git a/src/osd/OSD.cc b/src/osd/OSD.cc index 87177a3d7596..c644afb0b806 100644 --- a/src/osd/OSD.cc +++ b/src/osd/OSD.cc @@ -265,6 +265,9 @@ OSDService::OSDService(OSD *osd) : snap_sleep_lock("OSDService::snap_sleep_lock"), snap_sleep_timer( osd->client_messenger->cct, snap_sleep_lock, false /* relax locking */), + scrub_sleep_lock("OSDService::scrub_sleep_lock"), + scrub_sleep_timer( + osd->client_messenger->cct, scrub_sleep_lock, false /* relax locking */), snap_reserver(&reserver_finisher, cct->_conf->osd_max_trimming_pgs), recovery_lock("OSDService::recovery_lock"), @@ -503,6 +506,11 @@ void OSDService::shutdown() snap_sleep_timer.shutdown(); } + { + Mutex::Locker l(scrub_sleep_lock); + scrub_sleep_timer.shutdown(); + } + osdmap = OSDMapRef(); next_osdmap = OSDMapRef(); } @@ -519,6 +527,7 @@ void OSDService::init() watch_timer.init(); agent_timer.init(); snap_sleep_timer.init(); + scrub_sleep_timer.init(); agent_thread.create("osd_srv_agent"); @@ -3644,6 +3653,11 @@ PG *OSD::_lookup_lock_pg(spg_t pgid) return pg; } +PG *OSD::lookup_lock_pg(spg_t pgid) +{ + return _lookup_lock_pg(pgid); +} + PG *OSD::_lookup_lock_pg_with_map_lock_held(spg_t pgid) { assert(pg_map.count(pgid)); diff --git a/src/osd/OSD.h b/src/osd/OSD.h index cad51ed92643..f78ca7a636d6 100644 --- a/src/osd/OSD.h +++ b/src/osd/OSD.h @@ -954,6 +954,9 @@ public: Mutex snap_sleep_lock; SafeTimer snap_sleep_timer; + Mutex scrub_sleep_lock; + 
SafeTimer scrub_sleep_timer; + AsyncReserver snap_reserver; void queue_for_snap_trim(PG *pg); @@ -2033,6 +2036,11 @@ protected: PG *_lookup_lock_pg_with_map_lock_held(spg_t pgid); PG *_lookup_lock_pg(spg_t pgid); + +public: + PG *lookup_lock_pg(spg_t pgid); + +protected: PG *_open_lock_pg(OSDMapRef createmap, spg_t pg, bool no_lockdep_check=false); enum res_result { diff --git a/src/osd/PG.cc b/src/osd/PG.cc index fe248331e7c6..414a603efa77 100644 --- a/src/osd/PG.cc +++ b/src/osd/PG.cc @@ -326,9 +326,7 @@ PG::PG(OSDService *o, OSDMapRef curmap, peer_features(CEPH_FEATURES_SUPPORTED_DEFAULT), acting_features(CEPH_FEATURES_SUPPORTED_DEFAULT), upacting_features(CEPH_FEATURES_SUPPORTED_DEFAULT), - last_epoch(0), - scrub_sleep_lock("PG::scrub_sleep_lock"), - scrub_sleep_timer(o->cct, scrub_sleep_lock, false /* relax locking */) + last_epoch(0) { #ifdef PG_DEBUG_REFS osd->add_pgid(p, this); @@ -344,8 +342,6 @@ PG::PG(OSDService *o, OSDMapRef curmap, PG::~PG() { pgstate_history.set_pg_in_destructor(); - Mutex::Locker l(scrub_sleep_lock); - scrub_sleep_timer.shutdown(); #ifdef PG_DEBUG_REFS osd->remove_pgid(info.pgid, this); #endif @@ -2799,8 +2795,6 @@ void PG::init( dirty_info = true; dirty_big_info = true; write_if_dirty(*t); - - scrub_sleep_timer.init(); } #pragma GCC diagnostic ignored "-Wpragmas" @@ -4207,24 +4201,39 @@ void PG::scrub(epoch_t queued, ThreadPool::TPHandle &handle) { if (cct->_conf->osd_scrub_sleep > 0 && (scrubber.state == PG::Scrubber::NEW_CHUNK || - scrubber.state == PG::Scrubber::INACTIVE) && scrubber.needs_sleep) { + scrubber.state == PG::Scrubber::INACTIVE) && + scrubber.needs_sleep) { ceph_assert(!scrubber.sleeping); dout(20) << __func__ << " state is INACTIVE|NEW_CHUNK, sleeping" << dendl; + // Do an async sleep so we don't block the op queue - auto scrub_requeue_callback = new FunctionContext([this](int r) { - lock(); - scrubber.sleeping = false; - scrubber.needs_sleep = false; - dout(20) << __func__ << " slept for " - << ceph_clock_now() - 
scrubber.sleep_start - << ", re-queuing scrub" << dendl; - scrub_queued = false; - requeue_scrub(); - scrubber.sleep_start = utime_t(); - unlock(); - }); - Mutex::Locker l(scrub_sleep_lock); - scrub_sleep_timer.add_event_after(cct->_conf->osd_scrub_sleep, scrub_requeue_callback); + OSDService *osds = osd; + spg_t pgid = get_pgid(); + int state = scrubber.state; + auto scrub_requeue_callback = + new FunctionContext([osds, pgid, state](int r) { + PG *pg = osds->osd->lookup_lock_pg(pgid); + if (pg == nullptr) { + lgeneric_dout(osds->osd->cct, 20) + << "scrub_requeue_callback: Could not find " + << "PG " << pgid << " can't complete scrub requeue after sleep" + << dendl; + return; + } + pg->scrubber.sleeping = false; + pg->scrubber.needs_sleep = false; + lgeneric_dout(pg->cct, 20) + << "scrub_requeue_callback: slept for " + << ceph_clock_now() - pg->scrubber.sleep_start + << ", re-queuing scrub with state " << state << dendl; + pg->scrub_queued = false; + pg->requeue_scrub(); + pg->scrubber.sleep_start = utime_t(); + pg->unlock(); + }); + Mutex::Locker l(osd->scrub_sleep_lock); + osd->scrub_sleep_timer.add_event_after(cct->_conf->osd_scrub_sleep, + scrub_requeue_callback); scrubber.sleeping = true; scrubber.sleep_start = ceph_clock_now(); return; diff --git a/src/osd/PG.h b/src/osd/PG.h index 0f9282a44a44..c452fbb0c953 100644 --- a/src/osd/PG.h +++ b/src/osd/PG.h @@ -942,7 +942,6 @@ protected: public: void clear_primary_state(); - public: bool is_actingbackfill(pg_shard_t osd) const { return actingbackfill.count(osd); } @@ -2197,9 +2196,6 @@ public: epoch_t last_epoch; - Mutex scrub_sleep_lock; - SafeTimer scrub_sleep_timer; - public: const spg_t& get_pgid() const { return pg_id; }