From: Brad Hubbard Date: Mon, 24 Apr 2017 04:10:47 +0000 (+1000) Subject: osd: Implement asynchronous scrub sleep X-Git-Tag: v11.2.1~12^2~1 X-Git-Url: http://git.apps.os.sepia.ceph.com/?a=commitdiff_plain;h=460a820a3b2fbd48c8a7966502b235aae8d5d298;p=ceph.git osd: Implement asynchronous scrub sleep Rather than blocking the main op queue just do an async sleep. Fixes: http://tracker.ceph.com/issues/19497 Signed-off-by: Brad Hubbard (cherry picked from commit 7af3e86c2e4992db35637864b83832535c94d0e6) --- diff --git a/src/osd/PG.cc b/src/osd/PG.cc index a29576aa5cea1..bf71f8e1a80a0 100644 --- a/src/osd/PG.cc +++ b/src/osd/PG.cc @@ -254,7 +254,9 @@ PG::PG(OSDService *o, OSDMapRef curmap, acting_features(CEPH_FEATURES_SUPPORTED_DEFAULT), upacting_features(CEPH_FEATURES_SUPPORTED_DEFAULT), do_sort_bitwise(false), - last_epoch(0) + last_epoch(0), + scrub_sleep_lock("PG::scrub_sleep_lock"), + scrub_sleep_timer(o->cct, scrub_sleep_lock, false /* relax locking */) { #ifdef PG_DEBUG_REFS osd->add_pgid(p, this); @@ -264,6 +266,8 @@ PG::PG(OSDService *o, OSDMapRef curmap, PG::~PG() { + Mutex::Locker l(scrub_sleep_lock); + scrub_sleep_timer.shutdown(); #ifdef PG_DEBUG_REFS osd->remove_pgid(info.pgid, this); #endif @@ -2816,6 +2820,8 @@ void PG::init( dirty_info = true; dirty_big_info = true; write_if_dirty(*t); + + scrub_sleep_timer.init(); } #pragma GCC diagnostic ignored "-Wpragmas" @@ -4065,22 +4071,34 @@ void PG::scrub(epoch_t queued, ThreadPool::TPHandle &handle) { if (g_conf->osd_scrub_sleep > 0 && (scrubber.state == PG::Scrubber::NEW_CHUNK || - scrubber.state == PG::Scrubber::INACTIVE)) { + scrubber.state == PG::Scrubber::INACTIVE) && scrubber.needs_sleep) { + ceph_assert(!scrubber.sleeping); dout(20) << __func__ << " state is INACTIVE|NEW_CHUNK, sleeping" << dendl; - unlock(); - utime_t t; - t.set_from_double(g_conf->osd_scrub_sleep); - handle.suspend_tp_timeout(); - t.sleep(); - handle.reset_tp_timeout(); - lock(); - dout(20) << __func__ << " slept for " << t << dendl; + // Do an async sleep so we don't block the op queue + auto scrub_requeue_callback = new FunctionContext([this](int r) { + lock(); + scrubber.sleeping = false; + scrubber.needs_sleep = false; + dout(20) << __func__ << " slept for " + << ceph_clock_now() - scrubber.sleep_start + << ", re-queuing scrub" << dendl; + scrub_queued = false; + requeue_scrub(); + scrubber.sleep_start = utime_t(); + unlock(); + }); + Mutex::Locker l(scrub_sleep_lock); + scrub_sleep_timer.add_event_after(cct->_conf->osd_scrub_sleep, scrub_requeue_callback); + scrubber.sleeping = true; + scrubber.sleep_start = ceph_clock_now(); + return; } if (pg_has_reset_since(queued)) { return; } assert(scrub_queued); scrub_queued = false; + scrubber.needs_sleep = true; if (!is_primary() || !is_active() || !is_clean() || !is_scrubbing()) { dout(10) << "scrub -- not primary or active or not clean" << dendl; diff --git a/src/osd/PG.h b/src/osd/PG.h index 76327121b8d97..862a98ec988cd 100644 --- a/src/osd/PG.h +++ b/src/osd/PG.h @@ -34,6 +34,7 @@ #include "osd_types.h" #include "include/xlist.h" #include "SnapMapper.h" +#include "common/Timer.h" #include "PGLog.h" #include "OSDMap.h" @@ -1101,6 +1102,11 @@ public: OpRequestRef active_rep_scrub; utime_t scrub_reg_stamp; // stamp we registered for + // For async sleep + bool sleeping = false; + bool needs_sleep = true; + utime_t sleep_start; + // flags to indicate explicitly requested scrubs (by admin) bool must_scrub, must_deep_scrub, must_repair; @@ -1219,6 +1225,9 @@ public: authoritative.clear(); num_digest_updates_pending = 0; cleaned_meta_map = ScrubMap(); + sleeping = false; + needs_sleep = true; + sleep_start = utime_t(); } void create_results(const hobject_t& obj); @@ -2079,6 +2088,9 @@ public: bool do_sort_bitwise; epoch_t last_epoch; + Mutex scrub_sleep_lock; + SafeTimer scrub_sleep_timer; + public: const spg_t& get_pgid() const { return pg_id; }