From 445b0221ecfab7e0972227642a600e5c42f895c5 Mon Sep 17 00:00:00 2001 From: David Zafman Date: Wed, 5 Jun 2019 22:57:55 -0700 Subject: [PATCH] osd: Fix PG::sched_scrub() logic so time_for_deep is consistent on re-entry Perform calculations on the first entry to PG::sched_scrub() and save scrubber.time_for_deep. This makes it clear that upon re-entry we are already committed to a deep scrub and/or auto repair. Signed-off-by: David Zafman --- src/osd/PG.cc | 190 ++++++++++++++++++++++++++------------------------ src/osd/PG.h | 2 + 2 files changed, 99 insertions(+), 93 deletions(-) diff --git a/src/osd/PG.cc b/src/osd/PG.cc index f6fd0492a47..ce0524fb8b5 100644 --- a/src/osd/PG.cc +++ b/src/osd/PG.cc @@ -335,7 +335,7 @@ PG::Scrubber::Scrubber() active(false), shallow_errors(0), deep_errors(0), fixed(0), must_scrub(false), must_deep_scrub(false), must_repair(false), - need_auto(false), + need_auto(false), time_for_deep(false), auto_repair(false), check_repair(false), deep_scrub_on_error(false), @@ -1325,80 +1325,98 @@ void PG::requeue_map_waiters() // returns true if a scrub has been newly kicked off bool PG::sched_scrub() { - bool nodeep_scrub = false; ceph_assert(is_locked()); if (!(is_primary() && is_active() && is_clean() && !is_scrubbing())) { return false; } - double deep_scrub_interval = 0; - pool.info.opts.get(pool_opts_t::DEEP_SCRUB_INTERVAL, &deep_scrub_interval); - if (deep_scrub_interval <= 0) { - deep_scrub_interval = cct->_conf->osd_deep_scrub_interval; - } - bool time_for_deep = ceph_clock_now() >= - info.history.last_deep_scrub_stamp + deep_scrub_interval; - - bool deep_coin_flip = false; - // Only add random deep scrubs when NOT user initiated scrub - // If we randomize when noscrub set then it guarantees - // we will deep scrub because this function is called often. - if (!time_for_deep && !scrubber.must_scrub - && !(get_osdmap()->test_flag(CEPH_OSDMAP_NOSCRUB) - || pool.info.has_flag(pg_pool_t::FLAG_NOSCRUB))) - deep_coin_flip = (rand() % 100) < cct->_conf->osd_deep_scrub_randomize_ratio * 100; - dout(20) << __func__ << ": time_for_deep=" << time_for_deep << " deep_coin_flip=" << deep_coin_flip << dendl; - - time_for_deep = (time_for_deep || deep_coin_flip); - - //NODEEP_SCRUB so ignore time initiated deep-scrub - if (get_osdmap()->test_flag(CEPH_OSDMAP_NODEEP_SCRUB) || - pool.info.has_flag(pg_pool_t::FLAG_NODEEP_SCRUB)) { - time_for_deep = false; - nodeep_scrub = true; - } - - if (!scrubber.must_scrub) { - ceph_assert(!scrubber.must_deep_scrub); - - //NOSCRUB so skip regular scrubs - if ((get_osdmap()->test_flag(CEPH_OSDMAP_NOSCRUB) || - pool.info.has_flag(pg_pool_t::FLAG_NOSCRUB)) && !time_for_deep) { - if (scrubber.reserved) { - // cancel scrub if it is still in scheduling, - // so pgs from other pools where scrub are still legal - // have a chance to go ahead with scrubbing. - clear_scrub_reserved(); - scrub_unreserve_replicas(); + // All processing the first time through commits us to whatever + // choices are made. + if (!scrubber.reserved) { + dout(20) << __func__ << ": Start processing pg " << info.pgid << dendl; + + bool allow_deep_scrub = !(get_osdmap()->test_flag(CEPH_OSDMAP_NODEEP_SCRUB) || + pool.info.has_flag(pg_pool_t::FLAG_NODEEP_SCRUB)); + bool allow_scrub = !(get_osdmap()->test_flag(CEPH_OSDMAP_NOSCRUB) || + pool.info.has_flag(pg_pool_t::FLAG_NOSCRUB)); + bool has_deep_errors = (info.stats.stats.sum.num_deep_scrub_errors > 0); + bool try_to_auto_repair = (cct->_conf->osd_scrub_auto_repair + && get_pgbackend()->auto_repair_supported()); + + scrubber.time_for_deep = false; + // Clear these in case user issues the scrub/repair command during + // the scheduling of the scrub/repair (e.g. request reservation) + scrubber.deep_scrub_on_error = false; + scrubber.auto_repair = false; + + // All periodic scrub handling goes here because must_scrub is + // always set for must_deep_scrub and must_repair. + if (!scrubber.must_scrub) { + ceph_assert(!scrubber.must_deep_scrub && !scrubber.must_repair); + // Handle deep scrub determination only if allowed + if (allow_deep_scrub) { + // Initial entry and scheduled scrubs without nodeep_scrub set get here + if (scrubber.need_auto) { + dout(20) << __func__ << ": need repair after scrub errors" << dendl; + scrubber.time_for_deep = true; + } else { + double deep_scrub_interval = 0; + pool.info.opts.get(pool_opts_t::DEEP_SCRUB_INTERVAL, &deep_scrub_interval); + if (deep_scrub_interval <= 0) { + deep_scrub_interval = cct->_conf->osd_deep_scrub_interval; + } + scrubber.time_for_deep = ceph_clock_now() >= + info.history.last_deep_scrub_stamp + deep_scrub_interval; + + bool deep_coin_flip = false; + // If we randomize when !allow_scrub && allow_deep_scrub, then it guarantees + // we will deep scrub because this function is called often. + if (!scrubber.time_for_deep && allow_scrub) + deep_coin_flip = (rand() % 100) < cct->_conf->osd_deep_scrub_randomize_ratio * 100; + dout(20) << __func__ << ": time_for_deep=" << scrubber.time_for_deep << " deep_coin_flip=" << deep_coin_flip << dendl; + + scrubber.time_for_deep = (scrubber.time_for_deep || deep_coin_flip); + } + + if (!scrubber.time_for_deep && has_deep_errors) { + osd->clog->info() << "osd." << osd->whoami + << " pg " << info.pgid + << " Deep scrub errors, upgrading scrub to deep-scrub"; + scrubber.time_for_deep = true; + } + + if (try_to_auto_repair) { + if (scrubber.time_for_deep) { + dout(20) << __func__ << ": auto repair with deep scrubbing" << dendl; + scrubber.auto_repair = true; + } else if (allow_scrub) { + dout(20) << __func__ << ": auto repair with scrubbing, rescrub if errors found" << dendl; + scrubber.deep_scrub_on_error = true; + } + } + } else { // !allow_deep_scrub + dout(20) << __func__ << ": nodeep_scrub set" << dendl; + if (has_deep_errors) { + osd->clog->error() << "osd." << osd->whoami + << " pg " << info.pgid + << " Regular scrub skipped due to deep-scrub errors and nodeep-scrub set"; + return false; + } } - return false; - } - } - // Clear these in case user issues the scrub/repair command during - // the scheduling of the scrub/repair (e.g. request reservation) - scrubber.deep_scrub_on_error = false; - scrubber.auto_repair = false; - if (scrubber.need_auto) { - dout(20) << __func__ << ": need repair after scrub errors" << dendl; - time_for_deep = true; - } else if (cct->_conf->osd_scrub_auto_repair - && get_pgbackend()->auto_repair_supported() - // respect the command from user, and not do auto-repair - && !scrubber.must_repair - && !scrubber.must_scrub - && !scrubber.must_deep_scrub) { - if (time_for_deep) { - dout(20) << __func__ << ": auto repair with deep scrubbing" << dendl; - scrubber.auto_repair = true; - } else { - dout(20) << __func__ << ": auto repair with scrubbing, rescrub if errors found" << dendl; - scrubber.deep_scrub_on_error = true; + //NOSCRUB so skip regular scrubs + if (!allow_scrub && !scrubber.time_for_deep) { + return false; + } + // scrubber.must_scrub + } else if (!scrubber.must_deep_scrub && has_deep_errors) { + osd->clog->error() << "osd." << osd->whoami + << " pg " << info.pgid + << " Regular scrub request, deep-scrub details will be lost"; } - } + // Unless precluded this was handle above + scrubber.need_auto = false; - bool ret = true; - if (!scrubber.reserved) { ceph_assert(scrubber.reserved_peers.empty()); if ((cct->_conf->osd_scrub_during_recovery || !osd->is_recovery_active()) && osd->inc_scrubs_pending()) { @@ -1408,48 +1426,32 @@ bool PG::sched_scrub() scrub_reserve_replicas(); } else { dout(20) << __func__ << ": failed to reserve locally" << dendl; - ret = false; + return false; } } + if (scrubber.reserved) { if (scrubber.reserve_failed) { - dout(20) << "sched_scrub: failed, a peer declined" << dendl; + dout(20) << __func__ << ": failed, a peer declined" << dendl; clear_scrub_reserved(); scrub_unreserve_replicas(); - ret = false; + return false; } else if (scrubber.reserved_peers.size() == recovery_state.get_acting().size()) { - dout(20) << "sched_scrub: success, reserved self and replicas" << dendl; - if (time_for_deep) { - dout(10) << "sched_scrub: scrub will be deep" << dendl; + dout(20) << __func__ << ": success, reserved self and replicas" << dendl; + if (scrubber.time_for_deep) { + dout(10) << __func__ << ": scrub will be deep" << dendl; state_set(PG_STATE_DEEP_SCRUB); - } else if (!scrubber.must_deep_scrub && info.stats.stats.sum.num_deep_scrub_errors) { - if (!nodeep_scrub) { - osd->clog->info() << "osd." << osd->whoami - << " pg " << info.pgid - << " Deep scrub errors, upgrading scrub to deep-scrub"; - state_set(PG_STATE_DEEP_SCRUB); - } else if (!scrubber.must_scrub) { - osd->clog->error() << "osd." << osd->whoami - << " pg " << info.pgid - << " Regular scrub skipped due to deep-scrub errors and nodeep-scrub set"; - clear_scrub_reserved(); - scrub_unreserve_replicas(); - return false; - } else { - osd->clog->error() << "osd." << osd->whoami - << " pg " << info.pgid - << " Regular scrub request, deep-scrub details will be lost"; - } + scrubber.time_for_deep = false; } queue_scrub(); } else { // none declined, since scrubber.reserved is set - dout(20) << "sched_scrub: reserved " << scrubber.reserved_peers << ", waiting for replicas" << dendl; + dout(20) << __func__ << ": reserved " << scrubber.reserved_peers + << ", waiting for replicas" << dendl; } } - - return ret; + return true; } bool PG::is_scrub_registered() @@ -3327,6 +3329,8 @@ ostream& operator<<(ostream& out, const PG& pg) out << " MUST_DEEP_SCRUB"; if (pg.scrubber.must_scrub) out << " MUST_SCRUB"; + if (pg.scrubber.time_for_deep) + out << " TIME_FOR_DEEP"; if (pg.scrubber.need_auto) out << " NEED_AUTO"; diff --git a/src/osd/PG.h b/src/osd/PG.h index d43c47126f2..0601b7ae276 100644 --- a/src/osd/PG.h +++ b/src/osd/PG.h @@ -1118,6 +1118,7 @@ public: // Priority to use for scrub scheduling unsigned priority = 0; + bool time_for_deep; // this flag indicates whether we would like to do auto-repair of the PG or not bool auto_repair; // this flag indicates that we are scrubbing post repair to verify everything is fixed @@ -1237,6 +1238,7 @@ public: must_deep_scrub = false; must_repair = false; need_auto = false; + time_for_deep = false; auto_repair = false; check_repair = false; deep_scrub_on_error = false; -- 2.39.5