From: David Zafman Date: Tue, 11 Jun 2019 17:47:36 +0000 (-0700) Subject: osd: Handle scrub interval changes X-Git-Tag: v15.1.0~2302^2~2 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=27918bb9062b42c5a1e03381f752428c07ccee84;p=ceph.git osd: Handle scrub interval changes Global changes reschedule all PG scrubs Pool changes reschedule pool PG scrubs Signed-off-by: David Zafman --- diff --git a/qa/standalone/scrub/osd-scrub-test.sh b/qa/standalone/scrub/osd-scrub-test.sh index 09a2cb111425..c530233ea820 100755 --- a/qa/standalone/scrub/osd-scrub-test.sh +++ b/qa/standalone/scrub/osd-scrub-test.sh @@ -107,6 +107,86 @@ function TEST_scrub_test() { teardown $dir || return 1 } +# Grab year-month-day +DATESED="s/\([0-9]*-[0-9]*-[0-9]*\).*/\1/" +DATEFORMAT="%Y-%m-%d" + +function check_dump_scrubs() { + local primary=$1 + local sched_time_check="$2" + local deadline_check="$3" + + DS="$(CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${primary}) dump_scrubs)" + # use eval to drop double-quotes + eval SCHED_TIME=$(echo $DS | jq '.[0].sched_time') + test $(echo $SCHED_TIME | sed $DATESED) = $(date +${DATEFORMAT} -d "now + $sched_time_check") || return 1 + # use eval to drop double-quotes + eval DEADLINE=$(echo $DS | jq '.[0].deadline') + test $(echo $DEADLINE | sed $DATESED) = $(date +${DATEFORMAT} -d "now + $deadline_check") || return 1 +} + +function TEST_interval_changes() { + local poolname=test + local OSDS=2 + local objects=10 + # Don't assume how internal defaults are set + local day="$(expr 24 \* 60 \* 60)" + local week="$(expr $day \* 7)" + local min_interval=$day + local max_interval=$week + local WAIT_FOR_UPDATE=2 + + TESTDATA="testdata.$$" + + setup $dir || return 1 + # This min scrub interval results in 30 seconds backoff time + run_mon $dir a --osd_pool_default_size=$OSDS || return 1 + run_mgr $dir x || return 1 + for osd in $(seq 0 $(expr $OSDS - 1)) + do + run_osd $dir $osd --osd_scrub_min_interval=$min_interval 
--osd_scrub_max_interval=$max_interval --osd_scrub_interval_randomize_ratio=0 || return 1 + done + + # Create a pool with a single pg + create_pool $poolname 1 1 + wait_for_clean || return 1 + local poolid=$(ceph osd dump | grep "^pool.*[']${poolname}[']" | awk '{ print $2 }') + + dd if=/dev/urandom of=$TESTDATA bs=1032 count=1 + for i in `seq 1 $objects` + do + rados -p $poolname put obj${i} $TESTDATA + done + rm -f $TESTDATA + + local primary=$(get_primary $poolname obj1) + + # Check initial settings from above (min 1 day, max 1 week) + check_dump_scrubs $primary "1 day" "1 week" || return 1 + + # Change global osd_scrub_min_interval to 2 days + CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${primary}) config set osd_scrub_min_interval $(expr $day \* 2) + sleep $WAIT_FOR_UPDATE + check_dump_scrubs $primary "2 days" "1 week" || return 1 + + # Change global osd_scrub_max_interval to 2 weeks + CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${primary}) config set osd_scrub_max_interval $(expr $week \* 2) + sleep $WAIT_FOR_UPDATE + check_dump_scrubs $primary "2 days" "2 week" || return 1 + + # Change pool scrub_min_interval to 3 days + ceph osd pool set $poolname scrub_min_interval $(expr $day \* 3) + sleep $WAIT_FOR_UPDATE + check_dump_scrubs $primary "3 days" "2 week" || return 1 + + # Change pool scrub_max_interval to 3 weeks + ceph osd pool set $poolname scrub_max_interval $(expr $week \* 3) + sleep $WAIT_FOR_UPDATE + check_dump_scrubs $primary "3 days" "3 week" || return 1 + + teardown $dir || return 1 +} + main osd-scrub-test "$@" # Local Variables: diff --git a/src/osd/OSD.cc b/src/osd/OSD.cc index 744dda22caa2..08772d3f51cd 100644 --- a/src/osd/OSD.cc +++ b/src/osd/OSD.cc @@ -7154,6 +7154,27 @@ void OSD::sched_scrub() dout(20) << "sched_scrub done" << dendl; } +void OSD::resched_all_scrubs() +{ + dout(10) << __func__ << ": start" << dendl; + OSDService::ScrubJob scrub; + if (service.first_scrub_stamp(&scrub)) { + do { + dout(20) << 
__func__ << ": examine " << scrub.pgid << dendl; + + PGRef pg = _lookup_lock_pg(scrub.pgid); + if (!pg) + continue; + if (!pg->scrubber.must_scrub && !pg->scrubber.need_auto) { + dout(20) << __func__ << ": reschedule " << scrub.pgid << dendl; + pg->on_info_history_change(); + } + pg->unlock(); + } while (service.next_scrub_stamp(scrub, &scrub)); + } + dout(10) << __func__ << ": done" << dendl; +} + MPGStats* OSD::collect_pg_stats() { // This implementation unconditionally sends every is_primary PG's @@ -7524,6 +7545,7 @@ void OSD::handle_osd_map(MOSDMap *m) OSDMap::Incremental inc; auto p = bl.cbegin(); inc.decode(p); + if (o->apply_incremental(inc) < 0) { derr << "ERROR: bad fsid? i have " << osdmap->get_fsid() << " and inc has " << inc.fsid << dendl; ceph_abort_msg("bad fsid"); @@ -8181,6 +8203,31 @@ bool OSD::advance_pg( nextmap, lastmap, newup, up_primary, newacting, acting_primary, rctx); + auto oldpool = lastmap->get_pools().find(pg->pg_id.pool()); + auto newpool = nextmap->get_pools().find(pg->pg_id.pool()); + if (oldpool != lastmap->get_pools().end() + && newpool != nextmap->get_pools().end()) { + dout(20) << __func__ + << " new pool opts " << newpool->second.opts + << " old pool opts " << oldpool->second.opts + << dendl; + + double old_min_interval = 0, new_min_interval = 0; + oldpool->second.opts.get(pool_opts_t::SCRUB_MIN_INTERVAL, &old_min_interval); + newpool->second.opts.get(pool_opts_t::SCRUB_MIN_INTERVAL, &new_min_interval); + + double old_max_interval = 0, new_max_interval = 0; + oldpool->second.opts.get(pool_opts_t::SCRUB_MAX_INTERVAL, &old_max_interval); + newpool->second.opts.get(pool_opts_t::SCRUB_MAX_INTERVAL, &new_max_interval); + + // Assume if an interval is changed from set to unset or vice versa the actual config + // is different. Keep it simple even if it is possible to call on_info_history_change() + // unnecessarily. 
+ if (old_min_interval != new_min_interval || old_max_interval != new_max_interval) { + pg->on_info_history_change(); + } + } + if (new_pg_num && old_pg_num != new_pg_num) { // check for split set children; @@ -9393,6 +9440,8 @@ const char** OSD::get_tracked_conf_keys() const "osd_heartbeat_min_size", "osd_heartbeat_interval", "osd_object_clean_region_max_num_intervals", + "osd_scrub_min_interval", + "osd_scrub_max_interval", NULL }; return KEYS; @@ -9483,6 +9532,11 @@ void OSD::handle_conf_change(const ConfigProxy& conf, ObjectCleanRegions::set_max_num_intervals(cct->_conf->osd_object_clean_region_max_num_intervals); } + if (changed.count("osd_scrub_min_interval") || + changed.count("osd_scrub_max_interval")) { + resched_all_scrubs(); + dout(0) << __func__ << ": scrub interval change" << dendl; + } check_config(); } diff --git a/src/osd/OSD.h b/src/osd/OSD.h index 24d11b9daedf..a6bd5289b929 100644 --- a/src/osd/OSD.h +++ b/src/osd/OSD.h @@ -1973,6 +1973,7 @@ protected: // -- scrubbing -- void sched_scrub(); + void resched_all_scrubs(); bool scrub_random_backoff(); bool scrub_load_below_threshold(); bool scrub_time_permit(utime_t now);