From ef2dc05de00fbc5db8d864f83ee8d318286b6d6e Mon Sep 17 00:00:00 2001 From: David Zafman Date: Mon, 26 Nov 2018 16:48:52 -0800 Subject: [PATCH] osd, test: Add test case with osd support for overdue PG scrubs and deep scrubs Add trigger_deep_scrub osd command for testing Publish stats when trigger_scrub/trigger_deep_scrub is used for testing Add optional argument to trigger_scrub/trigger_deep_scrub for amount of extra time to change last scrub stamps Signed-off-by: David Zafman --- qa/standalone/scrub/osd-scrub-repair.sh | 85 +++++++++++++++++++++++++ src/osd/OSD.cc | 46 ++++++++++--- src/osd/PG.h | 7 ++ 3 files changed, 129 insertions(+), 9 deletions(-) diff --git a/qa/standalone/scrub/osd-scrub-repair.sh b/qa/standalone/scrub/osd-scrub-repair.sh index bf695a54a84b3..759f436181ffe 100755 --- a/qa/standalone/scrub/osd-scrub-repair.sh +++ b/qa/standalone/scrub/osd-scrub-repair.sh @@ -5271,6 +5271,91 @@ function TEST_periodic_scrub_replicated() { rados list-inconsistent-obj $pg | jq '.' | grep -qv $objname || return 1 } +function TEST_scrub_warning() { + local dir=$1 + local poolname=psr_pool + local objname=POBJ + local scrubs=5 + local deep_scrubs=5 + local i1_day=86400 + local i7_days=$(calc $i1_day \* 7) + local i14_days=$(calc $i1_day \* 14) + local overdue=0.5 + local conf_overdue_seconds=$(calc $i7_days + $i1_day + \( $i7_days \* $overdue \) ) + local pool_overdue_seconds=$(calc $i14_days + $i1_day + \( $i14_days \* $overdue \) ) + + setup $dir || return 1 + run_mon $dir a --osd_pool_default_size=1 || return 1 + run_mgr $dir x --mon_warn_pg_not_scrubbed_ratio=${overdue} --mon_warn_pg_not_deep_scrubbed_ratio=${overdue} || return 1 + run_osd $dir 0 $ceph_osd_args --osd_scrub_backoff_ratio=0 || return 1 + + for i in $(seq 1 $(expr $scrubs + $deep_scrubs)) + do + create_pool $poolname-$i 1 1 || return 1 + wait_for_clean || return 1 + if [ $i = "1" ]; + then + ceph osd pool set $poolname-$i scrub_max_interval $i14_days + fi + if [ $i = $(expr $scrubs + 1) ]; + then + ceph osd pool set $poolname-$i deep_scrub_interval $i14_days + fi + done + + # Only 1 osd + local primary=0 + + ceph osd set noscrub || return 1 + ceph osd set nodeep-scrub || return 1 + ceph config set global osd_scrub_interval_randomize_ratio 0 + ceph config set global osd_deep_scrub_randomize_ratio 0 + ceph config set global osd_scrub_max_interval ${i7_days} + ceph config set global osd_deep_scrub_interval ${i7_days} + + # Fake schedule scrubs + for i in $(seq 1 $scrubs) + do + if [ $i = "1" ]; + then + overdue_seconds=$pool_overdue_seconds + else + overdue_seconds=$conf_overdue_seconds + fi + CEPH_ARGS='' ceph daemon $(get_asok_path osd.${primary}) \ + trigger_scrub ${i}.0 $(expr ${overdue_seconds} + ${i}00) || return 1 + done + # Fake schedule deep scrubs + for i in $(seq $(expr $scrubs + 1) $(expr $scrubs + $deep_scrubs)) + do + if [ $i = "$(expr $scrubs + 1)" ]; + then + overdue_seconds=$pool_overdue_seconds + else + overdue_seconds=$conf_overdue_seconds + fi + CEPH_ARGS='' ceph daemon $(get_asok_path osd.${primary}) \ + trigger_deep_scrub ${i}.0 $(expr ${overdue_seconds} + ${i}00) || return 1 + done + flush_pg_stats + + ceph health + ceph health detail + ceph health | grep -q "$deep_scrubs pgs not deep-scrubbed in time" || return 1 + ceph health | grep -q "$scrubs pgs not scrubbed in time" || return 1 + COUNT=$(ceph health detail | grep "not scrubbed since" | wc -l) + if [ "$COUNT" != $scrubs ]; then + ceph health detail | grep "not scrubbed since" + return 1 + fi + COUNT=$(ceph health detail | grep "not deep-scrubbed since" | wc -l) + if [ "$COUNT" != $deep_scrubs ]; then + ceph health detail | grep "not deep-scrubbed since" + return 1 + fi + return 0 +} + # # Corrupt snapset in replicated pool # diff --git a/src/osd/OSD.cc b/src/osd/OSD.cc index 0444aab500a8f..75c004fdb6b9a 100644 --- a/src/osd/OSD.cc +++ b/src/osd/OSD.cc @@ -3371,10 +3371,19 @@ void OSD::final_init() r = admin_socket->register_command( "trigger_scrub", "trigger_scrub " \ - "name=pgid,type=CephString ", + "name=pgid,type=CephString " \ + "name=time,type=CephInt,req=false", test_ops_hook, "Trigger a scheduled scrub "); ceph_assert(r == 0); + r = admin_socket->register_command( + "trigger_deep_scrub", + "trigger_deep_scrub " \ + "name=pgid,type=CephString " \ + "name=time,type=CephInt,req=false", + test_ops_hook, + "Trigger a scheduled deep scrub "); + ceph_assert(r == 0); r = admin_socket->register_command( "injectfull", "injectfull " \ @@ -5506,8 +5515,9 @@ void TestOpsSocketHook::test_ops(OSDService *service, ObjectStore *store, << "to " << service->cct->_conf->osd_recovery_delay_start; return; } - if (command == "trigger_scrub") { + if (command == "trigger_scrub" || command == "trigger_deep_scrub") { spg_t pgid; + bool deep = (command == "trigger_deep_scrub"); OSDMapRef curmap = service->get_osdmap(); string pgidstr; @@ -5518,6 +5528,9 @@ void TestOpsSocketHook::test_ops(OSDService *service, ObjectStore *store, return; } + int64_t time; + cmd_getval(service->cct, cmdmap, "time", time, (int64_t)0); + PGRef pg = service->osd->_lookup_lock_pg(pgid); if (pg == nullptr) { ss << "Can't find pg " << pgid; @@ -5528,16 +5541,31 @@ void TestOpsSocketHook::test_ops(OSDService *service, ObjectStore *store, pg->unreg_next_scrub(); const pg_pool_t *p = curmap->get_pg_pool(pgid.pool()); double pool_scrub_max_interval = 0; - p->opts.get(pool_opts_t::SCRUB_MAX_INTERVAL, &pool_scrub_max_interval); - double scrub_max_interval = pool_scrub_max_interval > 0 ? - pool_scrub_max_interval : g_conf()->osd_scrub_max_interval; + double scrub_max_interval; + if (deep) { + p->opts.get(pool_opts_t::DEEP_SCRUB_INTERVAL, &pool_scrub_max_interval); + scrub_max_interval = pool_scrub_max_interval > 0 ? + pool_scrub_max_interval : g_conf()->osd_deep_scrub_interval; + } else { + p->opts.get(pool_opts_t::SCRUB_MAX_INTERVAL, &pool_scrub_max_interval); + scrub_max_interval = pool_scrub_max_interval > 0 ? + pool_scrub_max_interval : g_conf()->osd_scrub_max_interval; + } // Instead of marking must_scrub force a schedule scrub utime_t stamp = ceph_clock_now(); - stamp -= scrub_max_interval; - stamp -= 100.0; // push back last scrub more for good measure - pg->set_last_scrub_stamp(stamp); + if (time == 0) + stamp -= scrub_max_interval; + else + stamp -= (float)time; + stamp -= 100.0; // push back last scrub more for good measure + if (deep) { + pg->set_last_deep_scrub_stamp(stamp); + } else { + pg->set_last_scrub_stamp(stamp); + } pg->reg_next_scrub(); - ss << "ok"; + pg->publish_stats_to_osd(); + ss << "ok - set" << (deep ? " deep" : "" ) << " stamp " << stamp; } else { ss << "Not primary"; } diff --git a/src/osd/PG.h b/src/osd/PG.h index a6f1f647c6224..96996c7b5431f 100644 --- a/src/osd/PG.h +++ b/src/osd/PG.h @@ -315,9 +315,15 @@ public: } void set_last_scrub_stamp(utime_t t) { + info.stats.last_scrub_stamp = t; info.history.last_scrub_stamp = t; } + void set_last_deep_scrub_stamp(utime_t t) { + info.stats.last_deep_scrub_stamp = t; + info.history.last_deep_scrub_stamp = t; + } + bool is_deleting() const { return deleting; } @@ -1398,6 +1404,7 @@ protected: void _update_calc_stats(); void _update_blocked_by(); + friend class TestOpsSocketHook; void publish_stats_to_osd(); void clear_publish_stats(); -- 2.39.5