From 6a9895b97a2b2fb533092f294e45fede154a7f82 Mon Sep 17 00:00:00 2001 From: David Zafman Date: Tue, 15 Jan 2019 12:12:39 -0800 Subject: [PATCH] mon: Fix scrub health warning handling and change config to a ratio Make this mon_warn code clearer since it involves 2 values Code used mon scrub interval instead of pg scrub interval Rename config values to include _pg_ and ratio to make it more clear Fix scrub warning handling use per-pool intervals when specified Fixes: http://tracker.ceph.com/issues/37264 Signed-off-by: David Zafman --- PendingReleaseNotes | 5 ++ doc/rados/operations/health-checks.rst | 8 +- src/common/legacy_config_opts.h | 4 +- src/common/options.cc | 20 ++--- src/mon/PGMap.cc | 100 ++++++++++++++----------- 5 files changed, 79 insertions(+), 58 deletions(-) diff --git a/PendingReleaseNotes b/PendingReleaseNotes index 39f273544411f..9ff4518af0d4e 100644 --- a/PendingReleaseNotes +++ b/PendingReleaseNotes @@ -160,6 +160,11 @@ * The 'cephfs-data-scan scan_links' now automatically repair inotables and snaptable. +* Configuration values mon_warn_not_scrubbed/mon_warn_not_deep_scrubbed have been + renamed. They are now mon_warn_pg_not_scrubbed_ratio/mon_warn_pg_not_deep_scrubbed_ratio + respectively. This is to clarify that these warnings are related to pg scrubbing + and are a ratio of the related interval. These options are now enabled by default. + >=13.1.0 -------- diff --git a/doc/rados/operations/health-checks.rst b/doc/rados/operations/health-checks.rst index dbb8f5224be0f..ba8d663cebac3 100644 --- a/doc/rados/operations/health-checks.rst +++ b/doc/rados/operations/health-checks.rst @@ -741,8 +741,8 @@ _______________ One or more PGs has not been scrubbed recently. PGs are normally scrubbed every ``mon_scrub_interval`` seconds, and this warning -triggers when ``mon_warn_not_scrubbed`` such intervals have elapsed -without a scrub. 
+triggers when ``mon_warn_pg_not_scrubbed_ratio`` percentage of interval has elapsed +without a scrub since it was due. PGs will not scrub if they are not flagged as *clean*, which may happen if they are misplaced or degraded (see *PG_AVAILABILITY* and @@ -757,8 +757,8 @@ ____________________ One or more PGs has not been deep scrubbed recently. PGs are normally scrubbed every ``osd_deep_mon_scrub_interval`` seconds, and this warning -triggers when ``mon_warn_not_deep_scrubbed`` such intervals have elapsed -without a scrub. +triggers when ``mon_warn_pg_not_deep_scrubbed_ratio`` percentage of interval has elapsed +without a scrub since it was due. PGs will not (deep) scrub if they are not flagged as *clean*, which may happen if they are misplaced or degraded (see *PG_AVAILABILITY* and diff --git a/src/common/legacy_config_opts.h b/src/common/legacy_config_opts.h index a8fd2ac4eaf45..4a53082b506c9 100644 --- a/src/common/legacy_config_opts.h +++ b/src/common/legacy_config_opts.h @@ -274,8 +274,8 @@ OPTION(mon_health_to_clog_tick_interval, OPT_DOUBLE) OPTION(mon_data_avail_crit, OPT_INT) OPTION(mon_data_avail_warn, OPT_INT) OPTION(mon_data_size_warn, OPT_U64) // issue a warning when the monitor's data store goes over 15GB (in bytes) -OPTION(mon_warn_not_scrubbed, OPT_INT) -OPTION(mon_warn_not_deep_scrubbed, OPT_INT) +OPTION(mon_warn_pg_not_scrubbed_ratio, OPT_FLOAT) +OPTION(mon_warn_pg_not_deep_scrubbed_ratio, OPT_FLOAT) OPTION(mon_scrub_interval, OPT_INT) // once a day OPTION(mon_scrub_timeout, OPT_INT) // let's give it 5 minutes; why not. OPTION(mon_scrub_max_keys, OPT_INT) // max number of keys to scrub each time diff --git a/src/common/options.cc b/src/common/options.cc index dab00da1f1d56..f0e661e86c4ba 100644 --- a/src/common/options.cc +++ b/src/common/options.cc @@ -1715,16 +1715,18 @@ std::vector