From 33c647e8114b37404d8d62a08c85664cea709118 Mon Sep 17 00:00:00 2001 From: Sridhar Seshasayee <sseshasa@redhat.com> Date: Tue, 5 Nov 2019 16:10:14 +0530 Subject: [PATCH] osd/OSDMap: Show health warning if a pool is configured with size 1 Introduce a config option called 'mon_warn_on_pool_no_redundancy' that is used to show a health warning if any pool in the ceph cluster is configured with a size of 1. The user can mute/unmute the warning using 'ceph health mute/unmute POOL_NO_REDUNDANCY'. Add standalone test to verify warning on setting pool size=1. Set the associated warning to 'false' in ceph.conf.template under qa/tasks so that existing tests do not break. Fixes: https://tracker.ceph.com/issues/41666 Signed-off-by: Sridhar Seshasayee <sseshasa@redhat.com> --- PendingReleaseNotes | 11 +++++++++++ doc/rados/configuration/mon-config-ref.rst | 8 ++++++++ qa/standalone/mon/health-mute.sh | 15 +++++++++++++++ qa/tasks/ceph.conf.template | 1 + src/common/options.cc | 7 +++++++ src/osd/OSDMap.cc | 21 +++++++++++++++++++++ 6 files changed, 63 insertions(+) diff --git a/PendingReleaseNotes b/PendingReleaseNotes index 9edf474c8faf4..9a5f0da523af9 100644 --- a/PendingReleaseNotes +++ b/PendingReleaseNotes @@ -38,6 +38,17 @@ ceph config set global mon_warn_on_pool_pg_num_not_power_of_two false +* Ceph will issue a health warning if a RADOS pool's ``size`` is set to 1 + or in other words the pool is configured with no redundancy. This can + be fixed by setting the pool size to the minimum recommended value + with:: + + ceph osd pool set <pool-name> size <num-replicas> + + The warning can be silenced with:: + + ceph config set global mon_warn_on_pool_no_redundancy false + >=15.0.0 -------- diff --git a/doc/rados/configuration/mon-config-ref.rst b/doc/rados/configuration/mon-config-ref.rst index e40b53ff96be0..dbfc20b908404 100644 --- a/doc/rados/configuration/mon-config-ref.rst +++ b/doc/rados/configuration/mon-config-ref.rst @@ -420,6 +420,14 @@ by setting it in the ``[mon]`` section of the configuration file. 
:Default: ``0`` +``mon warn on pool no redundancy`` + +:Description: Issue a ``HEALTH_WARN`` in cluster log if any pool is + configured with no replicas. +:Type: Boolean +:Default: ``True`` + + ``mon cache target full warn ratio`` :Description: Position between pool's ``cache_target_full`` and diff --git a/qa/standalone/mon/health-mute.sh b/qa/standalone/mon/health-mute.sh index 270d30c65d974..a255798b82fbd 100755 --- a/qa/standalone/mon/health-mute.sh +++ b/qa/standalone/mon/health-mute.sh @@ -35,6 +35,21 @@ function TEST_mute() { ceph osd pool application enable foo rbd --yes-i-really-mean-it wait_for_clean || return 1 + ceph -s + ceph health | grep HEALTH_OK || return 1 + # test warning on setting pool size=1 + ceph osd pool set foo size 1 + ceph -s + ceph health | grep HEALTH_WARN || return 1 + ceph health detail | grep POOL_NO_REDUNDANCY || return 1 + ceph health mute POOL_NO_REDUNDANCY + ceph -s + ceph health | grep HEALTH_OK | grep POOL_NO_REDUNDANCY || return 1 + ceph health unmute POOL_NO_REDUNDANCY + ceph -s + ceph health | grep HEALTH_WARN || return 1 + # restore pool size to default + ceph osd pool set foo size 3 ceph -s ceph health | grep HEALTH_OK || return 1 ceph osd set noup diff --git a/qa/tasks/ceph.conf.template b/qa/tasks/ceph.conf.template index 2c7f365f0103b..493eacb32caa3 100644 --- a/qa/tasks/ceph.conf.template +++ b/qa/tasks/ceph.conf.template @@ -29,6 +29,7 @@ mon warn on osd down out interval zero = false mon warn on too few osds = false mon_warn_on_pool_pg_num_not_power_of_two = false + mon_warn_on_pool_no_redundancy = false osd pool default erasure code profile = "plugin=jerasure technique=reed_sol_van k=2 m=1 ruleset-failure-domain=osd crush-failure-domain=osd" diff --git a/src/common/options.cc b/src/common/options.cc index 1f76931f8054c..b261d63e7210c 100644 --- a/src/common/options.cc +++ b/src/common/options.cc @@ -1712,6 +1712,13 @@ std::vector