From: Kefu Chai Date: Thu, 23 Nov 2017 09:34:52 +0000 (+0800) Subject: mgr/PGMap: drop REQUEST_{SLOW,STUCK} HEALTH_WARNs in mimic X-Git-Tag: v13.0.2~803^2 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=f5f2ced624809dfef997236a5fe352b221432fb7;p=ceph.git mgr/PGMap: drop REQUEST_{SLOW,STUCK} HEALTH_WARNs in mimic SLOW_OPS unifies both of them since mimic Signed-off-by: Kefu Chai --- diff --git a/doc/rados/operations/health-checks.rst b/doc/rados/operations/health-checks.rst index 40b886f93f55..99621323d3a6 100644 --- a/doc/rados/operations/health-checks.rst +++ b/doc/rados/operations/health-checks.rst @@ -463,8 +463,8 @@ If the latest copy of the object is not available, the cluster can be told to roll back to a previous version of the object. See :ref:`failures-osd-unfound` for more information. -REQUEST_SLOW -____________ +SLOW_OPS +________ One or more OSD requests is taking a long time to process. This can be an indication of extreme load, a slow storage device, or a software @@ -483,15 +483,6 @@ The location of an OSD can be found with:: ceph osd find osd. -REQUEST_STUCK -_____________ - -One or more OSD requests has been blocked for an extremely long time. -This is an indication that either the cluster has been unhealthy for -an extended period of time (e.g., not enough running OSDs) or there is -some internal problem with the OSD. See the dicussion of -*REQUEST_SLOW* above. - PG_NOT_SCRUBBED _______________ diff --git a/qa/suites/rados/monthrash/workloads/rados_api_tests.yaml b/qa/suites/rados/monthrash/workloads/rados_api_tests.yaml index 8af6e7faa4ce..ac6491ebde7b 100644 --- a/qa/suites/rados/monthrash/workloads/rados_api_tests.yaml +++ b/qa/suites/rados/monthrash/workloads/rados_api_tests.yaml @@ -6,7 +6,6 @@ overrides: - \(CACHE_POOL_NO_HIT_SET\) - \(CACHE_POOL_NEAR_FULL\) - \(POOL_FULL\) - - \(REQUEST_SLOW\) - \(SLOW_OPS\) - \(MON_DOWN\) - \(PG_ diff --git a/qa/suites/rados/singleton/all/thrash-eio.yaml b/qa/suites/rados/singleton/all/thrash-eio.yaml index 3f6ee66ace52..88870c0892e7 100644 --- a/qa/suites/rados/singleton/all/thrash-eio.yaml +++ b/qa/suites/rados/singleton/all/thrash-eio.yaml @@ -26,7 +26,6 @@ tasks: - objects unfound and apparently lost - overall HEALTH_ - \(OSDMAP_FLAGS\) - - \(REQUEST_SLOW\) - \(SLOW_OPS\) - \(PG_ - \(OSD_ diff --git a/qa/suites/rados/verify/tasks/rados_api_tests.yaml b/qa/suites/rados/verify/tasks/rados_api_tests.yaml index 0d6b28bdb9d9..d1e2c971cf83 100644 --- a/qa/suites/rados/verify/tasks/rados_api_tests.yaml +++ b/qa/suites/rados/verify/tasks/rados_api_tests.yaml @@ -6,7 +6,6 @@ overrides: - \(CACHE_POOL_NO_HIT_SET\) - \(POOL_FULL\) - \(SMALLER_PGP_NUM\) - - \(REQUEST_SLOW\) - \(SLOW_OPS\) - \(CACHE_POOL_NEAR_FULL\) - \(POOL_APP_NOT_ENABLED\) diff --git a/qa/suites/rbd/basic/tasks/rbd_python_api_tests_old_format.yaml b/qa/suites/rbd/basic/tasks/rbd_python_api_tests_old_format.yaml index 7ab3185ec10e..573aff3b6099 100644 --- a/qa/suites/rbd/basic/tasks/rbd_python_api_tests_old_format.yaml +++ b/qa/suites/rbd/basic/tasks/rbd_python_api_tests_old_format.yaml @@ -1,7 +1,7 @@ overrides: ceph: log-whitelist: - - \(REQUEST_SLOW\) + - \(SLOW_OPS\) tasks: - workunit: clients: diff --git a/qa/suites/smoke/basic/tasks/mon_thrash.yaml b/qa/suites/smoke/basic/tasks/mon_thrash.yaml index 5bb30fb25942..595ef667d645 100644 --- a/qa/suites/smoke/basic/tasks/mon_thrash.yaml +++ b/qa/suites/smoke/basic/tasks/mon_thrash.yaml @@ -11,7 +11,6 @@ overrides: - \(CACHE_POOL_ - \(SMALLER_PGP_NUM\) - \(OBJECT_ - - \(REQUEST_SLOW\) - \(SLOW_OPS\) - \(TOO_FEW_PGS\) conf: diff --git a/qa/suites/smoke/basic/tasks/rados_api_tests.yaml b/qa/suites/smoke/basic/tasks/rados_api_tests.yaml index 38bbeb3a5907..ef4c50fc0b0b 100644 --- a/qa/suites/smoke/basic/tasks/rados_api_tests.yaml +++ b/qa/suites/smoke/basic/tasks/rados_api_tests.yaml @@ -11,7 +11,7 @@ tasks: - \(CACHE_POOL_ - \(SMALLER_PGP_NUM\) - \(OBJECT_ - - \(REQUEST_SLOW\) + - \(SLOW_OPS\) - \(TOO_FEW_PGS\) - reached quota - but it is still running diff --git a/qa/suites/smoke/basic/tasks/rados_bench.yaml b/qa/suites/smoke/basic/tasks/rados_bench.yaml index 08f69c9591c2..0c77640f9b32 100644 --- a/qa/suites/smoke/basic/tasks/rados_bench.yaml +++ b/qa/suites/smoke/basic/tasks/rados_bench.yaml @@ -20,7 +20,6 @@ tasks: - \(CACHE_POOL_ - \(SMALLER_PGP_NUM\) - \(OBJECT_ - - \(REQUEST_SLOW\) - \(SLOW_OPS\) - \(TOO_FEW_PGS\) - thrashosds: diff --git a/qa/suites/smoke/basic/tasks/rados_cache_snaps.yaml b/qa/suites/smoke/basic/tasks/rados_cache_snaps.yaml index 0054e96d8e78..38a04979d05d 100644 --- a/qa/suites/smoke/basic/tasks/rados_cache_snaps.yaml +++ b/qa/suites/smoke/basic/tasks/rados_cache_snaps.yaml @@ -10,7 +10,6 @@ tasks: - \(CACHE_POOL_ - \(SMALLER_PGP_NUM\) - \(OBJECT_ - - \(REQUEST_SLOW\) - \(SLOW_OPS\) - \(TOO_FEW_PGS\) - thrashosds: diff --git a/qa/suites/smoke/basic/tasks/rados_ec_snaps.yaml b/qa/suites/smoke/basic/tasks/rados_ec_snaps.yaml index f0c58286330d..32c9c767393f 100644 --- a/qa/suites/smoke/basic/tasks/rados_ec_snaps.yaml +++ b/qa/suites/smoke/basic/tasks/rados_ec_snaps.yaml @@ -11,7 +11,6 @@ tasks: - \(CACHE_POOL_ - \(SMALLER_PGP_NUM\) - \(OBJECT_ - - \(REQUEST_SLOW\) - \(SLOW_OPS\) - \(TOO_FEW_PGS\) - thrashosds: diff --git a/qa/suites/smoke/basic/tasks/rbd_fsx.yaml b/qa/suites/smoke/basic/tasks/rbd_fsx.yaml index 770b2c3a2c1b..8440c80c0a69 100644 --- a/qa/suites/smoke/basic/tasks/rbd_fsx.yaml +++ b/qa/suites/smoke/basic/tasks/rbd_fsx.yaml @@ -9,7 +9,6 @@ overrides: - \(CACHE_POOL_ - \(SMALLER_PGP_NUM\) - \(OBJECT_ - - \(REQUEST_SLOW\) - \(SLOW_OPS\) - \(TOO_FEW_PGS\) conf: diff --git a/qa/tasks/thrashosds-health.yaml b/qa/tasks/thrashosds-health.yaml index 4fddb05b8796..0f4e6aa176f8 100644 --- a/qa/tasks/thrashosds-health.yaml +++ b/qa/tasks/thrashosds-health.yaml @@ -9,6 +9,5 @@ overrides: - \(CACHE_POOL_ - \(SMALLER_PGP_NUM\) - \(OBJECT_ - - \(REQUEST_SLOW\) - \(SLOW_OPS\) - \(TOO_FEW_PGS\) diff --git a/src/mon/PGMap.cc b/src/mon/PGMap.cc index 5045337a5e70..441071c32b68 100644 --- a/src/mon/PGMap.cc +++ b/src/mon/PGMap.cc @@ -2621,7 +2621,9 @@ void PGMap::get_health_checks( // REQUEST_SLOW // REQUEST_STUCK - if (cct->_conf->mon_osd_warn_op_age > 0 && + // SLOW_OPS unifies them in mimic. + if (osdmap.require_osd_release < CEPH_RELEASE_MIMIC && + cct->_conf->mon_osd_warn_op_age > 0 && !osd_sum.op_queue_age_hist.h.empty() && osd_sum.op_queue_age_hist.upper_bound() / 1000.0 > cct->_conf->mon_osd_warn_op_age) {