From 5529b984a10c2426c4e8cc82f56a4d9f725dabef Mon Sep 17 00:00:00 2001 From: Kamoltat Sirivadhna Date: Wed, 21 May 2025 20:55:04 +0000 Subject: [PATCH] qa/suites/rados: increase debug && msgr-failures/none && white list Bump the mon debug level to 30 in RADOS and bump debug_ms for the mon in rados/monthrash && rados/multimon. Add a msgr-failures/none scenario to the multimon and monthrash suites. This is a control scenario, where MON_NETSPLIT can only be generated organically, by an actual monitor network partition. Whitelist the MON_NETSPLIT health warning in msgr-failures cases (excluding none) for both multimon and monthrash suites. This is because every msgr-failures case other than `none` injects socket failures (ms_inject_socket_failures), so a MON_NETSPLIT raised there is not an organic one. Fixes: https://tracker.ceph.com/issues/71344 Signed-off-by: Kamoltat Sirivadhna --- qa/config/rados.yaml | 1 + qa/suites/rados/monthrash/ceph.yaml | 1 + qa/suites/rados/monthrash/msgr-failures/few.yaml | 1 + qa/suites/rados/monthrash/msgr-failures/mon-delay.yaml | 1 + qa/suites/rados/monthrash/msgr-failures/none.yaml | 6 ++++++ qa/suites/rados/multimon/ceph.yaml | 6 ++++++ qa/suites/rados/multimon/msgr-failures/few.yaml | 1 + qa/suites/rados/multimon/msgr-failures/many.yaml | 1 + qa/suites/rados/multimon/msgr-failures/none.yaml | 6 ++++++ 9 files changed, 24 insertions(+) create mode 100644 qa/suites/rados/monthrash/msgr-failures/none.yaml create mode 100644 qa/suites/rados/multimon/ceph.yaml create mode 100644 qa/suites/rados/multimon/msgr-failures/none.yaml diff --git a/qa/config/rados.yaml b/qa/config/rados.yaml index 710847f594b..ad006bab444 100644 --- a/qa/config/rados.yaml +++ b/qa/config/rados.yaml @@ -11,3 +11,4 @@ overrides: osd mclock profile: high_recovery_ops mon: mon scrub interval: 300 + debug mon: 30 \ No newline at end of file diff --git a/qa/suites/rados/monthrash/ceph.yaml b/qa/suites/rados/monthrash/ceph.yaml index 8055fe37221..f8628139900 100644 --- a/qa/suites/rados/monthrash/ceph.yaml 
+++ b/qa/suites/rados/monthrash/ceph.yaml @@ -13,6 +13,7 @@ overrides: mon osdmap full prune txsize: 2 mon scrub inject crc mismatch: 0.01 mon scrub inject missing keys: 0.05 + debug ms: 20 # thrashing monitors may make mgr have trouble w/ its keepalive log-ignorelist: - ScrubResult diff --git a/qa/suites/rados/monthrash/msgr-failures/few.yaml b/qa/suites/rados/monthrash/msgr-failures/few.yaml index 519288992fe..87a438d5f1f 100644 --- a/qa/suites/rados/monthrash/msgr-failures/few.yaml +++ b/qa/suites/rados/monthrash/msgr-failures/few.yaml @@ -6,3 +6,4 @@ overrides: mon client directed command retry: 5 log-ignorelist: - \(OSD_SLOW_PING_TIME + - \(MON_NETSPLIT) diff --git a/qa/suites/rados/monthrash/msgr-failures/mon-delay.yaml b/qa/suites/rados/monthrash/msgr-failures/mon-delay.yaml index 83b1365183e..3d7fad08cc6 100644 --- a/qa/suites/rados/monthrash/msgr-failures/mon-delay.yaml +++ b/qa/suites/rados/monthrash/msgr-failures/mon-delay.yaml @@ -12,3 +12,4 @@ overrides: debug monc: 10 log-ignorelist: - \(OSD_SLOW_PING_TIME + - \(MON_NETSPLIT) diff --git a/qa/suites/rados/monthrash/msgr-failures/none.yaml b/qa/suites/rados/monthrash/msgr-failures/none.yaml new file mode 100644 index 00000000000..23d355fb86d --- /dev/null +++ b/qa/suites/rados/monthrash/msgr-failures/none.yaml @@ -0,0 +1,6 @@ +overrides: + ceph: + conf: + global: + ms inject socket failures: 0 + diff --git a/qa/suites/rados/multimon/ceph.yaml b/qa/suites/rados/multimon/ceph.yaml new file mode 100644 index 00000000000..44bda90fd75 --- /dev/null +++ b/qa/suites/rados/multimon/ceph.yaml @@ -0,0 +1,6 @@ +overrides: + ceph: + conf: + mon: + debug ms: 20 + diff --git a/qa/suites/rados/multimon/msgr-failures/few.yaml b/qa/suites/rados/multimon/msgr-failures/few.yaml index 519288992fe..87a438d5f1f 100644 --- a/qa/suites/rados/multimon/msgr-failures/few.yaml +++ b/qa/suites/rados/multimon/msgr-failures/few.yaml @@ -6,3 +6,4 @@ overrides: mon client directed command retry: 5 log-ignorelist: - \(OSD_SLOW_PING_TIME 
+ - \(MON_NETSPLIT) diff --git a/qa/suites/rados/multimon/msgr-failures/many.yaml b/qa/suites/rados/multimon/msgr-failures/many.yaml index d47b466b90d..87ca8e3b7d7 100644 --- a/qa/suites/rados/multimon/msgr-failures/many.yaml +++ b/qa/suites/rados/multimon/msgr-failures/many.yaml @@ -7,3 +7,4 @@ overrides: mon mgr beacon grace: 90 log-ignorelist: - \(OSD_SLOW_PING_TIME + - \(MON_NETSPLIT) diff --git a/qa/suites/rados/multimon/msgr-failures/none.yaml b/qa/suites/rados/multimon/msgr-failures/none.yaml new file mode 100644 index 00000000000..23d355fb86d --- /dev/null +++ b/qa/suites/rados/multimon/msgr-failures/none.yaml @@ -0,0 +1,6 @@ +overrides: + ceph: + conf: + global: + ms inject socket failures: 0 + -- 2.39.5