]> git.apps.os.sepia.ceph.com Git - ceph.git/commitdiff
qa/suites/rados: increase debug && msgr-failures/none && white list
author: Kamoltat Sirivadhna <ksirivad@redhat.com>
Wed, 21 May 2025 20:55:04 +0000 (20:55 +0000)
committer: Kamoltat Sirivadhna <ksirivad@redhat.com>
Fri, 18 Jul 2025 05:13:03 +0000 (05:13 +0000)
Bump the mon debug level to 30 for the RADOS suites,
and bump debug_ms to 20 for the mons in
rados/monthrash and rados/multimon.

Add a msgr-failures/none scenario to the multimon and monthrash suites.
This is a control scenario in which MON_NETSPLIT can only be generated
organically, by an actual monitor network partition.

Whitelist the MON_NETSPLIT health warning in the msgr-failures cases (excluding none)
for both the multimon and monthrash suites. This is because every
msgr-failures scenario other than `none` enables ms_inject_socket_failures,
so any MON_NETSPLIT it triggers is not an organic case of MON_NETSPLIT.

Fixes: https://tracker.ceph.com/issues/71344
Signed-off-by: Kamoltat Sirivadhna <ksirivad@redhat.com>
qa/config/rados.yaml
qa/suites/rados/monthrash/ceph.yaml
qa/suites/rados/monthrash/msgr-failures/few.yaml
qa/suites/rados/monthrash/msgr-failures/mon-delay.yaml
qa/suites/rados/monthrash/msgr-failures/none.yaml [new file with mode: 0644]
qa/suites/rados/multimon/ceph.yaml [new file with mode: 0644]
qa/suites/rados/multimon/msgr-failures/few.yaml
qa/suites/rados/multimon/msgr-failures/many.yaml
qa/suites/rados/multimon/msgr-failures/none.yaml [new file with mode: 0644]

index 710847f594b8f2993d7e717588f505e3cde01055..ad006bab444d74c490fa5277c1384d10661a1430 100644 (file)
@@ -11,3 +11,4 @@ overrides:
         osd mclock profile: high_recovery_ops
       mon:
         mon scrub interval: 300
+        debug mon: 30
\ No newline at end of file
index 8055fe372214046399a5ceb84028bd7cb7813ee8..f86281399004140081231441b8b4eb3ce2e07cc5 100644 (file)
@@ -13,6 +13,7 @@ overrides:
         mon osdmap full prune txsize: 2
         mon scrub inject crc mismatch: 0.01
         mon scrub inject missing keys: 0.05
+        debug ms: 20
 # thrashing monitors may make mgr have trouble w/ its keepalive
     log-ignorelist:
       - ScrubResult
index 519288992fea6848f2448e9f305e8b580fe76137..87a438d5f1fd87496893d1e5769f88434a0a25aa 100644 (file)
@@ -6,3 +6,4 @@ overrides:
         mon client directed command retry: 5
     log-ignorelist:
       - \(OSD_SLOW_PING_TIME
+      - \(MON_NETSPLIT)
index 83b1365183e78e4366ad7b9aa1c26181c5192671..3d7fad08cc6d6410f96cdaa7fcaea0a43b40d591 100644 (file)
@@ -12,3 +12,4 @@ overrides:
         debug monc: 10
     log-ignorelist:
       - \(OSD_SLOW_PING_TIME
+      - \(MON_NETSPLIT)
diff --git a/qa/suites/rados/monthrash/msgr-failures/none.yaml b/qa/suites/rados/monthrash/msgr-failures/none.yaml
new file mode 100644 (file)
index 0000000..23d355f
--- /dev/null
@@ -0,0 +1,6 @@
+overrides:
+  ceph:
+    conf:
+      global:
+        ms inject socket failures: 0
+
diff --git a/qa/suites/rados/multimon/ceph.yaml b/qa/suites/rados/multimon/ceph.yaml
new file mode 100644 (file)
index 0000000..44bda90
--- /dev/null
@@ -0,0 +1,6 @@
+overrides:
+  ceph:
+    conf:
+      mon:
+        debug ms: 20
+
index 519288992fea6848f2448e9f305e8b580fe76137..87a438d5f1fd87496893d1e5769f88434a0a25aa 100644 (file)
@@ -6,3 +6,4 @@ overrides:
         mon client directed command retry: 5
     log-ignorelist:
       - \(OSD_SLOW_PING_TIME
+      - \(MON_NETSPLIT)
index d47b466b90d65e26738fac9b81cf3ed06431f75d..87ca8e3b7d7b54886f760a1b4a482c75bf737e33 100644 (file)
@@ -7,3 +7,4 @@ overrides:
         mon mgr beacon grace: 90
     log-ignorelist:
       - \(OSD_SLOW_PING_TIME
+      - \(MON_NETSPLIT)
diff --git a/qa/suites/rados/multimon/msgr-failures/none.yaml b/qa/suites/rados/multimon/msgr-failures/none.yaml
new file mode 100644 (file)
index 0000000..23d355f
--- /dev/null
@@ -0,0 +1,6 @@
+overrides:
+  ceph:
+    conf:
+      global:
+        ms inject socket failures: 0
+