git-server-git.apps.pok.os.sepia.ceph.com Git - ceph.git/commitdiff
qa/suites: added more whitelisting + fix typo [55717/head]
author Kamoltat <ksirivad@redhat.com>
Thu, 22 Feb 2024 16:55:26 +0000 (16:55 +0000)
committer Kamoltat <ksirivad@redhat.com>
Mon, 26 Feb 2024 16:29:00 +0000 (16:29 +0000)
Problem:

1. There is not enough whitelisting for certain cephadm failures.
2. A previous PR that landed has a typo that
causes https://tracker.ceph.com/issues/64452

Solution:

1. Add more whitelisting.
2. Fix the typo that causes https://tracker.ceph.com/issues/64452
(in this diff, the ignorelist entry `mons down:` is corrected to `mons down`).

Fixes: https://tracker.ceph.com/issues/64452
Signed-off-by: Kamoltat <ksirivad@redhat.com>
qa/cephfs/overrides/ignorelist_health.yaml
qa/cephfs/overrides/ignorelist_wrongly_marked_down.yaml
qa/suites/orch/cephadm/dashboard/task/test_e2e.yaml
qa/suites/orch/cephadm/mgr-nfs-upgrade/1-start.yaml
qa/suites/orch/cephadm/osds/2-ops/rmdir-reactivate.yaml
qa/suites/orch/cephadm/thrash/2-thrash.yaml
qa/suites/orch/cephadm/workunits/task/test_nfs.yaml
qa/suites/orch/cephadm/workunits/task/test_orch_cli.yaml
qa/suites/rados/basic/tasks/rados_api_tests.yaml
qa/tasks/thrashosds-health.yaml

index ac6d32045c8dd317912d8d9036976d00347cb622..a698da517b4d27a8413bf270351bbc1e3a3031b2 100644 (file)
@@ -2,7 +2,10 @@ overrides:
   ceph:
     log-ignorelist:
       - overall HEALTH_
+      - \(CEPHADM_STRAY_DAEMON\)
       - \(FS_DEGRADED\)
+      - FS_
+      - \(CEPHADM_
       - \(MDS_FAILED\)
       - \(MDS_DEGRADED\)
       - \(FS_WITH_FAILED_MDS\)
@@ -13,6 +16,7 @@ overrides:
       - \(PG_DEGRADED\)
       - Degraded data redundancy
       - \(PG_
+      - acting
       - MDS_INSUFFICIENT_STANDBY
       - deprecated feature inline_data
       - compat changed unexpectedly
index abd26643bacf006581a6475dbcaf67c3f2898d77..64c8c24f597862d3258ac05e1e4f9c620584e57f 100644 (file)
@@ -7,3 +7,5 @@ overrides:
       - but it is still running
 # MDS daemon 'b' is not responding, replacing it as rank 0 with standby 'a'
       - is not responding
+      - is down
+      - osds down
index b4ed447bf58273fc56dd588a948e0a2c3e9b58f5..ca7268ac689238f443679fbb63c456e091b3a841 100644 (file)
@@ -4,6 +4,25 @@ overrides:
       - \(HOST_IN_MAINTENANCE\)
       - \(OSD_DOWN\)
       - \(MON_DOWN\)
+      - down
+      - overall HEALTH_
+      - \(CEPHADM_STRAY_DAEMON\)
+      - stray daemon
+      - \(FS_DEGRADED\)
+      - \(MDS_FAILED\)
+      - \(MDS_DEGRADED\)
+      - \(FS_WITH_FAILED_MDS\)
+      - \(MDS_DAMAGE\)
+      - \(MDS_ALL_DOWN\)
+      - \(MDS_UP_LESS_THAN_MAX\)
+      - \(FS_INLINE_DATA_DEPRECATED\)
+      - \(PG_DEGRADED\)
+      - Degraded data redundancy
+      - \(PG_
+      - acting
+      - MDS_INSUFFICIENT_STANDBY
+      - deprecated feature inline_data
+      - compat changed unexpectedly
 roles:
 # 3 osd roles on host.a is required for cephadm task. It checks if the cluster is healthy.
 # More daemons will be deployed on both hosts in e2e tests.
index 8a45050d40835500491304bcd231bdafaa40685c..db4b260539494c1cd61e70ec6872209dce8bf9b4 100644 (file)
@@ -1,7 +1,3 @@
-overrides:
-  ceph:
-    log-ignorelist:
-      - slow requests
 tasks:
 - cephadm.shell:
     host.a:
@@ -28,6 +24,21 @@ openstack:
     size: 10 # GB
 overrides:
   ceph:
+    log-ignorelist:
+      - slow requests
+      - \(PG_
+      - PG_
+      - \(CEPHADM_STRAY_DAEMON\)
+      - slow request
+      - \(MDS_
+      - MDS_
+      - osds down
+      - OSD_
+      - \(OSD_
+      - client
+      - FS_
+      - \(FS_
+      - degraded
     conf:
       osd:
         osd shutdown pgref assert: true
index e0706e0dce91d66af488d1030be02793f4441b35..501dea155836acd25b3393a05c90a0303ceecfe5 100644 (file)
@@ -5,6 +5,7 @@ overrides:
       - \(OSD_DOWN\)
       - \(PG_
       - but it is still running
+      - \(CEPHADM_STRAY_DAEMON\)
 tasks:
 - cephadm.shell:
     host.a:
index 591538bad9ca7d2f7522b20f89da08d8b5d9141d..2f45d767658962294ad179d2085e951f521aa37b 100644 (file)
@@ -7,9 +7,19 @@ overrides:
     - \(OSDMAP_FLAGS\)
     - flag\(s\) set
     - \(CACHE_POOL_NO_HIT_SET\)
+    - \(CACHE_
     - \(PG_
     - \(OSD_
-    - mons down:
+    - \(POOL_
+    - \(CEPHADM_STRAY_DAEMON\)
+    - PG_
+    - CACHE_
+    - degraded
+    - backfill
+    - mons down
+    - OSD_
+    - is down
+    - acting
     conf:
       osd:
         osd debug reject backfill probability: .3
index 5e1ea3d5e03b9dbbdc221b665c2dd125832d0317..afa9deecb8e4b4122fbd8188d6b57884f5a266b3 100644 (file)
@@ -3,6 +3,7 @@ overrides:
     log-ignorelist:
     - Replacing daemon mds
     - FS_DEGRADED
+    - \(CEPHADM_STRAY_DAEMON\)
 roles:
 - - host.a
   - osd.0
index 723c6ad16dc6f2a37188f1edf95a2e11912d8fd5..a1b8a4c0f899af226227e39ef6b2c33c3b9c4f8a 100644 (file)
@@ -3,6 +3,7 @@ overrides:
     log-ignorelist:
       - \(MON_DOWN\)
       - \(OSD_DOWN\)
+      - \(CEPHADM_PAUSED\)
       - mons down
 roles:
 - - host.a
index c5c8c45ff6dbbca41df4f7af3f2694f95903f853..47b293e4c135b34d73598ef58f2128e9a0dc4606 100644 (file)
@@ -12,6 +12,7 @@ overrides:
     - \(PG_AVAILABILITY\)
     - \(PG_DEGRADED\)
     - \(MON_DOWN\)
+    - \(CEPHADM_STRAY_DAEMON\)
     - missing hit_sets
     - do not have an application enabled
     - application not enabled on pool
index b3101abf2d55f9a8f649b48cf6855bb3bc4af73e..2340944e88517db29ed6dc09e7e4e642edd4fb95 100644 (file)
@@ -24,3 +24,7 @@ overrides:
       - PG_
       - Reduced data availability
       - stuck undersized
+      - backfill_toofull
+      - is down
+      - stuck peering
+      - acting