git-server-git.apps.pok.os.sepia.ceph.com Git - ceph.git/commitdiff
suites: check for host thrasher 58275/head
author: Nitzan Mordechai <nmordech@redhat.com>
Wed, 26 Jun 2024 08:01:09 +0000 (08:01 +0000)
committer: Nitzan Mordechai <nmordech@redhat.com>
Wed, 26 Jun 2024 12:16:48 +0000 (12:16 +0000)
The last PR modified the thrasher suites so that they only ran with the
host thrasher enabled. This update fixes that issue by removing
`thrash_hosts: true` from the existing thrasher settings and adding
dedicated `*_host.yaml` files that enable host thrashing.

Fixes: https://tracker.ceph.com/issues/66657
Signed-off-by: Nitzan Mordechai <nmordech@redhat.com>
qa/suites/rados/thrash/thrashers/careful.yaml
qa/suites/rados/thrash/thrashers/careful_host.yaml [new file with mode: 0644]
qa/suites/rados/thrash/thrashers/default.yaml
qa/suites/rados/thrash/thrashers/default_host.yaml [new file with mode: 0644]
qa/suites/rados/thrash/thrashers/mapgap_host.yaml [new file with mode: 0644]
qa/suites/rados/thrash/thrashers/pggrow.yaml
qa/suites/rados/thrash/thrashers/pggrow_host.yaml [new file with mode: 0644]

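diff --git a/qa/suites/rados/thrash/thrashers/careful.yaml b/qa/suites/rados/thrash/thrashers/careful.yaml
index 7a866fc009d03de89fe0b779aa970579fd942a91..47d2db48169f80042a7a584558069fb156129e36 100644 (file)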
@@ -21,7 +21,6 @@ tasks:
 - thrashosds:
     timeout: 1200
     min_in: 2
-    thrash_hosts: true
     chance_pgnum_grow: 1
     chance_pgnum_shrink: 1
     chance_pgpnum_fix: 1
diff --git a/qa/suites/rados/thrash/thrashers/careful_host.yaml b/qa/suites/rados/thrash/thrashers/careful_host.yaml
new file mode 100644 (file)
index 0000000..7a866fc
--- /dev/null
@@ -0,0 +1,28 @@
+overrides:
+  ceph:
+    log-ignorelist:
+    - but it is still running
+    - objects unfound and apparently lost
+    conf:
+      osd:
+        osd debug reject backfill probability: .3
+        osd scrub min interval: 60
+        osd scrub max interval: 120
+        osd max backfills: 3
+        osd snap trim sleep: 2
+      mon:
+        mon min osdmap epochs: 50
+        paxos service trim min: 10
+        # prune full osdmaps regularly
+        mon osdmap full prune min: 15
+        mon osdmap full prune interval: 2
+        mon osdmap full prune txsize: 2
+tasks:
+- thrashosds:
+    timeout: 1200
+    min_in: 2
+    thrash_hosts: true
+    chance_pgnum_grow: 1
+    chance_pgnum_shrink: 1
+    chance_pgpnum_fix: 1
+    aggressive_pg_num_changes: false
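diff --git a/qa/suites/rados/thrash/thrashers/default.yaml b/qa/suites/rados/thrash/thrashers/default.yaml
index 6a0cf59a2afd46168163d1e4751ae3d7e19e6cd1..592c7780046346549b19a4fa39c8d2e12e81a1fc 100644 (file)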
@@ -22,7 +22,6 @@ tasks:
 - thrashosds:
     timeout: 1200
     min_in: 2
-    thrash_hosts: true
     chance_pgnum_grow: 1
     chance_pgnum_shrink: 1
     chance_pgpnum_fix: 1
diff --git a/qa/suites/rados/thrash/thrashers/default_host.yaml b/qa/suites/rados/thrash/thrashers/default_host.yaml
new file mode 100644 (file)
index 0000000..6a0cf59
--- /dev/null
@@ -0,0 +1,30 @@
+overrides:
+  ceph:
+    log-ignorelist:
+    - but it is still running
+    - objects unfound and apparently lost
+    conf:
+      osd:
+        osd debug reject backfill probability: .3
+        osd scrub min interval: 60
+        osd scrub max interval: 120
+        osd max backfills: 3
+        osd snap trim sleep: 2
+        osd delete sleep: 1
+      mon:
+        mon min osdmap epochs: 50
+        paxos service trim min: 10
+        # prune full osdmaps regularly
+        mon osdmap full prune min: 15
+        mon osdmap full prune interval: 2
+        mon osdmap full prune txsize: 2
+tasks:
+- thrashosds:
+    timeout: 1200
+    min_in: 2
+    thrash_hosts: true
+    chance_pgnum_grow: 1
+    chance_pgnum_shrink: 1
+    chance_pgpnum_fix: 1
+    chance_bluestore_reshard: 1
+    bluestore_new_sharding: random
diff --git a/qa/suites/rados/thrash/thrashers/mapgap_host.yaml b/qa/suites/rados/thrash/thrashers/mapgap_host.yaml
new file mode 100644 (file)
index 0000000..707eaa9
--- /dev/null
@@ -0,0 +1,31 @@
+overrides:
+  ceph:
+    log-ignorelist:
+    - but it is still running
+    - objects unfound and apparently lost
+    - osd_map_cache_size
+    conf:
+      mon:
+        mon min osdmap epochs: 50
+        paxos service trim min: 10
+        # prune full osdmaps regularly
+        mon osdmap full prune min: 15
+        mon osdmap full prune interval: 2
+        mon osdmap full prune txsize: 2
+      osd:
+        osd map cache size: 1
+        osd scrub min interval: 60
+        osd scrub max interval: 120
+        osd scrub during recovery: false
+        osd max backfills: 6
+        osd beacon report interval: 30
+tasks:
+- thrashosds:
+    timeout: 1800
+    min_in: 2
+    thrash_hosts: true
+    chance_pgnum_grow: 0.25
+    chance_pgnum_shrink: 0.25
+    chance_pgpnum_fix: 0.25
+    chance_test_map_discontinuity: 2
+    map_discontinuity_sleep_time: 200
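diff --git a/qa/suites/rados/thrash/thrashers/pggrow.yaml b/qa/suites/rados/thrash/thrashers/pggrow.yaml
index d3c5a63d0d2228932304c462b1fa9f05588e2a97..79f96fd0a8d2781a06dad18ada9883387ab0617c 100644 (file)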
@@ -21,6 +21,5 @@ tasks:
 - thrashosds:
     timeout: 1200
     min_in: 2
-    thrash_hosts: true
     chance_pgnum_grow: 2
     chance_pgpnum_fix: 1
diff --git a/qa/suites/rados/thrash/thrashers/pggrow_host.yaml b/qa/suites/rados/thrash/thrashers/pggrow_host.yaml
new file mode 100644 (file)
index 0000000..d3c5a63
--- /dev/null
@@ -0,0 +1,26 @@
+overrides:
+  ceph:
+    log-ignorelist:
+    - but it is still running
+    - objects unfound and apparently lost
+    conf:
+      osd:
+        osd scrub min interval: 60
+        osd scrub max interval: 120
+        filestore odsync write: true
+        osd max backfills: 2
+        osd snap trim sleep: .5
+      mon:
+        mon min osdmap epochs: 50
+        paxos service trim min: 10
+        # prune full osdmaps regularly
+        mon osdmap full prune min: 15
+        mon osdmap full prune interval: 2
+        mon osdmap full prune txsize: 2
+tasks:
+- thrashosds:
+    timeout: 1200
+    min_in: 2
+    thrash_hosts: true
+    chance_pgnum_grow: 2
+    chance_pgpnum_fix: 1