From: Nitzan Mordechai Date: Wed, 26 Jun 2024 08:01:09 +0000 (+0000) Subject: suites: check for host thrasher X-Git-Tag: v20.0.0~1461^2 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=89d695fb8b684086a1c13b5727ddba6e201d439d;p=ceph.git suites: check for host thrasher The previous PR modified the thrasher suites so that they only ran with host thrashing enabled. This update fixes that issue by removing `thrash_hosts: true` from the existing thrasher files and keeping the host-thrashing settings in dedicated `*_host.yaml` files. Fixes: https://tracker.ceph.com/issues/66657 Signed-off-by: Nitzan Mordechai --- diff --git a/qa/suites/rados/thrash/thrashers/careful.yaml b/qa/suites/rados/thrash/thrashers/careful.yaml index 7a866fc009d03..47d2db48169f8 100644 --- a/qa/suites/rados/thrash/thrashers/careful.yaml +++ b/qa/suites/rados/thrash/thrashers/careful.yaml @@ -21,7 +21,6 @@ tasks: - thrashosds: timeout: 1200 min_in: 2 - thrash_hosts: true chance_pgnum_grow: 1 chance_pgnum_shrink: 1 chance_pgpnum_fix: 1 diff --git a/qa/suites/rados/thrash/thrashers/careful_host.yaml b/qa/suites/rados/thrash/thrashers/careful_host.yaml new file mode 100644 index 0000000000000..7a866fc009d03 --- /dev/null +++ b/qa/suites/rados/thrash/thrashers/careful_host.yaml @@ -0,0 +1,28 @@ +overrides: + ceph: + log-ignorelist: + - but it is still running + - objects unfound and apparently lost + conf: + osd: + osd debug reject backfill probability: .3 + osd scrub min interval: 60 + osd scrub max interval: 120 + osd max backfills: 3 + osd snap trim sleep: 2 + mon: + mon min osdmap epochs: 50 + paxos service trim min: 10 + # prune full osdmaps regularly + mon osdmap full prune min: 15 + mon osdmap full prune interval: 2 + mon osdmap full prune txsize: 2 +tasks: +- thrashosds: + timeout: 1200 + min_in: 2 + thrash_hosts: true + chance_pgnum_grow: 1 + chance_pgnum_shrink: 1 + chance_pgpnum_fix: 1 + aggressive_pg_num_changes: false diff --git a/qa/suites/rados/thrash/thrashers/default.yaml b/qa/suites/rados/thrash/thrashers/default.yaml index 
6a0cf59a2afd4..592c778004634 100644 --- a/qa/suites/rados/thrash/thrashers/default.yaml +++ b/qa/suites/rados/thrash/thrashers/default.yaml @@ -22,7 +22,6 @@ tasks: - thrashosds: timeout: 1200 min_in: 2 - thrash_hosts: true chance_pgnum_grow: 1 chance_pgnum_shrink: 1 chance_pgpnum_fix: 1 diff --git a/qa/suites/rados/thrash/thrashers/default_host.yaml b/qa/suites/rados/thrash/thrashers/default_host.yaml new file mode 100644 index 0000000000000..6a0cf59a2afd4 --- /dev/null +++ b/qa/suites/rados/thrash/thrashers/default_host.yaml @@ -0,0 +1,30 @@ +overrides: + ceph: + log-ignorelist: + - but it is still running + - objects unfound and apparently lost + conf: + osd: + osd debug reject backfill probability: .3 + osd scrub min interval: 60 + osd scrub max interval: 120 + osd max backfills: 3 + osd snap trim sleep: 2 + osd delete sleep: 1 + mon: + mon min osdmap epochs: 50 + paxos service trim min: 10 + # prune full osdmaps regularly + mon osdmap full prune min: 15 + mon osdmap full prune interval: 2 + mon osdmap full prune txsize: 2 +tasks: +- thrashosds: + timeout: 1200 + min_in: 2 + thrash_hosts: true + chance_pgnum_grow: 1 + chance_pgnum_shrink: 1 + chance_pgpnum_fix: 1 + chance_bluestore_reshard: 1 + bluestore_new_sharding: random diff --git a/qa/suites/rados/thrash/thrashers/mapgap_host.yaml b/qa/suites/rados/thrash/thrashers/mapgap_host.yaml new file mode 100644 index 0000000000000..707eaa97492af --- /dev/null +++ b/qa/suites/rados/thrash/thrashers/mapgap_host.yaml @@ -0,0 +1,31 @@ +overrides: + ceph: + log-ignorelist: + - but it is still running + - objects unfound and apparently lost + - osd_map_cache_size + conf: + mon: + mon min osdmap epochs: 50 + paxos service trim min: 10 + # prune full osdmaps regularly + mon osdmap full prune min: 15 + mon osdmap full prune interval: 2 + mon osdmap full prune txsize: 2 + osd: + osd map cache size: 1 + osd scrub min interval: 60 + osd scrub max interval: 120 + osd scrub during recovery: false + osd max backfills: 6 + osd 
beacon report interval: 30 +tasks: +- thrashosds: + timeout: 1800 + min_in: 2 + thrash_hosts: true + chance_pgnum_grow: 0.25 + chance_pgnum_shrink: 0.25 + chance_pgpnum_fix: 0.25 + chance_test_map_discontinuity: 2 + map_discontinuity_sleep_time: 200 diff --git a/qa/suites/rados/thrash/thrashers/pggrow.yaml b/qa/suites/rados/thrash/thrashers/pggrow.yaml index d3c5a63d0d222..79f96fd0a8d27 100644 --- a/qa/suites/rados/thrash/thrashers/pggrow.yaml +++ b/qa/suites/rados/thrash/thrashers/pggrow.yaml @@ -21,6 +21,5 @@ tasks: - thrashosds: timeout: 1200 min_in: 2 - thrash_hosts: true chance_pgnum_grow: 2 chance_pgpnum_fix: 1 diff --git a/qa/suites/rados/thrash/thrashers/pggrow_host.yaml b/qa/suites/rados/thrash/thrashers/pggrow_host.yaml new file mode 100644 index 0000000000000..d3c5a63d0d222 --- /dev/null +++ b/qa/suites/rados/thrash/thrashers/pggrow_host.yaml @@ -0,0 +1,26 @@ +overrides: + ceph: + log-ignorelist: + - but it is still running + - objects unfound and apparently lost + conf: + osd: + osd scrub min interval: 60 + osd scrub max interval: 120 + filestore odsync write: true + osd max backfills: 2 + osd snap trim sleep: .5 + mon: + mon min osdmap epochs: 50 + paxos service trim min: 10 + # prune full osdmaps regularly + mon osdmap full prune min: 15 + mon osdmap full prune interval: 2 + mon osdmap full prune txsize: 2 +tasks: +- thrashosds: + timeout: 1200 + min_in: 2 + thrash_hosts: true + chance_pgnum_grow: 2 + chance_pgpnum_fix: 1