The previous PR changed the thrasher suites so that they only exercised
the host thrasher. This update fixes that by removing `thrash_hosts` from
the existing thrasher settings and adding dedicated YAML files for host thrashing.
Fixes: https://tracker.ceph.com/issues/66657
Signed-off-by: Nitzan Mordechai <nmordech@redhat.com>
- thrashosds:
timeout: 1200
min_in: 2
- thrash_hosts: true
chance_pgnum_grow: 1
chance_pgnum_shrink: 1
chance_pgpnum_fix: 1
--- /dev/null
+overrides:
+ ceph:
+ log-ignorelist:
+ - but it is still running
+ - objects unfound and apparently lost
+ conf:
+ osd:
+ osd debug reject backfill probability: .3
+ osd scrub min interval: 60
+ osd scrub max interval: 120
+ osd max backfills: 3
+ osd snap trim sleep: 2
+ mon:
+ mon min osdmap epochs: 50
+ paxos service trim min: 10
+ # prune full osdmaps regularly
+ mon osdmap full prune min: 15
+ mon osdmap full prune interval: 2
+ mon osdmap full prune txsize: 2
+tasks:
+- thrashosds:
+ timeout: 1200
+ min_in: 2
+ thrash_hosts: true
+ chance_pgnum_grow: 1
+ chance_pgnum_shrink: 1
+ chance_pgpnum_fix: 1
+ aggressive_pg_num_changes: false
- thrashosds:
timeout: 1200
min_in: 2
- thrash_hosts: true
chance_pgnum_grow: 1
chance_pgnum_shrink: 1
chance_pgpnum_fix: 1
--- /dev/null
+overrides:
+ ceph:
+ log-ignorelist:
+ - but it is still running
+ - objects unfound and apparently lost
+ conf:
+ osd:
+ osd debug reject backfill probability: .3
+ osd scrub min interval: 60
+ osd scrub max interval: 120
+ osd max backfills: 3
+ osd snap trim sleep: 2
+ osd delete sleep: 1
+ mon:
+ mon min osdmap epochs: 50
+ paxos service trim min: 10
+ # prune full osdmaps regularly
+ mon osdmap full prune min: 15
+ mon osdmap full prune interval: 2
+ mon osdmap full prune txsize: 2
+tasks:
+- thrashosds:
+ timeout: 1200
+ min_in: 2
+ thrash_hosts: true
+ chance_pgnum_grow: 1
+ chance_pgnum_shrink: 1
+ chance_pgpnum_fix: 1
+ chance_bluestore_reshard: 1
+ bluestore_new_sharding: random
--- /dev/null
+overrides:
+ ceph:
+ log-ignorelist:
+ - but it is still running
+ - objects unfound and apparently lost
+ - osd_map_cache_size
+ conf:
+ mon:
+ mon min osdmap epochs: 50
+ paxos service trim min: 10
+ # prune full osdmaps regularly
+ mon osdmap full prune min: 15
+ mon osdmap full prune interval: 2
+ mon osdmap full prune txsize: 2
+ osd:
+ osd map cache size: 1
+ osd scrub min interval: 60
+ osd scrub max interval: 120
+ osd scrub during recovery: false
+ osd max backfills: 6
+ osd beacon report interval: 30
+tasks:
+- thrashosds:
+ timeout: 1800
+ min_in: 2
+ thrash_hosts: true
+ chance_pgnum_grow: 0.25
+ chance_pgnum_shrink: 0.25
+ chance_pgpnum_fix: 0.25
+ chance_test_map_discontinuity: 2
+ map_discontinuity_sleep_time: 200
- thrashosds:
timeout: 1200
min_in: 2
- thrash_hosts: true
chance_pgnum_grow: 2
chance_pgpnum_fix: 1
--- /dev/null
+overrides:
+ ceph:
+ log-ignorelist:
+ - but it is still running
+ - objects unfound and apparently lost
+ conf:
+ osd:
+ osd scrub min interval: 60
+ osd scrub max interval: 120
+ filestore odsync write: true
+ osd max backfills: 2
+ osd snap trim sleep: .5
+ mon:
+ mon min osdmap epochs: 50
+ paxos service trim min: 10
+ # prune full osdmaps regularly
+ mon osdmap full prune min: 15
+ mon osdmap full prune interval: 2
+ mon osdmap full prune txsize: 2
+tasks:
+- thrashosds:
+ timeout: 1200
+ min_in: 2
+ thrash_hosts: true
+ chance_pgnum_grow: 2
+ chance_pgpnum_fix: 1