From: Jaya Prakash Date: Mon, 18 May 2026 19:57:50 +0000 (+0000) Subject: qa/suites: add faster allocation recovery thrashing suite X-Git-Tag: v21.0.1~7^2 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=a4cb4a236bc00a3bd141e9a3433819c3fd5a1f96;p=ceph.git qa/suites: add faster allocation recovery thrashing suite Signed-off-by: Jaya Prakash --- diff --git a/qa/suites/rados/thrash-faster-alloc-recovery/+ b/qa/suites/rados/thrash-faster-alloc-recovery/+ new file mode 100644 index 00000000000..e69de29bb2d diff --git a/qa/suites/rados/thrash-faster-alloc-recovery/.qa b/qa/suites/rados/thrash-faster-alloc-recovery/.qa new file mode 120000 index 00000000000..a602a0353e7 --- /dev/null +++ b/qa/suites/rados/thrash-faster-alloc-recovery/.qa @@ -0,0 +1 @@ +../.qa/ \ No newline at end of file diff --git a/qa/suites/rados/thrash-faster-alloc-recovery/ceph.yaml b/qa/suites/rados/thrash-faster-alloc-recovery/ceph.yaml new file mode 100644 index 00000000000..67393c5640f --- /dev/null +++ b/qa/suites/rados/thrash-faster-alloc-recovery/ceph.yaml @@ -0,0 +1,6 @@ +tasks: +- install: +- ceph: + conf: + osd: + debug monc: 20 diff --git a/qa/suites/rados/thrash-faster-alloc-recovery/clusters/$ b/qa/suites/rados/thrash-faster-alloc-recovery/clusters/$ new file mode 100644 index 00000000000..e69de29bb2d diff --git a/qa/suites/rados/thrash-faster-alloc-recovery/clusters/.qa b/qa/suites/rados/thrash-faster-alloc-recovery/clusters/.qa new file mode 120000 index 00000000000..a602a0353e7 --- /dev/null +++ b/qa/suites/rados/thrash-faster-alloc-recovery/clusters/.qa @@ -0,0 +1 @@ +../.qa/ \ No newline at end of file diff --git a/qa/suites/rados/thrash-faster-alloc-recovery/clusters/fixed-6-osds.yaml b/qa/suites/rados/thrash-faster-alloc-recovery/clusters/fixed-6-osds.yaml new file mode 100644 index 00000000000..d2f4630e6ae --- /dev/null +++ b/qa/suites/rados/thrash-faster-alloc-recovery/clusters/fixed-6-osds.yaml @@ -0,0 +1,14 @@ +roles: +- - mon.a + - mgr.x + - client.0 +- - osd.0 + - osd.1 +- - osd.2 + - osd.3 +- - osd.4 + - osd.5 +openstack: +- volumes: # attached to each instance + count: 6 + size: 20 # GB diff --git a/qa/suites/rados/thrash-faster-alloc-recovery/faster-alloc-recovery.yaml b/qa/suites/rados/thrash-faster-alloc-recovery/faster-alloc-recovery.yaml new file mode 100644 index 00000000000..224ca5f2c4e --- /dev/null +++ b/qa/suites/rados/thrash-faster-alloc-recovery/faster-alloc-recovery.yaml @@ -0,0 +1,5 @@ +overrides: + ceph: + conf: + osd: + bluestore_debug_fast_recovery_compare_chance: 1.0 \ No newline at end of file diff --git a/qa/suites/rados/thrash-faster-alloc-recovery/rados.yaml b/qa/suites/rados/thrash-faster-alloc-recovery/rados.yaml new file mode 100644 index 00000000000..716796e6af1 --- /dev/null +++ b/qa/suites/rados/thrash-faster-alloc-recovery/rados.yaml @@ -0,0 +1,15 @@ +overrides: + ceph: + conf: + osd: + osd op queue: debug_random + osd op queue cut off: debug_random + osd debug verify missing on start: true + osd debug verify cached snaps: true + bluestore zero block detection: true + osd mclock override recovery settings: true + osd mclock profile: high_recovery_ops + osd mclock skip benchmark: true + mon: + mon scrub interval: 300 + debug mon: 30 diff --git a/qa/suites/rados/thrash-faster-alloc-recovery/recovery-threads/$ b/qa/suites/rados/thrash-faster-alloc-recovery/recovery-threads/$ new file mode 100644 index 00000000000..e69de29bb2d diff --git a/qa/suites/rados/thrash-faster-alloc-recovery/recovery-threads/.qa b/qa/suites/rados/thrash-faster-alloc-recovery/recovery-threads/.qa new file mode 120000 index 00000000000..a602a0353e7 --- /dev/null +++ b/qa/suites/rados/thrash-faster-alloc-recovery/recovery-threads/.qa @@ -0,0 +1 @@ +../.qa/ \ No newline at end of file diff --git a/qa/suites/rados/thrash-faster-alloc-recovery/recovery-threads/6-threads.yaml b/qa/suites/rados/thrash-faster-alloc-recovery/recovery-threads/6-threads.yaml new file mode 100644 index 00000000000..a197988e42d --- /dev/null +++ b/qa/suites/rados/thrash-faster-alloc-recovery/recovery-threads/6-threads.yaml @@ -0,0 +1,5 @@ +overrides: + ceph: + conf: + osd: + bluestore_allocation_recovery_threads: 6 \ No newline at end of file diff --git a/qa/suites/rados/thrash-faster-alloc-recovery/thrashers/$ b/qa/suites/rados/thrash-faster-alloc-recovery/thrashers/$ new file mode 100644 index 00000000000..e69de29bb2d diff --git a/qa/suites/rados/thrash-faster-alloc-recovery/thrashers/.qa b/qa/suites/rados/thrash-faster-alloc-recovery/thrashers/.qa new file mode 120000 index 00000000000..a602a0353e7 --- /dev/null +++ b/qa/suites/rados/thrash-faster-alloc-recovery/thrashers/.qa @@ -0,0 +1 @@ +../.qa/ \ No newline at end of file diff --git a/qa/suites/rados/thrash-faster-alloc-recovery/thrashers/thrasher.yaml b/qa/suites/rados/thrash-faster-alloc-recovery/thrashers/thrasher.yaml new file mode 100644 index 00000000000..b18ed39d526 --- /dev/null +++ b/qa/suites/rados/thrash-faster-alloc-recovery/thrashers/thrasher.yaml @@ -0,0 +1,29 @@ +overrides: + ceph: + log-ignorelist: + - but it is still running + - objects unfound and apparently lost + conf: + osd: + osd debug reject backfill probability: .3 + osd scrub min interval: 60 + osd scrub max interval: 120 + osd max backfills: 3 + osd snap trim sleep: 2 + osd delete sleep: 1 + mon: + mon min osdmap epochs: 50 + paxos service trim min: 10 + mon osdmap full prune min: 15 + mon osdmap full prune interval: 2 + mon osdmap full prune txsize: 2 +tasks: +- thrashosds: + timeout: 3600 + op_delay: 600 + min_in: 2 + chance_pgnum_grow: 1 + chance_pgnum_shrink: 1 + chance_pgpnum_fix: 1 + chance_bluestore_reshard: 1 + bluestore_new_sharding: random diff --git a/qa/suites/rados/thrash-faster-alloc-recovery/thrashosds-health.yaml b/qa/suites/rados/thrash-faster-alloc-recovery/thrashosds-health.yaml new file mode 100644 index 00000000000..764496b2ec9 --- /dev/null +++ b/qa/suites/rados/thrash-faster-alloc-recovery/thrashosds-health.yaml @@ -0,0 +1,44 @@ +overrides: + ceph: + conf: + osd: + osd max markdown count: 1000 + osd blocked scrub grace period: 3600 + log-ignorelist: + - overall HEALTH_ + - \(OSDMAP_FLAGS\) + - \(OSD_ + - \(PG_ + - \(POOL_ + - \(CACHE_POOL_ + - \(SMALLER_PGP_NUM\) + - \(OBJECT_ + - SLOW_OPS + - \(REQUEST_SLOW\) + - \(TOO_FEW_PGS\) + - slow request + - timeout on replica + - late reservation from + - MON_DOWN + - OSDMAP_FLAGS + - OSD_DOWN + - PG_DEGRADED + - PG_AVAILABILITY + - POOL_APP_NOT_ENABLED + - mons down + - mon down + - out of quorum + - noscrub + - nodeep-scrub + - Degraded data redundancy + - is down + - osds down + - pg .*? is .*?degraded.*?, acting + - pg .*? is stuck + - pg degraded + - PG_BACKFILL_FULL + - Low space hindering backfill .*? backfill_toofull + - OSD_ROOT_DOWN + - pgs degraded + - pgs undersized + - is active.*backfill_toofull.* diff --git a/qa/suites/rados/thrash-faster-alloc-recovery/workloads/$ b/qa/suites/rados/thrash-faster-alloc-recovery/workloads/$ new file mode 100644 index 00000000000..e69de29bb2d diff --git a/qa/suites/rados/thrash-faster-alloc-recovery/workloads/.qa b/qa/suites/rados/thrash-faster-alloc-recovery/workloads/.qa new file mode 120000 index 00000000000..a602a0353e7 --- /dev/null +++ b/qa/suites/rados/thrash-faster-alloc-recovery/workloads/.qa @@ -0,0 +1 @@ +../.qa/ \ No newline at end of file diff --git a/qa/suites/rados/thrash-faster-alloc-recovery/workloads/fio-workload.yaml b/qa/suites/rados/thrash-faster-alloc-recovery/workloads/fio-workload.yaml new file mode 100644 index 00000000000..7705c5b9951 --- /dev/null +++ b/qa/suites/rados/thrash-faster-alloc-recovery/workloads/fio-workload.yaml @@ -0,0 +1,21 @@ +overrides: + ceph: + conf: + client.0: + debug ms: 1 + debug objecter: 20 + debug rados: 20 + log-ignorelist: + - \(POOL_APP_NOT_ENABLED\) + - \(OSDMAP_FLAGS\) + - \(OSD_ + - \(OBJECT_ + - \(PG_ + - \(SLOW_OPS\) + - overall HEALTH + - slow request +tasks: +- workunit: + clients: + client.0: + - rados/fio_ec_workload.sh