From: Bill Scales Date: Fri, 21 Nov 2025 10:06:22 +0000 (+0000) Subject: qa: Reduce number of osd threads when using compression X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=ea9588e3304822b75be18ee26d84c0c77e58cbed;p=ceph.git qa: Reduce number of osd threads when using compression Smithi nodes used by teuthology tests have 8 CPU cores and typically run 4 OSD processes. When bluestore software compression is enabled the size of the OSD thread pool needs to be reduced to 2 threads per OSD because these threads can easily use 100% of a core. This avoids excessive context switching, which otherwise leads to OSD threads timing out, which in turn causes the OSD to drop heartbeat pings and the monitor to temporarily mark it down. In extreme cases this can lead to PGs getting stuck in repeated loops of peering until the teuthology test times out. Context switches happen opportunistically at the end of system calls so functions with lots of logging are some of the worst affected. 
Fixes: https://tracker.ceph.com/issues/72879 Signed-off-by: Bill Scales --- diff --git a/qa/objectstore_debug/bluestore/write$/random/compr$/yes$/lz4.yaml b/qa/objectstore_debug/bluestore/write$/random/compr$/yes$/lz4.yaml index 4635ce6f880..dae42267d05 100644 --- a/qa/objectstore_debug/bluestore/write$/random/compr$/yes$/lz4.yaml +++ b/qa/objectstore_debug/bluestore/write$/random/compr$/yes$/lz4.yaml @@ -5,3 +5,5 @@ overrides: bluestore compression mode: aggressive bluestore compression algorithm: lz4 osd_mclock_skip_benchmark: true #new recompression makes us fail the test + osd op num threads per shard: 2 #https://tracker.ceph.com/issues/72879 + osd op num shards: 1 diff --git a/qa/objectstore_debug/bluestore/write$/random/compr$/yes$/snappy.yaml b/qa/objectstore_debug/bluestore/write$/random/compr$/yes$/snappy.yaml index e2f4b3eca17..c336c86632a 100644 --- a/qa/objectstore_debug/bluestore/write$/random/compr$/yes$/snappy.yaml +++ b/qa/objectstore_debug/bluestore/write$/random/compr$/yes$/snappy.yaml @@ -5,3 +5,5 @@ overrides: bluestore compression mode: aggressive bluestore compression algorithm: snappy osd_mclock_skip_benchmark: true #new recompression makes us fail the test + osd op num threads per shard: 2 #https://tracker.ceph.com/issues/72879 + osd op num shards: 1 diff --git a/qa/objectstore_debug/bluestore/write$/random/compr$/yes$/zlib.yaml b/qa/objectstore_debug/bluestore/write$/random/compr$/yes$/zlib.yaml index 686e02abddf..8b08227a65b 100644 --- a/qa/objectstore_debug/bluestore/write$/random/compr$/yes$/zlib.yaml +++ b/qa/objectstore_debug/bluestore/write$/random/compr$/yes$/zlib.yaml @@ -5,3 +5,5 @@ overrides: bluestore compression mode: aggressive bluestore compression algorithm: zlib osd_mclock_skip_benchmark: true #new recompression makes us fail the test + osd op num threads per shard: 2 #https://tracker.ceph.com/issues/72879 + osd op num shards: 1 diff --git a/qa/objectstore_debug/bluestore/write$/random/compr$/yes$/zstd.yaml 
b/qa/objectstore_debug/bluestore/write$/random/compr$/yes$/zstd.yaml index fc37fc18caa..2235cc8b408 100644 --- a/qa/objectstore_debug/bluestore/write$/random/compr$/yes$/zstd.yaml +++ b/qa/objectstore_debug/bluestore/write$/random/compr$/yes$/zstd.yaml @@ -5,3 +5,5 @@ overrides: bluestore compression mode: aggressive bluestore compression algorithm: zstd osd_mclock_skip_benchmark: true #new recompression makes us fail the test + osd op num threads per shard: 2 #https://tracker.ceph.com/issues/72879 + osd op num shards: 1 diff --git a/qa/objectstore_debug/bluestore/write$/v1/compr$/yes$/lz4.yaml b/qa/objectstore_debug/bluestore/write$/v1/compr$/yes$/lz4.yaml index a565dc4acb1..8032a751ccd 100644 --- a/qa/objectstore_debug/bluestore/write$/v1/compr$/yes$/lz4.yaml +++ b/qa/objectstore_debug/bluestore/write$/v1/compr$/yes$/lz4.yaml @@ -4,3 +4,5 @@ overrides: osd: bluestore compression mode: aggressive bluestore compression algorithm: lz4 + osd op num threads per shard: 2 #https://tracker.ceph.com/issues/72879 + osd op num shards: 1 diff --git a/qa/objectstore_debug/bluestore/write$/v1/compr$/yes$/snappy.yaml b/qa/objectstore_debug/bluestore/write$/v1/compr$/yes$/snappy.yaml index 7e4493bbebc..b6f92cdacd4 100644 --- a/qa/objectstore_debug/bluestore/write$/v1/compr$/yes$/snappy.yaml +++ b/qa/objectstore_debug/bluestore/write$/v1/compr$/yes$/snappy.yaml @@ -4,3 +4,5 @@ overrides: osd: bluestore compression mode: aggressive bluestore compression algorithm: snappy + osd op num threads per shard: 2 #https://tracker.ceph.com/issues/72879 + osd op num shards: 1 diff --git a/qa/objectstore_debug/bluestore/write$/v1/compr$/yes$/zlib.yaml b/qa/objectstore_debug/bluestore/write$/v1/compr$/yes$/zlib.yaml index 83ec6c8b4a7..f07b2f21b1f 100644 --- a/qa/objectstore_debug/bluestore/write$/v1/compr$/yes$/zlib.yaml +++ b/qa/objectstore_debug/bluestore/write$/v1/compr$/yes$/zlib.yaml @@ -4,3 +4,5 @@ overrides: osd: bluestore compression mode: aggressive bluestore compression algorithm: 
zlib + osd op num threads per shard: 2 #https://tracker.ceph.com/issues/72879 + osd op num shards: 1 diff --git a/qa/objectstore_debug/bluestore/write$/v1/compr$/yes$/zstd.yaml b/qa/objectstore_debug/bluestore/write$/v1/compr$/yes$/zstd.yaml index 5149d8aaaf7..9a02ae7c869 100644 --- a/qa/objectstore_debug/bluestore/write$/v1/compr$/yes$/zstd.yaml +++ b/qa/objectstore_debug/bluestore/write$/v1/compr$/yes$/zstd.yaml @@ -4,3 +4,5 @@ overrides: osd: bluestore compression mode: aggressive bluestore compression algorithm: zstd + osd op num threads per shard: 2 #https://tracker.ceph.com/issues/72879 + osd op num shards: 1 diff --git a/qa/objectstore_debug/bluestore/write$/v2/compr$/yes$/lz4.yaml b/qa/objectstore_debug/bluestore/write$/v2/compr$/yes$/lz4.yaml index 4635ce6f880..dae42267d05 100644 --- a/qa/objectstore_debug/bluestore/write$/v2/compr$/yes$/lz4.yaml +++ b/qa/objectstore_debug/bluestore/write$/v2/compr$/yes$/lz4.yaml @@ -5,3 +5,5 @@ overrides: bluestore compression mode: aggressive bluestore compression algorithm: lz4 osd_mclock_skip_benchmark: true #new recompression makes us fail the test + osd op num threads per shard: 2 #https://tracker.ceph.com/issues/72879 + osd op num shards: 1 diff --git a/qa/objectstore_debug/bluestore/write$/v2/compr$/yes$/snappy.yaml b/qa/objectstore_debug/bluestore/write$/v2/compr$/yes$/snappy.yaml index e2f4b3eca17..c336c86632a 100644 --- a/qa/objectstore_debug/bluestore/write$/v2/compr$/yes$/snappy.yaml +++ b/qa/objectstore_debug/bluestore/write$/v2/compr$/yes$/snappy.yaml @@ -5,3 +5,5 @@ overrides: bluestore compression mode: aggressive bluestore compression algorithm: snappy osd_mclock_skip_benchmark: true #new recompression makes us fail the test + osd op num threads per shard: 2 #https://tracker.ceph.com/issues/72879 + osd op num shards: 1 diff --git a/qa/objectstore_debug/bluestore/write$/v2/compr$/yes$/zlib.yaml b/qa/objectstore_debug/bluestore/write$/v2/compr$/yes$/zlib.yaml index 686e02abddf..8b08227a65b 100644 --- 
a/qa/objectstore_debug/bluestore/write$/v2/compr$/yes$/zlib.yaml +++ b/qa/objectstore_debug/bluestore/write$/v2/compr$/yes$/zlib.yaml @@ -5,3 +5,5 @@ overrides: bluestore compression mode: aggressive bluestore compression algorithm: zlib osd_mclock_skip_benchmark: true #new recompression makes us fail the test + osd op num threads per shard: 2 #https://tracker.ceph.com/issues/72879 + osd op num shards: 1 diff --git a/qa/objectstore_debug/bluestore/write$/v2/compr$/yes$/zstd.yaml b/qa/objectstore_debug/bluestore/write$/v2/compr$/yes$/zstd.yaml index fc37fc18caa..2235cc8b408 100644 --- a/qa/objectstore_debug/bluestore/write$/v2/compr$/yes$/zstd.yaml +++ b/qa/objectstore_debug/bluestore/write$/v2/compr$/yes$/zstd.yaml @@ -5,3 +5,5 @@ overrides: bluestore compression mode: aggressive bluestore compression algorithm: zstd osd_mclock_skip_benchmark: true #new recompression makes us fail the test + osd op num threads per shard: 2 #https://tracker.ceph.com/issues/72879 + osd op num shards: 1