From 117e9c90234930f8670ee5261d6580e2ce642a42 Mon Sep 17 00:00:00 2001 From: Brad Hubbard Date: Tue, 16 Jul 2024 12:09:01 +1000 Subject: [PATCH] qa: Restrict rados api tests to large clusters and increase timeout Running these tests with thrashers on small clusters leads to many very slow ops due to the cluster being overloaded. That has a tendency to make some of the API tests time out and fail. Fixes: https://tracker.ceph.com/issues/50371 Signed-off-by: Brad Hubbard (cherry picked from commit d034fec463330d9cefba05bfe9beb0c066e03b43) --- qa/clusters/2-node-mgr.yaml | 1 + qa/clusters/extra-client.yaml | 3 ++- qa/clusters/fixed-1.yaml | 1 + qa/clusters/fixed-2.yaml | 1 + qa/clusters/fixed-3-cephfs.yaml | 1 + qa/clusters/fixed-3.yaml | 1 + qa/clusters/fixed-4.yaml | 3 ++- qa/suites/rados/thrash/workloads/rados_api_tests.yaml | 5 +++++ qa/suites/rados/verify/tasks/rados_api_tests.yaml | 6 +++++- 9 files changed, 19 insertions(+), 3 deletions(-) diff --git a/qa/clusters/2-node-mgr.yaml b/qa/clusters/2-node-mgr.yaml index b1c29a86638..be657492c1e 100644 --- a/qa/clusters/2-node-mgr.yaml +++ b/qa/clusters/2-node-mgr.yaml @@ -1,3 +1,4 @@ +clusternodes: 2 roles: - [mgr.x, mon.a, mon.c, mds.a, mds.c, osd.0, client.0] - [mgr.y, mgr.z, mon.b, mds.b, osd.1, osd.2, osd.3, client.1] diff --git a/qa/clusters/extra-client.yaml b/qa/clusters/extra-client.yaml index 33fa505b714..4bf258bcd41 100644 --- a/qa/clusters/extra-client.yaml +++ b/qa/clusters/extra-client.yaml @@ -1,3 +1,4 @@ +clusternodes: 2 roles: - [mon.a, mon.c, osd.0, osd.1, osd.2] - [mon.b, mgr.x, mds.a, osd.3, osd.4, osd.5] @@ -11,4 +12,4 @@ overrides: ceph: conf: osd: - osd shutdown pgref assert: true \ No newline at end of file + osd shutdown pgref assert: true diff --git a/qa/clusters/fixed-1.yaml b/qa/clusters/fixed-1.yaml index d8e5898b99f..a7cf5db3d69 100644 --- a/qa/clusters/fixed-1.yaml +++ b/qa/clusters/fixed-1.yaml @@ -1,3 +1,4 @@ +clusternodes: 1 overrides: ceph-deploy: conf: diff --git 
a/qa/clusters/fixed-2.yaml b/qa/clusters/fixed-2.yaml index e4448bb2008..964d6a49b2b 100644 --- a/qa/clusters/fixed-2.yaml +++ b/qa/clusters/fixed-2.yaml @@ -1,3 +1,4 @@ +clusternodes: 2 roles: - [mon.a, mon.c, mgr.y, osd.0, osd.1, osd.2, osd.3, client.0, node-exporter.a] - [mon.b, mgr.x, osd.4, osd.5, osd.6, osd.7, client.1, prometheus.a, node-exporter.b] diff --git a/qa/clusters/fixed-3-cephfs.yaml b/qa/clusters/fixed-3-cephfs.yaml index 9e021b3bd69..493b044eb99 100644 --- a/qa/clusters/fixed-3-cephfs.yaml +++ b/qa/clusters/fixed-3-cephfs.yaml @@ -1,3 +1,4 @@ +clusternodes: 2 roles: - [mon.a, mds.a, mgr.x, osd.0, osd.1] - [mon.b, mds.b, mon.c, mgr.y, osd.2, osd.3] diff --git a/qa/clusters/fixed-3.yaml b/qa/clusters/fixed-3.yaml index ddc79a84b60..e87ae939237 100644 --- a/qa/clusters/fixed-3.yaml +++ b/qa/clusters/fixed-3.yaml @@ -1,3 +1,4 @@ +clusternodes: 2 roles: - [mon.a, mon.c, mgr.x, osd.0, osd.1, osd.2, osd.3] - [mon.b, mgr.y, osd.4, osd.5, osd.6, osd.7] diff --git a/qa/clusters/fixed-4.yaml b/qa/clusters/fixed-4.yaml index df767f35710..29c23b75a7e 100644 --- a/qa/clusters/fixed-4.yaml +++ b/qa/clusters/fixed-4.yaml @@ -1,3 +1,4 @@ +clusternodes: 4 roles: - [mon.a, mgr.y, osd.0, osd.4, osd.8, osd.12] - [mon.b, osd.1, osd.5, osd.9, osd.13] @@ -7,4 +8,4 @@ overrides: ceph: conf: osd: - osd shutdown pgref assert: true \ No newline at end of file + osd shutdown pgref assert: true diff --git a/qa/suites/rados/thrash/workloads/rados_api_tests.yaml b/qa/suites/rados/thrash/workloads/rados_api_tests.yaml index 3e72897ae05..97520e6a8cc 100644 --- a/qa/suites/rados/thrash/workloads/rados_api_tests.yaml +++ b/qa/suites/rados/thrash/workloads/rados_api_tests.yaml @@ -1,3 +1,7 @@ +teuthology: + postmerge: + # Don't run this test on tiny clusters + - if yaml.clusternodes < 4 then reject() end overrides: ceph: log-ignorelist: @@ -16,6 +20,7 @@ overrides: osd: osd class load list: "*" osd class default list: "*" + osd client watch timeout: 240 tasks: - workunit: clients: 
diff --git a/qa/suites/rados/verify/tasks/rados_api_tests.yaml b/qa/suites/rados/verify/tasks/rados_api_tests.yaml index e5a54e69e01..d23eda8cf10 100644 --- a/qa/suites/rados/verify/tasks/rados_api_tests.yaml +++ b/qa/suites/rados/verify/tasks/rados_api_tests.yaml @@ -1,3 +1,7 @@ +teuthology: + postmerge: + # Don't run this test on tiny clusters + - if yaml.clusternodes < 4 then reject() end overrides: ceph: log-ignorelist: @@ -23,7 +27,7 @@ overrides: osd: osd class load list: "*" osd class default list: "*" - osd client watch timeout: 120 + osd client watch timeout: 240 tasks: - workunit: timeout: 6h -- 2.47.3