git.apps.os.sepia.ceph.com Git - ceph.git/commitdiff
qa/suites/upgrade/octopus-x/stress-split: cephadm-based stress-split upgrade 39404/head
author Sage Weil <sage@newdream.net>
Thu, 4 Feb 2021 18:56:49 +0000 (12:56 -0600)
committer Sage Weil <sage@newdream.net>
Thu, 11 Feb 2021 00:09:04 +0000 (18:09 -0600)
Apply stress during upgrade, and also pause partway and do some
thrashing.

Signed-off-by: Sage Weil <sage@newdream.net>
24 files changed:
qa/suites/upgrade/octopus-x/stress-split/% [new file with mode: 0644]
qa/suites/upgrade/octopus-x/stress-split/.qa [new symlink]
qa/suites/upgrade/octopus-x/stress-split/0-roles.yaml [new file with mode: 0644]
qa/suites/upgrade/octopus-x/stress-split/1-start.yaml [new file with mode: 0644]
qa/suites/upgrade/octopus-x/stress-split/2-first-half-tasks/.qa [new symlink]
qa/suites/upgrade/octopus-x/stress-split/2-first-half-tasks/radosbench.yaml [new file with mode: 0644]
qa/suites/upgrade/octopus-x/stress-split/2-first-half-tasks/rbd-cls.yaml [new file with mode: 0644]
qa/suites/upgrade/octopus-x/stress-split/2-first-half-tasks/rbd-import-export.yaml [new file with mode: 0644]
qa/suites/upgrade/octopus-x/stress-split/2-first-half-tasks/rbd_api.yaml [new file with mode: 0644]
qa/suites/upgrade/octopus-x/stress-split/2-first-half-tasks/readwrite.yaml [new file with mode: 0644]
qa/suites/upgrade/octopus-x/stress-split/2-first-half-tasks/snaps-few-objects.yaml [new file with mode: 0644]
qa/suites/upgrade/octopus-x/stress-split/3-stress-tasks/+ [new file with mode: 0644]
qa/suites/upgrade/octopus-x/stress-split/3-stress-tasks/radosbench.yaml [new file with mode: 0644]
qa/suites/upgrade/octopus-x/stress-split/3-stress-tasks/rbd-cls.yaml [new file with mode: 0644]
qa/suites/upgrade/octopus-x/stress-split/3-stress-tasks/rbd-import-export.yaml [new file with mode: 0644]
qa/suites/upgrade/octopus-x/stress-split/3-stress-tasks/rbd_api.yaml [new file with mode: 0644]
qa/suites/upgrade/octopus-x/stress-split/3-stress-tasks/readwrite.yaml [new file with mode: 0644]
qa/suites/upgrade/octopus-x/stress-split/3-stress-tasks/snaps-few-objects.yaml [new file with mode: 0644]
qa/suites/upgrade/octopus-x/stress-split/4-second-half-tasks/radosbench.yaml [new file with mode: 0644]
qa/suites/upgrade/octopus-x/stress-split/4-second-half-tasks/rbd-import-export.yaml [new file with mode: 0644]
qa/suites/upgrade/octopus-x/stress-split/distro$/.qa [new symlink]
qa/suites/upgrade/octopus-x/stress-split/distro$/centos_latest.yaml [new symlink]
qa/suites/upgrade/octopus-x/stress-split/distro$/ubuntu_latest.yaml [new symlink]
qa/suites/upgrade/octopus-x/stress-split/mon_election [new symlink]

diff --git a/qa/suites/upgrade/octopus-x/stress-split/% b/qa/suites/upgrade/octopus-x/stress-split/%
new file mode 100644 (file)
index 0000000..e69de29
diff --git a/qa/suites/upgrade/octopus-x/stress-split/.qa b/qa/suites/upgrade/octopus-x/stress-split/.qa
new file mode 120000 (symlink)
index 0000000..fea2489
--- /dev/null
@@ -0,0 +1 @@
+../.qa
\ No newline at end of file
diff --git a/qa/suites/upgrade/octopus-x/stress-split/0-roles.yaml b/qa/suites/upgrade/octopus-x/stress-split/0-roles.yaml
new file mode 100644 (file)
index 0000000..ad3ee43
--- /dev/null
@@ -0,0 +1,31 @@
+roles:  # two role groups: 3 mons, 2 mgrs, 8 osds and 2 clients in total
+- - mon.a
+  - mon.c
+  - mgr.y
+  - osd.0
+  - osd.1
+  - osd.2
+  - osd.3
+  - client.0
+  - node-exporter.a
+  - alertmanager.a
+- - mon.b
+  - mgr.x
+  - osd.4
+  - osd.5
+  - osd.6
+  - osd.7
+  - client.1
+  - prometheus.a
+  - grafana.a
+  - node-exporter.b
+openstack:
+- volumes: # attached to each instance
+    count: 4  # same as the number of osd.* roles in each group above
+    size: 10 # GB
+overrides:
+  ceph:
+    create_rbd_pool: true
+    conf:
+      osd:
+        osd shutdown pgref assert: true
diff --git a/qa/suites/upgrade/octopus-x/stress-split/1-start.yaml b/qa/suites/upgrade/octopus-x/stress-split/1-start.yaml
new file mode 100644 (file)
index 0000000..8458365
--- /dev/null
@@ -0,0 +1,116 @@
+tasks:
+- install:
+    branch: octopus  # start from octopus; the upgrade target is the $sha1 image used below
+
+- cephadm:
+    image: docker.io/ceph/daemon-base:latest-octopus
+    cephadm_branch: octopus
+    cephadm_git_url: https://github.com/ceph/ceph
+    conf:
+      osd:
+        # set config options for which cls modules are allowed to be loaded / used
+        osd_class_load_list: "*"
+        osd_class_default_list: "*"
+
+- cephadm.shell:
+    mon.a:
+      - ceph fs volume create foo
+- ceph.healthy:
+
+- print: "**** upgrading first half of cluster, with stress ****"
+- parallel:  # workload from 2-first-half-tasks/*.yaml alongside the upgrade steps below
+    - first-half-tasks
+    - first-half-sequence
+- print: "**** done upgrading first half of cluster ****"
+
+- ceph.healthy:
+
+- print: "**** applying stress + thrashing to mixed-version cluster ****"
+
+- parallel:
+    - stress-tasks  # defined below (thrashosds) and in 3-stress-tasks/*.yaml
+
+- ceph.healthy:
+
+- print: "**** finishing upgrade ****"
+- parallel:  # workload from 4-second-half-tasks/*.yaml alongside the rest of the upgrade
+    - second-half-tasks
+    - second-half-sequence
+
+- ceph.healthy:
+
+
+#################
+
+first-half-sequence:  # upgrade steps run in parallel with first-half-tasks
+- cephadm.shell:
+    env: [sha1]  # the commands below reference $sha1
+    mon.a:
+      - ceph config set mgr mgr/cephadm/daemon_cache_timeout 60
+
+      - ceph orch upgrade start --image quay.ceph.io/ceph-ci/ceph:$sha1
+      - ceph orch ps
+
+      - echo wait for minority of mons to upgrade
+      - while ! ceph mon versions | grep $sha1 ; do sleep 2 ; done
+      - ceph orch ps
+      - ceph orch upgrade pause
+      - sleep 60
+      - ceph orch upgrade resume
+
+      - echo wait for majority of mons to upgrade
+      - "while ! ceph mon versions | grep $sha1 | grep -E ': [23]' ; do sleep 2 ; done"  # 2 or 3 of the 3 mons
+      - ceph orch ps
+      - ceph orch upgrade pause
+      - sleep 60
+      - ceph orch upgrade resume
+
+      - echo wait for all mons to upgrade
+      - "while ! ceph mon versions | grep $sha1 | grep ': 3' ; do sleep 2 ; done"
+      - ceph orch ps
+      - ceph orch upgrade pause
+      - sleep 60
+      - ceph orch upgrade resume
+
+      - echo wait for half of osds to upgrade
+      - "while ! ceph osd versions | grep $sha1 | grep -E ': [45678]'; do sleep 2 ; done"  # 4 or more of the 8 osds
+      - ceph orch upgrade pause  # left paused here; second-half-sequence issues the resume
+      - ceph orch ps
+
+      - ceph orch ps
+      - ceph versions
+
+
+#################
+
+stress-tasks:  # also defined in 3-stress-tasks/*.yaml; run by the middle `parallel` step of tasks
+- thrashosds:
+    timeout: 1200
+    chance_pgnum_grow: 1
+    chance_pgpnum_fix: 1
+    chance_thrash_cluster_full: 0  # NOTE(review): 0 presumably disables this thrash action -- confirm
+    chance_thrash_pg_upmap: 0
+    chance_thrash_pg_upmap_items: 0
+    disable_objectstore_tool_tests: true
+    chance_force_recovery: 0
+    aggressive_pg_num_changes: false
+
+
+#################
+
+second-half-sequence:  # resumes the upgrade that first-half-sequence left paused
+  sequential:  # mapping form, whereas first-half-sequence is a bare task list
+    - cephadm.shell:
+        env: [sha1]  # the final check below references $sha1
+        mon.a:
+          - ceph orch upgrade resume
+          - sleep 60
+
+          - echo wait for upgrade to complete
+          - while ceph orch upgrade status | jq '.in_progress' | grep true ; do ceph orch ps ; ceph versions ; sleep 30 ; done
+
+          - echo upgrade complete
+          - ceph orch ps
+          - ceph versions
+          - ceph versions | jq -e '.overall | length == 1'  # exactly one version across the cluster
+          - ceph versions | jq -e '.overall | keys' | grep $sha1  # and that version is $sha1
diff --git a/qa/suites/upgrade/octopus-x/stress-split/2-first-half-tasks/.qa b/qa/suites/upgrade/octopus-x/stress-split/2-first-half-tasks/.qa
new file mode 120000 (symlink)
index 0000000..fea2489
--- /dev/null
@@ -0,0 +1 @@
+../.qa
\ No newline at end of file
diff --git a/qa/suites/upgrade/octopus-x/stress-split/2-first-half-tasks/radosbench.yaml b/qa/suites/upgrade/octopus-x/stress-split/2-first-half-tasks/radosbench.yaml
new file mode 100644 (file)
index 0000000..3816ca3
--- /dev/null
@@ -0,0 +1,19 @@
+meta:
+- desc: |
+   generate write load with rados bench
+   (four back-to-back runs from client.0, time: 90 each)
+first-half-tasks:  # run by the first `parallel` step in 1-start.yaml
+- full_sequential:
+  - radosbench:
+      clients: [client.0]
+      time: 90
+  - radosbench:
+      clients: [client.0]
+      time: 90
+  - radosbench:
+      clients: [client.0]
+      time: 90
+  - radosbench:
+      clients: [client.0]
+      time: 90
+- print: "**** done radosbench.yaml 2-first-half-tasks"
diff --git a/qa/suites/upgrade/octopus-x/stress-split/2-first-half-tasks/rbd-cls.yaml b/qa/suites/upgrade/octopus-x/stress-split/2-first-half-tasks/rbd-cls.yaml
new file mode 100644 (file)
index 0000000..ffe09dc
--- /dev/null
@@ -0,0 +1,10 @@
+meta:
+- desc: |
+   run basic cls tests for rbd
+first-half-tasks:  # run by the first `parallel` step in 1-start.yaml
+- workunit:
+    branch: octopus  # NOTE(review): presumably pins the workunit script to the octopus branch -- confirm
+    clients:
+      client.0:
+        - cls/test_cls_rbd.sh
+- print: "**** done cls/test_cls_rbd.sh 2-first-half-tasks"
diff --git a/qa/suites/upgrade/octopus-x/stress-split/2-first-half-tasks/rbd-import-export.yaml b/qa/suites/upgrade/octopus-x/stress-split/2-first-half-tasks/rbd-import-export.yaml
new file mode 100644 (file)
index 0000000..992f31b
--- /dev/null
@@ -0,0 +1,12 @@
+meta:
+- desc: |
+   run basic import/export cli tests for rbd
+first-half-tasks:  # run by the first `parallel` step in 1-start.yaml
+- workunit:
+    branch: octopus  # NOTE(review): presumably pins the workunit script to the octopus branch -- confirm
+    clients:
+      client.0:
+        - rbd/import_export.sh
+    env:
+      RBD_CREATE_ARGS: --new-format
+- print: "**** done rbd/import_export.sh 2-first-half-tasks"
diff --git a/qa/suites/upgrade/octopus-x/stress-split/2-first-half-tasks/rbd_api.yaml b/qa/suites/upgrade/octopus-x/stress-split/2-first-half-tasks/rbd_api.yaml
new file mode 100644 (file)
index 0000000..e4c1d54
--- /dev/null
@@ -0,0 +1,10 @@
+meta:
+- desc: |
+   librbd C and C++ api tests
+first-half-tasks:  # run by the first `parallel` step in 1-start.yaml
+- workunit:
+    branch: octopus  # NOTE(review): presumably pins the workunit script to the octopus branch -- confirm
+    clients:
+      client.0:
+        - rbd/test_librbd.sh
+- print: "**** done rbd/test_librbd.sh 2-first-half-tasks"
diff --git a/qa/suites/upgrade/octopus-x/stress-split/2-first-half-tasks/readwrite.yaml b/qa/suites/upgrade/octopus-x/stress-split/2-first-half-tasks/readwrite.yaml
new file mode 100644 (file)
index 0000000..21a9f37
--- /dev/null
@@ -0,0 +1,16 @@
+meta:
+- desc: |
+   randomized correctness test for rados operations on a replicated pool,
+   using only reads, writes, and deletes
+first-half-tasks:  # run by the first `parallel` step in 1-start.yaml
+- full_sequential:
+  - rados:
+      clients: [client.0]
+      ops: 4000
+      objects: 500
+      write_append_excl: false
+      op_weights:  # only read/write/delete are weighted, matching the desc above
+        read: 45
+        write: 45
+        delete: 10
+- print: "**** done rados/readwrite 2-first-half-tasks"
diff --git a/qa/suites/upgrade/octopus-x/stress-split/2-first-half-tasks/snaps-few-objects.yaml b/qa/suites/upgrade/octopus-x/stress-split/2-first-half-tasks/snaps-few-objects.yaml
new file mode 100644 (file)
index 0000000..6447c22
--- /dev/null
@@ -0,0 +1,18 @@
+meta:
+- desc: |
+   randomized correctness test for rados operations on a replicated pool with snapshot operations
+first-half-tasks:  # run by the first `parallel` step in 1-start.yaml
+- full_sequential:
+  - rados:
+      clients: [client.0]
+      ops: 4000
+      objects: 50
+      write_append_excl: false
+      op_weights:  # includes the snapshot ops (snap_create, snap_remove, rollback)
+        read: 100
+        write: 100
+        delete: 50
+        snap_create: 50
+        snap_remove: 50
+        rollback: 50
+- print: "**** done rados/snaps-few-objects 2-first-half-tasks"
diff --git a/qa/suites/upgrade/octopus-x/stress-split/3-stress-tasks/+ b/qa/suites/upgrade/octopus-x/stress-split/3-stress-tasks/+
new file mode 100644 (file)
index 0000000..e69de29
diff --git a/qa/suites/upgrade/octopus-x/stress-split/3-stress-tasks/radosbench.yaml b/qa/suites/upgrade/octopus-x/stress-split/3-stress-tasks/radosbench.yaml
new file mode 100644 (file)
index 0000000..9058bd8
--- /dev/null
@@ -0,0 +1,25 @@
+meta:
+- desc: |
+   generate write load with rados bench
+   (six back-to-back runs from client.0, time: 90 each)
+stress-tasks:  # run by the stress-phase `parallel` step in 1-start.yaml
+- full_sequential:
+  - radosbench:
+      clients: [client.0]
+      time: 90
+  - radosbench:
+      clients: [client.0]
+      time: 90
+  - radosbench:
+      clients: [client.0]
+      time: 90
+  - radosbench:
+      clients: [client.0]
+      time: 90
+  - radosbench:
+      clients: [client.0]
+      time: 90
+  - radosbench:
+      clients: [client.0]
+      time: 90
+- print: "**** done radosbench.yaml 3-stress-tasks"
diff --git a/qa/suites/upgrade/octopus-x/stress-split/3-stress-tasks/rbd-cls.yaml b/qa/suites/upgrade/octopus-x/stress-split/3-stress-tasks/rbd-cls.yaml
new file mode 100644 (file)
index 0000000..07ab6e1
--- /dev/null
@@ -0,0 +1,10 @@
+meta:
+- desc: |
+   run basic cls tests for rbd
+stress-tasks:  # run by the stress-phase `parallel` step in 1-start.yaml
+- workunit:
+    branch: octopus  # NOTE(review): presumably pins the workunit script to the octopus branch -- confirm
+    clients:
+      client.0:
+        - cls/test_cls_rbd.sh
+- print: "**** done cls/test_cls_rbd.sh 3-stress-tasks"
diff --git a/qa/suites/upgrade/octopus-x/stress-split/3-stress-tasks/rbd-import-export.yaml b/qa/suites/upgrade/octopus-x/stress-split/3-stress-tasks/rbd-import-export.yaml
new file mode 100644 (file)
index 0000000..a3968fe
--- /dev/null
@@ -0,0 +1,12 @@
+meta:
+- desc: |
+   run basic import/export cli tests for rbd
+stress-tasks:  # run by the stress-phase `parallel` step in 1-start.yaml
+- workunit:
+    branch: octopus  # NOTE(review): presumably pins the workunit script to the octopus branch -- confirm
+    clients:
+      client.0:
+        - rbd/import_export.sh
+    env:
+      RBD_CREATE_ARGS: --new-format
+- print: "**** done rbd/import_export.sh 3-stress-tasks"
diff --git a/qa/suites/upgrade/octopus-x/stress-split/3-stress-tasks/rbd_api.yaml b/qa/suites/upgrade/octopus-x/stress-split/3-stress-tasks/rbd_api.yaml
new file mode 100644 (file)
index 0000000..7212d3f
--- /dev/null
@@ -0,0 +1,10 @@
+meta:
+- desc: |
+   librbd C and C++ api tests
+stress-tasks:  # run by the stress-phase `parallel` step in 1-start.yaml
+- workunit:
+    branch: octopus  # NOTE(review): presumably pins the workunit script to the octopus branch -- confirm
+    clients:
+      client.0:
+        - rbd/test_librbd.sh
+- print: "**** done rbd/test_librbd.sh 3-stress-tasks"
diff --git a/qa/suites/upgrade/octopus-x/stress-split/3-stress-tasks/readwrite.yaml b/qa/suites/upgrade/octopus-x/stress-split/3-stress-tasks/readwrite.yaml
new file mode 100644 (file)
index 0000000..41e34d6
--- /dev/null
@@ -0,0 +1,16 @@
+meta:
+- desc: |
+   randomized correctness test for rados operations on a replicated pool,
+   using only reads, writes, and deletes
+stress-tasks:  # run by the stress-phase `parallel` step in 1-start.yaml
+- full_sequential:
+  - rados:
+      clients: [client.0]
+      ops: 4000
+      objects: 500
+      write_append_excl: false
+      op_weights:  # only read/write/delete are weighted, matching the desc above
+        read: 45
+        write: 45
+        delete: 10
+- print: "**** done rados/readwrite 3-stress-tasks"
diff --git a/qa/suites/upgrade/octopus-x/stress-split/3-stress-tasks/snaps-few-objects.yaml b/qa/suites/upgrade/octopus-x/stress-split/3-stress-tasks/snaps-few-objects.yaml
new file mode 100644 (file)
index 0000000..f56d0de
--- /dev/null
@@ -0,0 +1,18 @@
+meta:
+- desc: |
+   randomized correctness test for rados operations on a replicated pool with snapshot operations
+stress-tasks:  # run by the stress-phase `parallel` step in 1-start.yaml
+- full_sequential:
+  - rados:
+      clients: [client.0]
+      ops: 4000
+      objects: 50
+      write_append_excl: false
+      op_weights:  # includes the snapshot ops (snap_create, snap_remove, rollback)
+        read: 100
+        write: 100
+        delete: 50
+        snap_create: 50
+        snap_remove: 50
+        rollback: 50
+- print: "**** done rados/snaps-few-objects 3-stress-tasks"
diff --git a/qa/suites/upgrade/octopus-x/stress-split/4-second-half-tasks/radosbench.yaml b/qa/suites/upgrade/octopus-x/stress-split/4-second-half-tasks/radosbench.yaml
new file mode 100644 (file)
index 0000000..7268cb1
--- /dev/null
@@ -0,0 +1,16 @@
+meta:
+- desc: |
+   generate write load with rados bench
+   (three back-to-back runs from client.0, time: 90 each)
+second-half-tasks:  # run by the final `parallel` step in 1-start.yaml
+- full_sequential:
+  - radosbench:
+      clients: [client.0]
+      time: 90
+  - radosbench:
+      clients: [client.0]
+      time: 90
+  - radosbench:
+      clients: [client.0]
+      time: 90
+- print: "**** done radosbench.yaml 4-second-half-tasks"
diff --git a/qa/suites/upgrade/octopus-x/stress-split/4-second-half-tasks/rbd-import-export.yaml b/qa/suites/upgrade/octopus-x/stress-split/4-second-half-tasks/rbd-import-export.yaml
new file mode 100644 (file)
index 0000000..f223bda
--- /dev/null
@@ -0,0 +1,12 @@
+meta:
+- desc: |
+   run basic import/export cli tests for rbd
+second-half-tasks:  # run by the final `parallel` step in 1-start.yaml
+- workunit:
+    branch: octopus  # NOTE(review): presumably pins the workunit script to the octopus branch -- confirm
+    clients:
+      client.0:
+        - rbd/import_export.sh
+    env:
+      RBD_CREATE_ARGS: --new-format
+- print: "**** done rbd/import_export.sh 4-second-half-tasks"
diff --git a/qa/suites/upgrade/octopus-x/stress-split/distro$/.qa b/qa/suites/upgrade/octopus-x/stress-split/distro$/.qa
new file mode 120000 (symlink)
index 0000000..fea2489
--- /dev/null
@@ -0,0 +1 @@
+../.qa
\ No newline at end of file
diff --git a/qa/suites/upgrade/octopus-x/stress-split/distro$/centos_latest.yaml b/qa/suites/upgrade/octopus-x/stress-split/distro$/centos_latest.yaml
new file mode 120000 (symlink)
index 0000000..bd9854e
--- /dev/null
@@ -0,0 +1 @@
+.qa/distros/supported/centos_latest.yaml
\ No newline at end of file
diff --git a/qa/suites/upgrade/octopus-x/stress-split/distro$/ubuntu_latest.yaml b/qa/suites/upgrade/octopus-x/stress-split/distro$/ubuntu_latest.yaml
new file mode 120000 (symlink)
index 0000000..3a09f9a
--- /dev/null
@@ -0,0 +1 @@
+.qa/distros/supported/ubuntu_latest.yaml
\ No newline at end of file
diff --git a/qa/suites/upgrade/octopus-x/stress-split/mon_election b/qa/suites/upgrade/octopus-x/stress-split/mon_election
new file mode 120000 (symlink)
index 0000000..3f331e6
--- /dev/null
@@ -0,0 +1 @@
+.qa/mon_election
\ No newline at end of file