]> git-server-git.apps.pok.os.sepia.ceph.com Git - ceph.git/commitdiff
rados: upgrade suite test filestore removal
author Nitzan Mordechai <nmordech@redhat.com>
Wed, 21 Dec 2022 07:06:21 +0000 (07:06 +0000)
committer Nitzan Mordechai <nmordech@redhat.com>
Sun, 12 Feb 2023 06:11:29 +0000 (06:11 +0000)
When upgrading an OSD with filestore to reef, restart should not be possible:
the OSD won't boot and an error message will be shown in the OSD log.

Signed-off-by: Nitzan Mordechai <nmordec@redhat.com>
13 files changed:
qa/suites/upgrade/quincy-x/filestore-remove-check/% [new file with mode: 0644]
qa/suites/upgrade/quincy-x/filestore-remove-check/.qa [new symlink]
qa/suites/upgrade/quincy-x/filestore-remove-check/0-cluster/+ [new file with mode: 0644]
qa/suites/upgrade/quincy-x/filestore-remove-check/0-cluster/.qa [new symlink]
qa/suites/upgrade/quincy-x/filestore-remove-check/0-cluster/openstack.yaml [new file with mode: 0644]
qa/suites/upgrade/quincy-x/filestore-remove-check/0-cluster/start.yaml [new file with mode: 0644]
qa/suites/upgrade/quincy-x/filestore-remove-check/1-ceph-install/.qa [new symlink]
qa/suites/upgrade/quincy-x/filestore-remove-check/1-ceph-install/quincy.yaml [new file with mode: 0644]
qa/suites/upgrade/quincy-x/filestore-remove-check/2 - upgrade.yaml [new file with mode: 0644]
qa/suites/upgrade/quincy-x/filestore-remove-check/objectstore/.qa [new symlink]
qa/suites/upgrade/quincy-x/filestore-remove-check/objectstore/filestore-xfs.yaml [new file with mode: 0644]
qa/suites/upgrade/quincy-x/filestore-remove-check/ubuntu_20.04.yaml [new file with mode: 0644]
qa/tasks/ceph.py

diff --git a/qa/suites/upgrade/quincy-x/filestore-remove-check/% b/qa/suites/upgrade/quincy-x/filestore-remove-check/%
new file mode 100644 (file)
index 0000000..e69de29
diff --git a/qa/suites/upgrade/quincy-x/filestore-remove-check/.qa b/qa/suites/upgrade/quincy-x/filestore-remove-check/.qa
new file mode 120000 (symlink)
index 0000000..fea2489
--- /dev/null
@@ -0,0 +1 @@
+../.qa
\ No newline at end of file
diff --git a/qa/suites/upgrade/quincy-x/filestore-remove-check/0-cluster/+ b/qa/suites/upgrade/quincy-x/filestore-remove-check/0-cluster/+
new file mode 100644 (file)
index 0000000..e69de29
diff --git a/qa/suites/upgrade/quincy-x/filestore-remove-check/0-cluster/.qa b/qa/suites/upgrade/quincy-x/filestore-remove-check/0-cluster/.qa
new file mode 120000 (symlink)
index 0000000..a602a03
--- /dev/null
@@ -0,0 +1 @@
+../.qa/
\ No newline at end of file
diff --git a/qa/suites/upgrade/quincy-x/filestore-remove-check/0-cluster/openstack.yaml b/qa/suites/upgrade/quincy-x/filestore-remove-check/0-cluster/openstack.yaml
new file mode 100644 (file)
index 0000000..5caffc3
--- /dev/null
@@ -0,0 +1,6 @@
+openstack:
+  - machine:
+      disk: 100 # GB
+  - volumes: # attached to each instance
+      count: 4
+      size: 30 # GB
diff --git a/qa/suites/upgrade/quincy-x/filestore-remove-check/0-cluster/start.yaml b/qa/suites/upgrade/quincy-x/filestore-remove-check/0-cluster/start.yaml
new file mode 100644 (file)
index 0000000..57e455b
--- /dev/null
@@ -0,0 +1,33 @@
+meta:
+- desc: |
+   Run ceph on one nodes,
+   Use xfs beneath the osds. upgrade to reef
+   should fail to start the osds with filestore
+overrides:
+  ceph:
+    mon_bind_msgr2: false
+    mon_bind_addrvec: false
+    mon-health-to-clog: false
+    wait-for-healthy: false
+    wait-for-osds-up: false
+    wait-for-scrub: false
+    skip_stop_pg_num_changes: true
+    fs: xfs
+    log-ignorelist:
+      - overall HEALTH_
+      - \(MON_DOWN\)
+      - \(MGR_DOWN\)
+      - slow request
+      - \(MON_MSGR2_NOT_ENABLED\)
+    conf:
+      global:
+        enable experimental unrecoverable data corrupting features: "*"
+        mon warn on msgr2 not enabled: false
+      mon:
+        mon warn on osd down out interval zero: false
+roles:
+- - mon.a
+  - mgr.x
+  - osd.0
+  - osd.1
+  - osd.2
\ No newline at end of file
diff --git a/qa/suites/upgrade/quincy-x/filestore-remove-check/1-ceph-install/.qa b/qa/suites/upgrade/quincy-x/filestore-remove-check/1-ceph-install/.qa
new file mode 120000 (symlink)
index 0000000..a602a03
--- /dev/null
@@ -0,0 +1 @@
+../.qa/
\ No newline at end of file
diff --git a/qa/suites/upgrade/quincy-x/filestore-remove-check/1-ceph-install/quincy.yaml b/qa/suites/upgrade/quincy-x/filestore-remove-check/1-ceph-install/quincy.yaml
new file mode 100644 (file)
index 0000000..471bd61
--- /dev/null
@@ -0,0 +1,32 @@
+meta:
+- desc: install ceph/quincy latest
+tasks:
+- install:
+    exclude_packages:
+      - ceph-mgr-cephadm
+      - cephadm
+      - libcephfs-dev
+    branch: quincy
+- print: "**** done install quincy"
+- ceph:
+    create_rbd_pool: false
+    conf:
+      global:
+        bluestore_warn_on_legacy_statfs: false
+        bluestore warn on no per pool omap: false
+        mon pg warn min per osd: 0
+      mon:
+        mon_warn_on_insecure_global_id_reclaim: false
+        mon_warn_on_insecure_global_id_reclaim_allowed: false
+    log-ignorelist:
+      - Not found or unloadable
+      - evicting unresponsive client
+- exec:
+    osd.0:
+      - ceph osd require-osd-release quincy
+- print: "**** done ceph"
+overrides:
+  ceph:
+    conf:
+      mon:
+        mon warn on osd down out interval zero: false
diff --git a/qa/suites/upgrade/quincy-x/filestore-remove-check/2 - upgrade.yaml b/qa/suites/upgrade/quincy-x/filestore-remove-check/2 - upgrade.yaml
new file mode 100644 (file)
index 0000000..6aa429f
--- /dev/null
@@ -0,0 +1,20 @@
+meta:
+- desc: |
+   install upgrade ceph/-x on cluster
+   restart : mons, osd.*
+tasks:
+- install.upgrade:
+    mon.a:
+- exec:
+    osd.0:
+      - ceph osd require-osd-release quincy
+- print: "**** done install.upgrade of nodes"
+- ceph.restart:
+    daemons: [mon.a,mgr.x,osd.0,osd.1,osd.2]
+    mon-health-to-clog: false
+    wait-for-healthy: false
+    wait-for-osds-up: false
+    wait-for-scrub: false
+    skip_stop_pg_num_changes: true
+    expected-failure: "FileStore has been deprecated and is no longer supported"
+- print: "**** done ceph.restart of all mons and osds"
diff --git a/qa/suites/upgrade/quincy-x/filestore-remove-check/objectstore/.qa b/qa/suites/upgrade/quincy-x/filestore-remove-check/objectstore/.qa
new file mode 120000 (symlink)
index 0000000..a602a03
--- /dev/null
@@ -0,0 +1 @@
+../.qa/
\ No newline at end of file
diff --git a/qa/suites/upgrade/quincy-x/filestore-remove-check/objectstore/filestore-xfs.yaml b/qa/suites/upgrade/quincy-x/filestore-remove-check/objectstore/filestore-xfs.yaml
new file mode 100644 (file)
index 0000000..b6ef47b
--- /dev/null
@@ -0,0 +1,14 @@
+overrides:
+  ceph:
+    fs: xfs
+    conf:
+      osd:
+        osd objectstore: filestore
+        osd sloppy crc: true
+  ceph-deploy:
+    fs: xfs
+    filestore: True
+    conf:
+      osd:
+        osd objectstore: filestore
+        osd sloppy crc: true
\ No newline at end of file
diff --git a/qa/suites/upgrade/quincy-x/filestore-remove-check/ubuntu_20.04.yaml b/qa/suites/upgrade/quincy-x/filestore-remove-check/ubuntu_20.04.yaml
new file mode 100644 (file)
index 0000000..e1374c4
--- /dev/null
@@ -0,0 +1,9 @@
+os_type: ubuntu
+os_version: "20.04"
+# the normal ubuntu 20.04 kernel (5.4.0-88-generic currently) have a bug that prevents the nvme_loop
+# from behaving.  I think it is this:
+#   https://lkml.org/lkml/2020/9/21/1456
+# (at least, that is the symptom: nvme nvme1: Connect command failed, error wo/DNR bit: 880)
+overrides:
+  kernel:
+    hwe: true
\ No newline at end of file
index b10bc6723108b261fcbd99913605134cada05d4e..08d66e60d4cf2d62c99927637c1d8c594a3543fc 100644 (file)
@@ -1585,6 +1585,20 @@ def restart(ctx, config):
     if config.get('wait-for-osds-up', False):
         for cluster in clusters:
             ctx.managers[cluster].wait_for_all_osds_up()
+    if config.get('expected-failure') is not None:
+        log.info('Checking for expected-failure in osds logs after restart...')
+        expected_fail = config.get('expected-failure')
+        is_osd = teuthology.is_type('osd')
+        for role in daemons:
+            if not is_osd(role):
+                continue
+            (remote,) = ctx.cluster.only(role).remotes.keys()
+            cluster, type_, id_ = teuthology.split_role(role)
+            remote.run(
+               args = ['sudo',
+                       'egrep', expected_fail,
+                       '/var/log/ceph/{cluster}-{type_}.{id_}.log'.format(cluster=cluster, type_=type_, id_=id_),
+                ])
     yield
 
 
@@ -1899,7 +1913,8 @@ def task(ctx, config):
         finally:
             # set pg_num_targets back to actual pg_num, so we don't have to
             # wait for pending merges (which can take a while!)
-            ctx.managers[config['cluster']].stop_pg_num_changes()
+            if not config.get('skip_stop_pg_num_changes', True):
+                ctx.managers[config['cluster']].stop_pg_num_changes()
 
             if config.get('wait-for-scrub', True):
                 # wait for pgs to become active+clean in case any