--- /dev/null
+../../../../../cephfs/clusters/3-mds.yaml
\ No newline at end of file
--- /dev/null
+../../../../cephfs/objectstore-ec/
\ No newline at end of file
--- /dev/null
+../../../../../cephfs/overrides/debug.yaml
\ No newline at end of file
--- /dev/null
+../../../../../cephfs/overrides/frag_enable.yaml
\ No newline at end of file
--- /dev/null
+overrides:
+ ceph:
+ max_mds: 1
--- /dev/null
+../../../../../cephfs/overrides/whitelist_health.yaml
\ No newline at end of file
--- /dev/null
+../../../../../cephfs/overrides/whitelist_wrongly_marked_down.yaml
\ No newline at end of file
--- /dev/null
+meta:
+- desc: |
+ install ceph/luminous latest
+tasks:
+- install:
+ branch: luminous
+- print: "**** done installing luminous"
+- ceph:
+ log-whitelist:
+ - overall HEALTH_
+ - \(FS_
+ - \(MDS_
+ - \(OSD_
+ - \(MON_DOWN\)
+ - \(CACHE_POOL_
+ - \(POOL_
+ - \(MGR_DOWN\)
+ - \(PG_
+ - \(SMALLER_PGP_NUM\)
+ - Monitor daemon marked osd
+ - Behind on trimming
+ - Manager daemon
+ conf:
+ global:
+ mon warn on pool no app: false
+- exec:
+ osd.0:
+ - ceph osd require-osd-release luminous
+ - ceph osd set-require-min-compat-client luminous
+- print: "**** done ceph"
--- /dev/null
+tasks:
+- ceph-fuse:
+- print: "**** done luminous client"
+- exec:
+ mon.a:
+ - ceph fs set cephfs allow_new_snaps true --yes-i-really-mean-it
+- workunit:
+ timeout: 5m
+ cleanup: false
+ clients:
+ client.0:
+ - fs/snaps/snap-hierarchy.sh
+- print: "**** done snap hierarchy"
--- /dev/null
+tasks:
+- mds_pre_upgrade:
+- print: "**** done mds pre-upgrade sequence"
+- install.upgrade:
+ mon.a:
+ mon.b:
+- print: "**** done install.upgrade both hosts"
+- ceph.stop: [mds.*]
+- ceph.restart:
+ daemons: [mon.*, mgr.*, osd.*, mds.*]
+ mon-health-to-clog: false
+- print: "**** done ceph.restart"
--- /dev/null
+tasks:
+- exec:
+ mon.a:
+ - ceph status
+ - ceph fs dump --format=json-pretty
+ - ceph fs set cephfs max_mds 2 && exit 1 || true
+- print: "**** confirmed cannot set max_mds=2"
+- exec:
+ mon.a:
+ - ceph fs set cephfs allow_new_snaps true
--- /dev/null
+tasks:
+- install.upgrade:
+ client.0:
+- print: "**** done install.upgrade on client.0"
+- ceph-fuse:
+ client.0:
+ mounted: false
+- ceph-fuse:
+ client.0:
+- print: "**** done remount client"
--- /dev/null
+tasks:
+- workunit:
+ timeout: 5m
+ cleanup: false
+ env:
+ VERIFY: verify
+ clients:
+ client.0:
+ - fs/snaps/snap-hierarchy.sh
+- print: "**** done verify snap hierarchy"
--- /dev/null
+overrides:
+ ceph:
+ log-whitelist:
+ - bad backtrace on inode
+tasks:
+- cephfs_upgrade_snap:
+- print: "**** upgraded snapshot metadata"
+- exec:
+ mon.a:
+ - ceph fs set cephfs max_mds 2
+- print: "**** increased max_mds=2"
+- sleep:
+ duration: 10
+- exec:
+ mon.a:
+ - ceph fs dump | grep '^max_mds.*2'
--- /dev/null
+5-client-sanity.yaml
\ No newline at end of file
all_roles = [item for remote_roles in mdss.remotes.values() for item in remote_roles]
num_active = len([r for r in all_roles if is_active_mds(r)])
- fs.set_max_mds(num_active)
+ fs.set_max_mds(config.get('max_mds', num_active))
yield
return result
+ def get_rank(self, rank=0, status=None):
+ if status is None:
+ status = self.getinfo()
+ return status.get_rank(self.id, rank)
+
+ def get_ranks(self, status=None):
+ if status is None:
+ status = self.getinfo()
+ return status.get_ranks(self.id)
+
def get_rank_names(self, status=None):
"""
Return MDS daemon names of those daemons holding a rank,
return self.json_asok(command, 'mds', mds_id)
+ def rank_asok(self, command, rank=0):
+ info = self.get_rank(rank=rank)
+ return self.json_asok(command, 'mds', info['name'])
+
def read_cache(self, path, depth=None):
cmd = ["dump", "tree", path]
if depth is not None:
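The `get_rank`, `get_ranks`, and `rank_asok` helpers above let callers address an MDS by rank instead of by daemon name. A minimal usage sketch, assuming it runs inside a teuthology task where `ctx` is the task context and that "status" is a valid MDS admin-socket command; `log_ranks` is an illustrative name, not part of the patch:

    import logging

    from tasks.cephfs.filesystem import Filesystem

    log = logging.getLogger(__name__)

    def log_ranks(ctx):
        # Resolve every held rank to the daemon currently holding it.
        fs = Filesystem(ctx)
        status = fs.getinfo()
        for info in fs.get_ranks(status=status):
            log.info("rank %d held by mds.%s", info['rank'], info['name'])
        # Run an admin-socket command against whichever daemon holds rank 0.
        return fs.rank_asok(["status"], rank=0)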
--- /dev/null
+"""
+Upgrade cluster snap format.
+"""
+
+import logging
+import time
+
+from tasks.cephfs.filesystem import Filesystem
+
+log = logging.getLogger(__name__)
+
+def task(ctx, config):
+ """
+ Upgrade CephFS file system snap format.
+ """
+
+ if config is None:
+ config = {}
+ assert isinstance(config, dict), \
+ 'snap-upgrade task only accepts a dict for configuration'
+
+ fs = Filesystem(ctx)
+
+ mds_map = fs.get_mds_map()
+ assert(mds_map['max_mds'] == 1)
+
+ json = fs.rank_asok(["scrub_path", "/", "force", "recursive", "repair"])
+ if not json or json['return_code'] == 0:
+ log.info("scrub / completed")
+ else:
+ log.info("scrub / failed: {}".format(json))
+
+ json = fs.rank_asok(["scrub_path", "~mdsdir", "force", "recursive", "repair"])
+ if not json or json['return_code'] == 0:
+ log.info("scrub ~mdsdir completed")
+ else:
+ log.info("scrub / failed: {}".format(json))
+
+ for i in range(0, 10):
+ mds_map = fs.get_mds_map()
+ if (mds_map['flags'] & (1<<1)) != 0 and (mds_map['flags'] & (1<<4)) != 0:
+ break
+ time.sleep(10)
+ assert((mds_map['flags'] & (1<<1)) != 0) # Test CEPH_MDSMAP_ALLOW_SNAPS
+ assert((mds_map['flags'] & (1<<4)) != 0) # Test CEPH_MDSMAP_ALLOW_MULTIMDS_SNAPS
--- /dev/null
+"""
+Prepare MDS cluster for upgrade.
+"""
+
+import logging
+import time
+
+from tasks.cephfs.filesystem import Filesystem
+
+log = logging.getLogger(__name__)
+
+def task(ctx, config):
+ """
+ Prepare MDS cluster for upgrade.
+
+ This task reduces ranks to 1 and stops all standbys.
+ """
+
+ if config is None:
+ config = {}
+ assert isinstance(config, dict), \
+        'mds_pre_upgrade task only accepts a dict for configuration'
+
+ fs = Filesystem(ctx)
+ status = fs.getinfo()
+
+ fs.set_max_mds(1)
+ status = fs.getinfo()
+    targets = list(filter(lambda r: r['rank'] >= 1, fs.get_ranks(status=status)))
+    if len(targets) > 0:
+        # deactivate mds in descending order
+        targets = sorted(targets, key=lambda r: r['rank'], reverse=True)
+        for target in targets:
+            log.info("deactivating rank %d" % target['rank'])
+            fs.deactivate(target['rank'])
+            # wait for the rank to leave the mds map before deactivating the next
+            for i in range(0, 30):
+                status = fs.getinfo()
+                if target['rank'] not in fs.get_mds_map(status=status)['in']:
+                    break
+                time.sleep(5)
+    else:
+        status = fs.getinfo()
+
+ assert(fs.get_mds_map(status=status)['max_mds'] == 1)
+ assert(fs.get_mds_map(status=status)['in'] == [0])
+
+ # Stop standbys now to minimize time rank 0 is down in subsequent:
+ # tasks:
+ # - ceph.stop: [mds.*]
+ rank0 = fs.get_rank(rank=0, status=status)
+ for daemon in ctx.daemons.iter_daemons_of_role('mds', fs.mon_manager.cluster):
+ if rank0['name'] != daemon.id_:
+ daemon.stop()
+
+ for i in range(1, 10):
+ time.sleep(5) # time for FSMap to update
+ status = fs.getinfo()
+ if len(list(status.get_standbys())) == 0:
+ break
+ assert(len(list(status.get_standbys())) == 0)
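A later task can re-check the invariant this task establishes (a single active rank 0 and no standbys); a hedged sketch reusing the Filesystem helpers added above (`check_pre_upgrade_state` is a hypothetical name):

    def check_pre_upgrade_state(fs):
        # Mirrors the asserts in mds_pre_upgrade: max_mds back to 1,
        # only rank 0 in the map, and no standbys left running.
        status = fs.getinfo()
        assert fs.get_mds_map(status=status)['max_mds'] == 1
        assert fs.get_mds_map(status=status)['in'] == [0]
        assert len(list(status.get_standbys())) == 0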
refspec = Head()
timeout = config.get('timeout', '3h')
+ cleanup = config.get('cleanup', True)
log.info('Pulling workunits from ref %s', refspec)
created_mountpoint[role] = created_mnt_dir
# Execute any non-all workunits
+ log.info("timeout={}".format(timeout))
+ log.info("cleanup={}".format(cleanup))
with parallel() as p:
for role, tests in clients.iteritems():
if role != "all":
p.spawn(_run_tests, ctx, refspec, role, tests,
config.get('env'),
basedir=config.get('basedir','qa/workunits'),
- timeout=timeout)
+                        timeout=timeout, cleanup=cleanup)
- # Clean up dirs from any non-all workunits
- for role, created in created_mountpoint.items():
- _delete_dir(ctx, role, created)
+ if cleanup:
+ # Clean up dirs from any non-all workunits
+ for role, created in created_mountpoint.items():
+ _delete_dir(ctx, role, created)
# Execute any 'all' workunits
if 'all' in clients:
all_tasks = clients["all"]
_spawn_on_all_clients(ctx, refspec, all_tasks, config.get('env'),
config.get('basedir', 'qa/workunits'),
- config.get('subdir'), timeout=timeout)
+ config.get('subdir'), timeout=timeout,
+ cleanup=cleanup)
def _client_mountpoint(ctx, cluster, id_):
return created_mountpoint
-def _spawn_on_all_clients(ctx, refspec, tests, env, basedir, subdir, timeout=None):
+def _spawn_on_all_clients(ctx, refspec, tests, env, basedir, subdir, timeout=None, cleanup=True):
"""
Make a scratch directory for each client in the cluster, and then for each
test spawn _run_tests() for each role.
timeout=timeout)
# cleanup the generated client directories
- for role, _ in client_remotes.items():
- _delete_dir(ctx, role, created_mountpoint[role])
+ if cleanup:
+ for role, _ in client_remotes.items():
+ _delete_dir(ctx, role, created_mountpoint[role])
def _run_tests(ctx, refspec, role, tests, env, basedir,
- subdir=None, timeout=None):
+ subdir=None, timeout=None, cleanup=True):
"""
Run the individual test. Create a scratch directory and then extract the
workunits from git. Make the executables, and then run the tests.
args=args,
label="workunit test {workunit}".format(workunit=workunit)
)
- remote.run(
- logger=log.getChild(role),
- args=['sudo', 'rm', '-rf', '--', scratch_tmp],
- )
+ if cleanup:
+ remote.run(
+ logger=log.getChild(role),
+ args=['sudo', 'rm', '-rf', '--', scratch_tmp],
+ )
finally:
log.info('Stopping %s on %s...', tests, role)
remote.run(
--- /dev/null
+#!/bin/sh
+
+set -ex
+
+if [ -d "$1" ]; then
+ mkdir -p -- "$1" && cd "$1"
+fi
+
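+# Without VERIFY=verify: build a small hierarchy of directories, files and
+# snapshots. With VERIFY=verify (run again after the upgrade): skip the
+# mkdir/touch steps and only stat the paths, confirming the snapshots are
+# still intact.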
+[ "$VERIFY" != verify ] && mkdir 1
+[ "$VERIFY" != verify ] && mkdir 1/.snap/first
+stat 1/.snap/first
+[ "$VERIFY" != verify ] && mkdir 1/2
+stat 1/.snap/first/2 && exit 1
+[ "$VERIFY" != verify ] && mkdir 1/2/.snap/second
+stat 1/2/.snap/second
+[ "$VERIFY" != verify ] && touch 1/foo
+stat 1/.snap/first/foo && exit 1
+[ "$VERIFY" != verify ] && mkdir 1/.snap/third
+stat 1/.snap/third/foo || exit 1
+[ "$VERIFY" != verify ] && mkdir 1/2/3
+[ "$VERIFY" != verify ] && mkdir 1/2/.snap/fourth
+stat 1/2/.snap/fourth/3
+
+exit 0