]> git-server-git.apps.pok.os.sepia.ceph.com Git - ceph.git/commitdiff
qa: Add bluestore resharding test 40197/head
authorAdam Kupczyk <akupczyk@redhat.com>
Fri, 19 Feb 2021 18:09:48 +0000 (19:09 +0100)
committerNeha Ojha <nojha@redhat.com>
Wed, 17 Mar 2021 19:12:33 +0000 (19:12 +0000)
Signed-off-by: Adam Kupczyk <akupczyk@redhat.com>
(cherry picked from commit a84820b7432926617d710cf05f0e93d0e7151b49)

qa/suites/rados/thrash-erasure-code-big/thrashers/default.yaml
qa/suites/rados/thrash-erasure-code-shec/thrashers/default.yaml
qa/suites/rados/thrash-erasure-code/thrashers/default.yaml
qa/suites/rados/thrash/thrashers/default.yaml
qa/tasks/ceph_manager.py

index c36175c503709c5209566b7f5042b80a3ab9d99c..d2c7b85b50ecd00dbb5231a393d56d526dc07e26 100644 (file)
@@ -17,3 +17,5 @@ tasks:
     chance_pgnum_shrink: 1
     chance_pgpnum_fix: 1
     min_in: 8
+    chance_bluestore_reshard: 1
+    bluestore_new_sharding: random
index 3dab3f9c569079f2c91bd6a71c1fb4e947d4c0d4..a869369fadd6f76f2e74fe9227f32b83673eb620 100644 (file)
@@ -17,3 +17,5 @@ tasks:
     chance_pgnum_shrink: 1
     chance_pgpnum_fix: 1
     min_in: 8
+    chance_bluestore_reshard: 1
+    bluestore_new_sharding: random
index e66dab90de60a53ae4f544cb963dcf6613bab59b..3728bd8e718630b04ca3bcb93f7fd630889dbc58 100644 (file)
@@ -16,3 +16,5 @@ tasks:
     chance_pgnum_shrink: 1
     chance_pgpnum_fix: 1
     min_in: 4
+    chance_bluestore_reshard: 1
+    bluestore_new_sharding: random
index 05e0f8e76d11d53ea633e3a70ef6e2f785c9e28e..5a300a9ff7014b60acd8b00b8b5d8dfd7d73d760 100644 (file)
@@ -24,3 +24,5 @@ tasks:
     chance_pgnum_grow: 1
     chance_pgnum_shrink: 1
     chance_pgpnum_fix: 1
+    chance_bluestore_reshard: 1
+    bluestore_new_sharding: random
index b84d564debd5ce32973816dfea3f8a7dbf650c49..e65ee3a21d2e6fd6174eccf62ad105fb924515d3 100644 (file)
@@ -239,6 +239,22 @@ class OSDThrasher(Thrasher):
                 stdout=StringIO(),
                 stderr=StringIO())
 
+    def run_ceph_bluestore_tool(self, remote, osd, cmd):
+        if self.ceph_manager.cephadm:
+            return shell(
+                self.ceph_manager.ctx, self.ceph_manager.cluster, remote,
+                args=['ceph-bluestore-tool', '--err-to-stderr'] + cmd,
+                name=osd,
+                wait=True, check_status=False,
+                stdout=StringIO(),
+                stderr=StringIO())
+        else:
+            return remote.run(
+                args=['sudo', 'ceph-bluestore-tool', '--err-to-stderr'] + cmd,
+                wait=True, check_status=False,
+                stdout=StringIO(),
+                stderr=StringIO())
+
     def kill_osd(self, osd=None, mark_down=False, mark_out=False):
         """
         :param osd: Osd to be killed.
@@ -953,6 +969,113 @@ class OSDThrasher(Thrasher):
             self.ceph_manager.osd_admin_socket(i, command=['injectfull', 'none'],
                                      check_status=True, timeout=30, stdout=DEVNULL)
 
+
+    def generate_random_sharding(self):
+        prefixes = [
+            'm','O','P','L'
+        ]
+        new_sharding = ''
+        for prefix in prefixes:
+            choose = random.choice([False, True])
+            if not choose:
+                continue
+            if new_sharding != '':
+                new_sharding = new_sharding + ' '
+            columns = random.randint(1, 5)
+            do_hash = random.choice([False, True])
+            if do_hash:
+                low_hash = random.choice([0, 5, 8])
+                do_high_hash = random.choice([False, True])
+                if do_high_hash:
+                    high_hash = random.choice([8, 16, 30]) + low_hash
+                    new_sharding = new_sharding + prefix + '(' + str(columns) + ',' + str(low_hash) + '-' + str(high_hash) + ')'
+                else:
+                    new_sharding = new_sharding + prefix + '(' + str(columns) + ',' + str(low_hash) + '-)'
+            else:
+                if columns == 1:
+                    new_sharding = new_sharding + prefix
+                else:
+                    new_sharding = new_sharding + prefix + '(' + str(columns) + ')'
+        return new_sharding
+
+    def test_bluestore_reshard_action(self):
+        """
+        Test if resharding of bluestore works properly.
+        If bluestore is not used, or bluestore is in version that
+        does not support sharding, skip.
+        """
+
+        osd = random.choice(self.dead_osds)
+        remote = self.ceph_manager.find_remote('osd', osd)
+        FSPATH = self.ceph_manager.get_filepath()
+
+        prefix = [
+                '--no-mon-config',
+                '--log-file=/var/log/ceph/bluestore_tool.$pid.log',
+                '--log-level=10',
+                '--path', FSPATH.format(id=osd)
+            ]
+
+        # sanity check if bluestore-tool accessible
+        self.log('checking if target objectstore is bluestore on osd.%s' % osd)
+        cmd = prefix + [
+            'show-label'
+            ]
+        proc = self.run_ceph_bluestore_tool(remote, 'osd.%s' % osd, cmd)
+        if proc.exitstatus != 0:
+            raise Exception("ceph-bluestore-tool access failed.")
+
+        # check if sharding is possible
+        self.log('checking if target bluestore supports sharding on osd.%s' % osd)
+        cmd = prefix + [
+            'show-sharding'
+            ]
+        proc = self.run_ceph_bluestore_tool(remote, 'osd.%s' % osd, cmd)
+        if proc.exitstatus != 0:
+            self.log("Unable to test resharding, "
+                     "ceph-bluestore-tool does not support it.")
+            return
+
+        # now go for reshard to something else
+        self.log('applying new sharding to bluestore on osd.%s' % osd)
+        new_sharding = self.config.get('bluestore_new_sharding','random')
+
+        if new_sharding == 'random':
+            self.log('generate random sharding')
+            new_sharding = self.generate_random_sharding()
+
+        self.log("applying new sharding: " + new_sharding)
+        cmd = prefix + [
+            '--sharding', new_sharding,
+            'reshard'
+            ]
+        proc = self.run_ceph_bluestore_tool(remote, 'osd.%s' % osd, cmd)
+        if proc.exitstatus != 0:
+            raise Exception("ceph-bluestore-tool resharding failed.")
+
+        # now do fsck to
+        self.log('running fsck to verify new sharding on osd.%s' % osd)
+        cmd = prefix + [
+            'fsck'
+            ]
+        proc = self.run_ceph_bluestore_tool(remote, 'osd.%s' % osd, cmd)
+        if proc.exitstatus != 0:
+            raise Exception("ceph-bluestore-tool fsck failed.")
+        self.log('resharding successfully completed')
+
+    def test_bluestore_reshard(self):
+        """
+        1) kills an osd
+        2) reshards bluestore on killed osd
+        3) revives the osd
+        """
+        self.log('test_bluestore_reshard started')
+        self.kill_osd(mark_down=True, mark_out=True)
+        self.test_bluestore_reshard_action()
+        self.revive_osd()
+        self.log('test_bluestore_reshard completed')
+
+
     def test_map_discontinuity(self):
         """
         1) Allows the osds to recover
@@ -1054,6 +1177,13 @@ class OSDThrasher(Thrasher):
                  self.config.get('chance_inject_pause_long', 0),)]:
                 actions.append(scenario)
 
+        # only consider resharding if objectstore is bluestore
+        cluster_name = self.ceph_manager.cluster
+        cluster = self.ceph_manager.ctx.ceph[cluster_name]
+        if cluster.conf.get('osd', {}).get('osd objectstore', 'bluestore') == 'bluestore':
+            actions.append((self.test_bluestore_reshard,
+                            self.config.get('chance_bluestore_reshard', 0),))
+
         total = sum([y for (x, y) in actions])
         val = random.uniform(0, total)
         for (action, prob) in actions: