git.apps.os.sepia.ceph.com Git - ceph.git/commitdiff
qa/tasks/thrashosds,ceph_manager: thrash pg_remap[_items] 13984/head
author Sage Weil <sage@redhat.com>
Thu, 23 Mar 2017 11:48:27 +0000 (06:48 -0500)
committer Sage Weil <sage@redhat.com>
Tue, 28 Mar 2017 14:12:10 +0000 (10:12 -0400)
Signed-off-by: Sage Weil <sage@redhat.com>
qa/suites/rados/upgrade/jewel-x-singleton/3-thrash/default.yaml
qa/suites/upgrade/jewel-x/stress-split-erasure-code/3-thrash/default.yaml
qa/suites/upgrade/jewel-x/stress-split/3-thrash/default.yaml
qa/suites/upgrade/kraken-x/stress-split-erasure-code/3-thrash/default.yaml
qa/suites/upgrade/kraken-x/stress-split/3-thrash/default.yaml
qa/tasks/ceph_manager.py
qa/tasks/thrashosds.py

index bde754e4d5f5b308733d1778d1d31f02ecaa1c35..8785de6c3f24552c13ef719edad279e50500b5f9 100644 (file)
@@ -18,4 +18,6 @@ split_tasks:
         chance_pgnum_grow: 1
         chance_pgpnum_fix: 1
         chance_thrash_cluster_full: 0
+        chance_thrash_pg_remap: 0
+        chance_thrash_pg_remap_items: 0
     - print: "**** done thrashosds 3-thrash"
index 8b4de177db4ab5fdef221c323a4c20b1e04cda82..0272cb94c14498eff3c79d4c8332523592f10048 100644 (file)
@@ -18,4 +18,6 @@ stress-tasks:
     chance_pgpnum_fix: 1
     min_in: 4
     chance_thrash_cluster_full: 0
+    chance_thrash_pg_remap: 0
+    chance_thrash_pg_remap_items: 0
 - print: "**** done thrashosds 3-thrash"
index f33ade4acf9d18c1e7b4a1ca0da7b92561da1197..185bdbceb7feac8e388edfe84e54881fc2a87c10 100644 (file)
@@ -17,4 +17,6 @@ stress-tasks:
     chance_pgnum_grow: 1
     chance_pgpnum_fix: 1
     chance_thrash_cluster_full: 0
+    chance_thrash_pg_remap: 0
+    chance_thrash_pg_remap_items: 0
 - print: "**** done thrashosds 3-thrash"
index 8b4de177db4ab5fdef221c323a4c20b1e04cda82..0272cb94c14498eff3c79d4c8332523592f10048 100644 (file)
@@ -18,4 +18,6 @@ stress-tasks:
     chance_pgpnum_fix: 1
     min_in: 4
     chance_thrash_cluster_full: 0
+    chance_thrash_pg_remap: 0
+    chance_thrash_pg_remap_items: 0
 - print: "**** done thrashosds 3-thrash"
index f33ade4acf9d18c1e7b4a1ca0da7b92561da1197..185bdbceb7feac8e388edfe84e54881fc2a87c10 100644 (file)
@@ -17,4 +17,6 @@ stress-tasks:
     chance_pgnum_grow: 1
     chance_pgpnum_fix: 1
     chance_thrash_cluster_full: 0
+    chance_thrash_pg_remap: 0
+    chance_thrash_pg_remap_items: 0
 - print: "**** done thrashosds 3-thrash"
index 5b00132b20235d5c36c5ad2b05f7b935aeea6982..8ff2556a7a0e9334c296c44f25b75063690dd049 100644 (file)
@@ -123,6 +123,8 @@ class Thrasher:
         self.dump_ops_enable = self.config.get('dump_ops_enable')
         self.noscrub_toggle_delay = self.config.get('noscrub_toggle_delay')
         self.chance_thrash_cluster_full = self.config.get('chance_thrash_cluster_full', .05)
+        self.chance_thrash_pg_remap = self.config.get('chance_thrash_pg_remap', 1.0)
+        self.chance_thrash_pg_remap_items = self.config.get('chance_thrash_pg_remap_items', 1.0)
 
         num_osds = self.in_osds + self.out_osds
         self.max_pgs = self.config.get("max_pgs_per_pool_osd", 1200) * num_osds
@@ -505,6 +507,86 @@ class Thrasher:
         self.log('Setting full ratio back to .95')
         self.ceph_manager.raw_cluster_cmd('osd', 'set-full-ratio', '.95')
 
+    def thrash_pg_remap(self):
+        """
+        Install or remove a random pg_remap entry in the OSDMap.
+
+        About 70% of calls map a random PG onto as many randomly chosen OSDs
+        as its pool's 'size'; the rest remove a random existing entry.  A
+        failed cluster command is logged and ignored.
+        """
+        out = self.ceph_manager.raw_cluster_cmd('osd', 'dump', '-f', 'json-pretty')
+        j = json.loads(out)
+        self.log('j is %s' % j)
+        try:
+            if random.random() >= .3:
+                pgs = self.ceph_manager.get_pg_stats()
+                if not pgs:
+                    return  # no PGs reported yet, nothing to remap
+                pgid = str(random.choice(pgs)['pgid'])
+                poolid = int(pgid.split('.')[0])
+                sizes = [x['size'] for x in j['pools'] if x['pool'] == poolid]
+                if not sizes:
+                    return  # pool is not in this osd dump
+                # '+' makes a fresh list, so shuffling leaves self.in_osds intact
+                osds = self.in_osds + self.out_osds
+                random.shuffle(osds)
+                osds = osds[0:sizes[0]]
+                self.log('Setting %s to %s' % (pgid, osds))
+                cmd = ['osd', 'pg-remap', pgid] + [str(x) for x in osds]
+                self.log('cmd %s' % cmd)
+                self.ceph_manager.raw_cluster_cmd(*cmd)
+            else:
+                m = j.get('pg_remap', [])
+                if m:
+                    pg = random.choice(m)['pgid']
+                    self.log('Clearing pg_remap on %s' % pg)
+                    self.ceph_manager.raw_cluster_cmd('osd', 'rm-pg-remap', pg)
+                else:
+                    self.log('No pg_remap entries; doing nothing')
+        except CommandFailedError:
+            self.log('Failed to update pg_remap, ignoring')
+
+    def thrash_pg_remap_items(self):
+        """
+        Install or remove a random pg_remap_items entry in the OSDMap.
+
+        About 70% of calls pass up to 2 * pool 'size' randomly chosen OSD ids
+        for a random PG; the rest remove a random existing entry.  A failed
+        cluster command is logged and ignored.
+        """
+        out = self.ceph_manager.raw_cluster_cmd('osd', 'dump', '-f', 'json-pretty')
+        j = json.loads(out)
+        self.log('j is %s' % j)
+        try:
+            if random.random() >= .3:
+                pgs = self.ceph_manager.get_pg_stats()
+                if not pgs:
+                    return  # no PGs reported yet, nothing to remap
+                pgid = str(random.choice(pgs)['pgid'])
+                poolid = int(pgid.split('.')[0])
+                sizes = [x['size'] for x in j['pools'] if x['pool'] == poolid]
+                if not sizes:
+                    return  # pool is not in this osd dump
+                # '+' makes a fresh list, so shuffling leaves self.in_osds intact
+                osds = self.in_osds + self.out_osds
+                random.shuffle(osds)
+                osds = osds[0:sizes[0] * 2]
+                self.log('Setting %s to %s' % (pgid, osds))
+                cmd = ['osd', 'pg-remap-items', pgid] + [str(x) for x in osds]
+                self.log('cmd %s' % cmd)
+                self.ceph_manager.raw_cluster_cmd(*cmd)
+            else:
+                m = j.get('pg_remap_items', [])
+                if m:
+                    pg = random.choice(m)['pgid']
+                    self.log('Clearing pg_remap_items on %s' % pg)
+                    self.ceph_manager.raw_cluster_cmd('osd', 'rm-pg-remap-items', pg)
+                else:
+                    self.log('No pg_remap_items entries; doing nothing')
+        except CommandFailedError:
+            self.log('Failed to update pg_remap_items, ignoring')
+
     def all_up(self):
         """
         Make sure all osds are up and not out.
@@ -723,6 +805,11 @@ class Thrasher:
                         chance_test_backfill_full,))
         if self.chance_thrash_cluster_full > 0:
             actions.append((self.thrash_cluster_full, self.chance_thrash_cluster_full,))
+        if self.chance_thrash_pg_remap > 0:
+            actions.append((self.thrash_pg_remap, self.chance_thrash_pg_remap,))
+        if self.chance_thrash_pg_remap_items > 0:
+            actions.append((self.thrash_pg_remap_items, self.chance_thrash_pg_remap_items,))
+
         for key in ['heartbeat_inject_failure', 'filestore_inject_stall']:
             for scenario in [
                 (lambda:
index 876aab395ec7d91c640f67816db743614db94f58..37c52062b5b2217b44fd60d28b28706d84951779 100644 (file)
@@ -122,6 +122,9 @@ def task(ctx, config):
 
     chance_thrash_cluster_full: .05
 
+    chance_thrash_pg_remap: 1.0
+    chance_thrash_pg_remap_items: 1.0
+
     example:
 
     tasks: