git.apps.os.sepia.ceph.com Git - ceph.git/commitdiff
ceph_manager: In Thrasher randomly remove past intervals
author David Zafman <dzafman@redhat.com>
Mon, 20 Oct 2014 22:03:54 +0000 (15:03 -0700)
committer David Zafman <dzafman@redhat.com>
Wed, 4 Mar 2015 00:09:52 +0000 (16:09 -0800)
Signed-off-by: David Zafman <dzafman@redhat.com>
(cherry picked from commit 4a195c78c1d932ecf03055d3cfc2e5584947b657)

tasks/ceph_manager.py

index adf961b2d402a9ac0c0908c7b5efd5ad1d5ad5c3..b3c9ed89e82c3b2c0ff450a55488b50e0ee72ae3 100644 (file)
@@ -105,8 +105,11 @@ class Thrasher:
         if self.cmd_exists_on_osds("ceph-objectstore-tool"):
             self.ceph_objectstore_tool = \
                 self.config.get('ceph_objectstore_tool', False)
+            self.test_rm_past_intervals = \
+                self.config.get('test_rm_past_intervals', True)
         else:
             self.ceph_objectstore_tool = False
+            self.test_rm_past_intervals = False
             self.log("Unable to test ceph_objectstore_tool, "
                      "not available on all OSD nodes")
 
@@ -202,6 +205,51 @@ class Thrasher:
             if imp_remote != exp_remote:
                 imp_remote.run(args=cmd)
 
+    def rm_past_intervals(self, osd=None):
+        """
+        Run ceph-objectstore-tool to remove past intervals of a random pg.
+
+        Does nothing unless self.test_rm_past_intervals is set.
+
+        :param osd: Osd whose pgs are listed; defaults to a random dead osd
+        """
+        if self.test_rm_past_intervals:
+            if osd is None:
+                osd = random.choice(self.dead_osds)
+            self.log("Use ceph_objectstore_tool to remove past intervals")
+            (remote,) = self.ceph_manager.ctx.\
+                cluster.only('osd.{o}'.format(o=osd)).remotes.iterkeys()
+            FSPATH = self.ceph_manager.get_filepath()
+            JPATH = os.path.join(FSPATH, "journal")
+            # pass --type only when the osd conf sets keyvaluestore_backend
+            stype = ""
+            if ('keyvaluestore_backend' in
+                    self.ceph_manager.ctx.ceph.conf['osd']):
+                stype = "--type keyvaluestore-dev "
+            prefix = ("sudo ceph-objectstore-tool "
+                      "--data-path {fpath} --journal-path {jpath} "
+                      "{stype}--log-file="
+                      "/var/log/ceph/objectstore_tool.\\$pid.log ".
+                      format(fpath=FSPATH, jpath=JPATH, stype=stype))
+            cmd = (prefix + "--op list-pgs").format(id=osd)
+            proc = remote.run(args=cmd, wait=True,
+                              check_status=True, stdout=StringIO())
+            if proc.exitstatus:
+                raise Exception("ceph_objectstore_tool: "
+                                "exp list-pgs failure with status {ret}".
+                                format(ret=proc.exitstatus))
+            pgs = proc.stdout.getvalue().split('\n')[:-1]
+            if not pgs:
+                self.log("No PGs found for osd.{osd}".format(osd=osd))
+                return
+            pg = random.choice(pgs)
+            cmd = (prefix + "--op rm-past-intervals --pgid {pg}").format(
+                id=osd, pg=pg)
+            proc = remote.run(args=cmd)
+            if proc.exitstatus:
+                raise Exception("ceph_objectstore_tool: "
+                                "rm-past-intervals failure with status {ret}".
+                                format(ret=proc.exitstatus))
 
     def blackhole_kill_osd(self, osd=None):
         """
@@ -456,6 +504,8 @@ class Thrasher:
             actions.append((self.out_osd, 1.0,))
         if len(self.live_osds) > minlive and chance_down > 0:
             actions.append((self.kill_osd, chance_down,))
+        if len(self.dead_osds) > 1:
+            actions.append((self.rm_past_intervals, 1.0,))
         if len(self.out_osds) > minout:
             actions.append((self.in_osd, 1.7,))
         if len(self.dead_osds) > mindead: