From a0c19fcc6393dca205daa2267f5e01a895632f77 Mon Sep 17 00:00:00 2001
From: David Zafman
Date: Mon, 20 Oct 2014 15:03:54 -0700
Subject: [PATCH] ceph_manager: In Thrasher randomly remove past intervals

Signed-off-by: David Zafman
(cherry picked from commit 4a195c78c1d932ecf03055d3cfc2e5584947b657)
---
Review notes (text in this section is not part of the commit message):
 - rm_past_intervals: added the missing line continuation after the
   "--op rm-past-intervals" command string. The '.' ended a physical
   line outside any bracket, which Python rejects as a syntax error.
 - rm_past_intervals: list-pgs now runs with check_status=False so the
   explicit exit status check and its error message are reachable.
 - rm_past_intervals: expanded the docstring and added why-comments.
   Hunk line counts and the diffstat were recomputed for these lines;
   the index line is left as in the original commit.
 - Line breaks and indentation were restored from a whitespace-collapsed
   copy of this patch. Context-line indentation and blank context lines
   are a best-effort reconstruction; verify against the target tree
   before applying.

 tasks/ceph_manager.py | 59 +++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 59 insertions(+)

diff --git a/tasks/ceph_manager.py b/tasks/ceph_manager.py
index adf961b2d402a..b3c9ed89e82c3 100644
--- a/tasks/ceph_manager.py
+++ b/tasks/ceph_manager.py
@@ -105,8 +105,11 @@ class Thrasher:
         if self.cmd_exists_on_osds("ceph-objectstore-tool"):
             self.ceph_objectstore_tool = \
                 self.config.get('ceph_objectstore_tool', False)
+            self.test_rm_past_intervals = \
+                self.config.get('test_rm_past_intervals', True)
         else:
             self.ceph_objectstore_tool = False
+            self.test_rm_past_intervals = False
             self.log("Unable to test ceph_objectstore_tool, "
                      "not available on all OSD nodes")
 
@@ -202,6 +205,60 @@ class Thrasher:
             if imp_remote != exp_remote:
                 imp_remote.run(args=cmd)
 
+    def rm_past_intervals(self, osd=None):
+        """
+        Remove the past intervals of one randomly chosen pg on an osd,
+        using ceph-objectstore-tool on that osd's remote.  Does nothing
+        unless self.test_rm_past_intervals is set.
+
+        :param osd: Osd to find pg to remove past intervals; when None
+                    a random entry of self.dead_osds is used
+        :raises Exception: when the tool reports a non-zero exit status
+        """
+        if self.test_rm_past_intervals:
+            if osd is None:
+                osd = random.choice(self.dead_osds)
+            self.log("Use ceph_objectstore_tool to remove past intervals")
+            (remote,) = self.ceph_manager.ctx.\
+                cluster.only('osd.{o}'.format(o=osd)).remotes.iterkeys()
+            FSPATH = self.ceph_manager.get_filepath()
+            JPATH = os.path.join(FSPATH, "journal")
+            if ('keyvaluestore_backend' in
+                    self.ceph_manager.ctx.ceph.conf['osd']):
+                prefix = ("sudo ceph-objectstore-tool "
+                          "--data-path {fpath} --journal-path {jpath} "
+                          "--type keyvaluestore-dev "
+                          "--log-file="
+                          "/var/log/ceph/objectstore_tool.\\$pid.log ".
+                          format(fpath=FSPATH, jpath=JPATH))
+            else:
+                prefix = ("sudo ceph-objectstore-tool "
+                          "--data-path {fpath} --journal-path {jpath} "
+                          "--log-file="
+                          "/var/log/ceph/objectstore_tool.\\$pid.log ".
+                          format(fpath=FSPATH, jpath=JPATH))
+            cmd = (prefix + "--op list-pgs").format(id=osd)
+            # check_status=False so that a failure reaches the explicit
+            # exit status check below instead of being raised by run()
+            proc = remote.run(args=cmd, wait=True,
+                              check_status=False, stdout=StringIO())
+            if proc.exitstatus:
+                raise Exception("ceph_objectstore_tool: "
+                                "exp list-pgs failure with status {ret}".
+                                format(ret=proc.exitstatus))
+            # drop the last element, presumably '' after the final newline
+            pgs = proc.stdout.getvalue().split('\n')[:-1]
+            if len(pgs) == 0:
+                self.log("No PGs found for osd.{osd}".format(osd=osd))
+                return
+            pg = random.choice(pgs)
+            cmd = (prefix + "--op rm-past-intervals --pgid {pg}").\
+                format(id=osd, pg=pg)
+            proc = remote.run(args=cmd)
+            if proc.exitstatus:
+                raise Exception("ceph_objectstore_tool: "
+                                "rm-past-intervals failure with status {ret}".
+                                format(ret=proc.exitstatus))
 
     def blackhole_kill_osd(self, osd=None):
         """
@@ -456,6 +513,8 @@ class Thrasher:
             actions.append((self.out_osd, 1.0,))
         if len(self.live_osds) > minlive and chance_down > 0:
             actions.append((self.kill_osd, chance_down,))
+        if len(self.dead_osds) > 1:
+            actions.append((self.rm_past_intervals, 1.0,))
         if len(self.out_osds) > minout:
             actions.append((self.in_osd, 1.7,))
         if len(self.dead_osds) > mindead:
-- 
2.39.5