From 4ec37b2391bfe7eb92dc55e81b21f5f4c94be51c Mon Sep 17 00:00:00 2001
From: Sage Weil
Date: Mon, 17 Oct 2011 15:32:22 -0700
Subject: [PATCH] add lost_unfound task

Also some misc useful bits to ceph_manager.
---
 teuthology/task/ceph_manager.py | 58 ++++++++++++
 teuthology/task/lost_unfound.py | 153 ++++++++++++++++++++++++++++++++
 2 files changed, 211 insertions(+)
 create mode 100644 teuthology/task/lost_unfound.py

diff --git a/teuthology/task/ceph_manager.py b/teuthology/task/ceph_manager.py
index 3351129084bfb..ea81cc3e192ff 100644
--- a/teuthology/task/ceph_manager.py
+++ b/teuthology/task/ceph_manager.py
@@ -3,6 +3,7 @@ import random
 import time
 import re
 import gevent
+import json
 from ..orchestra import run
 
 class Thrasher(gevent.Greenlet):
@@ -141,6 +142,7 @@ class CephManager:
             '/tmp/cephtest/binary/usr/local/bin/ceph',
             '-k', '/tmp/cephtest/ceph.keyring',
             '-c', '/tmp/cephtest/ceph.conf',
+            '--concise',
             ]
         ceph_args.extend(args)
         proc = self.controller.run(
@@ -185,6 +187,27 @@ class CephManager:
             "\d* pgs:",
             status).group(0).split()[0])
 
+    def get_pg_stats(self):
+        out = self.raw_cluster_cmd('--', 'pg','dump','--format=json')
+        j = json.loads('\n'.join(out.split('\n')[1:]))
+        return j['pg_stats']
+
+    def get_osd_dump(self):
+        out = self.raw_cluster_cmd('--', 'osd','dump','--format=json')
+        j = json.loads('\n'.join(out.split('\n')[1:]))
+        return j['osds']
+
+    def get_num_unfound_objects(self):
+        status = self.raw_cluster_status()
+        self.log(status)
+        match = re.search(
+            "\d+/\d+ unfound",
+            status)
+        if match == None:
+            return 0
+        else:
+            return int(match.group(0).split('/')[0])
+
     def get_num_active_clean(self):
         status = self.raw_cluster_status()
         self.log(status)
@@ -196,6 +219,14 @@ class CephManager:
         else:
             return int(match.group(0).split()[0])
 
+    def get_num_active(self):
+        pgs = self.get_pg_stats()
+        num = 0
+        for pg in pgs:
+            if pg['state'].startswith('active'):
+                num += 1
+        return num
+
     def is_clean(self):
         return self.get_num_active_clean() == self.get_num_pgs()
 
@@ -209,6 +240,33 @@ class CephManager:
             time.sleep(3)
         self.log("clean!")
 
+    def osd_is_up(self, osd):
+        osds = self.get_osd_dump()
+        return osds[osd]['up'] > 0
+
+    def wait_till_osd_is_up(self, osd, timeout=None):
+        self.log('waiting for osd.%d to be up' % osd);
+        start = time.time()
+        while not self.osd_is_up(osd):
+            if timeout is not None:
+                assert time.time() - start < timeout, \
+                    'osd.%d failed to come up before timeout expired' % osd
+            time.sleep(3)
+        self.log('osd.%d is up' % osd)
+
+    def is_active(self):
+        return self.get_num_active() == self.get_num_pgs()
+
+    def wait_till_active(self, timeout=None):
+        self.log("waiting till active")
+        start = time.time()
+        while not self.is_active():
+            if timeout is not None:
+                assert time.time() - start < timeout, \
+                    'failed to become active before timeout expired'
+            time.sleep(3)
+        self.log("active!")
+
     def mark_out_osd(self, osd):
         self.raw_cluster_cmd('osd', 'out', str(osd))
 
diff --git a/teuthology/task/lost_unfound.py b/teuthology/task/lost_unfound.py
new file mode 100644
index 0000000000000..906f29d7497d0
--- /dev/null
+++ b/teuthology/task/lost_unfound.py
@@ -0,0 +1,153 @@
+import contextlib
+import logging
+import ceph_manager
+from teuthology import misc as teuthology
+import time
+import json
+
+
+log = logging.getLogger(__name__)
+
+
+def rados(remote, cmd):
+    log.info("rados %s" % ' '.join(cmd))
+    pre = [
+        'LD_LIBRARY_PATH=/tmp/cephtest/binary/usr/local/lib',
+        '/tmp/cephtest/enable-coredump',
+        '/tmp/cephtest/binary/usr/local/bin/ceph-coverage',
+        '/tmp/cephtest/archive/coverage',
+        '/tmp/cephtest/binary/usr/local/bin/rados',
+        '-c', '/tmp/cephtest/ceph.conf',
+        ];
+    pre.extend(cmd)
+    proc = remote.run(
+        args=pre,
+        check_status=False
+        )
+    return proc.exitstatus
+
+def task(ctx, config):
+    """
+    Test handling of lost objects.
+    """
+    if config is None:
+        config = {}
+    assert isinstance(config, dict), \
+        'thrashosds task only accepts a dict for configuration'
+    first_mon = teuthology.get_first_mon(ctx, config)
+    (mon,) = ctx.cluster.only(first_mon).remotes.iterkeys()
+
+    manager = ceph_manager.CephManager(
+        mon,
+        ctx=ctx,
+        logger=log.getChild('ceph_manager'),
+        )
+
+    while manager.get_osd_status()['up'] < 3:
+        manager.sleep(10)
+    manager.raw_cluster_cmd('tell', 'osd.0', 'flush_pg_stats')
+    manager.raw_cluster_cmd('tell', 'osd.1', 'flush_pg_stats')
+    manager.raw_cluster_cmd('tell', 'osd.2', 'flush_pg_stats')
+    manager.wait_till_clean()
+
+    # something that is always there
+    dummyfile = '/etc/fstab'
+
+    # take an osd out until the very end
+    manager.kill_osd(2)
+    manager.mark_down_osd(2)
+    manager.mark_out_osd(2)
+
+    # kludge to make sure they get a map
+    rados(mon, ['-p', 'data', 'put', 'dummy', dummyfile])
+
+    manager.raw_cluster_cmd('tell', 'osd.0', 'flush_pg_stats')
+    manager.raw_cluster_cmd('tell', 'osd.1', 'flush_pg_stats')
+    manager.wait_till_clean()
+
+    # create old objects
+    for f in range(1, 10):
+        rados(mon, ['-p', 'data', 'put', 'existing_%d' % f, dummyfile])
+        rados(mon, ['-p', 'data', 'put', 'existed_%d' % f, dummyfile])
+        rados(mon, ['-p', 'data', 'rm', 'existed_%d' % f])
+
+    manager.raw_cluster_cmd(
+        'tell', 'osd.1',
+        'injectargs', '--osd-recovery-delay-start 1000'
+        )
+
+    manager.kill_osd(0)
+    manager.mark_down_osd(0)
+
+    for f in range(1, 10):
+        rados(mon, ['-p', 'data', 'put', 'new_%d' % f, dummyfile])
+        rados(mon, ['-p', 'data', 'put', 'existed_%d' % f, dummyfile])
+        rados(mon, ['-p', 'data', 'put', 'existing_%d' % f, dummyfile])
+
+    # bring osd.0 back up, let it peer, but don't replicate the new
+    # objects...
+    log.info('osd.0 command_args is %s' % 'foo')
+    log.info(ctx.daemons.get_daemon('osd', 0).command_args)
+    ctx.daemons.get_daemon('osd', 0).command_kwargs['args'].extend([
+        '--osd-recovery-delay-start', '1000'
+        ])
+    manager.revive_osd(0)
+    manager.wait_till_osd_is_up(0)
+
+    manager.raw_cluster_cmd('tell', 'osd.1', 'flush_pg_stats')
+    manager.raw_cluster_cmd('tell', 'osd.0', 'flush_pg_stats')
+    manager.wait_till_active()
+
+    # take out osd.1 and the only copy of those objects.
+    manager.kill_osd(1)
+    manager.mark_down_osd(1)
+    manager.mark_out_osd(1)
+    manager.raw_cluster_cmd('osd', 'lost', '1', '--yes-i-really-mean-it')
+
+    # bring up osd.2 so that things would otherwise, in theory, recovery fully
+    manager.revive_osd(2)
+    manager.wait_till_osd_is_up(2)
+
+    manager.raw_cluster_cmd('tell', 'osd.0', 'flush_pg_stats')
+    manager.raw_cluster_cmd('tell', 'osd.2', 'flush_pg_stats')
+    manager.wait_till_active()
+    manager.raw_cluster_cmd('tell', 'osd.0', 'flush_pg_stats')
+    manager.raw_cluster_cmd('tell', 'osd.2', 'flush_pg_stats')
+
+    # verify that there are unfound objects
+    unfound = manager.get_num_unfound_objects()
+    log.info("there are %d unfound objects" % unfound)
+    assert unfound
+
+    # mark stuff lost
+    pgs = manager.get_pg_stats()
+    for pg in pgs:
+        if pg['stat_sum']['num_objects_unfound'] > 0:
+            primary = 'osd.%d' % pg['acting'][0]
+            log.info("reverting unfound in %s on %s", pg['pgid'], primary)
+            manager.raw_cluster_cmd(
+                'tell', primary,
+                'mark_unfound_lost', pg['pgid'], 'revert'
+                )
+        else:
+            log.info("no unfound in %s", pg['pgid'])
+
+    manager.raw_cluster_cmd('tell', 'osd.0', 'debug', 'kick_recovery_wq', '5')
+    manager.raw_cluster_cmd('tell', 'osd.2', 'debug', 'kick_recovery_wq', '5')
+    manager.raw_cluster_cmd('tell', 'osd.0', 'flush_pg_stats')
+    manager.raw_cluster_cmd('tell', 'osd.2', 'flush_pg_stats')
+    manager.wait_till_clean()
+
+    # verify result
+    for f in range(1, 10):
+        err = rados(mon, ['-p', 'data', 'get', 'new_%d' % f, '-'])
+        assert err
+        err = rados(mon, ['-p', 'data', 'get', 'existed_%d' % f, '-'])
+        assert err
+        err = rados(mon, ['-p', 'data', 'get', 'existing_%d' % f, '-'])
+        assert not err
+
+    # see if osd.1 can cope
+    manager.revive_osd(1)
+    manager.wait_till_osd_is_up(1)
+    manager.wait_till_clean()
-- 
2.39.5