)
return proc
+ def do_put(self, pool, obj, fname):
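+        """Write local file fname to object obj in pool (rados put)."""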
+ return self.do_rados(
+ self.controller,
+ [
+ '-p',
+ pool,
+ 'put',
+ obj,
+ fname
+ ]
+ )
+
+ def do_get(self, pool, obj, fname='/dev/null'):
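+        """Read object obj from pool into local file fname (rados get)."""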
+ return self.do_rados(
+ self.controller,
+ [
+ '-p',
+ pool,
+                'get',
+ obj,
+ fname
+ ]
+ )
+
def osd_admin_socket(self, osdnum, command, check_status=True):
testdir = teuthology.get_testdir(self.ctx)
remote = None
check_status=check_status
)
+ def get_pgid(self, pool, pgnum):
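+        """Return the pg id string '<poolnum>.<pgnum>' for pool and pgnum."""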
+ poolnum = self.get_pool_num(pool)
+ pg_str = "{poolnum}.{pgnum}".format(
+ poolnum=poolnum,
+ pgnum=pgnum)
+ return pg_str
+
+ def get_pg_replica(self, pool, pgnum):
+ """
+        get replica for pool, pgnum (e.g. (data, 0)->0)
+ """
+ output = self.raw_cluster_cmd("pg", "dump", '--format=json')
+ j = json.loads('\n'.join(output.split('\n')[1:]))
+ pg_str = self.get_pgid(pool, pgnum)
+ for pg in j['pg_stats']:
+ if pg['pgid'] == pg_str:
+ return int(pg['acting'][-1])
+        assert False, 'pgid %s not found' % pg_str
+
def get_pg_primary(self, pool, pgnum):
"""
get primary for pool, pgnum (e.g. (data, 0)->0
"""
- poolnum = self.get_pool_num(pool)
output = self.raw_cluster_cmd("pg", "dump", '--format=json')
j = json.loads('\n'.join(output.split('\n')[1:]))
- pg_str = "%d.%d" % (poolnum, pgnum)
+ pg_str = self.get_pgid(pool, pgnum)
for pg in j['pg_stats']:
if pg['pgid'] == pg_str:
return int(pg['acting'][0])
ret[status] += 1
return ret
+ def pg_scrubbing(self, pool, pgnum):
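+        """Return True if the pg for (pool, pgnum) is currently scrubbing."""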
+ pgstr = self.get_pgid(pool, pgnum)
+ stats = self.get_single_pg_stats(pgstr)
+ return 'scrub' in stats['state']
+
+ def pg_repairing(self, pool, pgnum):
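+        """Return True if the pg for (pool, pgnum) is currently repairing."""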
+ pgstr = self.get_pgid(pool, pgnum)
+ stats = self.get_single_pg_stats(pgstr)
+ return 'repair' in stats['state']
+
+ def pg_inconsistent(self, pool, pgnum):
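+        """Return True if the pg for (pool, pgnum) is flagged inconsistent."""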
+ pgstr = self.get_pgid(pool, pgnum)
+ stats = self.get_single_pg_stats(pgstr)
+ return 'inconsistent' in stats['state']
+
+ def get_last_scrub_stamp(self, pool, pgnum):
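+        """Return the last_scrub_stamp of the pg for (pool, pgnum)."""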
+ stats = self.get_single_pg_stats(self.get_pgid(pool, pgnum))
+ return stats["last_scrub_stamp"]
+
+ def do_pg_scrub(self, pool, pgnum, stype):
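+        """Trigger stype ('scrub', 'deep-scrub' or 'repair') on the pg and
+        wait until its last_scrub_stamp changes."""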
+ init = self.get_last_scrub_stamp(pool, pgnum)
+ self.raw_cluster_cmd('pg', stype, self.get_pgid(pool, pgnum))
+ while init == self.get_last_scrub_stamp(pool, pgnum):
+            self.log("waiting for scrub type %s" % (stype,))
+ time.sleep(10)
+
def get_single_pg_stats(self, pgid):
all_stats = self.get_pg_stats()
--- /dev/null
+import logging
+
+import ceph_manager
+from teuthology import misc as teuthology
+
+log = logging.getLogger(__name__)
+
+def setup(ctx, config):
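+    """Wait for the cluster to be clean, then create the one-pg test pool."""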
+ ctx.manager.wait_for_clean()
+ ctx.manager.create_pool("repair_test_pool", 1)
+ return "repair_test_pool"
+
+def teardown(ctx, config, pool):
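+    """Remove the test pool and wait for the cluster to go clean again."""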
+ ctx.manager.remove_pool(pool)
+ ctx.manager.wait_for_clean()
+
+def run_test(ctx, config, test):
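+    """Set up a test pool, run one repair test against it, then tear it down."""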
+ s = setup(ctx, config)
+ test(ctx, config, s)
+ teardown(ctx, config, s)
+
+def choose_primary(ctx):
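+    """Return a function mapping (pool, pgnum) to the pg's primary osd."""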
+ def ret(pool, num):
+ log.info("Choosing primary")
+ return ctx.manager.get_pg_primary(pool, num)
+ return ret
+
+def choose_replica(ctx):
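+    """Return a function mapping (pool, pgnum) to the last osd in the
+    pg's acting set (a replica)."""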
+ def ret(pool, num):
+ log.info("Choosing replica")
+ return ctx.manager.get_pg_replica(pool, num)
+ return ret
+
+def trunc(ctx):
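+    """Return a corrupter that truncates the object to length 1 on the
+    given osd via its admin socket (truncobj)."""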
+ def ret(osd, pool, obj):
+ log.info("truncating object")
+ return ctx.manager.osd_admin_socket(
+ osd,
+ ['truncobj', pool, obj, '1'])
+ return ret
+
+def dataerr(ctx):
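+    """Return a corrupter that injects a data error on the object via the
+    given osd's admin socket (injectdataerr)."""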
+ def ret(osd, pool, obj):
+ log.info("injecting data err on object")
+ return ctx.manager.osd_admin_socket(
+ osd,
+ ['injectdataerr', pool, obj])
+ return ret
+
+def mdataerr(ctx):
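+    """Return a corrupter that injects a metadata error on the object via
+    the given osd's admin socket (injectmdataerr)."""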
+ def ret(osd, pool, obj):
+ log.info("injecting mdata err on object")
+ return ctx.manager.osd_admin_socket(
+ osd,
+ ['injectmdataerr', pool, obj])
+ return ret
+
+def gen_repair_test(corrupter, chooser, scrub_type):
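+    """Build a test that writes an object, corrupts it on the osd picked by
+    chooser, checks that a scrub_type scrub flags the pg inconsistent, runs
+    a repair, and verifies the inconsistency clears on re-scrub."""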
+ def ret(ctx, config, pool):
+ log.info("starting repair test")
+ victim_osd = chooser(pool, 0)
+
+ # create object
+ log.info("doing put")
+ ctx.manager.do_put(pool, 'repair_test_obj', '/etc/hosts')
+
+ # corrupt object
+ log.info("corrupting object")
+ corrupter(victim_osd, pool, 'repair_test_obj')
+
+ # verify inconsistent
+ log.info("scrubbing")
+ ctx.manager.do_pg_scrub(pool, 0, scrub_type)
+
+ assert ctx.manager.pg_inconsistent(pool, 0)
+
+ # repair
+ log.info("repairing")
+ ctx.manager.do_pg_scrub(pool, 0, "repair")
+
+ log.info("re-scrubbing")
+ ctx.manager.do_pg_scrub(pool, 0, scrub_type)
+
+ # verify consistent
+ assert not ctx.manager.pg_inconsistent(pool, 0)
+ log.info("done")
+ return ret
+
+def task(ctx, config):
+ """
+ Test [deep] repair in several situations:
+ Repair [Truncate, Data EIO, MData EIO] on [Primary|Replica]
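+
+    For example, a teuthology job yaml can run this task with no options:
+
+        tasks:
+        - repair_test: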
+ """
+ if config is None:
+ config = {}
+ assert isinstance(config, dict), \
+ 'repair_test task only accepts a dict for config'
+
+ if not hasattr(ctx, 'manager'):
+ first_mon = teuthology.get_first_mon(ctx, config)
+ (mon,) = ctx.cluster.only(first_mon).remotes.iterkeys()
+ ctx.manager = ceph_manager.CephManager(
+ mon,
+ ctx=ctx,
+ logger=log.getChild('ceph_manager')
+ )
+
+ tests = [
+ gen_repair_test(mdataerr(ctx), choose_primary(ctx), "scrub"),
+ gen_repair_test(mdataerr(ctx), choose_replica(ctx), "scrub"),
+ gen_repair_test(dataerr(ctx), choose_primary(ctx), "deep-scrub"),
+ gen_repair_test(dataerr(ctx), choose_replica(ctx), "deep-scrub"),
+ gen_repair_test(trunc(ctx), choose_primary(ctx), "scrub"),
+ gen_repair_test(trunc(ctx), choose_replica(ctx), "scrub")
+ ]
+
+ for test in tests:
+ run_test(ctx, config, test)