From: Kefu Chai
Date: Sat, 27 May 2017 08:58:12 +0000 (+0800)
Subject: qa/tasks/rebuild_mondb: update to address ceph-mgr changes
X-Git-Tag: v12.1.0~57^2~20^2
X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=8abc6e1bea14bd82415ad3cb1e15ad707fd4af7b;p=ceph.git

qa/tasks/rebuild_mondb: update to address ceph-mgr changes

- revive ceph-mgr after updating the keyring cap
- grant "mgr:allow *" to client.admin
- minor refactors

Signed-off-by: Kefu Chai
---

diff --git a/qa/tasks/ceph_manager.py b/qa/tasks/ceph_manager.py
index 9b59622ffec7d..ab3df5773f415 100644
--- a/qa/tasks/ceph_manager.py
+++ b/qa/tasks/ceph_manager.py
@@ -2268,6 +2268,20 @@ class CephManager:
             self.make_admin_daemon_dir(remote)
         self.ctx.daemons.get_daemon('mon', mon, self.cluster).restart()
 
+    def revive_mgr(self, mgr):
+        """
+        Restart by either power cycling (if the config says so),
+        or by doing a normal restart.
+        """
+        if self.config.get('powercycle'):
+            remote = self.find_remote('mgr', mgr)
+            self.log('revive_mgr on mgr.{m} doing powercycle of {s}'.
+                     format(m=mgr, s=remote.name))
+            self._assert_ipmi(remote)
+            remote.console.power_on()
+            self.make_admin_daemon_dir(remote)
+        self.ctx.daemons.get_daemon('mgr', mgr, self.cluster).restart()
+
     def get_mon_status(self, mon):
         """
         Extract all the monitor status information from the cluster
diff --git a/qa/tasks/rebuild_mondb.py b/qa/tasks/rebuild_mondb.py
index fe70c641d7aa0..900bd16a41fbe 100644
--- a/qa/tasks/rebuild_mondb.py
+++ b/qa/tasks/rebuild_mondb.py
@@ -14,7 +14,7 @@ from teuthology import misc as teuthology
 log = logging.getLogger(__name__)
 
 
-def push_directory(path, remote, remote_dir):
+def _push_directory(path, remote, remote_dir):
     """
     local_temp_path=`mktemp`
     tar czf $local_temp_path $path
@@ -46,45 +46,20 @@ def push_directory(path, remote, remote_dir):
     remote.run(args=['sudo', 'rm', '-fr', remote_temp_path])
 
 
-def task(ctx, config):
-    """
-    Test monitor recovery from OSD
-    """
-    if config is None:
-        config = {}
-    assert isinstance(config, dict), \
-        'task only accepts a dict for configuration'
-
-    first_mon = teuthology.get_first_mon(ctx, config)
-    (mon,) = ctx.cluster.only(first_mon).remotes.iterkeys()
-
-    manager = ceph_manager.CephManager(
-        mon,
-        ctx=ctx,
-        logger=log.getChild('ceph_manager'))
-
-    mons = ctx.cluster.only(teuthology.is_type('mon'))
+def _nuke_mons(manager, mons, mon_id):
     assert mons
-    # note down the first cluster_name and mon_id
-    # we will recover it later on
-    cluster_name = None
-    mon_id = None
+    is_mon = teuthology.is_type('mon')
     for remote, roles in mons.remotes.iteritems():
-        is_mon = teuthology.is_type('mon')
         for role in roles:
             if not is_mon(role):
                 continue
             cluster, _, m = teuthology.split_role(role)
-            if cluster_name is None:
-                cluster_name = cluster
-                mon_id = m
-            assert cluster_name == cluster
             log.info('killing {cluster}:mon.{mon}'.format(
                 cluster=cluster,
                 mon=m))
             manager.kill_mon(m)
             mon_data = os.path.join('/var/lib/ceph/mon/',
-                                    '{0}-{1}'.format(cluster_name, m))
+                                    '{0}-{1}'.format(cluster, m))
             if m == mon_id:
                 # so we will only need to recreate the store.db for the
                 # first mon, would be easier than mkfs on it then replace
@@ -94,13 +69,15 @@ def task(ctx, config):
         else:
             remote.run(args=['sudo', 'rm', '-r', mon_data])
 
+
+def _rebuild_db(ctx, manager, cluster_name, mon, mon_id, keyring_path):
     local_mstore = tempfile.mkdtemp()
 
     # collect the maps from all OSDs
-    osds = ctx.cluster.only(teuthology.is_type('osd'))
+    is_osd = teuthology.is_type('osd')
+    osds = ctx.cluster.only(is_osd)
     assert osds
     for osd, roles in osds.remotes.iteritems():
-        is_osd = teuthology.is_type('osd')
         for role in roles:
             if not is_osd(role):
                 continue
@@ -113,7 +90,7 @@ def task(ctx, config):
             osd_mstore = os.path.join(teuthology.get_testdir(ctx), 'mon-store')
             osd.run(args=['sudo', 'mkdir', '-m', 'o+x', '-p', osd_mstore])
 
-            push_directory(local_mstore, osd, osd_mstore)
+            _push_directory(local_mstore, osd, osd_mstore)
             log.info('rm -rf {0}'.format(local_mstore))
             shutil.rmtree(local_mstore)
             # update leveldb with OSD data
@@ -135,12 +112,10 @@ def task(ctx, config):
     # pull from recovered leveldb from client
     mon_store_dir = os.path.join('/var/lib/ceph/mon',
                                  '{0}-{1}'.format(cluster_name, mon_id))
-    push_directory(local_mstore, mon, mon_store_dir)
+    _push_directory(local_mstore, mon, mon_store_dir)
     mon.run(args=['sudo', 'chown', '-R', 'ceph:ceph', mon_store_dir])
     shutil.rmtree(local_mstore)
-    default_keyring = '/etc/ceph/{cluster}.keyring'.format(
-        cluster=cluster_name)
-    keyring_path = config.get('keyring_path', default_keyring)
+
     # fill up the caps in the keyring file
     mon.run(args=['sudo',
                   'ceph-authtool', keyring_path,
@@ -151,23 +126,25 @@ def task(ctx, config):
                   '-n', 'client.admin',
                   '--cap', 'mon', 'allow *',
                   '--cap', 'osd', 'allow *',
-                  '--cap', 'mds', 'allow *'])
+                  '--cap', 'mds', 'allow *',
+                  '--cap', 'mgr', 'allow *'])
 
     mon.run(args=['sudo', '-u', 'ceph',
                   'ceph-monstore-tool', mon_store_dir,
                   'rebuild', '--', '--keyring', keyring_path])
 
+
+def _revive_mons(manager, mons, recovered, keyring_path):
     # revive monitors
     # the initial monmap is in the ceph.conf, so we are good.
     n_mons = 0
+    is_mon = teuthology.is_type('mon')
     for remote, roles in mons.remotes.iteritems():
-        is_mon = teuthology.is_type('mon')
         for role in roles:
             if not is_mon(role):
                 continue
             cluster, _, m = teuthology.split_role(role)
-            assert cluster_name == cluster
-            if mon_id != m:
+            if recovered != m:
                 log.info('running mkfs on {cluster}:mon.{mon}'.format(
                     cluster=cluster,
                     mon=m))
@@ -179,15 +156,61 @@ def task(ctx, config):
                         '--mkfs',
                         '-i', m,
                         '--keyring', keyring_path])
+
             log.info('reviving mon.{0}'.format(m))
             manager.revive_mon(m)
             n_mons += 1
-    manager.wait_for_mon_quorum_size(n_mons, timeout=30)
-    for osd, roles in osds.remotes.iteritems():
-        is_osd = teuthology.is_type('osd')
+
+
+def _revive_mgrs(ctx, manager):
+    is_mgr = teuthology.is_type('mgr')
+    mgrs = ctx.cluster.only(is_mgr)
+    for _, roles in mgrs.remotes.iteritems():
+        for role in roles:
+            if not is_mgr(role):
+                continue
+            _, _, mgr_id = teuthology.split_role(role)
+            log.info('reviving mgr.{0}'.format(mgr_id))
+            manager.revive_mgr(mgr_id)
+
+
+def _revive_osds(ctx, manager):
+    is_osd = teuthology.is_type('osd')
+    osds = ctx.cluster.only(is_osd)
+    for _, roles in osds.remotes.iteritems():
         for role in roles:
             if not is_osd(role):
                 continue
             _, _, osd_id = teuthology.split_role(role)
             log.info('reviving osd.{0}'.format(osd_id))
             manager.revive_osd(osd_id)
+
+
+def task(ctx, config):
+    """
+    Test monitor recovery from OSD
+    """
+    if config is None:
+        config = {}
+    assert isinstance(config, dict), \
+        'task only accepts a dict for configuration'
+
+    first_mon = teuthology.get_first_mon(ctx, config)
+    (mon,) = ctx.cluster.only(first_mon).remotes.iterkeys()
+    manager = ceph_manager.CephManager(
+        mon,
+        ctx=ctx,
+        logger=log.getChild('ceph_manager'))
+
+    mons = ctx.cluster.only(teuthology.is_type('mon'))
+    # note down the first cluster_name and mon_id
+    # we will recover it later on
+    cluster_name, _, mon_id = teuthology.split_role(first_mon)
+    _nuke_mons(manager, mons, mon_id)
+    default_keyring = '/etc/ceph/{cluster}.keyring'.format(
+        cluster=cluster_name)
+    keyring_path = config.get('keyring_path', default_keyring)
+    _rebuild_db(ctx, manager, cluster_name, mon, mon_id, keyring_path)
+    _revive_mons(manager, mons, mon_id, keyring_path)
+    _revive_mgrs(ctx, manager)
+    _revive_osds(ctx, manager)
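
For reference, a minimal sketch of the client.admin entry that the keyring
(/etc/ceph/<cluster>.keyring by default, unless the task's keyring_path option
overrides it) is expected to contain after the ceph-authtool calls above; the
key value shown is a placeholder, everything else follows from the caps set in
the patch:

    [client.admin]
            key = <placeholder secret>
            caps mds = "allow *"
            caps mgr = "allow *"
            caps mon = "allow *"
            caps osd = "allow *"

Since ceph-monstore-tool rebuilds the auth entries from this keyring, running
"ceph auth get client.admin" once the cluster is back up should report the same
caps, including the newly granted "mgr allow *".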