tasks/cephfs: further thrasher fixes
author John Spray <john.spray@redhat.com>
Mon, 11 Jan 2016 19:43:40 +0000 (19:43 +0000)
committer John Spray <john.spray@redhat.com>
Fri, 11 Mar 2016 10:39:37 +0000 (10:39 +0000)
Move the thrasher-specific methods out of CephManager
into MDSThrasher and plumb them into MDSCluster.

Signed-off-by: John Spray <john.spray@redhat.com>
tasks/ceph_manager.py
tasks/cephfs/filesystem.py
tasks/cephfs/test_failover.py
tasks/mds_thrash.py

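For illustration, here is a minimal sketch of the call pattern this commit introduces: the thrasher resolves an MDS rank to a daemon name through the cluster helper (the new MDSCluster.get_mds_info_by_rank) and then acts on the daemon by name. The FakeMDSCluster and FakeThrasher classes below are simplified, hypothetical stand-ins for teuthology's MDSCluster and MDSThrasher, not the real implementations in this diff.

# Minimal sketch of the rank -> name -> action flow moved into MDSThrasher.
# FakeMDSCluster / FakeThrasher are hypothetical stand-ins, not teuthology code.

class FakeMDSCluster(object):
    """Stand-in for tasks.cephfs.filesystem.MDSCluster."""
    def __init__(self, mds_map):
        self._mds_map = mds_map

    def get_mds_info_by_rank(self, mds_rank):
        # Mirrors the new MDSCluster.get_mds_info_by_rank(): scan the
        # mdsmap 'info' entries and return the one holding this rank.
        for mds_info in self._mds_map['info'].values():
            if mds_info['rank'] == mds_rank:
                return mds_info
        return None

class FakeThrasher(object):
    """Stand-in showing how the thrasher now owns kill-by-rank itself."""
    def __init__(self, mds_cluster):
        self.mds_cluster = mds_cluster

    def kill_mds(self, mds):
        # The real method stops (or powercycles) the daemon; just log here.
        print('stopping mds.{0}'.format(mds))

    def kill_mds_by_rank(self, rank):
        # Same shape as the moved helper: resolve rank to name, then kill.
        status = self.mds_cluster.get_mds_info_by_rank(rank)
        self.kill_mds(status['name'])

if __name__ == '__main__':
    mds_map = {'info': {'gid1': {'rank': 0, 'name': 'a'},
                        'gid2': {'rank': 1, 'name': 'b'}}}
    FakeThrasher(FakeMDSCluster(mds_map)).kill_mds_by_rank(1)

The net effect of the refactor is that MDS-specific thrashing actions live in mds_thrash.py, while MDSCluster only exposes generic mdsmap lookups.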
diff --git a/tasks/ceph_manager.py b/tasks/ceph_manager.py
index ce13738ed9936058d5606d8afe4a88e8d7fdd559..378a9ece3ef5cb81633a1b07a1fc2e6cd62064d8 100644 (file)
@@ -49,17 +49,6 @@ def write_conf(ctx, conf_path=DEFAULT_CONF_PATH):
     run.wait(writes)
 
 
-def make_admin_daemon_dir(ctx, remote):
-    """
-    Create /var/run/ceph directory on remote site.
-
-    :param ctx: Context
-    :param remote: Remote site
-    """
-    remote.run(args=['sudo',
-                     'install', '-d', '-m0777', '--', '/var/run/ceph', ], )
-
-
 def mount_osd_data(ctx, remote, osd):
     """
     Mount a remote OSD
@@ -1785,7 +1774,7 @@ class CephManager:
                                 format(o=osd))
             teuthology.reconnect(self.ctx, 60, [remote])
             mount_osd_data(self.ctx, remote, str(osd))
-            make_admin_daemon_dir(self.ctx, remote)
+            self.make_admin_daemon_dir(remote)
             self.ctx.daemons.get_daemon('osd', osd).reset()
         self.ctx.daemons.get_daemon('osd', osd).restart()
 
@@ -1859,7 +1848,7 @@ class CephManager:
                                                 "Check ipmi config.")
 
             remote.console.power_on()
-            make_admin_daemon_dir(self.ctx, remote)
+            self.make_admin_daemon_dir(remote)
         self.ctx.daemons.get_daemon('mon', mon).restart()
 
     def get_mon_status(self, mon):
@@ -1902,60 +1891,6 @@ class CephManager:
             self.log('health:\n{h}'.format(h=out))
         return json.loads(out)
 
-    ## metadata servers
-
-    def kill_mds(self, mds):
-        """
-        Powercyle if set in config, otherwise just stop.
-        """
-        if self.config.get('powercycle'):
-            (remote,) = (self.ctx.cluster.only('mds.{m}'.format(m=mds)).
-                         remotes.iterkeys())
-            self.log('kill_mds on mds.{m} doing powercycle of {s}'.
-                     format(m=mds, s=remote.name))
-            assert remote.console is not None, ("powercycling requested "
-                                                "but RemoteConsole is not "
-                                                "initialized.  "
-                                                "Check ipmi config.")
-            remote.console.power_off()
-        else:
-            self.ctx.daemons.get_daemon('mds', mds).stop()
-
-    def kill_mds_by_rank(self, rank):
-        """
-        kill_mds wrapper to kill based on rank passed.
-        """
-        status = self.get_mds_status_by_rank(rank)
-        self.kill_mds(status['name'])
-
-    def revive_mds(self, mds, standby_for_rank=None):
-        """
-        Revive mds -- do an ipmpi powercycle (if indicated by the config)
-        and then restart (using --hot-standby if specified.
-        """
-        if self.config.get('powercycle'):
-            (remote,) = (self.ctx.cluster.only('mds.{m}'.format(m=mds)).
-                         remotes.iterkeys())
-            self.log('revive_mds on mds.{m} doing powercycle of {s}'.
-                     format(m=mds, s=remote.name))
-            assert remote.console is not None, ("powercycling requested "
-                                                "but RemoteConsole is not "
-                                                "initialized.  "
-                                                "Check ipmi config.")
-            remote.console.power_on()
-            make_admin_daemon_dir(self.ctx, remote)
-        args = []
-        if standby_for_rank:
-            args.extend(['--hot-standby', standby_for_rank])
-        self.ctx.daemons.get_daemon('mds', mds).restart(*args)
-
-    def revive_mds_by_rank(self, rank, standby_for_rank=None):
-        """
-        revive_mds wrapper to revive based on rank passed.
-        """
-        status = self.get_mds_status_by_rank(rank)
-        self.revive_mds(status['name'], standby_for_rank)
-
     def get_mds_status(self, mds):
         """
         Run cluster commands for the mds in order to get mds information
@@ -1968,31 +1903,22 @@ class CephManager:
                 return info
         return None
 
-    def get_mds_status_by_rank(self, rank):
+    def get_filepath(self):
         """
-        Run cluster commands for the mds in order to get mds information
-        check rank.
+        Return path to osd data with {id} needing to be replaced
         """
-        j = self.get_mds_status_all()
-        # collate; for dup ids, larger gid wins.
-        for info in j['info'].itervalues():
-            if info['rank'] == rank:
-                return info
-        return None
+        return "/var/lib/ceph/osd/ceph-{id}"
 
-    def get_mds_status_all(self):
-        """
-        Run cluster command to extract all the mds status.
+    def make_admin_daemon_dir(self, remote):
         """
-        out = self.raw_cluster_cmd('mds', 'dump', '--format=json')
-        j = json.loads(' '.join(out.splitlines()[1:]))
-        return j
+        Create /var/run/ceph directory on remote site.
 
-    def get_filepath(self):
-        """
-        Return path to osd data with {id} needing to be replaced
+        :param remote: Remote site
         """
-        return "/var/lib/ceph/osd/ceph-{id}"
+        remote.run(args=['sudo',
+                         'install', '-d', '-m0777', '--', '/var/run/ceph', ], )
+
 
 def utility_task(name):
     """
diff --git a/tasks/cephfs/filesystem.py b/tasks/cephfs/filesystem.py
index 0d1e178d1016836b2d63967d877a4caf9c46a298..f9644eaf6b068cb82b4ece327e1c56284ec9d904 100644 (file)
@@ -244,6 +244,13 @@ class MDSCluster(object):
 
         return None
 
+    def get_mds_info_by_rank(self, mds_rank):
+        for mds_info in self._all_info():
+            if mds_info['rank'] == mds_rank:
+                return mds_info
+
+        return None
+
 
 class Filesystem(MDSCluster):
     """
diff --git a/tasks/cephfs/test_failover.py b/tasks/cephfs/test_failover.py
index afdfa157d44e3b57aad57edf479f53fa80b10add..7075e2fced07a04f1377f70c378ba57c61e27f6a 100644 (file)
@@ -36,7 +36,8 @@ class TestFailover(CephFSTestCase):
             active = self.fs.get_active_names()
             return active and active[0] in original_standbys
 
-        log.info("Waiting for promotion of one of the original standbys {0}".format(original_standbys))
+        log.info("Waiting for promotion of one of the original standbys {0}".format(
+            original_standbys))
         self.wait_until_true(
             promoted,
             timeout=grace*2)
@@ -76,7 +77,7 @@ class TestFailover(CephFSTestCase):
 
         # Wait for everyone to go laggy
         def laggy():
-            mdsmap = self.fs.mon_manager.get_mds_status_all()
+            mdsmap = self.fs.get_mds_map()
             for info in mdsmap['info'].values():
                 if "laggy_since" not in info:
                     return False
diff --git a/tasks/mds_thrash.py b/tasks/mds_thrash.py
index bd6340764a8fdd80d26f404d1672b89f61a73c28..75530f8a8efaca9aad82596f9a2b30007ca60b8d 100644 (file)
@@ -110,6 +110,10 @@ class MDSThrasher(Greenlet):
         self.failure_group = failure_group
         self.weight = weight
 
+        # TODO support multiple filesystems: will require behavioural change to select
+        # which filesystem to act on when doing rank-ish things
+        self.fs = Filesystem(self.ctx)
+
     def _run(self):
         try:
             self.do_thrash()
@@ -126,15 +130,63 @@ class MDSThrasher(Greenlet):
     def stop(self):
         self.stopping.set()
 
+    def kill_mds(self, mds):
+        if self.config.get('powercycle'):
+            (remote,) = (self.ctx.cluster.only('mds.{m}'.format(m=mds)).
+                         remotes.iterkeys())
+            self.log('kill_mds on mds.{m} doing powercycle of {s}'.
+                     format(m=mds, s=remote.name))
+            assert remote.console is not None, ("powercycling requested "
+                                                "but RemoteConsole is not "
+                                                "initialized.  "
+                                                "Check ipmi config.")
+            remote.console.power_off()
+        else:
+            self.ctx.daemons.get_daemon('mds', mds).stop()
+
+    def kill_mds_by_rank(self, rank):
+        """
+        kill_mds wrapper to kill based on rank passed.
+        """
+        status = self.mds_cluster.get_mds_info_by_rank(rank)
+        self.kill_mds(status['name'])
+
+    def revive_mds(self, mds, standby_for_rank=None):
+        """
+        Revive mds -- do an ipmi powercycle (if indicated by the config)
+        and then restart (using --hot-standby if specified).
+        """
+        if self.config.get('powercycle'):
+            (remote,) = (self.ctx.cluster.only('mds.{m}'.format(m=mds)).
+                         remotes.iterkeys())
+            self.log('revive_mds on mds.{m} doing powercycle of {s}'.
+                     format(m=mds, s=remote.name))
+            assert remote.console is not None, ("powercycling requested "
+                                                "but RemoteConsole is not "
+                                                "initialized.  "
+                                                "Check ipmi config.")
+            remote.console.power_on()
+            self.manager.make_admin_daemon_dir(remote)
+        args = []
+        if standby_for_rank:
+            args.extend(['--hot-standby', standby_for_rank])
+        self.ctx.daemons.get_daemon('mds', mds).restart(*args)
+
+    def revive_mds_by_rank(self, rank, standby_for_rank=None):
+        """
+        revive_mds wrapper to revive based on rank passed.
+        """
+        status = self.mds_cluster.get_mds_info_by_rank(rank)
+        self.revive_mds(status['name'], standby_for_rank)
+
+    def get_mds_status_all(self):
+        return self.fs.get_mds_map()
+
     def do_thrash(self):
         """
         Perform the random thrashing action
         """
 
-        # TODO support multiple filesystems: will require behavioural change to select
-        # which filesystem to act on when doing rank-ish things
-        fs = Filesystem(self.ctx)
-
         self.log('starting mds_do_thrash for failure group: ' + ', '.join(
             ['mds.{_id}'.format(_id=_f) for _f in self.failure_group]))
         while not self.stopping.is_set():
@@ -169,7 +221,7 @@ class MDSThrasher(Greenlet):
             last_laggy_since = None
             itercount = 0
             while True:
-                failed = fs.get_mds_map()['failed']
+                failed = self.fs.get_mds_map()['failed']
                 status = self.mds_cluster.get_mds_info(active_mds)
                 if not status:
                     break
@@ -277,7 +329,6 @@ def task(ctx, config):
         'mds_thrash task requires at least 2 metadata servers'
 
     # choose random seed
-    seed = None
     if 'seed' in config:
         seed = int(config['seed'])
     else:
@@ -349,7 +400,7 @@ def task(ctx, config):
 
         # if thrash_weights isn't specified and we've reached max_thrash,
         # we're done
-        if not 'thrash_weights' in config and len(thrashers) == max_thrashers:
+        if 'thrash_weights' not in config and len(thrashers) == max_thrashers:
             break
 
     try: