From: John Spray
Date: Tue, 29 Jul 2014 15:12:31 +0000 (+0100)
Subject: task/mpi: Explicit check for version mismatch
X-Git-Tag: 1.1.0~1300^2
X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=refs%2Fpull%2F300%2Fhead;p=teuthology.git

task/mpi: Explicit check for version mismatch

Instead of proceeding to have mpiexec fail or hang,
do an up-front check that the MPI version is the
same on all of the nodes in the test.

Signed-off-by: John Spray
---

diff --git a/teuthology/task/mpi.py b/teuthology/task/mpi.py
index 6d2381ee5..e92d93903 100644
--- a/teuthology/task/mpi.py
+++ b/teuthology/task/mpi.py
@@ -1,12 +1,43 @@
 """
 Start mpi processes (and allow commands to be run inside process)
 """
+from StringIO import StringIO
 import logging
+import re
 
 from teuthology import misc as teuthology
 
 log = logging.getLogger(__name__)
 
+
+def _check_mpi_version(remotes):
+    """
+    Retrieve the MPI version from each of `remotes` and raise an exception
+    if they are not all the same version.
+
+    :param remotes: iterable of remotes; each is asked to run
+                    ``mpiexec --version`` and its stdout is parsed.
+    :raises RuntimeError: if any output lacks a ``Version:`` line, if
+                          `remotes` is empty, or if the versions differ.
+    """
+    versions = set()
+    for remote in remotes:
+        version_str = remote.run(args=["mpiexec", "--version"], stdout=StringIO()).stdout.getvalue()
+        # Raw string: keep the regex escapes out of string-escape handling.
+        match = re.search(r"^\s+Version:\s+(.+)$", version_str, re.MULTILINE)
+        if match is None:
+            raise RuntimeError("Malformed MPI version output: {0}".format(version_str))
+        versions.add(match.group(1))
+
+    if not versions:
+        # Nothing was checked, so "mismatch" would be a misleading report.
+        raise RuntimeError("No remotes to check the MPI version on")
+    if len(versions) > 1:
+        # Sorted so the message is stable from run to run.
+        raise RuntimeError("MPI version mismatch. Versions are: {0}".format(", ".join(sorted(versions))))
+    log.info("MPI version %s", next(iter(versions)))
+
+
 def task(ctx, config):
     """
     Setup MPI and execute commands
@@ -91,6 +122,9 @@ def task(ctx, config):
             hosts.append(ip)
             remotes.append(remote)
 
+    # mpich is sensitive to different versions on different nodes
+    _check_mpi_version(remotes)
+
     workdir = []
     if 'workdir' in config:
         workdir = ['-wdir', config['workdir'].replace('$TESTDIR', testdir) ]