``supervisor`` mode. ``supervisor`` reimages the target machines and invokes
``teuthology`` (the command). ``teuthology`` proceeds to execute the job
(execute every task in the YAML job description). After the execution is
-completed (ie ``teuthology`` process exits), ``supervisor`` unlocks or nukes
-the target machines depending on the status of the job. If the requested
-machines are not available, the ``dispatcher`` waits for the machines to be
-available before running anymore jobs. Results from the job are stored in the
-archive directory of the worker for forensic analysis.
+completed (i.e., the ``teuthology`` process exits), ``supervisor`` unlocks the
+target machines. If the requested machines are not available, the ``dispatcher``
+waits for the machines to be available before running any more jobs. Results from
+the job are stored in the archive directory of the worker for forensic analysis.
Since `QA suites <https://github.com/ceph/ceph-qa-suite>`__ usually
specify ``install`` and ``ceph`` tasks, we briefly describe what they do. When
* :ref:`teuthology-lock` - Lock, unlock, and update status of machines
* :ref:`teuthology-ls` - List job results by examining an archive directory
* :ref:`teuthology-openstack` - Use OpenStack backend (wrapper around ``teuthology-suite``)
-* :ref:`teuthology-nuke` - Attempt to return a machine to a pristine state
* :ref:`teuthology-queue` - List, or delete, jobs in the queue
* :ref:`teuthology-report` - Submit test results to a web service (we use `paddles <https://github.com/ceph/paddles/>`__)
* :ref:`teuthology-results` - Examine a finished run and email results
Many of these tasks are used to run python scripts that are defined in the
ceph/ceph-qa-suite.
-If machines were locked as part of the run (with the --lock switch),
-teuthology normally leaves them locked when there is any task failure
-for investigation of the machine state. When developing new teuthology
-tasks, sometimes this behavior is not useful. The ``unlock_on_failure``
-global option can be set to true to make the unlocking happen unconditionally.
-
Troubleshooting
===============
console_scripts =
teuthology = scripts.run:main
teuthology-openstack = scripts.openstack:main
- teuthology-nuke = scripts.nuke:main
teuthology-suite = scripts.suite:main
teuthology-ls = scripts.ls:main
teuthology-worker = scripts.worker:main
from tarfile import ReadError
+from typing import Optional
+
from teuthology.util.compat import urljoin, urlopen, HTTPError
from netaddr.strategy.ipv4 import valid_str as _is_ipv4
else:
return hostname.split('.', 1)[0]
-def canonicalize_hostname(hostname, user='ubuntu'):
+def canonicalize_hostname(hostname, user: Optional[str] = 'ubuntu'):
hostname_expr = hostname_expr_templ.format(
lab_domain=config.lab_domain.replace('.', r'\.'))
match = re.match(hostname_expr, hostname)
return config_dict
-class MergeConfig(argparse.Action):
- """
- Used by scripts to mergeg configurations. (nuke, run, and
- schedule, for example)
- """
- def __call__(self, parser, namespace, values, option_string=None):
- """
- Perform merges of all the day in the config dictionaries.
- """
- config_dict = getattr(namespace, self.dest)
- for new in values:
- deep_merge(config_dict, new)
-
-
def merge_configs(config_paths):
""" Takes one or many paths to yaml config files and merges them
together, returning the result.
SHELL=/bin/bash
-*/30 * * * * ( date ; source $HOME/openrc.sh ; time timeout 900 $HOME/teuthology/virtualenv/bin/teuthology-nuke --stale-openstack ) >> $HOME/cron.log 2>&1
import teuthology.lock.cli
import teuthology.lock.query
import teuthology.lock.util
-import teuthology.nuke
import teuthology.misc
import teuthology.schedule
import teuthology.suite
ssh = ''
assert 'teuthology.log' in teuthology.misc.sh(ssh + " rsync -av " + upload)
- def test_suite_nuke(self):
- cwd = os.getcwd()
- args = ['--suite', 'nuke',
- '--suite-dir', cwd + '/teuthology/openstack/test',
- '--machine-type', 'openstack',
- '--verbose']
- logging.info("TestSuite:test_suite_nuke")
- scripts.suite.main(args)
- self.wait_worker()
- log = self.get_teuthology_log()
- assert "teuthology.run:FAIL" in log
- locks = teuthology.lock.query.list_locks(locked=True)
- assert len(locks) == 0
-
class TestSchedule(Integration):
def setup_method(self):
out, err = capsys.readouterr()
assert 'machine_type' in out
assert 'openstack' in out
-
-class TestNuke(Integration):
-
- def setup_method(self):
- self.options = ['--verbose',
- '--machine-type', 'openstack']
-
- def test_nuke(self):
- image = next(iter(teuthology.openstack.OpenStack.image2url.keys()))
-
- (os_type, os_version, arch) = image.split('-')
- args = scripts.lock.parse_args(self.options +
- ['--lock-many', '1',
- '--os-type', os_type,
- '--os-version', os_version])
- assert teuthology.lock.cli.main(args) == 0
- locks = teuthology.lock.query.list_locks(locked=True)
- logging.info('list_locks = ' + str(locks))
- assert len(locks) == 1
- ctx = argparse.Namespace(name=None,
- config={
- 'targets': { locks[0]['name']: None },
- },
- owner=locks[0]['locked_by'],
- teuthology_config={})
- teuthology.nuke.nuke(ctx, should_unlock=True)
- locks = teuthology.lock.query.list_locks(locked=True)
- assert len(locks) == 0
from teuthology import report
from teuthology.job_status import get_status
from teuthology.misc import get_user, merge_configs
-from teuthology.nuke import nuke
from teuthology.run_tasks import run_tasks
from teuthology.repo_utils import fetch_qa_suite
from teuthology.results import email_results
status = get_status(summary)
passed = status == 'pass'
- if not passed and bool(config.get('nuke-on-error')):
- # only unlock if we locked them in the first place
- nuke(fake_ctx, fake_ctx.lock)
-
if archive is not None:
with open(os.path.join(archive, 'summary.yaml'), 'w') as f:
yaml.safe_dump(summary, f, default_flow_style=False)
'archive_upload': Placeholder('archive_upload'),
'archive_upload_key': Placeholder('archive_upload_key'),
'machine_type': Placeholder('machine_type'),
- 'nuke-on-error': True,
'os_type': Placeholder('distro'),
'os_version': Placeholder('distro_version'),
'overrides': {
try:
proc.wait()
except run.CommandFailedError:
- log.error('Host %s has stale /var/lib/ceph, check lock and nuke/cleanup.', proc.remote.shortname)
+ log.error('Host %s has stale /var/lib/ceph!', proc.remote.shortname)
failed = True
if failed:
raise RuntimeError('Stale /var/lib/ceph detected, aborting.')
assert result == "/some/other/suite/path/qa"
@patch("teuthology.run.get_status")
- @patch("teuthology.run.nuke")
@patch("yaml.safe_dump")
@patch("teuthology.report.try_push_job_info")
@patch("teuthology.run.email_results")
@patch("teuthology.run.open")
@patch("sys.exit")
- def test_report_outcome(self, m_sys_exit, m_open, m_email_results, m_try_push_job_info, m_safe_dump, m_nuke, m_get_status):
+ def test_report_outcome(self, m_sys_exit, m_open, m_email_results, m_try_push_job_info, m_safe_dump, m_get_status):
m_get_status.return_value = "fail"
fake_ctx = Mock()
summary = {"failure_reason": "reasons"}
summary_dump = "failure_reason: reasons\n"
- config = {"nuke-on-error": True, "email-on-error": True}
- config_dump = "nuke-on-error: true\nemail-on-error: true\n"
+ config = {"email-on-error": True}
+ config_dump = "email-on-error: true\n"
m_safe_dump.side_effect = [None, summary_dump, config_dump]
run.report_outcome(config, "the/archive/path", summary, fake_ctx)
- assert m_nuke.called
m_try_push_job_info.assert_called_with(config, summary)
m_open.assert_called_with("the/archive/path/summary.yaml", "w")
assert m_email_results.called