From: Zack Cerza Date: Tue, 23 May 2023 19:53:23 +0000 (-0600) Subject: exporter: Instrument node reimaging success/fail X-Git-Tag: 1.2.0~106^2~2 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=d1b85f4126c5f12f96fb22c97f9314a9e7885730;p=teuthology.git exporter: Instrument node reimaging success/fail Signed-off-by: Zack Cerza --- diff --git a/teuthology/exporter.py b/teuthology/exporter.py index f1d910da..b5986de1 100644 --- a/teuthology/exporter.py +++ b/teuthology/exporter.py @@ -184,6 +184,22 @@ class _JobResults(TeuthologyMetric): JobResults = _JobResults() + +class _NodeReimagingResults(TeuthologyMetric): + def __init__(self): + self.metric = Counter( + "teuthology_reimaging_results", + "Teuthology Reimaging Results", + ["machine_type", "status"], + ) + + # As this is to be used within job processes, we implement record() rather than update() + def record(self, machine_type, status): + self.metric.labels(machine_type=machine_type, status=status).inc() + + +NodeReimagingResults = _NodeReimagingResults() + NodeLockingTime = Summary( "teuthology_node_locking_duration_seconds", "Time spent waiting to lock nodes", diff --git a/teuthology/provision/__init__.py b/teuthology/provision/__init__.py index 325f2c34..2e9ba23f 100644 --- a/teuthology/provision/__init__.py +++ b/teuthology/provision/__init__.py @@ -1,5 +1,6 @@ import logging +import teuthology.exporter import teuthology.lock.query from teuthology.misc import decanonicalize_hostname, get_distro, get_distro_version @@ -18,9 +19,11 @@ def _logfile(ctx, shortname): return os.path.join(ctx.config['archive_path'], shortname + '.downburst.log') + def get_reimage_types(): return pelagos.get_types() + fog.get_types() + def reimage(ctx, machine_name, machine_type): os_type = get_distro(ctx) os_version = get_distro_version(ctx) @@ -36,7 +39,21 @@ def reimage(ctx, machine_name, machine_type): else: raise Exception("The machine_type '%s' is not known to any " "of configured provisioners" 
% machine_type) - return obj.create() + status = "fail" + try: + result = obj.create() + status = "success" + except Exception: + # NOTE: this clause only re-raises; it does not keep the finally clause + # below from running. finally executes on every exit path, so a + # KeyboardInterrupt or SystemExit is also recorded with status "fail" + raise + finally: + teuthology.exporter.NodeReimagingResults.record( + ctx.config.get("machine_type"), + status, + ) + return result def create_if_vm(ctx, machine_name, _downburst=None):