git-server-git.apps.pok.os.sepia.ceph.com Git - teuthology.git/commitdiff
dispatcher: Add instrumentation for locking time
author Zack Cerza <zack@redhat.com>
Thu, 9 Mar 2023 18:28:02 +0000 (11:28 -0700)
committer Zack Cerza <zack@redhat.com>
Wed, 22 Mar 2023 15:52:10 +0000 (09:52 -0600)
Signed-off-by: Zack Cerza <zack@redhat.com>
teuthology/dispatcher/__init__.py
teuthology/exporter.py

index b6b03dd5a9318343ca49dd1613bb9da2acd339ba..4ec6fc90b95c4c55c289e509b35f4336f6e3d3fc 100644 (file)
@@ -14,6 +14,7 @@ from teuthology import (
     install_except_hook,
     # modules
     beanstalk,
+    exporter,
     nuke,
     report,
     repo_utils,
@@ -228,13 +229,16 @@ def find_dispatcher_processes() -> Dict[str, List[psutil.Process]]:
 def lock_machines(job_config):
     report.try_push_job_info(job_config, dict(status='running'))
     fake_ctx = supervisor.create_fake_context(job_config, block=True)
-    lock_ops.block_and_lock_machines(
-        fake_ctx,
-        len(job_config['roles']),
-        job_config['machine_type'],
-        tries=-1,
-        reimage=False,
-    )
+    machine_type = job_config["machine_type"]
+    count = len(job_config['roles'])
+    with exporter.NodeLockingTime.labels(machine_type, count).time():
+        lock_ops.block_and_lock_machines(
+            fake_ctx,
+            count,
+            machine_type,
+            tries=-1,
+            reimage=False,
+        )
     job_config = fake_ctx.config
     return job_config
 
index d76b65c4fed1ec99f91425f23f4277f31a86540a..d40e55b109a7d661e749097debfac5bc93b3e41c 100644 (file)
@@ -23,6 +23,7 @@ from prometheus_client import (  # noqa: E402
     start_http_server,
     Gauge,
     Counter,
+    Summary,
     multiprocess,
     CollectorRegistry,
 )
@@ -112,9 +113,7 @@ class BeanstalkQueue(TeuthologyMetric):
 
     def update(self):
         for machine_type in MACHINE_TYPES:
-            queue_stats = beanstalk.stats_tube(
-                beanstalk.connect(), machine_type
-            )
+            queue_stats = beanstalk.stats_tube(beanstalk.connect(), machine_type)
             self.length.labels(machine_type).set(queue_stats["count"])
             self.paused.labels(machine_type).set(1 if queue_stats["paused"] else 0)
 
@@ -183,6 +182,13 @@ class JobResults(TeuthologyMetric):
         self.metric.labels(machine_type=machine_type, status=status).inc()
 
 
+NodeLockingTime = Summary(
+    "teuthology_node_locking_duration_seconds",
+    "Time spent waiting to lock a node",
+    ["machine_type", "count"],
+)
+
+
 def main(args):
     exporter = TeuthologyExporter(interval=int(args["--interval"]))
     exporter.start()