git.apps.os.sepia.ceph.com Git - teuthology.git/commitdiff
dispatcher: Add instrumentation for locking time
author: Zack Cerza <zack@redhat.com>
Thu, 9 Mar 2023 18:28:02 +0000 (11:28 -0700)
committer: Zack Cerza <zack@redhat.com>
Thu, 9 Mar 2023 23:08:15 +0000 (16:08 -0700)
Signed-off-by: Zack Cerza <zack@redhat.com>
teuthology/dispatcher/__init__.py
teuthology/exporter.py

index 69b27072bff15adb2c80b2e88ed7e320b53b97d2..f6f27a056d9b13f8cb4d7649c620aa71c8a61f4a 100644 (file)
@@ -9,6 +9,7 @@ from datetime import datetime
 from typing import Dict, List
 
 import teuthology.dispatcher.supervisor as supervisor
+import teuthology.exporter as exporter
 import teuthology.lock.ops as lock_ops
 import teuthology.nuke as nuke
 import teuthology.worker as worker
@@ -224,13 +225,16 @@ def find_dispatcher_processes() -> Dict[str, List[psutil.Process]]:
 def lock_machines(job_config):
     report.try_push_job_info(job_config, dict(status='running'))
     fake_ctx = supervisor.create_fake_context(job_config, block=True)
-    lock_ops.block_and_lock_machines(
-        fake_ctx,
-        len(job_config['roles']),
-        job_config['machine_type'],
-        tries=-1,
-        reimage=False,
-    )
+    machine_type = job_config["machine_type"]
+    count = len(job_config['roles'])
+    with exporter.NodeLockingTime.labels(machine_type, count).time():
+        lock_ops.block_and_lock_machines(
+            fake_ctx,
+            count,
+            machine_type,
+            tries=-1,
+            reimage=False,
+        )
     job_config = fake_ctx.config
     return job_config
 
index d76b65c4fed1ec99f91425f23f4277f31a86540a..d40e55b109a7d661e749097debfac5bc93b3e41c 100644 (file)
@@ -23,6 +23,7 @@ from prometheus_client import (  # noqa: E402
     start_http_server,
     Gauge,
     Counter,
+    Summary,
     multiprocess,
     CollectorRegistry,
 )
@@ -112,9 +113,7 @@ class BeanstalkQueue(TeuthologyMetric):
 
     def update(self):
         for machine_type in MACHINE_TYPES:
-            queue_stats = beanstalk.stats_tube(
-                beanstalk.connect(), machine_type
-            )
+            queue_stats = beanstalk.stats_tube(beanstalk.connect(), machine_type)
             self.length.labels(machine_type).set(queue_stats["count"])
             self.paused.labels(machine_type).set(1 if queue_stats["paused"] else 0)
 
@@ -183,6 +182,13 @@ class JobResults(TeuthologyMetric):
         self.metric.labels(machine_type=machine_type, status=status).inc()
 
 
+NodeLockingTime = Summary(
+    "teuthology_node_locking_duration_seconds",
+    "Time spent waiting to lock a node",
+    ["machine_type", "count"],
+)
+
+
 def main(args):
     exporter = TeuthologyExporter(interval=int(args["--interval"]))
     exporter.start()