From e510a84afb3a5925eeac7ded7c528596ee48c594 Mon Sep 17 00:00:00 2001 From: Vallari Agrawal Date: Thu, 6 Mar 2025 19:08:29 +0530 Subject: [PATCH] lock/query.py: add grace_time param to node_active_job() This is because we only want this grace period when looking for stale jobs, not when jobs are being unlocked by the supervisor via unlock_one_safe(). Fixes: https://github.com/ceph/teuthology/pull/2033#issuecomment-2703733380 Signed-off-by: Vallari Agrawal --- teuthology/lock/query.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/teuthology/lock/query.py b/teuthology/lock/query.py index 7d79ce9aee..ac4cede070 100644 --- a/teuthology/lock/query.py +++ b/teuthology/lock/query.py @@ -127,17 +127,18 @@ def find_stale_locks(owner=None) -> List[Dict]: # running result = list() for node in nodes: - if node_active_job(node["name"]): + if node_active_job(node["name"], grace_time=5): continue result.append(node) return result -def node_active_job(name: str, status: Union[dict, None] = None) -> Union[str, None]: +def node_active_job(name: str, status: Union[dict, None] = None, grace_time: int = 0) -> Union[str, None]: """ Is this node's job active (e.g. running or waiting)? 
:param node: The node dict as returned from the lock server :param cache: A set() used for caching results + :param grace_time: A period of time (in minutes) after the job finishes before we consider the node inactive :returns: A string if the node has an active job, or None if not """ status = status or get_status(name) @@ -167,9 +168,11 @@ def node_active_job(name: str, status: Union[dict, None] = None) -> Union[str, N if active: break job_updated = job_obj["updated"] + if not grace_time: + break try: delta = datetime.datetime.now(datetime.timezone.utc) - parse_timestamp(job_updated) - active = active or delta < datetime.timedelta(minutes=5) + active = active or delta < datetime.timedelta(minutes=grace_time) except Exception: log.exception(f"{run_name}/{job_id} updated={job_updated}") break -- 2.39.5