From b164cdc39dd302e90f57a85721c4fc7bbd5d1112 Mon Sep 17 00:00:00 2001 From: Kyr Shatskyy Date: Tue, 12 May 2020 18:27:49 +0200 Subject: [PATCH] schedule: do not report status for first and last in suite jobs Addresses the issue where a teuthology run gets stuck with first_in_suite or last_in_suite jobs in queued state. Attention: This change requires the following steps, which are not mutually exclusive: 1) The server's teuthology workers must be restarted, otherwise the old worker's code will try to remove the reported job from paddles and exit with an unexpected exception. 2) The user's teuthology runner environment should be updated to recent code, because new workers will not clean up FIS and LIS jobs and they will remain in paddles; as a result, the run will get stuck. Requires: a34fb6a3694538db8496b3bc599c3e62b9acaca6 Fixes: http://tracker.ceph.com/issues/43291 Signed-off-by: Kyr Shatskyy --- teuthology/schedule.py | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/teuthology/schedule.py b/teuthology/schedule.py index e8049bcfac..309f68be4d 100644 --- a/teuthology/schedule.py +++ b/teuthology/schedule.py @@ -23,6 +23,11 @@ def main(args): if args[opt]: raise ValueError(msg_fmt.format(opt=opt)) + if args['--first-in-suite'] or args['--last-in-suite']: + report_status = False + else: + report_status = True + name = args['--name'] if not name or name.isdigit(): raise ValueError("Please use a more descriptive value for --name") @@ -30,7 +35,7 @@ def main(args): if args['--dry-run']: pprint.pprint(job_config) else: - schedule_job(job_config, args['--num']) + schedule_job(job_config, args['--num'], report_status) def build_config(args): @@ -75,7 +80,7 @@ def build_config(args): return job_config -def schedule_job(job_config, num=1): +def schedule_job(job_config, num=1, report_status=True): """ Schedule a job. 
@@ -96,5 +101,6 @@ def schedule_job(job_config, num=1): print('Job scheduled with name {name} and ID {jid}'.format( name=job_config['name'], jid=jid)) job_config['job_id'] = str(jid) - report.try_push_job_info(job_config, dict(status='queued')) + if report_status: + report.try_push_job_info(job_config, dict(status='queued')) num -= 1 -- 2.39.5