From: Zack Cerza Date: Wed, 7 Dec 2022 18:55:14 +0000 (-0700) Subject: suite: Speed up 'newest' feature X-Git-Tag: 1.2.0~134^2 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=ba4be811f37b8e10ee83298918840ee511b60610;p=teuthology.git suite: Speed up 'newest' feature For a 'newest' value of N, we were calling find_git_parent N times. find_git_parent was invoking githelper's 'refresh' endpoint each time, which in my testing took 20-30 seconds. So, for a sha1 that needed 20 backtracks, I was seeing teuthology-suite take eight minutes to complete. With this change, it took 30 seconds for the same teuthology-suite invocation. We were also not including the trailing slash on the 'refresh' endpoint, resulting in a 308 redirect each time - doubling the request count. That's fixed as well. Signed-off-by: Zack Cerza --- diff --git a/teuthology/suite/run.py b/teuthology/suite/run.py index ab2d8f06..4abd4d8e 100644 --- a/teuthology/suite/run.py +++ b/teuthology/suite/run.py @@ -644,16 +644,16 @@ Note: If you still want to go ahead, use --job-threshold 0''' # if not, do it once backtrack = 0 limit = self.args.newest + sha1s = [] while backtrack <= limit: jobs_missing_packages, jobs_to_schedule = \ self.collect_jobs(arch, configs, self.args.newest, job_limit) if jobs_missing_packages and self.args.newest: - new_sha1 = \ - util.find_git_parent('ceph', self.base_config.sha1) - if new_sha1 is None: + if not sha1s: + sha1s = util.find_git_parents('ceph', str(self.base_config.sha1), self.args.newest) + if not sha1s: util.schedule_fail('Backtrack for --newest failed', name, dry_run=self.args.dry_run) - # rebuild the base config to resubstitute sha1 - self.config_input['ceph_hash'] = new_sha1 + self.config_input['ceph_hash'] = sha1s.pop(0) self.base_config = self.build_base_config() backtrack += 1 continue diff --git a/teuthology/suite/test/test_run_.py b/teuthology/suite/test/test_run_.py index 278e1358..c9bb53bc 100644 --- a/teuthology/suite/test/test_run_.py +++ b/teuthology/suite/test/test_run_.py @@ -308,7 +308,7 @@ class TestScheduleSuite(object): ) m_write_rerun_memo.assert_called_once_with() - @patch('teuthology.suite.util.find_git_parent') + @patch('teuthology.suite.util.find_git_parents') @patch('teuthology.suite.run.Run.schedule_jobs') @patch('teuthology.suite.util.has_packages_for_distro') @patch('teuthology.suite.util.get_package_versions') @@ -331,7 +331,7 @@ class TestScheduleSuite(object): m_get_package_versions, m_has_packages_for_distro, m_schedule_jobs, - m_find_git_parent, + m_find_git_parents, ): m_get_arch.return_value = 'x86_64' m_git_validate_sha1.return_value = self.args.ceph_sha1 @@ -350,7 +350,7 @@ class TestScheduleSuite(object): False for i in range(11) ] - m_find_git_parent.side_effect = lambda proj, sha1: sha1 + '^' + m_find_git_parents.side_effect = lambda proj, sha1, count: [f"{sha1}_{i}" for i in range(11)] self.args.newest = 10 runobj = self.klass(self.args) @@ -358,11 +358,11 @@ class TestScheduleSuite(object): with pytest.raises(ScheduleFailError) as exc: runobj.schedule_suite() assert 'Exceeded 10 backtracks' in str(exc.value) - m_find_git_parent.assert_has_calls( - [call('ceph', 'ceph_sha1' + i * '^') for i in range(10)] + m_find_git_parents.assert_has_calls( + [call('ceph', 'ceph_sha1', 10)] ) - @patch('teuthology.suite.util.find_git_parent') + @patch('teuthology.suite.util.find_git_parents') @patch('teuthology.suite.run.Run.schedule_jobs') @patch('teuthology.suite.run.Run.write_rerun_memo') @patch('teuthology.suite.util.has_packages_for_distro') @@ -387,7 +387,7 @@ class TestScheduleSuite(object): m_has_packages_for_distro, m_write_rerun_memo, m_schedule_jobs, - m_find_git_parent, + m_find_git_parents, ): m_get_arch.return_value = 'x86_64' # rig has_packages_for_distro to fail this many times, so @@ -409,7 +409,7 @@ class TestScheduleSuite(object): m_has_packages_for_distro.side_effect = \ [False for i in range(NUM_FAILS)] + [True] - m_find_git_parent.side_effect = lambda proj, sha1: sha1 + '^' + m_find_git_parents.side_effect = lambda proj, sha1, count: [f"{sha1}_{i}" for i in range(NUM_FAILS)] self.args.newest = 10 runobj = self.klass(self.args) @@ -417,9 +417,9 @@ class TestScheduleSuite(object): count = runobj.schedule_suite() assert count == 1 m_has_packages_for_distro.assert_has_calls( - [call('ceph_sha1' + '^' * i, 'ubuntu', '14.04', 'default', {}) - for i in range(NUM_FAILS+1)] + [call(f"ceph_sha1_{i}", 'ubuntu', '14.04', 'default', {}) + for i in range(NUM_FAILS)] ) - m_find_git_parent.assert_has_calls( - [call('ceph', 'ceph_sha1' + i * '^') for i in range(NUM_FAILS)] + m_find_git_parents.assert_has_calls( + [call('ceph', 'ceph_sha1', 10)] ) diff --git a/teuthology/suite/test/test_util.py b/teuthology/suite/test/test_util.py index fdd352c4..ecdbfa41 100644 --- a/teuthology/suite/test/test_util.py +++ b/teuthology/suite/test/test_util.py @@ -172,14 +172,14 @@ Branch 'no-branch' not found in repo: https://github.com/ceph/ceph-ci.git!" assert util.git_ls_remote('ceph', 'main') is not None @patch('teuthology.suite.util.requests.get') - def test_find_git_parent(self, m_requests_get): + def test_find_git_parents(self, m_requests_get): refresh_resp = Mock(ok=True) history_resp = Mock(ok=True) history_resp.json.return_value = {'sha1s': ['sha1', 'sha1_p']} m_requests_get.side_effect = [refresh_resp, history_resp] - parent_sha1 = util.find_git_parent('ceph', 'sha1') + parent_sha1s = util.find_git_parents('ceph', 'sha1') assert len(m_requests_get.mock_calls) == 2 - assert parent_sha1 == 'sha1_p' + assert parent_sha1s == ['sha1_p'] class TestFlavor(object): diff --git a/teuthology/suite/util.py b/teuthology/suite/util.py index 1955f85a..fabfa394 100644 --- a/teuthology/suite/util.py +++ b/teuthology/suite/util.py @@ -463,15 +463,15 @@ def teuthology_schedule(args, verbose, dry_run, log_prefix='', stdin=None): else: p.communicate() -def find_git_parent(project, sha1): +def find_git_parents(project: str, sha1: str, count=1): base_url = config.githelper_base_url if not base_url: log.warning('githelper_base_url not set, --newest disabled') - return None + return [] def refresh(project): - url = '%s/%s.git/refresh' % (base_url, project) + url = '%s/%s.git/refresh/' % (base_url, project) resp = requests.get(url) if not resp.ok: log.error('git refresh failed for %s: %s', @@ -489,11 +489,10 @@ def find_git_parent(project, sha1): int(count), sha1, project, resp.json()['error']) return sha1s - # XXX don't do this every time?.. refresh(project) - # we want the one just before sha1; list two, return the second - sha1s = get_sha1s(project, sha1, 2) - if len(sha1s) == 2: - return sha1s[1] - else: - return None + # index 0 will be the commit whose parents we want to find. + # So we will query for count+1, and strip index 0 from the result. + sha1s = get_sha1s(project, sha1, count + 1) + if sha1s: + return sha1s[1:] + return []