]> git-server-git.apps.pok.os.sepia.ceph.com Git - teuthology.git/commitdiff
suite: Speed up 'newest' feature 1802/head
authorZack Cerza <zack@redhat.com>
Wed, 7 Dec 2022 18:55:14 +0000 (11:55 -0700)
committerZack Cerza <zack@redhat.com>
Thu, 8 Dec 2022 20:10:38 +0000 (13:10 -0700)
For a 'newest' value of N, we were calling find_git_parent N times.
find_git_parent was invoking githelper's 'refresh' endpoint each time,
which in my testing took 20-30 seconds. So, for a sha1 that needed 20
backtracks, I was seeing teuthology-suite take eight minutes to
complete. With this change, it took 30 seconds for the same
teuthology-suite invocation.

We were also not including the trailing slash on the 'refresh' endpoint,
resulting in a 308 redirect each time - doubling the request count.
That's fixed as well.

Signed-off-by: Zack Cerza <zack@redhat.com>
teuthology/suite/run.py
teuthology/suite/test/test_run_.py
teuthology/suite/test/test_util.py
teuthology/suite/util.py

index ab2d8f064f1b671239f84a5f731b625b54418a43..4abd4d8e740c164381c30b773499741dcd8e15d0 100644 (file)
@@ -644,16 +644,16 @@ Note: If you still want to go ahead, use --job-threshold 0'''
         # if not, do it once
         backtrack = 0
         limit = self.args.newest
+        sha1s = []
         while backtrack <= limit:
             jobs_missing_packages, jobs_to_schedule = \
                 self.collect_jobs(arch, configs, self.args.newest, job_limit)
             if jobs_missing_packages and self.args.newest:
-                new_sha1 = \
-                    util.find_git_parent('ceph', self.base_config.sha1)
-                if new_sha1 is None:
+                if not sha1s:
+                    sha1s = util.find_git_parents('ceph', str(self.base_config.sha1), self.args.newest)
+                if not sha1s:
                     util.schedule_fail('Backtrack for --newest failed', name, dry_run=self.args.dry_run)
-                 # rebuild the base config to resubstitute sha1
-                self.config_input['ceph_hash'] = new_sha1
+                self.config_input['ceph_hash'] = sha1s.pop(0)
                 self.base_config = self.build_base_config()
                 backtrack += 1
                 continue
index 278e1358b1e4d572c54b90162eb76d524d2b9b3a..c9bb53bc825a73bf7d0d574b0478a15724b20671 100644 (file)
@@ -308,7 +308,7 @@ class TestScheduleSuite(object):
         )
         m_write_rerun_memo.assert_called_once_with()
 
-    @patch('teuthology.suite.util.find_git_parent')
+    @patch('teuthology.suite.util.find_git_parents')
     @patch('teuthology.suite.run.Run.schedule_jobs')
     @patch('teuthology.suite.util.has_packages_for_distro')
     @patch('teuthology.suite.util.get_package_versions')
@@ -331,7 +331,7 @@ class TestScheduleSuite(object):
         m_get_package_versions,
         m_has_packages_for_distro,
         m_schedule_jobs,
-        m_find_git_parent,
+        m_find_git_parents,
     ):
         m_get_arch.return_value = 'x86_64'
         m_git_validate_sha1.return_value = self.args.ceph_sha1
@@ -350,7 +350,7 @@ class TestScheduleSuite(object):
             False for i in range(11)
         ]
 
-        m_find_git_parent.side_effect = lambda proj, sha1: sha1 + '^'
+        m_find_git_parents.side_effect = lambda proj, sha1, count: [f"{sha1}_{i}" for i in range(11)]
 
         self.args.newest = 10
         runobj = self.klass(self.args)
@@ -358,11 +358,11 @@ class TestScheduleSuite(object):
         with pytest.raises(ScheduleFailError) as exc:
             runobj.schedule_suite()
         assert 'Exceeded 10 backtracks' in str(exc.value)
-        m_find_git_parent.assert_has_calls(
-            [call('ceph', 'ceph_sha1' + i * '^') for i in range(10)]
+        m_find_git_parents.assert_has_calls(
+            [call('ceph', 'ceph_sha1'10)]
         )
 
-    @patch('teuthology.suite.util.find_git_parent')
+    @patch('teuthology.suite.util.find_git_parents')
     @patch('teuthology.suite.run.Run.schedule_jobs')
     @patch('teuthology.suite.run.Run.write_rerun_memo')
     @patch('teuthology.suite.util.has_packages_for_distro')
@@ -387,7 +387,7 @@ class TestScheduleSuite(object):
         m_has_packages_for_distro,
         m_write_rerun_memo,
         m_schedule_jobs,
-        m_find_git_parent,
+        m_find_git_parents,
     ):
         m_get_arch.return_value = 'x86_64'
         # rig has_packages_for_distro to fail this many times, so
@@ -409,7 +409,7 @@ class TestScheduleSuite(object):
         m_has_packages_for_distro.side_effect = \
             [False for i in range(NUM_FAILS)] + [True]
 
-        m_find_git_parent.side_effect = lambda proj, sha1: sha1 + '^'
+        m_find_git_parents.side_effect = lambda proj, sha1, count: [f"{sha1}_{i}" for i in range(NUM_FAILS)]
 
         self.args.newest = 10
         runobj = self.klass(self.args)
@@ -417,9 +417,9 @@ class TestScheduleSuite(object):
         count = runobj.schedule_suite()
         assert count == 1
         m_has_packages_for_distro.assert_has_calls(
-            [call('ceph_sha1' + '^' * i, 'ubuntu', '14.04', 'default', {})
-             for i in range(NUM_FAILS+1)]
+            [call(f"ceph_sha1_{i}", 'ubuntu', '14.04', 'default', {})
+             for i in range(NUM_FAILS)]
         )
-        m_find_git_parent.assert_has_calls(
-            [call('ceph', 'ceph_sha1' + i * '^') for i in range(NUM_FAILS)]
+        m_find_git_parents.assert_has_calls(
+            [call('ceph', 'ceph_sha1', 10)]
         )
index fdd352c4501c381bcfb107c003cda6bb6fc97032..ecdbfa41cce1c1e46bcd1cdbbd5c4a08214a2f09 100644 (file)
@@ -172,14 +172,14 @@ Branch 'no-branch' not found in repo: https://github.com/ceph/ceph-ci.git!"
         assert util.git_ls_remote('ceph', 'main') is not None
 
     @patch('teuthology.suite.util.requests.get')
-    def test_find_git_parent(self, m_requests_get):
+    def test_find_git_parents(self, m_requests_get):
         refresh_resp = Mock(ok=True)
         history_resp = Mock(ok=True)
         history_resp.json.return_value = {'sha1s': ['sha1', 'sha1_p']}
         m_requests_get.side_effect = [refresh_resp, history_resp]
-        parent_sha1 = util.find_git_parent('ceph', 'sha1')
+        parent_sha1s = util.find_git_parents('ceph', 'sha1')
         assert len(m_requests_get.mock_calls) == 2
-        assert parent_sha1 == 'sha1_p'
+        assert parent_sha1s == ['sha1_p']
 
 
 class TestFlavor(object):
index 1955f85af505ad61c949a27ac46f95eec2a54288..fabfa394724331f2c108f71c7791b8eec8a10c10 100644 (file)
@@ -463,15 +463,15 @@ def teuthology_schedule(args, verbose, dry_run, log_prefix='', stdin=None):
         else:
             p.communicate()
 
-def find_git_parent(project, sha1):
+def find_git_parents(project: str, sha1: str, count=1):
 
     base_url = config.githelper_base_url
     if not base_url:
         log.warning('githelper_base_url not set, --newest disabled')
-        return None
+        return []
 
     def refresh(project):
-        url = '%s/%s.git/refresh' % (base_url, project)
+        url = '%s/%s.git/refresh/' % (base_url, project)
         resp = requests.get(url)
         if not resp.ok:
             log.error('git refresh failed for %s: %s',
@@ -489,11 +489,10 @@ def find_git_parent(project, sha1):
                        int(count), sha1, project, resp.json()['error'])
         return sha1s
 
-    # XXX don't do this every time?..
     refresh(project)
-    # we want the one just before sha1; list two, return the second
-    sha1s = get_sha1s(project, sha1, 2)
-    if len(sha1s) == 2:
-        return sha1s[1]
-    else:
-        return None
+    # index 0 will be the commit whose parents we want to find.
+    # So we will query for count+1, and strip index 0 from the result.
+    sha1s = get_sha1s(project, sha1, count + 1)
+    if sha1s:
+        return sha1s[1:]
+    return []