repo_utils: allow checking out a specific commit
authorJosh Durgin <jdurgin@redhat.com>
Sun, 17 Jan 2021 02:04:08 +0000 (21:04 -0500)
committerJosh Durgin <jdurgin@redhat.com>
Tue, 19 Jan 2021 06:43:10 +0000 (01:43 -0500)
Since we're cloning a particular branch with git clone --shallow,
assume we're still passed a branch that contains the commit. Otherwise
we'd waste time and space cloning all the branches in the repo.

Assume that this is only used for checking out a particular sha1 once,
to avoid repetitive work.

There are two use cases for these utilities:

1) on the user's machine when they're scheduling a suite - there it
will make sense to maintain a single checkout of a branch e.g. teuthology master

2) on the queue consumer side - here it's best if we use the same commit for an
entire run, so checking out by sha1 makes more sense

Signed-off-by: Josh Durgin <jdurgin@redhat.com>
teuthology/repo_utils.py
teuthology/test/test_repo_utils.py

index be7ca22eb289e4f832d8e79b749ed88bb5d6aa8d..4a9495e2ed41ecaf097915a2c0e1aeda6bfcaba0 100644 (file)
@@ -9,7 +9,7 @@ from teuthology import misc
 from teuthology.util.flock import FileLock
 from teuthology.config import config
 from teuthology.contextutil import MaxWhileTries, safe_while
-from teuthology.exceptions import BootstrapError, BranchNotFoundError, GitError
+from teuthology.exceptions import BootstrapError, BranchNotFoundError, CommitNotFoundError, GitError
 
 log = logging.getLogger(__name__)
 
@@ -70,33 +70,42 @@ def ls_remote(url, ref):
     return sha1
 
 
-def enforce_repo_state(repo_url, dest_path, branch, remove_on_error=True):
+def enforce_repo_state(repo_url, dest_path, branch, commit=None, remove_on_error=True):
     """
     Use git to either clone or update a given repo, forcing it to switch to the
     specified branch.
 
-    :param repo_url:  The full URL to the repo (not including the branch)
-    :param dest_path: The full path to the destination directory
-    :param branch:    The branch.
-    :param remove:    Whether or not to remove dest_dir when an error occurs
-    :raises:          BranchNotFoundError if the branch is not found;
-                      GitError for other errors
+    :param repo_url:        The full URL to the repo (not including the branch)
+    :param dest_path:       The full path to the destination directory
+    :param branch:          The branch.
+    :param commit:          The sha1 to checkout. Defaults to None, which uses HEAD of the branch.
+    :param remove_on_error: Whether or not to remove dest_dir when an error occurs
+    :raises:                BranchNotFoundError if the branch is not found;
+                            CommitNotFoundError if the commit is not found;
+                            GitError for other errors
     """
     validate_branch(branch)
     sentinel = os.path.join(dest_path, '.fetched')
+    # sentinel to track whether the repo has checked out the intended
+    # version, in addition to being cloned
+    repo_reset = os.path.join(dest_path, '.fetched_and_reset')
     try:
         if not os.path.isdir(dest_path):
             clone_repo(repo_url, dest_path, branch)
-        elif not is_fresh(sentinel):
+        elif not commit and not is_fresh(sentinel):
             set_remote(dest_path, repo_url)
             fetch_branch(dest_path, branch)
             touch_file(sentinel)
         else:
-            log.info("%s was just updated; assuming it is current", dest_path)
+            log.info("%s was just updated or references a specific commit; assuming it is current", dest_path)
+
+        if commit and os.path.exists(repo_reset):
+            return
 
-        reset_repo(repo_url, dest_path, branch)
+        reset_repo(repo_url, dest_path, branch, commit)
+        touch_file(repo_reset)
         # remove_pyc_files(dest_path)
-    except BranchNotFoundError:
+    except (BranchNotFoundError, CommitNotFoundError):
         if remove_on_error:
             shutil.rmtree(dest_path, ignore_errors=True)
         raise
@@ -262,13 +271,15 @@ def fetch_branch(repo_path, branch, shallow=True):
             raise GitError("git fetch failed!")
 
 
-def reset_repo(repo_url, dest_path, branch):
+def reset_repo(repo_url, dest_path, branch, commit=None):
     """
 
     :param repo_url:  The full URL to the repo (not including the branch)
     :param dest_path: The full path to the destination directory
     :param branch:    The branch.
+    :param commit:    The sha1 to checkout. Defaults to None, which uses HEAD of the branch.
     :raises:          BranchNotFoundError if the branch is not found;
+                      CommitNotFoundError if the commit is not found;
                       GitError for other errors
     """
     validate_branch(branch)
@@ -276,15 +287,18 @@ def reset_repo(repo_url, dest_path, branch):
         reset_branch = lsstrip(remote_ref_from_ref(branch), 'refs/remotes/')
     else:
         reset_branch = 'origin/%s' % branch
-    log.info('Resetting repo at %s to branch %s', dest_path, reset_branch)
+    reset_ref = commit or reset_branch
+    log.info('Resetting repo at %s to %s', dest_path, reset_ref)
     # This try/except block will notice if the requested branch doesn't
     # exist, whether it was cloned or fetched.
     try:
         subprocess.check_output(
-            ('git', 'reset', '--hard', reset_branch),
+            ('git', 'reset', '--hard', reset_ref),
             cwd=dest_path,
         )
     except subprocess.CalledProcessError:
+        if commit:
+            raise CommitNotFoundError(commit, repo_url)
         raise BranchNotFoundError(branch, repo_url)
 
 
@@ -308,6 +322,7 @@ def fetch_repo(url, branch, bootstrap=None, lock=True):
     :param bootstrap:  An optional callback function to execute. Gets passed a
                        dest_dir argument: the path to the repo on-disk.
     :param branch:     The branch we want
+    :param commit:     The sha1 to checkout. Defaults to None, which uses HEAD of the branch.
     :returns:          The destination path
     """
     src_base_path = config.src_base_path
index 2aa57a06ec36aae1108615557ace91ee8d4a3c67..b29567ba2d7dce655f53e20e37ce942fe630caa8 100644 (file)
@@ -7,7 +7,7 @@ import shutil
 import subprocess
 import tempfile
 
-from teuthology.exceptions import BranchNotFoundError
+from teuthology.exceptions import BranchNotFoundError, CommitNotFoundError
 from teuthology import repo_utils
 from teuthology import parallel
 repo_utils.log.setLevel(logging.WARNING)
@@ -23,8 +23,10 @@ class TestRepoUtils(object):
 
         if 'TEST_ONLINE' in os.environ:
             cls.repo_url = 'https://github.com/ceph/empty.git'
+            cls.commit = '71245d8e454a06a38a00bff09d8f19607c72e8bf'
         else:
             cls.repo_url = 'file://' + cls.src_path
+            cls.commit = None
 
     @classmethod
     def teardown_class(cls):
@@ -56,17 +58,25 @@ class TestRepoUtils(object):
             stdout=subprocess.PIPE,
         )
         assert proc.wait() == 0
+        if not self.commit:
+            result = subprocess.check_output(
+                'git rev-parse HEAD',
+                shell=True,
+                cwd=self.src_path,
+            ).split()
+            assert result
+            self.commit = result[0].decode()
 
     def teardown_method(self, method):
         shutil.rmtree(self.dest_path, ignore_errors=True)
 
     def test_clone_repo_existing_branch(self):
-        repo_utils.clone_repo(self.repo_url, self.dest_path, 'master')
+        repo_utils.clone_repo(self.repo_url, self.dest_path, 'master', self.commit)
         assert os.path.exists(self.dest_path)
 
     def test_clone_repo_non_existing_branch(self):
         with raises(BranchNotFoundError):
-            repo_utils.clone_repo(self.repo_url, self.dest_path, 'nobranch')
+            repo_utils.clone_repo(self.repo_url, self.dest_path, 'nobranch', self.commit)
         assert not os.path.exists(self.dest_path)
 
     def test_fetch_no_repo(self):
@@ -77,7 +87,7 @@ class TestRepoUtils(object):
         assert not os.path.exists(fake_dest_path)
 
     def test_fetch_noop(self):
-        repo_utils.clone_repo(self.repo_url, self.dest_path, 'master')
+        repo_utils.clone_repo(self.repo_url, self.dest_path, 'master', self.commit)
         repo_utils.fetch(self.dest_path)
         assert os.path.exists(self.dest_path)
 
@@ -89,7 +99,7 @@ class TestRepoUtils(object):
         assert not os.path.exists(fake_dest_path)
 
     def test_fetch_branch_fake_branch(self):
-        repo_utils.clone_repo(self.repo_url, self.dest_path, 'master')
+        repo_utils.clone_repo(self.repo_url, self.dest_path, 'master', self.commit)
         with raises(BranchNotFoundError):
             repo_utils.fetch_branch(self.dest_path, 'nobranch')
 
@@ -118,21 +128,32 @@ class TestRepoUtils(object):
                                       'master')
         assert os.path.exists(self.dest_path)
 
+    def test_enforce_existing_commit(self):
+        repo_utils.enforce_repo_state(self.repo_url, self.dest_path,
+                                      'master', self.commit)
+        assert os.path.exists(self.dest_path)
+
     def test_enforce_non_existing_branch(self):
         with raises(BranchNotFoundError):
             repo_utils.enforce_repo_state(self.repo_url, self.dest_path,
-                                          'blah')
+                                          'blah', self.commit)
+        assert not os.path.exists(self.dest_path)
+
+    def test_enforce_non_existing_commit(self):
+        with raises(CommitNotFoundError):
+            repo_utils.enforce_repo_state(self.repo_url, self.dest_path,
+                                          'master', 'c69e90807d222c1719c45c8c758bf6fac3d985f1')
         assert not os.path.exists(self.dest_path)
 
     def test_enforce_multiple_calls_same_branch(self):
         repo_utils.enforce_repo_state(self.repo_url, self.dest_path,
-                                      'master')
+                                      'master', self.commit)
         assert os.path.exists(self.dest_path)
         repo_utils.enforce_repo_state(self.repo_url, self.dest_path,
-                                      'master')
+                                      'master', self.commit)
         assert os.path.exists(self.dest_path)
         repo_utils.enforce_repo_state(self.repo_url, self.dest_path,
-                                      'master')
+                                      'master', self.commit)
         assert os.path.exists(self.dest_path)
 
     def test_enforce_multiple_calls_different_branches(self):
@@ -141,48 +162,48 @@ class TestRepoUtils(object):
                                           'blah1')
         assert not os.path.exists(self.dest_path)
         repo_utils.enforce_repo_state(self.repo_url, self.dest_path,
-                                      'master')
+                                      'master', self.commit)
         assert os.path.exists(self.dest_path)
         repo_utils.enforce_repo_state(self.repo_url, self.dest_path,
-                                      'master')
+                                      'master', self.commit)
         assert os.path.exists(self.dest_path)
         with raises(BranchNotFoundError):
             repo_utils.enforce_repo_state(self.repo_url, self.dest_path,
                                           'blah2')
         assert not os.path.exists(self.dest_path)
         repo_utils.enforce_repo_state(self.repo_url, self.dest_path,
-                                      'master')
+                                      'master', self.commit)
         assert os.path.exists(self.dest_path)
 
     def test_enforce_invalid_branch(self):
         with raises(ValueError):
-            repo_utils.enforce_repo_state(self.repo_url, self.dest_path, 'a b')
+            repo_utils.enforce_repo_state(self.repo_url, self.dest_path, 'a b', self.commit)
 
     def test_simultaneous_access(self):
         count = 5
         with parallel.parallel() as p:
             for i in range(count):
                 p.spawn(repo_utils.enforce_repo_state, self.repo_url,
-                        self.dest_path, 'master')
+                        self.dest_path, 'master', self.commit)
             for result in p:
                 assert result is None
 
     def test_simultaneous_access_different_branches(self):
-        branches = ['master', 'master', 'nobranch',
-                    'nobranch', 'master', 'nobranch']
+        branches = [('master', self.commit),  ('master', self.commit), ('nobranch', 'nocommit'),
+                    ('nobranch', 'nocommit'), ('master', self.commit), ('nobranch', 'nocommit')]
 
         with parallel.parallel() as p:
-            for branch in branches:
+            for branch, commit in branches:
                 if branch == 'master':
                     p.spawn(repo_utils.enforce_repo_state, self.repo_url,
-                            self.dest_path, branch)
+                            self.dest_path, branch, commit)
                 else:
                     dest_path = self.dest_path + '_' + branch
 
                     def func():
                         repo_utils.enforce_repo_state(
                             self.repo_url, dest_path,
-                            branch)
+                            branch, commit)
                     p.spawn(
                         raises,
                         BranchNotFoundError,