From: Josh Durgin Date: Sun, 17 Jan 2021 02:04:08 +0000 (-0500) Subject: repo_utils: allow checking out a specific commit X-Git-Tag: 1.1.0~18^2~3 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=10c86d9c88cbf7ab4a6f7b81c1d4e823f36c6e68;p=teuthology.git repo_utils: allow checking out a specific commit Since we're cloning a particular branch with git clone --shallow, assume we're still passed a branch that contains the commit. Otherwise we'd waste time and space cloning all the branches in the repo. Assume that this is only used for checking out a particular sha1 once, to avoid repetitive work. There are two use cases for these utilities: 1) on the user's machine when they're scheduling a suite - there it will make sense to maintain a single checkout of a branch e.g. teuthology master 2) on the queue consumer side - here it's best if we use the same commit for an entire run, so checking out by sha1 makes more sense Signed-off-by: Josh Durgin --- diff --git a/teuthology/repo_utils.py b/teuthology/repo_utils.py index be7ca22e..4a9495e2 100644 --- a/teuthology/repo_utils.py +++ b/teuthology/repo_utils.py @@ -9,7 +9,7 @@ from teuthology import misc from teuthology.util.flock import FileLock from teuthology.config import config from teuthology.contextutil import MaxWhileTries, safe_while -from teuthology.exceptions import BootstrapError, BranchNotFoundError, GitError +from teuthology.exceptions import BootstrapError, BranchNotFoundError, CommitNotFoundError, GitError log = logging.getLogger(__name__) @@ -70,33 +70,42 @@ def ls_remote(url, ref): return sha1 -def enforce_repo_state(repo_url, dest_path, branch, remove_on_error=True): +def enforce_repo_state(repo_url, dest_path, branch, commit=None, remove_on_error=True): """ Use git to either clone or update a given repo, forcing it to switch to the specified branch. - :param repo_url: The full URL to the repo (not including the branch) - :param dest_path: The full path to the destination directory - :param branch: The branch. - :param remove: Whether or not to remove dest_dir when an error occurs - :raises: BranchNotFoundError if the branch is not found; - GitError for other errors + :param repo_url: The full URL to the repo (not including the branch) + :param dest_path: The full path to the destination directory + :param branch: The branch. + :param commit: The sha1 to checkout. Defaults to None, which uses HEAD of the branch. + :param remove_on_error: Whether or not to remove dest_dir when an error occurs + :raises: BranchNotFoundError if the branch is not found; + CommitNotFoundError if the commit is not found; + GitError for other errors """ validate_branch(branch) sentinel = os.path.join(dest_path, '.fetched') + # sentinel to track whether the repo has checked out the intended + # version, in addition to being cloned + repo_reset = os.path.join(dest_path, '.fetched_and_reset') try: if not os.path.isdir(dest_path): clone_repo(repo_url, dest_path, branch) - elif not is_fresh(sentinel): + elif not commit and not is_fresh(sentinel): set_remote(dest_path, repo_url) fetch_branch(dest_path, branch) touch_file(sentinel) else: - log.info("%s was just updated; assuming it is current", dest_path) + log.info("%s was just updated or references a specific commit; assuming it is current", dest_path) + + if commit and os.path.exists(repo_reset): + return - reset_repo(repo_url, dest_path, branch) + reset_repo(repo_url, dest_path, branch, commit) + touch_file(repo_reset) # remove_pyc_files(dest_path) - except BranchNotFoundError: + except (BranchNotFoundError, CommitNotFoundError): if remove_on_error: shutil.rmtree(dest_path, ignore_errors=True) raise @@ -262,13 +271,15 @@ def fetch_branch(repo_path, branch, shallow=True): raise GitError("git fetch failed!") -def reset_repo(repo_url, dest_path, branch): +def reset_repo(repo_url, dest_path, branch, commit=None): """ :param repo_url: The full URL to the repo (not including the branch) :param dest_path: The full path to the destination directory :param branch: The branch. + :param commit: The sha1 to checkout. Defaults to None, which uses HEAD of the branch. :raises: BranchNotFoundError if the branch is not found; + CommitNotFoundError if the commit is not found; GitError for other errors """ validate_branch(branch) @@ -276,15 +287,18 @@ def reset_repo(repo_url, dest_path, branch): reset_branch = lsstrip(remote_ref_from_ref(branch), 'refs/remotes/') else: reset_branch = 'origin/%s' % branch - log.info('Resetting repo at %s to branch %s', dest_path, reset_branch) + reset_ref = commit or reset_branch + log.info('Resetting repo at %s to %s', dest_path, reset_ref) # This try/except block will notice if the requested branch doesn't # exist, whether it was cloned or fetched. try: subprocess.check_output( - ('git', 'reset', '--hard', reset_branch), + ('git', 'reset', '--hard', reset_ref), cwd=dest_path, ) except subprocess.CalledProcessError: + if commit: + raise CommitNotFoundError(commit, repo_url) raise BranchNotFoundError(branch, repo_url) @@ -308,6 +322,7 @@ def fetch_repo(url, branch, bootstrap=None, lock=True): :param bootstrap: An optional callback function to execute. Gets passed a dest_dir argument: the path to the repo on-disk. :param branch: The branch we want + :param commit: The sha1 to checkout. Defaults to None, which uses HEAD of the branch. :returns: The destination path """ src_base_path = config.src_base_path diff --git a/teuthology/test/test_repo_utils.py b/teuthology/test/test_repo_utils.py index 2aa57a06..b29567ba 100644 --- a/teuthology/test/test_repo_utils.py +++ b/teuthology/test/test_repo_utils.py @@ -7,7 +7,7 @@ import shutil import subprocess import tempfile -from teuthology.exceptions import BranchNotFoundError +from teuthology.exceptions import BranchNotFoundError, CommitNotFoundError from teuthology import repo_utils from teuthology import parallel repo_utils.log.setLevel(logging.WARNING) @@ -23,8 +23,10 @@ class TestRepoUtils(object): if 'TEST_ONLINE' in os.environ: cls.repo_url = 'https://github.com/ceph/empty.git' + cls.commit = '71245d8e454a06a38a00bff09d8f19607c72e8bf' else: cls.repo_url = 'file://' + cls.src_path + cls.commit = None @classmethod def teardown_class(cls): @@ -56,17 +58,25 @@ class TestRepoUtils(object): stdout=subprocess.PIPE, ) assert proc.wait() == 0 + if not self.commit: + result = subprocess.check_output( + 'git rev-parse HEAD', + shell=True, + cwd=self.src_path, + ).split() + assert result + self.commit = result[0].decode() def teardown_method(self, method): shutil.rmtree(self.dest_path, ignore_errors=True) def test_clone_repo_existing_branch(self): - repo_utils.clone_repo(self.repo_url, self.dest_path, 'master') + repo_utils.clone_repo(self.repo_url, self.dest_path, 'master', self.commit) assert os.path.exists(self.dest_path) def test_clone_repo_non_existing_branch(self): with raises(BranchNotFoundError): - repo_utils.clone_repo(self.repo_url, self.dest_path, 'nobranch') + repo_utils.clone_repo(self.repo_url, self.dest_path, 'nobranch', self.commit) assert not os.path.exists(self.dest_path) def test_fetch_no_repo(self): @@ -77,7 +87,7 @@ class TestRepoUtils(object): assert not os.path.exists(fake_dest_path) def test_fetch_noop(self): - repo_utils.clone_repo(self.repo_url, self.dest_path, 'master') + repo_utils.clone_repo(self.repo_url, self.dest_path, 'master', self.commit) repo_utils.fetch(self.dest_path) assert os.path.exists(self.dest_path) @@ -89,7 +99,7 @@ class TestRepoUtils(object): assert not os.path.exists(fake_dest_path) def test_fetch_branch_fake_branch(self): - repo_utils.clone_repo(self.repo_url, self.dest_path, 'master') + repo_utils.clone_repo(self.repo_url, self.dest_path, 'master', self.commit) with raises(BranchNotFoundError): repo_utils.fetch_branch(self.dest_path, 'nobranch') @@ -118,21 +128,32 @@ class TestRepoUtils(object): 'master') assert os.path.exists(self.dest_path) + def test_enforce_existing_commit(self): + repo_utils.enforce_repo_state(self.repo_url, self.dest_path, + 'master', self.commit) + assert os.path.exists(self.dest_path) + def test_enforce_non_existing_branch(self): with raises(BranchNotFoundError): repo_utils.enforce_repo_state(self.repo_url, self.dest_path, - 'blah') + 'blah', self.commit) + assert not os.path.exists(self.dest_path) + + def test_enforce_non_existing_commit(self): + with raises(CommitNotFoundError): + repo_utils.enforce_repo_state(self.repo_url, self.dest_path, + 'master', 'c69e90807d222c1719c45c8c758bf6fac3d985f1') assert not os.path.exists(self.dest_path) def test_enforce_multiple_calls_same_branch(self): repo_utils.enforce_repo_state(self.repo_url, self.dest_path, - 'master') + 'master', self.commit) assert os.path.exists(self.dest_path) repo_utils.enforce_repo_state(self.repo_url, self.dest_path, - 'master') + 'master', self.commit) assert os.path.exists(self.dest_path) repo_utils.enforce_repo_state(self.repo_url, self.dest_path, - 'master') + 'master', self.commit) assert os.path.exists(self.dest_path) def test_enforce_multiple_calls_different_branches(self): @@ -141,48 +162,48 @@ class TestRepoUtils(object): 'blah1') assert not os.path.exists(self.dest_path) repo_utils.enforce_repo_state(self.repo_url, self.dest_path, - 'master') + 'master', self.commit) assert os.path.exists(self.dest_path) repo_utils.enforce_repo_state(self.repo_url, self.dest_path, - 'master') + 'master', self.commit) assert os.path.exists(self.dest_path) with raises(BranchNotFoundError): repo_utils.enforce_repo_state(self.repo_url, self.dest_path, 'blah2') assert not os.path.exists(self.dest_path) repo_utils.enforce_repo_state(self.repo_url, self.dest_path, - 'master') + 'master', self.commit) assert os.path.exists(self.dest_path) def test_enforce_invalid_branch(self): with raises(ValueError): - repo_utils.enforce_repo_state(self.repo_url, self.dest_path, 'a b') + repo_utils.enforce_repo_state(self.repo_url, self.dest_path, 'a b', self.commit) def test_simultaneous_access(self): count = 5 with parallel.parallel() as p: for i in range(count): p.spawn(repo_utils.enforce_repo_state, self.repo_url, - self.dest_path, 'master') + self.dest_path, 'master', self.commit) for result in p: assert result is None def test_simultaneous_access_different_branches(self): - branches = ['master', 'master', 'nobranch', - 'nobranch', 'master', 'nobranch'] + branches = [('master', self.commit), ('master', self.commit), ('nobranch', 'nocommit'), + ('nobranch', 'nocommit'), ('master', self.commit), ('nobranch', 'nocommit')] with parallel.parallel() as p: - for branch in branches: + for branch, commit in branches: if branch == 'master': p.spawn(repo_utils.enforce_repo_state, self.repo_url, - self.dest_path, branch) + self.dest_path, branch, commit) else: dest_path = self.dest_path + '_' + branch def func(): repo_utils.enforce_repo_state( self.repo_url, dest_path, - branch) + branch, commit) p.spawn( raises, BranchNotFoundError,