From 27e22e567f34a2fc6a7de2bef75e579f7bee8a20 Mon Sep 17 00:00:00 2001 From: Giulio Eulisse <10544+ktf@users.noreply.github.com> Date: Tue, 17 Oct 2023 19:18:44 +0200 Subject: [PATCH] Abstract away SCM integration (#803) The goal is to support different backends, like sapling. --- alibuild_helpers/build.py | 48 +++++++++++++++++++----------------- alibuild_helpers/git.py | 30 ++++++++++++++++++++++ alibuild_helpers/init.py | 3 ++- alibuild_helpers/scm.py | 19 ++++++++++++++ alibuild_helpers/workarea.py | 41 +++++++++++++++--------------- tests/test_build.py | 28 ++++++++++----------- tests/test_hashing.py | 2 +- tests/test_workarea.py | 15 ++++++----- 8 files changed, 120 insertions(+), 66 deletions(-) create mode 100644 alibuild_helpers/scm.py diff --git a/alibuild_helpers/build.py b/alibuild_helpers/build.py index 0bf205b9..1a6a9b0c 100644 --- a/alibuild_helpers/build.py +++ b/alibuild_helpers/build.py @@ -13,11 +13,11 @@ from alibuild_helpers.utilities import Hasher from alibuild_helpers.utilities import yamlDump from alibuild_helpers.utilities import resolve_tag, resolve_version -from alibuild_helpers.git import git, clone_speedup_options +from alibuild_helpers.git import git, clone_speedup_options, Git from alibuild_helpers.sync import (NoRemoteSync, HttpRemoteSync, S3RemoteSync, Boto3RemoteSync, RsyncRemoteSync) import yaml -from alibuild_helpers.workarea import cleanup_git_log, logged_git, updateReferenceRepoSpec +from alibuild_helpers.workarea import cleanup_git_log, logged_scm, updateReferenceRepoSpec from alibuild_helpers.log import logger_handler, LogFormatter, ProgressPrint from datetime import datetime from glob import glob @@ -61,13 +61,15 @@ def update_git_repos(args, specs, buildOrder, develPkgs): """ def update_repo(package, git_prompt): + specs[package]["scm"] = Git() updateReferenceRepoSpec(args.referenceSources, package, specs[package], fetch=args.fetchRepos, usePartialClone=not args.docker, allowGitPrompt=git_prompt) # Retrieve git heads - cmd = ["ls-remote", "--heads", "--tags"] + scm = specs[package]["scm"] + cmd = scm.listRefsCmd() if package in develPkgs: specs[package]["source"] = \ os.path.join(os.getcwd(), specs[package]["package"]) @@ -75,12 +77,9 @@ def update_repo(package, git_prompt): else: cmd.append(specs[package].get("reference", specs[package]["source"])) - output = logged_git(package, args.referenceSources, + output = logged_scm(scm, package, args.referenceSources, cmd, ".", prompt=git_prompt, logOutput=False) - specs[package]["git_refs"] = { - git_ref: git_hash for git_hash, sep, git_ref - in (line.partition("\t") for line in output.splitlines()) if sep - } + specs[package]["scm_refs"] = scm.parseRefs(output) requires_auth = set() with concurrent.futures.ThreadPoolExecutor(max_workers=4) as executor: @@ -102,7 +101,7 @@ def update_repo(package, git_prompt): (futurePackage, exc)) else: debug("%r package updated: %d refs found", futurePackage, - len(specs[futurePackage]["git_refs"])) + len(specs[futurePackage]["scm_refs"])) # Now execute git commands for private packages one-by-one, so the user can # type their username and password without multiple prompts interfering. @@ -114,7 +113,7 @@ def update_repo(package, git_prompt): specs[package]["source"]) update_repo(package, git_prompt=True) debug("%r package updated: %d refs found", package, - len(specs[package]["git_refs"])) + len(specs[package]["scm_refs"])) # Creates a directory in the store which contains symlinks to the package @@ -191,7 +190,7 @@ def hash_data_for_key(key): h_default(spec["commit_hash"]) try: # If spec["commit_hash"] is a tag, get the actual git commit hash. - real_commit_hash = spec["git_refs"]["refs/tags/" + spec["commit_hash"]] + real_commit_hash = spec["scm_refs"]["refs/tags/" + spec["commit_hash"]] except KeyError: # If it's not a tag, assume it's an actual commit hash. real_commit_hash = spec["commit_hash"] @@ -202,7 +201,7 @@ def hash_data_for_key(key): h_real_commit(real_commit_hash) h_alternatives = [(spec.get("tag", "0"), spec["commit_hash"], h_default), (spec.get("tag", "0"), real_commit_hash, h_real_commit)] - for ref, git_hash in spec.get("git_refs", {}).items(): + for ref, git_hash in spec.get("scm_refs", {}).items(): if ref.startswith("refs/tags/") and git_hash == real_commit_hash: tag_name = ref[len("refs/tags/"):] debug("Tag %s also points to %s, storing alternative", @@ -269,12 +268,14 @@ def h_all(data): # pylint: disable=function-redefined list({h.hexdigest() for _, _, h, in h_alternatives} - {spec["local_revision_hash"]}) -def hash_local_changes(directory): +def hash_local_changes(spec): """Produce a hash of all local changes in the given git repo. If there are untracked files, this function returns a unique hash to force a rebuild, and logs a warning, as we cannot detect changes to those files. """ + directory = spec["source"] + scm = spec["scm"] untrackedFilesDirectories = [] class UntrackedChangesError(Exception): """Signal that we cannot detect code changes due to untracked files.""" @@ -283,10 +284,10 @@ def hash_output(msg, args): lines = msg % args # `git status --porcelain` indicates untracked files using "??". # Lines from `git diff` never start with "??". - if any(line.startswith("?? ") for line in lines.split("\n")): + if any(scm.checkUntracked(line) for line in lines.split("\n")): raise UntrackedChangesError() h(lines) - cmd = "cd %s && git diff -r HEAD && git status --porcelain" % directory + cmd = scm.diffCmd(directory) try: err = execute(cmd, hash_output) debug("Command %s returned %d", cmd, err) @@ -363,7 +364,10 @@ def doBuild(args, parser): if not exists(specDir): makedirs(specDir) - os.environ["ALIBUILD_ALIDIST_HASH"] = git(("rev-parse", "HEAD"), directory=args.configDir) + # By default Git is our SCM, so we find the commit hash of alidist + # using it. + scm = Git() + os.environ["ALIBUILD_ALIDIST_HASH"] = scm.checkedOutCommitName(directory=args.configDir) debug("Building for architecture %s", args.architecture) debug("Number of parallel builds: %d", args.jobs) @@ -504,23 +508,21 @@ def doBuild(args, parser): # the commit_hash. If it's not a branch, it must be a tag or a raw commit # hash, so we use it directly. Finally if the package is a development # one, we use the name of the branch as commit_hash. - assert "git_refs" in spec + assert "scm_refs" in spec try: - spec["commit_hash"] = spec["git_refs"]["refs/heads/" + spec["tag"]] + spec["commit_hash"] = spec["scm_refs"]["refs/heads/" + spec["tag"]] except KeyError: spec["commit_hash"] = spec["tag"] # We are in development mode, we need to rebuild if the commit hash is # different or if there are extra changes on top. if spec["package"] in develPkgs: # Devel package: we get the commit hash from the checked source, not from remote. - out = git(("rev-parse", "HEAD"), directory=spec["source"]) + out = spec["scm"].checkedOutCommitName(directory=spec["source"]) spec["commit_hash"] = out.strip() - local_hash, untracked = hash_local_changes(spec["source"]) + local_hash, untracked = hash_local_changes(spec) untrackedFilesDirectories.extend(untracked) spec["devel_hash"] = spec["commit_hash"] + local_hash - out = git(("rev-parse", "--abbrev-ref", "HEAD"), directory=spec["source"]) - if out == "HEAD": - out = git(("rev-parse", "HEAD"), directory=spec["source"])[:10] + out = spec["scm"].branchOrRef(directory=spec["source"]) develPackageBranch = out.replace("/", "-") spec["tag"] = args.develPrefix if "develPrefix" in args else develPackageBranch spec["commit_hash"] = "0" diff --git a/alibuild_helpers/git.py b/alibuild_helpers/git.py index 0163bd45..f93e10a9 100644 --- a/alibuild_helpers/git.py +++ b/alibuild_helpers/git.py @@ -4,6 +4,7 @@ from pipes import quote # Python 2.7 from alibuild_helpers.cmd import getstatusoutput from alibuild_helpers.log import debug +from alibuild_helpers.scm import SCM GIT_COMMAND_TIMEOUT_SEC = 120 """How many seconds to let any git command execute before being terminated.""" @@ -16,6 +17,35 @@ def clone_speedup_options(): return ["--filter=blob:none"] return [] +class Git(SCM): + name = "Git" + def checkedOutCommitName(self, directory): + return git(("rev-parse", "HEAD"), directory) + def branchOrRef(self, directory): + out = git(("rev-parse", "--abbrev-ref", "HEAD"), directory=directory) + if out == "HEAD": + out = git(("rev-parse", "HEAD"), directory)[:10] + return out + def exec(self, *args, **kwargs): + return git(*args, **kwargs) + def parseRefs(self, output): + return { + git_ref: git_hash for git_hash, sep, git_ref + in (line.partition("\t") for line in output.splitlines()) if sep + } + def listRefsCmd(self): + return ["ls-remote", "--heads", "--tags"] + def cloneCmd(self, source, referenceRepo, usePartialClone): + cmd = ["clone", "--bare", source, referenceRepo] + if usePartialClone: + cmd.extend(clone_speedup_options()) + return cmd + def fetchCmd(self, source): + return ["fetch", "-f", "--tags", source, "+refs/heads/*:refs/heads/*"] + def diffCmd(self, directory): + return "cd %s && git diff -r HEAD && git status --porcelain" % directory + def checkUntracked(self, line): + return line.startswith("?? ") def git(args, directory=".", check=True, prompt=True): debug("Executing git %s (in directory %s)", " ".join(args), directory) diff --git a/alibuild_helpers/init.py b/alibuild_helpers/init.py index 84ce4f3e..d3bc896f 100644 --- a/alibuild_helpers/init.py +++ b/alibuild_helpers/init.py @@ -1,4 +1,4 @@ -from alibuild_helpers.git import git +from alibuild_helpers.git import git, Git from alibuild_helpers.utilities import getPackageList, parseDefaults, readDefaults, validateDefaults from alibuild_helpers.log import debug, error, warning, banner, info from alibuild_helpers.log import dieOnError @@ -66,6 +66,7 @@ def doInit(args): for p in pkgs: spec = specs.get(p["name"]) + spec["scm"] = Git() dieOnError(spec is None, "cannot find recipe for package %s" % p["name"]) dest = join(args.develPrefix, spec["package"]) writeRepo = spec.get("write_repo", spec.get("source")) diff --git a/alibuild_helpers/scm.py b/alibuild_helpers/scm.py new file mode 100644 index 00000000..9f1a78c3 --- /dev/null +++ b/alibuild_helpers/scm.py @@ -0,0 +1,19 @@ +class SCM(object): + def checkedOutCommitName(self, directory): + raise NotImplementedError + def branchOrRef(self, directory): + raise NotImplementedError + def lsRemote(self, remote): + raise NotImplementedError + def listRefsCmd(self): + raise NotImplementedError + def parseRefs(self, output): + raise NotImplementedError + def exec(self, *args, **kwargs): + raise NotImplementedError + def cloneCmd(self, spec, referenceRepo, usePartialClone): + raise NotImplementedError + def diffCmd(self, directory): + raise NotImplementedError + def checkUntracked(self, line): + raise NotImplementedError diff --git a/alibuild_helpers/workarea.py b/alibuild_helpers/workarea.py index 4fc49ae1..df81dda2 100644 --- a/alibuild_helpers/workarea.py +++ b/alibuild_helpers/workarea.py @@ -9,7 +9,7 @@ from ordereddict import OrderedDict from alibuild_helpers.log import dieOnError, debug, info, error -from alibuild_helpers.git import git, clone_speedup_options +from alibuild_helpers.git import clone_speedup_options FETCH_LOG_NAME = "fetch-log.txt" @@ -29,32 +29,32 @@ def cleanup_git_log(referenceSources): "Could not delete stale git log: %s" % exc) -def logged_git(package, referenceSources, +def logged_scm(scm, package, referenceSources, command, directory, prompt, logOutput=True): - """Run a git command, but produce an output file if it fails. + """Run an SCM command, but produce an output file if it fails. - This is useful in CI, so that we can pick up git failures and show them in + This is useful in CI, so that we can pick up SCM failures and show them in the final produced log. For this reason, the file we write in this function - must not contain any secrets. We only output the git command we ran, its exit + must not contain any secrets. We only output the SCM command we ran, its exit code, and the package name, so this should be safe. """ # This might take a long time, so show the user what's going on. - info("Git %s for repository for %s...", command[0], package) - err, output = git(command, directory=directory, check=False, prompt=prompt) + info("%s %s for repository for %s...", scm.name, command[0], package) + err, output = scm.exec(command, directory=directory, check=False, prompt=prompt) if logOutput: debug(output) if err: try: with codecs.open(os.path.join(referenceSources, FETCH_LOG_NAME), "a", encoding="utf-8", errors="replace") as logf: - logf.write("Git command for package %r failed.\n" - "Command: git %s\nIn directory: %s\nExit code: %d\n" % - (package, " ".join(command), directory, err)) + logf.write("%s command for package %r failed.\n" + "Command: %s %s\nIn directory: %s\nExit code: %d\n" % + (scm.name, package, scm.name.lower(), " ".join(command), directory, err)) except OSError as exc: - error("Could not write error log from git command:", exc_info=exc) - dieOnError(err, "Error during git %s for reference repo for %s." % - (command[0], package)) - info("Done git %s for repository for %s", command[0], package) + error("Could not write error log from SCM command:", exc_info=exc) + dieOnError(err, "Error during %s %s for reference repo for %s." % + (scm.name.lower(), command[0], package)) + info("Done %s %s for repository for %s", scm.name.lower(), command[0], package) return output @@ -97,6 +97,8 @@ def updateReferenceRepo(referenceSources, p, spec, if "source" not in spec: return + scm = spec["scm"] + debug("Updating references.") referenceRepo = os.path.join(os.path.abspath(referenceSources), p.lower()) @@ -114,14 +116,11 @@ def updateReferenceRepo(referenceSources, p, spec, return None # no reference can be found and created (not fatal) if not os.path.exists(referenceRepo): - cmd = ["clone", "--bare", spec["source"], referenceRepo] - if usePartialClone: - cmd.extend(clone_speedup_options()) - logged_git(p, referenceSources, cmd, ".", allowGitPrompt) + cmd = scm.cloneCmd(spec["source"], referenceRepo, usePartialClone) + logged_scm(scm, p, referenceSources, cmd, ".", allowGitPrompt) elif fetch: - logged_git(p, referenceSources, ( - "fetch", "-f", "--tags", spec["source"], "+refs/heads/*:refs/heads/*", - ), referenceRepo, allowGitPrompt) + cmd = scm.fetchCmd(spec["source"]) + logged_scm(scm, p, referenceSources, cmd, referenceRepo, allowGitPrompt) return referenceRepo # reference is read-write diff --git a/tests/test_build.py b/tests/test_build.py index b1ddf85e..cd710bc4 100644 --- a/tests/test_build.py +++ b/tests/test_build.py @@ -180,6 +180,7 @@ def dummy_exists(x): return False return { "/alidist": True, + "/alidist/.git": True, "/sw": True, "/sw/SPECS": False, "/sw/MIRROR/root": True, @@ -196,10 +197,8 @@ def dummy_exists(x): class BuildTestCase(unittest.TestCase): @patch("alibuild_helpers.analytics", new=MagicMock()) @patch("requests.Session.get", new=MagicMock()) - @patch("alibuild_helpers.build.git", new=dummy_git) - @patch("alibuild_helpers.workarea.git") - @patch("alibuild_helpers.build.execute", new=dummy_execute) @patch("alibuild_helpers.sync.execute", new=dummy_execute) + @patch("alibuild_helpers.git.git") @patch("alibuild_helpers.build.getstatusoutput", new=dummy_getstatusoutput) @patch("alibuild_helpers.build.exists", new=MagicMock(side_effect=dummy_exists)) @patch("os.path.exists", new=MagicMock(side_effect=dummy_exists)) @@ -225,8 +224,8 @@ class BuildTestCase(unittest.TestCase): @patch("alibuild_helpers.workarea.is_writeable", new=MagicMock(return_value=True)) @patch("alibuild_helpers.build.basename", new=MagicMock(return_value="aliBuild")) @patch("alibuild_helpers.build.install_wrapper_script", new=MagicMock()) - def test_coverDoBuild(self, mock_debug, mock_glob, mock_sys, mock_workarea_git): - mock_workarea_git.side_effect = dummy_git + def test_coverDoBuild(self, mock_debug, mock_glob, mock_sys, mock_git_git): + mock_git_git.side_effect = dummy_git mock_debug.side_effect = lambda *args: None mock_glob.side_effect = lambda x: { "*": ["zlib"], @@ -277,20 +276,21 @@ def test_coverDoBuild(self, mock_debug, mock_glob, mock_sys, mock_workarea_git): call(list(clone_args), directory=clone_dir, check=clone_check, prompt=False), call(["ls-remote", "--heads", "--tags", args.referenceSources + "/zlib"], directory=".", check=False, prompt=False), + call(["clone", "--bare", "https://github.com/star-externals/zlib", "/sw/MIRROR/zlib", "--filter=blob:none"]), call(["ls-remote", "--heads", "--tags", args.referenceSources + "/root"], directory=".", check=False, prompt=False), ] - mock_workarea_git.reset_mock() + mock_git_git.reset_mock() mock_debug.reset_mock() exit_code = doBuild(args, mock_parser) self.assertEqual(exit_code, 0) mock_debug.assert_called_with("Everything done") - self.assertEqual(mock_workarea_git.call_count, len(common_calls)) - mock_workarea_git.has_calls(common_calls) + self.assertEqual(mock_git_git.call_count, len(common_calls)) + mock_git_git.has_calls(common_calls) # Force fetching repos - mock_workarea_git.reset_mock() + mock_git_git.reset_mock() mock_debug.reset_mock() args.fetchRepos = True exit_code = doBuild(args, mock_parser) @@ -299,8 +299,8 @@ def test_coverDoBuild(self, mock_debug, mock_glob, mock_sys, mock_workarea_git): mock_glob.assert_called_with("/sw/TARS/osx_x86-64/ROOT/ROOT-v6-08-30-*.osx_x86-64.tar.gz") # We can't compare directly against the list of calls here as they # might happen in any order. - self.assertEqual(mock_workarea_git.call_count, len(common_calls) + 1) - mock_workarea_git.has_calls(common_calls + [ + self.assertEqual(mock_git_git.call_count, len(common_calls) + 1) + mock_git_git.has_calls(common_calls + [ call(fetch_args, directory=fetch_dir, check=fetch_check), ]) @@ -323,13 +323,13 @@ def setup_spec(script): (root, TEST_ROOT_GIT_REFS), (extra, TEST_EXTRA_GIT_REFS)): spec.setdefault("requires", []).append(default["package"]) - spec["git_refs"] = {ref: hash for hash, _, ref in ( + spec["scm_refs"] = {ref: hash for hash, _, ref in ( line.partition("\t") for line in refs.splitlines() )} try: - spec["commit_hash"] = spec["git_refs"]["refs/tags/" + spec["tag"]] + spec["commit_hash"] = spec["scm_refs"]["refs/tags/" + spec["tag"]] except KeyError: - spec["commit_hash"] = spec["git_refs"]["refs/heads/" + spec["tag"]] + spec["commit_hash"] = spec["scm_refs"]["refs/heads/" + spec["tag"]] specs = {pkg["package"]: pkg for pkg in (default, zlib, root, extra)} storeHashes("defaults-release", specs, isDevelPkg=False, considerRelocation=False) diff --git a/tests/test_hashing.py b/tests/test_hashing.py index f9f32b1d..4882910c 100644 --- a/tests/test_hashing.py +++ b/tests/test_hashing.py @@ -58,7 +58,7 @@ def test_hashes_match_build_log(self): self.assertEqual(spec["remote_revision_hash"], remote) self.assertEqual(spec["local_revision_hash"], local) # For logs produced by old hash implementations (which didn't - # consider spec["git_refs"]), alt_{remote,local} will only + # consider spec["scm_refs"]), alt_{remote,local} will only # contain the primary hash anyway, so this works nicely. self.assertEqual(spec["remote_hashes"], alt_remote.split(", ")) self.assertEqual(spec["local_hashes"], alt_local.split(", ")) diff --git a/tests/test_workarea.py b/tests/test_workarea.py index ad5a1c88..0bde82cb 100644 --- a/tests/test_workarea.py +++ b/tests/test_workarea.py @@ -10,11 +10,13 @@ from ordereddict import OrderedDict from alibuild_helpers.workarea import updateReferenceRepoSpec +from alibuild_helpers.git import Git MOCK_SPEC = OrderedDict(( ("package", "AliRoot"), ("source", "https://github.com/alisw/AliRoot"), + ("scm", Git()), )) @@ -26,7 +28,7 @@ class WorkareaTestCase(unittest.TestCase): @patch("os.path.exists") @patch("os.makedirs") - @patch("alibuild_helpers.workarea.git") + @patch("alibuild_helpers.git") @patch("alibuild_helpers.workarea.is_writeable", new=MagicMock(return_value=False)) def test_reference_sources_reused(self, mock_git, mock_makedirs, mock_exists): """Check mirrors are reused when pre-existing, but not writable. @@ -45,7 +47,7 @@ def test_reference_sources_reused(self, mock_git, mock_makedirs, mock_exists): @patch("os.path.exists") @patch("os.makedirs") @patch("codecs.open") - @patch("alibuild_helpers.workarea.git") + @patch("alibuild_helpers.git.git") @patch("alibuild_helpers.workarea.is_writeable", new=MagicMock(return_value=True)) def test_reference_sources_updated(self, mock_git, mock_open, mock_makedirs, mock_exists): """Check mirrors are updated when possible and git output is logged.""" @@ -58,15 +60,16 @@ def test_reference_sources_updated(self, mock_git, mock_open, mock_makedirs, moc updateReferenceRepoSpec(referenceSources="sw/MIRROR", p="AliRoot", spec=spec, fetch=True) mock_exists.assert_called_with("%s/sw/MIRROR/aliroot" % getcwd()) + mock_exists.has_calls([]) mock_makedirs.assert_called_with("%s/sw/MIRROR" % getcwd()) - mock_git.assert_called_once_with(( + mock_git.assert_called_once_with([ "fetch", "-f", "--tags", spec["source"], "+refs/heads/*:refs/heads/*", - ), directory="%s/sw/MIRROR/aliroot" % getcwd(), check=False, prompt=True) + ], directory="%s/sw/MIRROR/aliroot" % getcwd(), check=False, prompt=True) self.assertEqual(spec.get("reference"), "%s/sw/MIRROR/aliroot" % getcwd()) @patch("os.path.exists") @patch("os.makedirs") - @patch("alibuild_helpers.workarea.git") + @patch("alibuild_helpers.git") @patch("alibuild_helpers.workarea.is_writeable", new=MagicMock(return_value=False)) def test_reference_sources_not_writable(self, mock_git, mock_makedirs, mock_exists): """Check nothing is fetched when mirror directory isn't writable.""" @@ -82,7 +85,7 @@ def test_reference_sources_not_writable(self, mock_git, mock_makedirs, mock_exis @patch("os.path.exists") @patch("os.makedirs") - @patch("alibuild_helpers.workarea.git") + @patch("alibuild_helpers.git.git") @patch("alibuild_helpers.workarea.is_writeable", new=MagicMock(return_value=True)) def test_reference_sources_created(self, mock_git, mock_makedirs, mock_exists): """Check the mirror directory is created when possible."""