From 6b1b3637ff0aa98d96f9b6e8f9e2693c56afb4e9 Mon Sep 17 00:00:00 2001 From: Elliot Saba Date: Fri, 24 Feb 2023 19:40:35 +0000 Subject: [PATCH] Add `parallel_bisect.jl` debugging tool This is a debugging tool that enables us to do parallel bisections over the Julia codebase, working around the fact that our bootstrap process is slow and single-threaded. --- debugging/Manifest.toml | 74 +++++- debugging/Project.toml | 2 + debugging/julia_checkout.jl | 68 ++++-- debugging/parallel_bisect/README.md | 12 + debugging/parallel_bisect/parallel_bisect.jl | 211 ++++++++++++++++++ .../tests/edge_case/linear_solve_example.jl | 5 + .../tests/edge_case/runtest.sh | 8 + .../tests/unbuildable_commits/runtest.sh | 8 + .../test_download_exists.jl | 3 + 9 files changed, 364 insertions(+), 27 deletions(-) create mode 100644 debugging/parallel_bisect/README.md create mode 100644 debugging/parallel_bisect/parallel_bisect.jl create mode 100644 debugging/parallel_bisect/tests/edge_case/linear_solve_example.jl create mode 100755 debugging/parallel_bisect/tests/edge_case/runtest.sh create mode 100755 debugging/parallel_bisect/tests/unbuildable_commits/runtest.sh create mode 100644 debugging/parallel_bisect/tests/unbuildable_commits/test_download_exists.jl diff --git a/debugging/Manifest.toml b/debugging/Manifest.toml index 04f16aaf..2b40d63a 100644 --- a/debugging/Manifest.toml +++ b/debugging/Manifest.toml @@ -1,11 +1,12 @@ # This file is machine-generated - editing it directly is not advised -julia_version = "1.7.3" +julia_version = "1.9.0-beta4" manifest_format = "2.0" -project_hash = "830e9599053c6a13e2a97c060ddf2702b0673671" +project_hash = "8189cb1335616d837a2f7f489e849ccb20675510" [[deps.ArgTools]] uuid = "0dad84c5-d112-42e6-8d28-ef12dabb789f" +version = "1.1.1" [[deps.Artifacts]] uuid = "56f22d72-fd6d-98f1-02f0-08ddc0907c33" @@ -34,6 +35,7 @@ version = "3.45.0" [[deps.CompilerSupportLibraries_jll]] deps = ["Artifacts", "Libdl"] uuid = "e66e0078-7015-5450-92f7-15fbd957f2ae" +version 
= "1.0.2+0" [[deps.Crayons]] git-tree-sha1 = "249fe38abf76d48563e2f4556bebd215aa317e15" @@ -68,7 +70,9 @@ uuid = "ade2ca70-3891-5945-98fb-dc099432e06a" [[deps.DelimitedFiles]] deps = ["Mmap"] +git-tree-sha1 = "9e2f36d3c96a820c678f2f1f1782582fcf685bae" uuid = "8bb1440f-4735-579b-a4ab-409b98df4dab" +version = "1.9.1" [[deps.Distributed]] deps = ["Random", "Serialization", "Sockets"] @@ -77,6 +81,13 @@ uuid = "8ba89e20-285c-5b6f-9357-94700520ee1b" [[deps.Downloads]] deps = ["ArgTools", "FileWatching", "LibCURL", "NetworkOptions"] uuid = "f43a241f-c20a-4ad4-852c-f6b1247861c6" +version = "1.6.0" + +[[deps.Expat_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"] +git-tree-sha1 = "bad72f730e9e91c08d9427d5e8db95478a3c323d" +uuid = "2e619515-83b5-522b-bb60-26c02a35a201" +version = "2.4.8+0" [[deps.FileWatching]] uuid = "7b1f6079-737a-58dc-b8bc-7a2ca5c1b5ee" @@ -91,6 +102,18 @@ version = "0.4.2" deps = ["Random"] uuid = "9fa8497b-333b-5362-9e8d-4d0656e87820" +[[deps.Git]] +deps = ["Git_jll"] +git-tree-sha1 = "51764e6c2e84c37055e846c516e9015b4a291c7d" +uuid = "d7ba0133-e1db-5d97-8f8c-041e4b3a1eb2" +version = "1.3.0" + +[[deps.Git_jll]] +deps = ["Artifacts", "Expat_jll", "JLLWrappers", "LibCURL_jll", "Libdl", "Libiconv_jll", "OpenSSL_jll", "PCRE2_jll", "Zlib_jll"] +git-tree-sha1 = "5cb7515f531a2f9d8e38951b07486aa419bdc203" +uuid = "f8c6e375-362e-5223-8a59-34ff63f689eb" +version = "2.36.1+0" + [[deps.HTTP]] deps = ["Base64", "CodecZlib", "Dates", "IniFile", "Logging", "LoggingExtras", "MbedTLS", "NetworkOptions", "Random", "SimpleBufferStream", "Sockets", "URIs", "UUIDs"] git-tree-sha1 = "ed47af35905b7cc8f1a522ca684b35a212269bd8" @@ -141,10 +164,12 @@ uuid = "4af54fe1-eca0-43a8-85a7-787d91b784e3" [[deps.LibCURL]] deps = ["LibCURL_jll", "MozillaCACerts_jll"] uuid = "b27032c2-a3e7-50c8-80cd-2d36dbcbfd21" +version = "0.6.3" [[deps.LibCURL_jll]] deps = ["Artifacts", "LibSSH2_jll", "Libdl", "MbedTLS_jll", "Zlib_jll", "nghttp2_jll"] uuid = 
"deac9b47-8bc7-5906-a0fe-35ac56dc84c0" +version = "7.84.0+0" [[deps.LibGit2]] deps = ["Base64", "NetworkOptions", "Printf", "SHA"] @@ -153,16 +178,24 @@ uuid = "76f85450-5226-5b5a-8eaa-529ad045b433" [[deps.LibGit2_jll]] deps = ["Artifacts", "LibSSH2_jll", "Libdl", "MbedTLS_jll"] uuid = "e37daf67-58a4-590a-8e99-b0245dd2ffc5" +version = "1.5.0+1" [[deps.LibSSH2_jll]] deps = ["Artifacts", "Libdl", "MbedTLS_jll"] uuid = "29816b5a-b9ab-546f-933c-edad1886dfa8" +version = "1.10.2+0" [[deps.Libdl]] uuid = "8f399da3-3557-5675-b5ff-fb832c97cbdb" +[[deps.Libiconv_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"] +git-tree-sha1 = "c7cb1f5d892775ba13767a87c7ada0b980ea0a71" +uuid = "94ce4f54-9a6c-5748-9c1c-f9c7231a4531" +version = "1.16.1+2" + [[deps.LinearAlgebra]] -deps = ["Libdl", "libblastrampoline_jll"] +deps = ["Libdl", "OpenBLAS_jll", "libblastrampoline_jll"] uuid = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" [[deps.Logging]] @@ -187,6 +220,7 @@ version = "1.1.1" [[deps.MbedTLS_jll]] deps = ["Artifacts", "Libdl"] uuid = "c8ffd9c3-330d-5841-b78e-0817d7145fa1" +version = "2.28.0+0" [[deps.Missings]] deps = ["DataAPI"] @@ -199,19 +233,33 @@ uuid = "a63ad114-7e13-5084-954f-fe012c677804" [[deps.MozillaCACerts_jll]] uuid = "14a3606d-f60d-562e-9121-12d972cd8159" +version = "2022.10.11" [[deps.NetworkOptions]] uuid = "ca575930-c2e3-43a9-ace4-1e988b2c1908" +version = "1.2.0" [[deps.OpenBLAS_jll]] deps = ["Artifacts", "CompilerSupportLibraries_jll", "Libdl"] uuid = "4536629a-c528-5b80-bd46-f80d51c5b363" +version = "0.3.21+0" + +[[deps.OpenSSL_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"] +git-tree-sha1 = "9ff31d101d987eb9d66bd8b176ac7c277beccd09" +uuid = "458c3c95-2e84-50aa-8efc-19380b2a3a95" +version = "1.1.20+0" [[deps.OrderedCollections]] git-tree-sha1 = "85f8e6578bf1f9ee0d11e7bb1b1456435479d47c" uuid = "bac558e1-5e72-5ebc-8fee-abe8a469f55d" version = "1.4.1" +[[deps.PCRE2_jll]] +deps = ["Artifacts", "Libdl"] +uuid = "efcefdf7-47ab-520b-bdef-62a2eaa19f15" 
+version = "10.42.0+0" + [[deps.Parsers]] deps = ["Dates"] git-tree-sha1 = "0044b23da09b5608b4ecacb4e5e6c6332f833a7e" @@ -219,8 +267,9 @@ uuid = "69de0a69-1ddd-5017-9359-2bf0b02dc9f0" version = "2.3.2" [[deps.Pkg]] -deps = ["Artifacts", "Dates", "Downloads", "LibGit2", "Libdl", "Logging", "Markdown", "Printf", "REPL", "Random", "SHA", "Serialization", "TOML", "Tar", "UUIDs", "p7zip_jll"] +deps = ["Artifacts", "Dates", "Downloads", "FileWatching", "LibGit2", "Libdl", "Logging", "Markdown", "Printf", "REPL", "Random", "SHA", "Serialization", "TOML", "Tar", "UUIDs", "p7zip_jll"] uuid = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f" +version = "1.9.0" [[deps.PooledArrays]] deps = ["DataAPI", "Future"] @@ -265,6 +314,7 @@ version = "1.2.2" [[deps.SHA]] uuid = "ea8e919c-243c-51af-8825-aaa63cd721ce" +version = "0.7.0" [[deps.Sandbox]] deps = ["LazyArtifacts", "Libdl", "Preferences", "Random", "SHA", "Scratch", "TOML", "Tar", "Tar_jll", "UserNSSandbox_jll"] @@ -300,12 +350,13 @@ uuid = "a2af1166-a08f-5f64-846c-94a0d3cef48c" version = "1.0.1" [[deps.SparseArrays]] -deps = ["LinearAlgebra", "Random"] +deps = ["Libdl", "LinearAlgebra", "Random", "Serialization", "SuiteSparse_jll"] uuid = "2f01184e-e22b-5df5-ae63-d93ebab69eaf" [[deps.Statistics]] deps = ["LinearAlgebra", "SparseArrays"] uuid = "10745b16-79ce-11e8-11f9-7d13ad32a3b2" +version = "1.9.0" [[deps.StructTypes]] deps = ["Dates", "UUIDs"] @@ -313,9 +364,15 @@ git-tree-sha1 = "d24a825a95a6d98c385001212dc9020d609f2d4f" uuid = "856f2bd8-1eba-4b0a-8007-ebc267875bd4" version = "1.8.1" +[[deps.SuiteSparse_jll]] +deps = ["Artifacts", "Libdl", "Pkg", "libblastrampoline_jll"] +uuid = "bea87d4a-7f5b-5778-9afe-8cc45184846c" +version = "5.10.1+6" + [[deps.TOML]] deps = ["Dates"] uuid = "fa267f1f-6049-4f14-aa54-33bafae1ed76" +version = "1.0.3" [[deps.TableTraits]] deps = ["IteratorInterfaceExtensions"] @@ -332,6 +389,7 @@ version = "1.7.0" [[deps.Tar]] deps = ["ArgTools", "SHA"] uuid = "a4e569a6-e804-4fa4-b0f3-eef7a1d5b13e" +version = 
"1.10.0" [[deps.Tar_jll]] deps = ["Artifacts", "Attr_jll", "JLLWrappers", "Libdl", "Pkg"] @@ -370,15 +428,19 @@ version = "2022.3.30+0" [[deps.Zlib_jll]] deps = ["Libdl"] uuid = "83775a58-1f1d-513f-b197-d71354ab007a" +version = "1.2.13+0" [[deps.libblastrampoline_jll]] -deps = ["Artifacts", "Libdl", "OpenBLAS_jll"] +deps = ["Artifacts", "Libdl"] uuid = "8e850b90-86db-534c-a0d3-1478176c7d93" +version = "5.4.0+0" [[deps.nghttp2_jll]] deps = ["Artifacts", "Libdl"] uuid = "8e850ede-7688-5339-a07c-302acd2aaf8d" +version = "1.48.0+0" [[deps.p7zip_jll]] deps = ["Artifacts", "Libdl"] uuid = "3f19e933-33d8-53b3-aaab-bd5110c3b7a0" +version = "17.4.0+0" diff --git a/debugging/Project.toml b/debugging/Project.toml index 8313284d..6142389b 100644 --- a/debugging/Project.toml +++ b/debugging/Project.toml @@ -2,11 +2,13 @@ uuid = "21676e69-6767-7562-6564-696365736162" [deps] DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0" +Git = "d7ba0133-e1db-5d97-8f8c-041e4b3a1eb2" HTTP = "cd3eb016-35fb-5094-929b-558a96fad6f3" JLLPrefixes = "afc68a34-7891-4c5a-9da1-1c62935e7b0d" JSON3 = "0f8b85d8-7281-11e9-16c2-39a750bddbf1" LibGit2 = "76f85450-5226-5b5a-8eaa-529ad045b433" Pkg = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f" +Preferences = "21216c6a-2e73-6563-6e65-726566657250" ProgressMeter = "92933f4c-e287-5a05-a399-4b506db050ca" SHA = "ea8e919c-243c-51af-8825-aaa63cd721ce" Sandbox = "9307e30f-c43e-9ca7-d17c-c2dc59df670d" diff --git a/debugging/julia_checkout.jl b/debugging/julia_checkout.jl index d56e407d..1dc4b807 100644 --- a/debugging/julia_checkout.jl +++ b/debugging/julia_checkout.jl @@ -1,6 +1,7 @@ -using LibGit2, Scratch, SHA +using Git, Scratch, SHA using Base: SHA1 +Base.SHA1(x::SHA1) = x struct GitCheckout repo_url::String commit::SHA1 @@ -15,23 +16,44 @@ struct GitCheckout end end -function cached_git_clone(url::AbstractString; - hash::Union{Nothing, SHA1} = nothing, - downloads_dir::String = @get_scratch!("git-clones")) - repo_path = joinpath(downloads_dir, string(basename(url), "-", 
bytes2hex(sha256(url)))) +iscommit(repo::String, commit::String) = success(git(["-C", repo, "cat-file", "-e", commit])) +default_clones_dir() = @get_scratch!("git_clones") + +""" + cached_git_clone(url::String; hash = nothing, verbose = false) + +Return the path to a local git clone of the given `url`. If `hash` is given, +then a cached git repository will not be updated if the commit already exists locally. +""" +function cached_git_clone(url::String; + hash::Union{Nothing,String} = nothing, + clones_dir::String = default_clones_dir(), + verbose::Bool = false) + quiet_args = String[] + if !verbose + push!(quiet_args, "-q") + end + + repo_path = joinpath(clones_dir, string(basename(url), "-", bytes2hex(sha256(url)))) if isdir(repo_path) - LibGit2.with(LibGit2.GitRepo(repo_path)) do repo - # In some cases, we know the hash we're looking for, so only fetch() if - # this git repository doesn't contain the hash we're seeking - # this is not only faster, it avoids race conditions when we have - # multiple builders on the same machine all fetching at once. - if hash === nothing || !LibGit2.iscommit(bytes2hex(hash.bytes), repo) - LibGit2.fetch(repo) - end + if verbose + @info("Using cached git repository", url, repo_path) + end + + # If we didn't just mercilessly obliterate the cached git repo, use it! + # In some cases, we know the hash we're looking for, so only fetch() if + # this git repository doesn't contain the hash we're seeking. + # this is not only faster, it avoids race conditions when we have + # multiple builders on the same machine all fetching at once. 
+ if hash === nothing || !iscommit(repo_path, hash) + run(git(["-C", repo_path, "fetch", "-a", quiet_args...])) end else + if verbose + @info("Cloning git repository", url, repo_path) + end # If there is no repo_path yet, clone it down into a bare repository - LibGit2.clone(url, repo_path; isbare=true) + run(git(["clone", "--mirror", url, repo_path, quiet_args...])) end return repo_path end @@ -39,17 +61,21 @@ end function get_checkout(repo_url::String, hash::SHA1, checkout_dir::String; - downloads_dir::String = @get_scratch!("git-clones")) + clones_dir::String = default_clones_dir()) # Clone down (or verify that we've cached) a repository that contains the requested commit - repo_path = cached_git_clone(repo_url; hash, downloads_dir) + repo_path = cached_git_clone(repo_url; hash=bytes2hex(hash.bytes), clones_dir) - # Checkout the desired commit to a temporary directory that `reman` will clean up: - LibGit2.with(LibGit2.clone(repo_path, checkout_dir)) do cloned_repo - LibGit2.checkout!(cloned_repo, bytes2hex(hash.bytes)) - end - return checkout_dir + run(git(["clone", "--shared", repo_path, checkout_dir, "-q"])) + run(git(["-C", checkout_dir, "checkout", bytes2hex(hash.bytes), "-q"])) end function get_checkout(gc::GitCheckout, checkout_prefix::String; kwargs...) return get_checkout(gc.repo_url, gc.commit, joinpath(checkout_prefix, gc.checkout_path); kwargs...) 
end + +function get_commits_between(repo_url::String, before::SHA1, after::SHA1; + clones_dir::String = default_clones_dir()) + repo_path = cached_git_clone(repo_url; hash=bytes2hex(after.bytes), clones_dir) + lines = readchomp(git(["-C", repo_path, "log", "--reverse", "--pretty=format:%H", string(bytes2hex(before.bytes), "^!"), bytes2hex(after.bytes)])) + return [parse(SHA1, line) for line in split(lines)] +end diff --git a/debugging/parallel_bisect/README.md b/debugging/parallel_bisect/README.md new file mode 100644 index 00000000..1d4d5615 --- /dev/null +++ b/debugging/parallel_bisect/README.md @@ -0,0 +1,12 @@ +# parallel_bisect.jl + +Our bootstrap process is single-threaded and slow. +When bisecting an issue, it sure would be nice if we could make use of all of those extra cores, wouldn't it? + +``` +julia -t5 --project parallel_bisect.jl script_to_test_issue.jl +``` + +Build errors get skipped. +The first run will verify your script on the given good and bad git SHAs to ensure that it reacts properly. +Use the `-t` argument to Julia to specify how many jobs should run (each job will use enough threads to hopefully saturate your machine without completely destroying it). diff --git a/debugging/parallel_bisect/parallel_bisect.jl b/debugging/parallel_bisect/parallel_bisect.jl new file mode 100644 index 00000000..fcf770c8 --- /dev/null +++ b/debugging/parallel_bisect/parallel_bisect.jl @@ -0,0 +1,211 @@ +include("../julia_checkout.jl") + +if length(ARGS) < 3 + println("Usage: $(basename(@__FILE__))