From c7d06a6f4343d4362f4d82e6583b85cc2829fb2b Mon Sep 17 00:00:00 2001 From: Eliah Kagan Date: Mon, 17 Feb 2025 13:23:01 -0500 Subject: [PATCH 01/15] feat: Use `gix_path::env::shell()` in `gix-command` for sh `gix_command::Prepare` previously used `sh` on Windows rather than first checking for a usable `sh` implementation associated with a Git for Windows installation. This changes it to use the `gix-path` facility for finding what is likely the best 'sh' implementation for POSIX shell scripts that will operate on Git repositories. This increases consistency across how different crates find 'sh', and brings the benefit of preferring the Git for Windows `sh.exe` on Windows when it can be found reliably. --- Cargo.lock | 1 + gix-command/Cargo.toml | 1 + gix-command/src/lib.rs | 6 ++---- gix-command/tests/command.rs | 36 ++++++++++++++++++++++-------------- 4 files changed, 26 insertions(+), 18 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index f23529a4a6a..5a0c621181e 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1589,6 +1589,7 @@ dependencies = [ "gix-quote 0.4.15", "gix-testtools", "gix-trace 0.1.12", + "once_cell", "shell-words", ] diff --git a/gix-command/Cargo.toml b/gix-command/Cargo.toml index e81cfedb4be..a2f9d763ddf 100644 --- a/gix-command/Cargo.toml +++ b/gix-command/Cargo.toml @@ -24,3 +24,4 @@ shell-words = "1.0" [dev-dependencies] gix-testtools = { path = "../tests/tools" } +once_cell = "1.17.1" diff --git a/gix-command/src/lib.rs b/gix-command/src/lib.rs index ca64d5cc3e7..c8d47d93e32 100644 --- a/gix-command/src/lib.rs +++ b/gix-command/src/lib.rs @@ -280,10 +280,8 @@ mod prepare { cmd } None => { - let mut cmd = Command::new( - prep.shell_program - .unwrap_or(if cfg!(windows) { "sh" } else { "/bin/sh" }.into()), - ); + let shell = prep.shell_program.unwrap_or_else(|| gix_path::env::shell().into()); + let mut cmd = Command::new(shell); cmd.arg("-c"); if !prep.args.is_empty() { if prep.command.to_str().map_or(true, |cmd| !cmd.contains("$@")) { diff --git a/gix-command/tests/command.rs b/gix-command/tests/command.rs index 118c593d45a..b73a4299632 100644 --- a/gix-command/tests/command.rs +++ b/gix-command/tests/command.rs @@ -222,10 +222,13 @@ mod context { } mod prepare { - #[cfg(windows)] - const SH: &str = "sh"; - #[cfg(not(windows))] - const SH: &str = "/bin/sh"; + use once_cell::sync::Lazy; + + static SH: Lazy<&'static str> = Lazy::new(|| { + gix_path::env::shell() + .to_str() + .expect("`prepare` tests must be run where 'sh' path is valid Unicode") + }); fn quoted(input: &[&str]) -> String { input.iter().map(|s| format!("\"{s}\"")).collect::>().join(" ") @@ -275,7 +278,7 @@ mod prepare { if cfg!(windows) { quoted(&["ls", "first", "second", "third"]) } else { - quoted(&[SH, "-c", "ls first second third", "--"]) + quoted(&[*SH, "-c", "ls first second third", "--"]) }, "with shell, this works as it performs word splitting" ); @@ -311,7 +314,8 @@ mod prepare { if cfg!(windows) { quoted(&["ls", "--foo", "a b", "additional"]) } else { - format!(r#""{SH}" "-c" "ls --foo \"a b\" \"$@\"" "--" "additional""#) + let sh = *SH; + format!(r#""{sh}" "-c" "ls --foo \"a b\" \"$@\"" "--" "additional""#) }, "with shell, this works as it performs word splitting" ); @@ -334,7 +338,7 @@ mod prepare { let cmd = std::process::Command::from( gix_command::prepare("ls --foo=\"a b\"").command_may_be_shell_script_disallow_manual_argument_splitting(), ); - assert_eq!(format!("{cmd:?}"), quoted(&[SH, "-c", r#"ls --foo=\"a b\""#, "--"])); + assert_eq!(format!("{cmd:?}"), quoted(&[*SH, "-c", r#"ls --foo=\"a b\""#, "--"])); } #[test] @@ -347,7 +351,7 @@ mod prepare { ); assert_eq!( format!("{cmd:?}"), - quoted(&[SH, "-c", "ls \\\"$@\\\"", "--", "--foo=a b"]) + quoted(&[*SH, "-c", "ls \\\"$@\\\"", "--", "--foo=a b"]) ); } @@ -362,7 +366,7 @@ mod prepare { ); assert_eq!( format!("{cmd:?}"), - quoted(&[SH, "-c", "\\'ls\\' \\\"$@\\\"", "--", "--foo=a b"]), + quoted(&[*SH, "-c", "\\'ls\\' \\\"$@\\\"", "--", "--foo=a b"]), "looks strange thanks to debug printing, but is the right amount of quotes actually" ); } @@ -379,7 +383,7 @@ mod prepare { assert_eq!( format!("{cmd:?}"), quoted(&[ - SH, + *SH, "-c", "\\'C:\\\\Users\\\\O\\'\\\\\\'\\'Shaughnessy\\\\with space.exe\\' \\\"$@\\\"", "--", @@ -394,9 +398,10 @@ mod prepare { let cmd = std::process::Command::from( gix_command::prepare("ls --foo=~/path").command_may_be_shell_script_allow_manual_argument_splitting(), ); + let sh = *SH; assert_eq!( format!("{cmd:?}"), - format!(r#""{SH}" "-c" "ls --foo=~/path" "--""#), + format!(r#""{sh}" "-c" "ls --foo=~/path" "--""#), "splitting can also handle quotes" ); } @@ -405,9 +410,10 @@ mod prepare { fn tilde_path_and_multiple_arguments_as_part_of_command_with_shell() { let cmd = std::process::Command::from(gix_command::prepare("~/bin/exe --foo \"a b\"").command_may_be_shell_script()); + let sh = *SH; assert_eq!( format!("{cmd:?}"), - format!(r#""{SH}" "-c" "~/bin/exe --foo \"a b\"" "--""#), + format!(r#""{sh}" "-c" "~/bin/exe --foo \"a b\"" "--""#), "this always needs a shell as we need tilde expansion" ); } @@ -419,9 +425,10 @@ mod prepare { .command_may_be_shell_script() .arg("store"), ); + let sh = *SH; assert_eq!( format!("{cmd:?}"), - format!(r#""{SH}" "-c" "echo \"$@\" >&2" "--" "store""#), + format!(r#""{sh}" "-c" "echo \"$@\" >&2" "--" "store""#), "this is how credential helpers have to work as for some reason they don't get '$@' added in Git.\ We deal with it by not doubling the '$@' argument, which seems more flexible." ); @@ -435,9 +442,10 @@ mod prepare { .with_quoted_command() .arg("store"), ); + let sh = *SH; assert_eq!( format!("{cmd:?}"), - format!(r#""{SH}" "-c" "echo \"$@\" >&2" "--" "store""#) + format!(r#""{sh}" "-c" "echo \"$@\" >&2" "--" "store""#) ); } } From b9371714b74103c89d3ba9a241a92a1bd989fdad Mon Sep 17 00:00:00 2001 From: Eliah Kagan Date: Mon, 17 Feb 2025 13:49:44 -0500 Subject: [PATCH 02/15] Adjust `gix-credentials` tests for `gix-command` changes Because `gix-command` uses `gix_path::env::shell()` to find sh, and `gix-credentials` uses `gix-command`. --- .../tests/program/from_custom_definition.rs | 26 ++++++++++++------- 1 file changed, 17 insertions(+), 9 deletions(-) diff --git a/gix-credentials/tests/program/from_custom_definition.rs b/gix-credentials/tests/program/from_custom_definition.rs index 25e9dd2ae4b..c746e0b48af 100644 --- a/gix-credentials/tests/program/from_custom_definition.rs +++ b/gix-credentials/tests/program/from_custom_definition.rs @@ -1,12 +1,17 @@ use gix_credentials::{helper, program::Kind, Program}; +use once_cell::sync::Lazy; -static GIT: once_cell::sync::Lazy<&'static str> = - once_cell::sync::Lazy::new(|| gix_path::env::exe_invocation().to_str().expect("not illformed")); +static GIT: once_cell::sync::Lazy<&'static str> = once_cell::sync::Lazy::new(|| { + gix_path::env::exe_invocation() + .to_str() + .expect("some `from_custom_definition` tests must be run where 'git' path is valid Unicode") +}); -#[cfg(windows)] -const SH: &str = "sh"; -#[cfg(not(windows))] -const SH: &str = "/bin/sh"; +static SH: Lazy<&'static str> = Lazy::new(|| { + gix_path::env::shell() + .to_str() + .expect("some `from_custom_definition` tests must be run where 'sh' path is valid Unicode") +}); #[test] fn empty() { @@ -47,11 +52,12 @@ fn name_with_args() { fn name_with_special_args() { let input = "name --arg --bar=~/folder/in/home"; let prog = Program::from_custom_definition(input); + let sh = *SH; let git = *GIT; assert!(matches!(&prog.kind, Kind::ExternalName{name_and_args} if name_and_args == input)); assert_eq!( format!("{:?}", prog.to_command(&helper::Action::Store("egal".into()))), - format!(r#""{SH}" "-c" "{git} credential-name --arg --bar=~/folder/in/home \"$@\"" "--" "store""#) + format!(r#""{sh}" "-c" "{git} credential-name --arg --bar=~/folder/in/home \"$@\"" "--" "store""#) ); } @@ -73,12 +79,13 @@ fn path_with_args_that_definitely_need_shell() { let input = "/abs/name --arg --bar=\"a b\""; let prog = Program::from_custom_definition(input); assert!(matches!(&prog.kind, Kind::ExternalPath{path_and_args} if path_and_args == input)); + let sh = *SH; assert_eq!( format!("{:?}", prog.to_command(&helper::Action::Store("egal".into()))), if cfg!(windows) { r#""/abs/name" "--arg" "--bar=a b" "store""#.to_owned() } else { - format!(r#""{SH}" "-c" "/abs/name --arg --bar=\"a b\" \"$@\"" "--" "store""#) + format!(r#""{sh}" "-c" "/abs/name --arg --bar=\"a b\" \"$@\"" "--" "store""#) } ); } @@ -100,12 +107,13 @@ fn path_with_simple_args() { let input = "/abs/name a b"; let prog = Program::from_custom_definition(input); assert!(matches!(&prog.kind, Kind::ExternalPath{path_and_args} if path_and_args == input)); + let sh = *SH; assert_eq!( format!("{:?}", prog.to_command(&helper::Action::Store("egal".into()))), if cfg!(windows) { r#""/abs/name" "a" "b" "store""#.to_owned() } else { - format!(r#""{SH}" "-c" "/abs/name a b \"$@\"" "--" "store""#) + format!(r#""{sh}" "-c" "/abs/name a b \"$@\"" "--" "store""#) }, "a shell is used as there are arguments, and it's generally more flexible, but on windows we split ourselves" ); From 10af2d005fbe92a289be01492206c6e8a38ab0bd Mon Sep 17 00:00:00 2001 From: Eliah Kagan Date: Mon, 17 Feb 2025 16:11:42 -0500 Subject: [PATCH 03/15] fix: Use `/` in `gix_path::env::shell()` and check existence This makes the path returned by `gix_path::env::shell()` on Windows more usable by: 1. Adding components with `/` separators. While in principle a `\` should work, the path of the shell itself is used in shell scripts (script files and `sh -c` operands) that may not account for the presence of backslashes, and it is also harder to read paths with `\` in contexts where it appears escaped, which may include various messages from Rust code and shell scripts. The path before what we add will already use `/` and never `\`, unless `GIT_EXEC_PATH` has been set to a strange value, because it is based on `git --exec-path`, which by default gives a path with `/` separators. Thus, ensuring that the part we add uses `/` should be sufficient to get a path without `\` in all cases when it is clearly reasonable to do so. This therefore also usually increases stylistic consistency of the path, which is another factor that makes it more user-friendly in messages. This is needed to get tests to pass since changing `gix-command` to use `gix_path::env::shell()` on Windows, where a path is formatted in away that sometimes quotes `\` characters. Their expectations could be adjusted, but it seems likely that various other software, much of which may otherwise be working, has similar expectations. Using `/` instead of `\` works whether `\` is expected to be displayed quoted or not. 2. Check that the path to the shell plausibly has a shell there, only using it if it a file or a non-broken file symlink. When this is not the case, the fallback short name is used instead. 3. The fallback short name is changed from `sh` to `sh.exe`, since the `.exe` suffix is appended in other short names on Windows, such as `git.exe`, as well as being part of the filename component of the path we build for the shell when using the implementation provided as part of Git for Windows. Those changes only affect Windows. This also adds tests for (1) and (2) above, as well as for the expectation that we get an absolute path, to make sure we don't build a path that would be absolute on a Unix-like system but is relative on Windows (a path that starts with just one `/` or `\`). These tests are not Windows-specific, since all these expectations should already hold on Unix-like systems, where currently we are using the hard-coded path `/bin/sh`, which is an absolute path on those systems. (Some Unix-like systems may technically not have `/bin/sh` or it may not be the best path to use for a shell that should be POSIX-compatible, but we are already relying on this, and handling that better is outside the scope of the changes here.) --- gix-path/src/env/mod.rs | 33 +++++++++++++++++++++++++-------- gix-path/tests/path/env.rs | 25 +++++++++++++++++++++++-- 2 files changed, 48 insertions(+), 10 deletions(-) diff --git a/gix-path/src/env/mod.rs b/gix-path/src/env/mod.rs index 71188fb2a17..333527e5eb7 100644 --- a/gix-path/src/env/mod.rs +++ b/gix-path/src/env/mod.rs @@ -30,18 +30,35 @@ pub fn installation_config_prefix() -> Option<&'static Path> { /// Return the shell that Git would use, the shell to execute commands from. /// -/// On Windows, this is the full path to `sh.exe` bundled with Git, and on -/// Unix it's `/bin/sh` as posix compatible shell. -/// If the bundled shell on Windows cannot be found, `sh` is returned as the name of a shell -/// as it could possibly be found in `PATH`. -/// Note that the returned path might not be a path on disk. +/// On Windows, this is the full path to `sh.exe` bundled with Git for Windows if we can find it. +/// If the bundled shell on Windows cannot be found, `sh.exe` is returned as the name of a shell, +/// as it could possibly be found in `PATH`. On Unix it's `/bin/sh` as the POSIX-compatible shell. +/// +/// Note that the returned path might not be a path on disk, if it is a fallback path or if the +/// file was moved or deleted since the first time this function is called. pub fn shell() -> &'static OsStr { static PATH: Lazy = Lazy::new(|| { if cfg!(windows) { core_dir() - .and_then(|p| p.ancestors().nth(3)) // Skip something like mingw64/libexec/git-core. - .map(|p| p.join("usr").join("bin").join("sh.exe")) - .map_or_else(|| OsString::from("sh"), Into::into) + .and_then(|git_core| { + // Go up above something that is expected to be like mingw64/libexec/git-core. + git_core.ancestors().nth(3) + }) + .map(OsString::from) + .map(|mut raw_path| { + // Go down to where `sh.exe` usually is. To avoid breaking shell scripts that + // wrongly assume the shell's own path contains no `\`, as well as to produce + // more readable messages, append literally with `/` separators. The path from + // `git --exec-path` will already have all `/` separators (and no trailing `/`) + // unless it was explicitly overridden to an unusual value via `GIT_EXEC_PATH`. + raw_path.push("/usr/bin/sh.exe"); + raw_path + }) + .filter(|raw_path| { + // Check if there is something that could be a usable shell there. + Path::new(raw_path).is_file() + }) + .unwrap_or_else(|| "sh.exe".into()) } else { "/bin/sh".into() } diff --git a/gix-path/tests/path/env.rs b/gix-path/tests/path/env.rs index 28f23f9cb7b..c3c7019cbce 100644 --- a/gix-path/tests/path/env.rs +++ b/gix-path/tests/path/env.rs @@ -1,3 +1,5 @@ +use std::path::Path; + #[test] fn exe_invocation() { let actual = gix_path::env::exe_invocation(); @@ -10,8 +12,27 @@ fn exe_invocation() { #[test] fn shell() { assert!( - std::path::Path::new(gix_path::env::shell()).exists(), - "On CI and on Unix we'd expect a full path to the shell that exists on disk" + Path::new(gix_path::env::shell()).exists(), + "On CI and on Unix we expect a usable path to the shell that exists on disk" + ); +} + +#[test] +fn shell_absolute() { + assert!( + Path::new(gix_path::env::shell()).is_absolute(), + "On CI and on Unix we currently expect the path to the shell always to be absolute" + ); +} + +#[test] +fn shell_unix_path() { + let shell = gix_path::env::shell() + .to_str() + .expect("This test depends on the shell path being valid Unicode"); + assert!( + !shell.contains('\\'), + "The path to the shell should have no backslashes, barring strange `GIT_EXEC_PATH` values" ); } From da7d70ede6fe9fe10a0500c284d8f27610f613df Mon Sep 17 00:00:00 2001 From: Eliah Kagan Date: Mon, 17 Feb 2025 16:30:59 -0500 Subject: [PATCH 04/15] Revise `gix_path::env` docstrings for clarity --- gix-path/src/env/mod.rs | 39 +++++++++++++++++++++------------------ 1 file changed, 21 insertions(+), 18 deletions(-) diff --git a/gix-path/src/env/mod.rs b/gix-path/src/env/mod.rs index 333527e5eb7..78e2da294c4 100644 --- a/gix-path/src/env/mod.rs +++ b/gix-path/src/env/mod.rs @@ -67,14 +67,16 @@ pub fn shell() -> &'static OsStr { } /// Return the name of the Git executable to invoke it. +/// /// If it's in the `PATH`, it will always be a short name. /// /// Note that on Windows, we will find the executable in the `PATH` if it exists there, or search it /// in alternative locations which when found yields the full path to it. pub fn exe_invocation() -> &'static Path { if cfg!(windows) { - /// The path to the Git executable as located in the `PATH` or in other locations that it's known to be installed to. - /// It's `None` if environment variables couldn't be read or if no executable could be found. + /// The path to the Git executable as located in the `PATH` or in other locations that it's + /// known to be installed to. It's `None` if environment variables couldn't be read or if + /// no executable could be found. static EXECUTABLE_PATH: Lazy> = Lazy::new(|| { std::env::split_paths(&std::env::var_os("PATH")?) .chain(git::ALTERNATIVE_LOCATIONS.iter().map(Into::into)) @@ -99,11 +101,11 @@ pub fn exe_invocation() -> &'static Path { } } -/// Returns the fully qualified path in the *xdg-home* directory (or equivalent in the home dir) to `file`, -/// accessing `env_var()` to learn where these bases are. +/// Returns the fully qualified path in the *xdg-home* directory (or equivalent in the home dir) to +/// `file`, accessing `env_var()` to learn where these bases are. /// -/// Note that the `HOME` directory should ultimately come from [`home_dir()`] as it handles windows correctly. -/// The same can be achieved by using [`var()`] as `env_var`. +/// Note that the `HOME` directory should ultimately come from [`home_dir()`] as it handles Windows +/// correctly. The same can be achieved by using [`var()`] as `env_var`. pub fn xdg_config(file: &str, env_var: &mut dyn FnMut(&str) -> Option) -> Option { env_var("XDG_CONFIG_HOME") .map(|home| { @@ -153,17 +155,17 @@ pub fn core_dir() -> Option<&'static Path> { GIT_CORE_DIR.as_deref() } -/// Returns the platform dependent system prefix or `None` if it cannot be found (right now only on windows). +/// Returns the platform dependent system prefix or `None` if it cannot be found (right now only on Windows). /// /// ### Performance /// -/// On windows, the slowest part is the launch of the Git executable in the PATH, which only happens when launched -/// from outside of the `msys2` shell. +/// On Windows, the slowest part is the launch of the Git executable in the PATH. This is often +/// avoided by inspecting the environment, when launched from inside a Git Bash MSYS2 shell. /// /// ### When `None` is returned /// -/// This happens only windows if the git binary can't be found at all for obtaining its executable path, or if the git binary -/// wasn't built with a well-known directory structure or environment. +/// This happens only Windows if the git binary can't be found at all for obtaining its executable +/// path, or if the git binary wasn't built with a well-known directory structure or environment. pub fn system_prefix() -> Option<&'static Path> { if cfg!(windows) { static PREFIX: Lazy> = Lazy::new(|| { @@ -194,19 +196,20 @@ pub fn home_dir() -> Option { std::env::var("HOME").map(PathBuf::from).ok() } -/// Tries to obtain the home directory from `HOME` on all platforms, but falls back to [`home::home_dir()`] for -/// more complex ways of obtaining a home directory, particularly useful on Windows. +/// Tries to obtain the home directory from `HOME` on all platforms, but falls back to +/// [`home::home_dir()`] for more complex ways of obtaining a home directory, particularly useful +/// on Windows. /// -/// The reason `HOME` is tried first is to allow Windows users to have a custom location for their linux-style -/// home, as otherwise they would have to accumulate dot files in a directory these are inconvenient and perceived -/// as clutter. +/// The reason `HOME` is tried first is to allow Windows users to have a custom location for their +/// linux-style home, as otherwise they would have to accumulate dot files in a directory these are +/// inconvenient and perceived as clutter. #[cfg(not(target_family = "wasm"))] pub fn home_dir() -> Option { std::env::var_os("HOME").map(Into::into).or_else(home::home_dir) } -/// Returns the contents of an environment variable of `name` with some special handling -/// for certain environment variables (like `HOME`) for platform compatibility. +/// Returns the contents of an environment variable of `name` with some special handling for +/// certain environment variables (like `HOME`) for platform compatibility. pub fn var(name: &str) -> Option { if name == "HOME" { home_dir().map(PathBuf::into_os_string) From 1f269b0d5aa958f25423db1f83d144781bf22024 Mon Sep 17 00:00:00 2001 From: Eliah Kagan Date: Sun, 2 Mar 2025 05:58:02 -0500 Subject: [PATCH 05/15] fix: Check prefix and prefer shim in `gix_path::env::shell()` This makes a few changes to make `shell()` more robust: 1. Check the last two components of the path `git --exec-path` gave, to make sure they are `libexec/git-core`. (The check is done in such a way that the separator may be `/` or `\`, though a `\` separator here would be unexpected. We permit it because it may plausibly be present due to an overriden `GIT_EXEC_PATH` that breaks with Git's own behavior of using `/` but that is otherwise fully usable.) If the directory is not named `git-core`, or it is a top-level directory (no parent), or its parent is not named `libexec`, then it is not reasonable to guess that this is in a directory where it would be safe to use `sh.exe` in the expected relative location. (Even if safe, such a layout does not suggest that a `sh.exe` found in it would be better choice than the fallback of just doing a `PATH` search.) 2. Check the grandparent component (that `../..` would go to) of the path `git --exec-path` gave, to make sure it is recognized name of a platform-specific `usr`-like directory that has been used in MSYS2. This is to avoid traversing up out of less common directory trees that have some different and shallower structure than found in a typical Git for Windows or MSYS2 installation. 3. Instead of using only the `(git root)/usr/bin/sh.exe` non-shim, prefer the `(git root)/bin/sh.exe` shim. If that is not found, fall back to the `(git root)/usr/bin/sh.exe` non-shim, mainly to support the Git for Windows SDK, which doesn't have the shim. The reason to prefer the shim is that it sets environment variables, including prepending `bin` directories that provide tools one would expect to have when using it. Without this, common POSIX commands may be unavailable, or different and incompatible implementations of them may be found. In particular, if they are found in a different MSYS2 installation whose `msys-2.0.dll` is of a different version or otherwise a different build, then calling them directly may produce strange behavior. See: - https://cygwin.com/faq.html#faq.using.multiple-copies - https://github.com/GitoxideLabs/gitoxide/pull/1862#issuecomment-2692158831 This makes things more robust overall than either preferring the non-shim or just doing a path search for `sh` as was done before that. But it exacerbates #1868 (as described there), so if the Git for Windows `sh.exe` shim continues to work as it currently does, then further improvements may be called for here. --- gix-path/src/env/mod.rs | 43 ++++++++++++++++++++++++++++------------- 1 file changed, 30 insertions(+), 13 deletions(-) diff --git a/gix-path/src/env/mod.rs b/gix-path/src/env/mod.rs index 78e2da294c4..3ca91e6b2c6 100644 --- a/gix-path/src/env/mod.rs +++ b/gix-path/src/env/mod.rs @@ -39,25 +39,42 @@ pub fn installation_config_prefix() -> Option<&'static Path> { pub fn shell() -> &'static OsStr { static PATH: Lazy = Lazy::new(|| { if cfg!(windows) { + const MSYS_PREFIX_NAMES: &[&str] = &[ + "mingw64", + "mingw32", + "clangarm64", + "clang64", + "clang32", + "ucrt64", + "usr", + ]; + const RAW_SUFFIXES: &[&str] = &[ + "/bin/sh.exe", // Usually a shim, which currently we prefer, if available. + "/usr/bin/sh.exe", + ]; + fn raw_join(path: &Path, raw_suffix: &str) -> OsString { + let mut raw_path = OsString::from(path); + raw_path.push(raw_suffix); + raw_path + } core_dir() - .and_then(|git_core| { - // Go up above something that is expected to be like mingw64/libexec/git-core. - git_core.ancestors().nth(3) + .filter(|core| core.is_absolute() && core.ends_with("libexec/git-core")) + .and_then(|core| core.ancestors().nth(2)) + .filter(|prefix| { + // Only use `libexec/git-core` from inside something `usr`-like, such as `mingw64`. + MSYS_PREFIX_NAMES.iter().any(|name| prefix.ends_with(name)) }) - .map(OsString::from) - .map(|mut raw_path| { - // Go down to where `sh.exe` usually is. To avoid breaking shell scripts that - // wrongly assume the shell's own path contains no `\`, as well as to produce + .and_then(|prefix| prefix.parent()) + .into_iter() + .flat_map(|git_root| { + // Enumerate the locations where `sh.exe` usually is. To avoid breaking shell + // scripts that assume the shell's own path contains no `\`, and to produce // more readable messages, append literally with `/` separators. The path from // `git --exec-path` will already have all `/` separators (and no trailing `/`) // unless it was explicitly overridden to an unusual value via `GIT_EXEC_PATH`. - raw_path.push("/usr/bin/sh.exe"); - raw_path - }) - .filter(|raw_path| { - // Check if there is something that could be a usable shell there. - Path::new(raw_path).is_file() + RAW_SUFFIXES.iter().map(|raw_suffix| raw_join(git_root, raw_suffix)) }) + .find(|raw_path| Path::new(raw_path).is_file()) .unwrap_or_else(|| "sh.exe".into()) } else { "/bin/sh".into() From 83574e1636e3a25b48c3c6198e8c17e6e81d04e9 Mon Sep 17 00:00:00 2001 From: Eliah Kagan Date: Sun, 2 Mar 2025 23:05:00 -0500 Subject: [PATCH 06/15] Don't match `libexec/git-core` in `usr`; refactor - Allowing `usr` as a `` prefix is more likely to produce desired than undesired behavior. But due to the ambiguity of the name `usr` on non-Unix systems, maybe this would lead to problems that are relevant to security. The concern is described and the `usr` prefix, which is never needed to find the shell in a Git for Windows installation, is no longer matched. Note that this only affects it as a path component that `libexec/git-core` is found initially to be in. We do still use /libexec/git-core/../../../usr/bin/sh.exe if we don't find something we can plausibly use at: `/libexec/git-core/../../../bin/sh.exe The helper docstring explains why a security model under which this is reasonable does necessarily entail that it is reasonable to allow a `` of `usr`, *even though* in the path with `usr` that we form, it is `usr` in the `` position. With that said, and even though it does not help find `sh` from Git for Windows, hopefully future research can establish that it is safe to treat `usr/libexec/git-core` the same as platform prefixes like `mingw64/libexec/git-core` and it can be enabled. - Start refactoring by extracting and renaming the recently introduced helper constants and functions. - Extract most of the `cfg!(windows)` branch in `shell()` itself, even with its helpers already extracted, to make it a helper function as well. - Document the recently introduced helper constants. --- gix-path/src/env/mod.rs | 116 +++++++++++++++++++++++++++------------- 1 file changed, 79 insertions(+), 37 deletions(-) diff --git a/gix-path/src/env/mod.rs b/gix-path/src/env/mod.rs index 3ca91e6b2c6..0b27712d969 100644 --- a/gix-path/src/env/mod.rs +++ b/gix-path/src/env/mod.rs @@ -28,6 +28,84 @@ pub fn installation_config_prefix() -> Option<&'static Path> { installation_config().map(git::config_to_base_path) } +/// `usr`-like directory component names that MSYS2 may provide, other than for `/usr` itself. +/// +/// These are the values of the "Prefix" column of the "Environments" and "Legacy Environments" +/// tables in the [MSYS2 Environments](https://www.msys2.org/docs/environments/) documentation, +/// with the leading `/` separator removed, except that this does not list `usr` itself. +/// +/// On Windows, we prefer to use `sh` as provided by Git for Windows, when present. To find it, we +/// run `git --exec-path` to get a path that is usually `/libexec/git-core` in the Git +/// for Windows installation, where `` is something like `mingw64`. It is also acceptable +/// to find `sh` in an environment not provided by Git for Windows, such as an independent MSYS2 +/// environment in which a `git` package has been installed. However, in an unusual installation, +/// or if the user has set a custom value of `GIT_EXEC_PATH`, the output of `git --exec-path` may +/// take a form other than `/libexec/git-core`, such that finding shell at a location +/// like `../../../bin/sh.exe` relative to it should not be attempted. We lower the risk by +/// checking that `` is a plausible value that is not likely to have any other meaning. +/// +/// This involves two tradeoffs. First, it may be reasonable to find `sh.exe` in an environment +/// that is not MSYS2 at all, for which in principle the prefix could be different. But listing +/// more prefixes or matching a broad pattern of platform-like strings might be too broad. So only +/// prefixes that have been used in MSYS2 are considered. +/// +/// Second, we don't recognize `usr` itself here, even though is a plausible prefix. In MSYS2, it +/// is the prefix for MSYS2 non-native programs, i.e. those that use `msys-2.0.dll`. But unlike the +/// `` names we recognize, `usr` also has an effectively unbounded range of plausible +/// meanings on non-Unix systems, which may occasionally relate to subdirectories whose contents +/// are controlled by different user accounts. +/// +/// If we start with a `libexec/git-core` directory that we already use and trust, and it is in a +/// directory with a name like `mingw64`, we infer that this `mingw64` directory has the expected +/// meaning and that its `usr` sibling, if present, is acceptable to treat as though it is a +/// first-level directory inside an MSYS2-like tree. So we are willing to traverse down to +/// `usr/sh.exe` and attempt to use it. But if the `libexec/git-core` we use and trust is inside a +/// directory named `usr`, that `usr` directory may still not have the meaning we expect of `usr`. +/// +/// The conditions for a privilege escalation attack or other serious malfunction seem unlikely. If +/// research indicates the risk is low enough, `usr` may be added. But for now it is omitted. +const MSYS_USR_VARIANTS: &[&str] = &["mingw64", "mingw32", "clangarm64", "clang64", "clang32", "ucrt64"]; + +/// Shell path fragments to concatenate to the root of a Git for Windows or MSYS2 installation. +/// +/// These look like absolute Unix-style paths, but the leading `/` separators are present because +/// they simplify forming paths like `C:/Program Files/Git` obtained by removing trailing +/// components from the output of `git --exec-path`. +const RAW_SH_EXE_PATH_SUFFIXES: &[&str] = &[ + "/bin/sh.exe", // Usually a shim, which currently we prefer, if available. + "/usr/bin/sh.exe", +]; + +/// +fn raw_join(path: &Path, raw_suffix: &str) -> OsString { + let mut raw_path = OsString::from(path); + raw_path.push(raw_suffix); + raw_path +} + +/// +fn find_sh_on_windows() -> Option { + core_dir() + .filter(|core| core.is_absolute() && core.ends_with("libexec/git-core")) + .and_then(|core| core.ancestors().nth(2)) + .filter(|prefix| { + // Only use `libexec/git-core` from inside something `usr`-like, such as `mingw64`. + MSYS_USR_VARIANTS.iter().any(|name| prefix.ends_with(name)) + }) + .and_then(|prefix| prefix.parent()) + .into_iter() + .flat_map(|git_root| { + // Enumerate locations where `sh.exe` usually is. To avoid breaking scripts that assume the + // shell's own path contains no `\`, and so messages are more readable, append literally + // with `/` separators. The path from `git --exec-path` already uses `/` separators (and no + // trailing `/`) unless explicitly overridden to an unusual value via `GIT_EXEC_PATH`. + RAW_SH_EXE_PATH_SUFFIXES + .iter() + .map(|raw_suffix| raw_join(git_root, raw_suffix)) + }) + .find(|raw_path| Path::new(raw_path).is_file()) +} + /// Return the shell that Git would use, the shell to execute commands from. /// /// On Windows, this is the full path to `sh.exe` bundled with Git for Windows if we can find it. @@ -39,43 +117,7 @@ pub fn installation_config_prefix() -> Option<&'static Path> { pub fn shell() -> &'static OsStr { static PATH: Lazy = Lazy::new(|| { if cfg!(windows) { - const MSYS_PREFIX_NAMES: &[&str] = &[ - "mingw64", - "mingw32", - "clangarm64", - "clang64", - "clang32", - "ucrt64", - "usr", - ]; - const RAW_SUFFIXES: &[&str] = &[ - "/bin/sh.exe", // Usually a shim, which currently we prefer, if available. - "/usr/bin/sh.exe", - ]; - fn raw_join(path: &Path, raw_suffix: &str) -> OsString { - let mut raw_path = OsString::from(path); - raw_path.push(raw_suffix); - raw_path - } - core_dir() - .filter(|core| core.is_absolute() && core.ends_with("libexec/git-core")) - .and_then(|core| core.ancestors().nth(2)) - .filter(|prefix| { - // Only use `libexec/git-core` from inside something `usr`-like, such as `mingw64`. - MSYS_PREFIX_NAMES.iter().any(|name| prefix.ends_with(name)) - }) - .and_then(|prefix| prefix.parent()) - .into_iter() - .flat_map(|git_root| { - // Enumerate the locations where `sh.exe` usually is. To avoid breaking shell - // scripts that assume the shell's own path contains no `\`, and to produce - // more readable messages, append literally with `/` separators. The path from - // `git --exec-path` will already have all `/` separators (and no trailing `/`) - // unless it was explicitly overridden to an unusual value via `GIT_EXEC_PATH`. - RAW_SUFFIXES.iter().map(|raw_suffix| raw_join(git_root, raw_suffix)) - }) - .find(|raw_path| Path::new(raw_path).is_file()) - .unwrap_or_else(|| "sh.exe".into()) + find_sh_on_windows().unwrap_or_else(|| "sh.exe".into()) } else { "/bin/sh".into() } From 0b75b231c48ef2509a3cef0a8a719418b4ff9d45 Mon Sep 17 00:00:00 2001 From: Eliah Kagan Date: Mon, 3 Mar 2025 00:35:15 -0500 Subject: [PATCH 07/15] Move `shell()` helpers to a helper module The new `auxiliary` module within `gix_path::env` is a sibling of the `git` module and is similarly an implementation detail only. So far the only "auxiliary" program this module finds is `sh`. The module is named `auxiliary` rather than `aux` because Windows has problems with files named like `aux` or `aux.rs`, due to `AUX` being a reserved device name. --- gix-path/src/env/auxiliary.rs | 80 ++++++++++++++++++++++++++++++++++ gix-path/src/env/mod.rs | 81 +---------------------------------- 2 files changed, 82 insertions(+), 79 deletions(-) create mode 100644 gix-path/src/env/auxiliary.rs diff --git a/gix-path/src/env/auxiliary.rs b/gix-path/src/env/auxiliary.rs new file mode 100644 index 00000000000..d6285366799 --- /dev/null +++ b/gix-path/src/env/auxiliary.rs @@ -0,0 +1,80 @@ +use std::ffi::OsString; +use std::path::Path; + +/// `usr`-like directory component names that MSYS2 may provide, other than for `/usr` itself. +/// +/// These are the values of the "Prefix" column of the "Environments" and "Legacy Environments" +/// tables in the [MSYS2 Environments](https://www.msys2.org/docs/environments/) documentation, +/// with the leading `/` separator removed, except that this does not list `usr` itself. +/// +/// On Windows, we prefer to use `sh` as provided by Git for Windows, when present. To find it, we +/// run `git --exec-path` to get a path that is usually `/libexec/git-core` in the Git +/// for Windows installation, where `` is something like `mingw64`. It is also acceptable +/// to find `sh` in an environment not provided by Git for Windows, such as an independent MSYS2 +/// environment in which a `git` package has been installed. However, in an unusual installation, +/// or if the user has set a custom value of `GIT_EXEC_PATH`, the output of `git --exec-path` may +/// take a form other than `/libexec/git-core`, such that finding shell at a location +/// like `../../../bin/sh.exe` relative to it should not be attempted. We lower the risk by +/// checking that `` is a plausible value that is not likely to have any other meaning. +/// +/// This involves two tradeoffs. First, it may be reasonable to find `sh.exe` in an environment +/// that is not MSYS2 at all, for which in principle the prefix could be different. But listing +/// more prefixes or matching a broad pattern of platform-like strings might be too broad. So only +/// prefixes that have been used in MSYS2 are considered. +/// +/// Second, we don't recognize `usr` itself here, even though is a plausible prefix. In MSYS2, it +/// is the prefix for MSYS2 non-native programs, i.e. those that use `msys-2.0.dll`. But unlike the +/// `` names we recognize, `usr` also has an effectively unbounded range of plausible +/// meanings on non-Unix systems, which may occasionally relate to subdirectories whose contents +/// are controlled by different user accounts. +/// +/// If we start with a `libexec/git-core` directory that we already use and trust, and it is in a +/// directory with a name like `mingw64`, we infer that this `mingw64` directory has the expected +/// meaning and that its `usr` sibling, if present, is acceptable to treat as though it is a +/// first-level directory inside an MSYS2-like tree. So we are willing to traverse down to +/// `usr/sh.exe` and attempt to use it. But if the `libexec/git-core` we use and trust is inside a +/// directory named `usr`, that `usr` directory may still not have the meaning we expect of `usr`. +/// +/// The conditions for a privilege escalation attack or other serious malfunction seem unlikely. If +/// research indicates the risk is low enough, `usr` may be added. But for now it is omitted. +const MSYS_USR_VARIANTS: &[&str] = &["mingw64", "mingw32", "clangarm64", "clang64", "clang32", "ucrt64"]; + +/// Shell path fragments to concatenate to the root of a Git for Windows or MSYS2 installation. +/// +/// These look like absolute Unix-style paths, but the leading `/` separators are present because +/// they simplify forming paths like `C:/Program Files/Git` obtained by removing trailing +/// components from the output of `git --exec-path`. +const RAW_SH_EXE_PATH_SUFFIXES: &[&str] = &[ + "/bin/sh.exe", // Usually a shim, which currently we prefer, if available. + "/usr/bin/sh.exe", +]; + +/// +fn raw_join(path: &Path, raw_suffix: &str) -> OsString { + let mut raw_path = OsString::from(path); + raw_path.push(raw_suffix); + raw_path +} + +/// +pub(super) fn find_sh_on_windows() -> Option { + super::core_dir() + .filter(|core| core.is_absolute() && core.ends_with("libexec/git-core")) + .and_then(|core| core.ancestors().nth(2)) + .filter(|prefix| { + // Only use `libexec/git-core` from inside something `usr`-like, such as `mingw64`. + MSYS_USR_VARIANTS.iter().any(|name| prefix.ends_with(name)) + }) + .and_then(|prefix| prefix.parent()) + .into_iter() + .flat_map(|git_root| { + // Enumerate locations where `sh.exe` usually is. To avoid breaking scripts that assume the + // shell's own path contains no `\`, and so messages are more readable, append literally + // with `/` separators. The path from `git --exec-path` already uses `/` separators (and no + // trailing `/`) unless explicitly overridden to an unusual value via `GIT_EXEC_PATH`. + RAW_SH_EXE_PATH_SUFFIXES + .iter() + .map(|raw_suffix| raw_join(git_root, raw_suffix)) + }) + .find(|raw_path| Path::new(raw_path).is_file()) +} diff --git a/gix-path/src/env/mod.rs b/gix-path/src/env/mod.rs index 0b27712d969..bb0dd9bd6f9 100644 --- a/gix-path/src/env/mod.rs +++ b/gix-path/src/env/mod.rs @@ -6,6 +6,7 @@ use once_cell::sync::Lazy; use crate::env::git::EXE_NAME; +mod auxiliary; mod git; /// Return the location at which installation specific git configuration file can be found, or `None` @@ -28,84 +29,6 @@ pub fn installation_config_prefix() -> Option<&'static Path> { installation_config().map(git::config_to_base_path) } -/// `usr`-like directory component names that MSYS2 may provide, other than for `/usr` itself. -/// -/// These are the values of the "Prefix" column of the "Environments" and "Legacy Environments" -/// tables in the [MSYS2 Environments](https://www.msys2.org/docs/environments/) documentation, -/// with the leading `/` separator removed, except that this does not list `usr` itself. -/// -/// On Windows, we prefer to use `sh` as provided by Git for Windows, when present. To find it, we -/// run `git --exec-path` to get a path that is usually `/libexec/git-core` in the Git -/// for Windows installation, where `` is something like `mingw64`. It is also acceptable -/// to find `sh` in an environment not provided by Git for Windows, such as an independent MSYS2 -/// environment in which a `git` package has been installed. However, in an unusual installation, -/// or if the user has set a custom value of `GIT_EXEC_PATH`, the output of `git --exec-path` may -/// take a form other than `/libexec/git-core`, such that finding shell at a location -/// like `../../../bin/sh.exe` relative to it should not be attempted. We lower the risk by -/// checking that `` is a plausible value that is not likely to have any other meaning. -/// -/// This involves two tradeoffs. First, it may be reasonable to find `sh.exe` in an environment -/// that is not MSYS2 at all, for which in principle the prefix could be different. But listing -/// more prefixes or matching a broad pattern of platform-like strings might be too broad. So only -/// prefixes that have been used in MSYS2 are considered. -/// -/// Second, we don't recognize `usr` itself here, even though is a plausible prefix. In MSYS2, it -/// is the prefix for MSYS2 non-native programs, i.e. those that use `msys-2.0.dll`. But unlike the -/// `` names we recognize, `usr` also has an effectively unbounded range of plausible -/// meanings on non-Unix systems, which may occasionally relate to subdirectories whose contents -/// are controlled by different user accounts. -/// -/// If we start with a `libexec/git-core` directory that we already use and trust, and it is in a -/// directory with a name like `mingw64`, we infer that this `mingw64` directory has the expected -/// meaning and that its `usr` sibling, if present, is acceptable to treat as though it is a -/// first-level directory inside an MSYS2-like tree. So we are willing to traverse down to -/// `usr/sh.exe` and attempt to use it. But if the `libexec/git-core` we use and trust is inside a -/// directory named `usr`, that `usr` directory may still not have the meaning we expect of `usr`. -/// -/// The conditions for a privilege escalation attack or other serious malfunction seem unlikely. If -/// research indicates the risk is low enough, `usr` may be added. But for now it is omitted. -const MSYS_USR_VARIANTS: &[&str] = &["mingw64", "mingw32", "clangarm64", "clang64", "clang32", "ucrt64"]; - -/// Shell path fragments to concatenate to the root of a Git for Windows or MSYS2 installation. -/// -/// These look like absolute Unix-style paths, but the leading `/` separators are present because -/// they simplify forming paths like `C:/Program Files/Git` obtained by removing trailing -/// components from the output of `git --exec-path`. -const RAW_SH_EXE_PATH_SUFFIXES: &[&str] = &[ - "/bin/sh.exe", // Usually a shim, which currently we prefer, if available. - "/usr/bin/sh.exe", -]; - -/// -fn raw_join(path: &Path, raw_suffix: &str) -> OsString { - let mut raw_path = OsString::from(path); - raw_path.push(raw_suffix); - raw_path -} - -/// -fn find_sh_on_windows() -> Option { - core_dir() - .filter(|core| core.is_absolute() && core.ends_with("libexec/git-core")) - .and_then(|core| core.ancestors().nth(2)) - .filter(|prefix| { - // Only use `libexec/git-core` from inside something `usr`-like, such as `mingw64`. - MSYS_USR_VARIANTS.iter().any(|name| prefix.ends_with(name)) - }) - .and_then(|prefix| prefix.parent()) - .into_iter() - .flat_map(|git_root| { - // Enumerate locations where `sh.exe` usually is. To avoid breaking scripts that assume the - // shell's own path contains no `\`, and so messages are more readable, append literally - // with `/` separators. The path from `git --exec-path` already uses `/` separators (and no - // trailing `/`) unless explicitly overridden to an unusual value via `GIT_EXEC_PATH`. - RAW_SH_EXE_PATH_SUFFIXES - .iter() - .map(|raw_suffix| raw_join(git_root, raw_suffix)) - }) - .find(|raw_path| Path::new(raw_path).is_file()) -} - /// Return the shell that Git would use, the shell to execute commands from. /// /// On Windows, this is the full path to `sh.exe` bundled with Git for Windows if we can find it. @@ -117,7 +40,7 @@ fn find_sh_on_windows() -> Option { pub fn shell() -> &'static OsStr { static PATH: Lazy = Lazy::new(|| { if cfg!(windows) { - find_sh_on_windows().unwrap_or_else(|| "sh.exe".into()) + auxiliary::find_sh_on_windows().unwrap_or_else(|| "sh.exe".into()) } else { "/bin/sh".into() } From 9d9ec58d5ee77873399b51faa9d5e8ddd1e82f30 Mon Sep 17 00:00:00 2001 From: Eliah Kagan Date: Mon, 3 Mar 2025 01:13:46 -0500 Subject: [PATCH 08/15] Divide helpers more logically, expand doc comments This also uses `once_cell::sync::Lazy` for the inferred Git for Windows installation directory based on `git --exec-path` output, though it is only used in one function that is itself only used by a caller that itself caches, so it too is so far effectively just a refactoring. --- gix-path/src/env/auxiliary.rs | 57 ++++++++++++++++++++++++----------- 1 file changed, 40 insertions(+), 17 deletions(-) diff --git a/gix-path/src/env/auxiliary.rs b/gix-path/src/env/auxiliary.rs index d6285366799..3b260ed6dc9 100644 --- a/gix-path/src/env/auxiliary.rs +++ b/gix-path/src/env/auxiliary.rs @@ -1,5 +1,7 @@ use std::ffi::OsString; -use std::path::Path; +use std::path::{Path, PathBuf}; + +use once_cell::sync::Lazy; /// `usr`-like directory component names that MSYS2 may provide, other than for `/usr` itself. /// @@ -25,20 +27,48 @@ use std::path::Path; /// Second, we don't recognize `usr` itself here, even though is a plausible prefix. In MSYS2, it /// is the prefix for MSYS2 non-native programs, i.e. those that use `msys-2.0.dll`. But unlike the /// `` names we recognize, `usr` also has an effectively unbounded range of plausible -/// meanings on non-Unix systems, which may occasionally relate to subdirectories whose contents -/// are controlled by different user accounts. +/// meanings on non-Unix systems (for example, what should we take `Z:\usr` to mean?), which might +/// occasionally relate to subdirectories with contents controlled by different *user accounts*. /// /// If we start with a `libexec/git-core` directory that we already use and trust, and it is in a /// directory with a name like `mingw64`, we infer that this `mingw64` directory has the expected -/// meaning and that its `usr` sibling, if present, is acceptable to treat as though it is a -/// first-level directory inside an MSYS2-like tree. So we are willing to traverse down to -/// `usr/sh.exe` and attempt to use it. But if the `libexec/git-core` we use and trust is inside a +/// meaning and accordingly infer that its `usr` sibling, if present, is acceptable to treat as +/// though it is a first-level directory inside an MSYS2-like tree. So we are willing to traverse +/// down to `usr/sh.exe` and try to use it. But if the `libexec/git-core` we use and trust is in a /// directory named `usr`, that `usr` directory may still not have the meaning we expect of `usr`. /// -/// The conditions for a privilege escalation attack or other serious malfunction seem unlikely. If -/// research indicates the risk is low enough, `usr` may be added. But for now it is omitted. +/// Conditions for a privilege escalation attack or other serious malfunction seem far-fetched. If +/// further research finds the risk is low enough, `usr` may be added. But for now it is omitted. const MSYS_USR_VARIANTS: &[&str] = &["mingw64", "mingw32", "clangarm64", "clang64", "clang32", "ucrt64"]; +/// Find a Git for Windows installation directory based on `git --exec-path` output. +/// +/// Currently this is used only for finding the path to an `sh.exe` associated with Git. This is +/// separate from `installation_config()` and `installation_config_prefix()` in `gix_path::env`, +/// which guess where `etc/gitconfig` is based on `EXEPATH` or the location of the highest-scope +/// config file. +/// +/// The techniques might be combined or unified in some way in the future. The techniques those +/// functions currently use shouldn't be used to find `sh.exe`, because `EXEPATH` can take on other +/// values in some environments, and the highest scope config file may be unavailable or in another +/// location if `GIT_CONFIG_SYSTEM` or `GIT_CONFIG_NOSYSTEM` are set or if there are no variables +/// of system scope. Then paths found relative to it could be different. In contrast, the technique +/// used here may be usable for those functions, though may need to cover more directory layouts. +fn git_for_windows_root() -> Option<&'static Path> { + static GIT_ROOT: Lazy> = Lazy::new(|| { + super::core_dir() + .filter(|core| core.is_absolute() && core.ends_with("libexec/git-core")) + .and_then(|core| core.ancestors().nth(2)) + .filter(|prefix| { + // Only use `libexec/git-core` from inside something `usr`-like, such as `mingw64`. + MSYS_USR_VARIANTS.iter().any(|name| prefix.ends_with(name)) + }) + .and_then(|prefix| prefix.parent()) + .map(Into::into) + }); + GIT_ROOT.as_deref() +} + /// Shell path fragments to concatenate to the root of a Git for Windows or MSYS2 installation. /// /// These look like absolute Unix-style paths, but the leading `/` separators are present because @@ -56,16 +86,9 @@ fn raw_join(path: &Path, raw_suffix: &str) -> OsString { raw_path } -/// +/// Obtain a path to a `sh.exe` on Windows associated with Git, if one can be found. pub(super) fn find_sh_on_windows() -> Option { - super::core_dir() - .filter(|core| core.is_absolute() && core.ends_with("libexec/git-core")) - .and_then(|core| core.ancestors().nth(2)) - .filter(|prefix| { - // Only use `libexec/git-core` from inside something `usr`-like, such as `mingw64`. - MSYS_USR_VARIANTS.iter().any(|name| prefix.ends_with(name)) - }) - .and_then(|prefix| prefix.parent()) + git_for_windows_root() .into_iter() .flat_map(|git_root| { // Enumerate locations where `sh.exe` usually is. To avoid breaking scripts that assume the From bd267454e4261ea48e6a35b25ea5febffc0187b3 Mon Sep 17 00:00:00 2001 From: Eliah Kagan Date: Mon, 3 Mar 2025 01:19:15 -0500 Subject: [PATCH 09/15] Remove a potentially misleading docstring paragraph That was just added in the previous commit. `installation_config()` and `installation_config_prefix()` should never use, as their only strategy, the technique implemented in the newly introduced `git_for_windows_root()` helper, because if the system-scope configuration file is present elsewhere, including due to `GIT_CONFIG_SYSTEM` being set, then that *should* affect the values returned by those functions (except on Apple Git and any other systems where a separate higher "unknown" scope exists and is typically nonempty). Rather, some uses of `installation_config_prefix()`, such as to find other files that are better looked for relative to installed non-configuration files or installation directory itself rather than relative to its configuration file, might in the future be suitable to look fo9r using `git_for_windows_root()`. --- gix-path/src/env/auxiliary.rs | 11 +---------- 1 file changed, 1 insertion(+), 10 deletions(-) diff --git a/gix-path/src/env/auxiliary.rs b/gix-path/src/env/auxiliary.rs index 3b260ed6dc9..6f912aa99c8 100644 --- a/gix-path/src/env/auxiliary.rs +++ b/gix-path/src/env/auxiliary.rs @@ -44,16 +44,7 @@ const MSYS_USR_VARIANTS: &[&str] = &["mingw64", "mingw32", "clangarm64", "clang6 /// Find a Git for Windows installation directory based on `git --exec-path` output. /// /// Currently this is used only for finding the path to an `sh.exe` associated with Git. This is -/// separate from `installation_config()` and `installation_config_prefix()` in `gix_path::env`, -/// which guess where `etc/gitconfig` is based on `EXEPATH` or the location of the highest-scope -/// config file. -/// -/// The techniques might be combined or unified in some way in the future. The techniques those -/// functions currently use shouldn't be used to find `sh.exe`, because `EXEPATH` can take on other -/// values in some environments, and the highest scope config file may be unavailable or in another -/// location if `GIT_CONFIG_SYSTEM` or `GIT_CONFIG_NOSYSTEM` are set or if there are no variables -/// of system scope. Then paths found relative to it could be different. In contrast, the technique -/// used here may be usable for those functions, though may need to cover more directory layouts. +/// separate from `installation_config()` and `installation_config_prefix()` in `gix_path::env`. fn git_for_windows_root() -> Option<&'static Path> { static GIT_ROOT: Lazy> = Lazy::new(|| { super::core_dir() From 0a6a0568ce91534f551e87270ac076b79bf5c204 Mon Sep 17 00:00:00 2001 From: Eliah Kagan Date: Mon, 3 Mar 2025 02:16:59 -0500 Subject: [PATCH 10/15] Simplify code and comments --- gix-path/src/env/auxiliary.rs | 33 ++++++++++++++------------------- 1 file changed, 14 insertions(+), 19 deletions(-) diff --git a/gix-path/src/env/auxiliary.rs b/gix-path/src/env/auxiliary.rs index 6f912aa99c8..6df460acd40 100644 --- a/gix-path/src/env/auxiliary.rs +++ b/gix-path/src/env/auxiliary.rs @@ -62,15 +62,13 @@ fn git_for_windows_root() -> Option<&'static Path> { /// Shell path fragments to concatenate to the root of a Git for Windows or MSYS2 installation. /// -/// These look like absolute Unix-style paths, but the leading `/` separators are present because -/// they simplify forming paths like `C:/Program Files/Git` obtained by removing trailing -/// components from the output of `git --exec-path`. -const RAW_SH_EXE_PATH_SUFFIXES: &[&str] = &[ - "/bin/sh.exe", // Usually a shim, which currently we prefer, if available. - "/usr/bin/sh.exe", -]; - +/// When appended to the root of a Git for Windows installation, these are locations where `sh.exe` +/// can usually be found. The leading `/` allow these to be used (only) with `raw_join()`. /// +/// These are ordered so that a shim is preferred over a non-shim when they are tried in order. +const RAW_SH_EXE_PATH_SUFFIXES: &[&str] = &["/bin/sh.exe", "/usr/bin/sh.exe"]; + +/// Concatenate a path by appending a raw suffix, which must contain its own leading separator. fn raw_join(path: &Path, raw_suffix: &str) -> OsString { let mut raw_path = OsString::from(path); raw_path.push(raw_suffix); @@ -78,17 +76,14 @@ fn raw_join(path: &Path, raw_suffix: &str) -> OsString { } /// Obtain a path to a `sh.exe` on Windows associated with Git, if one can be found. +/// +/// The resulting path uses only `/` separators so long as the path obtained from `git --exec-path` +/// does, which is the case unless it is overridden by setting `GIT_EXEC_PATH` to an unusual value. pub(super) fn find_sh_on_windows() -> Option { - git_for_windows_root() - .into_iter() - .flat_map(|git_root| { - // Enumerate locations where `sh.exe` usually is. To avoid breaking scripts that assume the - // shell's own path contains no `\`, and so messages are more readable, append literally - // with `/` separators. The path from `git --exec-path` already uses `/` separators (and no - // trailing `/`) unless explicitly overridden to an unusual value via `GIT_EXEC_PATH`. - RAW_SH_EXE_PATH_SUFFIXES - .iter() - .map(|raw_suffix| raw_join(git_root, raw_suffix)) - }) + let git_root = git_for_windows_root()?; + + RAW_SH_EXE_PATH_SUFFIXES + .iter() + .map(|raw_suffix| raw_join(git_root, raw_suffix)) .find(|raw_path| Path::new(raw_path).is_file()) } From 17b5c31978a7868b2e2f36cd1c5f57c7a23eab1a Mon Sep 17 00:00:00 2001 From: Eliah Kagan Date: Mon, 3 Mar 2025 02:58:25 -0500 Subject: [PATCH 11/15] Generalize the `gix_path::env::auxiliary` helpers They are now in a form where they take the name of the command to be found and look for it in a way that would find many of the binaries shipped with and reasonable to use with Git for Windows. However, as noted in comments, further refinement as well as new tests would be called for if using it for purposes other than to find `sh`. For now it remains private with `gix_path::env` and is only used to find `sh`, as an implementation detail of `gix_path::shell()`. --- gix-path/src/env/auxiliary.rs | 52 ++++++++++++++++++++++------------- gix-path/src/env/mod.rs | 2 +- 2 files changed, 34 insertions(+), 20 deletions(-) diff --git a/gix-path/src/env/auxiliary.rs b/gix-path/src/env/auxiliary.rs index 6df460acd40..3e529e21cc5 100644 --- a/gix-path/src/env/auxiliary.rs +++ b/gix-path/src/env/auxiliary.rs @@ -24,9 +24,9 @@ use once_cell::sync::Lazy; /// more prefixes or matching a broad pattern of platform-like strings might be too broad. So only /// prefixes that have been used in MSYS2 are considered. /// -/// Second, we don't recognize `usr` itself here, even though is a plausible prefix. In MSYS2, it -/// is the prefix for MSYS2 non-native programs, i.e. those that use `msys-2.0.dll`. But unlike the -/// `` names we recognize, `usr` also has an effectively unbounded range of plausible +/// Second, we don't recognize `usr` itself here, even though it is a plausible prefix. In MSYS2, +/// it is the prefix for MSYS2 non-native programs, i.e. those that use `msys-2.0.dll`. But unlike +/// the `` names we recognize, `usr` also has an effectively unbounded range of plausible /// meanings on non-Unix systems (for example, what should we take `Z:\usr` to mean?), which might /// occasionally relate to subdirectories with contents controlled by different *user accounts*. /// @@ -60,30 +60,44 @@ fn git_for_windows_root() -> Option<&'static Path> { GIT_ROOT.as_deref() } -/// Shell path fragments to concatenate to the root of a Git for Windows or MSYS2 installation. -/// -/// When appended to the root of a Git for Windows installation, these are locations where `sh.exe` -/// can usually be found. The leading `/` allow these to be used (only) with `raw_join()`. +/// `bin` directory paths to try relative to the root of a Git for Windows or MSYS2 installation. /// /// These are ordered so that a shim is preferred over a non-shim when they are tried in order. -const RAW_SH_EXE_PATH_SUFFIXES: &[&str] = &["/bin/sh.exe", "/usr/bin/sh.exe"]; - -/// Concatenate a path by appending a raw suffix, which must contain its own leading separator. -fn raw_join(path: &Path, raw_suffix: &str) -> OsString { - let mut raw_path = OsString::from(path); - raw_path.push(raw_suffix); - raw_path -} +const BIN_DIR_FRAGMENTS: &[&str] = &["bin", "usr/bin"]; -/// Obtain a path to a `sh.exe` on Windows associated with Git, if one can be found. +/// Obtain a path to an executable command on Windows associated with Git, if one can be found. /// /// The resulting path uses only `/` separators so long as the path obtained from `git --exec-path` /// does, which is the case unless it is overridden by setting `GIT_EXEC_PATH` to an unusual value. -pub(super) fn find_sh_on_windows() -> Option { +/// +/// This is currently only used (and only exercised in tests) for finding `sh.exe`. It may be used +/// to find other executables in the future, but may require adjustment. (In particular, depending +/// on the desired semantics, it should possibly also check inside a `cmd` directory, possibly also +/// check inside `super::core_dir()` itself, and could safely check the latter location even if its +/// value is not suitable to use in inferring any other paths.) +fn find_git_associated_windows_executable(stem: &str) -> Option { let git_root = git_for_windows_root()?; - RAW_SH_EXE_PATH_SUFFIXES + BIN_DIR_FRAGMENTS .iter() - .map(|raw_suffix| raw_join(git_root, raw_suffix)) + .map(|bin_dir_fragment| { + // Perform explicit raw concatenation with `/` to avoid introducing any `\` separators. + let mut raw_path = OsString::from(git_root); + raw_path.push("/"); + raw_path.push(bin_dir_fragment); + raw_path.push("/"); + raw_path.push(stem); + raw_path.push(".exe"); + raw_path + }) .find(|raw_path| Path::new(raw_path).is_file()) } + +/// Like `find_associated_windows_executable`, but if not found, fall back to a simple filename. +pub(super) fn find_git_associated_windows_executable_with_fallback(stem: &str) -> OsString { + find_git_associated_windows_executable(stem).unwrap_or_else(|| { + let mut raw_path = OsString::from(stem); + raw_path.push(".exe"); + raw_path + }) +} diff --git a/gix-path/src/env/mod.rs b/gix-path/src/env/mod.rs index bb0dd9bd6f9..1878e1317ba 100644 --- a/gix-path/src/env/mod.rs +++ b/gix-path/src/env/mod.rs @@ -40,7 +40,7 @@ pub fn installation_config_prefix() -> Option<&'static Path> { pub fn shell() -> &'static OsStr { static PATH: Lazy = Lazy::new(|| { if cfg!(windows) { - auxiliary::find_sh_on_windows().unwrap_or_else(|| "sh.exe".into()) + auxiliary::find_git_associated_windows_executable_with_fallback("sh") } else { "/bin/sh".into() } From 56dc3cc008eb9eb9094450b1da327a34bfc9c850 Mon Sep 17 00:00:00 2001 From: Eliah Kagan Date: Mon, 3 Mar 2025 03:47:01 -0500 Subject: [PATCH 12/15] Revise `find_git_associated_windows_executable` future directions It didn't mention how expanded use may require directories like `mingw64/bin` to be added to searched locations, and also was somewhat hard to read. This adds that and rephrases. --- gix-path/src/env/auxiliary.rs | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/gix-path/src/env/auxiliary.rs b/gix-path/src/env/auxiliary.rs index 3e529e21cc5..52d4a353e9b 100644 --- a/gix-path/src/env/auxiliary.rs +++ b/gix-path/src/env/auxiliary.rs @@ -71,10 +71,10 @@ const BIN_DIR_FRAGMENTS: &[&str] = &["bin", "usr/bin"]; /// does, which is the case unless it is overridden by setting `GIT_EXEC_PATH` to an unusual value. /// /// This is currently only used (and only exercised in tests) for finding `sh.exe`. It may be used -/// to find other executables in the future, but may require adjustment. (In particular, depending -/// on the desired semantics, it should possibly also check inside a `cmd` directory, possibly also -/// check inside `super::core_dir()` itself, and could safely check the latter location even if its -/// value is not suitable to use in inferring any other paths.) +/// to find other executables in the future, but may require adjustment. In particular, depending +/// on the desired semantics, it should possibly also check inside a `cmd` directory; directories +/// like `/bin`, for any applicable variants (such as `mingw64`); and `super::core_dir()` +/// itself, which it could safely check even if its value is not safe for inferring other paths. fn find_git_associated_windows_executable(stem: &str) -> Option { let git_root = git_for_windows_root()?; From fb6705913727640a595792403ff35928d4d29d8d Mon Sep 17 00:00:00 2001 From: Eliah Kagan Date: Mon, 3 Mar 2025 04:46:05 -0500 Subject: [PATCH 13/15] Add some tests for finding associated executables The tests are not limited to finding `sh`, but this may still not be ready for use in finding other commands, for the commented reasons. These tests are a step toward that, but they are mainly to make sure the search works as expected both when the looked-for command is and is not found in one of the searched `bin` dirs. This is to say that, in the tests, the commands that can be found but in `usr/bin` rather than the first choice of `bin` are to an extent a stand-in for `sh` when searched for in an environment that doesn't have `(git root)/bin` (like the Git for Windows SDK), and the commands that cannot bve found are to an extent a standi-in for `sh` on systems where it cannot be found (such as due to `git` not being installed or installed from MinGit or some other minimal or nonstandard Git installation, or `GIT_EXEC_PATH` being defined and having a value that cannot be used or that can be used but points to a directory structure that does not have a usable `sh`). --- gix-path/src/env/auxiliary.rs | 66 +++++++++++++++++++++++++++++++++-- 1 file changed, 63 insertions(+), 3 deletions(-) diff --git a/gix-path/src/env/auxiliary.rs b/gix-path/src/env/auxiliary.rs index 52d4a353e9b..9667568da07 100644 --- a/gix-path/src/env/auxiliary.rs +++ b/gix-path/src/env/auxiliary.rs @@ -70,9 +70,9 @@ const BIN_DIR_FRAGMENTS: &[&str] = &["bin", "usr/bin"]; /// The resulting path uses only `/` separators so long as the path obtained from `git --exec-path` /// does, which is the case unless it is overridden by setting `GIT_EXEC_PATH` to an unusual value. /// -/// This is currently only used (and only exercised in tests) for finding `sh.exe`. It may be used -/// to find other executables in the future, but may require adjustment. In particular, depending -/// on the desired semantics, it should possibly also check inside a `cmd` directory; directories +/// This is currently only used (and only heavily exercised in tests) for finding `sh.exe`. It may +/// be used to find other executables in the future, but may need adjustment. In particular, +/// depending on desired semantics, it should possibly also check a `cmd` directory; directories /// like `/bin`, for any applicable variants (such as `mingw64`); and `super::core_dir()` /// itself, which it could safely check even if its value is not safe for inferring other paths. fn find_git_associated_windows_executable(stem: &str) -> Option { @@ -101,3 +101,63 @@ pub(super) fn find_git_associated_windows_executable_with_fallback(stem: &str) - raw_path }) } + +#[cfg(all(windows, test))] +mod tests { + use std::path::Path; + + /// Some commands with `.exe` files in `bin` and `usr/bin` that should be found. + /// + /// Tests are expected to run with a full Git for Windows installation (not MinGit). + const SHOULD_FIND: &[&str] = &[ + "sh", "bash", "dash", "diff", "tar", "less", "sed", "awk", "perl", "cygpath", + ]; + + /// Shouldn't find anything nonexistent, or only in PATH or in `bin`s we don't mean to search. + /// + /// If dirs like `mingsw64/bin` are added, `git-credential-manager` should be moved to `SHOULD_FIND`. + /// Likewise, if `super::core_dir()` is added, `git-daemon` should be moved to `SHOULD_FIND`. + const SHOULD_NOT_FIND: &[&str] = &[ + "nonexistent-command", + "cmd", + "powershell", + "explorer", + "git-credential-manager", + "git-daemon", + ]; + + #[test] + fn find_git_associated_windows_executable() { + for stem in SHOULD_FIND { + let path = super::find_git_associated_windows_executable(stem); + assert!(path.is_some(), "should find {stem:?}"); + } + } + + #[test] + fn find_git_associated_windows_executable_no_extra() { + for stem in SHOULD_NOT_FIND { + let path = super::find_git_associated_windows_executable(stem); + assert_eq!(path, None, "should not find {stem:?}"); + } + } + + #[test] + fn find_git_associated_windows_executable_with_fallback() { + for stem in SHOULD_FIND { + let path = super::find_git_associated_windows_executable_with_fallback(stem); + assert!(Path::new(&path).is_absolute(), "should find {stem:?}"); + } + } + + #[test] + fn find_git_associated_windows_executable_with_fallback_falls_back() { + for stem in SHOULD_NOT_FIND { + let path = super::find_git_associated_windows_executable_with_fallback(stem) + .to_str() + .expect("valid Unicode") + .to_owned(); + assert_eq!(path, format!("{stem}.exe"), "should fall back for {stem:?}"); + } + } +} From c824b9297824d52103d6ea40f49356d5d8b489e6 Mon Sep 17 00:00:00 2001 From: Eliah Kagan Date: Mon, 3 Mar 2025 04:51:52 -0500 Subject: [PATCH 14/15] Expand `git_for_windows_root()` comments --- gix-path/src/env/auxiliary.rs | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/gix-path/src/env/auxiliary.rs b/gix-path/src/env/auxiliary.rs index 9667568da07..8f26b5dac8f 100644 --- a/gix-path/src/env/auxiliary.rs +++ b/gix-path/src/env/auxiliary.rs @@ -45,13 +45,25 @@ const MSYS_USR_VARIANTS: &[&str] = &["mingw64", "mingw32", "clangarm64", "clang6 /// /// Currently this is used only for finding the path to an `sh.exe` associated with Git. This is /// separate from `installation_config()` and `installation_config_prefix()` in `gix_path::env`. +/// This is *not* suitable for finding the highest-scoped configuration file, because that could be +/// installed in an unusual place, or customized via `GIT_CONFIG_SYSTEM` or `GIT_CONFIG_NOSYSTEM`, +/// all of which `installation_config()` should reflect. Likewise, `installation_config_prefix()` +/// has strong uses, such as to find a directory inside `ProgramData` containing configuration. +/// But it is possible that some marginal uses of `installation_config_prefix()`, if they do not +/// really relate to configuration, could be replaced with `git_for_windows_root()` in the future. fn git_for_windows_root() -> Option<&'static Path> { static GIT_ROOT: Lazy> = Lazy::new(|| { super::core_dir() - .filter(|core| core.is_absolute() && core.ends_with("libexec/git-core")) + .filter(|core| { + // Only use this if the directory structure resembles a Git installation. This + // accepts installations of common types that are not broken when `GIT_EXEC_PATH` + // is unset, as well as values of `GIT_EXEC_PATH` that are likely to be usable. + core.is_absolute() && core.ends_with("libexec/git-core") + }) .and_then(|core| core.ancestors().nth(2)) .filter(|prefix| { // Only use `libexec/git-core` from inside something `usr`-like, such as `mingw64`. + // See `MSYS_USR_VARIANTS` for details and the rationale for this restriction. MSYS_USR_VARIANTS.iter().any(|name| prefix.ends_with(name)) }) .and_then(|prefix| prefix.parent()) From b70cdb19fbb194a97099afeb2ab208bd1355ee75 Mon Sep 17 00:00:00 2001 From: Eliah Kagan Date: Mon, 3 Mar 2025 06:06:14 -0500 Subject: [PATCH 15/15] Compile the new tests on all platforms Since the code under test is currently compiled on all platforms. The current tests' assertions are not guaranteed to hold outside of Windows systems. (It would be unusual to call the functions they are testing outside of Windows. Those functions are themselves mainly not marked to be conditionally compiled so that their callers uses techniques like `if cfg!(windows)` that aren't technically conditional compilation. Those techniques are themselves valuable because they can sometimes be simpler or more readable than conditional compilation or easier to avoid false-positive diagnostics, and because they allow type checking to occur even when building on other platforms, while still usually being fast because "runtime" conditions that are `false` constants still facilitate removal of dead code as an optimization.) So the tests of those functions are likewise built on all targets, but marked conditonally ignored on non-Windows platforms. --- gix-path/src/env/auxiliary.rs | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/gix-path/src/env/auxiliary.rs b/gix-path/src/env/auxiliary.rs index 8f26b5dac8f..02169dc5b96 100644 --- a/gix-path/src/env/auxiliary.rs +++ b/gix-path/src/env/auxiliary.rs @@ -114,7 +114,7 @@ pub(super) fn find_git_associated_windows_executable_with_fallback(stem: &str) - }) } -#[cfg(all(windows, test))] +#[cfg(test)] mod tests { use std::path::Path; @@ -139,6 +139,7 @@ mod tests { ]; #[test] + #[cfg_attr(not(windows), ignore)] fn find_git_associated_windows_executable() { for stem in SHOULD_FIND { let path = super::find_git_associated_windows_executable(stem); @@ -147,6 +148,7 @@ mod tests { } #[test] + #[cfg_attr(not(windows), ignore)] fn find_git_associated_windows_executable_no_extra() { for stem in SHOULD_NOT_FIND { let path = super::find_git_associated_windows_executable(stem); @@ -155,6 +157,7 @@ mod tests { } #[test] + #[cfg_attr(not(windows), ignore)] fn find_git_associated_windows_executable_with_fallback() { for stem in SHOULD_FIND { let path = super::find_git_associated_windows_executable_with_fallback(stem); @@ -163,6 +166,7 @@ mod tests { } #[test] + #[cfg_attr(not(windows), ignore)] fn find_git_associated_windows_executable_with_fallback_falls_back() { for stem in SHOULD_NOT_FIND { let path = super::find_git_associated_windows_executable_with_fallback(stem)