Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix discovery #1620

Merged
merged 4 commits into from
Oct 11, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
129 changes: 17 additions & 112 deletions gix-discover/src/is.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,107 +9,6 @@ pub fn bare(git_dir_candidate: &Path) -> bool {
!(git_dir_candidate.join("index").exists() || (git_dir_candidate.file_name() == Some(OsStr::new(DOT_GIT_DIR))))
}

/// Parse `<git_dir_candidate>/config` quickly to evaluate the value of the `bare` line, or return `true` if the file doesn't exist
/// similar to what`guess_repository_type` seems to be doing.
/// Return `None` if the `bare` line can't be found or the value of `bare` can't be determined.
fn bare_by_config(git_dir_candidate: &Path) -> std::io::Result<Option<bool>> {
match std::fs::read(git_dir_candidate.join("config")) {
Ok(buf) => Ok(config::parse_bare(&buf)),
Err(err) if err.kind() == std::io::ErrorKind::NotFound => Ok(Some(true)),
Err(err) => Err(err),
}
}

// Copied and adapted from `gix-config-value::boolean`.
mod config {
use bstr::{BStr, ByteSlice};

/// Note that we intentionally turn repositories that have a worktree configuration into bare repos,
/// as we don't actually parse the worktree from the config file and expect the caller to do the right
/// think when seemingly seeing bare repository.
/// The reason we do this is to not incorrectly pretend this is a worktree.
pub(crate) fn parse_bare(buf: &[u8]) -> Option<bool> {
let mut is_bare = None;
let mut has_worktree_configuration = false;
for line in buf.lines() {
if is_bare.is_none() {
if let Some(line) = line.trim().strip_prefix(b"bare") {
is_bare = match line.first() {
None => Some(true),
Some(c) if *c == b'=' => parse_bool(line.get(1..)?.trim_start().as_bstr()),
Some(c) if c.is_ascii_whitespace() => match line.split_once_str(b"=") {
Some((_left, right)) => parse_bool(right.trim_start().as_bstr()),
None => Some(true),
},
Some(_other_char_) => None,
};
continue;
}
}
if line.trim().strip_prefix(b"worktree").is_some() {
has_worktree_configuration = true;
break;
}
}
is_bare.map(|bare| bare || has_worktree_configuration)
}

fn parse_bool(value: &BStr) -> Option<bool> {
Some(if parse_true(value) {
true
} else if parse_false(value) {
false
} else {
use std::str::FromStr;
if let Some(integer) = value.to_str().ok().and_then(|s| i64::from_str(s).ok()) {
integer != 0
} else {
return None;
}
})
}

fn parse_true(value: &BStr) -> bool {
value.eq_ignore_ascii_case(b"yes") || value.eq_ignore_ascii_case(b"on") || value.eq_ignore_ascii_case(b"true")
}

fn parse_false(value: &BStr) -> bool {
value.eq_ignore_ascii_case(b"no")
|| value.eq_ignore_ascii_case(b"off")
|| value.eq_ignore_ascii_case(b"false")
|| value.is_empty()
}

#[cfg(test)]
mod tests {
use super::*;

#[test]
fn various() {
for (input, expected) in [
("bare=true", Some(true)),
("bare=1", Some(true)),
("bare =1", Some(true)),
("bare= yes", Some(true)),
("bare=false", Some(false)),
("bare=0", Some(false)),
("bare=blah", None),
("bare=", Some(false)),
("bare= \n", Some(false)),
("bare = true \n", Some(true)),
("\t bare = false \n", Some(false)),
("\n\tbare=true", Some(true)),
("\n\tbare=true\n\tfoo", Some(true)),
("\n\tbare ", Some(true)),
("\n\tbare", Some(true)),
("not found\nreally", None),
] {
assert_eq!(parse_bare(input.as_bytes()), expected, "{input:?}");
}
}
}
}

/// Returns true if `git_dir` is located within a `.git/modules` directory, indicating it's a submodule clone.
pub fn submodule_git_dir(git_dir: &Path) -> bool {
let mut last_comp = None;
Expand Down Expand Up @@ -141,12 +40,15 @@ pub fn git(git_dir: &Path) -> Result<crate::repository::Kind, crate::is_git::Err
source: err,
path: git_dir.into(),
})?;
git_with_metadata(git_dir, git_dir_metadata)
// precompose-unicode can't be known here, so we just default it to false, hoping it won't matter.
let cwd = gix_fs::current_dir(false)?;
git_with_metadata(git_dir, git_dir_metadata, &cwd)
}

pub(crate) fn git_with_metadata(
git_dir: &Path,
git_dir_metadata: std::fs::Metadata,
cwd: &Path,
) -> Result<crate::repository::Kind, crate::is_git::Error> {
#[derive(Eq, PartialEq)]
enum Kind {
Expand All @@ -166,6 +68,8 @@ pub(crate) fn git_with_metadata(
{
// Fast-path: avoid doing the complete search if HEAD is already not there.
// TODO(reftable): use a ref-store to lookup HEAD if ref-tables should be supported, or detect ref-tables beforehand.
// Actually ref-tables still keep a specially marked `HEAD` around, so nothing might be needed here
// Even though our head-check later would fail without supporting it.
if !dot_git.join("HEAD").exists() {
return Err(crate::is_git::Error::MissingHead);
}
Expand Down Expand Up @@ -236,25 +140,26 @@ pub(crate) fn git_with_metadata(
},
Kind::MaybeRepo => {
let conformed_git_dir = if git_dir == Path::new(".") {
gix_path::realpath(git_dir)
gix_path::realpath_opts(git_dir, cwd, gix_path::realpath::MAX_SYMLINKS)
.map(Cow::Owned)
.unwrap_or(Cow::Borrowed(git_dir))
} else {
Cow::Borrowed(git_dir)
gix_path::normalize(git_dir.into(), cwd).unwrap_or(Cow::Borrowed(git_dir))
};
if bare(conformed_git_dir.as_ref()) || conformed_git_dir.extension() == Some(OsStr::new("git")) {
crate::repository::Kind::PossiblyBare
} else if submodule_git_dir(conformed_git_dir.as_ref()) {
crate::repository::Kind::SubmoduleGitDir
} else if conformed_git_dir.file_name() == Some(OsStr::new(DOT_GIT_DIR))
|| !bare_by_config(conformed_git_dir.as_ref())
.map_err(|err| crate::is_git::Error::Metadata {
source: err,
path: conformed_git_dir.join("config"),
})?
.ok_or(crate::is_git::Error::Inconclusive)?
{
} else if conformed_git_dir.file_name() == Some(OsStr::new(DOT_GIT_DIR)) {
crate::repository::Kind::WorkTree { linked_git_dir: None }
// } else if !bare_by_config(conformed_git_dir.as_ref())
// .map_err(|err| crate::is_git::Error::Metadata {
// source: err,
// path: conformed_git_dir.join("config"),
// })?
// .ok_or(crate::is_git::Error::Inconclusive)?
// {
// crate::repository::Kind::WorktreePossiblyInConfiguration
} else {
crate::repository::Kind::PossiblyBare
}
Expand Down
2 changes: 2 additions & 0 deletions gix-discover/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,8 @@ pub mod is_git {
Metadata { source: std::io::Error, path: PathBuf },
#[error("The repository's config file doesn't exist or didn't have a 'bare' configuration or contained core.worktree without value")]
Inconclusive,
#[error("Could not obtain current directory when conforming repository path")]
Copy link
Member

@EliahKagan EliahKagan Oct 11, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is this, and the related conformed_git_dir variable, about something conforming to a standard or expectation? Or are they typos, where "confirming" and confirmed_git_dir should be written instead?

If "conform" is the intended word, then I suggest rephrasing this message for clarity and probably also renaming the conformed_git_dir variable to conforming_git_dir. If "confirm" is intended--i.e. if the intended meaning is that we are verifying that something is a git dir--then I recommend changing this to "confirming" and renaming the variable to confirmed_git_dir.

I'm not sure which meaning is intended--they both seem plausible in context--so I haven't proposed a specific edit at this time. Also, I couldn't propose it in a review comment easily, because at least as applied to the variable name, this is a preexisting ambiguity, and some occurrences of the variable name are in places this PR does not modify. If it is known that one of these two changes is desired, though, then I'd be pleased to open a PR for them after now that this PR is merged.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks for the hint - the affected variable name is definitely leaking here and I improved the message to be much more specific in this PR.

CurrentDir(#[from] std::io::Error),
}
}

Expand Down
11 changes: 8 additions & 3 deletions gix-discover/src/repository.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,10 @@ pub enum Path {
/// The currently checked out or nascent work tree of a git repository
WorkTree(PathBuf),
/// The git repository itself, typically bare and without known worktree.
/// It could also be non-bare with a worktree configured using git configuration, or no worktree at all despite
/// not being bare (due to mis-configuration for example).
///
/// Note that it might still have linked work-trees which can be accessed later, weather bare or not, or it might be a
/// Note that it might still have linked work-trees which can be accessed later, bare or not, or it might be a
/// submodule git directory in the `.git/modules/**/<name>` directory of the parent repository.
Repository(PathBuf),
}
Expand Down Expand Up @@ -112,8 +114,11 @@ pub enum Kind {
/// Note that this is merely a guess at this point as we didn't read the configuration yet.
///
/// Also note that due to optimizing for performance and *just* making an educated *guess in some situations*,
/// we may consider a non-bare repository bare if it it doesn't have an index yet due to be freshly initialized.
/// The caller is has to handle this, typically by reading the configuration.
/// we may consider a non-bare repository bare if it doesn't have an index yet due to be freshly initialized.
/// The caller has to handle this, typically by reading the configuration.
///
/// It could also be a directory which is non-bare by configuration, but is *not* named `.git`.
/// Unusual, but it's possible that a worktree is configured via `core.worktree`.
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is there also the possibility that the worktree path is established by the presence of a GIT_WORK_TREE environment variable? (Or by the presence of a GIT_DIR environment variable together with no GIT_WORK_TREE variable? In this case, I think the worktree is implicitly taken to be the process CWD, at least in Git.)

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In gitoxide, plumbing crates aren't allowed to read environment variables directly, so this is not handled here.
There is a function in gix though that reads these variables and uses them detect repositories from there.
It's most definitely not fully conforming with what Git does.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks--I've commented #1585 (comment) with a note about including or otherwise revisiting this when I open the other environment-related compatibility issues per the plan in #1585 (comment).

Copy link
Member Author

@Byron Byron Oct 12, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe as an additional note, despite claiming it's 'most definitely' not conforming, what I tried to say is that I didn't really try to make it conforming in the first place. To me it felt like environment variables are only needed in certain contexts, for instance, when creating a Git hook with gitoxide, and the implementation should rather be so that gitoxide can be used in certain situations (of which I probably only know one), and otherwise never be able to lead to something surprising. This is a general problem with environment variables, their implicitness, and gitoxide already makes them explicit by mapping every one of them to a git configuration variable, which is used in its place.

That's pretty nice in practice as gix config can now list them as well, and it's also clear that they will override what's there as they are (usually) last.

PossiblyBare,
/// A `git` repository along with checked out files in a work tree.
WorkTree {
Expand Down
6 changes: 3 additions & 3 deletions gix-discover/src/upwards/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -38,11 +38,11 @@ pub(crate) mod function {
// Normalize the path so that `Path::parent()` _actually_ gives
// us the parent directory. (`Path::parent` just strips off the last
// path component, which means it will not do what you expect when
// working with paths paths that contain '..'.)
// working with paths that contain '..'.)
let cwd = current_dir.map_or_else(
|| {
// The paths we return are relevant to the repository, but at this time it's impossible to know
// what `core.precomposeUnicode` is going to be. Hence the one using these paths will have to
// what `core.precomposeUnicode` is going to be. Hence, the one using these paths will have to
// transform the paths as needed, because we can't. `false` means to leave the obtained path as is.
gix_fs::current_dir(false).map(Cow::Owned)
},
Expand Down Expand Up @@ -130,7 +130,7 @@ pub(crate) mod function {
cursor_metadata_backup = cursor_metadata.take();
}
if let Ok(kind) = match cursor_metadata.take() {
Some(metadata) => is_git_with_metadata(&cursor, metadata),
Some(metadata) => is_git_with_metadata(&cursor, metadata, &cwd),
None => is_git(&cursor),
} {
match filter_by_trust(&cursor)? {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,30 @@ fn bare_repo_with_index_file_looks_still_looks_like_bare() -> crate::Result {
Ok(())
}

#[test]
fn non_bare_repo_without_workdir() -> crate::Result {
let repo = repo_path()?.join("non-bare-without-worktree");
let kind = gix_discover::is_git(&repo)?;
assert_eq!(
kind,
gix_discover::repository::Kind::PossiblyBare,
"typically due to misconfiguration, but worktrees could also be configured in Git configuration"
);
Ok(())
}

#[test]
fn non_bare_repo_without_workdir_with_index() -> crate::Result {
let repo = repo_path()?.join("non-bare-without-worktree-with-index");
let kind = gix_discover::is_git(&repo)?;
assert_eq!(
kind,
gix_discover::repository::Kind::PossiblyBare,
"this means it has to be validated later"
);
Ok(())
}

#[test]
fn bare_repo_with_index_file_looks_still_looks_like_bare_if_it_was_renamed() -> crate::Result {
for repo_name in ["bare-with-index-bare", "bare-with-index-no-config-bare"] {
Expand Down
File renamed without changes.
File renamed without changes.
11 changes: 11 additions & 0 deletions gix-discover/tests/fixtures/make_basic_repo.sh
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,17 @@ mkdir -p some/very/deeply/nested/subdir

git clone --bare --shared . bare.git

git clone --bare --shared . non-bare-without-worktree
(cd non-bare-without-worktree
git config core.bare false
)

git clone --bare --shared . non-bare-without-worktree-with-index
(cd non-bare-without-worktree
git config core.bare false
cp ../.git/index .
)

git worktree add worktrees/a
git worktree add worktrees/b-private-dir-deleted
rm -R .git/worktrees/b-private-dir-deleted
Expand Down
2 changes: 1 addition & 1 deletion gix/src/object/tree/diff/change.rs
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
use super::ChangeDetached;
use crate::bstr::{BStr, ByteSlice};
use crate::ext::ObjectIdExt;
use crate::object::tree::diff::Change;
use crate::Repository;
use gix_diff::tree_with_rewrites::Change as ChangeDetached;

impl Change<'_, '_, '_> {
/// Produce a platform for performing a line-diff no matter whether the underlying [Change] is an addition, modification,
Expand Down
2 changes: 2 additions & 0 deletions gix/src/object/tree/diff/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,8 @@ pub enum Action {
Cancel,
}

pub use gix_diff::tree_with_rewrites::Change as ChangeDetached;

/// Represents any possible change in order to turn one tree into another.
#[derive(Debug, Clone, Copy)]
pub enum Change<'a, 'old, 'new> {
Expand Down
8 changes: 4 additions & 4 deletions gix/src/open/repository.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
#![allow(clippy::result_large_err)]
use std::{borrow::Cow, path::PathBuf};

use gix_features::threading::OwnShared;
use std::ffi::OsStr;
use std::{borrow::Cow, path::PathBuf};

use super::{Error, Options};
use crate::{
Expand Down Expand Up @@ -86,7 +86,7 @@ impl ThreadSafeRepository {
}
};

// The be altered later based on `core.precomposeUnicode`.
// To be altered later based on `core.precomposeUnicode`.
let cwd = gix_fs::current_dir(false)?;
let (git_dir, worktree_dir) = gix_discover::repository::Path::from_dot_git_dir(path, kind, &cwd)
.expect("we have sanitized path with is_git()")
Expand Down Expand Up @@ -284,7 +284,7 @@ impl ThreadSafeRepository {
}

match worktree_dir {
None if !config.is_bare => {
None if !config.is_bare && refs.git_dir().extension() == Some(OsStr::new(gix_discover::DOT_GIT_DIR)) => {
worktree_dir = Some(git_dir.parent().expect("parent is always available").to_owned());
}
Some(_) => {
Expand Down
Binary file modified gix/tests/fixtures/generated-archives/make_basic_repo.tar
Binary file not shown.
5 changes: 5 additions & 0 deletions gix/tests/fixtures/make_basic_repo.sh
Original file line number Diff line number Diff line change
Expand Up @@ -36,4 +36,9 @@ git init all-untracked
git init empty-core-excludes
(cd empty-core-excludes
echo $'[core]\n\texcludesFile = ' >> .git/config
)

git clone --bare . non-bare-without-worktree
(cd non-bare-without-worktree
git config core.bare false
)
1 change: 1 addition & 0 deletions gix/tests/gix/repository/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ mod dirwalk {
("bare.git".into(), Directory),
("empty-core-excludes".into(), Repository),
("non-bare-repo-without-index".into(), Repository),
("non-bare-without-worktree".into(), Directory),
("some".into(), Directory),
];
assert_eq!(
Expand Down
24 changes: 24 additions & 0 deletions gix/tests/gix/repository/open.rs
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,30 @@ fn bare_repo_with_index() -> crate::Result {
repo.is_bare(),
"it's properly classified as it reads the configuration (and has no worktree)"
);
assert_eq!(repo.work_dir(), None);
Ok(())
}

#[test]
fn non_bare_non_git_repo_without_worktree() -> crate::Result {
let repo = named_subrepo_opts(
"make_basic_repo.sh",
"non-bare-without-worktree",
gix::open::Options::isolated(),
)?;
assert!(!repo.is_bare());
assert_eq!(repo.work_dir(), None, "it doesn't assume that workdir exists");

let repo = gix::open_opts(
repo.git_dir().join("objects").join(".."),
gix::open::Options::isolated(),
)?;
assert!(!repo.is_bare());
assert_eq!(
repo.work_dir(),
None,
"it figures this out even if a non-normalized gitdir is used"
);
Ok(())
}

Expand Down
Loading