Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feature: add branch option to ingest and CLI for repository cloning #108

Closed
wants to merge 4 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions requirements-dev.txt
Original file line number Diff line number Diff line change
Expand Up @@ -5,3 +5,4 @@ pre-commit
pylint
pytest
pytest-asyncio
uvicorn
6 changes: 5 additions & 1 deletion src/gitingest/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,12 +14,14 @@
@click.option("--max-size", "-s", default=MAX_FILE_SIZE, help="Maximum file size to process in bytes")
@click.option("--exclude-pattern", "-e", multiple=True, help="Patterns to exclude")
@click.option("--include-pattern", "-i", multiple=True, help="Patterns to include")
@click.option("--branch", "-b", default=None, help="Branch to clone and ingest")
def main(
source: str,
output: str | None,
max_size: int,
exclude_pattern: tuple[str, ...],
include_pattern: tuple[str, ...],
branch: str | None,
) -> None:
"""
Analyze a directory or repository and create a text dump of its contents.
Expand All @@ -41,6 +43,8 @@ def main(
A tuple of patterns to exclude during the analysis. Files matching these patterns will be ignored.
include_pattern : tuple[str, ...]
A tuple of patterns to include during the analysis. Only files matching these patterns will be processed.
branch : str | None
The branch to clone (optional).

Raises
------
Expand All @@ -54,7 +58,7 @@ def main(

if not output:
output = "digest.txt"
summary, _, _ = ingest(source, max_size, include_patterns, exclude_patterns, output=output)
summary, _, _ = ingest(source, max_size, include_patterns, exclude_patterns, branch, output=output)

click.echo(f"Analysis complete! Output written to: {output}")
click.echo("\nSummary:")
Expand Down
7 changes: 5 additions & 2 deletions src/gitingest/ingest.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ def ingest(
max_file_size: int = 10 * 1024 * 1024, # 10 MB
include_patterns: list[str] | str | None = None,
exclude_patterns: list[str] | str | None = None,
branch: str | None = None,
output: str | None = None,
) -> tuple[str, str, str]:
"""
Expand All @@ -34,6 +35,8 @@ def ingest(
Pattern or list of patterns specifying which files to include. If `None`, all files are included.
exclude_patterns : list[str] | str | None, optional
Pattern or list of patterns specifying which files to exclude. If `None`, no files are excluded.
branch : str | None, optional
The branch to clone and ingest. If `None`, the default branch is used.
output : str | None, optional
File path where the summary and content should be written. If `None`, the results are not written to a file.

Expand Down Expand Up @@ -65,7 +68,7 @@ def ingest(
url=query["url"],
local_path=query["local_path"],
commit=query.get("commit"),
branch=query.get("branch"),
branch=branch,
)
clone_result = clone_repo(clone_config)

Expand All @@ -82,7 +85,7 @@ def ingest(

return summary, tree, content
finally:
# Clean up the temporary directory if it was created
# Clean up temporary directories
if query["url"]:
# Clean up the temporary directory under /tmp/gitingest
cleanup_path = "/tmp/gitingest"
Expand Down
6 changes: 4 additions & 2 deletions src/process_query.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ async def process_query(
slider_position: int,
pattern_type: str = "exclude",
pattern: str = "",
branch: str = None,
is_index: bool = False,
) -> _TemplateResponse:
"""
Expand All @@ -41,6 +42,8 @@ async def process_query(
Type of pattern to use, either "include" or "exclude" (default is "exclude").
pattern : str
Pattern to include or exclude in the query, depending on the pattern type.
branch : str
The branch to clone, by default None.
is_index : bool
Flag indicating whether the request is for the index page (default is False).

Expand Down Expand Up @@ -88,14 +91,13 @@ async def process_query(
url=query["url"],
local_path=query["local_path"],
commit=query.get("commit"),
branch=query.get("branch"),
branch=branch or query.get("branch"),
)
await clone_repo(clone_config)
summary, tree, content = ingest_from_query(query)
with open(f"{clone_config.local_path}.txt", "w", encoding="utf-8") as f:
f.write(tree + "\n" + content)
except Exception as e:
# hack to print error message when query is not defined
if "query" in locals() and query is not None and isinstance(query, dict):
_print_error(query["url"], e, max_file_size, pattern_type, pattern)
else:
Expand Down
6 changes: 6 additions & 0 deletions src/static/js/utils.js
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,12 @@ function handleSubmit(event, showLoading = false) {
formData.append('pattern', pattern.value);
}

const branch = document.getElementById('branch');
if (branch) {
formData.delete('branch');
formData.append('branch', branch.value);
}

const originalContent = submitButton.innerHTML;
const currentStars = document.getElementById('github-stars')?.textContent;

Expand Down
9 changes: 9 additions & 0 deletions src/templates/components/github_form.jinja
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,15 @@
</div>
<input type="hidden" name="pattern_type" value="exclude">
<input type="hidden" name="pattern" value="">
<div class="relative w-full h-full">
<div class="w-full h-full rounded bg-gray-900 translate-y-1 translate-x-1 absolute inset-0 z-10"></div>
<input type="text"
name="branch"
id="branch"
placeholder="Branch (optional)"
value="{{ branch if branch else '' }}"
class="border-[3px] w-full relative z-20 border-gray-900 placeholder-gray-600 text-lg font-medium focus:outline-none py-3.5 px-6 rounded">
</div>
</form>
<div class="mt-4 relative z-20 flex flex-wrap gap-4 items-start">
<!-- Pattern selector -->
Expand Down
41 changes: 41 additions & 0 deletions tests/test_clone.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
""" Tests for the clone module. """

import os
from unittest.mock import AsyncMock, patch

import pytest
Expand Down Expand Up @@ -171,3 +172,43 @@ async def test_check_repo_exists_with_redirect() -> None:
mock_exec.return_value = mock_process

assert await _check_repo_exists(url)


@pytest.mark.asyncio
async def test_clone_specific_branch(tmp_path) -> None:
    """
    Clone a named branch and verify that it is the branch checked out on disk.

    Builds a `CloneConfig` for the "main" branch, awaits `clone_repo` on it, then asks git which
    branch is current inside the resulting directory.

    NOTE(review): nothing is mocked here, so this presumably needs network access and a `git`
    executable on PATH -- confirm before running in an offline CI environment.
    """
    import subprocess  # local import: only this test shells out to git

    repo_url = "https://github.com/cyclotruc/gitingest.git"
    branch_name = "main"
    local_path = tmp_path / "gitingest"

    config = CloneConfig(url=repo_url, local_path=str(local_path), branch=branch_name)
    await clone_repo(config)

    # Assertions
    assert local_path.exists(), "The repository was not cloned successfully."
    assert local_path.is_dir(), "The cloned repository path is not a directory."

    # Check the current branch.
    # Pass an argument list instead of a shell string so that a temp path containing spaces or
    # shell metacharacters is handed to git verbatim; `check=True` surfaces a failing git call
    # as an error instead of an empty branch name.
    result = subprocess.run(
        ["git", "-C", str(local_path), "branch", "--show-current"],
        capture_output=True,
        text=True,
        check=True,
    )
    current_branch = result.stdout.strip()
    assert current_branch == branch_name, f"Expected branch '{branch_name}', got '{current_branch}'."


@pytest.mark.asyncio
async def test_clone_branch_with_slashes(tmp_path):
    """
    Verify that a branch name containing a slash is forwarded to `git clone` unchanged.

    Both `_check_repo_exists` and `_run_git_command` are patched, so the test only inspects the
    arguments that `clone_repo` passes to the git command runner.
    """
    url = "https://github.com/cyclotruc/gitingest.git"
    branch = "feat/logo"
    destination = tmp_path / "gitingest"

    clone_config = CloneConfig(url=url, local_path=str(destination), branch=branch)

    # The exact argv expected for a shallow, single-branch clone of `branch`.
    expected_args = (
        "git",
        "clone",
        "--depth=1",
        "--single-branch",
        "--branch",
        branch,
        url,
        str(destination),
    )

    with patch("gitingest.clone._check_repo_exists", return_value=True), patch(
        "gitingest.clone._run_git_command", new_callable=AsyncMock
    ) as git_runner:
        await clone_repo(clone_config)
        git_runner.assert_called_once_with(*expected_args)