diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 163c2a8..9fbbf5d 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -12,7 +12,7 @@ jobs: strategy: fail-fast: true matrix: - os: [ubuntu-latest, windows-latest, macos-latest] + os: [ubuntu-latest, macos-latest] python-version: ["3.10", "3.11", "3.12", "3.13"] steps: diff --git a/README.md b/README.md index 7e02c46..01ab27d 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,7 @@ # GitIngest +[![Image](./docs/frontpage.png "GitIngest main page")](https://gitingest.com) + [![License](https://img.shields.io/badge/license-MIT-blue.svg)](https://github.com/cyclotruc/gitingest/blob/main/LICENSE) [![PyPI version](https://badge.fury.io/py/gitingest.svg)](https://badge.fury.io/py/gitingest) [![Downloads](https://pepy.tech/badge/gitingest)](https://pepy.tech/project/gitingest) @@ -7,8 +9,6 @@ [![Code style: black](https://img.shields.io/badge/code%20style-black-000000.svg)](https://github.com/psf/black) [![Discord](https://dcbadge.limes.pink/api/server/https://discord.com/invite/zerRaGK9EC)](https://discord.com/invite/zerRaGK9EC) -[![Image](./docs/frontpage.png "GitIngest main page")](https://gitingest.com) - Turn any Git repository into a prompt-friendly text ingest for LLMs. You can also replace `hub` with `ingest` in any github url to access the coresponding digest @@ -62,13 +62,6 @@ summary, tree, content = ingest("https://github.com/cyclotruc/gitingest") By default, this won't write a file but can be enabled with the `output` argument -## 🛠️ Using - -- Tailwind CSS - Frontend -- [FastAPI](https://github.com/fastapi/fastapi) - Backend framework -- [tiktoken](https://github.com/openai/tiktoken) - Token estimation -- [apianalytics.dev](https://www.apianalytics.dev/) - Simple Analytics - ## 🌐 Self-host 1. 
Build the image: @@ -84,35 +77,45 @@ By default, this won't write a file but can be enabled with the `output` argumen ``` The application will be available at `http://localhost:8000` -Ensure environment variables are set before running the application or deploying it via Docker. -## ✔️ Contributing +If you are hosting it on a domain, you can specify the allowed hostnames via env variable `ALLOWED_HOSTS`. -Contributions are welcome! + ```bash + #Default: "gitingest.com,*.gitingest.com,localhost, 127.0.0.1". + ALLOWED_HOSTS="example.com, localhost, 127.0.0.1" + ``` -Gitingest aims to be friendly for first time contributors, with a simple python and html codebase. If you need any help while working with the code, reach out to us on [discord](https://discord.com/invite/zerRaGK9EC) +## 🛠️ Stack -### Ways to contribute +- [Tailwind CSS](https://tailwindcss.com/) - Frontend +- [FastAPI](https://github.com/fastapi/fastapi) - Backend framework +- [Jinja2](https://jinja.palletsprojects.com/) - HTML templating +- [tiktoken](https://github.com/openai/tiktoken) - Token estimation +- [apianalytics.dev](https://www.apianalytics.dev/) - Simple Analytics -1. Provide your feedback and ideas on discord -2. Open an Issue on github to report a bug -3. Create a Pull request - - Fork the repository - - Make your changes and test them locally - - Open a pull request for review and feedback +## ✔️ Contributing to Gitingest -### 🔧 Local dev +Gitingest aims to be friendly for first time contributors, with a simple python and html codebase. + If you need any help while working with the code, reach out to us on [discord](https://discord.com/invite/zerRaGK9EC) -#### Environment Configuration +### Ways to help (non-technical) -- **`ALLOWED_HOSTS`**: Specify allowed hostnames for the application. Default: `"gitingest.com,*.gitingest.com,gitdigest.dev,localhost"`. 
-You can configure the application using the following environment variables: +- Provide your feedback and ideas on discord +- Open an Issue on GitHub to report a bug / submit a feature request +- Talk about Gitingest on social media -```bash -ALLOWED_HOSTS="gitingest.local,localhost" -``` +### How to submit a PR -#### Run locally +1. Fork the repository & clone it locally +2. Set up the dev environment (see Development section below) +3. Run unit tests with `pytest` +4. Commit your changes and run `pre-commit` +5. Open a pull request on GitHub for review and feedback +6. (Optional) Invite project maintainer to your branch for easier collaboration + +## 🔧 Development + +### Run web UI locally 1. Clone the repository @@ -124,7 +127,10 @@ ALLOWED_HOSTS="gitingest.local,localhost" 2. Install dependencies ```bash - pip install -r requirements.txt + pip install -r requirements-dev.txt + python -m venv .venv + source .venv/bin/activate + pre-commit install ``` 3. Run the application: @@ -133,3 +139,25 @@ ALLOWED_HOSTS="gitingest.local,localhost" cd src uvicorn main:app --reload ``` + +4. Run unit tests + + ```bash + pytest + ``` + +The application should be available at `http://localhost:8000` + +### Working on the CLI + +1. Install the package in dev mode + + ```bash + pip install -e . + ``` + +2. 
Run the CLI + + ```bash + gitingest --help + ``` diff --git a/src/gitingest/tests/test_clone.py b/src/gitingest/tests/test_clone.py index e3b8128..c124730 100644 --- a/src/gitingest/tests/test_clone.py +++ b/src/gitingest/tests/test_clone.py @@ -74,3 +74,98 @@ async def test_check_repo_exists() -> None: # Test failed request mock_process.returncode = 1 assert await _check_repo_exists(url) is False + + +@pytest.mark.asyncio +async def test_clone_repo_invalid_url() -> None: + clone_config = CloneConfig( + url="", + local_path="/tmp/repo", + ) + with pytest.raises(ValueError, match="The 'url' parameter is required."): + await clone_repo(clone_config) + + +@pytest.mark.asyncio +async def test_clone_repo_invalid_local_path() -> None: + clone_config = CloneConfig( + url="https://github.com/user/repo", + local_path="", + ) + with pytest.raises(ValueError, match="The 'local_path' parameter is required."): + await clone_repo(clone_config) + + +@pytest.mark.asyncio +async def test_clone_repo_with_custom_branch() -> None: + clone_config = CloneConfig( + url="https://github.com/user/repo", + local_path="/tmp/repo", + branch="feature-branch", + ) + with patch("gitingest.clone._check_repo_exists", return_value=True): + with patch("gitingest.clone._run_git_command", new_callable=AsyncMock) as mock_exec: + await clone_repo(clone_config) + mock_exec.assert_called_once_with( + "git", + "clone", + "--depth=1", + "--single-branch", + "--branch", + "feature-branch", + clone_config.url, + clone_config.local_path, + ) + + +@pytest.mark.asyncio +async def test_git_command_failure() -> None: + clone_config = CloneConfig( + url="https://github.com/user/repo", + local_path="/tmp/repo", + ) + with patch("gitingest.clone._check_repo_exists", return_value=True): + with patch("gitingest.clone._run_git_command", side_effect=RuntimeError("Git command failed")): + with pytest.raises(RuntimeError, match="Git command failed"): + await clone_repo(clone_config) + + +@pytest.mark.asyncio +async def 
test_clone_repo_default_shallow_clone() -> None: + clone_config = CloneConfig( + url="https://github.com/user/repo", + local_path="/tmp/repo", + ) + with patch("gitingest.clone._check_repo_exists", return_value=True): + with patch("gitingest.clone._run_git_command", new_callable=AsyncMock) as mock_exec: + await clone_repo(clone_config) + mock_exec.assert_called_once_with( + "git", "clone", "--depth=1", "--single-branch", clone_config.url, clone_config.local_path + ) + + +@pytest.mark.asyncio +async def test_clone_repo_commit_without_branch() -> None: + clone_config = CloneConfig( + url="https://github.com/user/repo", + local_path="/tmp/repo", + commit="a" * 40, # Simulating a valid commit hash + ) + with patch("gitingest.clone._check_repo_exists", return_value=True): + with patch("gitingest.clone._run_git_command", new_callable=AsyncMock) as mock_exec: + await clone_repo(clone_config) + assert mock_exec.call_count == 2 # Clone and checkout calls + mock_exec.assert_any_call("git", "clone", "--single-branch", clone_config.url, clone_config.local_path) + mock_exec.assert_any_call("git", "-C", clone_config.local_path, "checkout", clone_config.commit) + + +@pytest.mark.asyncio +async def test_check_repo_exists_with_redirect() -> None: + url = "https://github.com/user/repo" + with patch("asyncio.create_subprocess_exec", new_callable=AsyncMock) as mock_exec: + mock_process = AsyncMock() + mock_process.communicate.return_value = (b"HTTP/1.1 302 Found\n", b"") + mock_process.returncode = 0 # Simulate successful request + mock_exec.return_value = mock_process + + assert await _check_repo_exists(url) diff --git a/src/gitingest/tests/test_parse_query.py b/src/gitingest/tests/test_parse_query.py index b87856d..8ce3ff0 100644 --- a/src/gitingest/tests/test_parse_query.py +++ b/src/gitingest/tests/test_parse_query.py @@ -1,7 +1,7 @@ import pytest from gitingest.ignore_patterns import DEFAULT_IGNORE_PATTERNS -from gitingest.parse_query import _parse_url, parse_query +from 
gitingest.parse_query import _parse_patterns, _parse_url, parse_query def test_parse_url_valid() -> None: @@ -44,3 +44,99 @@ def test_parse_query_invalid_pattern() -> None: url = "https://github.com/user/repo" with pytest.raises(ValueError, match="Pattern.*contains invalid characters"): parse_query(url, max_file_size=50, from_web=True, include_patterns="*.py;rm -rf") + + +def test_parse_url_with_subpaths() -> None: + url = "https://github.com/user/repo/tree/main/subdir/file" + result = _parse_url(url) + assert result["user_name"] == "user" + assert result["repo_name"] == "repo" + assert result["branch"] == "main" + assert result["subpath"] == "/subdir/file" + + +def test_parse_url_invalid_repo_structure() -> None: + url = "https://github.com/user" + with pytest.raises(ValueError, match="Invalid repository URL"): + _parse_url(url) + + +def test_parse_patterns_valid() -> None: + patterns = "*.py, *.md, docs/*" + result = _parse_patterns(patterns) + assert result == ["*.py", "*.md", "docs/*"] + + +def test_parse_patterns_invalid_characters() -> None: + patterns = "*.py;rm -rf" + with pytest.raises(ValueError, match="Pattern.*contains invalid characters"): + _parse_patterns(patterns) + + +def test_parse_query_with_large_file_size() -> None: + url = "https://github.com/user/repo" + result = parse_query(url, max_file_size=10**9, from_web=True) + assert result["max_file_size"] == 10**9 + assert result["ignore_patterns"] == DEFAULT_IGNORE_PATTERNS + + +def test_parse_query_empty_patterns() -> None: + url = "https://github.com/user/repo" + result = parse_query(url, max_file_size=50, from_web=True, include_patterns="", ignore_patterns="") + assert result["include_patterns"] is None + assert result["ignore_patterns"] == DEFAULT_IGNORE_PATTERNS + + +def test_parse_query_include_and_ignore_overlap() -> None: + url = "https://github.com/user/repo" + result = parse_query( + url, + max_file_size=50, + from_web=True, + include_patterns="*.py", + ignore_patterns=["*.py", "*.txt"], + 
) + assert result["include_patterns"] == ["*.py"] + assert "*.py" not in result["ignore_patterns"] + assert "*.txt" in result["ignore_patterns"] + + +def test_parse_query_local_path() -> None: + path = "/home/user/project" + result = parse_query(path, max_file_size=100, from_web=False) + assert result["local_path"] == "/home/user/project" + assert result["id"] is not None + assert result["slug"] == "user/project" + + +def test_parse_query_relative_path() -> None: + path = "./project" + result = parse_query(path, max_file_size=100, from_web=False) + assert result["local_path"].endswith("project") + assert result["slug"].endswith("project") + + +def test_parse_query_empty_source() -> None: + with pytest.raises(ValueError, match="Invalid repository URL"): + parse_query("", max_file_size=100, from_web=True) + + +def test_parse_url_branch_and_commit_distinction() -> None: + url_branch = "https://github.com/user/repo/tree/main" + url_commit = "https://github.com/user/repo/tree/abcd1234abcd1234abcd1234abcd1234abcd1234" + + result_branch = _parse_url(url_branch) + result_commit = _parse_url(url_commit) + + assert result_branch["branch"] == "main" + assert result_branch["commit"] is None + + assert result_commit["branch"] is None + assert result_commit["commit"] == "abcd1234abcd1234abcd1234abcd1234abcd1234" + + +def test_parse_query_uuid_uniqueness() -> None: + path = "/home/user/project" + result1 = parse_query(path, max_file_size=100, from_web=False) + result2 = parse_query(path, max_file_size=100, from_web=False) + assert result1["id"] != result2["id"]