From 32bbacb5d2fc17be7d8fc09fbcfd72f86aa4dd65 Mon Sep 17 00:00:00 2001 From: Kerim Incedayi Date: Mon, 30 Dec 2024 14:30:55 +0100 Subject: [PATCH 1/3] feat: add raw response mode to process_query - Add raw_response option to return tuple instead of template - Update extract_content endpoint for plain text output --- README.md | 28 +++++++++++ src/process_query.py | 102 +++++++++++++++++++++++------------------ src/routers/dynamic.py | 37 ++++++++++++++- 3 files changed, 121 insertions(+), 46 deletions(-) diff --git a/README.md b/README.md index 01ab27d..d29108b 100644 --- a/README.md +++ b/README.md @@ -19,6 +19,9 @@ You can also replace `hub` with `ingest` in any github url to access the corespo - **Easy code context**: Get a text digest from a git repository URL or a directory - **Smart Formatting**: Optimized output format for LLM prompts +- **Flexible Filtering**: + - Include/exclude files by pattern + - Control maximum file size - **Statistics about**: - File and directory structure - Size of the extract @@ -62,6 +65,31 @@ summary, tree, content = ingest("https://github.com/cyclotruc/gitingest") By default, this won't write a file but can be enabled with the `output` argument +## 🌐 Web API Usage + +You can access repositories directly via URL: + +``` +# Basic repository access +https://gitingest.com/user/repo + +# With query parameters +https://gitingest.com/extract/user/repo?max_file_size=243&pattern_type=include&pattern=*.py + +# Parameters: +- max_file_size: Controls maximum file size (default: 243) +- pattern_type: 'include' or 'exclude' +- pattern: File pattern (e.g. "*.py" for Python files) +- summary: Set to true to include summary and tree structure +``` + +## 🛠️ Using + +- Tailwind CSS - Frontend +- [FastAPI](https://github.com/fastapi/fastapi) - Backend framework +- [tiktoken](https://github.com/openai/tiktoken) - Token estimation +- [apianalytics.dev](https://www.apianalytics.dev/) - Simple Analytics + ## 🌐 Self-host 1. 
Build the image: diff --git a/src/process_query.py b/src/process_query.py index 4053e45..77a1987 100644 --- a/src/process_query.py +++ b/src/process_query.py @@ -1,3 +1,4 @@ +from typing import Union from fastapi import Request from fastapi.templating import Jinja2Templates from starlette.templating import _TemplateResponse @@ -18,32 +19,35 @@ async def process_query( pattern_type: str = "exclude", pattern: str = "", is_index: bool = False, -) -> _TemplateResponse: + raw_response: bool = False +) -> Union[_TemplateResponse, tuple[str, str, str]]: """ - Process a query by parsing input, cloning a repository, and generating a summary. - - Handle user input, process GitHub repository data, and prepare - a response for rendering a template with the processed results or an error message. + Process query and return template response or raw data tuple. Parameters ---------- request : Request - The HTTP request object. + HTTP request object input_text : str - Input text provided by the user, typically a GitHub repository URL or slug. + GitHub repository URL or slug slider_position : int - Position of the slider, representing the maximum file size in the query. + Maximum file size position (0-500) pattern_type : str, optional - Type of pattern to use, either "include" or "exclude" (default is "exclude"). + "include" or "exclude" pattern type (default: "exclude") pattern : str, optional - Pattern to include or exclude in the query, depending on the pattern type. + Pattern for including/excluding files is_index : bool, optional - Flag indicating whether the request is for the index page (default is False). + Whether request is for index page (default: False) + raw_response : bool, optional + Return raw data tuple instead of template (default: False) Returns ------- - _TemplateResponse - Rendered template response containing the processed results or an error message. 
+ Union[_TemplateResponse, tuple[str, str, str]] + TemplateResponse: + Rendered HTML template with processed results, summary, and error messages + tuple[str, str, str]: + Raw data as (summary, directory_tree, file_contents) when raw_response=True """ template = "index.jinja" if is_index else "github.jinja" max_file_size = logSliderToSize(slider_position) @@ -51,7 +55,7 @@ async def process_query( if pattern_type == "include": include_patterns = pattern exclude_patterns = None - elif pattern_type == "exclude": + else: exclude_patterns = pattern include_patterns = None @@ -63,17 +67,53 @@ async def process_query( include_patterns=include_patterns, ignore_patterns=exclude_patterns, ) + clone_config = CloneConfig( url=query["url"], local_path=query["local_path"], commit=query.get("commit"), branch=query.get("branch"), ) + await clone_repo(clone_config) summary, tree, content = ingest_from_query(query) + + if raw_response: + return summary, tree, content + with open(f"{clone_config.local_path}.txt", "w") as f: f.write(tree + "\n" + content) + if not raw_response and len(content) > MAX_DISPLAY_SIZE: + content = ( + f"(Files content cropped to {int(MAX_DISPLAY_SIZE / 1_000)}k characters, " + "download full ingest to see more)\n" + content[:MAX_DISPLAY_SIZE] + ) + + _print_success( + url=query["url"], + max_file_size=max_file_size, + pattern_type=pattern_type, + pattern=pattern, + summary=summary, + ) + return templates.TemplateResponse( + template, + { + "request": request, + "github_url": input_text, + "result": True, + "summary": summary, + "tree": tree, + "content": contents, + "examples": EXAMPLE_REPOS if is_index else [], + "ingest_id": query["id"], + "default_file_size": slider_position, + "pattern_type": pattern_type, + "pattern": pattern, + }, + ) + except Exception as e: # hack to print error message when query is not defined if "query" in locals() and query is not None and isinstance(query, dict): @@ -82,6 +122,9 @@ async def process_query( 
print(f"{Colors.BROWN}WARN{Colors.END}: {Colors.RED}<- {Colors.END}", end="") print(f"{Colors.RED}{e}{Colors.END}") + if raw_response: + raise e + return templates.TemplateResponse( template, { @@ -95,37 +138,6 @@ async def process_query( }, ) - if len(content) > MAX_DISPLAY_SIZE: - content = ( - f"(Files content cropped to {int(MAX_DISPLAY_SIZE / 1_000)}k characters, " - "download full ingest to see more)\n" + content[:MAX_DISPLAY_SIZE] - ) - - _print_success( - url=query["url"], - max_file_size=max_file_size, - pattern_type=pattern_type, - pattern=pattern, - summary=summary, - ) - - return templates.TemplateResponse( - template, - { - "request": request, - "github_url": input_text, - "result": True, - "summary": summary, - "tree": tree, - "content": content, - "examples": EXAMPLE_REPOS if is_index else [], - "ingest_id": query["id"], - "default_file_size": slider_position, - "pattern_type": pattern_type, - "pattern": pattern, - }, - ) - def _print_query(url: str, max_file_size: int, pattern_type: str, pattern: str) -> None: """ diff --git a/src/routers/dynamic.py b/src/routers/dynamic.py index bfd6d44..b057bdf 100644 --- a/src/routers/dynamic.py +++ b/src/routers/dynamic.py @@ -1,5 +1,5 @@ from fastapi import APIRouter, Form, Request -from fastapi.responses import HTMLResponse +from fastapi.responses import HTMLResponse, Response from fastapi.templating import Jinja2Templates from process_query import process_query @@ -8,6 +8,41 @@ router = APIRouter() templates = Jinja2Templates(directory="templates") +@router.get("/extract/{full_path:path}", response_model=None) +async def extract_content( + request: Request, + full_path: str, + summary: bool = False, +) -> Response: + try: + query_params = request.query_params + max_file_size = int(query_params.get("max_file_size", 243)) + pattern_type = query_params.get("pattern_type", "exclude") + pattern = query_params.get("pattern", "") + + result_summary, tree, content = await process_query( + request, + 
input_text=f"https://github.com/{full_path}", + slider_position=max_file_size, + pattern_type=pattern_type, + pattern=pattern, + is_index=False, + raw_response=True + ) + + response_parts = [] + if summary: + response_parts.append(f"Summary:\n{result_summary}\n") + response_parts.append(f"Tree:\n{tree}\n") + response_parts.append(f"Content:\n{content}") + + return Response(content="\n".join(response_parts), media_type="text/plain") + except Exception as e: + return Response( + content=f"Error during extraction: {str(e)}", + media_type="text/plain", + status_code=500, + ) @router.get("/{full_path:path}") async def catch_all(request: Request, full_path: str) -> HTMLResponse: From 60e6b90a6641e31ebcc860ca59995a34d00400cb Mon Sep 17 00:00:00 2001 From: Romain Courtois Date: Tue, 31 Dec 2024 09:30:16 +0100 Subject: [PATCH 2/3] Update src/process_query.py --- src/process_query.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/process_query.py b/src/process_query.py index 77a1987..1eb75bf 100644 --- a/src/process_query.py +++ b/src/process_query.py @@ -105,7 +105,7 @@ async def process_query( "result": True, "summary": summary, "tree": tree, - "content": contents, + "content": content, "examples": EXAMPLE_REPOS if is_index else [], "ingest_id": query["id"], "default_file_size": slider_position, From 51efe003d3a6c92a4900e22d99a8fa31c8a18709 Mon Sep 17 00:00:00 2001 From: Filip Christiansen <22807962+filipchristiansen@users.noreply.github.com> Date: Wed, 1 Jan 2025 00:07:30 +0100 Subject: [PATCH 3/3] Ran pre-commit on cevatkerim's branch and fixed type hints for CI to pass --- README.md | 2 +- src/process_query.py | 5 ++--- src/routers/dynamic.py | 16 +++++++++++----- src/routers/index.py | 2 +- 4 files changed, 15 insertions(+), 10 deletions(-) diff --git a/README.md b/README.md index d29108b..e9743aa 100644 --- a/README.md +++ b/README.md @@ -69,7 +69,7 @@ By default, this won't write a file but can be enabled with the `output` argumen You can
access repositories directly via URL: -``` +```plaintext # Basic repository access https://gitingest.com/user/repo diff --git a/src/process_query.py b/src/process_query.py index 1eb75bf..7ce9597 100644 --- a/src/process_query.py +++ b/src/process_query.py @@ -1,4 +1,3 @@ -from typing import Union from fastapi import Request from fastapi.templating import Jinja2Templates from starlette.templating import _TemplateResponse @@ -19,8 +18,8 @@ async def process_query( pattern_type: str = "exclude", pattern: str = "", is_index: bool = False, - raw_response: bool = False -) -> Union[_TemplateResponse, tuple[str, str, str]]: + raw_response: bool = False, +) -> _TemplateResponse | tuple[str, str, str]: """ Process query and return template response or raw data tuple. diff --git a/src/routers/dynamic.py b/src/routers/dynamic.py index b057bdf..503a110 100644 --- a/src/routers/dynamic.py +++ b/src/routers/dynamic.py @@ -1,3 +1,5 @@ +import typing + from fastapi import APIRouter, Form, Request from fastapi.responses import HTMLResponse, Response from fastapi.templating import Jinja2Templates @@ -8,6 +10,7 @@ router = APIRouter() templates = Jinja2Templates(directory="templates") + @router.get("/extract/{full_path:path}", response_model=None) async def extract_content( request: Request, @@ -20,22 +23,24 @@ async def extract_content( pattern_type = query_params.get("pattern_type", "exclude") pattern = query_params.get("pattern", "") - result_summary, tree, content = await process_query( + processed_query = await process_query( request, input_text=f"https://github.com/{full_path}", slider_position=max_file_size, pattern_type=pattern_type, pattern=pattern, is_index=False, - raw_response=True + raw_response=True, ) - + + result_summary, tree, content = typing.cast(tuple[str, str, str], processed_query) + response_parts = [] if summary: response_parts.append(f"Summary:\n{result_summary}\n") response_parts.append(f"Tree:\n{tree}\n") response_parts.append(f"Content:\n{content}") - + 
return Response(content="\n".join(response_parts), media_type="text/plain") except Exception as e: return Response( @@ -44,6 +49,7 @@ async def extract_content( status_code=500, ) + @router.get("/{full_path:path}") async def catch_all(request: Request, full_path: str) -> HTMLResponse: """ @@ -84,7 +90,7 @@ async def process_catch_all( max_file_size: int = Form(...), pattern_type: str = Form(...), pattern: str = Form(...), -) -> HTMLResponse: +) -> HTMLResponse | tuple[str, str, str]: """ Processes the form submission with user input for query parameters. diff --git a/src/routers/index.py b/src/routers/index.py index 9665bd0..40cbb24 100644 --- a/src/routers/index.py +++ b/src/routers/index.py @@ -47,7 +47,7 @@ async def index_post( max_file_size: int = Form(...), pattern_type: str = Form(...), pattern: str = Form(...), -) -> HTMLResponse: +) -> HTMLResponse | tuple[str, str, str]: """ Processes the form submission with user input for query parameters.