From b724e71b63955f40a521d0eda220ce3896d257c3 Mon Sep 17 00:00:00 2001 From: Kerim Incedayi Date: Mon, 30 Dec 2024 14:30:55 +0100 Subject: [PATCH 1/2] feat: add raw response mode to process_query - Add raw_response option to return tuple instead of template - Update extract_content endpoint for plain text output --- README.md | 21 +++++++++ src/process_query.py | 104 +++++++++++++++++++++++------------------ src/routers/dynamic.py | 37 ++++++++++++++- 3 files changed, 115 insertions(+), 47 deletions(-) diff --git a/README.md b/README.md index 7e02c46..b01871b 100644 --- a/README.md +++ b/README.md @@ -19,6 +19,9 @@ You can also replace `hub` with `ingest` in any github url to access the corespo - **Easy code context**: Get a text digest from a git repository URL or a directory - **Smart Formatting**: Optimized output format for LLM prompts +- **Flexible Filtering**: + - Include/exclude files by pattern + - Control maximum file size - **Statistics about**: - File and directory structure - Size of the extract @@ -62,6 +65,24 @@ summary, tree, content = ingest("https://github.com/cyclotruc/gitingest") By default, this won't write a file but can be enabled with the `output` argument +## 🌐 Web API Usage + +You can access repositories directly via URL: + +``` +# Basic repository access +https://gitingest.com/user/repo + +# With query parameters +https://gitingest.com/extract/user/repo?max_file_size=243&pattern_type=include&pattern=*.py + +# Parameters: +- max_file_size: Controls maximum file size (default: 243) +- pattern_type: 'include' or 'exclude' +- pattern: File pattern (e.g. "*.py" for Python files) +- summary: Set to true to include summary and tree structure +``` + ## 🛠️ Using - Tailwind CSS - Frontend diff --git a/src/process_query.py b/src/process_query.py index 4053e45..704f88d 100644 --- a/src/process_query.py +++ b/src/process_query.py @@ -1,3 +1,4 @@ +from typing import Union from fastapi import Request from fastapi.templating import Jinja2Templates from starlette.templating import _TemplateResponse @@ -18,32 +19,35 @@ async def process_query( pattern_type: str = "exclude", pattern: str = "", is_index: bool = False, -) -> _TemplateResponse: + raw_response: bool = False +) -> Union[_TemplateResponse, tuple[str, str, str]]: """ - Process a query by parsing input, cloning a repository, and generating a summary. - - Handle user input, process GitHub repository data, and prepare - a response for rendering a template with the processed results or an error message. + Process query and return template response or raw data tuple. Parameters ---------- request : Request - The HTTP request object. + HTTP request object input_text : str - Input text provided by the user, typically a GitHub repository URL or slug. + GitHub repository URL or slug slider_position : int - Position of the slider, representing the maximum file size in the query. + Maximum file size position (0-500) pattern_type : str, optional - Type of pattern to use, either "include" or "exclude" (default is "exclude"). + "include" or "exclude" pattern type (default: "exclude") pattern : str, optional - Pattern to include or exclude in the query, depending on the pattern type. - is_index : bool, optional - Flag indicating whether the request is for the index page (default is False). + Pattern for including/excluding files + is_index : bool, optional + Whether request is for index page (default: False) + raw_response : bool, optional + Return raw data tuple instead of template (default: False) Returns ------- - _TemplateResponse - Rendered template response containing the processed results or an error message. + Union[_TemplateResponse, tuple[str, str, str]] + TemplateResponse: + Rendered HTML template with processed results, summary, and error messages + tuple[str, str, str]: + Raw data as (summary, directory_tree, file_contents) when raw_response=True """ template = "index.jinja" if is_index else "github.jinja" max_file_size = logSliderToSize(slider_position) @@ -51,7 +55,7 @@ async def process_query( if pattern_type == "include": include_patterns = pattern exclude_patterns = None - elif pattern_type == "exclude": + else: exclude_patterns = pattern include_patterns = None @@ -63,17 +67,53 @@ async def process_query( include_patterns=include_patterns, ignore_patterns=exclude_patterns, ) + clone_config = CloneConfig( url=query["url"], local_path=query["local_path"], commit=query.get("commit"), branch=query.get("branch"), ) + await clone_repo(clone_config) summary, tree, content = ingest_from_query(query) + + if raw_response: + return summary, tree, content + with open(f"{clone_config.local_path}.txt", "w") as f: f.write(tree + "\n" + content) + if not raw_response and len(content) > MAX_DISPLAY_SIZE: + content = ( + f"(Files content cropped to {int(MAX_DISPLAY_SIZE / 1_000)}k characters, " + "download full ingest to see more)\n" + content[:MAX_DISPLAY_SIZE] + ) + + _print_success( + url=query["url"], + max_file_size=max_file_size, + pattern_type=pattern_type, + pattern=pattern, + summary=summary, + ) + return templates.TemplateResponse( + template, + { + "request": request, + "github_url": input_text, + "result": True, + "summary": summary, + "tree": tree, + "content": contents, + "examples": EXAMPLE_REPOS if is_index else [], + "ingest_id": query["id"], + "default_file_size": slider_position, + "pattern_type": pattern_type, + "pattern": pattern, + }, + ) + except Exception as e: # hack to print error message when query is not defined if "query" in locals() and query is not None and isinstance(query, dict): @@ -81,6 +121,9 @@ async def process_query( else: print(f"{Colors.BROWN}WARN{Colors.END}: {Colors.RED}<- {Colors.END}", end="") print(f"{Colors.RED}{e}{Colors.END}") + + if raw_response: + raise e return templates.TemplateResponse( template, @@ -95,37 +138,6 @@ async def process_query( }, ) - if len(content) > MAX_DISPLAY_SIZE: - content = ( - f"(Files content cropped to {int(MAX_DISPLAY_SIZE / 1_000)}k characters, " - "download full ingest to see more)\n" + content[:MAX_DISPLAY_SIZE] - ) - - _print_success( - url=query["url"], - max_file_size=max_file_size, - pattern_type=pattern_type, - pattern=pattern, - summary=summary, - ) - - return templates.TemplateResponse( - template, - { - "request": request, - "github_url": input_text, - "result": True, - "summary": summary, - "tree": tree, - "content": content, - "examples": EXAMPLE_REPOS if is_index else [], - "ingest_id": query["id"], - "default_file_size": slider_position, - "pattern_type": pattern_type, - "pattern": pattern, - }, - ) - def _print_query(url: str, max_file_size: int, pattern_type: str, pattern: str) -> None: """ diff --git a/src/routers/dynamic.py b/src/routers/dynamic.py index bfd6d44..b057bdf 100644 --- a/src/routers/dynamic.py +++ b/src/routers/dynamic.py @@ -1,5 +1,5 @@ from fastapi import APIRouter, Form, Request -from fastapi.responses import HTMLResponse +from fastapi.responses import HTMLResponse, Response from fastapi.templating import Jinja2Templates from process_query import process_query @@ -8,6 +8,41 @@ router = APIRouter() templates = Jinja2Templates(directory="templates") +@router.get("/extract/{full_path:path}", response_model=None) +async def extract_content( + request: Request, + full_path: str, + summary: bool = False, +) -> Response: + try: + query_params = request.query_params + max_file_size = int(query_params.get("max_file_size", 243)) + pattern_type = query_params.get("pattern_type", "exclude") + pattern = query_params.get("pattern", "") + + result_summary, tree, content = await process_query( + request, + input_text=f"https://github.com/{full_path}", + slider_position=max_file_size, + pattern_type=pattern_type, + pattern=pattern, + is_index=False, + raw_response=True + ) + + response_parts = [] + if summary: + response_parts.append(f"Summary:\n{result_summary}\n") + response_parts.append(f"Tree:\n{tree}\n") + response_parts.append(f"Content:\n{content}") + + return Response(content="\n".join(response_parts), media_type="text/plain") + except Exception as e: + return Response( + content=f"Error during extraction: {str(e)}", + media_type="text/plain", + status_code=500, + ) @router.get("/{full_path:path}") async def catch_all(request: Request, full_path: str) -> HTMLResponse: From 02c4b6173a1d37dfc992c1eddc643af06fc36f72 Mon Sep 17 00:00:00 2001 From: Romain Courtois Date: Tue, 31 Dec 2024 09:30:16 +0100 Subject: [PATCH 2/2] Update src/process_query.py --- src/process_query.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/process_query.py b/src/process_query.py index 704f88d..3556867 100644 --- a/src/process_query.py +++ b/src/process_query.py @@ -105,7 +105,7 @@ async def process_query( "result": True, "summary": summary, "tree": tree, - "content": contents, + "content": content, "examples": EXAMPLE_REPOS if is_index else [], "ingest_id": query["id"], "default_file_size": slider_position,