Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: add raw response mode to process_query #79

Open
wants to merge 4 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
28 changes: 28 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,9 @@ You can also replace `hub` with `ingest` in any github url to access the corespo

- **Easy code context**: Get a text digest from a git repository URL or a directory
- **Smart Formatting**: Optimized output format for LLM prompts
- **Flexible Filtering**:
- Include/exclude files by pattern
- Control maximum file size
- **Statistics about**:
- File and directory structure
- Size of the extract
Expand Down Expand Up @@ -62,6 +65,31 @@ summary, tree, content = ingest("https://github.com/cyclotruc/gitingest")

By default, this won't write a file but can be enabled with the `output` argument

## 🌐 Web API Usage

You can access repositories directly via URL:

```
# Basic repository access
https://gitingest.com/user/repo

# With query parameters
https://gitingest.com/extract/user/repo?max_file_size=243&pattern_type=include&pattern=*.py

# Parameters:
- max_file_size: Slider position that controls the maximum file size (default: 243)
- pattern_type: 'include' or 'exclude' (default: 'exclude')
- pattern: File pattern (e.g. "*.py" for Python files; default: empty)
- summary: Set to true to include the summary and tree structure before the content (default: false)
```

## 🛠️ Using

- Tailwind CSS - Frontend
- [FastAPI](https://github.com/fastapi/fastapi) - Backend framework
- [tiktoken](https://github.com/openai/tiktoken) - Token estimation
- [apianalytics.dev](https://www.apianalytics.dev/) - Simple Analytics

## 🌐 Self-host

1. Build the image:
Expand Down
104 changes: 58 additions & 46 deletions src/process_query.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
from typing import Union
from fastapi import Request
from fastapi.templating import Jinja2Templates
from starlette.templating import _TemplateResponse
Expand All @@ -18,40 +19,43 @@ async def process_query(
pattern_type: str = "exclude",
pattern: str = "",
is_index: bool = False,
) -> _TemplateResponse:
raw_response: bool = False
) -> Union[_TemplateResponse, tuple[str, str, str]]:
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We should now be able to use the _TemplateResponse | tuple[str, str, str] syntax

"""
Process a query by parsing input, cloning a repository, and generating a summary.

Handle user input, process GitHub repository data, and prepare
a response for rendering a template with the processed results or an error message.
Process query and return template response or raw data tuple.

Parameters
----------
request : Request
The HTTP request object.
HTTP request object
input_text : str
Input text provided by the user, typically a GitHub repository URL or slug.
GitHub repository URL or slug
slider_position : int
Position of the slider, representing the maximum file size in the query.
Maximum file size position (0-500)
pattern_type : str, optional
Type of pattern to use, either "include" or "exclude" (default is "exclude").
"include" or "exclude" pattern type (default: "exclude")
pattern : str, optional
Pattern to include or exclude in the query, depending on the pattern type.
is_index : bool, optional
Flag indicating whether the request is for the index page (default is False).
Pattern for including/excluding files
is_index : bool, optional
Whether request is for index page (default: False)
raw_response : bool, optional
Return raw data tuple instead of template (default: False)

Returns
-------
_TemplateResponse
Rendered template response containing the processed results or an error message.
Union[_TemplateResponse, tuple[str, str, str]]
TemplateResponse:
Rendered HTML template with processed results, summary, and error messages
tuple[str, str, str]:
Raw data as (summary, directory_tree, file_contents) when raw_response=True
"""
template = "index.jinja" if is_index else "github.jinja"
max_file_size = logSliderToSize(slider_position)

if pattern_type == "include":
include_patterns = pattern
exclude_patterns = None
elif pattern_type == "exclude":
else:
exclude_patterns = pattern
include_patterns = None

Expand All @@ -63,24 +67,63 @@ async def process_query(
include_patterns=include_patterns,
ignore_patterns=exclude_patterns,
)

clone_config = CloneConfig(
url=query["url"],
local_path=query["local_path"],
commit=query.get("commit"),
branch=query.get("branch"),
)

await clone_repo(clone_config)
summary, tree, content = ingest_from_query(query)

if raw_response:
return summary, tree, content

with open(f"{clone_config.local_path}.txt", "w") as f:
f.write(tree + "\n" + content)

if not raw_response and len(content) > MAX_DISPLAY_SIZE:
content = (
f"(Files content cropped to {int(MAX_DISPLAY_SIZE / 1_000)}k characters, "
"download full ingest to see more)\n" + content[:MAX_DISPLAY_SIZE]
)

_print_success(
url=query["url"],
max_file_size=max_file_size,
pattern_type=pattern_type,
pattern=pattern,
summary=summary,
)
return templates.TemplateResponse(
template,
{
"request": request,
"github_url": input_text,
"result": True,
"summary": summary,
"tree": tree,
"content": contents,
cyclotruc marked this conversation as resolved.
Show resolved Hide resolved
"examples": EXAMPLE_REPOS if is_index else [],
"ingest_id": query["id"],
"default_file_size": slider_position,
"pattern_type": pattern_type,
"pattern": pattern,
},
)

except Exception as e:
# hack to print error message when query is not defined
if "query" in locals() and query is not None and isinstance(query, dict):
_print_error(query["url"], e, max_file_size, pattern_type, pattern)
else:
print(f"{Colors.BROWN}WARN{Colors.END}: {Colors.RED}<- {Colors.END}", end="")
print(f"{Colors.RED}{e}{Colors.END}")

if raw_response:
raise e

return templates.TemplateResponse(
template,
Expand All @@ -95,37 +138,6 @@ async def process_query(
},
)

if len(content) > MAX_DISPLAY_SIZE:
content = (
f"(Files content cropped to {int(MAX_DISPLAY_SIZE / 1_000)}k characters, "
"download full ingest to see more)\n" + content[:MAX_DISPLAY_SIZE]
)

_print_success(
url=query["url"],
max_file_size=max_file_size,
pattern_type=pattern_type,
pattern=pattern,
summary=summary,
)

return templates.TemplateResponse(
template,
{
"request": request,
"github_url": input_text,
"result": True,
"summary": summary,
"tree": tree,
"content": content,
"examples": EXAMPLE_REPOS if is_index else [],
"ingest_id": query["id"],
"default_file_size": slider_position,
"pattern_type": pattern_type,
"pattern": pattern,
},
)


def _print_query(url: str, max_file_size: int, pattern_type: str, pattern: str) -> None:
"""
Expand Down
37 changes: 36 additions & 1 deletion src/routers/dynamic.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
from fastapi import APIRouter, Form, Request
from fastapi.responses import HTMLResponse
from fastapi.responses import HTMLResponse, Response
from fastapi.templating import Jinja2Templates

from process_query import process_query
Expand All @@ -8,6 +8,41 @@
router = APIRouter()
templates = Jinja2Templates(directory="templates")

@router.get("/extract/{full_path:path}", response_model=None)
async def extract_content(
    request: Request,
    full_path: str,
    summary: bool = False,
) -> Response:
    """
    Return a plain-text digest of the GitHub repository at ``full_path``.

    The remaining options are read from the query string:
    ``max_file_size`` (default 243, forwarded to ``process_query`` as
    ``slider_position``), ``pattern_type`` (default ``"exclude"``) and
    ``pattern`` (default ``""``).

    Parameters
    ----------
    request : Request
        The incoming HTTP request; its query parameters supply the options above.
    full_path : str
        Path captured after ``/extract/``; it is appended to
        ``https://github.com/`` to form the repository URL.
    summary : bool, optional
        When True, the summary and tree sections are placed before the
        content section (default: False).

    Returns
    -------
    Response
        ``text/plain`` response. Status 400 when ``max_file_size`` is not an
        integer, status 500 with the error message when extraction fails,
        otherwise the extracted text.
    """
    query_params = request.query_params

    # A malformed size is a client mistake: report it as 400 instead of
    # letting int() fall through to the generic 500 handler below.
    try:
        max_file_size = int(query_params.get("max_file_size", 243))
    except ValueError:
        return Response(
            content="Invalid max_file_size: expected an integer",
            media_type="text/plain",
            status_code=400,
        )

    pattern_type = query_params.get("pattern_type", "exclude")
    pattern = query_params.get("pattern", "")

    try:
        # raw_response=True makes process_query return the raw
        # (summary, tree, content) tuple and re-raise on failure.
        result_summary, tree, content = await process_query(
            request,
            input_text=f"https://github.com/{full_path}",
            slider_position=max_file_size,
            pattern_type=pattern_type,
            pattern=pattern,
            is_index=False,
            raw_response=True,
        )

        response_parts = []
        if summary:
            response_parts.append(f"Summary:\n{result_summary}\n")
            response_parts.append(f"Tree:\n{tree}\n")
        response_parts.append(f"Content:\n{content}")

        return Response(content="\n".join(response_parts), media_type="text/plain")
    except Exception as e:
        # Top-level boundary for this endpoint: surface the failure as text.
        return Response(
            content=f"Error during extraction: {e}",
            media_type="text/plain",
            status_code=500,
        )

@router.get("/{full_path:path}")
async def catch_all(request: Request, full_path: str) -> HTMLResponse:
Expand Down