Skip to content

Commit

Permalink
feat: add raw response mode to process_query
Browse files Browse the repository at this point in the history
- Add raw_response option to return tuple instead of template
- Update extract_content endpoint for plain text output
  • Loading branch information
cevatkerim committed Dec 30, 2024
1 parent fab90a6 commit b724e71
Show file tree
Hide file tree
Showing 3 changed files with 115 additions and 47 deletions.
21 changes: 21 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,9 @@ You can also replace `hub` with `ingest` in any github url to access the corespo

- **Easy code context**: Get a text digest from a git repository URL or a directory
- **Smart Formatting**: Optimized output format for LLM prompts
- **Flexible Filtering**:
- Include/exclude files by pattern
- Control maximum file size
- **Statistics about**:
- File and directory structure
- Size of the extract
Expand Down Expand Up @@ -62,6 +65,24 @@ summary, tree, content = ingest("https://github.com/cyclotruc/gitingest")

By default, this won't write a file; writing one can be enabled with the `output` argument.

## 🌐 Web API Usage

You can access repositories directly via URL:

```
# Basic repository access
https://gitingest.com/user/repo
# With query parameters
https://gitingest.com/extract/user/repo?max_file_size=243&pattern_type=include&pattern=*.py
# Parameters:
- max_file_size: Slider position (0-500) that is mapped to the maximum file size (default: 243)
- pattern_type: 'include' or 'exclude'
- pattern: File pattern (e.g. "*.py" for Python files)
- summary: Set to true to include summary and tree structure
```

## 🛠️ Using

- Tailwind CSS - Frontend
Expand Down
104 changes: 58 additions & 46 deletions src/process_query.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
from typing import Union
from fastapi import Request
from fastapi.templating import Jinja2Templates
from starlette.templating import _TemplateResponse
Expand All @@ -18,40 +19,43 @@ async def process_query(
pattern_type: str = "exclude",
pattern: str = "",
is_index: bool = False,
) -> _TemplateResponse:
raw_response: bool = False
) -> Union[_TemplateResponse, tuple[str, str, str]]:
"""
Process a query by parsing input, cloning a repository, and generating a summary.
Handle user input, process GitHub repository data, and prepare
a response for rendering a template with the processed results or an error message.
Process query and return template response or raw data tuple.
Parameters
----------
request : Request
The HTTP request object.
HTTP request object
input_text : str
Input text provided by the user, typically a GitHub repository URL or slug.
GitHub repository URL or slug
slider_position : int
Position of the slider, representing the maximum file size in the query.
Maximum file size position (0-500)
pattern_type : str, optional
Type of pattern to use, either "include" or "exclude" (default is "exclude").
"include" or "exclude" pattern type (default: "exclude")
pattern : str, optional
Pattern to include or exclude in the query, depending on the pattern type.
is_index : bool, optional
Flag indicating whether the request is for the index page (default is False).
Pattern for including/excluding files
is_index : bool, optional
Whether request is for index page (default: False)
raw_response : bool, optional
Return raw data tuple instead of template (default: False)
Returns
-------
_TemplateResponse
Rendered template response containing the processed results or an error message.
Union[_TemplateResponse, tuple[str, str, str]]
TemplateResponse:
Rendered HTML template with processed results, summary, and error messages
tuple[str, str, str]:
Raw data as (summary, directory_tree, file_contents) when raw_response=True
"""
template = "index.jinja" if is_index else "github.jinja"
max_file_size = logSliderToSize(slider_position)

if pattern_type == "include":
include_patterns = pattern
exclude_patterns = None
elif pattern_type == "exclude":
else:
exclude_patterns = pattern
include_patterns = None

Expand All @@ -63,24 +67,63 @@ async def process_query(
include_patterns=include_patterns,
ignore_patterns=exclude_patterns,
)

clone_config = CloneConfig(
url=query["url"],
local_path=query["local_path"],
commit=query.get("commit"),
branch=query.get("branch"),
)

await clone_repo(clone_config)
summary, tree, content = ingest_from_query(query)

if raw_response:
return summary, tree, content

with open(f"{clone_config.local_path}.txt", "w") as f:
f.write(tree + "\n" + content)

if not raw_response and len(content) > MAX_DISPLAY_SIZE:
content = (
f"(Files content cropped to {int(MAX_DISPLAY_SIZE / 1_000)}k characters, "
"download full ingest to see more)\n" + content[:MAX_DISPLAY_SIZE]
)

_print_success(
url=query["url"],
max_file_size=max_file_size,
pattern_type=pattern_type,
pattern=pattern,
summary=summary,
)
return templates.TemplateResponse(
template,
{
"request": request,
"github_url": input_text,
"result": True,
"summary": summary,
"tree": tree,
"content": contents,
"examples": EXAMPLE_REPOS if is_index else [],
"ingest_id": query["id"],
"default_file_size": slider_position,
"pattern_type": pattern_type,
"pattern": pattern,
},
)

except Exception as e:
# hack to print error message when query is not defined
if "query" in locals() and query is not None and isinstance(query, dict):
_print_error(query["url"], e, max_file_size, pattern_type, pattern)
else:
print(f"{Colors.BROWN}WARN{Colors.END}: {Colors.RED}<- {Colors.END}", end="")
print(f"{Colors.RED}{e}{Colors.END}")

if raw_response:
raise e

return templates.TemplateResponse(
template,
Expand All @@ -95,37 +138,6 @@ async def process_query(
},
)

if len(content) > MAX_DISPLAY_SIZE:
content = (
f"(Files content cropped to {int(MAX_DISPLAY_SIZE / 1_000)}k characters, "
"download full ingest to see more)\n" + content[:MAX_DISPLAY_SIZE]
)

_print_success(
url=query["url"],
max_file_size=max_file_size,
pattern_type=pattern_type,
pattern=pattern,
summary=summary,
)

return templates.TemplateResponse(
template,
{
"request": request,
"github_url": input_text,
"result": True,
"summary": summary,
"tree": tree,
"content": content,
"examples": EXAMPLE_REPOS if is_index else [],
"ingest_id": query["id"],
"default_file_size": slider_position,
"pattern_type": pattern_type,
"pattern": pattern,
},
)


def _print_query(url: str, max_file_size: int, pattern_type: str, pattern: str) -> None:
"""
Expand Down
37 changes: 36 additions & 1 deletion src/routers/dynamic.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
from fastapi import APIRouter, Form, Request
from fastapi.responses import HTMLResponse
from fastapi.responses import HTMLResponse, Response
from fastapi.templating import Jinja2Templates

from process_query import process_query
Expand All @@ -8,6 +8,41 @@
router = APIRouter()
templates = Jinja2Templates(directory="templates")

@router.get("/extract/{full_path:path}", response_model=None)
async def extract_content(
    request: Request,
    full_path: str,
    summary: bool = False,
) -> Response:
    """
    Return the ingest of a GitHub repository as a plain-text response.

    Parameters
    ----------
    request : Request
        Incoming HTTP request. The ``max_file_size``, ``pattern_type`` and
        ``pattern`` values are read from its query string.
    full_path : str
        Path captured after ``/extract/``; it is appended to
        ``https://github.com/`` to build the repository URL.
    summary : bool, optional
        When true, the summary and the tree are emitted before the content
        (default: False).

    Returns
    -------
    Response
        A ``text/plain`` response holding the extracted content on success,
        a 400 response when ``max_file_size`` is not an integer, or a 500
        response carrying the error message when processing fails.
    """
    query_params = request.query_params

    # Validate client input on its own so that a malformed value is reported
    # as a client error (400) instead of being folded into the generic 500.
    raw_max_file_size = query_params.get("max_file_size", 243)
    try:
        max_file_size = int(raw_max_file_size)
    except ValueError:
        return Response(
            content=f"Invalid max_file_size: {raw_max_file_size!r} is not an integer",
            media_type="text/plain",
            status_code=400,
        )

    pattern_type = query_params.get("pattern_type", "exclude")
    pattern = query_params.get("pattern", "")

    try:
        # raw_response=True makes process_query return the data tuple and
        # re-raise failures instead of rendering the HTML template.
        result_summary, tree, content = await process_query(
            request,
            input_text=f"https://github.com/{full_path}",
            # The query parameter is forwarded as the slider position.
            slider_position=max_file_size,
            pattern_type=pattern_type,
            pattern=pattern,
            is_index=False,
            raw_response=True,
        )
    except Exception as e:
        # Top-level boundary of the endpoint: report any processing failure
        # to the caller as plain text.
        return Response(
            content=f"Error during extraction: {str(e)}",
            media_type="text/plain",
            status_code=500,
        )

    response_parts = []
    if summary:
        response_parts.append(f"Summary:\n{result_summary}\n")
        response_parts.append(f"Tree:\n{tree}\n")
    response_parts.append(f"Content:\n{content}")

    return Response(content="\n".join(response_parts), media_type="text/plain")

@router.get("/{full_path:path}")
async def catch_all(request: Request, full_path: str) -> HTMLResponse:
Expand Down

0 comments on commit b724e71

Please sign in to comment.