Commit eca97a9

refactor(cli): create subdir for helpers
1 parent 270bce6 commit eca97a9

File tree

11 files changed: +175 -31 lines changed

.github/workflows/test.yml

Lines changed: 2 additions & 0 deletions
@@ -12,6 +12,8 @@ jobs:
     steps:
       - name: Checkout
         uses: actions/checkout@v4
+      - name: Install git
+        run: apt-get update && apt-get install -y git
       - name: Run pytest
         run: uv run poe test
       - name: Upload test coverage

grimoire.yaml

Lines changed: 1 addition & 3 deletions
@@ -10,8 +10,6 @@ llm:
   text_chunk_overlap: 128
   code_chunk_size: 512
   code_chunk_overlap: 128
-docs:
+sources:
   - url: https://github.com/numpy/numpy
   - url: https://github.com/pandas-dev/pandas
-code:
-  - url: https://github.com/pandas-dev/pandas
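With the separate docs: and code: sections folded into a single sources: list, every repository is declared once while the chunking settings stay under llm:. A minimal sketch of what the new shape parses to, assuming PyYAML is importable in the project environment (an assumption; the diff does not show it as a direct dependency):

import yaml  # assumption: PyYAML is present in the environment

raw = yaml.safe_load(
    """
sources:
  - url: https://github.com/numpy/numpy
  - url: https://github.com/pandas-dev/pandas
"""
)

# Each entry is a plain mapping with a single `url` key; there is no longer
# a docs-versus-code distinction at this level.
for entry in raw["sources"]:
    print(entry["url"])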

grimoire/ask.py

Lines changed: 3 additions & 9 deletions
@@ -1,10 +1,10 @@
 import os
 
 import typer
-from langchain.chat_models import init_chat_model
 from langchain.chat_models.base import BaseChatModel
 
-from grimoire.helpers import blue_text, red_text
+from grimoire.helpers.rag import setup_llm
+from grimoire.helpers.typer import blue_text, red_text
 
 ask_cli = typer.Typer()
 
@@ -20,13 +20,7 @@ def get_llm_client() -> BaseChatModel:
         raise typer.Exit(code=1)
 
     # TODO: add configuration options for other models and options for model temperature, etc.
-    return init_chat_model(
-        "google_genai:gemini-2.0-flash",
-        api_key=os.getenv("LLM_API_KEY"),
-        configurable_fields=None,
-        max_tokens=512,
-        temperature=0,
-    )
+    return setup_llm()
 
 
 @ask_cli.command("ask", help="Ask a question with project context")
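get_llm_client now delegates model construction to the shared setup_llm() helper instead of calling init_chat_model inline, so the ask command and any future commands build the same Gemini client. A minimal usage sketch, assuming LLM_API_KEY is exported and the Google GenAI extras from pyproject.toml are installed:

from grimoire.helpers.rag import setup_llm

# setup_llm() reads LLM_API_KEY from the environment and returns a LangChain
# BaseChatModel, so the standard chat interface applies.
llm = setup_llm()
response = llm.invoke("In one sentence, what is pandas?")
print(response.content)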

grimoire/configuration.py

Lines changed: 2 additions & 7 deletions
@@ -35,20 +35,15 @@ class DBConfiguration(BaseModel):
     password: str
 
 
-class CodeSource(BaseModel):
-    url: str
-
-
-class DocumentSource(BaseModel):
+class Source(BaseModel):
     url: str
 
 
 class ProjectConfiguration(BaseModel):
     name: str
     db: DBConfiguration
     llm: LLMConfiguration
-    docs: list[DocumentSource] | None = None
-    code: list[CodeSource] | None = None
+    sources: list[Source] | None = None
 
     @classmethod
     def load_from_yaml(cls, file_path: Path) -> "ProjectConfiguration":  # noqa: B008
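Collapsing CodeSource and DocumentSource into one Source model means callers deal with a single optional sources list. A minimal sketch of reading it back, assuming the working directory holds a grimoire.yaml and that CONFIG_FILE_NAME still resolves to that file:

from pathlib import Path

from grimoire.configuration import CONFIG_FILE_NAME, ProjectConfiguration

config = ProjectConfiguration.load_from_yaml(Path.cwd() / CONFIG_FILE_NAME)

# `sources` defaults to None, so guard before iterating.
for source in config.sources or []:
    print(source.url)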

grimoire/helpers/__init__.py

Whitespace-only changes.

grimoire/helpers/rag.py

Lines changed: 94 additions & 0 deletions
@@ -0,0 +1,94 @@
+import os
+
+import torch
+from langchain.chat_models import init_chat_model
+from langchain_core.language_models.chat_models import BaseChatModel
+from langchain_core.vectorstores import VectorStore
+from langchain_huggingface import HuggingFaceEmbeddings
+from langchain_postgres import PGVector
+
+from grimoire.configuration import DBConfiguration
+
+
+def vectorstore_connection(db: DBConfiguration) -> str:
+    return PGVector.connection_string_from_db_params(
+        driver="psycopg",
+        host=db.host,
+        port=db.port,
+        database="postgres",  # TODO: make this configurable
+        user=db.user,
+        password=db.password,
+    )
+
+
+def embeddings() -> HuggingFaceEmbeddings:
+    device = (
+        "cuda"
+        if torch.cuda.is_available()
+        else "mps"
+        if torch.backends.mps.is_available()
+        else "cpu"
+    )
+
+    # https://huggingface.co/BAAI/bge-m3?library=sentence-transformers
+    # https://python.langchain.com/api_reference/huggingface/embeddings/langchain_huggingface.embeddings.huggingface.HuggingFaceEmbeddings.html
+    return HuggingFaceEmbeddings(
+        model_name="BAAI/bge-m3",
+        model_kwargs={"device": device},
+        encode_kwargs={"normalize_embeddings": True},
+    )
+
+
+def setup_vectorstore(collection: str, connection: str) -> VectorStore | None:
+    try:
+        return PGVector(
+            collection_name=collection,
+            connection=connection,
+            embeddings=embeddings(),
+        )
+    except Exception as e:  # pylint: disable=broad-exception-caught
+        print(f"Error: {e}")
+        return None
+
+
+def clear_collection(collection: str, connection: str) -> None:
+    try:
+        PGVector(
+            connection=connection,
+            embeddings=embeddings(),
+            collection_name=collection,
+        ).delete_collection()
+    except Exception as e:  # pylint: disable=broad-exception-caught
+        print(f"Error: {e}")
+
+
+def delete_vectorstore(connection: str) -> None:
+    try:
+        PGVector(
+            connection=connection,
+            embeddings=embeddings(),
+        ).drop_tables()
+    except Exception as e:  # pylint: disable=broad-exception-caught
+        print(f"Error: {e}")
+
+
+def setup_llm() -> BaseChatModel:
+    return init_chat_model(
+        "google_genai:gemini-2.0-flash",
+        api_key=os.getenv("LLM_API_KEY"),
+        configurable_fields=None,
+        max_tokens=512,
+        temperature=0,
+    )
+
+
+def text_ingestion(
+    collection: str, text_chunk_size: int, text_chunk_overlap: int
+) -> None:
+    pass
+
+
+def code_ingestion(
+    collection: str, code_chunk_size: int, code_chunk_overlap: int
+) -> None:
+    pass
File renamed without changes.
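
The new grimoire.helpers.rag module above gathers the PGVector and LLM plumbing in one place: vectorstore_connection builds a psycopg connection string from DBConfiguration, embeddings picks a device and loads BAAI/bge-m3, and setup_vectorstore / setup_llm hand back ready-to-use LangChain objects (or print the error and return None on failure). A rough wiring sketch; the DBConfiguration field values and the collection name are hypothetical, and the exact field set is inferred from how vectorstore_connection reads it:

from grimoire.configuration import DBConfiguration
from grimoire.helpers.rag import setup_llm, setup_vectorstore, vectorstore_connection

# Hypothetical local credentials; the real values come from grimoire.yaml.
db = DBConfiguration(host="localhost", port=5432, user="postgres", password="postgres")

connection = vectorstore_connection(db)
store = setup_vectorstore("grimoire", connection)  # may be None if setup fails

if store is not None:
    hits = store.similarity_search("how are text chunks sized?", k=4)
    llm = setup_llm()  # requires LLM_API_KEY in the environment
    print(len(hits), type(llm).__name__)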

grimoire/init.py

Lines changed: 5 additions & 11 deletions
@@ -4,22 +4,17 @@
 
 from grimoire.configuration import (
     CONFIG_FILE_NAME,
-    CodeSource,
     DBConfiguration,
-    DocumentSource,
     LLMConfiguration,
     ProjectConfiguration,
+    Source,
 )
 
 init_cli = typer.Typer()
 
-DUMMY_DOCS = [
-    DocumentSource(url="https://github.com/numpy/numpy"),
-    DocumentSource(url="https://github.com/pandas-dev/pandas"),
-]
-
-DUMMY_CODE = [
-    CodeSource(url="https://github.com/pandas-dev/pandas"),
+DUMMY_SOURCES = [
+    Source(url="https://github.com/numpy/numpy"),
+    Source(url="https://github.com/pandas-dev/pandas"),
 ]
 
 
@@ -65,8 +60,7 @@ def get_project_config(path: Path) -> ProjectConfiguration:
         name=project_name,
         llm=ingestion_config,
         db=db_config,
-        docs=DUMMY_DOCS,
-        code=DUMMY_CODE,
+        sources=DUMMY_SOURCES,
     )
 
 
grimoire/sync.py

Lines changed: 29 additions & 1 deletion
@@ -1,8 +1,36 @@
+from pathlib import Path
+from tempfile import TemporaryDirectory
+
 import typer
+from git import Repo
+from rich.progress import track
+
+from grimoire.configuration import CONFIG_FILE_NAME, ProjectConfiguration
+from grimoire.helpers.rag import text_ingestion
 
 sync_cli = typer.Typer()
 
 
 @sync_cli.command("sync", help="Sync the grimoire project with existing configuration")
-def sync() -> None:
+def sync(
+    path: Path = typer.Argument(  # noqa: B008
+        Path.cwd(),  # noqa: B008
+        help="Path to the grimoire project",
+    ),
+) -> None:
     typer.echo("Syncing grimoire project")
+    config = ProjectConfiguration.load_from_yaml(path / CONFIG_FILE_NAME)
+
+    if not config.sources:
+        typer.echo("No sources found in configuration file")
+        raise typer.Exit()
+
+    for repo in track(config.sources, description="Processing ..."):
+        with TemporaryDirectory() as temp_dir:
+            temp_path = Path(temp_dir)
+            Repo.clone_from(repo.url, to_path=temp_path)
+            text_ingestion(
+                config.llm.collection,
+                config.llm.text_chunk_size,
+                config.llm.text_chunk_overlap,
+            )
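
sync now loads the project configuration, bails out if sources is empty, and clones each configured repository into a temporary directory before handing off to text_ingestion, which is still a stub (pass) in helpers/rag.py. The clone step in isolation, using one of the URLs from grimoire.yaml; it requires the git binary on PATH, which is presumably why the CI workflow above now installs it:

from pathlib import Path
from tempfile import TemporaryDirectory

from git import Repo

# Clone into a throwaway directory, as the sync loop does, then let the
# directory (and the checkout) disappear when the context manager exits.
with TemporaryDirectory() as temp_dir:
    temp_path = Path(temp_dir)
    Repo.clone_from("https://github.com/pandas-dev/pandas", to_path=temp_path)
    print(sum(1 for _ in temp_path.rglob("*.md")), "markdown files checked out")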

pyproject.toml

Lines changed: 2 additions & 0 deletions
@@ -9,12 +9,14 @@ requires-python = "<3.13,>=3.12"
 dependencies = [
     "datasets>=3.4.1",
     "esprima>=4.0.1",
+    "gitpython>=3.1.44",
     "google-genai>=1.4.0",
     "grpcio==1.60.1",
     "langchain-postgres>=0.0.13",
     "langchain[community,huggingface,google-genai]>=0.3.20",
     "psycopg[binary]>=3.2.6",
     "pydantic>=2.10.6",
+    "rich>=13.9.4",
     "sentence-transformers>=3.4.1",
     "torch>=2.6.0",
     "tree-sitter>=0.24.0",
