Skip to content

Commit fcf95e3

Browse files
committed
feat(cli): complete sync pipeline
1 parent 10858cd commit fcf95e3

File tree

8 files changed

+58
-57
lines changed

8 files changed

+58
-57
lines changed

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -7,6 +7,7 @@ __pycache__
77
dist/
88

99
.pytest_cache
10+
.mypy_cache
1011
.ruff_cache
1112
.venv
1213
*.env

grimoire.yaml

Lines changed: 7 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -16,4 +16,10 @@ llm:
1616
code_chunk_overlap: 128
1717
include_project: true
1818
project_src: grimoire
19-
sources: null
19+
sources:
20+
- url: https://github.com/fastapi/typer
21+
include_md: true
22+
include_code: false
23+
- url: https://github.com/pydantic/pydantic
24+
include_md: true
25+
include_code: false

grimoire/configuration.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -85,7 +85,7 @@ class Source(BaseModel):
8585
"""
8686

8787
url: str
88-
branch: str = "default"
88+
branch: str | None = None
8989
include_md: bool = True
9090
include_code: bool = False
9191

grimoire/init.py

Lines changed: 10 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -7,6 +7,7 @@
77
DBConfiguration,
88
LLMConfiguration,
99
ProjectConfiguration,
10+
Source,
1011
)
1112
from grimoire.helpers.typer import green_text
1213

@@ -71,12 +72,20 @@ def init(
7172
raise typer.Exit(code=0)
7273

7374
config: ProjectConfiguration = get_project_config(path)
75+
config.sources = [
76+
Source(
77+
url="https://github.com/BDP25/grimoire",
78+
branch="main",
79+
include_md=True,
80+
include_code=False,
81+
)
82+
]
7483
config.save_to_yaml(file_path)
7584

7685
success_message = f"""
7786
{green_text("Grimoire project initialized successfully! 🎉")}
7887
79-
Run `grimoire update` to update all your projects dependencies.
88+
Add your sources to the project by modifying the {green_text("sources")} field in the configuration file.
8089
8190
Configuration: {file_path}
8291
Note: directly modify the "{CONFIG_FILE_NAME}" configuration to your needs.

grimoire/main.py

Lines changed: 0 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -5,7 +5,6 @@
55
from grimoire.flush import flush_cli
66
from grimoire.init import init_cli
77
from grimoire.sync import sync_cli
8-
from grimoire.update import update_cli
98
from grimoire.verify import verify_cli
109

1110
CLI_NAME = "grimoire"
@@ -27,7 +26,6 @@
2726
cli.add_typer(flush_cli, name=None)
2827
cli.add_typer(init_cli, name=None)
2928
cli.add_typer(sync_cli, name=None)
30-
cli.add_typer(update_cli, name=None)
3129
cli.add_typer(verify_cli, name=None)
3230

3331

grimoire/sync.py

Lines changed: 38 additions & 17 deletions
Original file line number | Diff line number | Diff line change
@@ -5,6 +5,7 @@
55
import psycopg
66
import typer
77
from git import Repo
8+
from langchain_core.documents import Document
89
from langchain_postgres import PGVector
910
from rich.progress import track
1011

@@ -14,6 +15,7 @@
1415
get_recursive_config,
1516
)
1617
from grimoire.helpers.ingestion import code_ingestion, text_ingestion
18+
from grimoire.helpers.typer import green_text
1719
from grimoire.helpers.vectorstore import (
1820
clear_collection,
1921
setup_vectorstore,
@@ -24,6 +26,25 @@
2426
sync_cli = typer.Typer()
2527

2628

29+
def ingest_documents_to_vectorstore(
30+
text_splits: list[Document],
31+
code_splits: list[Document],
32+
vectorstore: PGVector,
33+
) -> None:
34+
"""
35+
Ingests documents into the vectorstore.
36+
37+
:param text_splits: List of text Document objects.
38+
:param code_splits: List of code Document objects.
39+
:param vectorstore: The vectorstore instance to ingest documents into.
40+
"""
41+
42+
if text_splits:
43+
vectorstore.add_documents(text_splits, metadata={"source": "text"})
44+
if code_splits:
45+
vectorstore.add_documents(code_splits, metadata={"source": "code"})
46+
47+
2748
@sync_cli.command("sync", help="Sync the grimoire project with existing configuration")
2849
def sync(
2950
path: Path | None = typer.Option( # noqa: B008
@@ -44,49 +65,49 @@ def sync(
4465
):
4566
raise typer.Abort()
4667

47-
typer.echo("Syncing grimoire project")
68+
typer.echo("Syncing grimoire project:")
4869
config = ProjectConfiguration.load_from_yaml(path / CONFIG_FILE_NAME)
4970

5071
if not config.sources:
5172
typer.echo("No sources found in configuration file")
5273
raise typer.Abort()
5374

5475
try:
76+
typer.echo("- Flushing vectorstore")
5577
connection = vectorstore_connection(config.db)
5678
clear_collection(config.llm.collection, connection)
5779
except psycopg.OperationalError as e:
5880
typer.echo("Error connecting to the database")
5981
raise e
6082

61-
text_splits = []
62-
code_splits = []
83+
vectorstore = setup_vectorstore(config.llm.collection, connection)
84+
vectorstore = cast(PGVector, vectorstore) # hack to avoid mypy error
6385

6486
if config.include_project:
65-
text_splits += text_ingestion(path, config.llm, exclude=DEFAULT_EXCLUDE)
66-
code_splits += code_ingestion(
87+
typer.echo("- Ingesting project source code")
88+
text_splits = text_ingestion(path, config.llm, exclude=DEFAULT_EXCLUDE)
89+
code_splits = code_ingestion(
6790
path, config.llm, glob=f"{config.project_src}/**/*"
6891
)
92+
ingest_documents_to_vectorstore(text_splits, code_splits, vectorstore)
6993

70-
for repo in track(config.sources, description="Processing sources"):
94+
typer.echo("- Ingesting sources")
95+
for repo in track(config.sources, description="Processing sources..."):
7196
if not repo.include_md and not repo.include_code:
7297
continue
7398

7499
with TemporaryDirectory() as temp_dir:
75100
temp_path = Path(temp_dir)
76-
Repo.clone_from(repo.url, to_path=temp_path)
101+
source = Repo.clone_from(repo.url, to_path=temp_path)
102+
if repo.branch:
103+
source.git.checkout(repo.branch)
77104

78105
if repo.include_md:
79-
text_splits += text_ingestion(temp_path, config.llm)
106+
text_splits = text_ingestion(temp_path, config.llm)
80107

81108
if repo.include_code:
82-
code_splits += code_ingestion(temp_path, config.llm)
83-
84-
if text_splits or code_splits:
85-
vectorstore = setup_vectorstore(config.llm.collection, connection)
86-
vectorstore = cast(PGVector, vectorstore) # hack to avoid mypy error
109+
code_splits = code_ingestion(temp_path, config.llm)
87110

88-
if text_splits:
89-
vectorstore.add_documents(text_splits, metadata={"source": "text"})
111+
ingest_documents_to_vectorstore(text_splits, code_splits, vectorstore)
90112

91-
if code_splits:
92-
vectorstore.add_documents(code_splits, metadata={"source": "code"})
113+
typer.echo(green_text("Sync completed! Happy wizarding! ✨"))

grimoire/update.py

Lines changed: 0 additions & 34 deletions
This file was deleted.

tests/test_main.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -14,7 +14,7 @@ def test_help_flag() -> None:
1414

1515
def test_includes_commands() -> None:
1616
result = runner.invoke(cli, ["--help"])
17-
commands = ["ask", "flush", "init", "sync", "update", "verify", "version"]
17+
commands = ["ask", "flush", "init", "sync", "verify", "version"]
1818
for command in commands:
1919
assert command in result.stdout
2020

0 commit comments

Comments
 (0)