diff --git a/README.md b/README.md index c2884a7..22b248d 100644 --- a/README.md +++ b/README.md @@ -19,14 +19,14 @@ A python module that is capable of providing different levels of summary for the #### Installation -``` -pip install git+https://github.com/c2siorg/Project-Explainer.git@main#subdirectory=project_explainer&egg=gh_explainer +```bash +bash build.sh ``` #### Example usage ```python -from project_explainer import Explainer +from gh_explainer import Explainer gptExplainer = Explainer("gpt2") @@ -43,12 +43,6 @@ print(gptExplainer.brief("https://github.com/c2siorg/Project-Explainer.git")) Use project explainer as UI -#### Dependencies - -``` -pip install -r project_explainer_ui/requirements.txt -``` - #### Example usage ``` @@ -64,8 +58,8 @@ A simple python module packed with utilities to process files in a project repos #### Installation -``` -pip install git+https://github.com/c2siorg/Project-Explainer.git@main#subdirectory=project_processor&egg=gh_processor +```bash +bash build.sh ``` #### Example usage diff --git a/build.sh b/build.sh new file mode 100644 index 0000000..2646557 --- /dev/null +++ b/build.sh @@ -0,0 +1,10 @@ +#!/bin/bash + +# Install necessary Python packages for building the project +pip install setuptools wheel build + +# Build the project using Python's build module +python -m build + +# Install the built package from the generated distribution file +pip install ./dist/gh_explainer-0.0.0.tar.gz \ No newline at end of file diff --git a/project_explainer/gh_explainer/summarize.py b/project_explainer/gh_explainer/summarize.py index 266036d..f092455 100644 --- a/project_explainer/gh_explainer/summarize.py +++ b/project_explainer/gh_explainer/summarize.py @@ -1,6 +1,6 @@ from transformers import AutoTokenizer, AutoModelForCausalLM, AutoModelForSeq2SeqLM -from gh_processor import (download_github_repo, +from project_processor.gh_processor import (download_github_repo, extract_project_description_from_readme, extract_headings_with_paragraphs_from_markdown, remove_tables_from_markdown, @@ -8,6 +8,7 @@ remove_images_from_markdown, remove_links_from_markdown) import os +from git import rmtree from jinja2 import Template @@ -75,8 +76,8 @@ def _model_gen(self, prompt: str) -> str: Raises: TypeError: If the prompt is not a string. """ - inputs=self.tokenizer.encode(prompt, return_tensors='pt', max_length=1024, truncation=True) - output = self.model.generate(inputs, min_length=256, max_length=512) + inputs=self.tokenizer.encode(prompt, return_tensors='pt', max_length=self.tokenizer.model_max_length, truncation=True) + output = self.model.generate(inputs, min_length=256, max_length=self.tokenizer.model_max_length) return self.tokenizer.decode(output[0], skip_special_tokens=True) def brief(self, github_url: str, branch: str = "main") -> dict: @@ -101,6 +102,10 @@ def brief(self, github_url: str, branch: str = "main") -> dict: prompt = {"prompt": project_description} prepared_prompt = self._fill_template(self.brief_prompt_template, prompt) summary=self._model_gen(prepared_prompt) + + # Delete the repo + rmtree(repo_path) + return {"prompt": prompt, "prepared_prompt": prepared_prompt, "summary": str(summary)} def outline(self, github_url: str, branch: str = "main") -> dict: diff --git a/project_explainer_ui/requirements.txt b/project_explainer_ui/requirements.txt deleted file mode 100644 index 44678c9..0000000 --- a/project_explainer_ui/requirements.txt +++ /dev/null @@ -1,3 +0,0 @@ -gradio --e git+https://github.com/c2siorg/Project-Explainer.git@main#subdirectory=project_processor&egg=gh_processor --e git+https://github.com/c2siorg/Project-Explainer.git@main#subdirectory=project_explainer&egg=gh_explainer&egg=gh_explainer \ No newline at end of file diff --git a/project_explainer_ui/ui.py b/project_explainer_ui/ui.py index a836666..27ad069 100644 --- a/project_explainer_ui/ui.py +++ b/project_explainer_ui/ui.py @@ -1,5 +1,5 @@ import gradio as gr -from gh_explainer import Explainer +from project_explainer.gh_explainer import Explainer def summarize(summarization_type, github_project_url, github_project_branch="main", huggingface_model_id="gpt2"): gptExplainer = Explainer(huggingface_model_id) diff --git a/project_processor/gh_processor/github_downloader.py b/project_processor/gh_processor/github_downloader.py index 51ea2d7..1476256 100644 --- a/project_processor/gh_processor/github_downloader.py +++ b/project_processor/gh_processor/github_downloader.py @@ -26,9 +26,12 @@ def download_github_repo(repo_url: str, branch: str = "main") -> str: repo_path (str): Absolute path to downloaded repo """ repo_name = repo_url.split("/")[-1].split(".")[0] - repo_path = os.path.abspath(repo_name) + repo_path = f"./repos/{repo_name}" + + if not os.path.exists("./repos/"): + os.makedirs("./repos/") - Repo.clone_from(repo_url, repo_name, branch=branch) + Repo.clone_from(repo_url, to_path=repo_path, branch=branch) - logger.info(f"Repository '{repo_name}' downloaded successfully!") + logger.info(f"Repository '{repo_name}' downloaded successfully at {repo_path}!") return repo_path diff --git a/project_processor/pyproject.toml b/project_processor/pyproject.toml deleted file mode 100644 index 294e501..0000000 --- a/project_processor/pyproject.toml +++ /dev/null @@ -1,19 +0,0 @@ -[build-system] -requires = ["setuptools"] -build-backend = "setuptools.build_meta" - -[project] -name = "gh_processor" -description = "github repo file level processor utils" -readme = "README.md" -requires-python = ">=3.7" -license = {text = "Apache 2.0"} -classifiers = [ - "Programming Language :: Python :: 3", -] -dependencies = ["setuptools>=42", "wheel", "gitpython", "markdown2", "spacy"] - -dynamic = ["version"] - -[tool.setuptools] -py-modules = ["gh_processor"] diff --git a/project_explainer/pyproject.toml b/pyproject.toml similarity index 74% rename from project_explainer/pyproject.toml rename to pyproject.toml index 5308a5f..d8acc5e 100644 --- a/project_explainer/pyproject.toml +++ b/pyproject.toml @@ -11,9 +11,9 @@ license = {text = "Apache 2.0"} classifiers = [ "Programming Language :: Python :: 3", ] -dependencies = ["setuptools>=42", "wheel", "transformers", "jinja2", "torch"] +dependencies = ["setuptools>=42", "wheel", "transformers", "jinja2", "torch", "gitpython", "markdown2", "spacy", "gradio"] dynamic = ["version"] [tool.setuptools] -py-modules = ["gh_explainer"] +packages = { find = { where = ["project_explainer", "project_processor"] } } diff --git a/repos/.gitkeep b/repos/.gitkeep new file mode 100644 index 0000000..e69de29