# Regenerate docstrings across the lazyllm package using an LLM-backed
# CustomManager.  Intended to be run from the repository root:
#   python docs/gen_docstrings.py [--replace] [--clean]
import sys
import os
import argparse

# Make the repo root and the lazynote helper package importable.
sys.path.append('.')
sys.path.append('./docs/scripts')
from lazynote.manager.custom import CustomManager
import lazyllm
from lazyllm import OnlineChatModule

parser = argparse.ArgumentParser()
parser.add_argument('--replace', action='store_true', help='Execute the replace part of the code.')
parser.add_argument('--clean', action='store_true', help='clean code docs.')
args = parser.parse_args()

# Modules that must not be traversed (import side effects / CLI entry points).
skip_list = [
    'lazyllm.components.deploy.relay.server',
    'lazyllm.components.deploy.relay.base',
    'lazyllm.components.finetune.easyllm',
    'lazyllm.tools.rag.component.bm25_retriever',
    'lazyllm.cli'
]

# First pass: strip existing docstrings ('clear' pattern) before refilling.
# NOTE(review): CustomManager.__init__ in docs/scripts/lazynote/manager/custom.py
# takes (llm, language, **kwargs); calling it with neither looks like it would
# raise TypeError here -- confirm against the actual constructor signature.
if args.replace or args.clean:
    manager = CustomManager(pattern='clear', skip_on_error=True)
    manager.traverse(lazyllm, skip_modules=skip_list)

# Second pass: fill docstrings via an online LLM unless we are only cleaning.
if not args.clean:
    language = os.getenv('LAZYLLM_LANGUAGE', 'ENGLISH')
    language = 'en' if language == 'ENGLISH' else 'zh'
    manager = CustomManager(llm=OnlineChatModule(source='deepseek', stream=False),
                            language=language, pattern='fill', skip_on_error=True)
    manager.traverse(lazyllm, skip_modules=skip_list)
+from ..manager.custom import CustomManager +from .prompt import ( + README_PROMPT, + GITIGNORE_PROMPT, + LICENSE_PROMPT, + generate_mkdocs_config, + MKDOCS_PROMPT, + TRANSLATE_PROMPT, + PLUGIN_CONFIG, +) + + +class GitAgent: + def __init__(self, project_path: str, llm: OnlineChatModule, language: str = "zh"): + self.project_path = os.path.abspath(project_path) + if not os.path.exists(self.project_path): + raise ValueError(f"Project path does not exist: {self.project_path}") + + self.supported_languages = {"zh", "en", "bilingual"} + if language not in self.supported_languages: + raise ValueError(f"Unsupported language: {language}. " + f"Please choose from {self.supported_languages}.") + self.language = language + + self.docstring_manager = CustomManager( + llm=llm, pattern="fill", skip_on_error=True, language=language + ) + self.module_dict = {} + self.module_doc_dict = {} + if self.project_path not in sys.path: + sys.path.append(self.project_path) + self._gen_module_dict() + self.llm = llm + self.tool_registered = False + + def standardize_project(self, gen_docstrings: bool = True, gen_mkdocs: bool = True) -> None: + """ + Standardize the project as a Git project + """ + self._generate_requirements() + self._generate_gitignore() + if gen_docstrings: + self._generate_docstring() + self._update_module_dict() + self._generate_readme() + if gen_mkdocs: + self._generate_mkdocs() + LOG.info("✨Project standardization completed") + + def _generate_docstring(self) -> None: + """Generate documentation""" + LOG.info("😊 Automatically generating doctring...") + for _, module in self.module_dict.items(): + if "children" in module: + self.docstring_manager.traverse(module["obj"]) + else: + self.docstring_manager.modify_docstring(module["obj"]) + LOG.info("✅ Doctring generation completed...") + + def _register_tools(self): + if self.tool_registered: + return + + @fc_register("tool") + def get_module_doc(module_name: str) -> str: + """ + Get module's docstring by module name. 
+ Args: + module_name (str): Complete module name, from top-level to bottom-level, + separated by dots (.), e.g. "a.b.c". + Returns: + str: Module's docstring + """ + LOG.info(f"module_name: {module_name}") + if module_name not in self.module_doc_dict: + return f"Module {module_name} does not exist" + return self.module_doc_dict[module_name][:2000] + + @fc_register("tool") + def write_doc(path: str, content: str) -> str: + """ + Write given content to file at specified path. + Args: + path (str): Target file's relative path (based on project root). + content (str): Text content to write. + Returns: + str: Returns 'success' if successful, error message if failed. + """ + LOG.info(f"write_doc: {path}") + try: + path = os.path.join(self.project_path, path.strip("/")) + if not os.path.exists(os.path.dirname(path)): + os.makedirs(os.path.dirname(path), exist_ok=True) + with open(path, "w", encoding="utf-8") as f: + f.write(content) + LOG.info(f"write_doc {path} success") + return "success" + except Exception as e: + LOG.info(f"write_doc {path} error {e}") + return f"Error writing file: {str(e)}" + + self.tool_registered = True + + def _generate_mkdocs(self): + LOG.info("😊 Generating mkdocs...") + self._register_tools() + agent = ReactAgent(llm=self.llm, tools=["get_module_doc", "write_doc"], max_retries=20) + project_structure = self._generate_project_tree( + module_list=self.module_doc_dict.keys() + ) + language = "en" if self.language == "en" else "zh" + query = MKDOCS_PROMPT.format( + project_structure=project_structure, + mkdocs_config=generate_mkdocs_config( + site_name=os.path.basename(self.project_path), + docs_dir=f"docs/{language}", + ), + language=language, + language_type="英文" if self.language == "en" else "中文", + docs_dir="zh", + ) + LOG.info(agent(query)) + try: + if self.language == "bilingual": + docs_dir_zh = os.path.join(self.project_path, "docs", "zh") + docs_dir_en = os.path.join(self.project_path, "docs", "en") + self._translate_docs(docs_dir_zh, 
    def start_mkdocs_server(self, port=8333) -> None:
        """Serve the generated documentation with ``mkdocs serve``.

        Args:
            port (int): Port to bind on all interfaces. Defaults to 8333.

        Raises:
            ValueError: If docs/ or mkdocs.yml has not been generated yet.
        """
        docs_dir_base = os.path.join(self.project_path, "docs")
        if not os.path.exists(docs_dir_base) or not os.path.exists(os.path.join(self.project_path, "mkdocs.yml")):
            raise ValueError("Documentation directory or mkdocs.yml file does not exist, \
                please generate automatically or manually first.")

        current_dir = os.getcwd()
        try:
            import subprocess
            import atexit

            os.chdir(self.project_path)
            LOG.info("✅ Documentation generation completed, starting mkdocs service")
            # NOTE(review): "-f" points at docs/mkdocs.yml, but _generate_mkdocs
            # writes the config to the project root -- confirm the intended path.
            mkdocs_process = subprocess.Popen(
                [
                    "mkdocs",
                    "serve",
                    "-f",
                    os.path.join(docs_dir_base, "mkdocs.yml"),
                    "-a",
                    f"0.0.0.0:{port}",
                ],
                stdout=subprocess.PIPE,
                stderr=subprocess.PIPE,
            )

            def cleanup():
                # Make sure the child server dies with the interpreter.
                mkdocs_process.terminate()
                mkdocs_process.wait()

            atexit.register(cleanup)
            LOG.info(f"mkdocs 服务器已启动,请访问 http://localhost:{port}")
            # Block for 10 minutes so the foreground process keeps serving;
            # the finally clause then restores the original working directory.
            time.sleep(600)
        except Exception as e:
            LOG.info(f"启动 mkdocs 服务器时出错: {str(e)}")
        finally:
            os.chdir(current_dir)
sorted(tree_dict.items()): + if children: + tree.append(f"{indent}- {name}/") + tree.extend(process_tree_dict(children, level + 1)) + else: + tree.append(f"{indent}- {name}") + return tree + + tree = process_tree_dict(tree_dict) + return "\n".join(tree) + + def _generate_readme(self) -> None: + """Generate README.md file""" + LOG.info("😊 Generating README.md...") + readme_path = os.path.join(self.project_path, "README.md") + if os.path.exists(readme_path): + LOG.info("✅ README.md already exists, skipping generation") + return + + project_structure = self._generate_project_tree(self.module_doc_dict.keys()) + self._register_tools() + prompt = README_PROMPT.format( + project_structure=json.dumps( + project_structure, indent=2, ensure_ascii=False + ), + language="中文" if self.language == "zh" else "英文", + ) + agent = ReactAgent(llm=self.llm, tools=["get_module_doc"], max_retries=20) + readme_content = agent(prompt) + readme_content = re.sub( + r"^.*?\s*", + "", + readme_content, + flags=re.MULTILINE | re.DOTALL, + ) + + with open(readme_path, "w", encoding="utf-8") as f: + f.write(readme_content) + LOG.info("✅ README.md generation completed") + + def _generate_gitignore(self) -> None: + """Generate .gitignore file""" + LOG.info("😊 Generating .gitignore...") + gitignore_path = os.path.join(self.project_path, ".gitignore") + if os.path.exists(gitignore_path): + LOG.info("✅ .gitignore already exists, skipping generation") + return + + project_info = self._analyze_project_type() + + prompt = GITIGNORE_PROMPT.format( + project_info=json.dumps(project_info, indent=2, ensure_ascii=False) + ) + gitignore_content = self.llm(prompt) + + with open(gitignore_path, "w", encoding="utf-8") as f: + f.write(gitignore_content) + LOG.info("✅ .gitignore generation completed") + + def _generate_module_doc_dict(self, module_dict): + """Generate module documentation dictionary, organize module documentation information in markdown format""" + + def add_doc(name: str, subname: str, doc: str, 
level: str = "#"): + if name not in self.module_doc_dict: + self.module_doc_dict[name] = "" + self.module_doc_dict[name] += f"{level} {subname}:\n{doc or ''}\n\n" + + for name, info in module_dict.items(): + if doc := info.get("doc"): + add_doc(name, name, doc) + if "module" in info: + for module_name, module_info in info["module"].items(): + add_doc(module_name, module_name, module_info.get("doc", "")) + for obj_name, obj_info in module_info.items(): + obj_name = obj_name.split(".")[-1] + if isinstance(obj_info, dict): + add_doc( + module_name, obj_name, obj_info.get("doc", ""), "##" + ) + if "method" in obj_info: + for func_name, func_info in obj_info["method"].items(): + method_name = ( + f"{obj_name}.{func_name.split('.')[-1]}" + ) + add_doc( + module_name, + method_name, + func_info.get("doc", ""), + "###", + ) + + if "children" in info: + self._generate_module_doc_dict(info["children"]) + + def _generate_license(self) -> None: + """Generate LICENSE file""" + license_path = os.path.join(self.project_path, "LICENSE") + if os.path.exists(license_path): + LOG.info("LICENSE already exists, skipping generation") + return + + license_content = self.llm(LICENSE_PROMPT) + + with open(license_path, "w", encoding="utf-8") as f: + f.write(license_content) + + def _generate_requirements(self) -> None: + """Generate requirements.txt file""" + req_path = os.path.join(self.project_path, "requirements.txt") + if os.path.exists(req_path): + LOG.info("✅ requirements.txt already exists, skipping generation") + return + LOG.info("😊 Generating requirements.txt...") + dependencies = self._analyze_dependencies() + project_modules = self.module_dict.keys() + for dep in dependencies: + if any(dep.startswith(mod) for mod in project_modules): + dependencies.remove(dep) + with open(req_path, "w", encoding="utf-8") as f: + f.write("\n".join(dependencies)) + LOG.info("✅ requirements.txt generation completed") + + def _update_module_dict(self): + """ + Update docstrings for all modules in 
the project path + """ + project_modules = self.module_dict.keys() + modules_to_del = [] + for name in sys.modules: + if any(name.startswith(mod) for mod in project_modules): + modules_to_del.append(name) + for module in modules_to_del: + del sys.modules[module] + self._gen_module_dict() + + def _gen_module_dict(self): + """ + Read all module information under the project path + """ + LOG.info("😊 Analyzing project structure...") + processed_packages = set() + for root, dirs, files in os.walk(self.project_path): + dirs[:] = [d for d in dirs + if not d.startswith(".") and d not in ["__pycache__", "tests", "docs"]] + + if "__init__.py" in files: + rel_path = os.path.relpath(root, self.project_path) + module_name = rel_path.replace(os.sep, ".") + try: + module = importlib.import_module(module_name) + skip_modules = ["docs", "test", "tests"] + if ".".join(module_name.split(".")[:-1]) in processed_packages: + continue + self.module_dict |= self._process_package(module, skip_modules) + processed_packages.add(module_name) + except Exception as e: + LOG.info(f"Processing module {module_name} error: {str(e)}") + + for file in files: + if file.endswith(".py") and file not in {"__init__.py", "setup.py", + "conftest.py", "wsgi.py", "asgi.py"}: + rel_dir = os.path.relpath(root, self.project_path) + package_name = rel_dir.replace(os.sep, ".") + if package_name in processed_packages: + continue + module_path = os.path.join(root, file) + module_name = os.path.splitext(file)[0] + try: + spec = importlib.util.spec_from_file_location(module_name, module_path) + module = importlib.util.module_from_spec(spec) + self.module_dict |= self._process_module(module) + + except Exception as e: + LOG.info(f"Error processing module {module_name}: {str(e)}") + self._generate_module_doc_dict(self.module_dict) + LOG.info("✅ Project structure analysis completed...") + + def _process_module(self, module, f_module_name: str = "") -> Dict: + def _get_abs_name(obj_name): + return 
f"{f_module_name}.{obj_name}".lstrip('.') + m_dict = {"obj": module} + for name, obj in inspect.getmembers(module, inspect.isclass): + if not getattr(obj, "__module__", "").startswith(module.__name__): + continue + c_dict = {"doc": obj.__doc__, "obj": obj, "method": {}} + for method_name, method_obj in inspect.getmembers(obj, inspect.isfunction): + c_dict["method"][_get_abs_name(f"{name}.{method_name}")] = {"doc": method_obj.__doc__, + "obj": method_obj} + m_dict[_get_abs_name(name)] = c_dict + for name, obj in inspect.getmembers(module, inspect.isfunction): + if not getattr(obj, "__module__", "").startswith(module.__name__): + continue + m_dict[f"{f_module_name}.{name}"] = {"doc": obj.__doc__, "obj": obj} + if not m_dict: + return {} + return {module.__name__: m_dict} + + def _process_package(self, module, skip_modules) -> Dict: + m_dict = {"obj": module, "children": {}, "module": {}} + processed_module = set() + for importer, modname, ispkg in pkgutil.walk_packages(module.__path__, module.__name__ + "."): + if any(modname.startswith(skip_mod) for skip_mod in skip_modules): + continue + if ispkg: + m_dict["children"] |= self._process_package(importer.find_module(modname).load_module(modname), + skip_modules) + processed_module.add(modname) + continue + try: + submodule = importlib.import_module(modname) + if any(modname.startswith(mod) for mod in processed_module): + continue + m_dict["module"] |= self._process_module(submodule, f_module_name=modname) + except Exception as e: + LOG.info(f"Skipping {modname} due to import error", e) + if not m_dict["children"] and not m_dict["module"]: + return {} + return {module.__name__: m_dict} + + def _analyze_project_structure(self) -> Dict: + def pro_dict(d): + for name, module in d.items(): + if "obj" in module: + del module["obj"] + if "children" in module: + pro_dict(module["children"]) + if "method" in module: + pro_dict(module["method"]) + + pro_dict(self.module_dict) + return self.module_dict + + def 
_analyze_dependencies(self) -> List[str]: + dependencies = set() + for root, _, files in os.walk(self.project_path): + for file in files: + if file.endswith(".py"): + with open(os.path.join(root, file), "r", encoding="utf-8") as f: + content = f.read() + for line in content.split("\n"): + if line.startswith(("import ", "from ")): + module = line.split()[1].split(".")[0] + if not self._is_standard_library(module): + dependencies.add(module) + return list(dependencies) + + def _analyze_project_type(self) -> Dict: + file_extensions = set() + for root, _, files in os.walk(self.project_path): + for file in files: + ext = os.path.splitext(file)[1] + if ext: + file_extensions.add(ext[1:]) # 移除点号 + + return { + "languages": list(file_extensions), + "has_setup_py": os.path.exists(os.path.join(self.project_path, "setup.py")), + "has_requirements": os.path.exists( + os.path.join(self.project_path, "requirements.txt") + ), + "has_tests": any( + d.startswith("test") for d in os.listdir(self.project_path) + ), + } + + @staticmethod + def _is_standard_library(module_name: str) -> bool: + return module_name in sys.stdlib_module_names + + def _translate_docs(self, docs_dir_zh: str, docs_dir_en: str) -> None: + """ + Translate Chinese documents into English documents, keeping the original directory structure. 
+ """ + os.makedirs(docs_dir_en, exist_ok=True) + + for root, dirs, files in os.walk(docs_dir_zh): + rel_path = os.path.relpath(root, docs_dir_zh) + en_dir = os.path.join(docs_dir_en, rel_path) + os.makedirs(en_dir, exist_ok=True) + + for file in files: + if not file.endswith(".md"): + continue + + zh_file_path = os.path.join(root, file) + en_file_path = os.path.join(en_dir, file) + + with open(zh_file_path, "r", encoding="utf-8") as f: + zh_content = f.read() + + query = TRANSLATE_PROMPT.format(zh_content=zh_content) + en_content = self.llm(query, enable_thinking=False) + + with open(en_file_path, "w", encoding="utf-8") as f: + f.write(en_content) + + LOG.info( + f"Translated doc file: {os.path.relpath(zh_file_path, self.project_path)} -> \ + {os.path.relpath(en_file_path, self.project_path)}" + ) diff --git a/docs/scripts/lazynote/agent/prompt.py b/docs/scripts/lazynote/agent/prompt.py new file mode 100644 index 000000000..7e31d8772 --- /dev/null +++ b/docs/scripts/lazynote/agent/prompt.py @@ -0,0 +1,190 @@ +""" +存放 GitAgent 使用的各种 prompt +""" + + +README_PROMPT = """请根据以下项目结构生成一个标准的 README.md 文件: + +项目结构: +{project_structure} + +请尽可能涵盖以下部分(根据项目特点酌情取舍): +1. 项目名称和简介:简要说明项目的主要功能和应用场景。 +2. 功能特性:列出项目的核心功能和亮点。 +3. 安装说明:提供安装步骤和依赖要求。 +4. 使用方法:描述如何运行或使用项目,包含示例代码或命令。 +5. 项目结构:展示项目的目录结构和文件组织方式。 + +注意: +1. 请使用 **{language}** 生成内容,并合理组织,确保简明清晰且实用。 +2. 可调用工具获取模块的文档字符串,辅助生成内容。 +3. 直接输出可写入 README 文件的内容,不包含任何其他说明。 +""" + + +GITIGNORE_PROMPT = """请为以下类型的项目生成一个标准的 .gitignore 文件: + +项目信息: +{project_info} + +请包含: +1. 语言特定的忽略规则 +2. IDE 配置文件 +3. 操作系统生成的文件 +4. 构建输出和缓存文件 +5. 环境文件 + +注意: +1. 直接输出可以粘贴到.gitignore 文件中的内容, 不要包含任何其他说明 +""" + +LICENSE_PROMPT = """请为这个开源项目推荐一个合适的开源许可证,并提供完整的许可证文本。 +建议考虑: +1. MIT License +2. Apache License 2.0 +3. GNU GPL v3 +4. BSD License +""" + + +MKDOCS_PROMPT = """你是一个专业的 Python 文档生成助手。请根据项目的结构信息按照以下要求为项目自动生成文档: + +1. 
**生成模块 API 文档:** + - 通过模块名调用相关工具获取模块文档,并生成每个模块的独立 Markdown 文件(`.md`),可按需将多个类合并至同一文件。 + - 文档应包含以下部分(根据模块内容灵活调整): + - 模块名称与简介 + - 类和方法列表 + - 每个类和方法的详细说明(包括参数和返回值) + - 示例代码(如有) + - 使用工具调用,将生成的文档保存到 `docs/{language}/API` 目录,合理组织目录结构。 + +2. **生成最佳实践文档:** + - 根据你对项目的理解,生成项目的最佳实践文档(可生成多个),文档文件格式为 Markdown(`.md`): + - 使用工具调用,将生成的文档保存到 `docs/{language}/BestPractice` 目录,并合理组织目录结构。 + +3. **生成主页文档:** + - 生成项目文档的首页文件,‘index.md’: + - 项目名称和简介 + - 文档结构概览 + - 快速开始指引(如有) + - 使用工具调用,将生成的文档保存到 `docs/{language}` 目录。 + +4. **创建 MkDocs 配置文件:** + - 按照给定模板生成一个标准的 `mkdocs.yml` 文件: + - 文档导航设置,需包含已经生成的全部的 API 文档和最佳实践文档。 + - 导航标签一律设置为英文。 + - 导航目录为相对于 `docs/{language}` 的路径。 + - 使用工具调用,将`mkdocs.yml` 配置文件保存到项目的根目录下。 + +**注意:** +- 使用 {language_type} 生成内容。 +- 生成的文档和配置文件应符合 MkDocs 规范,确保能通过 `mkdocs serve` 正常预览和构建。 +- 由于工具调用获取模块文档可能较长,建议自行逐个模块处理,避免超出 token 限制。 + +**项目结构信息:** +{project_structure} +MkDocs 配置文件模板: +{mkdocs_config} +""" + + +TRANSLATE_PROMPT = """请将以下Markdown文档从中文翻译成英文,保持所有Markdown格式和代码块不变,只翻译文本内容: + +{zh_content} + +要求: +1. 保持所有Markdown语法格式 +2. 保持所有代码块内容不变 +3. 只翻译中文文本内容, 且不翻译模块名、注释和代码块 +4. 保持原有的文档结构 +5. 
使用专业的技术文档语言风格 +""" + + +def generate_mkdocs_config(site_name, docs_dir): + mkdocs_config = { + 'site_name': site_name, + 'site_description': f'API documentation for {site_name}', + 'docs_dir': docs_dir, + + 'theme': { + 'name': 'material', + 'palette': { + 'primary': 'indigo', + 'accent': 'pink' + }, + 'font': { + 'text': 'Roboto', + 'code': 'Roboto Mono' + } + }, + + 'nav': [ + {'Home': 'index.md'}, + {'API Reference': []} + ], + } + return mkdocs_config + + + +CUSTOM_KMDOCS_PROMPT = """你是一个专业的 Python 文档生成助手,负责根据项目信息按照要求为项目自动生成文档,你有。 + +# 工具: + - get_module_doc:根据模块名获取模块的 docstring,用于获取模块信息以便理解项目。 + - write_doc:将给定内容写入指定路径的文件,用于写生成的文档。 + +你需要按照合适的顺序使用工具,逐步完成任务。可能需要将任务分解为子任务,依次调用工具获取模块文档、生成文档文件并进行组织。 + +# 输出格式 +请使用与问题相同的语言回答,并遵循以下格式: + +想法:用户当前的语言是:(用户的语言)。我需要使用工具来帮助回答问题。 + +{TOKENIZED_PROMPT} + +答案:在此处填写你的答案(使用与用户问题相同的语言)。 + +当前对话:以下是当前对话,由人工和助手消息交织而成。请逐步思考。 + +文档生成要求 +模块 API 文档: + +通过调用 get_module_doc 获取模块 docstring。 + +包含模块名称、简介、类和方法列表、详细说明和示例代码。 + +使用 write_doc 将生成的文档保存至 docs/{language}/API 目录,合理组织目录结构。 + +最佳实践文档: + +基于项目特性生成最佳实践文档,使用 write_doc 保存至 docs/{language}/BestPractice 目录。 + +首页文档: + +包含项目名称、简介、文档结构和快速开始指引,使用 write_doc 保存至 docs/{language}/index.md。 + +MkDocs 配置文件: + +生成标准的 mkdocs.yml 配置文件,包含 API 文档和最佳实践文档的导航,使用 write_doc 保存至 docs 目录。 + """ + +PLUGIN_CONFIG= [ + {'i18n': + {'docs_structure': 'folder','languages': + [ + { + 'locale': 'en', + 'default': True, + 'name': 'English', + 'build': True + }, + { + 'locale': 'zh', + 'name': '简体中文', + 'build': True + } + ] + } + } + ] \ No newline at end of file diff --git a/docs/scripts/lazynote/editor/__init__.py b/docs/scripts/lazynote/editor/__init__.py index 32e28bd21..17cb99e79 100644 --- a/docs/scripts/lazynote/editor/__init__.py +++ b/docs/scripts/lazynote/editor/__init__.py @@ -1,5 +1,7 @@ from lazynote.editor.base import BaseEditor +from lazynote.editor.custom import CustomEditor __all__ = [ 'BaseEditor', + 'CustomEditor', ] diff --git a/docs/scripts/lazynote/editor/base.py 
from typing import Callable, Optional, Any, Dict
from lazyllm import LOG
import libcst as cst
import libcst.matchers as m
from .base import BaseEditor


class CustomEditor(BaseEditor):
    """libcst editor that rewrites docstrings class-by-class.

    On entering a class, ``gen_class_docstring`` produces docstrings for the
    class and all of its (nested) members in a single LLM call; the results
    are cached in ``current_class_doc_dict`` and spliced in as the traversal
    leaves each node.
    """

    def __init__(self,
                 gen_class_docstring: Callable[[Optional[str], str], str],
                 gen_docstring: Callable[[Optional[str], str], str],
                 pattern: str,
                 module: Any) -> None:
        super().__init__(gen_docstring, pattern, module)
        # Qualified name -> generated docstring for the class currently in scope.
        self.current_class_doc_dict: Dict[str, str] = {}
        self.gen_class_docstring = gen_class_docstring

    def leave_FunctionDef(self, original_node: cst.FunctionDef, updated_node: cst.FunctionDef) -> cst.FunctionDef:
        """Replace a function's docstring with the pre-generated one, if any."""
        func_name = f"{self.current_class}.{original_node.name.value}".lstrip('.')
        if func_name in self.current_class_doc_dict:
            docstring = self.current_class_doc_dict[func_name]
        else:
            # Fall back to the live object's existing docstring.
            obj = self._get_obj_by_name(func_name)
            docstring = obj.__doc__ if obj else None
        return self._update_node_with_new_docstring(original_node, updated_node, docstring)

    def visit_ClassDef(self, node: cst.ClassDef) -> None:
        """On entering a class, generate docstrings for all of its members.

        Nested classes whose docstrings were already produced by the enclosing
        class's generation pass only push a new scope and return early.
        """
        if f"{self.current_class}.{node.name.value}" in self.current_class_doc_dict:
            self.current_class = f'{self.current_class}.{node.name.value}'.lstrip('.')
            return

        obj = self._get_obj_by_name(node.name.value)
        docstring = obj.__doc__ if obj else None
        node_code = cst.Module([]).code_for_node(node)
        res = self.gen_class_docstring(docstring, node_code)
        self.current_class_doc_dict = {}
        # Renamed the loop variable: the original shadowed `docstring` above.
        for name, generated in res.items():
            self.current_class_doc_dict[f"{self.current_class}.{name}".lstrip('.')] = generated
        self.current_class = f'{self.current_class}.{node.name.value}'.lstrip('.')

    def leave_ClassDef(self, original_node: cst.ClassDef, updated_node: cst.ClassDef) -> cst.ClassDef:
        """Splice in the class docstring and pop one level off the scope."""
        new_docstring = self.current_class_doc_dict.get(self.current_class, None)
        updated_node = self._update_node_with_new_docstring(original_node, updated_node, new_docstring)
        # Drop the trailing ".name" segment; max() handles top-level classes
        # (rfind == -1), replacing the original's inline lambda clamp.
        self.current_class = self.current_class[:max(0, self.current_class.rfind('.'))]
        return updated_node
import libcst as cst
import inspect
import textwrap
import json
from typing import Dict, Optional
from lazyllm import LOG, OnlineChatModule
from .base import BaseManager, DocstringMode
from lazynote.editor import CustomEditor


# Fix: "原代吗" -> "原代码" in rule 4.
GENERAL_PROMPT = """你是一个专业代码文档生成器,请根据给定代码为指定目标对象生成文档字符串,严格遵循以下规则:

要求:
1. 使用{language}生成文档字符串。
2. 只为目标对象生成文档字符串,且应符合 Google 风格。
3. 若目标对象为类,仅生成单行注释描述类的核心能力。
4. 仅以字符串形式输出完整的注释内容,不包含原代码,不包含三引号。

示例输出(函数):
Calculates the sum of two numbers.

Args:
    a (int): First number to add
    b (int): Second number to add

Returns:
    int: Sum of the two numbers

示例输出(类):
Calculates the sum of two numbers.

现在请为以下代码生成注释:
目标对象:{object_name}
代码内容:
{node_code}
"""


# Fix: the last rule was numbered "3." twice; renumbered to "4.".
CLASS_PROMPT = """你是一个专业代码文档生成器,请为给定代码生成文档字符串,严格遵循以下规则:

要求:
1. 使用{language}生成文档字符串。
2. 使用Google风格的文档字符串,语言简洁明了,准确描述代码功能。
3. 将生成的文档字符串组织为JSON字典输出,遵循给定输出格式,保持文档字符串的正确缩进和格式, 不包含三引号。
4. 仅输出JSON字符串,勿输出任何额外内容。

代码内容:
{node_code}

输出格式:
{obj_dict}

"""


class CustomManager(BaseManager):
    """
    Docstring manager that generates code comments with a large language model.
    """

    llm: OnlineChatModule = None
    language: str = "zh"

    def __init__(self, llm, language, **kwargs):
        super().__init__(**kwargs)
        self.pattern = DocstringMode.FILL
        self.llm = llm
        self.language = language

    def modify_docstring(self, module: object) -> Optional[str]:
        """Rewrite *module*'s docstrings in place and return the new source.

        Returns None (after logging) when parsing or rewriting fails.
        """
        try:
            source_code = inspect.getsource(module)
            source_code = textwrap.dedent(source_code)
            tree = cst.parse_module(source_code)
            transformer = CustomEditor(
                gen_docstring=self.gen_docstring,
                gen_class_docstring=self.gen_class_docstring,
                pattern=self.pattern,
                module=module,
            )
            modified_tree = tree.visit(transformer)
            self._write_code_to_file(module, modified_tree.code)
            return modified_tree.code
        except Exception as e:
            self._handle_error(f"Skipping module {module.__name__} due to error", e)
            return None

    def gen_class_docstring(self, old_docstring: Optional[str], node_code: str) -> Dict[str, str]:
        """Generate docstrings for a class and all its members in one LLM call.

        Builds a {qualified_name: ""} skeleton from *node_code*, asks the LLM
        to fill it in as JSON, then re-indents each docstring for its nesting
        depth.  Returns {} on any failure.  (Return annotation fixed: the
        original declared ``-> str`` while returning a dict.)
        """
        try:
            module = cst.parse_module(node_code)
            obj_dict = {}
            class_name = None
            for node in module.body:
                if isinstance(node, cst.ClassDef):
                    class_name = node.name.value
                    obj_dict[class_name] = ""

                    def process_class_body(class_node, parent_name):
                        # Recursively register nested classes and methods.
                        for sub_node in class_node.body.body:
                            if isinstance(sub_node, cst.ClassDef):
                                sub_class_name = f"{parent_name}.{sub_node.name.value}"
                                obj_dict[sub_class_name] = ""
                                process_class_body(sub_node, sub_class_name)
                            elif isinstance(sub_node, cst.FunctionDef):
                                method_name = f"{parent_name}.{sub_node.name.value}"
                                obj_dict[method_name] = ""

                    process_class_body(node, class_name)

            language = "中文" if self.language == "zh" else "英文"
            prompt = CLASS_PROMPT.format(
                node_code=node_code,
                language=language,
                obj_dict=json.dumps(obj_dict, indent=4, ensure_ascii=False),
            )

            res = self.llm(prompt)

            def extract_json_from_response(response: str) -> dict:
                # Tolerate models that wrap the JSON in prose or code fences.
                start = response.find('{')
                end = response.rfind('}')
                if start == -1 or end == -1:
                    # Fix: the original returned the raw string here, which made
                    # the caller fail later with a confusing type error.
                    raise json.JSONDecodeError("no JSON object found", response, 0)
                return json.loads(response[start:end + 1])

            try:
                doc_dict = extract_json_from_response(res)
                doc_dict = {
                    name: self._fix_docstring_indent(
                        docstring, indent=4 * (name.count(".") + 1)
                    )
                    for name, docstring in doc_dict.items()
                }
                if old_docstring and class_name:
                    # Preserve a pre-existing class docstring over the generated
                    # one.  NOTE(review): the original assigned into obj_dict,
                    # which was already consumed by the prompt above -- assuming
                    # doc_dict was intended; confirm.
                    doc_dict[class_name] = old_docstring
                return doc_dict
            except json.JSONDecodeError as e:
                LOG.info(f"Error in parsing class docstring: {e}")
                return {}

        except Exception as e:
            LOG.info(f"Error in generate class docstring: {e}")
            return {}

    def gen_docstring(self, old_docstring: Optional[str], node_code: str) -> str:
        """Generate a docstring for a single function/class, keeping any old one."""
        if old_docstring:
            return old_docstring

        # Extract the target's name from its first def/class line (skipping
        # decorators) so the prompt can reference it explicitly.
        object_name = ""
        for line in node_code.strip().split("\n"):
            line = line.strip()
            if line.startswith("@"):
                continue
            if line.startswith("def "):
                object_name = line.split("def ")[1].split("(")[0].strip()
                break
            elif line.startswith("class "):
                object_name = (
                    line.split("class ")[1].split("(")[0].split(":")[0].strip()
                )
                break
        language = "中文" if self.language == "zh" else "英文"
        prompt = GENERAL_PROMPT.format(
            object_name=object_name, node_code=node_code, language=language
        )
        res = self.llm(prompt)
        return self._fix_docstring_indent(res)

    def _fix_docstring_indent(self, docstring, indent: int = 4):
        """Normalize a docstring so continuation lines sit at *indent* spaces.

        The first line is stripped; the common leading indent of the remaining
        non-empty lines is removed and replaced with *indent* spaces.
        """
        if not docstring or not indent:
            return ""

        lines = docstring.strip().split("\n")
        if len(lines) <= 1:
            return docstring.strip()

        def get_indent(line):
            return len(line) - len(line.lstrip())

        non_empty_lines = [line for line in lines[1:] if line.strip()]
        if not non_empty_lines:
            return lines[0]
        min_indent = min(get_indent(line) for line in non_empty_lines)

        result = [lines[0].strip()]
        for line in lines[1:]:
            if line.strip():
                result.append(" " * indent + line[min_indent:])
            else:
                result.append(" " * indent)
        return "\n".join(result)
index 89daaf06f..defc1f0f7 100644 --- a/docs/scripts/lazynote/parser/base.py +++ b/docs/scripts/lazynote/parser/base.py @@ -18,7 +18,7 @@ def parse(self, member, manager, **kwargs): parser(member, manager, **kwargs) def parse_module(self, module, manager, **kwargs): - print(f"--Module: {module.__name__}--") + print(f"--Processing Module: {module.__name__}--") manager.modify_docstring(module) def parse_class(self, cls, manager, **kwargs): diff --git a/lazyllm/cli/main.py b/lazyllm/cli/main.py index b2cf1d244..8220ac950 100644 --- a/lazyllm/cli/main.py +++ b/lazyllm/cli/main.py @@ -3,16 +3,19 @@ from install import install from deploy import deploy from run import run + from standardize import standardize except ImportError: from .install import install from .deploy import deploy from .run import run + from .standardize import standardize def main(): def exit(): print('Usage:\n lazyllm install [full|standard|package_name]\n' ' lazyllm deploy modelname\n lazyllm deploy mcp_server [args ...] 
import os
import sys

# Allow running from a repo checkout where lazynote lives under docs/scripts.
sys.path.append('.')
sys.path.append('./docs/scripts')


def standardize(commands):
    """CLI entry: ``lazyllm standardize <model> <language> <project_path>``.

    Validates the arguments, then runs GitAgent.standardize_project on the
    target path.  Exits with status 1 on any validation or runtime error.
    """
    # NOTE(review): the original usage string appeared truncated
    # ("Usage: lazyllm standardize ") -- reconstructed the argument names.
    if not commands or len(commands) < 3:
        print("Usage: lazyllm standardize <model> <language> <project_path>")
        sys.exit(1)

    model, language, project_path = commands[0], commands[1], commands[2]
    if not os.path.exists(project_path):
        print(f"Error: Project path does not exist: {project_path}")
        sys.exit(1)

    support_model = ["qwen", "deepseek", "gpt"]
    if model not in support_model:
        print(f"Please select from the supported models: {support_model}")
        sys.exit(1)

    try:
        # Import lazily so validation failures never require heavy deps.
        from lazynote.agent.git_agent import GitAgent
        from lazyllm import OnlineChatModule
        llm = OnlineChatModule(source=model, stream=False)
        agent = GitAgent(project_path=project_path, llm=llm, language=language)
        agent.standardize_project()

    except Exception as e:
        print(f"Error during project standardization: {e}")
        sys.exit(1)