diff --git a/docs/gen_docstrings.py b/docs/gen_docstrings.py
new file mode 100644
index 000000000..84c28fd31
--- /dev/null
+++ b/docs/gen_docstrings.py
@@ -0,0 +1,33 @@
+# Regenerate docstrings across the lazyllm package.
+#   --clean   : run a 'clear' pass only (strip generated docstrings)
+#   --replace : 'clear' pass first, then the 'fill' pass below
+#   (no flag) : 'fill' pass only
+import sys
+import os
+import argparse
+
+sys.path.append('.')
+sys.path.append('./docs/scripts')
+from lazynote.manager.custom import CustomManager
+import lazyllm
+from lazyllm import OnlineChatModule
+
+parser = argparse.ArgumentParser()
+parser.add_argument('--replace', action='store_true', help='Execute the replace part of the code.')
+parser.add_argument('--clean', action='store_true', help='clean code docs.')
+args = parser.parse_args()
+
+# Modules that must not be traversed (import side effects / irrelevant docs).
+skip_list = [
+    'lazyllm.components.deploy.relay.server',
+    'lazyllm.components.deploy.relay.base',
+    'lazyllm.components.finetune.easyllm',
+    'lazyllm.tools.rag.component.bm25_retriever',
+    'lazyllm.cli'
+]
+
+# NOTE(review): CustomManager is constructed here without llm/language —
+# confirm its __init__ accepts that for the 'clear' pattern.
+if args.replace or args.clean:
+    manager = CustomManager(pattern='clear', skip_on_error=True)
+    manager.traverse(lazyllm, skip_modules=skip_list)
+
+if not args.clean:
+    # LAZYLLM_LANGUAGE selects the docstring language ('ENGLISH' -> en, else zh).
+    language = os.getenv('LAZYLLM_LANGUAGE', 'ENGLISH')
+    language = 'en' if language == 'ENGLISH' else 'zh'
+    manager = CustomManager(llm=OnlineChatModule(source='deepseek', stream=False),
+                            language=language, pattern='fill', skip_on_error=True)
+    manager.traverse(lazyllm, skip_modules=skip_list)
\ No newline at end of file
diff --git a/docs/scripts/lazynote/agent/__init__.py b/docs/scripts/lazynote/agent/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/docs/scripts/lazynote/agent/git_agent.py b/docs/scripts/lazynote/agent/git_agent.py
new file mode 100644
index 000000000..01788b12e
--- /dev/null
+++ b/docs/scripts/lazynote/agent/git_agent.py
@@ -0,0 +1,505 @@
+import os
+import sys
+from typing import List, Dict
+import yaml
+import json
+import time
+import re
+import importlib
+import inspect
+import pkgutil
+from lazyllm.module.onlineChatModule.onlineChatModule import OnlineChatModule
+from lazyllm import ReactAgent
+from lazyllm import fc_register, LOG
+from ..manager.custom import CustomManager
+from .prompt import (
+ README_PROMPT,
+ GITIGNORE_PROMPT,
+ LICENSE_PROMPT,
+ generate_mkdocs_config,
+ MKDOCS_PROMPT,
+ TRANSLATE_PROMPT,
+ PLUGIN_CONFIG,
+)
+
+
+class GitAgent:
+    """Standardizes a Python project as a Git repo: docstrings, README, .gitignore, mkdocs."""
+
+    def __init__(self, project_path: str, llm: OnlineChatModule, language: str = "zh"):
+        """
+        Args:
+            project_path (str): Root of the project to standardize; must exist.
+            llm (OnlineChatModule): LLM used for all generation tasks.
+            language (str): "zh", "en" or "bilingual".
+
+        Raises:
+            ValueError: If the path does not exist or the language is unsupported.
+        """
+        self.project_path = os.path.abspath(project_path)
+        if not os.path.exists(self.project_path):
+            raise ValueError(f"Project path does not exist: {self.project_path}")
+
+        self.supported_languages = {"zh", "en", "bilingual"}
+        if language not in self.supported_languages:
+            raise ValueError(f"Unsupported language: {language}. "
+                             f"Please choose from {self.supported_languages}.")
+        self.language = language
+
+        self.docstring_manager = CustomManager(
+            llm=llm, pattern="fill", skip_on_error=True, language=language
+        )
+        # module_dict: nested {name: {obj, doc, children/module/method}} tree.
+        self.module_dict = {}
+        # module_doc_dict: {module name: markdown doc text} built from module_dict.
+        self.module_doc_dict = {}
+        # Make the project importable so importlib can load its modules.
+        if self.project_path not in sys.path:
+            sys.path.append(self.project_path)
+        self._gen_module_dict()
+        self.llm = llm
+        self.tool_registered = False
+
+    def standardize_project(self, gen_docstrings: bool = True, gen_mkdocs: bool = True) -> None:
+        """
+        Standardize the project as a Git project.
+
+        Runs, in order: requirements.txt, .gitignore, (optionally) docstring
+        generation + module re-scan, README.md, and (optionally) mkdocs docs.
+
+        Args:
+            gen_docstrings (bool): Also rewrite docstrings in the source tree.
+            gen_mkdocs (bool): Also generate the mkdocs documentation site.
+        """
+        self._generate_requirements()
+        self._generate_gitignore()
+        if gen_docstrings:
+            self._generate_docstring()
+            # Re-import modules so later steps see the new docstrings.
+            self._update_module_dict()
+        self._generate_readme()
+        if gen_mkdocs:
+            self._generate_mkdocs()
+        LOG.info("✨Project standardization completed")
+
+    def _generate_docstring(self) -> None:
+        """Generate docstrings for every discovered module via the docstring manager."""
+        # Fix: log messages said "doctring" instead of "docstring".
+        LOG.info("😊 Automatically generating docstring...")
+        for _, module in self.module_dict.items():
+            # Packages (entries with "children") are traversed recursively;
+            # plain modules are rewritten directly.
+            if "children" in module:
+                self.docstring_manager.traverse(module["obj"])
+            else:
+                self.docstring_manager.modify_docstring(module["obj"])
+        LOG.info("✅ Docstring generation completed...")
+
+    def _register_tools(self):
+        """Register the agent tools (get_module_doc, write_doc) exactly once."""
+        if self.tool_registered:
+            return
+
+        # Both tools are closures over self so they see the live module docs
+        # and the project path.
+        @fc_register("tool")
+        def get_module_doc(module_name: str) -> str:
+            """
+            Get module's docstring by module name.
+            Args:
+                module_name (str): Complete module name, from top-level to bottom-level,
+                    separated by dots (.), e.g. "a.b.c".
+            Returns:
+                str: Module's docstring
+            """
+            LOG.info(f"module_name: {module_name}")
+            if module_name not in self.module_doc_dict:
+                return f"Module {module_name} does not exist"
+            # Truncated to 2000 chars to keep the tool result within token limits.
+            return self.module_doc_dict[module_name][:2000]
+
+        @fc_register("tool")
+        def write_doc(path: str, content: str) -> str:
+            """
+            Write given content to file at specified path.
+            Args:
+                path (str): Target file's relative path (based on project root).
+                content (str): Text content to write.
+            Returns:
+                str: Returns 'success' if successful, error message if failed.
+            """
+            LOG.info(f"write_doc: {path}")
+            try:
+                # Resolve relative to the project root; create parents as needed.
+                path = os.path.join(self.project_path, path.strip("/"))
+                if not os.path.exists(os.path.dirname(path)):
+                    os.makedirs(os.path.dirname(path), exist_ok=True)
+                with open(path, "w", encoding="utf-8") as f:
+                    f.write(content)
+                LOG.info(f"write_doc {path} success")
+                return "success"
+            except Exception as e:
+                LOG.info(f"write_doc {path} error {e}")
+                return f"Error writing file: {str(e)}"
+
+        self.tool_registered = True
+
+    def _generate_mkdocs(self):
+        """Drive a ReactAgent to write the mkdocs site, then post-process mkdocs.yml.
+
+        For "bilingual" the agent writes Chinese docs under docs/zh which are
+        then machine-translated to docs/en, and the i18n plugin is configured.
+        """
+        LOG.info("😊 Generating mkdocs...")
+        self._register_tools()
+        agent = ReactAgent(llm=self.llm, tools=["get_module_doc", "write_doc"], max_retries=20)
+        project_structure = self._generate_project_tree(
+            module_list=self.module_doc_dict.keys()
+        )
+        # "bilingual" is treated as "zh" here: Chinese docs are the source of truth.
+        language = "en" if self.language == "en" else "zh"
+        query = MKDOCS_PROMPT.format(
+            project_structure=project_structure,
+            mkdocs_config=generate_mkdocs_config(
+                site_name=os.path.basename(self.project_path),
+                docs_dir=f"docs/{language}",
+            ),
+            language=language,
+            language_type="英文" if self.language == "en" else "中文",
+            # NOTE(review): docs_dir is hardcoded to "zh" while `language` may
+            # be "en" — confirm this is intentional (MKDOCS_PROMPT has no
+            # {docs_dir} placeholder, so this kwarg may be dead).
+            docs_dir="zh",
+        )
+        LOG.info(agent(query))
+        try:
+            if self.language == "bilingual":
+                docs_dir_zh = os.path.join(self.project_path, "docs", "zh")
+                docs_dir_en = os.path.join(self.project_path, "docs", "en")
+                self._translate_docs(docs_dir_zh, docs_dir_en)
+            # The agent is asked to write mkdocs.yml at the project root;
+            # if it failed to, there is nothing to post-process.
+            if not os.path.exists(os.path.join(self.project_path, "mkdocs.yml")):
+                return
+            with open(os.path.join(self.project_path, "mkdocs.yml"), "r", encoding="utf-8") as file:
+                config = yaml.safe_load(file)
+            # Point docs_dir at the docs/ root and enable the i18n plugin.
+            config["docs_dir"] = "docs"
+            config["plugins"] = PLUGIN_CONFIG
+            with open(
+                os.path.join(self.project_path, "mkdocs.yml"), "w", encoding="utf-8"
+            ) as file:
+                yaml.dump(config, file, allow_unicode=True, sort_keys=False)
+        except Exception as e:
+            # NOTE(review): unbalanced "(" in this log message.
+            LOG.info(f" ❗ (Error during generating mkdocs {e}")
+        LOG.info("✅ mkdocs generation completed...")
+
+    def start_mkdocs_server(self, port=8333) -> None:
+        """Serve the generated documentation with ``mkdocs serve`` on 0.0.0.0:*port*.
+
+        Raises:
+            ValueError: If docs/ or mkdocs.yml has not been generated yet.
+        """
+        docs_dir_base = os.path.join(self.project_path, "docs")
+        if not os.path.exists(docs_dir_base) or not os.path.exists(os.path.join(self.project_path, "mkdocs.yml")):
+            raise ValueError("Documentation directory or mkdocs.yml file does not exist, \
+                             please generate automatically or manually first.")
+
+        current_dir = os.getcwd()
+        try:
+            import subprocess
+            import atexit
+
+            os.chdir(self.project_path)
+            LOG.info("✅ Documentation generation completed, starting mkdocs service")
+            # Bug fix: mkdocs.yml is written (and checked above) at the project
+            # root, but serve was pointed at docs/mkdocs.yml which never exists.
+            mkdocs_process = subprocess.Popen(
+                [
+                    "mkdocs",
+                    "serve",
+                    "-f",
+                    os.path.join(self.project_path, "mkdocs.yml"),
+                    "-a",
+                    f"0.0.0.0:{port}",
+                ],
+                stdout=subprocess.PIPE,
+                stderr=subprocess.PIPE,
+            )
+
+            def cleanup():
+                # Terminate the child server when this process exits.
+                mkdocs_process.terminate()
+                mkdocs_process.wait()
+
+            atexit.register(cleanup)
+            LOG.info(f"mkdocs 服务器已启动,请访问 http://localhost:{port}")
+            # Keep the parent alive for 10 minutes so the server stays up;
+            # returning earlier would eventually trigger the atexit cleanup.
+            time.sleep(600)
+        except Exception as e:
+            LOG.info(f"启动 mkdocs 服务器时出错: {str(e)}")
+        finally:
+            os.chdir(current_dir)
+
+    def _generate_project_tree(self, module_list: list) -> str:
+        """Render dotted module names as an indented markdown-style tree.
+
+        Args:
+            module_list (list): Iterable of dotted module names ("a.b.c").
+
+        Returns:
+            str: One "- name" line per node; packages get a trailing "/".
+        """
+        tree_dict = {}
+        for module_path in module_list:
+            # Insert each dotted path into a nested dict trie.
+            current = tree_dict
+            for part in module_path.split("."):  # fix: index from enumerate was unused
+                if part not in current:
+                    current[part] = {}
+                current = current[part]
+
+        def process_tree_dict(tree_dict, level=0):
+            tree = []
+            indent = "  " * level
+            for name, children in sorted(tree_dict.items()):
+                if children:
+                    tree.append(f"{indent}- {name}/")
+                    tree.extend(process_tree_dict(children, level + 1))
+                else:
+                    tree.append(f"{indent}- {name}")
+            return tree
+
+        tree = process_tree_dict(tree_dict)
+        return "\n".join(tree)
+
+    def _generate_readme(self) -> None:
+        """Generate README.md via a ReactAgent; skipped if one already exists."""
+        LOG.info("😊 Generating README.md...")
+        readme_path = os.path.join(self.project_path, "README.md")
+        if os.path.exists(readme_path):
+            LOG.info("✅ README.md already exists, skipping generation")
+            return
+
+        project_structure = self._generate_project_tree(self.module_doc_dict.keys())
+        self._register_tools()
+        prompt = README_PROMPT.format(
+            project_structure=json.dumps(
+                project_structure, indent=2, ensure_ascii=False
+            ),
+            language="中文" if self.language == "zh" else "英文",
+        )
+        agent = ReactAgent(llm=self.llm, tools=["get_module_doc"], max_retries=20)
+        readme_content = agent(prompt)
+        # NOTE(review): with MULTILINE|DOTALL, r"^.*?\s*" mostly matches empty
+        # strings / whitespace at line starts — presumably meant to strip an
+        # agent preamble. Confirm the intended pattern; as written it can also
+        # strip indentation inside the README.
+        readme_content = re.sub(
+            r"^.*?\s*",
+            "",
+            readme_content,
+            flags=re.MULTILINE | re.DOTALL,
+        )
+
+        with open(readme_path, "w", encoding="utf-8") as f:
+            f.write(readme_content)
+        LOG.info("✅ README.md generation completed")
+
+    def _generate_gitignore(self) -> None:
+        """Generate a .gitignore from project traits via the LLM; skip if present."""
+        LOG.info("😊 Generating .gitignore...")
+        gitignore_path = os.path.join(self.project_path, ".gitignore")
+        if os.path.exists(gitignore_path):
+            LOG.info("✅ .gitignore already exists, skipping generation")
+            return
+
+        # Feed detected languages / packaging facts into the prompt.
+        project_info = self._analyze_project_type()
+
+        prompt = GITIGNORE_PROMPT.format(
+            project_info=json.dumps(project_info, indent=2, ensure_ascii=False)
+        )
+        gitignore_content = self.llm(prompt)
+
+        with open(gitignore_path, "w", encoding="utf-8") as f:
+            f.write(gitignore_content)
+        LOG.info("✅ .gitignore generation completed")
+
+    def _generate_module_doc_dict(self, module_dict):
+        """Generate module documentation dictionary, organize module documentation information in markdown format.
+
+        Flattens the nested module_dict into self.module_doc_dict, where each
+        entry is markdown text: "#" for the module, "##" per class/function,
+        "###" per method. Recurses into package "children".
+        """
+
+        def add_doc(name: str, subname: str, doc: str, level: str = "#"):
+            # Append a markdown heading + docstring to the module's doc text.
+            if name not in self.module_doc_dict:
+                self.module_doc_dict[name] = ""
+            self.module_doc_dict[name] += f"{level} {subname}:\n{doc or ''}\n\n"
+
+        for name, info in module_dict.items():
+            if doc := info.get("doc"):
+                add_doc(name, name, doc)
+            if "module" in info:
+                for module_name, module_info in info["module"].items():
+                    add_doc(module_name, module_name, module_info.get("doc", ""))
+                    for obj_name, obj_info in module_info.items():
+                        # Keys are dotted absolute names; keep only the last part.
+                        obj_name = obj_name.split(".")[-1]
+                        # dict entries are classes/functions; "obj" (the module
+                        # object itself) is skipped by the isinstance check.
+                        if isinstance(obj_info, dict):
+                            add_doc(
+                                module_name, obj_name, obj_info.get("doc", ""), "##"
+                            )
+                            if "method" in obj_info:
+                                for func_name, func_info in obj_info["method"].items():
+                                    method_name = (
+                                        f"{obj_name}.{func_name.split('.')[-1]}"
+                                    )
+                                    add_doc(
+                                        module_name,
+                                        method_name,
+                                        func_info.get("doc", ""),
+                                        "###",
+                                    )
+
+            if "children" in info:
+                self._generate_module_doc_dict(info["children"])
+
+    def _generate_license(self) -> None:
+        """Generate a LICENSE file via the LLM; skipped if one already exists."""
+        license_path = os.path.join(self.project_path, "LICENSE")
+        if os.path.exists(license_path):
+            LOG.info("LICENSE already exists, skipping generation")
+            return
+
+        # The prompt asks the LLM to pick a license and emit its full text.
+        license_content = self.llm(LICENSE_PROMPT)
+
+        with open(license_path, "w", encoding="utf-8") as f:
+            f.write(license_content)
+
+    def _generate_requirements(self) -> None:
+        """Generate requirements.txt from third-party imports; skip if present.
+
+        The project's own top-level modules are filtered out of the
+        dependency list.
+        """
+        req_path = os.path.join(self.project_path, "requirements.txt")
+        if os.path.exists(req_path):
+            LOG.info("✅ requirements.txt already exists, skipping generation")
+            return
+        LOG.info("😊 Generating requirements.txt...")
+        dependencies = self._analyze_dependencies()
+        project_modules = self.module_dict.keys()
+        # Bug fix: the original removed items from `dependencies` while
+        # iterating it, which skips the element after each removal.
+        dependencies = [dep for dep in dependencies
+                        if not any(dep.startswith(mod) for mod in project_modules)]
+        with open(req_path, "w", encoding="utf-8") as f:
+            f.write("\n".join(dependencies))
+        LOG.info("✅ requirements.txt generation completed")
+
+    def _update_module_dict(self):
+        """
+        Update docstrings for all modules in the project path.
+
+        Drops the project's modules from sys.modules so the re-scan below
+        re-imports them and picks up freshly written docstrings.
+        """
+        project_modules = self.module_dict.keys()
+        modules_to_del = []
+        for name in sys.modules:
+            if any(name.startswith(mod) for mod in project_modules):
+                modules_to_del.append(name)
+        for module in modules_to_del:
+            del sys.modules[module]
+        self._gen_module_dict()
+
+    def _gen_module_dict(self):
+        """
+        Read all module information under the project path.
+
+        Packages (dirs with __init__.py) are imported and walked recursively;
+        loose .py files are loaded from their path and processed individually.
+        """
+        LOG.info("😊 Analyzing project structure...")
+        processed_packages = set()
+        for root, dirs, files in os.walk(self.project_path):
+            # Prune hidden/cache/test/doc directories in place.
+            dirs[:] = [d for d in dirs
+                       if not d.startswith(".") and d not in ["__pycache__", "tests", "docs"]]
+
+            if "__init__.py" in files:
+                rel_path = os.path.relpath(root, self.project_path)
+                module_name = rel_path.replace(os.sep, ".")
+                try:
+                    module = importlib.import_module(module_name)
+                    skip_modules = ["docs", "test", "tests"]
+                    # Skip sub-packages whose parent package was already walked.
+                    if ".".join(module_name.split(".")[:-1]) in processed_packages:
+                        continue
+                    self.module_dict |= self._process_package(module, skip_modules)
+                    processed_packages.add(module_name)
+                except Exception as e:
+                    LOG.info(f"Processing module {module_name} error: {str(e)}")
+
+            for file in files:
+                if file.endswith(".py") and file not in {"__init__.py", "setup.py",
+                                                         "conftest.py", "wsgi.py", "asgi.py"}:
+                    rel_dir = os.path.relpath(root, self.project_path)
+                    package_name = rel_dir.replace(os.sep, ".")
+                    if package_name in processed_packages:
+                        continue
+                    module_path = os.path.join(root, file)
+                    module_name = os.path.splitext(file)[0]
+                    try:
+                        spec = importlib.util.spec_from_file_location(module_name, module_path)
+                        module = importlib.util.module_from_spec(spec)
+                        # Bug fix: the module must actually be executed before
+                        # inspect can see its classes/functions; the original
+                        # never invoked the loader, so these modules were empty.
+                        spec.loader.exec_module(module)
+                        self.module_dict |= self._process_module(module)
+
+                    except Exception as e:
+                        LOG.info(f"Error processing module {module_name}: {str(e)}")
+        self._generate_module_doc_dict(self.module_dict)
+        LOG.info("✅ Project structure analysis completed...")
+
+    def _process_module(self, module, f_module_name: str = "") -> Dict:
+        """Collect classes and functions defined in *module* with their docstrings.
+
+        Args:
+            module: An imported (executed) module object.
+            f_module_name (str): Dotted parent path used to qualify names.
+
+        Returns:
+            Dict: {module.__name__: info} or {} when nothing is defined here.
+        """
+        def _get_abs_name(obj_name):
+            # lstrip('.') keeps top-level names clean when f_module_name is "".
+            return f"{f_module_name}.{obj_name}".lstrip('.')
+        m_dict = {"obj": module}
+        for name, obj in inspect.getmembers(module, inspect.isclass):
+            # Only keep objects defined in this module, not re-exported ones.
+            if not getattr(obj, "__module__", "").startswith(module.__name__):
+                continue
+            c_dict = {"doc": obj.__doc__, "obj": obj, "method": {}}
+            for method_name, method_obj in inspect.getmembers(obj, inspect.isfunction):
+                c_dict["method"][_get_abs_name(f"{name}.{method_name}")] = {"doc": method_obj.__doc__,
+                                                                           "obj": method_obj}
+            m_dict[_get_abs_name(name)] = c_dict
+        for name, obj in inspect.getmembers(module, inspect.isfunction):
+            if not getattr(obj, "__module__", "").startswith(module.__name__):
+                continue
+            # Bug fix: use _get_abs_name so top-level functions do not get a
+            # leading-dot key (f"." + name) when f_module_name is empty.
+            m_dict[_get_abs_name(name)] = {"doc": obj.__doc__, "obj": obj}
+        # Bug fix: m_dict always contains "obj", so `if not m_dict` could never
+        # trigger; an empty module is one with only that entry.
+        if len(m_dict) == 1:
+            return {}
+        return {module.__name__: m_dict}
+
+    def _process_package(self, module, skip_modules) -> Dict:
+        """Recursively collect sub-packages ("children") and sub-modules ("module").
+
+        Returns:
+            Dict: {package name: {obj, children, module}} or {} when empty.
+        """
+        m_dict = {"obj": module, "children": {}, "module": {}}
+        processed_module = set()
+        for importer, modname, ispkg in pkgutil.walk_packages(module.__path__, module.__name__ + "."):
+            if any(modname.startswith(skip_mod) for skip_mod in skip_modules):
+                continue
+            if ispkg:
+                # NOTE(review): importer.find_module(...).load_module(...) is
+                # deprecated and removed in Python 3.12 — confirm the target
+                # runtime, or migrate to importlib.import_module(modname).
+                m_dict["children"] |= self._process_package(importer.find_module(modname).load_module(modname),
+                                                           skip_modules)
+                processed_module.add(modname)
+                continue
+            try:
+                submodule = importlib.import_module(modname)
+                # Skip modules already covered by a processed sub-package.
+                if any(modname.startswith(mod) for mod in processed_module):
+                    continue
+                m_dict["module"] |= self._process_module(submodule, f_module_name=modname)
+            except Exception as e:
+                # NOTE(review): LOG.info is given two positional args here —
+                # confirm this logger supports that; `e` may be dropped.
+                LOG.info(f"Skipping {modname} due to import error", e)
+        if not m_dict["children"] and not m_dict["module"]:
+            return {}
+        return {module.__name__: m_dict}
+
+    def _analyze_project_structure(self) -> Dict:
+        """Return module_dict with all live "obj" references stripped.
+
+        NOTE(review): this mutates self.module_dict in place (deletes the
+        "obj" entries), so the tree cannot be traversed again afterwards.
+        """
+        def pro_dict(d):
+            for name, module in d.items():
+                if "obj" in module:
+                    del module["obj"]
+                if "children" in module:
+                    pro_dict(module["children"])
+                if "method" in module:
+                    pro_dict(module["method"])
+
+        pro_dict(self.module_dict)
+        return self.module_dict
+
+    def _analyze_dependencies(self) -> List[str]:
+        """Collect non-stdlib top-level module names imported by project .py files.
+
+        NOTE(review): naive line scan — indented imports and "import a, b"
+        beyond the first name are missed; acceptable as a heuristic.
+        """
+        dependencies = set()
+        for root, _, files in os.walk(self.project_path):
+            for file in files:
+                if file.endswith(".py"):
+                    with open(os.path.join(root, file), "r", encoding="utf-8") as f:
+                        content = f.read()
+                    for line in content.split("\n"):
+                        if line.startswith(("import ", "from ")):
+                            # Keep only the top-level package of the dotted path.
+                            module = line.split()[1].split(".")[0]
+                            if not self._is_standard_library(module):
+                                dependencies.add(module)
+        return list(dependencies)
+
+    def _analyze_project_type(self) -> Dict:
+        """Summarize project traits (file extensions, packaging, tests) for prompts."""
+        file_extensions = set()
+        for root, _, files in os.walk(self.project_path):
+            for file in files:
+                ext = os.path.splitext(file)[1]
+                if ext:
+                    file_extensions.add(ext[1:])  # strip the leading dot
+
+        return {
+            "languages": list(file_extensions),
+            "has_setup_py": os.path.exists(os.path.join(self.project_path, "setup.py")),
+            "has_requirements": os.path.exists(
+                os.path.join(self.project_path, "requirements.txt")
+            ),
+            "has_tests": any(
+                d.startswith("test") for d in os.listdir(self.project_path)
+            ),
+        }
+
+    @staticmethod
+    def _is_standard_library(module_name: str) -> bool:
+        """Return True if *module_name* is a stdlib module (requires Python >= 3.10)."""
+        return module_name in sys.stdlib_module_names
+
+    def _translate_docs(self, docs_dir_zh: str, docs_dir_en: str) -> None:
+        """
+        Translate Chinese documents into English documents, keeping the original directory structure.
+
+        Args:
+            docs_dir_zh (str): Source directory with Chinese .md files.
+            docs_dir_en (str): Destination directory for the English copies.
+        """
+        os.makedirs(docs_dir_en, exist_ok=True)
+
+        for root, dirs, files in os.walk(docs_dir_zh):
+            # Mirror the zh directory layout under the en root.
+            rel_path = os.path.relpath(root, docs_dir_zh)
+            en_dir = os.path.join(docs_dir_en, rel_path)
+            os.makedirs(en_dir, exist_ok=True)
+
+            for file in files:
+                if not file.endswith(".md"):
+                    continue
+
+                zh_file_path = os.path.join(root, file)
+                en_file_path = os.path.join(en_dir, file)
+
+                with open(zh_file_path, "r", encoding="utf-8") as f:
+                    zh_content = f.read()
+
+                # One LLM call per file; thinking disabled for plain translation.
+                query = TRANSLATE_PROMPT.format(zh_content=zh_content)
+                en_content = self.llm(query, enable_thinking=False)
+
+                with open(en_file_path, "w", encoding="utf-8") as f:
+                    f.write(en_content)
+
+                LOG.info(
+                    f"Translated doc file: {os.path.relpath(zh_file_path, self.project_path)} -> \
+                        {os.path.relpath(en_file_path, self.project_path)}"
+                )
diff --git a/docs/scripts/lazynote/agent/prompt.py b/docs/scripts/lazynote/agent/prompt.py
new file mode 100644
index 000000000..7e31d8772
--- /dev/null
+++ b/docs/scripts/lazynote/agent/prompt.py
@@ -0,0 +1,190 @@
+"""Prompt templates used by GitAgent."""
+
+
+README_PROMPT = """请根据以下项目结构生成一个标准的 README.md 文件:
+
+项目结构:
+{project_structure}
+
+请尽可能涵盖以下部分(根据项目特点酌情取舍):
+1. 项目名称和简介:简要说明项目的主要功能和应用场景。
+2. 功能特性:列出项目的核心功能和亮点。
+3. 安装说明:提供安装步骤和依赖要求。
+4. 使用方法:描述如何运行或使用项目,包含示例代码或命令。
+5. 项目结构:展示项目的目录结构和文件组织方式。
+
+注意:
+1. 请使用 **{language}** 生成内容,并合理组织,确保简明清晰且实用。
+2. 可调用工具获取模块的文档字符串,辅助生成内容。
+3. 直接输出可写入 README 文件的内容,不包含任何其他说明。
+"""
+
+
+GITIGNORE_PROMPT = """请为以下类型的项目生成一个标准的 .gitignore 文件:
+
+项目信息:
+{project_info}
+
+请包含:
+1. 语言特定的忽略规则
+2. IDE 配置文件
+3. 操作系统生成的文件
+4. 构建输出和缓存文件
+5. 环境文件
+
+注意:
+1. 直接输出可以粘贴到.gitignore 文件中的内容, 不要包含任何其他说明
+"""
+
+LICENSE_PROMPT = """请为这个开源项目推荐一个合适的开源许可证,并提供完整的许可证文本。
+建议考虑:
+1. MIT License
+2. Apache License 2.0
+3. GNU GPL v3
+4. BSD License
+"""
+
+
+MKDOCS_PROMPT = """你是一个专业的 Python 文档生成助手。请根据项目的结构信息按照以下要求为项目自动生成文档:
+
+1. **生成模块 API 文档:**
+ - 通过模块名调用相关工具获取模块文档,并生成每个模块的独立 Markdown 文件(`.md`),可按需将多个类合并至同一文件。
+ - 文档应包含以下部分(根据模块内容灵活调整):
+ - 模块名称与简介
+ - 类和方法列表
+ - 每个类和方法的详细说明(包括参数和返回值)
+ - 示例代码(如有)
+ - 使用工具调用,将生成的文档保存到 `docs/{language}/API` 目录,合理组织目录结构。
+
+2. **生成最佳实践文档:**
+ - 根据你对项目的理解,生成项目的最佳实践文档(可生成多个),文档文件格式为 Markdown(`.md`):
+ - 使用工具调用,将生成的文档保存到 `docs/{language}/BestPractice` 目录,并合理组织目录结构。
+
+3. **生成主页文档:**
+ - 生成项目文档的首页文件,‘index.md’:
+ - 项目名称和简介
+ - 文档结构概览
+ - 快速开始指引(如有)
+ - 使用工具调用,将生成的文档保存到 `docs/{language}` 目录。
+
+4. **创建 MkDocs 配置文件:**
+ - 按照给定模板生成一个标准的 `mkdocs.yml` 文件:
+ - 文档导航设置,需包含已经生成的全部的 API 文档和最佳实践文档。
+ - 导航标签一律设置为英文。
+ - 导航目录为相对于 `docs/{language}` 的路径。
+ - 使用工具调用,将`mkdocs.yml` 配置文件保存到项目的根目录下。
+
+**注意:**
+- 使用 {language_type} 生成内容。
+- 生成的文档和配置文件应符合 MkDocs 规范,确保能通过 `mkdocs serve` 正常预览和构建。
+- 由于工具调用获取模块文档可能较长,建议自行逐个模块处理,避免超出 token 限制。
+
+**项目结构信息:**
+{project_structure}
+MkDocs 配置文件模板:
+{mkdocs_config}
+"""
+
+
+TRANSLATE_PROMPT = """请将以下Markdown文档从中文翻译成英文,保持所有Markdown格式和代码块不变,只翻译文本内容:
+
+{zh_content}
+
+要求:
+1. 保持所有Markdown语法格式
+2. 保持所有代码块内容不变
+3. 只翻译中文文本内容, 且不翻译模块名、注释和代码块
+4. 保持原有的文档结构
+5. 使用专业的技术文档语言风格
+"""
+
+
+def generate_mkdocs_config(site_name, docs_dir):
+    """Return a baseline mkdocs.yml config dict for *site_name* rooted at *docs_dir*.
+
+    Used as the template handed to the doc-generating agent; the nav section
+    is a stub the agent is expected to fill in.
+    """
+    mkdocs_config = {
+        'site_name': site_name,
+        'site_description': f'API documentation for {site_name}',
+        'docs_dir': docs_dir,
+
+        'theme': {
+            'name': 'material',
+            'palette': {
+                'primary': 'indigo',
+                'accent': 'pink'
+            },
+            'font': {
+                'text': 'Roboto',
+                'code': 'Roboto Mono'
+            }
+        },
+
+        'nav': [
+            {'Home': 'index.md'},
+            {'API Reference': []}
+        ],
+    }
+    return mkdocs_config
+
+
+
+# NOTE(review): the name is misspelled ("KMDOCS" for "MKDOCS") and this
+# constant appears unused in this patch; its {TOKENIZED_PROMPT} placeholder
+# must be supplied by whoever formats it. Confirm before removing/renaming.
+CUSTOM_KMDOCS_PROMPT = """你是一个专业的 Python 文档生成助手,负责根据项目信息按照要求为项目自动生成文档,你有。
+
+# 工具:
+    - get_module_doc:根据模块名获取模块的 docstring,用于获取模块信息以便理解项目。
+    - write_doc:将给定内容写入指定路径的文件,用于写生成的文档。
+
+你需要按照合适的顺序使用工具,逐步完成任务。可能需要将任务分解为子任务,依次调用工具获取模块文档、生成文档文件并进行组织。
+
+# 输出格式
+请使用与问题相同的语言回答,并遵循以下格式:
+
+想法:用户当前的语言是:(用户的语言)。我需要使用工具来帮助回答问题。
+
+{TOKENIZED_PROMPT}
+
+答案:在此处填写你的答案(使用与用户问题相同的语言)。
+
+当前对话:以下是当前对话,由人工和助手消息交织而成。请逐步思考。
+
+文档生成要求
+模块 API 文档:
+
+通过调用 get_module_doc 获取模块 docstring。
+
+包含模块名称、简介、类和方法列表、详细说明和示例代码。
+
+使用 write_doc 将生成的文档保存至 docs/{language}/API 目录,合理组织目录结构。
+
+最佳实践文档:
+
+基于项目特性生成最佳实践文档,使用 write_doc 保存至 docs/{language}/BestPractice 目录。
+
+首页文档:
+
+包含项目名称、简介、文档结构和快速开始指引,使用 write_doc 保存至 docs/{language}/index.md。
+
+MkDocs 配置文件:
+
+生成标准的 mkdocs.yml 配置文件,包含 API 文档和最佳实践文档的导航,使用 write_doc 保存至 docs 目录。
+ """
+
+# mkdocs-static-i18n plugin config injected into mkdocs.yml for bilingual docs
+# (folder layout: docs/en + docs/zh, English is the default locale).
+PLUGIN_CONFIG= [
+    {'i18n':
+        {'docs_structure': 'folder','languages':
+            [
+                {
+                    'locale': 'en',
+                    'default': True,
+                    'name': 'English',
+                    'build': True
+                },
+                {
+                    'locale': 'zh',
+                    'name': '简体中文',
+                    'build': True
+                }
+            ]
+        }
+    }
+    ]
\ No newline at end of file
diff --git a/docs/scripts/lazynote/editor/__init__.py b/docs/scripts/lazynote/editor/__init__.py
index 32e28bd21..17cb99e79 100644
--- a/docs/scripts/lazynote/editor/__init__.py
+++ b/docs/scripts/lazynote/editor/__init__.py
@@ -1,5 +1,7 @@
from lazynote.editor.base import BaseEditor
+from lazynote.editor.custom import CustomEditor
__all__ = [
'BaseEditor',
+ 'CustomEditor',
]
diff --git a/docs/scripts/lazynote/editor/base.py b/docs/scripts/lazynote/editor/base.py
index 08df82d15..2f25f6b7c 100644
--- a/docs/scripts/lazynote/editor/base.py
+++ b/docs/scripts/lazynote/editor/base.py
@@ -118,7 +118,7 @@ def leave_Module(self, original_node: cst.Module, updated_node: cst.Module) -> c
Returns:
cst.Module: The updated Module node with a new docstring.
"""
- return self._update_node_with_new_docstring(original_node, updated_node, self.module.__doc__)
+ return updated_node
def _get_obj_by_name(self, name: str) -> Optional[Any]:
"""
diff --git a/docs/scripts/lazynote/editor/custom.py b/docs/scripts/lazynote/editor/custom.py
new file mode 100644
index 000000000..81ca2d6a8
--- /dev/null
+++ b/docs/scripts/lazynote/editor/custom.py
@@ -0,0 +1,46 @@
+from typing import Callable, Optional, Any, Dict, Set
+from lazyllm import LOG
+import libcst as cst
+import libcst.matchers as m
+from .base import BaseEditor
+
+
+class CustomEditor(BaseEditor):
+    """
+    A custom tool for transforming code text and generating new code text.
+
+    Stateful libcst transformer: visit_ClassDef generates docstrings for a
+    whole class (and its methods) in one LLM call and caches them in
+    current_class_doc_dict; leave_FunctionDef/leave_ClassDef then splice the
+    cached entries into the tree. self.current_class tracks the dotted path
+    of the class currently being visited.
+    """
+    def __init__(self, gen_class_docstring: Callable[[Optional[str], str], str], gen_docstring: Callable[[Optional[str], str], str], pattern: str, module: Any) -> None:
+        super().__init__(gen_docstring, pattern, module)
+        # Maps "Outer.Inner.method" -> generated docstring for the class
+        # currently being processed.
+        self.current_class_doc_dict: Dict[str, str] = {}
+        self.gen_class_docstring = gen_class_docstring
+
+    def leave_FunctionDef(self, original_node: cst.FunctionDef, updated_node: cst.FunctionDef) -> cst.FunctionDef:
+        func_name = f"{self.current_class}.{original_node.name.value}".lstrip('.')
+        if func_name in self.current_class_doc_dict:
+            # Use the docstring produced by the class-level generation pass.
+            docstring = self.current_class_doc_dict[func_name]
+        else:
+            # Module-level function: fall back to the live object's docstring.
+            obj = self._get_obj_by_name(func_name)
+            docstring = obj.__doc__ if obj else None
+        return self._update_node_with_new_docstring(original_node, updated_node, docstring)
+
+    def visit_ClassDef(self, node: cst.ClassDef) -> None:
+        # Nested class already covered by the enclosing class's generation
+        # pass: just descend without a new LLM call.
+        if f"{self.current_class}.{node.name.value}" in self.current_class_doc_dict:
+            self.current_class = f'{self.current_class}.{node.name.value}'.lstrip('.')
+            return
+
+        obj = self._get_obj_by_name(node.name.value)
+        docstring = obj.__doc__ if obj else None
+        # Render this class back to source for the LLM prompt.
+        node_code = cst.Module([]).code_for_node(node)
+        res = self.gen_class_docstring(docstring, node_code)
+        self.current_class_doc_dict = {}
+        for name, docstring in res.items():
+            self.current_class_doc_dict[f"{self.current_class}.{name}".lstrip('.')] = docstring
+        self.current_class = f'{self.current_class}.{node.name.value}'.lstrip('.')
+
+    def leave_ClassDef(self, original_node: cst.ClassDef, updated_node: cst.ClassDef) -> cst.ClassDef:
+        new_docstring = self.current_class_doc_dict.get(self.current_class, None)
+        updated_node = self._update_node_with_new_docstring(original_node, updated_node, new_docstring)
+        # Pop the last ".name" segment; the lambda clamps rfind's -1 (no dot,
+        # i.e. top-level class) to 0 so the path resets to "".
+        self.current_class = self.current_class[:(lambda x: 0 if x < 0 else x)(self.current_class.rfind('.'))]
+        return updated_node
+
+
\ No newline at end of file
diff --git a/docs/scripts/lazynote/manager/custom.py b/docs/scripts/lazynote/manager/custom.py
index a04584837..ed7c3e28f 100644
--- a/docs/scripts/lazynote/manager/custom.py
+++ b/docs/scripts/lazynote/manager/custom.py
@@ -1,20 +1,192 @@
+import libcst as cst
+import inspect
+import textwrap
+import json
from typing import Optional
-from lazynote.manager.base import BaseManager
+from lazyllm import LOG, OnlineChatModule
+from .base import BaseManager, DocstringMode
+from lazynote.editor import CustomEditor
+
+
+# Prompt for a single function/class docstring (plain string result).
+GENERAL_PROMPT = """你是一个专业代码文档生成器,请根据给定代码为指定目标对象生成文档字符串,严格遵循以下规则:
+
+要求:
+1. 使用{language}生成文档字符串。
+2. 只为目标对象生成文档字符串,且应符合 Google 风格。
+3. 若目标对象为类,仅生成单行注释描述类的核心能力。
+4. 仅以字符串形式输出完整的注释内容,不包含原代码,不包含三引号。
+
+示例输出(函数):
+Calculates the sum of two numbers.
+
+Args:
+    a (int): First number to add
+    b (int): Second number to add
+
+Returns:
+    int: Sum of the two numbers
+
+示例输出(类):
+Calculates the sum of two numbers.
+
+现在请为以下代码生成注释:
+目标对象:{object_name}
+代码内容:
+{node_code}
+"""
+
+
+# Prompt for a whole class: the LLM fills a JSON dict of {name: docstring}.
+CLASS_PROMPT = """你是一个专业代码文档生成器,请为给定代码生成文档字符串,严格遵循以下规则:
+
+要求:
+1. 使用{language}生成文档字符串。
+2. 使用Google风格的文档字符串,语言简洁明了,准确描述代码功能。
+3. 将生成的文档字符串组织为JSON字典输出,遵循给定输出格式,保持文档字符串的正确缩进和格式, 不包含三引号。
+4. 仅输出JSON字符串,勿输出任何额外内容。
+
+代码内容:
+{node_code}
+
+输出格式:
+{obj_dict}
+
+"""
+
class CustomManager(BaseManager):
- def gen_docstring(self, old_docstring: Optional[str], pattern: str, node_code: str) -> str:
- """
- Custom logic to generate a new docstring.
-
- Args:
- old_docstring (Optional[str]): The old docstring.
- pattern (str): The pattern string to be added.
- node_code (str): The node code.
-
- Returns:
- str: The new docstring.
- """
+    """Manager that generates code docstrings with a large language model."""
+
+    llm: OnlineChatModule = None
+    language: str = "zh"
+
+    def __init__(self, llm=None, language="zh", **kwargs):
+        """
+        Args:
+            llm: Chat module used for generation; may be None for
+                non-generating patterns such as 'clear'.
+            language (str): "zh" or "en" docstring language.
+            **kwargs: Forwarded to BaseManager (pattern, skip_on_error, ...).
+        """
+        # Bug fix: llm/language were required, which broke the pattern-only
+        # call CustomManager(pattern='clear', skip_on_error=True) used by
+        # docs/gen_docstrings.py.
+        super().__init__(**kwargs)
+        # Bug fix: only default to FILL when no pattern was requested; the
+        # previous unconditional override silently discarded pattern='clear'.
+        if 'pattern' not in kwargs:
+            self.pattern = DocstringMode.FILL
+        self.llm = llm
+        self.language = language
+
+    def modify_docstring(self, module: object) -> Optional[str]:
+        """Rewrite *module*'s source with generated docstrings and save it.
+
+        Returns:
+            Optional[str]: The modified source code, or None on failure
+            (errors are routed through _handle_error / skip_on_error).
+        """
+
+        try:
+            source_code = inspect.getsource(module)
+            # Dedent so method-level sources parse as top-level modules.
+            source_code = textwrap.dedent(source_code)
+            tree = cst.parse_module(source_code)
+            transformer = CustomEditor(
+                gen_docstring=self.gen_docstring,
+                gen_class_docstring=self.gen_class_docstring,
+                pattern=self.pattern,
+                module=module,
+            )
+            modified_tree = tree.visit(transformer)
+            # Persist the rewritten source back to the module's file.
+            self._write_code_to_file(module, modified_tree.code)
+            return modified_tree.code
+        except Exception as e:
+            self._handle_error(f"Skipping module {module.__name__} due to error", e)
+            return None
+
+    def gen_class_docstring(self, old_docstring: Optional[str], node_code: str) -> str:
+        """Generate docstrings for a class and all of its (nested) members.
+
+        Args:
+            old_docstring: Existing class docstring; preserved in the result.
+            node_code: Source code of the class definition.
+
+        Returns:
+            dict: {qualified_name: docstring}; {} on any failure.
+        """
+        try:
+            module = cst.parse_module(node_code)
+
+            def process_class_body(class_node, parent_name, obj_dict):
+                # Record every nested class/method under its dotted name.
+                for sub_node in class_node.body.body:
+                    if isinstance(sub_node, cst.ClassDef):
+                        sub_class_name = f"{parent_name}.{sub_node.name.value}"
+                        obj_dict[sub_class_name] = ""
+                        process_class_body(sub_node, sub_class_name, obj_dict)
+                    elif isinstance(sub_node, cst.FunctionDef):
+                        obj_dict[f"{parent_name}.{sub_node.name.value}"] = ""
+
+            obj_dict = {}
+            class_name = None
+            for node in module.body:
+                if isinstance(node, cst.ClassDef):
+                    class_name = node.name.value
+                    obj_dict[class_name] = ""
+                    # Bug fix: only descend into class bodies. The original
+                    # called process_class_body for every top-level node,
+                    # crashing on non-class nodes / an unbound class_name.
+                    process_class_body(node, class_name, obj_dict)
+            if class_name is None:
+                return {}
+
+            language = "中文" if self.language == "zh" else "英文"
+            prompt = CLASS_PROMPT.format(
+                node_code=node_code,
+                language=language,
+                obj_dict=json.dumps(obj_dict, indent=4, ensure_ascii=False),
+            )
+
+            res = self.llm(prompt)
+
+            def extract_json_from_response(response: str) -> dict:
+                start = response.find('{')
+                end = response.rfind('}')
+                if start == -1 or end == -1:
+                    # Bug fix: raise instead of returning the raw string,
+                    # which the caller cannot iterate as a dict.
+                    raise json.JSONDecodeError("no JSON object found", response, 0)
+                return json.loads(response[start:end + 1])
+
+            try:
+                doc_dict = extract_json_from_response(res)
+                # Re-indent each docstring by its nesting depth (4 spaces/level).
+                doc_dict = {
+                    name: self._fix_docstring_indent(
+                        docstring, indent=4 * (name.count(".") + 1)
+                    )
+                    for name, docstring in doc_dict.items()
+                }
+                if old_docstring:
+                    # Bug fix: keep the existing class docstring in the result
+                    # dict — the original wrote it into the discarded obj_dict.
+                    doc_dict[class_name] = old_docstring
+                return doc_dict
+            except json.JSONDecodeError as e:
+                LOG.info(f"Error in parsing class docstring: {e}")
+                return {}
+
+        except Exception as e:
+            LOG.info(f"Error in generate class docstring: {e}")
+            return {}
+
+    def gen_docstring(self, old_docstring: Optional[str], node_code: str) -> str:
+        """Generate a docstring for a single function/class via the LLM.
+
+        Existing docstrings are preserved untouched (FILL semantics).
+        """
        if old_docstring:
-            return f"{old_docstring}\n\n{pattern}"
-        else:
-            return f"{pattern}"
+            return old_docstring
+
+        # Derive the target object's name from the first def/class line,
+        # skipping decorator lines.
+        object_name = ""
+        lines = node_code.strip().split("\n")
+        for line in lines:
+            line = line.strip()
+            if line.startswith("@"):
+                continue
+            if line.startswith("def "):
+                object_name = line.split("def ")[1].split("(")[0].strip()
+                break
+            elif line.startswith("class "):
+                object_name = (
+                    line.split("class ")[1].split("(")[0].split(":")[0].strip()
+                )
+                break
+        language = "中文" if self.language == "zh" else "英文"
+        prompt = GENERAL_PROMPT.format(
+            object_name=object_name, node_code=node_code, language=language
+        )
+        res = self.llm(prompt)
+        return self._fix_docstring_indent(res)
+
+    def _fix_docstring_indent(self, docstring, indent: int = 4):
+        """Normalize a generated docstring to *indent* leading spaces per line.
+
+        Lines after the first are re-indented relative to the block's minimum
+        indent; blank lines are replaced by *indent* spaces.
+        """
+        # NOTE(review): indent=0 is falsy, so a non-empty docstring with
+        # indent=0 returns "" — confirm callers never pass 0.
+        if not docstring or not indent:
+            return ""
+
+        lines = docstring.strip().split("\n")
+        if len(lines) <= 1:
+            return docstring.strip()
+
+        def get_indent(line):
+            return len(line) - len(line.lstrip())
+
+        non_empty_lines = [line for line in lines[1:] if line.strip()]
+        if not non_empty_lines:
+            return lines[0]
+        min_indent = min(get_indent(line) for line in non_empty_lines)
+
+        result = [lines[0].strip()]
+        for line in lines[1:]:
+            if line.strip():
+                result.append(" " * indent + line[min_indent:])
+            else:
+                result.append(" " * indent)
+        return "\n".join(result)
diff --git a/docs/scripts/lazynote/parser/base.py b/docs/scripts/lazynote/parser/base.py
index 89daaf06f..defc1f0f7 100644
--- a/docs/scripts/lazynote/parser/base.py
+++ b/docs/scripts/lazynote/parser/base.py
@@ -18,7 +18,7 @@ def parse(self, member, manager, **kwargs):
parser(member, manager, **kwargs)
def parse_module(self, module, manager, **kwargs):
- print(f"--Module: {module.__name__}--")
+ print(f"--Processing Module: {module.__name__}--")
manager.modify_docstring(module)
def parse_class(self, cls, manager, **kwargs):
diff --git a/lazyllm/cli/main.py b/lazyllm/cli/main.py
index b2cf1d244..8220ac950 100644
--- a/lazyllm/cli/main.py
+++ b/lazyllm/cli/main.py
@@ -3,16 +3,19 @@
from install import install
from deploy import deploy
from run import run
+ from standardize import standardize
except ImportError:
from .install import install
from .deploy import deploy
from .run import run
+ from .standardize import standardize
def main():
def exit():
print('Usage:\n lazyllm install [full|standard|package_name]\n'
' lazyllm deploy modelname\n lazyllm deploy mcp_server [args ...] [options]\n'
- ' lazyllm run graph.json\n lazyllm run chatbot\n lazyllm run rag\n')
+ ' lazyllm run graph.json\n lazyllm run chatbot\n lazyllm run rag\n'
+ ' lazyllm standardize \n')
sys.exit(1)
if len(sys.argv) <= 1: exit()
@@ -24,6 +27,8 @@ def exit():
deploy(commands)
elif sys.argv[1] == 'run':
run(commands)
+ elif sys.argv[1] == 'standardize':
+ standardize(commands)
else:
exit()
diff --git a/lazyllm/cli/standardize.py b/lazyllm/cli/standardize.py
new file mode 100644
index 000000000..5886d8250
--- /dev/null
+++ b/lazyllm/cli/standardize.py
@@ -0,0 +1,34 @@
+import os
+import sys
+
+sys.path.append('.')
+sys.path.append('./docs/scripts')
+
+
+def standardize(commands):
+    """CLI entry: ``lazyllm standardize <model> <language> <project_path>``.
+
+    Args:
+        commands (list): Positional CLI args after the subcommand.
+    """
+    if not commands or len(commands) < 3:
+        # Fix: the usage string lost its argument placeholders.
+        print("Usage: lazyllm standardize <model> <language> <project_path>")
+        sys.exit(1)
+
+    model = commands[0]
+    language = commands[1]
+    project_path = commands[2]
+    if not os.path.exists(project_path):
+        print(f"Error: Project path does not exist: {project_path}")
+        sys.exit(1)
+
+    support_model = ["qwen", "deepseek", "gpt"]
+    if model not in support_model:
+        print(f"Please select from the supported models: {support_model}")
+        sys.exit(1)
+
+    try:
+        # Imported lazily: lazynote lives under docs/scripts (added to sys.path
+        # at module import) and is only needed for this subcommand.
+        from lazynote.agent.git_agent import GitAgent
+        from lazyllm import OnlineChatModule
+        llm = OnlineChatModule(source=model, stream=False)
+        agent = GitAgent(project_path=project_path, llm=llm, language=language)
+        agent.standardize_project()
+
+    except Exception as e:
+        print(f"Error during project standardization: {e}")
+        sys.exit(1)