diff --git a/.github/issue_template/bug_report.yml b/.github/issue_template/bug_report.yml
new file mode 100644
index 000000000..7a0c2f1a5
--- /dev/null
+++ b/.github/issue_template/bug_report.yml
@@ -0,0 +1,43 @@
+name: 🐞 Bug report
+description: Submit Bug feedback
+title: "[Bug]: "
+labels: ["bug"]
+body:
+  - type: markdown
+    attributes:
+      value: |
+        Thank you for reporting the bug. Please fill in the following information to help us reproduce the issue.
+  - type: input
+    id: env
+    attributes:
+      label: Environment Information
+      description: System version / Browser / Runtime environment
+      placeholder: macOS 14.5, Chrome 118
+    validations:
+      required: true
+  - type: textarea
+    id: steps
+    attributes:
+      label: Reproduction Steps
+      description: Write down step by step how to trigger this bug
+    validations:
+      required: true
+  - type: textarea
+    id: expected
+    attributes:
+      label: Expected Behavior
+      description: What should happen ideally
+    validations:
+      required: true
+  - type: textarea
+    id: actual
+    attributes:
+      label: Actual Behavior
+      description: What actually happened
+    validations:
+      required: true
+  - type: textarea
+    id: screenshots
+    attributes:
+      label: Screenshots / Logs
+      description: Provide screenshots or logs (optional)
diff --git a/.github/issue_template/enhancement.yml b/.github/issue_template/enhancement.yml
new file mode 100644
index 000000000..c16631bb8
--- /dev/null
+++ b/.github/issue_template/enhancement.yml
@@ -0,0 +1,24 @@
+name: ⚙️ Enhancement
+description: Propose optimization suggestions for existing features
+title: "[Enhancement]: "
+labels: ["enhancement"]
+body:
+  - type: textarea
+    id: current
+    attributes:
+      label: Current Behavior
+      description: How existing features currently work
+    validations:
+      required: true
+  - type: textarea
+    id: improved
+    attributes:
+      label: Improvement Suggestions
+      description: How you would like to improve it
+    validations:
+      required: true
+  - type: textarea
+    id: benefit
+    attributes:
+      label: Benefits of Improvement
+      description: Benefits after improvement
diff --git a/.github/issue_template/feature_request.yml b/.github/issue_template/feature_request.yml
new file mode 100644
index 000000000..3314f3de9
--- /dev/null
+++ b/.github/issue_template/feature_request.yml
@@ -0,0 +1,28 @@
+name: ✨ Feature request
+description: Submit new feature requirements
+title: "[Feature]: "
+labels: ["feature"]
+body:
+  - type: markdown
+    attributes:
+      value: |
+        Please describe the new feature you would like to add and its use cases.
+  - type: textarea
+    id: description
+    attributes:
+      label: Feature Description
+      description: Briefly describe what the feature does
+    validations:
+      required: true
+  - type: textarea
+    id: usecase
+    attributes:
+      label: Use Cases
+      description: How this feature will be used
+    validations:
+      required: true
+  - type: textarea
+    id: design
+    attributes:
+      label: Design Ideas / Technical Details
+      description: (Optional) Your thoughts on implementation approach
diff --git a/.github/issue_template/performance.yml b/.github/issue_template/performance.yml
new file mode 100644
index 000000000..0345ab893
--- /dev/null
+++ b/.github/issue_template/performance.yml
@@ -0,0 +1,22 @@
+name: 🚀 Performance issue
+description: Submit performance related issues or optimization suggestions
+title: "[Performance]: "
+labels: ["performance"]
+body:
+  - type: textarea
+    id: bottleneck
+    attributes:
+      label: Current Bottleneck
+      description: Describe the performance issue
+    validations:
+      required: true
+  - type: textarea
+    id: metrics
+    attributes:
+      label: Performance Metrics
+      description: Provide test data / benchmarks
+  - type: textarea
+    id: proposal
+    attributes:
+      label: Optimization Suggestions
+      description: Your optimization ideas
diff --git a/.github/pull_request_template.md b/.github/pull_request_template.md
new file mode 100644
index 000000000..80a7550fe
--- /dev/null
+++ b/.github/pull_request_template.md
@@ -0,0 +1,60 @@
+# Pull Request
+
+## Description
+Brief description of the changes
+
+## Type of Change
+- [ ] Feature addition
+- [ ] Bug fix
+- [ ] Performance optimization
+- [ ] Code refactoring
+- [ ] Documentation update
+- [ ] Testing related
+- [ ] Security fix
+
+## Impact Scope
+- [ ] User interface
+- [ ] API interface
+- [ ] Database
+- [ ] Configuration files
+- [ ] Dependencies
+
+## Priority
+- [ ] High - Release immediately
+- [ ] Medium - Next version
+- [ ] Low - Future version
+
+## Changes Made
+- [ ] Change 1
+- [ ] Change 2
+- [ ] Change 3
+
+## Testing
+- [ ] Unit tests pass
+- [ ] Integration tests pass
+- [ ] Manual testing completed
+- [ ] Performance testing (if applicable)
+
+## Documentation
+- [ ] Code comments added/updated
+- [ ] README updated (if applicable)
+- [ ] API documentation updated (if applicable)
+
+## Breaking Changes
+- [ ] No breaking changes
+- [ ] Breaking changes documented
+- [ ] Migration guide provided
+
+## Related Issues
+Closes #123, Related to #456
+
+## Additional Notes
+Any additional information or context
+
+---
+
+## Release Notes Points
+- User-visible changes
+- Configuration change instructions
+- Migration steps
+- Known issues 
\ No newline at end of file
diff --git a/.github/pull_request_template/README.md b/.github/pull_request_template/README.md
new file mode 100644
index 000000000..45a0c6fb4
--- /dev/null
+++ b/.github/pull_request_template/README.md
@@ -0,0 +1,55 @@
+# Pull Request Templates
+
+This directory contains specialized PR templates for different types of changes. Choose the appropriate template based on your change type.
+
+## Available Templates
+
+### 🚀 [Feature](feature.md)
+Use this template when adding new functionality or features to the project.
+
+### 🐛 [Bug Fix](bugfix.md)
+Use this template when fixing bugs or issues in the existing codebase.
+
+### ⚡ [Performance](performance.md)
+Use this template when optimizing performance or improving efficiency.
+
+### 🔧 [Refactor](refactor.md)
+Use this template when restructuring or improving existing code without changing functionality.
+
+### 📚 [Documentation](documentation.md)
+Use this template when updating documentation, README files, or code comments.
+
+### 🧪 [Testing](testing.md)
+Use this template when adding or improving tests, test coverage, or testing infrastructure.
+
+### 🔒 [Security](security.md)
+Use this template when fixing security vulnerabilities or implementing security improvements.
+
+## How to Use
+
+1. **Choose the appropriate template** based on your change type
+2. **Copy the template content** into your PR description
+3. **Fill in all required sections** with relevant information
+4. **Check the appropriate boxes** for change type, impact scope, and priority
+5. **Provide detailed information** in the release notes points section
+
+## Standardized Fields
+
+All templates include standardized fields to help with:
+- **Automated categorization** of PRs
+- **Release note generation** 
+- **Impact assessment**
+- **Priority determination**
+
+## Benefits
+
+These templates help:
+- **Standardize PR descriptions** across the project
+- **Ensure completeness** of information provided
+- **Facilitate automated release note generation**
+- **Improve code review efficiency**
+- **Maintain consistent documentation**
+
+## Default Template
+
+If none of the specialized templates fit your needs, use the default template at `.github/pull_request_template.md`. 
\ No newline at end of file
diff --git a/.github/pull_request_template/bugfix.md b/.github/pull_request_template/bugfix.md
new file mode 100644
index 000000000..268fbd269
--- /dev/null
+++ b/.github/pull_request_template/bugfix.md
@@ -0,0 +1,61 @@
+# 🐛 Bug Fix
+
+## Problem Description
+Detailed description of the encountered bug phenomenon
+
+## Problem Analysis
+- Root cause of the bug
+- Scope of impact
+- Reproduction steps
+
+## Fix Solution
+- Specific fix method
+- Why this fix approach was chosen
+- Other possible solutions
+
+## Fix Verification
+- [ ] Post-fix testing
+- [ ] Regression testing
+- [ ] Edge case testing
+
+## Impact Assessment
+- Impact of the fix on existing functionality
+- Performance impact
+- Backward compatibility
+
+## Prevention Measures
+- How to avoid similar issues
+- Whether test cases need to be added
+- Code review points
+
+## Related Issues
+Fixes #123, Related to #456
+
+---
+
+## Change Type
+- [ ] Feature addition
+- [x] Bug fix
+- [ ] Performance optimization
+- [ ] Code refactoring
+- [ ] Documentation update
+- [ ] Testing related
+- [ ] Security fix
+
+## Impact Scope
+- [ ] User interface
+- [ ] API interface
+- [ ] Database
+- [ ] Configuration files
+- [ ] Dependencies
+
+## Priority
+- [ ] High - Release immediately
+- [ ] Medium - Next version
+- [ ] Low - Future version
+
+## Release Notes Points
+- User-visible changes
+- Configuration change instructions
+- Migration steps
+- Known issues 
\ No newline at end of file
diff --git a/.github/pull_request_template/documentation.md b/.github/pull_request_template/documentation.md
new file mode 100644
index 000000000..79863a631
--- /dev/null
+++ b/.github/pull_request_template/documentation.md
@@ -0,0 +1,59 @@
+# 📚 Documentation Update
+
+## Update Scope
+- Types of documentation to update
+- Document chapters involved
+- Depth and breadth of updates
+
+## Update Content
+- New documentation content
+- Modified documentation content
+- Outdated content to remove
+
+## Update Reasons
+- Why updates are needed
+- User feedback
+- Feature change synchronization
+
+## Documentation Quality
+- [ ] Content accuracy
+- [ ] Language expression
+- [ ] Format standards
+- [ ] Example completeness
+
+## User Impact
+- Impact on user usage
+- Whether user training is needed
+- Migration guide requirements
+
+## Related Issues
+Updates documentation for #123, Related to #456
+
+---
+
+## Change Type
+- [ ] Feature addition
+- [ ] Bug fix
+- [ ] Performance optimization
+- [ ] Code refactoring
+- [x] Documentation update
+- [ ] Testing related
+- [ ] Security fix
+
+## Impact Scope
+- [ ] User interface
+- [ ] API interface
+- [ ] Database
+- [ ] Configuration files
+- [ ] Dependencies
+
+## Priority
+- [ ] High - Release immediately
+- [ ] Medium - Next version
+- [ ] Low - Future version
+
+## Release Notes Points
+- User-visible changes
+- Configuration change instructions
+- Migration steps
+- Known issues 
\ No newline at end of file
diff --git a/.github/pull_request_template/feature.md b/.github/pull_request_template/feature.md
new file mode 100644
index 000000000..654cccc13
--- /dev/null
+++ b/.github/pull_request_template/feature.md
@@ -0,0 +1,68 @@
+# 🚀 Feature
+
+## Feature Description
+Brief description of the new feature
+
+## Feature Details
+- [ ] Specific feature point 1
+- [ ] Specific feature point 2
+- [ ] Specific feature point 3
+
+## Use Cases
+Describe the use cases and applicable scenarios for this feature
+
+## Technical Implementation
+- Implementation approach
+- Technologies involved
+- Architectural considerations
+
+## Test Coverage
+- [ ] Unit tests
+- [ ] Integration tests
+- [ ] Manual testing
+
+## Documentation Updates
+- [ ] API documentation
+- [ ] User guide
+- [ ] Example code
+
+## Backward Compatibility
+- [ ] Fully compatible
+- [ ] Migration required
+- [ ] Breaking changes
+
+## Performance Impact
+- Performance improvement/degradation
+- Resource consumption changes
+
+## Related Issues
+Closes #123, Related to #456
+
+---
+
+## Change Type
+- [x] Feature addition
+- [ ] Bug fix
+- [ ] Performance optimization
+- [ ] Code refactoring
+- [ ] Documentation update
+- [ ] Testing related
+- [ ] Security fix
+
+## Impact Scope
+- [ ] User interface
+- [ ] API interface
+- [ ] Database
+- [ ] Configuration files
+- [ ] Dependencies
+
+## Priority
+- [ ] High - Release immediately
+- [ ] Medium - Next version
+- [ ] Low - Future version
+
+## Release Notes Points
+- User-visible changes
+- Configuration change instructions
+- Migration steps
+- Known issues 
\ No newline at end of file
diff --git a/.github/pull_request_template/performance.md b/.github/pull_request_template/performance.md
new file mode 100644
index 000000000..e96c50bfe
--- /dev/null
+++ b/.github/pull_request_template/performance.md
@@ -0,0 +1,63 @@
+# ⚡ Performance Optimization
+
+## Optimization Goals
+- Specific performance metrics to optimize
+- Expected results
+
+## Performance Analysis
+- Current performance bottlenecks
+- Performance test data
+- Performance analysis tools used
+
+## Optimization Strategy
+- Specific optimization strategies
+- Algorithm/data structure improvements
+- Cache strategy optimization
+- Concurrency processing optimization
+
+## Optimization Results
+- Performance improvement data
+- Resource consumption changes
+- Benchmark test results
+
+## Optimization Verification
+- [ ] Performance testing
+- [ ] Stress testing
+- [ ] Regression testing
+
+## Trade-off Considerations
+- Side effects of optimization
+- Code complexity changes
+- Maintenance cost impact
+
+## Related Issues
+Addresses #123, Related to #456
+
+---
+
+## Change Type
+- [ ] Feature addition
+- [ ] Bug fix
+- [x] Performance optimization
+- [ ] Code refactoring
+- [ ] Documentation update
+- [ ] Testing related
+- [ ] Security fix
+
+## Impact Scope
+- [ ] User interface
+- [ ] API interface
+- [ ] Database
+- [ ] Configuration files
+- [ ] Dependencies
+
+## Priority
+- [ ] High - Release immediately
+- [ ] Medium - Next version
+- [ ] Low - Future version
+
+## Release Notes Points
+- User-visible changes
+- Configuration change instructions
+- Migration steps
+- Known issues 
\ No newline at end of file
diff --git a/.github/pull_request_template/refactor.md b/.github/pull_request_template/refactor.md
new file mode 100644
index 000000000..8f4983e61
--- /dev/null
+++ b/.github/pull_request_template/refactor.md
@@ -0,0 +1,63 @@
+# 🔧 Code Refactoring
+
+## Refactoring Goals
+- Code quality improvement objectives
+- Maintainability enhancement
+- Code structure optimization
+
+## Refactoring Content
+- Specific modules/functions to refactor
+- Before and after comparison
+- Refactoring level (function/class/module level)
+
+## Refactoring Strategy
+- Refactoring methodology
+- Step-by-step refactoring plan
+- Risk control measures
+
+## Refactoring Results
+- Code quality metric changes
+- Readability improvements
+- Test coverage changes
+
+## Backward Compatibility
+- [ ] Fully compatible
+- [ ] Adaptation required
+- [ ] Breaking changes
+
+## Testing Verification
+- [ ] Functional testing
+- [ ] Regression testing
+- [ ] Performance testing
+
+## Related Issues
+Refactors #123, Related to #456
+
+---
+
+## Change Type
+- [ ] Feature addition
+- [ ] Bug fix
+- [ ] Performance optimization
+- [x] Code refactoring
+- [ ] Documentation update
+- [ ] Testing related
+- [ ] Security fix
+
+## Impact Scope
+- [ ] User interface
+- [ ] API interface
+- [ ] Database
+- [ ] Configuration files
+- [ ] Dependencies
+
+## Priority
+- [ ] High - Release immediately
+- [ ] Medium - Next version
+- [ ] Low - Future version
+
+## Release Notes Points
+- User-visible changes
+- Configuration change instructions
+- Migration steps
+- Known issues 
\ No newline at end of file
diff --git a/.github/pull_request_template/security.md b/.github/pull_request_template/security.md
new file mode 100644
index 000000000..2e1326547
--- /dev/null
+++ b/.github/pull_request_template/security.md
@@ -0,0 +1,58 @@
+# 🔒 Security Fix
+
+## Security Vulnerability
+- Vulnerability type and severity
+- Impact scope and attack vectors
+- How the vulnerability was discovered
+
+## Fix Solution
+- Specific fix measures
+- Security best practices applied
+- Defense in depth strategy
+
+## Risk Assessment
+- Risk level before fix
+- Risk level after fix
+- Remaining risk analysis
+
+## Security Testing
+- [ ] Vulnerability verification testing
+- [ ] Penetration testing
+- [ ] Security scanning
+
+## User Notification
+- Whether users need immediate updates
+- Security advisory publication
+- User guidance documentation
+
+## Related Issues
+Security fix for #123, Related to #456
+
+---
+
+## Change Type
+- [ ] Feature addition
+- [ ] Bug fix
+- [ ] Performance optimization
+- [ ] Code refactoring
+- [ ] Documentation update
+- [ ] Testing related
+- [x] Security fix
+
+## Impact Scope
+- [ ] User interface
+- [ ] API interface
+- [ ] Database
+- [ ] Configuration files
+- [ ] Dependencies
+
+## Priority
+- [ ] High - Release immediately
+- [ ] Medium - Next version
+- [ ] Low - Future version
+
+## Release Notes Points
+- User-visible changes
+- Configuration change instructions
+- Migration steps
+- Known issues 
\ No newline at end of file
diff --git a/.github/pull_request_template/testing.md b/.github/pull_request_template/testing.md
new file mode 100644
index 000000000..8889f4ad7
--- /dev/null
+++ b/.github/pull_request_template/testing.md
@@ -0,0 +1,59 @@
+# 🧪 Testing Related
+
+## Test Types
+- Unit tests
+- Integration tests
+- End-to-end tests
+- Performance tests
+
+## Test Coverage
+- New test cases added
+- Code coverage scope
+- Test scenario completeness
+
+## Testing Tools
+- Testing frameworks used
+- Test data management
+- Test environment configuration
+
+## Test Quality
+- Test case design
+- Edge condition coverage
+- Exception handling
+
+## Continuous Integration
+- CI/CD process updates
+- Automated testing integration
+- Test report generation
+
+## Related Issues
+Adds tests for #123, Related to #456
+
+---
+
+## Change Type
+- [ ] Feature addition
+- [ ] Bug fix
+- [ ] Performance optimization
+- [ ] Code refactoring
+- [ ] Documentation update
+- [x] Testing related
+- [ ] Security fix
+
+## Impact Scope
+- [ ] User interface
+- [ ] API interface
+- [ ] Database
+- [ ] Configuration files
+- [ ] Dependencies
+
+## Priority
+- [ ] High - Release immediately
+- [ ] Medium - Next version
+- [ ] Low - Future version
+
+## Release Notes Points
+- User-visible changes
+- Configuration change instructions
+- Migration steps
+- Known issues 
\ No newline at end of file
diff --git a/.github/workflows/macOS_test.yml b/.github/workflows/macOS_test.yml
index c05e8137e..e0d2d3b23 100644
--- a/.github/workflows/macOS_test.yml
+++ b/.github/workflows/macOS_test.yml
@@ -4,10 +4,20 @@ on:
   push:
     branches:
       - main
+    paths-ignore:
+      - "**.md"
+      - ".git*"
+      - "docs/assets/**"
+      - "docs/**"
   pull_request_target:
     types: [opened, synchronize, labeled, reopened]
     branches:
       - main
+    paths-ignore:
+      - "**.md"
+      - ".git*"
+      - "docs/assets/**"
+      - "docs/**"
   workflow_dispatch:
 
 concurrency:
diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml
index 81e75e042..0ee94a44a 100644
--- a/.github/workflows/main.yml
+++ b/.github/workflows/main.yml
@@ -4,6 +4,11 @@ on:
   push:
     branches:
       - main
+    paths-ignore:
+      - "**.md"
+      - ".git*"
+      - "docs/assets/**"
+      - "docs/**"
   pull_request:
     branches:
       - main
@@ -11,6 +16,7 @@ on:
       - "**.md"
       - ".git*"
       - "docs/assets/**"
+      - "docs/**"
 env:
   CI_PATH: '/home/mnt/platform_ci/GitHub/${{ github.repository }}/${GITHUB_RUN_NUMBER}'
 
diff --git a/.github/workflows/win_test.yml b/.github/workflows/win_test.yml
index c3e0af390..d10edf947 100644
--- a/.github/workflows/win_test.yml
+++ b/.github/workflows/win_test.yml
@@ -4,10 +4,20 @@ on:
   push:
     branches:
       - main
+    paths-ignore:
+      - "**.md"
+      - ".git*"
+      - "docs/assets/**"
+      - "docs/**"
   pull_request_target:
     types: [opened, synchronize, labeled, reopened]
     branches:
       - main
+    paths-ignore:
+      - "**.md"
+      - ".git*"
+      - "docs/assets/**"
+      - "docs/**"
   workflow_dispatch:
 
 concurrency:
diff --git a/LazyLLM-Env b/LazyLLM-Env
index 0fb5bc5f6..cf704323d 160000
--- a/LazyLLM-Env
+++ b/LazyLLM-Env
@@ -1 +1 @@
-Subproject commit 0fb5bc5f68fefb5fb2e6c8d410e16a5adff86866
+Subproject commit cf704323d5dd229770d397b1150e267187a2e87b
diff --git a/README.CN.md b/README.CN.md
index e11ba6e52..db31b417a 100644
--- a/README.CN.md
+++ b/README.CN.md
@@ -14,6 +14,7 @@
 
 LazyLLM是一款低代码构建**多Agent**大模型应用的开发工具，协助开发者用极低的成本构建复杂的AI应用，并可以持续的迭代优化效果。LazyLLM提供了便捷的搭建应用的workflow，并且为应用开发过程中的各个环节提供了大量的标准流程和工具。<br>
 基于LazyLLM的AI应用构建流程是**原型搭建 -> 数据回流 -> 迭代优化**，即您可以先基于LazyLLM快速跑通应用的原型，再结合场景任务数据进行bad-case分析，然后对应用中的关键环节进行算法迭代和模型微调，进而逐步提升整个应用的效果。<br>
+LazyLLM致力于敏捷与效率的统一，开发者可以高效的迭代算法，然后将迭代好的算法应用到工业生产中，支持多用户、容错和高并发。
 **用户文档**： https://docs.lazyllm.ai/ <br>
 
 微信扫描下方二维码加入交流群(左)或通过观看视频了解更多(右)<br>
@@ -340,3 +341,62 @@ Flow 是LazyLLM中定义的数据流，描述了数据如何从一个可调用
 1. 您可以方便地组合、添加和替换各个模块和组件；Flow 的设计使得添加新功能变得简单，不同模块甚至项目之间的协作也变得更加容易。
 2. 通过一套标准化的接口和数据流机制，Flow 减少了开发人员在处理数据传递和转换时的重复工作。开发人员可以将更多精力集中在核心业务逻辑上，从而提高整体开发效率。
 3. 部分Flow 支持异步处理模式和并行执行，在处理大规模数据或复杂任务时，可以显著提高响应速度和系统性能。
+
+
+## 九、 后续计划
+
+### 9.1 时间线
+V0.6 预计从9.1日开始，历时3个月，中间会不间断发布小版本，如v0.6.1, v0.6.2
+V0.7 预计从12.1日开始，历时3个月，中间会不间断发布小版本，如v0.7.1, v0.7.2
+
+### 9.2 功能模块
+9.2.1 RAG
+  - 9.2.1.1 工程
+    - 沉淀LazyRAG中的能力到LazyLLM  （V0.6 ）
+    - RAG的宏观问答能力扩展到多知识库 （V0.6 ）
+    - RAG模块完全支持横向扩容，支持多机部署RAG的算法协同工作 （V0.6 ）
+    - 知识图谱接入至少1个开源框架 （V0.6 ）
+    - 支持常用的数据切分策略，不少于20种，覆盖各种类型的文档 （V0.6 ）
+  - 9.2.1.2 数据能力
+    - 表格解析（V0.6 - 0.7 ）
+    - CAD图片解析（V0.7 -  ）
+  - 9.2.1.3 算法能力
+    - 支持对CSV等相对结构化的文本的处理 （V0.6 ）
+    - 多跳检索（文档中的链接，参考文献等）  （V0.6 ）
+    - 信息冲突处理 （V0.7 ）
+    - AgenticRL & 写代码解问题能力（V0.7 ）
+
+9.2.2 功能模块
+  - 支持记忆的能力 （V0.6 ）
+  - 分布式Launcher的支持 （V0.7）
+  - 基于数据库的Globals支持 （V0.6 ）
+  - ServerModule可以发布成mcp服务（v0.7）
+  - 线上沙箱服务的集成（v0.7）
+
+9.2.3 模型训推
+  - 支持OpenAI接口的部署和推理 （V0.6 ）
+  - 统一微调和推理的提示词 （V0.7 ）
+  - Example中给出微调示例 （V0.7 ）
+  - 集成2-3个提示词仓库，可以直接选择提示词仓库中的提示词 （V0.6 ）
+  - 支持更智能的模型类型判断和推理框架选择，重构和简化auto-finetune选框架的逻辑 （V0.6 ）
+  - GRPO全链路支持 （V0.7 ）
+
+9.2.4 文档
+  - 完善API文档，确保每个公开接口都有API文档，文档参数和函数参数一致，且有可执行的样例代码 （V0.6 ）
+  - 完善CookBook文档，案例增加至50个，并有和LangChain / Llamaindex的对比 （代码量，速度，扩展性） （V0.6 ）
+  - 完善Environment文档，补充在win/linux/macos的安装方式，补充对包的切分策略 （V0.6 ）
+  - 完善Learn文档，先教大家用大模型；然后教大家构建agent；然后教大家用workflow；再教大家搭建rag； （V0.6 ）
+
+9.2.5 质量
+  - 通过对大部分模块进行Mock，将CI的时间降低到10分钟以内 （V0.6 ）
+  - 增加每日构建，高耗时 / token的任务放到每日构建中执行 （V0.6 ）
+
+9.2.6 开发、部署与发布
+  - Debug优化（v0.7）
+  - 过程监控  [输出 + 性能]（v0.7）
+  - 依赖的训推框架的环境隔离和环境的自动建设（V0.6 ）
+
+9.2.7 生态
+  - 推动LazyCraft的开源 （V0.6 ）
+  - 推动LazyRAG的开源 （V0.7 ）
+  - 将代码传至Github以外的2个代码托管网站，并争取取得社区合作（V0.6 ）
\ No newline at end of file
diff --git a/README.md b/README.md
index 2317ac3e4..b0e3df7e3 100644
--- a/README.md
+++ b/README.md
@@ -348,3 +348,61 @@ Flow in LazyLLM defines the data stream, describing how data is passed from one
 1. You can easily combine, add, and replace various modules and components; the design of Flow makes adding new features simple and facilitates collaboration between different modules and even projects.
 2. Through a standardized interface and data flow mechanism, Flow reduces the repetitive work developers face when handling data transfer and transformation. Developers can focus more on core business logic, thus improving overall development efficiency.
 3. Some Flows support asynchronous processing and parallel execution, significantly enhancing response speed and system performance when dealing with large-scale data or complex tasks.
+
+## Future Plans
+
+### Timeline
+V0.6 Expected to start from September 1st, lasting 3 months, with continuous small version releases in between, such as v0.6.1, v0.6.2
+V0.7 Expected to start from December 1st, lasting 3 months, with continuous small version releases in between, such as v0.7.1, v0.7.2
+
+### Feature Modules
+RAG
+  - Engineering
+    - Integrate LazyRAG capabilities into LazyLLM (V0.6)
+    - Extend RAG's macro Q&A capabilities to multiple knowledge bases (V0.6)
+    - RAG modules fully support horizontal scaling, supporting multi-machine deployment of RAG algorithm collaboration (V0.6)
+    - Integrate at least 1 open-source knowledge graph framework (V0.6)
+    - Support common data splitting strategies, no less than 20 types, covering various document types (V0.6)
+  - Data Capabilities
+    - Table parsing (V0.6 - 0.7)
+    - CAD image parsing (V0.7 -)
+  - Algorithm Capabilities
+    - Support processing of relatively structured texts like CSV (V0.6)
+    - Multi-hop retrieval (links in documents, references, etc.) (V0.6)
+    - Information conflict handling (V0.7)
+    - AgenticRL & code-writing problem-solving capabilities (V0.7)
+
+Functional Modules
+  - Support memory capabilities (V0.6)
+  - Support for distributed Launcher (V0.7)
+  - Database-based Globals support (V0.6)
+  - ServerModule can be published as MCP service (v0.7)
+  - Integration of online sandbox services (v0.7)
+
+Model Training and Inference
+  - Support OpenAI interface deployment and inference (V0.6)
+  - Unify fine-tuning and inference prompts (V0.7)
+  - Provide fine-tuning examples in Examples (V0.7)
+  - Integrate 2-3 prompt repositories, allowing direct selection of prompts from prompt repositories (V0.6)
+  - Support more intelligent model type judgment and inference framework selection, refactor and simplify auto-finetune framework selection logic (V0.6)
+  - Full-chain GRPO support (V0.7)
+
+Documentation
+  - Complete API documentation, ensure every public interface has API documentation, with consistent documentation parameters and function parameters, and executable sample code (V0.6)
+  - Complete CookBook documentation, increase cases to 50, with comparisons to LangChain/LlamaIndex (code volume, speed, extensibility) (V0.6)
+  - Complete Environment documentation, supplement installation methods on win/linux/macos, supplement package splitting strategies (V0.6)
+  - Complete Learn documentation, first teach how to use large models; then teach how to build agents; then teach how to use workflows; finally teach how to build RAG (V0.6)
+
+Quality
+  - Reduce CI time to within 10 minutes by mocking most modules (V0.6)
+  - Add daily builds, put high-time-consuming/token tasks in daily builds (V0.6)
+
+Development, Deployment and Release
+  - Debug optimization (v0.7)
+  - Process monitoring [output + performance] (v0.7)
+  - Environment isolation and automatic environment setup for dependent training and inference frameworks (V0.6)
+
+Ecosystem
+  - Promote LazyCraft open source (V0.6)
+  - Promote LazyRAG open source (V0.7)
+  - Upload code to 2 code hosting websites other than Github and strive for community collaboration (V0.6)
diff --git a/docs/assets/env/git_bash.png b/docs/assets/env/git_bash.png
new file mode 100644
index 000000000..c0f4bc819
Binary files /dev/null and b/docs/assets/env/git_bash.png differ
diff --git a/docs/assets/env/install_python.png b/docs/assets/env/install_python.png
new file mode 100644
index 000000000..6ed43de4a
Binary files /dev/null and b/docs/assets/env/install_python.png differ
diff --git a/docs/assets/env/map.png b/docs/assets/env/map.png
new file mode 100644
index 000000000..1cf495eb2
Binary files /dev/null and b/docs/assets/env/map.png differ
diff --git a/docs/assets/env/set_python_install_path.png b/docs/assets/env/set_python_install_path.png
new file mode 100644
index 000000000..84fa6bac4
Binary files /dev/null and b/docs/assets/env/set_python_install_path.png differ
diff --git a/docs/assets/env/virtualize.png b/docs/assets/env/virtualize.png
new file mode 100644
index 000000000..ddb590aac
Binary files /dev/null and b/docs/assets/env/virtualize.png differ
diff --git a/docs/assets/env/virtualize_2.png b/docs/assets/env/virtualize_2.png
new file mode 100644
index 000000000..16ca0c282
Binary files /dev/null and b/docs/assets/env/virtualize_2.png differ
diff --git a/docs/assets/env/virtualize_3.png b/docs/assets/env/virtualize_3.png
new file mode 100644
index 000000000..d218cf4c0
Binary files /dev/null and b/docs/assets/env/virtualize_3.png differ
diff --git a/docs/assets/env/virtualize_4.png b/docs/assets/env/virtualize_4.png
new file mode 100644
index 000000000..1e970cfe8
Binary files /dev/null and b/docs/assets/env/virtualize_4.png differ
diff --git a/docs/assets/env/vscode_extensions.png b/docs/assets/env/vscode_extensions.png
new file mode 100644
index 000000000..45c649604
Binary files /dev/null and b/docs/assets/env/vscode_extensions.png differ
diff --git a/docs/assets/env/vscode_interpret.png b/docs/assets/env/vscode_interpret.png
new file mode 100644
index 000000000..84c9ae36d
Binary files /dev/null and b/docs/assets/env/vscode_interpret.png differ
diff --git a/docs/assets/env/vscode_interpret_manual.png b/docs/assets/env/vscode_interpret_manual.png
new file mode 100644
index 000000000..31445e421
Binary files /dev/null and b/docs/assets/env/vscode_interpret_manual.png differ
diff --git a/docs/assets/env/winversion.png b/docs/assets/env/winversion.png
new file mode 100644
index 000000000..81a5f24cc
Binary files /dev/null and b/docs/assets/env/winversion.png differ
diff --git a/docs/assets/env/winversion_2.png b/docs/assets/env/winversion_2.png
new file mode 100644
index 000000000..518fa23bc
Binary files /dev/null and b/docs/assets/env/winversion_2.png differ
diff --git a/docs/assets/env/wsl_passward.png b/docs/assets/env/wsl_passward.png
new file mode 100644
index 000000000..57ac9713d
Binary files /dev/null and b/docs/assets/env/wsl_passward.png differ
diff --git a/docs/assets/js/lang-redirect.js b/docs/assets/js/lang-redirect.js
new file mode 100644
index 000000000..febc999b7
--- /dev/null
+++ b/docs/assets/js/lang-redirect.js
@@ -0,0 +1,42 @@
+document.addEventListener("DOMContentLoaded", function() {
+  console.log("[i18n] Language redirect initialized");
+
+  const currentUrl = new URL(window.location.href);
+  const currentPath = currentUrl.pathname;
+  const currentHash = currentUrl.hash;  // #xxxx
+  const currentSearch = currentUrl.search; // ?key=value
+
+  const currentLang = currentPath.startsWith('/zh-cn/') ? 'zh' : 
+                     currentPath.startsWith('/en/') ? 'en' : 
+                     'default';
+
+  document.querySelectorAll('a[lang], a[hreflang]').forEach(link => {
+    const targetLang = link.getAttribute('lang') || link.getAttribute('hreflang');
+
+    if (targetLang === currentLang) {
+      link.addEventListener('click', (e) => {
+        e.preventDefault();
+        console.log(`[i18n] Blocked redundant switch to ${targetLang}`);
+      });
+      return;
+    }
+
+    const newUrl = new URL(link.href, window.location.origin);
+
+    if (currentPath.startsWith('/zh-cn/')) {
+      newUrl.pathname = currentPath.replace('/zh-cn/', '/en/');
+    } 
+    else if (currentPath.startsWith('/en/')) {
+      newUrl.pathname = currentPath.replace('/en/', '/zh-cn/');
+    }
+    else {
+      newUrl.pathname = targetLang === 'en' ? '/en/' : '/zh-cn/';
+    }
+
+    newUrl.search = currentSearch;
+    newUrl.hash = currentHash;
+
+    link.href = newUrl.toString();
+    console.log(`[i18n] Converted to: ${newUrl}`);
+  });
+});
diff --git a/docs/en/API Reference/common.md b/docs/en/API Reference/common.md
index 3df31ac99..8ea2b35c5 100644
--- a/docs/en/API Reference/common.md	
+++ b/docs/en/API Reference/common.md	
@@ -4,8 +4,23 @@
     options:
       heading_level: 3
 
+::: lazyllm.common.registry.LazyDict
+    options:
+      heading_level: 3
+      members: [remove, set_default]
+
 ---
 
+::: lazyllm.common.common.ResultCollector
+    members: 
+    - keys
+    - items
+    exclude-members:
+
+::: lazyllm.common.common.EnvVarContextManager
+    members: 
+    exclude-members:
+
 ## Bind
 
 ::: lazyllm.common.bind
@@ -20,6 +35,12 @@
     options:
       heading_level: 3
 
+## Identity
+
+::: lazyllm.common.Identity
+    options:
+      heading_level: 3
+
 ---
 
 ## Compilation
@@ -33,4 +54,53 @@
 ::: lazyllm.common.FileSystemQueue
     members: enqueue, dequeue, peek, size, clear
     exclude-members:
+
+::: lazyllm.common.ReadOnlyWrapper
+    members: set, isNone
+    exclude-members:
+
+::: lazyllm.common.queue.RedisQueue
+    members: 
+    exclude-members:
+
+::: lazyllm.common.CaseInsensitiveDict
+    members: 
+    exclude-members:
+
+::: lazyllm.common.ProcessPoolExecutor
+    members: submit
+    exclude-members:
+
+## Multiprocessing
+
+::: lazyllm.common.ForkProcess
+    members: work, start
+    exclude-members:
+
+## Options
+
+::: lazyllm.common.Option
+    members: 
+    exclude-members:
+
+::: lazyllm.common.multiprocessing.SpawnProcess
+    members: start
+    exclude-members:
+
+::: lazyllm.common.queue.SQLiteQueue
+    options:
+      heading_level: 3
+
+## Threading
+
+::: lazyllm.common.Thread
+    members: work, get_result
+    exclude-members:
+    
+
+## LazyLLMCMD
+
+::: lazyllm.common.LazyLLMCMD
+    members: with_cmd, get_args
+    exclude-members:
     
\ No newline at end of file
diff --git a/docs/en/API Reference/components.md b/docs/en/API Reference/components.md
index cff6b2c84..fe90b9ac0 100644
--- a/docs/en/API Reference/components.md	
+++ b/docs/en/API Reference/components.md	
@@ -12,6 +12,14 @@
     options:
       heading_level: 3
 
+::: lazyllm.components.deploy.LazyLLMDeployBase
+    options:
+      heading_level: 3
+
+::: lazyllm.components.deploy.LazyLLMDeployBase.extract_result
+    options:
+      heading_level: 3
+      
 ::: lazyllm.components.finetune.FlagembeddingFinetune
     options:
       heading_level: 3
@@ -29,6 +37,7 @@
 ::: lazyllm.components.deploy.Lightllm
     options:
       heading_level: 3
+      members: [cmd, geturl, extract_result]
 
 ::: lazyllm.components.deploy.Vllm
     options:
@@ -37,44 +46,49 @@
 ::: lazyllm.components.deploy.LMDeploy
     options:
       heading_level: 3
+      members: [cmd, geturl, extract_result]
 
-::: lazyllm.components.auto.AutoDeploy
+::: lazyllm.components.deploy.base.DummyDeploy
     options:
       heading_level: 3
 
-::: lazyllm.components.deploy.EmbeddingDeploy
+::: lazyllm.components.auto.AutoDeploy
     options:
       heading_level: 3
 
-::: lazyllm.components.deploy.embed.RerankDeploy
+::: lazyllm.components.deploy.embed.AbstractEmbedding
     options:
       heading_level: 3
 
-::: lazyllm.components.deploy.Mindie
+::: lazyllm.components.deploy.EmbeddingDeploy
     options:
       heading_level: 3
-::: lazyllm.components.deploy.OCRDeploy
+
+::: lazyllm.components.deploy.embed.RerankDeploy
     options:
       heading_level: 3
----
 
-## Launcher
+::: lazyllm.components.deploy.embed.LazyHuggingFaceRerank
+    options:
+      heading_level: 3
+      members: [load_reranker, rebuild]
 
-::: lazyllm.launcher.EmptyLauncher
+::: lazyllm.components.deploy.Mindie
     options:
       heading_level: 3
 
-::: lazyllm.launcher.RemoteLauncher
+      
+::: lazyllm.components.deploy.OCRDeploy
     options:
       heading_level: 3
+---
 
-::: lazyllm.launcher.SlurmLauncher
+::: lazyllm.components.deploy.relay.base.RelayServer
     options:
       heading_level: 3
-      filters:
-      - '!get_idle'
+      members: [cmd, geturl]
 
-::: lazyllm.launcher.ScoLauncher
+::: lazyllm.components.deploy.OCRDeploy
     options:
       heading_level: 3
 
@@ -83,6 +97,22 @@
 ## Prompter
 
 ::: lazyllm.components.prompter.LazyLLMPrompterBase
+    options:
+      heading_level: 3
+    inherited_members:
+      - generate_prompt
+      - get_response
+    members: [pre_hook]
+
+::: lazyllm.components.prompter.EmptyPrompter
+    options:
+      heading_level: 3
+      members: true
+
+::: lazyllm.components.Prompter
+    options:
+      heading_level: 3
+      members: [from_dict, from_template, from_file, empty, generate_prompt, get_response]
   options:
     heading_level: 3
     inherited_members:
@@ -118,11 +148,45 @@
 
 ---
 
-## Register
+## MultiModal
 
-::: lazyllm.common.Register
+### Text to Image
+
+::: lazyllm.components.StableDiffusionDeploy
     options:
-      heading_level: 3
+      heading_level: 4
+
+### Visual Question Answering
+
+Reference [LMDeploy][lazyllm.components.deploy.LMDeploy], which supports the Visual Question Answering model.
+
+### Text to Sound
+
+::: lazyllm.components.TTSDeploy
+    options:
+      heading_level: 4
+
+::: lazyllm.components.ChatTTSDeploy
+    options:
+      heading_level: 4
+
+::: lazyllm.components.BarkDeploy
+    options:
+      heading_level: 4
+
+::: lazyllm.components.MusicGenDeploy
+    options:
+      heading_level: 4
+
+### Speech to Text
+
+::: lazyllm.components.SenseVoiceDeploy
+    options:
+      heading_level: 4
+
+::: lazyllm.components.deploy.speech_to_text.sense_voice.SenseVoice
+    options:
+      heading_level: 4
 
 ---
 
@@ -168,48 +232,47 @@
     options:
       heading_level: 3
 
-::: lazyllm.components.JsonFormatter
+::: lazyllm.components.formatter.formatterbase.JsonLikeFormatter
     options:
       heading_level: 3
 
-::: lazyllm.components.EmptyFormatter
+::: lazyllm.components.formatter.formatterbase.PythonFormatter
     options:
       heading_level: 3
 
----
-
-## MultiModal
-
-### Text to Image
-
-::: lazyllm.components.StableDiffusionDeploy
+::: lazyllm.components.formatter.FileFormatter
     options:
-      heading_level: 4
-
-### Visual Question Answering
+      heading_level: 3
 
-Reference [LMDeploy][lazyllm.components.deploy.LMDeploy], which supports the Visual Question Answering model.
+::: lazyllm.components.formatter.YamlFormatter
+    options:
+      heading_level: 3
 
-### Text to Sound
+::: lazyllm.components.formatter.encode_query_with_filepaths
+    options:
+      heading_level: 3
 
-::: lazyllm.components.TTSDeploy
+::: lazyllm.components.formatter.decode_query_with_filepaths
     options:
-      heading_level: 4
+      heading_level: 3
 
-::: lazyllm.components.ChatTTSDeploy
+::: lazyllm.components.formatter.lazyllm_merge_query
     options:
-      heading_level: 4
+      heading_level: 3
 
-::: lazyllm.components.BarkDeploy
+::: lazyllm.components.JsonFormatter
     options:
-      heading_level: 4
+      heading_level: 3
 
-::: lazyllm.components.MusicGenDeploy
+::: lazyllm.components.EmptyFormatter
     options:
-      heading_level: 4
+      heading_level: 3
 
-### Speech to Text
+---
 
-::: lazyllm.components.SenseVoiceDeploy
+## ComponentBase
+
+::: lazyllm.components.core.ComponentBase
     options:
-      heading_level: 4
+      heading_level: 3
+      members: [apply, cmd]
diff --git a/docs/en/API Reference/configs.md b/docs/en/API Reference/configs.md
index 7f92ce611..74c3db8dd 100644
--- a/docs/en/API Reference/configs.md	
+++ b/docs/en/API Reference/configs.md	
@@ -4,4 +4,7 @@
       - done
       - getenv
       - add
-      - get_all_configs
\ No newline at end of file
+      - get_all_configs
+      - get_config
+      - temp
+      - refresh
\ No newline at end of file
diff --git a/docs/en/API Reference/flow.md b/docs/en/API Reference/flow.md
index f21ceeaf3..3eb015de1 100644
--- a/docs/en/API Reference/flow.md	
+++ b/docs/en/API Reference/flow.md	
@@ -1,19 +1,26 @@
 ::: lazyllm.flow.FlowBase
-    members: is_root, ancestor, for_each
+    members: is_root, ancestor, for_each, id
     exclude-members:
 
 ::: lazyllm.flow.LazyLLMFlowsBase
     members: 
+    - register_hook
+    - unregister_hook
+    - clear_hooks
+    - set_sync
+    - wait
+    - invoke
+    - bind
     exclude-members:
 
 ::: lazyllm.flow.Pipeline
-    members: 
+    members: output
     exclude-members:
 
 ::: lazyllm.flow.save_pipeline_result
 
 ::: lazyllm.flow.Parallel
-    members: 
+    members: join, sequential
     exclude-members:
 
 ::: lazyllm.flow.Diverter
diff --git a/docs/en/API Reference/hook.md b/docs/en/API Reference/hook.md
new file mode 100644
index 000000000..d84ad89da
--- /dev/null
+++ b/docs/en/API Reference/hook.md	
@@ -0,0 +1,3 @@
+::: lazyllm.hook.LazyLLMHook
+    members: pre_hook, post_hook, report
+    exclude-members:
\ No newline at end of file
diff --git a/docs/en/API Reference/launcher.md b/docs/en/API Reference/launcher.md
new file mode 100644
index 000000000..7a4a72dd9
--- /dev/null
+++ b/docs/en/API Reference/launcher.md	
@@ -0,0 +1,35 @@
+::: lazyllm.LazyLLMLaunchersBase
+    options:
+      members:
+      - makejob
+      - launch
+      - cleanup
+      - wait
+      - clone
+
+::: lazyllm.launcher.EmptyLauncher
+    options:
+      heading_level: 3
+
+::: lazyllm.launcher.RemoteLauncher
+    options:
+      heading_level: 3
+
+::: lazyllm.launcher.SlurmLauncher
+    options:
+      heading_level: 3
+      filters:
+      - '!get_idle'
+
+::: lazyllm.launcher.ScoLauncher
+    options:
+      heading_level: 3
+
+::: lazyllm.launcher.Job
+    options:
+      heading_level: 3
+
+::: lazyllm.launcher.K8sLauncher
+    options:
+      heading_level: 3
+      members: [makejob, launch]
\ No newline at end of file
diff --git a/docs/en/API Reference/module.md b/docs/en/API Reference/module.md
index 48e6c9ca4..cc30b5627 100644
--- a/docs/en/API Reference/module.md	
+++ b/docs/en/API Reference/module.md	
@@ -10,7 +10,14 @@
       - start
       - restart
       - update
-        
+
+::: lazyllm.module.servermodule.LLMBase
+    options:
+      members:
+      - prompt
+      - formatter
+      - share
+
 ::: lazyllm.module.ActionModule
     options:
       members:
@@ -46,17 +53,25 @@
       members:
 
 ::: lazyllm.module.TrialModule
-    members: start
+    members: [start]
     exclude-members:
 
 ::: lazyllm.module.OnlineChatModule
     members:
     exclude-members:
 
+::: lazyllm.module.llms.onlinemodule.supplier.doubao.DoubaoModule
+    members:
+    exclude-members:
+
 ::: lazyllm.module.OnlineEmbeddingModule
     members:
     exclude-members:
 
+::: lazyllm.module.llms.onlinemodule.supplier.openai.OpenAIEmbedding
+    members:
+    exclude-members:
+
 ::: lazyllm.module.OnlineChatModuleBase
     options:
       members:
@@ -66,3 +81,12 @@
 ::: lazyllm.module.OnlineEmbeddingModuleBase
     members:
     exclude-members: forward
+
+::: lazyllm.module.llms.onlinemodule.supplier.doubao.DoubaoEmbedding
+    options:
+      members:
+
+::: lazyllm.module.llms.onlinemodule.fileHandler.FileHandlerBase
+    members: get_finetune_data
+    exclude-members: 
+    
diff --git a/docs/en/API Reference/tools.md b/docs/en/API Reference/tools.md
index 0e6db035b..e7c3e1c92 100644
--- a/docs/en/API Reference/tools.md	
+++ b/docs/en/API Reference/tools.md	
@@ -1,15 +1,13 @@
-::: lazyllm.tools.Document
+::: lazyllm.tools.IntentClassifier
     members:
+    - intent_promt_hook
+    - post_process_result
     exclude-members:
 
-::: lazyllm.tools.rag.store.ChromadbStore
+::: lazyllm.tools.Document
     members:
     exclude-members:
 
-::: lazyllm.tools.rag.store.MilvusStore
-    members:
-    exclude-members:
-    
 ::: lazyllm.tools.rag.store.ChromadbStore
     members:
     exclude-members:
@@ -20,135 +18,132 @@
     
 ::: lazyllm.tools.rag.readers.ReaderBase
     members:
-	exclude-members:
+    exclude-members:
 
-::: lazyllm.tools.rag.component.bm25.BM25
+::: lazyllm.tools.rag.readers.PandasCSVReader
     members:
-	exclude-members:
+    exclude-members:
 
-::: lazyllm.tools.rag.doc_to_db.DocInfoSchemaItem
+::: lazyllm.tools.rag.readers.PandasExcelReader
     members:
-	exclude-members:
+    exclude-members:
 
-::: lazyllm.tools.rag.doc_to_db.DocGenreAnalyser
+::: lazyllm.tools.rag.readers.PDFReader
     members:
-	exclude-members:
+    exclude-members:
 
-::: lazyllm.tools.rag.doc_to_db.DocInfoSchemaAnalyser
+::: lazyllm.tools.rag.readers.PPTXReader
     members:
-	exclude-members:
+    exclude-members:
 
-::: lazyllm.tools.rag.doc_to_db.DocInfoExtractor
+::: lazyllm.tools.rag.readers.VideoAudioReader
     members:
-	exclude-members:
+    exclude-members:
 
-::: lazyllm.tools.rag.doc_to_db.DocToDbProcessor
+::: lazyllm.tools.SqlManager
     members: 
-        - extract_info_from_docs
-        - analyze_info_schema_by_llm
-	exclude-members:
-
-::: lazyllm.tools.rag.doc_to_db.extract_db_schema_from_files
-
-::: lazyllm.tools.rag.readers.DocxReader
-    members:
-	exclude-members:
-
-::: lazyllm.tools.rag.readers.EpubReader
-    members:
-	exclude-members:
-
-::: lazyllm.tools.rag.readers.HWPReader
-    members:
-	exclude-members:
-
-::: lazyllm.tools.rag.readers.ImageReader
-    members:
-	exclude-members:
-
-::: lazyllm.tools.rag.readers.IPYNBReader
-    members:
-	exclude-members:
-
-::: lazyllm.tools.rag.readers.MagicPDFReader
-    members:
-	exclude-members:
+        - get_session
+        - check_connection
+        - set_desc
+        - get_all_tables
+        - get_table_orm_class
+        - execute_commit
+        - execute_query
+        - create_table
+        - drop_table
+        - insert_values
+    exclude-members:
 
-::: lazyllm.tools.rag.readers.MarkdownReader
+::: lazyllm.tools.Reranker
     members:
-        - remove_images
-        - remove_hyperlinks
-	exclude-members:
+    exclude-members:
 
-::: lazyllm.tools.rag.readers.MboxReader
+::: lazyllm.tools.rag.readers.readerBase.LazyLLMReaderBase
     members:
 	exclude-members:
 
-::: lazyllm.tools.rag.component.bm25.BM25
+::: lazyllm.tools.rag.component.bm25
     members:
-	exclude-members:
+    exclude-members:
 
 ::: lazyllm.tools.rag.doc_to_db.DocInfoSchemaItem
     members:
-	exclude-members:
+    exclude-members:
 
 ::: lazyllm.tools.rag.doc_to_db.DocGenreAnalyser
     members:
-	exclude-members:
+    exclude-members:
 
 ::: lazyllm.tools.rag.doc_to_db.DocInfoSchemaAnalyser
-    members:
+    members: analyse_info_schema
 	exclude-members:
 
 ::: lazyllm.tools.rag.doc_to_db.DocInfoExtractor
-    members:
+    members: extract_doc_info
 	exclude-members:
 
+::: lazyllm.tools.rag.doc_to_db.DocInfoExtractor
+    members:
+    exclude-members:
+
 ::: lazyllm.tools.rag.doc_to_db.DocToDbProcessor
     members: 
         - extract_info_from_docs
         - analyze_info_schema_by_llm
-	exclude-members:
+    exclude-members:
 
 ::: lazyllm.tools.rag.doc_to_db.extract_db_schema_from_files
 
 ::: lazyllm.tools.rag.readers.DocxReader
     members:
-	exclude-members:
+    exclude-members:
 
 ::: lazyllm.tools.rag.readers.EpubReader
     members:
-	exclude-members:
+    exclude-members:
 
 ::: lazyllm.tools.rag.readers.HWPReader
     members:
-	exclude-members:
+    exclude-members:
 
 ::: lazyllm.tools.rag.readers.ImageReader
     members:
-	exclude-members:
+    exclude-members:
 
 ::: lazyllm.tools.rag.readers.IPYNBReader
     members:
-	exclude-members:
+    exclude-members:
 
-::: lazyllm.tools.rag.readers.MagicPDFReader
+::: lazyllm.tools.rag.readers.MineruPDFReader
     members:
-	exclude-members:
+    exclude-members:
 
 ::: lazyllm.tools.rag.readers.MarkdownReader
     members:
         - remove_images
         - remove_hyperlinks
-	exclude-members:
+    exclude-members:
 
 ::: lazyllm.tools.rag.readers.MboxReader
     members:
 	exclude-members:
 
+::: lazyllm.tools.SqlCall
+    members: 
+        - sql_query_promt_hook
+        - sql_explain_prompt_hook
+        - extract_sql_from_response
+	exclude-members:
+
+::: lazyllm.tools.rag.default_index.DefaultIndex
+    members:
+        - update
+        - remove
+        - query
+    exclude-members: 
+
 ::: lazyllm.tools.Reranker
-    members: register_reranker
-    members: register_reranker
+    members: [register_reranker]
     exclude-members: forward
 
 ::: lazyllm.tools.Retriever
@@ -156,17 +151,46 @@
     exclude-members: forward
 
 ::: lazyllm.tools.rag.retriever.TempDocRetriever
-    members:
+    members: [create_node_group, add_subretriever]
     exclude-members: 
 
-::: lazyllm.tools.rag.retriever.TempDocRetriever
-    members:
+::: lazyllm.tools.rag.retriever.UrlDocument
+    members: [find]
     exclude-members: 
 
 ::: lazyllm.tools.rag.DocManager
     members:
 	exclude-members:
 
+::: lazyllm.tools.rag.utils.SqliteDocListManager
+    members:
+      - table_inited
+      - get_status_cond_and_params
+      - validate_paths
+      - update_need_reparsing
+      - list_files
+      - get_docs
+      - set_docs_new_meta
+      - fetch_docs_changed_meta
+      - list_all_kb_group
+      - add_kb_group
+      - list_kb_group_files
+      - delete_unreferenced_doc
+      - get_docs_need_reparse
+      - get_existing_paths_by_pattern
+      - update_file_message
+      - update_file_status
+      - add_files_to_kb_group
+      - delete_files_from_kb_group
+      - get_file_status
+      - update_kb_group
+      - release
+	exclude-members:
+
+::: lazyllm.tools.rag.data_loaders.DirectoryReader
+    members: load_data
+	exclude-members:
+
 ::: lazyllm.tools.SentenceSplitter
     members:
     exclude-members:
@@ -191,45 +215,34 @@ lazyllm.tools.rag.transform.NodeTransform
     members:
     exclude-members:
 
-::: lazyllm.tools.rag.dataReader.SimpleDirectoryReader
-    members:
-    exclude-members:
-
-::: lazyllm.tools.rag.dataReader.FileReader
-    members:
+::: lazyllm.tools.rag.doc_processor.DocumentProcessor
+    members: register_algorithm, drop_algorithm
+::: lazyllm.tools.rag.doc_node.QADocNode
+    members: get_text
     exclude-members:
 
-lazyllm.tools.rag.transform.NodeTransform
-    members:
-    exclude-members:
-
-::: lazyllm.tools.rag.transform.TransformArgs
-    members:
-    exclude-members:
-
-::: lazyllm.tools.rag.similarity.register_similarity
+::: lazyllm.tools.rag.dataReader.SimpleDirectoryReader
     members:
     exclude-members:
 
-::: lazyllm.tools.rag.doc_node.DocNode
+::: lazyllm.tools.rag.dataReader.FileReader
     members:
     exclude-members:
 
-::: lazyllm.tools.rag.dataReader.SimpleDirectoryReader
-    members:
+::: lazyllm.tools.rag.transform.FuncNodeTransform
+    members: transform
     exclude-members:
 
-::: lazyllm.tools.rag.dataReader.FileReader
+::: lazyllm.tools.rag.web.DocWebModule
     members:
-    exclude-members:
-    
+    exclude-members:    
     
 ::: lazyllm.tools.WebModule
     members:
     exclude-members: forward
 
 ::: lazyllm.tools.CodeGenerator
-    members: 
+    members: [choose_prompt]
     exclude-members: forward
 
 ::: lazyllm.tools.ParameterExtractor
@@ -237,7 +250,7 @@ lazyllm.tools.rag.transform.NodeTransform
     exclude-members: forward
 
 ::: lazyllm.tools.QustionRewrite
-    members: 
+    members: choose_prompt
     exclude-members: forward
 
 ::: lazyllm.tools.agent.toolsManager.ToolManager
@@ -272,8 +285,20 @@ lazyllm.tools.rag.transform.NodeTransform
     members: 
     exclude-members: forward
 
-::: lazyllm.tools.IntentClassifier
-    members: 
+::: lazyllm.tools.rag.smart_embedding_index.SmartEmbeddingIndex
+    members: update, remove, query
+    exclude-members:
+
+::: lazyllm.tools.rag.doc_node.ImageDocNode
+    members: do_embedding, get_content, get_text
+    exclude-members:
+
+::: lazyllm.tools.rag.transform.AdaptiveTransform
+    members: transform
+    exclude-members:
+
+::: lazyllm.tools.rag.rerank.ModuleReranker
+    members: forward
     exclude-members:
 ::: lazyllm.tools.rag.utils.DocListManager
     members: 
@@ -281,6 +306,19 @@ lazyllm.tools.rag.transform.NodeTransform
 ::: lazyllm.tools.rag.global_metadata.GlobalMetadataDesc
     members: 
     exclude-members: 
+
+::: lazyllm.tools.rag.IndexBase.update
+    members:
+	exclude-members: 
+
+::: lazyllm.tools.rag.IndexBase.remove
+    members:
+	exclude-members: 
+
+::: lazyllm.tools.rag.IndexBase.query
+    members:
+	exclude-members:
+
 ::: lazyllm.tools.rag.index_base.IndexBase
     members: 
 
@@ -317,61 +355,36 @@ lazyllm.tools.rag.transform.NodeTransform
     exclude-members:
 
 ::: lazyllm.tools.DBManager
-    members: 
+    members: execute_query
     exclude-members:
 
 ::: lazyllm.tools.MongoDBManager
     members: 
     exclude-members:
-::: lazyllm.tools.rag.utils.DocListManager
-    members: 
-    exclude-members: 
-::: lazyllm.tools.rag.global_metadata.GlobalMetadataDesc
-    members: 
-    exclude-members: 
-::: lazyllm.tools.rag.index_base.IndexBase
-    members: 
 
-::: lazyllm.tools.BaseEvaluator
-    members: 
-    exclude-members:
-
-::: lazyllm.tools.ResponseRelevancy
-    members: 
-    exclude-members:    
-
-::: lazyllm.tools.Faithfulness
-    members: 
-    exclude-members: 
-
-::: lazyllm.tools.LLMContextRecall
-    members: 
-    exclude-members: 
-
-::: lazyllm.tools.NonLLMContextRecall
+::: lazyllm.tools.HttpTool
     members: 
     exclude-members:
 
-::: lazyllm.tools.ContextRelevance
+::: lazyllm.tools.agent.functionCall.StreamResponse
     members: 
     exclude-members:
 
-::: lazyllm.tools.HttpRequest
-    members: 
+::: lazyllm.tools.MCPClient
+    members: [call_tool, list_tools, get_tools, aget_tools, deploy]
     exclude-members:
 
-::: lazyllm.tools.JobDescription
-    members: 
-    exclude-members:
+::: lazyllm.tools.tools.GoogleSearch
+    members: forward
 
-::: lazyllm.tools.DBManager
+::: lazyllm.tools.tools.tencent_search.TencentSearch
     members: 
     exclude-members:
 
-::: lazyllm.tools.MongoDBManager
+::: lazyllm.tools.rag.web.WebUi
     members: 
     exclude-members:
 
-::: lazyllm.tools.HttpTool
-    members: 
+::: lazyllm.tools.http_request.http_executor_response.HttpExecutorResponse
+    members: extract_file, get_content_type
     exclude-members:
\ No newline at end of file
diff --git a/docs/en/Home/environment.md b/docs/en/Home/environment.md
index c0023ef14..64bc6cf0d 100644
--- a/docs/en/Home/environment.md
+++ b/docs/en/Home/environment.md
@@ -20,3 +20,143 @@
 - gradio_client: The Gradio client library allows users to load and use Gradio interfaces from a remote server.
 - protobuf: Google's Protocol Buffers Python implementation, used for serializing structured data.
 - setuptools: A Python package installation and distribution tool, used for packaging and distributing Python applications and libraries.
+
+
+## Install on Different Operating Systems
+
+### Windows
+
+#### Step 1: Install Git
+Download and install from:
+https://github.com/git-for-windows/git/releases/download/v2.50.1.windows.1/Git-2.50.1-64-bit.exe
+
+#### Step 2: Install Python
+Official website: https://python.p2hp.com/downloads/
+Recommended: Python 3.10.9
+1. Select the corresponding version to download, choose "Customize installation" during installation to customize the installation path, and check "Add to PATH" below
+!!! Note
+    If already installed, you can choose "uninstall" to remove it and reinstall
+
+![install_python](../assets/env/install_python.png)
+
+2. Customize the installation path, you can set it to D:\Python\Python310
+
+![set_python_install_path](../assets/env/set_python_install_path.png)
+
+#### Step 3: Install and Use VS Code
+1. Download and install VS Code
+2. Install Python extensions
+
+![vscode_extensions](../assets/env/vscode_extensions.png)
+
+3. After opening any Python file in VS Code, you can select the Python interpreter at the bottom
+
+![vscode_interpret](../assets/env/vscode_interpret.png)
+
+4. It will automatically detect all interpreters by default, choose one; or manually input D:\Python\Python310\python.exe twice
+
+![vscode_interpret_manual](../assets/env/vscode_interpret_manual.png)
+
+5. Choose Git Bash in the terminal to use a Linux-like command line environment
+
+![git_bash](../assets/env/git_bash.png)
+
+#### Step 4: Install LazyLLM
+1. Install lazyllm through command line in the terminal
+```bash
+pip install lazyllm
+```
+
+2. Set environment variable keys
+
+In PowerShell, set them using the following code:
+```powershell
+$env:LAZYLLM_SENSENOVA_API_KEY = "7ACAxxxxxxxxxxxxxxx"
+$env:LAZYLLM_SENSENOVA_SECRET_KEY = "2B0F7xxxxxxxxxxxxxxxx"
+```
+
+In Bash, set them using the following code:
+```bash
+export LAZYLLM_SENSENOVA_API_KEY="7ACACxxxxxxxxxxxxxxx"
+export LAZYLLM_SENSENOVA_SECRET_KEY="2B0F72xxxxxxxxxxxxxx"
+```
+
+### Windows with WSL
+
+#### Prerequisites
+1. Check the internal version, press Win + r and input "winver", requires greater than 19041; otherwise, you need to update the Windows system
+
+![winversion](../assets/env/winversion.png)
+![winversion2](../assets/env/winversion_2.png)
+
+2. Open Task Manager and confirm that CPU virtualization is enabled.
+
+![virtualize](../assets/env/virtualize.png)
+![winversion2](../assets/env/virtualize_2.png)
+
+If not enabled, you need to enable it and restart your computer
+
+![winversion3](../assets/env/virtualize_3.png)
+![winversion4](../assets/env/virtualize_4.png)
+
+#### Download WSL2 Kernel Update Package
+WSL 2 Linux kernel update package address: https://aka.ms/wsl2kernel
+After downloading, run the file directly
+
+#### Install Linux System
+1. Open PowerShell as Administrator, then view the list of available Linux distributions from the online store
+```powershell
+PS C:\Users\name> wsl --list --online
+The following is a list of valid distributions that can be installed.
+Use "wsl --install -d <distribution>" to install.
+
+NAME                            FRIENDLY NAME
+Ubuntu                          Ubuntu
+Debian                          Debian GNU/Linux
+kali-linux                      Kali Linux Rolling
+Ubuntu-18.04                    Ubuntu 18.04 LTS
+Ubuntu-20.04                    Ubuntu 20.04 LTS
+Ubuntu-22.04                    Ubuntu 22.04 LTS
+Ubuntu-24.04                    Ubuntu 24.04 LTS
+OracleLinux_7_9                 Oracle Linux 7.9
+OracleLinux_8_10                Oracle Linux 8.10
+OracleLinux_9_5                 Oracle Linux 9.5
+openSUSE-Leap-15.6              openSUSE Leap 15.6
+SUSE-Linux-Enterprise-15-SP6    SUSE Linux Enterprise 15 SP6
+openSUSE-Tumbleweed             openSUSE Tumbleweed
+```
+
+2. View installed systems (none by default)
+```powershell
+PS C:\Users\name> wsl --list --verbose
+No distributions have been installed for the Windows Subsystem for Linux.
+You can install distributions by visiting the Microsoft Store:
+https://aka.ms/wslstore
+```
+
+3. Install the specified system
+```powershell
+PS C:\Users\name>  wsl --install -d Ubuntu-22.04
+Installing: Ubuntu 22.04 LTS
+[=                          3.0%  
+```
+
+4. After installation, you need to input a username and password
+
+![passward](../assets/env/wsl_passward.png)
+
+5. View the mapped local path
+Press Win + r and input \\wsl$
+Click on the Ubuntu folder, right-click, and click "Map network drive" to add it to My Computer. Note that you can only open this disk after starting Ubuntu.
+
+![map](../assets/env/map.png)
+
+#### Using WSL in VS Code
+1. Install the WSL extension
+2. Open WSL in the terminal
+3. Install Python and lazyllm
+
+#### Using Local Command Line
+Search for WSL directly, open it, and you can enter the subsystem
+
+### macOS
diff --git a/docs/gen_mkdocs_yaml.py b/docs/gen_mkdocs_yaml.py
index efe3e3915..d6e9b1be0 100644
--- a/docs/gen_mkdocs_yaml.py
+++ b/docs/gen_mkdocs_yaml.py
@@ -1,15 +1,18 @@
 import os
+import yaml
 
-language = os.getenv('LAZYLLM_LANGUAGE', 'ENGLISH')
+language = os.getenv('LAZYLLM_LANGUAGE', 'ENGLISH').upper()
 assert language in ('ENGLISH', 'CHINESE')
 
 with open(os.path.join(os.path.dirname(os.path.abspath(__file__)), 'mkdocs.template.yml')) as f:
-    content = f.read()
+    config = yaml.safe_load(f)
 
 doc_dir = 'en' if language == 'ENGLISH' else 'zh'
-en_default = 'true' if language == 'ENGLISH' else 'false'
-zh_default = 'true' if language == 'CHINESE' else 'false'
-content = content.format(doc_dir=doc_dir, en_default=en_default, zh_default=zh_default)
+config['docs_dir'] = f'docs/{doc_dir}'
 
-with open(os.path.join(os.path.dirname(os.path.dirname(os.path.abspath(__file__))), 'mkdocs.yml'), 'w+') as f:
-    f.write(content)
+nav_file = 'nav_en.yml' if language == 'ENGLISH' else 'nav_zh.yml'
+with open(os.path.join(os.path.dirname(os.path.abspath(__file__)), nav_file)) as f:
+    config['nav'] = yaml.safe_load(f)
+
+with open(os.path.join(os.path.dirname(os.path.dirname(os.path.abspath(__file__))), 'mkdocs.yml'), 'w') as f:
+    yaml.dump(config, f, allow_unicode=True, sort_keys=False)
diff --git a/docs/mkdocs.template.yml b/docs/mkdocs.template.yml
index 605c889b6..079252bf3 100644
--- a/docs/mkdocs.template.yml
+++ b/docs/mkdocs.template.yml
@@ -2,59 +2,6 @@ site_name: LazyLLM
 repo_url: https://github.com/LazyAGI/LazyLLM
 repo_name: LazyAGI/LazyLLM
 docs_dir: docs/{doc_dir}
-nav:
-- Home:
-  - Getting Started: index.md
-  - FAQ: Home/FAQ.md
-  - Environment: Home/environment.md
-  - Supported Models: Home/model_list.md
-- Cookbook:
-  - Chatbot: Cookbook/robot.md
-  - Painting Master: Cookbook/painting_master.md
-  - Multimodal Chatbot: Cookbook/multimodal_robot.md
-  - Great Writer: Cookbook/great_writer.md
-  - RAG: Cookbook/rag.md
-  - Streaming: Cookbook/streaming.md
-- Best Practice:
-  - Flow: Best Practice/flow.md
-  - Flowapp: Best Practice/flowapp.md
-  - Module: Best Practice/module.md
-  - Prompt: Best Practice/prompt.md
-  - Rag: Best Practice/rag.md
-  - FunctionCall: Best Practice/functionCall.md
-  - Stream: Best Practice/stream.md
-- Advanced Topics:
-  - Contribution: Advanced Topics/contribution.md
-  - Changelog: Advanced Topics/changelog.md
-- Api Reference:
-  - Cli: API Reference/cli.md
-  - Common: API Reference/common.md
-  - Components: API Reference/components.md
-  - Configs: API Reference/configs.md
-  - Flow: API Reference/flow.md
-  - Module: API Reference/module.md
-  - Tools: API Reference/tools.md
-- Tutorials:
-  - Overview: Tutorial/index.md
-  - Lesson 1: Tutorial/1.md
-  - Lesson 2: Tutorial/2.md
-  - Lesson 3: Tutorial/3.md
-  - Lesson 4: Tutorial/4.md
-  - Lesson 5: Tutorial/5.md
-  - Lesson 6: Tutorial/6.md
-  - Lesson 7: Tutorial/7.md
-  - Lesson 8: Tutorial/8.md
-  - Lesson 9: Tutorial/9.md
-  - Lesson 10: Tutorial/10.md
-  - Lesson 11: Tutorial/11.md
-  - Lesson 12: Tutorial/12.md
-  - Lesson 13: Tutorial/13.md
-  - Lesson 14: Tutorial/14.md
-  - Lesson 15: Tutorial/15.md
-  - Lesson 16: Tutorial/16.md
-  - Lesson 17: Tutorial/17.md
-  - Lesson 18: Tutorial/18.md
-  - Lesson 19: Tutorial/19.md
 theme:
   language: en
   name: material
@@ -109,6 +56,7 @@ extra:
     link: https://github.com/LazyAGI/LazyLLM
 extra_javascript:
   - 'assets/js/assistant.js'
+  - 'assets/js/lang-redirect.js'
   - 'https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-mml-chtml.js'
 plugins:
 - search:
diff --git a/docs/nav_en.yml b/docs/nav_en.yml
new file mode 100644
index 000000000..e8df2b178
--- /dev/null
+++ b/docs/nav_en.yml
@@ -0,0 +1,54 @@
+- Home:
+  - Getting Started: index.md
+  - FAQ: Home/FAQ.md
+  - Environment: Home/environment.md
+  - Supported Models: Home/model_list.md
+- Cookbook:
+  - Chatbot: Cookbook/robot.md
+  - Painting Master: Cookbook/painting_master.md
+  - Multimodal Chatbot: Cookbook/multimodal_robot.md
+  - Great Writer: Cookbook/great_writer.md
+  - RAG: Cookbook/rag.md
+  - Streaming: Cookbook/streaming.md
+- Best Practice:
+  - Flow: Best Practice/flow.md
+  - Flowapp: Best Practice/flowapp.md
+  - Module: Best Practice/module.md
+  - Prompt: Best Practice/prompt.md
+  - Rag: Best Practice/rag.md
+  - FunctionCall: Best Practice/functionCall.md
+  - Stream: Best Practice/stream.md
+- Advanced Topics:
+  - Contribution: Advanced Topics/contribution.md
+  - Changelog: Advanced Topics/changelog.md
+- Api Reference:
+  - CLI: API Reference/cli.md
+  - Common: API Reference/common.md
+  - Components: API Reference/components.md
+  - Flow: API Reference/flow.md
+  - Module: API Reference/module.md
+  - Tools: API Reference/tools.md
+  - Configs: API Reference/configs.md
+  - Launcher: API Reference/launcher.md
+  - Hook: API Reference/hook.md
+- Tutorials:
+  - Overview: Tutorial/index.md
+  # - 1. RAG Fundamentals: Tutorial/1.md
+  # - 2. Quickstart with RAG: Tutorial/2.md
+  # - 3. Mastering LLM with LazyLLM: Tutorial/3.md
+  # - 4. Engineering Basics: Tutorial/4.md
+  # - 5. Custom Document Readers: Tutorial/5.md
+  # - 6. Retrieval Optimization: Tutorial/6.md
+  # - 7. Hands-on Retrieval Tuning: Tutorial/7.md
+  # - 8. Custom Retrieval Strategies: Tutorial/8.md
+  # - 9. Domain-Specific Fine-Tuning: Tutorial/9.md
+  # - 10. Deepseek Integration: Tutorial/10.md
+  # - 11. Performance Optimization: Tutorial/11.md
+  # - 12. Speed-Up Techniques: Tutorial/12.md
+  # - 13. Multimodal RAG: Tutorial/13.md
+  # - 14. Academic Paper QA: Tutorial/14.md
+  # - 15. Statistical RAG: Tutorial/15.md
+  # - 16. Advanced Paper QA: Tutorial/16.md
+  # - 17. Enterprise RAG Solutions: Tutorial/17.md
+  # - 18. Agentic RAG: Tutorial/18.md
+  # - 19. Knowledge Graph RAG: Tutorial/19.md
diff --git a/docs/nav_zh.yml b/docs/nav_zh.yml
new file mode 100644
index 000000000..167380f22
--- /dev/null
+++ b/docs/nav_zh.yml
@@ -0,0 +1,54 @@
+- 首页:
+  - 快速开始: index.md
+  - 常见问题: Home/FAQ.md
+  - 环境配置: Home/environment.md
+  - 支持模型: Home/model_list.md
+- 使用示例:
+  - 聊天机器人: Cookbook/robot.md
+  - 绘画大师: Cookbook/painting_master.md
+  - 多模态聊天: Cookbook/multimodal_robot.md
+  - 写作大师: Cookbook/great_writer.md
+  - 检索增强: Cookbook/rag.md
+  - 流式输出: Cookbook/streaming.md
+- 最佳实践:
+  - 工作流: Best Practice/flow.md
+  - 流程应用: Best Practice/flowapp.md
+  - 模块: Best Practice/module.md
+  - 提示词: Best Practice/prompt.md
+  - 检索增强: Best Practice/rag.md
+  - 函数调用: Best Practice/functionCall.md
+  - 流式处理: Best Practice/stream.md
+- 高级主题:
+  - 贡献指南: Advanced Topics/contribution.md
+  - 更新日志: Advanced Topics/changelog.md
+- API参考:
+  - 命令行: API Reference/cli.md
+  - 通用: API Reference/common.md
+  - 组件: API Reference/components.md
+  - 工作流: API Reference/flow.md
+  - 模块: API Reference/module.md
+  - 工具: API Reference/tools.md
+  - 配置: API Reference/configs.md
+  - 启动器: API Reference/launcher.md
+  - 钩子: API Reference/hook.md
+- 教程:
+  - 概述: Tutorial/index.md
+  - 1. RAG原理解读: Tutorial/1.md
+  - 2. 快速上手RAG: Tutorial/2.md
+  - 3. Lazy玩转LLM: Tutorial/3.md
+  - 4. 工程化入门: Tutorial/4.md
+  - 5. 自定义Reader: Tutorial/5.md
+  - 6. 召回优化技巧: Tutorial/6.md
+  - 7. 召回优化实战: Tutorial/7.md
+  - 8. 自定义召回策略: Tutorial/8.md
+  - 9. 领域微调实践: Tutorial/9.md
+  - 10. Deepseek实战: Tutorial/10.md
+  - 11. 性能优化指南: Tutorial/11.md
+  - 12. 性能加速实践: Tutorial/12.md
+  - 13. 多模态RAG: Tutorial/13.md
+  - 14. 学术论文问答: Tutorial/14.md
+  - 15. RAG的统计问题: Tutorial/15.md
+  - 16. 论文问答进阶: Tutorial/16.md
+  - 17. 企业级RAG方案: Tutorial/17.md
+  - 18. Agentic RAG: Tutorial/18.md
+  - 19. 知识图谱 RAG: Tutorial/19.md
diff --git a/docs/scripts/lazynote/manager/base.py b/docs/scripts/lazynote/manager/base.py
index 3e4dc4aeb..eb93fb922 100644
--- a/docs/scripts/lazynote/manager/base.py
+++ b/docs/scripts/lazynote/manager/base.py
@@ -137,7 +137,7 @@ def traverse(self, obj: object, skip_modules: Optional[List[str]] = None) -> Non
             skip_modules = []
 
         if get_member_type(obj) == MemberType.PACKAGE:
-            for importer, modname, ispkg in pkgutil.walk_packages(obj.__path__, obj.__name__ + "."):
+            for _, modname, ispkg in pkgutil.walk_packages(obj.__path__, obj.__name__ + "."):
                 if any(modname.startswith(skip_mod) for skip_mod in skip_modules):
                     continue
                 if ispkg:
@@ -172,7 +172,7 @@ async def atraverse(self, obj: object, skip_modules: Optional[List[str]] = None,
 
         if get_member_type(obj) == MemberType.PACKAGE:
             tasks = []
-            for importer, modname, ispkg in pkgutil.walk_packages(obj.__path__, obj.__name__ + "."):
+            for _, modname, ispkg in pkgutil.walk_packages(obj.__path__, obj.__name__ + "."):
                 if any(modname.startswith(skip_mod) for skip_mod in skip_modules):
                     continue
                 if ispkg:
diff --git a/docs/zh/API Reference/common.md b/docs/zh/API Reference/common.md
index 3df31ac99..c18768b2c 100644
--- a/docs/zh/API Reference/common.md	
+++ b/docs/zh/API Reference/common.md	
@@ -4,8 +4,23 @@
     options:
       heading_level: 3
 
+::: lazyllm.common.registry.LazyDict
+    options:
+      heading_level: 3
+      members: [remove, set_default]
+
 ---
 
+::: lazyllm.common.common.ResultCollector
+    members: 
+    - keys
+    - items
+    exclude-members:
+
+::: lazyllm.common.common.EnvVarContextManager
+    members: 
+    exclude-members:
+
 ## Bind
 
 ::: lazyllm.common.bind
@@ -22,15 +37,88 @@
 
 ---
 
+## Identity
+
+::: lazyllm.common.Identity
+    options:
+      heading_level: 3
+
+---
+
 ## Compilation
 
 ::: lazyllm.common.compile_func
     options:
       heading_level: 3
 
+---
+
 ## Queue
 
 ::: lazyllm.common.FileSystemQueue
     members: enqueue, dequeue, peek, size, clear
     exclude-members:
-    
\ No newline at end of file
+
+::: lazyllm.common.multiprocessing.SpawnProcess
+    members: start
+
+::: lazyllm.common.queue.SQLiteQueue
+    options:
+      heading_level: 3
+
+::: lazyllm.common.ReadOnlyWrapper
+    members: set, isNone
+    exclude-members:
+
+::: lazyllm.common.queue.RedisQueue
+    members: 
+    exclude-members:
+
+---
+
+## Multiprocessing
+
+::: lazyllm.common.ForkProcess
+    members: work, start
+    exclude-members:
+
+---
+
+## Options
+
+::: lazyllm.common.Option
+    members: 
+    exclude-members:
+
+---
+
+## DynamicDescriptor
+
+::: lazyllm.common.DynamicDescriptor
+    members:
+    - Impl
+    exclude-members:
+
+::: lazyllm.common.CaseInsensitiveDict
+    members: 
+    exclude-members:
+
+::: lazyllm.common.ProcessPoolExecutor
+    members: submit
+    exclude-members:
+
+---
+
+## Threading
+
+::: lazyllm.common.Thread
+    members: work, get_result
+    exclude-members:
+
+---
+
+## LazyLLMCMD
+
+::: lazyllm.common.LazyLLMCMD
+    members: with_cmd, get_args
+    exclude-members:
diff --git a/docs/zh/API Reference/components.md b/docs/zh/API Reference/components.md
index cff6b2c84..b87852208 100644
--- a/docs/zh/API Reference/components.md	
+++ b/docs/zh/API Reference/components.md	
@@ -12,6 +12,14 @@
     options:
       heading_level: 3
 
+::: lazyllm.components.deploy.LazyLLMDeployBase
+    options:
+      heading_level: 3
+
+::: lazyllm.components.deploy.LazyLLMDeployBase.extract_result
+    options:
+      heading_level: 3
+      
 ::: lazyllm.components.finetune.FlagembeddingFinetune
     options:
       heading_level: 3
@@ -29,6 +37,7 @@
 ::: lazyllm.components.deploy.Lightllm
     options:
       heading_level: 3
+      members: [cmd, geturl, extract_result]
 
 ::: lazyllm.components.deploy.Vllm
     options:
@@ -37,44 +46,49 @@
 ::: lazyllm.components.deploy.LMDeploy
     options:
       heading_level: 3
+      members: [cmd, geturl, extract_result]
 
-::: lazyllm.components.auto.AutoDeploy
+::: lazyllm.components.deploy.base.DummyDeploy
     options:
       heading_level: 3
 
-::: lazyllm.components.deploy.EmbeddingDeploy
+::: lazyllm.components.auto.AutoDeploy
     options:
       heading_level: 3
 
-::: lazyllm.components.deploy.embed.RerankDeploy
+::: lazyllm.components.deploy.embed.AbstractEmbedding
     options:
       heading_level: 3
 
-::: lazyllm.components.deploy.Mindie
+::: lazyllm.components.deploy.EmbeddingDeploy
     options:
       heading_level: 3
-::: lazyllm.components.deploy.OCRDeploy
+
+::: lazyllm.components.deploy.embed.RerankDeploy
     options:
       heading_level: 3
----
 
-## Launcher
+::: lazyllm.components.deploy.embed.LazyHuggingFaceRerank
+    options:
+      heading_level: 3
+      members: [load_reranker, rebuild]
 
-::: lazyllm.launcher.EmptyLauncher
+::: lazyllm.components.deploy.Mindie
     options:
       heading_level: 3
 
-::: lazyllm.launcher.RemoteLauncher
+      
+::: lazyllm.components.deploy.OCRDeploy
     options:
       heading_level: 3
+---
 
-::: lazyllm.launcher.SlurmLauncher
+::: lazyllm.components.deploy.relay.base.RelayServer
     options:
       heading_level: 3
-      filters:
-      - '!get_idle'
+      members: [cmd, geturl]
 
-::: lazyllm.launcher.ScoLauncher
+::: lazyllm.components.deploy.OCRDeploy
     options:
       heading_level: 3
 
@@ -83,12 +97,12 @@
 ## Prompter
 
 ::: lazyllm.components.prompter.LazyLLMPrompterBase
-  options:
-    heading_level: 3
+    options:
+      heading_level: 3
     inherited_members:
       - generate_prompt
       - get_response
-    members: false
+    members: [pre_hook]
 
 ::: lazyllm.components.prompter.EmptyPrompter
     options:
@@ -118,12 +132,45 @@
 
 ---
 
-## Register
+## MultiModal
+
+### Text to Image
 
-::: lazyllm.common.Register
+::: lazyllm.components.StableDiffusionDeploy
     options:
-      heading_level: 3
+      heading_level: 4
+
+### Visual Question Answering
+
+Reference [LMDeploy][lazyllm.components.deploy.LMDeploy], which supports the Visual Question Answering model.
+
+### Text to Sound
+
+::: lazyllm.components.TTSDeploy
+    options:
+      heading_level: 4
+
+::: lazyllm.components.ChatTTSDeploy
+    options:
+      heading_level: 4
+
+::: lazyllm.components.BarkDeploy
+    options:
+      heading_level: 4
 
+::: lazyllm.components.MusicGenDeploy
+    options:
+      heading_level: 4
+
+### Speech to Text
+
+::: lazyllm.components.SenseVoiceDeploy
+    options:
+      heading_level: 4
+
+::: lazyllm.components.deploy.speech_to_text.sense_voice.SenseVoice
+    options:
+      heading_level: 4
 ---
 
 ## ModelManager
@@ -178,38 +225,8 @@
 
 ---
 
-## MultiModal
-
-### Text to Image
-
-::: lazyllm.components.StableDiffusionDeploy
+## ComponentBase
+::: lazyllm.components.core.ComponentBase
     options:
-      heading_level: 4
-
-### Visual Question Answering
-
-Reference [LMDeploy][lazyllm.components.deploy.LMDeploy], which supports the Visual Question Answering model.
-
-### Text to Sound
-
-::: lazyllm.components.TTSDeploy
-    options:
-      heading_level: 4
-
-::: lazyllm.components.ChatTTSDeploy
-    options:
-      heading_level: 4
-
-::: lazyllm.components.BarkDeploy
-    options:
-      heading_level: 4
-
-::: lazyllm.components.MusicGenDeploy
-    options:
-      heading_level: 4
-
-### Speech to Text
-
-::: lazyllm.components.SenseVoiceDeploy
-    options:
-      heading_level: 4
+      heading_level: 3
+      members: [apply, cmd]
diff --git a/docs/zh/API Reference/configs.md b/docs/zh/API Reference/configs.md
index 7f92ce611..74c3db8dd 100644
--- a/docs/zh/API Reference/configs.md	
+++ b/docs/zh/API Reference/configs.md	
@@ -4,4 +4,7 @@
       - done
       - getenv
       - add
-      - get_all_configs
\ No newline at end of file
+      - get_all_configs
+      - get_config
+      - temp
+      - refresh
\ No newline at end of file
diff --git a/docs/zh/API Reference/flow.md b/docs/zh/API Reference/flow.md
index f21ceeaf3..3eb015de1 100644
--- a/docs/zh/API Reference/flow.md	
+++ b/docs/zh/API Reference/flow.md	
@@ -1,19 +1,26 @@
 ::: lazyllm.flow.FlowBase
-    members: is_root, ancestor, for_each
+    members: is_root, ancestor, for_each, id
     exclude-members:
 
 ::: lazyllm.flow.LazyLLMFlowsBase
     members: 
+    - register_hook
+    - unregister_hook
+    - clear_hooks
+    - set_sync
+    - wait
+    - invoke
+    - bind
     exclude-members:
 
 ::: lazyllm.flow.Pipeline
-    members: 
+    members: output
     exclude-members:
 
 ::: lazyllm.flow.save_pipeline_result
 
 ::: lazyllm.flow.Parallel
-    members: 
+    members: join, sequential
     exclude-members:
 
 ::: lazyllm.flow.Diverter
diff --git a/docs/zh/API Reference/hook.md b/docs/zh/API Reference/hook.md
new file mode 100644
index 000000000..d84ad89da
--- /dev/null
+++ b/docs/zh/API Reference/hook.md	
@@ -0,0 +1,3 @@
+::: lazyllm.hook.LazyLLMHook
+    members: pre_hook, post_hook, report
+    exclude-members:
\ No newline at end of file
diff --git a/docs/zh/API Reference/launcher.md b/docs/zh/API Reference/launcher.md
new file mode 100644
index 000000000..7a4a72dd9
--- /dev/null
+++ b/docs/zh/API Reference/launcher.md	
@@ -0,0 +1,35 @@
+::: lazyllm.LazyLLMLaunchersBase
+    options:
+      members:
+      - makejob
+      - launch
+      - cleanup
+      - wait
+      - clone
+
+::: lazyllm.launcher.EmptyLauncher
+    options:
+      heading_level: 3
+
+::: lazyllm.launcher.RemoteLauncher
+    options:
+      heading_level: 3
+
+::: lazyllm.launcher.SlurmLauncher
+    options:
+      heading_level: 3
+      filters:
+      - '!get_idle'
+
+::: lazyllm.launcher.ScoLauncher
+    options:
+      heading_level: 3
+
+::: lazyllm.launcher.Job
+    options:
+      heading_level: 3
+
+::: lazyllm.launcher.K8sLauncher
+    options:
+      heading_level: 3
+      members: [makejob, launch]
\ No newline at end of file
diff --git a/docs/zh/API Reference/module.md b/docs/zh/API Reference/module.md
index af1c71769..4d8f579a9 100644
--- a/docs/zh/API Reference/module.md	
+++ b/docs/zh/API Reference/module.md	
@@ -10,7 +10,14 @@
       - start
       - restart
       - update
-        
+
+::: lazyllm.module.servermodule.LLMBase
+    options:
+      members:
+      - prompt
+      - formatter
+      - share
+
 ::: lazyllm.module.ActionModule
     options:
       members:
@@ -48,17 +55,25 @@
       members:
 
 ::: lazyllm.module.TrialModule
-    members: start
+    members: [start]
     exclude-members:
 
 ::: lazyllm.module.OnlineChatModule
     members:
     exclude-members:
 
+::: lazyllm.module.llms.onlinemodule.supplier.doubao.DoubaoModule
+    members:
+    exclude-members:
+
 ::: lazyllm.module.OnlineEmbeddingModule
     members:
     exclude-members:
 
+::: lazyllm.module.llms.onlinemodule.supplier.openai.OpenAIEmbedding
+    members:
+    exclude-members:
+
 ::: lazyllm.module.OnlineChatModuleBase
     options:
       members:
@@ -68,3 +83,11 @@
 ::: lazyllm.module.OnlineEmbeddingModuleBase
     members:
     exclude-members: forward
+
+::: lazyllm.module.llms.onlinemodule.supplier.doubao.DoubaoEmbedding
+    options:
+      members:
+    
+::: lazyllm.module.llms.onlinemodule.fileHandler.FileHandlerBase
+    members: get_finetune_data
+    exclude-members: 
diff --git a/docs/zh/API Reference/tools.md b/docs/zh/API Reference/tools.md
index 4d8da9bad..0f674d9ab 100644
--- a/docs/zh/API Reference/tools.md	
+++ b/docs/zh/API Reference/tools.md	
@@ -1,15 +1,13 @@
-::: lazyllm.tools.Document
+::: lazyllm.tools.IntentClassifier
     members:
+    - intent_promt_hook
+    - post_process_result
     exclude-members:
 
-::: lazyllm.tools.rag.store.ChromadbStore
+::: lazyllm.tools.Document
     members:
     exclude-members:
 
-::: lazyllm.tools.rag.store.MilvusStore
-    members:
-    exclude-members:
-    
 ::: lazyllm.tools.rag.store.ChromadbStore
     members:
     exclude-members:
@@ -20,95 +18,67 @@
     
 ::: lazyllm.tools.rag.readers.ReaderBase
     members:
-	exclude-members:
+    exclude-members:
 
-::: lazyllm.tools.rag.component.bm25
+::: lazyllm.tools.rag.readers.readerBase.LazyLLMReaderBase
     members:
 	exclude-members:
 
-::: lazyllm.tools.rag.doc_to_db.DocInfoSchemaItem
+::: lazyllm.tools.rag.readers.PandasExcelReader
     members:
-	exclude-members:
+    exclude-members:
 
-::: lazyllm.tools.rag.doc_to_db.DocGenreAnalyser
+::: lazyllm.tools.rag.readers.PDFReader
     members:
-	exclude-members:
+    exclude-members:
 
-::: lazyllm.tools.rag.doc_to_db.DocInfoSchemaAnalyser
+::: lazyllm.tools.rag.readers.PPTXReader
     members:
-	exclude-members:
+    exclude-members:
 
-::: lazyllm.tools.rag.doc_to_db.DocInfoExtractor
+::: lazyllm.tools.rag.readers.VideoAudioReader
     members:
-	exclude-members:
+    exclude-members:
 
-::: lazyllm.tools.rag.doc_to_db.DocToDbProcessor
+::: lazyllm.tools.SqlManager
     members: 
-        - extract_info_from_docs
-        - analyze_info_schema_by_llm
-	exclude-members:
-
-::: lazyllm.tools.rag.doc_to_db.extract_db_schema_from_files
-
-::: lazyllm.tools.rag.readers.DocxReader
-    members:
-	exclude-members:
-
-::: lazyllm.tools.rag.readers.EpubReader
-    members:
-	exclude-members:
-
-::: lazyllm.tools.rag.readers.HWPReader
-    members:
-	exclude-members:
-
-::: lazyllm.tools.rag.readers.ImageReader
-    members:
-	exclude-members:
-
-::: lazyllm.tools.rag.readers.IPYNBReader
-    members:
-	exclude-members:
-
-::: lazyllm.tools.rag.readers.MagicPDFReader
-    members:
-	exclude-members:
-
-::: lazyllm.tools.rag.readers.MarkdownReader
-    members:
-        - remove_images
-        - remove_hyperlinks
-	exclude-members:
-
-::: lazyllm.tools.rag.readers.MboxReader
-    members:
-	exclude-members:
+        - get_session
+        - check_connection
+        - set_desc
+        - get_all_tables
+        - get_table_orm_class
+        - execute_commit
+        - execute_query
+        - create_table
+        - drop_table
+        - insert_values
+    exclude-members:
 
-::: lazyllm.tools.rag.component.bm25
+::: lazyllm.tools.rag.component.bm25.BM25
     members:
-	exclude-members:
+    exclude-members:
 
 ::: lazyllm.tools.rag.doc_to_db.DocInfoSchemaItem
     members:
-	exclude-members:
+    exclude-members:
 
 ::: lazyllm.tools.rag.doc_to_db.DocGenreAnalyser
     members:
-	exclude-members:
+    exclude-members:
 
 ::: lazyllm.tools.rag.doc_to_db.DocInfoSchemaAnalyser
-    members:
+    members: analyse_info_schema
 	exclude-members:
 
 ::: lazyllm.tools.rag.doc_to_db.DocInfoExtractor
-    members:
+    members: extract_doc_info
 	exclude-members:
 
 ::: lazyllm.tools.rag.doc_to_db.DocToDbProcessor
     members: 
         - extract_info_from_docs
         - analyze_info_schema_by_llm
-	exclude-members:
+    exclude-members:
 
 ::: lazyllm.tools.rag.doc_to_db.extract_db_schema_from_files
 
@@ -132,7 +102,7 @@
     members:
 	exclude-members:
 
-::: lazyllm.tools.rag.readers.MagicPDFReader
+::: lazyllm.tools.rag.readers.MineruPDFReader
     members:
 	exclude-members:
 
@@ -146,9 +116,15 @@
     members:
 	exclude-members:
 
+::: lazyllm.tools.rag.default_index.DefaultIndex
+    members:
+        - update
+        - remove
+        - query
+    exclude-members: 
+
 ::: lazyllm.tools.Reranker
-    members: register_reranker
-    members: register_reranker
+    members: [register_reranker]
     exclude-members: forward
 
 ::: lazyllm.tools.Retriever
@@ -156,17 +132,46 @@
     exclude-members: forward
 
 ::: lazyllm.tools.rag.retriever.TempDocRetriever
-    members:
+    members: [create_node_group, add_subretriever]
     exclude-members: 
 
-::: lazyllm.tools.rag.retriever.TempDocRetriever
-    members:
+::: lazyllm.tools.rag.retriever.UrlDocument
+    members: [find]
     exclude-members: 
 
 ::: lazyllm.tools.rag.DocManager
     members:
 	exclude-members:
 
+::: lazyllm.tools.rag.utils.SqliteDocListManager
+    members:
+      - table_inited
+      - get_status_cond_and_params
+      - validate_paths
+      - update_need_reparsing
+      - list_files
+      - get_docs
+      - set_docs_new_meta
+      - fetch_docs_changed_meta
+      - list_all_kb_group
+      - add_kb_group
+      - list_kb_group_files
+      - delete_unreferenced_doc
+      - get_docs_need_reparse
+      - get_existing_paths_by_pattern
+      - update_file_message
+      - update_file_status
+      - add_files_to_kb_group
+      - delete_files_from_kb_group
+      - get_file_status
+      - update_kb_group
+      - release
+	exclude-members:
+
+::: lazyllm.tools.rag.data_loaders.DirectoryReader
+    members: load_data
+	exclude-members:
+
 ::: lazyllm.tools.SentenceSplitter
     members:
     exclude-members:
@@ -191,28 +196,12 @@ lazyllm.tools.rag.transform.NodeTransform
     members:
     exclude-members:
 
-::: lazyllm.tools.rag.dataReader.SimpleDirectoryReader
-    members:
-    exclude-members:
-
-::: lazyllm.tools.rag.dataReader.FileReader
-    members:
-    exclude-members:
-
-lazyllm.tools.rag.transform.NodeTransform
-    members:
-    exclude-members:
-
-::: lazyllm.tools.rag.transform.TransformArgs
-    members:
-    exclude-members:
-
-::: lazyllm.tools.rag.similarity.register_similarity
-    members:
+::: lazyllm.tools.rag.doc_node.QADocNode
+    members: get_text
     exclude-members:
 
-::: lazyllm.tools.rag.doc_node.DocNode
-    members:
+::: lazyllm.tools.rag.doc_processor.DocumentProcessor
+    members: register_algorithm, drop_algorithm
     exclude-members:
 
 ::: lazyllm.tools.rag.dataReader.SimpleDirectoryReader
@@ -222,14 +211,17 @@ lazyllm.tools.rag.transform.NodeTransform
 ::: lazyllm.tools.rag.dataReader.FileReader
     members:
     exclude-members:
-    
+
+::: lazyllm.tools.rag.web.DocWebModule
+    members:
+    exclude-members:    
     
 ::: lazyllm.tools.WebModule
     members:
     exclude-members: forward
 
 ::: lazyllm.tools.CodeGenerator
-    members: 
+    members: [choose_prompt]
     exclude-members: forward
 
 ::: lazyllm.tools.ParameterExtractor
@@ -237,7 +229,7 @@ lazyllm.tools.rag.transform.NodeTransform
     exclude-members: forward
 
 ::: lazyllm.tools.QustionRewrite
-    members: 
+    members: choose_prompt
     exclude-members: forward
 
 ::: lazyllm.tools.agent.toolsManager.ToolManager
@@ -272,8 +264,20 @@ lazyllm.tools.rag.transform.NodeTransform
     members: 
     exclude-members: forward
 
-::: lazyllm.tools.IntentClassifier
-    members: 
+::: lazyllm.tools.rag.smart_embedding_index.SmartEmbeddingIndex
+    members: update, remove, query
+    exclude-members:
+
+::: lazyllm.tools.rag.doc_node.ImageDocNode
+    members: do_embedding, get_content, get_text
+    exclude-members:
+
+::: lazyllm.tools.rag.transform.AdaptiveTransform
+    members: transform
+    exclude-members:
+
+::: lazyllm.tools.rag.rerank.ModuleReranker
+    members: forward
     exclude-members:
 ::: lazyllm.tools.rag.utils.DocListManager
     members: 
@@ -284,6 +288,18 @@ lazyllm.tools.rag.transform.NodeTransform
 ::: lazyllm.tools.rag.index_base.IndexBase
     members: 
 
+::: lazyllm.tools.rag.IndexBase.update
+    members:
+	exclude-members: 
+
+::: lazyllm.tools.rag.IndexBase.remove
+    members:
+	exclude-members: 
+
+::: lazyllm.tools.rag.IndexBase.query
+    members:
+	exclude-members:
+
 ::: lazyllm.tools.BaseEvaluator
     members: 
     exclude-members:
@@ -317,61 +333,36 @@ lazyllm.tools.rag.transform.NodeTransform
     exclude-members:
 
 ::: lazyllm.tools.DBManager
-    members: 
+    members: execute_query
     exclude-members:
 
 ::: lazyllm.tools.MongoDBManager
     members: 
     exclude-members:
-::: lazyllm.tools.rag.utils.DocListManager
-    members: 
-    exclude-members: 
-::: lazyllm.tools.rag.global_metadata.GlobalMetadataDesc
-    members: 
-    exclude-members: 
-::: lazyllm.tools.rag.index_base.IndexBase
-    members: 
-
-::: lazyllm.tools.BaseEvaluator
-    members: 
-    exclude-members:
-
-::: lazyllm.tools.ResponseRelevancy
-    members: 
-    exclude-members:    
 
-::: lazyllm.tools.Faithfulness
-    members: 
-    exclude-members: 
-
-::: lazyllm.tools.LLMContextRecall
-    members: 
-    exclude-members: 
-
-::: lazyllm.tools.NonLLMContextRecall
+::: lazyllm.tools.HttpTool
     members: 
     exclude-members:
 
-::: lazyllm.tools.ContextRelevance
+::: lazyllm.tools.agent.functionCall.StreamResponse
     members: 
     exclude-members:
 
-::: lazyllm.tools.HttpRequest
-    members: 
+::: lazyllm.tools.MCPClient
+    members: [call_tool, list_tools, get_tools, aget_tools, deploy]
     exclude-members:
 
-::: lazyllm.tools.JobDescription
-    members: 
-    exclude-members:
+::: lazyllm.tools.tools.GoogleSearch
+    members: forward
 
-::: lazyllm.tools.DBManager
+::: lazyllm.tools.tools.tencent_search.TencentSearch
     members: 
     exclude-members:
 
-::: lazyllm.tools.MongoDBManager
+::: lazyllm.tools.rag.web.WebUi
     members: 
     exclude-members:
 
-::: lazyllm.tools.HttpTool
-    members: 
+::: lazyllm.tools.http_request.http_executor_response.HttpExecutorResponse
+    members: extract_file, get_content_type
     exclude-members:
\ No newline at end of file
diff --git a/docs/zh/Home/environment.md b/docs/zh/Home/environment.md
index 6754c4fe7..b095fc30c 100644
--- a/docs/zh/Home/environment.md
+++ b/docs/zh/Home/environment.md
@@ -20,3 +20,145 @@
 - gradio_client: Gradio的客户端库，允许用户从远程服务器加载和使用Gradio界面。
 - protobuf: Google的Protocol Buffers的Python实现，用于序列化结构化数据。
 - setuptools: 一个Python包安装和分发工具，用于打包和分发Python应用程序和库。
+
+
+## 在不同操作系统上安装
+
+### windows
+
+#### step 1: 安装git 
+下载并安装：
+https://github.com/git-for-windows/git/releases/download/v2.50.1.windows.1/Git-2.50.1-64-bit.exe
+
+#### step 2: 安装python
+官网：https://python.p2hp.com/downloads/
+推荐： python3.10.9
+1. 选择对应版本下载，安装时选择 Customize installation 自定义安装路径，勾选下面的加入PATH
+!!! Note
+    如果已经安装过可选择 uninstall 卸载后重新安装
+
+![install_python](../assets/env/install_python.png)
+
+2. 自定义安装路径为，可以设置为 D:\Python\Python310
+
+![set_python_install_path](../assets/env/set_python_install_path.png)
+
+
+#### step 3: 安装和使用VS Code
+1. 下载vscode并安装
+2. 安装python组件
+
+![vscode_extensions](../assets/env/vscode_extensions.png)
+
+3. 在vscode中随便打开一个python文件后，可在最下面选择python解释器
+
+![vscode_interpret](../assets/env/vscode_interpret.png)
+
+4. 默认会识别到所有的解释器，选择一个；或者手动输入两遍 D:\Python\Python310\python.exe
+
+![vscode_interpret_manual](../assets/env/vscode_interpret_manual.png)
+
+5. 终端中选用git bash 就可以使用类似 Linux 的命令行环境
+
+![git_bash](../assets/env/git_bash.png)
+
+#### step 4: 安装LazyLLM
+1. 在终端中通过命令行安装lazyllm
+```code
+pip install lazyllm
+```
+
+2. 设置环境变量 key
+
+在powershell中，通过如下代码设置
+```code
+$env:LAZYLLM_SENSENOVA_API_KEY = "7ACAxxxxxxxxxxxxxxx"
+$env:LAZYLLM_SENSENOVA_SECRET_KEY = "2B0F7xxxxxxxxxxxxxxxx"
+```
+
+在bash中，通过如下代码设置
+```code
+export LAZYLLM_SENSENOVA_API_KEY="7ACACxxxxxxxxxxxxxxx"
+export LAZYLLM_SENSENOVA_SECRET_KEY="2B0F72xxxxxxxxxxxxxx"
+```
+
+### windows with wsl
+
+#### 前置条件
+1. 查看内部版本,Win + r 输入winver 要求大于19041；否则需更新windows系统
+
+![winversion](../assets/env/winversion.png)
+![winversion2](../assets/env/winversion_2.png)
+
+2. 打开任务管理器，确认cpu虚拟化开启。
+
+![virtualize](../assets/env/virtualize.png)
+![winversion2](../assets/env/virtualize_2.png)
+
+如果没有的话，需打开，并重启电脑
+
+![winversion3](../assets/env/virtualize_3.png)
+![winversion4](../assets/env/virtualize_4.png)
+
+#### 下载wsl2内核更新包
+WSL 2 Linux内核更新包地址：https://aka.ms/wsl2kernel
+下载好后，直接运行文件
+
+#### 安装linux系统
+1. 调出powershell 以管理员身份运行，然后查看在线商店下载的可用 Linux 分发版的列表
+```code
+PS C:\Users\name> wsl --list --online
+以下是可安装的有效分发的列表。
+请使用“wsl --install -d <分发>”安装。
+
+NAME                            FRIENDLY NAME
+Ubuntu                          Ubuntu
+Debian                          Debian GNU/Linux
+kali-linux                      Kali Linux Rolling
+Ubuntu-18.04                    Ubuntu 18.04 LTS
+Ubuntu-20.04                    Ubuntu 20.04 LTS
+Ubuntu-22.04                    Ubuntu 22.04 LTS
+Ubuntu-24.04                    Ubuntu 24.04 LTS
+OracleLinux_7_9                 Oracle Linux 7.9
+OracleLinux_8_10                Oracle Linux 8.10
+OracleLinux_9_5                 Oracle Linux 9.5
+openSUSE-Leap-15.6              openSUSE Leap 15.6
+SUSE-Linux-Enterprise-15-SP6    SUSE Linux Enterprise 15 SP6
+openSUSE-Tumbleweed             openSUSE Tumbleweed
+```
+
+2. 查看已安装的系统，（默认没有安装过）
+```code
+PS C:\Users\name> wsl --list --verbose
+适用于 Linux 的 Windows 子系统没有已安装的分发版。
+可以通过访问 Microsoft Store 来安装分发版:
+https://aka.ms/wslstore
+```
+
+3. 安装指定系统
+```code
+PS C:\Users\name>  wsl --install -d Ubuntu-22.04
+正在安装: Ubuntu 22.04 LTS
+[=                          3.0%  
+```
+
+4. 安装完后要输入一个账密
+
+![passward](../assets/env/wsl_passward.png)
+
+5. 查看映射的本地路径
+Win + r 输入 \\wsl$
+点击Ubantu文件夹，右键，点击映射网络驱动器就可以添加到我的电脑里了，注意只有启动Ubantu之后才可以打开该磁盘。
+
+![map](../assets/env/map.png)
+
+#### 在vscode中使用wsl
+1. 安装插件wsl
+2. 终端打开wsl
+3. 安装python和lazyllm
+
+#### 本地命令行使用
+直接搜索wsl，打开，即可进入子系统
+
+### macOS
+
diff --git a/examples/rag_map_store_with_milvus_index.py b/examples/rag_map_store_with_milvus_index.py
index 1687ffc44..792d91bf8 100644
--- a/examples/rag_map_store_with_milvus_index.py
+++ b/examples/rag_map_store_with_milvus_index.py
@@ -6,59 +6,61 @@
 import tempfile
 
 def run(query):
-    _, store_file = tempfile.mkstemp(suffix=".db")
-
-    milvus_store_conf = {
-        'type': 'map',
-        'indices': {
-            'smart_embedding_index': {
-                'backend': 'milvus',
-                'kwargs': {
-                    'uri': store_file,
-                    'index_kwargs': {
-                        'index_type': 'HNSW',
-                        'metric_type': 'COSINE',
-                    }
+    fd, store_file = tempfile.mkstemp(suffix=".db")
+    os.close(fd)
+    try:
+        milvus_store_conf = {
+            'type': 'map',
+            'indices': {
+                'smart_embedding_index': {
+                    'backend': 'milvus',
+                    'kwargs': {
+                        'uri': store_file,
+                        'index_kwargs': {
+                            'index_type': 'FLAT',
+                            'metric_type': 'COSINE',
+                        }
+                    },
                 },
             },
-        },
-    }
+        }
 
-    documents = lazyllm.Document(dataset_path="rag_master",
-                                 embed=lazyllm.TrainableModule("bge-large-zh-v1.5"),
-                                 manager=False,
-                                 store_conf=milvus_store_conf)
+        documents = lazyllm.Document(dataset_path="rag_master",
+                                     embed=lazyllm.TrainableModule("bge-large-zh-v1.5"),
+                                     manager=False,
+                                     store_conf=milvus_store_conf)
 
-    documents.create_node_group(name="sentences",
-                                transform=lambda s: '。'.split(s))
+        documents.create_node_group(name="sentences",
+                                    transform=lambda s: [x for x in s.split('。') if x.strip()])
 
-    prompt = 'You will play the role of an AI Q&A assistant and complete a dialogue task.'\
-        ' In this task, you need to provide your answer based on the given context and question.'
+        prompt = 'You will play the role of an AI Q&A assistant and complete a dialogue task.'\
+            ' In this task, you need to provide your answer based on the given context and question.'
 
-    with lazyllm.pipeline() as ppl:
-        ppl.retriever = lazyllm.Retriever(doc=documents, group_name="sentences", topk=3,
-                                          index='smart_embedding_index')
+        with lazyllm.pipeline() as ppl:
+            ppl.retriever = lazyllm.Retriever(doc=documents, group_name="sentences", topk=3)
 
-        ppl.reranker = lazyllm.Reranker(name='ModuleReranker',
-                                        model="bge-reranker-large",
-                                        topk=1,
-                                        output_format='content',
-                                        join=True) | bind(query=ppl.input)
+            ppl.reranker = lazyllm.Reranker(name='ModuleReranker',
+                                            model="bge-reranker-large",
+                                            topk=1,
+                                            output_format='content',
+                                            join=True) | bind(query=ppl.input)
 
-        ppl.formatter = (
-            lambda nodes, query: dict(context_str=nodes, query=query)
-        ) | bind(query=ppl.input)
+            ppl.formatter = (
+                lambda nodes, query: dict(context_str=nodes, query=query)
+            ) | bind(query=ppl.input)
 
-        ppl.llm = lazyllm.TrainableModule('internlm2-chat-7b').prompt(
-            lazyllm.ChatPrompter(instruction=prompt, extra_keys=['context_str']))
+            ppl.llm = lazyllm.TrainableModule('internlm2-chat-7b').prompt(
+                lazyllm.ChatPrompter(instruction=prompt, extra_keys=['context_str']))
 
         rag = lazyllm.ActionModule(ppl)
         rag.start()
         res = rag(query)
-
-    os.remove(store_file)
-
-    return res
+        return res
+    finally:
+        try:
+            os.remove(store_file)
+        except Exception:
+            pass
 
 if __name__ == '__main__':
     res = run('何为天道？')
diff --git a/lazyllm/__init__.py b/lazyllm/__init__.py
index 2f2042a3f..1cd672ffd 100644
--- a/lazyllm/__init__.py
+++ b/lazyllm/__init__.py
@@ -18,14 +18,16 @@
                     FunctionCallAgent, fc_register, ReactAgent, PlanAndSolveAgent, ReWOOAgent, SentenceSplitter,
                     LLMParser)
 from .docs import add_doc
-from . import patch
+from .patch import patch_os_env
 
 config.done()
+patch_os_env(lambda key, value: config.refresh(key), config.refresh)
 
 
 del LazyLLMRegisterMetaClass  # noqa F821
+del LazyLLMRegisterMetaABCClass  # noqa F821
 del _get_base_cls_from_registry  # noqa F821
-del patch
+del patch_os_env
 
 
 __all__ = [
diff --git a/lazyllm/cli/install.py b/lazyllm/cli/install.py
index 04a40dc4a..5e46c9311 100644
--- a/lazyllm/cli/install.py
+++ b/lazyllm/cli/install.py
@@ -130,6 +130,18 @@ def install_multiple_packages(package_names_with_versions):
         packages_to_install.append(package_with_version)
     install_packages(packages_to_install)
 
+def install_mineru():
+    try:
+        subprocess.check_call([sys.executable, '-m', 'pip', 'install', '--upgrade', 'pip', '-i',
+                               'https://mirrors.aliyun.com/pypi/simple/'])
+        subprocess.check_call([sys.executable, '-m', 'pip', 'install', 'uv', '-i',
+                               'https://mirrors.aliyun.com/pypi/simple/'])
+        subprocess.check_call([sys.executable, '-m', 'uv', 'pip', 'install',
+                               'mineru[all]==2.1.10', '-i', 'https://mirrors.aliyun.com/pypi/simple/'])
+    except subprocess.CalledProcessError as e:
+        logging.error(f"Mineru installation failed: {e}")
+        sys.exit(1)
+
 def install(commands):  # noqa C901
     extras_desc = load_extras_descriptions()
     epilog_lines = ["Supported extras groups:"]
@@ -157,10 +169,14 @@ def install(commands):  # noqa C901
         logging.error("Extras for finetune/local inference are not supported on macOS/Windows.")
         sys.exit(1)
 
-    extras = load_extras()        # dict of extras
+    extras = load_extras()  # dict of extras
     deps = load_dependencies()  # dict of dependencies
     to_install = OrderedDict()
 
+    if "mineru" in items:
+        install_mineru()
+        items.remove("mineru")
+
     for cmd in items:
         if cmd in extras:
             for pkg in extras[cmd]:
diff --git a/lazyllm/common/__init__.py b/lazyllm/common/__init__.py
index aa8d1f883..b135bca1a 100644
--- a/lazyllm/common/__init__.py
+++ b/lazyllm/common/__init__.py
@@ -1,4 +1,4 @@
-from .registry import LazyLLMRegisterMetaClass, _get_base_cls_from_registry, Register
+from .registry import LazyLLMRegisterMetaClass, LazyLLMRegisterMetaABCClass, _get_base_cls_from_registry, Register
 from .common import package, kwargs, arguments, LazyLLMCMD, timeout, final, ReadOnlyWrapper, DynamicDescriptor, override
 from .common import FlatList, Identity, ResultCollector, ArgsDict, CaseInsensitiveDict
 from .common import ReprRule, make_repr, modify_repr, is_valid_url, is_valid_path
@@ -17,6 +17,7 @@
 __all__ = [
     # registry
     'LazyLLMRegisterMetaClass',
+    'LazyLLMRegisterMetaABCClass',
     '_get_base_cls_from_registry',
     'Register',
 
@@ -95,5 +96,5 @@
     'LOG',
 
     # file-system queue
-    'FileSystemQueue',
+    'FileSystemQueue'
 ]
diff --git a/lazyllm/common/bind.py b/lazyllm/common/bind.py
index b1a1d6c24..c40a65335 100644
--- a/lazyllm/common/bind.py
+++ b/lazyllm/common/bind.py
@@ -1,14 +1,14 @@
 import copy
 import builtins
 import itertools
-from typing import Callable, Any
+from typing import Callable, Any, Optional, List
 from .globals import globals
 from .common import package
 
 
 class AttrTree(object):
-    def __init__(self, name=None, pres=[]):
-        self._path = copy.deepcopy(pres)
+    def __init__(self, name: Optional[str] = None, pres: Optional[List[str]] = None):
+        self._path = copy.deepcopy(pres or [])
         if name is not None:
             self._path.append(name)
 
@@ -58,7 +58,7 @@ def __reduce__(self) -> tuple[Any, ...]:
 
 def _setattr(self, key, v):
     raise RuntimeError(f'Cannot set attr for Placeholder, you want to set {key}={v}')
-setattr(Placeholder, '__setattr__', _setattr)
+Placeholder.__setattr__ = _setattr
 
 
 class _MetaBind(type):
@@ -152,4 +152,4 @@ def __setattr__(self, __name: str, __value: Any) -> None:
         return super(__class__, self).__setattr__(__name, __value)
 
 
-setattr(builtins, 'bind', Bind)
+builtins.bind = Bind
diff --git a/lazyllm/common/common.py b/lazyllm/common/common.py
index e1de1c6d7..7e9ad245b 100644
--- a/lazyllm/common/common.py
+++ b/lazyllm/common/common.py
@@ -2,7 +2,7 @@
 import os
 import builtins
 import typing
-from typing import Any, Callable, Optional
+from typing import Any, Callable, Optional, List, Dict
 from contextlib import contextmanager
 import copy
 import threading
@@ -110,7 +110,7 @@ def append(self, x):
         return self
 
 
-setattr(builtins, 'package', package)
+builtins.package = package
 
 
 class LazyLLMCMD(object):
@@ -248,7 +248,9 @@ def check_combine(cls, cate, type, subs):
 def rreplace(s, old, new, count):
     return (s[::-1].replace(old[::-1], new[::-1], count))[::-1]
 
-def make_repr(category, type, *, name=None, subs=[], attrs=dict(), **kw):
+def make_repr(category: str, type: str, *, name: Optional[str] = None,
+              subs: Optional[List[str]] = None, attrs: Optional[Dict[str, Any]] = None, **kw):
+    subs, attrs = subs or [], attrs or {}
     if len(kw) > 0:
         assert len(attrs) == 0, 'Cannot provide attrs and kwargs at the same time'
         attrs = kw
diff --git a/lazyllm/common/multiprocessing.py b/lazyllm/common/multiprocessing.py
index 64b802db2..960886ff4 100644
--- a/lazyllm/common/multiprocessing.py
+++ b/lazyllm/common/multiprocessing.py
@@ -24,8 +24,8 @@ def start(self):
 
 class ForkProcess(multiprocessing.Process):
     def __init__(self, group=None, target=None, name=None, args=(),
-                 kwargs={}, *, daemon=None, sync=True):
-        super().__init__(group, ForkProcess.work(target, sync), name, args, kwargs, daemon=daemon)
+                 kwargs=None, *, daemon=None, sync=True):
+        super().__init__(group, ForkProcess.work(target, sync), name, args, kwargs or {}, daemon=daemon)
 
     @staticmethod
     def work(f, sync):
diff --git a/lazyllm/common/registry.py b/lazyllm/common/registry.py
index 3b25891f7..67676e4e9 100644
--- a/lazyllm/common/registry.py
+++ b/lazyllm/common/registry.py
@@ -5,6 +5,7 @@
 from .bind import _MetaBind
 from ..configs import config
 from typing import Optional
+from abc import ABCMeta
 
 # Special Dict for lazy programmer. Suppose we have a LazyDict as follows：
 #    >>> ld = LazyDict(name='ld', ALd=int)
@@ -106,6 +107,9 @@ def __new__(metas, name, bases, attrs):
         return new_cls
 
 
+class LazyLLMRegisterMetaABCClass(LazyLLMRegisterMetaClass, ABCMeta): pass
+
+
 def _get_base_cls_from_registry(cls_str, *, registry=LazyLLMRegisterMetaClass.all_clses):
     if cls_str == '':
         return registry.base
diff --git a/lazyllm/components/auto/autodeploy.py b/lazyllm/components/auto/autodeploy.py
index 68d99b7ec..7a3700b59 100644
--- a/lazyllm/components/auto/autodeploy.py
+++ b/lazyllm/components/auto/autodeploy.py
@@ -37,7 +37,6 @@ def _get_embed_deployer(launcher, type, kw):
     def get_deployer(cls, base_model: str, source: Optional[str] = None, trust_remote_code: bool = True,
                      launcher: Optional[LazyLLMLaunchersBase] = None, type: Optional[str] = None,
                      log_path: Optional[str] = None, **kw):
-        base_model = ModelManager(source).download(base_model) or ''
         model_name = get_model_name(base_model)
         kw['log_path'], kw['trust_remote_code'] = log_path, trust_remote_code
         if not type:
diff --git a/lazyllm/components/auto/autofinetune.py b/lazyllm/components/auto/autofinetune.py
index be42b3269..97ca26495 100644
--- a/lazyllm/components/auto/autofinetune.py
+++ b/lazyllm/components/auto/autofinetune.py
@@ -9,7 +9,7 @@
 
 class AutoFinetune(LazyLLMFinetuneBase):
     def __new__(cls, base_model, target_path, source=lazyllm.config['model_source'], merge_path=None, ctx_len=1024,
-                batch_size=32, lora_r=8, launcher=launchers.remote(ngpus=1), **kw):
+                batch_size=32, lora_r=8, launcher=launchers.remote(ngpus=1), **kw):  # noqa B008
         base_model = ModelManager(source).download(base_model) or ''
         model_name = get_model_name(base_model)
         model_type = ModelManager.get_model_type(model_name)
diff --git a/lazyllm/components/core.py b/lazyllm/components/core.py
index c95533988..2cb288f04 100644
--- a/lazyllm/components/core.py
+++ b/lazyllm/components/core.py
@@ -5,7 +5,7 @@
 from typing import Union
 
 class ComponentBase(object, metaclass=LazyLLMRegisterMetaClass):
-    def __init__(self, *, launcher=launchers.empty()):
+    def __init__(self, *, launcher=launchers.empty()):  # noqa B008
         self._llm_name = None
         self.job = ReadOnlyWrapper()
         if isinstance(launcher, LazyLLMLaunchersBase):
diff --git a/lazyllm/components/deploy/base.py b/lazyllm/components/deploy/base.py
index 5391950b0..54f3def19 100644
--- a/lazyllm/components/deploy/base.py
+++ b/lazyllm/components/deploy/base.py
@@ -19,7 +19,7 @@ class LazyLLMDeployBase(ComponentBase):
     def extract_result(output, inputs):
         return output
 
-    def __init__(self, *, launcher=launchers.remote()):
+    def __init__(self, *, launcher=launchers.remote()):  # noqa B008
         super().__init__(launcher=launcher)
 
 
@@ -33,7 +33,7 @@ class DummyDeploy(LazyLLMDeployBase, flows.Pipeline):
         }
     }
 
-    def __init__(self, launcher=launchers.remote(sync=False), *, stream=False, **kw):
+    def __init__(self, launcher=launchers.remote(sync=False), *, stream=False, **kw):  # noqa B008
         super().__init__(launcher=launcher)
 
         def func():
diff --git a/lazyllm/components/deploy/infinity.py b/lazyllm/components/deploy/infinity.py
index 7b39a091c..60919c55b 100644
--- a/lazyllm/components/deploy/infinity.py
+++ b/lazyllm/components/deploy/infinity.py
@@ -19,7 +19,7 @@ class Infinity(LazyLLMDeployBase):
     default_headers = {'Content-Type': 'application/json'}
     target_name = 'embeddings'
 
-    def __init__(self, launcher=launchers.remote(ngpus=1), model_type='embed', log_path=None, **kw):
+    def __init__(self, launcher=launchers.remote(ngpus=1), model_type='embed', log_path=None, **kw):  # noqa B008
         super().__init__(launcher=launcher)
         self.kw = ArgsDict({
             'host': '0.0.0.0',
diff --git a/lazyllm/components/deploy/lightllm.py b/lazyllm/components/deploy/lightllm.py
index 23ebfc927..5931b19bf 100644
--- a/lazyllm/components/deploy/lightllm.py
+++ b/lazyllm/components/deploy/lightllm.py
@@ -33,7 +33,7 @@ class Lightllm(LazyLLMDeployBase):
     stream_url_suffix = '_stream'
     stream_parse_parameters = {"delimiter": b"\n\n"}
 
-    def __init__(self, trust_remote_code=True, launcher=launchers.remote(ngpus=1), log_path=None, **kw):
+    def __init__(self, trust_remote_code=True, launcher=launchers.remote(ngpus=1), log_path=None, **kw):  # noqa B008
         super().__init__(launcher=launcher)
         self.kw = ArgsDict({
             'tp': 1,
diff --git a/lazyllm/components/deploy/lmdeploy.py b/lazyllm/components/deploy/lmdeploy.py
index 9e62fe65c..43176e92f 100644
--- a/lazyllm/components/deploy/lmdeploy.py
+++ b/lazyllm/components/deploy/lmdeploy.py
@@ -44,7 +44,7 @@ class LMDeploy(LazyLLMDeployBase):
     }
     stream_parse_parameters = {"delimiter": b"\n"}
 
-    def __init__(self, launcher=launchers.remote(ngpus=1), trust_remote_code=True, log_path=None, **kw):
+    def __init__(self, launcher=launchers.remote(ngpus=1), trust_remote_code=True, log_path=None, **kw):  # noqa B008
         super().__init__(launcher=launcher)
         self.kw = ArgsDict({
             'server-name': '0.0.0.0',
diff --git a/lazyllm/components/deploy/mindie.py b/lazyllm/components/deploy/mindie.py
index db7e61ee7..3d8ec58de 100644
--- a/lazyllm/components/deploy/mindie.py
+++ b/lazyllm/components/deploy/mindie.py
@@ -34,7 +34,7 @@ class Mindie(LazyLLMDeployBase):
         'max_seq_len': ('maxSeqLen', int)
     }
 
-    def __init__(self, trust_remote_code=True, launcher=launchers.remote(), log_path=None, **kw):
+    def __init__(self, trust_remote_code=True, launcher=launchers.remote(), log_path=None, **kw):  # noqa B008
         super().__init__(launcher=launcher)
         assert lazyllm.config['mindie_home'], 'Ensure you have installed MindIE and \
                                   "export LAZYLLM_MINDIE_HOME=/path/to/mindie/latest"'
diff --git a/lazyllm/components/deploy/mineru/__init__.py b/lazyllm/components/deploy/mineru/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/lazyllm/components/deploy/mineru/mineru_patches.py b/lazyllm/components/deploy/mineru/mineru_patches.py
new file mode 100644
index 000000000..f52910f06
--- /dev/null
+++ b/lazyllm/components/deploy/mineru/mineru_patches.py
@@ -0,0 +1,196 @@
+import copy
+from mineru.backend.pipeline import pipeline_middle_json_mkcontent
+from mineru.backend.pipeline.pipeline_middle_json_mkcontent import merge_para_with_text as pipeline_merge_para_with_text
+from mineru.backend.vlm import vlm_middle_json_mkcontent
+from mineru.backend.vlm.vlm_middle_json_mkcontent import merge_para_with_text as vlm_merge_para_with_text
+from mineru.utils.enum_class import BlockType, ContentType
+
+# patches to mineru (to output bbox)
+
+def _parse_line_spans(para_block, page_idx):
+    lines_metas = []
+    if 'lines' in para_block:
+        for line_info in para_block['lines']:
+            if not line_info['spans']:
+                continue
+            line_meta = copy.deepcopy(line_info['spans'][0])
+            line_meta.pop('score', None)
+            cross_page = line_meta.pop('cross_page', None)
+            line_meta['page'] = page_idx + 1 if cross_page is True else page_idx
+            lines_metas.append(line_meta)
+    return lines_metas
+
+
+# patches to pipeline
+
+def pipeline_make_blocks_to_content_list(para_block, img_buket_path, page_idx):  # noqa: C901
+    para_type = para_block['type']
+    para_content = {}
+    if para_type in [BlockType.TEXT, BlockType.LIST, BlockType.INDEX]:
+        para_content = {
+            'type': ContentType.TEXT,
+            'text': pipeline_merge_para_with_text(para_block),
+            'lines': _parse_line_spans(para_block, page_idx)
+        }
+    elif para_type == BlockType.TITLE:
+        para_content = {
+            'type': ContentType.TEXT,
+            'text': pipeline_merge_para_with_text(para_block),
+            'lines': _parse_line_spans(para_block, page_idx)
+        }
+        title_level = pipeline_middle_json_mkcontent.get_title_level(para_block)
+        if title_level != 0:
+            para_content['text_level'] = title_level
+    elif para_type == BlockType.INTERLINE_EQUATION:
+        if len(para_block['lines']) == 0 or len(para_block['lines'][0]['spans']) == 0:
+            return None
+        para_content = {
+            'type': ContentType.EQUATION,
+            'img_path': f"{img_buket_path}/{para_block['lines'][0]['spans'][0].get('image_path', '')}",
+            'lines': _parse_line_spans(para_block, page_idx)
+        }
+        if para_block['lines'][0]['spans'][0].get('content', ''):
+            para_content['text'] = pipeline_merge_para_with_text(para_block)
+            para_content['text_format'] = 'latex'
+    elif para_type == BlockType.IMAGE:
+        image_lines_metas = []
+        para_content = {
+            'type': ContentType.IMAGE,
+            'img_path': '',
+            BlockType.IMAGE_CAPTION: [],
+            BlockType.IMAGE_FOOTNOTE: []
+        }
+        for block in para_block['blocks']:
+            image_lines_metas.extend(_parse_line_spans(block, page_idx))
+            if block['type'] == BlockType.IMAGE_BODY:
+                for line in block['lines']:
+                    for span in line['spans']:
+                        if span['type'] == ContentType.IMAGE:
+                            if span.get('image_path', ''):
+                                para_content['img_path'] = f"{img_buket_path}/{span['image_path']}"
+            if block['type'] == BlockType.IMAGE_CAPTION:
+                para_content[BlockType.IMAGE_CAPTION].append(
+                    pipeline_merge_para_with_text(block))
+            if block['type'] == BlockType.IMAGE_FOOTNOTE:
+                para_content[BlockType.IMAGE_FOOTNOTE].append(
+                    pipeline_merge_para_with_text(block))
+        para_content['lines'] = image_lines_metas
+    elif para_type == BlockType.TABLE:
+        para_content = {
+            'type': ContentType.TABLE,
+            'img_path': '',
+            BlockType.TABLE_CAPTION: [],
+            BlockType.TABLE_FOOTNOTE: []
+        }
+        table_lines_metas = []
+        for block in para_block['blocks']:
+            table_lines_metas.extend(_parse_line_spans(block, page_idx))
+            if block['type'] == BlockType.TABLE_BODY:
+                for line in block['lines']:
+                    for span in line['spans']:
+                        if span['type'] == ContentType.TABLE:
+                            if span.get('html', ''):
+                                para_content[BlockType.TABLE_BODY] = f"{span['html']}"
+
+                            if span.get('image_path', ''):
+                                para_content['img_path'] = f"{img_buket_path}/{span['image_path']}"
+
+            if block['type'] == BlockType.TABLE_CAPTION:
+                para_content[BlockType.TABLE_CAPTION].append(
+                    pipeline_merge_para_with_text(block))
+            if block['type'] == BlockType.TABLE_FOOTNOTE:
+                para_content[BlockType.TABLE_FOOTNOTE].append(
+                    pipeline_merge_para_with_text(block))
+        para_content['lines'] = table_lines_metas
+
+    para_content['page_idx'] = page_idx
+    para_content['bbox'] = para_block['bbox']
+    return para_content
+
+
+pipeline_middle_json_mkcontent.make_blocks_to_content_list = pipeline_make_blocks_to_content_list
+
+
+# patches to vlm
+
+def vlm_make_blocks_to_content_list(para_block, img_buket_path, page_idx):  # noqa: C901
+    para_type = para_block['type']
+    para_content = {}
+    if para_type in [BlockType.TEXT, BlockType.LIST, BlockType.INDEX]:
+        para_content = {
+            'type': ContentType.TEXT,
+            'text': vlm_merge_para_with_text(para_block),
+            'lines': _parse_line_spans(para_block, page_idx)
+        }
+    elif para_type == BlockType.TITLE:
+        title_level = vlm_middle_json_mkcontent.get_title_level(para_block)
+        para_content = {
+            'type': ContentType.TEXT,
+            'text': vlm_merge_para_with_text(para_block),
+            'lines': _parse_line_spans(para_block, page_idx)
+        }
+        if title_level != 0:
+            para_content['text_level'] = title_level
+    elif para_type == BlockType.INTERLINE_EQUATION:
+        para_content = {
+            'type': ContentType.EQUATION,
+            'text': vlm_merge_para_with_text(para_block),
+            'text_format': 'latex',
+            'lines': _parse_line_spans(para_block, page_idx)
+        }
+    elif para_type == BlockType.IMAGE:
+        image_lines_metas = []
+        para_content = {
+            'type': ContentType.IMAGE,
+            'img_path': '',
+            BlockType.IMAGE_CAPTION: [],
+            BlockType.IMAGE_FOOTNOTE: []
+        }
+        for block in para_block['blocks']:
+            image_lines_metas.extend(_parse_line_spans(block, page_idx))
+            if block['type'] == BlockType.IMAGE_BODY:
+                for line in block['lines']:
+                    for span in line['spans']:
+                        if span['type'] == ContentType.IMAGE:
+                            if span.get('image_path', ''):
+                                para_content['img_path'] = f"{img_buket_path}/{span['image_path']}"
+            if block['type'] == BlockType.IMAGE_CAPTION:
+                para_content[BlockType.IMAGE_CAPTION].append(
+                    vlm_merge_para_with_text(block))
+            if block['type'] == BlockType.IMAGE_FOOTNOTE:
+                para_content[BlockType.IMAGE_FOOTNOTE].append(
+                    vlm_merge_para_with_text(block))
+        para_content['lines'] = image_lines_metas
+    elif para_type == BlockType.TABLE:
+        table_lines_metas = []
+        para_content = {
+            'type': ContentType.TABLE,
+            'img_path': '',
+            BlockType.TABLE_CAPTION: [],
+            BlockType.TABLE_FOOTNOTE: []
+        }
+        for block in para_block['blocks']:
+            table_lines_metas.extend(_parse_line_spans(block, page_idx))
+            if block['type'] == BlockType.TABLE_BODY:
+                for line in block['lines']:
+                    for span in line['spans']:
+                        if span['type'] == ContentType.TABLE:
+                            if span.get('html', ''):
+                                para_content[BlockType.TABLE_BODY] = f"{span['html']}"
+
+                            if span.get('image_path', ''):
+                                para_content['img_path'] = f"{img_buket_path}/{span['image_path']}"
+
+            if block['type'] == BlockType.TABLE_CAPTION:
+                para_content[BlockType.TABLE_CAPTION].append(
+                    vlm_merge_para_with_text(block))
+            if block['type'] == BlockType.TABLE_FOOTNOTE:
+                para_content[BlockType.TABLE_FOOTNOTE].append(
+                    vlm_merge_para_with_text(block))
+        para_content['lines'] = table_lines_metas
+
+    para_content['page_idx'] = page_idx
+    para_content['bbox'] = para_block['bbox']
+    return para_content
+
+vlm_middle_json_mkcontent.make_blocks_to_content_list = vlm_make_blocks_to_content_list
diff --git a/lazyllm/components/deploy/mineru/mineru_server_module.py b/lazyllm/components/deploy/mineru/mineru_server_module.py
new file mode 100644
index 000000000..97d820043
--- /dev/null
+++ b/lazyllm/components/deploy/mineru/mineru_server_module.py
@@ -0,0 +1,417 @@
+import os
+import json
+import subprocess
+import platform
+import shutil
+import hashlib
+import uuid
+import tempfile
+import atexit
+from pathlib import Path
+from fastapi import UploadFile, File, Form, HTTPException
+from fastapi.responses import JSONResponse
+from typing import List, Optional, Union
+
+from lazyllm import ServerModule, LOG
+from lazyllm import FastapiApp as app
+
+from mineru.cli.common import aio_do_parse, read_fn, pdf_suffixes, image_suffixes
+from . import mineru_patches  # noqa: F401
+
+
+def _check_libreoffice():
+    system = platform.system()
+
+    if system != 'Linux':
+        LOG.warning(f'[MINERU SERVER] The current system type only supports PDF parsing: {system}')
+        return False
+
+    libreoffice_installed = False
+    commands = ['libreoffice', 'soffice']
+
+    for cmd in commands:
+        try:
+            result = subprocess.run([cmd, '--version'], capture_output=True, text=True, timeout=5)
+            if result.returncode == 0:
+                version = result.stdout.strip().split('\n')[0]
+                LOG.info(f'[MINERU SERVER] LibreOffice is installed: {version}')
+                libreoffice_installed = True
+                break
+        except (FileNotFoundError, subprocess.TimeoutExpired):
+            continue
+
+    if not libreoffice_installed:
+        LOG.warning('[MINERU SERVER] LibreOffice is not installed, only PDF is supported')
+        return False
+
+    try:
+        output = subprocess.check_output(['fc-list', ':lang=zh'], encoding='utf-8')
+        if not output.strip():
+            LOG.warning('[MINERU SERVER] No Chinese fonts were detected, \
+                        the converted document may not display Chinese content properly. \
+                        It is recommended to install Chinese fonts: sudo apt install fonts-noto-cjk')
+    except Exception:
+        LOG.error('[MINERU SERVER] Font check failed')
+
+    return True
+
+
+class MineruServerBase:
+    def __init__(self, cache_dir: str = None, image_save_dir: str = None,
+                 default_backend: str = 'pipeline', default_lang: str = 'ch_server',
+                 default_parse_method: str = 'auto', default_formula_enable: bool = True,
+                 default_table_enable: bool = True, default_return_md: bool = False,
+                 default_return_content_list: bool = True, mem_fraction_static: float = 0.8):
+        if default_backend not in ['pipeline', 'vlm-sglang-engine', 'vlm-transformers']:
+            raise ValueError(f'Invalid backend: {default_backend}, \
+                             only support pipeline, vlm-sglang-engine, vlm-transformers')
+        if default_lang not in ['ch', 'ch_server', 'ch_lite', 'en']:
+            raise ValueError(f'Invalid language: {default_lang}, \
+                             only support ch, ch_server, ch_lite, en')
+        self._default_backend = default_backend
+        self._cache_dir = cache_dir
+        if image_save_dir:
+            self._image_save_dir = os.path.join(image_save_dir, 'images')
+        else:
+            self._image_save_dir = None
+        self._default_lang = default_lang
+        self._default_parse_method = default_parse_method
+        self._default_formula_enable = default_formula_enable
+        self._default_table_enable = default_table_enable
+        self._default_return_md = default_return_md
+        self._default_return_content_list = default_return_content_list
+        self._mem_fraction_static = mem_fraction_static
+        self._supported_office_types = ['.pptx', '.ppt', '.docx', '.doc'] if _check_libreoffice() else []
+        LOG.info(f'[MINERU SERVER] Supported office types: {self._supported_office_types}')
+        self._middle_file_dir = tempfile.mkdtemp()
+        atexit.register(lambda: shutil.rmtree(self._middle_file_dir, ignore_errors=True))
+        try:
+            for path in [self._cache_dir, self._image_save_dir]:
+                if path:
+                    os.makedirs(path, exist_ok=True)
+        except Exception as e:
+            raise Exception(f'Failed to create directory: {e}')
+
+    @app.post('/api/v1/pdf_parse')
+    async def parse_pdf(self,   # noqa: C901
+                        files: List[str] = Form([]),  # noqa B008
+                        upload_files: List[UploadFile] = File([]),  # noqa B008
+                        use_cache: bool = Form(False, description='if True, chache_dir should be set'),  # noqa B008
+                        lang: str = Form('ch_server',  # noqa B008
+                                         description='only use for pipeline,ch|ch_server|ch_lite|en'),
+                        backend: str = Form(None, description='Parsing mode, vlm-sglang-engine|pipeline'),  # noqa B008
+                        parse_method: str = Form('auto'),  # noqa B008
+                        formula_enable: bool = Form(None, description='Whether to enable formula parsing'),  # noqa B008
+                        table_enable: bool = Form(None, description='Whether to enable table parsing'),  # noqa B008
+                        return_md: bool = Form(None, description='Whether to return markdown content'),  # noqa B008
+                        return_content_list: bool = Form(None, description='Whether to return content list')):  # noqa B008
+        if files and upload_files:
+            raise HTTPException(status_code=400, detail='Either provide only \'files\' or only \'upload_files\'!')
+        for file in files:
+            if not os.path.isfile(file):
+                raise HTTPException(status_code=400, detail=f'File Not Found: {file}')
+
+        if lang and lang not in ['ch', 'ch_server', 'ch_lite', 'en']:
+            raise HTTPException(status_code=400, detail=f'Invalid language: {lang}, \
+                                only support ch, ch_server, ch_lite, en')
+
+        if backend and backend not in ['pipeline', 'vlm-sglang-engine', 'vlm-transformers']:
+            raise HTTPException(status_code=400, detail=f'Invalid backend: {backend}, \
+                                only support pipeline, vlm-sglang-engine, vlm-transformers')
+
+        unique_id = str(uuid.uuid4())
+        unique_dir = os.path.join(self._middle_file_dir, unique_id)
+        os.makedirs(unique_dir, exist_ok=True)
+
+        if upload_files:
+            files = await self._resolve_upload_files(upload_files, unique_dir)
+
+        for file in files:
+            if Path(file).suffix.lower() not in self._supported_office_types + ['.pdf']:
+                raise HTTPException(status_code=400, detail=f'Unsupported file type: {Path(file).suffix}')
+
+        backend = backend or self._default_backend
+        lang = lang or self._default_lang
+        parse_method = parse_method or self._default_parse_method
+        formula_enable = formula_enable if formula_enable is not None else self._default_formula_enable
+        table_enable = table_enable if table_enable is not None else self._default_table_enable
+        return_md = return_md if return_md is not None else self._default_return_md
+        return_content_list = return_content_list if return_content_list is not None \
+            else self._default_return_content_list
+
+        LOG.info(f'[MINERU SERVER] GOT FILE {[Path(file).stem for file in files]} --- BACKEND: {backend}')
+
+        try:
+            results = {file: {} for file in files}
+            if use_cache and not self._cache_dir:
+                LOG.warning('[MINERU SERVER] CACHE_DIR is not set, the Cache will not be used!')
+
+            files_to_process = files
+            if use_cache and self._cache_dir:
+                results, files_to_process = self._check_cache(files, results, backend,
+                                                              return_md, return_content_list,
+                                                              table_enable, formula_enable)
+                if not files_to_process:
+                    LOG.info(f'[MINERU SERVER] RETURN RESULTS FROM CACHE: {files}')
+                    results = [results[file] for file in files]
+                    return JSONResponse(status_code=200, content={'result': results, 'unique_id': unique_id})
+
+            mineru_results = await self._run_mineru(files_to_process, unique_dir, backend, lang,
+                                                    parse_method, formula_enable, table_enable,
+                                                    return_md, return_content_list)
+            results.update(mineru_results)
+            results = [results[file] for file in files]
+            LOG.info(f'[MINERU SERVER] RETURN RESULTS: {files}')
+            return JSONResponse(status_code=200,
+                                content={'result': results, 'unique_id': unique_id})
+        except Exception as e:
+            LOG.error(f'[MINERU SERVER] Parse Failed: {str(e)}')
+            return JSONResponse(status_code=500,
+                                content={'error': f'Failed to process file: {str(e)}'})
+        finally:
+            shutil.rmtree(unique_dir)
+
+    async def _run_mineru(self, files_to_process, unique_dir, backend, lang,  # noqa: C901
+                          parse_method, formula_enable, table_enable,
+                          return_md, return_content_list):
+        results = {file: {} for file in files_to_process}
+
+        pdf_file_names = []
+        pdf_bytes_list = []
+
+        for file in files_to_process:
+            pdf_file_name, pdf_byte = self._load_files(Path(file), unique_dir)
+            pdf_file_names.append(pdf_file_name)
+            pdf_bytes_list.append(pdf_byte)
+
+        lang_list = [lang] * len(pdf_bytes_list)
+
+        params = dict(output_dir=unique_dir, pdf_file_names=pdf_file_names,
+                      pdf_bytes_list=pdf_bytes_list, p_lang_list=lang_list, backend=backend,
+                      parse_method=parse_method, formula_enable=formula_enable,
+                      table_enable=table_enable, f_draw_layout_bbox=False, f_draw_span_bbox=False,
+                      f_dump_md=True, f_dump_middle_json=False, f_dump_model_output=False,
+                      f_dump_orig_pdf=False, f_dump_content_list=True)
+        if backend == 'vlm-sglang-engine':
+            params['mem_fraction_static'] = self._mem_fraction_static
+
+        await aio_do_parse(**params)
+
+        for pdf_name, pdf_path in zip(pdf_file_names, files_to_process):
+            # Directory output by mineru
+            if backend.startswith('pipeline'):
+                parse_dir = os.path.join(unique_dir, pdf_name, parse_method)
+            else:
+                parse_dir = os.path.join(unique_dir, pdf_name, 'vlm')
+
+            if os.path.exists(parse_dir):
+                hash_id = self._file_sha256(pdf_path)
+                md_content = self._read_parse_result('.md', pdf_name, parse_dir)
+                content_list = self._read_parse_result('_content_list.json', pdf_name, parse_dir)
+
+                if return_md:
+                    if md_content:
+                        results[pdf_path]['md_content'] = md_content
+                if return_content_list:
+                    if content_list:
+                        results[pdf_path]['content_list'] = content_list
+                if self._cache_dir:
+                    self._cache_parse_result(hash_id, results[pdf_path], mode=backend,
+                                             table_enable=table_enable,
+                                             formula_enable=formula_enable)
+
+                if self._image_save_dir:
+                    source_dir = Path(f'{parse_dir}/images/')
+                    target_dir = Path(self._image_save_dir)
+                    for jpg_file in source_dir.glob('*.jpg'):
+                        shutil.move(str(jpg_file), str(target_dir / jpg_file.name))
+
+        return results
+
+    async def _resolve_upload_files(self, upload_files: List[UploadFile], unique_dir: str) -> List[str]:
+        if not upload_files:
+            return []
+
+        temp_upload_dir = os.path.join(self._middle_file_dir, f'{unique_dir}/upload')
+        os.makedirs(temp_upload_dir, exist_ok=True)
+        file_paths = []
+        for upload_file in upload_files:
+            content = await upload_file.read()
+            temp_file_path = os.path.join(temp_upload_dir, upload_file.filename)
+            with open(temp_file_path, 'wb') as f:
+                f.write(content)
+            file_paths.append(temp_file_path)
+        return file_paths
+
+    def _get_func_suffix(self, table_enable, formula_enable):
+        if table_enable and formula_enable:
+            return '_a'
+        elif table_enable:
+            return '_t'
+        elif formula_enable:
+            return '_f'
+        else:
+            return '_n'
+
+    def _check_cache(self, files, results, backend, return_md, return_content_list,
+                     table_enable, formula_enable):
+        if not self._cache_dir:
+            return results, files
+
+        func_suffix = self._get_func_suffix(table_enable, formula_enable)
+        func_suffix_map = {'_a': ['_a'],
+                           '_t': ['_t', '_a'],
+                           '_f': ['_f', '_a'],
+                           '_n': ['_n', '_a', '_t', '_f']}
+        func_suffix_list = func_suffix_map[func_suffix]
+
+        uncached_files = []
+
+        for file in files:
+            file_hash = self._file_sha256(file)
+            valid_hash_ids = [file_hash + func_suffix for func_suffix in func_suffix_list]
+            result = {}
+
+            file_content_list_found = False
+            file_md_found = False
+
+            if return_content_list:
+                for valid_hash in valid_hash_ids:
+                    json_path = os.path.join(self._cache_dir, backend, f'{valid_hash}_content_list.json')
+                    if os.path.isfile(json_path):
+                        with open(json_path, 'r', encoding='utf-8') as f:
+                            result['content_list'] = json.load(f)
+                        file_content_list_found = True
+                        break
+
+            if return_md:
+                for valid_hash in valid_hash_ids:
+                    md_path = os.path.join(self._cache_dir, backend, f'{valid_hash}.md')
+                    if os.path.isfile(md_path):
+                        with open(md_path, 'r', encoding='utf-8') as f:
+                            result['md_content'] = f.read()
+                        file_md_found = True
+                        break
+
+            results[file].update(result)
+
+            file_cache_complete = True
+            if return_content_list and not file_content_list_found:
+                file_cache_complete = False
+            if return_md and not file_md_found:
+                file_cache_complete = False
+
+            if not file_cache_complete:
+                uncached_files.append(file)
+
+        return results, uncached_files
+
+    def _read_parse_result(self, file_suffix_identifier: str,
+                           pdf_name: str, parse_dir: str) -> Optional[Union[str, dict]]:
+        result_file_path = os.path.join(parse_dir, f'{pdf_name}{file_suffix_identifier}')
+        if os.path.exists(result_file_path):
+            try:
+                if file_suffix_identifier == '.md':
+                    with open(result_file_path, 'r', encoding='utf-8') as fp:
+                        return fp.read()
+                elif file_suffix_identifier == '_content_list.json':
+                    with open(result_file_path, 'r', encoding='utf-8') as fp:
+                        return json.load(fp)
+            except Exception:
+                LOG.error(f'[MINERU SERVER] Failed to read result file {result_file_path}')
+                return None
+        return None
+
+    def _cache_parse_result(self, hash_id: str, result: dict, mode: str,
+                            table_enable: bool, formula_enable: bool):
+        try:
+            cache_subdir = os.path.join(self._cache_dir, mode)
+            os.makedirs(cache_subdir, exist_ok=True)
+
+            if table_enable and formula_enable:
+                func_suffix = '_a'
+            elif table_enable:
+                func_suffix = '_t'
+            elif formula_enable:
+                func_suffix = '_f'
+            else:
+                func_suffix = '_n'
+
+            hash_id += func_suffix
+            md_content = result.get('md_content', None)
+            if md_content:
+                cache_path = os.path.join(cache_subdir, f'{hash_id}.md')
+                with open(cache_path, 'w', encoding='utf-8') as f:
+                    f.write(md_content)
+
+            content_list = result.get('content_list', None)
+            if content_list:
+                cache_path = os.path.join(cache_subdir, f'{hash_id}_content_list.json')
+                with open(cache_path, 'w', encoding='utf-8') as f:
+                    json.dump(content_list, f, ensure_ascii=False, indent=4)
+
+        except Exception as e:
+            LOG.error(f'Failed to cache data for {hash_id}: {e}')
+
+    def _load_files(self, file_path: str, unique_dir: str):
+        suffix = file_path.suffix.lower()
+        if suffix in pdf_suffixes + image_suffixes + self._supported_office_types:
+            if suffix in self._supported_office_types:
+                self._convert_file_to_pdf(file_path, unique_dir)
+                output_path = os.path.join(unique_dir, file_path.name.replace(suffix, '.pdf'))
+                file_path = Path(output_path)
+            try:
+                pdf_bytes = read_fn(file_path)
+                return (file_path.stem, pdf_bytes)
+            except Exception as e:
+                raise HTTPException(status_code=400, detail=f'File Not Found: {file_path}: {e}')
+        else:
+            raise HTTPException(status_code=400, detail=f'Unsupported file type: {file_path.suffix}')
+
+    def _convert_file_to_pdf(self, input_path, output_dir):
+        if not os.path.isfile(input_path):
+            raise FileNotFoundError(f'The input file {input_path} does not exist.')
+
+        os.makedirs(output_dir, exist_ok=True)
+
+        cmd = [
+            'libreoffice',
+            '--headless',
+            '--norestore',
+            '--invisible',
+            '--convert-to', 'pdf',
+            '--outdir', str(output_dir),
+            str(input_path)
+        ]
+
+        process = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+
+        if process.returncode != 0:
+            raise Exception(f'LibreOffice convert failed: {process.stderr.decode()}')
+
+    def _file_sha256(self, file_path: str) -> str:
+        hasher = hashlib.sha256()
+        with open(file_path, 'rb') as f:
+            for chunk in iter(lambda: f.read(8192), b''):
+                hasher.update(chunk)
+        return hasher.hexdigest()
+
+
+class MineruServer(ServerModule):
+    def __init__(self,
+                 cache_dir: str = None,
+                 image_save_dir: str = None,
+                 default_backend: str = 'pipeline',
+                 default_lang: str = 'ch_server',
+                 default_parse_method: str = 'auto',
+                 default_formula_enable: bool = True,
+                 default_table_enable: bool = True,
+                 default_return_md: bool = False,
+                 default_return_content_list: bool = True,
+                 *args, **kwargs):
+        mineru_server = MineruServerBase(
+            cache_dir=cache_dir, image_save_dir=image_save_dir, default_backend=default_backend,
+            default_lang=default_lang, default_parse_method=default_parse_method,
+            default_formula_enable=default_formula_enable, default_table_enable=default_table_enable,
+            default_return_md=default_return_md, default_return_content_list=default_return_content_list)
+        super().__init__(mineru_server, *args, **kwargs)
diff --git a/lazyllm/components/deploy/ray.py b/lazyllm/components/deploy/ray.py
index 02ec43808..e8be6aec4 100644
--- a/lazyllm/components/deploy/ray.py
+++ b/lazyllm/components/deploy/ray.py
@@ -33,7 +33,7 @@ def reallocate_launcher(launcher):
 
 class Distributed(LazyLLMDeployBase):
 
-    def __init__(self, launcher=launchers.remote(ngpus=1), port=None):
+    def __init__(self, launcher=launchers.remote(ngpus=1), port=None):  # noqa B008
         super().__init__(launcher=launcher)
         self.port = port or random.randint(30000, 40000)
         self.finetuned_model = None
diff --git a/lazyllm/components/deploy/relay/base.py b/lazyllm/components/deploy/relay/base.py
index fd338b8eb..fab892c5b 100644
--- a/lazyllm/components/deploy/relay/base.py
+++ b/lazyllm/components/deploy/relay/base.py
@@ -14,7 +14,7 @@ class RelayServer(LazyLLMDeployBase):
     message_format = None
 
     def __init__(self, port=None, *, func=None, pre_func=None, post_func=None,
-                 pythonpath=None, log_path=None, cls=None, launcher=launchers.remote(sync=False)):
+                 pythonpath=None, log_path=None, cls=None, launcher=launchers.remote(sync=False)):  # noqa B008
         # func must dump in __call__ to wait for dependancies.
         self.func = func
         self.pre = dump_obj(pre_func)
diff --git a/lazyllm/components/deploy/vllm.py b/lazyllm/components/deploy/vllm.py
index 8a2c0bced..48220b225 100644
--- a/lazyllm/components/deploy/vllm.py
+++ b/lazyllm/components/deploy/vllm.py
@@ -41,7 +41,8 @@ class Vllm(LazyLLMDeployBase, metaclass=_VllmStreamParseParametersMeta):
     optional_keys = set(["max-model-len"])
 
     # TODO(wangzhihong): change default value for `openai_api` argument to True
-    def __init__(self, trust_remote_code: bool = True, launcher: LazyLLMLaunchersBase = launchers.remote(ngpus=1),
+    def __init__(self, trust_remote_code: bool = True,
+                 launcher: LazyLLMLaunchersBase = launchers.remote(ngpus=1),  # noqa B008
                  log_path: str = None, openai_api: bool = False, **kw):
         self.launcher_list, launcher = reallocate_launcher(launcher)
         super().__init__(launcher=launcher)
diff --git a/lazyllm/components/finetune/alpaca-lora/finetune.py b/lazyllm/components/finetune/alpaca-lora/finetune.py
index 380f9db23..115f7b450 100755
--- a/lazyllm/components/finetune/alpaca-lora/finetune.py
+++ b/lazyllm/components/finetune/alpaca-lora/finetune.py
@@ -60,7 +60,7 @@ def train( # noqa C901
     # model/data params
     base_model: str = "",  # the only required argument
     data_path: str = "",
-    output_dir: str = os.path.abspath("./output_dir"),
+    output_dir: str = os.path.abspath("./output_dir"),  # noqa B008
     # training hyperparams
     batch_size: int = 128,
     micro_batch_size: int = 4,
@@ -73,7 +73,7 @@ def train( # noqa C901
     lora_r: int = 8,
     lora_alpha: int = 16,
     lora_dropout: float = 0.05,
-    lora_target_modules: List[str] = [
+    lora_target_modules: List[str] = [  # noqa B006
         "q_proj",
         "v_proj",
     ],
@@ -220,7 +220,7 @@ def generate_and_tokenize_prompt(data_point):
         else:
             datas.append(load_dataset(data_path))
     elif os.path.isdir(data_path):
-        for root, dirs, files in os.walk(data_path):
+        for root, _, files in os.walk(data_path):
             for file in files:
                 if file.endswith(".json") or file.endswith(".jsonl"):
                     file_path = os.path.join(root, file)
diff --git a/lazyllm/components/finetune/alpacalora.py b/lazyllm/components/finetune/alpacalora.py
index c0571aa59..6d64f73c4 100644
--- a/lazyllm/components/finetune/alpacalora.py
+++ b/lazyllm/components/finetune/alpacalora.py
@@ -36,7 +36,7 @@ def __init__(self,
                  merge_path=None,
                  model_name='LLM',
                  cp_files='tokeniz*',
-                 launcher=launchers.remote(ngpus=1),
+                 launcher=launchers.remote(ngpus=1),  # noqa B008
                  **kw
                  ):
         if not merge_path:
diff --git a/lazyllm/components/finetune/base.py b/lazyllm/components/finetune/base.py
index 78182c50f..616b2d45e 100644
--- a/lazyllm/components/finetune/base.py
+++ b/lazyllm/components/finetune/base.py
@@ -5,7 +5,7 @@
 class LazyLLMFinetuneBase(ComponentBase):
     __reg_overwrite__ = 'cmd'
 
-    def __init__(self, base_model, target_path, *, launcher=launchers.remote()):
+    def __init__(self, base_model, target_path, *, launcher=launchers.remote()):  # noqa B008
         super().__init__(launcher=launcher)
         self.base_model = base_model
         self.target_path = target_path
@@ -20,7 +20,7 @@ def __call__(self, *args, **kw):
 
 
 class DummyFinetune(LazyLLMFinetuneBase):
-    def __init__(self, base_model='base', target_path='target', *, launcher=launchers.remote(), **kw):
+    def __init__(self, base_model='base', target_path='target', *, launcher=launchers.remote(), **kw):  # noqa B008
         super().__init__(base_model, target_path, launcher=launchers.empty)
         self.kw = kw
 
diff --git a/lazyllm/components/finetune/collie.py b/lazyllm/components/finetune/collie.py
index 1659fae9f..bc6b542c3 100644
--- a/lazyllm/components/finetune/collie.py
+++ b/lazyllm/components/finetune/collie.py
@@ -35,7 +35,7 @@ def __init__(self,
                  merge_path=None,
                  model_name='LLM',
                  cp_files='tokeniz*',
-                 launcher=launchers.remote(ngpus=1),
+                 launcher=launchers.remote(ngpus=1),  # noqa B008
                  **kw
                  ):
         if not merge_path:
diff --git a/lazyllm/components/finetune/flagembedding.py b/lazyllm/components/finetune/flagembedding.py
index dada5c9e6..da9e4c22c 100644
--- a/lazyllm/components/finetune/flagembedding.py
+++ b/lazyllm/components/finetune/flagembedding.py
@@ -62,7 +62,7 @@ def __init__(
         self,
         base_model,
         target_path,
-        launcher=launchers.remote(ngpus=1, sync=True),
+        launcher=launchers.remote(ngpus=1, sync=True),  # noqa B008
         **kw
     ):
         model_type = ModelManager.get_model_type(base_model.split('/')[-1])
diff --git a/lazyllm/components/finetune/llamafactory.py b/lazyllm/components/finetune/llamafactory.py
index 440db9f56..d58b9a50f 100644
--- a/lazyllm/components/finetune/llamafactory.py
+++ b/lazyllm/components/finetune/llamafactory.py
@@ -27,7 +27,7 @@ def __init__(self,
                  lora_r=None,
                  modules_to_save=None,
                  lora_target_modules=None,
-                 launcher=launchers.remote(ngpus=1, sync=True),
+                 launcher=launchers.remote(ngpus=1, sync=True),  # noqa B008
                  **kw
                  ):
         if not os.path.exists(base_model):
diff --git a/lazyllm/components/utils/downloader/model_downloader.py b/lazyllm/components/utils/downloader/model_downloader.py
index e752ff480..9c7e58390 100644
--- a/lazyllm/components/utils/downloader/model_downloader.py
+++ b/lazyllm/components/utils/downloader/model_downloader.py
@@ -173,7 +173,7 @@ def _do_download(self, model='', call_back=None):
         try:
             return self.hub_downloader.download(model, full_model_dir, call_back)
         # Use `BaseException` to capture `KeyboardInterrupt` and normal `Exceptioin`.
-        except BaseException as e:
+        except BaseException as e:  # noqa B036
             lazyllm.LOG.warning(f"Download encountered an error: {e}")
             if not self.token and 'Permission denied' not in str(e):
                 lazyllm.LOG.warning('Token is empty, which may prevent private models from being downloaded, '
diff --git a/lazyllm/configs.py b/lazyllm/configs.py
index 1a1366c2e..78896e184 100644
--- a/lazyllm/configs.py
+++ b/lazyllm/configs.py
@@ -1,7 +1,7 @@
 import os
 from enum import Enum
 import json
-from typing import List, Union
+from typing import List, Union, Optional
 from contextlib import contextmanager
 import logging
 
@@ -13,7 +13,7 @@ class Mode(Enum):
 
 
 class Config(object):
-    def __init__(self, prefix='LAZYLLM', home=os.path.join(os.path.expanduser('~'), '.lazyllm')):
+    def __init__(self, prefix='LAZYLLM', home=os.path.join(os.path.expanduser('~'), '.lazyllm')):  # noqa B008
         self._config_params = dict()
         self._env_map_name = dict()
         self.prefix = prefix
@@ -49,7 +49,7 @@ def temp(self, name, value):
         yield
         self.impl[name] = old_value
 
-    def add(self, name, type, default=None, env=None):
+    def add(self, name: str, type: type, default: Optional[Union[int, str, bool]] = None, env: Union[str, dict] = None):
         update_params = (type, default, env)
         if name not in self._config_params or self._config_params[name] != update_params:
             if name in self._config_params:
@@ -64,7 +64,8 @@ def add(self, name, type, default=None, env=None):
         self._update_impl(name, type, default, env)
         return self
 
-    def _update_impl(self, name, type, default=None, env=None):
+    def _update_impl(self, name: str, type: type, default: Optional[Union[int, str, bool]] = None,
+                     env: Union[str, dict] = None):
         self.impl[name] = self.cfgs.pop(name) if name in self.cfgs else default
         if isinstance(env, dict):
             for k, v in env.items():
@@ -78,15 +79,17 @@ def _update_impl(self, name, type, default=None, env=None):
 
     def __getitem__(self, name):
         try:
+            if isinstance(name, bytes): name = name.decode('utf-8')
             return self.impl[name]
         except KeyError:
-            raise RuntimeError(f'Key {name} is not in lazyllm global config')
+            raise RuntimeError(f'Key `{name}` is not in lazyllm global config')
 
     def __str__(self):
         return str(self.impl)
 
-    def refresh(self, targets: Union[str, List[str]] = None) -> None:
+    def refresh(self, targets: Union[bytes, str, List[str]] = None) -> None:
         names = targets
+        if isinstance(targets, bytes): targets = targets.decode('utf-8')
         if isinstance(targets, str):
             names = targets.lower()
             if names.startswith('lazyllm_'):
@@ -97,7 +100,7 @@ def refresh(self, targets: Union[str, List[str]] = None) -> None:
             names = list(set([self._env_map_name[key] for key in curr_envs if key in self._env_map_name]))
         assert isinstance(names, list)
         for name in names:
-            self._update_impl(name, *self._config_params[name])
+            if name in self.impl: self._update_impl(name, *self._config_params[name])
 
 config = Config().add('mode', Mode, Mode.Normal, dict(DISPLAY=Mode.Display, DEBUG=Mode.Debug)
                 ).add('repr_ml', bool, False, 'REPR_USE_ML'
diff --git a/lazyllm/docs/common.py b/lazyllm/docs/common.py
index d40f029f3..b95476668 100644
--- a/lazyllm/docs/common.py
+++ b/lazyllm/docs/common.py
@@ -71,6 +71,83 @@
 # ...     return input
 # ''')
 
+add_chinese_doc('registry.LazyDict', '''\
+一个为懒惰的程序员设计的特殊字典类。支持多种便捷的访问和操作方式。
+
+特性：
+1. 使用点号代替['str']访问字典元素
+2. 支持首字母小写来使语句更像函数调用
+3. 当字典只有一个元素时支持直接调用
+4. 支持动态默认键
+5. 如果组名出现在名称中，允许省略组名
+
+参数:
+    name (str): 字典的名称，默认为空字符串。
+    base: 基类引用，默认为None。
+    *args: 位置参数，传递给dict父类。
+    **kw: 关键字参数，传递给dict父类。
+''')
+
+add_english_doc('registry.LazyDict', '''\
+A special dictionary class designed for lazy programmers. Supports various convenient access and operation methods.
+
+Features:
+1. Use dot notation instead of ['str'] to access dictionary elements
+2. Support lowercase first character to make statements more like function calls
+3. Support direct calls when dictionary has only one element
+4. Support dynamic default keys
+5. Allow omitting group name if it appears in the name
+
+Args:
+    name (str): Name of the dictionary, defaults to empty string.
+    base: Base class reference, defaults to None.
+    *args: Positional arguments passed to dict parent class.
+    **kw: Keyword arguments passed to dict parent class.
+''')
+
+add_chinese_doc('registry.LazyDict.remove', '''\
+从字典中移除指定的键值对。
+
+参数:
+    key (str): 要移除的键。支持与__getattr__相同的键匹配规则，包括首字母小写和组名省略等特性。
+
+注意:
+    如果找不到匹配的键，将抛出AttributeError异常。
+''')
+
+add_english_doc('registry.LazyDict.remove', '''\
+Remove the specified key-value pair from the dictionary.
+
+Args:
+    key (str): The key to remove. Supports the same key matching rules as __getattr__, 
+              including lowercase first character and group name omission features.
+
+Note:
+    Raises AttributeError if no matching key is found.
+''')
+
+add_chinese_doc('registry.LazyDict.set_default', '''\
+设置字典的默认键。设置后可以通过.default属性访问该键对应的值。
+
+参数:
+    key (str): 要设置为默认的键名。
+
+注意:
+    - key必须是字符串类型
+    - 设置后可以通过.default访问，或在字典只有一个元素时直接调用
+''')
+
+add_english_doc('registry.LazyDict.set_default', '''\
+Set the default key for the dictionary. After setting, the value can be accessed through the .default property.
+
+Args:
+    key (str): The key name to set as default.
+
+Note:
+    - key must be a string type
+    - After setting, can be accessed via .default, or called directly when dictionary has only one element
+''')
+
 add_chinese_doc('compile_func', '''
 将一段 python 函数字符串编译成一个可执行函数并返回。
 
@@ -94,6 +171,114 @@
 assert identity('hello') == 'hello'
 ''')
 
+# ============= Threading
+# Thread
+add_chinese_doc('Thread', '''\
+LazyLLM 提供的增强线程类，继承自 Python 标准库的 `threading.Thread`。此类提供了额外的功能，包括会话ID管理、预钩子函数支持和异常处理机制。
+
+Args:
+    group: 线程组，默认为 ``None``
+    target: 要在线程中执行的函数，默认为 ``None``
+    name: 线程名称，默认为 ``None``
+    args: 传递给目标函数的参数元组，默认为 ``()``
+    kwargs: 传递给目标函数的关键字参数字典，默认为 ``None``
+    prehook: 在线程执行前要调用的函数或函数列表，默认为 ``None``
+    daemon: 是否为守护线程，默认为 ``None``
+''')
+
+add_english_doc('Thread', '''\
+Enhanced thread class provided by LazyLLM, inheriting from Python's standard library `threading.Thread`. This class provides additional functionality including session ID management, pre-hook function support, and exception handling mechanisms.
+
+Args:
+    group: Thread group, default to ``None``
+    target: Function to be executed in the thread, default to ``None``
+    name: Thread name, default to ``None``
+    args: Tuple of arguments to pass to the target function, default to ``()``
+    kwargs: Dictionary of keyword arguments to pass to the target function, default to ``None``
+    prehook: Function or list of functions to call before thread execution, default to ``None``
+    daemon: Whether the thread is a daemon thread, default to ``None``
+''')
+
+add_example('Thread', '''\
+>>> import lazyllm
+>>> from lazyllm.common.threading import Thread
+>>> import time
+>>> def simple_task(name):
+...     time.sleep(0.1)
+...     return f"Hello from {name}"
+>>> thread = Thread(target=simple_task, args=("Worker",))
+>>> thread.start()
+>>> result = thread.get_result()
+>>> print(result)
+Hello from Worker
+>>> def setup_environment():
+...     print("Setting up environment...")
+...     return "environment_ready"
+>>> def validate_input(data):
+...     print(f"Validating input: {data}")
+...     if not isinstance(data, (int, float)):
+...         raise ValueError("Input must be numeric")
+>>> def process_data(data):
+...     print(f"Processing data: {data}")
+...     time.sleep(0.1) 
+...     return data * 2
+>>> thread = Thread(
+...     target=process_data,
+...     args=(42,),
+...     prehook=[setup_environment, lambda: validate_input(42)]
+... )
+>>> thread.start()
+Setting up environment...
+Validating input: 42
+Processing data: 42
+>>> result = thread.get_result()
+>>> print(f"Final result: {result}")
+Final result: 84
+''')
+
+# Thread.work
+add_chinese_doc('Thread.work', '''\
+线程的核心工作方法，负责执行预钩子函数、目标函数，并处理异常和结果。
+
+Args:
+    prehook: 预钩子函数列表，在线程执行前调用
+    target: 要执行的目标函数
+    args: 传递给目标函数的参数
+    **kw: 传递给目标函数的关键字参数
+
+**注意**: 此方法由 `Thread` 类内部调用，用户通常不需要直接调用此方法。
+''')
+
+add_english_doc('Thread.work', '''\
+Core working method of the thread, responsible for executing pre-hook functions, target function, and handling exceptions and results.
+
+Args:
+    prehook: List of pre-hook functions to call before thread execution
+    target: Target function to execute
+    args: Arguments to pass to the target function
+    **kw: Keyword arguments to pass to the target function
+
+**Note**: This method is called internally by the `Thread` class, users typically don't need to call this method directly.
+''')
+
+# Thread.get_result
+add_chinese_doc('Thread.get_result', '''\
+获取线程执行结果的方法。此方法会阻塞直到线程执行完成，然后返回执行结果或重新抛出异常。
+
+**Returns:**\n
+- 线程执行的结果。如果目标函数正常执行，返回其返回值；如果发生异常，会重新抛出该异常。
+
+**注意**: 此方法应该在调用 `thread.start()` 之后使用，用于获取线程的执行结果。
+''')
+
+add_english_doc('Thread.get_result', '''\
+Method to retrieve the thread execution result. This method blocks until the thread execution is complete, then returns the execution result or re-raises the exception.
+
+**Returns:**\n
+- The result of thread execution. If the target function executes normally, returns its return value; if an exception occurs, re-raises that exception.
+
+**Note**: This method should be used after calling `thread.start()` to retrieve the thread execution result.
+''')
 # ============= Bind/bind
 add_chinese_doc('bind', '''\
 Bind 类用于函数绑定与延迟调用，支持动态参数传入和上下文参数解析，实现灵活的函数组合与流水线式调用。
@@ -110,6 +295,70 @@
     **kw: 绑定时固定的关键字参数，可以包含占位符。
 ''')
 
+add_chinese_doc('common.CaseInsensitiveDict', '''\
+大小写不敏感的字典类。
+
+CaseInsensitiveDict 继承自 dict，提供大小写不敏感的键值存储和检索功能。所有的键都会被转换为小写形式存储，确保无论使用大写、小写或混合大小写的键名都能访问到相同的值。
+
+特点：
+    - 所有键在存储时自动转换为小写
+    - 支持标准的字典操作（获取、设置、检查包含关系）
+    - 保持字典的原有功能，只是键名处理方式不同
+
+Args:
+    *args: 传递给父类 dict 的位置参数
+    **kwargs: 传递给父类 dict 的关键字参数
+''')
+
+add_english_doc('common.CaseInsensitiveDict', '''\
+Case-insensitive dictionary class.
+
+CaseInsensitiveDict inherits from dict and provides case-insensitive key-value storage and retrieval. All keys are converted to lowercase when stored, ensuring that values can be accessed regardless of whether the key name is uppercase, lowercase, or mixed case.
+
+Features:
+    - All keys are automatically converted to lowercase when stored
+    - Supports standard dictionary operations (get, set, check containment)
+    - Maintains all original dict functionality, only differs in key name handling
+
+Args:
+    *args: Positional arguments passed to the parent dict class
+    **kwargs: Keyword arguments passed to the parent dict class
+''')
+
+add_example('common.CaseInsensitiveDict', '''\
+>>> from lazyllm.common import CaseInsensitiveDict
+>>> # 创建大小写不敏感的字典
+>>> d = CaseInsensitiveDict({'Name': 'John', 'AGE': 25, 'City': 'New York'})
+>>> 
+>>> # 使用不同大小写访问相同的键
+>>> print(d['name'])      # 使用小写
+... 'John'
+>>> print(d['NAME'])      # 使用大写
+... 'John'
+>>> print(d['Name'])      # 使用首字母大写
+... 'John'
+>>> 
+>>> # 设置值时也会转换为小写
+>>> d['EMAIL'] = 'john@example.com'
+>>> print(d['email'])     # 使用小写访问
+... 'john@example.com'
+>>> 
+>>> # 检查键是否存在（大小写不敏感）
+>>> 'AGE' in d
+True
+>>> 'age' in d
+True
+>>> 'Age' in d
+True
+>>> 
+>>> # 支持标准字典操作
+>>> d['PHONE'] = '123-456-7890'
+>>> print(d.get('phone'))
+... '123-456-7890'
+>>> print(len(d))
+... 5
+''')
+
 add_english_doc('bind', '''\
 The Bind class provides function binding and deferred invocation capabilities, supporting dynamic argument passing and context-based argument resolution for flexible function composition and pipeline-style calls.
 
@@ -362,4 +611,457 @@
 0
 >>> queue.peek() is None
 True
-""")
\ No newline at end of file
+""")
+
+
+add_chinese_doc('common.ResultCollector', '''\
+结果收集器，用于在流程或任务执行过程中按名称存储和访问结果。  
+它通过调用自身（传入 name）返回一个可调用的 Impl 对象来收集指定名称的结果。  
+适用于需要跨步骤共享中间结果的场景。
+''')
+
+add_english_doc('common.ResultCollector', '''\
+A result collector used to store and access results by name during the execution of a flow or task.  
+Calling the instance with a name returns a callable Impl object that collects results for that name.  
+Useful for scenarios where intermediate results need to be shared across steps.
+''')
+add_chinese_doc('common.ResultCollector.Impl', '''\
+ResultCollector 的内部实现类，负责为指定名称收集结果。  
+不应直接实例化，需通过 ResultCollector(name) 获取。
+
+Args:
+    name (str): 结果名称。
+    value (dict): 存储结果的字典引用。
+''')
+
+add_english_doc('common.ResultCollector.Impl', '''\
+Internal implementation class of ResultCollector, responsible for collecting results for a given name.  
+Should not be instantiated directly; obtain via ResultCollector(name).
+
+Args:
+    name (str): The result name.
+    value (dict): A reference to the dictionary where results are stored.
+''')
+
+
+add_chinese_doc('common.ResultCollector.keys', '''\
+获取所有已存储结果的名称。
+
+**Returns**\n
+- KeysView[str]: 结果名称集合。
+''')
+
+add_english_doc('common.ResultCollector.keys', '''\
+Get all stored result names.
+
+**Returns**\n
+- KeysView[str]: A set-like object containing result names.
+''')
+
+add_chinese_doc('common.ResultCollector.items', '''\
+获取所有已存储的 (名称, 值) 对。
+
+**Returns**\n
+- ItemsView[str, Any]: 结果的键值对集合。
+''')
+
+add_english_doc('common.ResultCollector.items', '''\
+Get all stored (name, value) pairs.
+
+**Returns**\n
+- ItemsView[str, Any]: A set-like object containing name-value pairs of results.
+''')
+
+add_chinese_doc('common.EnvVarContextManager', '''\
+环境变量上下文管理器，用于 在代码块执行期间临时设置环境变量，退出时自动恢复原始环境变量。
+
+Args:
+    env_vars_dict (dict): 需要临时设置的环境变量字典，值为 None 的变量将被忽略。
+''')
+
+add_english_doc('common.EnvVarContextManager', '''\
+Environment variable context manager used to temporarily set environment variables during the execution of a code block, automatically restoring original environment variables upon exit.
+
+Args:
+    env_vars_dict (dict): Dictionary of environment variables to temporarily set; variables with None values are ignored.
+''')
+
+add_chinese_doc('ReadOnlyWrapper', '''\ 
+一个轻量级只读包装器，用于包裹任意对象并对外提供只读访问（实际并未完全禁止修改，但复制时不会携带原始对象）。包装器可以动态替换内部对象，并提供判断对象是否为空的辅助方法。
+Args:
+    obj (Optional[Any]): 初始被包装的对象，默认为 None。
+''')
+
+add_english_doc('ReadOnlyWrapper', '''\
+A lightweight read-only wrapper that holds an arbitrary object and exposes its attributes. It supports swapping the internal object dynamically and provides utility for checking emptiness. Note: it does not enforce deep immutability, but deepcopy drops the wrapped object.
+Args:
+    obj (Optional[Any]): The initial wrapped object, defaults to None.
+''')
+
+add_chinese_doc('ReadOnlyWrapper.set', '''\ 
+替换当前包装的内部对象。
+
+Args:
+    obj (Any): 新的内部对象。
+''')
+
+add_english_doc('ReadOnlyWrapper.set', '''\
+Replace the currently wrapped internal object.
+
+Args:
+    obj (Any): New object to wrap.
+''')
+
+add_chinese_doc('ReadOnlyWrapper.isNone', '''\ 
+检查当前包装器是否未持有任何对象。
+
+Args:
+    None.
+
+**Returns**\n
+- bool: 如果内部对象为 None 返回 True，否则 False。
+''')
+
+add_english_doc('ReadOnlyWrapper.isNone', '''\
+Check whether the wrapper currently holds no object.
+
+Args:
+    None.
+
+**Returns**\n
+- bool: True if the internal object is None, otherwise False.
+''')
+
+add_chinese_doc('queue.RedisQueue', '''\ 
+基于 Redis 实现的文件系统队列（继承自 FileSystemQueue），用于跨进程/节点的消息传递与队列管理。内部使用指定的 redis_url 初始化并管理底层存储，同时提供线程安全的初始化逻辑。
+
+Args:
+    klass (str): 队列的分类名称，用于区分不同队列实例，默认值为 '__default__'。
+''')
+
+add_english_doc('queue.RedisQueue', '''\
+Redis-backed file system queue (inherits from FileSystemQueue) for cross-process/node message passing and queue management. It initializes its underlying storage using a configured Redis URL and employs thread-safe setup logic.
+
+Args:
+    klass (str): Classification name for the queue instance to distinguish different queues. Defaults to '__default__'.
+''')
+
+
+add_chinese_doc('Identity', '''\
+恒等模块，用于直接返回输入值。
+
+该模块常用于模块拼接结构中占位，无实际处理逻辑。若输入为多个参数，将自动打包为一个整体结构输出。
+
+Args:
+    *args: 可选的位置参数，占位用。
+    **kw: 可选的关键字参数，占位用。
+''')
+
+add_english_doc('Identity', '''\
+Identity module that directly returns the input as output.
+
+This module serves as a no-op placeholder in composition pipelines. If multiple inputs are provided, they are packed together before returning.
+
+Args:
+    *args: Optional positional arguments for placeholder compatibility.
+    **kw: Optional keyword arguments for placeholder compatibility.
+''')
+
+
+
+add_chinese_doc('ProcessPoolExecutor.submit', '''\
+将任务提交到进程池中执行。
+
+此方法将一个函数及其参数序列化后提交到进程池中执行，返回一个 `Future` 对象，用于获取任务执行结果或状态。
+
+Args:
+    fn (Callable): 要执行的函数。
+    *args: 传递给函数的位置参数。
+    **kwargs: 传递给函数的关键字参数。
+
+Returns:
+    concurrent.futures.Future: 表示任务执行状态的 `Future` 对象。
+''')
+
+add_english_doc('ProcessPoolExecutor.submit', '''\
+Submit a task to the process pool for execution.
+
+This method serializes a function and its arguments, then submits them to the process pool for execution. It returns a `Future` object to track the task's status or result.
+
+Args:
+    fn (Callable): The function to execute.
+    *args: Positional arguments passed to the function.
+    **kwargs: Keyword arguments passed to the function.
+
+Returns:
+    concurrent.futures.Future: A `Future` object representing the task's execution status.
+''')
+
+add_example('ProcessPoolExecutor.submit', '''\
+>>> from lazyllm.common.multiprocessing import ProcessPoolExecutor
+>>> import time
+>>> 
+>>> def task(x):
+...     time.sleep(1)
+...     return x * 2
+... 
+>>> with ProcessPoolExecutor(max_workers=2) as executor:
+...     future = executor.submit(task, 5)
+...     result = future.result()
+...     print(result)
+10
+''')
+
+
+# ============= Multiprocessing
+# ForkProcess
+add_chinese_doc('ForkProcess', '''\
+LazyLLM 提供的增强进程类，继承自 Python 标准库的 `multiprocessing.Process`。此类专门使用 fork 启动方法来创建子进程，并提供了同步/异步执行模式的支持。
+
+Args:
+    group: 进程组，默认为 ``None``
+    target: 要在进程中执行的函数，默认为 ``None``
+    name: 进程名称，默认为 ``None``
+    args: 传递给目标函数的参数元组，默认为 ``()``
+    kwargs: 传递给目标函数的关键字参数字典，默认为 ``{}``
+    daemon: 是否为守护进程，默认为 ``None``
+    sync: 是否为同步模式，默认为 ``True``。在同步模式下，进程执行完目标函数后会自动退出；在异步模式下，进程会持续运行直到被手动终止。
+
+**注意**: 此类主要用于 LazyLLM 内部的进程管理，特别是在需要长期运行的服务器进程中。
+''')
+
+add_english_doc('ForkProcess', '''\
+Enhanced process class provided by LazyLLM, inheriting from Python's standard library `multiprocessing.Process`. This class specifically uses the fork start method to create child processes and provides support for synchronous/asynchronous execution modes.
+
+Args:
+    group: Process group, default to ``None``
+    target: Function to be executed in the process, default to ``None``
+    name: Process name, default to ``None``
+    args: Tuple of arguments to pass to the target function, default to ``()``
+    kwargs: Dictionary of keyword arguments to pass to the target function, default to ``{}``
+    daemon: Whether the process is a daemon process, default to ``None``
+    sync: Whether to use synchronous mode, default to ``True``. In synchronous mode, the process automatically exits after executing the target function; in asynchronous mode, the process continues running until manually terminated.
+
+**Note**: This class is primarily used for LazyLLM's internal process management, especially in long-running server processes.
+''')
+
+add_example('ForkProcess', '''\
+>>> import lazyllm
+>>> from lazyllm.common import ForkProcess
+>>> import time
+>>> import os
+>>> def simple_task(task_id):
+...     print(f"Process {os.getpid()} executing task {task_id}")
+...     time.sleep(0.1)  
+...     return f"Task {task_id} completed by process {os.getpid()}"
+>>> process = ForkProcess(target=simple_task, args=(1,), sync=True)
+>>> process.start()
+Process 12345 executing task 1
+''')
+
+# ForkProcess.work
+add_chinese_doc('ForkProcess.work', '''\
+ForkProcess 的核心工作方法，负责包装目标函数并处理同步/异步执行逻辑。
+
+Args:
+    f: 要执行的目标函数
+    sync: 是否为同步模式。在同步模式下，执行完目标函数后进程会退出；在异步模式下，进程会持续运行。
+''')
+
+add_english_doc('ForkProcess.work', '''\
+Core working method of ForkProcess, responsible for wrapping the target function and handling synchronous/asynchronous execution logic.
+
+Args:
+    f: Target function to execute
+    sync: Whether to use synchronous mode. In synchronous mode, the process exits after executing the target function; in asynchronous mode, the process continues running.
+''')
+
+# ForkProcess.start
+add_chinese_doc('ForkProcess.start', '''\
+启动 ForkProcess 进程。此方法会使用 fork 启动方法来创建子进程，并开始执行目标函数。
+
+此方法的特点：
+
+- **Fork 启动**: 使用 fork 方法创建子进程，在 Unix/Linux 系统上提供更好的性能
+- **上下文管理**: 自动管理进程启动方法的上下文，确保使用正确的启动方式
+- **继承父类**: 继承自 `multiprocessing.Process.start()` 的所有功能
+
+**注意**: 此方法会实际创建新的进程并开始执行，调用后进程会立即开始运行。
+
+''')
+
+add_english_doc('ForkProcess.start', '''\
+Start the ForkProcess. This method uses the fork start method to create a child process and begin executing the target function.
+
+Features of this method:
+
+- **Fork Start**: Uses fork method to create child processes, providing better performance on Unix/Linux systems
+- **Context Management**: Automatically manages the context of process start methods, ensuring the correct start method is used
+- **Parent Inheritance**: Inherits all functionality from `multiprocessing.Process.start()`
+
+**Note**: This method actually creates a new process and begins execution, the process starts running immediately after calling.
+
+''')
+
+# ============= Options
+# Option
+add_chinese_doc('Option', '''\
+LazyLLM 提供的选项管理类，用于管理多个选项值并在它们之间进行迭代。此类主要用于参数网格搜索和超参数调优场景。
+
+Args:
+    *obj: 一个或多个选项值，可以是任意类型的对象。如果传入单个列表或元组，会自动展开。
+
+此类的主要特性：
+
+- **多选项管理**: 可以管理多个不同的选项值
+- **迭代支持**: 支持标准的 Python 迭代协议，可以遍历所有选项
+- **当前值访问**: 始终可以访问当前选中的选项值
+- **深度复制**: 支持深度复制当前选中的选项值
+- **多进程兼容**: 支持在多进程环境中使用
+
+**注意**: 此类主要用于 LazyLLM 内部的参数搜索和试验管理，特别是在 TrialModule 中进行参数网格搜索时。
+
+''')
+
+add_english_doc('Option', '''\
+Option management class provided by LazyLLM, used for managing multiple option values and iterating between them. This class is primarily used for parameter grid search and hyperparameter tuning scenarios.
+
+Args:
+    *obj: One or more option values, which can be objects of any type. If a single list or tuple is passed, it will be automatically expanded.
+
+Key features of this class:
+
+- **Multi-option Management**: Can manage multiple different option values
+- **Iteration Support**: Supports standard Python iteration protocol, can iterate through all options
+- **Current Value Access**: Always can access the currently selected option value
+- **Deep Copy**: Supports deep copying of the currently selected option value
+- **Multi-process Compatibility**: Supports usage in multi-process environments
+
+**Note**: This class is primarily used for LazyLLM's internal parameter search and trial management, especially in TrialModule for parameter grid search.
+
+''')
+
+add_example('Option', '''\
+>>> import lazyllm
+>>> from lazyllm.common.option import Option
+>>> learning_rates = Option(0.001, 0.01, 0.1)
+>>> print(f"当前学习率: {learning_rates}")
+当前学习率: <Option options="(0.001, 0.01, 0.1)" curr="0.001">
+>>> print(f"所有选项: {list(learning_rates)}")
+所有选项: [0.001, 0.01, 0.1]
+''')
+
+add_chinese_doc('multiprocessing.SpawnProcess.start', '''
+使用spawn方式启动进程。
+
+此方法在启动进程时强制使用spawn方式，这种方式会创建一个全新的Python解释器进程。spawn方式相比fork更安全，特别是在多线程环境下。
+
+**说明:**
+- 使用spawn方式启动新进程，避免了fork可能带来的问题
+- 会临时切换启动方式为spawn，执行完后恢复原有启动方式
+- 继承自multiprocessing.Process.start()的所有功能
+''')
+
+add_english_doc('multiprocessing.SpawnProcess.start', '''
+Start the process using spawn method.
+
+This method forces the use of spawn method when starting the process, which creates a brand new Python interpreter process. Spawn is safer than fork, especially in multi-threaded environments.
+
+**Notes:**
+- Uses spawn method to start new process, avoiding potential issues with fork
+- Temporarily switches to spawn method and restores original method after execution
+- Inherits all functionality from multiprocessing.Process.start()
+''')
+
+add_example('multiprocessing.SpawnProcess.start', '''
+```python
+from lazyllm.common.multiprocessing import SpawnProcess
+
+def worker():
+    print("Worker process running")
+
+# Create and start a process using spawn method
+process = SpawnProcess(target=worker)
+process.start()
+process.join()
+```
+''')
+
+add_chinese_doc('LazyLLMCMD', '''\
+命令行操作封装类，提供安全、灵活的命令行管理功能。
+
+Args:
+    cmd (Union[str, List[str], Callable]):命令行指令，支持三种形式：字符串命令,命令列表,可调用对象。
+    return_value (Any):预设返回值。
+    checkf(Any):命令验证函数。
+    no_displays(Any):需要过滤的敏感参数名。
+''')
+
+add_english_doc('LazyLLMCMD', '''\
+Command line operation wrapper class providing secure and flexible command management.
+
+Args:
+    cmd (Union[str, List[str], Callable]):Command input, supports three formats:String command,Command list,Callable object.
+    return_value (Any):Preset return value.
+    checkf(Any):Command validation function with signature.
+    no_displays(Any):Sensitive parameter names to filter.
+
+''')
+
+add_example('LazyLLMCMD', '''\
+>>> from lazyllm.common import LazyLLMCMD
+>>> cmd = LazyLLMCMD("run --epochs=50 --batch-size=32")
+>>> print(cmd.get_args("epochs"))
+50
+>>> print(cmd.get_args("batch-size")) 
+32
+>>> base = LazyLLMCMD("python train.py", checkf=lambda x: True)
+>>> new = base.with_cmd("python predict.py")
+
+''')
+
+add_chinese_doc('LazyLLMCMD.with_cmd', '''\
+创建新命令对象并继承当前配置。
+
+参数:
+    cmd: 新的命令内容（类型需与原始命令一致）
+
+''')
+
+add_english_doc('LazyLLMCMD.with_cmd', '''\
+Create new command object inheriting current configuration.
+
+Args:
+    cmd: New command content (must be same type as original)
+
+''')
+
+add_chinese_doc('LazyLLMCMD.get_args', '''\
+从命令字符串中提取指定参数的值。
+
+参数:
+    key: 要提取的参数名
+''')
+
+add_english_doc('LazyLLMCMD.get_args', '''\
+Extracts specified argument value from command string.
+
+Args:
+    key: Argument name
+''')
+
+add_chinese_doc('queue.SQLiteQueue', '''\
+基于 SQLite 的持久化文件系统队列。
+该类扩展自 FileSystemQueue，使用 SQLite 数据库存储队列数据，通过 position 字段保证先进先出顺序，并支持并发安全的消息入队、出队、查看队头、队列大小查询和清空操作。
+队列数据库默认存储在 ~/.lazyllm_filesystem_queue.db，通过文件锁机制确保多进程安全访问。
+Args:
+    klass (str): 队列分类名，用于逻辑隔离不同的队列，默认为 '__default__'。
+''')
+
+add_english_doc('queue.SQLiteQueue', '''\
+Persistent file system queue backed by SQLite.
+This class extends FileSystemQueue and stores queue data in an SQLite database. Messages are ordered by a position field to preserve FIFO behavior. The class supports concurrent-safe operations including enqueue, dequeue, peek, size checking, and clearing the queue.
+The queue database is saved at ~/.lazyllm_filesystem_queue.db, with a file lock mechanism ensuring safe access in multi-process environments.
+Args:
+    klass (str): Name of the queue category used to logically separate queues. Default is '__default__'.
+''')
diff --git a/lazyllm/docs/components.py b/lazyllm/docs/components.py
index f6d22cfe4..6fc4a83cf 100644
--- a/lazyllm/docs/components.py
+++ b/lazyllm/docs/components.py
@@ -232,7 +232,38 @@
 >>> from lazyllm import finetune
 >>> trainer = finetune.collie('path/to/base/model', 'path/to/target')
 ''')
+add_chinese_doc('finetune.AlpacaloraFinetune.cmd', """\
+生成用于执行Alpaca-LoRA微调和模型合并的shell命令序列。
 
+Args:
+    trainset (str): 训练数据集路径，支持相对data_path配置的路径或绝对路径
+    valset (str, optional): 验证数据集路径，未指定时将从训练集中自动划分
+
+Returns:
+    str or list: 当不需要合并模型时返回单个命令字符串，需要合并时返回包含微调命令、合并命令和文件拷贝命令的列表
+
+""")
+
+add_english_doc('finetune.AlpacaloraFinetune.cmd', """\
+Generate shell command sequence for Alpaca-LoRA fine-tuning and model merging.
+
+Args:
+    trainset (str): Training dataset path, supports both relative path (to configured data_path) and absolute path
+    valset (str, optional): Validation dataset path, will auto-split from trainset if not specified
+
+Returns:
+    str or list: Returns a single command string when no merging needed, otherwise returns a list containing:
+                 [fine-tune command, merge command, file copy command]
+
+
+""")
+
+add_example('finetune.AlpacaloraFinetune.cmd', """\
+>>> from lazyllm import finetune
+>>> trainer = finetune.alpacalora('path/to/base/model', 'path/to/target')
+>>> cmd = trainer.cmd("my_dataset.json")
+
+""")
 # Finetune-LlamafactoryFinetune
 add_chinese_doc('finetune.LlamafactoryFinetune', '''\
 此类是 ``LazyLLMFinetuneBase`` 的子类，基于 [LLaMA-Factory](https://github.com/hiyouga/LLaMA-Factory) 框架提供的训练能力，用于对大语言模型(或视觉语言模型)进行训练。
@@ -345,6 +376,45 @@
 >>> trainer = finetune.llamafactory('internlm2-chat-7b', 'path/to/target')
 <lazyllm.llm.finetune type=LlamafactoryFinetune>
 ''')
+add_chinese_doc('finetune.LlamafactoryFinetune.cmd', """\
+生成LLaMA-Factory微调命令序列，包括训练和模型合并命令。
+
+Args:
+    trainset (str): 训练数据集路径(支持相对lazyllm.config['data_path']的路径)
+    valset (str, optional): 验证数据集路径(当前实现中未直接使用)
+
+返回:
+    str: 完整的shell命令字符串，包含:
+         - 训练命令(自动配置参数)
+         - 日志重定向(保存到目标路径)
+         - 可选的模型合并命令(当配置LoRA时)
+
+注意事项:
+    - 自动生成带时间戳的训练日志文件
+    - 临时文件会在使用后自动清理
+    - 支持多种数据格式(alpaca/sharegpt等)
+    - 多模态数据(图像/视频/音频)会自动检测处理
+""")
+
+add_english_doc('finetune.LlamafactoryFinetune.cmd', """\
+Generate LLaMA-Factory fine-tuning command sequence, including training and model merge commands.
+
+Args:
+    trainset (str): Training dataset path (supports relative path to lazyllm.config['data_path'])
+    valset (str, optional): Validation dataset path (not directly used in current implementation)
+
+Returns:
+    str: Complete shell command string containing:
+         - Training command (with auto-configured parameters)
+         - Log redirection (saved to target path)
+         - Optional model merge command (when LoRA is configured)
+
+Notes:
+    - Automatically generates timestamped training log files
+    - Temporary files are automatically cleaned up after use
+    - Supports multiple data formats (alpaca/sharegpt etc.)
+    - Multimodal data (images/videos/audios) is automatically detected and handled
+""")
 
 # Finetune-FlagembeddingFinetune
 add_chinese_doc('finetune.FlagembeddingFinetune', '''\
@@ -517,6 +587,97 @@
 ''')
 
 # ============= Deploy
+
+add_chinese_doc('LazyLLMDeployBase', '''\
+此类是 ``ComponentBase`` 的一个子类，提供了LazyLLM部署的基础功能。它支持多种媒体类型的编码转换，并提供了结果提取和流式处理的配置选项。
+
+Args:
+    launcher (LauncherBase): 用于部署的启动器实例，默认为远程启动器(``launchers.remote()``)。
+
+注意事项: 
+    - 继承此类时需要实现具体的部署逻辑
+    - 可以通过重写extract_result方法来自定义结果提取逻辑
+''')
+
+add_english_doc('LazyLLMDeployBase', '''\
+This class is a subclass of ``ComponentBase`` that provides basic functionality for LazyLLM deployment. It supports encoding conversion for various media types and provides configuration options for result extraction and streaming processing.
+
+Args:
+    launcher (LauncherBase): Launcher instance for deployment, defaults to remote launcher (``launchers.remote()``).
+
+Notes: 
+    - Need to implement specific deployment logic when inheriting this class
+    - Can customize result extraction logic by overriding the extract_result method
+''')
+
+add_example('LazyLLMDeployBase', '''\
+>>> import lazyllm
+>>> from lazyllm.components.deploy.base import LazyLLMDeployBase
+>>> class MyDeployer(LazyLLMDeployBase):
+...     def __call__(self, inputs):
+...         return processed_result
+        def extract_result(output, inputs):
+...         return output.json()['result']
+>>> deployer = MyDeployer()
+>>> result = deployer.extract_result(raw_output, input_data)
+''')
+
+add_chinese_doc('LazyLLMDeployBase.extract_result', """\
+从模型输出中提取最终结果，默认实现直接返回原始输出，子类可重写此方法实现自定义结果提取逻辑。
+
+Args:
+    output: 模型原始输出
+    inputs: 原始输入数据，可用于结果后处理
+
+Returns:
+    处理后的最终结果
+
+""")
+
+add_english_doc('LazyLLMDeployBase.extract_result', """\
+Extract final result from model output. The default implementation returns raw output directly, subclasses can override this method to implement custom result extraction logic.
+
+Args:
+    output: Raw model output
+    inputs: Original input data, can be used for post-processing
+
+Returns:
+    Processed final result
+
+""")
+
+
+# Deploy-AbstractEmbedding
+add_chinese_doc('deploy.embed.AbstractEmbedding', '''\
+抽象嵌入基类，为所有嵌入模型提供统一的接口和基础功能。此类定义了嵌入模型的标准接口，包括模型加载、调用和序列化等功能。
+
+Args:
+    base_embed (str): 嵌入模型的基础路径或标识符，用于指定要加载的嵌入模型。
+    source (str, optional): 模型来源，默认为 ``None``。如果未指定，将使用 LazyLLM 配置中的默认模型来源。
+    init (bool): 是否在初始化时立即加载模型，默认为 ``False``。如果为 ``True``，将在对象创建时立即调用 ``load_embed()`` 方法。
+''')
+
+add_english_doc('deploy.embed.AbstractEmbedding', '''\
+Abstract embedding base class that provides unified interface and basic functionality for all embedding models. This class defines the standard interface for embedding models, including model loading, calling, and serialization capabilities.
+
+Args:
+    base_embed (str): The base path or identifier of the embedding model, used to specify which embedding model to load.
+    source (str, optional): Model source, default to ``None``. If not specified, will use the default model source from LazyLLM configuration.
+    init (bool): Whether to load the model immediately during initialization, default to ``False``. If ``True``, will call the ``load_embed()`` method immediately when the object is created.
+''')
+
+add_chinese_doc('deploy.embed.AbstractEmbedding.load_embed', '''\
+加载嵌入模型的抽象方法。此方法由子类实现，用于执行具体的模型加载逻辑。
+
+**注意**: 此方法目前正在开发中。
+''')
+
+add_english_doc('deploy.embed.AbstractEmbedding.load_embed', '''\
+Abstract method for loading embedding models. This method is implemented by subclasses to perform specific model loading logic.
+
+**Note**: This method is currently under development.
+''')
+
 # Deploy-Lightllm
 add_chinese_doc('deploy.Lightllm', '''\
 此类是 ``LazyLLMDeployBase`` 的子类，基于 [LightLLM](https://github.com/ModelTC/lightllm) 框架提供的推理能力，用于对大语言模型进行推理。
@@ -577,6 +738,76 @@
 >>> infer = deploy.lightllm()
 ''')
 
+add_chinese_doc('deploy.Lightllm.cmd', '''\
+该方法用于生成启动LightLLM服务的命令。
+
+参数:
+    finetuned_model (str): 微调后的模型路径。
+    base_model (str): 基础模型路径，当finetuned_model无效时使用。
+
+返回值:
+    LazyLLMCMD: 一个包含启动命令的LazyLLMCMD对象。
+''')
+
+add_english_doc('deploy.Lightllm.cmd', '''\
+This method generates the command to start the LightLLM service.
+
+Args:
+    finetuned_model (str): Path to the fine-tuned model.
+    base_model (str): Path to the base model, used when finetuned_model is invalid.
+
+Returns:
+    LazyLLMCMD: A LazyLLMCMD object containing the startup command.
+''')
+
+add_chinese_doc('deploy.Lightllm.geturl', '''\
+获取LightLLM服务的URL地址。
+
+参数:
+    job (optional): 任务对象，默认为None，此时使用self.job。
+
+返回值:
+    str: 服务的URL地址，格式为"http://{ip}:{port}/generate"。
+''')
+
+add_english_doc('deploy.Lightllm.geturl', '''\
+Get the URL address of the LightLLM service.
+
+Args:
+    job (optional): Job object, defaults to None, in which case self.job is used.
+
+Returns:
+    str: The service URL address in the format "http://{ip}:{port}/generate".
+''')
+
+add_chinese_doc('deploy.Lightllm.extract_result', '''\
+从服务响应中提取生成的文本结果。
+
+参数:
+    x (str): 服务返回的响应文本。
+    inputs (str): 输入文本。
+
+返回值:
+    str: 提取出的生成文本。
+
+异常:
+    Exception: 当解析JSON响应失败时抛出异常。
+''')
+
+add_english_doc('deploy.Lightllm.extract_result', '''\
+Extract generated text from the service response.
+
+Args:
+    x (str): Response text from the service.
+    inputs (str): Input text.
+
+Returns:
+    str: The extracted generated text.
+
+Raises:
+    Exception: When JSON response parsing fails.
+''')
+
 # Deploy-Vllm
 add_chinese_doc('deploy.Vllm', '''\
 此类是 ``LazyLLMDeployBase`` 的子类，基于 [VLLM](https://github.com/vllm-project/vllm) 框架提供的推理能力，用于对大语言模型进行推理。
@@ -743,6 +974,209 @@
 >>> result = rerank_service(input_data)
 ''')
 
+# Deploy-embed
+add_chinese_doc('deploy.embed.LazyHuggingFaceRerank', '''\
+基于 HuggingFace CrossEncoder 的重排序（Rerank）封装类。  
+用于根据查询与候选文档的相关性分数，对文档进行排序。  
+支持在初始化时下载并加载指定的重排序模型，并可选择延迟加载以提升启动性能。
+
+Args:
+    base_rerank (str): 重排序模型名称或本地路径。支持 HuggingFace Hub 模型标识符或本地路径。
+    source (Optional[str]): 模型来源，支持 `huggingface` 和 `modelscope`，默认为全局配置项 `model_source`。
+    init (bool): 是否在实例化时立即加载模型。若为 `False`，将在首次调用时延迟加载。
+''')
+
+add_english_doc('deploy.embed.LazyHuggingFaceRerank', '''\
+Wrapper class for HuggingFace CrossEncoder-based reranking.  
+Ranks candidate documents by relevance score with respect to a given query.  
+Supports downloading and loading a specified rerank model at initialization, with optional lazy loading for faster startup.
+
+Args:
+    base_rerank (str): Name or local path of the rerank model. Supports HuggingFace Hub identifiers or local paths.
+    source (Optional[str]): Source of the model, supports `huggingface` and `modelscope`. Defaults to global config `model_source`.
+    init (bool): Whether to load the model immediately upon instantiation. If `False`, the model will be loaded lazily on first call.
+''')
+
+add_chinese_doc('deploy.embed.LazyHuggingFaceRerank.load_reranker', '''\
+加载重排序模型。  
+该方法会使用 `sentence_transformers.CrossEncoder` 从指定的 `base_rerank` 路径或名称加载模型，  
+通常在延迟加载模式下由首次调用实例时自动触发。
+''')
+
+add_english_doc('deploy.embed.LazyHuggingFaceRerank.load_reranker', '''\
+Load the rerank model.  
+Uses `sentence_transformers.CrossEncoder` to load the model from the specified `base_rerank` path or name.  
+Typically triggered automatically on first call when lazy loading is enabled.
+''')
+
+add_chinese_doc('deploy.embed.LazyHuggingFaceRerank.rebuild', '''\
+重建 `LazyHuggingFaceRerank` 实例的类方法。  
+主要用于序列化（pickle/cloudpickle）时的反序列化过程，根据提供的参数重新实例化对象。
+
+Args:
+    base_rerank (str): 模型名称或路径。
+    init (bool): 是否在重建时立即加载模型。
+
+**Returns:**\n
+- LazyHuggingFaceRerank: 重新构建的类实例。
+''')
+
+add_english_doc('deploy.embed.LazyHuggingFaceRerank.rebuild', '''\
+Class method to rebuild a `LazyHuggingFaceRerank` instance.  
+Used primarily for deserialization during pickle/cloudpickle operations,  
+reinstantiating the object with the provided parameters.
+
+Args:
+    base_rerank (str): Model name or path.
+    init (bool): Whether to load the model immediately upon rebuilding.
+
+**Returns:**\n
+- LazyHuggingFaceRerank: The rebuilt class instance.
+''')
+
+add_chinese_doc('deploy.embed.LazyFlagEmbedding', '''\
+支持懒加载的 FlagEmbedding 嵌入模块封装。
+
+该类包装了 FlagEmbedding 的加载和调用逻辑，提供对稀疏和稠密嵌入的支持，并通过 lazyllm.once_flag() 机制实现懒加载。适用于嵌入模型的本地/远程下载、初始化与编码流程的封装，便于与 LazyLLM 系统集成。
+
+Args:
+    base_embed (str): 嵌入模型名称或路径。
+    sparse (bool): 是否使用稀疏嵌入模式，默认为 False。
+    source (str, optional): 模型下载源，若未提供则使用 lazyllm 全局配置。
+    init (bool): 是否在初始化时立即加载模型，默认为 False。
+''')
+
+add_english_doc('deploy.embed.LazyFlagEmbedding', '''\
+A lazily loaded wrapper for the FlagEmbedding module.
+
+This class encapsulates loading and usage of FlagEmbedding, with support for both sparse and dense embeddings. It leverages the lazyllm.once_flag() mechanism to initialize only once on demand, and integrates with LazyLLM's model downloading utilities.
+
+Args:
+    base_embed (str): The model name or path to be used as the embedding backend.
+    sparse (bool): Whether to enable sparse embedding output. Defaults to False.
+    source (str, optional): Source URL or identifier for model downloading. Defaults to global config.
+    init (bool): Whether to initialize the model immediately upon construction. Defaults to False.
+''')
+
+add_chinese_doc('deploy.embed.LazyFlagEmbedding.load_embed', '''\
+加载嵌入模型并初始化到设备上。
+
+该方法根据系统是否支持 CUDA 自动选择运行设备（GPU 或 CPU），并从本地或远程加载预训练的 FlagEmbedding 模型。
+''')
+
+add_english_doc('deploy.embed.LazyFlagEmbedding.load_embed', '''\
+Load the embedding model onto the appropriate device.
+
+This method selects the available device (GPU or CPU) and initializes the pretrained FlagEmbedding model from the provided path or model hub.
+''')
+
+add_chinese_doc('deploy.embed.LazyFlagEmbedding.rebuild', '''\
+重建 LazyFlagEmbedding 实例的方法。
+
+该类方法用于在序列化或跨进程传递时，重新构造带有初始化配置的 LazyFlagEmbedding 实例。
+
+Args:
+    base_embed (str): 嵌入模型的路径或模型名称。
+    sparse (bool): 是否启用稀疏嵌入。
+    init (bool): 是否在构造时立即加载模型。
+
+Returns:
+    LazyFlagEmbedding: 一个新的 LazyFlagEmbedding 实例。
+''')
+
+add_english_doc('deploy.embed.LazyFlagEmbedding.rebuild', '''\
+Rebuild a LazyFlagEmbedding instance.
+
+This class method reconstructs an instance of LazyFlagEmbedding, typically used during deserialization or multiprocessing scenarios.
+
+Args:
+    base_embed (str): The path or name of the embedding model.
+    sparse (bool): Whether to enable sparse embedding mode.
+    init (bool): Whether to load the model immediately during instantiation.
+
+Returns:
+    LazyFlagEmbedding: A newly constructed LazyFlagEmbedding instance.
+''')
+
+
+add_chinese_doc('deploy.Vllm.cmd', '''\
+构造用于启动 vLLM 推理服务的命令。
+
+该方法会自动检测模型路径是否有效，并根据当前配置参数动态生成可执行命令，支持多节点部署时自动加入 ray 启动命令。
+
+Args:
+    finetuned_model (str): 微调后的模型路径。
+    base_model (str): 备用基础模型路径（当 finetuned_model 无效时启用）。
+    master_ip (str): 分布式部署中的主节点 IP，仅在多节点时启用。
+
+Returns:
+    LazyLLMCMD: 可执行命令对象，包含启动指令、结果回调函数及健康检查方法。
+''')
+
+add_english_doc('deploy.Vllm.cmd', '''\
+Build the command to launch the vLLM inference service.
+
+This method validates the model path and constructs an executable command string based on current configuration. In distributed mode, it will also prepend the ray cluster start command.
+
+Args:
+    finetuned_model (str): Path to the fine-tuned model.
+    base_model (str): Fallback base model path if finetuned_model is invalid.
+    master_ip (str): IP address of the master node in a distributed setup.
+
+Returns:
+    LazyLLMCMD: The command object with shell instruction, return value handler, and health checker.
+''')
+
+add_chinese_doc('deploy.Vllm.geturl', '''\
+获取 vLLM 服务的推理地址。
+
+根据运行模式（Display 模式或实际部署）返回相应的 URL，用于访问模型的生成接口。
+
+Args:
+    job (Job, optional): 部署任务对象。默认取当前模块绑定的 job。
+
+Returns:
+    str: 推理服务的 HTTP 地址。
+''')
+
+add_english_doc('deploy.Vllm.geturl', '''\
+Get the inference service URL for the vLLM deployment.
+
+Depending on the execution mode (Display or actual deployment), this method returns the appropriate URL for accessing the model's generate endpoint.
+
+Args:
+    job (Job, optional): Deployment job object. Defaults to the module's associated job.
+
+Returns:
+    str: The HTTP URL for inference service.
+''')
+
+add_chinese_doc('deploy.Vllm.extract_result', '''\
+从 vLLM 返回结果中提取文本。
+
+该函数从 JSON 格式的返回值中提取模型输出的文本部分。
+
+Args:
+    x (str): JSON 格式的原始返回结果字符串。
+    inputs (dict): 原始输入数据（用于兼容接口，当前未使用）。
+
+Returns:
+    str: 提取出的文本内容。
+''')
+
+add_english_doc('deploy.Vllm.extract_result', '''\
+Extract the generated text from a vLLM response.
+
+This function parses the returned JSON and extracts the model-generated text content.
+
+Args:
+    x (str): Raw JSON string returned from the API.
+    inputs (dict): Original input data (unused; kept for compatibility).
+
+Returns:
+    str: The generated text extracted from the response.
+''')
+
 # Deploy-Mindie
 add_chinese_doc('deploy.Mindie', '''\
 此类是 ``LazyLLMDeployBase`` 的一个子类, 用于部署和管理MindIE大模型推理服务。它封装了MindIE服务的配置生成、进程启动和API交互的全流程。
@@ -783,6 +1217,180 @@
 
 
 add_example('deploy.Mindie', '''\
+>>> import lazyllm
+>>> from lazyllm.components.deploy import Mindie            
+>>> deployer = Mindie(
+...     port=30000,
+...     launcher=lazyllm.launchers.remote(),
+...     max_seq_len=32000,
+...     log_path="/path/to/logs"
+... )
+>>> cmd = deployer.cmd(
+...     finetuned_model="/path/to/finetuned_model",
+...     base_model="/path/to/base_model")
+>>> print("Service URL:", cmd.geturl())
+
+''')
+add_english_doc('deploy.Mindie.load_config', '''\
+Loads and parses the MindIE configuration file.
+
+Args:
+    config_path (str): Path to the JSON configuration file
+
+Returns:
+    dict: Parsed configuration dictionary
+
+Notes:
+    - Handles both default and custom configuration files
+    - Uses JSON format for configuration
+    - Creates backup of original config before modification
+''')
+
+add_chinese_doc('deploy.Mindie.load_config', '''\
+加载并解析MindIE配置文件。
+
+Args:
+    config_path (str): JSON配置文件的路径
+
+Returns:
+    dict: 解析后的配置字典
+
+注意事项:
+    - 处理默认和自定义配置文件
+    - 使用JSON格式配置
+    - 修改前会创建原始配置的备份
+''')
+
+add_english_doc('deploy.Mindie.save_config', '''\
+Saves the current configuration to file.
+
+Notes:
+    - Automatically creates backup of existing config
+    - Writes to the standard MindIE config location
+    - Uses JSON format with proper indentation
+    - Called automatically during deployment
+''')
+
+add_chinese_doc('deploy.Mindie.save_config', '''\
+保存当前配置到文件。
+
+注意事项:
+    - 自动创建现有配置的备份
+    - 写入到标准MindIE配置位置
+    - 使用带缩进的JSON格式
+    - 部署时自动调用
+''')
+
+add_english_doc('deploy.Mindie.update_config', '''\
+Updates the configuration dictionary with current settings.
+
+Notes:
+    - Handles multiple configuration sections:
+        - Model deployment parameters
+        - Server settings
+        - Scheduling parameters
+''')
+
+add_chinese_doc('deploy.Mindie.update_config', '''\
+使用当前设置更新配置字典。
+
+注意事项:
+    - 处理多个配置部分:
+        - 模型部署参数
+        - 服务器设置
+        - 调度参数
+''')
+
+add_english_doc('deploy.Mindie.cmd', '''\
+Generates the command to start the MindIE service.
+
+Args:
+    finetuned_model (str): Path to the fine-tuned model
+    base_model (str): Path to the base model (fallback if finetuned_model is invalid)
+    master_ip (str): Master node IP address (currently unused)
+
+Returns:
+    LazyLLMCMD: Command object for starting the service
+
+Notes:
+    - Automatically handles model path validation
+    - Updates configuration before service start
+    - Supports random port allocation when configured
+''')
+
+add_chinese_doc('deploy.Mindie.cmd', '''\
+生成启动MindIE服务的命令。
+
+Args:
+    finetuned_model (str): 微调模型路径
+    base_model (str): 基础模型路径(当微调模型无效时作为后备)
+    master_ip (str): 主节点IP地址(当前未使用)
+
+返回:
+    LazyLLMCMD: 启动服务的命令对象
+
+注意事项:
+    - 自动处理模型路径验证
+    - 启动服务前更新配置
+    - 支持配置随机端口分配
+''')
+
+add_english_doc('deploy.Mindie.geturl', '''\
+Gets the service URL after deployment.
+
+Args:
+    job: Job object (optional, defaults to self.job)
+
+Returns:
+    str: The generate endpoint URL
+
+Notes:
+    - Returns different formats based on display mode
+    - Includes port number from configuration
+''')
+
+add_chinese_doc('deploy.Mindie.geturl', '''\
+获取部署后的服务URL。
+
+Args:
+    job: 任务对象(可选，默认为self.job)
+
+返回:
+    str: generate接口的URL
+
+注意事项:
+    - 根据显示模式返回不同格式
+    - 包含配置中的端口号
+''')
+
+add_english_doc('deploy.Mindie.extract_result', '''\
+Extracts the generated text from the API response.
+
+Args:
+    x: Raw API response
+    inputs: Original inputs (unused)
+
+Returns:
+    str: The generated text
+
+Notes:
+    - Parses JSON response
+    - Returns first text entry from response
+''')
+
+add_chinese_doc('deploy.Mindie.extract_result', '''\
+从API响应中提取生成的文本。
+
+Args:
+    x: 原始API响应
+    inputs: 原始输入(未使用)
+
+返回:
+    str: 生成的文本
+
+注意事项:
+    - 解析JSON响应
+    - 返回响应中的第一个文本条目
 ''')
 
 # Deploy-LMDeploy
@@ -818,29 +1426,93 @@
     log_path (str): Path for log file, defaults to ``None``.
     kw: Keyword arguments for updating default training parameters. Note that no additional keyword arguments beyond those listed below can be passed.
 
-Keyword Args: 
-    tp (int): Tensor parallelism parameter, defaults to ``1``.
-    server-name (str): The IP address of the service, defaults to ``0.0.0.0``.
-    server-port (int): The port number of the service, defaults to ``None``. In this case, LazyLLM will automatically generate a random port number.
-    max-batch-size (int): Maximum batch size, defaults to ``128``.
-    chat-template (str): Path to chat template file, defaults to ``None``. If the model is not a vision-language model and no template is specified, a default template will be used.
-    eager-mode (bool): Whether to enable eager mode, controlled by environment variable ``LMDEPLOY_EAGER_MODE``, defaults to ``False``.
+Keyword Args: 
+    tp (int): Tensor parallelism parameter, defaults to ``1``.
+    server-name (str): The IP address of the service, defaults to ``0.0.0.0``.
+    server-port (int): The port number of the service, defaults to ``None``. In this case, LazyLLM will automatically generate a random port number.
+    max-batch-size (int): Maximum batch size, defaults to ``128``.
+    chat-template (str): Path to chat template file, defaults to ``None``. If the model is not a vision-language model and no template is specified, a default template will be used.
+    eager-mode (bool): Whether to enable eager mode, controlled by environment variable ``LMDEPLOY_EAGER_MODE``, defaults to ``False``.
+
+''')
+
+add_example('deploy.LMDeploy', '''\
+>>> # Basic use:
+>>> from lazyllm import deploy
+>>> infer = deploy.LMDeploy()
+>>>
+>>> # MultiModal:
+>>> import lazyllm
+>>> from lazyllm import deploy, globals
+>>> from lazyllm.components.formatter import encode_query_with_filepaths
+>>> chat = lazyllm.TrainableModule('Mini-InternVL-Chat-2B-V1-5').deploy_method(deploy.LMDeploy)
+>>> chat.update_server()
+>>> inputs = encode_query_with_filepaths('What is it?', ['path/to/image'])
+>>> res = chat(inputs)
+''')
+
+add_chinese_doc('deploy.LMDeploy.cmd', '''\
+该方法用于生成启动LMDeploy服务的命令。
+
+参数:
+    finetuned_model (str): 微调后的模型路径。
+    base_model (str): 基础模型路径，当finetuned_model无效时使用。
+
+返回值:
+    LazyLLMCMD: 一个包含启动命令的LazyLLMCMD对象。
+''')
+
+add_english_doc('deploy.LMDeploy.cmd', '''\
+This method generates the command to start the LMDeploy service.
+
+Args:
+    finetuned_model (str): Path to the fine-tuned model.
+    base_model (str): Path to the base model, used when finetuned_model is invalid.
+
+Returns:
+    LazyLLMCMD: A LazyLLMCMD object containing the startup command.
+''')
+
+add_chinese_doc('deploy.LMDeploy.geturl', '''\
+获取LMDeploy服务的URL地址。
+
+参数:
+    job (optional): 任务对象，默认为None，此时使用self.job。
+
+返回值:
+    str: 服务的URL地址，格式为"http://{ip}:{port}/v1/chat/interactive"。
+''')
+
+add_english_doc('deploy.LMDeploy.geturl', '''\
+Get the URL address of the LMDeploy service.
+
+Args:
+    job (optional): Job object, defaults to None, in which case self.job is used.
+
+Returns:
+    str: The service URL address in the format "http://{ip}:{port}/v1/chat/interactive".
+''')
+
+add_chinese_doc('deploy.LMDeploy.extract_result', '''\
+从服务响应中提取生成的文本结果。
 
+参数:
+    x (str): 服务返回的响应文本。
+    inputs (str): 输入文本。
+
+返回值:
+    str: 提取出的生成文本。
 ''')
 
-add_example('deploy.LMDeploy', '''\
->>> # Basic use:
->>> from lazyllm import deploy
->>> infer = deploy.LMDeploy()
->>>
->>> # MultiModal:
->>> import lazyllm
->>> from lazyllm import deploy, globals
->>> from lazyllm.components.formatter import encode_query_with_filepaths
->>> chat = lazyllm.TrainableModule('Mini-InternVL-Chat-2B-V1-5').deploy_method(deploy.LMDeploy)
->>> chat.update_server()
->>> inputs = encode_query_with_filepaths('What is it?', ['path/to/image'])
->>> res = chat(inputs)
+add_english_doc('deploy.LMDeploy.extract_result', '''\
+Extract generated text from the service response.
+
+Args:
+    x (str): Response text from the service.
+    inputs (str): Input text.
+
+Returns:
+    str: The extracted generated text.
 ''')
 
 # Deploy-Infinity
@@ -881,6 +1553,154 @@
 <lazyllm.llm.deploy type=Infinity>
 ''')
 
+# RelayServer class documentation
+add_chinese_doc('deploy.relay.base.RelayServer', '''\
+RelayServer类是一个用于部署FastAPI服务的基类，它可以将一个函数转换为HTTP服务。这个类支持设置前处理函数、后处理函数，
+并可以自动分配端口号。它主要用于将模型推理功能转换为HTTP服务，便于分布式部署和调用。
+
+主要参数：
+    port: 服务端口号，如果为None则随机分配30000-40000之间的端口
+    func: 要部署的主函数
+    pre_func: 请求预处理函数
+    post_func: 响应后处理函数
+    pythonpath: 额外的Python路径
+    log_path: 日志存储路径
+    cls: 服务名称
+    launcher: 启动器类型，默认为异步远程启动
+''')
+
+add_english_doc('deploy.relay.base.RelayServer', '''\
+RelayServer is a base class for deploying FastAPI services that converts a function into an HTTP service. It supports 
+setting pre-processing and post-processing functions, and can automatically allocate port numbers. It's mainly used 
+to convert model inference functionality into HTTP services for distributed deployment and invocation.
+
+Main parameters:
+    port: Service port number, randomly assigned between 30000-40000 if None
+    func: Main function to be deployed
+    pre_func: Request pre-processing function
+    post_func: Response post-processing function
+    pythonpath: Additional Python path
+    log_path: Log storage path
+    cls: Service name
+    launcher: Launcher type, defaults to asynchronous remote launch
+''')
+
+add_example('deploy.relay.base.RelayServer', '''\
+>>> from lazyllm.components.deploy.relay.base import RelayServer
+>>> def my_function(text):
+...     return f"Processed: {text}"
+>>> server = RelayServer(port=35000, func=my_function)
+>>> server.start()  # This will start the server
+>>> print(server.geturl())  # Get the service URL
+http://localhost:35000/generate
+''')
+
+# cmd method documentation
+add_chinese_doc('deploy.relay.base.RelayServer.cmd', '''\
+cmd方法用于生成启动服务器的命令。它会将当前的函数和配置转换为一个可执行的命令字符串。
+
+参数：
+    func: 可选，要部署的新函数。如果不提供，则使用初始化时的函数。
+
+返回值：
+    返回一个LazyLLMCMD对象，包含服务器启动命令和相关配置。
+''')
+
+add_english_doc('deploy.relay.base.RelayServer.cmd', '''\
+The cmd method generates the command to start the server. It converts the current function and configuration into 
+an executable command string.
+
+Args:
+    func: Optional, new function to deploy. If not provided, uses the function from initialization.
+
+Returns:
+    Returns a LazyLLMCMD object containing the server start command and related configuration.
+''')
+
+add_example('deploy.relay.base.RelayServer.cmd', '''\
+>>> server = RelayServer(port=35000)
+>>> def new_function(text):
+...     return f"New process: {text}"
+>>> cmd_obj = server.cmd(new_function)
+>>> print(cmd_obj)  # Will show the command that would be executed
+''')
+
+# geturl method documentation
+add_chinese_doc('deploy.relay.base.RelayServer.geturl', '''\
+geturl方法用于获取服务的访问URL。该URL可用于向服务发送HTTP请求。
+
+参数：
+    job: 可选，指定的任务对象。如果为None，则使用当前实例的任务。
+
+返回值：
+    返回服务的完整URL地址，格式为 http://<ip>:<port>/generate
+''')
+
+add_english_doc('deploy.relay.base.RelayServer.geturl', '''\
+The geturl method returns the access URL for the service. This URL can be used to send HTTP requests to the service.
+
+Args:
+    job: Optional, specified job object. If None, uses the current instance's job.
+
+Returns:
+    Returns the complete URL of the service in the format http://<ip>:<port>/generate
+''')
+
+add_example('deploy.relay.base.RelayServer.geturl', '''\
+>>> server = RelayServer(port=35000)
+>>> server.start()
+>>> url = server.geturl()
+>>> print(url)  # Shows the service endpoint URL
+http://localhost:35000/generate
+>>> # You can now use this URL to make HTTP requests to your service
+''')
+
+add_chinese_doc('deploy.base.DummyDeploy', '''\
+DummyDeploy(launcher=launchers.remote(sync=False), *, stream=False, **kw)
+
+一个用于测试的模拟部署类，继承自 `LazyLLMDeployBase` 和 `flows.Pipeline`，实现了一个简单的流水线风格部署服务，
+支持流式输出（可选）。
+
+该类主要用于内部测试和示例用途。它接收符合 `message_format` 格式的输入，根据是否启用 `stream` 参数，返回
+字符串或逐步输出的模拟响应。
+
+属性：
+- keys_name_handle (dict): 输入字段名的映射。
+- message_format (dict): 默认请求模板，包括输入内容与生成参数。
+
+参数：
+- launcher: 部署器实例，默认值为 `launchers.remote(sync=False)`。
+- stream (bool): 是否以流式方式输出结果。
+- kw: 其他传递给父类的关键字参数。
+
+方法：
+- __call__(*args): 启动部署并返回服务地址。
+- __repr__(): 返回流水线的字符串表示。
+''')
+
+add_english_doc('deploy.base.DummyDeploy', '''\
+DummyDeploy(launcher=launchers.remote(sync=False), *, stream=False, **kw)
+
+A mock deployment class for testing purposes. It extends both `LazyLLMDeployBase` and `flows.Pipeline`,
+simulating a simple pipeline-style deployable service with optional streaming support.
+
+This class is primarily intended for internal testing and demonstration. It receives inputs in the format defined
+by `message_format`, and returns a dummy response or a streaming response depending on the `stream` flag.
+
+Attributes:
+- keys_name_handle (dict): Mapping of input keys for request formatting.
+- message_format (dict): Default request template including input and generation parameters.
+
+Parameters:
+- launcher: Deployment launcher instance, defaulting to `launchers.remote(sync=False)`.
+- stream (bool): Whether to simulate streaming output.
+- kw: Additional keyword arguments passed to the superclass.
+
+Methods:
+- __call__(*args): Starts the deployment and returns the service URL.
+- __repr__(): Returns a string representation of the underlying pipeline.
+''')
+
 # Deploy-Auto
 add_chinese_doc('auto.AutoDeploy', '''\
 此类是 ``LazyLLMDeployBase`` 的子类，可根据输入的参数自动选择合适的推理框架和参数，以对大语言模型进行推理。
@@ -1611,6 +2431,26 @@
 {'messages': [{'role': 'system', 'content': 'You are an AI-Agent developed by LazyLLM.\\\\nins hello world\\\\n\\\\n'}, {'role': 'user', 'content': ''}]}
 ''')
 
+add_chinese_doc('prompter.PrompterBase.pre_hook', '''\
+设置预处理钩子函数，供外部在生成提示词前对输入数据进行自定义处理。
+
+Args:
+    func (Optional[Callable]): 一个可调用对象，作为预处理钩子函数，接收并处理输入数据。
+
+**Returns:**\n
+- LazyLLMPrompterBase: 返回自身实例，方便链式调用。
+''')
+
+add_english_doc('prompter.PrompterBase.pre_hook', '''\
+Sets a pre-processing hook function, allowing external custom processing of input data before prompt generation.
+
+Args:
+    func (Optional[Callable]): A callable object to be used as the pre-processing hook function, which receives and processes input data.
+
+**Returns:**\n
+- LazyLLMPrompterBase: Returns the instance itself to support method chaining.
+''')
+
 add_chinese_doc('prompter.PrompterBase.generate_prompt', '''\
 根据用户输入，生成对应的Prompt.
 
@@ -2100,6 +2940,122 @@
 ... xxxxxxxxxxxxxxxx
 '''])
 
+add_english_doc('deploy.speech_to_text.sense_voice.SenseVoice', '''\
+SenseVoice(base_path, source=None, init=False)
+
+A speech-to-text wrapper using FunASR models for lazy initialization and audio transcription.
+This class supports automatic model downloading, safe initialization, and inference from audio paths or URLs.
+
+Parameters:
+- base_path (str): Path or model identifier to download the STT model.
+- source (str, optional): Model source name; defaults to `lazyllm.config['model_source']`.
+- init (bool): Whether to initialize the model immediately on creation.
+
+Attributes:
+- base_path (str): Final resolved path of the model after download.
+- model: Loaded FunASR model instance.
+- init_flag: A lazy flag used to ensure model is only loaded once.
+
+Methods:
+- __call__(string: str | dict) -> str:
+    Transcribes the input audio file or URL to text. Accepts base64-encoded content, file paths, or URLs.
+- load_stt():
+    Loads the FunASR speech-to-text model and related VAD (Voice Activity Detection).
+- rebuild(base_path, init):
+    Rebuilds the class instance (used for serialization).
+- __reduce__():
+    Supports pickling by ensuring proper lazy-loading on deserialization.
+''')
+
+add_chinese_doc('deploy.speech_to_text.sense_voice.SenseVoice', '''\
+SenseVoice(base_path, source=None, init=False)
+
+使用 FunASR 模型进行语音转文本的包装类，支持懒加载与自动模型下载。
+支持从音频路径、URL 或 base64 编码音频进行转写，适用于延迟初始化和高效部署。
+
+参数：
+- base_path (str): 用于下载语音识别模型的路径或模型标识。
+- source (str, 可选): 模型来源，默认使用 `lazyllm.config['model_source']`。
+- init (bool): 是否在初始化时立即加载模型。
+
+属性：
+- base_path (str): 下载后模型的实际路径。
+- model: 加载的 FunASR 模型对象。
+- init_flag: 用于懒加载的初始化标志，保证模型只加载一次。
+
+方法：
+- __call__(string: str | dict) -> str:
+    将输入的音频文件或 URL 转换为文本。支持 base64 编码、文件路径或 URL 输入。
+- load_stt():
+    加载 FunASR 的语音识别模型和语音活动检测（VAD）模型。
+- rebuild(base_path, init):
+    用于重新构造类实例（常用于序列化）。
+- __reduce__():
+    实现 pickling 支持，确保在反序列化时正确懒加载。
+''')
+
+add_english_doc('deploy.speech_to_text.sense_voice.SenseVoice.load_stt', '''\
+load_stt()
+
+Loads the speech-to-text model using FunASR with optional support for Huawei NPU via `torch_npu`.
+
+The method initializes the model with the following characteristics:
+- Uses `fsmn-vad` for voice activity detection with long utterance support.
+- Sets maximum single segment time to 30 seconds.
+- Selects `cuda:0` as the default inference device.
+
+The model is stored in `self.model` and will be used to transcribe audio input.
+
+Note:
+If `torch_npu` is available in the environment, the function attempts to load it for potential Huawei Ascend acceleration.
+''')
+
+add_chinese_doc('deploy.speech_to_text.sense_voice.SenseVoice.load_stt', '''\
+load_stt()
+
+使用 FunASR 加载语音转文本模型，支持华为 NPU（如存在 `torch_npu`）。
+
+此方法将初始化模型，包含以下设置：
+- 使用 `fsmn-vad` 进行语音活动检测（VAD），支持最长 30 秒的单段语音。
+- 设置推理设备为 `cuda:0`（默认使用 GPU）。
+- 将模型实例保存在 `self.model` 中，用于后续音频转写。
+
+注意：
+如果当前环境中存在 `torch_npu`，函数将自动导入以支持华为昇腾设备加速。
+''')
+
+add_english_doc('deploy.speech_to_text.sense_voice.SenseVoice.rebuild', '''\
+rebuild(base_path: str, init: bool) -> SenseVoice
+
+Class method used to reconstruct a `SenseVoice` instance during deserialization (e.g., when using `cloudpickle`).
+
+Parameters:
+- base_path (str): Path to the speech-to-text model.
+- init (bool): Whether to immediately initialize and load the model upon creation.
+
+Returns:
+- A new instance of `SenseVoice` with the specified configuration.
+
+Note:
+This method is internally used to support model serialization and multiprocessing compatibility.
+''')
+
+add_chinese_doc('deploy.speech_to_text.sense_voice.SenseVoice.rebuild', '''\
+rebuild(base_path: str, init: bool) -> SenseVoice
+
+该类方法用于反序列化（如 `cloudpickle`）过程中重新构建 `SenseVoice` 实例。
+
+参数：
+- base_path (str)：语音识别模型的路径。
+- init (bool)：是否在实例化时立即加载模型。
+
+返回：
+- 一个新的 `SenseVoice` 实例。
+
+说明：
+该方法主要用于支持对象的序列化与多进程环境下的兼容重建操作。
+''')
+
 add_english_doc('TTSDeploy', '''\
 TTSDeploy is a factory class for creating instances of different Text-to-Speech (TTS) deployment types based on the specified name.
 
@@ -2362,6 +3318,81 @@
 >>> launcher = lazyllm.launchers.slurm(partition='partition_name', nnode=1, nproc=1, ngpus=1, sync=False)
 ''')
 
+# SlurmLauncher methods
+add_chinese_doc('SlurmLauncher.makejob', '''\
+创建并返回一个 SlurmLauncher.Job 对象。
+
+Args:
+    cmd: 要执行的命令字符串。
+
+Returns:
+    SlurmLauncher.Job: 配置好的 Slurm 作业对象。
+''')
+
+add_english_doc('SlurmLauncher.makejob', '''\
+Creates and returns a SlurmLauncher.Job object.
+
+Args:
+    cmd: The command string to execute.
+
+Returns:
+    SlurmLauncher.Job: A configured Slurm job object.
+''')
+
+add_chinese_doc('SlurmLauncher.get_idle_nodes', '''\
+获取指定分区中当前可用的节点数量，基于可用 GPU 数量。
+
+该方法通过查询 Slurm 队列状态和节点信息，计算每个节点的可用 GPU 数量，并返回一个字典，其中键为节点 IP，值为可用 GPU 数量。
+
+Args:
+    partion (str, optional): 要查询的分区名称。默认为 ``None``，此时使用当前启动器的分区。
+
+Returns:
+    dict: 以节点 IP 为键、可用 GPU 数量为值的字典。
+''')
+
+add_english_doc('SlurmLauncher.get_idle_nodes', '''\
+Obtains the current number of available nodes in the specified partition based on the available number of GPUs.
+
+This method queries the Slurm queue status and node information to calculate the number of available GPUs for each node, and returns a dictionary with node IP as the key and the number of available GPUs as the value.
+
+Args:
+    partion (str, optional): The partition name to query. Defaults to ``None``, in which case the current launcher's partition will be used.
+
+Returns:
+    dict: A dictionary with node IP as the key and the number of available GPUs as the value.
+''')
+
+add_chinese_doc('SlurmLauncher.launch', '''\
+启动 Slurm 作业并管理其执行。
+
+该方法启动指定的 Slurm 作业，并根据同步设置决定是否等待作业完成。如果设置为同步执行，会持续监控作业状态直到完成，然后停止作业。
+
+Args:
+    job: 要启动的 SlurmLauncher.Job 对象。
+
+Returns:
+    作业的返回值。
+
+Raises:
+    AssertionError: 如果传入的 job 不是 SlurmLauncher.Job 类型。
+''')
+
+add_english_doc('SlurmLauncher.launch', '''\
+Launches a Slurm job and manages its execution.
+
+This method starts the specified Slurm job and decides whether to wait for job completion based on the sync setting. If set to synchronous execution, it continuously monitors the job status until completion, then stops the job.
+
+Args:
+    job: The SlurmLauncher.Job object to launch.
+
+Returns:
+    The return value of the job.
+
+Raises:
+    AssertionError: If the provided job is not a SlurmLauncher.Job type.
+''')
+
 # Launcher-ScoLauncher
 add_chinese_doc('ScoLauncher', '''\
 此类是 ``LazyLLMLaunchersBase`` 的子类，作为SCO (Sensecore)启动器。
@@ -2434,3 +3465,246 @@
 >>> import lazyllm
 >>> launcher = lazyllm.launchers.remote(ngpus=1)
 ''')
+
+# core.py
+add_chinese_doc('lazyllm.components.core.ComponentBase', '''\
+组件基类，提供统一的接口与基础实现，便于创建不同类型的组件。  
+组件通过指定的 Launcher 来执行任务，支持自定义任务执行逻辑。
+
+Args:
+    launcher (LazyLLMLaunchersBase or type, optional): 组件使用的启动器实例或启动器类，默认为空启动器（empty）。
+''')
+
+add_english_doc('lazyllm.components.core.ComponentBase', '''\
+Base class for components, providing a unified interface and basic implementation to facilitate creation of various components.  
+Components execute tasks via a specified launcher and support custom task execution logic.
+
+Args:
+    launcher (LazyLLMLaunchersBase or type, optional): Launcher instance or launcher class used by the component, defaults to empty launcher.
+''')
+
+add_example('lazyllm.components.core.ComponentBase', '''\
+>>> from lazyllm.components.core import ComponentBase
+>>> class MyComponent(ComponentBase):
+...     def apply(self, x):
+...         return x * 2
+>>> comp = MyComponent()
+>>> comp.name = "ExampleComponent"
+>>> print(comp.name)
+ExampleComponent
+>>> result = comp(10)
+>>> print(result)
+20
+>>> print(comp.apply(5))
+10
+''')
+
+add_chinese_doc('lazyllm.components.core.ComponentBase.apply', '''\
+组件执行的核心方法，需由子类实现。  
+定义组件的具体业务逻辑或任务执行步骤。  
+
+**注意:**  
+调用组件时，如果子类重写了此方法，则会调用此方法执行任务。  
+''')
+
+add_english_doc('lazyllm.components.core.ComponentBase.apply', '''\
+Core execution method of the component, to be implemented by subclasses.  
+Defines the specific business logic or task execution steps of the component.
+
+**Note:**  
+If this method is overridden by the subclass, it will be called when the component is invoked.
+''')
+
+add_chinese_doc('lazyllm.components.core.ComponentBase.cmd', '''\
+生成组件的执行命令，需由子类实现。  
+返回的命令可以是字符串、元组或列表，表示具体执行任务的指令。  
+
+**注意:**  
+调用组件时，如果未重写 `apply` 方法，将通过此命令生成任务并由启动器执行。  
+''')
+
+add_english_doc('lazyllm.components.core.ComponentBase.cmd', '''\
+Generates the execution command of the component, to be implemented by subclasses.  
+The returned command can be a string, tuple, or list, representing the instruction to execute the task.
+
+**Note:**  
+If the `apply` method is not overridden, this command will be used to create a job for the launcher to run.
+''')
+
+add_chinese_doc('Job', '''\
+通用任务调度执行类。
+该类用于封装一个通过启动器（launcher）调度执行的任务，支持命令包装、同步控制、返回值提取、命令固定等功能。
+Args:
+    cmd (LazyLLMCMD): 要执行的命令对象。
+    launcher (Any): 启动器实例，用于实际任务调度执行。
+    sync (bool): 是否为同步执行，默认为 True。
+''')
+
+add_english_doc('Job', '''\
+Generic task scheduling executor.
+This class wraps a task that is launched via a launcher, with features like command fixing, output handling, sync control, and return value capturing.
+Args:
+    cmd (LazyLLMCMD): The command object to be executed.
+    launcher (Any): Launcher instance responsible for task dispatching.
+    sync (bool): Whether the task should run synchronously. Defaults to True.
+''')
+
+add_chinese_doc('Job.get_executable_cmd', '''\
+生成最终可执行命令。
+如果已缓存固定命令（fixed），则直接返回。否则根据原始命令进行包裹（wrap）并缓存为 `_fixed_cmd`。
+Args:
+    fixed (bool): 是否使用已固定的命令对象（若已存在）。
+Returns:
+    LazyLLMCMD: 可直接执行的命令对象。
+''')
+
+add_english_doc('Job.get_executable_cmd', '''\
+Generate the final executable command.
+If a fixed command already exists, return it. Otherwise, wrap the original command and cache it as `_fixed_cmd`.
+Args:
+    fixed (bool): Whether to use the cached fixed command.
+Returns:
+    LazyLLMCMD: The executable command object.
+''')
+
+add_chinese_doc('Job.start', '''\
+对外接口：启动作业，并支持失败时的自动重试。
+若作业执行失败，会根据 `restart` 参数控制重试次数。
+Args:
+    restart (int): 重试次数。默认为 3。
+    fixed (bool): 是否使用固定后的命令。用于避免多次构建。
+''')
+
+add_english_doc('Job.start', '''\
+Public interface to start the job with optional retry on failure.
+If the job fails, retries execution based on the `restart` parameter.
+Args:
+    restart (int): Number of times to retry upon failure. Default is 3.
+    fixed (bool): Whether to use the fixed version of the command.
+''')
+
+add_chinese_doc('Job.restart', '''\
+重新启动作业流程。
+该函数会先停止已有进程，等待 2 秒后重新启动作业。
+Args:
+    fixed (bool): 是否使用固定后的命令。
+''')
+
+add_english_doc('Job.restart', '''\
+Restart the job by first stopping it and then restarting after a short delay.
+Args:
+    fixed (bool): Whether to reuse the fixed command object.
+''')
+
+add_chinese_doc('Job.wait', '''\
+挂起当前线程，等待作业执行完成。当前实现为空方法（子类可重写）。
+''')
+
+add_english_doc('Job.wait', '''\
+Suspend the current thread until the job finishes.
+Empty implementation by default; can be overridden in subclasses.
+''')
+
+add_chinese_doc('Job.stop', '''\
+停止当前作业。
+该方法为接口定义，需子类实现，当前抛出 NotImplementedError。
+''')
+
+add_english_doc('Job.stop', '''\
+Stop the current job.
+This method is an interface placeholder and must be implemented by subclasses.
+''')
+
+add_chinese_doc('Job.status', '''\
+当前作业状态。
+该属性为接口定义，需子类实现，当前抛出 NotImplementedError。
+''')
+
+add_english_doc('Job.status', '''\
+Current job status.
+This property is abstract and must be implemented by subclasses.
+''')
+
+add_chinese_doc('K8sLauncher', '''\
+K8sLauncher是一个基于Kubernetes的部署启动器，用于在Kubernetes集群中部署和管理服务。
+
+参数:
+    kube_config_path (str): Kubernetes配置文件路径。
+    resource_config_path (str): 资源配置文件路径。
+    image (str): 容器镜像。
+    volume_configs (list): 卷配置列表。
+    svc_type (str): 服务类型，默认为"LoadBalancer"。
+    namespace (str): Kubernetes命名空间，默认为"default"。
+    gateway_name (str): 网关名称，默认为"lazyllm-gateway"。
+    gateway_class_name (str): 网关类名称，默认为"istio"。
+    host (str): HTTP主机名，默认为None。
+    path (str): HTTP路径，默认为'/generate'。
+    gateway_retry (int): 网关重试次数。
+''')
+
+add_english_doc('K8sLauncher', '''\
+K8sLauncher is a Kubernetes-based deployment launcher for deploying and managing services in a Kubernetes cluster.
+
+Args:
+    kube_config_path (str): Path to the Kubernetes configuration file.
+    resource_config_path (str): Path to the resource configuration file.
+    image (str): Container image.
+    volume_configs (list): List of volume configurations.
+    svc_type (str): Service type, defaults to "LoadBalancer".
+    namespace (str): Kubernetes namespace, defaults to "default".
+    gateway_name (str): Gateway name, defaults to "lazyllm-gateway".
+    gateway_class_name (str): Gateway class name, defaults to "istio".
+    host (str): HTTP hostname, defaults to None.
+    path (str): HTTP path, defaults to '/generate'.
+    gateway_retry (int): Number of gateway retries.
+''')
+
+add_chinese_doc('K8sLauncher.makejob', '''\
+创建一个Kubernetes作业实例。
+
+参数:
+    cmd (str): 要执行的命令。
+
+返回值:
+    K8sLauncher.Job: 一个新的Kubernetes作业实例。
+''')
+
+add_english_doc('K8sLauncher.makejob', '''\
+Create a Kubernetes job instance.
+
+Args:
+    cmd (str): The command to execute.
+
+Returns:
+    K8sLauncher.Job: A new Kubernetes job instance.
+''')
+
+add_chinese_doc('K8sLauncher.launch', '''\
+启动一个Kubernetes作业或可调用对象。
+
+参数:
+    f (K8sLauncher.Job): 要启动的Kubernetes作业实例。
+    *args: 位置参数。
+    **kw: 关键字参数。
+
+返回值:
+    Any: 作业的返回值。
+
+异常:
+    RuntimeError: 当提供的不是Deployment对象时抛出。
+''')
+
+add_english_doc('K8sLauncher.launch', '''\
+Launch a Kubernetes job or callable object.
+
+Args:
+    f (K8sLauncher.Job): The Kubernetes job instance to launch.
+    *args: Positional arguments.
+    **kw: Keyword arguments.
+
+Returns:
+    Any: The return value of the job.
+
+Raises:
+    RuntimeError: When the provided object is not a Deployment object.
+''')
diff --git a/lazyllm/docs/configs.py b/lazyllm/docs/configs.py
index 013980ece..a15f44e71 100644
--- a/lazyllm/docs/configs.py
+++ b/lazyllm/docs/configs.py
@@ -105,3 +105,51 @@
 >>> config.get_all_configs()
 {'home': '~/.lazyllm/', 'mode': <Mode.Normal: (1,)>, 'repr_ml': False, 'rag_store': 'None', 'redis_url': 'None', ...}
 ''')
+
+add_chinese_doc('Config.get_config', r'''\ 
+将传入的配置字典原样返回。该方法可作为处理 config.json 解析结果的扩展点，用户可根据需要重写该方法实现自定义转换逻辑。
+
+Args:
+    cfg (dict): 从配置文件中读取的配置字典。
+''')
+
+add_english_doc('Config.get_config', '''
+Return the provided configuration dictionary as-is.  
+This method can be overridden to customize the transformation logic for the parsed config.json content.
+
+Args:
+    cfg (dict): The configuration dictionary read from the config file.
+''')
+
+add_chinese_doc('Config.temp', '''
+在上下文管理器作用域内临时修改某个配置项的值，退出上下文时恢复原值。
+
+Args:
+    name (str): 要临时修改的配置项名称。
+    value (Any): 临时设置的值。
+''')
+
+add_english_doc('Config.temp', '''
+Temporarily override a configuration item within a context manager scope, restoring the original value upon exit.
+
+Args:
+    name (str): The name of the configuration item to temporarily change.
+    value (Any): The temporary value to set.
+''')
+
+add_chinese_doc('Config.refresh', '''
+根据环境变量的最新值刷新配置项。如果传入 targets 为字符串，则按单个配置项更新；如果为列表，则批量更新；如果为 None，则扫描所有已映射到环境变量的配置项并更新。
+
+Args:
+    targets (str | list[str] | None): 要刷新的配置项名称或列表，传 None 表示刷新所有可从环境变量读取的项。
+''')
+
+add_english_doc('Config.refresh', '''
+Refresh configuration items based on the latest environment variable values.  
+If `targets` is a string, updates the single corresponding configuration item;  
+if it's a list, updates multiple;  
+if None, scans all environment-variable-mapped configuration items and updates them.
+
+Args:
+    targets (str | list[str] | None): Name of the config key or list of keys to refresh, or None to refresh all environment-backed keys.
+''')
\ No newline at end of file
diff --git a/lazyllm/docs/flow.py b/lazyllm/docs/flow.py
index 594ec9d1e..dd5513e65 100644
--- a/lazyllm/docs/flow.py
+++ b/lazyllm/docs/flow.py
@@ -16,6 +16,7 @@
     item_names (list of str, optional): 对应于项目的名称列表。这允许通过名称访问项目。如果未提供，则只能通过索引访问项目。
     auto_capture (bool, optional): 如果为 True，在上下文管理器模式下将自动捕获当前作用域中新定义的变量加入流中。默认为 ``False``。
 
+    auto_capture: 如果为 True，在上下文管理器模式下将自动捕获当前作用域中新定义的变量加入流中。默认为 False。
 """)
 
 add_english_doc('FlowBase', """\
@@ -26,8 +27,27 @@
 Args:
     items (iterable): An iterable of items to be included in the flow. These can be instances of ``FlowBase`` or other objects.
     item_names (list of str, optional): A list of names corresponding to the items. This allows items to be accessed by name. If not provided, items can only be accessed by index.
-    auto_capture (bool, optional): If True, variables newly defined within the ``with`` block will be automatically added to the flow. Defaults to ``False``.
+    auto_capture: If True, variables newly defined within the ``with`` block will be automatically added to the flow. Defaults to ``False``.
+""")
+
+add_chinese_doc('FlowBase.id', """\
+获取模块或流程的 ID。如果传入字符串则原样返回；如果传入已绑定的模块则返回其对应的 item_id；不传参时返回整个 flow 的唯一 id。
 
+Args:
+    module (Optional[Union[str, Any]]): 目标模块或字符串标识。
+
+**Returns:**\n
+- str: 对应的 ID 字符串。
+""")
+
+add_english_doc('FlowBase.id', """\
+Get the identifier for a module or the flow itself. If a string is provided, it is returned as-is. If a bound module is provided, returns its associated item_id. If no argument is given, returns the unique id of the entire flow.
+
+Args:
+    module (Optional[Union[str, Any]]): Target module or string identifier.
+
+**Returns:**\n
+- str: Corresponding identifier string.
 """)
 
 add_chinese_doc('FlowBase.is_root', """\
@@ -134,7 +154,6 @@
                    ↑             ↓
                pre_hook       post_hook
 ```
-                
 Args:
     args: 可变长度参数列表。
     post_action: 在主流程结束后对输出进行进一步处理的可调用对象。默认为 ``None``。
@@ -152,7 +171,6 @@
                    ↑             ↓
                pre_hook       post_hook
 ```
-                
 Args:
     args: A sequence of callables representing the flow modules.
     post_action: An optional callable applied to the output after main flow execution. Defaults to ``None``。
@@ -160,6 +178,120 @@
 
 """)
 
+add_chinese_doc('LazyLLMFlowsBase.register_hook', '''\
+注册一个 Hook 类型，用于在流程执行前后进行额外处理。
+
+Args:
+    hook_type (LazyLLMHook): 要注册的 Hook 类型或实例。
+''')
+
+add_english_doc('LazyLLMFlowsBase.register_hook', '''\
+Register a hook type for additional processing before and after the flow execution.
+
+Args:
+    hook_type (LazyLLMHook): The hook type or instance to register.
+''')
+
+add_chinese_doc('LazyLLMFlowsBase.unregister_hook', '''\
+注销已注册的 Hook。
+
+Args:
+    hook_type (LazyLLMHook): 要移除的 Hook 类型或实例。
+''')
+
+add_english_doc('LazyLLMFlowsBase.unregister_hook', '''\
+Unregister a previously registered hook.
+
+Args:
+    hook_type (LazyLLMHook): The hook type or instance to remove.
+''')
+
+add_chinese_doc('LazyLLMFlowsBase.clear_hooks', '''\
+清空所有已注册的 Hook。
+''')
+
+add_english_doc('LazyLLMFlowsBase.clear_hooks', '''\
+Clear all registered hooks.
+''')
+
+add_chinese_doc('LazyLLMFlowsBase.set_sync', '''\
+设置流程是否同步执行。
+
+Args:
+    sync (bool): 是否同步执行，默认为 True。
+
+**Returns**\n
+- LazyLLMFlowsBase: 当前实例。
+''')
+
+add_english_doc('LazyLLMFlowsBase.set_sync', '''\
+Set whether the flow executes synchronously.
+
+Args:
+    sync (bool): Whether to execute synchronously. Default is True.
+
+**Returns**\n
+- LazyLLMFlowsBase: The current instance.
+''')
+
+add_chinese_doc('LazyLLMFlowsBase.wait', '''\
+等待流程中所有异步任务完成。
+
+**Returns**\n
+- LazyLLMFlowsBase: 当前实例。
+''')
+
+add_english_doc('LazyLLMFlowsBase.wait', '''\
+Wait for all asynchronous tasks in the flow to complete.
+
+**Returns**\n
+- LazyLLMFlowsBase: The current instance.
+''')
+
+add_chinese_doc('LazyLLMFlowsBase.invoke', '''\
+调用指定对象（可为函数、模块或 bind 对象）并传入输入数据。  
+支持对 bind 对象进行 root/pipeline 输出替换。
+
+Args:
+    it (Callable | bind): 要调用的对象。
+    __input (Any): 输入数据。
+    bind_args_source (Any, optional): 绑定参数来源。
+    **kw: 其他关键字参数。
+''')
+
+add_english_doc('LazyLLMFlowsBase.invoke', '''\
+Invoke a target (function, module, or bind object) with the given input.  
+Supports root/pipeline output replacement for bind objects.
+
+Args:
+    it (Callable | bind): The target to invoke.
+    __input (Any): Input data.
+    bind_args_source (Any, optional): Source of bind arguments.
+    **kw: Additional keyword arguments.
+''')
+
+add_chinese_doc('LazyLLMFlowsBase.bind', '''\
+为当前流程绑定参数，生成一个 bind 对象。
+
+Args:
+    *args: 位置参数。
+    **kw: 关键字参数。
+
+**Returns**\n
+- bind: 绑定后的 bind 对象。
+''')
+
+add_english_doc('LazyLLMFlowsBase.bind', '''\
+Bind arguments to the current flow, producing a bind object.
+
+Args:
+    *args: Positional arguments.
+    **kw: Keyword arguments.
+
+**Returns**\n
+- bind: The bound bind object.
+''')
+
 add_chinese_doc('Parallel', """\
 用于管理LazyLLMFlows中的并行流的类。
 
@@ -181,11 +313,11 @@
 ```
 
 Args:
+    args: 基类的可变长度参数列表。
     _scatter (bool, optional): 如果为 ``True``，输入将在项目之间分割。如果为 ``False``，相同的输入将传递给所有项目。默认为 ``False``。
     _concurrent (bool, optional): 如果为 ``True``，操作将使用线程并发执行。如果为 ``False``，操作将顺序执行。默认为 ``True``。
     multiprocessing (bool, optional): 如果为 ``True``，将使用多进程而不是多线程进行并行执行。这可以提供真正的并行性，但会增加进程间通信的开销。默认为 ``False``。
     auto_capture (bool, optional): 如果为 True，在上下文管理器模式下将自动捕获当前作用域中新定义的变量加入流中。默认为 ``False``。
-    args: 基类的可变长度参数列表。
     kwargs: 基类的任意关键字参数。
 
 <span style="font-size: 20px;">&ensp;**`asdict property`**</span>
@@ -230,11 +362,11 @@
 ```
 
 Args:
+    args: Variable length argument list for the base class.
     _scatter (bool, optional): If ``True``, the input is split across the items. If ``False``, the same input is passed to all items. Defaults to ``False``.
-    _concurrent (bool, optional): If ``True``, operations will be executed concurrently using threading. If ``False``, operations will be executed sequentially. Defaults to ``True``.
+    _concurrent (Union[bool, int], optional): If ``True``, operations will be executed concurrently using threading. If an integer, specifies the maximum number of concurrent executions. If ``False``, operations will be executed sequentially. Defaults to ``True``.
     multiprocessing (bool, optional): If ``True``, multiprocessing will be used instead of multithreading for parallel execution. This can provide true parallelism but adds overhead for inter-process communication. Defaults to ``False``.
     auto_capture (bool, optional): If True, variables newly defined within the ``with`` block will be automatically added to the flow. Defaults to ``False``.
-    args: Variable length argument list for the base class.
     kwargs: Arbitrary keyword arguments for the base class.
 
 `asdict property`
@@ -284,6 +416,85 @@
 ''',
 )
 
+add_chinese_doc('Parallel.sequential', """\
+创建一个顺序执行的Parallel实例。
+
+这个类方法会将 ``_concurrent`` 设置为 ``False``，使得所有操作按顺序执行而不是并行执行。
+
+可以这样可视化 ``Parallel.sequential`` 方法：
+
+```text
+# input -> module21 -> ... -> module2N -> out2 -> 
+```
+
+Args:
+    args: 传递给 Parallel 构造函数的可变长度参数列表。
+    kwargs: 传递给 Parallel 构造函数的关键字参数。
+
+**Returns:**\n
+- Parallel: 一个新的顺序执行的 Parallel 实例。
+""")
+
+add_english_doc('Parallel.sequential', """\
+Creates a Parallel instance that executes sequentially.
+
+This class method sets ``_concurrent`` to ``False``, causing all operations to be executed in sequence rather than in parallel.
+
+The ``Parallel.sequential`` method can be visualized as follows:
+
+```text
+# input -> module21 -> ... -> module2N -> out2 -> 
+```
+
+Args:
+    args: Variable length argument list passed to the Parallel constructor.
+    kwargs: Keyword arguments passed to the Parallel constructor.
+    _scatter (bool, optional): If ``True``, the input is split across the items. If ``False``, the same input is passed to all items. Defaults to ``False``.
+    _concurrent (bool, optional): If ``True``, operations will be executed concurrently using threading. If ``False``, operations will be executed sequentially. Defaults to ``True``.
+    multiprocessing (bool, optional): If ``True``, multiprocessing will be used instead of multithreading for parallel execution. This can provide true parallelism but adds overhead for inter-process communication. Defaults to ``False``.
+    auto_capture (bool, optional): If True, variables newly defined within the ``with`` block will be automatically added to the flow. Defaults to ``False``.
+    args: Variable length argument list for the base class.
+    kwargs: Arbitrary keyword arguments for the base class.
+
+**Returns:**\n
+- Parallel: A new Parallel instance configured for sequential execution.
+""")
+
+add_chinese_doc('Parallel.join', """\
+标记Parallel，使得每次调用时的返回值通过指定字符串连接。
+
+Args:
+    string (str): 用于连接结果的字符串。默认为空字符串。
+
+**Returns:**\n
+- Parallel: 返回当前 Parallel 实例，其结果将被字符串连接。
+
+**示例:**\n
+```python
+>>> ppl = lazyllm.parallel(a=test1, b=test2, c=test3).join('\\n')
+>>> ppl(1)
+'2\\n4\\n0.5'
+```
+""")
+
+add_english_doc('Parallel.join', """\
+Marks the Parallel instance to join its results with the specified string on each call.
+
+Args:
+    string (str): The string to use for joining results. Defaults to an empty string.
+
+**Returns:**\n
+- Parallel: Returns the current Parallel instance configured to join results with the specified string.
+
+**Example:**\n
+```python
+>>> ppl = lazyllm.parallel(a=test1, b=test2, c=test3).join('\\n')
+>>> ppl(1)
+'2\\n4\\n0.5'
+```
+""")
+
+
 add_chinese_doc('Pipeline', """\
 一个形成处理阶段管道的顺序执行模型。
 
@@ -327,6 +538,28 @@
 <Function type=lambda>
 """)
 
+add_chinese_doc('Pipeline.output', '''\
+获取流水线中指定模块的输出结果。
+
+参数:
+    module: 要获取输出的模块。可以是模块对象或模块名称。
+    unpack (bool): 是否解包输出结果。默认为False。
+
+返回值:
+    bind.Args: 一个绑定参数对象，用于在流水线中传递数据。
+''')
+
+add_english_doc('Pipeline.output', '''\
+Get the output result of a specified module in the pipeline.
+
+Args:
+    module: The module to get output from. Can be a module object or module name.
+    unpack (bool): Whether to unpack the output result. Defaults to False.
+
+Returns:
+    bind.Args: A bound argument object for data passing in the pipeline.
+''')
+
 add_chinese_doc('save_pipeline_result', """\
 一个上下文管理器，用于临时设置是否保存流水线中的中间执行结果。
 
@@ -472,9 +705,9 @@
 
 Args:
     args: 可变长度参数列表，交替提供条件和对应的流或函数。条件可以是返回布尔值的可调用对象或与输入表达式进行比较的值。
+    conversion (callable, optional): 在进行条件匹配之前，对判定表达式 ``exp`` 进行转换或预处理的函数。默认为 ``None``。
     post_action (callable, optional): 在执行选定流后要调用的函数。默认为 ``None``。
     judge_on_full_input(bool): 如果设置为 ``True`` ， 则通过 ``switch`` 的输入进行条件判断，否则会将输入拆成判定条件和真实的输入两部分，仅对判定条件进行判断。
-    kwargs: 代表命名条件和对应流或函数的任意关键字参数。
 
 抛出:
     TypeError: 如果提供的参数数量为奇数，或者如果第一个参数不是字典且条件没有成对提供。
@@ -494,9 +727,9 @@
 
 Args:
     args: A variable length argument list, alternating between conditions and corresponding flows or functions. Conditions are either callables returning a boolean or values to be compared with the input expression.
+    conversion (callable, optional): A function used to transform or preprocess the evaluation expression ``exp`` before performing condition matching. Defaults to ``None``.
     post_action (callable, optional): A function to be called on the output after the selected flow is executed. Defaults to ``None``.
     judge_on_full_input(bool): If set to ``True``, the conditional judgment will be performed through the input of ``switch``, otherwise the input will be split into two parts: the judgment condition and the actual input, and only the judgment condition will be judged.
-    kwargs: Arbitrary keyword arguments representing named conditions and corresponding flows or functions.
 
 Raises:
     TypeError: If an odd number of arguments are provided, or if the first argument is not a dictionary and the conditions are not provided in pairs.
@@ -654,6 +887,11 @@
 >>> warp = lazyllm.warp(lazyllm.pipeline(lambda x: x * 2, lambda x: f'get {x}'))
 >>> warp(1, 2, 3, 4)
 ('get 2', 'get 4', 'get 6', 'get 8')
+
+>>> from lazyllm import package
+>>> warp1 = lazyllm.warp(lambda x, y: x * 2 + y)
+>>> print(warp1([package(1,2), package(10, 20)]))
+(4, 40)
 """)
 
 add_chinese_doc('Graph', """\
diff --git a/lazyllm/docs/hook.py b/lazyllm/docs/hook.py
new file mode 100644
index 000000000..497cf78d5
--- /dev/null
+++ b/lazyllm/docs/hook.py
@@ -0,0 +1,83 @@
+# flake8: noqa E501
+from . import utils
+import functools
+import lazyllm
+
+# ============= Hook
+
+add_chinese_doc = functools.partial(utils.add_chinese_doc, module=lazyllm.hook)
+add_english_doc = functools.partial(utils.add_english_doc, module=lazyllm.hook)
+add_example = functools.partial(utils.add_example, module=lazyllm.hook)
+
+# LazyLLMHook
+add_chinese_doc('LazyLLMHook', '''\
+LazyLLM 提供的钩子系统抽象基类，用于在函数或方法执行前后插入自定义逻辑。
+
+此类是一个抽象基类（ABC），定义了钩子系统的基本接口。通过继承此类并实现其抽象方法，可以创建自定义的钩子来监控、记录或修改函数执行过程。
+
+Args:
+    obj: 要监控的对象（通常是函数或方法）。此对象会被存储在钩子实例中，供其他方法使用。
+                
+**注意**: 此类是抽象基类，不能直接实例化。必须继承此类并实现所有抽象方法才能使用。
+''')
+
+add_english_doc('LazyLLMHook', '''\
+Abstract base class for LazyLLM's hook system, used to insert custom logic before and after function or method execution.
+
+This class is an abstract base class (ABC) that defines the basic interface for the hook system. By inheriting from this class and implementing its abstract methods, you can create custom hooks to monitor, log, or modify function execution processes.
+
+Args:
+    obj: The object to monitor (usually a function or method). This object will be stored in the hook instance for use by other methods.
+
+**Note**: This class is an abstract base class and cannot be instantiated directly. You must inherit from this class and implement all abstract methods to use it.
+''')
+
+add_chinese_doc('LazyLLMHook.pre_hook', '''\
+前置钩子方法，在函数执行前调用。
+
+Args:
+    *args: 传递给被监控函数的参数
+    **kwargs: 传递给被监控函数的关键字参数
+''')
+
+add_english_doc('LazyLLMHook.pre_hook', '''\
+Pre-hook method, called before function execution.
+
+Args:
+    *args: Arguments passed to the monitored function
+    **kwargs: Keyword arguments passed to the monitored function
+''')
+
+add_chinese_doc('LazyLLMHook.post_hook', '''\
+后置钩子方法，在函数执行后调用。
+
+Args:
+    output: 被监控函数的返回值
+
+**Returns:**\n
+- 处理后的输出值。通常返回原始输出，但也可以修改或包装输出。
+''')
+
+add_english_doc('LazyLLMHook.post_hook', '''\
+Post-hook method, called after function execution.
+
+Args:
+    output: The return value of the monitored function
+
+**Returns:**\n
+- The processed output value. Usually returns the original output, but can also modify or wrap the output.
+''')
+
+add_chinese_doc('LazyLLMHook.report', '''\
+生成钩子执行报告。
+
+**Returns:**\n
+- 钩子执行的相关信息或统计数据。
+''')
+
+add_english_doc('LazyLLMHook.report', '''\
+Generate a report of hook execution.
+
+**Returns:**\n
+- Relevant information or statistics about hook execution.
+''')
diff --git a/lazyllm/docs/launcher.py b/lazyllm/docs/launcher.py
new file mode 100644
index 000000000..4a8f80b84
--- /dev/null
+++ b/lazyllm/docs/launcher.py
@@ -0,0 +1,116 @@
+# flake8: noqa E501
+from . import utils
+import functools
+import lazyllm
+
+
+add_chinese_doc = functools.partial(utils.add_chinese_doc, module=lazyllm)
+add_english_doc = functools.partial(utils.add_english_doc, module=lazyllm)
+add_example = functools.partial(utils.add_example, module=lazyllm)
+
+
+add_chinese_doc('LazyLLMLaunchersBase', '''\
+用于统一管理外部进程或分布式作业（训练/推理等）生命周期的启动器抽象基类。不同平台（本地、SLURM、K8s、云资源等）的具体启动器应继承该类并实现核心接口。
+
+Args:
+    None.
+''')
+
+add_english_doc('LazyLLMLaunchersBase', '''\
+An abstract base class that standardizes the lifecycle management of external processes or distributed jobs 
+(training/inference, etc.). Concrete launchers for different backends (local, SLURM, K8s, cloud, etc.) should 
+inherit and implement the core interfaces.
+
+Args:
+    None.
+''')
+
+add_chinese_doc('LazyLLMLaunchersBase.makejob', '''\
+根据给定命令创建并返回作业/进程句柄。需由子类实现。
+
+Args:
+    cmd: 用于创建作业的命令或配置（如字符串、参数列表或作业描述对象）。
+
+Raises:
+    NotImplementedError: 基类未实现，子类必须覆盖。
+''')
+
+add_english_doc('LazyLLMLaunchersBase.makejob', '''\
+Create and return a job/process handle for the given command. Must be implemented by subclasses.
+
+Args:
+    cmd: The command or specification to create a job (e.g., string, argv list, or a job spec object).
+
+Raises:
+    NotImplementedError: The base class does not implement this method.
+''')
+
+add_chinese_doc('LazyLLMLaunchersBase.launch', '''\
+启动一个或多个作业，并将其登记到 all_processes[self._id] 中。需由子类实现。
+
+Args:
+    *args: 与具体实现相关的位置参数。
+    **kw: 与具体实现相关的关键字参数。
+
+Raises:
+    NotImplementedError: 基类未实现，子类必须覆盖。
+''')
+
+add_english_doc('LazyLLMLaunchersBase.launch', '''\
+Launch one or more jobs and register them under all_processes[self._id]. Must be implemented by subclasses.
+
+Args:
+    *args: Implementation-specific positional arguments.
+    **kw: Implementation-specific keyword arguments.
+
+Raises:
+    NotImplementedError: The base class does not implement this method.
+''')
+
+add_chinese_doc('LazyLLMLaunchersBase.cleanup', '''\
+停止并清理当前启动器登记的所有作业，从 all_processes 中移除相应记录，并在最后阻塞等待作业结束。
+
+Args:
+    None.
+''')
+
+add_english_doc('LazyLLMLaunchersBase.cleanup', '''\
+Stop and clean up all jobs registered under this launcher, remove them from all_processes, and finally wait for termination.
+
+Args:
+    None.
+''')
+
+add_chinese_doc('LazyLLMLaunchersBase.wait', '''\
+阻塞等待当前启动器登记的所有作业结束。
+
+Args:
+    None.
+''')
+
+add_english_doc('LazyLLMLaunchersBase.wait', '''\
+Block until all jobs registered under this launcher finish.
+
+Args:
+    None.
+''')
+
+add_chinese_doc('LazyLLMLaunchersBase.clone', '''\
+深拷贝当前启动器实例并分配新的唯一 _id，返回克隆后的实例。
+
+Args:
+    None.
+
+Returns:
+    LazyLLMLaunchersBase: 克隆出的启动器实例。
+''')
+
+add_english_doc('LazyLLMLaunchersBase.clone', '''\
+Deep-copy this launcher, assign a new unique _id, and return the cloned instance.
+
+Args:
+    None.
+
+Returns:
+    LazyLLMLaunchersBase: The cloned launcher.
+''')
\ No newline at end of file
diff --git a/lazyllm/docs/module.py b/lazyllm/docs/module.py
index 776b4653f..1b2dc18ee 100644
--- a/lazyllm/docs/module.py
+++ b/lazyllm/docs/module.py
@@ -346,6 +346,96 @@
 # ['get 16', 'get 24', 'get 32']
 # ''')
 
+add_chinese_doc('servermodule.LLMBase', '''\
+大语言模型模块的基类，继承自 ModuleBase。  
+负责管理流式输出、Prompt 和格式化器的初始化与切换，处理输入中的文件信息，支持实例共享。
+
+Args:
+    stream (bool 或 dict): 是否启用流式输出或流式配置，默认为 False。
+    return_trace (bool): 是否返回执行过程的 trace，默认为 False。
+    init_prompt (bool): 是否在初始化时自动创建默认 Prompt，默认为 True。
+''')
+
+add_english_doc('servermodule.LLMBase', '''\
+Base class for large language model modules, inheriting from ModuleBase.  
+Manages initialization and switching of streaming output, prompts, and formatters; processes file information in inputs; supports instance sharing.
+
+Args:
+    stream (bool or dict): Whether to enable streaming output or streaming configuration, default is False.
+    return_trace (bool): Whether to return execution trace, default is False.
+    init_prompt (bool): Whether to automatically create a default prompt at initialization, default is True.
+''')
+
+add_chinese_doc('servermodule.LLMBase.prompt', '''\
+设置或切换 Prompt。支持 None、PrompterBase 子类或字符串/字典类型创建 ChatPrompter。
+
+Args:
+    prompt (str/dict/PrompterBase/None): 要设置的 Prompt。
+    history (list): 对话历史，仅当 prompt 为字符串或字典时有效。
+
+**Returns**\n
+- self: 便于链式调用。
+''')
+
+add_english_doc('servermodule.LLMBase.prompt', '''\
+Set or switch the prompt. Supports None, PrompterBase subclass, or string/dict to create ChatPrompter.
+
+Args:
+    prompt (str/dict/PrompterBase/None): The prompt to set.
+    history (list): Conversation history, only valid when prompt is str or dict.
+
+**Returns**\n
+- self: For chaining calls.
+''')
+
+add_chinese_doc('servermodule.LLMBase.formatter', '''\
+设置或切换输出格式化器。支持 None、FormatterBase 子类或可调用对象。
+
+Args:
+    format (FormatterBase/Callable/None): 格式化器对象或函数，默认为 None。
+
+**Returns**\n
+- self: 便于链式调用。
+''')
+
+add_english_doc('servermodule.LLMBase.formatter', '''\
+Set or switch the output formatter. Supports None, FormatterBase subclass or callable.
+
+Args:
+    format (FormatterBase/Callable/None): Formatter object or function, default is None.
+
+**Returns**\n
+- self: For chaining calls.
+''')
+
+add_chinese_doc('servermodule.LLMBase.share', '''\
+创建当前实例的浅拷贝，并可重新设置 prompt、formatter、stream 等属性。  
+适用于多会话或多 Agent 共享基础配置但个性化部分参数的场景。
+
+Args:
+    prompt (str/dict/PrompterBase/None): 新的 Prompt，可选。
+    format (FormatterBase/None): 新的格式化器，可选。
+    stream (bool/dict/None): 新的流式设置，可选。
+    history (list/None): 新的对话历史，仅在设置 Prompt 时有效。
+
+**Returns**\n
+- LLMBase: 新的共享实例。
+''')
+
+add_english_doc('servermodule.LLMBase.share', '''\
+Creates a shallow copy of the current instance, with optional resetting of prompt, formatter, and stream attributes.  
+Useful for scenarios where multiple sessions or agents share a base configuration but customize certain parameters.
+
+Args:
+    prompt (str/dict/PrompterBase/None): New prompt, optional.
+    format (FormatterBase/None): New formatter, optional.
+    stream (bool/dict/None): New streaming settings, optional.
+    history (list/None): New conversation history, effective only when setting prompt.
+
+**Returns**\n
+- LLMBase: The new shared instance.
+''')
+
 add_chinese_doc('TrainableModule', '''\
 可训练模块，所有模型（包括LLM、Embedding等）都通过TrainableModule提供服务
 
@@ -724,6 +814,88 @@
 'processed: Hello'
 ''')
 
+add_english_doc("TrainableModule.get_all_models", '''\
+get_all_models() -> List[str]
+
+Returns a list of all fine-tuned model paths under the current target path.
+
+Returns:
+- List[str]: A list of fine-tuned model identifiers or directories.
+''')
+
+add_chinese_doc("TrainableModule.get_all_models", '''\
+get_all_models() -> List[str]
+
+返回当前目标路径下所有微调模型的路径列表。
+
+返回值：
+- List[str]：所有微调模型的名称或路径列表。
+''')
+
+add_english_doc("TrainableModule.status", '''\
+status(task_name: Optional[str] = None) -> str
+
+Returns the current status of a specific task in the module.
+
+Args:
+- task_name (Optional[str]): Name of the task (e.g., 'deploy'). Defaults to 'deploy' if not provided.
+
+Returns:
+- str: Status string such as 'running', 'finished', or 'stopped'.
+''')
+
+add_chinese_doc("TrainableModule.status", '''\
+status(task_name: Optional[str] = None) -> str
+
+返回模块中指定任务的当前状态。
+
+参数：
+- task_name (Optional[str])：任务名称（如 'deploy'），默认返回 'deploy' 任务的状态。
+
+返回值：
+- str：状态字符串，例如 'running'、'finished' 或 'stopped'。
+''')
+
+add_english_doc("TrainableModule.set_specific_finetuned_model", '''\
+set_specific_finetuned_model(model_path: str) -> None
+
+Sets the model to be used from a specific fine-tuned model path.
+
+Args:
+- model_path (str): The path to the fine-tuned model to use.
+''')
+
+add_chinese_doc("TrainableModule.set_specific_finetuned_model", '''\
+set_specific_finetuned_model(model_path: str) -> None
+
+设置要使用的特定微调模型路径。
+
+参数：
+- model_path (str)：要使用的微调模型的路径。
+''')
+
+add_english_doc("TrainableModule.set_default_parameters", '''\
+set_default_parameters(*, optional_keys: List[str] = [], **kw) -> None
+
+Sets the default parameters to be used during inference or evaluation.
+
+Args:
+- optional_keys (List[str]): A list of optional keys to allow additional parameters without error.
+- **kw: Key-value pairs for default parameters such as temperature, top_k, etc.
+
+''')
+
+add_chinese_doc("TrainableModule.set_default_parameters", '''\
+set_default_parameters(*, optional_keys: List[str] = [], **kw) -> None
+
+设置用于推理或评估的默认参数。
+
+参数：
+- optional_keys (List[str])：允许传入额外参数的可选键列表。
+- **kw：用于设置默认参数的键值对，如 temperature、top_k 等。
+
+''')
+
 # add_example('TrainableModule', '''\
 # >>> import lazyllm
 # >>> m = lazyllm.module.TrainableModule().finetune_method(finetune.dummy).trainset('/file/to/path').deploy_method(None).mode('finetune')
@@ -750,18 +922,20 @@
 可以将ServerModule部署得到的Url包装成一个Module，调用 ``__call__`` 时会访问该服务。
 
 Args:
-    url (str): 要包装的服务的Url
-    stream (bool): 是否流式请求和输出，默认为非流式
+    url (str): 要包装的服务的Url，默认为空字符串
+    stream (bool|Dict[str, str]): 是否流式请求和输出，默认为非流式
     return_trace (bool): 是否将结果记录在trace中，默认为False
+    init_prompt (bool): 是否初始化prompt，默认为True
 ''')
 
 add_english_doc('UrlModule', '''\
 The URL obtained from deploying the ServerModule can be wrapped into a Module. When calling ``__call__`` , it will access the service.
 
 Args:
-    url (str): The URL of the service to be wrapped.
-    stream (bool): Whether to request and output in streaming mode, default is non-streaming.
+    url (str): The URL of the service to be wrapped, defaults to empty string.
+    stream (bool|Dict[str, str]): Whether to request and output in streaming mode, default is non-streaming.
     return_trace (bool): Whether to record the results in trace, default is False.
+    init_prompt (bool): Whether to initialize prompt, defaults to True.
 ''')
 
 add_example('UrlModule', '''\
@@ -1038,6 +1212,48 @@
 >>> print(vlm(inputs))
 ''')
 
+add_chinese_doc('llms.onlinemodule.supplier.doubao.DoubaoModule', '''\
+豆包（Doubao）在线对话模块。
+该类封装了对字节跳动豆包 API 的调用，用于进行多轮对话。默认使用模型 `doubao-1-5-pro-32k-250115`，支持流式输出和调用链跟踪。
+Args:
+    model (str): 使用的模型名称，默认为 `doubao-1-5-pro-32k-250115`。
+    base_url (str): API 的基础 URL，默认为 "https://ark.cn-beijing.volces.com/api/v3/"。
+    api_key (str): 豆包 API Key。若未提供，则从 lazyllm.config['doubao_api_key'] 读取。
+    stream (bool): 是否启用流式输出，默认为 True。
+    return_trace (bool): 是否返回调用链跟踪信息，默认为 False。
+    **kwargs: 其他传递给基类的参数。
+''')
+
+add_english_doc('llms.onlinemodule.supplier.doubao.DoubaoModule', '''\
+Doubao online chat module.
+This class wraps the Doubao API (from ByteDance) for multi-turn chat. It defaults to model `doubao-1-5-pro-32k-250115` and supports streaming and optional trace return.
+Args:
+    model (str): The name of the model to use. Defaults to `doubao-1-5-pro-32k-250115`.
+    base_url (str): The base URL for the API. Defaults to "https://ark.cn-beijing.volces.com/api/v3/".
+    api_key (str): Doubao API key. If not provided, it will be read from `lazyllm.config['doubao_api_key']`.
+    stream (bool): Whether to use streaming output. Defaults to True.
+    return_trace (bool): Whether to return trace information. Defaults to False.
+    **kwargs: Additional arguments passed to the base class.
+''')
+
+add_chinese_doc('llms.onlinemodule.supplier.openai.OpenAIEmbedding', '''\
+OpenAI 在线嵌入模块。
+该类封装了对 OpenAI 嵌入 API 的调用，默认使用模型 `text-embedding-ada-002`，用于将文本编码为向量表示。
+Args:
+    embed_url (str): OpenAI 嵌入 API 的 URL，默认为 "https://api.openai.com/v1/embeddings"。
+    embed_model_name (str): 使用的嵌入模型名称，默认为 "text-embedding-ada-002"。
+    api_key (str, optional): OpenAI 的 API Key。若未提供，则从 lazyllm.config 中读取。
+''')
+
+add_english_doc('llms.onlinemodule.supplier.openai.OpenAIEmbedding', '''\
+Online embedding module using OpenAI.
+This class wraps the OpenAI Embedding API, defaulting to the `text-embedding-ada-002` model, and converts text into vector representations.
+Args:
+    embed_url (str): The URL endpoint of the OpenAI embedding API. Default is "https://api.openai.com/v1/embeddings".
+    embed_model_name (str): The name of the embedding model to use. Default is "text-embedding-ada-002".
+    api_key (str, optional): The OpenAI API key. If not provided, it will be read from `lazyllm.config`.
+''')
+
 add_chinese_doc('OnlineEmbeddingModule', '''\
 用来管理创建目前市面上的在线Embedding服务模块，目前支持openai、sensenova、glm、qwen、doubao
 
@@ -1188,3 +1404,95 @@
 ...         pass
 ...         return embedding
 ''')
+
+add_chinese_doc('llms.onlinemodule.supplier.doubao.DoubaoEmbedding', '''\
+豆包嵌入类，继承自 OnlineEmbeddingModuleBase，封装了调用豆包在线文本嵌入服务的功能。  
+通过指定服务接口 URL、模型名称及 API Key，支持远程获取文本向量表示。
+
+Args:
+    embed_url (Optional[str]): 豆包文本嵌入服务的接口 URL，默认指向北京区域的服务地址。
+    embed_model_name (Optional[str]): 使用的豆包嵌入模型名称，默认为 "doubao-embedding-text-240715"。
+    api_key (Optional[str]): 访问豆包服务的 API Key，若未提供则从 lazyllm 配置中读取。
+''')
+
+add_english_doc('llms.onlinemodule.supplier.doubao.DoubaoEmbedding', '''\
+DoubaoEmbedding class inherits from OnlineEmbeddingModuleBase, encapsulating the functionality to call Doubao's online text embedding service.  
+It supports remote text vector representation retrieval by specifying the service URL, model name, and API key.
+
+Args:
+    embed_url (Optional[str]): URL of the Doubao text embedding service, defaulting to the Beijing region endpoint.
+    embed_model_name (Optional[str]): Name of the Doubao embedding model used, default is "doubao-embedding-text-240715".
+    api_key (Optional[str]): API key for accessing the Doubao service. If not provided, it is read from lazyllm config.
+''')
+
+add_chinese_doc('llms.onlinemodule.fileHandler.FileHandlerBase', '''\
+FileHandlerBase是用于处理微调数据文件的基类，主要用于验证和转换微调数据格式。该类本身不支持直接实例化，需要子类继承该类并实现特定的文件格式转换逻辑。
+
+FileHandlerBase提供以下功能：\n
+1. 验证微调数据文件的格式是否符合标准（.jsonl格式）\n
+2. 检查数据内容是否符合预期的消息格式（包含role和content字段）\n
+3. 验证角色类型是否在允许的范围内（system、knowledge、user、assistant）\n
+4. 确保每个对话示例都包含assistant回复\n
+5. 提供临时文件存储机制用于后续处理\n
+''')
+
+add_english_doc('llms.onlinemodule.fileHandler.FileHandlerBase', '''\
+FileHandlerBase is a base class for handling fine-tuning data files, primarily used for validating and converting fine-tuning data formats. This class itself does not support direct instantiation; it requires subclasses to inherit from this class and implement specific file format conversion logic.
+
+FileHandlerBase provides the following capabilities:\n
+1. Validate that the fine-tuning data file format conforms to standards (.jsonl format)\n
+2. Check if the data content conforms to the expected message format (containing role and content fields)\n
+3. Verify that role types are within the allowed range (system, knowledge, user, assistant)\n
+4. Ensure that each conversation example contains an assistant response\n
+5. Provide temporary file storage mechanism for subsequent processing\n
+''')
+
+add_example('llms.onlinemodule.fileHandler.FileHandlerBase', '''\
+>>> import lazyllm
+>>> from lazyllm.module.llms.onlinemodule.fileHandler import FileHandlerBase
+>>> import tempfile
+>>> import json
+>>> sample_data = [
+...     {"messages": [{"role": "user", "content": "Hello"}, {"role": "assistant", "content": "Hi there!"}]},
+...     {"messages": [{"role": "user", "content": "How are you?"}, {"role": "assistant", "content": "I'm doing well, thank you!"}]}
+... ] 
+>>> with tempfile.NamedTemporaryFile(mode='w', suffix='.jsonl', delete=False) as f:
+...     for item in sample_data:
+...         f.write(json.dumps(item, ensure_ascii=False) + '\n')
+...     temp_file_path = f.name
+>>> class CustomFileHandler(FileHandlerBase):
+...     def _convert_file_format(self, filepath: str) -> str:
+...         with open(filepath, 'r', encoding='utf-8') as f:
+...             data = [json.loads(line) for line in f]
+...         converted_data = []
+...         for item in data:
+...             messages = item.get('messages', [])
+...             conversation = []
+...             for msg in messages:
+...                 conversation.append(f"{msg['role']}: {msg['content']}")
+...             converted_data.append('\n'.join(conversation))
+...         return '\n---\n'.join(converted_data)
+>>> handler = CustomFileHandler()
+>>> try:
+...     result = handler.get_finetune_data(temp_file_path)
+...     print("数据验证和转换成功")
+... except Exception as e:
+...     print(f"错误: {e}")
+... finally:
+...     import os
+...     os.unlink(temp_file_path)
+''')
+
+add_chinese_doc('llms.onlinemodule.fileHandler.FileHandlerBase.get_finetune_data', '''\
+获取并处理微调数据文件，包括验证文件格式和转换为目标平台支持的格式。
+
+Args:
+    filepath (str): 微调数据文件的路径，必须是.jsonl格式
+''')
+
+add_english_doc('llms.onlinemodule.fileHandler.FileHandlerBase.get_finetune_data', '''\
+Get and process fine-tuning data files, including validating file format and converting to the format supported by the target platform.
+
+Args:
+    filepath (str): Path to the fine-tuning data file, must be in .jsonl format
+''')
diff --git a/lazyllm/docs/tools.py b/lazyllm/docs/tools.py
index 6eba2efd2..ef4334fb5 100644
--- a/lazyllm/docs/tools.py
+++ b/lazyllm/docs/tools.py
@@ -12,8 +12,119 @@
 add_tools_english_doc = functools.partial(utils.add_english_doc, module=lazyllm.tools.tools)
 add_tools_example = functools.partial(utils.add_example, module=lazyllm.tools.tools)
 
+# functions for lazyllm.tools.agent
+add_agent_chinese_doc = functools.partial(utils.add_chinese_doc, module=lazyllm.tools.agent)
+add_agent_english_doc = functools.partial(utils.add_english_doc, module=lazyllm.tools.agent)
+add_agent_example = functools.partial(utils.add_example, module=lazyllm.tools.agent)
+
 # ---------------------------------------------------------------------------- #
 
+# classifier/intent_classifier.py
+
+add_chinese_doc('IntentClassifier', '''\
+意图分类模块，用于根据输入文本在给定的意图列表中进行分类。  
+支持中英文自动选择提示模板，并可通过示例、提示、约束和注意事项增强分类效果。
+
+Args:
+    llm: 用于意图分类的大语言模型实例。
+    intent_list (list): 可选，意图类别列表，例如 ["聊天", "天气", "问答"]。
+    prompt (str): 可选，自定义提示语，插入到系统提示模板中。
+    constrain (str): 可选，分类约束条件说明。
+    attention (str): 可选，提示注意事项。
+    examples (list[list[str, str]]): 可选，分类示例列表，每个元素为 [输入文本, 标签]。
+    return_trace (bool): 是否返回执行过程的 trace，默认为 False。
+''')
+
+add_english_doc('IntentClassifier', '''\
+Intent classification module that classifies input text into a given intent list.  
+Supports automatic selection of Chinese or English prompt templates, and allows enhancement through examples, prompt text, constraints, and attention notes.
+
+Args:
+    llm: The large language model instance used for intent classification.
+    intent_list (list): Optional, list of intent categories, e.g., ["chat", "weather", "QA"].
+    prompt (str): Optional, custom prompt inserted into the system prompt template.
+    constrain (str): Optional, classification constraint description.
+    attention (str): Optional, attention notes for classification.
+    examples (list[list[str, str]]): Optional, classification examples, each element is [input text, label].
+    return_trace (bool): Whether to return execution trace. Default is False.
+''')
+
+
+add_example(
+    "IntentClassifier",
+    """\
+    >>> import lazyllm
+    >>> from lazyllm.tools import IntentClassifier
+    >>> classifier_llm = lazyllm.OnlineChatModule(source="openai")
+    >>> chatflow_intent_list = ["Chat", "Financial Knowledge Q&A", "Employee Information Query", "Weather Query"]
+    >>> classifier = IntentClassifier(classifier_llm, intent_list=chatflow_intent_list)
+    >>> classifier.start()
+    >>> print(classifier('What is the weather today'))
+    Weather Query
+    >>>
+    >>> with IntentClassifier(classifier_llm) as ic:
+    >>>     ic.case['Weather Query', lambda x: '38.5°C']
+    >>>     ic.case['Chat', lambda x: 'permission denied']
+    >>>     ic.case['Financial Knowledge Q&A', lambda x: 'Calling Financial RAG']
+    >>>     ic.case['Employee Information Query', lambda x: 'Beijing']
+    ...
+    >>> ic.start()
+    >>> print(ic('What is the weather today'))
+    38.5°C
+""",
+)
+
+
+add_chinese_doc('IntentClassifier.intent_promt_hook', '''\
+意图分类的预处理 Hook。  
+将输入文本与意图列表打包为 JSON，并生成历史对话信息字符串。
+
+Args:
+    input (str | List | Dict | None): 输入文本，仅支持字符串类型。
+    history (List): 历史对话记录，默认为空列表。
+    tools (List[Dict] | None): 工具信息，可选。
+    label (str | None): 标签，可选。
+
+**Returns**\n
+- tuple: (输入数据字典, 历史记录列表, 工具信息, 标签)
+''')
+
+add_english_doc('IntentClassifier.intent_promt_hook', '''\
+Pre-processing hook for intent classification.  
+Packages the input text and intent list into JSON and generates a string of conversation history.
+
+Args:
+    input (str | List | Dict | None): The input text, only string type is supported.
+    history (List): Conversation history, default empty list.
+    tools (List[Dict] | None): Optional tool information.
+    label (str | None): Optional label.
+
+**Returns**\n
+- tuple: (input data dict, history list, tools, label)
+''')
+
+add_chinese_doc('IntentClassifier.post_process_result', '''\
+意图分类结果的后处理。  
+如果结果在意图列表中则直接返回，否则返回意图列表的第一个元素。
+
+Args:
+    input (str): 分类模型输出结果。
+
+**Returns**\n
+- str: 最终的分类标签。
+''')
+
+add_english_doc('IntentClassifier.post_process_result', '''\
+Post-processing of intent classification result.  
+Returns the result directly if it is in the intent list, otherwise returns the first element of the intent list.
+
+Args:
+    input (str): Output result from the classification model.
+
+**Returns**\n
+- str: The final classification label.
+''')
+
 # rag/document.py
 
 add_english_doc('Document', '''\
@@ -31,7 +142,7 @@
     launcher (optional): An object or function responsible for launching the server module. If not provided, the default asynchronous launcher from `lazyllm.launchers` is used (`sync=False`).
     doc_fields (optional): Configure the fields that need to be stored and retrieved along with their corresponding types (currently only used by the Milvus backend).
     doc_files (Optional[List[str]]): List of temporary document files (alternative to dataset_path).When used, dataset_path must be None and only map store is supported.
-    store_conf (optional): Configure which storage backend and index backend to use.      
+    store_conf (optional): Configure which storage backend, MapStore is the default choice.      
 ''')
 
 add_chinese_doc('Document', '''\
@@ -48,7 +159,7 @@
     name (Optional[str]):文档集合的名称标识符。云服务模式下必须提供
     launcher (optional): 负责启动服务器模块的对象或函数。如果未提供，则使用 `lazyllm.launchers` 中的默认异步启动器 (`sync=False`)。            
     doc_files (Optional[List[str]]):临时文档文件列表（dataset_path的替代方案）。使用时dataset_path必须为None且仅支持map存储类型
-    store_conf (optional): 配置使用哪种存储后端和索引后端。
+    store_conf (optional): 配置使用哪种存储后端, 默认使用MapStore将切片数据存于内存中。
 ''')
 
 add_example('Document', '''\
@@ -60,16 +171,19 @@
 >>> document1 = Document(dataset_path='your_doc_path', embed={"online": m, "local": m1}, manager=False)
 
 >>> store_conf = {
->>>     'type': 'chroma',
->>>     'indices': {
->>>         'smart_embedding_index': {
->>>             'backend': 'milvus',
->>>             'kwargs': {
->>>                 'uri': '/tmp/tmp.db',
->>>                 'index_kwargs': {
->>>                     'index_type': 'HNSW',
->>>                     'metric_type': 'COSINE'
->>>                  }
+>>>     "segment_store": {
+>>>         "type": "map",
+>>>         "kwargs": {
+>>>             "uri": "/tmp/tmp_segments.db",
+>>>         },
+>>>     },
+>>>     "vector_store": {
+>>>         "type": "milvus",
+>>>         "kwargs": {
+>>>             "uri": "/tmp/tmp_milvus.db",
+>>>             "index_kwargs": {
+>>>                 "index_type": "FLAT",
+>>>                 "metric_type": "COSINE",
 >>>             },
 >>>         },
 >>>     },
@@ -117,14 +231,14 @@
 Find the parent node of the specified node.
 
 Args:
-    group (str): The name of the node for which to find the parent.
+    group (str): The name of the node group for which to find the parent.
 ''')
 
 add_chinese_doc('Document.find_parent', '''
 查找指定节点的父节点。
 
 Args:
-    group (str): 需要查找的节点名称
+    group (str): 需要查找的节点组名称
 ''')
 
 add_example('Document.find_parent', '''
@@ -141,14 +255,14 @@
 Find the child nodes of the specified node.
 
 Args:
-    group (str): The name of the node for which to find the children.
+    group (str): The name of the node group for which to find the children.
 ''')
 
 add_chinese_doc('Document.find_children', '''
 查找指定节点的子节点。
 
 Args:
-    group (str): 需要查找的名称
+    group (str): 需要查找的节点组名称
 ''')
 
 add_example('Document.find_children', '''
@@ -287,6 +401,94 @@
 # Call the class YmlReader.
 ''')
 
+add_chinese_doc('rag.readers.PandasCSVReader', '''\
+用于读取 CSV 文件并使用 pandas 进行解析。
+
+Args:
+    concat_rows (bool): 是否将所有行拼接为一个文本块，默认为 True。
+    col_joiner (str): 列之间的连接符。
+    row_joiner (str): 行之间的连接符。
+    pandas_config (Optional[Dict]): pandas.read_csv 的可选配置项。
+    return_trace (bool): 是否返回处理过程的 trace。
+''')
+
+add_english_doc('rag.readers.PandasCSVReader', '''\
+Reader for parsing CSV files using pandas.
+
+Args:
+    concat_rows (bool): Whether to concatenate all rows into a single text block. Default is True.
+    col_joiner (str): String used to join column values.
+    row_joiner (str): String used to join rows.
+    pandas_config (Optional[Dict]): Optional config for pandas.read_csv.
+    return_trace (bool): Whether to return the processing trace.
+''')
+
+add_chinese_doc('rag.readers.PandasExcelReader', '''\
+用于读取 Excel 文件（.xlsx），并将内容提取为文本。
+
+Args:
+    concat_rows (bool): 是否将所有行拼接为一个文本块。
+    sheet_name (Optional[str]): 要读取的工作表名称。若为 None，则读取所有工作表。
+    pandas_config (Optional[Dict]): pandas.read_excel 的可选配置项。
+    return_trace (bool): 是否返回处理过程的 trace。
+''')
+
+add_english_doc('rag.readers.PandasExcelReader', '''\
+Reader for extracting text content from Excel (.xlsx) files.
+
+Args:
+    concat_rows (bool): Whether to concatenate all rows into a single block.
+    sheet_name (Optional[str]): Name of the sheet to read. If None, all sheets will be read.
+    pandas_config (Optional[Dict]): Optional config for pandas.read_excel.
+    return_trace (bool): Whether to return the processing trace.
+''')
+
+add_chinese_doc('rag.readers.PDFReader', '''\
+用于读取 PDF 文件并提取其中的文本内容。
+
+Args:
+    return_full_document (bool): 是否将整份 PDF 合并为一个文档节点。若为 False，则每页作为一个节点。
+    return_trace (bool): 是否返回处理过程的 trace，默认为 True。
+''')
+
+add_english_doc('rag.readers.PDFReader', '''\
+Reader for extracting text content from PDF files.
+
+Args:
+    return_full_document (bool): Whether to merge the entire PDF into a single document node. If False, each page becomes a separate node.
+    return_trace (bool): Whether to return the processing trace. Default is True.
+''')
+
+add_chinese_doc('rag.readers.PPTXReader', '''\
+用于解析 PPTX（PowerPoint）文件的读取器，能够提取幻灯片中的文本，并对嵌入图像进行视觉描述生成。
+
+Args:
+    return_trace (bool): 是否记录处理过程的 trace，默认为 True。
+''')
+
+add_english_doc('rag.readers.PPTXReader', '''\
+Reader for PPTX (PowerPoint) files. Extracts text from slides and generates captions for embedded images using a vision-language model.
+
+Args:
+    return_trace (bool): Whether to record the processing trace. Default is True.
+''')
+
+add_chinese_doc('rag.readers.VideoAudioReader', '''\
+用于从视频或音频文件中提取语音内容的读取器，依赖 OpenAI 的 Whisper 模型进行语音识别。
+
+Args:
+    model_version (str): Whisper 模型的版本（如 "base", "small", "medium", "large"），默认为 "base"。
+    return_trace (bool): 是否返回处理过程的 trace，默认为 True。
+''')
+
+add_english_doc('rag.readers.VideoAudioReader', '''\
+Reader for extracting speech content from video or audio files using OpenAI's Whisper model for transcription.
+
+Args:
+    model_version (str): Whisper model version (e.g., "base", "small", "medium", "large"). Default is "base".
+    return_trace (bool): Whether to return the processing trace. Default is True.
+''')
+
 add_chinese_doc('rag.component.bm25.BM25', '''\
 基于 BM25 算法实现的检索器，用于从节点集合中根据查询词检索最相关的文本节点。
 
@@ -372,6 +574,31 @@
 [{'key': 'party_a', 'desc': 'The first party', 'type': 'str'}, ...]
 ''')
 
+# DocInfoSchemaAnalyser.analyse_info_schema
+add_chinese_doc('rag.doc_to_db.DocInfoSchemaAnalyser.analyse_info_schema', '''\
+分析文档信息模式的方法，用于从指定类型的文档中提取关键信息字段的结构定义。
+
+Args:
+    llm (Union[OnlineChatModule, TrainableModule]): 用于生成信息模式的LLM模型
+    doc_type (str): 文档类型，用于指导LLM生成相应的信息模式
+    doc_paths (list[str]): 文档路径列表，用于分析的信息来源
+
+**Returns:**\n
+- DocInfoSchema: 包含关键信息字段定义的模式列表，每个字段包含key、desc、type三个属性
+''')
+
+add_english_doc('rag.doc_to_db.DocInfoSchemaAnalyser.analyse_info_schema', '''\
+Method for analyzing document information schema, used to extract structural definitions of key information fields from documents of a specified type.
+
+Args:
+    llm (Union[OnlineChatModule, TrainableModule]): LLM model used to generate information schema
+    doc_type (str): Document type, used to guide the LLM in generating corresponding information schema
+    doc_paths (list[str]): List of document paths, used as information sources for analysis
+
+**Returns:**\n
+- DocInfoSchema: List of schema containing key information field definitions, each field includes key, desc, and type attributes
+''')
+
 add_chinese_doc('rag.doc_to_db.DocInfoExtractor', '''\
 根据给定的字段结构（schema）从文档中抽取具体的关键信息值，返回格式为 key-value 字典。
 
@@ -397,6 +624,103 @@
 {'party_a': 'ABC Corp'}
 ''')
 
+add_chinese_doc('rag.doc_to_db.DocInfoExtractor.extract_doc_info', '''\
+根据提供的字段结构（schema）从指定文档中抽取具体的关键信息值。
+
+该方法使用大语言模型分析文档内容，根据预定义的字段结构提取相应的信息值，返回格式为 key-value 字典。
+
+Args:
+    llm (Union[OnlineChatModule, TrainableModule]): 用于文档信息抽取的大语言模型。
+    doc_path (str): 要分析的文档路径。
+    info_schema (DocInfoSchema): 字段结构定义，包含需要提取的字段信息。
+    extra_desc (str, optional): 额外的描述信息，用于指导信息抽取。默认为空字符串。
+
+Returns:
+    dict: 提取出的关键信息字典，键为字段名，值为对应的信息值。
+''')
+
+add_english_doc('rag.doc_to_db.DocInfoExtractor.extract_doc_info', '''\
+Extracts specific key information values from a document according to a provided schema.
+
+This method uses a large language model to analyze document content and extract corresponding information values based on predefined field structure, returning a key-value dictionary.
+
+Args:
+    llm (Union[OnlineChatModule, TrainableModule]): The large language model used for document information extraction.
+    doc_path (str): Path to the document to be analyzed.
+    info_schema (DocInfoSchema): Field structure definition containing the information to be extracted.
+    extra_desc (str, optional): Additional description information to guide the extraction process. Defaults to empty string.
+
+Returns:
+    dict: Extracted key information dictionary with field names as keys and corresponding information values as values.
+''')
+
+add_chinese_doc('http_request.http_executor_response.HttpExecutorResponse.get_content_type', '''\
+获取HTTP响应的内容类型。
+
+从响应头中提取 'content-type' 字段的值，用于判断响应内容的类型。
+
+Returns:
+    str: 响应的内容类型，如果未找到则返回空字符串。
+''')
+
+add_english_doc('http_request.http_executor_response.HttpExecutorResponse.get_content_type', '''\
+Get the content type of the HTTP response.
+
+Extracts the 'content-type' field value from the response headers to determine the type of response content.
+
+Returns:
+    str: The content type of the response, or empty string if not found.
+''')
+
+add_example('http_request.http_executor_response.HttpExecutorResponse.get_content_type', '''\
+>>> from lazyllm.tools.http_request.http_executor_response import HttpExecutorResponse
+>>> import httpx
+>>> response = httpx.Response(200, headers={'content-type': 'application/json'})
+>>> http_response = HttpExecutorResponse(response)
+>>> content_type = http_response.get_content_type()
+>>> print(content_type)
+... 'application/json'
+''')
+
+add_chinese_doc('http_request.http_executor_response.HttpExecutorResponse.extract_file', '''\
+从HTTP响应中提取文件内容。
+
+如果响应内容类型是文件相关类型（如图片、音频、视频），则提取文件的内容类型和二进制数据。
+
+Returns:
+    tuple[str, bytes]: 包含内容类型和文件二进制数据的元组。如果不是文件类型，则返回空字符串和空字节。
+''')
+
+add_english_doc('http_request.http_executor_response.HttpExecutorResponse.extract_file', '''\
+Extract file content from HTTP response.
+
+If the response content type is file-related (such as image, audio, video), extracts the content type and binary data of the file.
+
+Returns:
+    tuple[str, bytes]: A tuple containing the content type and binary data of the file. If not a file type, returns empty string and empty bytes.
+''')
+
+add_example('http_request.http_executor_response.HttpExecutorResponse.extract_file', '''\
+>>> from lazyllm.tools.http_request.http_executor_response import HttpExecutorResponse
+>>> import httpx
+>>> # 模拟图片响应
+>>> response = httpx.Response(200, headers={'content-type': 'image/jpeg'}, content=b'fake_image_data')
+>>> http_response = HttpExecutorResponse(response)
+>>> content_type, file_data = http_response.extract_file()
+>>> print(content_type)
+... 'image/jpeg'
+>>> print(len(file_data))
+... 15
+>>> # 模拟JSON响应
+>>> response = httpx.Response(200, headers={'content-type': 'application/json'}, content=b'{"key": "value"}')
+>>> http_response = HttpExecutorResponse(response)
+>>> content_type, file_data = http_response.extract_file()
+>>> print(content_type)
+... ''
+>>> print(file_data)
+... b''
+''')
+
 add_chinese_doc('rag.doc_to_db.DocToDbProcessor', '''\
 用于将文档信息抽取并导出到数据库中。
 
@@ -607,22 +931,28 @@
     return_trace (bool): Whether to record processing trace. Default is True.
 ''')
 
-add_chinese_doc('rag.readers.MagicPDFReader', '''\
-用于通过 MagicPDF 服务解析 PDF 文件内容的模块。支持上传文件或通过 URL 方式调用解析接口，解析结果经过回调函数处理成文档节点列表。
+add_chinese_doc('rag.readers.MineruPDFReader', '''\
+用于通过 MinerU 服务解析 PDF 文件内容的模块。支持上传文件或通过 URL 方式调用解析接口，解析结果经过回调函数处理成文档节点列表。
 
 Args:
-    magic_url (str): MagicPDF 服务的接口 URL。
-    callback (Optional[Callable[[List[dict], Path, dict], List[DocNode]]]): 解析结果回调函数，接收解析元素列表、文件路径及额外信息，返回文档节点列表。默认将所有文本合并为一个节点。
+    url (str): MineruPDFReader 服务的接口 URL。
     upload_mode (bool): 是否采用文件上传模式调用接口，默认为 False，即通过 JSON 请求文件路径。
+    extract_table (bool): 是否提取表格，默认为 True。
+    extract_formula (bool): 是否提取公式，默认为 True。
+    split_doc (bool): 是否分割文档，默认为 True。
+    post_func (Optional[Callable]): 后处理函数。
 ''')
 
-add_english_doc('rag.readers.MagicPDFReader', '''\
-Module to parse PDF content via the MagicPDF service. Supports file upload or URL-based parsing, with a callback to process the parsed elements into document nodes.
+add_english_doc('rag.readers.MineruPDFReader', '''\
+Module to parse PDF content via the MineruPDFReader service. Supports file upload or URL-based parsing, with a callback to process the parsed elements into document nodes.
 
 Args:
-    magic_url (str): The MagicPDF service API URL.
-    callback (Optional[Callable[[List[dict], Path, dict], List[DocNode]]]): A callback function that takes parsed element list, file path, and extra info, returns a list of DocNode. Defaults to merging all text into a single node.
+    url (str): The MineruPDFReader service API URL.
     upload_mode (bool): Whether to use file upload mode for the API call. Default is False, meaning JSON request with file path.
+    extract_table (bool): Whether to extract tables. Default is True.
+    extract_formula (bool): Whether to extract formulas. Default is True.
+    split_doc (bool): Whether to split the document. Default is True.
+    post_func (Optional[Callable]): Post-processing function.
 ''')
 
 add_chinese_doc('rag.readers.MarkdownReader', '''\
@@ -703,424 +1033,374 @@
 
 
 add_english_doc('rag.store.ChromadbStore', '''
-Inherits from the abstract base class StoreBase. This class is mainly used to store and manage document nodes (DocNode), supporting operations such as node addition, deletion, modification, query, index management, and persistent storage.
-Args:
-    group_embed_keys (Dict[str, Set[str]]): Specifies the embedding fields associated with each document group.
-    embed (Dict[str, Callable]): A dictionary of embedding generation functions, supporting multiple embedding sources.
-    embed_dims (Dict[str, int]): The embedding dimensions corresponding to each embedding type.
-    dir (str): Path to the chromadb persistent storage directory.
-    kwargs (Dict): Additional optional parameters passed to the parent class or internal components.
-''')
+ChromadbStore is a vector-capable implementation of LazyLLMStoreBase, leveraging ChromaDB for persistence and vector search.
 
-
-add_chinese_doc('rag.store.ChromadbStore', '''
-继承自 StoreBase 抽象基类。它主要用于存储和管理文档节点(DocNode)，支持节点增删改查、索引管理和持久化存储。
 Args:
-     group_embed_keys (Dict[str, Set[str]]): 指定每个文档分组所对应的嵌入字段。
-    embed (Dict[str, Callable]): 嵌入生成函数或其映射，支持多嵌入源。
-    embed_dims (Dict[str, int]): 每种嵌入类型对应的维度。
-    dir (str): chromadb 数据库存储路径。
-    kwargs (Dict): 其他可选参数，传递给父类或内部组件。
+    dir (Optional[str]): Filesystem path for on-disk ChromaDB storage. If provided, a PersistentClient will be used.
+    host (Optional[str]): Hostname for ChromaDB HTTP server. Used if `dir` is not set.
+    port (Optional[int]): Port number for ChromaDB HTTP server. Used if `dir` is not set.
+    index_kwargs (Optional[Union[Dict, List]]): Configuration parameters for ChromaDB collections, e.g., index type and metrics.
+    client_kwargs (Optional[Dict]): Additional keyword arguments passed to the ChromaDB client constructor.
 ''')
 
-add_example('rag.store.ChromadbStore', '''
->>> from lazyllm.tools.rag.chroma_store import ChromadbStore
->>> from typing import Dict, List
->>> import numpy as np
->>> store = ChromadbStore(
-...     group_embed_keys={"articles": {"title_embed", "content_embed"}},
-...     embed={
-...         "title_embed": lambda x: np.random.rand(128).tolist(),
-...         "content_embed": lambda x: np.random.rand(256).tolist()
-...     },
-...     embed_dims={"title_embed": 128, "content_embed": 256},
-...     dir="./chroma_data"
-... )
->>> store.update_nodes([node1, node2])
->>> results = store.query(query_text="文档内容", group_name="articles", top_k=2)
->>> for node in results:
-...     print(f"找到文档: {node._content[:20]}...")
->>> store.remove_nodes(doc_ids=["doc1"])
-''')
+add_chinese_doc('rag.store.ChromadbStore', '''
+ChromadbStore 是基于 ChromaDB 的向量存储实现，继承自 LazyLLMStoreBase，支持向量写入、检索与持久化。
 
-add_english_doc('rag.store.ChromadbStore.update_nodes', '''
-Update a group of DocNode objects.
 Args:
-    nodes (DocNode): The list of DocNode objects to be updated.
+    dir (Optional[str]): 本地持久化存储目录，优先使用 PersistentClient 模式。
+    host (Optional[str]): HTTP 访问模式下的 ChromaDB 服务主机名。
+    port (Optional[int]): HTTP 模式下的 ChromaDB 服务端口。
+    index_kwargs (Optional[Union[Dict, List]]): Collection 配置参数，如索引类型、度量方式等。
+    client_kwargs (Optional[Dict]): 传递给 ChromaDB 客户端的额外参数。
 ''')
 
+add_english_doc('rag.store.ChromadbStore.dir', '''
+Directory property of the store.
 
-add_chinese_doc('rag.store.ChromadbStore.update_nodes', '''
-更新一组 DocNode 节点。
-Args:
-    nodes(DocNode): 需要更新的 DocNode 列表。
+Returns:
+    Optional[str]: Normalized directory path ending with a slash, or None if not set.
 ''')
 
+add_chinese_doc('rag.store.ChromadbStore.dir', '''
+存储目录属性。
 
-add_english_doc('rag.store.ChromadbStore.remove_nodes', '''
-Delete nodes based on specified conditions.
-Args:
-    doc_ids (str): Delete by document ID.
-    group_name (str): Specify the group name for deletion.
-    uids (str): Delete by unique node ID.
+Returns:
+    Optional[str]: 以斜杠结尾的目录路径，若未配置则返回 None。
 ''')
 
+add_english_doc('rag.store.ChromadbStore.connect', '''
+Initialize the ChromaDB client and configure embedding and metadata settings.
 
-add_chinese_doc('rag.store.ChromadbStore.remove_nodes', '''
-删除指定条件的节点。
 Args:
-    doc_ids(str): 按文档 ID 删除。
-    group_name(str): 限定删除的组名。
-    uids(str): 按节点唯一 ID 删除。
+    embed_dims (Dict[str, int]): Dimensions for each embedding key.
+    embed_datatypes (Dict[str, DataType]): Data types for global metadata fields.
+    global_metadata_desc (Dict[str, GlobalMetadataDesc]): Descriptions of global metadata fields.
 ''')
 
+add_chinese_doc('rag.store.ChromadbStore.connect', '''
+初始化 ChromaDB 客户端并配置向量化及元数据相关设定。
 
-add_english_doc('rag.store.ChromadbStore.update_doc_meta', '''
-Update the metadata of a document.
 Args:
-    doc_id (str): The ID of the document to be updated.
-    metadata (dict): The new metadata (key-value pairs).
+    embed_dims (Dict[str, int]): 每个嵌入键对应的向量维度。
+    embed_datatypes (Dict[str, DataType]): 全局元数据字段的数据类型。
+    global_metadata_desc (Dict[str, GlobalMetadataDesc]): 全局元数据字段的描述。
 ''')
 
+add_english_doc('rag.store.ChromadbStore.upsert', '''
+Insert or update a batch of records(segment's uid and vectors) into ChromaDB.
 
-add_chinese_doc('rag.store.ChromadbStore.update_doc_meta', '''
-更新文档的元数据。。
 Args:
-    doc_id(str):需要更新的文档 ID。
-    metadata(dict):新的元数据（键值对）。
+    collection_name (str): Logical name for the collection.
+    data (List[dict]): List of documents.
+
+Returns:
+    bool: True if operation succeeds, False otherwise.
 ''')
 
+add_chinese_doc('rag.store.ChromadbStore.upsert', '''
+批量写入或更新记录（切片的id及向量数据）到 ChromaDB。
 
-add_english_doc('rag.store.ChromadbStore.get_nodes', '''
-Query nodes based on specified conditions.
 Args:
-    group_name (str): The name of the group to which the nodes belong.
-    uids (List[str]): A list of unique node IDs.
-    doc_ids (Set[str]): A set of document IDs.
-    **kwargs: Additional optional parameters.
+    collection_name (str): 集合名称。
+    data (List[dict]): 文档切片数据列表。
+
+Returns:
+    bool: 操作成功返回 True，否则 False。
 ''')
 
+add_english_doc('rag.store.ChromadbStore.delete', '''
+Delete an entire collection or specific records.
 
-add_chinese_doc('rag.store.ChromadbStore.get_nodes', '''
-根据条件查询节点。
 Args:
-    group_name(str]):节点所属的组名。
-    uids(List[str]):节点唯一 ID 列表。
-    doc_ids	(Set[str])：文档 ID 集合。
-    **kwargs:其他扩展参数。
+    collection_name (str): Name of the collection.
+    criteria (Optional[dict]): If None, drop the collection. Otherwise, filter dict to delete matching records (e.x. delete by doc_id/uid/kb_id).
+
+Returns:
+    bool: True if deletion succeeds, False otherwise.
 ''')
 
+add_chinese_doc('rag.store.ChromadbStore.delete', '''
+删除整个集合或指定记录。
 
-add_english_doc('rag.store.ChromadbStore.activate_group', '''
-Activate the specified group.
 Args:
-    group_names([str, List[str]]): Activate by group name.
+    collection_name (str): 集合名称。
+    criteria (Optional[dict]): 若为 None，删除整个集合；否则按条件删除匹配记录（例如按照切片id、切片所属文件id、切片所属知识库id删除）。
+
+Returns:
+    bool: 删除成功返回 True，否则 False。
 ''')
 
+add_english_doc('rag.store.ChromadbStore.get', '''
+Retrieve records matching criteria.
 
-add_chinese_doc('rag.store.ChromadbStore.activate_group', '''
-激活指定的组。
 Args:
-    group_names([str, List[str]])：按组名激活。
-''')
+    collection_name (str): Name of the collection.
+    criteria (Optional[dict]): Filter conditions such as primary key or metadata (docid/kb_id).
 
-add_english_doc('rag.store.ChromadbStore.activated_groups', '''
-Activate groups. Return the list of currently activated group names.
+Returns:
+    List[dict]: Each dict contains 'uid' and 'embedding'.
 ''')
 
+add_chinese_doc('rag.store.ChromadbStore.get', '''
+根据条件检索记录。
 
-add_chinese_doc('rag.store.ChromadbStore.activated_groups', '''
-激活组，返回当前激活的组名列表。
-''')
-add_english_doc('rag.store.ChromadbStore.query', '''
-Execute a query using the default index.
 Args:
-    args: Query parameters.
-    kwargs: Additional optional parameters.
+    collection_name (str): 集合名称。
+    criteria (Optional[dict]): 过滤条件，如主键或元数据（例如文档id或知识库id）。
+
+Returns:
+    List[dict]: 每项包含 'uid' 和 'embedding'。
 ''')
 
+add_english_doc('rag.store.ChromadbStore.search', '''
+Perform a vector similarity search.
 
-add_chinese_doc('rag.store.ChromadbStore.query', '''
-通过默认索引执行查询。
 Args:
-    args：查询参数。
-    kwargs：其他扩展参数。
-''')
+    collection_name (str): Collection to query.
+    query_embedding (List[float]): Vector to search with.
+    embed_key (str): Which embedding to use.
+    topk (int): Number of top results to return.
+    filters (Optional[Dict[str, Union[str, int, List, Set]]]): Metadata filter conditions.
 
-add_english_doc('rag.store.ChromadbStore.is_group_active', '''
-Check whether the specified group is active.
-Args:
-    name (str): The name of the group.
+Returns:
+    List[dict]: Each dict has 'uid' and 'score' (similarity).
 ''')
 
-add_chinese_doc('rag.store.ChromadbStore.is_group_active', '''
-检查指定组是否激活。
-Args:
-    name(str)：组名。
-''')
+add_chinese_doc('rag.store.ChromadbStore.search', '''
+执行向量相似度检索。
 
+Args:
+    collection_name (str): 要查询的集合名称。
+    query_embedding (List[float]): 用于检索的向量。
+    embed_key (str): 使用的向量模型的key。
+    topk (int): 返回的结果数量。
+    filters (Optional[Dict[str, Union[str, int, List, Set]]]): 元数据过滤条件。
 
-add_english_doc('rag.store.ChromadbStore.all_groups', '''
-Return the list of all group names.
+Returns:
+    List[dict]: 每项包含 'uid' 及 'score'（相似度）。
 ''')
 
+add_english_doc('rag.store.MilvusStore', '''
+Vector store implementation based on Milvus, inheriting from StoreBase. Supports vector insertion, deletion, flexible querying (including scalar filtering).
 
-add_chinese_doc('rag.store.ChromadbStore.all_groups', '''
-返回所有组名列表。
-''')
-
-add_english_doc('rag.store.ChromadbStore.register_index', '''
-Register a custom index.
 Args:
-    type (str): The name of the index type.
-    index (IndexBase): An object implementing the IndexBase interface.
+    uri (str): Milvus connection URI (e.g., "tcp://localhost:19530"). If scheme is local file path, uses milvus-lite version; otherwise remote (need to set up a milvus service, e.x. standalone/distributed version).
+    db_name (str): Database name to use in Milvus. Defaults to "lazyllm".
+    index_kwargs (Optional[Union[Dict, List]]): Index creation parameters (e.g., {"index_type": "IVF_FLAT", "metric_type": "COSINE"} or a list of per-embed-key configs).
+    client_kwargs (Optional[Dict]): Additional keyword arguments for milvus client.
 ''')
 
+add_chinese_doc('rag.store.MilvusStore', '''
+基于 Milvus 的向量存储实现，继承自 StoreBase。支持向量写入、删除、相似度检索，兼容标量过滤。
 
-add_chinese_doc('rag.store.ChromadbStore.register_index', '''
-注册自定义索引。
 Args:
-    type(str):索引类型名称。
-    index(IndexBase):实现 IndexBase 的对象。
+    uri (str): Milvus 连接 URI（如 "tcp://localhost:19530"）。如果为本地路径则使用milvus-lite，否则为远程模式（需要独立部署milvus服务，例如standalone/distributed版本）。
+    db_name (str): Milvus 中使用的数据库名称，默认为 "lazyllm"。
+    index_kwargs (Optional[Union[Dict, List]]): 索引创建参数（例如 {"index_type": "IVF_FLAT", "metric_type": "CONSINE"} ，支持按向量模型的key配置列表）。
+    client_kwargs (Optional[Dict]): 传递给 milvus 客户端的额外参数。
 ''')
 
+add_english_doc('rag.store.MilvusStore.dir', '''
+Local storage directory derived from URI if running embedded. Returns None when using remote Milvus.
 
-add_english_doc('rag.store.ChromadbStore.get_index', '''
-Get the index of the specified type.
-Args:
-    type (str): The type of the index.
+Returns:
+    Optional[str]: Directory path for local milvus.db file, or None if remote.
 ''')
 
+add_chinese_doc('rag.store.MilvusStore.dir', '''
+存储目录属性，基于 URI 推断。远程模式返回 None。
 
-add_chinese_doc('rag.store.ChromadbStore.get_index', '''
-获取指定类型的索引。
-Args:
-    type(str):索引类型
+Returns:
+    Optional[str]: 本地 milvus.db 文件的目录路径，或 None。
 ''')
 
+add_english_doc('rag.store.MilvusStore.connect', '''
+Initialize Milvus client, pass in embedding model parameters and global metadata descriptions.
 
-add_english_doc('rag.store.ChromadbStore.clear_cache', '''
-Clear the ChromaDB collections and memory cache for specified groups or all groups.
 Args:
-    group_names (List[str]): List of group names. If None, clear all groups.
+    embed_dims (Dict[str, int]): Embedding dimensions per embed key.
+    embed_datatypes (Dict[str, DataType]): Data types for each embed key.
+    global_metadata_desc (Dict[str, GlobalMetadataDesc]): Descriptions for metadata fields.
 ''')
 
+add_chinese_doc('rag.store.MilvusStore.connect', '''
+初始化 Milvus 客户端，传入向量化模型参数和全局元数据描述。
 
-add_chinese_doc('rag.store.ChromadbStore.clear_cache', '''
-清除指定组或所有组的 ChromaDB 集合和内存缓存。
 Args:
-    group_names(List[str])：组名列表，为 None 时清除所有组。
+    embed_dims (Dict[str, int]): 每个嵌入键对应的向量维度。
+    embed_datatypes (Dict[str, DataType]): 每个嵌入键的数据类型。
+    global_metadata_desc (Dict[str, GlobalMetadataDesc]): 全局元数据字段的描述。
 ''')
 
+add_english_doc('rag.store.MilvusStore.upsert', '''
+Insert or update a batch of segment data into the Milvus collection.
 
-
-
-
-add_english_doc('rag.store.MilvusStore', '''
-Inherits from the StoreBase abstract base class. Implements a vector database based on Milvus. Its functionality is similar to ChromadbStore, used for storing, managing, indexing, and querying embedded document nodes (DocNode).
 Args:
-    group_embed_keys (Dict[str, Set[str]]): Specifies the embedding fields for each group.
-    embed (Dict[str, Callable]): Embedding functions for each field.
-    embed_dims (Dict[str, int]): Vector dimensions for each embedding field.
-    embed_datatypes (Dict[str, DataType]): Vector types for each embedding field (must comply with Milvus types).
-    global_metadata_desc (Dict[str, GlobalMetadataDesc]): Description of global metadata fields, used to configure other non-vector fields in Milvus.
-    url (str): Milvus connection address, supporting local or remote connections.
-    index_kwargs (Union[Dict, List]): Optional index parameters for creating Milvus vector indexes, such as IVF, HNSW parameters.
-    db_name (str): Optional, defaults to "lazyllm". Represents the database name in Milvus.
+    collection_name (str): Collection name (per embed key grouping).
+    data (List[dict]): List of segment data.
+Returns:
+    bool: True if successful, False otherwise.
 ''')
 
+add_chinese_doc('rag.store.MilvusStore.upsert', '''
+批量写入或更新切片数据到 Milvus 集合。
 
-add_chinese_doc('rag.store.MilvusStore', '''
-继承自 StoreBase 抽象基类。基于 Milvus 向量数据库实现。其功能和 ChromadbStore 类似, 用于存储、管理、索引和查询嵌入向量化后的文档节点(DocNode)。
 Args:
-    group_embed_keys (Dict[str, Set[str]]): 指定每个group所对应的嵌入字段。
-    embed (Dict[str, Callable]): 每种字段对应的 embedding 函数.
-    embed_dims (Dict[str, int]): 每个嵌入字段的向量维度。
-    embed_datatypes(Dict[str, DataType]): 每个嵌入字段的向量类型（需符合 Milvus 类型）。
-    global_metadata_descDict([str, GlobalMetadataDesc])：全局元数据字段的说明，用于配置 Milvus 中的其他非向量字段。
-    url(str):Milvus 的连接地址，支持本地或远程。
-    index_kwargs:([Union[Dict, List]]):可选的索引参数，用于创建 Milvus 的向量索引，例如 IVF、HNSW 参数。
-    db_name(str):可选，默认 "lazyllm"。表示 Milvus 中的数据库名。
+    collection_name (str): 集合名称，通常为 "group_embedKey" 格式。
+    data (List[dict]): 切片数据列表。
+Returns:
+    bool: 操作成功返回 True，否则 False。
 ''')
 
-add_example('rag.store.MilvusStore', '''
->>> from lazyllm.tools.rag.milvus_store import MilvusStore
->>> from typing import Dict, List
->>> import numpy as np
->>> store = MilvusStore(
-...     group_embed_keys={
-...         "articles": {"text"},
-...         "faqs": {"question"}
-...     },
-...     embed={
-...         "text": lambda x: np.random.rand(128).tolist(),
-...         "question": lambda x: np.random.rand(128).tolist()
-...     },
-...     embed_dims={"text": 128, "question": 128},
-...     embed_datatypes={"text": DataType.FLOAT_VECTOR, "question": DataType.FLOAT_VECTOR},
-...     global_metadata_desc=None,
-...     uri="http://localhost:19530",
-...     index_kwargs={"metric_type": "L2", "index_type": "IVF_FLAT", "params": {"nlist": 128}},
-...     db_name="test_db"
-... )
->>> store.update_nodes([node1, node2])
->>> results = store.query(query_text="文档内容", group_name="articles", top_k=2)
->>> for node in results:
-...     print(f"找到文档: {node._content[:20]}...")
->>> store.remove_nodes(doc_ids=["doc1"])
-''')
+add_english_doc('rag.store.MilvusStore.delete', '''
+Delete entire collection or subset of records by criteria.
 
-add_english_doc('rag.store.MilvusStore.update_nodes', '''
-Update or insert nodes into Milvus collections and memory store.
 Args:
-    nodes (List[DocNode]): List of document nodes to update.
+    collection_name (str): Target collection.
+    criteria (Optional[dict]): If None, drop the entire collection; otherwise a dict of filters (uid list or metadata conditions).
+Returns:
+    bool: True if deletion succeeds, False otherwise.
 ''')
 
-add_chinese_doc('rag.store.MilvusStore.update_nodes', '''
-更新或插入节点到 Milvus 集合和内存存储中。
-Args:
-    nodes (List[DocNode]): 需要更新的文档节点列表。
-''')
+add_chinese_doc('rag.store.MilvusStore.delete', '''
+删除整个集合或按条件删除指定记录。
 
-add_english_doc('rag.store.MilvusStore.update_doc_meta', '''
-Update metadata for a document and sync to all related nodes.
 Args:
-    doc_id (str): Target document ID.
-    metadata (dict): New metadata key-value pairs.
+    collection_name (str): 目标集合名称。
+    criteria (Optional[dict]): 若为 None 则删除整个集合；否则按 uid 列表或元数据条件过滤。
+Returns:
+    bool: 删除成功返回 True，否则 False。
 ''')
 
-add_chinese_doc('rag.store.MilvusStore.update_doc_meta', '''
-更新文档元数据并同步到所有关联节点。
-Args:
-    doc_id (str): 目标文档ID。
-    metadata (dict): 新的元数据键值对。
-''')
+add_english_doc('rag.store.MilvusStore.get', '''
+Retrieve records matching primary-key or metadata filters.
 
-add_english_doc('rag.store.MilvusStore.remove_nodes', '''
-Remove nodes by document IDs, group name, or node UIDs.
 Args:
-    doc_ids (Optional[List[str]]): Document IDs filter.
-    group_name (Optional[str]): Group name filter.
-    uids (Optional[List[str]]): Node UIDs filter.
+    collection_name (str): Collection to query.
+    criteria (Optional[dict]): Dict containing 'uid' list or metadata field filters.
+Returns: 
+    List[dict]: Each entry contains 'uid' and 'embedding'.
 ''')
 
-add_chinese_doc('rag.store.MilvusStore.remove_nodes', '''
-通过文档ID、组名或节点UID删除节点。
-Args:
-    doc_ids (Optional[List[str]]): 文档ID过滤条件。
-    group_name (Optional[str]): 组名过滤条件。
-    uids (Optional[List[str]]): 节点UID过滤条件。
-''')
-add_english_doc('rag.store.MilvusStore.get_nodes', '''
-Query nodes with flexible filtering options.
+add_chinese_doc('rag.store.MilvusStore.get', '''
+检索匹配主键或元数据过滤条件的记录。
+
 Args:
-    group_name (Optional[str]): Group name filter.
-    uids (Optional[List[str]]): Node UIDs filter.
-    doc_ids (Optional[Set[str]]): Document IDs filter.
-    **kwargs: Additional query parameters.
+    collection_name (str): 待查询集合。
+    criteria (Optional[dict]): 包含 'uid' 列表或元数据字段过滤条件。
 Returns:
-    List[DocNode]: Matched document nodes.
+    List[dict]: 每项包含 'uid' 及 'embedding' 映射。
 ''')
 
-add_chinese_doc('rag.store.MilvusStore.get_nodes', '''
-通过多条件查询节点。
+add_english_doc('rag.store.MilvusStore.search', '''
+Perform vector similarity search with optional metadata filtering.
+
 Args:
-    group_name (Optional[str]): 组名过滤条件。
-    uids (Optional[List[str]]): 节点UID过滤条件。
-    doc_ids (Optional[Set[str]]): 文档ID过滤条件。
-    **kwargs: 其他查询参数。
+    collection_name (str): Collection to search.
+    query_embedding (List[float]): Query vector.
+    topk (int): Number of nearest neighbors.
+    filters (Optional[Dict[str, Union[List, Set]]]): Metadata filter map.
+    embed_key (str): Which embedding field to use.
 Returns:
-    List[DocNode]: 匹配的文档节点列表。
+    List[dict]: Each dict has 'uid' and similarity 'score'.
 ''')
 
-add_english_doc('rag.store.MilvusStore.query', '''
-Semantic search with vector similarity.
+add_chinese_doc('rag.store.MilvusStore.search', '''
+执行向量相似度检索，并可按元数据过滤。
+
 Args:
-    query (str): Query text.
-    group_name (str): Target group name.
-    similarity_cut_off (Optional[Union[float, Dict[str, float]]]): Similarity threshold.
-    topk (int): Number of results to return.
-    embed_keys (List[str]): Embedding keys for search.
-    filters (Optional[Dict]): Metadata filters.
+    collection_name (str): 待搜索集合。
+    query_embedding (List[float]): 查询向量。
+    topk (int): 返回邻近数量。
+    filters (Optional[Dict[str, Union[List, Set]]]): 元数据过滤映射。
+    embed_key (str): 使用的嵌入字段。
 Returns:
-    List[DocNode]: Nodes with similarity scores.
+    List[dict]: 每项包含 'uid' 及相似度 'score'。
 ''')
 
-add_chinese_doc('rag.store.MilvusStore.query', '''
-基于向量相似度的语义搜索。
+add_chinese_doc('rag.default_index.DefaultIndex', r'''\ 
+默认的索引实现，负责通过 embedding 和文本相似度在底层存储中查询、更新和删除文档节点。支持多种相似度度量方式，并在必要时对查询和节点进行 embedding 计算与更新。
+
 Args:
-    query (str): 查询文本。
-    group_name (str): 目标组名。
-    similarity_cut_off (Optional[Union[float, Dict[str, float]]): 相似度阈值。
-    topk (int): 返回结果数量。
-    embed_keys (List[str]): 用于搜索的嵌入键。
-    filters (Optional[Dict]): 元数据过滤条件。
-Returns:
-    List[DocNode]: 带相似度分数的节点列表。
+    embed (Dict[str, Callable]): 用于生成查询和节点 embedding 的字典，key 是 embedding 名称，value 是接收字符串返回向量的函数。
+    store (StoreBase): 底层存储，用于持久化和检索 DocNode 节点。
+    **kwargs: 预留扩展参数。
 ''')
 
-add_english_doc('rag.store.MilvusStore.activate_group', '''
-Activate one or multiple groups for operations.
+add_english_doc('rag.default_index.DefaultIndex', '''\
+Default index implementation responsible for querying, updating, and removing document nodes in the underlying store using embedding or text similarity. Supports multiple similarity metrics and performs embedding computation and node updates when needed.
+
 Args:
-    group_names (Union[str, List[str]]): Group name(s) to activate.
+    embed (Dict[str, Callable]): Mapping of embedding names to functions that generate vector representations from strings.
+    store (StoreBase): Underlying storage to persist and retrieve DocNode objects.
+    **kwargs: Reserved for future extension.
 ''')
 
-add_chinese_doc('rag.store.MilvusStore.activate_group', '''
-激活一个或多个组用于后续操作。
+add_chinese_doc('rag.default_index.DefaultIndex.update', r'''\ 
+根据提供的节点列表更新索引中的内容。具体行为由子类或外部实现填充（此处为空实现，需在实际使用中覆盖/扩展）。
+
 Args:
-    group_names (Union[str, List[str]]): 要激活的组名（单个或列表）。
+    nodes (List[DocNode]): 需要更新（新增或替换）的文档节点列表。
 ''')
 
-add_english_doc('rag.store.MilvusStore.get_index', '''
-Get index instance by type.
+add_english_doc('rag.default_index.DefaultIndex.update', '''\
+Update the index with the given list of document nodes. This is a placeholder implementation and should be provided/extended in concrete usage.
+
 Args:
-    type (Optional[str]): Index type name, defaults to "default".
+    nodes (List[DocNode]): Document nodes to add or update in the index.
 ''')
 
-add_chinese_doc('rag.store.MilvusStore.get_index', '''
-获取指定类型的索引实例。
+add_chinese_doc('rag.default_index.DefaultIndex.remove', r'''\ 
+从索引中删除指定 UID 的节点，可选指定分组名称以限定作用域。当前为空实现，使用时需要补全逻辑。
+
 Args:
-    type (Optional[str]): 索引类型名称，默认为"default"。
+    uids (List[str]): 要删除的节点唯一标识列表。
+    group_name (Optional[str]): 可选的分组名称，用于限定删除范围。
 ''')
 
-add_english_doc('rag.store.MilvusStore.register_index', '''
-Register custom index type.
+add_english_doc('rag.default_index.DefaultIndex.remove', '''\
+Remove nodes with specified UIDs from the index. Optionally scoped to a group. This is a no-op placeholder and should be implemented in concrete usage.
+
 Args:
-    type (str): Index type name.
-    index (IndexBase): Custom index instance.
+    uids (List[str]): List of unique IDs of nodes to remove.
+    group_name (Optional[str]): Optional group name to scope the removal.
 ''')
 
-add_chinese_doc('rag.store.MilvusStore.register_index', '''
-注册自定义索引类型。
+add_chinese_doc('rag.default_index.DefaultIndex.query', r'''\ 
+执行一次查询，支持 embedding 和文本两种模式，依据相似度函数过滤并返回符合条件的 DocNode 结果。
+
 Args:
-    type (str): 索引类型名称。
-    index (IndexBase): 自定义索引实例。
-''')
+    query (str): 原始查询文本。
+    group_name (str): 要检索的节点组名称。
+    similarity_name (str): 使用的相似度度量名称，必须在 registered_similarities 中注册。
+    similarity_cut_off (Union[float, Dict[str, float]]): 相似度阈值或每个 embedding 对应的阈值字典，用于过滤结果。
+    topk (int): 每个相似度渠道最多保留的候选数量。
+    embed_keys (Optional[List[str]]): 指定用于 embedding 的 key 列表，若为空则使用所有可用 embedding。
+    filters (Optional[Dict[str, List]]): 额外的节点过滤器，应用在计算相似度前。
+    **kwargs: 传递给相似度函数的额外参数。
 
-add_english_doc('rag.store.MilvusStore.activated_groups', '''
-Get names of all activated groups.
-Returns:
-    List[str]: Active group names.
+**Returns**\n
+    - list: List[DocNode]: 经过相似度计算与阈值过滤后去重的文档节点列表。
 ''')
 
-add_chinese_doc('rag.store.MilvusStore.activated_groups', '''
-获取所有已激活的组名。
-Returns:
-    List[str]: 活跃组名列表。
-''')
+add_english_doc('rag.default_index.DefaultIndex.query', '''\
+Perform a query against the index, supporting both embedding-based and text-based similarity modes. Filters and ranks nodes according to similarity functions and cutoffs.
 
-add_english_doc('rag.store.MilvusStore.is_group_active', '''
-Check if a group is activated.
 Args:
-    name (str): Group name to check.
-''')
+    query (str): The raw query string.
+    group_name (str): The group name from which to retrieve nodes.
+    similarity_name (str): Name of the similarity metric to use; must be registered in registered_similarities.
+    similarity_cut_off (Union[float, Dict[str, float]]): Similarity threshold(s) used to filter results; can be a single float or a mapping per embedding.
+    topk (int): Maximum number of candidates to keep per similarity channel before final filtering.
+    embed_keys (Optional[List[str]]): Specific embedding keys to use; defaults to all available if not provided.
+    filters (Optional[Dict[str, List]]): Additional pre-filters applied to nodes before similarity computation.
+    **kwargs: Extra keyword arguments forwarded to the similarity function.
 
-add_chinese_doc('rag.store.MilvusStore.is_group_active', '''
-检查指定组是否激活。
-Args:
-    name (str): 要检查的组名。
+**Returns**\n
+    - list: List[DocNode]: Deduplicated list of document nodes passing similarity and cutoff criteria.
 ''')
 
+
 # ---------------------------------------------------------------------------- #
 
 # rag/rerank.py
@@ -1296,6 +1576,7 @@ def my_reranker(node: DocNode, **kwargs):
 
 add_english_doc('rag.retriever.TempDocRetriever', '''
 A temporary document retriever that inherits from ModuleBase and _PostProcess, used for quickly processing temporary files and performing retrieval tasks.
+
 Args:
     embed: The embedding function.
     output_format: The format of the output result (e.g., JSON). Optional, defaults to None.
@@ -1304,6 +1585,7 @@ def my_reranker(node: DocNode, **kwargs):
 
 add_chinese_doc('rag.retriever.TempDocRetriever', '''
 临时文档检索器，继承自 ModuleBase 和 _PostProcess，用于快速处理临时文件并执行检索任务。
+
 Args:
     embed:嵌入函数。
     output_format:结果输出格式(如json),可选默认为None
@@ -1323,6 +1605,7 @@ def my_reranker(node: DocNode, **kwargs):
 
 add_english_doc('rag.retriever.TempDocRetriever.create_node_group', '''
 Create a node group with specific processing pipeline.
+
 Args:
     name (str): Name of the node group. Auto-generated if None.
     transform (Callable): Function to process documents in this group.
@@ -1334,6 +1617,7 @@ def my_reranker(node: DocNode, **kwargs):
 
 add_chinese_doc('rag.retriever.TempDocRetriever.create_node_group', '''
 创建具有特定处理流程的节点组。
+
 Args:
     name (str): 节点组名称，None时自动生成。
     transform (Callable): 该组文档的处理函数。
@@ -1345,39 +1629,81 @@ def my_reranker(node: DocNode, **kwargs):
 
 add_english_doc('rag.retriever.TempDocRetriever.add_subretriever', '''
 Add a sub-retriever with search configuration.
+
 Args:
     group (str): Target node group name.
     **kwargs: Retriever parameters (e.g., similarity='cosine').
-Returns:
-    self: For method chaining.
+
+**Returns:**\n
+- self: For method chaining.
 ''')
 
 add_chinese_doc('rag.retriever.TempDocRetriever.add_subretriever', '''
 添加带搜索配置的子检索器。
+
 Args:
     group (str): 目标节点组名称。
     **kwargs: 检索器参数（如similarity='cosine'）。
-Returns:
-    self: 支持链式调用。
+
+**Returns:**\n
+- self: 支持链式调用。
 ''')
 
-add_english_doc('rag.doc_node.DocNode', '''
-Execute assigned tasks on the specified document.
+add_chinese_doc('rag.document.UrlDocument', '''\
+UrlDocument类继承自ModuleBase，用于通过指定的URL和名称管理远程文档资源。  
+内部通过lazyllm的UrlModule代理实际调用，支持文档查找、检索和活跃节点分组查询。  
+
 Args:
-    uid (str): Unique identifier.
-    content (Union[str, List[Any]]): Node content.
-    group (str): Document group name.
-    embedding (Dict[str, List[float]]): Dictionary of embedding vectors.
-    parent (Union[str, "DocNode"]): Reference to the parent node.
-    store: Storage representation.
-    node_groups (Dict[str, Dict]): Node storage groups.
-    metadata (Dict[str, Any]): Node-level metadata.
-    global_metadata (Dict[str, Any]): Document-level metadata.
-    text (str): Node content, mutually exclusive with content.
+    url (str): 远程文档资源的访问URL。
+    name (str): 当前文档分组名称，用于标识文档分组。
 ''')
 
-add_chinese_doc('rag.doc_node.DocNode', '''
-在指定的文档上执行设定的任务。
+add_english_doc('rag.document.UrlDocument', '''\
+UrlDocument class inherits from ModuleBase, used to manage remote document resources by specifying a URL and a name.  
+Internally delegates calls to lazyllm's UrlModule, supporting document find, retrieve, and querying active node groups.
+
+Args:
+    url (str): Access URL for the remote document resource.
+    name (str): Current document group name used to identify the document group.
+''')
+
+add_chinese_doc('rag.document.UrlDocument.find', '''\
+生成一个部分应用函数，用于在当前文档组中查找指定目标。
+
+Args:
+    target (str): 需要查找的目标标识。
+
+**Returns:**\n
+- Callable: 调用时会执行查找操作的部分应用函数。
+''')
+
+add_english_doc('rag.document.UrlDocument.find', '''\
+Creates a partially applied function to find a specified target within the current document group.
+
+Args:
+    target (str): The target identifier to find.
+
+**Returns:**\n
+- Callable: A partially applied function that executes the find operation when called.
+''')
+
+add_english_doc('rag.doc_node.DocNode', '''
+Execute assigned tasks on the specified document.
+Args:
+    uid (str): Unique identifier.
+    content (Union[str, List[Any]]): Node content.
+    group (str): Document group name.
+    embedding (Dict[str, List[float]]): Dictionary of embedding vectors.
+    parent (Union[str, "DocNode"]): Reference to the parent node.
+    store: Storage representation.
+    node_groups (Dict[str, Dict]): Node storage groups.
+    metadata (Dict[str, Any]): Node-level metadata.
+    global_metadata (Dict[str, Any]): Document-level metadata.
+    text (str): Node content, mutually exclusive with content.
+''')
+
+add_chinese_doc('rag.doc_node.DocNode', '''
+在指定的文档上执行设定的任务。
 Args:
     uid(str): 唯一标识符。
     content(Union[str, List[Any]]):节点内容
@@ -1482,6 +1808,138 @@ def my_reranker(node: DocNode, **kwargs):
     score: 相似度得分
 ''')
 
+add_chinese_doc('rag.doc_processor.DocumentProcessor', """
+文档处理器类，用于管理文档的添加、删除和更新操作。
+
+Args:
+    server (bool): 是否以服务器模式运行。默认为True。
+    port (Optional[int]): 服务器端口号。默认为None。
+    url (Optional[str]): 远程服务URL。默认为None。
+
+**说明:**
+- 支持异步处理文档任务
+- 提供文档元数据更新功能
+- 支持任务状态回调通知
+- 可配置数据库存储
+""")
+
+add_english_doc('rag.doc_processor.DocumentProcessor', """
+Document processor class for managing document addition, deletion and update operations.
+
+Args:
+    server (bool): Whether to run in server mode. Defaults to True.
+    port (Optional[int]): Server port number. Defaults to None.
+    url (Optional[str]): Remote service URL. Defaults to None.
+
+**Notes:**
+- Supports asynchronous document task processing
+- Provides document metadata update functionality
+- Supports task status callback notifications
+- Configurable database storage
+""")
+
+add_example('rag.doc_processor.DocumentProcessor', """
+```python
+# Create local document processor
+processor = DocumentProcessor(server=False)
+
+# Create server mode document processor
+processor = DocumentProcessor(server=True, port=8080)
+
+# Create remote document processor
+processor = DocumentProcessor(url="http://remote-server:8080")
+```
+""")
+
+add_chinese_doc('rag.doc_processor.DocumentProcessor.register_algorithm', """
+注册算法到文档处理器。
+
+Args:
+    name (str): 算法名称，作为唯一标识符。
+    store (StoreBase): 存储实例，用于管理文档数据。
+    reader (ReaderBase): 读取器实例，用于解析文档内容。
+    node_groups (Dict[str, Dict]): 节点组配置信息。
+    force_refresh (bool): 是否强制刷新已存在的算法。默认为False。
+
+**说明:**
+- 如果算法名称已存在且force_refresh为False，将跳过注册
+- 注册成功后可以使用该算法处理文档
+""")
+
+add_english_doc('rag.doc_processor.DocumentProcessor.register_algorithm', """
+Register an algorithm to the document processor.
+
+Args:
+    name (str): Algorithm name as unique identifier.
+    store (StoreBase): Storage instance for managing document data.
+    reader (ReaderBase): Reader instance for parsing document content.
+    node_groups (Dict[str, Dict]): Node group configuration information.
+    force_refresh (bool): Whether to force refresh existing algorithm. Defaults to False.
+
+**Notes:**
+- If algorithm name exists and force_refresh is False, registration will be skipped
+- After successful registration, the algorithm can be used to process documents
+""")
+
+add_example('rag.doc_processor.DocumentProcessor.register_algorithm', """
+```python
+from lazyllm.rag import DocumentProcessor, FileStore, PDFReader
+
+# Create storage and reader instances
+store = FileStore(path="./data")
+reader = PDFReader()
+
+# Define node group configuration
+node_groups = {
+    "text": {"transform": "text", "parent": "root"},
+    "summary": {"transform": "summary", "parent": "text"}
+}
+
+# Register algorithm
+processor = DocumentProcessor()
+processor.register_algorithm(
+    name="pdf_processor",
+    store=store,
+    reader=reader,
+    node_groups=node_groups
+)
+```
+""")
+
+add_chinese_doc('rag.doc_processor.DocumentProcessor.drop_algorithm', """
+从文档处理器中移除指定算法。
+
+Args:
+    name (str): 要移除的算法名称。
+    clean_db (bool): 是否清理相关数据库数据。默认为False。
+
+**说明:**
+- 如果算法名称不存在，将输出警告信息
+- 移除后该算法将无法继续使用
+""")
+
+add_english_doc('rag.doc_processor.DocumentProcessor.drop_algorithm', """
+Remove specified algorithm from document processor.
+
+Args:
+    name (str): Name of the algorithm to remove.
+    clean_db (bool): Whether to clean related database data. Defaults to False.
+
+**Notes:**
+- If algorithm name does not exist, a warning message will be output
+- After removal, the algorithm will no longer be available
+""")
+
+add_example('rag.doc_processor.DocumentProcessor.drop_algorithm', """
+```python
+# Remove algorithm
+processor.drop_algorithm("pdf_processor")
+
+# Remove algorithm and clean database
+processor.drop_algorithm("pdf_processor", clean_db=True)
+```
+""")
+
 add_english_doc('rag.dataReader.SimpleDirectoryReader', '''
 A modular document directory reader that inherits from ModuleBase, supporting reading various document formats from the file system and converting them into standardized DocNode objects.
 Args:
@@ -1525,6 +1983,48 @@ def my_reranker(node: DocNode, **kwargs):
 >>> documents = reader.load_data()
 ''')
 
+add_chinese_doc('rag.dataReader.SimpleDirectoryReader.load_file', '''\
+load_file(input_file, metadata_genf, file_extractor, encoding='utf-8', pathm=Path, fs=None, metadata=None)
+
+使用指定的 Reader 将单个文件加载为 `DocNode` 列表。
+
+该方法会根据文件名匹配合适的读取器（reader），并遵循以下优先级生成元数据：
+`用户提供 > reader 自动生成 > metadata_genf 生成`。支持自定义文件读取器，同时在配置允许的情况下支持回退到原始文本读取。
+
+参数说明：
+- input_file (Path): 要读取的文件路径。
+- metadata_genf (Callable): 用于根据路径生成元数据的函数。
+- file_extractor (Dict[str, Callable]): 文件扩展名与 reader 的映射表。
+- encoding (str): 文件读取时使用的文本编码，默认为 "utf-8"。
+- pathm (PurePath): 路径处理模块，适用于本地或远程路径。
+- fs (AbstractFileSystem): 可选的文件系统对象，支持 fsspec 抽象。
+- metadata (Dict): 可选的用户自定义元数据，优先于自动生成。
+
+返回：
+- List[DocNode]: 从文件中提取的文档对象列表。
+''')
+
+add_english_doc('rag.dataReader.SimpleDirectoryReader.load_file', '''\
+load_file(input_file, metadata_genf, file_extractor, encoding='utf-8', pathm=Path, fs=None, metadata=None)
+
+Load a single file into a list of `DocNode` objects using the appropriate reader.
+
+This method supports automatic reader selection based on file extension patterns, and applies a priority order to metadata:
+`user > reader > metadata_genf`. It supports both default and user-supplied readers and can fall back to raw text decoding
+if enabled in config.
+
+Parameters:
+- input_file (Path): Path to the input file.
+- metadata_genf (Callable): Function to generate metadata from file path.
+- file_extractor (Dict[str, Callable]): Mapping of file extension patterns to reader callables.
+- encoding (str): Text encoding to use when reading files. Default is "utf-8".
+- pathm (PurePath): Path handling module to support local or remote paths.
+- fs (AbstractFileSystem): Optional filesystem abstraction from fsspec.
+- metadata (Dict): Optional user-defined metadata to override reader-generated data.
+
+Returns:
+- List[DocNode]: List of parsed documents extracted from the file.
+''')
 
 add_english_doc('rag.dataReader.FileReader', '''
 File content reader whose main function is to convert various input file formats into concatenated plain text content.
@@ -1545,6 +2045,111 @@ def my_reranker(node: DocNode, **kwargs):
 >>> content = reader("yourpath/") 
 ''')
 
+add_chinese_doc('rag.readers.readerBase.LazyLLMReaderBase', '''
+基础文档读取器类，提供了文档加载的基本接口。继承自ModuleBase，使用LazyLLMRegisterMetaClass作为元类。
+
+Args:
+    return_trace (bool): 是否返回处理过程的追踪信息。默认为True。
+
+**说明:**
+- 提供了惰性加载和普通加载两种方式
+- 子类需要实现_lazy_load_data方法
+- 支持批量处理文档
+- 自动转换为标准化的DocNode格式
+''')
+
+add_english_doc('rag.readers.readerBase.LazyLLMReaderBase', '''
+Base document reader class that provides basic interfaces for document loading. Inherits from ModuleBase and uses LazyLLMRegisterMetaClass as metaclass.
+
+Args:
+    return_trace (bool): Whether to return processing trace information. Defaults to True.
+
+**Notes:**
+- Provides both lazy loading and regular loading methods
+- Subclasses need to implement _lazy_load_data method
+- Supports batch document processing
+- Automatically converts to standardized DocNode format
+''')
+
+add_example('rag.readers.readerBase.LazyLLMReaderBase', '''
+```python
+from lazyllm.tools.rag.readers.readerBase import LazyLLMReaderBase
+from lazyllm.tools.rag.doc_node import DocNode
+from typing import Iterable
+
+class CustomReader(LazyLLMReaderBase):
+    def _lazy_load_data(self, file_paths: list, **kwargs) -> Iterable[DocNode]:
+        for file_path in file_paths:
+            # Process each file and yield DocNode
+            content = self._read_file(file_path)
+            yield DocNode(
+                text=content,
+                metadata={"source": file_path}
+            )
+
+# Create reader instance
+reader = CustomReader(return_trace=True)
+
+# Load documents
+documents = reader.forward(file_paths=["doc1.txt", "doc2.txt"])
+```
+''')
+
+
+add_chinese_doc('rag.doc_node.QADocNode', '''\
+问答文档节点类，用于存储问答对数据。
+
+参数:
+    query (str): 问题文本。
+    answer (str): 答案文本。
+    uid (str): 唯一标识符。
+    group (str): 文档组名。
+    embedding (Dict[str, List[float]]): 嵌入向量字典。
+    parent (DocNode): 父节点引用。
+    metadata (Dict[str, Any]): 节点级元数据。
+    global_metadata (Dict[str, Any]): 文档级元数据。
+    text (str): 节点内容，与query互斥。
+''')
+
+add_english_doc('rag.doc_node.QADocNode', '''\
+Question-Answer document node class for storing QA pair data.
+
+Args:
+    query (str): The question text.
+    answer (str): The answer text.
+    uid (str): Unique identifier.
+    group (str): Document group name.
+    embedding (Dict[str, List[float]]): Dictionary of embedding vectors.
+    parent (DocNode): Reference to the parent node.
+    metadata (Dict[str, Any]): Node-level metadata.
+    global_metadata (Dict[str, Any]): Document-level metadata.
+    text (str): Node content, mutually exclusive with query.
+''')
+
+add_chinese_doc('rag.doc_node.QADocNode.get_text', '''\
+获取节点的文本内容。
+
+参数:
+    metadata_mode (MetadataMode): 元数据模式，默认为MetadataMode.NONE。
+        当设置为MetadataMode.LLM时，返回格式化的问答对。
+        其他模式下返回基类的文本格式。
+
+返回值:
+    str: 格式化后的文本内容。
+''')
+
+add_english_doc('rag.doc_node.QADocNode.get_text', '''\
+Get the text content of the node.
+
+Args:
+    metadata_mode (MetadataMode): Metadata mode, defaults to MetadataMode.NONE.
+        When set to MetadataMode.LLM, returns formatted QA pair.
+        For other modes, returns base class text format.
+
+Returns:
+    str: The formatted text content.
+''')
+
 # ---------------------------------------------------------------------------- #
 
 # rag/transform.py
@@ -1851,6 +2456,65 @@ def my_reranker(node: DocNode, **kwargs):
 - BaseResponse: 删除操作结果。
 """)
 
+add_chinese_doc('rag.DocManager.delete_files_from_group', """
+删除指定分组中的文件的接口。
+
+Args:
+    request (FileGroupRequest): 包含文件ID列表和分组名称的请求参数。
+
+**Returns:**\n
+- BaseResponse: 删除操作结果。
+""")
+
+add_chinese_doc('rag.DocManager.add_metadata', """
+为指定文档添加或更新元数据的接口。
+
+Args:
+    add_metadata_request (AddMetadataRequest): 包含文档ID列表和键值对元数据的请求。
+
+**Returns:**\n
+- BaseResponse: 操作结果信息。
+""")
+
+add_chinese_doc('rag.DocManager.delete_metadata_item', """
+删除指定文档的元数据字段或字段值的接口。
+
+Args:
+    del_metadata_request (DeleteMetadataRequest): 包含文档ID列表、字段名和键值对删除条件的请求。
+
+**Returns:**\n
+- BaseResponse: 操作结果信息。
+""")
+
+add_chinese_doc('rag.DocManager.update_or_create_metadata_keys', """
+更新或创建文档元数据字段的接口。
+Args:
+    update_metadata_request (UpdateMetadataRequest): 包含文档ID列表和需更新或新增的键值对元数据。
+
+**Returns:**\n
+- BaseResponse: 操作结果信息。
+""")
+
+add_chinese_doc('rag.DocManager.reset_metadata', """
+重置指定文档的所有元数据字段。
+
+Args:
+    reset_metadata_request (ResetMetadataRequest): 包含文档ID列表和新的元数据字典。
+
+**Returns:**\n
+- BaseResponse: 操作结果信息。
+""")
+
+add_chinese_doc('rag.DocManager.query_metadata', """
+查询指定文档的元数据。
+
+Args:
+    query_metadata_request (QueryMetadataRequest): 请求参数，包含文档ID和可选的字段名。
+
+**Returns:**\n
+- BaseResponse: 若指定了 key 且存在，返回对应字段值；否则返回整个 metadata；key 不存在时报错。
+""")
+
 add_english_doc('rag.DocManager', """
 The `DocManager` class manages document lists and related operations, providing APIs for uploading, deleting, and grouping documents.
 
@@ -1885,13 +2549,14 @@ def my_reranker(node: DocNode, **kwargs):
 **Returns:**\n
 - BaseResponse: Upload results and file IDs.
 """)
-
 add_english_doc('rag.DocManager.add_files', """
 Batch add files.
+
 Args:
     files (List[UploadFile]): List of uploaded files.
     group_name (str): Target knowledge base group name; if empty, files are not added to any group.
     metadatas (Optional[str]): Metadata of the files in JSON format.
+
 **Returns:**\n
 - BaseResponse: Returns a list of unique file IDs corresponding to all input files, including newly added and existing ones. In case of exceptions, returns error codes and exception information.
 """)
@@ -1954,340 +2619,1286 @@ def my_reranker(node: DocNode, **kwargs):
 - BaseResponse: Deletion operation result.
 """)
 
-# ---------------------------------------------------------------------------- #
-
-# rag/utils.py
-
-add_chinese_doc('rag.DocListManager.table_inited', """\
-检查数据库表是否已初始化。
+add_english_doc('rag.DocManager.delete_files_from_group', """
+An endpoint to delete specified files in a group.
 
-**Returns:**
-- bool: 如果表已初始化，则返回True；否则返回False。
-""")
+Args:
+    request (FileGroupRequest): Request containing a list of file IDs and the group name.
 
-add_chinese_doc('rag.DocListManager._init_tables', """\
-初始化数据库表。此方法应在未初始化表时调用，用于创建必要的表结构。
+**Returns:**\n
+- BaseResponse: Deletion operation result.
 """)
 
-add_chinese_doc('rag.DocListManager.list_files', """\
-列出符合条件的文件。
+add_english_doc('rag.DocManager.add_metadata', """
+An endpoint to add or update metadata for specified documents.
 
 Args:
-    limit (int, optional): 要返回的文件数限制。
-    details (bool): 如果为True，则返回文件的详细信息。
-    status (str or list of str, optional): 要筛选的文件状态。
-    exclude_status (str or list of str, optional): 要排除的文件状态。
-
-**Returns:**
-- list: 文件列表。
-""")
+    add_metadata_request (AddMetadataRequest): Request containing list of document IDs and key-value metadata.
 
-add_chinese_doc('rag.DocListManager.list_all_kb_group', """\
-列出所有知识库分组的名称。
-
-**Returns:**
-- list: 知识库分组名称列表。
+**Returns:**\n
+- BaseResponse: Operation result information.
 """)
 
-add_chinese_doc('rag.DocListManager.add_kb_group', """\
-添加一个新的知识库分组。
+add_english_doc('rag.DocManager.delete_metadata_item', """
+An endpoint to delete metadata fields or field values from specified documents.
 
 Args:
-    name (str): 要添加的分组名称。
+    del_metadata_request (DeleteMetadataRequest): Request containing list of document IDs, field names, and/or deletion rules.
+
+**Returns:**\n
+- BaseResponse: Deletion operation result.
 """)
 
-add_chinese_doc('rag.DocListManager.list_kb_group_files', """\
-列出指定知识库分组中的文件。
+add_english_doc('rag.DocManager.update_or_create_metadata_keys', """
+An endpoint to update or create metadata fields for specified documents.
 
 Args:
-    group (str, optional): 分组名称。默认为None，表示所有分组。
-    limit (int, optional): 要返回的文件数限制。
-    details (bool): 如果为True，则返回文件的详细信息。
-    status (str or list of str, optional): 要筛选的文件状态。
-    exclude_status (str or list of str, optional): 要排除的文件状态。
-    upload_status (str, optional): 要筛选的上传状态。
-    exclude_upload_status (str or list of str, optional): 要排除的上传状态。
+    update_metadata_request (UpdateMetadataRequest): Request containing a list of document IDs and metadata key-value pairs to update or create.
 
-**Returns:**
-- list: 文件列表。
+**Returns:**\n
+- BaseResponse: Deletion operation result.
 """)
 
-add_chinese_doc('rag.DocListManager.add_files', """\
-将文件添加到数据库中。
+add_english_doc('rag.DocManager.reset_metadata', """
+An endpoint to reset all metadata fields of specified documents.
 
 Args:
-    files (list of str): 要添加的文件路径列表。
-    metadatas (list, optional): 与文件相关的元数据。
-    status (str, optional): 文件状态。
+    reset_metadata_request (ResetMetadataRequest): Request containing a list of document IDs and the new metadata dictionary to apply.
 
-**Returns:**
-- list: 文件的ID列表。
+**Returns:**\n
+- BaseResponse: Deletion operation result.
 """)
 
-add_chinese_doc('rag.DocListManager.update_file_message', """\
-更新指定文件的消息。
+add_english_doc('rag.DocManager.query_metadata', """
+An endpoint to query metadata of a specific document.
 
 Args:
-    fileid (str): 文件ID。
-    **kw: 需要更新的其他键值对。
-""")
+    query_metadata_request (QueryMetadataRequest): Request containing the document ID and an optional metadata field name.
 
-add_chinese_doc('rag.DocListManager.add_files_to_kb_group', """\
-将文件添加到指定的知识库分组中。
-
-Args:
-    file_ids (list of str): 要添加的文件ID列表。
-    group (str): 要添加的分组名称。
+**Returns:**\n
+- BaseResponse: Returns the field value if key is specified and exists; otherwise returns full metadata. If the key does not exist, returns an error.
 """)
+# ---------------------------------------------------------------------------- #
 
-add_chinese_doc('rag.DocListManager.delete_files', """\
-将与文件关联的知识库条目设为删除中，并由各知识库进行异步删除解析结果及关联记录。
+# rag/data_loaders.py
 
-Args:
-    file_ids (list of str): 要删除的文件ID列表
-""")
+add_english_doc('rag.data_loaders.DirectoryReader', '''\
+A directory reader class for loading and processing documents from file directories.
 
-add_chinese_doc('rag.DocListManager.delete_files_from_kb_group', """\
-从指定的知识库分组中删除文件。
+This class provides functionality to read documents from specified directories and convert them into document nodes. It supports both local and global file readers, and can handle different types of documents including images.
 
 Args:
-    file_ids (list of str): 要删除的文件ID列表。
-    group (str): 分组名称。
-""")
+    input_files (Optional[List[str]]): A list of file paths to read. If None, files will be loaded when calling load_data method.
+    local_readers (Optional[Dict]): A dictionary of local file readers specific to this instance. Keys are file patterns, values are reader functions.
+    global_readers (Optional[Dict]): A dictionary of global file readers shared across all instances. Keys are file patterns, values are reader functions.
+''')
 
-add_chinese_doc('rag.DocListManager.get_file_status', """\
-获取指定文件的状态。
+add_chinese_doc('rag.data_loaders.DirectoryReader', '''\
+用于从文件目录加载和处理文档的目录读取器类。
 
-Args:
-    fileid (str): 文件ID。
+此类提供从指定目录读取文档并将其转换为文档节点的功能。它支持本地和全局文件读取器，并且可以处理不同类型的文档，包括图像。
 
-**Returns:**
-- str: 文件的当前状态。
-""")
+Args:
+    input_files (Optional[List[str]]): 要读取的文件路径列表。如果为None，文件将在调用load_data方法时加载。
+    local_readers (Optional[Dict]): 特定于此实例的本地文件读取器字典。键是文件模式，值是读取器函数。
+    global_readers (Optional[Dict]): 在所有实例间共享的全局文件读取器字典。键是文件模式，值是读取器函数。
+''')
 
-add_chinese_doc('rag.DocListManager.update_file_status', """\
-更新指定文件的状态。
+add_example('rag.data_loaders.DirectoryReader', '''\
+>>> from lazyllm.tools.rag.data_loaders import DirectoryReader
+>>> from lazyllm.tools.rag.readers import DocxReader, PDFReader
+>>> local_readers = {
+...     "**/*.docx": DocxReader,
+...     "**/*.pdf": PDFReader
+>>> }
+>>> reader = DirectoryReader(
+...     input_files=["path/to/documents"],
+...     local_readers=local_readers,
+...     global_readers={}
+>>> )
+>>> documents = reader.load_data()
+>>> print(f"加载了 {len(documents)} 个文档")
+''')
 
-Args:
-    file_ids (list of str): 要更新状态的文件ID列表。
-    status (str): 新的文件状态。
-""")
+add_english_doc('rag.data_loaders.DirectoryReader.load_data', '''\
+Load and process documents from the specified input files.
 
-add_chinese_doc('rag.DocListManager.update_kb_group', """\
-更新指定知识库分组中的内容。
+This method reads documents from the input files using the configured file readers (both local and global), processes them into document nodes, and optionally separates image nodes from text nodes.
 
 Args:
-    cond_file_ids (list of str, optional): 过滤使用的文件ID列表，默认为None。
-    cond_group (str, optional): 过滤使用的知识库分组名称，默认为None。
-    cond_status_list (list of str, optional): 过滤使用的状态列表，默认为None。
-    new_status (str, optional): 新状态, 默认为None。
-    new_need_reparse (bool, optinoal): 新的是否需重解析标志。
+    input_files (Optional[List[str]]): A list of file paths to read. If None, uses the files specified during initialization.
+    metadatas (Optional[Dict]): Additional metadata to associate with the loaded documents.
+    split_image_nodes (bool): Whether to separate image nodes from text nodes. If True, returns a tuple of (text_nodes, image_nodes). If False, returns all nodes together.
 
 **Returns:**
-- list: 得到更新的列表list of (doc_id, group_name)
-""")
+- Union[List[DocNode], Tuple[List[DocNode], List[ImageDocNode]]]: If split_image_nodes is False, returns a list of all document nodes. If True, returns a tuple containing text nodes and image nodes separately.
 
-add_chinese_doc('rag.DocListManager.release', """\
-释放当前管理器的资源。
+''')
 
-""")
+add_chinese_doc('rag.data_loaders.DirectoryReader.load_data', '''\
+从指定的输入文件加载和处理文档。
+
+此方法使用配置的文件读取器（本地和全局）从输入文件读取文档，将它们处理成文档节点，并可选地将图像节点与文本节点分离。
 
-add_english_doc('rag.DocListManager.table_inited', """\
-Checks if the database tables have been initialized.
+Args:
+    input_files (Optional[List[str]]): 要读取的文件路径列表。如果为None，使用初始化时指定的文件。
+    metadatas (Optional[Dict]): 与加载文档关联的额外元数据。
+    split_image_nodes (bool): 是否将图像节点与文本节点分离。如果为True，返回(text_nodes, image_nodes)的元组。如果为False，一起返回所有节点。
 
 **Returns:**
-- bool: True if the tables have been initialized, False otherwise.
-""")
+- Union[List[DocNode], Tuple[List[DocNode], List[ImageDocNode]]]: 如果split_image_nodes为False，返回所有文档节点的列表。如果为True，返回包含文本节点和图像节点的元组。
+''')
 
-add_english_doc('rag.DocListManager._init_tables', """\
-Initializes the database tables. This method should be called when the tables have not been initialized yet, creating the necessary table structures.
-""")
+# ---------------------------------------------------------------------------- #
 
-add_english_doc('rag.DocListManager.list_files', """\
-Lists files that meet the specified criteria.
+# rag/utils.py
+add_chinese_doc('rag.utils.DocListManager', """\
+抽象基类，用于管理文档列表和监控文档目录变化。
 
 Args:
-    limit (int, optional): Limit on the number of files to return.
-    details (bool): If True, return detailed file information.
-    status (str or list of str, optional): Filter files by status.
-    exclude_status (str or list of str, optional): Exclude files with these statuses.
+    path:要监控的文档目录路径。
+    name:管理器名称。
+    enable_path_monitoring:启用路径监控。
 
-**Returns:**
-- list: List of files.
 """)
 
-add_english_doc('rag.DocListManager.list_all_kb_group', """\
-Lists all the knowledge base group names.
-
-**Returns:**
-- list: List of knowledge base group names.
+add_chinese_doc('rag.utils.DocListManager.init_tables', """\
+确保数据库表默认分组存在。
 """)
 
-add_english_doc('rag.DocListManager.add_kb_group', """\
-Adds a new knowledge base group.
+add_chinese_doc('rag.utils.DocListManager.delete_files', """\
+将与文件关联的知识库条目设为删除中，并由各知识库进行异步删除解析结果及关联记录。
 
 Args:
-    name (str): Name of the group to add.
+    file_ids (list of str): 要删除的文件ID列表
 """)
 
-add_english_doc('rag.DocListManager.list_kb_group_files', """\
-Lists files in the specified knowledge base group.
-
-Args:
-    group (str, optional): Group name. Defaults to None, meaning all groups.
-    limit (int, optional): Limit on the number of files to return.
-    details (bool): If True, return detailed file information.
-    status (str or list of str, optional): Filter files by status.
-    exclude_status (str or list of str, optional): Exclude files with these statuses.
-    upload_status (str, optional): Filter by upload status.
-    exclude_upload_status (str or list of str, optional): Exclude files with these upload statuses.
-
-**Returns:**
-- list: List of files.
+add_chinese_doc('rag.utils.DocListManager.table_inited', """\
+检查数据库中的 `documents` 表是否已初始化。此方法在访问数据库时确保线程安全。
+判断数据库中是否存在 `documents` 表。
+返回值:
+    bool: 如果 `documents` 表存在，返回 `True`；否则返回 `False`。
+说明:
+    - 使用线程安全锁 (`self._db_lock`) 确保对数据库的安全访问。
+    - 通过 `self._db_path` 连接 SQLite 数据库，并使用 `check_same_thread` 配置选项。
+    - 执行 SQL 查询：`SELECT name FROM sqlite_master WHERE type='table' AND name='documents'` 来检查表是否存在。
 """)
 
-add_english_doc('rag.DocListManager.add_files', """\
-Adds files to the database.
+add_chinese_doc('rag.utils.DocListManager.validate_paths', '''\
+验证一组文件路径，以确保它们可以被正常处理。
+此方法检查提供的路径是否是新的、已处理的或当前正在处理的，并确保处理文档时不会发生冲突。
+参数:
+    paths (List[str]): 要验证的文件路径列表。
+返回值:
+    Tuple[bool, str, List[bool]]: 返回一个元组，包括：
+        - `bool`: 如果所有路径有效，则返回 `True`；否则返回 `False`。
+        - `str`: 表示成功或失败原因的消息。
+        - `List[bool]`: 一个布尔值列表，每个元素对应一个路径是否为新路径（`True` 表示新路径，`False` 表示已存在）。
+说明:
+    - 如果任何文档仍在处理中或需要重新解析，该方法会返回 `False`，并附带相应的错误消息。
+    - 方法通过数据库会话和线程安全锁 (`self._db_lock`) 检索文档状态信息。
+    - 不安全状态包括 `working` 和 `waiting`。
 
-Args:
-    files (list of str): List of file paths to add.
-    metadatas (list, optional): Metadata associated with the files.
-    status (str, optional): File status.
+''')
 
-**Returns:**
-- list: List of file IDs.
+add_chinese_doc('rag.utils.DocListManager.update_need_reparsing', '''\
+更新 `KBGroupDocuments` 表中某个文档的 `need_reparse` 状态。
+此方法设置指定文档的 `need_reparse` 标志，并可选限定到特定分组。
+参数:
+    doc_id (str): 要更新的文档ID。
+    need_reparse (bool): `need_reparse` 标志的新值。
+    group_name (Optional[str]): 如果提供，仅对指定分组应用更新；如果未提供，则对包含该文档的所有分组应用更新。
+说明:
+    - 使用线程安全锁 (`self._db_lock`) 确保数据库访问安全。
+    - `group_name` 参数允许将更新限定到特定分组；如果未提供，则更新应用于包含该文档的所有分组。
+    - 方法会立刻将更改提交到数据库。
+''')
+
+add_chinese_doc('rag.utils.DocListManager.list_files', """\
+从 `documents` 表中列出文件，并支持过滤、限制返回结果以及返回详细信息。
+此方法根据指定的条件，从数据库中检索文件ID或详细文件信息。
+参数:
+    limit (Optional[int]): 返回的最大文件数量。如果为 `None`，则返回所有匹配的文件。
+    details (bool): 是否返回详细的文件信息（`True`）或仅返回文件ID（`False`）。
+    status (Union[str, List[str]]): 要包含的状态或状态列表，默认为所有状态。
+    exclude_status (Optional[Union[str, List[str]]]): 要排除的状态或状态列表，默认为 `None`。
+返回值:
+    List: 如果 `details=False`，则返回文件ID列表；如果 `details=True`，则返回详细文件行的列表。
+说明:
+    - 该方法根据 `status` 和 `exclude_status` 条件动态构造查询。
+    - 使用线程安全锁 (`self._db_lock`) 确保数据库访问安全。
+    - 如果指定了 `limit`，查询会附加 `LIMIT` 子句。
 """)
 
-add_english_doc('rag.DocListManager.update_file_message', """\
-Updates the message for a specified file.
+add_chinese_doc('rag.utils.DocListManager.get_docs', '''\
+从数据库中检索类型为 `KBDocument` 的文档对象，基于提供的文档 ID 列表。
 
 Args:
-    fileid (str): File ID.
-    **kw: Additional key-value pairs to update.
-""")
+    doc_ids (List[str]): 要获取的文档 ID 列表。
+**Returns:**
+    List[KBDocument]: 与提供的文档 ID 对应的 `KBDocument` 对象列表。如果没有找到文档，将返回空列表。
+说明:
+    - 使用线程安全锁 (`self._db_lock`) 确保数据库访问的安全性。
+    - 查询使用 SQL 的 `IN` 子句，通过 `doc_id` 字段进行过滤。
+    - 如果 `doc_ids` 为空，函数将直接返回空列表，而不会查询数据库。
+''')
 
-add_english_doc('rag.DocListManager.add_files_to_kb_group', """\
-Adds files to the specified knowledge base group.
+add_chinese_doc('rag.utils.DocListManager.set_docs_new_meta', """\
+批量更新文档的元数据。
 
 Args:
-    file_ids (list of str): List of file IDs to add.
-    group (str): Name of the group to add the files to.
+    doc_meta (Dict[str, dict]): 文档ID到新元数据的映射字典。
+
 """)
 
-add_english_doc('rag.DocListManager.delete_files', """\
-Set the knowledge base entries associated with the document to "deleting," and have each knowledge base asynchronously delete parsed results and associated records.
+add_chinese_doc('rag.utils.DocListManager.fetch_docs_changed_meta', '''\
+获取指定组中元数据已更改的文档，并将其 `new_meta` 字段重置为 `None`。
+此方法检索元数据已更改（即 `new_meta` 不为 `None`）的所有文档，基于提供的组名。检索后，会将这些文档的 `new_meta` 字段重置为 `None`。
 
 Args:
-    file_ids (list of str): List of file IDs to delete.
-""")
+    group (str): 用于过滤文档的组名。
+**Returns:**
+    List[DocMetaChangedRow]: 包含文档 `doc_id` 和 `new_meta` 字段的行列表，表示元数据已更改的文档。
+说明:
+    - 使用线程安全锁 (`self._db_lock`) 确保数据库访问安全。
+    - 方法通过 SQL `JOIN` 操作连接 `KBDocument` 和 `KBGroupDocuments` 表以检索相关行。
+    - 在获取数据后，将受影响行的 `new_meta` 字段更新为 `None`，并将更改提交到数据库。
+''')
 
-add_english_doc('rag.DocListManager.delete_files_from_kb_group', """\
-Deletes files from the specified knowledge base group.
+add_chinese_doc('rag.utils.DocListManager.add_kb_group', """\
+添加一个新的知识库分组。
 
 Args:
-    file_ids (list of str): List of file IDs to delete.
-    group (str): Name of the group.
+    name (str): 要添加的分组名称。
 """)
 
-add_english_doc('rag.DocListManager.get_file_status', """\
-Retrieves the status of a specified file.
+add_chinese_doc('rag.utils.DocListManager.list_kb_group_files', '''\
+列出指定知识库组中的文件。
 
 Args:
-    fileid (str): File ID.
+    group (str): 用于过滤文件的 KB 组名。默认为 `None`。
+    limit (Optional[int]): 返回的最大文件数量。如果为 `None`，则返回所有匹配的文件。
+    details (bool): 返回详细的文件信息或仅返回文件 ID 和路径。
+    status (Union[str, List[str]]): 包含在结果中的 KB 组状态或状态列表。默认为所有状态。
+    exclude_status (Optional[Union[str, List[str]]): 从结果中排除的 KB 组状态或状态列表。默认为 `None`。
+    upload_status (Union[str, List[str]]): 包含在结果中的文档上传状态或状态列表。默认为所有状态。
+    exclude_upload_status (Optional[Union[str, List[str]]): 从结果中排除的文档上传状态或状态列表。默认为 `None`。
+    need_reparse (Optional[bool]): 过滤需要重新解析的文件或不需要重新解析的文件。默认为 `None`。
+**Returns:**:
+    List: 如果 `details=False`，返回包含 `(doc_id, path)` 的元组列表。
+          如果 `details=True`，返回包含附加元数据的详细行列表。
+说明:
+    - 方法根据提供的过滤条件动态构建 SQL 查询。
+    - 使用线程安全锁 (`self._db_lock`) 确保多线程环境下的数据库访问安全。
+    - 如果 `status` 或 `upload_status` 参数为列表，则会使用 SQL 的 `IN` 子句进行处理。
+''')
+
+add_chinese_doc('rag.utils.DocListManager.list_all_kb_group', """\
+列出所有知识库分组的名称。
 
 **Returns:**
-- str: The current status of the file.
+- list: 知识库分组名称列表。
 """)
 
-add_english_doc('rag.DocListManager.update_kb_group', """\
-Updates the record of kb_group_document.
+add_chinese_doc('rag.utils.DocListManager.add_files', '''\
+批量向文档列表中添加文件，可选附加元数据、状态，并支持分批处理。
+此方法将文件列表添加到数据库中，并为每个文件设置可选的元数据和初始状态。文件会以批量方式处理以提高效率。在文件添加完成后，它们会自动关联到默认的知识库 (KB) 组。
+Args:
+    files (List[str]): 添加的文件路径列表。
+    metadatas (Optional[List[Dict[str, Any]]]): 与文件对应的元数据字典列表。默认为 `None`。
+    status (Optional[str]): 添加文件的初始状态。默认为 `Status.waiting`。
+    batch_size (int): 每批处理的文件数量。默认为 64。
+**Returns:**:
+    List[DocPartRow]: 包含已添加文件及其相关信息的 `DocPartRow` 对象列表。
+说明:
+    - 方法首先通过辅助函数 `_add_doc_records` 创建文档记录。
+    - 文件添加后，会自动关联到默认的知识库组 (`DocListManager.DEFAULT_GROUP_NAME`)。
+    - 批量处理确保在添加大量文件时具有良好的可扩展性。
+''')
+
+add_chinese_doc('rag.utils.DocListManager.delete_unreferenced_doc', '''\
+删除数据库中标记为 "删除中" 且不再被引用的文档。
+此方法从数据库中删除满足以下条件的文档：
+1. 文档状态为 `DocListManager.Status.deleting`。
+2. 文档的引用计数 (`count`) 为 0。
+''')
 
+add_chinese_doc('rag.utils.DocListManager.get_docs_need_reparse', '''\
+获取需要重新解析 (`need_reparse=True`)的指定组中的文档。
+此方法检索标记为需要重新解析 (`need_reparse=True`) 的文档，基于提供的组名。仅包含状态为 `success` 或 `failed` 的文档。
 Args:
-    cond_file_ids (list of str, optional): a list of file IDs to filter by, default None.
-    cond_group (str, optional): a kb_group name to filter by, default None.
-    cond_status_list (list of str, optional): a list of statuses to filter by, default None.
-    new_status (str, optional): the new status to update to, default None
-    new_need_reparse (bool, optinoal): the new need_reparse flag to update to, default None
+    group (str): 用于过滤文档的组名。
+**Returns:**:
+    List[KBDocument]: 需要重新解析的 `KBDocument` 对象列表。
+说明:
+    - 使用线程安全锁 (`self._db_lock`) 确保多线程环境下的数据库访问安全。
+    - 查询通过 SQL `JOIN` 操作连接 `KBDocument` 和 `KBGroupDocuments` 表，并基于组名和重新解析状态进行过滤。
+    - 仅状态为 `success` 或 `failed` 且 `need_reparse=True` 的文档会被检索出来。
+''')
 
-**Returns:**
-- list: updated records, list of (doc_id, group_name)
-""")
+add_chinese_doc('rag.utils.DocListManager.get_existing_paths_by_pattern', '''\
+根据给定的模式，检索符合条件的文档路径。
+此方法从数据库中获取所有符合提供的 SQL `LIKE` 模式的文档路径。
+Args:
+    pattern (str): 用于过滤文档路径的 SQL `LIKE` 模式。例如，`%example%` 匹配包含单词 "example" 的路径。
+**Returns:**:
+    List[str]: 符合给定模式的文档路径列表。如果没有匹配的路径，则返回空列表。
+说明:
+    - 使用线程安全锁 (`self._db_lock`) 确保多线程环境下的数据库访问安全。
+    - SQL 查询中的 `LIKE` 操作符用于对文档路径进行模式匹配。
+''')
 
-add_english_doc('rag.DocListManager.release', """\
-Releases the resources of the current manager.
+add_chinese_doc('rag.utils.DocListManager.update_file_message', """\
+更新指定文件的消息。
+
+Args:
+    fileid (str): 文件ID。
+    **kw: 需要更新的其他键值对。
 """)
 
-# ---------------------------------------------------------------------------- #
+add_chinese_doc('rag.utils.DocListManager.update_file_status', """\
+更新指定文件的状态。
 
-add_chinese_doc('WebModule', '''\
-WebModule是LazyLLM为开发者提供的基于Web的交互界面。在初始化并启动一个WebModule之后，开发者可以从页面上看到WebModule背后的模块结构，并将Chatbot组件的输入传输给自己开发的模块进行处理。
-模块返回的结果和日志会直接显示在网页的“处理日志”和Chatbot组件上。除此之外，WebModule支持在网页上动态加入Checkbox或Text组件用于向模块发送额外的参数。
-WebModule页面还提供“使用上下文”，“流式输出”和“追加输出”的Checkbox，可以用来改变页面和后台模块的交互方式。
+Args:
+    file_ids (list of str): 更新状态的文件ID列表。
+    status (str): 目标状态。
+    cond_status_list(Union[None, List[str]]):限制只更新处于这些状态的文档
+""")
 
-<span style="font-size: 20px;">&ensp;**`WebModule.init_web(component_descs) -> gradio.Blocks`**</span>
-使用gradio库生成演示web页面，初始化session相关数据以便在不同的页面保存各自的对话和日志，然后使用传入的component_descs参数为页面动态添加Checkbox和Text组件，最后设置页面上的按钮和文本框的相应函数
-之后返回整个页面。WebModule的__init__函数调用此方法生成页面。
+add_chinese_doc('rag.utils.DocListManager.add_files_to_kb_group', """\
+将文件添加到指定的知识库分组中。
 
 Args:
-    component_descs (list): 用于动态向页面添加组件的列表。列表中的每个元素也是一个列表，其中包含5个元素，分别是组件对应的模块ID，模块名，组件名，组件类型（目前仅支持Checkbox和Text），组件默认值。
-''')
+    file_ids (list of str): 要添加的文件ID列表。
+    group (str): 要添加的分组名称。
+""")
 
-add_english_doc('WebModule', '''\
-WebModule is a web-based interactive interface provided by LazyLLM for developers. After initializing and starting
-a WebModule, developers can see structure of the module they provides behind the WebModule, and transmit the input
-of the Chatbot component to their modules. The results and logs returned by the module will be displayed on the
-“Processing Logs” and Chatbot component on the web page. In addition, Checkbox or Text components can be added
-programmatically to the web page for additional parameters to the background module. Meanwhile, The WebModule page
-provides Checkboxes of “Use Context,” “Stream Output,” and “Append Output,” which can be used to adjust the
-interaction between the page and the module behind.
+add_chinese_doc('rag.utils.DocListManager.delete_files_from_kb_group', """\
+从指定的知识库分组中删除文件。
 
-<span style="font-size: 20px;">&ensp;**`WebModule.init_web(component_descs) -> gradio.Blocks`**</span>
+Args:
+    file_ids (list of str): 要删除的文件ID列表。
+    group (str): 分组名称。
+""")
 
-Generate a demonstration web page based on gradio. The function initializes session-related data to save chat history
-and logs for different pages, then dynamically add Checkbox and Text components to the page according to component_descs
-parameter, and set the corresponding functions for the buttons and text boxes on the page at last.
-WebModule’s __init__ function calls this method to generate the page.
+add_chinese_doc('rag.utils.DocListManager.get_file_status', """\
+获取指定文件的状态。
 
 Args:
-    component_descs (list): A list used to add components to the page. Each element in the list is also a list containing
-    5 elements, which are the module ID, the module name, the component name, the component type (currently only
-    supports Checkbox and Text), and the default value of the component.
+    fileid (str): 文件ID。
 
-''')
+**Returns:**
+- str: 文件的当前状态。
+""")
 
-add_example('WebModule', '''\
->>> import lazyllm
->>> def func2(in_str, do_sample=True, temperature=0.0, *args, **kwargs):
-...     return f"func2:{in_str}|do_sample:{str(do_sample)}|temp:{temperature}"
-...
->>> m1=lazyllm.ActionModule(func2)
->>> m1.name="Module1"
->>> w = lazyllm.WebModule(m1, port=[20570, 20571, 20572], components={
-...         m1:[('do_sample', 'Checkbox', True), ('temperature', 'Text', 0.1)]},
-...                       text_mode=lazyllm.tools.WebModule.Mode.Refresh)
->>> w.start()
-193703: 2024-06-07 10:26:00 lazyllm SUCCESS: ...
-''')
+add_chinese_doc('rag.utils.DocListManager.update_kb_group', """\
+更新指定知识库分组中的内容。
 
-#actors/codegenerator
-add_chinese_doc('CodeGenerator', '''\
-代码生成模块。
+Args:
+    cond_file_ids (list of str, optional): 过滤使用的文件ID列表，默认为None。
+    cond_group (str, optional): 过滤使用的知识库分组名称，默认为None。
+    cond_status_list (list of str, optional): 过滤使用的状态列表，默认为None。
+    new_status (str, optional): 新状态, 默认为None。
+    new_need_reparse (bool, optinoal): 新的是否需重解析标志。
 
-该模块基于用户提供的提示词生成代码，会根据提示内容自动选择中文或英文的系统提示词，并从输出中提取 Python 代码片段。
+**Returns:**
+- list: 得到更新的列表list of (doc_id, group_name)
+""")
 
-`__init__(self, base_model, prompt="")`
-初始化代码生成器。
+add_chinese_doc('rag.utils.DocListManager.release', """\
+释放当前管理器的资源。
 
+""")
+
+add_chinese_doc('rag.utils.DocListManager.enable_path_monitoring', '''\
+启用或禁用文档管理器的路径监控功能。
+此方法用于启用或禁用文档管理器的路径监控功能。当启用时，会启动一个监控线程处理与路径相关的操作；当禁用时，会停止该线程并等待它终止。
 Args:
-    base_model (Union[str, TrainableModule, OnlineChatModuleBase]): 模型路径字符串，或已初始化的模型实例。
-    prompt (str): 用户自定义的代码生成提示词，可为中文或英文。
+    val (bool): 启用或禁用路径监控。
+说明:
+    - 如果 `val` 为 `True`，路径监控功能会通过将 `_monitor_continue` 设置为 `True` 并启动 `_monitor_thread` 来启用。
+    - 如果 `val` 为 `False`，路径监控功能会通过将 `_monitor_continue` 设置为 `False` 并等待 `_monitor_thread` 终止来禁用。
+    - 方法在管理监控线程时确保线程操作是安全的。
 ''')
 
+add_english_doc('rag.utils.DocListManager', """\
+Abstract base class for managing document lists and monitoring changes in a document directory.
 
-add_english_doc('CodeGenerator', '''\
-Code Generation Module.
+Args:
+    path: Path of the document directory to monitor.
+    name: Name of the manager.
+    enable_path_monitoring: Whether to enable path monitoring.
+""")
+
+add_english_doc('rag.utils.DocListManager.init_tables', """\
+Ensure that the default group exists in the database tables.
+""")
+
+add_english_doc('rag.utils.DocListManager.delete_files', """\
+Set the knowledge base entries associated with the document to "deleting," and have each knowledge base asynchronously delete parsed results and associated records.
+
+Args:
+    file_ids (list of str): List of file IDs to delete.
+""")
+
+add_english_doc('rag.utils.DocListManager.table_inited', """\
+Checks if the database table `documents` is initialized. This method ensures thread-safety when accessing the database.
+Determines whether the `documents` table exists in the database.
+Returns:
+    bool: `True` if the `documents` table exists, `False` otherwise.
+Notes:
+    - Uses a thread-safe lock (`self._db_lock`) to ensure safe access to the database.
+    - Establishes a connection to the SQLite database at `self._db_path` with the `check_same_thread` option.
+    - Executes the SQL query: `SELECT name FROM sqlite_master WHERE type='table' AND name='documents'` to check for the table.
+""")
+
+add_english_doc('rag.utils.DocListManager.validate_paths', '''\
+Validates a list of file paths to ensure they are ready for processing.
+This method checks whether the provided paths are new, already processed, or currently being processed. It ensures there are no conflicts in processing the documents.
+Args
+    paths (List[str]): A list of file paths to validate.
+Returns:
+    Tuple[bool, str, List[bool]]: A tuple containing:
+        - `bool`: `True` if all paths are valid, `False` otherwise.
+        - `str`: A message indicating success or the reason for failure.
+        - `List[bool]`: A list where each element corresponds to whether a path is new (`True`) or already exists (`False`).
+Notes:
+    - If any document is still being processed or needs reparsing, the method returns `False` with an appropriate error message.
+    - The method uses a database session and thread-safe lock (`self._db_lock`) to retrieve document status information.
+    - Unsafe statuses include `working` and `waiting`.
+
+''')
+
+
+add_english_doc('rag.utils.DocListManager.update_need_reparsing', '''\
+Updates the `need_reparse` status of a document in the `KBGroupDocuments` table.
+This method sets the `need_reparse` flag for a specific document, optionally scoped to a given group.
+Args:
+    doc_id (str): The ID of the document to update.
+    need_reparse (bool): The new value for the `need_reparse` flag.
+    group_name (Optional[str]): If provided, the update will be applied only to the specified group.
+Notes:
+    - Uses a thread-safe lock (`self._db_lock`) to ensure safe database access.
+    - The `group_name` parameter allows scoping the update to a specific group; if not provided, the update applies to all groups containing the document.
+    - The method commits the change to the database immediately.
+''')
+
+add_english_doc('rag.utils.DocListManager.list_files', """\
+Lists files from the `documents` table with optional filtering, limiting, and returning details.
+This method retrieves file IDs or detailed file information from the database, based on the specified filtering conditions.
+Args:
+    limit (Optional[int]): Maximum number of files to return. If `None`, all matching files will be returned.
+    details (bool): Whether to return detailed file information (`True`) or just file IDs (`False`).
+    status (Union[str, List[str]]): The status or list of statuses to include in the results. Defaults to all statuses.
+    exclude_status (Optional[Union[str, List[str]]]): The status or list of statuses to exclude from the results. Defaults to `None`.
+Returns:
+    List: A list of file IDs if `details=False`, or a list of detailed file rows if `details=True`.
+Notes:
+    - The method constructs a query dynamically based on the provided `status` and `exclude_status` conditions.
+    - A thread-safe lock (`self._db_lock`) ensures safe database access.
+    - The `LIMIT` clause is applied if `limit` is specified.
+""")
+
+add_english_doc('rag.utils.DocListManager.get_docs', '''\
+This method retrieves document objects of type `KBDocument` from the database for the provided list of document IDs.
+Args:
+    doc_ids (List[str]): A list of document IDs to fetch.
+Returns:
+    List[KBDocument]: A list of `KBDocument` objects corresponding to the provided document IDs. If no documents are found, an empty list is returned.
+Notes:
+    - The method uses a thread-safe lock (`self._db_lock`) to ensure safe database access.
+    - It performs a SQL join between `KBDocument` and `KBGroupDocuments` to retrieve the relevant rows.
+    - After fetching, it updates the `new_meta` field of the affected rows to `None` and commits the changes to the database.
+''')
+
+add_english_doc('rag.utils.DocListManager.set_docs_new_meta', """\
+Batch update metadata for documents.
+
+Args:
+    doc_meta (Dict[str, dict]): A dictionary mapping document IDs to their new metadata.
+""")
+
+add_english_doc('rag.utils.DocListManager.fetch_docs_changed_meta', '''\
+List files in a specific knowledge base (KB) group with optional filters, limiting, and details.
+This method retrieves files from the `kb_group_documents` table, optionally filtering by group, document status, upload status, and whether reparsing is needed.
+Args:
+    group (str): The name of the group to filter documents by.
+**Returns:**
+    List[DocMetaChangedRow]: A list of rows, where each row contains the `doc_id` and the `new_meta` field of documents with changed metadata.
+Notes:
+    - This method constructs a SQL query dynamically based on the provided filters.
+    - Uses a thread-safe lock (`self._db_lock`) to ensure safe database access.
+    - If `status` or `upload_status` are provided as lists, they are processed with SQL `IN` clauses.
+''')
+
+add_english_doc('rag.DocListManager.list_all_kb_group', """\
+Lists all the knowledge base group names.
+
+**Returns:**
+- list: List of knowledge base group names.
+""")
+
+add_english_doc('rag.DocListManager.add_kb_group', """\
+Adds a new knowledge base group.
+
+Args:
+    name (str): Name of the group to add.
+""")
+
+add_english_doc('rag.utils.DocListManager.list_kb_group_files', '''\
+List files in a specific knowledge base group .
+
+Args:
+    group (str): The name of the KB group to filter files by. Defaults to `None` .
+    limit (Optional[int]): Maximum number of files to return. If `None`, returns all matching files.
+    details (bool): Whether to return detailed file information or only file IDs and paths.
+    status (Union[str, List[str]]): The KB group status or list of statuses to include in the results. Defaults to all statuses.
+    exclude_status (Optional[Union[str, List[str]]): The KB group status or list of statuses to exclude from the results. Defaults to `None`.
+    upload_status (Union[str, List[str]]): The document upload status or list of statuses to include in the results. Defaults to all statuses.
+    exclude_upload_status (Optional[Union[str, List[str]]): The document upload status or list of statuses to exclude from the results. Defaults to `None`.
+    need_reparse (Optional[bool]): Whether to filter files that need reparsing or not . Defaults to `None` .
+**Returns:**:
+    List: If `details=False`, returns a list of tuples containing `(doc_id, path)`. 
+          If `details=True`, returns a list of detailed rows with additional metadata.
+Notes:
+    - The method first creates document records using the `_add_doc_records` helper function.
+    - After the files are added, they are automatically linked to the default KB group (`DocListManager.DEFAULT_GROUP_NAME`).
+    - Batch processing ensures scalability when adding a large number of files.
+''')
+
+add_english_doc('rag.utils.DocListManager.add_files', '''\
+Add multiple files to the document list with optional metadata, status, and batch processing.
+This method adds a list of files to the database and sets optional metadata and initial status for each file. The files are processed in batches for efficiency. After the files are added, they are automatically associated with the default knowledge base (KB) group.
+Args:
+    files (List[str]): A list of file paths to add to the database.
+    metadatas (Optional[List[Dict[str, Any]]]): A list of metadata dictionaries corresponding to the files. If `None`, no metadata will be associated. Defaults to `None`.
+    status (Optional[str]): The initial status for the added files. Defaults to `Status.waiting`.
+    batch_size (int): The number of files to process in each batch. Defaults to 64.
+**Returns:**:
+    List[DocPartRow]: A list of `DocPartRow` objects representing the added files and their associated information.
+Notes:
+- The method first creates document records using the helper function _add_doc_records.
+- After the files are added, they are automatically linked to the default knowledge base group (DocListManager.DEFAULT_GROUP_NAME).
+- Batch processing ensures good scalability when adding a large number of files.
+
+
+''')
+
+add_english_doc('rag.utils.DocListManager.delete_unreferenced_doc', '''\
+Delete documents marked as "deleting" and no longer referenced in the database.
+This method removes documents from the database that meet the following conditions:
+1. Their status is set to `DocListManager.Status.deleting`.
+2. Their reference count (`count`) is 0.
+''')
+
+add_english_doc('rag.utils.DocListManager.get_docs_need_reparse', '''\
+Retrieve documents that require reparsing for a specific group.
+This method fetches documents that are marked as needing reparsing (`need_reparse=True`) for the given group. Only documents with a status of `success` or `failed` are included in the results.
+Args:
+    group (str): The name of the group to filter documents by.
+**Returns:**:
+    List[KBDocument]: A list of `KBDocument` objects that need reparsing.
+Notes:
+    - The method uses a thread-safe lock (`self._db_lock`) to ensure safe database access.
+    - The query performs a SQL `JOIN` between `KBDocument` and `KBGroupDocuments` to filter by group and reparse status.
+    - Documents with `need_reparse=True` and a status of `success` or `failed` are considered for reparsing.
+''')
+
+add_english_doc('rag.utils.DocListManager.get_existing_paths_by_pattern', '''\
+Retrieve existing document paths that match a given pattern.
+This method fetches all document paths from the database that match the provided SQL `LIKE` pattern.
+Args:
+    pattern (str): The SQL `LIKE` pattern to filter document paths. For example, `%example%` matches paths containing the word "example".
+**Returns:**:
+    List[str]: A list of document paths that match the given pattern. If no paths match, an empty list is returned.
+Notes:
+    - The method uses a thread-safe lock (`self._db_lock`) to ensure safe database access.
+    - The `LIKE` operator in the SQL query is used to perform pattern matching on document paths.
+''')
+
+add_english_doc('rag.DocListManager.update_file_message', """\
+Updates the message for a specified file.
+
+Args:
+    fileid (str): File ID.
+    **kw: Additional key-value pairs to update.
+""")
+
+add_english_doc('rag.DocListManager.update_file_status', """\
+Update the status of specified files.
+
+Args:
+    file_ids (list of str): List of file IDs whose status needs to be updated.
+    status (str): Target status to set.
+    cond_status_list (Union[None, List[str]]): Optional. Only update files currently in these statuses.
+""")
+
+add_english_doc('rag.DocListManager.add_files_to_kb_group', """\
+Adds files to the specified knowledge base group.
+
+Args:
+    file_ids (list of str): List of file IDs to add.
+    group (str): Name of the group to add the files to.
+""")
+
+add_english_doc('rag.DocListManager.delete_files_from_kb_group', """\
+Deletes files from the specified knowledge base group.
+
+Args:
+    file_ids (list of str): List of file IDs to delete.
+    group (str): Name of the group.
+""")
+
+add_english_doc('rag.DocListManager.get_file_status', """\
+Retrieves the status of a specified file.
+
+Args:
+    fileid (str): File ID.
+
+**Returns:**
+- str: The current status of the file.
+""")
+
+add_english_doc('rag.DocListManager.update_kb_group', """\
+Updates the record of kb_group_document.
+
+Args:
+    cond_file_ids (list of str, optional): a list of file IDs to filter by, default None.
+    cond_group (str, optional): a kb_group name to filter by, default None.
+    cond_status_list (list of str, optional): a list of statuses to filter by, default None.
+    new_status (str, optional): the new status to update to, default None
+    new_need_reparse (bool, optinoal): the new need_reparse flag to update to, default None
+
+**Returns:**
+- list: updated records, list of (doc_id, group_name)
+""")
+
+add_english_doc('rag.DocListManager.release', """\
+Releases the resources of the current manager.
+""")
+
+add_english_doc('rag.utils.DocListManager.enable_path_monitoring', '''\
+Enable or disable path monitoring for the document manager.
+This method enables or disables the path monitoring functionality in the document manager. When enabled, a monitoring thread starts to handle path-related operations. When disabled, the thread stops and joins (waits for it to terminate).
+Args:
+    val (bool): Whether to enable or disable path monitoring.
+Notes:
+    - If `val` is `True`, path monitoring is enabled by setting `_monitor_continue` to `True` and starting the `_monitor_thread`.
+    - If `val` is `False`, path monitoring is disabled by setting `_monitor_continue` to `False` and joining the `_monitor_thread` if it is running.
+    - This method ensures thread-safe operation when managing the monitoring thread.
+''')
+
+add_example('rag.utils.DocListManager', '''
+>>> import lazyllm
+>>> from lazyllm.rag.utils import DocListManager
+>>> manager = DocListManager(path='your_file_path/', name="test_manager", enable_path_monitoring=False)
+>>> added_docs = manager.add_files([test_file_list])
+>>> manager.enable_path_monitoring(True)
+>>> deleted = manager.delete_files([delete_file_list])
+''')
+
+add_chinese_doc('rag.utils.SqliteDocListManager', '''\
+基于 SQLite 的文档管理器，用于本地文件的持久化存储、状态管理与元信息追踪。
+
+该类继承自 DocListManager，利用 SQLite 数据库存储文档记录。适用于管理具有唯一标识符的本地文档资源，并提供便捷的插入、查询、更新与状态过滤接口，支持可选的路径监控功能。
+
+Args:
+    path (str): 数据库存储路径。
+    name (str): 数据库文件名（不包含路径）。
+    enable_path_monitoring (bool): 是否启用对文件路径的变动监控，默认为 True。
+''')
+
+add_english_doc('rag.utils.SqliteDocListManager', '''\
+SQLite-based document manager for persistent local file storage, status tracking, and metadata management.
+
+This class inherits from DocListManager and uses a SQLite backend to store document records. It is suitable for managing locally identified documents with support for inserting, querying, updating, and filtering based on status. Optional file path monitoring is also supported.
+
+Args:
+    path (str): Directory path to store the database.
+    name (str): Name of the SQLite database file (without path).
+    enable_path_monitoring (bool): Whether to enable path monitoring. Defaults to True.
+''')
+
+add_example('rag.utils.SqliteDocListManager', '''\
+>>> from lazyllm.tools.rag.utils import SqliteDocListManager
+>>> manager = SqliteDocListManager(path="./data", name="docs.sqlite")
+>>> manager.insert({"uid": "doc_001", "name": "example.txt", "status": "ready"})
+>>> print(manager.get("doc_001"))
+>>> files = manager.list_files(limit=5, details=True)
+>>> print(files)
+''')
+
+add_chinese_doc('rag.utils.SqliteDocListManager.table_inited', '''\
+检查数据库中是否已存在名为 "documents" 的表。
+
+该方法通过查询 sqlite_master 元信息表，判断数据表是否已初始化。
+
+**Returns:**\n
+- bool: 如果 "documents" 表存在，返回 True；否则返回 False。
+''')
+
+add_english_doc('rag.utils.SqliteDocListManager.table_inited', '''\
+Checks whether the "documents" table has been initialized in the database.
+
+The method queries the sqlite_master metadata table to verify if the "documents" table exists.
+
+**Returns:**\n
+- bool: True if the "documents" table exists, False otherwise.
+''')
+
+add_chinese_doc('rag.utils.SqliteDocListManager.get_status_cond_and_params', '''\
+生成用于文档状态筛选的 SQL 条件语句及其参数列表。
+
+根据传入的包含状态和排除状态，构造 WHERE 子句中使用的 SQL 表达式。支持字段名前缀，用于联表查询等场景。
+
+Args:
+    status (str 或 list of str): 要包含的文档状态。若为 "all"，不添加包含条件。
+    exclude_status (str 或 list of str, optional): 要排除的文档状态。不能为 "all"。
+    prefix (str, optional): 字段名前缀（如联表查询中的别名），将应用于字段名。
+
+**Returns:**\n
+- Tuple[str, list]: 包含 SQL 条件语句和对应参数的元组。
+''')
+
+
+add_english_doc('rag.utils.SqliteDocListManager.get_status_cond_and_params', '''\
+Generates SQL condition expressions and parameter values for filtering documents by status.
+
+Builds WHERE clause components using the given inclusion and exclusion statuses. Supports field name prefixing for use in joined queries.
+
+Args:
+    status (str or list of str): Document status(es) to include. If set to "all", no inclusion condition will be applied.
+    exclude_status (str or list of str, optional): Status(es) to exclude. Must not be "all".
+    prefix (str, optional): Optional field prefix (e.g., table alias) to prepend to the status field.
+
+**Returns:**\n
+- Tuple[str, list]: A tuple containing the SQL condition string and its corresponding parameter values.
+''')
+
+add_chinese_doc('rag.utils.SqliteDocListManager.validate_paths', '''\
+验证输入路径所对应的文档是否可以安全添加到数据库。
+
+该方法会检查每个路径是否对应已有文档，若已存在，需判断其状态是否允许重解析。
+若文档正在解析或等待解析，或上次重解析未完成，则视为不可用。
+
+Args:
+    paths (List[str]): 文件路径列表。
+
+**Returns:**\n
+- Tuple[bool, str, List[bool]]: 
+    - bool: 是否所有路径都验证通过。
+    - str: 成功或失败的描述信息。
+    - List[bool]: 与输入路径一一对应的布尔列表，表示该路径是否为新文档（True 为新文档，False 为已存在）。
+        若验证失败，返回值为 None。
+''')
+
+add_english_doc('rag.utils.SqliteDocListManager.validate_paths', '''\
+Validates whether the documents corresponding to the given paths can be safely added to the database.
+
+The method checks if the document already exists. If it exists, it verifies whether the document is currently
+being parsed, waiting to be parsed, or was not successfully re-parsed last time.
+
+Args:
+    paths (List[str]): A list of file paths to validate.
+
+**Returns:**\n
+- Tuple[bool, str, List[bool]]: 
+    - bool: Whether all paths passed validation.
+    - str: Description message of the validation result.
+    - List[bool]: A boolean list corresponding to input paths, indicating whether each path is new (True) or already exists (False).
+      If validation fails, this value is None.
+''')
+
+add_chinese_doc('rag.utils.SqliteDocListManager.update_need_reparsing', '''\
+更新指定文档的重解析标志位。
+
+该方法用于设置某个文档是否需要重新解析。可以选择性地指定知识库分组进行精确匹配。
+
+Args:
+    doc_id (str): 文档的唯一标识符。
+    need_reparse (bool): 是否需要重新解析文档。
+    group_name (Optional[str]): 可选，所属的知识库分组名称。如果提供，将仅更新指定分组中的文档。
+''')
+
+add_english_doc('rag.utils.SqliteDocListManager.update_need_reparsing', '''\
+Updates the re-parsing flag for a specific document.
+
+This method sets whether a document should be re-parsed. If a group name is provided, the update is scoped to that group only.
+
+Args:
+    doc_id (str): The unique identifier of the document.
+    need_reparse (bool): Whether the document needs to be re-parsed.
+    group_name (Optional[str]): Optional. The knowledge base group name to filter by. If provided, only documents in the specified group will be updated.
+''')
+
+add_chinese_doc('rag.utils.SqliteDocListManager.list_files', """\
+列出文档数据库中符合状态条件的文件，并根据参数选择返回完整记录或仅返回文件路径。
+
+Args:
+    limit (Optional[int]): 要返回的记录数上限，若为 None 则返回所有符合条件的记录。
+    details (bool): 是否返回完整的数据库行信息，若为 False 则仅返回文档路径（ID）。
+    status (Union[str, List[str]]): 要包含在结果中的状态值，默认为包含所有状态。
+    exclude_status (Optional[Union[str, List[str]]]): 要从结果中排除的状态值。
+
+**Returns:**\n
+- list: 文件记录列表或文档路径列表，具体取决于 `details` 参数。
+""")
+
+add_english_doc('rag.utils.SqliteDocListManager.list_files', """\
+Lists files in the document database based on status filters and returns either full records or file paths.
+
+Args:
+    limit (Optional[int]): The maximum number of records to return. If None, all matching records are returned.
+    details (bool): Whether to return full database rows or just file paths (document IDs).
+    status (Union[str, List[str]]): Status values to include in the result. Defaults to including all.
+    exclude_status (Optional[Union[str, List[str]]]): Status values to exclude from the result.
+
+**Returns:**\n
+- list: A list of file records or document paths depending on the `details` flag.
+""")
+
+add_chinese_doc('rag.utils.SqliteDocListManager.get_docs', '''\
+根据给定的文档ID列表，从数据库中获取对应的文档对象列表。
+
+Args:
+    doc_ids (List[str]): 需要查询的文档ID列表。
+
+**Returns:**\n
+- List[KBDocument]: 匹配的文档对象列表。如果没有匹配项，返回空列表。
+''')
+
+add_english_doc('rag.utils.SqliteDocListManager.get_docs', '''\
+Fetches document objects from the database corresponding to the given list of document IDs.
+
+Args:
+    doc_ids (List[str]): A list of document IDs to query.
+
+**Returns:**\n
+- List[KBDocument]: A list of matching document objects. Returns an empty list if no matches found.
+''')
+
+add_chinese_doc('rag.utils.SqliteDocListManager.set_docs_new_meta', '''\
+批量更新文档的元数据（meta），同时更新对应知识库分组中文档的 new_meta 字段（非等待状态的文档）。
+
+Args:
+    doc_meta (Dict[str, dict]): 字典，键为文档ID，值为对应的新元数据字典。
+''')
+
+add_english_doc('rag.utils.SqliteDocListManager.set_docs_new_meta', '''\
+Batch updates the metadata (meta) of documents, and simultaneously updates the new_meta field of documents in knowledge base groups for documents that are not in waiting status.
+
+Args:
+    doc_meta (Dict[str, dict]): A dictionary mapping document IDs to their new metadata dictionaries.
+''')
+
+add_chinese_doc('rag.utils.SqliteDocListManager.fetch_docs_changed_meta', '''\
+获取指定知识库分组中元数据发生变化的文档列表，并将对应的 new_meta 字段清空。
+
+Args:
+    group (str): 知识库分组名称。
+
+**Returns:**\n
+- List[DocMetaChangedRow]: 包含文档ID及其对应新元数据的列表。
+''')
+
+add_english_doc('rag.utils.SqliteDocListManager.fetch_docs_changed_meta', '''\
+Fetches the list of documents within a specified knowledge base group that have updated metadata, and resets the new_meta field for those documents.
+
+Args:
+    group (str): Name of the knowledge base group.
+
+**Returns:**\n
+- List[DocMetaChangedRow]: A list containing document IDs and their updated metadata.
+''')
+
+add_chinese_doc('rag.utils.SqliteDocListManager.list_all_kb_group', '''\
+列出数据库中所有的知识库分组名称。
+
+**Returns:**\n
+- List[str]: 知识库分组名称列表。
+''')
+
+add_english_doc('rag.utils.SqliteDocListManager.list_all_kb_group', '''\
+Lists all knowledge base group names stored in the database.
+
+**Returns:**\n
+- List[str]: A list of knowledge base group names.
+''')
+
+add_chinese_doc('rag.utils.SqliteDocListManager.add_kb_group', '''\
+向数据库中添加一个新的知识库分组名称，若已存在则忽略。
+
+Args:
+    name (str): 要添加的知识库分组名称。
+''')
+
+add_english_doc('rag.utils.SqliteDocListManager.add_kb_group', '''\
+Adds a new knowledge base group name to the database; ignores if the group already exists.
+
+Args:
+    name (str): The name of the knowledge base group to add.
+''')
+
+add_chinese_doc('rag.utils.SqliteDocListManager.list_kb_group_files', '''\
+列出指定知识库分组中的文件信息，可根据多种条件进行过滤。
+
+Args:
+    group (str, optional): 知识库分组名称，若为 None 则不按分组过滤。
+    limit (int, optional): 限制返回的文件数量。
+    details (bool): 是否返回详细的文件信息。
+    status (str or List[str], optional): 过滤知识库分组中文件的状态。
+    exclude_status (str or List[str], optional): 排除指定状态的文件。
+    upload_status (str or List[str], optional): 过滤文件上传状态。
+    exclude_upload_status (str or List[str], optional): 排除指定的上传状态。
+    need_reparse (bool, optional): 是否只返回需要重新解析的文件。
+
+**Returns:**\n
+- list: 
+    - 如果 details 为 False，返回列表，每个元素为 (doc_id, path) 元组。
+    - 如果 details 为 True，返回包含文件详细信息的元组列表，包括文档ID、路径、状态、元数据，
+      知识库分组名、分组内状态及日志。
+''')
+
+add_english_doc('rag.utils.SqliteDocListManager.list_kb_group_files', '''\
+Lists files in a specified knowledge base group, with support for multiple filters.
+
+Args:
+    group (str, optional): Knowledge base group name to filter by. If None, no group filtering is applied.
+    limit (int, optional): Limit on the number of files to return.
+    details (bool): Whether to return detailed file information.
+    status (str or List[str], optional): Filter files by group document status.
+    exclude_status (str or List[str], optional): Exclude files with these group document statuses.
+    upload_status (str or List[str], optional): Filter files by upload document status.
+    exclude_upload_status (str or List[str], optional): Exclude files with these upload document statuses.
+    need_reparse (bool, optional): If set, only returns files marked as needing reparse.
+
+**Returns:**\n
+- list: 
+    - If details is False, returns a list of tuples (doc_id, path).
+    - If details is True, returns a list of tuples containing detailed file information:
+      document ID, path, status, metadata, group name, group status, and group log.
+''')
+
+add_chinese_doc('rag.utils.SqliteDocListManager.delete_unreferenced_doc', '''\
+删除数据库中标记为删除且未被任何知识库分组引用的文档记录。
+
+该方法会查找状态为“deleting”且引用计数为0的文档，删除这些文档记录，并记录删除操作日志。
+
+''')
+
+add_english_doc('rag.utils.SqliteDocListManager.delete_unreferenced_doc', '''\
+Deletes documents from the database that are marked for deletion and are no longer referenced by any knowledge base group.
+
+This method queries documents with status "deleting" and a reference count of zero, deletes them from the database,
+and adds operation logs for these deletions.
+
+''')
+
+add_chinese_doc('rag.utils.SqliteDocListManager.get_docs_need_reparse', '''\
+获取指定知识库分组中需要重新解析的文档列表。
+
+仅返回状态为“success”或“failed”的文档，且其对应的知识库分组记录标记为需要重新解析。
+
+Args:
+    group (str): 知识库分组名称。
+
+**Returns:**\n
+- List[KBDocument]: 需要重新解析的文档列表。
+''')
+
+add_english_doc('rag.utils.SqliteDocListManager.get_docs_need_reparse', '''\
+Retrieves the list of documents that require re-parsing within a specified knowledge base group.
+
+Only documents with status "success" or "failed" and marked as needing reparse in the group are returned.
+
+Args:
+    group (str): Name of the knowledge base group.
+
+**Returns:**\n
+- List[KBDocument]: List of documents that need to be re-parsed.
+''')
+
+add_chinese_doc('rag.utils.SqliteDocListManager.get_existing_paths_by_pattern', '''\
+根据路径匹配模式获取已存在的文档路径列表。
+
+Args:
+    pattern (str): 路径匹配模式，支持SQL的LIKE通配符。
+
+**Returns:**\n
+- List[str]: 匹配到的已存在文档路径列表。
+''')
+
+add_english_doc('rag.utils.SqliteDocListManager.get_existing_paths_by_pattern', '''\
+Retrieves a list of existing document paths that match a given pattern.
+
+Args:
+    pattern (str): Path matching pattern, supports SQL LIKE wildcards.
+
+**Returns:**\n
+- List[str]: List of existing document paths matching the pattern.
+''')
+
+add_chinese_doc('rag.utils.SqliteDocListManager.update_file_message', '''\
+更新指定文件的字段信息。
+
+Args:
+    fileid (str): 文件的唯一标识符（doc_id）。
+    **kw: 需要更新的字段及其对应的值，键值对形式传入。
+''')
+
+add_english_doc('rag.utils.SqliteDocListManager.update_file_message', '''\
+Updates fields of the specified file record.
+
+Args:
+    fileid (str): Unique identifier of the file (doc_id).
+    **kw: Key-value pairs of fields to update and their new values.
+''')
+
+add_chinese_doc('rag.utils.SqliteDocListManager.update_file_status', '''\
+更新多个文件的状态，支持根据当前状态进行条件过滤。
+
+Args:
+    file_ids (List[str]): 需要更新状态的文件ID列表。
+    status (str): 要设置的新状态。
+    cond_status_list (Union[None, List[str]], optional): 仅更新当前状态在此列表中的文件，默认为 None，表示不筛选。
+
+**Returns:**\n
+- List[DocPartRow]: 返回更新后的文件ID和路径列表。
+''')
+
+add_english_doc('rag.utils.SqliteDocListManager.update_file_status', '''\
+Updates the status of multiple files, optionally filtered by current status.
+
+Args:
+    file_ids (List[str]): List of file IDs to update.
+    status (str): New status to set.
+    cond_status_list (Union[None, List[str]], optional): List of statuses to filter files that can be updated. Defaults to None.
+
+**Returns:**\n
+- List[DocPartRow]: List of updated file IDs and their paths.
+''')
+
+add_chinese_doc('rag.utils.SqliteDocListManager.add_files_to_kb_group', '''\
+将多个文件添加到指定的知识库分组中。
+
+该方法会将文件状态设置为等待处理（waiting），
+若添加成功，则对应文档的计数（count）加一。
+
+Args:
+    file_ids (List[str]): 需要添加的文件ID列表。
+    group (str): 知识库分组名称。
+''')
+
+add_english_doc('rag.utils.SqliteDocListManager.add_files_to_kb_group', '''\
+Adds multiple files to the specified knowledge base group.
+
+This method sets the file status to waiting.
+If successfully added, increments the document's count.
+
+Args:
+    file_ids (List[str]): List of file IDs to add.
+    group (str): Name of the knowledge base group.
+''')
+
+add_chinese_doc('rag.utils.SqliteDocListManager.delete_files_from_kb_group', '''\
+从指定的知识库分组中删除多个文件。
+
+删除成功后，对应文档的计数（count）减少，但不会低于0。
+若文档不存在，会记录警告日志。
+
+Args:
+    file_ids (List[str]): 需要删除的文件ID列表。
+    group (str): 知识库分组名称。
+''')
+
+add_english_doc('rag.utils.SqliteDocListManager.delete_files_from_kb_group', '''\
+Deletes multiple files from the specified knowledge base group.
+
+After deletion, decrements the document's count but not below zero.
+If the document is not found, logs a warning.
+
+Args:
+    file_ids (List[str]): List of file IDs to delete.
+    group (str): Name of the knowledge base group.
+''')
+
+add_chinese_doc('rag.utils.SqliteDocListManager.get_file_status', '''\
+获取指定文件的状态。
+
+Args:
+    fileid (str): 文件的唯一标识符。
+
+**Returns:**\n
+- Optional[Tuple]: 返回包含状态的元组，若文件不存在则返回 None。
+''')
+
+add_english_doc('rag.utils.SqliteDocListManager.get_file_status', '''\
+Gets the status of a specified file.
+
+Args:
+    fileid (str): Unique identifier of the file.
+
+**Returns:**\n
+- Optional[Tuple]: A tuple containing the status, or None if the file does not exist.
+''')
+
+add_chinese_doc('rag.utils.SqliteDocListManager.update_kb_group', '''\
+更新知识库分组中指定文件的状态和重解析需求。
+
+根据给定的文件ID列表、分组名及状态列表，批量更新对应文件在知识库分组中的状态及是否需要重解析标志。
+
+Args:
+    cond_file_ids (List[str]): 需要更新的文件ID列表。
+    cond_group (Optional[str]): 分组名称，若指定则只更新该分组内的文件。
+    cond_status_list (Optional[List[str]]): 仅更新状态匹配此列表的文件。
+    new_status (Optional[str]): 新的文件状态。
+    new_need_reparse (Optional[bool]): 新的重解析需求标志。
+
+**Returns:**\n
+- List[Tuple]: 返回更新后文件的doc_id、group_name及状态列表。
+''')
+
+add_english_doc('rag.utils.SqliteDocListManager.update_kb_group', '''\
+Updates the status and reparse need flag of specified files in a knowledge base group.
+
+Batch updates files' status and need_reparse flag within a knowledge base group based on file IDs, group name, and optional status filter.
+
+Args:
+    cond_file_ids (List[str]): List of file IDs to update.
+    cond_group (Optional[str]): Group name to filter files, if specified only updates files in this group.
+    cond_status_list (Optional[List[str]]): Only update files whose status is in this list.
+    new_status (Optional[str]): New status to set.
+    new_need_reparse (Optional[bool]): New flag indicating if reparse is needed.
+
+**Returns:**\n
+- List[Tuple]: List of tuples of updated files containing doc_id, group_name, and status.
+''')
+
+add_chinese_doc('rag.utils.SqliteDocListManager.release', '''\
+清空数据库中的所有文档、分组及相关操作日志数据。
+
+该操作会删除 documents、document_groups、kb_group_documents 和 operation_logs 表中的所有记录。
+
+''')
+
+add_english_doc('rag.utils.SqliteDocListManager.release', '''\
+Clears all documents, groups, and operation logs from the database.
+
+This operation deletes all records from documents, document_groups, kb_group_documents, and operation_logs tables.
+
+''')
+
+# ---------------------------------------------------------------------------- #
+
+add_chinese_doc('WebModule', '''\
+WebModule是LazyLLM为开发者提供的基于Web的交互界面。在初始化并启动一个WebModule之后，开发者可以从页面上看到WebModule背后的模块结构，并将Chatbot组件的输入传输给自己开发的模块进行处理。
+模块返回的结果和日志会直接显示在网页的“处理日志”和Chatbot组件上。除此之外，WebModule支持在网页上动态加入Checkbox或Text组件用于向模块发送额外的参数。
+WebModule页面还提供“使用上下文”，“流式输出”和“追加输出”的Checkbox，可以用来改变页面和后台模块的交互方式。
+
+<span style="font-size: 20px;">&ensp;**`WebModule.init_web(component_descs) -> gradio.Blocks`**</span>
+使用gradio库生成演示web页面，初始化session相关数据以便在不同的页面保存各自的对话和日志，然后使用传入的component_descs参数为页面动态添加Checkbox和Text组件，最后设置页面上的按钮和文本框的相应函数
+之后返回整个页面。WebModule的__init__函数调用此方法生成页面。
+
+Args:
+    component_descs (list): 用于动态向页面添加组件的列表。列表中的每个元素也是一个列表，其中包含5个元素，分别是组件对应的模块ID，模块名，组件名，组件类型（目前仅支持Checkbox和Text），组件默认值。
+''')
+
+add_english_doc('WebModule', '''\
+WebModule is a web-based interactive interface provided by LazyLLM for developers. After initializing and starting
+a WebModule, developers can see structure of the module they provides behind the WebModule, and transmit the input
+of the Chatbot component to their modules. The results and logs returned by the module will be displayed on the
+“Processing Logs” and Chatbot component on the web page. In addition, Checkbox or Text components can be added
+programmatically to the web page for additional parameters to the background module. Meanwhile, The WebModule page
+provides Checkboxes of “Use Context,” “Stream Output,” and “Append Output,” which can be used to adjust the
+interaction between the page and the module behind.
+
+<span style="font-size: 20px;">&ensp;**`WebModule.init_web(component_descs) -> gradio.Blocks`**</span>
+
+Generate a demonstration web page based on gradio. The function initializes session-related data to save chat history
+and logs for different pages, then dynamically add Checkbox and Text components to the page according to component_descs
+parameter, and set the corresponding functions for the buttons and text boxes on the page at last.
+WebModule’s __init__ function calls this method to generate the page.
+
+Args:
+    component_descs (list): A list used to add components to the page. Each element in the list is also a list containing
+    5 elements, which are the module ID, the module name, the component name, the component type (currently only
+    supports Checkbox and Text), and the default value of the component.
+
+''')
+
+add_example('WebModule', '''\
+>>> import lazyllm
+>>> def func2(in_str, do_sample=True, temperature=0.0, *args, **kwargs):
+...     return f"func2:{in_str}|do_sample:{str(do_sample)}|temp:{temperature}"
+...
+>>> m1=lazyllm.ActionModule(func2)
+>>> m1.name="Module1"
+>>> w = lazyllm.WebModule(m1, port=[20570, 20571, 20572], components={
+...         m1:[('do_sample', 'Checkbox', True), ('temperature', 'Text', 0.1)]},
+...                       text_mode=lazyllm.tools.WebModule.Mode.Refresh)
+>>> w.start()
+193703: 2024-06-07 10:26:00 lazyllm SUCCESS: ...
+''')
+
+add_chinese_doc('WebModule.init_web', '''\
+初始化 Web UI 页面。
+该方法使用 Gradio 构建对话界面，并将组件绑定到事件，支持会话选择、流式输出、上下文控制、多模态输入等功能。该方法返回构建完成的 Gradio Blocks 对象。
+Args:
+    component_descs (List[Tuple]): 组件描述列表，每项为五元组 (module, group_name, name, component_type, value)，
+        例如：('MyModule', 'GroupA', 'use_cache', 'Checkbox', True)。
+Returns:
+    gr.Blocks: 构建好的 Gradio 页面对象，可用于 launch 启动 Web 服务。
+''')
+
+add_english_doc('WebModule.init_web', '''\
+Initialize the Web UI page.
+This method uses Gradio to build the interactive chat interface and binds all components to the appropriate logic. It supports session selection, streaming output, context toggling, multimodal input, and control tools. The method returns the constructed Gradio Blocks object.
+Args:
+    component_descs (List[Tuple]): A list of component descriptors. Each element is a 5-tuple 
+        (module, group_name, name, component_type, value), e.g. ('MyModule', 'GroupA', 'use_cache', 'Checkbox', True).
+Returns:
+    gr.Blocks: The constructed Gradio UI object, which can be launched via `.launch()`.
+''')
+
+add_chinese_doc('WebModule.wait', '''\
+阻塞主线程，等待 Web 页面关闭。
+该方法会阻塞当前线程直到 Web 页面（Gradio demo）被关闭，适用于部署后阻止程序提前退出的场景。
+''')
+
+add_english_doc('WebModule.wait', '''\
+Block the main thread until the web interface is closed.
+This method blocks the current thread until the Gradio demo is closed. Useful in deployment scenarios to prevent premature program exit.
+''')
+
+add_chinese_doc('WebModule.stop', '''\
+关闭 Web 页面并清理资源。
+如果 Web 页面已初始化，则关闭 Gradio demo，释放资源并重置 `demo` 与 `url` 属性。
+''')
+
+add_english_doc('WebModule.stop', '''\
+Stop the web interface and clean up resources.
+If the web demo has been initialized, this method closes the Gradio demo, frees related resources, and resets `demo` and `url` attributes.
+''')
+
+#actors/codegenerator
+add_chinese_doc('CodeGenerator', '''\
+代码生成模块。
+
+该模块基于用户提供的提示词生成代码，会根据提示内容自动选择中文或英文的系统提示词，并从输出中提取 Python 代码片段。
+
+`__init__(self, base_model, prompt="")`
+初始化代码生成器。
+
+Args:
+    base_model (Union[str, TrainableModule, OnlineChatModuleBase]): 模型路径字符串，或已初始化的模型实例。
+    prompt (str): 用户自定义的代码生成提示词，可为中文或英文。
+''')
 
-This module generates code based on a user-defined prompt. It automatically selects a Chinese or English system prompt based on the input, and extracts Python code snippets from the output.
+
+add_english_doc('CodeGenerator', '''\
+Code Generation Module.
+
+This module generates code based on a user-defined prompt. It automatically selects a Chinese or English system prompt based on the input, and extracts Python code snippets from the output.
 
 `__init__(self, base_model, prompt="")`
 Initializes the code generator with a base model and prompt.
@@ -2308,6 +3919,28 @@ def my_reranker(node: DocNode, **kwargs):
 ...     return fibonacci(n-1) + fibonacci(n-2)
 '''])
 
+add_chinese_doc('CodeGenerator.choose_prompt', '''\
+根据输入的提示文本内容选择合适的代码生成提示模板。  
+如果提示中包含中文字符，则返回中文提示模板；否则返回英文提示模板。
+
+Args:
+    prompt (str): 输入的提示文本。
+
+**Returns:**\n
+- str: 选择的代码生成提示模板字符串。
+''')
+
+add_english_doc('CodeGenerator.choose_prompt', '''\
+Selects an appropriate code generation prompt template based on the content of the input prompt.  
+Returns the Chinese prompt template if Chinese characters are detected; otherwise returns the English prompt template.
+
+Args:
+    prompt (str): Input prompt text.
+
+**Returns:**\n
+- str: The selected code generation prompt template string.
+''')
+
 #actors/parameter_extractor
 add_chinese_doc('ParameterExtractor', '''\
 参数提取模块。
@@ -2392,6 +4025,50 @@ def my_reranker(node: DocNode, **kwargs):
 ... ['中国的最高山峰是哪一座？', '中国海拔最高的山是什么？']
 '''])
 
+# QustionRewrite.choose_prompt
+add_english_doc('QustionRewrite.choose_prompt', '''
+Choose the appropriate prompt template based on the language of the input prompt.
+
+This method analyzes the input prompt string and determines whether to use the Chinese or English prompt template. It checks each character in the prompt string and if any character falls within the Chinese Unicode range (\\u4e00-\\u9fff), it returns the Chinese prompt template; otherwise, it returns the English prompt template.
+
+Args:
+    prompt (str): The input prompt string to be analyzed for language detection.
+
+Returns:
+    str: The selected prompt template string (either Chinese or English version).
+''')
+
+add_chinese_doc('QustionRewrite.choose_prompt', '''
+根据输入提示的语言选择合适的提示模板。
+
+此方法分析输入提示字符串并确定使用中文还是英文提示模板。它检查提示字符串中的每个字符，如果任何字符落在中文字符Unicode范围内（\\u4e00-\\u9fff），则返回中文提示模板；否则返回英文提示模板。
+
+Args:
+    prompt (str): 要分析语言检测的输入提示字符串。
+
+Returns:
+    str: 选定的提示模板字符串（中文或英文版本）。
+''')
+
+add_example('QustionRewrite.choose_prompt', '''
+>>> from lazyllm.tools.actors.qustion_rewrite import QustionRewrite
+
+# Example 1: English prompt (no Chinese characters)
+>>> rewriter = QustionRewrite("gpt-3.5-turbo")
+>>> prompt_template = rewriter.choose_prompt("How to implement machine learning?")
+>>> print("Template contains Chinese:", "中文" in prompt_template)
+Template contains Chinese: False
+
+# Example 2: Chinese prompt (contains Chinese characters)
+>>> prompt_template = rewriter.choose_prompt("如何实现机器学习？")
+>>> print("Template contains Chinese:", "中文" in prompt_template)
+Template contains Chinese: True
+
+# Example 3: Mixed language prompt (contains Chinese characters)
+>>> prompt_template = rewriter.choose_prompt("What is 机器学习?")
+>>> print("Template contains Chinese:", "中文" in prompt_template)
+Template contains Chinese: True
+''')
 
 add_chinese_doc('ToolManager', '''\
 ToolManager是一个工具管理类，用于提供工具信息和工具调用给function call。
@@ -2512,25 +4189,76 @@ def my_reranker(node: DocNode, **kwargs):
 8
 """)
 
+add_chinese_doc("ModuleTool.apply", '''
+抽象方法，需在子类中实现具体逻辑。
+
+此方法应根据传入的参数执行特定任务。
+
+Raises:
+    NotImplementedError: 如果未在子类中重写该方法。
+''')
+
+add_english_doc("ModuleTool.apply", '''
+Abstract method to be implemented in subclasses.
+
+This method should perform a specific task based on the provided arguments.
+
+Raises:
+    NotImplementedError: If the method is not overridden in a subclass.
+''')
+
+add_chinese_doc("ModuleTool.validate_parameters", '''
+验证参数是否满足所需条件。
+
+此方法会检查参数字典是否包含所有必须字段，并尝试进一步进行格式验证。
+
+Args:
+    arguments (Dict[str, Any]): 传入的参数字典。
+
+Returns:
+    bool: 若参数合法且完整，返回 True；否则返回 False。
+''')
+
+add_english_doc("ModuleTool.validate_parameters", '''
+Validate whether the provided arguments meet the required criteria.
+
+This method checks if all required keys are present in the input dictionary and attempts format validation.
+
+Args:
+    arguments (Dict[str, Any]): Dictionary of input arguments.
+
+Returns:
+    bool: True if valid and complete; False otherwise.
+''')
 
 add_chinese_doc('FunctionCall', '''\
-FunctionCall是单轮工具调用类，如果LLM中的信息不足以回答用户的问题，必需结合外部知识来回答用户问题，则调用该类。如果LLM输出需要工具调用，则进行工具调用，并输出工具调用结果，输出结果为List类型，包含当前轮的输入、模型输出、工具输出。如果不需要工具调用，则直接输出LLM结果，输出结果为string类型。
+FunctionCall是单轮工具调用类。当LLM自身信息不足以回答用户问题，需要结合外部工具获取辅助信息时，调用此类。  
+若LLM输出需要调用工具，则执行工具调用并返回调用结果；输出结果为List类型，包含当前轮的输入、模型输出和工具输出。  
+若不需工具调用，则直接返回LLM输出结果，输出为字符串类型。
 
 Args:
-    llm (ModuleBase): 要使用的LLM可以是TrainableModule或OnlineChatModule。
-    tools (List[Union[str, Callable]]): LLM使用的工具名称或者 Callable 列表
+    llm (ModuleBase): 使用的LLM实例，支持TrainableModule或OnlineChatModule。
+    tools (List[Union[str, Callable]]): LLM可调用的工具名称或Callable对象列表。
+    return_trace (Optional[bool]): 是否返回调用轨迹，默认为False。
+    stream (Optional[bool]): 是否启用流式输出，默认为False。
+    _prompt (Optional[str]): 自定义工具调用提示语，默认根据llm类型自动设置。
 
-注意：tools 中使用的工具必须带有 `__doc__` 字段，按照 [Google Python Style](https://google.github.io/styleguide/pyguide.html#38-comments-and-docstrings) 的要求描述清楚工具的用途和参数。
+注意：tools中的工具需包含`__doc__`字段，且须遵循[Google Python Style](https://google.github.io/styleguide/pyguide.html#38-comments-and-docstrings)规范说明用途与参数。
 ''')
 
 add_english_doc('FunctionCall', '''\
-FunctionCall is a single-round tool call class. If the information in LLM is not enough to answer the uesr's question, it is necessary to combine external knowledge to answer the user's question. If the LLM output required a tool call, the tool call is performed and the tool call result is output. The output result is of List type, including the input, model output, and tool output of the current round. If a tool call is not required, the LLM result is directly output, and the output result is of string type.
-
-Note: The tools used in `tools` must have a `__doc__` field, clearly describing the purpose and parameters of the tool according to the [Google Python Style](https://google.github.io/styleguide/pyguide.html#38-comments-and-docstrings) requirements.
+FunctionCall is a single-turn tool invocation class. It is used when the LLM alone cannot answer user queries and requires external knowledge through tool calls.  
+If the LLM output requires tool calls, the tools are invoked and the combined results (input, model output, tool output) are returned as a list.  
+If no tool calls are needed, the LLM output is returned directly as a string.
 
 Args:
-    llm (ModuleBase): The LLM to be used can be either TrainableModule or OnlineChatModule.
-    tools (List[Union[str, Callable]]): A list of tool names for LLM to use.
+    llm (ModuleBase): The LLM instance to use, which can be either a TrainableModule or OnlineChatModule.
+    tools (List[Union[str, Callable]]): A list of tool names or callable objects that the LLM can use.
+    return_trace (Optional[bool]): Whether to return the invocation trace, defaults to False.
+    stream (Optional[bool]): Whether to enable streaming output, defaults to False.
+    _prompt (Optional[str]): Custom prompt for function call, defaults to automatic selection based on llm type.
+
+Note: Tools in `tools` must include a `__doc__` attribute and describe their purpose and parameters according to the [Google Python Style](https://google.github.io/styleguide/pyguide.html#38-comments-and-docstrings).
 ''')
 
 add_example('FunctionCall', """\
@@ -2885,60 +4613,6 @@ def my_reranker(node: DocNode, **kwargs):
 """,
 )
 
-add_chinese_doc(
-    "IntentClassifier",
-    """\
-IntentClassifier 是一个基于语言模型的意图识别器，用于根据用户提供的输入文本及对话上下文识别预定义的意图，并通过预处理和后处理步骤确保准确识别意图。
-
-Arguments:
-    llm: 用于意图识别的语言模型对象，OnlineChatModule或TrainableModule类型
-    intent_list (list): 包含所有可能意图的字符串列表。可以包含中文或英文的意图。
-    prompt (str): 用户附加的提示词。
-    constrain (str): 用户附加的限制。
-    examples (list[list]): 额外的示例，格式为 `[[query, intent], [query, intent], ...]` 。
-    return_trace (bool, 可选): 如果设置为 True，则将结果记录在trace中。默认为 False。
-""",
-)
-
-add_english_doc(
-    "IntentClassifier",
-    """\
-IntentClassifier is an intent recognizer based on a language model that identifies predefined intents based on user-provided input text and conversational context.
-It can handle intent lists and ensures accurate intent recognition through preprocessing and postprocessing steps.
-
-Arguments:
-    llm: A language model object used for intent recognition, which can be of type OnlineChatModule or TrainableModule.
-    intent_list (list): A list of strings containing all possible intents. This list can include intents in either Chinese or English.
-    prompt (str): User-attached prompt words.
-    constrain (str): User-attached constrain words.
-    examples (list[list]): extra examples，format is `[[query, intent], [query, intent], ...]`.
-    return_trace (bool, optional): If set to True, the results will be recorded in the trace. Defaults to False.
-""",
-)
-
-add_example(
-    "IntentClassifier",
-    """\
-    >>> import lazyllm
-    >>> from lazyllm.tools import IntentClassifier
-    >>> classifier_llm = lazyllm.OnlineChatModule(source="openai")
-    >>> chatflow_intent_list = ["Chat", "Financial Knowledge Q&A", "Employee Information Query", "Weather Query"]
-    >>> classifier = IntentClassifier(classifier_llm, intent_list=chatflow_intent_list)
-    >>> classifier.start()
-    >>> print(classifier('What is the weather today'))
-    Weather Query
-    >>>
-    >>> with IntentClassifier(classifier_llm) as ic:
-    >>>     ic.case['Weather Query', lambda x: '38.5°C']
-    >>>     ic.case['Chat', lambda x: 'permission denied']
-    >>>     ic.case['Financial Knowledge Q&A', lambda x: 'Calling Financial RAG']
-    >>>     ic.case['Employee Information Query', lambda x: 'Beijing']
-    ...
-    >>> ic.start()
-    >>> print(ic('What is the weather today'))
-    38.5°C
-""",
-)
 
 #eval/eval_base.py
 add_chinese_doc('BaseEvaluator', '''\
@@ -3308,6 +4982,40 @@ def my_reranker(node: DocNode, **kwargs):
 ... Executed: SELECT * FROM test
 '''])
 
+add_chinese_doc('DBManager.execute_query', '''\
+执行数据库查询语句的抽象方法。此方法需要由具体的数据库管理器子类实现，用于执行各种数据库操作。
+
+Args:
+    statement: 要执行的数据库查询语句，可以是 SQL 语句或其他数据库特定的查询语言
+
+此方法的特点：
+
+- **抽象方法**: 需要在子类中实现具体的数据库操作逻辑
+- **统一接口**: 为不同的数据库类型提供统一的查询接口
+- **错误处理**: 子类实现应该包含适当的错误处理和状态报告
+- **结果格式化**: 返回格式化的字符串结果，便于后续处理
+
+**注意**: 此方法是数据库管理器的核心方法，所有具体的数据库操作都通过此方法执行。
+
+''')
+
+add_english_doc('DBManager.execute_query', '''\
+Abstract method for executing database query statements. This method needs to be implemented by specific database manager subclasses to execute various database operations.
+
+Args:
+    statement: The database query statement to execute, which can be SQL statements or other database-specific query languages
+
+Features of this method:
+
+- **Abstract Method**: Requires implementation of specific database operation logic in subclasses
+- **Unified Interface**: Provides a unified query interface for different database types
+- **Error Handling**: Subclass implementations should include appropriate error handling and status reporting
+- **Result Formatting**: Returns formatted string results for subsequent processing
+
+**Note**: This method is the core method of the database manager, and all specific database operations are executed through this method.
+
+''')
+
 add_chinese_doc(
     "SqlManager",
     """\
@@ -3681,6 +5389,78 @@ def my_reranker(node: DocNode, **kwargs):
 """,
 )
 
+add_english_doc('SqlCall.sql_query_promt_hook', '''\
+Hook to prepare the prompt inputs for generating a database query from user input.
+
+Args:
+    input (Union[str, List, Dict[str, str], None]): The user's natural language query.
+    history (List[Union[List[str], Dict[str, Any]]]): Conversation history.
+    tools (Union[List[Dict[str, Any]], None]): Available tool descriptions.
+    label (Union[str, None]): Optional label for the prompt.
+
+Returns:
+    Tuple: A tuple containing the formatted prompt dict (with current_date, db_type, desc, user_query), history, tools, and label.
+''')
+
+add_chinese_doc('SqlCall.sql_query_promt_hook', r'''\ 
+为从用户输入生成数据库查询准备 prompt 的 hook。
+
+Args:
+    input (Union[str, List, Dict[str, str], None]): 用户的自然语言查询。
+    history (List[Union[List[str], Dict[str, Any]]]): 会话历史。
+    tools (Union[List[Dict[str, Any]], None]): 可用工具描述。
+    label (Union[str, None]): 可选标签。
+
+Returns:
+    Tuple: 包含格式化后的 prompt 字典（包括 current_date、db_type、desc、user_query）、history、tools 和 label。
+''')
+
+add_english_doc('SqlCall.sql_explain_prompt_hook', '''\
+Hook to prepare the prompt for explaining the execution result of a database query.
+
+Args:
+    input (Union[str, List, Dict[str, str], None]): A list containing the query and its result.
+    history (List[Union[List[str], Dict[str, Any]]]): Conversation history.
+    tools (Union[List[Dict[str, Any]], None]): Available tool descriptions.
+    label (Union[str, None]): Optional label for the prompt.
+
+Returns:
+    Tuple: A tuple containing the formatted prompt dict (history_info, desc, query, result, explain_query), history, tools, and label.
+''')
+
+add_chinese_doc('SqlCall.sql_explain_prompt_hook', r'''\ 
+为解释数据库查询执行结果准备 prompt 的 hook。
+
+Args:
+    input (Union[str, List, Dict[str, str], None]): 包含查询和结果的列表。
+    history (List[Union[List[str], Dict[str, Any]]]): 会话历史。
+    tools (Union[List[Dict[str, Any]], None]): 可用工具描述。
+    label (Union[str, None]): 可选标签。
+
+Returns:
+    Tuple: 包含格式化后的 prompt 字典（history_info、desc、query、result、explain_query）、history、tools 和 label。
+''')
+
+add_english_doc('SqlCall.extract_sql_from_response', '''\
+Extract SQL (or MongoDB pipeline) statement from the raw LLM response.
+
+Args:
+    str_response (str): Raw text returned by the LLM which may contain code fences.
+
+Returns:
+    tuple[bool, str]: A tuple where the first element indicates whether extraction succeeded, and the second is the cleaned or original content. If sql_post_func is provided, it is applied to the extracted content.
+''')
+
+add_chinese_doc('SqlCall.extract_sql_from_response', r'''\ 
+从原始 LLM 响应中提取 SQL（或 MongoDB pipeline）语句。
+
+Args:
+    str_response (str): LLM 返回的原始文本，可能包含代码块。
+
+Returns:
+    tuple[bool, str]: 第一个元素表示是否成功提取，第二个是清洗后的或原始内容。如果提供了 sql_post_func，则会应用于提取结果。
+''')
+
 # ---------------------------------------------------------------------------- #
 
 add_chinese_doc("HttpTool", """
@@ -3923,6 +5703,8 @@ def my_reranker(node: DocNode, **kwargs):
 add_chinese_doc('MCPClient', '''\
 MCP客户端，用于连接MCP服务器。同时支持本地服务器和sse服务器。
 
+如果传入的 'command_or_url' 是一个 URL 字符串（以 'http' 或 'https' 开头），则将连接到远程服务器；否则，将启动并连接到本地服务器。
+
 
 Args:
     command_or_url (str): 用于启动本地服务器或连接远程服务器的命令或 URL 字符串。
@@ -3951,11 +5733,53 @@ def my_reranker(node: DocNode, **kwargs):
 
 
 add_english_doc('MCPClient.list_tools', '''\
-Retrieves the list of tools from the currently connected MCP client.
+Retrieve the list of tools from the currently connected MCP client.
+
+**Returns:**\n
+- Any: The list of tools returned by the MCP client.
 ''')
 
 add_chinese_doc('MCPClient.list_tools', '''\
-获取当前连接MCP客户端的工具列表。
+获取当前连接的 MCP 客户端的工具列表。
+
+**Returns:**\n
+- Any: MCP 客户端返回的工具列表。
+''')
+
+
+add_english_doc('MCPClient.get_tools', '''\
+Retrieve a filtered list of tools from the MCP client.
+
+Args:
+    allowed_tools (Optional[list[str]]): List of tool names to filter. If None, all tools are returned.
+
+**Returns:**\n
+- Any: List of tools that match the filter criteria.
+''')
+
+add_chinese_doc('MCPClient.get_tools', '''\
+从 MCP 客户端获取经过筛选的工具列表。
+
+Args:
+    allowed_tools (Optional[list[str]]): 要筛选的工具名称列表，若为 None，则返回所有工具。
+
+**Returns:**\n
+- Any: 符合筛选条件的工具列表。
+''')
+
+
+add_english_doc('MCPClient.deploy', '''\
+Deploys the MCP client with the specified SSE server settings asynchronously.
+
+Args:
+    sse_settings (SseServerSettings): Configuration settings for the SSE server.
+''')
+
+add_chinese_doc('MCPClient.deploy', '''\
+使用指定的 SSE 服务器设置异步部署 MCP 客户端。
+
+Args:
+    sse_settings (SseServerSettings): SSE 服务器的配置设置。
 ''')
 
 
@@ -4019,362 +5843,284 @@ def my_reranker(node: DocNode, **kwargs):
 将 MCP 服务器提供的工具转换为 LazyLLM 代理使用的函数。
 
 Args:
-    client (mcp.ClientSession): 连接到MCP服务器的MCP客户端。
-    mcp_tool (mcp.types.Tool): 由MCP服务器提供的工具。
-''')
-
-add_english_doc('rag.utils.DocListManager.table_inited', '''\
-Checks if the database table `documents` is initialized. This method ensures thread-safety when accessing the database.
-`table_inited(self)`
-Determines whether the `documents` table exists in the database.
-Returns:
-    bool: `True` if the `documents` table exists, `False` otherwise.
-Notes:
-    - Uses a thread-safe lock (`self._db_lock`) to ensure safe access to the database.
-    - Establishes a connection to the SQLite database at `self._db_path` with the `check_same_thread` option.
-    - Executes the SQL query: `SELECT name FROM sqlite_master WHERE type='table' AND name='documents'` to check for the table.
-''')
-
-add_chinese_doc('rag.utils.DocListManager.table_inited', '''\
-检查数据库中的 `documents` 表是否已初始化。此方法在访问数据库时确保线程安全。
-`table_inited(self)`
-判断数据库中是否存在 `documents` 表。
-返回值:
-    bool: 如果 `documents` 表存在，返回 `True`；否则返回 `False`。
-说明:
-    - 使用线程安全锁 (`self._db_lock`) 确保对数据库的安全访问。
-    - 通过 `self._db_path` 连接 SQLite 数据库，并使用 `check_same_thread` 配置选项。
-    - 执行 SQL 查询：`SELECT name FROM sqlite_master WHERE type='table' AND name='documents'` 来检查表是否存在。
-''')
-
-add_english_doc('rag.utils.DocListManager.validate_paths', '''\
-Validates a list of file paths to ensure they are ready for processing.
-`validate_paths(self, paths: List[str]) -> Tuple[bool, str, List[bool]]`
-This method checks whether the provided paths are new, already processed, or currently being processed. It ensures there are no conflicts in processing the documents.
-Args
-    paths (List[str]): A list of file paths to validate.
-Returns:
-    Tuple[bool, str, List[bool]]: A tuple containing:
-        - `bool`: `True` if all paths are valid, `False` otherwise.
-        - `str`: A message indicating success or the reason for failure.
-        - `List[bool]`: A list where each element corresponds to whether a path is new (`True`) or already exists (`False`).
-Notes:
-    - If any document is still being processed or needs reparsing, the method returns `False` with an appropriate error message.
-    - The method uses a database session and thread-safe lock (`self._db_lock`) to retrieve document status information.
-    - Unsafe statuses include `working` and `waiting`.
-''')
-
-add_chinese_doc('rag.utils.DocListManager.validate_paths', '''\
-验证一组文件路径，以确保它们可以被正常处理。
-`validate_paths(self, paths: List[str]) -> Tuple[bool, str, List[bool]]`
-此方法检查提供的路径是否是新的、已处理的或当前正在处理的，并确保处理文档时不会发生冲突。
-参数:
-    paths (List[str]): 要验证的文件路径列表。
-返回值:
-    Tuple[bool, str, List[bool]]: 返回一个元组，包括：
-        - `bool`: 如果所有路径有效，则返回 `True`；否则返回 `False`。
-        - `str`: 表示成功或失败原因的消息。
-        - `List[bool]`: 一个布尔值列表，每个元素对应一个路径是否为新路径（`True` 表示新路径，`False` 表示已存在）。
-说明:
-    - 如果任何文档仍在处理中或需要重新解析，该方法会返回 `False`，并附带相应的错误消息。
-    - 方法通过数据库会话和线程安全锁 (`self._db_lock`) 检索文档状态信息。
-    - 不安全状态包括 `working` 和 `waiting`。
+    client (mcp.ClientSession): 连接到MCP服务器的MCP客户端。
+    mcp_tool (mcp.types.Tool): 由MCP服务器提供的工具。
 ''')
 
-add_english_doc('rag.utils.DocListManager.update_need_reparsing', '''\
-Updates the `need_reparse` status of a document in the `KBGroupDocuments` table.
-`update_need_reparsing(self, doc_id: str, need_reparse: bool, group_name: Optional[str] = None)`
-This method sets the `need_reparse` flag for a specific document, optionally scoped to a given group.
+
+add_english_doc('rag.doc_node.ImageDocNode', '''\
+A specialized document node for handling image content in RAG systems.
+
+ImageDocNode extends DocNode to provide specialized functionality for image processing and embedding generation. It automatically handles image loading, base64 encoding for embedding, and PIL Image objects for LLM processing.
+
 Args:
-    doc_id (str): The ID of the document to update.
-    need_reparse (bool): The new value for the `need_reparse` flag.
-    group_name (Optional[str]): If provided, the update will be applied only to the specified group.
-Notes:
-    - Uses a thread-safe lock (`self._db_lock`) to ensure safe database access.
-    - The `group_name` parameter allows scoping the update to a specific group; if not provided, the update applies to all groups containing the document.
-    - The method commits the change to the database immediately.
+    image_path (str): The file path to the image file. This should be a valid path to an image file (e.g., .jpg, .png, .jpeg).
+    uid (Optional[str]): Unique identifier for the document node. If not provided, a UUID will be automatically generated.
+    group (Optional[str]): The group name this node belongs to. Used for organizing and filtering nodes.
+    embedding (Optional[Dict[str, List[float]]]): Pre-computed embeddings for the image. Keys are embedding model names, values are embedding vectors.
+    parent (Optional[DocNode]): Parent node in the document hierarchy. Used for building document trees.
+    metadata (Optional[Dict[str, Any]]): Additional metadata associated with the image node.
+    global_metadata (Optional[Dict[str, Any]]): Global metadata that applies to all nodes in the document.
+    text (Optional[str]): Optional text description or caption for the image.
 ''')
 
-add_chinese_doc('rag.utils.DocListManager.update_need_reparsing', '''\
-更新 `KBGroupDocuments` 表中某个文档的 `need_reparse` 状态。
-`update_need_reparsing(self, doc_id: str, need_reparse: bool, group_name: Optional[str] = None)`
-此方法设置指定文档的 `need_reparse` 标志，并可选限定到特定分组。
-参数:
-    doc_id (str): 要更新的文档ID。
-    need_reparse (bool): `need_reparse` 标志的新值。
-    group_name (Optional[str]): 如果提供，仅对指定分组应用更新；如果未提供，则对包含该文档的所有分组应用更新。
-说明:
-    - 使用线程安全锁 (`self._db_lock`) 确保数据库访问安全。
-    - `group_name` 参数允许将更新限定到特定分组；如果未提供，则更新应用于包含该文档的所有分组。
-    - 方法会立刻将更改提交到数据库。
-''')
+add_chinese_doc('rag.doc_node.ImageDocNode', '''\
+专门用于处理RAG系统中图像内容的文档节点。
+
+ImageDocNode继承自DocNode，为图像处理和嵌入生成提供专门的功能。它自动处理图像加载、用于嵌入的base64编码，以及用于LLM处理的PIL图像对象。
 
-add_english_doc('rag.utils.DocListManager.list_files', '''\
-Lists files from the `documents` table with optional filtering, limiting, and returning details.
-`list_files(self, limit: Optional[int] = None, details: bool = False, status: Union[str, List[str]] = DocListManager.Status.all, exclude_status: Optional[Union[str, List[str]]] = None)`
-This method retrieves file IDs or detailed file information from the database, based on the specified filtering conditions.
 Args:
-    limit (Optional[int]): Maximum number of files to return. If `None`, all matching files will be returned.
-    details (bool): Whether to return detailed file information (`True`) or just file IDs (`False`).
-    status (Union[str, List[str]]): The status or list of statuses to include in the results. Defaults to all statuses.
-    exclude_status (Optional[Union[str, List[str]]]): The status or list of statuses to exclude from the results. Defaults to `None`.
-Returns:
-    List: A list of file IDs if `details=False`, or a list of detailed file rows if `details=True`.
-Notes:
-    - The method constructs a query dynamically based on the provided `status` and `exclude_status` conditions.
-    - A thread-safe lock (`self._db_lock`) ensures safe database access.
-    - The `LIMIT` clause is applied if `limit` is specified.
+    image_path (str): 图像文件的文件路径。这应该是一个有效的图像文件路径（例如.jpg、.png、.jpeg）。
+    uid (Optional[str]): 文档节点的唯一标识符。如果未提供，将自动生成UUID。
+    group (Optional[str]): 此节点所属的组名。用于组织和过滤节点。
+    embedding (Optional[Dict[str, List[float]]]): 图像的预计算嵌入。键是嵌入模型名称，值是嵌入向量。
+    parent (Optional[DocNode]): 文档层次结构中的父节点。用于构建文档树。
+    metadata (Optional[Dict[str, Any]]): 与图像节点关联的附加元数据。
+    global_metadata (Optional[Dict[str, Any]]): 适用于文档中所有节点的全局元数据。
+    text (Optional[str]): 图像的可选文本描述或标题。
 ''')
 
-add_chinese_doc('rag.utils.DocListManager.list_files', '''\
-从 `documents` 表中列出文件，并支持过滤、限制返回结果以及返回详细信息。
-`list_files(self, limit: Optional[int] = None, details: bool = False, status: Union[str, List[str]] = DocListManager.Status.all, exclude_status: Optional[Union[str, List[str]]] = None)`
-此方法根据指定的条件，从数据库中检索文件ID或详细文件信息。
-参数:
-    limit (Optional[int]): 返回的最大文件数量。如果为 `None`，则返回所有匹配的文件。
-    details (bool): 是否返回详细的文件信息（`True`）或仅返回文件ID（`False`）。
-    status (Union[str, List[str]]): 要包含的状态或状态列表，默认为所有状态。
-    exclude_status (Optional[Union[str, List[str]]]): 要排除的状态或状态列表，默认为 `None`。
-返回值:
-    List: 如果 `details=False`，则返回文件ID列表；如果 `details=True`，则返回详细文件行的列表。
-说明:
-    - 该方法根据 `status` 和 `exclude_status` 条件动态构造查询。
-    - 使用线程安全锁 (`self._db_lock`) 确保数据库访问安全。
-    - 如果指定了 `limit`，查询会附加 `LIMIT` 子句。
+add_example('rag.doc_node.ImageDocNode', '''\
+>>> from lazyllm.tools.rag.doc_node import ImageDocNode, MetadataMode
+>>> import numpy as np
+>>> image_node = ImageDocNode(
+...     image_path="/home/mnt/yehongfei/Code/Test/framework.jpg",
+...     text="这是一张照片"
+)
+>>> def clip_emb(content, modality="image"):
+...     if modality == "image":
+...         return [np.random.rand(512).tolist()]
+...     return [np.random.rand(256).tolist()]
+>>> embed_functions = {"clip": clip_emb}
+>>> image_node.do_embedding(embed_functions)
+>>> print(f"嵌入维度: {len(image_node.embedding['clip'])}")
+>>> text_representation = image_node.get_text()
+>>> content_representation = image_node.get_content(MetadataMode.EMBED)
+>>> print(f"text属性: {text_representation}")
+>>> print(f"content属性: {content_representation}")    
 ''')
 
-add_english_doc('rag.utils.DocListManager.get_docs', '''\
-Fetch documents from the database based on a list of document IDs.
-`get_docs(self, doc_ids: List[str]) -> List[KBDocument]`
-This method retrieves document objects of type `KBDocument` from the database for the provided list of document IDs.
+add_english_doc('rag.doc_node.ImageDocNode.do_embedding', '''\
+Generate embeddings for the image using the provided embedding functions.
+
+This method overrides the parent class method to handle image-specific embedding generation. It automatically converts the image to the appropriate format (base64 for embedding) and calls the embedding functions with the image modality.
+
 Args:
-    doc_ids (List[str]): A list of document IDs to fetch.
-Returns:
-    List[KBDocument]: A list of `KBDocument` objects corresponding to the provided document IDs. If no documents are found, an empty list is returned.
-Notes:
-    - The method uses a thread-safe lock (`self._db_lock`) to ensure safe database access.
-    - The query filters documents using the `doc_id` field with an SQL `IN` clause.
-    - If `doc_ids` is empty, the function will return an empty list without querying the database.
+    embed (Dict[str, Callable]): Dictionary of embedding functions. Keys are embedding model names, values are callable functions that accept (content, modality) and return embedding vectors.
 ''')
 
-add_chinese_doc('rag.utils.DocListManager.get_docs', '''\
-根据文档 ID 列表从数据库中获取文档对象。
-`get_docs(self, doc_ids: List[str]) -> List[KBDocument]`
-此方法从数据库中检索类型为 `KBDocument` 的文档对象，基于提供的文档 ID 列表。
-参数:
-    doc_ids (List[str]): 要获取的文档 ID 列表。
-返回值:
-    List[KBDocument]: 与提供的文档 ID 对应的 `KBDocument` 对象列表。如果没有找到文档，将返回空列表。
-说明:
-    - 使用线程安全锁 (`self._db_lock`) 确保数据库访问的安全性。
-    - 查询使用 SQL 的 `IN` 子句，通过 `doc_id` 字段进行过滤。
-    - 如果 `doc_ids` 为空，函数将直接返回空列表，而不会查询数据库。
-''')
+add_chinese_doc('rag.doc_node.ImageDocNode.do_embedding', '''\
+使用提供的嵌入函数为图像生成嵌入。
+
+此方法重写父类方法以处理图像特定的嵌入生成。它自动将图像转换为适当的格式（用于嵌入的base64），并使用图像模态调用嵌入函数。
 
-add_english_doc('rag.utils.DocListManager.fetch_docs_changed_meta', '''\
-Fetch documents with changed metadata for a specific group and reset their `new_meta` field to `None`.
-`fetch_docs_changed_meta(self, group: str) -> List[DocMetaChangedRow]`
-This method retrieves all documents where metadata has changed (`new_meta` is not `None`) for the given group. After fetching, it resets the `new_meta` field to `None` for those documents.
 Args:
-    group (str): The name of the group to filter documents by.
-Returns:
-    List[DocMetaChangedRow]: A list of rows, where each row contains the `doc_id` and the `new_meta` field of documents with changed metadata.
-Notes:
-    - The method uses a thread-safe lock (`self._db_lock`) to ensure safe database access.
-    - It performs a SQL join between `KBDocument` and `KBGroupDocuments` to retrieve the relevant rows.
-    - After fetching, it updates the `new_meta` field of the affected rows to `None` and commits the changes to the database.
+    embed (Dict[str, Callable]): 嵌入函数字典。键是嵌入模型名称，值是接受(content, modality)并返回嵌入向量的可调用函数。
 ''')
 
-add_chinese_doc('rag.utils.DocListManager.fetch_docs_changed_meta', '''\
-获取指定组中元数据已更改的文档，并将其 `new_meta` 字段重置为 `None`。
-`fetch_docs_changed_meta(self, group: str) -> List[DocMetaChangedRow]`
-此方法检索元数据已更改（即 `new_meta` 不为 `None`）的所有文档，基于提供的组名。检索后，会将这些文档的 `new_meta` 字段重置为 `None`。
-参数:
-    group (str): 用于过滤文档的组名。
-返回值:
-    List[DocMetaChangedRow]: 包含文档 `doc_id` 和 `new_meta` 字段的行列表，表示元数据已更改的文档。
-说明:
-    - 使用线程安全锁 (`self._db_lock`) 确保数据库访问安全。
-    - 方法通过 SQL `JOIN` 操作连接 `KBDocument` 和 `KBGroupDocuments` 表以检索相关行。
-    - 在获取数据后，将受影响行的 `new_meta` 字段更新为 `None`，并将更改提交到数据库。
+add_english_doc('rag.doc_node.ImageDocNode.get_content', '''\
+Get the image content in different formats based on the metadata mode.
+
+This method returns the image content in different formats depending on the intended use case. For LLM processing, it returns a PIL Image object. For embedding generation, it returns a base64-encoded image string.
+
+Args:
+    metadata_mode (MetadataMode, optional): The mode for content retrieval. Defaults to MetadataMode.LLM.
+        - MetadataMode.LLM: Returns PIL Image object for LLM processing
+        - MetadataMode.EMBED: Returns base64-encoded image for embedding generation
+        - Other modes: Returns the image path as text
+
+**Returns:**\n
+- Union[PIL.Image.Image, List[str], str]: The image content in the requested format.
 ''')
 
-add_english_doc('rag.utils.DocListManager.list_kb_group_files', '''\
-List files in a specific knowledge base (KB) group with optional filters, limiting, and details.
-`list_kb_group_files(self, group: str = None, limit: Optional[int] = None, details: bool = False, status: Union[str, List[str]] = DocListManager.Status.all, exclude_status: Optional[Union[str, List[str]]] = None, upload_status: Union[str, List[str]] = DocListManager.Status.all, exclude_upload_status: Optional[Union[str, List[str]]] = None, need_reparse: Optional[bool] = None)`
-This method retrieves files from the `kb_group_documents` table, optionally filtering by group, document status, upload status, and whether reparsing is needed.
+add_chinese_doc('rag.doc_node.ImageDocNode.get_content', '''\
+根据元数据模式获取不同格式的图像内容。
+
+此方法根据预期用例返回不同格式的图像内容。对于LLM处理，它返回PIL图像对象。对于嵌入生成，它返回base64编码的图像字符串。
+
 Args:
-    group (str): The name of the KB group to filter files by. Defaults to `None` (no group filter).
-    limit (Optional[int]): Maximum number of files to return. If `None`, returns all matching files.
-    details (bool): Whether to return detailed file information (`True`) or only file IDs and paths (`False`).
-    status (Union[str, List[str]]): The KB group status or list of statuses to include in the results. Defaults to all statuses.
-    exclude_status (Optional[Union[str, List[str]]): The KB group status or list of statuses to exclude from the results. Defaults to `None`.
-    upload_status (Union[str, List[str]]): The document upload status or list of statuses to include in the results. Defaults to all statuses.
-    exclude_upload_status (Optional[Union[str, List[str]]): The document upload status or list of statuses to exclude from the results. Defaults to `None`.
-    need_reparse (Optional[bool]): Whether to filter files that need reparsing (`True`) or not (`False`). Defaults to `None` (no filtering).
-Returns:
-    List: If `details=False`, returns a list of tuples containing `(doc_id, path)`. 
-          If `details=True`, returns a list of detailed rows with additional metadata.
-Notes:
-    - This method constructs a SQL query dynamically based on the provided filters.
-    - Uses a thread-safe lock (`self._db_lock`) to ensure safe database access.
-    - If `status` or `upload_status` are provided as lists, they are processed with SQL `IN` clauses.
+    metadata_mode (MetadataMode, optional): 内容检索模式。默认为MetadataMode.LLM。
+        - MetadataMode.LLM: 返回用于LLM处理的PIL图像对象
+        - MetadataMode.EMBED: 返回用于嵌入生成的base64编码图像
+        - 其他模式: 返回图像路径作为文本
+
+**Returns:**\n
+- Union[PIL.Image.Image, List[str], str]: 请求格式的图像内容。
 ''')
 
-add_chinese_doc('rag.utils.DocListManager.list_kb_group_files', '''\
-列出指定知识库 (KB) 组中的文件，并支持过滤、限制返回以及返回详细信息。
-`list_kb_group_files(self, group: str = None, limit: Optional[int] = None, details: bool = False, status: Union[str, List[str]] = DocListManager.Status.all, exclude_status: Optional[Union[str, List[str]]] = None, upload_status: Union[str, List[str]] = DocListManager.Status.all, exclude_upload_status: Optional[Union[str, List[str]]] = None, need_reparse: Optional[bool] = None)`
-此方法从 `kb_group_documents` 表中检索文件，支持基于组名、文档状态、上传状态以及是否需要重新解析的过滤。
-参数:
-    group (str): 用于过滤文件的 KB 组名。默认为 `None`（不过滤组名）。
-    limit (Optional[int]): 返回的最大文件数量。如果为 `None`，则返回所有匹配的文件。
-    details (bool): 是否返回详细的文件信息（`True`）或仅返回文件 ID 和路径（`False`）。
-    status (Union[str, List[str]]): 要包含在结果中的 KB 组状态或状态列表。默认为所有状态。
-    exclude_status (Optional[Union[str, List[str]]): 要从结果中排除的 KB 组状态或状态列表。默认为 `None`。
-    upload_status (Union[str, List[str]]): 要包含在结果中的文档上传状态或状态列表。默认为所有状态。
-    exclude_upload_status (Optional[Union[str, List[str]]): 要从结果中排除的文档上传状态或状态列表。默认为 `None`。
-    need_reparse (Optional[bool]): 是否过滤需要重新解析的文件（`True`）或不需要重新解析的文件（`False`）。默认为 `None`（不进行过滤）。
-返回值:
-    List: 如果 `details=False`，返回包含 `(doc_id, path)` 的元组列表。
-          如果 `details=True`，返回包含附加元数据的详细行列表。
-说明:
-    - 方法根据提供的过滤条件动态构建 SQL 查询。
-    - 使用线程安全锁 (`self._db_lock`) 确保多线程环境下的数据库访问安全。
-    - 如果 `status` 或 `upload_status` 参数为列表，则会使用 SQL 的 `IN` 子句进行处理。
+add_english_doc('rag.doc_node.ImageDocNode.get_text', '''\
+Get the image path as text representation.
+
+This method overrides the parent class method to return the image path instead of the content field, since ImageDocNode doesn't use the content field for storing text.
+
+**Returns:**\n
+- str: The image file path.
 ''')
 
-add_english_doc('rag.utils.DocListManager.add_files', '''\
-Add multiple files to the document list with optional metadata, status, and batch processing.
-`add_files(self, files: List[str], metadatas: Optional[List[Dict[str, Any]]] = None, status: Optional[str] = Status.waiting, batch_size: int = 64) -> List[DocPartRow]`
-This method adds a list of files to the database with optional metadata and a specified initial status. Files are processed in batches for efficiency. After adding the documents, they are associated with the default knowledge base (KB) group.
-Args:
-    files (List[str]): A list of file paths to add to the database.
-    metadatas (Optional[List[Dict[str, Any]]]): A list of metadata dictionaries corresponding to the files. If `None`, no metadata will be associated. Defaults to `None`.
-    status (Optional[str]): The initial status for the added files. Defaults to `Status.waiting`.
-    batch_size (int): The number of files to process in each batch. Defaults to 64.
-Returns:
-    List[DocPartRow]: A list of `DocPartRow` objects representing the added files and their associated information.
-Notes:
-    - The method first creates document records using the `_add_doc_records` helper function.
-    - After the files are added, they are automatically linked to the default KB group (`DocListManager.DEFAULT_GROUP_NAME`).
-    - Batch processing ensures scalability when adding a large number of files.
+add_chinese_doc('rag.doc_node.ImageDocNode.get_text', '''\
+获取图像路径作为文本表示。
+
+此方法重写父类方法以返回图像路径而不是内容字段，因为ImageDocNode不使用内容字段存储文本。
+
+**Returns:**\n
+- str: 图像文件路径。
 ''')
 
-add_chinese_doc('rag.utils.DocListManager.add_files', '''\
-批量向文档列表中添加文件，可选附加元数据、状态，并支持分批处理。
-`add_files(self, files: List[str], metadatas: Optional[List[Dict[str, Any]]] = None, status: Optional[str] = Status.waiting, batch_size: int = 64) -> List[DocPartRow]`
-此方法将文件列表添加到数据库中，并为每个文件设置可选的元数据和初始状态。文件会以批量方式处理以提高效率。在文件添加完成后，它们会自动关联到默认的知识库 (KB) 组。
-参数:
-    files (List[str]): 要添加到数据库的文件路径列表。
-    metadatas (Optional[List[Dict[str, Any]]]): 与文件对应的元数据字典列表。如果为 `None`，则不会附加元数据。默认为 `None`。
-    status (Optional[str]): 添加文件的初始状态。默认为 `Status.waiting`。
-    batch_size (int): 每批处理的文件数量。默认为 64。
-返回值:
-    List[DocPartRow]: 包含已添加文件及其相关信息的 `DocPartRow` 对象列表。
-说明:
-    - 方法首先通过辅助函数 `_add_doc_records` 创建文档记录。
-    - 文件添加后，会自动关联到默认的知识库组 (`DocListManager.DEFAULT_GROUP_NAME`)。
-    - 批量处理确保在添加大量文件时具有良好的可扩展性。
+add_english_doc('rag.transform.AdaptiveTransform', '''\
+A flexible document transformation system that applies different transforms based on document patterns.
+
+AdaptiveTransform allows you to define multiple transformation strategies and automatically selects the appropriate one based on the document's file path or custom pattern matching. This is particularly useful when you have different types of documents that require different processing approaches.
+
+Args:
+    transforms (Union[List[Union[TransformArgs, Dict]], Union[TransformArgs, Dict]]): A list of transform configurations or a single transform configuration. 
+    num_workers (int, optional): Number of worker threads for parallel processing. Defaults to 0.
 ''')
 
+add_chinese_doc('rag.transform.AdaptiveTransform', '''\
+一个灵活的文档转换系统，根据文档模式应用不同的转换策略。
 
-#delete_unreferenced_doc
-add_english_doc('rag.utils.DocListManager.delete_unreferenced_doc', '''\
-Delete documents marked as "deleting" and no longer referenced in the database.
-`delete_unreferenced_doc(self)`
-This method removes documents from the database that meet the following conditions:
-1. Their status is set to `DocListManager.Status.deleting`.
-2. Their reference count (`count`) is 0.
+AdaptiveTransform允许您定义多种转换策略，并根据文档的文件路径或自定义模式匹配自动选择适当的转换方法。当您有不同类型的文档需要不同处理方法时，这特别有用。
+
+Args:
+    transforms (Union[List[Union[TransformArgs, Dict]], Union[TransformArgs, Dict]]): 转换配置列表或单个转换配置。
+    num_workers (int, optional): 并行处理的工作线程数。默认为0。
 ''')
 
-add_chinese_doc('rag.utils.DocListManager.delete_unreferenced_doc', '''\
-删除数据库中标记为 "删除中" 且不再被引用的文档。
-`delete_unreferenced_doc(self)`
-此方法从数据库中删除满足以下条件的文档：
-1. 文档状态为 `DocListManager.Status.deleting`。
-2. 文档的引用计数 (`count`) 为 0。
+add_example('rag.transform.AdaptiveTransform', '''\
+>>> from lazyllm.tools.rag.transform import AdaptiveTransform, DocNode, SentenceSplitter
+>>> doc1 = DocNode(text="这是第一个文档的内容。它包含多个句子。")
+>>> doc2 = DocNode(text="这是第二个文档的内容。")
+>>> transforms = [
+...     {
+...         'f': SentenceSplitter,
+...         'pattern': '*.txt',
+...         'kwargs': {'chunk_size': 50, 'chunk_overlap': 10}
+...     },
+...     {
+...         'f': SentenceSplitter,
+...         'pattern': '*.pdf',
+...         'kwargs': {'chunk_size': 100, 'chunk_overlap': 20}
+...     }
+... ]
+>>> adaptive = AdaptiveTransform(transforms)
+>>> results1 = adaptive.transform(doc1)
+>>> print(f"文档1转换结果: {len(results1)} 个块")
+>>> for i, result in enumerate(results1):
+...     print(f"  块 {i+1}: {result.text}")
+>>> results2 = adaptive.transform(doc2)
+>>> print(f"文档2转换结果: {len(results2)} 个块")
+>>> for i, result in enumerate(results2):
+...     print(f"  块 {i+1}: {result.text}")      
 ''')
 
-#get_docs_need_reparse
-add_english_doc('rag.utils.DocListManager.get_docs_need_reparse', '''\
-Retrieve documents that require reparsing for a specific group.
-`get_docs_need_reparse(self, group: str) -> List[KBDocument]`
-This method fetches documents that are marked as needing reparsing (`need_reparse=True`) for the given group. Only documents with a status of `success` or `failed` are included in the results.
+add_english_doc('rag.transform.AdaptiveTransform.transform', '''\
+Transform a document using the appropriate transformation strategy based on pattern matching.
+
+This method evaluates each transform configuration in order and applies the first one that matches the document's path pattern. The matching logic supports both glob patterns and custom callable functions.
+
 Args:
-    group (str): The name of the group to filter documents by.
-Returns:
-    List[KBDocument]: A list of `KBDocument` objects that need reparsing.
-Notes:
-    - The method uses a thread-safe lock (`self._db_lock`) to ensure safe database access.
-    - The query performs a SQL `JOIN` between `KBDocument` and `KBGroupDocuments` to filter by group and reparse status.
-    - Documents with `need_reparse=True` and a status of `success` or `failed` are considered for reparsing.
+    document (DocNode): The document node to be transformed.
+    **kwargs: Additional keyword arguments passed to the transform function.
+
+**Returns:**\n
+- List[Union[str, DocNode]]: A list of transformed results (strings or DocNode objects).
 ''')
 
-add_chinese_doc('rag.utils.DocListManager.get_docs_need_reparse', '''\
-获取需要重新解析的指定组中的文档。
-`get_docs_need_reparse(self, group: str) -> List[KBDocument]`
-此方法检索标记为需要重新解析 (`need_reparse=True`) 的文档，基于提供的组名。仅包含状态为 `success` 或 `failed` 的文档。
-参数:
-    group (str): 用于过滤文档的组名。
-返回值:
-    List[KBDocument]: 需要重新解析的 `KBDocument` 对象列表。
-说明:
-    - 使用线程安全锁 (`self._db_lock`) 确保多线程环境下的数据库访问安全。
-    - 查询通过 SQL `JOIN` 操作连接 `KBDocument` 和 `KBGroupDocuments` 表，并基于组名和重新解析状态进行过滤。
-    - 仅状态为 `success` 或 `failed` 且 `need_reparse=True` 的文档会被检索出来。
+add_chinese_doc('rag.transform.AdaptiveTransform.transform', '''\
+根据模式匹配使用适当的转换策略转换文档。
+
+此方法按顺序评估每个转换配置，并应用第一个匹配文档路径模式的转换。匹配逻辑支持glob模式和自定义可调用函数。
+
+Args:
+    document (DocNode): 要转换的文档节点。
+    **kwargs: 传递给转换函数的附加关键字参数。
+
+**Returns:**\n
+- List[Union[str, DocNode]]: 转换结果列表（字符串或DocNode对象）。
 ''')
 
-add_english_doc('rag.utils.DocListManager.get_existing_paths_by_pattern', '''\
-Retrieve existing document paths that match a given pattern.
-`get_existing_paths_by_pattern(self, pattern: str) -> List[str]`
-This method fetches all document paths from the database that match the provided SQL `LIKE` pattern.
+add_english_doc('rag.rerank.ModuleReranker', '''\
+A reranker that uses trainable modules to reorder documents based on relevance to a query.
+
+ModuleReranker is a specialized reranker that leverages trainable models (such as BGE-reranker, Cohere rerank, etc.) to improve the relevance of retrieved documents. It takes a list of documents and a query, then returns the documents reordered by their relevance scores.
+
 Args:
-    pattern (str): The SQL `LIKE` pattern to filter document paths. For example, `%example%` matches paths containing the word "example".
-Returns:
-    List[str]: A list of document paths that match the given pattern. If no paths match, an empty list is returned.
-Notes:
-    - The method uses a thread-safe lock (`self._db_lock`) to ensure safe database access.
-    - The `LIKE` operator in the SQL query is used to perform pattern matching on document paths.
+    name (str): The name of the reranker. Defaults to "ModuleReranker".
+    model (Union[Callable, str]): The reranking model. Can be either a model name (string) or a callable function.
+    target (Optional[str]): Defaults to None.
+    output_format (Optional[str]): The format for output processing. Defaults to None.
+    join (Union[bool, str]): Whether to join the results. Defaults to False.
+    **kwargs: Additional keyword arguments passed to the reranker model.
+''')
+
+add_chinese_doc('rag.rerank.ModuleReranker', '''\
+使用可训练模块根据查询相关性重新排序文档的重排序器。
 
+ModuleReranker是一个专门的重排序器，利用可训练模型（如BGE-reranker、Cohere rerank等）来提高检索文档的相关性。它接收文档列表和查询，然后返回按相关性分数重新排序的文档。
+
+Args:
+    name (str): 重排序器的名称。默认为"ModuleReranker"。
+    model (Union[Callable, str]): 重排序模型。可以是模型名称（字符串）或可调用函数。
+    target (Optional[str]): 默认为None。
+    output_format (Optional[str]): 输出处理格式。默认为None。
+    join (Union[bool, str]): 是否连接结果。默认为False。
+    **kwargs: 传递给重排序模型模型的附加关键字参数。
 ''')
 
-add_chinese_doc('rag.utils.DocListManager.get_existing_paths_by_pattern', '''\
-根据给定的模式，检索符合条件的文档路径。
-`get_existing_paths_by_pattern(self, pattern: str) -> List[str]`
-此方法从数据库中获取所有符合提供的 SQL `LIKE` 模式的文档路径。
-参数:
-    pattern (str): 用于过滤文档路径的 SQL `LIKE` 模式。例如，`%example%` 匹配包含单词 "example" 的路径。
-返回值:
-    List[str]: 符合给定模式的文档路径列表。如果没有匹配的路径，则返回空列表。
-说明:
-    - 使用线程安全锁 (`self._db_lock`) 确保多线程环境下的数据库访问安全。
-    - SQL 查询中的 `LIKE` 操作符用于对文档路径进行模式匹配。
+add_example('rag.rerank.ModuleReranker', '''\
+>>> from lazyllm.tools.rag.rerank import ModuleReranker, DocNode
+>>> def simple_reranker(query, documents, top_n):
+...     query_lower = query.lower()
+...     scores = []
+...     for i, doc in enumerate(documents):
+...         score = sum(1 for word in query_lower.split() if word in doc)
+...         scores.append((i, score))
+...     scores.sort(key=lambda x: x[1], reverse=True)
+...     return scores[:top_n]
+>>> reranker = ModuleReranker(
+...     model=simple_reranker,
+...     topk=2
+... )
+>>> docs = [
+...     DocNode(text="机器学习算法在数据分析中应用广泛"),
+...     DocNode(text="深度学习模型需要大量训练数据"),
+...     DocNode(text="自然语言处理技术发展迅速"),
+...     DocNode(text="计算机视觉在自动驾驶中的应用")
+... ]
+>>> query = "机器学习"
+>>> results = reranker.forward(docs, query)
+>>> for i, doc in enumerate(results):
+...     print(f"  {i+1}. : {doc.text}")
+...     print(f"     相关性分数: {doc.relevance_score:.4f}")        
 ''')
 
-add_english_doc('rag.utils.DocListManager.enable_path_monitoring', '''\
-Enable or disable path monitoring for the document manager.
-`enable_path_monitoring(self, val: bool)`
-This method enables or disables the path monitoring functionality in the document manager. When enabled, a monitoring thread starts to handle path-related operations. When disabled, the thread stops and joins (waits for it to terminate).
+add_english_doc('rag.rerank.ModuleReranker.forward', '''\
+Forward pass of the reranker that reorders documents based on relevance to the query.
+
+This method takes a list of documents and a query, then uses the underlying reranking model to score and reorder the documents by relevance. The documents are processed in MetadataMode.EMBED format to ensure compatibility with the reranking model.
+
 Args:
-    val (bool): A boolean value indicating whether to enable (`True`) or disable (`False`) path monitoring.
-Notes:
-    - If `val` is `True`, path monitoring is enabled by setting `_monitor_continue` to `True` and starting the `_monitor_thread`.
-    - If `val` is `False`, path monitoring is disabled by setting `_monitor_continue` to `False` and joining the `_monitor_thread` if it is running.
-    - This method ensures thread-safe operation when managing the monitoring thread.
+    nodes (List[DocNode]): List of document nodes to be reranked.
+    query (str): The query string to rank documents against. Defaults to "".
+
+**Returns:**\n
+- List[DocNode]: List of document nodes reordered by relevance score, with relevance_score attribute added.
 ''')
 
-add_chinese_doc('rag.utils.DocListManager.enable_path_monitoring', '''\
-启用或禁用文档管理器的路径监控功能。
-`enable_path_monitoring(self, val: bool)`
-此方法用于启用或禁用文档管理器的路径监控功能。当启用时，会启动一个监控线程处理与路径相关的操作；当禁用时，会停止该线程并等待它终止。
-参数:
-    val (bool): 布尔值，指示是否启用 (`True`) 或禁用 (`False`) 路径监控。
-说明:
-    - 如果 `val` 为 `True`，路径监控功能会通过将 `_monitor_continue` 设置为 `True` 并启动 `_monitor_thread` 来启用。
-    - 如果 `val` 为 `False`，路径监控功能会通过将 `_monitor_continue` 设置为 `False` 并等待 `_monitor_thread` 终止来禁用。
-    - 方法在管理监控线程时确保线程操作是安全的。
+add_chinese_doc('rag.rerank.ModuleReranker.forward', '''\
+重排序器的前向传播，根据与查询的相关性重新排序文档。
+
+此方法接收文档列表和查询，然后使用底层重排序模型对文档进行评分和重新排序。文档以MetadataMode.EMBED格式处理，以确保与重排序模型的兼容性。
+
+Args:
+    nodes (List[DocNode]): 要重排序的文档节点列表。
+    query (str): 用于排序文档的查询字符串。默认为""。
+
+**Returns:**\n
+- List[DocNode]: 按相关性分数重新排序的文档节点列表，添加了relevance_score属性。
 ''')
 
+
 add_english_doc('rag.global_metadata.GlobalMetadataDesc', '''\
 A descriptor for global metadata, defining its type, optional element type, default value, and size constraints.
 `class GlobalMetadataDesc`
@@ -4436,4 +6182,529 @@ def my_reranker(node: DocNode, **kwargs):
 >>> index.query()
 Querying nodes...
 [DocNode(uid="2", content="Document 2")]
-''')
\ No newline at end of file
+''')
+
+# agent/functionCall.py
+add_agent_chinese_doc('functionCall.StreamResponse', '''\
+StreamResponse类用于封装带有前缀和颜色配置的流式输出行为。  
+当启用流式模式时，调用实例会将带颜色的文本推送到文件系统队列中，用于异步处理或显示。
+
+Args:
+    prefix (str): 输出内容前的前缀文本，通常用于标识信息来源或类别。
+    prefix_color (Optional[str]): 前缀文本的颜色，支持终端颜色代码，默认无颜色。
+    color (Optional[str]): 主体内容文本颜色，支持终端颜色代码，默认无颜色。
+    stream (bool): 是否启用流式输出模式，启用后会将文本推送至文件系统队列，默认关闭。
+''')
+
+add_agent_english_doc('functionCall.StreamResponse', '''\
+StreamResponse class encapsulates streaming output behavior with configurable prefix and colors.  
+When streaming is enabled, calling the instance enqueues colored text to a filesystem queue for asynchronous processing or display.
+
+Args:
+    prefix (str): Prefix text before the output, typically used to indicate the source or category.
+    prefix_color (Optional[str]): Color of the prefix text, supports terminal color codes, defaults to None.
+    color (Optional[str]): Color of the main content text, supports terminal color codes, defaults to None.
+    stream (bool): Whether to enable streaming output mode, which enqueues text to the filesystem queue, defaults to False.
+''')
+
+add_agent_example('functionCall.StreamResponse', '''\
+>>> from lazyllm.tools.agent.functionCall import StreamResponse
+>>> resp = StreamResponse(prefix="[INFO]", prefix_color="green", color="white", stream=True)
+>>> resp("Hello, world!")
+Hello, world!
+''')
+ 
+add_chinese_doc('rag.web.DocWebModule', """\
+文档Web界面模块，继承自ModuleBase，提供基于Web的文档管理交互界面。
+
+Args:
+    doc_server (ServerModule): 文档服务模块实例，提供后端API支持
+    title (str): 界面标题，默认为"文档管理演示终端"
+    port (int/range/list): 服务端口号或端口范围，默认为20800-20999
+    history (list): 初始聊天历史记录，默认为空列表
+    text_mode (Mode): 文本处理模式，默认为Mode.Dynamic(动态模式)
+    trace_mode (Mode): 追踪模式，默认为Mode.Refresh(刷新模式)
+
+类属性:
+    Mode: 模式枚举类，包含:
+        - Dynamic: 动态模式
+        - Refresh: 刷新模式
+        - Appendix: 附录模式
+
+注意事项:
+    - 需要配合有效的doc_server实例使用
+    - 端口冲突时会自动尝试范围内其他端口
+    - 服务停止后会释放相关资源
+""")
+
+add_english_doc('rag.web.DocWebModule', """\
+Document Web Interface Module, inherits from ModuleBase, provides web-based document management interface.
+
+Args:
+    doc_server (ServerModule): Document server module instance providing backend API support
+    title (str): Interface title, defaults to "文档管理演示终端"
+    port (int/range/list): Service port number or range, defaults to 20800-20999
+    history (list): Initial chat history, defaults to empty list
+    text_mode (Mode): Text processing mode, defaults to Mode.Dynamic
+    trace_mode (Mode): Trace mode, defaults to Mode.Refresh
+
+Class Attributes:
+    Mode: Mode enumeration class containing:
+        - Dynamic: Dynamic mode
+        - Refresh: Refresh mode
+        - Appendix: Appendix mode
+
+Notes:
+    - Requires a valid doc_server instance to work with
+    - Automatically tries other ports in range when port conflict occurs
+    - Releases resources when service is stopped
+""")
+
+add_chinese_doc('rag.web.DocWebModule.Mode', """\
+文档Web模块运行模式枚举类。
+
+取值:
+    Dynamic (0): 动态模式，实时更新内容
+    Refresh (1): 刷新模式，定期刷新内容
+    Appendix (2): 附录模式，将新内容作为附录添加
+
+""")
+
+add_english_doc('rag.web.DocWebModule.Mode', """\
+Operation mode enumeration class for DocWebModule.
+
+Values:
+    Dynamic (0): Dynamic mode, updates content in real-time
+    Refresh (1): Refresh mode, periodically refreshes content
+    Appendix (2): Appendix mode, adds new content as appendix
+
+""")
+
+
+add_example('rag.web.DocWebModule', '''\
+>>> import lazyllm
+>>> from lazyllm.tools.rag.web import DocWebModule
+>>> from lazyllm import
+>>> doc_server = ServerModule(url="your_url")
+>>> doc_web = DocWebModule(
+>>>   doc_server=doc_server,
+>>>   title="文档管理演示终端",
+>>>   port=range(20800, 20805)  # 自动寻找可用端口)
+>>> deploy_task = doc_web._get_deploy_tasks()
+>>> deploy_task()  
+>>> print(doc_web.url)
+>>> doc_web.stop()
+''')
+
+add_english_doc('rag.web.DocWebModule.wait', '''\
+Blocks the current thread to keep the web interface running until manually stopped.
+
+''')
+
+add_chinese_doc('rag.web.DocWebModule.wait', '''\
+阻塞当前线程以保持Web界面运行，直到手动停止。
+
+''')
+
+add_english_doc('rag.web.DocWebModule.stop', '''\
+Stops the web interface service and releases related resources.
+
+''')
+
+add_chinese_doc('rag.web.DocWebModule.stop', '''\
+停止Web界面服务并释放相关资源。
+
+''')
+
+# FuncNodeTransform
+add_english_doc('rag.transform.FuncNodeTransform', '''
+A wrapper class for user-defined functions that transforms document nodes.
+
+This wrapper supports two modes of operation:
+1. When trans_node is False (default): transforms text strings
+2. When trans_node is True: transforms DocNode objects
+
+The wrapper can handle various function signatures:
+- str -> List[str]: transform=lambda t: t.split('\\\\n')
+- str -> str: transform=lambda t: t[:3]
+- DocNode -> List[DocNode]: pipeline(lambda x:x, SentenceSplitter)
+- DocNode -> DocNode: pipeline(LLMParser)
+
+Args:
+    func (Union[Callable[[str], List[str]], Callable[[DocNode], List[DocNode]]]): The user-defined function to be wrapped.
+    trans_node (bool, optional): Determines whether the function operates on DocNode objects (True) or text strings (False). Defaults to None.
+    num_workers (int): Controls the number of threads or processes used for parallel processing. Defaults to 0.
+''')
+
+add_chinese_doc('rag.transform.FuncNodeTransform', '''
+用于包装用户自定义函数的转换器类。
+
+此包装器支持两种操作模式：
+1. 当 trans_node 为 False（默认）：转换文本字符串
+2. 当 trans_node 为 True：转换 DocNode 对象
+
+包装器可以处理各种函数签名：
+- str -> List[str]: transform=lambda t: t.split('\\\\n')
+- str -> str: transform=lambda t: t[:3]
+- DocNode -> List[DocNode]: pipeline(lambda x:x, SentenceSplitter)
+- DocNode -> DocNode: pipeline(LLMParser)
+
+Args:
+    func (Union[Callable[[str], List[str]], Callable[[DocNode], List[DocNode]]]): 要包装的用户自定义函数。
+    trans_node (bool, optional): 确定函数是操作 DocNode 对象（True）还是文本字符串（False）。默认为 None。
+    num_workers (int): 控制并行处理的线程/进程数量。默认为 0。
+''')
+
+add_example('rag.transform.FuncNodeTransform', '''
+>>> import lazyllm
+>>> from lazyllm.tools.rag import FuncNodeTransform
+>>> from lazyllm.tools import Document, SentenceSplitter
+
+# Example 1: Text-based transformation (trans_node=False)
+>>> def split_by_comma(text):
+...     return text.split(',')
+>>> text_transform = FuncNodeTransform(split_by_comma, trans_node=False)
+
+# Example 2: Node-based transformation (trans_node=True)
+>>> def custom_node_transform(node):
+...     # Process the DocNode and return a list of DocNodes
+...     return [node]  # Simple pass-through
+>>> node_transform = FuncNodeTransform(custom_node_transform, trans_node=True)
+
+# Example 3: Using with Document
+>>> m = lazyllm.OnlineEmbeddingModule(source="glm")
+>>> documents = Document(dataset_path='your_doc_path', embed=m, manager=False)
+>>> documents.create_node_group(name="custom", transform=text_transform)
+''')
+
+# FuncNodeTransform.transform
+add_english_doc('rag.transform.FuncNodeTransform.transform', '''
+Transform a document node using the wrapped user-defined function.
+
+This method applies the user-defined function to either the text content of the node (when trans_node=False) or the node itself (when trans_node=True).
+
+Args:
+    node (DocNode): The document node to be transformed.
+    **kwargs: Additional keyword arguments passed to the transformation function.
+
+Returns:
+    List[Union[str, DocNode]]: The transformed results, which can be either strings or DocNode objects depending on the function implementation.
+''')
+
+add_chinese_doc('rag.transform.FuncNodeTransform.transform', '''
+使用包装的用户自定义函数转换文档节点。
+
+此方法将用户自定义函数应用于节点的文本内容（当 trans_node=False 时）或节点本身（当 trans_node=True 时）。
+
+Args:
+    node (DocNode): 要转换的文档节点。
+    **kwargs: 传递给转换函数的额外关键字参数。
+
+Returns:
+    List[Union[str, DocNode]]: 转换结果，根据函数实现可以是字符串或 DocNode 对象。
+''')
+
+
+add_chinese_doc('rag.web.WebUi', """\
+基于 Gradio 的知识库文件管理 Web UI 工具类。
+
+该类用于构建一个简单的 Web 界面，支持创建分组、上传文件、列出/删除分组或文件，并通过 RESTful API 与后端交互。支持快速集成与展示文件管理能力。
+
+Args:
+    base_url (str): 后端 API 服务的基础地址。
+""")
+
+add_english_doc('rag.web.WebUi', """\
+A Gradio-based web UI for managing knowledge base files.
+
+This class provides an interactive UI to create/delete groups, upload files, list files, and perform deletion operations via RESTful APIs. It is designed for rapid integration of file and group management.
+
+Args:
+    base_url (str): Base URL of the backend API service.
+""")
+
+add_chinese_doc("rag.web.WebUi.basic_headers", '''
+生成通用的 HTTP 请求头。
+
+Args:
+    content_type (bool): 是否包含 Content-Type 头信息（默认为 True）。
+
+Returns:
+    dict: HTTP 请求头字典。
+''')
+
+add_english_doc("rag.web.WebUi.basic_headers", '''
+Generate standard HTTP headers.
+
+Args:
+    content_type (bool): Whether to include Content-Type in the headers (default: True).
+
+Returns:
+    dict: Dictionary of HTTP headers.
+''')
+
+add_chinese_doc("rag.web.WebUi.muti_headers", '''
+生成用于上传文件的 HTTP 请求头。
+
+Returns:
+    dict: HTTP 请求头字典。
+''')
+
+add_english_doc("rag.web.WebUi.muti_headers", '''
+Generate HTTP headers for file upload.
+
+Returns:
+    dict: Dictionary of HTTP headers.
+''')
+
+add_chinese_doc("rag.web.WebUi.post_request", '''
+发送 POST 请求。
+
+Args:
+    url (str): 请求地址。
+    data (dict): 请求数据，将被转为 JSON。
+
+Returns:
+    dict: 响应结果的 JSON。
+''')
+
+add_english_doc("rag.web.WebUi.post_request", '''
+Send a POST request.
+
+Args:
+    url (str): Target request URL.
+    data (dict): Request data (will be serialized as JSON).
+
+Returns:
+    dict: JSON response from the server.
+''')
+
+add_chinese_doc("rag.web.WebUi.get_request", '''
+发送 GET 请求。
+
+Args:
+    url (str): 请求地址。
+
+Returns:
+    dict: 响应结果的 JSON。
+''')
+
+add_english_doc("rag.web.WebUi.get_request", '''
+Send a GET request.
+
+Args:
+    url (str): Target request URL.
+
+Returns:
+    dict: JSON response from the server.
+''')
+
+add_chinese_doc("rag.web.WebUi.new_group", '''
+创建新的文件分组。
+
+Args:
+    group_name (str): 分组名称。
+
+Returns:
+    str: 创建结果的提示信息。
+''')
+
+add_english_doc("rag.web.WebUi.new_group", '''
+Create a new file group.
+
+Args:
+    group_name (str): Name of the new group.
+
+Returns:
+    str: Server message about the creation result.
+''')
+
+add_chinese_doc("rag.web.WebUi.delete_group", '''
+删除指定的文件分组。
+
+Args:
+    group_name (str): 分组名称。
+
+Returns:
+    str: 删除结果信息。
+''')
+
+add_english_doc("rag.web.WebUi.delete_group", '''
+Delete a specific file group.
+
+Args:
+    group_name (str): Name of the group to delete.
+
+Returns:
+    str: Server message about the deletion.
+''')
+
+add_chinese_doc("rag.web.WebUi.list_groups", '''
+列出所有文件分组。
+
+Returns:
+    List[str]: 分组名称列表。
+''')
+
+add_english_doc("rag.web.WebUi.list_groups", '''
+List all available file groups.
+
+Returns:
+    List[str]: List of group names.
+''')
+
+add_chinese_doc("rag.web.WebUi.upload_files", '''
+向指定分组上传文件。
+
+Args:
+    group_name (str): 分组名称。
+    override (bool): 是否覆盖已存在的文件（默认 True）。
+
+Returns:
+    Any: 后端返回的上传结果数据。
+''')
+
+add_english_doc("rag.web.WebUi.upload_files", '''
+Upload files to a specified group.
+
+Args:
+    group_name (str): Name of the group.
+    override (bool): Whether to override existing files (default: True).
+
+Returns:
+    Any: Data returned by the backend.
+''')
+
+add_chinese_doc("rag.web.WebUi.list_files_in_group", '''
+列出指定分组下的所有文件。
+
+Args:
+    group_name (str): 分组名称。
+
+Returns:
+    List: 文件信息列表。
+''')
+
+add_english_doc("rag.web.WebUi.list_files_in_group", '''
+List all files within a specific group.
+
+Args:
+    group_name (str): Name of the group.
+
+Returns:
+    List: List of file information.
+''')
+
+add_chinese_doc("rag.web.WebUi.delete_file", '''
+从指定分组中删除文件。
+
+Args:
+    group_name (str): 分组名称。
+    file_ids (List[str]): 要删除的文件 ID 列表。
+
+Returns:
+    str: 删除结果提示。
+''')
+
+add_english_doc("rag.web.WebUi.delete_file", '''
+Delete specific files from a group.
+
+Args:
+    group_name (str): Name of the group.
+    file_ids (List[str]): IDs of files to delete.
+
+Returns:
+    str: Deletion result message.
+''')
+
+add_chinese_doc("rag.web.WebUi.gr_show_list", '''
+以 Gradio 表格的形式展示字符串列表。
+
+Args:
+    str_list (List): 字符串或子项列表。
+    list_name (Union[str, List]): 表头名称或列名列表。
+
+Returns:
+    gr.DataFrame: Gradio 表格组件。
+''')
+
+add_english_doc("rag.web.WebUi.gr_show_list", '''
+Display a list of strings as a Gradio DataFrame.
+
+Args:
+    str_list (List): List of strings or rows.
+    list_name (Union[str, List]): Column name(s) for the table.
+
+Returns:
+    gr.DataFrame: Gradio DataFrame component.
+''')
+
+add_chinese_doc("rag.web.WebUi.create_ui", '''
+构建基于 Gradio 的文件管理图形界面，包含分组列表、上传、查看、删除等功能标签页。
+
+Returns:
+    gr.Blocks: 完整的 Gradio UI 应用实例。
+''')
+
+add_english_doc("rag.web.WebUi.create_ui", '''
+Build a Gradio-based file management UI, including tabs for group listing, file uploading, viewing, and deletion.
+
+Returns:
+    gr.Blocks: A complete Gradio application instance.
+''')
+
+add_chinese_doc('rag.index_base.IndexBase.update', '''\
+更新索引内容。
+
+该方法接收一组文档节点对象，并将其添加或更新到索引结构中。通常用于增量构建或刷新索引。
+
+Args:
+    nodes (List[DocNode]): 需要更新的文档节点列表。
+''')
+
+add_english_doc('rag.index_base.IndexBase.update', '''\
+Update index contents.
+
+This method receives a list of document nodes and updates or inserts them into the index structure. Typically used for incremental indexing or refreshing data.
+
+Args:
+    nodes (List[DocNode]): A list of document nodes to update or insert.
+''')
+
+add_chinese_doc('rag.index_base.IndexBase.remove', '''\
+从索引中移除指定文档节点。
+
+可根据唯一标识符列表删除索引中的文档节点，可选地指定组名称以限定范围。
+
+Args:
+    uids (List[str]): 需要移除的文档节点的唯一标识符列表。
+    group_name (Optional[str]): 可选的组名称，用于限定要删除的范围。
+''')
+
+add_english_doc('rag.index_base.IndexBase.remove', '''\
+Remove specific document nodes from the index.
+
+Removes document nodes based on their unique identifiers, optionally scoped by group name.
+
+Args:
+    uids (List[str]): List of unique IDs corresponding to the document nodes to remove.
+    group_name (Optional[str]): Optional group name to scope the removal operation.
+''')
+
+add_chinese_doc('rag.index_base.IndexBase.query', '''\
+执行索引查询。
+
+根据传入的参数执行查询操作，返回匹配的文档节点列表。具体查询逻辑由实现类定义。
+
+Returns:
+    List[DocNode]: 查询结果的文档节点列表。
+''')
+
+add_english_doc('rag.index_base.IndexBase.query', '''\
+Execute a query over the index.
+
+Performs a query based on the given arguments and returns matching document nodes. The logic depends on the specific implementation.
+
+Returns:
+    List[DocNode]: A list of matched document nodes from the index.
+''')
diff --git a/lazyllm/docs/utils.py b/lazyllm/docs/utils.py
index 8b3f32df9..ec0f83838 100644
--- a/lazyllm/docs/utils.py
+++ b/lazyllm/docs/utils.py
@@ -1,5 +1,7 @@
 import lazyllm
 import ast
+import threading
+from typing import Tuple
 
 cpp_add_doc_code = '''
 namespace py = pybind11;
@@ -23,6 +25,28 @@
 }
 '''
 
+
+class DuplicateDocError(RuntimeError):
+    pass
+
+_doc_registry: set[Tuple[str, str, str, str]] = set()
+_doc_registry_lock = threading.Lock()
+
+def _reg_key(obj_name: str, module, append: str) -> Tuple[str, str, str, str]:
+    lang = str(lazyllm.config['language']).upper()
+    sec = 'BASE' if not append else f'APPEND:{append.strip()}'
+    return (lang, getattr(module, '__name__', str(module)), obj_name, sec)
+
+def _guard_register_once(obj_name: str, module, append: str = '') -> None:
+    key = _reg_key(obj_name, module, append)
+    with _doc_registry_lock:
+        if key in _doc_registry:
+            raise DuplicateDocError(
+                f"Doc for {key[1]}.{obj_name} [{key[0]} / {key[3]}] already added"
+            )
+        _doc_registry.add(key)
+
+
 all_examples = []
 
 def get_all_examples():   # Examples are not always exported, so process them in case of need.
@@ -42,6 +66,8 @@ def get_all_examples():   # Examples are not always exported, so process them in
 lazyllm.config.add('language', str, 'ENGLISH', 'LANGUAGE')
 
 def add_doc(obj_name, docstr, module, append=''):
+    _guard_register_once(obj_name, module, append)
+
     obj = module
     for n in obj_name.split('.'):
         if isinstance(obj, type): obj = obj.__dict__[n]
diff --git a/lazyllm/engine/engine.py b/lazyllm/engine/engine.py
index b3c618746..db782b208 100644
--- a/lazyllm/engine/engine.py
+++ b/lazyllm/engine/engine.py
@@ -18,7 +18,7 @@
 import functools
 from itertools import repeat
 import copy
-from abc import ABC, abstractclassmethod
+from abc import ABC, abstractmethod
 from enum import Enum
 from datetime import datetime, timedelta
 import requests
@@ -80,7 +80,7 @@ def start(self, nodes: Dict[str, Any]) -> None:
         ...
 
     @overload
-    def start(self, nodes: List[Dict] = [], edges: List[Dict] = [], resources: List[Dict] = [],
+    def start(self, nodes: List[Dict] = [], edges: List[Dict] = [], resources: List[Dict] = [],  # noqa B006
               gid: Optional[str] = None, name: Optional[str] = None, _history_ids: Optional[List[str]] = None) -> str:
         ...
 
@@ -89,11 +89,14 @@ def update(self, nodes: List[Dict]) -> None:
         ...
 
     @overload
-    def update(self, gid: str, nodes: List[Dict], edges: List[Dict] = [],
-               resources: List[Dict] = []) -> str:
+    def update(self, gid: str, nodes: List[Dict], edges: List[Dict] = [],  # noqa B006
+               resources: List[Dict] = []) -> str:  # noqa B006
         ...
 
+    @abstractmethod
     def release_node(self, nodeid: str): pass
+
+    @abstractmethod
     def stop(self, node_id: Optional[str] = None, task_name: Optional[str] = None): pass
 
     def build_node(self, node) -> Node:
@@ -133,13 +136,16 @@ def _impl(nid, recursive):
                 if recursive: yield from self.subnodes(id, True)
         return list(_impl(nodeid, recursive))
 
-    @abstractclassmethod
+    @classmethod
+    @abstractmethod
     def launch_localllm_train_service(self): pass
 
-    @abstractclassmethod
+    @classmethod
+    @abstractmethod
     def launch_localllm_infer_service(self): pass
 
-    @abstractclassmethod
+    @classmethod
+    @abstractmethod
     def get_infra_handle(self, token, mid) -> lazyllm.TrainableModule: pass
 
 
@@ -249,8 +255,10 @@ def make_server_resource(kind: str, graph: ServerGraph, args: Dict[str, Any]):
 
 
 @NodeConstructor.register('Graph', 'SubGraph', subitems=['nodes', 'resources'])
-def make_graph(nodes: List[dict], edges: List[Union[List[str], dict]] = [],
-               resources: List[dict] = [], enable_server: bool = True, _history_ids: Optional[List[str]] = None):
+def make_graph(nodes: List[dict], edges: Optional[List[Union[List[str], dict]]] = None,
+               resources: Optional[List[dict]] = None, enable_server: bool = True,
+               _history_ids: Optional[List[str]] = None):
+    edges, resources = edges or [], resources or []
     engine = Engine()
     server_resources = dict(server=None, web=None)
     for resource in resources:
@@ -292,8 +300,8 @@ def make_graph(nodes: List[dict], edges: List[Union[List[str], dict]] = [],
 
 
 @NodeConstructor.register('App')
-def make_subapp(nodes: List[dict], edges: List[dict], resources: List[dict] = []):
-    return make_graph(nodes, edges, resources)
+def make_subapp(nodes: List[dict], edges: List[dict], resources: Optional[List[dict]] = None):
+    return make_graph(nodes, edges, resources or [])
 
 
 # Note: It will be very dangerous if provided to C-end users as a SAAS service
@@ -335,9 +343,9 @@ def make_diverter(nodes: List[dict]):
 
 
 @NodeConstructor.register('Warp', subitems=['nodes', 'resources'])
-def make_warp(nodes: List[dict], edges: List[dict] = [], resources: List[dict] = [],
+def make_warp(nodes: List[dict], edges: Optional[List[dict]] = None, resources: Optional[List[dict]] = None,
               batch_flags: Optional[List[int]] = None):
-    wp = lazyllm.warp(make_graph(nodes, edges, resources, enable_server=False))
+    wp = lazyllm.warp(make_graph(nodes, edges or [], resources or [], enable_server=False))
     if batch_flags and len(batch_flags) > 1:
         def transform(*args):
             args = [a if b else repeat(a) for a, b in zip(args, batch_flags)]
@@ -348,12 +356,12 @@ def transform(*args):
 
 
 @NodeConstructor.register('Loop', subitems=['nodes', 'resources'])
-def make_loop(nodes: List[dict], edges: List[dict] = [], resources: List[dict] = [],
+def make_loop(nodes: List[dict], edges: Optional[List[dict]] = None, resources: Optional[List[dict]] = None,
               stop_condition: Optional[str] = None, judge_on_full_input: bool = True, count=sys.maxsize):
     assert stop_condition is not None or count > 1, 'stop_condition or count is required'
     if stop_condition is not None:
         stop_condition = make_code(stop_condition)
-    return lazyllm.loop(make_graph(nodes, edges, resources, enable_server=False),
+    return lazyllm.loop(make_graph(nodes, edges or [], resources or [], enable_server=False),
                         stop_condition=stop_condition, judge_on_full_input=judge_on_full_input, count=count)
 
 
@@ -380,18 +388,18 @@ def make_intention(base_model: str, nodes: Dict[str, List[dict]],
                    prompt: str = '', constrain: str = '', attention: str = ''):
     with IntentClassifier(Engine().build_node(base_model).func,
                           prompt=prompt, constrain=constrain, attention=attention) as ic:
-        for cond, nodes in nodes.items():
-            if isinstance(nodes, list) and len(nodes) > 1:
-                f = pipeline([Engine().build_node(node).func for node in nodes])
+        for cond, sub_nodes in nodes.items():
+            if isinstance(sub_nodes, list) and len(sub_nodes) > 1:
+                f = pipeline([Engine().build_node(node).func for node in sub_nodes])
             else:
-                f = Engine().build_node(nodes[0] if isinstance(nodes, list) else nodes).func
+                f = Engine().build_node(sub_nodes[0] if isinstance(sub_nodes, list) else sub_nodes).func
             ic.case[cond::f]
     return ic
 
 
 @NodeConstructor.register('Document', need_id=True)
 def make_document(dataset_path: str, _node_id: str, embed: Node = None, create_ui: bool = False, server: bool = False,
-                  node_group: List[Dict] = [], activated_groups: List[Tuple[str, Optional[List[Node]]]] = []):
+                  node_group: List[Dict] = [], activated_groups: List[Tuple[str, Optional[List[Node]]]] = []):  # noqa B006
     groups = [[g, None] if isinstance(g, str) else g for g in activated_groups]
     groups += [[g['name'], g.pop('embed', None)] for g in node_group]
     groups = [[g, e] if (not e or isinstance(e, list)) else [g, [e]] for g, e in groups]
@@ -422,7 +430,8 @@ def make_retriever(doc: str, group_name: str, similarity: str = 'cosine', simila
 
 @NodeConstructor.register('Reranker')
 def make_reranker(type: str = 'ModuleReranker', target: Optional[str] = None,
-                  output_format: Optional[str] = None, join: Union[bool, str] = False, arguments: Dict = {}):
+                  output_format: Optional[str] = None, join: Union[bool, str] = False, arguments: Optional[Dict] = None):
+    arguments = arguments or {}
     if type == 'ModuleReranker' and not isinstance(arguments['model'], lazyllm.TrainableModule):
         if node := Engine().build_node(arguments['model']):
             arguments['model'] = node.func
@@ -490,9 +499,9 @@ def make_tools_for_llm(tools: List[str]):
     return lazyllm.tools.ToolManager(_get_tools(tools))
 
 @NodeConstructor.register('MCPTool', subitems=['tools'])
-def make_mcp_tool(command_or_url: str, tool_name: str, args: List[str] = [], env: Dict[str, str] = None,
+def make_mcp_tool(command_or_url: str, tool_name: str, args: Optional[List[str]] = None, env: Dict[str, str] = None,
                   headers: Dict[str, str] = None, timeout: float = 5):
-    client = MCPClient(command_or_url, args, env, headers, timeout)
+    client = MCPClient(command_or_url, args or [], env, headers, timeout)
     tools = client.get_tools([tool_name])
     assert len(tools) == 1, f"Current MCP client does not support tool '{tool_name}'. \
         Please check if the tool name is correct."
@@ -790,13 +799,13 @@ def make_shared_model(llm: str, local: bool = True, prompt: Optional[str] = None
 def make_online_llm(source: str = None, base_model: Optional[str] = None, prompt: Optional[str] = None,
                     api_key: Optional[str] = None, secret_key: Optional[str] = None,
                     stream: bool = False, token: Optional[str] = None, base_url: Optional[str] = None,
-                    history: Optional[List[List[str]]] = None, static_params: Optional[Dict[str, Any]] = {}):
+                    history: Optional[List[List[str]]] = None, static_params: Optional[Dict[str, Any]] = None):
     if source: source = source.lower()
     if source == 'lazyllm':
         return make_shared_llm(base_model, False, prompt, token, stream, history=history)
     else:
         return lazyllm.OnlineChatModule(base_model, source, base_url, stream, api_key=api_key, secret_key=secret_key,
-                                        static_params=static_params).prompt(prompt, history=history)
+                                        static_params=static_params or {}).prompt(prompt, history=history)
 
 
 class LLM(lazyllm.ModuleBase):
@@ -971,8 +980,8 @@ def make_sql_manager(db_type: str = None, user: str = None, password: str = None
                                     db_name=db_name, options_str=options_str, tables_info_dict=tables_info_dict)
 
 @NodeConstructor.register('HTTP')
-def make_http(method: str, url: str, api_key: str = '', headers: dict = {}, params: dict = {}, body: str = ''):
-    return HttpRequest(method=method, url=url, api_key=api_key, headers=headers, params=params, body=body)
+def make_http(method: str, url: str, api_key: str = '', headers: dict = None, params: dict = None, body: str = ''):
+    return HttpRequest(method=method, url=url, api_key=api_key, headers=headers or {}, params=params or {}, body=body)
 
 
 class SD(lazyllm.Module):
diff --git a/lazyllm/engine/lightengine.py b/lazyllm/engine/lightengine.py
index 947431581..1c761e821 100644
--- a/lazyllm/engine/lightengine.py
+++ b/lazyllm/engine/lightengine.py
@@ -339,7 +339,8 @@ def update_node(self, node):
         self._nodes[node.id] = super(__class__, self).build_node(node)
         return self._nodes[node.id]
 
-    def start(self, nodes, edges=[], resources=[], gid=None, name=None, _history_ids=None):
+    def start(self, nodes, edges=None, resources=None, gid=None, name=None, _history_ids=None):
+        edges, resources = edges or [], resources or []
         if isinstance(nodes, str):
             assert not edges and not resources and not gid and not name
             self.build_node(nodes).func.start()
@@ -380,7 +381,8 @@ def stop(self, node_id: Optional[str] = None, task_name: Optional[str] = None):
                 node.func.stop()
 
     def update(self, gid_or_nodes: Union[str, Dict, List[Dict]], nodes: List[Dict],
-               edges: List[Dict] = [], resources: List[Dict] = []) -> str:
+               edges: Optional[List[Dict]] = None, resources: Optional[List[Dict]] = None) -> str:
+        edges, resources = edges or [], resources or []
         if isinstance(gid_or_nodes, str):
             assert (gid := gid_or_nodes) in self._nodes
             name = self._nodes[gid].name
diff --git a/lazyllm/flow/flow.py b/lazyllm/flow/flow.py
index cc8d0100b..22d5b0f7b 100644
--- a/lazyllm/flow/flow.py
+++ b/lazyllm/flow/flow.py
@@ -18,6 +18,7 @@
 from collections import deque
 import uuid
 from ..hook import LazyLLMHook
+from itertools import repeat
 
 
 class _FuncWrap(object):
@@ -43,14 +44,14 @@ def new_ins(obj, cls):
         return True if (cls is _FuncWrap or (_oldins(cls, (tuple, list)) and _FuncWrap in cls)) else _oldins(obj._f, cls)
     return _oldins(obj, cls)
 
-setattr(builtins, 'isinstance', new_ins)
+builtins.isinstance = new_ins
 
 def _is_function(f):
     return isinstance(f, (types.BuiltinFunctionType, types.FunctionType,
                           types.BuiltinMethodType, types.MethodType, types.LambdaType))
 
 class FlowBase(metaclass=_MetaBind):
-    def __init__(self, *items, item_names=[], auto_capture=False) -> None:
+    def __init__(self, *items, item_names=None, auto_capture=False) -> None:
         self._father = None
         self._items, self._item_names, self._item_ids = [], [], []
         self._auto_capture = auto_capture
@@ -58,7 +59,7 @@ def __init__(self, *items, item_names=[], auto_capture=False) -> None:
         self._curr_frame = None
         self._flow_id = str(uuid.uuid4().hex)
 
-        for k, v in zip(item_names if item_names else [None] * len(items), items):
+        for k, v in zip(item_names if item_names else repeat(None), items):
             self._add(k, v)
 
         self._capture = False
@@ -146,8 +147,8 @@ def _bind_enter(self):
 def _bind_exit(self, exc_type, exc_val, exc_tb):
     return self._f.__exit__(exc_type, exc_val, exc_tb)
 
-setattr(bind, '__enter__', _bind_enter)
-setattr(bind, '__exit__', _bind_exit)
+bind.__enter__ = _bind_enter
+bind.__exit__ = _bind_exit
 
 
 # TODO(wangzhihong): support workflow launcher.
@@ -642,7 +643,7 @@ def compute_node(self, sid, node, intermediate_results, futures):
 
         def get_input(name):
             if name.startswith('_lazyllm_constant_'):
-                return self._constants[int(name.strip('_lazyllm_constant_'))]
+                return self._constants[int(name.removeprefix('_lazyllm_constant_'))]
             if name not in intermediate_results['values']:
                 r = futures[name].result()
                 with intermediate_results['lock']:
diff --git a/lazyllm/launcher.py b/lazyllm/launcher.py
index 6178a0a11..6795c5871 100644
--- a/lazyllm/launcher.py
+++ b/lazyllm/launcher.py
@@ -861,7 +861,7 @@ def wait_for_service_ready(self, timeout=300):
 
         def _is_gateway_ready(self, timeout):
             url = f"http://{self.get_jobip()}:{self.deployment_port}{self.path}"
-            for i in range(self.gateway_retry):
+            for _ in range(self.gateway_retry):
                 try:
                     response = requests.get(url, timeout=timeout)
                     if response.status_code != 503:
diff --git a/lazyllm/module/llms/onlinemodule/base/onlineChatModuleBase.py b/lazyllm/module/llms/onlinemodule/base/onlineChatModuleBase.py
index 08dc2db34..0257f5cca 100644
--- a/lazyllm/module/llms/onlinemodule/base/onlineChatModuleBase.py
+++ b/lazyllm/module/llms/onlinemodule/base/onlineChatModuleBase.py
@@ -34,7 +34,7 @@ class OnlineChatModuleBase(LLMBase):
 
     def __init__(self, model_series: str, api_key: str, base_url: str, model_name: str,
                  stream: Union[bool, Dict[str, str]], return_trace: bool = False,
-                 skip_auth: bool = False, static_params: StaticParams = {}, **kwargs):
+                 skip_auth: bool = False, static_params: Optional[StaticParams] = None, **kwargs):
         super().__init__(stream=stream, return_trace=return_trace)
         self._model_series = model_series
         if skip_auth and not api_key:
@@ -48,7 +48,7 @@ def __init__(self, model_series: str, api_key: str, base_url: str, model_name: s
         self._is_trained = False
         self._model_optional_params = {}
         self._vlm_force_format_input_with_files = False
-        self._static_params = static_params
+        self._static_params = static_params or {}
 
     @property
     def series(self):
diff --git a/lazyllm/module/llms/onlinemodule/supplier/__init__.py b/lazyllm/module/llms/onlinemodule/supplier/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/lazyllm/module/llms/onlinemodule/supplier/glm.py b/lazyllm/module/llms/onlinemodule/supplier/glm.py
index 53357cf68..7b565c42b 100644
--- a/lazyllm/module/llms/onlinemodule/supplier/glm.py
+++ b/lazyllm/module/llms/onlinemodule/supplier/glm.py
@@ -283,7 +283,7 @@ def __init__(self, model_name: str = None, api_key: str = None, return_trace: bo
                                or lazyllm.config['glm_stt_model_name'], api_key=api_key,
                                return_trace=return_trace, **kwargs)
 
-    def _forward(self, files: List[str] = [], **kwargs):
+    def _forward(self, files: List[str] = [], **kwargs):  # noqa B006
         assert len(files) == 1, "GLMSTTModule only supports one file"
         assert os.path.exists(files[0]), f"File {files[0]} not found"
         transcriptResponse = self._client.audio.transcriptions.create(
diff --git a/lazyllm/module/llms/onlinemodule/supplier/qwen.py b/lazyllm/module/llms/onlinemodule/supplier/qwen.py
index 027c1f169..634087c8b 100644
--- a/lazyllm/module/llms/onlinemodule/supplier/qwen.py
+++ b/lazyllm/module/llms/onlinemodule/supplier/qwen.py
@@ -378,7 +378,7 @@ def __init__(self, model: str = None, api_key: str = None, return_trace: bool =
                                 model_name=model or lazyllm.config['qwen_stt_model_name'] or QwenSTTModule.MODEL_NAME,
                                 return_trace=return_trace, **kwargs)
 
-    def _forward(self, files: List[str] = [], **kwargs):
+    def _forward(self, files: List[str] = [], **kwargs):  # noqa B006
         assert any(file.startswith('http') for file in files), "QwenSTTModule only supports http file urls"
         call_params = {'model': self._model_name, 'file_urls': files, **kwargs}
         if self._api_key: call_params['api_key'] = self._api_key
diff --git a/lazyllm/module/llms/onlinemodule/supplier/sensenova.py b/lazyllm/module/llms/onlinemodule/supplier/sensenova.py
index c7d893b2d..bd488a8b3 100644
--- a/lazyllm/module/llms/onlinemodule/supplier/sensenova.py
+++ b/lazyllm/module/llms/onlinemodule/supplier/sensenova.py
@@ -17,7 +17,7 @@ class _SenseNovaBase(object):
     def _get_api_key(self, api_key: str, secret_key: str):
         if not api_key and not secret_key:
             api_key, secret_key = lazyllm.config['sensenova_api_key'], lazyllm.config['sensenova_secret_key']
-        if secret_key.startswith('sk-'): api_key, secret_key = secret_key, None
+        if secret_key and secret_key.startswith('sk-'): api_key, secret_key = secret_key, None
         if not api_key: raise ValueError('api_key is required for sensecore')
         if not api_key.startswith('sk-'):
             if ':' in api_key: api_key, secret_key = api_key.split(':', 1)
diff --git a/lazyllm/module/llms/trainablemodule.py b/lazyllm/module/llms/trainablemodule.py
index 738e22aa1..673e6d7b0 100644
--- a/lazyllm/module/llms/trainablemodule.py
+++ b/lazyllm/module/llms/trainablemodule.py
@@ -78,7 +78,7 @@ def __init__(self, base_model: str = '', target_path: str = '', stream: bool = F
         if base_model and deploy: self.deploy_method(deploy)
         self._prepare_deploy = lambda target_path, base_model: lazyllm.package(target_path, base_model)
 
-    def _get_train_or_deploy_args(self, arg_cls: str, disable: List[str] = []):
+    def _get_train_or_deploy_args(self, arg_cls: str, disable: List[str] = []):  # noqa B006
         args = getattr(self, f'_{arg_cls}_args', dict()).copy()
         if len(set(args.keys()).intersection(set(disable))) > 0:
             raise ValueError(f'Key `{", ".join(disable)}` can not be set in '
@@ -128,7 +128,7 @@ def after_train(real_target_path):
     def _get_all_finetuned_models(self):
         valid_paths = []
         invalid_paths = []
-        for root, dirs, files in os.walk(self._target_path):
+        for root, _, files in os.walk(self._target_path):
             if root.endswith('lazyllm_merge'):
                 model_path = os.path.abspath(root)
                 model_id = model_path.split(os.sep)[-2].split(self._delimiter)[0]
@@ -453,7 +453,7 @@ def _record_usage_impl(self, usage: dict):
             for k in globals["usage"][par_muduleid]:
                 globals["usage"][par_muduleid][k] += usage[k]
 
-    def forward(self, __input: Union[Tuple[Union[str, Dict], str], str, Dict] = package(),
+    def forward(self, __input: Union[Tuple[Union[str, Dict], str], str, Dict] = package(),  # noqa B008
                 *, llm_chat_history=None, lazyllm_files=None, tools=None, stream_output=False, **kw):
         __input, files = self._get_files(__input, lazyllm_files)
         text_input_for_token_usage = __input = self._prompt.generate_prompt(__input, llm_chat_history, tools)
@@ -481,7 +481,7 @@ def forward(self, __input: Union[Tuple[Union[str, Dict], str], str, Dict] = pack
     def _maybe_has_fc(self, token: str, chunk: str) -> bool:
         return token and (token.startswith(chunk if token.startswith('\n') else chunk.lstrip('\n')) or token in chunk)
 
-    def _forward_impl(self, data: Union[Tuple[Union[str, Dict], str], str, Dict] = package(), *,
+    def _forward_impl(self, data: Union[Tuple[Union[str, Dict], str], str, Dict] = package(), *,  # noqa B008
                       url: str, stream_output: Optional[Union[bool, Dict]] = None, text_input: Optional[str] = None):
         headers = self.template_headers or {'Content-Type': 'application/json'}
         parse_parameters = self.stream_parse_parameters if stream_output else {"delimiter": b"<|lazyllm_delimiter|>"}
@@ -516,7 +516,7 @@ def _forward_impl(self, data: Union[Tuple[Union[str, Dict], str], str, Dict] = p
             if text_input: self._record_usage(text_input, temp_output)
             return self._formatter(temp_output)
 
-    def _modify_parameters(self, paras: dict, kw: dict, *, optional_keys: Union[List[str], str] = []):
+    def _modify_parameters(self, paras: dict, kw: dict, *, optional_keys: Union[List[str], str] = None):
         for key, value in paras.items():
             if key == self.keys_name_handle['inputs']: continue
             elif isinstance(value, dict):
@@ -526,10 +526,10 @@ def _modify_parameters(self, paras: dict, kw: dict, *, optional_keys: Union[List
                 else: [setattr(value, k, kw.pop(k)) for k in value.keys() if k in kw]
             elif key in kw: paras[key] = kw.pop(key)
 
-        if isinstance(optional_keys, str): optional_keys = [optional_keys]
+        optional_keys = [optional_keys] if isinstance(optional_keys, str) else (optional_keys or [])
         assert set(kw.keys()).issubset(set(optional_keys)), f'{kw.keys()} is not in {optional_keys}'
         paras.update(kw)
         return paras
 
-    def set_default_parameters(self, *, optional_keys: List[str] = [], **kw):
-        self._modify_parameters(self.template_message, kw, optional_keys=optional_keys)
+    def set_default_parameters(self, *, optional_keys: Optional[List[str]] = None, **kw):
+        self._modify_parameters(self.template_message, kw, optional_keys=optional_keys or [])
diff --git a/lazyllm/module/module.py b/lazyllm/module/module.py
index 46d4fa640..67b2c21bf 100644
--- a/lazyllm/module/module.py
+++ b/lazyllm/module/module.py
@@ -283,7 +283,7 @@ def flow_start(self):
 
 
 lazyllm.ReprRule.add_rule('Module', 'Action', 'Flow')
-setattr(lazyllm.LazyLLMFlowsBase, 'start', flow_start)
+lazyllm.LazyLLMFlowsBase.start = flow_start
 
 
 class ModuleRegistryBase(ModuleBase, metaclass=lazyllm.LazyLLMRegisterMetaClass):
diff --git a/lazyllm/module/servermodule.py b/lazyllm/module/servermodule.py
index 54bd26775..a0236124d 100644
--- a/lazyllm/module/servermodule.py
+++ b/lazyllm/module/servermodule.py
@@ -221,7 +221,7 @@ def _call(self, fname, *args, **kwargs):
         r = requests.post(url, json=(fname, args, kwargs), headers={'Content-Type': 'application/json'})
         return pickle.loads(codecs.decode(r.content, "base64"))
 
-    def forward(self, __input: Union[Tuple[Union[str, Dict], str], str, Dict] = package(), **kw):
+    def forward(self, __input: Union[Tuple[Union[str, Dict], str], str, Dict] = package(), **kw):  # noqa B008
         headers = {
             'Content-Type': 'application/json',
             'Global-Parameters': encode_request(globals._pickle_data),
diff --git a/lazyllm/module/utils.py b/lazyllm/module/utils.py
index 8d38c3534..8a57bae39 100644
--- a/lazyllm/module/utils.py
+++ b/lazyllm/module/utils.py
@@ -9,5 +9,5 @@ def _impl(self):
             assert self._get_deploy_tasks.flag, f'{cls.__name__[1:-4]} shoule be deployed before used'
             return rebuild, (self._module_id,)
         return super(cls, self).__reduce__()
-    setattr(cls, '__reduce__', _impl)
+    cls.__reduce__ = _impl
     return cls
diff --git a/lazyllm/patch.py b/lazyllm/patch.py
index ef9af4552..689d7b477 100644
--- a/lazyllm/patch.py
+++ b/lazyllm/patch.py
@@ -3,6 +3,7 @@
 from urllib.parse import urlparse
 import ipaddress
 import os
+from typing import Callable
 
 def _is_ip_address_url(url: str) -> bool:
     try:
@@ -54,7 +55,7 @@ def new_httpx_func(method, url, **kwargs):
         except Exception: pass
     return _old_httpx_func(method, url, **kwargs)
 
-setattr(httpx, 'request', new_httpx_func)
+httpx.request = new_httpx_func
 
 
 def patch_httpx_func(fname):
@@ -71,3 +72,23 @@ def new_func(url, **kwargs):
 
 for fname in ['get', 'options', 'post', 'delete', 'put', 'patch', 'head']:
     patch_httpx_func(fname)
+
+
+def patch_os_env(set_action: Callable[[str, str], None], unset_action: Callable[[str], None]):
+
+    old_setitem = os._Environ.__setitem__
+
+    def new_setitem(self, key, value):
+        old_setitem(self, key, value)
+        if isinstance(key, bytes): key = key.decode('utf-8')
+        if key.lower().startswith('lazyllm_'): set_action(key, value)
+
+    old_delitem = os._Environ.__delitem__
+
+    def new_delitem(self, key):
+        old_delitem(self, key)
+        if isinstance(key, bytes): key = key.decode('utf-8')
+        if key.lower().startswith('lazyllm_'): unset_action(key)
+
+    os._Environ.__setitem__ = new_setitem
+    os._Environ.__delitem__ = new_delitem
diff --git a/lazyllm/thirdparty/__init__.py b/lazyllm/thirdparty/__init__.py
index 1ca87e01a..dcfb7502f 100644
--- a/lazyllm/thirdparty/__init__.py
+++ b/lazyllm/thirdparty/__init__.py
@@ -11,6 +11,7 @@
     'flash_attn': 'flash-attn',
     'sklearn': 'scikit-learn',
     'volcenginesdkarkruntime': 'volcengine-python-sdk[ark]',
+    'opensearchpy': 'opensearch-py',
 }
 
 requirements = {}
@@ -69,7 +70,7 @@ def __getattribute__(self, __name):
         try:
             return getattr(importlib.import_module(
                 self._Wrapper__key, package=self._Wrapper__package), __name)
-        except (ImportError, ModuleNotFoundError):
+        except ImportError:
             pip_cmd = get_pip_install_cmd([self._Wrapper__key])
             if pip_cmd:
                 err_msg = f'Cannot import module {self._Wrapper__key}, please install it by {pip_cmd}'
@@ -89,6 +90,6 @@ def __setattr__(self, __name, __value):
            'sentence_transformers', 'gradio', 'chromadb', 'nltk', 'PIL', 'httpx', 'bm25s', 'kubernetes', 'pymongo',
            'rapidfuzz', 'FlagEmbedding', 'mcp', 'diffusers', 'pypdf', 'pptx', 'html2text', 'ebooklib', 'docx2txt',
            'zlib', 'struct', 'olefile', 'spacy', 'tarfile', 'boto3', 'botocore', 'paddleocr', 'volcenginesdkarkruntime',
-           'zhipuai', 'dashscope']
+           'zhipuai', 'dashscope', 'mineru', 'opensearchpy']
 for m in modules:
     vars()[m] = PackageWrapper(m)
diff --git a/lazyllm/tools/agent/planAndSolveAgent.py b/lazyllm/tools/agent/planAndSolveAgent.py
index 1edda15f7..a27809c29 100644
--- a/lazyllm/tools/agent/planAndSolveAgent.py
+++ b/lazyllm/tools/agent/planAndSolveAgent.py
@@ -26,7 +26,7 @@
 )
 
 class PlanAndSolveAgent(ModuleBase):
-    def __init__(self, llm: Union[ModuleBase, None] = None, tools: List[str] = [], *,
+    def __init__(self, llm: Union[ModuleBase, None] = None, tools: List[str] = [], *,  # noqa B006
                  plan_llm: Union[ModuleBase, None] = None, solve_llm: Union[ModuleBase, None] = None,
                  max_retries: int = 5, return_trace: bool = False, stream: bool = False):
         super().__init__(return_trace=return_trace)
diff --git a/lazyllm/tools/agent/rewooAgent.py b/lazyllm/tools/agent/rewooAgent.py
index 698abab6b..6bbb52a81 100644
--- a/lazyllm/tools/agent/rewooAgent.py
+++ b/lazyllm/tools/agent/rewooAgent.py
@@ -31,7 +31,7 @@
                    "the answer directly with no extra words.\n\n")
 
 class ReWOOAgent(ModuleBase):
-    def __init__(self, llm: Union[ModuleBase, None] = None, tools: List[Union[str, Callable]] = [], *,
+    def __init__(self, llm: Union[ModuleBase, None] = None, tools: List[Union[str, Callable]] = [], *,  # noqa B006
                  plan_llm: Union[ModuleBase, None] = None, solve_llm: Union[ModuleBase, None] = None,
                  return_trace: bool = False, stream: bool = False):
         super().__init__(return_trace=return_trace)
diff --git a/lazyllm/tools/agent/toolsManager.py b/lazyllm/tools/agent/toolsManager.py
index 7530538f0..47c5b422e 100644
--- a/lazyllm/tools/agent/toolsManager.py
+++ b/lazyllm/tools/agent/toolsManager.py
@@ -78,7 +78,7 @@ def _load_function_schema(self, func: Callable) -> Type[BaseModel]:
 
         signature = inspect.signature(func)
         has_var_args = False
-        for name, param in signature.parameters.items():
+        for _, param in signature.parameters.items():
             if param.kind == inspect.Parameter.VAR_POSITIONAL or\
                param.kind == inspect.Parameter.VAR_KEYWORD:
                 has_var_args = True
diff --git a/lazyllm/tools/classifier/intent_classifier.py b/lazyllm/tools/classifier/intent_classifier.py
index 0eacfb251..37fbaa0b2 100644
--- a/lazyllm/tools/classifier/intent_classifier.py
+++ b/lazyllm/tools/classifier/intent_classifier.py
@@ -2,7 +2,7 @@
 from lazyllm.components import AlpacaPrompter
 from lazyllm import pipeline, globals, switch
 from lazyllm.tools.utils import chat_history_to_str
-from typing import Dict, Union, Any, List
+from typing import Dict, Union, Any, List, Optional
 import json
 
 
@@ -64,11 +64,11 @@
 class IntentClassifier(ModuleBase):
     def __init__(self, llm, intent_list: list = None,
                  *, prompt: str = '', constrain: str = '', attention: str = '',
-                 examples: list[list[str, str]] = [], return_trace: bool = False) -> None:
+                 examples: Optional[list[list[str, str]]] = None, return_trace: bool = False) -> None:
         super().__init__(return_trace=return_trace)
         self._intent_list = intent_list or []
         self._llm = llm
-        self._prompt, self._constrain, self._attention, self._examples = prompt, constrain, attention, examples
+        self._prompt, self._constrain, self._attention, self._examples = prompt, constrain, attention, examples or []
         if self._intent_list:
             self._init()
 
@@ -95,7 +95,7 @@ def choose_prompt():
     def intent_promt_hook(
         self,
         input: Union[str, List, Dict[str, str], None] = None,
-        history: List[Union[List[str], Dict[str, Any]]] = [],
+        history: List[Union[List[str], Dict[str, Any]]] = [],  # noqa B006
         tools: Union[List[Dict[str, Any]], None] = None,
         label: Union[str, None] = None,
     ):
diff --git a/lazyllm/tools/infer_service/serve.py b/lazyllm/tools/infer_service/serve.py
index 23c1580d9..35e813035 100644
--- a/lazyllm/tools/infer_service/serve.py
+++ b/lazyllm/tools/infer_service/serve.py
@@ -127,7 +127,7 @@ def _get_log_path(self, log_dir):
         return newest_file
 
     @app.post('/v1/deploy/jobs')
-    async def create_job(self, job: JobDescription, token: str = Header(None)):
+    async def create_job(self, job: JobDescription, token: str = Header(None)):  # noqa B008
         if not token:
             raise HTTPException(status_code=401, detail='Invalid token')
         # await self.authorize_current_user(token)
@@ -188,7 +188,7 @@ async def create_job(self, job: JobDescription, token: str = Header(None)):
         return {'job_id': job_id, 'status': status}
 
     @app.post('/v1/deploy/jobs/{job_id}/cancel')
-    async def cancel_job(self, job_id: str, token: str = Header(None)):
+    async def cancel_job(self, job_id: str, token: str = Header(None)):  # noqa B008
         await self.authorize_current_user(token)
         if not self._in_active_jobs(token, job_id):
             raise HTTPException(status_code=404, detail='Job not found')
@@ -214,7 +214,7 @@ async def cancel_job(self, job_id: str, token: str = Header(None)):
         return {'status': status}
 
     @app.get('/v1/deploy/jobs')
-    async def list_jobs(self, token: str = Header(None)):
+    async def list_jobs(self, token: str = Header(None)):  # noqa B008
         if not self._in_user_job_info(token):
             self._update_user_job_info(token)
         server_running_dict = self._read_user_job_info(token)
@@ -239,7 +239,7 @@ async def list_jobs(self, token: str = Header(None)):
         return server_running_dict
 
     @app.get('/v1/deploy/jobs/{job_id}')
-    async def get_job_info(self, job_id: str, token: str = Header(None)):
+    async def get_job_info(self, job_id: str, token: str = Header(None)):  # noqa B008
         await self.authorize_current_user(token)
         if not self._in_user_job_info(token, job_id):
             raise HTTPException(status_code=404, detail='Job not found')
@@ -249,7 +249,7 @@ async def get_job_info(self, job_id: str, token: str = Header(None)):
         return self._read_user_job_info(token, job_id)
 
     @app.get('/v1/deploy/jobs/{job_id}/events')
-    async def get_job_log(self, job_id: str, token: str = Header(None)):
+    async def get_job_log(self, job_id: str, token: str = Header(None)):  # noqa B008
         await self.authorize_current_user(token)
         if not self._in_user_job_info(token, job_id):
             raise HTTPException(status_code=404, detail='Job not found')
diff --git a/lazyllm/tools/mcp/client.py b/lazyllm/tools/mcp/client.py
index d426e7851..73be39a95 100644
--- a/lazyllm/tools/mcp/client.py
+++ b/lazyllm/tools/mcp/client.py
@@ -1,6 +1,6 @@
 import importlib.util
 
-from typing import Any
+from typing import Any, Optional
 from urllib.parse import urlparse
 from contextlib import asynccontextmanager
 from lazyllm.thirdparty import mcp
@@ -14,13 +14,13 @@ class MCPClient(object):
     def __init__(
         self,
         command_or_url: str,
-        args: list[str] = [],
+        args: Optional[list[str]] = None,
         env: dict[str, str] = None,
         headers: dict[str, Any] = None,
         timeout: float = 5,
     ):
         self._command_or_url = command_or_url
-        self._args = args
+        self._args = args or []
         self._env = env
         self._headers = headers
         self._timeout = timeout
diff --git a/lazyllm/tools/rag/__init__.py b/lazyllm/tools/rag/__init__.py
index e18ec7276..1880166e0 100644
--- a/lazyllm/tools/rag/__init__.py
+++ b/lazyllm/tools/rag/__init__.py
@@ -11,7 +11,7 @@
 from .global_metadata import GlobalMetadataDesc as DocField
 from .data_type import DataType
 from .index_base import IndexBase
-from .store import StoreBase
+from .store import LazyLLMStoreBase
 
 __all__ = [
     "Document",
@@ -44,6 +44,6 @@
     'DocField',
     'DataType',
     'IndexBase',
-    'StoreBase',
+    'LazyLLMStoreBase',
     "FileReader",
 ]
diff --git a/lazyllm/tools/rag/dataReader.py b/lazyllm/tools/rag/dataReader.py
index 6b36f45e7..5b1115652 100644
--- a/lazyllm/tools/rag/dataReader.py
+++ b/lazyllm/tools/rag/dataReader.py
@@ -188,10 +188,10 @@ def _exclude_metadata(self, documents: List[DocNode]) -> List[DocNode]:
         for doc in documents:
             doc._excluded_embed_metadata_keys.extend(
                 ["file_name", "file_type", "file_size", "creation_date",
-                 "last_modified_date", "last_accessed_date"])
+                 "last_modified_date", "last_accessed_date", "lazyllm_store_num"])
             doc._excluded_llm_metadata_keys.extend(
                 ["file_name", "file_type", "file_size", "creation_date",
-                 "last_modified_date", "last_accessed_date"])
+                 "last_modified_date", "last_accessed_date", "lazyllm_store_num"])
         return documents
 
     @staticmethod
diff --git a/lazyllm/tools/rag/data_type.py b/lazyllm/tools/rag/data_type.py
index 5ed32415a..6d1a7f6d8 100644
--- a/lazyllm/tools/rag/data_type.py
+++ b/lazyllm/tools/rag/data_type.py
@@ -6,3 +6,7 @@ class DataType(IntEnum):
     INT32 = 2
     FLOAT_VECTOR = 3
     SPARSE_FLOAT_VECTOR = 4
+    BOOLEAN = 5
+    FLOAT = 6
+    INT64 = 7
+    STRING = 8
diff --git a/lazyllm/tools/rag/default_index.py b/lazyllm/tools/rag/default_index.py
index c33e0dabc..552558693 100644
--- a/lazyllm/tools/rag/default_index.py
+++ b/lazyllm/tools/rag/default_index.py
@@ -3,14 +3,13 @@
 from .index_base import IndexBase
 from lazyllm import LOG
 from lazyllm.common import override
-from .store import StoreBase
 from .utils import parallel_do_embedding, generic_process_filters, is_sparse
 from .similarity import registered_similarities
 
 # ---------------------------------------------------------------------------- #
 
 class DefaultIndex(IndexBase):
-    def __init__(self, embed: Dict[str, Callable], store: StoreBase, **kwargs):
+    def __init__(self, embed: Dict[str, Callable], store, **kwargs):
         self.embed = embed
         self.store = store
 
@@ -41,7 +40,7 @@ def query(
             )
         similarity_func, mode, descend = registered_similarities[similarity_name]
 
-        nodes = self.store.get_nodes(group_name)
+        nodes = self.store.get_nodes(group=group_name)
         if filters:
             nodes = generic_process_filters(nodes, filters)
 
diff --git a/lazyllm/tools/rag/doc_impl.py b/lazyllm/tools/rag/doc_impl.py
index d84e8d0a6..53ae07ef3 100644
--- a/lazyllm/tools/rag/doc_impl.py
+++ b/lazyllm/tools/rag/doc_impl.py
@@ -1,5 +1,7 @@
 import json
 import ast
+import threading
+import time
 from enum import Enum
 from functools import wraps
 from typing import Callable, Dict, List, Optional, Set, Union, Tuple, Any
@@ -7,18 +9,15 @@
 from .transform import (NodeTransform, FuncNodeTransform, SentenceSplitter, LLMParser,
                         TransformArgs, TransformArgs as TArgs)
 from .index_base import IndexBase
-from .store import (MapStore, MilvusStore, ChromadbStore, SenseCoreStore, StoreBase,
-                    LAZY_ROOT_NAME, LAZY_IMAGE_GROUP)
-from .smart_embedding_index import SmartEmbeddingIndex
+from .store import (LAZY_ROOT_NAME, LAZY_IMAGE_GROUP, LazyLLMStoreBase)
+from .store.document_store import _DocumentStore
 from .doc_node import DocNode
 from .data_loaders import DirectoryReader
 from .utils import DocListManager, is_sparse
-from .global_metadata import GlobalMetadataDesc
+from .global_metadata import GlobalMetadataDesc, RAG_KB_ID
 from .data_type import DataType
 from .doc_processor import _Processor, DocumentProcessor
 from dataclasses import dataclass
-import threading
-import time
 from itertools import repeat
 
 _transmap = dict(function=FuncNodeTransform, sentencesplitter=SentenceSplitter, llm=LLMParser)
@@ -63,12 +62,12 @@ class EmbedPlaceholder:
 
 
 class NodeGroupType(str, Enum):
-    ORIGINAL = "Original Source"
-    CHUNK = "Chunk"
-    SUMMARY = "Summary"
-    IMAGE_INFO = "Image Info"
-    QUESTION_ANSWER = "Question Answer"
-    OTHER = "Other"
+    ORIGINAL = 'Original Source'
+    CHUNK = 'Chunk'
+    SUMMARY = 'Summary'
+    IMAGE_INFO = 'Image Info'
+    QUESTION_ANSWER = 'Question Answer'
+    OTHER = 'Other'
 
 
 class BuiltinGroups(object):
@@ -100,7 +99,8 @@ class DocImpl:
 
     def __init__(self, embed: Dict[str, Callable], dlm: Optional[DocListManager] = None,
                  doc_files: Optional[str] = None, kb_group_name: Optional[str] = None,
-                 global_metadata_desc: Dict[str, GlobalMetadataDesc] = None, store_conf: Optional[Dict] = None,
+                 global_metadata_desc: Dict[str, GlobalMetadataDesc] = None,
+                 store: Optional[Union[Dict, LazyLLMStoreBase]] = None,
                  processor: Optional[DocumentProcessor] = None, algo_name: Optional[str] = None,
                  display_name: Optional[str] = None, description: Optional[str] = None):
         super().__init__()
@@ -109,12 +109,12 @@ def __init__(self, embed: Dict[str, Callable], dlm: Optional[DocListManager] = N
         self._dlm, self._doc_files = dlm, doc_files
         self._reader = DirectoryReader(None, self._local_file_reader, DocImpl._registered_file_reader)
         self.node_groups: Dict[str, Dict] = {
-            LAZY_ROOT_NAME: dict(parent=None, display_name="Original Source", group_type=NodeGroupType.ORIGINAL),
-            LAZY_IMAGE_GROUP: dict(parent=None, display_name="Image Node", group_type=NodeGroupType.OTHER)
+            LAZY_ROOT_NAME: dict(parent=None, display_name='Original Source', group_type=NodeGroupType.ORIGINAL),
+            LAZY_IMAGE_GROUP: dict(parent=None, display_name='Image Node', group_type=NodeGroupType.OTHER)
         }
         self.embed = {k: embed_wrapper(e) for k, e in embed.items()}
         self._global_metadata_desc = global_metadata_desc
-        self.store = store_conf  # NOTE: will be initialized in _lazy_init()
+        self.store = store  # NOTE: will be initialized in _lazy_init()
         self._activated_groups = set([LAZY_ROOT_NAME, LAZY_IMAGE_GROUP])
         # activated_embeddings maintains all node_groups and active embeddings
         self._activated_embeddings = {LAZY_ROOT_NAME: set(), LAZY_IMAGE_GROUP: set()}  # {group_name: {em1, em2, ...}}
@@ -140,7 +140,7 @@ def _init_node_groups(self):
                 if not parent_group or parent_group in self._activated_groups: break
                 self._activated_groups.add(group := parent_group)
 
-    def _init_store(self):
+    def _create_store(self):
         if self.store is None: self.store = {'type': 'map'}
         embed_dims, embed_datatypes = {}, {}
         for k, e in self.embed.items():
@@ -151,16 +151,16 @@ def _init_store(self):
                 embed_dims[k] = len(embedding)
                 embed_datatypes[k] = DataType.FLOAT_VECTOR
 
-        if isinstance(self.store, Dict):
-            self.store = self._create_store(store_conf=self.store, embed_dims=embed_dims,
-                                            embed_datatypes=embed_datatypes)
-        elif not isinstance(self.store, StoreBase):
-            raise ValueError(f'store type [{type(self.store)}] is not a dict.')
+        self.store = _DocumentStore(algo_name=self._algo_name, store=self.store,
+                                    group_embed_keys=self._activated_embeddings, embed=self.embed,
+                                    embed_dims=embed_dims, embed_datatypes=embed_datatypes,
+                                    global_metadata_desc=self._global_metadata_desc)
+        self.store.activate_group(self._activated_groups)
 
     @once_wrapper(reset_on_pickle=True)
     def _lazy_init(self) -> None:
         self._init_node_groups()
-        self._init_store()
+        self._create_store()
         cloud = not (self._dlm or self._doc_files is not None)
 
         self._resolve_index_pending_registrations()
@@ -173,13 +173,12 @@ def _lazy_init(self) -> None:
                                          self._description)
 
         # init files when `cloud` is False
-        if not cloud and not self.store.is_group_active(LAZY_ROOT_NAME):
+        if not cloud and self.store.is_group_empty(LAZY_ROOT_NAME):
             ids, pathes, metadatas = self._list_files(upload_status=DocListManager.Status.success)
             self._processor.add_doc(pathes, ids, metadatas)
             if pathes and self._dlm:
                 self._dlm.update_kb_group(cond_file_ids=ids, cond_group=self._kb_group_name,
                                           new_status=DocListManager.Status.success)
-
         if self._dlm:
             self._init_monitor_event = threading.Event()
             self._daemon = threading.Thread(target=self.worker)
@@ -194,58 +193,6 @@ def _resolve_index_pending_registrations(self):
             self.store.register_index(index_type, index_cls(*args, **kwargs))
         self._index_pending_registrations.clear()
 
-    def _create_store(self, store_conf: Optional[Dict], embed_dims: Optional[Dict[str, int]] = None,
-                      embed_datatypes: Optional[Dict[str, DataType]] = None) -> StoreBase:
-        store_type = store_conf.get('type')
-        if not store_type:
-            raise ValueError('store type is not specified.')
-
-        kwargs = store_conf.get('kwargs', {})
-        if not isinstance(kwargs, Dict):
-            raise ValueError('`kwargs` in store conf is not a dict.')
-
-        if store_type == "map":
-            store = MapStore(node_groups=list(self._activated_embeddings.keys()), embed=self.embed, **kwargs)
-        elif store_type == "chroma":
-            store = ChromadbStore(group_embed_keys=self._activated_embeddings, embed=self.embed,
-                                  embed_dims=embed_dims, **kwargs)
-        elif store_type == "milvus":
-            store = MilvusStore(group_embed_keys=self._activated_embeddings, embed=self.embed,
-                                embed_dims=embed_dims, embed_datatypes=embed_datatypes,
-                                global_metadata_desc=self._global_metadata_desc, **kwargs)
-        elif store_type == "sensecore":
-            store = SenseCoreStore(group_embed_keys=self._activated_embeddings,
-                                   global_metadata_desc=self._global_metadata_desc, **kwargs)
-        else:
-            raise NotImplementedError(
-                f"Not implemented store type for {store_type}"
-            )
-        store.activate_group(self._activated_groups)
-
-        indices_conf = store_conf.get('indices', {})
-        if not isinstance(indices_conf, Dict):
-            raise ValueError(f"`indices`'s type [{type(indices_conf)}] is not a dict")
-
-        for index_type, conf in indices_conf.items():
-            if index_type == 'smart_embedding_index':
-                backend_type = conf.get('backend')
-                if not backend_type:
-                    raise ValueError('`backend` is not specified in `smart_embedding_index`.')
-                kwargs = conf.get('kwargs', {})
-                index = SmartEmbeddingIndex(backend_type=backend_type,
-                                            group_embed_keys=self._activated_embeddings,
-                                            embed=self.embed,
-                                            embed_dims=embed_dims,
-                                            embed_datatypes=embed_datatypes,
-                                            global_metadata_desc=self._global_metadata_desc,
-                                            **kwargs)
-            else:
-                raise ValueError(f'unsupported index type [{index_type}]')
-
-            store.register_index(type=index_type, index=index)
-
-        return store
-
     @staticmethod
     def _create_node_group_impl(cls, group_name, name, transform: Union[str, Callable],
                                 parent: str = LAZY_ROOT_NAME, *, trans_node: Optional[bool] = None,
@@ -446,9 +393,7 @@ def activate_group(self, group_name: str, embed_keys: List[str]):
                 if parent in self._activated_groups: break
                 self.store.activate_group(parent)
                 self._activated_groups.add(parent)
-            # BUG: when using reparse here, nodes created from recurse method will not be set children correctly
-            # (For parent nodes has been upserted before creating child nodes)
-            if not self.store.is_group_active(group_name): self._processor.reparse(group_name)
+            if self.store.is_group_empty(group_name): self._processor.reparse(group_name)
 
     def active_node_groups(self):
         return {k: v for k, v in self._activated_embeddings.items() if k in self._activated_groups}
@@ -457,21 +402,20 @@ def retrieve(self, query: str, group_name: str, similarity: str, similarity_cut_
                  index: str, topk: int, similarity_kws: dict, embed_keys: Optional[List[str]] = None,
                  filters: Optional[Dict[str, Union[str, int, List, Set]]] = None, **kwargs) -> List[DocNode]:
         self._lazy_init()
-        if index is None or index == 'default':
-            nodes = self.store.query(query=query, group_name=group_name, similarity_name=similarity,
-                                     similarity_cut_off=similarity_cut_off, topk=topk, embed_keys=embed_keys,
-                                     filters=filters, **similarity_kws, **kwargs)
+
+        if index and index != 'default':
+            query_instance = self.store.get_index(type=index)
+            if query_instance is None:
+                raise NotImplementedError(f"Index type '{index}' is not registered in the store.")
         else:
-            index_instance = self.store.get_index(type=index)
-            if not index_instance:
-                raise NotImplementedError(f"index type '{index}' is not supported currently.")
+            query_instance = self.store.get_index(type='default') or self.store
+        try:
+            nodes = query_instance.query(query=query, group_name=group_name, similarity_name=similarity,
+                                         similarity_cut_off=similarity_cut_off, topk=topk, embed_keys=embed_keys,
+                                         filters=filters, **similarity_kws, **kwargs)
+        except Exception as e:
+            raise RuntimeError(f'index type `{index}` of store `{type(self.store.impl)}` query failed: {e}')
 
-            try:
-                nodes = index_instance.query(query=query, group_name=group_name, similarity_name=similarity,
-                                             similarity_cut_off=similarity_cut_off, topk=topk,
-                                             embed_keys=embed_keys, filters=filters, **similarity_kws, **kwargs)
-            except Exception as e:
-                raise RuntimeError(f'index type `{index}` of store `{type(self.store)}` query failed: {e}')
         for n in nodes:
             n._store = self.store
             n._node_groups = self.node_groups
@@ -482,37 +426,54 @@ def find(self, nodes: List[DocNode], group: str) -> List[DocNode]:
         if len(nodes) == 0: return nodes
         self._lazy_init()
 
-        def get_depth(name):
-            cnt = 0
+        def get_full_path(name: str) -> List[str]:
+            path = [name]
             while name != LAZY_ROOT_NAME:
-                cnt += 1
                 name = self.node_groups[name]['parent']
-            return cnt
+                path.append(name)
+            return list(reversed(path))
 
-        for n in nodes:
-            n._store = self.store
-            n._node_groups = self.node_groups
+        cur_group = nodes[0]._group
+        path_cur = get_full_path(cur_group)
+        path_tgt = get_full_path(group)
+
+        idx = 0
+        for a, b in zip(path_cur, path_tgt):
+            if a == b:
+                idx += 1
+            else:
+                break
+        parent_path = list(reversed(path_cur[idx - 1:-1]))
+        child_path = path_tgt[idx:]
+
+        for next_group in parent_path:
+            if not nodes: break
+            nodes = self.find_parent(nodes, next_group)
+
+        for next_group in child_path:
+            if not nodes: break
+            nodes = self.find_children(nodes, next_group)
 
-        # 1. find lowest common ancestor
-        left, right = nodes[0]._group, group
-        curr_depth, target_depth = get_depth(left), get_depth(right)
-        if curr_depth > target_depth:
-            for i in range(curr_depth - target_depth): left = self.node_groups[left]['parent']
-        elif curr_depth < target_depth:
-            for i in range(target_depth - curr_depth): right = self.node_groups[right]['parent']
-        while (left != right):
-            left = self.node_groups[left]['parent']
-            right = self.node_groups[right]['parent']
-        ancestor = left
-
-        # 2. if ancestor != current group, go to ancestor; then if ancestor != target group, go to target group
-        if nodes and nodes[0]._group != ancestor:
-            nodes = self.find_parent(nodes, ancestor)
-        if nodes and nodes[0]._group != group:
-            nodes = self.find_children(nodes, group)
+        if not nodes:
+            LOG.warning(f"We can not find any nodes for group `{group}`, please check your input")
+            return []
         return nodes
 
     def find_parent(self, nodes: List[DocNode], group: str) -> List[DocNode]:
+        if isinstance(nodes[0].parent, DocNode):
+            result = self._find_parent_with_node(nodes, group)
+        else:
+            result = self._find_parent_with_uid(nodes, group)
+        if not result:
+            LOG.warning(
+                f"We can not find any nodes for group `{group}`, please check your input"
+            )
+        LOG.debug(f"Found parent node for {group}: {result}")
+        return result
+
+    def _find_parent_with_node(self, nodes: list[DocNode], group: str):
+        result = set()
+
         def recurse_parents(node: DocNode, visited: Set[DocNode]) -> None:
             if node.parent:
                 if node.parent._group == group:
@@ -520,51 +481,29 @@ def recurse_parents(node: DocNode, visited: Set[DocNode]) -> None:
                 else:
                     recurse_parents(node.parent, visited)
 
-        result = set()
         for node in nodes:
             recurse_parents(node, result)
-        if not result:
-            LOG.warning(
-                f"We can not find any nodes for group `{group}`, please check your input"
-            )
-        LOG.debug(f"Found parent node for {group}: {result}")
         return list(result)
 
-    def find_children(self, nodes: List[DocNode], group: str) -> List[DocNode]:  # noqa:C901
-        def recurse_children(node: DocNode, visited: Set[DocNode]) -> bool:
-            if group in node.children:
-                visited.update(node.children[group])
-                return True
-
-            found_in_any_child = False
-
-            for children_list in node.children.values():
-                for child in children_list:
-                    if recurse_children(child, visited):
-                        found_in_any_child = True
-                    else:
-                        break
-
-            return found_in_any_child
-
-        result = set()
-
-        for node in nodes:
-            if group in node.children:
-                result.update(node.children[group])
-            else:
-                if not recurse_children(node, result):
-                    LOG.warning(
-                        f"Node {node} and its children do not contain any nodes with the group `{group}`. "
-                        "Skipping further search in this branch."
-                    )
-                    break
-
+    def _find_parent_with_uid(self, nodes: list[DocNode], group: str):
+        cur_group = nodes[0]._group
+        cur_nodes = nodes
+        while cur_group != group and cur_nodes[0].parent:
+            name = self.node_groups[cur_group]['parent']
+            parent_uids = {n.parent for n in cur_nodes}
+            kb_id = cur_nodes[0].global_metadata.get(RAG_KB_ID)
+            parents = self.store.get_nodes(group=name, kb_id=kb_id, uids=list(parent_uids), display=True)
+            if not parents: break
+            cur_group = parents[0]._group
+            cur_nodes = parents
+        return cur_nodes if cur_group == group else []
+
+    def find_children(self, nodes: List[DocNode], group: str) -> List[DocNode]:
+        if not nodes: return []
+        kb_id = nodes[0].global_metadata.get(RAG_KB_ID, None)
+        result = self.store.get_nodes(group=group, kb_id=kb_id, parent=[n._uid for n in nodes], display=True)
         if not result:
-            LOG.warning(
-                f"We cannot find any nodes for group `{group}`, please check your input."
-            )
-
+            LOG.warning(f"We cannot find any nodes for group `{group}`, please check your input.")
         LOG.debug(f"Found children nodes for {group}: {result}")
         return list(result)
 
diff --git a/lazyllm/tools/rag/doc_manager.py b/lazyllm/tools/rag/doc_manager.py
index d7330774b..a00861c29 100644
--- a/lazyllm/tools/rag/doc_manager.py
+++ b/lazyllm/tools/rag/doc_manager.py
@@ -5,7 +5,7 @@
 from typing import List, Optional, Dict, Union
 from pydantic import BaseModel, Field
 from starlette.responses import RedirectResponse
-from fastapi import UploadFile, Body
+from fastapi import UploadFile
 
 import lazyllm
 from lazyllm import FastapiApp as app
@@ -106,10 +106,16 @@ def upload_files(self, files: List[UploadFile], override: bool = False,  # noqa
             lazyllm.LOG.error(f'upload_files exception: {e}')
             return BaseResponse(code=500, msg=str(e), data=None)
 
+    class AddFilesRequest(BaseModel):
+        files: List[str]
+        group_name: Optional[str] = None
+        metadatas: Optional[str] = None
+
     @app.post("/add_files")
-    def add_files(self, files: List[str] = Body(...),
-                  group_name: str = Body(None),
-                  metadatas: Optional[str] = Body(None)):
+    def add_files(self, request: AddFilesRequest):
+        files = request.files
+        group_name = request.group_name
+        metadatas = request.metadatas
         try:
             if metadatas:
                 metadatas: Optional[List[Dict[str, str]]] = json.loads(metadatas)
diff --git a/lazyllm/tools/rag/doc_node.py b/lazyllm/tools/rag/doc_node.py
index 869ccc808..8354f2176 100644
--- a/lazyllm/tools/rag/doc_node.py
+++ b/lazyllm/tools/rag/doc_node.py
@@ -4,7 +4,7 @@
 from PIL import Image
 from lazyllm import config, reset_on_pickle
 from lazyllm.components.utils.file_operate import _image_to_base64
-from .global_metadata import RAG_DOC_ID, RAG_DOC_PATH, RAG_DOC_KB_ID
+from .global_metadata import RAG_DOC_ID, RAG_DOC_PATH, RAG_KB_ID
 import uuid
 import threading
 import time
@@ -29,9 +29,9 @@ def __init__(self, uid: Optional[str] = None, content: Optional[Union[str, List[
                  global_metadata: Optional[Dict[str, Any]] = None, *, text: Optional[str] = None):
         if text and content:
             raise ValueError('`text` and `content` cannot be set at the same time.')
-
+        if not content and not text: content = ''
         self._uid: str = uid if uid else str(uuid.uuid4())
-        self._content: Optional[Union[str, List[Any]]] = content if content else text
+        self._content: Optional[Union[str, List[Any]]] = content if content is not None else text
         self._group: Optional[str] = group
         self._embedding: Optional[Dict[str, List[float]]] = embedding or {}
         # metadata: the chunk's meta
@@ -53,6 +53,14 @@ def __init__(self, uid: Optional[str] = None, content: Optional[Union[str, List[
         self.relevance_score = None
         self.similarity_score = None
 
+    @property
+    def uid(self) -> str:
+        return self._uid
+
+    @property
+    def group(self) -> str:
+        return self._group
+
     @property
     def text(self) -> str:
         if isinstance(self._content, str):
@@ -78,7 +86,7 @@ def _load_from_store(self, group_name: str, uids: Union[str, List[str]]) -> List
         if isinstance(uids, str):
             uids = [uids]
         nodes = self._store.get_nodes(group_name=group_name, uids=uids,
-                                      dataset_id=self.global_metadata.get(RAG_DOC_KB_ID), display=True)
+                                      kb_id=self.global_metadata.get(RAG_KB_ID), display=True)
         for n in nodes:
             n._store = self._store
             n._node_groups = self._node_groups
@@ -100,13 +108,13 @@ def parent(self, v: Optional["DocNode"]):
     def children(self) -> Dict[str, List["DocNode"]]:
         if not self._children_loaded and self._store and self._node_groups:
             self._children_loaded = True
-            dataset_id = self.global_metadata.get(RAG_DOC_KB_ID)
+            kb_id = self.global_metadata.get(RAG_KB_ID)
             doc_id = self.global_metadata.get(RAG_DOC_ID)
             c_groups = [grp for grp in self._node_groups.keys() if self._node_groups[grp]['parent'] == self._group]
             for grp in c_groups:
                 if not self._store.is_group_active(grp):
                     continue
-                nodes = self._store.get_nodes(group_name=grp, dataset_id=dataset_id, doc_ids=[doc_id])
+                nodes = self._store.get_nodes(group_name=grp, kb_id=kb_id, doc_ids=[doc_id])
                 c_nodes = [n for n in nodes if n._parent in {self, self._uid}]
                 self._children[grp] = c_nodes
                 for n in self._children[grp]:
@@ -121,8 +129,8 @@ def children(self, v: Dict[str, List["DocNode"]]):
     @property
     def root_node(self) -> "DocNode":
         node = self
-        while isinstance(node.parent, DocNode):
-            node = node.parent
+        while isinstance(node._parent, DocNode):
+            node = node._parent
         return node
 
     @property
@@ -176,7 +184,7 @@ def get_children_str(self) -> str:
         )
 
     def get_parent_id(self) -> str:
-        return self.parent._uid if self.parent else ""
+        return self.parent._uid if self.parent else ''
 
     def __str__(self) -> str:
         return (
@@ -227,7 +235,7 @@ def get_content(self) -> str:
     def get_metadata_str(self, mode: MetadataMode = MetadataMode.ALL) -> str:
         """Metadata info string."""
         if mode == MetadataMode.NONE:
-            return ""
+            return ''
 
         metadata_keys = set(self.metadata.keys())
         if mode == MetadataMode.LLM:
@@ -244,7 +252,7 @@ def get_metadata_str(self, mode: MetadataMode = MetadataMode.ALL) -> str:
     def get_text(self, metadata_mode: MetadataMode = MetadataMode.NONE) -> str:
         metadata_str = self.get_metadata_str(metadata_mode).strip()
         if not metadata_str:
-            return self.text if self.text else ""
+            return self.text if self.text else ''
         return f"{metadata_str}\n\n{self.text}".strip()
 
     def to_dict(self) -> Dict:
@@ -269,6 +277,10 @@ def __init__(self, query: str, answer: str, uid: Optional[str] = None, group: Op
         super().__init__(uid, query, group, embedding, parent, metadata, global_metadata=global_metadata, text=text)
         self._answer = answer.strip()
 
+    @property
+    def answer(self) -> str:
+        return self._answer
+
     def get_text(self, metadata_mode: MetadataMode = MetadataMode.NONE) -> str:
         if metadata_mode == MetadataMode.LLM:
             return f'query:\n{self.text}\nanswer\n{self._answer}'
@@ -282,7 +294,7 @@ def __init__(self, image_path: str, uid: Optional[str] = None, group: Optional[s
                  *, text: Optional[str] = None):
         super().__init__(uid, None, group, embedding, parent, metadata, global_metadata=global_metadata, text=text)
         self._image_path = image_path.strip()
-        self._modality = "image"
+        self._modality = 'image'
 
     def do_embedding(self, embed: Dict[str, Callable]) -> None:
         for k, e in embed.items():
diff --git a/lazyllm/tools/rag/doc_processor.py b/lazyllm/tools/rag/doc_processor.py
index a53d92b9d..a59a902ab 100644
--- a/lazyllm/tools/rag/doc_processor.py
+++ b/lazyllm/tools/rag/doc_processor.py
@@ -5,26 +5,30 @@
 from sqlalchemy.engine import Engine
 from lazyllm import LOG, ModuleBase, ServerModule, UrlModule, FastapiApp as app, ThreadPoolExecutor, config
 
-from .store import StoreBase, LAZY_ROOT_NAME, LAZY_IMAGE_GROUP
+from .store import LAZY_ROOT_NAME, LAZY_IMAGE_GROUP
+from .store.store_base import DEFAULT_KB_ID
+from .store.document_store import _DocumentStore
 from .store.utils import fibonacci_backoff, create_file_path
 from .transform import (AdaptiveTransform, make_transform,)
 from .readers import ReaderBase
 from .doc_node import DocNode
-from .utils import gen_docid, BaseResponse
-from .global_metadata import RAG_DOC_ID, RAG_DOC_PATH, RAG_DOC_KB_ID
+from .utils import gen_docid, ensure_call_endpoint, BaseResponse
+from .global_metadata import RAG_DOC_ID, RAG_DOC_PATH, RAG_KB_ID
+
 import queue
 import threading
 import time
 import requests
 import uuid
 import os
+import traceback
 
 DB_TYPES = ['mysql']
 ENABLE_DB = os.getenv("RAG_ENABLE_DB", "false").lower() == "true"
 
 
 class _Processor:
-    def __init__(self, store: StoreBase, reader: ReaderBase, node_groups: Dict[str, Dict],
+    def __init__(self, store: _DocumentStore, reader: ReaderBase, node_groups: Dict[str, Dict],
                  display_name: Optional[str] = None, description: Optional[str] = None,
                  server: bool = False):
         self._store = store
@@ -40,21 +44,34 @@ def add_doc(self, input_files: List[str], ids: Optional[List[str]] = None,
             if not ids: ids = [gen_docid(path) for path in input_files]
             if metadatas is None:
                 metadatas = [{} for _ in input_files]
-
             for metadata, doc_id, path in zip(metadatas, ids, input_files):
                 metadata.setdefault(RAG_DOC_ID, doc_id)
                 metadata.setdefault(RAG_DOC_PATH, path)
+                metadata.setdefault(RAG_KB_ID, DEFAULT_KB_ID)
             root_nodes, image_nodes = self._reader.load_data(input_files, metadatas, split_image_nodes=True)
-            self._store.update_nodes(root_nodes)
+            self._store.update_nodes(self._set_nodes_number(root_nodes))
             self._create_nodes_recursive(root_nodes, LAZY_ROOT_NAME)
             if image_nodes:
-                self._store.update_nodes(image_nodes)
+                self._store.update_nodes(self._set_nodes_number(image_nodes))
                 self._create_nodes_recursive(image_nodes, LAZY_IMAGE_GROUP)
             LOG.info("Add documents done!")
         except Exception as e:
-            LOG.error(f"Add documents failed: {e}")
+            LOG.error(f"Add documents failed: {e}, {traceback.format_exc()}")
             raise e
 
+    def _set_nodes_number(self, nodes: List[DocNode]) -> List[DocNode]:
+        doc_group_number = {}
+        for node in nodes:
+            doc_id = node.global_metadata.get(RAG_DOC_ID)
+            group_name = node.group
+            if doc_id not in doc_group_number:
+                doc_group_number[doc_id] = {}
+            if group_name not in doc_group_number[doc_id]:
+                doc_group_number[doc_id][group_name] = 1
+            node.metadata['lazyllm_store_num'] = doc_group_number[doc_id][group_name]
+            doc_group_number[doc_id][group_name] += 1
+        return nodes
+
     def _create_nodes_recursive(self, p_nodes: List[DocNode], p_name: str):
         for group_name in self._store.activated_groups():
             group = self._node_groups.get(group_name)
@@ -72,29 +89,29 @@ def _create_nodes_impl(self, p_nodes, group_name):
         t = self._node_groups[group_name]['transform']
         transform = AdaptiveTransform(t) if isinstance(t, list) or t.pattern else make_transform(t, group_name)
         nodes = transform.batch_forward(p_nodes, group_name)
-        self._store.update_nodes(nodes)
+        self._store.update_nodes(self._set_nodes_number(nodes))
         return nodes
 
-    def _get_or_create_nodes(self, group_name, ids: Optional[List[str]] = None):
-        nodes = self._store.get_nodes(group_name, ids) if self._store.is_group_active(group_name) else []
+    def _get_or_create_nodes(self, group_name, uids: Optional[List[str]] = None):
+        nodes = self._store.get_nodes(uids=uids, group=group_name) if self._store.is_group_active(group_name) else []
         if not nodes and group_name not in (LAZY_IMAGE_GROUP, LAZY_ROOT_NAME):
-            p_nodes = self._get_or_create_nodes(self._node_groups[group_name]['parent'], ids)
+            p_nodes = self._get_or_create_nodes(self._node_groups[group_name]['parent'], uids)
             nodes = self._create_nodes_impl(p_nodes, group_name)
         return nodes
 
-    def reparse(self, group_name: str, ids: Optional[List[str]] = None, doc_ids: Optional[List[str]] = None, **kwargs):
+    def reparse(self, group_name: str, uids: Optional[List[str]] = None, doc_ids: Optional[List[str]] = None, **kwargs):
         if doc_ids:
             self._reparse_docs(group_name=group_name, doc_ids=doc_ids, **kwargs)
         else:
-            self._get_or_create_nodes(group_name, ids)
+            self._get_or_create_nodes(group_name, uids)
 
     def _reparse_docs(self, group_name: str, doc_ids: List[str], doc_paths: List[str], metadatas: List[Dict]):
-        dataset_id = metadatas[0].get(RAG_DOC_KB_ID, None)
+        kb_id = metadatas[0].get(RAG_KB_ID, None)
         if group_name == "all":
-            self._store.remove_nodes(dataset_id=dataset_id, doc_ids=doc_ids)
+            self._store.remove_nodes(doc_ids=doc_ids, kb_id=kb_id)
             removed_flag = False
             for wait_time in fibonacci_backoff():
-                nodes = self._store.get_nodes(group_name=LAZY_ROOT_NAME, dataset_id=dataset_id, doc_ids=doc_ids)
+                nodes = self._store.get_nodes(group=LAZY_ROOT_NAME, kb_id=kb_id, doc_ids=doc_ids)
                 if not nodes:
                     removed_flag = True
                     break
@@ -103,18 +120,17 @@ def _reparse_docs(self, group_name: str, doc_ids: List[str], doc_paths: List[str
                 raise Exception(f"Failed to remove nodes for docs {doc_ids} from store")
             self.add_doc(input_files=doc_paths, ids=doc_ids, metadatas=metadatas)
         else:
-            p_nodes = self._store.get_nodes(
-                group_name=self._node_groups[group_name]['parent'], dataset_id=dataset_id, doc_ids=doc_ids
-            )
+            p_nodes = self._store.get_nodes(group=self._node_groups[group_name]['parent'],
+                                            kb_id=kb_id, doc_ids=doc_ids)
             self._reparse_group_recursive(p_nodes=p_nodes, cur_name=group_name, doc_ids=doc_ids)
 
     def _reparse_group_recursive(self, p_nodes: List[DocNode], cur_name: str, doc_ids: List[str]):
-        dataset_id = p_nodes[0].global_metadata.get(RAG_DOC_KB_ID, None)
-        self._store.remove_nodes(group_name=cur_name, dataset_id=dataset_id, doc_ids=doc_ids)
+        kb_id = p_nodes[0].global_metadata.get(RAG_KB_ID, None)
+        self._store.remove_nodes(group=cur_name, kb_id=kb_id, doc_ids=doc_ids)
 
         removed_flag = False
         for wait_time in fibonacci_backoff():
-            nodes = self._store.get_nodes(group_name=cur_name, dataset_id=dataset_id, doc_ids=doc_ids)
+            nodes = self._store.get_nodes(group=cur_name, kb_id=kb_id, doc_ids=doc_ids)
             if not nodes:
                 removed_flag = True
                 break
@@ -126,7 +142,7 @@ def _reparse_group_recursive(self, p_nodes: List[DocNode], cur_name: str, doc_id
         transform = AdaptiveTransform(t) if isinstance(t, list) or t.pattern else make_transform(t, cur_name)
         nodes = transform.batch_forward(p_nodes, cur_name)
         # reparse need set global_metadata
-        self._store.update_nodes(nodes)
+        self._store.update_nodes(self._set_nodes_number(nodes))
 
         for group_name in self._store.activated_groups():
             group = self._node_groups.get(group_name)
@@ -139,12 +155,9 @@ def _reparse_group_recursive(self, p_nodes: List[DocNode], cur_name: str, doc_id
     def update_doc_meta(self, doc_id: str, metadata: dict):
         self._store.update_doc_meta(doc_id=doc_id, metadata=metadata)
 
-    def delete_doc(self, doc_ids: List[str] = None, dataset_id: str = None) -> None:
+    def delete_doc(self, doc_ids: List[str] = None, kb_id: str = None) -> None:
         LOG.info(f"delete_doc_ids: {doc_ids}")
-        if dataset_id:
-            self._store.remove_nodes(dataset_id=dataset_id, doc_ids=doc_ids)
-        else:
-            self._store.remove_nodes(doc_ids=doc_ids)
+        self._store.remove_nodes(kb_id=kb_id, doc_ids=doc_ids)
 
 
 class FileInfo(BaseModel):
@@ -197,8 +210,13 @@ def __init__(self, server: bool):
             self._processors: Dict[str, _Processor] = dict()
             self._server = server
             self._inited = False
-            self._feedback_url = config['process_feedback_service']
-            self._path_prefix = config['process_path_prefix']
+            try:
+                self._feedback_url = config['process_feedback_service']
+                self._path_prefix = config['process_path_prefix']
+            except Exception as e:
+                LOG.warning(f"Failed to get config: {e}, use env variables instead")
+                self._feedback_url = os.getenv("PROCESS_FEEDBACK_SERVICE", None)
+                self._path_prefix = os.getenv("PROCESS_PATH_PREFIX", None)
 
         def _init_components(self, server: bool):
             if server and not self._inited:
@@ -217,9 +235,9 @@ def _init_components(self, server: bool):
                 self._worker_thread = threading.Thread(target=self._worker, daemon=True)
                 self._worker_thread.start()
             self._inited = True
-            LOG.info(f"[DocStore] init done. feedback {self._feedback_url}, prefix {self._path_prefix}")
+            LOG.info(f"[DocumentProcessor] init done. feedback {self._feedback_url}, prefix {self._path_prefix}")
 
-        def register_algorithm(self, name: str, store: StoreBase, reader: ReaderBase,
+        def register_algorithm(self, name: str, store: _DocumentStore, reader: ReaderBase,
                                node_groups: Dict[str, Dict], display_name: Optional[str] = None,
                                description: Optional[str] = None, force_refresh: bool = False):
             self._init_components(server=self._server)
@@ -307,7 +325,7 @@ def _upsert_records(self, engine, db_info, file_infos):
                     raw_infos = {"document_id": document_id, "file_name": os.path.basename(file_path),
                                  "file_path": file_path, "description": file_info["metadata"].get("description", None),
                                  "creater": file_info["metadata"].get("creater", None),
-                                 "dataset_id": file_info["metadata"].get(RAG_DOC_KB_ID, None),
+                                 "dataset_id": file_info["metadata"].get(RAG_KB_ID, None),
                                  "tags": file_info["metadata"].get("tags", []) or []}
                     infos = {}
                     for k, v in raw_infos.items():
@@ -577,7 +595,7 @@ def __init__(self, server: bool = True, port: int = None, url: str = None):
             if server:
                 self._impl = ServerModule(self._impl, port=port)
         else:
-            self._impl = UrlModule(url=url)
+            self._impl = UrlModule(url=ensure_call_endpoint(url))
 
     def _dispatch(self, method: str, *args, **kwargs):
         impl = self._impl
@@ -586,7 +604,7 @@ def _dispatch(self, method: str, *args, **kwargs):
         else:
             getattr(impl, method)(*args, **kwargs)
 
-    def register_algorithm(self, name: str, store: StoreBase, reader: ReaderBase, node_groups: Dict[str, Dict],
+    def register_algorithm(self, name: str, store: _DocumentStore, reader: ReaderBase, node_groups: Dict[str, Dict],
                            display_name: Optional[str] = None, description: Optional[str] = None,
                            force_refresh: bool = False, **kwargs):
         self._dispatch("register_algorithm", name, store, reader, node_groups,
diff --git a/lazyllm/tools/rag/document.py b/lazyllm/tools/rag/document.py
index 1f22ff6a1..ffc7f540c 100644
--- a/lazyllm/tools/rag/document.py
+++ b/lazyllm/tools/rag/document.py
@@ -1,6 +1,7 @@
 import os
 
 from typing import Callable, Optional, Dict, Union, List
+from functools import cached_property
 import lazyllm
 from lazyllm import ModuleBase, ServerModule, DynamicDescriptor, deprecated, OnlineChatModule, TrainableModule
 from lazyllm.launcher import LazyLLMLaunchersBase as Launcher
@@ -13,7 +14,7 @@
 from .doc_to_db import DocInfoSchema, DocToDbProcessor, extract_db_schema_from_files
 from .store import LAZY_ROOT_NAME, EMBED_DEFAULT_KEY
 from .index_base import IndexBase
-from .utils import DocListManager
+from .utils import DocListManager, ensure_call_endpoint
 from .global_metadata import GlobalMetadataDesc as DocField
 from .web import DocWebModule
 import copy
@@ -60,7 +61,7 @@ def __init__(self, dataset_path: Optional[str], embed: Optional[Union[Callable,
                 dataset_path, name, enable_path_monitoring=False if manager else True)
             self._kbs = CallableDict({name: DocImpl(
                 embed=self._embed, dlm=self._dlm, doc_files=doc_files, global_metadata_desc=doc_fields,
-                store_conf=store_conf, processor=processor, algo_name=name, display_name=display_name,
+                store=store_conf, processor=processor, algo_name=name, display_name=display_name,
                 description=description)})
 
             if manager: self._manager = ServerModule(DocManager(self._dlm), launcher=self._launcher)
@@ -96,10 +97,10 @@ def add_kb_group(self, name, doc_fields: Optional[Dict[str, DocField]] = None,
             embed = self._get_embeds(embed) if embed else self._embed
             if isinstance(self._kbs, ServerModule):
                 self._kbs._impl._m[name] = DocImpl(dlm=self._dlm, embed=embed, kb_group_name=name,
-                                                   global_metadata_desc=doc_fields, store_conf=store_conf)
+                                                   global_metadata_desc=doc_fields, store=store_conf)
             else:
                 self._kbs[name] = DocImpl(dlm=self._dlm, embed=self._embed, kb_group_name=name,
-                                          global_metadata_desc=doc_fields, store_conf=store_conf)
+                                          global_metadata_desc=doc_fields, store=store_conf)
             self._dlm.add_kb_group(name=name)
 
         def get_doc_by_kb_group(self, name):
@@ -116,7 +117,6 @@ def __call__(self, *args, **kw):
     def __new__(cls, *args, **kw):
         if url := kw.pop('url', None):
             name = kw.pop('name', None)
-            assert name, 'Document name must be provided with `url`'
             assert not args and not kw, 'Only `name` is supported with `url`'
             return UrlDocument(url, name)
         else:
@@ -160,7 +160,7 @@ def __init__(self, dataset_path: Optional[str] = None, embed: Optional[Union[Cal
                 processor._impl.start()
                 manager = False
                 assert name, '`Name` of Document is necessary when using cloud service'
-                assert store_conf['type'] != 'map', 'Cloud manager is not supported when using map store'
+                assert store_conf.get('type') != 'map', 'Cloud manager is not supported when using map store'
                 assert not dataset_path, 'Cloud manager is not supported with local dataset path'
             else:
                 cloud, processor = False, None
@@ -226,7 +226,7 @@ def compare_schema(old_schema: DocInfoSchema, new_schema: DocInfoSchema):
 
     def get_sql_manager(self):
         if self._doc_to_db_processor is None:
-            raise None
+            raise ValueError("Please call connect_sql_manager to init handler first")
         return self._doc_to_db_processor.sql_manager
 
     def extract_db_schema(
@@ -320,7 +320,7 @@ def find(self, target) -> Callable:
     def forward(self, *args, **kw) -> List[DocNode]:
         return self._forward('retrieve', *args, **kw)
 
-    def clear_cache(self, group_names: Optional[List[str]]) -> None:
+    def clear_cache(self, group_names: Optional[List[str]] = None) -> None:
         return self._forward('clear_cache', group_names)
 
     def _get_post_process_tasks(self):
@@ -331,11 +331,11 @@ def __repr__(self):
                                  server=isinstance(self._manager._kbs, ServerModule))
 
 class UrlDocument(ModuleBase):
-    def __init__(self, url: str, name: str):
+    def __init__(self, url: str, name: str = None):
         super().__init__()
         self._missing_keys = set(dir(Document)) - set(dir(UrlDocument))
-        self._manager = lazyllm.UrlModule(url=url)
-        self._curr_group = name
+        self._manager = lazyllm.UrlModule(url=ensure_call_endpoint(url))
+        self._curr_group = name or DocListManager.DEFAULT_GROUP_NAME
 
     def _forward(self, func_name: str, *args, **kwargs):
         args = (self._curr_group, func_name, *args)
@@ -347,7 +347,7 @@ def find(self, target) -> Callable:
     def forward(self, *args, **kw):
         return self._forward('retrieve', *args, **kw)
 
-    @functools.lru_cache
+    @cached_property
     def active_node_groups(self):
         return self._forward('active_node_groups')
 
diff --git a/lazyllm/tools/rag/global_metadata.py b/lazyllm/tools/rag/global_metadata.py
index 51d919cf5..fcd20d6e5 100644
--- a/lazyllm/tools/rag/global_metadata.py
+++ b/lazyllm/tools/rag/global_metadata.py
@@ -10,10 +10,10 @@ def __init__(self, data_type: int, element_type: Optional[int] = None,
         self.max_size = max_size
 
 # ---------------------------------------------------------------------------- #
-
+# RAG system metadata keys
+RAG_KB_ID = 'kb_id'
 RAG_DOC_ID = 'docid'
 RAG_DOC_PATH = 'lazyllm_doc_path'
-RAG_DOC_KB_ID = 'kb_id'
 RAG_DOC_FILE_NAME = 'file_name'
 RAG_DOC_FILE_TYPE = 'file_type'
 RAG_DOC_FILE_SIZE = 'file_size'
@@ -21,6 +21,6 @@ def __init__(self, data_type: int, element_type: Optional[int] = None,
 RAG_DOC_LAST_MODIFIED_DATE = 'last_modified_date'
 RAG_DOC_LAST_ACCESSED_DATE = 'last_accessed_date'
 
-RAG_SYSTEM_META_KEYS = set([RAG_DOC_ID, RAG_DOC_PATH, RAG_DOC_KB_ID, RAG_DOC_FILE_NAME, RAG_DOC_FILE_TYPE,
+RAG_SYSTEM_META_KEYS = set([RAG_DOC_ID, RAG_DOC_PATH, RAG_KB_ID, RAG_DOC_FILE_NAME, RAG_DOC_FILE_TYPE,
                             RAG_DOC_FILE_SIZE, RAG_DOC_CREATION_DATE, RAG_DOC_LAST_MODIFIED_DATE,
                             RAG_DOC_LAST_ACCESSED_DATE])
diff --git a/lazyllm/tools/rag/index_base.py b/lazyllm/tools/rag/index_base.py
index 81792fe75..04f2046ec 100644
--- a/lazyllm/tools/rag/index_base.py
+++ b/lazyllm/tools/rag/index_base.py
@@ -3,6 +3,7 @@
 from typing import List, Optional
 
 class IndexBase(ABC):
+    # TODO(chenjiahao): change params `nodes` to `segments`, index should be able to handle segments
     @abstractmethod
     def update(self, nodes: List[DocNode]) -> None:
         pass
diff --git a/lazyllm/tools/rag/readers/__init__.py b/lazyllm/tools/rag/readers/__init__.py
index 98dc5e1f9..80ee7be46 100644
--- a/lazyllm/tools/rag/readers/__init__.py
+++ b/lazyllm/tools/rag/readers/__init__.py
@@ -10,7 +10,7 @@
 from .mboxreader import MboxReader
 from .pandasReader import PandasCSVReader, PandasExcelReader
 from .videoAudioReader import VideoAudioReader
-from .magic_pdf_reader import MagicPDFReader
+from .mineru_pdf_reader import MineruPDFReader
 
 __all__ = [
     "ReaderBase",
@@ -28,5 +28,5 @@
     "PandasCSVReader",
     "PandasExcelReader",
     "VideoAudioReader",
-    "MagicPDFReader",
+    "MineruPDFReader",
 ]
diff --git a/lazyllm/tools/rag/readers/magic_pdf_reader.py b/lazyllm/tools/rag/readers/magic_pdf_reader.py
deleted file mode 100644
index 1fce97912..000000000
--- a/lazyllm/tools/rag/readers/magic_pdf_reader.py
+++ /dev/null
@@ -1,198 +0,0 @@
-import os
-import copy
-from pathlib import Path
-from bs4 import BeautifulSoup
-from typing import Dict, List, Optional, Callable
-
-import unicodedata
-from ..doc_node import DocNode
-from lazyllm import LOG
-import requests
-
-class MagicPDFReader:
-
-    def __init__(self, magic_url, callback: Optional[Callable[[List[dict], Path, dict], List[DocNode]]] = None,
-                 upload_mode: bool = False):
-        self._magic_url = magic_url
-        self._upload_mode = upload_mode
-        if callback is not None:
-            self._callback = callback
-        else:
-            def default_callback(elements: List[dict], file: Path, extra_info: Optional[Dict] = None) -> List[DocNode]:
-                text_chunks = [el["text"] for el in elements if "text" in el]
-                return [DocNode(text="\n".join(text_chunks), metadata={"file_name": file.name})]
-            self._callback = default_callback
-
-    def __call__(self, file: Path, **kwargs) -> List[DocNode]:
-        try:
-            return self._load_data(file, **kwargs)
-        except Exception as e:
-            LOG.error(f"[MagicPDFReader] Error loading data from {file}: {e}")
-            return []
-
-    def _load_data(self, file: Path, extra_info: Optional[Dict] = None, **kwargs) -> List[DocNode]:
-        if isinstance(file, str):
-            file = Path(file)
-        if self._upload_mode:
-            elements = self._upload_parse_pdf_elements(file)
-        else:
-            elements = self._parse_pdf_elements(file)
-        docs: List[DocNode] = self._callback(elements, file, extra_info)
-        return docs
-
-    def _parse_pdf_elements(self, pdf_path: Path) -> List[dict]:
-        payload = {"files": [str(pdf_path)], "reserve_image": True}
-        try:
-            response = requests.post(self._magic_url, json=payload)
-            response.raise_for_status()
-            res = response.json()
-            if not isinstance(res, list) or not res:
-                LOG.info(f"[MagicPDFReader] No elements found in PDF: {pdf_path}")
-                return []
-        except requests.exceptions.RequestException as e:
-            LOG.error(f"[MagicPDFReader] POST failed: {e}")
-            return []
-        return self._extract_content_blocks(res[0])
-
-    def _upload_parse_pdf_elements(self, pdf_path: Path) -> List[dict]:
-        try:
-            with open(pdf_path, "rb") as f:
-                files = {'file': (os.path.basename(pdf_path), f)}
-                response = requests.post(self._magic_url, files=files)
-                response.raise_for_status()
-                res = response.json()
-                if not isinstance(res, list) or not res:
-                    LOG.info(f"[MagicPDFReader] No elements found in PDF: {pdf_path}")
-                    return []
-        except requests.exceptions.RequestException as e:
-            LOG.error(f"[MagicPDFReader] POST failed: {e}")
-            return []
-        return self._extract_content_blocks(res[0])
-
-    def _extract_content_blocks(self, content_list) -> List[dict]:  # noqa: C901
-        blocks = []
-        cur_title = ""
-        cur_level = -1
-        for content in content_list:
-            block = {}
-            block["bbox"] = content["bbox"]
-            block["lines"] = content["lines"] if 'lines' in content else []
-            for line in block['lines']:
-                line['content'] = self._clean_content(line['content'])
-            if content["type"] == "text":
-                content["text"] = self._clean_content(content["text"]).strip()
-                if not content["text"]:
-                    continue
-                if "text_level" in content:
-                    if cur_title and content["text_level"] > cur_level:
-                        content["title"] = cur_title
-                    cur_title = content["text"]
-                    cur_level = content["text_level"]
-                else:
-                    if cur_title:
-                        content["title"] = cur_title
-                block = copy.deepcopy(content)
-                block["page"] = content["page_idx"]
-                del block["page_idx"]
-                blocks.append(block)
-            elif content["type"] == "image":
-                if not content["img_path"]:
-                    continue
-                block["type"] = content["type"]
-                block["page"] = content["page_idx"]
-                block["image_path"] = os.path.basename(content["img_path"])
-                block['img_caption'] = self._clean_content(content['img_caption'])
-                block['img_footnote'] = self._clean_content(content['img_footnote'])
-                if cur_title:
-                    block["title"] = cur_title
-                img_title = block["img_caption"][0] if len(block["img_caption"]) > 0 else ""
-                block["text"] = f"![{img_title}]({block['image_path']})"
-                blocks.append(block)
-            elif content["type"] == "table":
-                block["type"] = content["type"]
-                block["page"] = content["page_idx"]
-                if self.extract_table:
-                    block["text"] = self._html_table_to_markdown(self._clean_content(content["table_body"])
-                                                                 ) if "table_body" in content else ""
-                else:
-                    block['image_path'] = os.path.basename(content['img_path'])
-                if cur_title:
-                    block["title"] = cur_title
-                block['table_caption'] = self._clean_content(content['table_caption'])
-                block['table_footnote'] = self._clean_content(content['table_footnote'])
-                blocks.append(block)
-        return blocks
-
-    def _clean_content(self, content) -> str:
-        if isinstance(content, str):
-            content = content.encode("utf-8", "replace").decode("utf-8")
-            return unicodedata.normalize("NFKC", content)
-        if isinstance(content, list):
-            return [self._clean_content(t) for t in content]
-        return content
-
-    def _html_table_to_markdown(self, html_table) -> str:  # noqa: C901
-        try:
-            soup = BeautifulSoup(html_table.strip(), 'html.parser')
-            table = soup.find('table')
-            if not table:
-                raise ValueError("No <table> found in the HTML.")
-
-            rows = []
-            max_cols = 0
-
-            for row in table.find_all('tr'):
-                cells = []
-                for cell in row.find_all(['td', 'th']):
-                    rowspan = int(cell.get('rowspan', 1))
-                    colspan = int(cell.get('colspan', 1))
-                    text = cell.get_text(strip=True)
-
-                    for _ in range(colspan):
-                        cells.append({'text': text, 'rowspan': rowspan})
-                rows.append(cells)
-                max_cols = max(max_cols, len(cells))
-
-            expanded_rows = []
-            rowspan_tracker = [0] * max_cols
-            for row in rows:
-                expanded_row = []
-                col_idx = 0
-                for cell in row:
-                    while col_idx < max_cols and rowspan_tracker[col_idx] > 0:
-                        expanded_row.append(None)
-                        rowspan_tracker[col_idx] -= 1
-                        col_idx += 1
-
-                    expanded_row.append(cell['text'])
-                    if cell['rowspan'] > 1:
-                        rowspan_tracker[col_idx] = cell['rowspan'] - 1
-                    col_idx += 1
-
-                while col_idx < max_cols:
-                    if rowspan_tracker[col_idx] > 0:
-                        expanded_row.append(None)
-                        rowspan_tracker[col_idx] -= 1
-                    else:
-                        expanded_row.append("")
-                    col_idx += 1
-
-                expanded_rows.append(expanded_row)
-
-            markdown = ''
-            if not expanded_rows:
-                return ""
-
-            headers = expanded_rows[0]
-            body_rows = expanded_rows[1:]
-            if headers:
-                markdown += '| ' + ' | '.join(h if h else '' for h in headers) + ' |\n'
-                markdown += '| ' + ' | '.join(['-' * (len(h) if h else 3) for h in headers]) + ' |\n'
-            for row in body_rows:
-                markdown += '| ' + ' | '.join(cell if cell else '' for cell in row) + ' |\n'
-
-            return markdown
-
-        except Exception as e:
-            LOG.error(f"Error parsing table: {e}")
-            return ''
diff --git a/lazyllm/tools/rag/readers/mineru_pdf_reader.py b/lazyllm/tools/rag/readers/mineru_pdf_reader.py
new file mode 100644
index 000000000..b66c89228
--- /dev/null
+++ b/lazyllm/tools/rag/readers/mineru_pdf_reader.py
@@ -0,0 +1,241 @@
+import os
+import requests
+from pathlib import Path
+from bs4 import BeautifulSoup
+from typing import Dict, List, Optional, Callable
+import unicodedata
+
+from lazyllm import LOG
+from ..doc_node import DocNode
+from .readerBase import LazyLLMReaderBase
+
+
+class MineruPDFReader(LazyLLMReaderBase):
+    def __init__(self, url, backend='pipeline',
+                 callback: Optional[Callable[[List[dict], Path, dict], List[DocNode]]] = None,
+                 upload_mode: bool = False,
+                 extract_table: bool = True,
+                 extract_formula: bool = True,
+                 split_doc: bool = True,
+                 post_func: Optional[Callable] = None,
+                 return_trace: bool = True):
+        super().__init__(return_trace=return_trace)
+        self._url = url + '/api/v1/pdf_parse'
+        self._upload_mode = upload_mode
+        self._backend = backend
+        self._extract_table = extract_table
+        self._extract_formula = extract_formula
+        self._split_doc = split_doc
+        self._post_func = post_func
+
+    def _load_data(self, file: Path, extra_info: Optional[Dict] = None,
+                   use_cache: bool = True, **kwargs) -> List[DocNode]:
+        try:
+            if isinstance(file, str):
+                file = Path(file)
+            elements = self._parse_pdf_elements(file, use_cache=use_cache)
+            docs = self._build_nodes(elements, file, extra_info)
+
+            if self._post_func:
+                docs = self._post_func(docs)
+                assert isinstance(docs, list), f'Expected list, got {type(docs)}, please check your post function'
+                for node in docs:
+                    assert isinstance(node, DocNode), f'Expected DocNode, got {type(node)}, \
+                        please check your post function'
+                    node.global_metadata = extra_info
+            return docs
+        except Exception as e:
+            LOG.error(f'[MineruPDFReader] Error loading data from {file}: {e}')
+            return []
+
+    def _parse_pdf_elements(self, pdf_path: Path, use_cache: bool = True) -> List[dict]:
+        payload = {'return_content_list': True,
+                   'use_cache': use_cache,
+                   'backend': self._backend,
+                   'table_enable': self._extract_table,
+                   'formula_enable': self._extract_formula}
+        try:
+            if not self._upload_mode:
+                payload['files'] = [str(pdf_path)]
+                response = requests.post(self._url, data=payload)
+            else:
+                with open(pdf_path, 'rb') as f:
+                    files = {'upload_files': (os.path.basename(pdf_path), f)}
+                    response = requests.post(self._url, data=payload, files=files)
+            response.raise_for_status()
+            res = response.json()
+            if not isinstance(res, dict) or not res.get('result'):
+                LOG.error(f'[MineruPDFReader] Invalid response: {res}')
+                return []
+            res = res['result'][0].get('content_list', [])
+            if not res:
+                LOG.warning(f'[MineruPDFReader] No elements found in PDF: {pdf_path}')
+                return []
+        except requests.exceptions.RequestException as e:
+            LOG.error(f'[MineruPDFReader] POST failed: {e}')
+            return []
+        res = self._extract_content_blocks(res)
+        return res
+
+    def _extract_content_blocks(self, content_list) -> List[dict]:  # noqa: C901
+        blocks = []
+        cur_title = ''
+        cur_level = -1
+        for content in content_list:
+            block = {}
+            block['bbox'] = content.get('bbox', [])
+            block['type'] = content.get('type', 'text')
+            block['page'] = content.get('page_idx', 0)
+            block['lines'] = content['lines'] if 'lines' in content else []
+            for line in block['lines']:
+                if 'content' in line:
+                    line['content'] = self._normalize_content_recursively(line['content'])
+            if content['type'] == 'text':
+                block['text'] = self._normalize_content_recursively(content['text']).strip()
+                if not content['text'].strip():
+                    continue
+                if 'text_level' in content:
+                    if cur_title and content['text_level'] > cur_level:
+                        block['title'] = cur_title
+                    cur_title = content['text']
+                    cur_level = content['text_level']
+                    block['text_level'] = content['text_level']
+                else:
+                    if cur_title:
+                        block['title'] = cur_title
+                blocks.append(block)
+            elif content['type'] == 'image':
+                if not content.get('img_path', None):
+                    continue
+                block['image_path'] = content['img_path']
+                block['img_caption'] = '\n'.join(self._normalize_content_recursively(content.get('image_caption', [])))
+                block['img_footnote'] = '\n'.join(self._normalize_content_recursively(content.get('image_footnote', [])))
+                if cur_title:
+                    block['title'] = cur_title
+                block['text'] = f'![{block["img_caption"]}]({block["image_path"]})'
+                block['text'] += f'\n{block["img_footnote"]}\n' if block['img_footnote'] else '\n'
+                blocks.append(block)
+            elif content['type'] == 'table':
+                if self._extract_table:
+                    block['text'] = self._html_table_to_markdown(
+                        self._normalize_content_recursively(content.get('table_body', '')))
+                    block['table_caption'] = '\n'.join(
+                        self._normalize_content_recursively(content.get('table_caption', [])))
+                    block['table_footnote'] = '\n'.join(
+                        self._normalize_content_recursively(content.get('table_footnote', [])))
+                    if block.get('text', None):
+                        block['text'] = f'{block["table_caption"]}\n{block["text"]}'.lstrip('\n')
+                        block['text'] += f'\n{block["table_footnote"]}\n' if block['table_footnote'] else '\n'
+                else:
+                    block['image_path'] = content.get('img_path', '')
+                    block['text'] = f'![table]({block["image_path"]})'
+                if cur_title:
+                    block['title'] = cur_title
+                blocks.append(block)
+            elif content['type'] == 'equation':
+                if self._extract_formula:
+                    block['text'] = content.get('text', '')
+                else:
+                    block['image_path'] = content.get('img_path', '')
+                    if not block['image_path']:
+                        continue
+                    block['text'] = f'![formula]({block["image_path"]})'
+                if cur_title:
+                    block['title'] = cur_title
+                blocks.append(block)
+        return blocks
+
+    def _normalize_content_recursively(self, content) -> str:
+        if isinstance(content, str):
+            content = content.encode("utf-8", "replace").decode("utf-8")
+            return unicodedata.normalize("NFKC", content)
+        if isinstance(content, list):
+            return [self._normalize_content_recursively(t) for t in content]
+        return content
+
+    def _html_table_to_markdown(self, html_table) -> str:  # noqa: C901
+        if not html_table:
+            return ''
+        try:
+            soup = BeautifulSoup(html_table.strip(), 'html.parser')
+            table = soup.find('table')
+            if not table:
+                raise ValueError('No <table> found in the HTML.')
+
+            rows = []
+            max_cols = 0
+
+            for row in table.find_all('tr'):
+                cells = []
+                for cell in row.find_all(['td', 'th']):
+                    rowspan = int(cell.get('rowspan', 1))
+                    colspan = int(cell.get('colspan', 1))
+                    text = cell.get_text(strip=True)
+
+                    for _ in range(colspan):
+                        cells.append({'text': text, 'rowspan': rowspan})
+                rows.append(cells)
+                max_cols = max(max_cols, len(cells))
+
+            expanded_rows = []
+            rowspan_tracker = [0] * max_cols
+            for row in rows:
+                expanded_row = []
+                col_idx = 0
+                for cell in row:
+                    while col_idx < max_cols and rowspan_tracker[col_idx] > 0:
+                        expanded_row.append(None)
+                        rowspan_tracker[col_idx] -= 1
+                        col_idx += 1
+
+                    expanded_row.append(cell['text'])
+                    if cell['rowspan'] > 1:
+                        rowspan_tracker[col_idx] = cell['rowspan'] - 1
+                    col_idx += 1
+
+                while col_idx < max_cols:
+                    if rowspan_tracker[col_idx] > 0:
+                        expanded_row.append(None)
+                        rowspan_tracker[col_idx] -= 1
+                    else:
+                        expanded_row.append('')
+                    col_idx += 1
+
+                expanded_rows.append(expanded_row)
+
+            markdown = ''
+            if not expanded_rows:
+                return ''
+
+            headers = expanded_rows[0]
+            body_rows = expanded_rows[1:]
+            if headers:
+                markdown += '| ' + ' | '.join(h if h else '' for h in headers) + ' |\n'
+                markdown += '| ' + ' | '.join(['-' * (len(h) if h else 3) for h in headers]) + ' |\n'
+            for row in body_rows:
+                markdown += '| ' + ' | '.join(cell if cell else '' for cell in row) + ' |\n'
+
+            return markdown
+
+        except Exception as e:
+            LOG.error(f'Error parsing table: {e}')
+            return str(html_table)
+
+    def _build_nodes(self, elements: List[dict], file: Path, extra_info: Optional[Dict] = None) -> List[DocNode]:
+        docs = []
+        if self._split_doc:
+            for e in elements:
+                metadata = {'file_name': file.name}
+                metadata.update({k: v for k, v in e.items() if k != 'text'})
+                metadata.update({'file_path': str(file)})
+                node = DocNode(text=e.get('text', ''), metadata=metadata, global_metadata=extra_info)
+                node.excluded_embed_metadata_keys = ['type', 'index', 'text_level', 'bbox', 'lines']
+                node.excluded_llm_metadata_keys = ['type', 'index', 'text_level', 'bbox', 'lines']
+                docs.append(node)
+        else:
+            text_chunks = [el['text'] for el in elements if 'text' in el]
+            nodes = DocNode(text='\n'.join(text_chunks), metadata={'file_name': file.name})
+            nodes.excluded_embed_metadata_keys = ['type', 'index', 'text_level', 'bbox', 'lines']
+            nodes.excluded_llm_metadata_keys = ['type', 'index', 'text_level', 'bbox', 'lines']
+            docs.append(nodes)
+        return docs
diff --git a/lazyllm/tools/rag/rerank.py b/lazyllm/tools/rag/rerank.py
index 995f1a042..09d1c35f9 100644
--- a/lazyllm/tools/rag/rerank.py
+++ b/lazyllm/tools/rag/rerank.py
@@ -75,13 +75,8 @@ def get_nlp_and_matchers(language):
 
 
 @Reranker.register_reranker
-def KeywordFilter(
-    node: DocNode,
-    required_keys: List[str] = [],
-    exclude_keys: List[str] = [],
-    language: str = "en",
-    **kwargs,
-) -> Optional[DocNode]:
+def KeywordFilter(node: DocNode, required_keys: Optional[List[str]] = None, exclude_keys: Optional[List[str]] = None,
+                  language: str = "en", **kwargs) -> Optional[DocNode]:
     assert required_keys or exclude_keys, 'One of required_keys or exclude_keys should be provided'
     nlp, required_matcher, exclude_matcher = get_nlp_and_matchers(language)
     if required_keys:
diff --git a/lazyllm/tools/rag/retriever.py b/lazyllm/tools/rag/retriever.py
index 0f4229f22..ccfe172b2 100644
--- a/lazyllm/tools/rag/retriever.py
+++ b/lazyllm/tools/rag/retriever.py
@@ -103,7 +103,7 @@ def add_subretriever(self, group: str, **kwargs):
         self._node_groups.append((group, kwargs))
         return self
 
-    @functools.lru_cache
+    @functools.lru_cache    # noqa: B019
     def _get_retrievers(self, doc_files: List[str]):
         active_node_groups = self._node_groups or [[Document.MediumChunk,
                                                     dict(similarity=('cosine' if self._embed else 'bm25'))]]
diff --git a/lazyllm/tools/rag/store/__init__.py b/lazyllm/tools/rag/store/__init__.py
index d45255f39..33d110ff6 100644
--- a/lazyllm/tools/rag/store/__init__.py
+++ b/lazyllm/tools/rag/store/__init__.py
@@ -1,21 +1,24 @@
 from .store_base import (
-    StoreBase,
+    LazyLLMStoreBase,
     LAZY_IMAGE_GROUP,
     LAZY_ROOT_NAME,
-    EMBED_DEFAULT_KEY
+    EMBED_DEFAULT_KEY,
+    BUILDIN_GLOBAL_META_DESC
 )
-from .sensecore_store import SenseCoreStore
-from .map_store import MapStore
-from .chroma_store import ChromadbStore
-from .milvus_store import MilvusStore
+from .hybrid import HybridStore, MapStore, SenseCoreStore
+from .segment import OpenSearchStore
+from .vector import ChromadbStore, MilvusStore
 
 __all__ = [
-    'StoreBase',
-    'SenseCoreStore',
+    'LazyLLMStoreBase',
+    'HybridStore',
     'MapStore',
+    'OpenSearchStore',
     'ChromadbStore',
     'MilvusStore',
+    'SenseCoreStore',
     'LAZY_IMAGE_GROUP',
     'LAZY_ROOT_NAME',
     'EMBED_DEFAULT_KEY',
+    'BUILDIN_GLOBAL_META_DESC'
 ]
diff --git a/lazyllm/tools/rag/store/chroma_store.py b/lazyllm/tools/rag/store/chroma_store.py
deleted file mode 100644
index d3078a89c..000000000
--- a/lazyllm/tools/rag/store/chroma_store.py
+++ /dev/null
@@ -1,216 +0,0 @@
-from collections import defaultdict
-from typing import Any, Dict, List, Optional, Callable, Set, Union
-
-from .store_base import StoreBase, LAZY_ROOT_NAME
-from .map_store import MapStore
-
-from ..doc_node import DocNode
-from ..index_base import IndexBase
-from ..default_index import DefaultIndex
-from ..utils import sparse2normal
-
-from lazyllm import LOG
-from lazyllm.common import override, obj2str, str2obj
-from lazyllm.thirdparty import chromadb
-
-
-class ChromadbStore(StoreBase):
-    def __init__(self, group_embed_keys: Dict[str, Set[str]], embed: Dict[str, Callable],
-                 embed_dims: Dict[str, int], dir: str, **kwargs) -> None:
-        self._db_client = chromadb.PersistentClient(path=dir)
-        LOG.success(f"Initialzed chromadb in path: {dir}")
-        node_groups = list(group_embed_keys.keys())
-        self._collections: Dict[str, chromadb.api.models.Collection.Collection] = {
-            group: self._db_client.get_or_create_collection(group)
-            for group in node_groups
-        }
-
-        self._map_store = MapStore(node_groups=node_groups, embed=embed)
-        self._load_store(embed_dims)
-
-        self._name2index = {
-            'default': DefaultIndex(embed, self._map_store),
-        }
-
-    @override
-    def update_nodes(self, nodes: List[DocNode]) -> None:
-        self._map_store.update_nodes(nodes)
-        self._save_nodes(nodes)
-
-    @override
-    def remove_nodes(self, doc_ids: List[str], group_name: Optional[str] = None,
-                     uids: Optional[List[str]] = None) -> None:
-        nodes = self._map_store.get_nodes(group_name=group_name, doc_ids=doc_ids, uids=uids)
-        group2uids = defaultdict(list)
-        for node in nodes:
-            group2uids[node._group].append(node._uid)
-        for group, uids in group2uids.items():
-            self._delete_group_nodes(group, uids)
-            self._map_store.remove_nodes(doc_ids=doc_ids, uids=uids)
-
-    @override
-    def update_doc_meta(self, doc_id: str, metadata: dict) -> None:
-        self._map_store.update_doc_meta(doc_id=doc_id, metadata=metadata)
-        for group in self.activated_groups():
-            nodes = self.get_nodes(group_name=group, doc_ids=[doc_id])
-            self._save_nodes(nodes)
-
-    @override
-    def get_nodes(self, group_name: Optional[str] = None, uids: Optional[List[str]] = None,
-                  doc_ids: Optional[Set] = None, **kwargs) -> List[DocNode]:
-        return self._map_store.get_nodes(group_name, uids, doc_ids, **kwargs)
-
-    @override
-    def activate_group(self, group_names: Union[str, List[str]]) -> bool:
-        return self._map_store.activate_group(group_names)
-
-    @override
-    def activated_groups(self):
-        return self._map_store.activated_groups()
-
-    @override
-    def is_group_active(self, name: str) -> bool:
-        return self._map_store.is_group_active(name)
-
-    @override
-    def all_groups(self) -> List[str]:
-        return self._map_store.all_groups()
-
-    @override
-    def query(self, *args, **kwargs) -> List[DocNode]:
-        return self.get_index('default').query(*args, **kwargs)
-
-    @override
-    def register_index(self, type: str, index: IndexBase) -> None:
-        self._name2index[type] = index
-
-    @override
-    def get_index(self, type: Optional[str] = None) -> Optional[IndexBase]:
-        if type is None:
-            type = 'default'
-        return self._name2index.get(type)
-
-    @override
-    def clear_cache(self, group_names: Optional[List[str]] = None):
-        if group_names is None:
-            for group_name in self.activated_groups():
-                self._db_client.delete_collection(name=group_name)
-            self._collections.clear()
-            self._map_store.clear_cache()
-        elif isinstance(group_names, str):
-            group_names = [group_names]
-        elif isinstance(group_names, (tuple, list, set)):
-            group_names = list(group_names)
-        else:
-            raise TypeError(f"Invalid type {type(group_names)} for group_names, expected list of str")
-        for group_name in group_names:
-            self._db_client.delete_collection(name=group_name)
-        self._map_store.clear_cache(group_names)
-
-    def _load_store(self, embed_dims: Dict[str, int]) -> None:
-        if not self._collections[LAZY_ROOT_NAME].peek(1)["ids"]:
-            LOG.info("No persistent data found, skip the rebuilding phrase.")
-            return
-
-        # Restore all nodes
-        uid2node = {}
-        for group in self._collections.keys():
-            results = self._peek_all_documents(group)
-            nodes = self._build_nodes_from_chroma(results, embed_dims)
-            for node in nodes:
-                uid2node[node._uid] = node
-
-        # Rebuild relationships
-        for node in uid2node.values():
-            if node.parent:
-                parent_uid = node.parent
-                parent_node = uid2node.get(parent_uid)
-                node.parent = parent_node
-                parent_node.children[node._group].append(node)
-        LOG.debug(f"build {group} nodes from chromadb: {nodes}")
-
-        self._map_store.update_nodes(list(uid2node.values()))
-        LOG.success("Successfully Built nodes from chromadb.")
-
-    def _save_nodes(self, nodes: List[DocNode]) -> None:
-        if not nodes:
-            return
-        # Note: It's caller's duty to make sure this batch of nodes has the same group.
-        group = nodes[0]._group
-        ids, embeddings, metadatas, documents = [], [], [], []
-        collection = self._collections.get(group)
-        assert (
-            collection
-        ), f"Group {group} is not found in collections {self._collections}"
-        for node in nodes:
-            metadata = self._make_chroma_metadata(node)
-            ids.append(node._uid)
-            embeddings.append([0])  # we don't use chroma for retrieving
-            metadatas.append(metadata)
-            documents.append(obj2str(node._content))
-        if ids:
-            collection.upsert(
-                embeddings=embeddings,
-                ids=ids,
-                metadatas=metadatas,
-                documents=documents,
-            )
-            LOG.debug(f"Saved {group} nodes {ids} to chromadb.")
-
-    def _delete_group_nodes(self, group_name: str, uids: List[str]) -> None:
-        collection = self._collections.get(group_name)
-        if collection:
-            collection.delete(ids=uids)
-
-    def _build_nodes_from_chroma(self, results: Dict[str, List], embed_dims: Dict[str, int]) -> List[DocNode]:
-        nodes: List[DocNode] = []
-        for i, uid in enumerate(results['ids']):
-            chroma_metadata = results['metadatas'][i]
-
-            parent = chroma_metadata['parent']
-            local_metadata = str2obj(chroma_metadata['metadata'])
-            global_metadata = str2obj(chroma_metadata['global_metadata']) if not parent else None
-
-            node = DocNode(
-                uid=uid,
-                content=str2obj(results["documents"][i]),
-                group=chroma_metadata["group"],
-                embedding=str2obj(chroma_metadata['embedding']),
-                parent=parent,
-                metadata=local_metadata,
-                global_metadata=global_metadata,
-            )
-
-            if node.embedding:
-                # convert sparse embedding to List[float]
-                new_embedding_dict = {}
-                for key, embedding in node.embedding.items():
-                    if isinstance(embedding, dict):
-                        dim = embed_dims.get(key)
-                        if not dim:
-                            raise ValueError(f'dim of embed [{key}] is not determined.')
-                        new_embedding_dict[key] = sparse2normal(embedding, dim)
-                    else:
-                        new_embedding_dict[key] = embedding
-                node.embedding = new_embedding_dict
-
-            nodes.append(node)
-        return nodes
-
-    def _make_chroma_metadata(self, node: DocNode) -> Dict[str, Any]:
-        metadata = {
-            "group": node._group,
-            "parent": node.parent._uid if node.parent else "",
-            "embedding": obj2str(node.embedding),
-            "metadata": obj2str(node._metadata),
-        }
-
-        if node.is_root_node:
-            metadata["global_metadata"] = obj2str(node.global_metadata)
-
-        return metadata
-
-    def _peek_all_documents(self, group: str) -> Dict[str, List]:
-        assert group in self._collections, f"group {group} not found."
-        collection = self._collections[group]
-        return collection.peek(collection.count())
diff --git a/lazyllm/tools/rag/store/document_store.py b/lazyllm/tools/rag/store/document_store.py
new file mode 100644
index 000000000..b8b55225c
--- /dev/null
+++ b/lazyllm/tools/rag/store/document_store.py
@@ -0,0 +1,389 @@
+import os
+import lazyllm
+import traceback
+
+from collections import defaultdict
+from typing import Optional, List, Union, Set, Dict, Callable, Any, Tuple
+from pathlib import Path
+from lazyllm import LOG, once_wrapper
+
+from .store_base import (LazyLLMStoreBase, StoreCapability, SegmentType, Segment, INSERT_BATCH_SIZE,
+                         BUILDIN_GLOBAL_META_DESC, DEFAULT_KB_ID)
+from .hybrid import HybridStore, MapStore
+from ..default_index import DefaultIndex
+from ..utils import parallel_do_embedding
+
+from ..doc_node import DocNode, QADocNode, ImageDocNode
+from ..index_base import IndexBase
+from ..data_type import DataType
+from ..global_metadata import GlobalMetadataDesc, RAG_DOC_ID, RAG_KB_ID
+from ..similarity import registered_similarities
+
+
+class _DocumentStore(object):
+    def __init__(self, algo_name: str, store: Union[Dict, LazyLLMStoreBase],
+                 group_embed_keys: Optional[Dict[str, Set[str]]] = None, embed: Optional[Dict[str, Callable]] = None,
+                 embed_dims: Optional[Dict[str, int]] = None, embed_datatypes: Optional[Dict[str, DataType]] = None,
+                 global_metadata_desc: Optional[Dict[str, GlobalMetadataDesc]] = None):
+        self._algo_name = algo_name
+        self._group_embed_keys = group_embed_keys
+        self._embed = embed
+        self._embed_dims = embed_dims
+        self._embed_datatypes = embed_datatypes
+        self._global_metadata_desc = (global_metadata_desc or {}) | BUILDIN_GLOBAL_META_DESC
+        self._activated_groups = set()
+        self._indices = {}
+        self._impl = self._prepare_store(store)
+        if self._impl.supports_index_registration:
+            self._indices['default'] = DefaultIndex(self._embed, self)
+
+    def _prepare_store(self, store: Union[Dict, LazyLLMStoreBase]) -> LazyLLMStoreBase:
+        if isinstance(store, dict):
+            # create store from store config
+            if store.get('indices'): store = self._convert_legacy_to_config(store)
+            store = self._create_store_from_config(store)
+        if store.capability == StoreCapability.VECTOR:
+            segment_store = MapStore(uri=os.path.join(store.dir, 'segments.db') if store.dir else None)
+            return HybridStore(segment_store=segment_store, vector_store=store)
+        return store
+
+    def _make_store(self, cfg: Dict[str, Any]) -> LazyLLMStoreBase:
+        if not cfg: return None
+        stype = cfg.get('type')
+        cls = getattr(lazyllm.store, stype, None)
+        if not cls:
+            raise NotImplementedError(f'Not implemented store type: {stype}')
+        return cls(**cfg.get('kwargs', {}))
+
+    def _convert_legacy_to_config(self, cfg: Dict[str, Any]) -> Dict[str, Any]:
+        indices = cfg.pop('indices')
+        if indices.get('smart_embedding_index'):
+            LOG.warning('[_DocumentStore] `smart_embedding_index` is deprecated, converted to milvus type config')
+        else:
+            raise ValueError(f'[_DocumentStore] Unsupported index type: {indices.keys()}')
+        index_config = indices.get('smart_embedding_index')
+        backend = index_config.get('backend')
+        if not backend:
+            raise ValueError('backend is required in indices')
+        cfg = {'type': backend, 'kwargs': index_config.get('kwargs', {})}
+        return cfg
+
+    def _normalize_store_config(self, cfg: Dict[str, Any]) -> Tuple[Dict[str, Any], Dict[str, Any]]:
+        if 'type' in cfg:
+            store_type = cfg['type']
+            cls = getattr(lazyllm.store, store_type, None)
+            if not cls:
+                raise NotImplementedError(f"Not implemented store type: {store_type}")
+            cap = getattr(cls, "capability", None)
+            if cap is None:
+                raise AttributeError(f"{cls.__name__} must define class attribute 'capability'")
+
+            if cap in (StoreCapability.ALL, StoreCapability.SEGMENT):
+                return cfg, {}
+            elif cap == StoreCapability.VECTOR:
+                return {}, cfg
+            else:
+                raise ValueError(f"Unsupported capability {cap} for {cls.__name__}")
+        return cfg.get('segment_store', {}) or {}, cfg.get('vector_store', {}) or {}
+
+    def _create_store_from_config(self, cfg: Optional[Dict[str, Any]] = None) -> LazyLLMStoreBase:
+        seg_cfg, vec_cfg = self._normalize_store_config(cfg)
+
+        seg_store = self._make_store(seg_cfg)
+        vec_store = self._make_store(vec_cfg)
+
+        if not seg_store and not vec_store:
+            raise ValueError("Provide either 'type' or 'segment_store'/'vector_store' in config.")
+
+        if seg_store:
+            assert seg_store.capability in (StoreCapability.ALL, StoreCapability.SEGMENT), \
+                'Segment store must be a segment store'
+        if vec_store:
+            assert vec_store.capability in (StoreCapability.ALL, StoreCapability.VECTOR), \
+                'Vector store must be a vector store'
+
+        if seg_store and vec_store:
+            return HybridStore(segment_store=seg_store, vector_store=vec_store)
+
+        if seg_store and not vec_store:
+            return seg_store
+        if vec_store and not seg_store:
+            if vec_store.capability == StoreCapability.VECTOR:
+                db_path = getattr(vec_store, "dir", None)
+                if db_path:
+                    p = Path(db_path)
+                    segment_uri = str(p.with_name(f"lazyllm_{p.stem}_segments.db"))
+                else:
+                    segment_uri = None
+                segment_store = MapStore(uri=segment_uri)
+                return HybridStore(segment_store=segment_store, vector_store=vec_store)
+            return vec_store
+        # should not reach here
+        raise RuntimeError("Unexpected store creation state")
+
+    @once_wrapper(reset_on_pickle=True)
+    def _lazy_init(self):
+        self._impl.connect(embed_dims=self._embed_dims, embed_datatypes=self._embed_datatypes,
+                           global_metadata_desc=self._global_metadata_desc,
+                           collections=[self._gen_collection_name(group) for group in self.activated_groups()])
+
+    @property
+    def impl(self):
+        self._lazy_init()
+        return self._impl
+
+    def activate_group(self, groups: Union[str, List[str]]) -> bool:
+        if isinstance(groups, str):
+            groups = [groups]
+        for group in groups:
+            if group not in self._activated_groups:
+                self._activated_groups.add(group)
+        return True
+
+    def activated_groups(self) -> List[str]:
+        return list(self._activated_groups)
+
+    def is_group_active(self, group: str) -> bool:
+        return group in self._activated_groups
+
+    def is_group_empty(self, group: str) -> bool:
+        return not self.impl.get(self._gen_collection_name(group), {})
+
+    def update_nodes(self, nodes: List[DocNode]):   # noqa: C901
+        if not nodes:
+            return
+        try:
+            if self._embed and self.impl.capability == StoreCapability.SEGMENT:
+                LOG.warning(f'[_DocumentStore - {self._algo_name}] Embed is provided'
+                            f' but store {self.impl} does not support embedding')
+            if self.impl.need_embedding:
+                parallel_do_embedding(self._embed, [], nodes, self._group_embed_keys)
+            group_segments = defaultdict(list)
+            for node in nodes:
+                group_segments[node._group].append(self._serialize_node(node))
+            # upsert batch segments
+            for group, segments in group_segments.items():
+                if not self.is_group_active(group):
+                    LOG.warning(f'[_DocumentStore - {self._algo_name}] Group {group} is not active, skip')
+                    continue
+                for i in range(0, len(segments), INSERT_BATCH_SIZE):
+                    self.impl.upsert(self._gen_collection_name(group), segments[i:i + INSERT_BATCH_SIZE])
+            # update indices
+            for index in self._indices.values():
+                index.update(nodes)
+        except Exception as e:
+            LOG.error(f'[_DocumentStore - {self._algo_name}] Failed to update nodes: {e}')
+            LOG.error(traceback.format_exc())
+            raise
+
+    def remove_nodes(self, uids: Optional[List[str]] = None, doc_ids: Optional[Set] = None,
+                     group: Optional[str] = None, kb_id: Optional[str] = None, **kwargs) -> None:
+        # remove a set of nodes by uids
+        # remove the nodes of the whole file -- doc ids only
+        # remove the nodes of a certain group for one file -- doc ids and group (kb_id is optional)
+        # forbid to remove the nodes from multiple kb
+        try:
+            criteria = {}
+            if uids:
+                criteria = {'uid': uids}
+            if doc_ids:
+                criteria[RAG_DOC_ID] = doc_ids
+            if kb_id:
+                criteria[RAG_KB_ID] = kb_id
+            if not group:
+                groups = self._activated_groups
+            else:
+                groups = [group]
+            for group in groups:
+                if not self.is_group_active(group):
+                    LOG.warning(f'[_DocumentStore - {self._algo_name}] Group {group} is not active, skip')
+                    continue
+                self.impl.delete(self._gen_collection_name(group), criteria)
+            # update indices
+            for index in self._indices.values():
+                index.remove(uids, group)
+        except Exception as e:
+            LOG.error(f'[_DocumentStore - {self._algo_name}] Failed to remove nodes: {e}')
+            raise
+
+    def get_nodes(self, uids: Optional[List[str]] = None, doc_ids: Optional[Set] = None,
+                  group: Optional[str] = None, kb_id: Optional[str] = None, **kwargs) -> List[DocNode]:
+        try:
+            segments = self.get_segments(uids, doc_ids, group, kb_id, **kwargs)
+            return [self._deserialize_node(segment) for segment in segments]
+        except Exception as e:
+            LOG.error(f'[_DocumentStore - {self._algo_name}] Failed to get nodes: {e}')
+            raise
+
+    def get_segments(self, uids: Optional[List[str]] = None, doc_ids: Optional[Set] = None,
+                     group: Optional[str] = None, kb_id: Optional[str] = None, **kwargs) -> List[dict]:
+        # get a set of segments by uids
+        # get the segments of the whole file -- doc ids only
+        # get the segments of a certain group for one file -- doc ids and group (kb_id is optional)
+        # forbid to get the segments from multiple kb (only one kb_id is allowed)
+        # TODO: pagination
+        try:
+            criteria = {}
+            if uids:
+                criteria = {'uid': uids}
+            if doc_ids:
+                criteria = {RAG_DOC_ID: doc_ids}
+            if kb_id:
+                criteria[RAG_KB_ID] = kb_id
+            # for find method, parent id should be in the criteria
+            if kwargs.get('parent'):
+                criteria['parent'] = kwargs['parent']
+            if not group:
+                groups = self._activated_groups
+            else:
+                groups = [group]
+            segments = []
+            for group in groups:
+                if not self.is_group_active(group):
+                    LOG.warning(f'[_DocumentStore - {self._algo_name}] Group {group} is not active, skip')
+                    continue
+                segments.extend(self.impl.get(self._gen_collection_name(group), criteria, **kwargs))
+            return segments
+        except Exception as e:
+            LOG.error(f'[_DocumentStore - {self._algo_name}] Failed to get segments: {e}')
+            raise
+
+    def update_doc_meta(self, doc_id: str, metadata: dict) -> None:
+        kb_id = metadata.get(RAG_KB_ID, None)
+        segments = self.get_segments(doc_ids=[doc_id], kb_id=kb_id)
+        if not segments:
+            LOG.warning(f'[_DocumentStore] No segments found for doc_id: {doc_id} in dataset: {kb_id}')
+            return
+        group_segments = defaultdict(list)
+        for segment in segments:
+            segment['global_meta'].update(metadata)
+            group_segments[segment.get('group')].append(segment)
+        for group, segments in group_segments.items():
+            self.impl.upsert(self._gen_collection_name(group), segments)
+        LOG.info(f'[_DocumentStore] Updated metadata for doc_id: {doc_id} in dataset: {kb_id}')
+        return
+
+    def query(self, query: str, group_name: str, similarity_name: Optional[str] = None,
+              similarity_cut_off: Union[float, Dict[str, float]] = float('-inf'),
+              topk: Optional[int] = 10, embed_keys: Optional[List[str]] = None,
+              filters: Optional[Dict[str, Union[str, int, List, Set]]] = None, **kwargs) -> List[DocNode]:
+        self._validate_query_params(group_name, similarity_name, embed_keys)
+        segments = []
+        if embed_keys:
+            if self.impl.capability == StoreCapability.SEGMENT:
+                raise ValueError(f'[_DocumentStore - {self._algo_name}] Embed keys {embed_keys}'
+                                 ' are not supported when no vector store is provided')
+            # vector search
+            for embed_key in embed_keys:
+                query_embedding = self._embed.get(embed_key)(query)
+                search_res = self.impl.search(collection_name=self._gen_collection_name(group_name),
+                                              query=query, query_embedding=query_embedding,
+                                              topk=topk, filters=filters, embed_key=embed_key, **kwargs)
+                if search_res:
+                    sim_cut_off = similarity_cut_off if isinstance(similarity_cut_off, float)\
+                        else similarity_cut_off[embed_key]
+                    segments.extend([res for res in search_res if res.get('score', 0) >= sim_cut_off])
+        else:
+            # text search
+            if self.impl.capability == StoreCapability.VECTOR:
+                raise ValueError(f'[_DocumentStore - {self._algo_name}] Text search is not'
+                                 ' supported when no segment store is provided')
+            segments.extend(self.impl.search(collection_name=self._gen_collection_name(group_name),
+                                             query=query, topk=topk, filters=filters, **kwargs))
+        return [self._deserialize_node(segment, segment.get('score', 0)) for segment in segments]
+
+    def _validate_query_params(self, group_name: str, similarity: str,
+                               embed_keys: Optional[List[str]] = None, **kwargs) -> bool:
+        assert self.is_group_active(group_name), f'[_DocumentStore - {self._algo_name}] Group {group_name} is not active'
+        if similarity:
+            if similarity in registered_similarities:
+                _, mode, _ = registered_similarities[similarity]
+                if mode == 'embedding' and self.impl.capability == StoreCapability.SEGMENT:
+                    raise ValueError(f'[_DocumentStore - {self._algo_name}] Similarity {similarity} is not supported, '
+                                     f'embedding similarity is supported for vector or hybrid store')
+                elif mode == 'text' and self.impl.capability == StoreCapability.VECTOR:
+                    raise ValueError(f'[_DocumentStore - {self._algo_name}] Similarity {similarity} is not supported, '
+                                     'text similarity is supported for segment or hybrid store')
+            else:
+                raise ValueError(f'[_DocumentStore - {self._algo_name}] Similarity {similarity} is not supported')
+
+        if embed_keys:
+            assert self.impl.capability != StoreCapability.SEGMENT, \
+                f'[_DocumentStore - {self._algo_name}] Embed {embed_keys} not supported when no vector store provided'
+            assert all(key in self._embed for key in embed_keys), \
+                f'[_DocumentStore - {self._algo_name}] Embed {embed_keys} not supported'
+        return True
+
+    def clear_cache(self, groups: Optional[List[str]] = None) -> None:
+        if not groups:
+            groups = self._activated_groups
+        elif isinstance(groups, str):
+            groups = [groups]
+        elif isinstance(groups, (tuple, list, set)):
+            groups = list(groups)
+        else:
+            raise TypeError(f'Invalid type {type(groups)} for groups, expected list of str')
+        for group in groups:
+            self.impl.delete(self._gen_collection_name(group))
+
+    def register_index(self, type: str, index: IndexBase) -> None:
+        assert self._impl.supports_index_registration, \
+            f'[_DocumentStore - {self._algo_name}] Store {type(self.impl)} does not support index registration'
+        self._indices[type] = index
+
+    def get_index(self, type: Optional[str] = None) -> Optional[IndexBase]:
+        return self._indices.get(type)
+
+    def _serialize_node(self, node: DocNode) -> dict:
+        segment = Segment(
+            uid=node._uid,
+            doc_id=node.global_metadata.get(RAG_DOC_ID),
+            group=node._group,
+            content=node.text,
+            meta=node.metadata,
+            global_meta=node.global_metadata,
+            number=node.metadata.get('lazyllm_store_num', 0),
+            kb_id=node.global_metadata.get(RAG_KB_ID, DEFAULT_KB_ID),
+            excluded_embed_metadata_keys=node.excluded_embed_metadata_keys,
+            excluded_llm_metadata_keys=node.excluded_llm_metadata_keys,
+        )
+        if node.parent:
+            segment.parent = node.parent._uid if isinstance(node.parent, DocNode) else node.parent
+        if isinstance(node, QADocNode):
+            segment.type = SegmentType.QA.value
+            segment.answer = node.answer
+        elif isinstance(node, ImageDocNode):
+            segment.type = SegmentType.IMAGE.value
+            segment.image_keys = [node.image_path] if node.image_path else []
+        res = segment.model_dump()
+        # For speed up, add embedding after serialization
+        if node.embedding:
+            res['embedding'] = {k: v for k, v in node.embedding.items()}
+        return res
+
+    def _deserialize_node(self, data: dict, score: Optional[float] = None) -> DocNode:
+        segment_type = data.get('type', SegmentType.TEXT.value)
+        if segment_type == SegmentType.QA.value:
+            node = QADocNode(query=data.get('content', ''), answer=data.get('answer', ''), uid=data['uid'],
+                             group=data['group'], parent=data.get('parent', ''),
+                             metadata=data.get('meta', {}),
+                             global_metadata=data.get('global_meta', {}))
+        elif segment_type == SegmentType.IMAGE.value:
+            if not data.get('image_keys', []):
+                raise ValueError('ImageDocNode does have any image_keys')
+            node = ImageDocNode(image_path=data.get('image_keys')[0],
+                                uid=data['uid'], group=data['group'], parent=data.get('parent', ''),
+                                metadata=data.get('meta', {}),
+                                global_metadata=data.get('global_meta', {}))
+        else:
+            node = DocNode(uid=data['uid'], group=data['group'], content=data.get('content', ''),
+                           parent=data.get('parent', ''), metadata=data.get('meta', {}),
+                           global_metadata=data.get('global_meta', {}))
+        node.excluded_embed_metadata_keys = data.get('excluded_embed_metadata_keys', [])
+        node.excluded_llm_metadata_keys = data.get('excluded_llm_metadata_keys', [])
+        if 'embedding' in data:
+            node.embedding = {k: v for k, v in data.get('embedding', {}).items()}
+        return node.with_sim_score(score) if score else node
+
+    def _gen_collection_name(self, group: str) -> str:
+        return f'col_{self._algo_name}_{group}'.lower()
diff --git a/lazyllm/tools/rag/store/hybrid/__init__.py b/lazyllm/tools/rag/store/hybrid/__init__.py
new file mode 100644
index 000000000..3f1c00da1
--- /dev/null
+++ b/lazyllm/tools/rag/store/hybrid/__init__.py
@@ -0,0 +1,5 @@
+from .map_store import MapStore
+from .sensecore_store import SenseCoreStore
+from .hybrid_store import HybridStore
+
+__all__ = ["MapStore", "SenseCoreStore", "HybridStore"]
diff --git a/lazyllm/tools/rag/store/hybrid/hybrid_store.py b/lazyllm/tools/rag/store/hybrid/hybrid_store.py
new file mode 100644
index 000000000..58454ce85
--- /dev/null
+++ b/lazyllm/tools/rag/store/hybrid/hybrid_store.py
@@ -0,0 +1,73 @@
+from typing import Dict, List, Optional, Union, Set
+
+from lazyllm.common import override
+
+from ..store_base import LazyLLMStoreBase, StoreCapability
+
+
+class HybridStore(LazyLLMStoreBase):
+    capability = StoreCapability.ALL
+    need_embedding = True
+    supports_index_registration = False
+
+    def __init__(self, segment_store: LazyLLMStoreBase, vector_store: LazyLLMStoreBase):
+        self.segment_store: LazyLLMStoreBase = segment_store
+        self.vector_store: LazyLLMStoreBase = vector_store
+
+    @property
+    def dir(self):
+        return self.segment_store.dir
+
+    @override
+    def connect(self, *args, **kwargs):
+        self.segment_store.connect(*args, **kwargs)
+        self.vector_store.connect(*args, **kwargs)
+
+    @override
+    def upsert(self, collection_name: str, data: List[dict]) -> bool:
+        segments = [{k: v for k, v in segment.items() if k != 'embedding'} for segment in data]
+        return self.segment_store.upsert(collection_name=collection_name, data=segments) and \
+            self.vector_store.upsert(collection_name=collection_name, data=data)
+
+    @override
+    def delete(self, collection_name: str, criteria: Optional[dict] = None, **kwargs) -> bool:
+        return self.segment_store.delete(collection_name=collection_name, criteria=criteria, **kwargs) and \
+            self.vector_store.delete(collection_name=collection_name, criteria=criteria, **kwargs)
+
+    @override
+    def get(self, collection_name: str, criteria: Optional[dict] = None, **kwargs) -> List[dict]:
+        res_segments = self.segment_store.get(collection_name=collection_name, criteria=criteria, **kwargs)
+        if not res_segments: return []
+        uids = [item.get('uid') for item in res_segments]
+        res_vectors = self.vector_store.get(collection_name=collection_name, criteria={'uid': uids})
+
+        data = {}
+        for item in res_segments:
+            data[item.get('uid')] = item
+        for item in res_vectors:
+            if item.get('uid') in data:
+                data[item.get('uid')]['embedding'] = item.get('embedding')
+            else:
+                raise ValueError(f"[HybridStore - get] uid {item['uid']} in vector store"
+                                 " but not found in segment store")
+        return list(data.values())
+
+    @override
+    def search(self, collection_name: str, query: str, query_embedding: Optional[Union[dict, List[float]]] = None,
+               topk: int = 10, filters: Optional[Dict[str, Union[str, int, List, Set]]] = None,
+               embed_key: Optional[str] = None, **kwargs) -> List[dict]:
+        if embed_key:
+            # vector store only give uid and score
+            res = self.vector_store.search(collection_name=collection_name, query=query, query_embedding=query_embedding,
+                                           topk=topk, filters=filters, embed_key=embed_key, **kwargs)
+            if not res: return []
+            uid2score = {item['uid']: item['score'] for item in res}
+            uids = list(uid2score.keys())
+            segments = self.segment_store.get(collection_name=collection_name, criteria={'uid': uids})
+            for segment in segments:
+                segment['score'] = uid2score.get(segment['uid'], 0)
+            return segments
+        else:
+            res = self.segment_store.search(collection_name=collection_name, query=query,
+                                            topk=topk, filters=filters, **kwargs)
+            return res
diff --git a/lazyllm/tools/rag/store/hybrid/map_store.py b/lazyllm/tools/rag/store/hybrid/map_store.py
new file mode 100644
index 000000000..fb2f2bf8c
--- /dev/null
+++ b/lazyllm/tools/rag/store/hybrid/map_store.py
@@ -0,0 +1,217 @@
+import json
+import sqlite3
+import os
+
+from pathlib import Path
+from collections import defaultdict
+from typing import Dict, List, Optional, Union, Set
+
+from lazyllm import LOG
+from lazyllm.common import override
+
+from ..store_base import LazyLLMStoreBase, StoreCapability, DEFAULT_KB_ID
+from ...global_metadata import RAG_DOC_ID, RAG_KB_ID
+
+
+class MapStore(LazyLLMStoreBase):
+    capability = StoreCapability.ALL
+    need_embedding = True
+    supports_index_registration = True
+
+    def __init__(self, uri: Optional[str] = None, **kwargs):
+        self._uri = uri  # filepath to SQLite .db for persistence
+
+    @property
+    def dir(self):
+        path = os.path.dirname(self._uri)
+        return path if path.endswith(os.sep) else path + os.sep
+
+    def _ensure_table(self, cursor: sqlite3.Cursor, table: str):
+        cursor.execute(f"""
+        CREATE TABLE IF NOT EXISTS {table} (
+            uid TEXT PRIMARY KEY,
+            doc_id TEXT,
+            "group" TEXT,
+            content TEXT,
+            meta TEXT,
+            global_meta TEXT,
+            type INTEGER,
+            number INTEGER,
+            kb_id TEXT,
+            excluded_embed_metadata_keys TEXT,
+            excluded_llm_metadata_keys TEXT,
+            parent TEXT,
+            answer TEXT,
+            image_keys TEXT
+        )
+        """)
+        cursor.execute(f"CREATE INDEX IF NOT EXISTS idx_{table}_parent ON {table}(parent)")
+
+    def _load_from_uri(self, collection_name: str, uri: str):
+        conn = sqlite3.connect(uri)
+        cursor = conn.cursor()
+
+        cursor.execute("SELECT name FROM sqlite_master WHERE type='table' AND name=?", (collection_name,))
+        if not cursor.fetchone():
+            LOG.warning(f"[MapStore] Table '{collection_name}' does not exist in SQLite DB {uri}, skipping.")
+            conn.close()
+            return
+
+        res = []
+        for row in cursor.execute(
+            f"SELECT uid, doc_id, \"group\", content, meta, global_meta,"
+            f" type, number, kb_id, excluded_embed_metadata_keys, excluded_llm_metadata_keys,"
+            f" parent, answer, image_keys FROM {collection_name}"
+        ):
+            (uid, doc_id, group, content, meta_str, global_meta_str, type_, number,
+             kb_id, excl_emb_str, excl_llm_str, parent, answer, image_keys_str) = row
+            item = {'uid': uid, 'doc_id': doc_id, 'group': group, 'content': content,
+                    'meta': json.loads(meta_str) if meta_str else {},
+                    'global_meta': json.loads(global_meta_str) if global_meta_str else {},
+                    'type': type_, 'number': number, 'kb_id': kb_id,
+                    'excluded_embed_metadata_keys': json.loads(excl_emb_str) if excl_emb_str else [],
+                    'excluded_llm_metadata_keys': json.loads(excl_llm_str) if excl_llm_str else [],
+                    'parent': parent, 'answer': answer,
+                    'image_keys': json.loads(image_keys_str) if image_keys_str else []}
+            res.append(item)
+        conn.close()
+        for item in res:
+            self._uid2data[item['uid']] = item
+            self._collection2uids[collection_name].add(item['uid'])
+            self._col_doc_uids[collection_name][item['doc_id']].add(item['uid'])
+            self._col_kb_doc_uids[collection_name][item['kb_id']][item['doc_id']].add(item['uid'])
+            self._col_parent_uids[collection_name][item['parent']].add(item['uid'])
+
+    def _save_to_uri(self, collection_name: str, uri: str, data: List[dict]):
+        conn = sqlite3.connect(uri)
+        cursor = conn.cursor()
+        self._ensure_table(cursor, collection_name)
+        sql = f"INSERT OR REPLACE INTO {collection_name} (\
+                uid, doc_id, \"group\", content,\
+                meta, global_meta, type, number, kb_id,\
+                excluded_embed_metadata_keys, excluded_llm_metadata_keys,\
+                parent, answer, image_keys)\
+            VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)"
+        params = []
+        for item in data:
+            params.append((item['uid'], item['doc_id'], item['group'], item['content'],
+                           json.dumps(item['meta']), json.dumps(item['global_meta']),
+                           item['type'], item['number'], item['kb_id'],
+                           json.dumps(item['excluded_embed_metadata_keys']),
+                           json.dumps(item['excluded_llm_metadata_keys']),
+                           item['parent'], item['answer'], json.dumps(item['image_keys'])))
+        LOG.info(f"executemany {sql} with {len(params)} params")
+        cursor.executemany(sql, params)
+        conn.commit()
+        conn.close()
+
+    @override
+    def connect(self, collections: Optional[List[str]] = None, **kwargs):
+        self._uid2data: Dict[str, dict] = {}
+        self._collection2uids: Dict[str, Set[str]] = defaultdict(set)
+        self._col_doc_uids: Dict[str, Dict[str, Set[str]]] = defaultdict(lambda: defaultdict(set))
+        self._col_kb_doc_uids: Dict[str, Dict[str, Dict[str, Set[str]]]] = defaultdict(
+            lambda: defaultdict(lambda: defaultdict(set)))
+        self._col_parent_uids: Dict[str, Dict[str, Set[str]]] = defaultdict(lambda: defaultdict(set))
+        if self._uri:
+            if not os.path.exists(self._uri):
+                LOG.info(f"[MapStore] SQLite DB {self._uri} does not exist, creating...")
+                db_path = Path(self._uri)
+                db_path.parent.mkdir(parents=True, exist_ok=True)
+                db_path.touch(exist_ok=True)
+                self._uri = str(db_path)
+            LOG.info(f"[MapStore] Loading data from {self._uri}")
+            for collection_name in collections:
+                self._load_from_uri(collection_name, self._uri)
+        return
+
+    @override
+    def upsert(self, collection_name: str, data: List[dict]) -> None:
+        try:
+            for item in data:
+                uid = item.get('uid')
+                doc_id = item.get('doc_id')
+                assert uid and doc_id, "[MapStore - upsert] uid and doc_id are required"
+                self._uid2data[uid] = item
+                self._collection2uids[collection_name].add(uid)
+                self._col_kb_doc_uids[collection_name][item.get(RAG_KB_ID, DEFAULT_KB_ID)][doc_id].add(uid)
+                self._col_doc_uids[collection_name][doc_id].add(uid)
+                self._col_parent_uids[collection_name][item.get('parent')].add(uid)
+            if self._uri:
+                self._save_to_uri(collection_name, self._uri, data)
+        except Exception as e:
+            LOG.error(f"[MapStore - upsert] Error upserting data: {e}")
+            return False
+        return True
+
+    @override
+    def delete(self, collection_name: str, criteria: Optional[dict] = None, **kwargs) -> bool:
+        try:
+            need_delete = self._get_uids_by_criteria(collection_name, criteria)
+            if not need_delete:
+                return False
+            for uid in need_delete:
+                data = self._uid2data.pop(uid, None)
+                if not data:
+                    continue
+                kb_id = data.get(RAG_KB_ID, DEFAULT_KB_ID)
+                doc_id = data.get('doc_id')
+                parent = data.get('parent')
+                self._collection2uids[collection_name].remove(uid)
+                self._col_kb_doc_uids[collection_name][kb_id][doc_id].remove(uid)
+                self._col_doc_uids[collection_name][doc_id].remove(uid)
+                self._col_parent_uids[collection_name][parent].remove(uid)
+            if self._uri:
+                conn = sqlite3.connect(self._uri)
+                cursor = conn.cursor()
+                sql_del = f"DELETE FROM {collection_name} WHERE uid = ?"
+                cursor.executemany(sql_del, [(uid,) for uid in need_delete])
+                conn.commit()
+                conn.close()
+            return True
+        except Exception as e:
+            LOG.error(f"[MapStore - delete] Error deleting data: {e}")
+            return False
+
+    @override
+    def get(self, collection_name: str, criteria: Optional[dict] = None, **kwargs) -> List[dict]:
+        uids = self._get_uids_by_criteria(collection_name, criteria)
+        data = []
+        for uid in uids:
+            if uid in self._uid2data:
+                data.append(self._uid2data[uid])
+            else:
+                LOG.warning(f"[MapStore - get] uid {uid} not found in data")
+        return data
+
+    def _get_uids_by_criteria(self, collection_name: str, criteria: dict) -> List[str]:
+        if not criteria:
+            return list(self._collection2uids.get(collection_name, set()))
+        else:
+            uids = criteria.get('uid', [])
+            kb_id = criteria.get(RAG_KB_ID)
+            doc_ids = criteria.get(RAG_DOC_ID, [])
+            parents = criteria.get('parent', [])
+            if uids:
+                return [uid for uid in uids if uid in self._collection2uids.get(collection_name, set())]
+            elif kb_id and doc_ids:
+                return [uid for doc_id in doc_ids
+                        for uid in self._col_kb_doc_uids.get(collection_name, {}).get(kb_id, {}).get(doc_id, ())]
+            elif kb_id:
+                doc_ids = self._col_kb_doc_uids.get(collection_name, {}).get(kb_id, {}).keys()
+                return [uid for doc_id in doc_ids
+                        for uid in self._col_kb_doc_uids.get(collection_name, {}).get(kb_id, {}).get(doc_id, ())]
+            elif doc_ids:
+                return [uid for doc_id in doc_ids for uid in self._col_doc_uids.get(collection_name, {}).get(doc_id, ())]
+            elif parents:
+                return [uid for parent in parents for uid in
+                        self._col_parent_uids.get(collection_name, {}).get(parent, ())]
+            else:
+                raise ValueError(f"[MapStore - get] Invalid criteria: {criteria}")
+
+    @override
+    def search(self, collection_name: str, query: str, topk: int,
+               filters: Optional[Dict[str, Union[str, int, List, Set]]] = None, **kwargs) -> List[dict]:
+        # TODO(chenjiahao): implement search in map store, using default index to search data in map store
+        raise NotImplementedError(
+            "[MapStore - search] Not implemented, please use default index to search data in map store...")
diff --git a/lazyllm/tools/rag/store/hybrid/sensecore_store.py b/lazyllm/tools/rag/store/hybrid/sensecore_store.py
new file mode 100644
index 000000000..4bcd9e9ad
--- /dev/null
+++ b/lazyllm/tools/rag/store/hybrid/sensecore_store.py
@@ -0,0 +1,427 @@
+import os
+import json
+import uuid
+import time
+import requests
+
+from pydantic import BaseModel, Field
+from urllib.parse import urljoin
+from typing import Optional, List, Dict, Any, Union, Set
+
+from ..store_base import (LazyLLMStoreBase, StoreCapability, LAZY_ROOT_NAME, IMAGE_PATTERN, INSERT_BATCH_SIZE,
+                          DEFAULT_KB_ID, SegmentType)
+from ..utils import upload_data_to_s3, download_data_from_s3, fibonacci_backoff, create_file_path
+
+from ...data_type import DataType
+from ...global_metadata import GlobalMetadataDesc, RAG_DOC_ID, RAG_KB_ID
+
+from lazyllm import warp, pipeline, LOG, config
+from lazyllm.common import override
+from lazyllm.thirdparty import boto3
+
+
+class Segment(BaseModel):
+    segment_id: str
+    dataset_id: Optional[str] = '__default__'
+    document_id: str
+    group: str
+    content: Optional[str] = ''
+    meta: str
+    global_meta: str
+    excluded_embed_metadata_keys: Optional[List[str]] = Field(default_factory=list)
+    excluded_llm_metadata_keys: Optional[List[str]] = Field(default_factory=list)
+    parent: Optional[str] = ''
+    children: Optional[Dict[str, Any]] = Field(default_factory=dict)
+    embedding_state: Optional[List[str]] = Field(default_factory=list)
+    answer: Optional[str] = ''
+    image_keys: Optional[List[str]] = Field(default_factory=list)
+    number: Optional[int] = 0
+
+
+class SenseCoreStore(LazyLLMStoreBase):
+    capability = StoreCapability.ALL
+    need_embedding = False
+    supports_index_registration = False
+
+    def __init__(self, uri: str = '', **kwargs):
+        self._uri = uri
+        self._s3_config = kwargs.get('s3_config')
+        self._image_url_config = kwargs.get('image_url_config')
+
+    @property
+    def dir(self):
+        return None
+
+    @override
+    def connect(self, global_metadata_desc: Optional[Dict[str, GlobalMetadataDesc]] = None, **kwargs) -> None:
+        self._check_s3()
+        self._global_metadata_desc = global_metadata_desc or {}
+        LOG.info(f"[SenseCore Store - connect] connected to {self._uri}")
+
+    def _check_s3(self):
+        obj_key = 'lazyllm/warmup.txt'
+        upload_data_to_s3('warmup', bucket_name=self._s3_config['bucket_name'], object_key=obj_key,
+                          aws_access_key_id=self._s3_config['access_key'],
+                          aws_secret_access_key=self._s3_config['secret_access_key'],
+                          use_minio=self._s3_config['use_minio'], endpoint_url=self._s3_config['endpoint_url'])
+        LOG.info(f"[SenseCore Store - check_s3] uploaded warmup.txt to {self._s3_config['bucket_name']}")
+        return
+
+    def _serialize_data(self, data: dict) -> Dict:  # noqa: C901
+        data = dict(data)
+        content = json.dumps(data.get('content', ''), ensure_ascii=False)
+        matches = IMAGE_PATTERN.findall(content)
+        for _, image_path in matches:
+            if image_path.startswith('lazyllm'):
+                continue
+            image_file_name = os.path.basename(image_path)
+            obj_key = f"lazyllm/images/{image_file_name}"
+            try:
+                prefix = config['image_path_prefix']
+            except Exception:
+                prefix = os.getenv('RAG_IMAGE_PATH_PREFIX', '')
+            file_path = create_file_path(path=image_path, prefix=prefix)
+            try:
+                with open(file_path, 'rb') as f:
+                    upload_data_to_s3(f.read(), bucket_name=self._s3_config['bucket_name'], object_key=obj_key,
+                                      aws_access_key_id=self._s3_config['access_key'],
+                                      aws_secret_access_key=self._s3_config['secret_access_key'],
+                                      use_minio=self._s3_config['use_minio'],
+                                      endpoint_url=self._s3_config['endpoint_url'])
+                    content = content.replace(image_path, obj_key)
+            except FileNotFoundError:
+                LOG.error(f"Cannot find image path: {image_path} (local path {file_path}), skip...")
+            except Exception as e:
+                LOG.error(f"Error when uploading `{image_path}` {e!r}")
+        data['content'] = json.loads(content)
+
+        if data.get('group') == LAZY_ROOT_NAME:
+            obj_key = f"lazyllm/lazyllm_root/{data.get('uid')}.json"
+            upload_data_to_s3(content.encode('utf-8'), bucket_name=self._s3_config['bucket_name'], object_key=obj_key,
+                              aws_access_key_id=self._s3_config['access_key'],
+                              aws_secret_access_key=self._s3_config['secret_access_key'],
+                              use_minio=self._s3_config['use_minio'], endpoint_url=self._s3_config['endpoint_url'])
+            data['content'] = obj_key
+
+        segment = Segment(segment_id=data.get('uid', ''), dataset_id=data.get(RAG_KB_ID, ''),
+                          document_id=data.get('doc_id', ''), group=data.get('group', ''),
+                          content=data.get('content', ''), meta=json.dumps(data.get('meta', {}), ensure_ascii=False),
+                          excluded_embed_metadata_keys=data.get('excluded_embed_metadata_keys', []),
+                          excluded_llm_metadata_keys=data.get('excluded_llm_metadata_keys', []),
+                          parent=data.get('parent', ''),
+                          global_meta=json.dumps(data.get('global_meta', {}), ensure_ascii=False),
+                          answer=data.get('answer', ''), number=data.get('number', 0))
+        # image extract
+        if isinstance(segment.content, str):
+            target = segment.content
+        else:
+            target = json.dumps(segment.content)
+        matches = IMAGE_PATTERN.findall(target)
+        for _, image_path in matches:
+            segment.image_keys.append(image_path)
+
+        if data.get('type') == SegmentType.IMAGE.value and data.get('image_keys'):
+            image_path = data.get('image_keys', [])[0]
+            image_file_name = os.path.basename(image_path)
+            obj_key = f"lazyllm/images/{image_file_name}"
+            try:
+                with open(image_path, 'rb') as f:
+                    upload_data_to_s3(f.read(), bucket_name=self._s3_config['bucket_name'], object_key=obj_key,
+                                      aws_access_key_id=self._s3_config['access_key'],
+                                      aws_secret_access_key=self._s3_config['secret_access_key'],
+                                      use_minio=self._s3_config['use_minio'],
+                                      endpoint_url=self._s3_config['endpoint_url'])
+                segment.image_keys = [obj_key]
+            except FileNotFoundError:
+                LOG.error(f"Cannot find image path: {image_path} (local path {image_path}), skip...")
+            except Exception as e:
+                LOG.error(f"Error when uploading `{image_path}` {e!r}")
+        elif data.get('type') == SegmentType.QA.value and data.get('answer'):
+            answer = data.get('answer')
+            matches = IMAGE_PATTERN.findall(answer)
+            for _, image_path in matches:
+                if image_path.startswith('lazyllm'):
+                    continue
+                image_file_name = os.path.basename(image_path)
+                obj_key = f"lazyllm/images/{image_file_name}"
+                try:
+                    prefix = config['image_path_prefix']
+                except Exception:
+                    prefix = os.getenv('RAG_IMAGE_PATH_PREFIX', '')
+                file_path = create_file_path(path=image_path, prefix=prefix)
+                try:
+                    with open(file_path, 'rb') as f:
+                        upload_data_to_s3(f.read(), bucket_name=self._s3_config['bucket_name'], object_key=obj_key,
+                                          aws_access_key_id=self._s3_config['access_key'],
+                                          aws_secret_access_key=self._s3_config['secret_access_key'],
+                                          use_minio=self._s3_config['use_minio'],
+                                          endpoint_url=self._s3_config['endpoint_url'])
+                        answer = answer.replace(image_path, obj_key)
+                except FileNotFoundError:
+                    LOG.error(f"Cannot find image path: {image_path} (local path {file_path}), skip...")
+                except Exception as e:
+                    LOG.error(f"Error when uploading `{image_path}` {e!r}")
+            data['answer'] = answer
+            matches = IMAGE_PATTERN.findall(data['answer'])
+            for _, image_path in matches:
+                segment.image_keys.append(image_path)
+            segment.answer = data['answer']
+        return segment.model_dump()
+
+    def _deserialize_data(self, segment: Dict) -> Dict:
+        data = {
+            'uid': segment.get('segment_id', ''),
+            'doc_id': segment.get('document_id', ''),
+            'group': segment.get('group', ''),
+            'content': segment.get('content', ''),
+            'meta': json.loads(segment.get('meta', "{}")),
+            'global_meta': json.loads(segment.get('global_meta', "{}")),
+            'number': segment.get('number', 0),
+            'kb_id': segment.get('dataset_id', ''),
+            'excluded_embed_metadata_keys': segment.get('excluded_embed_metadata_keys', []),
+            'excluded_llm_metadata_keys': segment.get('excluded_llm_metadata_keys', []),
+            'parent': segment.get('parent', ''),
+            'answer': segment.get('answer', ''),
+            'image_keys': segment.get('image_keys', []),
+        }
+        if len(data.get('answer', '')):
+            data['type'] = SegmentType.QA.value
+        else:
+            data['type'] = SegmentType.TEXT.value
+        if data.get('group') == LAZY_ROOT_NAME and data.get('content').startswith('lazyllm/lazyllm_root/'):
+            obj_key = data.get('content')
+            content = download_data_from_s3(bucket_name=self._s3_config['bucket_name'], object_key=obj_key,
+                                            aws_access_key_id=self._s3_config['access_key'],
+                                            aws_secret_access_key=self._s3_config['secret_access_key'],
+                                            use_minio=self._s3_config['use_minio'],
+                                            endpoint_url=self._s3_config['endpoint_url'], encoding='utf-8')
+            data['content'] = json.loads(content)
+        return data
+
+    def _create_filters_str(self, filters: Dict[str, Union[str, int, List, Set]]) -> str:
+        ret_str = ''
+        for name, candidates in filters.items():
+            desc = self._global_metadata_desc.get(name)
+            if not desc:
+                raise ValueError(f'cannot find desc of field [{name}]')
+            key = name
+            if isinstance(candidates, str):
+                candidates = [candidates]
+            if (not isinstance(candidates, list)) and (not isinstance(candidates, set)):
+                candidates = list(candidates)
+            if desc.data_type == DataType.ARRAY:
+                ret_str += f'array_contains_any({key}, {candidates}) and '
+            else:
+                ret_str += f'{key} in {candidates} and '
+
+        if len(ret_str) > 0:
+            return ret_str[:-5]  # truncate the last ' and '
+        return ret_str
+
+    def _upload_data_and_insert(self, data: List[dict]) -> str:
+        try:
+            job_id = str(uuid.uuid4())
+            groups = set()
+            for item in data:
+                groups.add(item.get('group'))
+            groups = list(groups)
+            data = [self._serialize_data(item) for item in data]
+            dataset_id = None
+            for item in data:
+                dataset_id = item.get('dataset_id', None)
+                break
+            if not dataset_id:
+                raise ValueError("dataset_id is required in SenseCoreStore")
+
+            obj_key = f"lazyllm/segments/{job_id}.jsonl"
+
+            upload_data_to_s3(data=data, bucket_name=self._s3_config['bucket_name'], object_key=obj_key,
+                              aws_access_key_id=self._s3_config['access_key'],
+                              aws_secret_access_key=self._s3_config['secret_access_key'],
+                              use_minio=self._s3_config['use_minio'], endpoint_url=self._s3_config['endpoint_url'])
+            url = urljoin(self._uri, 'v1/writerSegmentJob:submit')
+            params = {'writer_segment_job_id': job_id}
+            headers = {'Accept': 'application/json', 'Content-Type': 'application/json'}
+            payload = {'dataset_id': dataset_id or self._kb_id, 'file_key': obj_key, 'groups': groups}
+
+            response = requests.post(url, params=params, headers=headers, json=payload)
+            response.raise_for_status()
+            LOG.info(f"SenseCore Store: insert task {job_id} submitted")
+        except Exception as e:
+            LOG.error(f"SenseCore Store: insert task {job_id} failed: {e}")
+            raise e
+        return job_id
+
+    def _check_insert_job_status(self, job_id: str) -> None:
+        url = urljoin(self._uri, f"v1/writerSegmentJobs/{job_id}")
+        headers = {'Accept': 'application/json'}
+        for wait_time in fibonacci_backoff(max_retries=15):
+            response = requests.get(url, headers=headers)
+            response.raise_for_status()
+            status = response.json()['state']
+            if status == 2:
+                LOG.info(f"SenseCore Store: insert task {job_id} finished")
+                return
+            elif status == 3:
+                raise Exception(f"Insert task {job_id} failed")
+            else:
+                time.sleep(wait_time)
+        raise Exception(f"Insert task {job_id} failed after seconds")
+
+    def _get_group_name(self, collection_name: str) -> str:
+        return collection_name.split('_')[-1] if "lazyllm_root" not in collection_name else "lazyllm_root"
+
+    @override
+    def upsert(self, collection_name: str, data: List[dict]) -> bool:
+        if not data: return True
+        try:
+            with pipeline() as insert_ppl:
+                insert_ppl.get_ids = warp(self._upload_data_and_insert).aslist
+                insert_ppl.check_status = warp(self._check_insert_job_status)
+
+            batched_data = [data[i:i + INSERT_BATCH_SIZE] for i in range(0, len(data), INSERT_BATCH_SIZE)]
+            insert_ppl(batched_data)
+            return True
+        except Exception as e:
+            LOG.error(f"[SenseCore Store - upsert] insert task failed: {e}")
+            return False
+
+    @override
+    def delete(self, collection_name: str, criteria: dict, **kwargs) -> bool:
+        try:
+            url = urljoin(self._uri, 'v1/segments:bulkDelete')
+            headers = {'Accept': '*/*', 'Content-Type': 'application/json'}
+            doc_ids = criteria.get(RAG_DOC_ID)
+            if doc_ids:
+                payload = {'dataset_id': criteria.get(RAG_KB_ID), 'document_ids': doc_ids}
+            else:
+                payload = {'dataset_id': criteria.get(RAG_KB_ID), 'segment_ids': criteria.get('uid')}
+            if collection_name:
+                payload['group'] = self._get_group_name(collection_name)
+            response = requests.post(url, headers=headers, json=payload)
+            response.raise_for_status()
+        except Exception as e:
+            LOG.error(f"[SenseCore Store - delete] task col: {collection_name}\ncriteria: {criteria}\n{e}")
+            return True
+        return True
+
+    @override
+    def get(self, collection_name: str, criteria: dict, **kwargs) -> List[dict]:  # noqa: C901
+        try:
+            uids = criteria.get('uid')
+            doc_ids = criteria.get(RAG_DOC_ID)
+            kb_id = criteria.get(RAG_KB_ID, DEFAULT_KB_ID)
+            if not (uids or collection_name):
+                raise ValueError("group or uids must be provided")
+            if doc_ids and len(doc_ids) > 1:
+                raise ValueError("[Sensecore Store - get]: doc_ids must be a single value")
+            doc_id = doc_ids[0] if doc_ids else None
+            if doc_id and not uids:
+                url = urljoin(self._uri, f"v1/datasets/{kb_id}/documents/{doc_id}/segments:search")
+            else:
+                url = urljoin(self._uri, 'v1/segments:scroll')
+            headers = {'Accept': 'application/json', 'Content-Type': 'application/json'}
+            payload = {'dataset_id': kb_id}
+            if collection_name:
+                payload['group'] = self._get_group_name(collection_name)
+            if doc_id:
+                payload['document_id'] = doc_id
+            if uids:
+                payload['segment_ids'] = uids
+            else:
+                payload["page_size"] = 100
+            segments = []
+            while True:
+                response = requests.post(url, headers=headers, json=payload)
+                if response.status_code != 200:
+                    LOG.warning(f"SenseCore Store: get task failed: url {url}, data: {payload}, e:{response.text}")
+                    break
+                data = response.json()
+                batch = data.get('segments', [])
+                if not batch:
+                    break
+                segments.extend(batch)
+                next_page_token = data.get('next_page_token')
+                if not next_page_token:
+                    break
+                payload['page_token'] = next_page_token
+            if doc_ids:
+                segments = [segment for segment in segments if segment['document_id'] in doc_ids]
+            if kwargs.get('display'):
+                segments = self._apply_display(segments)
+            return [self._deserialize_data(s) for s in segments]
+        except Exception as e:
+            LOG.error(f"[SenseCore Store - get]:task failed: {e}")
+            return []
+
+    def _apply_display(self, segments: List[dict]) -> List[dict]:
+        out = []
+        for s in segments:
+            if not s.get('is_active', True):
+                continue
+            if s.get('display_content'):
+                s['content'] = s['display_content']
+            out.append(s)
+        return out
+
+    def _multi_modal_process(self, query: str, images: List[str]):
+        urls = []
+        s3 = boto3.client('s3', aws_access_key_id=self._image_url_config['access_key'],
+                          aws_secret_access_key=self._image_url_config['secret_access_key'],
+                          endpoint_url=self._image_url_config['endpoint_url'])
+        for image in images:
+            query = query + '<image>\n'
+            url = s3.generate_presigned_url(ClientMethod='get_object',
+                                            Params={'Bucket': self._image_url_config['bucket_name'], 'Key': image},
+                                            ExpiresIn=3600)
+            urls.append(url)
+        return query, urls
+
+    @override
+    def search(self, collection_name: str, query: Union[str, dict, List[float]], topk: int,  # noqa: C901
+               filters: Optional[Dict[str, Union[str, int, List, Set]]] = None,
+               embed_key: Optional[str] = None, **kwargs) -> List[dict]:
+        try:
+            if not embed_key:
+                raise ValueError("[Sensecore Store] Query: embed_key must be provided")
+            url = urljoin(self._uri, 'v1/segments:hybrid')
+            headers = {'Accept': 'application/json', 'Content-Type': 'application/json'}
+
+            filter_str = self._create_filters_str(filters) if filters else None
+            dataset_ids = []
+            if filters:
+                for name, candidates in filters.items():
+                    desc = self._global_metadata_desc.get(name)
+                    if not desc:
+                        raise ValueError(f'cannot find desc of field [{name}]')
+                    key = name
+                    if key == RAG_KB_ID:
+                        if isinstance(candidates, str):
+                            candidates = [candidates]
+                        if (not isinstance(candidates, list)) and (not isinstance(candidates, set)):
+                            candidates = list(candidates)
+                        dataset_ids = candidates
+                        break
+            if dataset_ids:
+                hybrid_search_datasets = [{'dataset_id': dataset_id} for dataset_id in dataset_ids]
+            else:
+                LOG.error(f"SenseCore Store: no dataset_id provided, please check your filters: {filters}")
+                return []
+
+            images = kwargs.get('images', [])
+            if images:
+                query, images = self._multi_modal_process(query, images)
+            payload = {'query': query, 'hybrid_search_datasets': hybrid_search_datasets, 'hybrid_search_type': 2,
+                       'top_k': topk, 'filters': filter_str, 'group': self._get_group_name(collection_name),
+                       'embedding_model': embed_key, 'images': images}
+            response = requests.post(url, headers=headers, json=payload)
+            response.raise_for_status()
+            segments = response.json()['segments']
+            segments = [s for s in segments if s['is_active']]
+            for s in segments:
+                if len(s.get('display_content', '')):
+                    s['content'] = s['display_content']
+            return [self._deserialize_data(s) for s in segments]
+        except Exception as e:
+            LOG.error(f"SenseCore Store: query task failed: {e}")
+            raise e
diff --git a/lazyllm/tools/rag/store/map_store.py b/lazyllm/tools/rag/store/map_store.py
deleted file mode 100644
index c0eeaedcd..000000000
--- a/lazyllm/tools/rag/store/map_store.py
+++ /dev/null
@@ -1,144 +0,0 @@
-from collections import defaultdict
-from typing import Dict, List, Optional, Callable, Union, Set
-
-from .store_base import StoreBase, LAZY_ROOT_NAME
-
-from ..index_base import IndexBase
-from ..doc_node import DocNode
-from ..default_index import DefaultIndex
-from ..global_metadata import RAG_SYSTEM_META_KEYS, RAG_DOC_ID
-
-from lazyllm.common import override
-
-
-class MapStore(StoreBase):
-    def __init__(self, node_groups: Union[List[str], Set[str]], embed: Dict[str, Callable], **kwargs):
-        self._uid2node: Dict[str, DocNode] = {}
-        self._group2uids: Dict[str, Set[str]] = {group: set() for group in node_groups}
-        self._docid2uids: Dict[str, Set[str]] = defaultdict(set)
-        self._group_doc_uids: Dict[str, Dict[str, Set[str]]] = defaultdict(lambda: defaultdict(set))
-        self._name2index = {
-            'default': DefaultIndex(embed, self),
-        }
-        self._activated_groups = set()
-
-    @override
-    def update_nodes(self, nodes: List[DocNode]) -> None:
-        for node in nodes:
-            group = node._group
-            doc_id = node.global_metadata.get(RAG_DOC_ID)
-            uid = node._uid
-            self._group2uids[group].add(uid)
-            self._docid2uids[doc_id].add(uid)
-            self._group_doc_uids[group][doc_id].add(uid)
-            self._uid2node[node._uid] = node
-
-        for index in self._name2index.values():
-            index.update(nodes)
-
-    @override
-    def update_doc_meta(self, doc_id: str, metadata: dict) -> None:
-        doc_nodes: List[DocNode] = self.get_nodes(group_name=LAZY_ROOT_NAME, doc_ids=[doc_id])
-        if not doc_nodes:
-            return
-        root_node = doc_nodes[0].root_node
-        keys_to_delete = []
-        for k in root_node.global_metadata:
-            if not (k in RAG_SYSTEM_META_KEYS or k in metadata):
-                keys_to_delete.append(k)
-        for k in keys_to_delete:
-            root_node.global_metadata.pop(k)
-        root_node.global_metadata.update(metadata)
-
-    @override
-    def remove_nodes(self, doc_ids: List[str], group_name: Optional[str] = None,
-                     uids: Optional[List[str]] = None) -> None:
-        if uids:
-            need_delete = uids
-        elif group_name:
-            need_delete = [uid for doc_id in doc_ids
-                           for uid in self._group_doc_uids.get(group_name, {}).get(doc_id, ())]
-        else:
-            need_delete = [uid for doc_id in doc_ids for uid in self._docid2uids.get(doc_id, ())]
-
-        for index in self._name2index.values():
-            index.remove(need_delete)
-
-        for uid in need_delete:
-            node = self._uid2node.pop(uid, None)
-            if not node:
-                continue
-            group = node._group
-            doc_id = node.global_metadata.get(RAG_DOC_ID)
-            self._group2uids.get(group, set()).discard(uid)
-            self._docid2uids.get(doc_id, set()).discard(uid)
-            self._group_doc_uids.get(group, {}).get(doc_id, set()).discard(uid)
-            if group in self._group_doc_uids and not self._group_doc_uids[group]:
-                self._group_doc_uids.pop(group)
-
-    @override
-    def get_nodes(self, group_name: Optional[str] = None, uids: Optional[List[str]] = None,
-                  doc_ids: Optional[Set] = None, **kwargs) -> List[DocNode]:
-        if uids:
-            return [self._uid2node[uid] for uid in uids if uid in self._uid2node]
-        elif doc_ids and group_name:
-            uids = [uid for doc_id in doc_ids
-                    for uid in self._group_doc_uids.get(group_name, {}).get(doc_id, ())]
-        elif group_name:
-            uids = self._group2uids.get(group_name, set())
-        elif doc_ids:
-            uids = [uid for doc_id in doc_ids for uid in self._docid2uids.get(doc_id, ())]
-        else:
-            return []
-        return [self._uid2node[uid] for uid in uids if uid in self._uid2node]
-
-    @override
-    def is_group_active(self, name: str) -> bool:
-        uids = self._group2uids.get(name)
-        return True if uids else False
-
-    @override
-    def all_groups(self) -> List[str]:
-        return list(self._group2uids.keys())
-
-    @override
-    def activate_group(self, group_names: Union[str, List[str]]) -> bool:
-        if isinstance(group_names, str): group_names = [group_names]
-        self._activated_groups.update(group_names)
-
-    @override
-    def activated_groups(self):
-        return list(self._activated_groups)
-
-    @override
-    def query(self, *args, **kwargs) -> List[DocNode]:
-        return self.get_index('default').query(*args, **kwargs)
-
-    @override
-    def register_index(self, type: str, index: IndexBase) -> None:
-        self._name2index[type] = index
-
-    @override
-    def get_index(self, type: Optional[str] = None) -> Optional[IndexBase]:
-        if type is None:
-            type = 'default'
-        return self._name2index.get(type)
-
-    @override
-    def clear_cache(self, group_names: Optional[List[str]] = None) -> None:
-        if group_names is None:
-            self._docid2uids.clear()
-            self._group_doc_uids.clear()
-            self._uid2node.clear()
-            for group in self._group2uids.keys():
-                self._group2uids[group].clear()
-            return
-        elif isinstance(group_names, str):
-            group_names = [group_names]
-        elif isinstance(group_names, (tuple, list, set)):
-            group_names = list(group_names)
-        else:
-            raise TypeError(f"Invalid type {type(group_names)} for group_names, expected list of str")
-        for group_name in group_names:
-            uids = self._group2uids.get(group_name, set())
-            self.remove_nodes(doc_ids=[], uids=uids)
diff --git a/lazyllm/tools/rag/store/milvus_store.py b/lazyllm/tools/rag/store/milvus_store.py
deleted file mode 100644
index f67ddaf77..000000000
--- a/lazyllm/tools/rag/store/milvus_store.py
+++ /dev/null
@@ -1,436 +0,0 @@
-import copy
-from urllib import parse
-from packaging import version
-from collections import defaultdict
-from typing import Dict, List, Optional, Union, Callable, Set
-
-from .store_base import StoreBase
-from .map_store import MapStore
-from ..doc_node import DocNode
-from ..utils import parallel_do_embedding
-from ..index_base import IndexBase
-from ..global_metadata import (
-    GlobalMetadataDesc, RAG_DOC_ID, RAG_DOC_PATH, RAG_DOC_FILE_NAME,
-    RAG_DOC_FILE_TYPE, RAG_DOC_FILE_SIZE, RAG_DOC_CREATION_DATE,
-    RAG_DOC_LAST_MODIFIED_DATE, RAG_DOC_LAST_ACCESSED_DATE
-)
-from ..data_type import DataType
-
-from lazyllm.thirdparty import pymilvus
-from lazyllm.common import override, obj2str, str2obj
-from lazyllm import LOG
-
-MILVUS_UPSERT_BATCH_SIZE = 500
-MILVUS_PAGINATION_OFFSET = 1000
-
-
-class MilvusStore(StoreBase):
-    # we define these variables as members so that pymilvus is not imported until MilvusStore is instantiated.
-    def _def_constants(self) -> None:
-        self._primary_key = 'uid'
-
-        self._embedding_key_prefix = 'embedding_'
-        self._global_metadata_key_prefix = 'global_metadata_'
-
-        self._builtin_keys = {
-            self._primary_key: {
-                'dtype': pymilvus.DataType.VARCHAR,
-                'max_length': 256,
-                'is_primary': True,
-            },
-            'parent': {
-                'dtype': pymilvus.DataType.VARCHAR,
-                'max_length': 256,
-            },
-            'content': {
-                'dtype': pymilvus.DataType.VARCHAR,
-                'max_length': 65535,
-            },
-            'metadata': {
-                'dtype': pymilvus.DataType.VARCHAR,
-                'max_length': 65535,
-            },
-        }
-
-        self._builtin_global_metadata_desc = {
-            RAG_DOC_ID: GlobalMetadataDesc(data_type=DataType.VARCHAR,
-                                           default_value=' ', max_size=512),
-            RAG_DOC_PATH: GlobalMetadataDesc(data_type=DataType.VARCHAR,
-                                             default_value=' ', max_size=65535),
-            RAG_DOC_FILE_NAME: GlobalMetadataDesc(data_type=DataType.VARCHAR,
-                                                  default_value=' ', max_size=128),
-            RAG_DOC_FILE_TYPE: GlobalMetadataDesc(data_type=DataType.VARCHAR,
-                                                  default_value=' ', max_size=128),
-            RAG_DOC_FILE_SIZE: GlobalMetadataDesc(data_type=DataType.INT32,
-                                                  default_value=0),
-            RAG_DOC_CREATION_DATE: GlobalMetadataDesc(data_type=DataType.VARCHAR,
-                                                      default_value=' ', max_size=10),
-            RAG_DOC_LAST_MODIFIED_DATE: GlobalMetadataDesc(data_type=DataType.VARCHAR,
-                                                           default_value=' ', max_size=10),
-            RAG_DOC_LAST_ACCESSED_DATE: GlobalMetadataDesc(data_type=DataType.VARCHAR,
-                                                           default_value=' ', max_size=10)
-        }
-
-        self._type2milvus = [
-            pymilvus.DataType.VARCHAR,
-            pymilvus.DataType.ARRAY,
-            pymilvus.DataType.INT32,
-            pymilvus.DataType.FLOAT_VECTOR,
-            pymilvus.DataType.SPARSE_FLOAT_VECTOR,
-        ]
-
-    def __init__(self, group_embed_keys: Dict[str, Set[str]], embed: Dict[str, Callable], # noqa C901
-                 embed_dims: Dict[str, int], embed_datatypes: Dict[str, DataType],
-                 global_metadata_desc: Dict[str, GlobalMetadataDesc],
-                 uri: str, index_kwargs: Optional[Union[Dict, List]] = None, db_name: Optional[str] = 'lazyllm'):
-        self._def_constants()
-
-        self._group_embed_keys = group_embed_keys
-        self._embed = embed
-        self._uri = uri
-        self._db_name = db_name
-        self._client = pymilvus.MilvusClient(uri=self._uri)
-
-        if uri and parse.urlparse(uri).scheme.lower() not in ["unix", "http", "https", "tcp", "grpc"]:
-            self._type = 'local'
-        else:
-            self._type = 'remote'
-            try:
-                try:
-                    default_embedding_keys = next(
-                        keys for keys in self._group_embed_keys.values() if keys
-                    )
-                except StopIteration:
-                    raise ValueError('embedding keys are required for milvus standalone')
-                for group, keys in self._group_embed_keys.items():
-                    if not keys:
-                        self._group_embed_keys[group] = default_embedding_keys
-
-                if self._db_name:
-                    existing_dbs = self._client.list_databases()
-                    if self._db_name not in existing_dbs:
-                        self._client.create_database(self._db_name)
-                    self._client.using_database(self._db_name)
-            except Exception as e:
-                LOG.error(f'milvus-standalone database error {e}')
-
-        if embed_dims is None:
-            embed_dims = {}
-        if embed_datatypes is None:
-            embed_datatypes = {}
-
-        # XXX milvus 2.4.x doesn't support `default_value`
-        # https://milvus.io/docs/product_faq.md#Does-Milvus-support-specifying-default-values-for-scalar-or-vector-fields
-        if global_metadata_desc:
-            self._global_metadata_desc = global_metadata_desc | self._builtin_global_metadata_desc
-        else:
-            self._global_metadata_desc = self._builtin_global_metadata_desc
-
-        collections = self._client.list_collections()
-        for group, embed_keys in group_embed_keys.items():
-            if group in collections:
-                continue
-
-            field_list = []
-            index_params = self._client.prepare_index_params()
-
-            for key, info in self._builtin_keys.items():
-                field_list.append(pymilvus.FieldSchema(name=key, **info))
-
-            for key in embed_keys:
-                datatype = embed_datatypes.get(key)
-                if not datatype:
-                    raise ValueError(f'cannot find embedding datatype if embed [{key}] in [{embed_datatypes}]')
-
-                field_kwargs = {}
-                dim = embed_dims.get(key)  # can be empty if embedding is sparse
-                if dim:
-                    field_kwargs['dim'] = dim
-
-                field_name = self._gen_embedding_key(key)
-                field_list.append(pymilvus.FieldSchema(name=field_name, dtype=self._type2milvus[datatype],
-                                                       **field_kwargs))
-                if isinstance(index_kwargs, list):
-                    embed_key_field_name = "embed_key"
-                    for item in index_kwargs:
-                        item_key = item.get(embed_key_field_name, None)
-                        if not item_key:
-                            raise ValueError(f'cannot find `{embed_key_field_name}` in `index_kwargs` of `{field_name}`')
-                        if item_key == key:
-                            index_kwarg = item.copy()
-                            index_kwarg.pop(embed_key_field_name, None)
-                            index_params.add_index(field_name=field_name, **index_kwarg)
-                            break
-                elif isinstance(index_kwargs, dict):
-                    index_params.add_index(field_name=field_name, **index_kwargs)
-
-            if self._global_metadata_desc:
-                for key, desc in self._global_metadata_desc.items():
-                    if desc.data_type == DataType.ARRAY:
-                        if desc.element_type is None:
-                            raise ValueError(f'Milvus field [{key}]: `element_type` is required when '
-                                             '`data_type` is ARRAY.')
-                        field_args = {
-                            'element_type': self._type2milvus[desc.element_type],
-                            'max_capacity': desc.max_size,
-                        }
-                    elif desc.data_type == DataType.VARCHAR:
-                        field_args = {
-                            'max_length': desc.max_size,
-                        }
-                    else:
-                        field_args = {}
-                    field_list.append(pymilvus.FieldSchema(name=self._gen_field_key(key),
-                                                           dtype=self._type2milvus[desc.data_type],
-                                                           default_value=desc.default_value,
-                                                           **field_args))
-
-            schema = pymilvus.CollectionSchema(fields=field_list, auto_id=False, enable_dynamic_field=False)
-            self._client.create_collection(collection_name=group, schema=schema, index_params=index_params)
-
-        self._map_store = MapStore(node_groups=list(group_embed_keys.keys()), embed=embed)
-        self._load_all_nodes_to(self._map_store)
-
-    def _check_connection(self):
-        if not pymilvus.connections.has_connection(alias=self._client._using):
-            LOG.info("Milvus Store: try to reconnect...")
-            if self._type == 'local':
-                pymilvus.connections.connect(alias=self._client._using, uri=self._uri)
-            else:
-                pymilvus.connections.connect(alias=self._client._using, db_name=self._db_name, uri=self._uri)
-
-    @override
-    def update_nodes(self, nodes: List[DocNode]) -> None:
-        parallel_do_embedding(self._embed, [], nodes, self._group_embed_keys)
-        group_embed_dict = defaultdict(list)
-        for node in nodes:
-            data = self._serialize_node_partial(node)
-            group_embed_dict[node._group].append(data)
-        self._check_connection()
-        for group_name, data in group_embed_dict.items():
-            for i in range(0, len(data), MILVUS_UPSERT_BATCH_SIZE):
-                self._client.upsert(collection_name=group_name, data=data[i:i + MILVUS_UPSERT_BATCH_SIZE])
-        self._map_store.update_nodes(nodes)
-
-    @override
-    def update_doc_meta(self, doc_id: str, metadata: dict) -> None:
-        self._map_store.update_doc_meta(doc_id=doc_id, metadata=metadata)
-        for group in self.activated_groups():
-            nodes = self.get_nodes(group_name=group, doc_ids=[doc_id])
-            self.update_nodes(nodes)
-
-    @override
-    def remove_nodes(self, doc_ids: List[str] = None, group_name: Optional[str] = None,
-                     uids: Optional[List[str]] = None) -> None:
-        self._check_connection()
-        nodes = self._map_store.get_nodes(group_name=group_name, doc_ids=doc_ids, uids=uids)
-        group2uids = defaultdict(list)
-        for node in nodes:
-            group2uids[node._group].append(node._uid)
-        for group, uids in group2uids.items():
-            if self._client.has_collection(group):
-                self._client.delete(collection_name=group,
-                                    filter=f'{self._primary_key} in {uids}')
-                self._map_store.remove_nodes(doc_ids=doc_ids, uids=uids)
-        return
-
-    @override
-    def get_nodes(self, group_name: Optional[str] = None, uids: Optional[List[str]] = None,
-                  doc_ids: Optional[Set] = None, **kwargs) -> List[DocNode]:
-        return self._map_store.get_nodes(group_name, uids, doc_ids, **kwargs)
-
-    @override
-    def activate_group(self, group_names: Union[str, List[str]]) -> bool:
-        return self._map_store.activate_group(group_names)
-
-    @override
-    def activated_groups(self):
-        return self._map_store.activated_groups()
-
-    @override
-    def is_group_active(self, name: str) -> bool:
-        return self._map_store.is_group_active(name)
-
-    @override
-    def all_groups(self) -> List[str]:
-        return self._map_store.all_groups()
-
-    @override
-    def register_index(self, type: str, index: IndexBase) -> None:
-        self._map_store.register_index(type, index)
-
-    @override
-    def get_index(self, type: Optional[str] = None) -> Optional[IndexBase]:
-        if type is None:
-            type = 'default'
-        return self._map_store.get_index(type)
-
-    @override
-    def clear_cache(self, group_names: Optional[List[str]] = None):
-        if group_names is None:
-            for group_name in self.activated_groups():
-                if self._client.has_collection(group_name):
-                    self._client.drop_collection(collection_name=group_name)
-            self._map_store.clear_cache()
-        elif isinstance(group_names, str):
-            group_names = [group_names]
-        elif isinstance(group_names, (tuple, list, set)):
-            group_names = list(group_names)
-        else:
-            raise TypeError(f"Invalid type {type(group_names)} for group_names, expected list of str")
-        for group_name in group_names:
-            if self._client.has_collection(group_name):
-                self._client.drop_collection(collection_name=group_name)
-        self._map_store.clear_cache(group_names)
-
-    @override
-    def query(self, query: str, group_name: str, similarity_name: Optional[str] = None,
-              similarity_cut_off: Optional[Union[float, Dict[str, float]]] = float('-inf'),
-              topk: int = 10, embed_keys: Optional[List[str]] = None,
-              filters: Optional[Dict[str, Union[List, set]]] = None, **kwargs) -> List[DocNode]:
-        if similarity_name is not None:
-            raise ValueError('`similarity` MUST be None when Milvus backend is used.')
-
-        if not embed_keys:
-            raise ValueError('empty or None `embed_keys` is not supported.')
-
-        filter_str = self._construct_filter_expr(filters) if filters else ""
-
-        uid_score = {}
-        self._check_connection()
-        for key in embed_keys:
-            embed_func = self._embed.get(key)
-            query_embedding = embed_func(query)
-            results = self._client.search(
-                collection_name=group_name, data=[query_embedding], limit=topk,
-                anns_field=self._gen_embedding_key(key), filter=filter_str
-            )
-            # we have only one `data` for search() so there is only one result in `results`
-            if len(results) != 1:
-                raise ValueError(f'number of results [{len(results)}] != expected [1]')
-            sim_cut_off = similarity_cut_off if isinstance(similarity_cut_off, float) else similarity_cut_off[key]
-
-            for result in results[0]:
-                if result['distance'] < sim_cut_off:
-                    continue
-                uid_score[result['id']] = result['distance'] if result['id'] not in uid_score \
-                    else max(uid_score[result['id']], result['distance'])
-        uids = list(uid_score.keys())
-        nodes = self._map_store.get_nodes(uids=uids)
-        return [node.with_sim_score(uid_score[node._uid]) for node in nodes]
-
-    # ----- internal helper functions ----- #
-
-    def _gen_embedding_key(self, k: str) -> str:
-        return self._embedding_key_prefix + k
-
-    def _gen_field_key(self, k: str) -> str:
-        return self._global_metadata_key_prefix + k
-
-    def _load_all_nodes_to(self, store: StoreBase) -> None:
-        uid2node = {}
-        current_version = version.parse(pymilvus.__version__)
-        use_iterator = current_version >= version.parse("2.4.11")
-        LOG.info(f'the current pymilvus version is {pymilvus.__version__}, use_iterator is {use_iterator}')
-        if not use_iterator:
-            LOG.warning(
-                'pymilvus version is lower than 2.4.11, '
-                'we recommend to upgrade pymilvus to 2.4.11 to support larger data size'
-            )
-
-        for group_name in self._client.list_collections():
-            if use_iterator:
-                collection_desc = self._client.describe_collection(collection_name=group_name)
-                field_names = [field.get("name") for field in collection_desc.get('fields', [])]
-                iterator = self._client.query_iterator(
-                    collection_name=group_name,
-                    batch_size=MILVUS_PAGINATION_OFFSET,
-                    filter=f'{self._primary_key} != ""',
-                    output_fields=field_names
-                )
-                results = []
-                while True:
-                    result = iterator.next()
-                    if not result:
-                        iterator.close()
-                        break
-                    results += result
-            else:
-                results = self._client.query(collection_name=group_name,
-                                filter=f'{self._primary_key} != ""')    # noqa: E128
-            for result in results:
-                node = self._deserialize_node_partial(result)
-                node._group = group_name
-                uid2node.setdefault(node._uid, node)
-
-        # construct DocNode::parent and DocNode::children
-        for node in uid2node.values():
-            if node.parent:
-                parent_uid = node.parent
-                parent_node = uid2node.get(parent_uid)
-                if not parent_node:
-                    raise ValueError(f'cannot find parent node [{parent_uid}]')
-                node.parent = parent_node
-                parent_node.children[node._group].append(node)
-
-        store.update_nodes(list(uid2node.values()))
-
-    def _construct_filter_expr(self, filters: Dict[str, Union[str, int, List, Set]]) -> str:
-        ret_str = ""
-        for name, candidates in filters.items():
-            desc = self._global_metadata_desc.get(name)
-            if not desc:
-                raise ValueError(f'cannot find desc of field [{name}]')
-
-            key = self._gen_field_key(name)
-            if isinstance(candidates, str):
-                candidates = [candidates]
-            elif (not isinstance(candidates, list)) and (not isinstance(candidates, set)):
-                candidates = list(candidates)
-            if desc.data_type == DataType.ARRAY:
-                # https://github.com/milvus-io/milvus/discussions/35279
-                # `array_contains_any` requires milvus >= 2.4.3 and is not supported in local(aka lite) mode.
-                ret_str += f'array_contains_any({key}, {candidates}) and '
-            else:
-                ret_str += f'{key} in {candidates} and '
-
-        if len(ret_str) > 0:
-            return ret_str[:-5]  # truncate the last ' and '
-
-        return ret_str
-
-    def _serialize_node_partial(self, node: DocNode) -> Dict:
-        res = {
-            'uid': node._uid,
-            'content': obj2str(node._content),
-            'parent': node.parent._uid if node.parent else '',
-            'metadata': obj2str(node._metadata),
-        }
-
-        for k, v in node.embedding.items():
-            res[self._gen_embedding_key(k)] = v
-
-        for name, desc in self._global_metadata_desc.items():
-            val = node.global_metadata.get(name, desc.default_value)
-            if val is not None:
-                res[self._gen_field_key(name)] = val
-
-        return res
-
-    def _deserialize_node_partial(self, result: Dict) -> DocNode:
-        record = copy.copy(result)
-
-        doc = DocNode(
-            uid=record.pop('uid'),
-            content=str2obj(record.pop('content')),
-            parent=record.pop('parent'),  # this is the parent's uid
-            metadata=str2obj(record.pop('metadata')),
-        )
-
-        for k, v in record.items():
-            if k.startswith(self._embedding_key_prefix):
-                doc.embedding[k[len(self._embedding_key_prefix):]] = v
-            elif k.startswith(self._global_metadata_key_prefix):
-                if doc.is_root_node:
-                    doc.global_metadata.update({k[len(self._global_metadata_key_prefix):]: v})
-        return doc
diff --git a/lazyllm/tools/rag/store/segment/__init__.py b/lazyllm/tools/rag/store/segment/__init__.py
new file mode 100644
index 000000000..d4d1b3550
--- /dev/null
+++ b/lazyllm/tools/rag/store/segment/__init__.py
@@ -0,0 +1,3 @@
+from .opensearch_store import OpenSearchStore
+
+__all__ = ["OpenSearchStore"]
diff --git a/lazyllm/tools/rag/store/segment/opensearch_store.py b/lazyllm/tools/rag/store/segment/opensearch_store.py
new file mode 100644
index 000000000..907294695
--- /dev/null
+++ b/lazyllm/tools/rag/store/segment/opensearch_store.py
@@ -0,0 +1,176 @@
+import json
+import urllib3
+
+from typing import Dict, List, Union, Optional
+
+from lazyllm import LOG
+from lazyllm.common import override
+from lazyllm.thirdparty import opensearchpy
+
+from ..store_base import LazyLLMStoreBase, StoreCapability, INSERT_BATCH_SIZE
+from ...global_metadata import RAG_DOC_ID, RAG_KB_ID
+
+urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
+
+DEFAULT_INDEX_BODY = {
+    'settings': {
+        'index': {
+            'number_of_shards': 4,
+            'number_of_replicas': 1,
+            'refresh_interval': '1s',
+        }
+    },
+    'mappings': {
+        'dynamic': 'strict',
+        'properties': {
+            'uid': {'type': 'keyword'},
+            'doc_id': {'type': 'keyword'},
+            'group': {'type': 'keyword'},
+            'kb_id': {'type': 'keyword'},
+            'content': {'type': 'text', 'index': False, 'store': True},
+            'answer': {'type': 'text', 'index': False, 'store': True},
+            'meta': {'type': 'text', 'index': False, 'store': True},
+            'global_meta': {'type': 'text', 'index': False, 'store': True},
+            'type': {'type': 'keyword', 'store': True},
+            'number': {'type': 'integer', 'store': True},
+            'excluded_embed_metadata_keys': {'type': 'keyword', 'store': True},
+            'excluded_llm_metadata_keys': {'type': 'keyword', 'store': True},
+            'parent': {'type': 'keyword', 'store': True},
+            'image_keys': {'type': 'keyword', 'store': True},
+        }
+    }
+}
+
+
+class OpenSearchStore(LazyLLMStoreBase):
+    capability = StoreCapability.SEGMENT
+    need_embedding = False
+    supports_index_registration = False
+
+    def __init__(self, uris: List[str], client_kwargs: Optional[Dict] = None,
+                 index_kwargs: Optional[Union[Dict, List]] = None, **kwargs):
+        if isinstance(uris, str):
+            uris = [uris]
+        self._uris = uris
+        self._client_kwargs = client_kwargs or {}
+        self._index_kwargs = index_kwargs or DEFAULT_INDEX_BODY
+        self._primary_key = 'uid'
+
+    @property
+    def dir(self):
+        return None
+
+    @override
+    def connect(self, *args, **kwargs) -> None:
+        if self._client_kwargs.get('user') and self._client_kwargs.get('password'):
+            self._client_kwargs['http_auth'] = (self._client_kwargs.pop('user'), self._client_kwargs.pop('password'))
+        self._client = opensearchpy.OpenSearch(hosts=self._uris, **self._client_kwargs)
+
+    @override
+    def upsert(self, collection_name: str, data: List[dict]) -> bool:
+        if not data: return
+        try:
+            if not self._client.indices.exists(index=collection_name):
+                self._client.indices.create(index=collection_name, body=self._index_kwargs)
+            for i in range(0, len(data), INSERT_BATCH_SIZE):
+                bulk_data = []
+                batch_data = data[i:i + INSERT_BATCH_SIZE]
+                for segment in batch_data:
+                    segment = self._serialize_node(segment)
+                    bulk_data.append({'index': {'_index': collection_name, '_id': segment.get(self._primary_key)}})
+                    bulk_data.append(segment)
+                response = self._client.bulk(index=collection_name, body=bulk_data)
+                if response['errors']:
+                    raise ValueError(f"Error upserting data to OpenSearch: {response['items']}")
+            self._client.indices.refresh(index=collection_name)
+            return True
+        except Exception as e:
+            LOG.error(f"[OpenSearchStore - upsert] Error upserting data to OpenSearch: {e}")
+            return False
+
+    @override
+    def delete(self, collection_name: str, criteria: Optional[dict] = None, **kwargs) -> bool:
+        try:
+            if not self._client.indices.exists(index=collection_name):
+                LOG.warning(f"[OpenSearchStore - delete] Index {collection_name} does not exist")
+                return True
+            if not criteria:
+                self._client.indices.delete(index=collection_name)
+                return True
+            else:
+                resp = self._client.delete_by_query(index=collection_name,
+                                                    body=self._construct_criteria(criteria), refresh=True)
+                if resp.get('failures'):
+                    raise ValueError(f"Error deleting data from OpenSearch: {resp['failures']}")
+                return True
+        except Exception as e:
+            LOG.error(f"[OpenSearchStore - delete] Error deleting data from OpenSearch: {e}")
+            return False
+
+    @override
+    def get(self, collection_name: str, criteria: Optional[dict] = None, **kwargs) -> List[dict]:
+        try:
+            if not self._client.indices.exists(index=collection_name):
+                LOG.warning(f"[OpenSearchStore - get] Index {collection_name} does not exist")
+                return []
+            results: List[dict] = []
+            criteria = dict(criteria) if criteria else {}
+            if criteria and self._primary_key in criteria:
+                vals = criteria.pop(self._primary_key)
+                if not isinstance(vals, list):
+                    vals = [vals]
+                body = {'ids': vals}
+                resp = self._client.mget(index=collection_name, body=body)
+                for doc in resp['docs']:
+                    if doc.get('found', False):
+                        src = doc['_source']
+                        src['uid'] = doc['_id']
+                        results.append(self._deserialize_node(src))
+            else:
+                query = self._construct_criteria(criteria)
+                for hit in opensearchpy.helpers.scan(client=self._client, index=collection_name, query=query,
+                                                     scroll='2m', size=500, preserve_order=True):
+                    src = hit['_source']
+                    src['uid'] = hit['_id']
+                    results.append(self._deserialize_node(src))
+            return results
+        except Exception as e:
+            LOG.error(f"[OpenSearchStore - get] Error getting data from OpenSearch: {e}")
+            return []
+
+    @override
+    def search(self, collection_name: str, query: str, topk: int, **kwargs) -> List[dict]:
+        raise NotImplementedError("[OpenSearchStore - search] Not implemented yet")
+
+    def _serialize_node(self, segment: dict):
+        seg = dict(segment)
+        seg.pop('embedding', None)
+        seg['global_meta'] = json.dumps(seg.get('global_meta', {}), ensure_ascii=False)
+        seg['meta'] = json.dumps(seg.get('meta', {}), ensure_ascii=False)
+        seg['image_keys'] = json.dumps(seg.get('image_keys', []), ensure_ascii=False)
+        return seg
+
+    def _deserialize_node(self, segment: dict) -> dict:
+        segment['meta'] = json.loads(segment.get('meta', "{}"))
+        segment['global_meta'] = json.loads(segment.get('global_meta', "{}"))
+        segment['image_keys'] = json.loads(segment.get('image_keys', "[]"))
+        return segment
+
+    def _construct_criteria(self, criteria: Optional[dict] = None) -> dict:
+        criteria = dict(criteria) if criteria else {}
+        if not criteria:
+            return {}
+        if self._primary_key in criteria:
+            vals = criteria.pop(self._primary_key)
+            if not isinstance(vals, list):
+                vals = [vals]
+            return {'query': {'ids': {'values': vals}}}
+        else:
+            must_clauses = []
+            if RAG_DOC_ID in criteria:
+                must_clauses.append({'terms': {'doc_id': criteria.pop(RAG_DOC_ID)}})
+            if RAG_KB_ID in criteria:
+                must_clauses.append({'term': {'kb_id': criteria.pop(RAG_KB_ID)}})
+            if 'parent' in criteria:
+                must_clauses.append({'terms': {'parent': criteria.pop('parent')}})
+            return {'query': {'bool': {'must': must_clauses}}}
diff --git a/lazyllm/tools/rag/store/sensecore_store.py b/lazyllm/tools/rag/store/sensecore_store.py
deleted file mode 100644
index 0325bb23a..000000000
--- a/lazyllm/tools/rag/store/sensecore_store.py
+++ /dev/null
@@ -1,549 +0,0 @@
-import os
-import json
-import copy
-import uuid
-import time
-import requests
-
-from pydantic import BaseModel, Field
-from urllib.parse import urljoin
-from typing import Optional, List, Dict, Any, Union, Set
-
-from .store_base import StoreBase, LAZY_ROOT_NAME, BUILDIN_GLOBAL_META_DESC, IMAGE_PATTERN, INSERT_BATCH_SIZE
-from .utils import upload_data_to_s3, download_data_from_s3, fibonacci_backoff, create_file_path
-
-from ..index_base import IndexBase
-from ..data_type import DataType
-from ..doc_node import ImageDocNode, QADocNode, DocNode
-from ..global_metadata import GlobalMetadataDesc, RAG_DOC_ID, RAG_DOC_KB_ID
-
-from lazyllm import warp, pipeline, LOG, config
-from lazyllm.common import override
-from lazyllm.thirdparty import boto3
-
-
-class Segment(BaseModel):
-    segment_id: str
-    dataset_id: Optional[str] = "__default__"
-    document_id: str
-    group: str
-    content: Optional[str] = ""
-    meta: str
-    global_meta: str
-    excluded_embed_metadata_keys: Optional[List[str]] = Field(default_factory=list)
-    excluded_llm_metadata_keys: Optional[List[str]] = Field(default_factory=list)
-    parent: Optional[str] = ""
-    children: Dict[str, Any] = Field(default_factory=dict)
-    embedding_state: Optional[List[str]] = Field(default_factory=list)
-    answer: Optional[str] = ""
-    image_keys: Optional[List[str]] = Field(default_factory=list)
-    number: Optional[int] = 0
-
-
-class SenseCoreStore(StoreBase):
-    def __init__(self, group_embed_keys: Dict[str, Set[str]],
-                 global_metadata_desc: Dict[str, GlobalMetadataDesc] = None,
-                 kb_id: str = "__default__", uri: str = "", **kwargs):
-        self._uri = uri
-        self._kb_id = kb_id
-        self._group_embed_keys = group_embed_keys
-        self._s3_config = kwargs.get("s3_config")
-        self._image_url_config = kwargs.get("image_url_config")
-        self._activated_groups = set()
-        if global_metadata_desc:
-            self._global_metadata_desc = global_metadata_desc | BUILDIN_GLOBAL_META_DESC
-        else:
-            self._global_metadata_desc = BUILDIN_GLOBAL_META_DESC
-
-        if self._connect_store(uri):
-            LOG.info(f"Connected to doc store {self._uri}")
-        else:
-            raise ConnectionError(f"Failed to connect to doc store {self._uri}")
-
-    @override
-    def _connect_store(self, uri: str) -> bool:
-        # TODO get the url for testing connection
-        self._check_s3()
-        return True
-
-    def _check_s3(self):
-        obj_key = "lazyllm/warmup.txt"
-        upload_data_to_s3("warmup", bucket_name=self._s3_config["bucket_name"], object_key=obj_key,
-                          aws_access_key_id=self._s3_config["access_key"],
-                          aws_secret_access_key=self._s3_config["secret_access_key"],
-                          use_minio=self._s3_config["use_minio"], endpoint_url=self._s3_config["endpoint_url"])
-        return
-
-    def _serialize_node(self, node: DocNode) -> Dict:  # noqa: C901
-        """ serialize node to dict """
-        segment = Segment(segment_id=node._uid, dataset_id=node.global_metadata.get(RAG_DOC_KB_ID, None) or self._kb_id,
-                          document_id=node.global_metadata.get(RAG_DOC_ID), group=node._group,
-                          meta=json.dumps(node._metadata, ensure_ascii=False),
-                          excluded_embed_metadata_keys=node.excluded_embed_metadata_keys,
-                          excluded_llm_metadata_keys=node.excluded_llm_metadata_keys,
-                          global_meta=json.dumps(node.global_metadata, ensure_ascii=False),
-                          children={group: {"ids": [n._uid for n in c_l]} for group, c_l in node.children.items()},
-                          embedding_state=node._embedding_state, number=node._metadata.get("store_num", 0))
-        if node.parent:
-            if isinstance(node.parent, DocNode):
-                segment.parent = node.parent._uid
-            elif isinstance(node.parent, str):
-                segment.parent = node.parent
-
-        if node._group == LAZY_ROOT_NAME:
-            # content is root, process image key
-            content = json.dumps(node._content, ensure_ascii=False)
-            # image extract
-            matches = IMAGE_PATTERN.findall(content)
-            for title, image_path in matches:
-                if image_path.startswith("lazyllm"):
-                    continue
-                image_file_name = os.path.basename(image_path)
-                obj_key = f"lazyllm/images/{image_file_name}"
-                try:
-                    prefix = config['image_path_prefix']
-                except Exception:
-                    prefix = os.getenv("RAG_IMAGE_PATH_PREFIX", "")
-                file_path = create_file_path(path=image_path, prefix=prefix)
-                try:
-                    with open(file_path, "rb") as f:
-                        upload_data_to_s3(f.read(), bucket_name=self._s3_config["bucket_name"], object_key=obj_key,
-                                          aws_access_key_id=self._s3_config["access_key"],
-                                          aws_secret_access_key=self._s3_config["secret_access_key"],
-                                          use_minio=self._s3_config["use_minio"],
-                                          endpoint_url=self._s3_config["endpoint_url"])
-                        content = content.replace(image_path, obj_key)
-                except FileNotFoundError:
-                    LOG.error(f"Cannot find image path: {image_path} (local path {file_path}), skip...")
-                except Exception as e:
-                    LOG.error(f"Error when uploading `{image_path}` {e!r}")
-            node._content = json.loads(content)
-            # image extract
-            matches = IMAGE_PATTERN.findall(content)
-            for title, image_path in matches:
-                segment.image_keys.append(image_path)
-
-            # upload content
-            obj_key = f"lazyllm/lazyllm_root/{node._uid}.json"
-            upload_data_to_s3(content.encode('utf-8'), bucket_name=self._s3_config["bucket_name"], object_key=obj_key,
-                              aws_access_key_id=self._s3_config["access_key"],
-                              aws_secret_access_key=self._s3_config["secret_access_key"],
-                              use_minio=self._s3_config["use_minio"], endpoint_url=self._s3_config["endpoint_url"])
-            segment.content = obj_key
-        else:
-            segment.content = node._content
-
-            content = json.dumps(node._content, ensure_ascii=False)
-            # image extract
-            matches = IMAGE_PATTERN.findall(content)
-            for title, image_path in matches:
-                segment.image_keys.append(image_path)
-
-        if isinstance(node, ImageDocNode):
-            image_path = node._image_path
-            image_file_name = os.path.basename(image_path)
-            obj_key = f"lazyllm/images/{image_file_name}"
-            with open(image_path, "rb") as f:
-                upload_data_to_s3(f.read(), bucket_name=self._s3_config["bucket_name"], object_key=obj_key,
-                                  aws_access_key_id=self._s3_config["access_key"],
-                                  aws_secret_access_key=self._s3_config["secret_access_key"],
-                                  use_minio=self._s3_config["use_minio"], endpoint_url=self._s3_config["endpoint_url"])
-                segment.image_keys = [obj_key]
-        elif isinstance(node, QADocNode):
-            answer = node._answer
-            # image extract
-            matches = IMAGE_PATTERN.findall(answer)
-            for title, image_path in matches:
-                if image_path.startswith("lazyllm"):
-                    continue
-                image_file_name = os.path.basename(image_path)
-                obj_key = f"lazyllm/images/{image_file_name}"
-                try:
-                    prefix = config['image_path_prefix']
-                except Exception:
-                    prefix = os.getenv("RAG_IMAGE_PATH_PREFIX", "")
-                file_path = create_file_path(path=image_path, prefix=prefix)
-                try:
-                    with open(file_path, "rb") as f:
-                        upload_data_to_s3(f.read(), bucket_name=self._s3_config["bucket_name"], object_key=obj_key,
-                                          aws_access_key_id=self._s3_config["access_key"],
-                                          aws_secret_access_key=self._s3_config["secret_access_key"],
-                                          use_minio=self._s3_config["use_minio"],
-                                          endpoint_url=self._s3_config["endpoint_url"])
-                        answer = answer.replace(image_path, obj_key)
-                except FileNotFoundError:
-                    LOG.error(f"Cannot find image path: {image_path} (local path {file_path}), skip...")
-                except Exception as e:
-                    LOG.error(f"Error when uploading `{image_path}` {e!r}")
-            node._answer = answer
-
-            matches = IMAGE_PATTERN.findall(node._answer)
-            for title, image_path in matches:
-                segment.image_keys.append(image_path)
-
-            segment.answer = node._answer
-        return segment.model_dump()
-
-    def _deserialize_node(self, segment: Dict) -> DocNode:
-        """ deserialize node from dict """
-        if len(segment.get("answer", "")):
-            node = QADocNode(query=segment["content"], answer=segment["answer"], uid=segment["segment_id"],
-                             group=segment["group"], metadata=json.loads(segment["meta"]),
-                             global_metadata=json.loads(segment["global_meta"]), parent=segment["parent"])
-        else:
-            node = DocNode(uid=segment["segment_id"], content=segment["content"], group=segment["group"],
-                           metadata=json.loads(segment["meta"]), global_metadata=json.loads(segment["global_meta"]),
-                           parent=segment["parent"])
-        node.excluded_llm_metadata_keys = segment["excluded_embed_metadata_keys"]
-        node.excluded_embed_metadata_keys = segment["excluded_llm_metadata_keys"]
-        if segment["children"]:
-            children = {group: item["ids"] for group, item in segment["children"].items()}
-        else:
-            children = {}
-        node.children = children
-        if node._group == LAZY_ROOT_NAME and node._content.startswith("lazyllm/lazyllm_root/"):
-            obj_key = node._content
-            content = download_data_from_s3(bucket_name=self._s3_config["bucket_name"], object_key=obj_key,
-                                            aws_access_key_id=self._s3_config["access_key"],
-                                            aws_secret_access_key=self._s3_config["secret_access_key"],
-                                            use_minio=self._s3_config["use_minio"],
-                                            endpoint_url=self._s3_config["endpoint_url"], encoding="utf-8")
-            node._content = json.loads(content)
-        if segment.get("metadata", {}) is not None:
-            node = node.with_sim_score(score=segment.get("metadata", {}).get("score", 0))
-        return node
-
-    def _create_filters_str(self, filters: Dict[str, Union[str, int, List, Set]]) -> str:
-        ret_str = ""
-        for name, candidates in filters.items():
-            desc = self._global_metadata_desc.get(name)
-            if not desc:
-                raise ValueError(f'cannot find desc of field [{name}]')
-            key = name
-            if isinstance(candidates, str):
-                candidates = [candidates]
-            if (not isinstance(candidates, List)) and (not isinstance(candidates, Set)):
-                candidates = list(candidates)
-            if desc.data_type == DataType.ARRAY:
-                ret_str += f'array_contains_any({key}, {candidates}) and '
-            else:
-                ret_str += f'{key} in {candidates} and '
-
-        if len(ret_str) > 0:
-            return ret_str[:-5]  # truncate the last ' and '
-        return ret_str
-
-    @override
-    def update_nodes(self, nodes: List[DocNode]):
-        """ update nodes to the store """
-        filtered_nodes = []
-        for node in nodes:
-            if isinstance(node, QADocNode):
-                kb_id = node.global_metadata.get(RAG_DOC_KB_ID)
-                source_file = node.metadata["source_file_name"]
-                source_chunk = node.metadata["source_chunk"]
-                target_nodes = self.query(query=source_chunk, group_name="block", topk=1, embed_keys=["bge_m3_dense"],
-                                          filters={"kb_id": [kb_id], "file_name": [source_file]})
-                if not len(target_nodes):
-                    LOG.warning(f"cannot find file for qa node: source_file {source_file}, chunk {source_chunk}")
-                    continue
-            filtered_nodes.append(node)
-        if not filtered_nodes:
-            LOG.warning("no nodes to update")
-            return
-        group_cnt = {}
-        for node in filtered_nodes:
-            if node._group not in group_cnt:
-                group_cnt[node._group] = 1
-            node._metadata["store_num"] = group_cnt[node._group]
-            group_cnt[node._group] += 1
-
-        with pipeline() as insert_ppl:
-            insert_ppl.get_ids = warp(self._upload_nodes_and_insert).aslist
-            insert_ppl.check_status = warp(self._check_insert_job_status)
-
-        batched_nodes = [
-            filtered_nodes[i:i + INSERT_BATCH_SIZE] for i in range(0, len(filtered_nodes), INSERT_BATCH_SIZE)]
-        insert_ppl(batched_nodes)
-        return
-
-    def _upload_nodes_and_insert(self, segments: List[DocNode]) -> str:
-        try:
-            job_id = str(uuid.uuid4())
-            groups = set()
-            for node in segments:
-                groups.add(node._group)
-            groups = list(groups)
-
-            segments = [self._serialize_node(node) for node in segments]
-            dataset_id = None
-            for segment in segments:
-                dataset_id = segment.get("dataset_id", None)
-                break
-            obj_key = f"lazyllm/segments/{job_id}.jsonl"
-
-            upload_data_to_s3(data=segments, bucket_name=self._s3_config["bucket_name"], object_key=obj_key,
-                              aws_access_key_id=self._s3_config["access_key"],
-                              aws_secret_access_key=self._s3_config["secret_access_key"],
-                              use_minio=self._s3_config["use_minio"], endpoint_url=self._s3_config["endpoint_url"])
-            url = urljoin(self._uri, "v1/writerSegmentJob:submit")
-            params = {"writer_segment_job_id": job_id}
-            headers = {"Accept": "application/json", "Content-Type": "application/json"}
-            payload = {"dataset_id": dataset_id or self._kb_id, "file_key": obj_key, "groups": groups}
-
-            response = requests.post(url, params=params, headers=headers, json=payload)
-            response.raise_for_status()
-            LOG.info(f"SenseCore Store: insert task {job_id} submitted")
-        except Exception as e:
-            LOG.error(f"SenseCore Store: insert task {job_id} failed: {e}")
-            raise e
-        return job_id
-
-    def _check_insert_job_status(self, job_id: str) -> None:
-        """ check if the insert task is finished """
-        url = urljoin(self._uri, f"v1/writerSegmentJobs/{job_id}")
-        headers = {"Accept": "application/json"}
-        for wait_time in fibonacci_backoff(max_retries=15):
-            response = requests.get(url, headers=headers)
-            response.raise_for_status()
-            status = response.json()["state"]
-            if status == 2:
-                LOG.info(f"SenseCore Store: insert task {job_id} finished")
-                return
-            elif status == 3:
-                raise Exception(f"Insert task {job_id} failed")
-            else:
-                time.sleep(wait_time)
-        raise Exception(f"Insert task {job_id} failed after seconds")
-
-    @override
-    def remove_nodes(self, group_name: Optional[str] = None, dataset_id: Optional[str] = None,
-                     doc_ids: Optional[List[str]] = None, uids: Optional[List[str]] = None) -> None:
-        """ remove nodes from the store by doc_ids or uids """
-        try:
-            url = urljoin(self._uri, "v1/segments:bulkDelete")
-            headers = {"Accept": "*/*", "Content-Type": "application/json"}
-            if doc_ids:
-                payload = {"dataset_id": dataset_id or self._kb_id, "document_ids": doc_ids}
-            else:
-                payload = {"dataset_id": dataset_id or self._kb_id, "segment_ids": uids}
-            if group_name:
-                payload["group"] = group_name
-            response = requests.post(url, headers=headers, json=payload)
-            response.raise_for_status()
-        except Exception as e:
-            LOG.error(f"SenseCore Store: remove task failed: {e}")
-            raise e
-        return
-
-    @override
-    def get_nodes(self, group_name: Optional[str] = None, uids: Optional[List[str]] = None,  # noqa: C901
-                  doc_ids: Optional[Set] = None, dataset_id: Optional[str] = None,
-                  display: bool = False) -> List[DocNode]:
-        """ get nodes from the store """
-        if not (uids or group_name):
-            raise ValueError("group_name or uids must be provided")
-        if doc_ids and len(doc_ids) > 1:
-            raise ValueError("[Sensecore Store] - get_nodes: doc_ids must be a single value")
-        doc_id = doc_ids[0] if doc_ids else None
-        dataset_id = dataset_id or self._kb_id
-
-        if doc_id and not uids:
-            url = urljoin(self._uri, f"v1/datasets/{dataset_id}/documents/{doc_id}/segments:search")
-        else:
-            url = urljoin(self._uri, "v1/segments:scroll")
-
-        headers = {"Accept": "application/json", "Content-Type": "application/json"}
-        payload = {"dataset_id": dataset_id}
-        if group_name:
-            payload["group"] = group_name
-        if doc_id:
-            payload["document_id"] = doc_id
-        if uids:
-            payload["segment_ids"] = uids
-        else:
-            payload["page_size"] = 100
-        segments = []
-        while True:
-            response = requests.post(url, headers=headers, json=payload)
-            if response.status_code != 200:
-                LOG.warning(f"SenseCore Store: get node task failed: {response.text}")
-                break
-            data = response.json()
-            batch = data.get("segments", [])
-            if not batch:
-                break
-            segments.extend(batch)
-            next_page_token = data.get('next_page_token')
-            if not next_page_token:
-                break
-            payload['page_token'] = next_page_token
-        if doc_ids:
-            segments = [segment for segment in segments if segment['document_id'] in doc_ids]
-        if display:
-            segments = self._apply_display(segments)
-        return [self._deserialize_node(s) for s in segments]
-
-    def _apply_display(self, segments: List[dict]) -> List[dict]:
-        out = []
-        for s in segments:
-            if not s.get('is_active', True):
-                continue
-            if s.get('display_content'):
-                s['content'] = s['display_content']
-            out.append(s)
-        return out
-
-    def _multi_modal_process(self, query: str, images: List[str]):
-        urls = []
-        s3 = boto3.client('s3', aws_access_key_id=self._image_url_config["access_key"],
-                          aws_secret_access_key=self._image_url_config["secret_access_key"],
-                          endpoint_url=self._image_url_config["endpoint_url"])
-        for image in images:
-            query = query + "<image>\n"
-            url = s3.generate_presigned_url(ClientMethod='get_object',
-                                            Params={'Bucket': self._image_url_config["bucket_name"], 'Key': image},
-                                            ExpiresIn=3600)
-            urls.append(url)
-        return query, urls
-
-    @override
-    def query(self, query: str, group_name: str, topk: int = 10, embed_keys: Optional[List[str]] = None,  # noqa: C901
-              filters: Optional[Dict[str, Union[str, int, List, Set]]] = None, **kwargs) -> List[DocNode]:
-        """ search nodes from the store """
-        try:
-            if not embed_keys:
-                raise ValueError("[Sensecore Store] Query: embed_keys must be provided")
-            url = urljoin(self._uri, "v1/segments:hybrid")
-            headers = {"Accept": "application/json", "Content-Type": "application/json"}
-
-            original_filters = copy.deepcopy(filters)
-            if group_name == 'qa':
-                filters = {"kb_id": filters.get("kb_id", [])}
-            filter_str = self._create_filters_str(filters) if filters else None
-
-            dataset_ids = []
-            if filters:
-                for name, candidates in filters.items():
-                    desc = self._global_metadata_desc.get(name)
-                    if not desc:
-                        raise ValueError(f'cannot find desc of field [{name}]')
-                    key = name
-                    if key == "kb_id":
-                        if isinstance(candidates, str):
-                            candidates = [candidates]
-                        if (not isinstance(candidates, List)) and (not isinstance(candidates, Set)):
-                            candidates = list(candidates)
-                        dataset_ids = candidates
-
-            if dataset_ids:
-                hybrid_search_datasets = [{"dataset_id": dataset_id} for dataset_id in dataset_ids]
-            else:
-                LOG.error(f"SenseCore Store: no dataset_id provided, please check your filters: {filters}")
-                return []
-
-            images = kwargs.get("images", [])
-            if images:
-                query, images = self._multi_modal_process(query, images)
-
-            nodes = []
-            for embed_key in embed_keys:
-                payload = {"query": query, "hybrid_search_datasets": hybrid_search_datasets, "hybrid_search_type": 2,
-                           "top_k": topk, "filters": filter_str, "group": group_name, "embedding_model": embed_key,
-                           "images": images}
-                LOG.info(f"[Sensecore Store]: query request body: {payload}.")
-                response = requests.post(url, headers=headers, json=payload)
-                response.raise_for_status()
-                segments = response.json()['segments']
-                segments = [s for s in segments if s['is_active']]
-                for s in segments:
-                    if len(s.get('display_content', '')):
-                        s['content'] = s['display_content']
-                if group_name == 'qa':
-                    for segment in segments:
-                        node = self._deserialize_node(segment)
-                        source_file = node.metadata.get("source_file_name", "")
-                        if not source_file:
-                            continue
-                        source_chunk = node.metadata.get("source_chunk", "")
-                        original_filters["file_name"] = [source_file]
-                        target_nodes = self.query(query=source_chunk, group_name="block", topk=1,
-                                                  embed_keys=["bge_m3_dense"], filters=original_filters)
-                        if len(target_nodes):
-                            node.global_metadata.update(target_nodes[0].global_metadata)
-                            node.metadata.update(target_nodes[0].metadata)
-                            nodes.append(node)
-                else:
-                    nodes.extend([self._deserialize_node(node) for node in segments])
-            return nodes
-        except Exception as e:
-            LOG.error(f"SenseCore Store: query task failed: {e}")
-            raise e
-
-    @override
-    def register_index(self, type: str, index: IndexBase) -> None:
-        """ register index to the store (for store that support hook only)"""
-        raise NotImplementedError("register_index is not supported for SenseCoreStore."
-                                  "Please use register_index for store that support hook")
-
-    @override
-    def get_index(self, type: Optional[str] = None) -> Optional[IndexBase]:
-        """ get registered index from the store """
-        raise NotImplementedError('get_index is not supported for SenseCoreStore.')
-
-    @override
-    def update_doc_meta(self, doc_id: str, metadata: dict) -> None:
-        """ update doc meta """
-        # TODO 性能优化
-        dataset_id = metadata.get(RAG_DOC_KB_ID, None)
-        nodes: List[DocNode] = []
-        for group in self.activated_groups():
-            group_nodes = self.get_nodes(group_name=group, dataset_id=dataset_id, doc_ids=[doc_id])
-            nodes.extend(group_nodes)
-
-        for node in nodes:
-            node.global_metadata.update(metadata)
-        self.update_nodes(nodes)
-        return
-
-    @override
-    def all_groups(self) -> List[str]:
-        """ get all node groups for Document """
-        return list(self._activated_groups)
-
-    @override
-    def activate_group(self, group_names: Union[str, List[str]]):
-        if isinstance(group_names, str): group_names = [group_names]
-        active_groups = []
-        for group_name in group_names:
-            if group_name.isupper():
-                LOG.error(f"Group name {group_name} should be lowercase (`_` is allowed)")
-                continue
-            active_groups.append(group_name)
-        self._activated_groups.update(active_groups)
-
-    @override
-    def activated_groups(self):
-        return list(self._activated_groups)
-
-    @override
-    def is_group_active(self, name: str) -> bool:
-        """ check if a group has nodes (active) """
-        try:
-            url = urljoin(self._uri, "/v1/segments:scroll")
-            headers = {"Accept": "application/json", "Content-Type": "application/json"}
-            payload = {"dataset_id": self._kb_id, "group": name}
-
-            response = requests.post(url, headers=headers, json=payload)
-            response.raise_for_status()
-            data = response.json()
-            if len(data.get("segments", [])):
-                return True
-        except Exception as e:
-            LOG.error(f"is_group_active error for group {name}: {str(e)}")
-        return False
-
-    @override
-    def clear_cache(self, group_names: Optional[List[str]] = None) -> None:
-        """ clear cache for a group """
-        raise NotImplementedError("clear_cache is not supported for SenseCoreStore.")
diff --git a/lazyllm/tools/rag/store/store_base.py b/lazyllm/tools/rag/store/store_base.py
index c80c32865..410a7f537 100644
--- a/lazyllm/tools/rag/store/store_base.py
+++ b/lazyllm/tools/rag/store/store_base.py
@@ -1,22 +1,28 @@
 import re
 
 from abc import ABC, abstractmethod
-from typing import Optional, List, Union, Set
+from enum import IntFlag, auto
+from typing import Optional, List, Union, Set, Dict, Any
+from lazyllm import LazyLLMRegisterMetaABCClass
+from pydantic import BaseModel, Field
 
-from ..doc_node import DocNode
-from ..index_base import IndexBase
 from ..data_type import DataType
 from ..global_metadata import (
     GlobalMetadataDesc, RAG_DOC_ID, RAG_DOC_PATH, RAG_DOC_FILE_NAME,
     RAG_DOC_FILE_TYPE, RAG_DOC_FILE_SIZE, RAG_DOC_CREATION_DATE,
-    RAG_DOC_LAST_MODIFIED_DATE, RAG_DOC_LAST_ACCESSED_DATE
+    RAG_DOC_LAST_MODIFIED_DATE, RAG_DOC_LAST_ACCESSED_DATE, RAG_KB_ID
 )
 
-LAZY_ROOT_NAME = "lazyllm_root"
-LAZY_IMAGE_GROUP = "image"
+LAZY_ROOT_NAME = 'lazyllm_root'
+LAZY_IMAGE_GROUP = 'image'
 EMBED_DEFAULT_KEY = '__default__'
+EMBED_PREFIX = 'embedding_'
+DEFAULT_KB_ID = 'default'
+GLOBAL_META_KEY_PREFIX = 'global_meta_'
+
 BUILDIN_GLOBAL_META_DESC = {
     RAG_DOC_ID: GlobalMetadataDesc(data_type=DataType.VARCHAR, default_value=' ', max_size=512),
+    RAG_KB_ID: GlobalMetadataDesc(data_type=DataType.VARCHAR, default_value=' ', max_size=512),
     RAG_DOC_PATH: GlobalMetadataDesc(data_type=DataType.VARCHAR, default_value=' ', max_size=65535),
     RAG_DOC_FILE_NAME: GlobalMetadataDesc(data_type=DataType.VARCHAR, default_value=' ', max_size=65535),
     RAG_DOC_FILE_TYPE: GlobalMetadataDesc(data_type=DataType.VARCHAR, default_value=' ', max_size=65535),
@@ -28,64 +34,68 @@
 INSERT_BATCH_SIZE = 3000
 IMAGE_PATTERN = re.compile(r'!\[([^\]]*)\]\(([^)]+)\)')
 
-class StoreBaseMixin:
-    @abstractmethod
-    def update_nodes(self, nodes: List[DocNode]) -> None:
-        """ update nodes to the store """
-        raise NotImplementedError
-
-    @abstractmethod
-    def remove_nodes(self, doc_ids: List[str], group_name: Optional[str] = None,
-                     uids: Optional[List[str]] = None) -> None:
-        """ remove nodes from the store by doc_ids or uids """
-        raise NotImplementedError
-
-    @abstractmethod
-    def register_index(self, type: str, index: IndexBase) -> None:
-        """ register index to the store (for store that support hook only)"""
-        raise NotImplementedError
-
-    @abstractmethod
-    def get_index(self, type: Optional[str] = None) -> Optional[IndexBase]:
-        """ get registered index from the store """
-        raise NotImplementedError
-
-    @abstractmethod
-    def clear_cache(self, group_names: Optional[List[str]] = None) -> None:
-        raise NotImplementedError
 
-
-class StoreBase(StoreBaseMixin, ABC):
-    @abstractmethod
-    def get_nodes(self, group_name: Optional[str] = None, uids: Optional[List[str]] = None,
-                  doc_ids: Optional[Set] = None, **kwargs) -> List[DocNode]:
-        """ get nodes from the store """
-        raise NotImplementedError
-
-    @abstractmethod
-    def update_doc_meta(self, doc_id: str, metadata: dict) -> None:
-        """ update doc meta """
+class SegmentType(IntFlag):
+    TEXT = auto()
+    IMAGE = auto()
+    HYBRID = auto()
+    TABLE = auto()
+    CODE = auto()
+    QA = auto()
+
+
+class Segment(BaseModel):
+    uid: str
+    doc_id: str
+    group: str
+    content: str
+    meta: Optional[Dict[str, Any]] = Field(default_factory=dict)
+    global_meta: Optional[Dict[str, Any]] = Field(default_factory=dict)
+    embedding: Optional[Dict[str, List[float]]] = Field(default_factory=dict)
+    type: Optional[int] = SegmentType.TEXT.value
+    number: Optional[int] = 0
+    kb_id: Optional[str] = '__default__'
+    excluded_embed_metadata_keys: Optional[List[str]] = Field(default_factory=list)
+    excluded_llm_metadata_keys: Optional[List[str]] = Field(default_factory=list)
+    parent: Optional[str] = None    # uid of parent node
+    answer: Optional[str] = ''
+    image_keys: Optional[List[str]] = Field(default_factory=list)
+
+
+class StoreCapability(IntFlag):
+    SEGMENT = auto()
+    VECTOR = auto()
+    ALL = SEGMENT | VECTOR
+
+
+class LazyLLMStoreBase(ABC, metaclass=LazyLLMRegisterMetaABCClass):
+    capability: StoreCapability
+    need_embedding: bool = True
+    supports_index_registration: bool = False
+
+    @property
+    def dir(self):
         raise NotImplementedError
 
     @abstractmethod
-    def query(self, *args, **kwargs) -> List[DocNode]:
-        """ search nodes from the store """
+    def connect(self, *args, **kwargs):
         raise NotImplementedError
 
     @abstractmethod
-    def all_groups(self) -> List[str]:
-        """ get all node groups for Document """
+    def upsert(self, collection_name: str, data: List[dict]) -> bool:
         raise NotImplementedError
 
     @abstractmethod
-    def activate_group(self, group_names: Union[str, List[str]]) -> bool:
+    def delete(self, collection_name: str, criteria: dict, **kwargs) -> bool:
         raise NotImplementedError
 
     @abstractmethod
-    def activated_groups(self):
+    def get(self, collection_name: str, criteria: dict, **kwargs) -> List[dict]:
         raise NotImplementedError
 
     @abstractmethod
-    def is_group_active(self, name: str) -> bool:
-        """ check if a group has nodes (active) """
+    def search(self, collection_name: str, query: Optional[str] = None,
+               query_embedding: Optional[Union[dict, List[float]]] = None, topk: int = 10,
+               filters: Optional[Dict[str, Union[str, int, List, Set]]] = None,
+               embed_key: Optional[str] = None, **kwargs) -> List[dict]:
         raise NotImplementedError
diff --git a/lazyllm/tools/rag/store/vector/__init__.py b/lazyllm/tools/rag/store/vector/__init__.py
new file mode 100644
index 000000000..5f7fdfa21
--- /dev/null
+++ b/lazyllm/tools/rag/store/vector/__init__.py
@@ -0,0 +1,4 @@
+from .milvus_store import MilvusStore
+from .chroma_store import ChromadbStore
+
+__all__ = ["MilvusStore", "ChromadbStore"]
diff --git a/lazyllm/tools/rag/store/vector/chroma_store.py b/lazyllm/tools/rag/store/vector/chroma_store.py
new file mode 100644
index 000000000..45267bbfd
--- /dev/null
+++ b/lazyllm/tools/rag/store/vector/chroma_store.py
@@ -0,0 +1,244 @@
+import os
+import re
+import traceback
+
+from typing import Dict, List, Optional, Set, Union, Any
+from collections import defaultdict
+from urllib.parse import urlparse
+from pathlib import Path
+
+from ..store_base import (LazyLLMStoreBase, StoreCapability, GLOBAL_META_KEY_PREFIX)
+from ...data_type import DataType
+from ...global_metadata import GlobalMetadataDesc
+
+from lazyllm import LOG
+from lazyllm.common import override
+from lazyllm.thirdparty import chromadb
+
+INSERT_BATCH_SIZE = 1000
+
+DEFAULT_INDEX_CONFIG = {
+    'hnsw': {
+        'space': 'cosine',
+        'ef_construction': 200,
+    }
+}
+
+
+class ChromadbStore(LazyLLMStoreBase):
+    capability = StoreCapability.VECTOR
+    need_embedding = True
+    supports_index_registration = False
+
+    def __init__(self, uri: Optional[str] = None, dir: Optional[str] = None,
+                 index_kwargs: Optional[Union[Dict, List]] = None, client_kwargs: Optional[Dict] = None,
+                 **kwargs) -> None:
+        assert uri or (dir), "uri or dir must be provided"
+        self._index_kwargs = index_kwargs or DEFAULT_INDEX_CONFIG
+        self._client_kwargs = client_kwargs or {}
+        if dir:
+            self._dir = dir
+        else:
+            self._dir, self._host, self._port = self._parse_uri(uri)
+        self._primary_key = 'uid'
+
+    @property
+    def dir(self):
+        if not self._dir: return None
+        p = Path(self._dir)
+        p = p if p.suffix else (p / "chroma.sqlite3")
+        return str(p.resolve(strict=False))
+
+    def _parse_uri(self, uri: str):
+        windows_drive = re.match(r"^[a-zA-Z]:[\\/]", uri or "")
+        if ("://" not in uri) and (windows_drive or os.path.isabs(uri)):
+            return os.path.abspath(uri), None, None
+
+        p = urlparse(uri)
+
+        if p.scheme == "":
+            return os.path.abspath(uri), None, None
+
+        if p.scheme == "file":
+            path = p.path
+            if os.name == "nt" and path.startswith("/") and re.match(r"^/[a-zA-Z]:", path):
+                path = path.lstrip("/")  # file:///C:/... -> C:/...
+            return os.path.abspath(path), None, None
+
+        scheme = p.scheme
+        if scheme.startswith("chroma+"):
+            scheme = scheme.split("+", 1)[1]  # http or https
+
+        if scheme in ("http", "https"):
+            host = p.hostname or "127.0.0.1"
+            port = p.port or (443 if scheme == "https" else 80)
+            return None, host, port
+
+        raise ValueError(f"Unsupported URI scheme in '{uri}'. "
+                         "Use file:///path or plain path for local; http(s)://host:port for remote.")
+
+    @override
+    def connect(self, embed_dims: Optional[Dict[str, int]] = None,
+                embed_datatypes: Optional[Dict[str, DataType]] = None,
+                global_metadata_desc: Optional[Dict[str, GlobalMetadataDesc]] = None, **kwargs):
+        self._global_metadata_desc = global_metadata_desc or {}
+        self._embed_dims = embed_dims or {}
+        self._embed_datatypes = embed_datatypes or {}
+        for k, v in self._global_metadata_desc.items():
+            if v.data_type not in [DataType.VARCHAR, DataType.INT32, DataType.FLOAT, DataType.BOOLEAN]:
+                raise ValueError(f"[Chromadb Store] Unsupported data type {v.data_type} for global metadata {k}"
+                                 " (only string, int, float, bool are supported)")
+        for k, v in self._embed_datatypes.items():
+            if v not in [DataType.FLOAT_VECTOR, DataType.SPARSE_FLOAT_VECTOR]:
+                raise ValueError(f"[Chromadb Store] Unsupported data type {v} for embed key {k}"
+                                 " (only float vector and sparse float vector are supported)")
+        if self._dir:
+            self._client = chromadb.PersistentClient(path=self._dir, **self._client_kwargs)
+            LOG.success(f"Initialzed chromadb in path: {self._dir}")
+        else:
+            self._client = chromadb.HttpClient(host=self._host, port=self._port, **self._client_kwargs)
+            LOG.success(f"Initialzed chromadb in host: {self._host}, port: {self._port}")
+
+    @override
+    def upsert(self, collection_name: str, data: List[dict]) -> bool:
+        try:
+            # NOTE chromadb only support single embedding for each collection
+            if not data: return
+            data_embeddings = data[0].get('embedding', {})
+            if not data_embeddings: return
+            embed_keys = list(data_embeddings.keys())
+            for embed_key in embed_keys:
+                if embed_key not in self._embed_datatypes:
+                    raise ValueError(f"Embed key {embed_key} not found in embed_datatypes")
+                collection = self._client.get_or_create_collection(
+                    name=self._gen_collection_name(collection_name, embed_key), configuration=self._index_kwargs)
+                for i in range(0, len(data), INSERT_BATCH_SIZE):
+                    collection.upsert(**self._serialize_data(data[i: i + INSERT_BATCH_SIZE], embed_key))
+            return True
+        except Exception as e:
+            LOG.error(f"[Chromadb Store - upsert] Failed to create collection {collection_name}: {e}")
+            LOG.error(traceback.format_exc())
+            return False
+
+    def _serialize_data(self, data: List[dict], embed_key: str) -> List[dict]:
+        res = {'ids': [], 'embeddings': [], 'metadatas': []}
+        for d in data:
+            res['ids'].append(d.get('uid'))
+            res['embeddings'].append(d.get('embedding', {}).get(embed_key))
+            res['metadatas'].append({self._gen_global_meta_key(k): v for k, v in d.get('global_meta', {}).items()
+                                     if k in self._global_metadata_desc})
+        return res
+
+    @override
+    def delete(self, collection_name: str, criteria: Optional[dict] = None, **kwargs) -> bool:
+        try:
+            if not criteria:
+                for embed_key in self._embed_datatypes.keys():
+                    try:
+                        self._client.delete_collection(name=self._gen_collection_name(collection_name, embed_key))
+                    except Exception:
+                        continue
+                return True
+            else:
+                filters = self._construct_criteria(criteria)
+                for embed_key in self._embed_datatypes.keys():
+                    collection = self._client.get_collection(name=self._gen_collection_name(collection_name, embed_key))
+                    collection.delete(**filters)
+                return True
+        except Exception as e:
+            LOG.error(f"[Chromadb Store - delete] Failed to delete collection {collection_name}: {e}")
+            LOG.error(traceback.format_exc())
+            return False
+
+    @override
+    def get(self, collection_name: str, criteria: Optional[dict] = None, **kwargs) -> List[dict]:
+        try:
+            filters = self._construct_criteria(criteria) if criteria else {}
+            all_data = []
+            for key in self._embed_datatypes:
+                try:
+                    coll = self._client.get_collection(
+                        name=self._gen_collection_name(collection_name, key)
+                    )
+                    data = coll.get(include=['metadatas', 'embeddings'], **filters)
+                    all_data.append((key, data))
+                except Exception:
+                    continue
+
+            res: Dict[str, Dict[str, Any]] = defaultdict(lambda: {
+                'uid': None, 'global_meta': {}, 'embedding': {}})
+            for embed_key, data in all_data:
+                ids = data['ids']
+                metas = data['metadatas']
+                embs = data['embeddings']
+
+                for uid, meta, emb in zip(ids, metas, embs):
+                    entry = res[uid]
+                    entry['uid'] = uid
+                    if not entry['global_meta']:
+                        entry['global_meta'] = {
+                            k[len(GLOBAL_META_KEY_PREFIX):]: v
+                            for k, v in meta.items()
+                        }
+                    entry['embedding'][embed_key] = list(emb)
+            return list(res.values())
+        except Exception as e:
+            LOG.error(f"[ChromadbStore - get] task fail: {e}")
+            LOG.error(traceback.format_exc())
+
+    @override
+    def search(self, collection_name: str, query_embedding: List[float], embed_key: str, topk: Optional[int] = 10,
+               filters: Optional[Dict[str, Union[str, int, List, Set]]] = None,
+               **kwargs) -> List[dict]:
+        try:
+            collection = self._client.get_collection(name=self._gen_collection_name(collection_name, embed_key))
+
+            filters = self._construct_filter_expr(filters) if filters else {}
+            query_results = collection.query(query_embeddings=[query_embedding], n_results=topk, **filters)
+            res = []
+            for i, r_list in enumerate(query_results['ids']):
+                for j, uid in enumerate(r_list):
+                    dis = query_results['distances'][i][j]
+                    res.append({'uid': uid, 'score': 1 - dis})
+            return res
+        except Exception as e:
+            LOG.error(f"[ChromadbStore - search] task fail: {e}")
+            LOG.error(traceback.format_exc())
+
+    def _construct_criteria(self, criteria: dict) -> dict:
+        res = {}
+        if self._primary_key in criteria:
+            res['ids'] = criteria[self._primary_key]
+        else:
+            res['where'] = {}
+            for key, vaule in criteria.items():
+                if key not in self._global_metadata_desc:
+                    continue
+                field_key = self._gen_global_meta_key(key)
+                if isinstance(vaule, list):
+                    res['where'][field_key] = {'$in': vaule}
+                elif isinstance(vaule, str):
+                    res['where'][field_key] = {'$eq': vaule}
+                else:
+                    raise ValueError(f'invalid criteria type: {type(vaule)}')
+        return res
+
+    def _construct_filter_expr(self, filters: Dict[str, Union[str, int, List, Set]]) -> str:
+        ret = {}
+        for name, candidates in filters.items():
+            desc = self._global_metadata_desc.get(name)
+            if not desc:
+                raise ValueError(f'cannot find desc of field [{name}]')
+            key = self._gen_global_meta_key(name)
+            if isinstance(candidates, str):
+                candidates = [candidates]
+            elif (not isinstance(candidates, List)) and (not isinstance(candidates, Set)):
+                candidates = list(candidates)
+            ret[key] = {'$in': candidates}
+        return {'where': ret}
+
+    def _gen_global_meta_key(self, k: str) -> str:
+        return GLOBAL_META_KEY_PREFIX + k
+
+    def _gen_collection_name(self, collection_name: str, embed_key: str) -> str:
+        return collection_name + '_' + embed_key + "_embed"
diff --git a/lazyllm/tools/rag/store/vector/milvus_store.py b/lazyllm/tools/rag/store/vector/milvus_store.py
new file mode 100644
index 000000000..c45554880
--- /dev/null
+++ b/lazyllm/tools/rag/store/vector/milvus_store.py
@@ -0,0 +1,310 @@
+import copy
+import traceback
+
+from packaging import version
+from urllib import parse
+from pathlib import Path
+from typing import Dict, List, Union, Optional, Set
+
+from lazyllm import LOG
+from lazyllm.thirdparty import pymilvus
+from lazyllm.common import override
+
+from ..store_base import LazyLLMStoreBase, StoreCapability, GLOBAL_META_KEY_PREFIX, EMBED_PREFIX
+from ...data_type import DataType
+from ...global_metadata import GlobalMetadataDesc
+
+MILVUS_UPSERT_BATCH_SIZE = 500
+MILVUS_PAGINATION_OFFSET = 1000
+
+
+class MilvusStore(LazyLLMStoreBase):
+    capability = StoreCapability.VECTOR
+    need_embedding = True
+    supports_index_registration = False
+
+    def __init__(self, uri: str = '', db_name: str = 'lazyllm', index_kwargs: Optional[Union[Dict, List]] = None,
+                 client_kwargs: Optional[Dict] = None):
+        # one database, different collection for each group (for standalone, add prefix to collection name)
+        # when there's data need upsert, collection creation happen.
+        self._uri = uri
+        self._db_name = db_name
+        self._index_kwargs = index_kwargs
+        self._client_kwargs = client_kwargs or {}
+        self._primary_key = 'uid'
+        self._client = None
+        if self._uri and parse.urlparse(self._uri).scheme.lower() in ['unix', 'http', 'https', 'tcp', 'grpc']:
+            self._is_remote = True
+        else:
+            self._is_remote = False
+
+    @property
+    def dir(self):
+        if self._is_remote: return None
+        p = Path(self._uri)
+        p = p if p.suffix else (p / "milvus.db")
+        return str(p.resolve(strict=False))
+
+    @override
+    def connect(self, embed_dims: Optional[Dict[str, int]] = None,
+                embed_datatypes: Optional[Dict[str, DataType]] = None,
+                global_metadata_desc: Optional[Dict[str, GlobalMetadataDesc]] = None, **kwargs):
+        self._embed_dims = embed_dims or {}
+        self._embed_datatypes = embed_datatypes or {}
+        self._global_metadata_desc = global_metadata_desc or {}
+        self._set_constants()
+        self._connect()
+        LOG.info("[Milvus Vector Store] init success!")
+        self._disconnect()
+
+    def _connect(self):
+        try:
+            self._client = pymilvus.MilvusClient(uri=self._uri, **self._client_kwargs)
+            if self._is_remote and self._db_name:
+                existing_dbs = self._client.list_databases()
+                if self._db_name not in existing_dbs:
+                    self._client.create_database(self._db_name)
+                self._client.using_database(self._db_name)
+        except Exception as e:
+            LOG.error(f'[Milvus Store - connect] error: {e}')
+
+    def _disconnect(self):
+        try:
+            if self._client:
+                self._client.close()
+                self._client = None
+        except Exception as e:
+            LOG.error(f'[Milvus Store - disconnect] error: {e}')
+
+    @override
+    def upsert(self, collection_name: str, data: List[dict]) -> bool:
+        try:
+            if not data: return
+            data_embeddings = data[0].get('embedding', {})
+            if not data_embeddings: return
+            self._connect()
+            if not self._client.has_collection(collection_name):
+                embed_kwargs = {}
+                for embed_key in data_embeddings.keys():
+                    assert self._embed_datatypes.get(embed_key), \
+                        f'cannot find embedding params for embed [{embed_key}]'
+                    if embed_key not in embed_kwargs:
+                        embed_kwargs[embed_key] = {'dtype': self._type2milvus[self._embed_datatypes[embed_key]]}
+                    if self._embed_dims.get(embed_key): embed_kwargs[embed_key]['dim'] = self._embed_dims[embed_key]
+                self._create_collection(collection_name, embed_kwargs)
+
+            for i in range(0, len(data), MILVUS_UPSERT_BATCH_SIZE):
+                self._client.upsert(collection_name=collection_name,
+                                    data=[self._serialize_data(d) for d in data[i:i + MILVUS_UPSERT_BATCH_SIZE]])
+            self._disconnect()
+            return True
+        except Exception as e:
+            LOG.error(f'[Milvus Store - upsert] error: {e}')
+            LOG.error(traceback.format_exc())
+            self._disconnect()
+            return False
+
+    @override
+    def delete(self, collection_name: str, criteria: Optional[dict] = None, **kwargs) -> bool:
+        try:
+            self._connect()
+            if not self._client.has_collection(collection_name):
+                return True
+            self._client.load_collection(collection_name)
+            if not criteria:
+                self._client.drop_collection(collection_name=collection_name)
+            else:
+                self._client.delete(collection_name=collection_name, **self._construct_criteria(criteria))
+            self._disconnect()
+            return True
+        except Exception as e:
+            LOG.error(f'[Milvus Store - delete] error: {e}')
+            self._disconnect()
+            return False
+
+    @override
+    def get(self, collection_name: str, criteria: Optional[dict] = None, **kwargs) -> List[dict]:
+        try:
+            self._connect()
+            if not self._client.has_collection(collection_name):
+                return []
+            self._client.load_collection(collection_name)
+            col_desc = self._client.describe_collection(collection_name=collection_name)
+            field_names = [field.get('name') for field in col_desc.get('fields', [])
+                           if field.get('name').startswith(EMBED_PREFIX)]
+            if criteria and self._primary_key in criteria:
+                res = self._client.get(collection_name=collection_name, ids=criteria[self._primary_key])
+            else:
+                filters = self._construct_criteria(criteria) if criteria else {}
+                if version.parse(pymilvus.__version__) >= version.parse('2.4.11'):
+                    iterator = self._client.query_iterator(collection_name=collection_name,
+                                                           batch_size=MILVUS_PAGINATION_OFFSET,
+                                                           output_fields=field_names, **filters)
+                    res = []
+                    while True:
+                        result = iterator.next()
+                        if not result:
+                            iterator.close()
+                            break
+                        res += result
+                else:
+                    res = self._client.query(collection_name=collection_name, output_fields=field_names, **filters)
+            self._disconnect()
+            return [self._deserialize_data(r) for r in res]
+        except Exception as e:
+            LOG.error(f'[Milvus Store - get] error: {e}')
+            self._disconnect()
+            return []
+
+    def _set_constants(self):
+        self._type2milvus = {
+            DataType.VARCHAR: pymilvus.DataType.VARCHAR,
+            DataType.ARRAY: pymilvus.DataType.ARRAY,
+            DataType.FLOAT_VECTOR: pymilvus.DataType.FLOAT_VECTOR,
+            DataType.INT32: pymilvus.DataType.INT32,
+            DataType.INT64: pymilvus.DataType.INT64,
+            DataType.SPARSE_FLOAT_VECTOR: pymilvus.DataType.SPARSE_FLOAT_VECTOR,
+            DataType.STRING: pymilvus.DataType.STRING,
+        }
+        self._builtin_keys = {
+            'uid': {'dtype': pymilvus.DataType.VARCHAR, 'max_length': 256, 'is_primary': True}
+        }
+        self._constant_fields = self._get_constant_fields()
+
+    def _get_constant_fields(self) -> list:
+        field_list = []
+        for k, kws in self._builtin_keys.items():
+            field_list.append(pymilvus.FieldSchema(name=k, **kws))
+        for k, desc in self._global_metadata_desc.items():
+            field_name = self._gen_global_meta_key(k)
+            if desc.data_type == DataType.ARRAY:
+                if desc.element_type is None:
+                    raise ValueError(f'Milvus field [{field_name}]: '
+                                     '`element_type` is required when `data_type` is ARRAY.')
+                field_args = {'element_type': self._type2milvus[desc.element_type], 'max_capacity': desc.max_size}
+                if desc.element_type == DataType.VARCHAR: field_args['max_length'] = 65535
+            elif desc.data_type == DataType.VARCHAR:
+                field_args = {'max_length': desc.max_size}
+            else:
+                field_args = {}
+            field_list.append(pymilvus.FieldSchema(name=field_name, dtype=self._type2milvus[desc.data_type],
+                                                   default_value=desc.default_value, **field_args))
+        return field_list
+
+    def _create_collection(self, collection_name: str, embed_kwargs: Dict[str, Dict]):  # noqa: C901
+        field_list = copy.deepcopy(self._constant_fields)
+        index_params = self._client.prepare_index_params()
+        for k, kws in embed_kwargs.items():
+            embed_field_name = self._gen_embed_key(k)
+            field_list.append(pymilvus.FieldSchema(name=embed_field_name, **kws))
+            index_params.add_index(field_name=embed_field_name, **kws)
+            if isinstance(self._index_kwargs, list):
+                for item in self._index_kwargs:
+                    embed_key = item.get('embed_key', None)
+                    if not embed_key:
+                        raise ValueError(f'cannot find `embed_key` in `index_kwargs` of `{item}`')
+                    if embed_key == k:
+                        index_kwarg = item.copy()
+                        index_kwarg.pop('embed_key', None)
+                        index_params.add_index(field_name=embed_field_name, **index_kwarg)
+                        break
+            elif isinstance(self._index_kwargs, dict):
+                index_params.add_index(field_name=embed_field_name, **self._index_kwargs)
+        schema = pymilvus.CollectionSchema(fields=field_list, auto_id=False, enable_dynamic_field=False)
+        self._client.create_collection(collection_name=collection_name, schema=schema, index_params=index_params)
+
+    def _serialize_data(self, d: dict) -> dict:
+        # only keep primary_key, embedding and global_meta
+        res = {
+            self._primary_key: d.get(self._primary_key, '')
+        }
+        for embed_key, value in d.get('embedding', {}).items():
+            res[self._gen_embed_key(embed_key)] = value
+        global_meta = d.get('global_meta', {})
+        for name, desc in self._global_metadata_desc.items():
+            value = global_meta.get(name, desc.default_value)
+            if value is not None:
+                res[self._gen_global_meta_key(name)] = value
+        return res
+
+    def _deserialize_data(self, d: dict) -> dict:
+        res = {
+            self._primary_key: d.get(self._primary_key, ''),
+            'embedding': {}
+        }
+        for k, v in d.items():
+            if k.startswith(EMBED_PREFIX):
+                res['embedding'][k[len(EMBED_PREFIX):]] = v
+        return res
+
+    def _gen_embed_key(self, k: str) -> str:
+        return EMBED_PREFIX + k
+
+    def _gen_global_meta_key(self, k: str) -> str:
+        return GLOBAL_META_KEY_PREFIX + k
+
+    def _construct_criteria(self, criteria: dict) -> dict:
+        res = {}
+        criteria = dict(criteria)
+        if self._primary_key in criteria:
+            res['ids'] = criteria[self._primary_key]
+        else:
+            filter_str = ''
+            for key, vaule in criteria.items():
+                if key not in self._global_metadata_desc:
+                    continue
+                field_name = self._gen_global_meta_key(key)
+                if len(filter_str) > 0:
+                    filter_str += ' and '
+                if isinstance(vaule, list):
+                    filter_str += f'{field_name} in {vaule}'
+                elif isinstance(vaule, str):
+                    filter_str += f'{field_name} == "{vaule}"'
+                else:
+                    raise ValueError(f'invalid criteria type: {type(vaule)}')
+            res['filter'] = filter_str
+        return res
+
+    @override
+    def search(self, collection_name: str, query_embedding: Union[dict, List[float]], topk: int,
+               filters: Optional[Dict[str, Union[List, set]]] = None, embed_key: Optional[str] = None,
+               filter_str: Optional[str] = '', **kwargs) -> List[dict]:
+        self._connect()
+        if not embed_key or embed_key not in self._embed_datatypes:
+            raise ValueError(f'[Milvus Store - search] Not supported or None `embed_key`: {embed_key}')
+        res = []
+        filter_expr = self._construct_filter_expr(filters) if filters else filter_str
+        results = self._client.search(collection_name=collection_name, data=[query_embedding], limit=topk,
+                                      anns_field=self._gen_embed_key(embed_key),
+                                      filter=filter_expr)
+        if len(results) != 1:
+            raise ValueError(f'number of results [{len(results)}] != expected [1]')
+        for result in results[0]:
+            score = result.get('distance', 0)
+            uid = result.get('id', result.get(self._primary_key, ''))
+            if not uid:
+                continue
+            res.append({'uid': uid, 'score': score})
+        self._disconnect()
+        return res
+
+    def _construct_filter_expr(self, filters: Dict[str, Union[str, int, List, Set]]) -> str:
+        ret_str = ''
+        if not filters:
+            return ret_str
+        for name, candidates in filters.items():
+            desc = self._global_metadata_desc.get(name)
+            if not desc:
+                raise ValueError(f'cannot find desc of field [{name}]')
+            key = self._gen_global_meta_key(name)
+            if isinstance(candidates, str):
+                candidates = [candidates]
+            elif (not isinstance(candidates, list)) and (not isinstance(candidates, set)):
+                candidates = list(candidates)
+            if desc.data_type == DataType.ARRAY:
+                ret_str += f'array_contains_any({key}, {candidates}) and '
+            else:
+                ret_str += f'{key} in {candidates} and '
+        if len(ret_str) > 0:
+            return ret_str[:-5]  # truncate the last ' and '
+        return ret_str
diff --git a/lazyllm/tools/rag/tools/__init__.py b/lazyllm/tools/rag/tools/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/lazyllm/tools/rag/utils.py b/lazyllm/tools/rag/utils.py
index 02483c4da..6d4a34a02 100644
--- a/lazyllm/tools/rag/utils.py
+++ b/lazyllm/tools/rag/utils.py
@@ -12,6 +12,7 @@
 from concurrent.futures import ThreadPoolExecutor, as_completed
 from typing import (Any, Callable, Dict, Generator, List, Optional, Set, Tuple,
                     Union)
+from urllib.parse import urlsplit, urlunsplit
 
 import pydantic
 import sqlalchemy
@@ -734,13 +735,10 @@ class Config:
         }
 
 
-def run_in_thread_pool(
-    func: Callable,
-    params: List[Dict] = [],
-) -> Generator:
+def run_in_thread_pool(func: Callable, params: Optional[List[Dict]] = None) -> Generator:
     tasks = []
     with ThreadPoolExecutor() as pool:
-        for kwargs in params:
+        for kwargs in params or []:
             thread = pool.submit(func, **kwargs)
             tasks.append(thread)
 
@@ -952,3 +950,21 @@ def is_sparse(embedding: Union[Dict[int, float], List[Tuple[int, float]], List[f
         return False
 
     raise TypeError(f'unsupported embedding type `{type(embedding[0])}`')
+
+
+def ensure_call_endpoint(raw: str) -> str:
+    if not raw: return raw
+    if '://' not in raw:
+        raw = f'http://{raw}'
+
+    parts = urlsplit(raw)
+    path = parts.path or ''
+
+    if path.rstrip('/').endswith('_call'):
+        new_path = path.rstrip('/')
+    elif path == '' or path.endswith('/'):
+        new_path = (path or '') + '_call' if path.endswith('/') else '/_call'
+    else:
+        new_path = path + '/_call'
+
+    return urlunsplit((parts.scheme, parts.netloc, new_path, parts.query, parts.fragment))
diff --git a/lazyllm/tools/rag/web.py b/lazyllm/tools/rag/web.py
index e638421dd..b37c836b0 100644
--- a/lazyllm/tools/rag/web.py
+++ b/lazyllm/tools/rag/web.py
@@ -181,19 +181,12 @@ class Mode:
         Refresh = 1
         Appendix = 2
 
-    def __init__(
-        self,
-        doc_server: ServerModule,
-        title="文档管理演示终端",
-        port=range(20800, 20999),
-        history=[],
-        text_mode=None,
-        trace_mode=None,
-    ) -> None:
+    def __init__(self, doc_server: ServerModule, title="文档管理演示终端", port=None,
+                 history=None, text_mode=None, trace_mode=None) -> None:
         super().__init__()
         self.title = title
-        self.port = port
-        self.history = history
+        self.port = port or range(20800, 20999)
+        self.history = history or []
         self.trace_mode = trace_mode if trace_mode else DocWebModule.Mode.Refresh
         self.text_mode = text_mode if text_mode else DocWebModule.Mode.Dynamic
         self.doc_server = doc_server
diff --git a/lazyllm/tools/sql/sql_manager.py b/lazyllm/tools/sql/sql_manager.py
index 851e24856..851b20e80 100644
--- a/lazyllm/tools/sql/sql_manager.py
+++ b/lazyllm/tools/sql/sql_manager.py
@@ -153,7 +153,7 @@ def desc(self) -> str:
             self.set_desc(tables_desc_dict={})
         return self._desc
 
-    def set_desc(self, tables_desc_dict: dict = {}):
+    def set_desc(self, tables_desc_dict: dict = {}):  # noqa B006
         self._desc = ""
         if not isinstance(tables_desc_dict, dict):
             raise ValueError(f"desc type {type(tables_desc_dict)} not supported")
diff --git a/lazyllm/tools/sql_call/sql_call.py b/lazyllm/tools/sql_call/sql_call.py
index fa5619acc..9424caefe 100644
--- a/lazyllm/tools/sql_call/sql_call.py
+++ b/lazyllm/tools/sql_call/sql_call.py
@@ -2,7 +2,7 @@
 from lazyllm.components import ChatPrompter
 from lazyllm.tools.utils import chat_history_to_str
 from lazyllm import pipeline, globals, bind, _0, switch
-from typing import List, Any, Dict, Union, Callable
+from typing import List, Any, Dict, Optional, Union, Callable
 import datetime
 import re
 from lazyllm.tools.sql import DBManager
@@ -103,7 +103,7 @@ def __init__(
     def sql_query_promt_hook(
         self,
         input: Union[str, List, Dict[str, str], None] = None,
-        history: List[Union[List[str], Dict[str, Any]]] = [],
+        history: Optional[List[Union[List[str], Dict[str, Any]]]] = None,
         tools: Union[List[Dict[str, Any]], None] = None,
         label: Union[str, None] = None,
     ):
@@ -115,7 +115,7 @@ def sql_query_promt_hook(
             raise ValueError(f"Unexpected type for input: {type(input)}")
         return (
             dict(current_date=current_date, db_type=self._sql_tool.db_type, desc=schema_desc, user_query=input),
-            history,
+            history or [],
             tools,
             label,
         )
@@ -123,7 +123,7 @@ def sql_query_promt_hook(
     def sql_explain_prompt_hook(
         self,
         input: Union[str, List, Dict[str, str], None] = None,
-        history: List[Union[List[str], Dict[str, Any]]] = [],
+        history: List[Union[List[str], Dict[str, Any]]] = [],  # noqa B006
         tools: Union[List[Dict[str, Any]], None] = None,
         label: Union[str, None] = None,
     ):
diff --git a/lazyllm/tools/train_service/serve.py b/lazyllm/tools/train_service/serve.py
index 84fb08382..39f3c51ea 100644
--- a/lazyllm/tools/train_service/serve.py
+++ b/lazyllm/tools/train_service/serve.py
@@ -122,7 +122,7 @@ def _get_log_path(self, model):
         return log_files_paths[-1]
 
     @app.post('/v1/fine_tuning/jobs')
-    async def create_job(self, job: JobDescription, token: str = Header(None)):
+    async def create_job(self, job: JobDescription, token: str = Header(None)):  # noqa B008
         # await self.authorize_current_user(token)
         if not self._in_user_job_info(token):
             self._update_user_job_info(token)
@@ -193,7 +193,7 @@ async def create_job(self, job: JobDescription, token: str = Header(None)):
         return {'job_id': job_id, 'status': status}
 
     @app.post('/v1/fine_tuning/jobs/{job_id}/cancel')
-    async def cancel_job(self, job_id: str, token: str = Header(None)):
+    async def cancel_job(self, job_id: str, token: str = Header(None)):  # noqa B008
         await self.authorize_current_user(token)
         if not self._in_active_jobs(token, job_id):
             raise HTTPException(status_code=404, detail='Job not found')
@@ -219,7 +219,7 @@ async def cancel_job(self, job_id: str, token: str = Header(None)):
         return {'status': status}
 
     @app.get('/v1/fine_tuning/jobs')
-    async def list_jobs(self, token: str = Header(None)):
+    async def list_jobs(self, token: str = Header(None)):  # noqa B008
         # await self.authorize_current_user(token)
         if not self._in_user_job_info(token):
             self._update_user_job_info(token)
@@ -252,7 +252,7 @@ async def list_jobs(self, token: str = Header(None)):
         return server_running_dict
 
     @app.get('/v1/fine_tuning/jobs/{job_id}')
-    async def get_job_info(self, job_id: str, token: str = Header(None)):
+    async def get_job_info(self, job_id: str, token: str = Header(None)):  # noqa B008
         await self.authorize_current_user(token)
         if not self._in_user_job_info(token, job_id):
             raise HTTPException(status_code=404, detail='Job not found')
@@ -262,7 +262,7 @@ async def get_job_info(self, job_id: str, token: str = Header(None)):
         return self._read_user_job_info(token, job_id)
 
     @app.get('/v1/fine_tuning/jobs/{job_id}/events')
-    async def get_job_log(self, job_id: str, token: str = Header(None)):
+    async def get_job_log(self, job_id: str, token: str = Header(None)):  # noqa B008
         await self.authorize_current_user(token)
         if not self._in_user_job_info(token, job_id):
             raise HTTPException(status_code=404, detail='Job not found')
diff --git a/lazyllm/tools/utils.py b/lazyllm/tools/utils.py
index e7ade848c..5a640269d 100644
--- a/lazyllm/tools/utils.py
+++ b/lazyllm/tools/utils.py
@@ -1,7 +1,8 @@
-from typing import Dict, Union, Any, List
+from typing import Dict, Union, Any, List, Optional
 
 
-def chat_history_to_str(history: List[Union[List[str], Dict[str, Any]]] = [], user_query: Union[str, None] = None):
+def chat_history_to_str(history: Optional[List[Union[List[str], Dict[str, Any]]]] = None,
+                        user_query: Union[str, None] = None):
     MAX_HISTORY_LEN = 20
     history_info = ""
     MAP_ROLE = {"user": "human", "assitant": "assitant"}
diff --git a/lazyllm/tools/webpages/webmodule.py b/lazyllm/tools/webpages/webmodule.py
index b6cd89533..a76738689 100644
--- a/lazyllm/tools/webpages/webmodule.py
+++ b/lazyllm/tools/webpages/webmodule.py
@@ -34,8 +34,8 @@ class Mode:
         Refresh = 1
         Appendix = 2
 
-    def __init__(self, m: Any, *, components: Dict[Any, Any] = dict(), title: str = '对话演示终端',
-                 port: Optional[Union[int, range, tuple, list]] = None, history: List[Any] = [],
+    def __init__(self, m: Any, *, components: Dict[Any, Any] = dict(), title: str = '对话演示终端',  # noqa B008
+                 port: Optional[Union[int, range, tuple, list]] = None, history: List[Any] = [],  # noqa B006
                  text_mode: Optional[Mode] = None, trace_mode: Optional[Mode] = None, audio: bool = False,
                  stream: bool = False, files_target: Optional[Union[Any, List[Any]]] = None,
                  static_paths: Optional[Union[str, Path, List[Union[str, Path]]]] = None,
diff --git a/pyproject.toml b/pyproject.toml
index 4c9f1e1de..a1070fb3a 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "lazyllm"
-version = "0.5.0"
+version = "0.6.0"
 description = "A Low-code Development Tool For Building Multi-agent LLMs Applications."
 authors = ["wangzhihong <wangzhihong@sensetime.com>"]
 license = "Apache-2.0 license"
@@ -49,7 +49,7 @@ beautifulsoup4 = "^4.13.4"
 appdirs = { version = "*", optional = true }
 loralib = { version = "*", optional = true }
 flake8 = { version = ">=7.0.0", optional = true }
-chromadb = { version = "^0.5.5", optional = true }
+chromadb = {version = ">=1.0.6", optional = true}
 sentence-transformers = { version = "^3.0.1", optional = true }
 modelscope = { version = "^1.17.1", optional = true }
 pytest = { version = "^8.3.3", optional = true }
@@ -105,6 +105,7 @@ imageio-ffmpeg = {version = "==0.6.0", optional = true}
 volcengine-python-sdk = {version = ">=4.0.6", extras = ["ark"], optional = true}
 dashscope = {version = ">=1.23.6", optional = true}
 zhipuai = {version = ">=2.1.5.20250708", optional = true}
+opensearch-py = {version = "^3.0.0", optional = true}
 
 [tool.poetry.extras]
 standard = [
@@ -327,7 +328,8 @@ rag-advanced = [
     "torch",
     "transformers",
     "boto3",
-    "botocore"
+    "botocore",
+    "opensearch-py"
 ]
 agent-advanced = [
     "ctranslate2",
diff --git a/requirements.full.txt b/requirements.full.txt
index e96a79d16..8cc15f1e3 100644
--- a/requirements.full.txt
+++ b/requirements.full.txt
@@ -36,7 +36,7 @@ beautifulsoup4
 appdirs
 loralib
 flake8>=7.0.0
-chromadb
+chromadb>=1.0.6
 sentence-transformers
 modelscope
 pytest
@@ -91,4 +91,5 @@ imageio==2.37.0
 imageio-ffmpeg==0.6.0
 volcengine-python-sdk[ark]>=4.0.6
 dashscope>=1.23.6
-zhipuai>=2.1.5.20250708
\ No newline at end of file
+zhipuai>=2.1.5.20250708
+opensearch-py>=3.0.0
diff --git a/tests/advanced_tests/full_test/test_deploy.py b/tests/advanced_tests/full_test/test_deploy.py
index 439a4758b..760597526 100644
--- a/tests/advanced_tests/full_test/test_deploy.py
+++ b/tests/advanced_tests/full_test/test_deploy.py
@@ -28,14 +28,12 @@ def wrapper(*args, **kwargs):
         original_values = {var: os.environ.get(var, None) for var in env_vars_to_reset}
         for var in env_vars_to_reset:
             os.environ.pop(var, None)
-            lazyllm.config.refresh(var)
         result = func(*args, **kwargs)
         for var, value in original_values.items():
             if value is None:
                 os.environ.pop(var, None)
             else:
                 os.environ[var] = value
-                lazyllm.config.refresh(var)
         return result
     return wrapper
 
diff --git a/tests/advanced_tests/full_test/test_example.py b/tests/advanced_tests/full_test/test_example.py
index dc9c878a4..502200f94 100644
--- a/tests/advanced_tests/full_test/test_example.py
+++ b/tests/advanced_tests/full_test/test_example.py
@@ -181,7 +181,7 @@ def test_upload_and_filter(self):
         assert 'Shanghai' in res and 'Beijing' not in res
 
         store = self.documents._impl.store
-        nodes = store.get_nodes('block')
+        nodes = store.get_nodes(group='block')
         for node in nodes:
             if node.global_metadata[RAG_DOC_PATH].endswith('test1.txt'):
                 test1_docid = node.global_metadata[RAG_DOC_ID]
diff --git a/tests/advanced_tests/full_test/test_mineru_server.py b/tests/advanced_tests/full_test/test_mineru_server.py
new file mode 100644
index 000000000..e897868f3
--- /dev/null
+++ b/tests/advanced_tests/full_test/test_mineru_server.py
@@ -0,0 +1,253 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+import os
+import pytest
+import requests
+import unittest
+import lazyllm
+from lazyllm import LOG
+from lazyllm.components.deploy.mineru.mineru_server_module import MineruServer
+from lazyllm.tools.rag.readers.mineru_pdf_reader import MineruPDFReader
+
+os.environ['MINERU_MODEL_SOURCE'] = 'modelscope'
+
+@pytest.fixture(autouse=True)
+def setup_tmpdir(request, tmpdir):
+    request.cls.tmpdir = tmpdir
+
+
+@pytest.fixture(scope='class', autouse=True)
+def setup_tmpdir_class(request, tmpdir_factory):
+    request.cls.tmpdir_class = tmpdir_factory.mktemp('mineru_test')
+
+
+@pytest.mark.skip(reason='Skip for env issues')
+@pytest.mark.skip_on_win
+@pytest.mark.skip_on_mac
+@pytest.mark.usefixtures('setup_tmpdir_class')
+class TestMineruServer(unittest.TestCase):
+    TEST_FILES_LOCAL = {
+        'pdf1': os.path.join(lazyllm.config['data_path'], 'ci_data/test_mineru/test_mineru1.pdf'),
+        'pdf2': os.path.join(lazyllm.config['data_path'], 'ci_data/test_mineru/test_mineru2.pdf'),
+        'pdf3': os.path.join(lazyllm.config['data_path'], 'ci_data/test_mineru/test_mineru3.pdf'),
+        'docx': os.path.join(lazyllm.config['data_path'], 'ci_data/test_mineru/test_mineru.docx'),
+        'pptx': os.path.join(lazyllm.config['data_path'], 'ci_data/test_mineru/test_mineru.pptx'),
+    }
+
+    @classmethod
+    def setUpClass(cls):
+        cls.cache_dir = str(cls.tmpdir_class.mkdir('cache'))
+        cls.image_save_dir = str(cls.tmpdir_class.mkdir('images'))
+        cls.server = MineruServer(
+            cache_dir=cls.cache_dir,
+            image_save_dir=cls.image_save_dir,
+            pythonpath=None, port=31769
+        )
+        cls.server.start()
+        cls.url = cls.server._url[:-9] + '/api/v1/pdf_parse'
+        cls.test_results = {}
+
+    @classmethod
+    def tearDownClass(cls):
+        if hasattr(cls, 'server'):
+            cls.server.stop()
+
+    def setUp(self):
+        self.test_files = self.__class__.TEST_FILES
+        self.validate_test_files()
+
+    def validate_test_files(self):
+        '''Validate that test files exist'''
+        missing_files = []
+        for file_type, file_path in self.test_files.items():
+            if not os.path.exists(file_path):
+                missing_files.append(f'{file_type}: {file_path}')
+
+        if missing_files:
+            error_msg = '❌ The following test files do not exist:\n' + '\n'.join(missing_files)
+            error_msg += '\n\nPlease modify the test_files dictionary in the setUp method to provide correct file paths.'
+            raise FileNotFoundError(error_msg)
+
+    def post_pdf_parse(
+        self,
+        files,
+        backend='pipeline',
+        return_md=True,
+        return_content_list=True,
+        use_cache=False,
+    ):
+        '''Fix: Use correct Form data format to send request'''
+        data = {
+            'files': files,
+            'backend': backend,
+            'return_md': return_md,
+            'return_content_list': return_content_list,
+            'use_cache': use_cache,
+        }
+        try:
+            resp = requests.post(self.__class__.url, data=data)
+            try:
+                return resp.status_code, resp.json()
+            except Exception:
+                return resp.status_code, resp.text
+        except Exception as e:
+            return 500, str(e)
+
+    def check_result(self, result):
+        assert isinstance(result, dict)
+        assert 'result' in result, f'result: {result}'
+        for res in result['result']:
+            assert 'md_content' in res and 'content_list' in res
+
+    @pytest.mark.order(1)
+    def test_pdf_parsing(self):
+        '''Test 1: Initial PDF parsing (create cache foundation)'''
+        initial_files = [str(self.test_files['pdf1']), str(self.test_files['pdf2'])]
+
+        status, result = self.post_pdf_parse(
+            files=initial_files,
+            backend='pipeline',
+            return_md=True,
+            return_content_list=True,
+            use_cache=False,
+        )
+        assert status == 200, f'status: {status}, error: {result}'
+        self.check_result(result)
+
+        content_list = result['result'][0]['content_list']
+        self.__class__.test_results[str(self.test_files['pdf1'])] = result['result'][0]
+        types = [node.get('type', '') for node in content_list]
+        assert 'text' in types
+        assert 'image' in types
+        assert 'table' in types
+        assert 'equation' in types
+        image_paths = [os.path.join(self.image_save_dir, node.get('img_path', ''))
+                       for node in content_list if node['type'] == 'image']
+        for image_path in image_paths:
+            assert os.path.exists(image_path)
+
+        for i, file_result in enumerate(result['result']):
+            file_path = initial_files[i]
+            self.__class__.test_results[file_path] = file_result
+
+    @pytest.mark.order(2)
+    def test_pdf_parsing_with_upload_files(self):
+        '''Test 2: Initial upload file object parsing'''
+        file_path = str(self.test_files['pdf2'])
+        with open(file_path, 'rb') as f:
+            upload_files = [
+                (
+                    'upload_files',
+                    (os.path.basename(file_path), f.read(), 'application/pdf'),
+                )
+            ]
+        data = {
+            'backend': 'pipeline',
+            'return_md': True,
+            'return_content_list': True,
+            'use_cache': False,
+        }
+        resp = requests.post(self.__class__.url, data=data, files=upload_files)
+        status = resp.status_code
+        assert status == 200, f'status: {status}, error: {resp.text}'
+        result = resp.json()
+        self.check_result(result)
+        assert len(result['result'][0]['content_list']) == 2
+
+    @pytest.mark.order(3)
+    def test_pdf_parsing_with_cache(self):
+        '''Test 3: Mixed PDF cache and new files'''
+        mixed_files = [str(self.test_files['pdf1']), str(self.test_files['pdf3'])]
+        status, result = self.post_pdf_parse(
+            files=mixed_files,
+            backend='pipeline',
+            return_md=True,
+            return_content_list=True,
+            use_cache=True,  # Use cache
+        )
+        assert status == 200, f'status: {status}, error: {result}'
+        self.check_result(result)
+        assert len(result['result']) == 2
+        content_list = result['result'][0]['content_list']
+        assert content_list == self.__class__.test_results[mixed_files[0]]['content_list']
+
+    @pytest.mark.order(4)
+    def test_office_document_parsing(self):
+        '''Test 4: Office document parsing functionality'''
+        office_files = [str(self.test_files['docx']), str(self.test_files['pptx'])]
+        for file_path in office_files:
+            status, result = self.post_pdf_parse(
+                files=[file_path],
+                backend='pipeline',
+                return_md=True,
+                return_content_list=True,
+                use_cache=False,
+            )
+            assert status in [200, 400], f'status: {status}, error: {result}'
+            if status == 200:
+                self.check_result(result)
+            else:
+                LOG.warning('Skipping office document parsing test')
+
+    @pytest.mark.order(5)
+    def test_different_backends(self):
+        '''Test 6: Different backend testing'''
+        backends = ['vlm-sglang-engine', 'vlm-transformers']
+        test_file = str(self.test_files['pdf1'])
+        for backend in backends:
+            status, result = self.post_pdf_parse(
+                files=[test_file],
+                backend=backend,
+                return_md=True,
+                return_content_list=True,
+                use_cache=False,
+            )
+
+            if status != 200:
+                LOG.warning(f'Skipping backend: {backend}, status: {status}, error: {result}')
+                continue
+            self.check_result(result)
+
+    @pytest.mark.order(6)
+    def test_pdf_reader(self):
+        '''Test 6: Test pdf reader (file path)'''
+        pdf_reader = MineruPDFReader(self.__class__.server._url[:-9])
+        pdf_path = str(self.test_files['pdf1'])
+        nodes = pdf_reader(pdf_path)
+        assert isinstance(nodes, list)
+        assert len(nodes) == len(self.__class__.test_results[pdf_path]['content_list'])
+        image_paths = [os.path.join(self.image_save_dir, node.metadata.get('image_path', ''))
+                       for node in nodes if node.metadata.get('type', '') == 'image']
+        for image_path in image_paths:
+            print(image_path)
+            assert os.path.exists(image_path)
+
+    @pytest.mark.order(7)
+    def test_pdf_reader_with_upload_files(self):
+        '''Test 7: Test pdf reader (upload files)'''
+        pdf_reader = MineruPDFReader(self.__class__.server._url[:-9], upload_mode=True)
+        pdf_path = str(self.test_files['pdf1'])
+        nodes = pdf_reader(pdf_path)
+        assert isinstance(nodes, list)
+        assert len(nodes) == len(self.__class__.test_results[pdf_path]['content_list'])
+
+    @pytest.mark.order(8)
+    def test_pdf_reader_with_post_func(self):
+        '''Test 8: Test pdf reader's post-processing function post_func functionality'''
+        def test_post_func(nodes):
+            for node in nodes:
+                node._content += '[after_process]'
+            return nodes
+
+        pdf_reader = MineruPDFReader(
+            self.__class__.server._url[:-9],
+            post_func=test_post_func
+        )
+
+        pdf_path = str(self.test_files['pdf1'])
+        nodes = pdf_reader(pdf_path)
+
+        nodes = pdf_reader(pdf_path)
+        assert isinstance(nodes, list)
+        for node in nodes:
+            assert node._content.endswith('[after_process]')
diff --git a/tests/advanced_tests/standard_test/test_deploy.py b/tests/advanced_tests/standard_test/test_deploy.py
index 01ddf15d3..f791bd5f6 100644
--- a/tests/advanced_tests/standard_test/test_deploy.py
+++ b/tests/advanced_tests/standard_test/test_deploy.py
@@ -17,13 +17,11 @@ def set_enviroment(request):
     env_key, env_var = request.param
     original_value = os.getenv(env_key, None)
     os.environ[env_key] = env_var
-    lazyllm.config.refresh(env_key)
     yield
     if original_value:
         os.environ[env_key] = original_value
     else:
         os.environ.pop(env_key, None)
-    lazyllm.config.refresh(env_key)
 
 class TestDeploy(object):
 
diff --git a/tests/advanced_tests/standard_test/test_milvus_filter.py b/tests/advanced_tests/standard_test/test_milvus_filter.py
index 7bcaad07b..40e76ee23 100644
--- a/tests/advanced_tests/standard_test/test_milvus_filter.py
+++ b/tests/advanced_tests/standard_test/test_milvus_filter.py
@@ -15,7 +15,8 @@
 from lazyllm.tools.rag.global_metadata import GlobalMetadataDesc as DocField
 from lazyllm.tools.rag import DataType
 
-def get_milvus_store_conf(rag_dir: str, kb_group_name: str = str(uuid.uuid4())):
+def get_milvus_store_conf(rag_dir: str, kb_group_name: str = ''):
+    kb_group_name = kb_group_name or str(uuid.uuid4())
     milvus_db_dir = os.path.join(rag_dir, kb_group_name)
     if not os.path.exists(milvus_db_dir):
         os.makedirs(milvus_db_dir)
@@ -36,7 +37,7 @@ def get_milvus_store_conf(rag_dir: str, kb_group_name: str = str(uuid.uuid4())):
     return milvus_store_conf
 
 
-def get_milvus_index_conf(rag_dir: str, kb_group_name: str = str(uuid.uuid4())):
+def get_milvus_index_conf(rag_dir: str, kb_group_name: str = str(uuid.uuid4())):  # noqa B008
     milvus_db_dir = os.path.join(rag_dir, kb_group_name)
     if not os.path.exists(milvus_db_dir):
         os.makedirs(milvus_db_dir)
@@ -112,27 +113,3 @@ def test_filter_by_tag(self):
         # in case of re-run with old failing staus that will trigger reparsing, call release to clean db
         doc._manager._dlm.release()
         doc.stop()
-
-    def test_smart_embedding_index(self):
-        CUSTOM_DOC_FIELDS = {"department": DocField(data_type=DataType.VARCHAR, max_size=65535, default_value=' ')}
-        Document.create_node_group('sentences', transform=SentenceSplitter, chunk_size=512, chunk_overlap=100)
-        doc = Document(self.index_dir, name='law_kg_smart', doc_fields=CUSTOM_DOC_FIELDS,
-                       embed={"dense": OnlineEmbeddingModule(source="qwen")}, manager=True,
-                       store_conf=get_milvus_index_conf(self.doc_dir, 'law_kg_smart'))
-        retriever = Retriever(doc, group_name="sentences", topk=5, embed_keys=['dense'], similarity='cosine')
-        retriever_bm25 = Retriever(doc, group_name="sentences", topk=5, similarity='bm25')
-        doc.start()
-
-        doc_manager_url = doc._manager.url.rsplit('/', 1)[0]
-        do_upload(doc_manager_url, 'law_kg_smart')
-        time.sleep(20)
-        query = "合同问题"
-
-        nodes = retriever(query, filters={'department': ['dpt_123']})
-        assert len(nodes) == 1 and nodes[0].global_metadata["department"] == "dpt_123"
-
-        nodes = retriever_bm25(query, filters={'department': ['dpt_123']})
-        assert len(nodes) == 1 and nodes[0].global_metadata["department"] == "dpt_123"
-
-        doc._manager._dlm.release()
-        doc.stop()
diff --git a/tests/advanced_tests/standard_test/test_reranker.py b/tests/advanced_tests/standard_test/test_reranker.py
index 0d7b5a480..1a6d700bc 100644
--- a/tests/advanced_tests/standard_test/test_reranker.py
+++ b/tests/advanced_tests/standard_test/test_reranker.py
@@ -1,6 +1,5 @@
 import unittest
 import os
-import lazyllm
 from lazyllm.tools.rag.doc_node import DocNode
 from lazyllm.tools.rag.rerank import Reranker, register_reranker
 
@@ -43,7 +42,6 @@ def test_module_reranker(self):
         for value in test_cases:
             with self.subTest(value=value):
                 os.environ[env_key] = value
-                lazyllm.config.refresh(env_key)
                 reranker = Reranker(name="ModuleReranker", model="bge-reranker-large", topk=2)
                 reranker.start()
                 results = reranker.forward(self.nodes, query='cherry')
@@ -55,7 +53,6 @@ def test_module_reranker(self):
                 assert results[0].relevance_score > results[1].relevance_score
         if original_value:
             os.environ[env_key] = original_value
-            lazyllm.config.refresh(env_key)
 
     def test_register_reranker_decorator(self):
         @register_reranker
diff --git a/tests/basic_tests/test_common.py b/tests/basic_tests/test_common.py
index 8b0b0a8b8..53ee9f491 100644
--- a/tests/basic_tests/test_common.py
+++ b/tests/basic_tests/test_common.py
@@ -83,7 +83,7 @@ def is_equal2(x):
         [t.start() for t in ts]
 
         assert ts[0].get_result() == 2
-        with pytest.raises(Exception):
+        with pytest.raises(Exception):  # noqa B017
             ts[1].get_result()
 
     def test_common_makerepr(self):
diff --git a/tests/basic_tests/test_config.py b/tests/basic_tests/test_config.py
index 316d89bf4..cd8b40461 100644
--- a/tests/basic_tests/test_config.py
+++ b/tests/basic_tests/test_config.py
@@ -44,10 +44,8 @@ class TestConfig(object):
     def test_refresh(self):
         origin = copy.deepcopy(lazyllm.config.impl)
         os.environ['LAZYLLM_GPU_TYPE'] = 'H100'
-        lazyllm.config.refresh('LAZYLLM_GPU_TYPE')
         assert lazyllm.config.impl['gpu_type'] == 'H100'
         os.environ['LAZYLLM_GPU_TYPE'] = origin['gpu_type']
-        lazyllm.config.refresh('gpu_type')
         assert lazyllm.config.impl['gpu_type'] == origin['gpu_type']
         lazyllm.config.refresh()
         assert lazyllm.config.impl == origin
diff --git a/tests/basic_tests/test_doc_processor.py b/tests/basic_tests/test_doc_processor.py
new file mode 100644
index 000000000..953c5eed6
--- /dev/null
+++ b/tests/basic_tests/test_doc_processor.py
@@ -0,0 +1,177 @@
+import os
+import shutil
+import uuid
+import time
+import tempfile
+import requests
+import unittest
+
+from lazyllm.tools.rag.doc_processor import DocumentProcessor
+from lazyllm import Document, Retriever
+
+
+@unittest.skip("For local test")
+class TestDocProcessor(unittest.TestCase):
+
+    def setUp(self):
+        self._store_config = {
+            "vector_store": {
+                "type": "milvus",
+                "kwargs": {
+                    "uri": os.getenv("MILVUS_URI", ""),
+                    "db_name": os.getenv("MILVUS_DB_NAME", "lazyllm_test"),
+                    "index_kwargs": [
+                        {
+                            'embed_key': 'vec_dense',
+                            'index_type': 'FLAT',
+                            'metric_type': 'COSINE',
+                            'params': {
+                                'nlist': 128,
+                            }
+                        },
+                        {
+                            'embed_key': 'vec_sparse',
+                            'index_type': 'SPARSE_INVERTED_INDEX',
+                            'metric_type': 'IP',
+                            'params': {
+                                'nlist': 128,
+                            }
+                        }
+                    ]
+                }
+            },
+            "segment_store": {
+                'type': 'opensearch',
+                'kwargs': {
+                    'uris': os.getenv("OPENSEARCH_URI", ""),
+                    'client_kwargs': {
+                        "http_compress": True,
+                        "use_ssl": True,
+                        "verify_certs": False,
+                        "user": os.getenv("OPENSEARCH_USER", ""),
+                        "password": os.getenv("OPENSEARCH_PASSWORD", ""),
+                    }
+                }
+            }
+        }
+        self._temp_dir = tempfile.mkdtemp()
+        with open(os.path.join(self._temp_dir, "test.txt"), "w") as f:
+            f.write("This is a test txt file for doc processor.\nThe answer is lazyllm.")
+        self._file_path = os.path.join(self._temp_dir, "test.txt")
+        self._dp_port = 14410
+        self._document_port = 14411
+        self.mock_embed = {'vec_dense': lambda x: [1.0, 2.0, 3.0],
+                           'vec_sparse': lambda x: {0: 1.0, 1: 2.0, 2: 3.0}}
+        self._algo_name = "test_algo"
+        self.doc_processor = DocumentProcessor(port=self._dp_port)
+        self.document = Document(dataset_path=None, name=self._algo_name, embed=self.mock_embed,
+                                 store_conf=self._store_config, server=self._document_port, manager=self.doc_processor)
+        self.document.create_node_group("line", display_name="Line Chunk", transform=lambda x: x.split("\n"),
+                                        parent="CoarseChunk")
+        self.document.activate_group("CoarseChunk", embed_keys=["vec_dense", "vec_sparse"])
+        self.document.activate_group("line", embed_keys=["vec_dense", "vec_sparse"])
+        self.document.start()
+
+    def _upload_doc(self):
+        url = f"http://localhost:{self._dp_port}/doc/add"
+        data = {
+            "task_id": uuid.uuid4().hex,
+            "algo_id": self._algo_name,
+            "file_infos": [
+                {
+                    "file_path": self._file_path,
+                    "doc_id": "doc_test",
+                    "metadata": {
+                        "kb_id": "kb_test",
+                        "test_meta": "test1"
+                    }
+                }
+            ],
+            "db_info": {
+                "db_type": "mysql",
+                "db_name": "db_test",
+                "user": "user_test",
+                "password": "password_test",
+                "host": "host_test",
+                "port": 3306,
+                "table_name": "table_test"
+            },
+            "feedback_url": ""
+        }
+        try:
+            response = requests.post(url, json=data, timeout=5)
+            if response.status_code == 200:
+                time.sleep(20)
+            else:
+                raise requests.exceptions.RequestException(f"Request failed: {response.status_code}")
+        except requests.exceptions.RequestException as e:
+            self.fail(f"Request failed: {e}")
+
+    def tearDown(self):
+        self.document.clear_cache()
+        self.document.stop()
+        time.sleep(5)
+        if os.path.exists(self._temp_dir):
+            shutil.rmtree(self._temp_dir)
+
+    def test_algo_list(self):
+        url = f"http://localhost:{self._dp_port}/algo/list"
+        try:
+            response = requests.get(url, timeout=5)  # 添加超时
+            self.assertEqual(response.status_code, 200)
+            self.assertEqual(response.json().get("data")[0].get("algo_id"), self._algo_name)
+        except requests.exceptions.RequestException as e:
+            self.fail(f"Request failed: {e}")
+
+    def test_retrieve(self):
+        self._upload_doc()
+        retriever = Retriever(doc=self.document, group_name="line", topk=2, embed_keys=["vec_dense"])
+        nodes = retriever("What is the answer?")
+        self.assertEqual(len(nodes), 2)
+
+    def test_delete_doc(self):
+        self._upload_doc()
+        retriever = Retriever(doc=self.document, group_name="line", topk=2, embed_keys=["vec_dense"])
+        nodes = retriever("What is the answer?")
+        self.assertEqual(len(nodes), 2)
+        url = f"http://localhost:{self._dp_port}/doc/delete"
+        data = {"algo_id": self._algo_name, "dataset_id": "kb_test", "doc_ids": ["doc_test"]}
+        response = requests.delete(url, json=data)
+        self.assertEqual(response.status_code, 200)
+        time.sleep(3)
+        nodes = retriever("What is the answer?")
+        self.assertEqual(len(nodes), 0)
+
+    def test_update_meta(self):
+        self._upload_doc()
+        retriever = Retriever(doc=self.document, group_name="line", topk=2, embed_keys=["vec_dense"])
+        nodes = retriever("What is the answer?")
+        self.assertEqual(len(nodes), 2)
+        self.assertEqual(nodes[0].global_metadata.get("test_meta"), "test1")
+        url = f"http://localhost:{self._dp_port}/doc/meta/update"
+        data = {
+            "algo_id": self._algo_name,
+            "file_infos": [
+                {
+                    "file_path": self._file_path,
+                    "doc_id": "doc_test",
+                    "metadata": {"kb_id": "kb_test", "test_meta": "test2"}
+                }
+            ],
+            "db_info": {
+                "db_type": "mysql",
+                "db_name": "db_test",
+                "user": "user_test",
+                "password": "password_test",
+                "host": "host_test",
+                "port": 3306,
+                "table_name": "table_test"
+            }
+        }
+        response = requests.post(url, json=data)
+        self.assertEqual(response.status_code, 200)
+        time.sleep(20)
+        nodes = retriever("What is the answer?")
+        self.assertEqual(len(nodes), 2)
+        for node in nodes:
+            self.assertEqual(node.global_metadata.get("test_meta"), "test2")
diff --git a/tests/basic_tests/test_document.py b/tests/basic_tests/test_document.py
index f3107fc3b..748e00fa2 100644
--- a/tests/basic_tests/test_document.py
+++ b/tests/basic_tests/test_document.py
@@ -6,7 +6,7 @@
 from lazyllm.tools.rag.global_metadata import RAG_DOC_PATH, RAG_DOC_ID
 from lazyllm.tools.rag import Document, Retriever, TransformArgs, AdaptiveTransform, TempDocRetriever
 from lazyllm.tools.rag.doc_manager import DocManager
-from lazyllm.tools.rag.utils import DocListManager
+from lazyllm.tools.rag.utils import DocListManager, gen_docid
 from lazyllm.launcher import cleanup
 from lazyllm import config
 from unittest.mock import MagicMock
@@ -30,7 +30,7 @@ def setUp(self):
         self.tmp_file_a = tempfile.NamedTemporaryFile()
         self.tmp_file_b = tempfile.NamedTemporaryFile()
         mock_node = DocNode(group=LAZY_ROOT_NAME, text="dummy text")
-        mock_node._global_metadata = {RAG_DOC_PATH: self.tmp_file_a.name}
+        mock_node._global_metadata = {RAG_DOC_ID: gen_docid(self.tmp_file_a.name), RAG_DOC_PATH: self.tmp_file_a.name}
         self.mock_directory_reader.load_data.return_value = ([mock_node], [])
 
         self.doc_impl = DocImpl(embed=self.mock_embed, doc_files=[self.tmp_file_a.name])
@@ -80,12 +80,12 @@ def test_retrieve(self):
     def test_add_files(self):
         assert self.doc_impl.store is None
         self.doc_impl._lazy_init()
-        assert len(self.doc_impl.store.get_nodes(LAZY_ROOT_NAME)) == 1
+        assert len(self.doc_impl.store.get_nodes(group=LAZY_ROOT_NAME)) == 1
         new_doc = DocNode(text="new dummy text", group=LAZY_ROOT_NAME)
-        new_doc._global_metadata = {RAG_DOC_PATH: self.tmp_file_b.name}
+        new_doc._global_metadata = {RAG_DOC_ID: gen_docid(self.tmp_file_b.name), RAG_DOC_PATH: self.tmp_file_b.name}
         self.mock_directory_reader.load_data.return_value = ([new_doc], [])
         self.doc_impl._add_doc_to_store([self.tmp_file_b.name])
-        assert len(self.doc_impl.store.get_nodes(LAZY_ROOT_NAME)) == 2
+        assert len(self.doc_impl.store.get_nodes(group=LAZY_ROOT_NAME)) == 2
 
 class TestDocument(unittest.TestCase):
     @classmethod
@@ -232,6 +232,7 @@ def setUp(self):
 
         url_pattern = r'(http://\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}:\d+)'
         self.doc_server_addr = re.findall(url_pattern, self.server._url)[0]
+        self.time_sleep = 30
 
     def test_delete_files_in_store(self):
         files = [('files', ('test1.txt', io.BytesIO(b"John's house is in Beijing"), 'text/palin')),
@@ -246,25 +247,36 @@ def test_delete_files_in_store(self):
         lazyllm.LOG.info(f'debug!!! ids -> {ids}')
         assert len(ids) == 2
 
-        time.sleep(20)  # waiting for worker thread to update newly uploaded files
+        time.sleep(self.time_sleep)  # waiting for worker thread to update newly uploaded files
 
         # make sure that ids are written into the store
-        nodes = self.doc_impl.store.get_nodes(LAZY_ROOT_NAME)
+        nodes = self.doc_impl.store.get_nodes(group=LAZY_ROOT_NAME)
+        doc_ids = []
+        doc_file_paths = []
+        doc_metadatas = []
+        test1_docid = None
+        test2_docid = None
         for node in nodes:
-            if node.global_metadata[RAG_DOC_PATH].endswith('test1.txt'):
+            doc_ids.append(node.global_metadata[RAG_DOC_ID])
+            doc_file_paths.append(node.global_metadata.get(RAG_DOC_PATH, ''))
+            doc_metadatas.append(node.global_metadata)
+            if "test1" in node.global_metadata.get(RAG_DOC_PATH, ''):
                 test1_docid = node.global_metadata[RAG_DOC_ID]
-            elif node.global_metadata[RAG_DOC_PATH].endswith('test2.txt'):
+            elif "test2" in node.global_metadata.get(RAG_DOC_PATH, ''):
                 test2_docid = node.global_metadata[RAG_DOC_ID]
+        lazyllm.LOG.info(f'debug!!! doc_ids -> {doc_ids}\n')
+        lazyllm.LOG.info(f'debug!!! doc_file_paths -> {doc_file_paths}\n')
+        lazyllm.LOG.info(f'debug!!! doc_metadatas -> {doc_metadatas}\n')
         assert test1_docid and test2_docid
-        assert set([test1_docid, test2_docid]) == set(ids)
+        assert set(doc_ids) == set(ids)
 
         url = f'{self.doc_server_addr}/delete_files'
         response = httpx.post(url, json=dict(file_ids=[test1_docid]))
         assert response.status_code == 200 and response.json().get('code') == 200
 
-        time.sleep(20)  # waiting for worker thread to delete files
+        time.sleep(self.time_sleep)  # waiting for worker thread to delete files
 
-        nodes = self.doc_impl.store.get_nodes(LAZY_ROOT_NAME)
+        nodes = self.doc_impl.store.get_nodes(group=LAZY_ROOT_NAME)
         assert len(nodes) == 1
         assert nodes[0].global_metadata[RAG_DOC_ID] == test2_docid
         cur_meta_dict = nodes[0].global_metadata
@@ -272,36 +284,28 @@ def test_delete_files_in_store(self):
         url = f'{self.doc_server_addr}/add_metadata'
         response = httpx.post(url, json=dict(doc_ids=[test2_docid], kv_pair={"title": "title2"}))
         assert response.status_code == 200 and response.json().get('code') == 200
-        time.sleep(20)
+        time.sleep(self.time_sleep)
+        lazyllm.LOG.info(f'debug!!! cur_meta_dict -> {cur_meta_dict}\n')
         assert cur_meta_dict["title"] == "title2"
 
         response = httpx.post(url, json=dict(doc_ids=[test2_docid], kv_pair={"title": "TITLE2"}))
         assert response.status_code == 200 and response.json().get('code') == 200
-        time.sleep(20)
+        time.sleep(self.time_sleep)
+        lazyllm.LOG.info(f'debug!!! cur_meta_dict -> {cur_meta_dict}\n')
         assert cur_meta_dict["title"] == ["title2", "TITLE2"]
 
         url = f'{self.doc_server_addr}/delete_metadata_item'
-        response = httpx.post(url, json=dict(doc_ids=[test2_docid], keys=["signature"]))
-        assert response.status_code == 200 and response.json().get('code') == 200
-        time.sleep(20)
-        assert "signature" not in cur_meta_dict
 
         response = httpx.post(url, json=dict(doc_ids=[test2_docid], kv_pair={"title": "TITLE2"}))
         assert response.status_code == 200 and response.json().get('code') == 200
-        time.sleep(20)
+        time.sleep(self.time_sleep)
         assert cur_meta_dict["title"] == ["title2"]
 
-        url = f'{self.doc_server_addr}/update_or_create_metadata_keys'
-        response = httpx.post(url, json=dict(doc_ids=[test2_docid], kv_pair={"signature": "signature2"}))
-        assert response.status_code == 200 and response.json().get('code') == 200
-        time.sleep(20)
-        assert cur_meta_dict["signature"] == "signature2"
-
         url = f'{self.doc_server_addr}/reset_metadata'
         response = httpx.post(url, json=dict(doc_ids=[test2_docid],
                                              new_meta={"author": "author2", "signature": "signature_new"}))
         assert response.status_code == 200 and response.json().get('code') == 200
-        time.sleep(20)
+        time.sleep(self.time_sleep)
         assert cur_meta_dict["signature"] == "signature_new" and cur_meta_dict["author"] == "author2"
 
         url = f'{self.doc_server_addr}/query_metadata'
diff --git a/tests/basic_tests/test_document_store.py b/tests/basic_tests/test_document_store.py
new file mode 100644
index 000000000..1713a2812
--- /dev/null
+++ b/tests/basic_tests/test_document_store.py
@@ -0,0 +1,183 @@
+import os
+import tempfile
+import unittest
+import pytest
+
+from unittest.mock import MagicMock
+
+from lazyllm.tools.rag.store.document_store import _DocumentStore
+from lazyllm.tools.rag.store import MapStore, MilvusStore, BUILDIN_GLOBAL_META_DESC, HybridStore
+from lazyllm.tools.rag.data_type import DataType
+from lazyllm.tools.rag.global_metadata import RAG_DOC_ID, RAG_KB_ID
+from lazyllm.tools.rag.doc_node import DocNode, QADocNode, ImageDocNode
+
+node1 = DocNode(uid="1", text="text1", group="group1", parent=None,
+                global_metadata={RAG_KB_ID: "kb1", RAG_DOC_ID: "doc1", "tags": ["tag1"]})
+node2 = DocNode(uid="2", text="text2", group="group1", parent=None,
+                global_metadata={RAG_KB_ID: "kb2", RAG_DOC_ID: "doc2", "tags": ["tag2"]})
+node3 = DocNode(uid="3", text="text3", group="group2", parent=node1,
+                global_metadata={RAG_KB_ID: "kb3", RAG_DOC_ID: "doc3", "tags": ["tag3"]})
+qa_node1 = QADocNode(uid="4", query="query1", answer="answer1", group="qa", parent=node1,
+                     global_metadata={RAG_KB_ID: "kb1", RAG_DOC_ID: "doc3", "tags": ["tag4"]})
+image_node1 = ImageDocNode(uid="5", image_path="image1.png", group="image", parent=node1,
+                           global_metadata={RAG_KB_ID: "kb1", RAG_DOC_ID: "doc4", "tags": ["tag5"]})
+
+
+@pytest.mark.skip_on_win
+@pytest.mark.skip_on_mac
+class TestStoreWithMapAndMilvus(unittest.TestCase):
+    def setUp(self):
+        fd, self.store_dir = tempfile.mkstemp(suffix=".db")
+        os.close(fd)
+        self.mock_embed = {
+            'vec_dense': MagicMock(return_value=[1.0, 2.0, 3.0]),
+            'vec_sparse': MagicMock(return_value={0: 1.0, 1: 2.0, 2: 3.0}),
+        }
+        self.index_kwargs = [
+            {
+                'embed_key': 'vec_dense',
+                'index_type': 'FLAT',
+                'metric_type': 'COSINE',
+                'params': {
+                    'nlist': 128,
+                }
+            },
+            {
+                'embed_key': 'vec_sparse',
+                'index_type': 'SPARSE_INVERTED_INDEX',
+                'metric_type': 'IP',
+                'params': {
+                    'nlist': 128,
+                }
+            }
+        ]
+        self.embed_dims = {"vec_dense": 3}
+        self.embed_datatypes = {"vec_dense": DataType.FLOAT_VECTOR, "vec_sparse": DataType.SPARSE_FLOAT_VECTOR}
+        self.group_embed_keys = {
+            "group1": {"vec_dense", "vec_sparse"},
+            "group2": {"vec_dense", "vec_sparse"},
+            "qa": {"vec_dense", "vec_sparse"},
+            "image": {}
+        }
+        self.global_metadata_desc = BUILDIN_GLOBAL_META_DESC
+        self.document_store = _DocumentStore(algo_name="__default__",
+                                             store=HybridStore(MapStore(),
+                                                               MilvusStore(uri=self.store_dir,
+                                                                           index_kwargs=self.index_kwargs)),
+                                             group_embed_keys=self.group_embed_keys,
+                                             embed_dims=self.embed_dims, embed_datatypes=self.embed_datatypes,
+                                             embed=self.mock_embed,
+                                             global_metadata_desc=self.global_metadata_desc)
+        self.document_store.activate_group(["group1", "group2", "qa", "image"])
+        self.document_store.update_nodes([node1, node2, node3, qa_node1, image_node1])
+
+    def tearDown(self):
+        os.remove(self.store_dir)
+
+    def test_initialization(self):
+        self.assertEqual(set(self.document_store.activated_groups()),
+                         set(["group1", "group2", "qa", "image"]))
+
+    def test_get_nodes_by_group(self):
+        nodes = self.document_store.get_nodes(group="group1")
+        self.assertEqual(set([node.uid for node in nodes]), set([node1.uid, node2.uid]))
+        nodes = self.document_store.get_nodes(group="group2")
+        self.assertEqual(set([node.uid for node in nodes]), set([node3.uid]))
+        self.assertEqual(nodes[0].parent, node1.uid)
+        nodes = self.document_store.get_nodes(group="qa")
+        self.assertEqual(set([node.uid for node in nodes]), set([qa_node1.uid]))
+        self.assertEqual(isinstance(nodes[0], QADocNode), True)
+        nodes = self.document_store.get_nodes(group="image")
+        self.assertEqual(set([node.uid for node in nodes]), set([image_node1.uid]))
+        self.assertEqual(isinstance(nodes[0], ImageDocNode), True)
+
+    def test_get_nodes_by_doc_id(self):
+        nodes = self.document_store.get_nodes(group="group1", doc_ids=[node1.global_metadata.get(RAG_DOC_ID)])
+        self.assertEqual(set([node.uid for node in nodes]), set([node1.uid]))
+        nodes = self.document_store.get_nodes(group="group1", doc_ids=[node2.global_metadata.get(RAG_DOC_ID)])
+        self.assertEqual(set([node.uid for node in nodes]), set([node2.uid]))
+        nodes = self.document_store.get_nodes(group="group2", doc_ids=[node2.global_metadata.get(RAG_DOC_ID)])
+        self.assertEqual(len(nodes), 0)
+        nodes = self.document_store.get_nodes(group="group3", doc_ids=[node2.global_metadata.get(RAG_DOC_ID)])
+        self.assertEqual(len(nodes), 0)
+
+    def test_get_nodes_by_kb_id(self):
+        nodes = self.document_store.get_nodes(group="group1", kb_id=node1.global_metadata.get(RAG_KB_ID))
+        self.assertEqual(set([node.uid for node in nodes]), set([node1.uid]))
+        nodes = self.document_store.get_nodes(group="group2", kb_id=node3.global_metadata.get(RAG_KB_ID))
+        self.assertEqual(set([node.uid for node in nodes]), set([node3.uid]))
+        nodes = self.document_store.get_nodes(group="group3", kb_id=node3.global_metadata.get(RAG_KB_ID))
+        self.assertEqual(len(nodes), 0)
+
+    def test_get_nodes_by_uids(self):
+        nodes = self.document_store.get_nodes(group="group1", uids=[node1.uid])
+        self.assertEqual(set([node.uid for node in nodes]), set([node1.uid]))
+        nodes = self.document_store.get_nodes(group="group1", uids=[node2.uid])
+        self.assertEqual(set([node.uid for node in nodes]), set([node2.uid]))
+        nodes = self.document_store.get_nodes(group="group2", uids=[node3.uid])
+        self.assertEqual(set([node.uid for node in nodes]), set([node3.uid]))
+        nodes = self.document_store.get_nodes(group="group3", uids=[node3.uid])
+        self.assertEqual(len(nodes), 0)
+
+    def test_remove_nodes_by_uids(self):
+        self.document_store.remove_nodes(group="group1", uids=[node1.uid])
+        nodes = self.document_store.get_nodes(group="group1")
+        self.assertEqual(set([node.uid for node in nodes]), set([node2.uid]))
+        self.document_store.remove_nodes(group="group1", uids=[node2.uid])
+        nodes = self.document_store.get_nodes(group="group1")
+        self.assertEqual(len(nodes), 0)
+        self.document_store.remove_nodes(group="group2", uids=[node3.uid])
+        nodes = self.document_store.get_nodes(group="group2")
+        self.assertEqual(len(nodes), 0)
+
+    def test_remove_nodes_by_doc_id(self):
+        self.document_store.remove_nodes(group="group1", doc_ids=[node1.global_metadata.get(RAG_DOC_ID)])
+        nodes = self.document_store.get_nodes(group="group1")
+        self.assertEqual(set([node.uid for node in nodes]), set([node2.uid]))
+        self.document_store.remove_nodes(group="group1", doc_ids=[node2.global_metadata.get(RAG_DOC_ID)])
+        nodes = self.document_store.get_nodes(group="group1")
+        self.assertEqual(len(nodes), 0)
+        self.document_store.remove_nodes(group="group2", doc_ids=[node3.global_metadata.get(RAG_DOC_ID)])
+        nodes = self.document_store.get_nodes(group="group2")
+        self.assertEqual(len(nodes), 0)
+
+    def test_remove_nodes_by_kb_id(self):
+        self.document_store.remove_nodes(group="group1", kb_id=node1.global_metadata.get(RAG_KB_ID))
+        nodes = self.document_store.get_nodes(group="group1")
+        self.assertEqual(set([node.uid for node in nodes]), set([node2.uid]))
+        self.document_store.remove_nodes(group="group1", kb_id=node2.global_metadata.get(RAG_KB_ID))
+        nodes = self.document_store.get_nodes(group="group1")
+        self.assertEqual(len(nodes), 0)
+        self.document_store.remove_nodes(group="group2", kb_id=node3.global_metadata.get(RAG_KB_ID))
+        nodes = self.document_store.get_nodes(group="group2")
+        self.assertEqual(len(nodes), 0)
+        self.document_store.update_nodes([node1, node2, node3])
+        self.document_store.remove_nodes(kb_id=node1.global_metadata.get(RAG_KB_ID))
+        nodes = self.document_store.get_nodes(group="group1")
+        self.assertEqual(set([node.uid for node in nodes]), set([node2.uid]))
+        nodes = self.document_store.get_nodes(group="group2")
+        self.assertEqual(len(nodes), 0)
+
+    def test_update_doc_meta(self):
+        self.document_store.update_doc_meta(node1.global_metadata.get(RAG_DOC_ID), {"tags": ["updated_tag"]})
+        nodes = self.document_store.get_nodes(kb_id=node1.global_metadata.get(RAG_KB_ID))
+        self.assertEqual(len(nodes), 4)
+        for node in nodes:
+            self.assertEqual(node.global_metadata.get("tags"), ["updated_tag"])
+
+    def test_query_without_filters(self):
+        nodes = self.document_store.query(query="text1", group_name="group1", embed_keys=["vec_dense"], topk=2)
+        self.assertEqual(len(nodes), 2)
+        nodes = self.document_store.query(query="text1", group_name="qa", embed_keys=["vec_dense"], topk=2)
+        self.assertEqual(len(nodes), 1)
+        self.assertEqual(nodes[0].uid, qa_node1.uid)
+
+    def test_query_with_filters(self):
+        nodes = self.document_store.query(query="text1", group_name="group1", embed_keys=["vec_dense"],
+                                          topk=2, filters={RAG_DOC_ID: ["doc1"]})
+        self.assertEqual(len(nodes), 1)
+        self.assertEqual(nodes[0].uid, node1.uid)
+        nodes = self.document_store.query(query="text1", group_name="group1", embed_keys=["vec_dense"],
+                                          topk=2, filters={RAG_DOC_ID: ["doc2"]})
+        self.assertEqual(len(nodes), 1)
+        self.assertEqual(nodes[0].uid, node2.uid)
diff --git a/tests/basic_tests/test_index.py b/tests/basic_tests/test_index.py
index c579f6cdb..b97219101 100644
--- a/tests/basic_tests/test_index.py
+++ b/tests/basic_tests/test_index.py
@@ -1,15 +1,17 @@
 import time
 import unittest
 from unittest.mock import MagicMock
-from lazyllm.tools.rag.store import MapStore
+from lazyllm.tools.rag.store.document_store import _DocumentStore
 from lazyllm.tools.rag import DocNode, IndexBase, Document
 from lazyllm.tools.rag.default_index import DefaultIndex
 from lazyllm.tools.rag.similarity import register_similarity, registered_similarities
-from lazyllm.tools.rag.store import StoreBase
+from lazyllm.tools.rag.data_type import DataType
+from lazyllm.tools.rag.store.store_base import LazyLLMStoreBase
 from lazyllm.tools.rag.utils import parallel_do_embedding, generic_process_filters
 from typing import List, Optional, Dict
 from lazyllm.common import override
 from lazyllm import SentenceSplitter, Retriever
+from lazyllm.tools.rag.global_metadata import RAG_DOC_ID
 
 class TestDefaultIndex(unittest.TestCase):
     def setUp(self):
@@ -18,17 +20,22 @@ def setUp(self):
             'test1': MagicMock(return_value=[0, 1, 0]),
             'test2': MagicMock(return_value=[0, 0, 1]),
         }
-        self.mock_store = MapStore(node_groups=['group1'], embed=self.mock_embed)
-
+        self.mock_store = _DocumentStore(algo_name='test_algo', store={"type": "map"},
+                                         group_embed_keys={"group1": ["default", "test1", "test2"]},
+                                         embed=self.mock_embed, embed_dims={"default": 3, "test1": 3, "test2": 3},
+                                         embed_datatypes={"default": DataType.FLOAT_VECTOR,
+                                                          "test1": DataType.FLOAT_VECTOR,
+                                                          "test2": DataType.FLOAT_VECTOR})
+        self.mock_store.activate_group('group1')
         # Create instance of DefaultIndex
         self.index = DefaultIndex(embed=self.mock_embed, store=self.mock_store)
 
         # Create mock DocNodes
-        self.doc_node_1 = DocNode(uid="text1", group="group1")
+        self.doc_node_1 = DocNode(uid="text1", group="group1", global_metadata={RAG_DOC_ID: "test_doc_id"})
         self.doc_node_1.embedding = {"default": [1, 0, 0], "test1": [1, 0, 0], "test2": [1, 0, 0]}
-        self.doc_node_2 = DocNode(uid="text2", group="group1")
+        self.doc_node_2 = DocNode(uid="text2", group="group1", global_metadata={RAG_DOC_ID: "test_doc_id"})
         self.doc_node_2.embedding = {"default": [0, 1, 0], "test1": [0, 1, 0], "test2": [0, 1, 0]}
-        self.doc_node_3 = DocNode(uid="text3", group="group1")
+        self.doc_node_3 = DocNode(uid="text3", group="group1", global_metadata={RAG_DOC_ID: "test_doc_id"})
         self.doc_node_3.embedding = {"default": [0, 0, 1], "test1": [0, 0, 1], "test2": [0, 0, 1]}
         self.nodes = [self.doc_node_1, self.doc_node_2, self.doc_node_3]
         self.mock_store.update_nodes(self.nodes)  # used by index
@@ -105,7 +112,7 @@ def test_query_multi_embed_one_thresholds(self):
         self.assertIn(self.doc_node_2, results)
 
 class KeywordIndex(IndexBase):
-    def __init__(self, cstore: StoreBase):
+    def __init__(self, cstore: LazyLLMStoreBase):
         self.store = cstore
 
     @override
diff --git a/tests/basic_tests/test_magic_pdf_reader.py b/tests/basic_tests/test_magic_pdf_reader.py
deleted file mode 100644
index 4ba656d00..000000000
--- a/tests/basic_tests/test_magic_pdf_reader.py
+++ /dev/null
@@ -1,65 +0,0 @@
-import unittest
-from lazyllm.tools.rag.readers import MagicPDFReader
-from lazyllm.tools.rag.doc_node import DocNode
-from unittest.mock import patch, MagicMock
-from typing import List, Optional, Dict
-from pathlib import Path
-
-
-EGS_RESULT_LIST = [[
-    {
-        'text': '铁路信号设计规范',
-        'file_name': 'TB 10007-2017 铁路信号设计规范.pdf',
-        'type': 'text',
-        'text_level': 1,
-        'bbox': [267, 407, 630, 462],
-        'lines': [{'bbox': [273, 414, 625, 457], 'content': '铁路信号设计规范', 'type': 'text', 'page': 0}],
-        'page_idx': 0,
-        'file_path': '/home/mnt/jisiyuan/projects/lazyllm-project/dataset/rag_data/pdf_data/TB 10007-2017 铁路信号设计规范.pdf'
-    },
-    {
-        "text": "Code for Design of Railway Signaling",
-        "file_name": "TB 10007-2017 铁路信号设计规范.pdf",
-        "type": "text",
-        "bbox": [
-            253,
-            478,
-            644,
-            508
-        ],
-        "lines": [{"bbox": [256, 480, 643, 508], "content": "Code for Design of Railway Signaling",
-                   "type": "text", "page": 0}],
-        "title": "铁路信号设计规范",
-        "page_idx": 0,
-        "file_path": "/home/mnt/jisiyuan/projects/lazyllm-project/dataset/rag_data/pdf_data/TB 10007-2017 铁路信号设计规范.pdf"
-    }]]
-
-
-class TestMagicPDFReader(unittest.TestCase):
-    magic_url = "http://127.0.0.1:20231/api/v1/pdf_parse"
-
-    @patch("lazyllm.tools.rag.readers.magic_pdf_reader.requests.post")
-    def test_read(self, mock_post: MagicMock):
-        magic_reader = MagicPDFReader(self.magic_url)
-        mock_response = MagicMock()
-        mock_response.json.return_value = EGS_RESULT_LIST
-        mock_response.status_code = 200
-        mock_post.return_value = mock_response
-
-        result = magic_reader("./demo.pdf")
-        self.assertTrue(result[0].text.startswith("铁路信号设计规范"))
-
-    @patch("lazyllm.tools.rag.readers.magic_pdf_reader.requests.post")
-    def test_read_with_callback(self, mock_post: MagicMock):
-        def custom_callback(elements: List[dict], file: Path, extra_info: Optional[Dict] = None) -> List[DocNode]:
-            text_chunks = [f"page_{el['page']}:{el['text']}" for el in elements if "text" in el]
-            return [DocNode(text="\n".join(text_chunks), metadata={"file_name": file.name})]
-
-        magic_reader = MagicPDFReader(self.magic_url, custom_callback)
-        mock_response = MagicMock()
-        mock_response.json.return_value = EGS_RESULT_LIST
-        mock_response.status_code = 200
-        mock_post.return_value = mock_response
-
-        result = magic_reader("./demo.pdf")
-        self.assertTrue(result[0].text.startswith("page"))
diff --git a/tests/basic_tests/test_module.py b/tests/basic_tests/test_module.py
index 09a7be28c..3b16d5d2d 100644
--- a/tests/basic_tests/test_module.py
+++ b/tests/basic_tests/test_module.py
@@ -79,7 +79,7 @@ def __call__(self, a, b):
         assert s._call('test1', 1, 2) == 3
 
     def test_TrainableModule(self):
-        tm1 = lazyllm.TrainableModule(self.base_model, self.target_path)
+        tm1 = lazyllm.TrainableModule(self.base_model, self.target_path, trust_remote_code=False)
         tm2 = tm1.share()
         # tm1 and tm2 all use: ChatPrompter
         assert tm1._prompt == tm2._prompt
@@ -128,7 +128,8 @@ def test_TrainableModule(self):
         assert type(tm2._prompt) is lazyllm.prompter.EmptyPrompter
 
         # tm5 use tm4's url
-        tm5 = lazyllm.TrainableModule(self.base_model).deploy_method(tm4._deploy_type, url=tm4._url)
+        tm5 = lazyllm.TrainableModule(self.base_model, trust_remote_code=False).deploy_method(
+            tm4._deploy_type, url=tm4._url)
         tm5.evalset(inputs)
         tm5.eval()
         assert tm5.eval_result == tm4.eval_result
@@ -139,7 +140,8 @@ def test_TrainableModule(self):
         assert tm5(inputs) == res_template.format(inputs)
 
     def test_TrainableModule_stream(self):
-        tm = lazyllm.TrainableModule(self.base_model, self.target_path, stream=True).deploy_method(lazyllm.deploy.dummy)
+        tm = lazyllm.TrainableModule(self.base_model, self.target_path, stream=True, trust_remote_code=False)
+        tm.deploy_method(lazyllm.deploy.dummy)
         assert tm._deploy_type == lazyllm.deploy.dummy
         tm.prompt(None).start()
 
diff --git a/tests/basic_tests/test_store.py b/tests/basic_tests/test_store.py
index 9fb584152..4da00b18a 100644
--- a/tests/basic_tests/test_store.py
+++ b/tests/basic_tests/test_store.py
@@ -3,12 +3,35 @@
 import pytest
 import tempfile
 import unittest
-from unittest.mock import MagicMock
-from lazyllm.tools.rag.store import MapStore, ChromadbStore, MilvusStore, LAZY_ROOT_NAME
-from lazyllm.tools.rag.doc_node import DocNode
+from lazyllm.tools.rag.store import (MapStore, ChromadbStore, MilvusStore, OpenSearchStore,
+                                     SenseCoreStore, BUILDIN_GLOBAL_META_DESC, HybridStore)
 from lazyllm.tools.rag.data_type import DataType
-from lazyllm.tools.rag.global_metadata import GlobalMetadataDesc, RAG_DOC_ID
-
+from lazyllm.tools.rag.global_metadata import RAG_DOC_ID, RAG_KB_ID
+data = [
+    {'uid': 'uid1', 'doc_id': 'doc1', 'group': 'g1', 'content': 'test1', 'meta': {},
+     'global_meta': {RAG_DOC_ID: 'doc1', RAG_KB_ID: 'kb1'},
+     'embedding': {'vec_dense': [0.1, 0.2, 0.3], 'vec_sparse': {"1563": 0.212890625, "238": 0.1768798828125}},
+     'type': 1, 'number': 0, 'kb_id': 'kb1',
+     'excluded_embed_metadata_keys': ['file_size', 'file_name', 'file_type'],
+     'excluded_llm_metadata_keys': ['file_size', 'file_name', 'file_type'],
+     'parent': None, 'answer': "", 'image_keys': []},
+
+    {'uid': 'uid2', 'doc_id': 'doc2', 'group': 'g2', 'content': 'test2', 'meta': {},
+     'global_meta': {RAG_DOC_ID: 'doc2', RAG_KB_ID: 'kb2'},
+     'embedding': {'vec_dense': [0.3, 0.2, 0.1], 'vec_sparse': {"1563": 0.212890625, "238": 0.1768798828125}},
+     'type': 1, 'number': 0, 'kb_id': 'kb2',
+     'excluded_embed_metadata_keys': ['file_size', 'file_name', 'file_type'],
+     'excluded_llm_metadata_keys': ['file_size', 'file_name', 'file_type'],
+     'parent': 'p2', 'answer': "", 'image_keys': []},
+
+    {'uid': 'uid3', 'doc_id': 'doc3', 'group': 'g1', 'content': 'test3', 'meta': {},
+     'global_meta': {RAG_DOC_ID: 'doc3', RAG_KB_ID: 'kb3'},
+     'embedding': {'vec_dense': [0.3, 0.2, 0.1], 'vec_sparse': {"12": 0.212890625, "23": 0.1768798828125}},
+     'type': 1, 'number': 0, 'kb_id': 'kb3',
+     'excluded_embed_metadata_keys': ['file_size', 'file_name', 'file_type'],
+     'excluded_llm_metadata_keys': ['file_size', 'file_name', 'file_type'],
+     'parent': None, 'answer': "", 'image_keys': []},
+]
 
 def clear_directory(directory_path):
     if os.path.exists(directory_path):
@@ -24,373 +47,884 @@ def clear_directory(directory_path):
     else:
         print(f"The directory {directory_path} does not exist.")
 
-# Test class for ChromadbStore
+
+class TestMapStore(unittest.TestCase):
+    def setUp(self):
+        self.collections = ["col_g1", "col_g2"]
+        fd, self.store_dir = tempfile.mkstemp(suffix=".db")
+        os.close(fd)
+        self.store1 = MapStore()
+        self.store1.connect(collections=self.collections)
+
+    def tearDown(self):
+        os.remove(self.store_dir)
+
+    def test_upsert(self):
+        self.store1.upsert(self.collections[0], [data[0]])
+        res = self.store1.get(collection_name=self.collections[0])
+        self.assertEqual(len(res), 1)
+        self.assertEqual(res[0], data[0])
+
+    def test_delete_segments_by_collection(self):
+        self.store1.upsert(self.collections[0], [data[0]])
+        self.store1.upsert(self.collections[1], [data[1]])
+        self.store1.delete(self.collections[0])
+        res = self.store1.get(collection_name=self.collections[0])
+        self.assertEqual(len(res), 0)
+        res = self.store1.get(collection_name=self.collections[1])
+        self.assertEqual(len(res), 1)
+        self.assertEqual(res[0], data[1])
+
+    def test_delete_segments_by_kb_id(self):
+        self.store1.upsert(self.collections[0], [data[0], data[2]])
+        self.store1.delete(self.collections[0], criteria={RAG_KB_ID: 'kb1'})
+        res = self.store1.get(collection_name=self.collections[0])
+        self.assertEqual(len(res), 1)
+        self.assertEqual(res[0], data[2])
+        self.store1.delete(self.collections[0], criteria={RAG_KB_ID: 'kb3'})
+        res = self.store1.get(collection_name=self.collections[0])
+        self.assertEqual(len(res), 0)
+
+    def test_delete_segments_by_uid(self):
+        self.store1.upsert(self.collections[0], [data[0], data[2]])
+        self.store1.delete(self.collections[0], criteria={'uid': ['uid1']})
+        res = self.store1.get(collection_name=self.collections[0])
+        self.assertEqual(len(res), 1)
+        self.assertEqual(res[0], data[2])
+
+    def test_delete_segments_by_doc_id(self):
+        self.store1.upsert(self.collections[0], [data[0], data[2]])
+        self.store1.delete(self.collections[0], criteria={RAG_DOC_ID: ['doc2']})
+        res = self.store1.get(collection_name=self.collections[0])
+        self.assertEqual(len(res), 2)
+        self.store1.delete(self.collections[0], criteria={RAG_DOC_ID: ['doc1']})
+        res = self.store1.get(collection_name=self.collections[0])
+        self.assertEqual(len(res), 1)
+        self.assertEqual(res[0], data[2])
+
+    def test_get_segments_by_collection(self):
+        self.store1.upsert(self.collections[0], [data[0], data[2]])
+        self.store1.upsert(self.collections[1], [data[1]])
+        res = self.store1.get(collection_name=self.collections[0])
+        self.assertEqual(len(res), 2)
+        res = self.store1.get(collection_name=self.collections[1])
+        self.assertEqual(len(res), 1)
+        self.assertEqual(res[0], data[1])
+
+    def test_get_segments_by_kb_id(self):
+        self.store1.upsert(self.collections[0], [data[0], data[2]])
+        self.store1.upsert(self.collections[1], [data[1]])
+        res = self.store1.get(collection_name=self.collections[0], criteria={RAG_KB_ID: 'kb1'})
+        self.assertEqual(len(res), 1)
+        self.assertEqual(res[0], data[0])
+        res = self.store1.get(collection_name=self.collections[0], criteria={RAG_KB_ID: 'kb3'})
+        self.assertEqual(len(res), 1)
+        self.assertEqual(res[0], data[2])
+        res = self.store1.get(collection_name=self.collections[0], criteria={RAG_KB_ID: 'kb2'})
+        self.assertEqual(len(res), 0)
+
+    def test_get_segments_by_uid(self):
+        self.store1.upsert(self.collections[0], [data[0], data[2]])
+        self.store1.upsert(self.collections[1], [data[1]])
+        res = self.store1.get(collection_name=self.collections[0], criteria={'uid': ['uid1']})
+        self.assertEqual(len(res), 1)
+        self.assertEqual(res[0], data[0])
+        res = self.store1.get(collection_name=self.collections[0], criteria={'uid': ['uid3']})
+        self.assertEqual(len(res), 1)
+        self.assertEqual(res[0], data[2])
+        res = self.store1.get(collection_name=self.collections[0], criteria={'uid': ['uid2']})
+        self.assertEqual(len(res), 0)
+
+    def test_get_segments_by_doc_id(self):
+        self.store1.upsert(self.collections[0], [data[0], data[2]])
+        self.store1.upsert(self.collections[1], [data[1]])
+        res = self.store1.get(collection_name=self.collections[0], criteria={RAG_DOC_ID: ['doc1']})
+        self.assertEqual(len(res), 1)
+        self.assertEqual(res[0], data[0])
+        res = self.store1.get(collection_name=self.collections[0], criteria={RAG_DOC_ID: ['doc2']})
+        self.assertEqual(len(res), 0)
+        res = self.store1.get(collection_name=self.collections[0], criteria={RAG_DOC_ID: ['doc1', 'doc3']})
+        self.assertEqual(len(res), 2)
+
+    def test_mapstore_with_uri(self):
+        store2 = MapStore(uri=self.store_dir)
+        store2.connect(collections=self.collections)
+        store2.upsert(self.collections[0], [data[0], data[2]])
+        store2.upsert(self.collections[1], [data[1]])
+        res = store2.get(collection_name=self.collections[0])
+        self.assertEqual(len(res), 2)
+        res = store2.get(collection_name=self.collections[1])
+        self.assertEqual(len(res), 1)
+        self.assertEqual(res[0], data[1])
+        store2.delete(self.collections[0], criteria={RAG_DOC_ID: ['doc1']})
+        res = store2.get(collection_name=self.collections[0])
+        self.assertEqual(len(res), 1)
+        self.assertEqual(res[0], data[2])
+        store3 = MapStore(uri=self.store_dir)
+        store3.connect(collections=self.collections)
+        res = store3.get(collection_name=self.collections[0])
+        self.assertEqual(len(res), 1)
+        self.assertEqual(res[0].get('uid'), data[2].get('uid'))
+
+
 @pytest.mark.skip_on_win
 @pytest.mark.skip_on_mac
 class TestChromadbStore(unittest.TestCase):
     def setUp(self):
-        self.node_groups = [LAZY_ROOT_NAME, "group1", "group2"]
+        self.data = [
+            {'uid': 'uid1', 'doc_id': 'doc1', 'group': 'g1', 'content': 'test1', 'meta': {},
+             'global_meta': {RAG_DOC_ID: 'doc1', RAG_KB_ID: 'kb1'},
+             'embedding': {'vec_dense': [0.1, 0.2, 0.3]}, 'type': 1, 'number': 0, 'kb_id': 'kb1',
+             'excluded_embed_metadata_keys': ['file_size', 'file_name', 'file_type'],
+             'excluded_llm_metadata_keys': ['file_size', 'file_name', 'file_type'],
+             'parent': None, 'answer': "", 'image_keys': []},
+            {'uid': 'uid2', 'doc_id': 'doc2', 'group': 'g2', 'content': 'test2', 'meta': {},
+             'global_meta': {RAG_DOC_ID: 'doc2', RAG_KB_ID: 'kb2'},
+             'embedding': {'vec_dense': [0.3, 0.2, 0.1]}, 'type': 1, 'number': 0, 'kb_id': 'kb2',
+             'excluded_embed_metadata_keys': ['file_size', 'file_name', 'file_type'],
+             'excluded_llm_metadata_keys': ['file_size', 'file_name', 'file_type'],
+             'parent': 'p2', 'answer': "", 'image_keys': []},
+            {'uid': 'uid3', 'doc_id': 'doc3', 'group': 'g1', 'content': 'test3', 'meta': {},
+             'global_meta': {RAG_DOC_ID: 'doc3', RAG_KB_ID: 'kb3'},
+             'embedding': {'vec_dense': [0.3, 0.2, 0.1]}, 'type': 1, 'number': 0, 'kb_id': 'kb3',
+             'excluded_embed_metadata_keys': ['file_size', 'file_name', 'file_type'],
+             'excluded_llm_metadata_keys': ['file_size', 'file_name', 'file_type'],
+             'parent': None, 'answer': "", 'image_keys': []},
+        ]
+        self.collections = ["col_g1", "col_g2"]
+        self.embed_dims = {"vec_dense": 3}
+        self.embed_datatypes = {"vec_dense": DataType.FLOAT_VECTOR}
+        self.global_metadata_desc = BUILDIN_GLOBAL_META_DESC
         self.store_dir = tempfile.mkdtemp()
-        self.mock_embed = {
-            'default': MagicMock(return_value=[1.0, 2.0, 3.0]),
-        }
-        self.embed_dims = {"default": 3}
-
-        embed_keys = set(['default'])
-        group_embed_keys = {
-            LAZY_ROOT_NAME: embed_keys,
-            'group1': embed_keys,
-            'group2': embed_keys,
-        }
-        self.store = ChromadbStore(group_embed_keys=group_embed_keys, embed=self.mock_embed,
-                                   embed_dims=self.embed_dims, dir=self.store_dir)
-
-        self.store.update_nodes(
-            [DocNode(uid="1", text="text1", group=LAZY_ROOT_NAME, parent=None)],
-        )
+        self.store = ChromadbStore(uri=self.store_dir)
+        self.store.connect(embed_dims=self.embed_dims, embed_datatypes=self.embed_datatypes,
+                           global_metadata_desc=self.global_metadata_desc)
 
     def tearDown(self):
         clear_directory(self.store_dir)
 
-    def test_initialization(self):
-        self.assertEqual(set(self.store._collections.keys()), set(self.node_groups))
-
-    def test_update_nodes(self):
-        node1 = DocNode(uid="1", text="text1", group="group1")
-        node2 = DocNode(uid="2", text="text2", group="group2")
-        self.store.update_nodes([node1, node2])
-        collection = self.store._collections["group1"]
-        self.assertEqual(set(collection.peek(collection.count())["ids"]), set(["1", "2"]))
-        nodes = self.store.get_nodes("group1")
-        self.assertEqual(nodes, [node1])
-
-    def test_remove_group_nodes(self):
-        node1 = DocNode(uid="1", text="text1", group="group1")
-        node2 = DocNode(uid="2", text="text2", group="group2")
-        self.store.update_nodes([node1, node2])
-        collection = self.store._collections["group1"]
-        self.assertEqual(collection.peek(collection.count())["ids"], ["1", "2"])
-        self.store.remove_nodes([node1.global_metadata.get(RAG_DOC_ID)], "group1", ["1"])
-        self.assertEqual(collection.peek(collection.count())["ids"], ["2"])
-
-    def test_load_store(self):
-        # Set up initial data to be loaded
-        node1 = DocNode(uid="1", text="text1", group="group1", parent=None)
-        node2 = DocNode(uid="2", text="text2", group="group1", parent=node1)
-        self.store.update_nodes([node1, node2])
-
-        # Reset store and load from "persistent" storage
-        self.store._map_store.clear_cache()
-        self.store._load_store(self.embed_dims)
-
-        nodes = self.store.get_nodes("group1")
-        self.assertEqual(len(nodes), 2)
-        n1 = self.store.get_nodes("group1", ["1"])[0]
-        self.assertEqual(n1._uid, "1")
-        n2 = self.store.get_nodes("group1", ["2"])[0]
-        self.assertEqual(n2._uid, "2")
-        self.assertEqual(n2.parent._uid, "1")
-
-    def test_insert_dict_as_sparse_embedding(self):
-        node1 = DocNode(uid="1", text="text1", group="group1", embedding={'default': {1: 10, 2: 20}})
-        node2 = DocNode(uid="2", text="text2", group="group1", embedding={'default': {0: 30, 2: 50}})
-        orig_embedding_dict = {
-            node1._uid: [0, 10, 20],
-            node2._uid: [30, 0, 50],
-        }
-        self.store.update_nodes([node1, node2])
+    def test_upsert(self):
+        self.store.upsert(self.collections[0], [self.data[0]])
+        res = self.store.get(collection_name=self.collections[0])
+        self.assertEqual(len(res), 1)
+        self.assertEqual(res[0].get('uid'), self.data[0].get('uid'))
+
+    def test_delete_segments_by_collection(self):
+        self.store.upsert(self.collections[0], [self.data[0], self.data[2]])
+        self.store.upsert(self.collections[1], [self.data[1]])
+        self.store.delete(self.collections[0])
+        res = self.store.get(collection_name=self.collections[0])
+        self.assertEqual(len(res), 0)
+        res = self.store.get(collection_name=self.collections[1])
+        self.assertEqual(len(res), 1)
+        self.assertEqual(res[0].get('uid'), self.data[1].get('uid'))
+
+    def test_delete_segments_by_kb_id(self):
+        self.store.upsert(self.collections[0], [self.data[0], self.data[2]])
+        self.store.delete(self.collections[0], criteria={RAG_KB_ID: 'kb1'})
+        res = self.store.get(collection_name=self.collections[0])
+        self.assertEqual(len(res), 1)
+        self.assertEqual(res[0].get('uid'), self.data[2].get('uid'))
+        self.store.delete(self.collections[0], criteria={RAG_KB_ID: 'kb3'})
+        res = self.store.get(collection_name=self.collections[0])
+        self.assertEqual(len(res), 0)
+
+    def test_delete_segments_by_uid(self):
+        self.store.upsert(self.collections[0], [self.data[0], self.data[2]])
+        self.store.delete(self.collections[0], criteria={'uid': ['uid1']})
+        res = self.store.get(collection_name=self.collections[0])
+        self.assertEqual(len(res), 1)
+        self.assertEqual(res[0].get('uid'), self.data[2].get('uid'))
+
+    def test_delete_segments_by_doc_id(self):
+        self.store.upsert(self.collections[0], [self.data[0], self.data[2]])
+        self.store.delete(self.collections[0], criteria={RAG_DOC_ID: ['doc2']})
+        res = self.store.get(collection_name=self.collections[0])
+        self.assertEqual(len(res), 2)
+        self.store.delete(self.collections[0], criteria={RAG_DOC_ID: ['doc1']})
+        res = self.store.get(collection_name=self.collections[0])
+        self.assertEqual(len(res), 1)
+        self.assertEqual(res[0].get('uid'), self.data[2].get('uid'))
+
+    def test_get_segments_by_collection(self):
+        self.store.upsert(self.collections[0], [self.data[0], self.data[2]])
+        self.store.upsert(self.collections[1], [self.data[1]])
+        res = self.store.get(collection_name=self.collections[0])
+        self.assertEqual(len(res), 2)
+        res = self.store.get(collection_name=self.collections[1])
+        self.assertEqual(len(res), 1)
+        self.assertEqual(res[0].get('uid'), self.data[1].get('uid'))
+
+    def test_get_segments_by_kb_id(self):
+        self.store.upsert(self.collections[0], [self.data[0], self.data[2]])
+        self.store.upsert(self.collections[1], [self.data[1]])
+        res = self.store.get(collection_name=self.collections[0], criteria={RAG_KB_ID: 'kb1'})
+        self.assertEqual(len(res), 1)
+        self.assertEqual(res[0].get('uid'), self.data[0].get('uid'))
+        res = self.store.get(collection_name=self.collections[0], criteria={RAG_KB_ID: 'kb3'})
+        self.assertEqual(len(res), 1)
+        self.assertEqual(res[0].get('uid'), self.data[2].get('uid'))
+        res = self.store.get(collection_name=self.collections[0], criteria={RAG_KB_ID: 'kb2'})
+        self.assertEqual(len(res), 0)
+        res = self.store.get(collection_name=self.collections[1], criteria={RAG_KB_ID: 'kb2'})
+        self.assertEqual(len(res), 1)
+        self.assertEqual(res[0].get('uid'), self.data[1].get('uid'))
+
+    def test_get_segments_by_uid(self):
+        self.store.upsert(self.collections[0], [self.data[0], self.data[2]])
+        self.store.upsert(self.collections[1], [self.data[1]])
+        res = self.store.get(collection_name=self.collections[0], criteria={'uid': ['uid1']})
+        self.assertEqual(len(res), 1)
+        self.assertEqual(res[0].get('uid'), self.data[0].get('uid'))
+        res = self.store.get(collection_name=self.collections[0], criteria={'uid': ['uid3']})
+        self.assertEqual(len(res), 1)
+        self.assertEqual(res[0].get('uid'), self.data[2].get('uid'))
+        res = self.store.get(collection_name=self.collections[0], criteria={'uid': ['uid2']})
+        self.assertEqual(len(res), 0)
+        res = self.store.get(collection_name=self.collections[1], criteria={'uid': ['uid2']})
+        self.assertEqual(len(res), 1)
+        self.assertEqual(res[0].get('uid'), self.data[1].get('uid'))
+
+    def test_get_segments_by_doc_id(self):
+        self.store.upsert(self.collections[0], [self.data[0], self.data[2]])
+        self.store.upsert(self.collections[1], [self.data[1]])
+        res = self.store.get(collection_name=self.collections[0], criteria={RAG_DOC_ID: ['doc1']})
+        self.assertEqual(len(res), 1)
+        self.assertEqual(res[0].get('uid'), self.data[0].get('uid'))
+        res = self.store.get(collection_name=self.collections[0], criteria={RAG_DOC_ID: ['doc2']})
+        self.assertEqual(len(res), 0)
+        res = self.store.get(collection_name=self.collections[0], criteria={RAG_DOC_ID: ['doc1', 'doc3']})
+        self.assertEqual(len(res), 2)
+
+    def test_search(self):
+        self.store.upsert(self.collections[0], [self.data[0], self.data[2]])
+        self.store.upsert(self.collections[1], [self.data[1]])
+        res = self.store.search(collection_name=self.collections[0], query_embedding=[0.1, 0.2, 0.3],
+                                embed_key='vec_dense', topk=1)
+        self.assertEqual(len(res), 1)
+        self.assertEqual(res[0].get('uid'), self.data[0].get('uid'))
+        res = self.store.search(collection_name=self.collections[0], query_embedding=[0.3, 0.2, 0.1],
+                                embed_key='vec_dense', topk=1)
+        self.assertEqual(len(res), 1)
+        self.assertEqual(res[0].get('uid'), self.data[2].get('uid'))
+        res = self.store.search(collection_name=self.collections[0], query_embedding=[0.3, 0.2, 0.1],
+                                embed_key='vec_dense', topk=5)
+        self.assertEqual(len(res), 2)
+        self.assertEqual(res[0].get('uid'), self.data[2].get('uid'))
+        self.assertEqual(res[1].get('uid'), self.data[0].get('uid'))
+        res = self.store.search(collection_name=self.collections[1], query_embedding=[0.3, 0.2, 0.1],
+                                embed_key='vec_dense', topk=1)
+        self.assertEqual(len(res), 1)
+        self.assertEqual(res[0].get('uid'), self.data[1].get('uid'))
+
+    def test_search_with_filters(self):
+        self.store.upsert(self.collections[0], [self.data[0], self.data[2]])
+        self.store.upsert(self.collections[1], [self.data[1]])
+        res = self.store.search(collection_name=self.collections[0], query_embedding=[0.1, 0.2, 0.3],
+                                embed_key='vec_dense', topk=2, filters={RAG_KB_ID: ['kb1']})
+        self.assertEqual(len(res), 1)
+        self.assertEqual(res[0].get('uid'), self.data[0].get('uid'))
+        res = self.store.search(collection_name=self.collections[1], query_embedding=[0.1, 0.2, 0.3],
+                                embed_key='vec_dense', topk=1, filters={RAG_KB_ID: ['kb1']})
+        self.assertEqual(len(res), 0)
 
-        results = self.store._peek_all_documents('group1')
-        nodes = self.store._build_nodes_from_chroma(results, self.embed_dims)
-        nodes_dict = {
-            node._uid: node for node in nodes
-        }
-
-        assert nodes_dict.keys() == orig_embedding_dict.keys()
-        for uid, node in nodes_dict.items():
-            assert node.embedding['default'] == orig_embedding_dict.get(uid)
-
-    def test_all_groups(self):
-        self.assertEqual(set(self.store.all_groups()), set(self.node_groups))
-
-    def test_query(self):
-        node1 = DocNode(uid="1", text="text1", group="group1", parent=None)
-        node2 = DocNode(uid="2", text="text2", group="group1", parent=node1)
-        self.store.update_nodes([node1, node2])
-        res = self.store.query(query='text1', group_name='group1', embed_keys=['default'], topk=2,
-                               similarity_name='cosine', similarity_cut_off=0.000001)
-        self.assertEqual(set([node1, node2]), set(res))
-
-    def test_group_others(self):
-        node1 = DocNode(uid="1", text="text1", group="group1", parent=None)
-        node2 = DocNode(uid="2", text="text2", group="group1", parent=node1)
-        self.store.update_nodes([node1, node2])
-        self.assertEqual(self.store.is_group_active("group1"), True)
-        self.assertEqual(self.store.is_group_active("group2"), False)
-
-class TestMapStore(unittest.TestCase):
-    def setUp(self):
-        self.mock_embed = {
-            'default': MagicMock(return_value=[1.0, 2.0, 3.0]),
-        }
-        self.node_groups = [LAZY_ROOT_NAME, "group1", "group2"]
-        self.store = MapStore(node_groups=self.node_groups, embed=self.mock_embed)
-        self.node1 = DocNode(uid="1", text="text1", group="group1", parent=None)
-        self.node2 = DocNode(uid="2", text="text2", group="group1", parent=self.node1)
-
-    def test_update_nodes(self):
-        self.store.update_nodes([self.node1, self.node2])
-        nodes = self.store.get_nodes("group1")
-        self.assertEqual(len(nodes), 2)
-        n1 = self.store.get_nodes("group1", ["1"])[0]
-        self.assertEqual(n1._uid, "1")
-        n2 = self.store.get_nodes("group1", ["2"])[0]
-        self.assertEqual(n2._uid, "2")
-        self.assertEqual(n2.parent._uid, "1")
-
-    def test_get_group_nodes(self):
-        self.store.update_nodes([self.node1, self.node2])
-        n1 = self.store.get_nodes("group1", ["1"])[0]
-        self.assertEqual(n1.text, self.node1.text)
-        n2 = self.store.get_nodes("group1", ["2"])[0]
-        self.assertEqual(n2.text, self.node2.text)
-        ids = set([self.node1._uid, self.node2._uid])
-        docs = self.store.get_nodes("group1")
-        self.assertEqual(ids, set([doc._uid for doc in docs]))
-
-    def test_remove_group_nodes(self):
-        self.store.update_nodes([self.node1, self.node2])
-
-        n1 = self.store.get_nodes("group1", ["1"])[0]
-        assert n1.text == self.node1.text
-        self.store.remove_nodes([self.node1.global_metadata.get(RAG_DOC_ID)], uids=["1"])
-        n1 = self.store.get_nodes("group1", ["1"])
-        assert not n1
-
-        n2 = self.store.get_nodes("group1", ["2"])[0]
-        assert n2.text == self.node2.text
-        self.store.remove_nodes([self.node2.global_metadata.get(RAG_DOC_ID)], uids=["2"])
-        n2 = self.store.get_nodes("group1", ["2"])
-        assert not n2
-
-    def test_all_groups(self):
-        self.assertEqual(set(self.store.all_groups()), set(self.node_groups))
-
-    def test_query(self):
-        self.store.update_nodes([self.node1, self.node2])
-        res = self.store.query(query='text1', group_name='group1', embed_keys=['default'], topk=2,
-                               similarity_name='cosine', similarity_cut_off=0.000001)
-        self.assertEqual(set([self.node1, self.node2]), set(res))
-
-    def test_group_others(self):
-        self.store.update_nodes([self.node1, self.node2])
-        self.assertEqual(self.store.is_group_active("group1"), True)
-        self.assertEqual(self.store.is_group_active("group2"), False)
 
 @pytest.mark.skip_on_win
 @pytest.mark.skip_on_mac
-class TestMilvusStoreWithNormalEmbedding(unittest.TestCase):
+class TestMilvusStore(unittest.TestCase):
     def setUp(self):
-        self.mock_embed = {
-            'vec1': MagicMock(return_value=[1.0, 2.0, 3.0]),
-            'vec2': MagicMock(return_value=[400.0, 500.0, 600.0, 700.0, 800.0]),
-        }
-        self.global_metadata_desc = {
-            'comment': GlobalMetadataDesc(data_type=DataType.VARCHAR, max_size=65535, default_value=' '),
-            'signature': GlobalMetadataDesc(data_type=DataType.VARCHAR, max_size=256, default_value=' '),
-            'tags': GlobalMetadataDesc(data_type=DataType.ARRAY, element_type=DataType.INT32, max_size=128,
-                                       default_value=[]),
-        }
-
-        self.node_groups = [LAZY_ROOT_NAME, "group1", "group2"]
-        _, self.store_file = tempfile.mkstemp(suffix=".db")
-
-        embed_keys = set(['vec1', 'vec2'])
-        self.group_embed_keys = {
-            LAZY_ROOT_NAME: embed_keys,
-            'group1': embed_keys,
-            'group2': embed_keys,
-        }
-        self.embed_dims = {
-            "vec1": 3,
-            "vec2": 5,
-        }
-        self.embed_datatypes = {
-            'vec1': DataType.FLOAT_VECTOR,
-            'vec2': DataType.FLOAT_VECTOR,
-        }
-
-        self.kwargs = {
-            'uri': self.store_file,
-            'index_kwargs': {
-                'index_type': 'HNSW',
+        self.collections = ["col_g1", "col_g2"]
+        self.embed_dims = {"vec_dense": 3}
+        self.embed_datatypes = {"vec_dense": DataType.FLOAT_VECTOR, "vec_sparse": DataType.SPARSE_FLOAT_VECTOR}
+        self.global_metadata_desc = BUILDIN_GLOBAL_META_DESC
+        self.index_kwargs = [
+            {
+                'embed_key': 'vec_dense',
+                'index_type': 'FLAT',
                 'metric_type': 'COSINE',
+                'params': {
+                    'nlist': 128,
+                }
+            },
+            {
+                'embed_key': 'vec_sparse',
+                'index_type': 'SPARSE_INVERTED_INDEX',
+                'metric_type': 'IP',
+                'params': {
+                    'nlist': 128,
+                }
             }
-        }
-
-        self.store = MilvusStore(group_embed_keys=self.group_embed_keys, embed=self.mock_embed,
-                                 embed_dims=self.embed_dims, embed_datatypes=self.embed_datatypes,
-                                 global_metadata_desc=self.global_metadata_desc, **self.kwargs)
-
-        self.node1 = DocNode(uid="1", text="text1", group="group1", parent=None,
-                             embedding={"vec1": [8.0, 9.0, 10.0], "vec2": [11.0, 12.0, 13.0, 14.0, 15.0]},
-                             metadata={'comment': 'comment1'},
-                             global_metadata={'comment': 'comment3', 'signature': 'node1', 'tags': [1, 3, 5]})
-        self.node2 = DocNode(uid="2", text="text2", group="group1", parent=self.node1,
-                             embedding={"vec1": [100.0, 200.0, 300.0], "vec2": [400.0, 500.0, 600.0, 700.0, 800.0]},
-                             metadata={'comment': 'comment2', 'signature': 'node2'})
-        self.node3 = DocNode(uid="3", text="text3", group="group1", parent=None,
-                             embedding={"vec1": [4.0, 5.0, 6.0], "vec2": [16.0, 17.0, 18.0, 19.0, 20.0]},
-                             metadata={'comment': 'comment3', 'signature': 'node3'},
-                             global_metadata={'tags': [1, 2, 3]})
+        ]
+        fd, self.store_dir = tempfile.mkstemp(suffix=".db")
+        os.close(fd)
+        self.uri_standalone = ""
+        self.store = MilvusStore(uri=self.store_dir, index_kwargs=self.index_kwargs)
+        self.store.connect(embed_dims=self.embed_dims, embed_datatypes=self.embed_datatypes,
+                           global_metadata_desc=self.global_metadata_desc)
 
     def tearDown(self):
-        os.remove(self.store_file)
-
-    def test_update_and_query(self):
-        self.store.update_nodes([self.node1])
-        ret = self.store.query(query='text1', group_name='group1', embed_keys=['vec2'], topk=1)
-        self.assertEqual(len(ret), 1)
-        self.assertEqual(ret[0]._uid, self.node1._uid)
-
-        self.store.update_nodes([self.node2])
-        ret = self.store.query(query='text2', group_name='group1', embed_keys=['vec2'], topk=1)
-        self.assertEqual(len(ret), 1)
-        self.assertEqual(ret[0]._uid, self.node2._uid)
-
-    def test_remove_and_query(self):
-        self.store.update_nodes([self.node1, self.node2])
-        ret = self.store.query(query='test', group_name='group1', embed_keys=['vec2'], topk=1)
-        self.assertEqual(len(ret), 1)
-        self.assertEqual(ret[0]._uid, self.node2._uid)
-
-        self.store.remove_nodes([self.node2.global_metadata.get(RAG_DOC_ID)], "group1", [self.node2._uid])
-        ret = self.store.query(query='test', group_name='group1', embed_keys=['vec2'], topk=1)
-        self.assertEqual(len(ret), 1)
-        self.assertEqual(ret[0]._uid, self.node1._uid)
-
-    def test_all_groups(self):
-        self.assertEqual(set(self.store.all_groups()), set(self.node_groups))
-
-    def test_group_others(self):
-        self.store.update_nodes([self.node1, self.node2])
-        self.assertEqual(self.store.is_group_active("group1"), True)
-        self.assertEqual(self.store.is_group_active("group2"), False)
-
-    def test_query_with_filter_exist_1(self):
-        self.store.update_nodes([self.node1, self.node3])
-        ret = self.store.query(query='test', group_name='group1', embed_keys=['vec2'], topk=10,
-                               filters={'comment': ['comment3']})
-        self.assertEqual(len(ret), 1)
-        self.assertEqual(ret[0]._uid, self.node1._uid)
-
-    def test_query_with_filter_exist_2(self):
-        self.store.update_nodes([self.node1, self.node2, self.node3])
-        ret = self.store.query(query='test', group_name='group1', embed_keys=['vec2'], topk=10,
-                               filters={'comment': ['comment3']})
-        self.assertEqual(len(ret), 2)
-        self.assertEqual(set([ret[0]._uid, ret[1]._uid]), set([self.node1._uid, self.node2._uid]))
-
-    def test_query_with_filter_non_exist(self):
-        self.store.update_nodes([self.node1, self.node3])
-        ret = self.store.query(query='test', group_name='group1', embed_keys=['vec1'], topk=10,
-                               filters={'comment': ['non-exist']})
-        self.assertEqual(len(ret), 0)
-
-    def test_reload(self):
-        self.store.update_nodes([self.node1, self.node2, self.node3])
-        # reload from storage
-        del self.store
-        self.store = MilvusStore(group_embed_keys=self.group_embed_keys, embed=self.mock_embed,
-                                 embed_dims=self.embed_dims, global_metadata_desc=self.global_metadata_desc,
-                                 embed_datatypes=self.embed_datatypes, **self.kwargs)
-
-        nodes = self.store.get_nodes('group1')
-        orig_nodes = [self.node1, self.node2, self.node3]
-        self.assertEqual(set([node._uid for node in nodes]), set([node._uid for node in orig_nodes]))
-
-        for node in nodes:
-            for orig_node in orig_nodes:
-                if node._uid == orig_node._uid:
-                    self.assertEqual(node.text, orig_node.text)
-                    # builtin fields are not in orig node, so we can not use
-                    # node.global_metadata == orig_node.global_metadata
-                    for k, v in orig_node.global_metadata.items():
-                        self.assertEqual(node.global_metadata[k], v)
-                    break
-
-    # XXX `array_contains_any` is not supported in local(aka lite) mode. skip this ut
-    def _test_query_with_array_filter(self):
-        self.store.update_nodes([self.node1, self.node3])
-        ret = self.store.query(query='test', group_name='group1', embed_keys=['vec1'], topk=10,
-                               filters={'tags': [2]})
-        self.assertEqual(len(ret), 2)
-        self.assertEqual(set([ret[0]._uid, ret[1]._uid]), set([self.node1._uid, self.node2._uid]))
-
+        os.remove(self.store_dir)
+
+    def test_upsert(self):
+        self.store.upsert(self.collections[0], [data[0]])
+        res = self.store.get(collection_name=self.collections[0])
+        self.assertEqual(len(res), 1)
+        self.assertEqual(res[0].get('uid'), data[0].get('uid'))
+
+    def test_delete_segments_by_collection(self):
+        self.store.upsert(self.collections[0], [data[0], data[2]])
+        self.store.upsert(self.collections[1], [data[1]])
+        self.store.delete(self.collections[0])
+        res = self.store.get(collection_name=self.collections[0])
+        self.assertEqual(len(res), 0)
+        res = self.store.get(collection_name=self.collections[1])
+        self.assertEqual(len(res), 1)
+        self.assertEqual(res[0].get('uid'), data[1].get('uid'))
+
+    def test_delete_segments_by_kb_id(self):
+        self.store.upsert(self.collections[0], [data[0], data[2]])
+        self.store.delete(self.collections[0], criteria={RAG_KB_ID: 'kb1'})
+        res = self.store.get(collection_name=self.collections[0])
+        self.assertEqual(len(res), 1)
+        self.assertEqual(res[0].get('uid'), data[2].get('uid'))
+        self.store.delete(self.collections[0], criteria={RAG_KB_ID: 'kb3'})
+        res = self.store.get(collection_name=self.collections[0])
+        self.assertEqual(len(res), 0)
+
+    def test_delete_segments_by_uid(self):
+        self.store.upsert(self.collections[0], [data[0], data[2]])
+        self.store.delete(self.collections[0], criteria={'uid': ['uid1']})
+        res = self.store.get(collection_name=self.collections[0])
+        self.assertEqual(len(res), 1)
+        self.assertEqual(res[0].get('uid'), data[2].get('uid'))
+
+    def test_delete_segments_by_doc_id(self):
+        self.store.upsert(self.collections[0], [data[0], data[2]])
+        self.store.delete(self.collections[0], criteria={RAG_DOC_ID: ['doc2']})
+        res = self.store.get(collection_name=self.collections[0])
+        self.assertEqual(len(res), 2)
+        self.store.delete(self.collections[0], criteria={RAG_DOC_ID: ['doc1']})
+        res = self.store.get(collection_name=self.collections[0])
+        self.assertEqual(len(res), 1)
+        self.assertEqual(res[0].get('uid'), data[2].get('uid'))
+
+    def test_get_segments_by_collection(self):
+        self.store.upsert(self.collections[0], [data[0], data[2]])
+        self.store.upsert(self.collections[1], [data[1]])
+        res = self.store.get(collection_name=self.collections[0])
+        self.assertEqual(len(res), 2)
+        res = self.store.get(collection_name=self.collections[1])
+        self.assertEqual(len(res), 1)
+        self.assertEqual(res[0].get('uid'), data[1].get('uid'))
+
+    def test_get_segments_by_kb_id(self):
+        self.store.upsert(self.collections[0], [data[0], data[2]])
+        self.store.upsert(self.collections[1], [data[1]])
+        res = self.store.get(collection_name=self.collections[0], criteria={RAG_KB_ID: 'kb1'})
+        self.assertEqual(len(res), 1)
+        self.assertEqual(res[0].get('uid'), data[0].get('uid'))
+        res = self.store.get(collection_name=self.collections[0], criteria={RAG_KB_ID: 'kb3'})
+        self.assertEqual(len(res), 1)
+        self.assertEqual(res[0].get('uid'), data[2].get('uid'))
+        res = self.store.get(collection_name=self.collections[0], criteria={RAG_KB_ID: 'kb2'})
+        self.assertEqual(len(res), 0)
+        res = self.store.get(collection_name=self.collections[1], criteria={RAG_KB_ID: 'kb2'})
+        self.assertEqual(len(res), 1)
+        self.assertEqual(res[0].get('uid'), data[1].get('uid'))
+
+    def test_get_segments_by_kb_and_doc(self):
+        self.store.upsert(self.collections[0], [data[0], data[2]])
+        self.store.upsert(self.collections[1], [data[1]])
+        res = self.store.get(collection_name=self.collections[0], criteria={RAG_KB_ID: 'kb1', RAG_DOC_ID: ['doc1']})
+        self.assertEqual(len(res), 1)
+        self.assertEqual(res[0].get('uid'), data[0].get('uid'))
+
+    def test_get_segments_by_uid(self):
+        self.store.upsert(self.collections[0], [data[0], data[2]])
+        self.store.upsert(self.collections[1], [data[1]])
+        res = self.store.get(collection_name=self.collections[0], criteria={'uid': ['uid1']})
+        self.assertEqual(len(res), 1)
+        self.assertEqual(res[0].get('uid'), data[0].get('uid'))
+        res = self.store.get(collection_name=self.collections[0], criteria={'uid': ['uid3']})
+        self.assertEqual(len(res), 1)
+        self.assertEqual(res[0].get('uid'), data[2].get('uid'))
+        res = self.store.get(collection_name=self.collections[0], criteria={'uid': ['uid2']})
+        self.assertEqual(len(res), 0)
+        res = self.store.get(collection_name=self.collections[1], criteria={'uid': ['uid2']})
+        self.assertEqual(len(res), 1)
+        self.assertEqual(res[0].get('uid'), data[1].get('uid'))
+
+    def test_get_segments_by_doc_id(self):
+        self.store.upsert(self.collections[0], [data[0], data[2]])
+        self.store.upsert(self.collections[1], [data[1]])
+        res = self.store.get(collection_name=self.collections[0], criteria={RAG_DOC_ID: ['doc1']})
+        self.assertEqual(len(res), 1)
+        self.assertEqual(res[0].get('uid'), data[0].get('uid'))
+        res = self.store.get(collection_name=self.collections[0], criteria={RAG_DOC_ID: ['doc2']})
+        self.assertEqual(len(res), 0)
+        res = self.store.get(collection_name=self.collections[0], criteria={RAG_DOC_ID: ['doc1', 'doc3']})
+        self.assertEqual(len(res), 2)
+
+    def test_search(self):
+        self.store.upsert(self.collections[0], [data[0], data[2]])
+        self.store.upsert(self.collections[1], [data[1]])
+        res = self.store.search(collection_name=self.collections[0], query_embedding=[0.1, 0.2, 0.3],
+                                embed_key='vec_dense', topk=1)
+        self.assertEqual(len(res), 1)
+        self.assertEqual(res[0].get('uid'), data[0].get('uid'))
+        res = self.store.search(collection_name=self.collections[0],
+                                query_embedding={"1563": 0.212890625, "238": 0.1768798828125},
+                                embed_key='vec_sparse', topk=1)
+        self.assertEqual(len(res), 1)
+        self.assertEqual(res[0].get('uid'), data[0].get('uid'))
+        res = self.store.search(collection_name=self.collections[0], query_embedding=[0.3, 0.2, 0.1],
+                                embed_key='vec_dense', topk=1)
+        self.assertEqual(len(res), 1)
+        self.assertEqual(res[0].get('uid'), data[2].get('uid'))
+        res = self.store.search(collection_name=self.collections[0],
+                                query_embedding={"12": 0.212890625, "23": 0.1768798828125},
+                                embed_key='vec_sparse', topk=1)
+        self.assertEqual(len(res), 1)
+        self.assertEqual(res[0].get('uid'), data[2].get('uid'))
+        res = self.store.search(collection_name=self.collections[0], query_embedding=[0.3, 0.2, 0.1],
+                                embed_key='vec_dense', topk=5)
+        self.assertEqual(len(res), 2)
+        self.assertEqual(res[0].get('uid'), data[2].get('uid'))
+        self.assertEqual(res[1].get('uid'), data[0].get('uid'))
+        res = self.store.search(collection_name=self.collections[1], query_embedding=[0.3, 0.2, 0.1],
+                                embed_key='vec_dense', topk=1)
+        self.assertEqual(len(res), 1)
+        self.assertEqual(res[0].get('uid'), data[1].get('uid'))
+
+    def test_search_with_filters(self):
+        self.store.upsert(self.collections[0], [data[0], data[2]])
+        self.store.upsert(self.collections[1], [data[1]])
+        res = self.store.search(collection_name=self.collections[0], query_embedding=[0.1, 0.2, 0.3],
+                                embed_key='vec_dense', topk=2, filters={RAG_KB_ID: ['kb1']})
+        self.assertEqual(len(res), 1)
+        self.assertEqual(res[0].get('uid'), data[0].get('uid'))
+        res = self.store.search(collection_name=self.collections[1], query_embedding=[0.1, 0.2, 0.3],
+                                embed_key='vec_dense', topk=1, filters={RAG_KB_ID: ['kb1']})
+        self.assertEqual(len(res), 0)
+
+    @pytest.mark.skip(reason=("local test for milvus standalone, please set up a milvus standalone server"
+                              " and set the uri to the server"))
+    def test_milvus_standalone(self):
+        self.store1 = MilvusStore(uri=self.uri_standalone, index_kwargs=self.index_kwargs)
+        self.store1.connect(embed_dims=self.embed_dims, embed_datatypes=self.embed_datatypes,
+                            global_metadata_desc=self.global_metadata_desc)
+        self.store1.upsert(self.collections[0], [data[0], data[2]])
+        self.store1.upsert(self.collections[1], [data[1]])
+        res = self.store1.search(collection_name=self.collections[0], query_embedding=[0.1, 0.2, 0.3],
+                                 embed_key='vec_dense', topk=1)
+        self.assertEqual(len(res), 1)
+        self.assertEqual(res[0].get('uid'), data[0].get('uid'))
+
+
+@pytest.mark.skip(reason="To test open search store, please set up a open search server")
+class TestOpenSearchStore(unittest.TestCase):
+    def setUp(self):
+        self.collections = ["col_g1", "col_g2"]
+        self.uri = ""
+        self.client_kwargs = {}
+        self.store = OpenSearchStore(uris=self.uri, client_kwargs=self.client_kwargs)
+        self.store.connect()
 
-@pytest.mark.skip_on_win
-@pytest.mark.skip_on_mac
-class TestMilvusStoreWithSparseEmbedding(unittest.TestCase):
+    def tearDown(self):
+        for collection in self.collections:
+            self.store.delete(collection)
+
+    def test_upsert(self):
+        self.store.upsert(self.collections[0], [data[0]])
+        res = self.store.get(collection_name=self.collections[0])
+        self.assertEqual(len(res), 1)
+        self.assertEqual(res[0].get('uid'), data[0].get('uid'))
+
+    def test_delete_segments_by_collection(self):
+        self.store.upsert(self.collections[0], [data[0], data[2]])
+        self.store.upsert(self.collections[1], [data[1]])
+        self.store.delete(self.collections[0])
+        res = self.store.get(collection_name=self.collections[0])
+        self.assertEqual(len(res), 0)
+        res = self.store.get(collection_name=self.collections[1])
+        self.assertEqual(len(res), 1)
+        self.assertEqual(res[0].get('uid'), data[1].get('uid'))
+
+    def test_delete_segments_by_kb_id(self):
+        self.store.upsert(self.collections[0], [data[0], data[2]])
+        self.store.delete(self.collections[0], criteria={RAG_KB_ID: 'kb1'})
+        res = self.store.get(collection_name=self.collections[0])
+        self.assertEqual(len(res), 1)
+        self.assertEqual(res[0].get('uid'), data[2].get('uid'))
+        self.store.delete(self.collections[0], criteria={RAG_KB_ID: 'kb3'})
+        res = self.store.get(collection_name=self.collections[0])
+        self.assertEqual(len(res), 0)
+
+    def test_delete_segments_by_uid(self):
+        self.store.upsert(self.collections[0], [data[0], data[2]])
+        self.store.delete(self.collections[0], criteria={'uid': ['uid1']})
+        res = self.store.get(collection_name=self.collections[0])
+        self.assertEqual(len(res), 1)
+        self.assertEqual(res[0].get('uid'), data[2].get('uid'))
+
+    def test_delete_segments_by_doc_id(self):
+        self.store.upsert(self.collections[0], [data[0], data[2]])
+        self.store.delete(self.collections[0], criteria={RAG_DOC_ID: ['doc2']})
+        res = self.store.get(collection_name=self.collections[0])
+        self.assertEqual(len(res), 2)
+        self.store.delete(self.collections[0], criteria={RAG_DOC_ID: ['doc1']})
+        res = self.store.get(collection_name=self.collections[0])
+        self.assertEqual(len(res), 1)
+        self.assertEqual(res[0].get('uid'), data[2].get('uid'))
+
+    def test_get_segments_by_collection(self):
+        self.store.upsert(self.collections[0], [data[0], data[2]])
+        self.store.upsert(self.collections[1], [data[1]])
+        res = self.store.get(collection_name=self.collections[0])
+        self.assertEqual(len(res), 2)
+        res = self.store.get(collection_name=self.collections[1])
+        self.assertEqual(len(res), 1)
+        self.assertEqual(res[0].get('uid'), data[1].get('uid'))
+
+    def test_get_segments_by_kb_id(self):
+        self.store.upsert(self.collections[0], [data[0], data[2]])
+        self.store.upsert(self.collections[1], [data[1]])
+        res = self.store.get(collection_name=self.collections[0], criteria={RAG_KB_ID: 'kb1'})
+        self.assertEqual(len(res), 1)
+        self.assertEqual(res[0].get('uid'), data[0].get('uid'))
+        res = self.store.get(collection_name=self.collections[0], criteria={RAG_KB_ID: 'kb3'})
+        self.assertEqual(len(res), 1)
+        self.assertEqual(res[0].get('uid'), data[2].get('uid'))
+        res = self.store.get(collection_name=self.collections[0], criteria={RAG_KB_ID: 'kb2'})
+        self.assertEqual(len(res), 0)
+        res = self.store.get(collection_name=self.collections[1], criteria={RAG_KB_ID: 'kb2'})
+        self.assertEqual(len(res), 1)
+        self.assertEqual(res[0].get('uid'), data[1].get('uid'))
+
+    def test_get_segments_by_uid(self):
+        self.store.upsert(self.collections[0], [data[0], data[2]])
+        self.store.upsert(self.collections[1], [data[1]])
+        res = self.store.get(collection_name=self.collections[0], criteria={'uid': ['uid1']})
+        self.assertEqual(len(res), 1)
+        self.assertEqual(res[0].get('uid'), data[0].get('uid'))
+        res = self.store.get(collection_name=self.collections[0], criteria={'uid': ['uid3']})
+        self.assertEqual(len(res), 1)
+        self.assertEqual(res[0].get('uid'), data[2].get('uid'))
+        res = self.store.get(collection_name=self.collections[0], criteria={'uid': ['uid2']})
+        self.assertEqual(len(res), 0)
+        res = self.store.get(collection_name=self.collections[1], criteria={'uid': ['uid2']})
+        self.assertEqual(len(res), 1)
+        self.assertEqual(res[0].get('uid'), data[1].get('uid'))
+
+    def test_get_segments_by_doc_id(self):
+        self.store.upsert(self.collections[0], [data[0], data[2]])
+        self.store.upsert(self.collections[1], [data[1]])
+        res = self.store.get(collection_name=self.collections[0], criteria={RAG_DOC_ID: ['doc1']})
+        self.assertEqual(len(res), 1)
+        self.assertEqual(res[0].get('uid'), data[0].get('uid'))
+        res = self.store.get(collection_name=self.collections[0], criteria={RAG_DOC_ID: ['doc2']})
+        self.assertEqual(len(res), 0)
+        res = self.store.get(collection_name=self.collections[0], criteria={RAG_DOC_ID: ['doc1', 'doc3']})
+        self.assertEqual(len(res), 2)
+
+
+@pytest.mark.skip(reason="To test sensecore store, please set up a sensecore rag-store server")
+class TestSenseCoreStore(unittest.TestCase):
     def setUp(self):
-        self.mock_embed = {
-            'vec1': MagicMock(return_value={0: 1.0, 1: 2.0, 2: 3.0}),
-            'vec2': MagicMock(return_value={0: 400.0, 1: 500.0, 2: 600.0, 3: 700.0, 4: 800.0}),
+        # sensecore store need kb_id when get or delete
+        self.collections = ["col_block", "col_line"]
+        self.data = [
+            {'uid': 'uid1', 'doc_id': 'doc1', 'group': 'block', 'content': 'test1', 'meta': {},
+             'global_meta': {RAG_DOC_ID: 'doc1', RAG_KB_ID: 'kb1'},
+             'embedding': {'bge_m3_dense': [0.1, 0.2, 0.3],
+                           'bge_m3_sparse': {"1563": 0.212890625, "238": 0.1768798828125}},
+             'type': 1, 'number': 0, 'kb_id': 'kb1',
+             'excluded_embed_metadata_keys': ['file_size', 'file_name', 'file_type'],
+             'excluded_llm_metadata_keys': ['file_size', 'file_name', 'file_type'],
+             'parent': None, 'answer': "", 'image_keys': []},
+
+            {'uid': 'uid2', 'doc_id': 'doc2', 'group': 'line', 'content': 'test2', 'meta': {},
+             'global_meta': {RAG_DOC_ID: 'doc2', RAG_KB_ID: 'kb2'},
+             'embedding': {'bge_m3_dense': [0.3, 0.2, 0.1],
+                           'bge_m3_sparse': {"1563": 0.212890625, "238": 0.1768798828125}},
+             'type': 1, 'number': 0, 'kb_id': 'kb2',
+             'excluded_embed_metadata_keys': ['file_size', 'file_name', 'file_type'],
+             'excluded_llm_metadata_keys': ['file_size', 'file_name', 'file_type'],
+             'parent': 'uid1', 'answer': "", 'image_keys': []},
+        ]
+        self.global_metadata_desc = BUILDIN_GLOBAL_META_DESC
+        self.uri = ""
+        self.s3_config = {
+            "endpoint_url": os.getenv("RAG_S3_ENDPOINT", ""),
+            "access_key": os.getenv("RAG_S3_ACCESS_KEY", ""),
+            "secret_access_key": os.getenv("RAG_S3_SECRET_KEY", ""),
+            "bucket_name": os.getenv("RAG_S3_BUCKET", "rag-data"),
+            "use_minio": os.getenv("RAG_S3_USE_MINIO", "true").lower() == "true",
         }
-        self.global_metadata_desc = {
-            'comment': GlobalMetadataDesc(data_type=DataType.VARCHAR, max_size=65535, default_value=' '),
+        self.image_url_config = {
+            "access_key": os.getenv("RAG_IMAGE_URL_ACCESS_KEY", ""),
+            "secret_access_key": os.getenv("RAG_IMAGE_URL_SECRET_KEY", ""),
+            "endpoint_url": os.getenv("RAG_IMAGE_URL_ENDPOINT", ""),
+            "bucket_name": os.getenv("RAG_IMAGE_URL_BUCKET", "lazyjfs")
         }
+        self.store = SenseCoreStore(uri=self.uri, s3_config=self.s3_config, image_url_config=self.image_url_config)
+        self.store.connect(global_metadata_desc=self.global_metadata_desc)
 
-        self.node_groups = [LAZY_ROOT_NAME, "group1", "group2"]
-        _, self.store_file = tempfile.mkstemp(suffix=".db")
+    def tearDown(self):
+        try:
+            self.store.delete(self.collections[0], criteria={'uid': ['uid1'], RAG_KB_ID: self.data[0].get('kb_id')})
+            self.store.delete(self.collections[1], criteria={'uid': ['uid2'], RAG_KB_ID: self.data[1].get('kb_id')})
+        except Exception:
+            pass
+
+    def test_upsert(self):
+        self.store.upsert(self.collections[0], [self.data[0]])
+        self.store.upsert(self.collections[1], [self.data[1]])
+        res = self.store.get(collection_name=self.collections[0], criteria={RAG_KB_ID: self.data[0].get('kb_id')})
+        self.assertEqual(len(res), 1)
+        self.assertEqual(res[0].get('uid'), self.data[0].get('uid'))
+        res = self.store.get(collection_name=self.collections[1], criteria={RAG_KB_ID: self.data[1].get('kb_id')})
+        self.assertEqual(len(res), 1)
+        self.assertEqual(res[0].get('uid'), self.data[1].get('uid'))
+
+    def test_delete_segments_by_uid(self):
+        self.store.upsert(self.collections[0], [self.data[0]])
+        res = self.store.get(collection_name=self.collections[0], criteria={RAG_KB_ID: self.data[0].get('kb_id')})
+        self.assertEqual(len(res), 1)
+        self.store.delete(self.collections[0], criteria={'uid': ['uid1'], RAG_KB_ID: self.data[0].get('kb_id')})
+        res = self.store.get(collection_name=self.collections[0], criteria={RAG_KB_ID: self.data[0].get('kb_id')})
+        self.assertEqual(len(res), 0)
+
+    def test_delete_segments_by_doc_id(self):
+        self.store.upsert(self.collections[0], [self.data[0]])
+        self.store.delete(self.collections[0], criteria={RAG_DOC_ID: ['doc2'], RAG_KB_ID: self.data[0].get('kb_id')})
+        res = self.store.get(collection_name=self.collections[0], criteria={RAG_KB_ID: self.data[0].get('kb_id')})
+        self.assertEqual(len(res), 1)
+        self.store.delete(self.collections[0], criteria={RAG_DOC_ID: ['doc1'], RAG_KB_ID: self.data[0].get('kb_id')})
+        res = self.store.get(collection_name=self.collections[0], criteria={RAG_KB_ID: self.data[0].get('kb_id')})
+        self.assertEqual(len(res), 0)
+
+    def test_get_segments_by_uid(self):
+        self.store.upsert(self.collections[0], [self.data[0]])
+        self.store.upsert(self.collections[1], [self.data[1]])
+        res = self.store.get(collection_name=self.collections[0],
+                             criteria={'uid': ['uid1'], RAG_KB_ID: self.data[0].get('kb_id')})
+        self.assertEqual(len(res), 1)
+        self.assertEqual(res[0].get('uid'), self.data[0].get('uid'))
+        res = self.store.get(collection_name=self.collections[0],
+                             criteria={'uid': ['uid2'], RAG_KB_ID: self.data[1].get('kb_id')})
+        self.assertEqual(len(res), 0)
+        res = self.store.get(collection_name=self.collections[1],
+                             criteria={'uid': ['uid2'], RAG_KB_ID: self.data[1].get('kb_id')})
+        self.assertEqual(len(res), 1)
+        self.assertEqual(res[0].get('uid'), self.data[1].get('uid'))
+
+    def test_get_segments_by_doc_id(self):
+        self.store.upsert(self.collections[0], [self.data[0]])
+        self.store.upsert(self.collections[1], [self.data[1]])
+        res = self.store.get(collection_name=self.collections[0],
+                             criteria={RAG_DOC_ID: ['doc1'], RAG_KB_ID: self.data[0].get('kb_id')})
+        self.assertEqual(len(res), 1)
+        self.assertEqual(res[0].get('uid'), self.data[0].get('uid'))
+        res = self.store.get(collection_name=self.collections[0],
+                             criteria={RAG_DOC_ID: ['doc2'], RAG_KB_ID: self.data[1].get('kb_id')})
+        self.assertEqual(len(res), 0)
+        res = self.store.get(collection_name=self.collections[1],
+                             criteria={RAG_DOC_ID: ['doc2'], RAG_KB_ID: self.data[1].get('kb_id')})
+        self.assertEqual(len(res), 1)
+        self.assertEqual(res[0].get('uid'), self.data[1].get('uid'))
+
+    def test_search(self):
+        self.store.upsert(self.collections[0], [self.data[0]])
+        self.store.upsert(self.collections[1], [self.data[1]])
+        res = self.store.search(collection_name=self.collections[0], query="test1",
+                                embed_key='bge_m3_dense', topk=1,
+                                filters={RAG_KB_ID: self.data[0].get('kb_id')})
+        self.assertEqual(len(res), 1)
+        print(f"res: {res}")
+        self.assertEqual(res[0].get('uid'), self.data[0].get('uid'))
+        res = self.store.search(collection_name=self.collections[0],
+                                query="test1", embed_key='bge_m3_sparse', topk=1,
+                                filters={RAG_KB_ID: self.data[0].get('kb_id')})
+        self.assertEqual(len(res), 1)
+        self.assertEqual(res[0].get('uid'), self.data[0].get('uid'))
+        res = self.store.search(collection_name=self.collections[1], query="test2",
+                                embed_key='bge_m3_dense', topk=1,
+                                filters={RAG_KB_ID: self.data[1].get('kb_id')})
+        self.assertEqual(len(res), 1)
+        self.assertEqual(res[0].get('uid'), self.data[1].get('uid'))
 
-        embed_keys = set(['vec1', 'vec2'])
-        self.group_embed_keys = {
-            LAZY_ROOT_NAME: embed_keys,
-            'group1': embed_keys,
-            'group2': embed_keys,
-        }
-        self.embed_datatypes = {
-            'vec1': DataType.SPARSE_FLOAT_VECTOR,
-            'vec2': DataType.SPARSE_FLOAT_VECTOR,
-        }
 
-        self.kwargs = {
-            'uri': self.store_file,
-            'index_kwargs': [
-                {
-                    'embed_key': 'vec1',
-                    'index_type': 'SPARSE_INVERTED_INDEX',
-                    'metric_type': 'IP',
-                },
-                {
-                    'embed_key': 'vec2',
-                    'index_type': 'SPARSE_WAND',
-                    'metric_type': 'IP',
+@pytest.mark.skip_on_win
+@pytest.mark.skip_on_mac
+class TestHybridStore(unittest.TestCase):
+    def setUp(self):
+        self.collections = ["col_g1", "col_g2"]
+        self.embed_dims = {"vec_dense": 3}
+        self.embed_datatypes = {"vec_dense": DataType.FLOAT_VECTOR, "vec_sparse": DataType.SPARSE_FLOAT_VECTOR}
+        self.global_metadata_desc = BUILDIN_GLOBAL_META_DESC
+        self.index_kwargs = [
+            {
+                'embed_key': 'vec_dense',
+                'index_type': 'FLAT',
+                'metric_type': 'COSINE',
+                'params': {
+                    'nlist': 128,
                 }
-            ]
-        }
-
-        self.store = MilvusStore(group_embed_keys=self.group_embed_keys, embed=self.mock_embed,
-                                 embed_dims=None, embed_datatypes=self.embed_datatypes,
-                                 global_metadata_desc=self.global_metadata_desc, **self.kwargs)
-
-        self.node1 = DocNode(uid="1", text="text1", group="group1", parent=None,
-                             embedding={"vec1": {0: 1.0, 1: 2.0, 2: 3.0},
-                                        "vec2": {0: 400.0, 1: 500.0, 2: 600.0, 3: 700.0, 4: 800.0}})
-        self.node2 = DocNode(uid="2", text="text2", group="group1", parent=None,
-                             embedding={"vec1": {0: 8.0, 1: 9.0, 2: 10.0},
-                                        "vec2": {0: 11.0, 1: 12.0, 2: 13.0, 3: 14.0, 4: 15.0}})
+            },
+            {
+                'embed_key': 'vec_sparse',
+                'index_type': 'SPARSE_INVERTED_INDEX',
+                'metric_type': 'IP',
+                'params': {
+                    'nlist': 128,
+                }
+            }
+        ]
+        fd, self.store_dir = tempfile.mkstemp(suffix=".db")
+        os.close(fd)
+        self.segment_store = MapStore()
+        self.vector_store = MilvusStore(uri=self.store_dir, index_kwargs=self.index_kwargs)
+        self.store = HybridStore(self.segment_store, self.vector_store)
+        self.store.connect(embed_dims=self.embed_dims, embed_datatypes=self.embed_datatypes,
+                           global_metadata_desc=self.global_metadata_desc, collections=self.collections)
 
     def tearDown(self):
-        os.remove(self.store_file)
-
-    def test_sparse_embedding(self):
-        self.store.update_nodes([self.node1, self.node2])
-
-        ret = self.store.query(query='test', group_name='group1', embed_keys=['vec1'], topk=1)
-        self.assertEqual(len(ret), 1)
-        self.assertEqual(ret[0]._uid, self.node2._uid)
-
-        ret = self.store.query(query='test', group_name='group1', embed_keys=['vec2'], topk=1)
-        self.assertEqual(len(ret), 1)
-        self.assertEqual(ret[0]._uid, self.node1._uid)
+        os.remove(self.store_dir)
+
+    def test_upsert(self):
+        self.store.upsert(self.collections[0], [data[0]])
+        res = self.store.get(collection_name=self.collections[0])
+        self.assertEqual(len(res), 1)
+        self.assertEqual(res[0].get('uid'), data[0].get('uid'))
+
+    def test_delete_segments_by_collection(self):
+        self.store.upsert(self.collections[0], [data[0], data[2]])
+        self.store.upsert(self.collections[1], [data[1]])
+        self.store.delete(self.collections[0])
+        res = self.store.get(collection_name=self.collections[0])
+        self.assertEqual(len(res), 0)
+        res = self.store.get(collection_name=self.collections[1])
+        self.assertEqual(len(res), 1)
+        self.assertEqual(res[0].get('uid'), data[1].get('uid'))
+
+    def test_delete_segments_by_kb_id(self):
+        self.store.upsert(self.collections[0], [data[0], data[2]])
+        self.store.delete(self.collections[0], criteria={RAG_KB_ID: 'kb1'})
+        res = self.store.get(collection_name=self.collections[0])
+        self.assertEqual(len(res), 1)
+        self.assertEqual(res[0].get('uid'), data[2].get('uid'))
+        self.store.delete(self.collections[0], criteria={RAG_KB_ID: 'kb3'})
+        res = self.store.get(collection_name=self.collections[0])
+        self.assertEqual(len(res), 0)
+
+    def test_delete_segments_by_uid(self):
+        self.store.upsert(self.collections[0], [data[0], data[2]])
+        self.store.delete(self.collections[0], criteria={'uid': ['uid1']})
+        res = self.store.get(collection_name=self.collections[0])
+        self.assertEqual(len(res), 1)
+        self.assertEqual(res[0].get('uid'), data[2].get('uid'))
+
+    def test_delete_segments_by_doc_id(self):
+        self.store.upsert(self.collections[0], [data[0], data[2]])
+        self.store.delete(self.collections[0], criteria={RAG_DOC_ID: ['doc2']})
+        res = self.store.get(collection_name=self.collections[0], )
+        self.assertEqual(len(res), 2)
+        self.store.delete(self.collections[0], criteria={RAG_DOC_ID: ['doc1']})
+        res = self.store.get(collection_name=self.collections[0])
+        self.assertEqual(len(res), 1)
+        self.assertEqual(res[0].get('uid'), data[2].get('uid'))
+
+    def test_get_segments_by_collection(self):
+        self.store.upsert(self.collections[0], [data[0], data[2]])
+        self.store.upsert(self.collections[1], [data[1]])
+        res = self.store.get(collection_name=self.collections[0])
+        self.assertEqual(len(res), 2)
+        res = self.store.get(collection_name=self.collections[1])
+        self.assertEqual(len(res), 1)
+        self.assertEqual(res[0].get('uid'), data[1].get('uid'))
+
+    def test_get_segments_by_kb_id(self):
+        self.store.upsert(self.collections[0], [data[0], data[2]])
+        self.store.upsert(self.collections[1], [data[1]])
+        res = self.store.get(collection_name=self.collections[0], criteria={RAG_KB_ID: 'kb1'})
+        self.assertEqual(len(res), 1)
+        self.assertEqual(res[0].get('uid'), data[0].get('uid'))
+        res = self.store.get(collection_name=self.collections[0], criteria={RAG_KB_ID: 'kb3'})
+        self.assertEqual(len(res), 1)
+        self.assertEqual(res[0].get('uid'), data[2].get('uid'))
+        res = self.store.get(collection_name=self.collections[0], criteria={RAG_KB_ID: 'kb2'})
+        self.assertEqual(len(res), 0)
+        res = self.store.get(collection_name=self.collections[1], criteria={RAG_KB_ID: 'kb2'})
+        self.assertEqual(len(res), 1)
+        self.assertEqual(res[0].get('uid'), data[1].get('uid'))
+
+    def test_get_segments_by_uid(self):
+        self.store.upsert(self.collections[0], [data[0], data[2]])
+        self.store.upsert(self.collections[1], [data[1]])
+        res = self.store.get(collection_name=self.collections[0], criteria={'uid': ['uid1']})
+        self.assertEqual(len(res), 1)
+        self.assertEqual(res[0].get('uid'), data[0].get('uid'))
+        res = self.store.get(collection_name=self.collections[0], criteria={'uid': ['uid3']})
+        self.assertEqual(len(res), 1)
+        self.assertEqual(res[0].get('uid'), data[2].get('uid'))
+        res = self.store.get(collection_name=self.collections[0], criteria={'uid': ['uid2']})
+        self.assertEqual(len(res), 0)
+        res = self.store.get(collection_name=self.collections[1], criteria={'uid': ['uid2']})
+        self.assertEqual(len(res), 1)
+        self.assertEqual(res[0].get('uid'), data[1].get('uid'))
+
+    def test_get_segments_by_doc_id(self):
+        self.store.upsert(self.collections[0], [data[0], data[2]])
+        self.store.upsert(self.collections[1], [data[1]])
+        res = self.store.get(collection_name=self.collections[0], criteria={RAG_DOC_ID: ['doc1']})
+        self.assertEqual(len(res), 1)
+        self.assertEqual(res[0].get('uid'), data[0].get('uid'))
+        res = self.store.get(collection_name=self.collections[0], criteria={RAG_DOC_ID: ['doc2']})
+        self.assertEqual(len(res), 0)
+        res = self.store.get(collection_name=self.collections[0], criteria={RAG_DOC_ID: ['doc1', 'doc3']})
+        self.assertEqual(len(res), 2)
+
+    def test_search(self):
+        self.store.upsert(self.collections[0], [data[0], data[2]])
+        self.store.upsert(self.collections[1], [data[1]])
+        res = self.store.search(collection_name=self.collections[0], query="test1",
+                                query_embedding=[0.1, 0.2, 0.3], embed_key='vec_dense', topk=1)
+        self.assertEqual(len(res), 1)
+        self.assertEqual(res[0].get('uid'), data[0].get('uid'))
+        res = self.store.search(collection_name=self.collections[0], query="test1",
+                                query_embedding={"1563": 0.212890625, "238": 0.1768798828125},
+                                embed_key='vec_sparse', topk=1)
+        self.assertEqual(len(res), 1)
+        self.assertEqual(res[0].get('uid'), data[0].get('uid'))
+        res = self.store.search(collection_name=self.collections[0], query="test3",
+                                query_embedding=[0.3, 0.2, 0.1], embed_key='vec_dense', topk=1)
+        self.assertEqual(len(res), 1)
+        self.assertEqual(res[0].get('uid'), data[2].get('uid'))
+        res = self.store.search(collection_name=self.collections[0], query="test3",
+                                query_embedding={"12": 0.212890625, "23": 0.1768798828125},
+                                embed_key='vec_sparse', topk=1)
+        self.assertEqual(len(res), 1)
+        self.assertEqual(res[0].get('uid'), data[2].get('uid'))
+        res = self.store.search(collection_name=self.collections[0], query="test3",
+                                query_embedding=[0.3, 0.2, 0.1], embed_key='vec_dense', topk=5)
+        self.assertEqual(len(res), 2)
+        self.assertEqual(res[0].get('uid'), data[2].get('uid'))
+        self.assertEqual(res[1].get('uid'), data[0].get('uid'))
+        res = self.store.search(collection_name=self.collections[1], query="test2",
+                                query_embedding=[0.3, 0.2, 0.1], embed_key='vec_dense', topk=1)
+        self.assertEqual(len(res), 1)
+        self.assertEqual(res[0].get('uid'), data[1].get('uid'))
+
+    def test_search_with_filters(self):
+        self.store.upsert(self.collections[0], [data[0], data[2]])
+        self.store.upsert(self.collections[1], [data[1]])
+        res = self.store.search(collection_name=self.collections[0], query="test1",
+                                query_embedding=[0.1, 0.2, 0.3], embed_key='vec_dense', topk=2,
+                                filters={RAG_KB_ID: ['kb1']})
+        self.assertEqual(len(res), 1)
+        self.assertEqual(res[0].get('uid'), data[0].get('uid'))
+        res = self.store.search(collection_name=self.collections[1], query="test2",
+                                query_embedding=[0.1, 0.2, 0.3], embed_key='vec_dense', topk=1,
+                                filters={RAG_KB_ID: ['kb1']})
+        self.assertEqual(len(res), 0)
diff --git a/tests/charge_tests/test_doc_to_db.py b/tests/charge_tests/test_doc_to_db.py
index ae8bc0cf2..830b60f7f 100644
--- a/tests/charge_tests/test_doc_to_db.py
+++ b/tests/charge_tests/test_doc_to_db.py
@@ -14,6 +14,7 @@ def setUpClass(cls):
         assert data_root_dir
         cls.pdf_root = os.path.join(data_root_dir, "rag_master/default/__data/pdfs")
 
+    @pytest.mark.skip(reason="Skip for now, will be fixed in v0.6")
     def test_doc_to_db_sop(self):
         sql_manager = SqlManager("SQLite", None, None, None, None, db_name=":memory:")
         documents = lazyllm.Document(dataset_path=self.pdf_root, create_ui=False)
diff --git a/tests/doc_check/test_doc_api_check.py b/tests/doc_check/test_doc_api_check.py
index 715a5f288..71c8b2088 100644
--- a/tests/doc_check/test_doc_api_check.py
+++ b/tests/doc_check/test_doc_api_check.py
@@ -26,7 +26,7 @@ def get_sub_classes(module):
     """Get all valid subclasses from a module recursively."""
     clsmembers = inspect.getmembers(module, inspect.isclass)
     classes = set([ele[1] for ele in clsmembers if class_should_check(ele[1], module)])
-    for name, sub_module in inspect.getmembers(module, inspect.ismodule):
+    for _, sub_module in inspect.getmembers(module, inspect.ismodule):
         if sub_module.__name__.startswith(module.__name__):
             classes.update(get_sub_classes(sub_module))
     return classes
@@ -66,7 +66,7 @@ def get_doc_from_language(cls, func: Callable, language: str = 'ENGLISH'):
         warnings.warn(
             f"Documentation for {cls.__name__}.{func.__name__} is written directly in the "
             f"function/class. Please use add_{language.lower()}_doc instead.",
-            UserWarning
+            UserWarning, stacklevel=2
         )
         return None
 
@@ -116,7 +116,7 @@ def do_check_method(cls, func: Callable):
         warnings.warn(
             f"Documentation for {cls.__name__}.{func.__name__} is written directly in the "
             f"function/class. Please use add_english_doc and add_chinese_doc instead.",
-            UserWarning
+            UserWarning, stacklevel=2
         )
         return
 
diff --git a/tests/requirements.txt b/tests/requirements.txt
index 02f94e177..2007388d8 100644
--- a/tests/requirements.txt
+++ b/tests/requirements.txt
@@ -16,4 +16,3 @@ rapidfuzz
 volcengine-python-sdk[ark]>=4.0.6
 dashscope>=1.23.6
 zhipuai>=2.1.5.20250708
-