feat: add basic enhancer

liningping · liningping · commit 2a9bc878adc3 · 2025-08-22T08:23:01.000+08:00
diff --git a/.env.template b/.env.template
@@ -51,4 +51,13 @@ DEBUG=false
 LOG_LEVEL=INFO
 
 # PDF解析
-MINERU_MODEL_SOURCE=local
+MINERU_MODEL_SOURCE=local
+
+# 信息增强
+LLM_MODEL_NAME=gpt-4o
+LLM_BASE_URL=http://192.168.120.2:4000
+LLM_API_KEY=ae
+
+VLLM_MODEL_NAME=qwen2.5-vl-7b-instruct
+VLLM_API_KEY=sk-
+VLLM_BASE_URL=https://dashscope.aliyuncs.com/compatible-mode/v1
diff --git a/config.py b/config.py
@@ -33,4 +33,13 @@ class Settings:
     MAX_FILES_PER_REQUEST: int = int(os.getenv("MAX_FILES_PER_REQUEST", "20"))
     TASK_TIMEOUT: int = int(os.getenv("TASK_TIMEOUT", "3600"))  # 1小时
 
+    # Model configuration; every value can be overridden through an environment variable.
+    # NOTE(review): the base-URL default is a private-network address and the API-key
+    # defaults look like placeholders — confirm deployments always override them.
+    # Text LLM settings: get_enhancer() passes these to every enhancer except the image one.
+    LLM_MODEL_NAME: str = os.getenv("LLM_MODEL_NAME", "gpt-4o")
+    LLM_BASE_URL: str = os.getenv("LLM_BASE_URL", "http://192.168.120.2:4000")
+    LLM_API_KEY: str = os.getenv("LLM_API_KEY", "sk-")
+
+    # Vision-language model settings: get_enhancer() passes these to the image enhancer.
+    VLLM_MODEL_NAME: str = os.getenv("VLLM_MODEL_NAME", "qwen2.5-vl-7b-instruct")
+    VLLM_API_KEY: str = os.getenv("VLLM_API_KEY", "sk-")
+    VLLM_BASE_URL: str = os.getenv("VLLM_BASE_URL", "https://dashscope.aliyuncs.com/compatible-mode/v1")
+
 settings = Settings()
diff --git a/enhancers/base_models.py b/enhancers/base_models.py
@@ -1,32 +1,44 @@
 from abc import ABC, abstractmethod
+from typing import Any
+
+from openai import AsyncOpenAI
+from pydantic import BaseModel
+from tenacity import retry, stop_after_attempt, wait_exponential
 
 from parsers.base_models import ChunkData
 
+MAX_RETRIES = 3
+WAIT_TIME = 4
+WAIT_MAX_TIME = 15
+MULTIPLIER = 1
+
+class JsonResponseFormat(BaseModel):
+    """Schema of the structured JSON reply requested from the model."""
+
+    # The single required string field of the reply.
+    description: str
 
 class InformationEnhancer(ABC):
     """信息增强器基类"""
+    def __init__(self, model_name: str, base_url: str, api_key: str):
+        """Store the model settings and build the async OpenAI client.
+
+        Args:
+            model_name: Model identifier stored on the instance as ``model_name``.
+            base_url: Base URL handed to ``AsyncOpenAI``.
+            api_key: API key handed to ``AsyncOpenAI``.
+        """
+        self.model_name = model_name
+        self.system_prompt = "You are a helpful assistant."
+        self.client = AsyncOpenAI(base_url=base_url, api_key=api_key)
+
     @abstractmethod
     async def enhance(self, information: ChunkData) -> ChunkData:
         """增强信息"""
         pass
 
-class TableInformationEnhancer(InformationEnhancer):
-    """表格信息增强器"""
-
-    async def enhance(self, information: ChunkData) -> ChunkData:
-        """增强信息"""
-        return information
-
-class FormulasInformationEnhancer(InformationEnhancer):
-    """公式信息增强器"""
-
-    async def enhance(self, information: ChunkData) -> ChunkData:
-        """增强信息"""
-        return information
-
-class ImageInformationEnhancer(InformationEnhancer):
-    """图片信息增强器"""
-
-    async def enhance(self, information: ChunkData) -> ChunkData:
-        """增强信息"""
-        return information
+    # NOTE(review): tenacity retries on every exception by default, including the
+    # refusal ValueError raised below, and without reraise=True the caller receives
+    # tenacity.RetryError once the attempts are exhausted — confirm this is intended.
+    @retry(stop=stop_after_attempt(MAX_RETRIES), wait=wait_exponential(multiplier=MULTIPLIER, min=WAIT_TIME, max=WAIT_MAX_TIME))
+    async def get_structured_response(
+        self,
+        user_prompt: list[dict[str, Any]],
+        response_format: type[JsonResponseFormat],
+    ) -> JsonResponseFormat | None:
+        """Request a chat completion and return it parsed into ``response_format``.
+
+        Args:
+            user_prompt: Content parts sent as the ``user`` message.
+            response_format: The model *class* (``JsonResponseFormat`` or a
+                subclass) that the reply is parsed into.
+
+        Returns:
+            The parsed model instance from the first choice, or ``None`` when
+            the client could not produce a parsed value.
+
+        Raises:
+            ValueError: If the model refused the request (subject to the retry
+                wrapper described above).
+        """
+        response = await self.client.chat.completions.parse(
+            model=self.model_name,
+            messages=[
+                {"role": "system", "content": self.system_prompt},
+                {"role": "user", "content": user_prompt} # type: ignore
+            ],
+            response_format=response_format # type: ignore
+        )
+        # Only the first choice is inspected.
+        message = response.choices[0].message
+        if message.refusal:
+            raise ValueError(f"模型拒绝了请求: {message.refusal}")
+        return message.parsed
diff --git a/enhancers/enhancer_registry.py b/enhancers/enhancer_registry.py
@@ -7,6 +7,7 @@
 import logging
 from collections.abc import Callable
 
+from config import settings
 from enhancers.base_models import InformationEnhancer
 from parsers.base_models import ChunkType
 
@@ -67,7 +68,11 @@ def get_enhancer(modality: ChunkType) -> InformationEnhancer | None:
 
     enhancer_class = ENHANCER_REGISTRY[modality_type]
     try:
-        return enhancer_class()
+        # `case X.lower():` is a class pattern and raises TypeError at match time; compare with == instead.
+        if modality_type == ChunkType.IMAGE.value.lower():
+            return enhancer_class(settings.VLLM_MODEL_NAME, settings.VLLM_BASE_URL, settings.VLLM_API_KEY)
+        # Every other modality is served by the text LLM settings.
+        return enhancer_class(settings.LLM_MODEL_NAME, settings.LLM_BASE_URL, settings.LLM_API_KEY)
     except Exception as e:
         logger.error(f"创建信息增强器实例失败: {enhancer_class.__name__}, 错误: {e}")
         return None
diff --git a/pyproject.toml b/pyproject.toml
@@ -18,6 +18,7 @@ dependencies = [
     "docling>=2.45.0",
     "mineru[core]>=2.1.11",
     "beautifulsoup4>=4.13.4",
+    "tenacity>=9.1.2",
 ]
 
 [dependency-groups]
diff --git a/uv.lock b/uv.lock

Original file line number	Diff line number	Diff line change
`@@ -18,6 +18,7 @@ dependencies = [`
`18`	`18`	`"docling>=2.45.0",`
`19`	`19`	`"mineru[core]>=2.1.11",`
`20`	`20`	`"beautifulsoup4>=4.13.4",`
	`21`	`+ "tenacity>=9.1.2",`
`21`	`22`	`]`
`22`	`23`
`23`	`24`	`[dependency-groups]`