66 @date:2023/10/20 14:01
77 @desc:
88"""
9+ import datetime
910import os
1011import threading
11- import datetime
1212import traceback
1313from typing import List
1414
1515import django .db .models
1616from django .db .models import QuerySet
1717from django .db .models .functions import Substr , Reverse
18+ from django .utils .translation import gettext_lazy as _
1819from langchain_core .embeddings import Embeddings
1920
2021from common .config .embedding_config import VectorStore
2324from common .utils .lock import RedisLock
2425from common .utils .logger import maxkb_logger
2526from common .utils .page_utils import page_desc
26- from knowledge .models import Paragraph , Status , Document , ProblemParagraphMapping , TaskType , State ,SourceType , SearchMode
27+ from knowledge .models import Paragraph , Status , Document , ProblemParagraphMapping , TaskType , State , SourceType , \
28+ SearchMode
2729from maxkb .conf import (PROJECT_DIR )
28- from django .utils .translation import gettext_lazy as _
29-
3030
3131lock = threading .Lock ()
3232
@@ -91,8 +91,9 @@ def embedding_by_paragraph_list(paragraph_id_list, embedding_model: Embeddings):
9191
9292 @staticmethod
9393 def embedding_by_paragraph_data_list (data_list , paragraph_id_list , embedding_model : Embeddings ):
94- maxkb_logger .info (_ ('Start--->Embedding paragraph: {paragraph_id_list}' ).format (paragraph_id_list = paragraph_id_list ))
95- status = State .SUCCESS
94+ maxkb_logger .info (_ ('Start--->Embedding paragraph: {paragraph_id_list}' ).format (
95+ paragraph_id_list = paragraph_id_list )
96+ )
9697 try :
9798 # 删除段落
9899 VectorStore .get_embedding_vector ().delete_by_paragraph_ids (paragraph_id_list )
@@ -102,14 +103,20 @@ def is_save_function():
102103
103104 # 批量向量化
104105 VectorStore .get_embedding_vector ().batch_save (data_list , embedding_model , is_save_function )
106+ ListenerManagement .update_status (
107+ QuerySet (Paragraph ).filter (id__in = paragraph_id_list ), TaskType .EMBEDDING , State .SUCCESS
108+ )
105109 except Exception as e :
106110 maxkb_logger .error (_ ('Vectorized paragraph: {paragraph_id_list} error {error} {traceback}' ).format (
107- paragraph_id_list = paragraph_id_list , error = str (e ), traceback = traceback .format_exc ()))
108- status = State .FAILURE
111+ paragraph_id_list = paragraph_id_list , error = str (e ), traceback = traceback .format_exc ())
112+ )
113+ ListenerManagement .update_status (
114+ QuerySet (Paragraph ).filter (id__in = paragraph_id_list ), TaskType .EMBEDDING , State .FAILURE
115+ )
109116 finally :
110- QuerySet ( Paragraph ). filter ( id__in = paragraph_id_list ). update ( ** { 'status' : status })
111- maxkb_logger . info (
112- _ ( 'End--->Embedding paragraph: {paragraph_id_list}' ). format ( paragraph_id_list = paragraph_id_list ) )
117+ maxkb_logger . info ( _ ( 'End--->Embedding paragraph: {paragraph_id_list}' ). format (
118+ paragraph_id_list = paragraph_id_list )
119+ )
113120
114121 @staticmethod
115122 def embedding_by_paragraph (paragraph_id , embedding_model : Embeddings ):
@@ -266,12 +273,11 @@ def is_the_task_interrupted():
266273 if is_the_task_interrupted ():
267274 return
268275 maxkb_logger .info (_ ('Start--->Embedding document: {document_id}' ).format (document_id = document_id )
269- )
276+ )
270277 # Mark the document's embedding task as STARTED
271278 ListenerManagement .update_status (QuerySet (Document ).filter (id = document_id ), TaskType .EMBEDDING ,
272279 State .STARTED )
273280
274-
275281 # 根据段落进行向量化处理
276282 page_desc (QuerySet (Paragraph )
277283 .annotate (
@@ -381,5 +387,6 @@ def hit_test(query_text, knowledge_id: list[str], exclude_document_id_list: list
381387 similarity : float ,
382388 search_mode : SearchMode ,
383389 embedding : Embeddings ):
384- return VectorStore .get_embedding_vector ().hit_test (query_text , knowledge_id , exclude_document_id_list , top_number ,
390+ return VectorStore .get_embedding_vector ().hit_test (query_text , knowledge_id , exclude_document_id_list ,
391+ top_number ,
385392 similarity , search_mode , embedding )
0 commit comments