@@ -36,10 +36,16 @@ vectordb:
36
36
host : ${oc.env:VDB_HOST, milvus}
37
37
port : ${oc.env:VDB_PORT, 19530}
38
38
connector_name : ${oc.env:VDB_CONNECTOR_NAME, milvus}
39
- collection_name : vdb_test
40
- hybrid_mode : true
39
+ collection_name : ${oc.env:VDB_COLLECTION_NAME, vdb_test}
40
+ hybrid_search : true
41
41
enable : true
42
42
43
+ rdb :
44
+ host : ${oc.env:POSTGRES_HOST, rdb}
45
+ port : ${oc.env:POSTGRES_PORT, 5432}
46
+ user : ${oc.env:POSTGRES_USER, root}
47
+ password : ${oc.env:POSTGRES_PASSWORD, root_password}
48
+
43
49
reranker :
44
50
enable : ${oc.decode:${oc.env:RERANKER_ENABLED, true}}
45
51
model_name : ${oc.env:RERANKER_MODEL, Alibaba-NLP/gte-multilingual-reranker-base}
@@ -69,9 +75,21 @@ loader:
69
75
image_captioning : true
70
76
save_markdown : false
71
77
audio_model : ${oc.env:WHISPER_MODEL, base} # tiny, base, small, medium, large-v1, large-v2, large-v3
78
+ mimetypes :
79
+ text/plain : .txt
80
+ text/markdown : .md
81
+ application/pdf : .pdf
82
+ message/rfc822 : .eml
83
+ application/vnd.openxmlformats-officedocument.wordprocessingml.document : .docx
84
+ application/vnd.openxmlformats-officedocument.presentationml.presentation : .pptx
85
+ application/msword : .doc
86
+ image/png : .png
87
+ image/jpeg : .jpeg
88
+ audio/vnd.wav : .wav
89
+ audio/mpeg : .mp3
72
90
file_loaders :
73
91
txt : TextLoader
74
- pdf : ${oc.env:PDFLoader, DoclingLoader } # DoclingLoader # MarkerLoader # PyMuPDFLoader # Custompymupdf4llm
92
+ pdf : ${oc.env:PDFLoader, MarkerLoader } # options: DoclingLoader, MarkerLoader, PyMuPDFLoader, Custompymupdf4llm
75
93
eml : EmlLoader
76
94
docx : MarkItDownLoader
77
95
pptx : PPTXLoader
@@ -93,8 +111,26 @@ loader:
93
111
marker_max_processes : ${oc.decode:${oc.env:MARKER_MAX_PROCESSES, 2}}
94
112
marker_min_processes : ${oc.decode:${oc.env:MARKER_MIN_PROCESSES, 1}}
95
113
marker_num_gpus : ${oc.decode:${oc.env:MARKER_NUM_GPUS, 0.01}}
114
+ marker_timeout : ${oc.decode:${oc.env:MARKER_TIMEOUT, 3600}}
96
115
97
116
ray :
98
117
num_gpus : ${oc.decode:${oc.env:RAY_NUM_GPUS, 0.01}}
99
118
pool_size : ${oc.decode:${oc.env:RAY_POOL_SIZE, 1}}
100
119
max_tasks_per_worker : ${oc.decode:${oc.env:RAY_MAX_TASKS_PER_WORKER, 5}}
120
+ indexer :
121
+ max_task_retries : ${oc.decode:${oc.env:RAY_MAX_TASK_RETRIES, 2}}
122
+ serialize_timeout : ${oc.decode:${oc.env:INDEXER_SERIALIZE_TIMEOUT, 36000}}
123
+ concurrency_groups :
124
+ default : ${oc.decode:${oc.env:INDEXER_DEFAULT_CONCURRENCY, 1000}}
125
+ update : ${oc.decode:${oc.env:INDEXER_UPDATE_CONCURRENCY, 100}}
126
+ search : ${oc.decode:${oc.env:INDEXER_SEARCH_CONCURRENCY, 100}}
127
+ delete : ${oc.decode:${oc.env:INDEXER_DELETE_CONCURRENCY, 100}}
128
+ chunk : ${oc.decode:${oc.env:INDEXER_CHUNK_CONCURRENCY, 1000}}
129
+ insert : ${oc.decode:${oc.env:INDEXER_INSERT_CONCURRENCY, 1}}
130
+ semaphore :
131
+ concurrency : ${oc.decode:${oc.env:RAY_SEMAPHORE_CONCURRENCY, 100000}}
132
+ serve :
133
+ enable : ${oc.decode:${oc.env:ENABLE_RAY_SERVE, false}}
134
+ num_replicas : ${oc.decode:${oc.env:RAY_SERVE_NUM_REPLICAS, 1}}
135
+ host : ${oc.env:RAY_SERVE_HOST, 0.0.0.0}
136
+ port : ${oc.env:RAY_SERVE_PORT, 8080}
0 commit comments