@@ -142,8 +142,7 @@ def index(
142
142
Returns:
143
143
str: A unique ID for the file and indexing arguments combination
144
144
"""
145
- doc_id = self .generate_file_id (
146
- tool_id = tool_id ,
145
+ doc_id = self .generate_index_key (
147
146
vector_db = vector_db_instance_id ,
148
147
embedding = embedding_instance_id ,
149
148
x2text = x2text_instance_id ,
@@ -335,9 +334,8 @@ def index(
335
334
finally :
336
335
vector_db .close ()
337
336
338
- def generate_file_id (
337
+ def generate_index_key (
339
338
self ,
340
- tool_id : str ,
341
339
vector_db : str ,
342
340
embedding : str ,
343
341
x2text : str ,
@@ -349,7 +347,6 @@ def generate_file_id(
349
347
"""Generates a unique ID useful for identifying files during indexing.
350
348
351
349
Args:
352
- tool_id (str): Unique ID of the tool or workflow
353
350
vector_db (str): UUID of the vector DB adapter
354
351
embedding (str): UUID of the embedding adapter
355
352
x2text (str): UUID of the X2Text adapter
@@ -373,7 +370,6 @@ def generate_file_id(
373
370
# which might not be relevant to indexing. This is easier for now than
374
371
# marking certain keys of the adapter config as necessary.
375
372
index_key = {
376
- "tool_id" : tool_id ,
377
373
"file_hash" : file_hash ,
378
374
"vector_db_config" : ToolAdapter .get_adapter_config (self .tool , vector_db ),
379
375
"embedding_config" : ToolAdapter .get_adapter_config (self .tool , embedding ),
@@ -388,7 +384,28 @@ def generate_file_id(
388
384
hashed_index_key = ToolUtils .hash_str (json .dumps (index_key , sort_keys = True ))
389
385
return hashed_index_key
390
386
391
- @deprecated ("Instantiate Index and call index() instead" )
387
@deprecated(version="0.45.0", reason="Use generate_index_key() instead")
def generate_file_id(
    self,
    tool_id: str,
    vector_db: str,
    embedding: str,
    x2text: str,
    chunk_size: str,
    chunk_overlap: str,
    file_path: Optional[str] = None,
    file_hash: Optional[str] = None,
) -> str:
    """Backward-compatible wrapper around ``generate_index_key()``.

    Deprecated since 0.45.0; call ``generate_index_key()`` directly.

    Args:
        tool_id (str): Accepted only so existing callers keep working;
            it is not forwarded to ``generate_index_key()``.
        vector_db (str): Forwarded to ``generate_index_key()``.
        embedding (str): Forwarded to ``generate_index_key()``.
        x2text (str): Forwarded to ``generate_index_key()``.
        chunk_size (str): Forwarded to ``generate_index_key()``.
        chunk_overlap (str): Forwarded to ``generate_index_key()``.
        file_path (Optional[str]): Forwarded to ``generate_index_key()``.
            Defaults to None.
        file_hash (Optional[str]): Forwarded to ``generate_index_key()``.
            Defaults to None.

    Returns:
        str: The value returned by ``generate_index_key()``.
    """
    # The result must be returned explicitly: without `return`, this
    # wrapper would hand callers None despite the `-> str` annotation.
    return self.generate_index_key(
        vector_db,
        embedding,
        x2text,
        chunk_size,
        chunk_overlap,
        file_path,
        file_hash,
    )
408
+
392
409
def index_file (
393
410
self ,
394
411
tool_id : str ,
0 commit comments