Skip to content

Commit b49d6f4

Browse files
Remove tool id from index key id generation (#88)
* Index - tool - id removal change
* Update version
* Add version in deprecation details
1 parent f5fd84f commit b49d6f4

File tree

2 files changed

+25
-8
lines changed

2 files changed

+25
-8
lines changed

src/unstract/sdk/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
__version__ = "0.44.0"
1+
__version__ = "0.45.0"
22

33

44
def get_sdk_version():

src/unstract/sdk/index.py

Lines changed: 24 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -142,8 +142,7 @@ def index(
142142
Returns:
143143
str: A unique ID for the file and indexing arguments combination
144144
"""
145-
doc_id = self.generate_file_id(
146-
tool_id=tool_id,
145+
doc_id = self.generate_index_key(
147146
vector_db=vector_db_instance_id,
148147
embedding=embedding_instance_id,
149148
x2text=x2text_instance_id,
@@ -335,9 +334,8 @@ def index(
335334
finally:
336335
vector_db.close()
337336

338-
def generate_file_id(
337+
def generate_index_key(
339338
self,
340-
tool_id: str,
341339
vector_db: str,
342340
embedding: str,
343341
x2text: str,
@@ -349,7 +347,6 @@ def generate_file_id(
349347
"""Generates a unique ID useful for identifying files during indexing.
350348
351349
Args:
352-
tool_id (str): Unique ID of the tool or workflow
353350
vector_db (str): UUID of the vector DB adapter
354351
embedding (str): UUID of the embedding adapter
355352
x2text (str): UUID of the X2Text adapter
@@ -373,7 +370,6 @@ def generate_file_id(
373370
# which might not be relevant to indexing. This is easier for now than
374371
# marking certain keys of the adapter config as necessary.
375372
index_key = {
376-
"tool_id": tool_id,
377373
"file_hash": file_hash,
378374
"vector_db_config": ToolAdapter.get_adapter_config(self.tool, vector_db),
379375
"embedding_config": ToolAdapter.get_adapter_config(self.tool, embedding),
@@ -388,7 +384,28 @@ def generate_file_id(
388384
hashed_index_key = ToolUtils.hash_str(json.dumps(index_key, sort_keys=True))
389385
return hashed_index_key
390386

391-
@deprecated("Instantiate Index and call index() instead")
387+
@deprecated(version="0.45.0", reason="Use generate_index_key() instead")
def generate_file_id(
    self,
    tool_id: str,
    vector_db: str,
    embedding: str,
    x2text: str,
    chunk_size: str,
    chunk_overlap: str,
    file_path: Optional[str] = None,
    file_hash: Optional[str] = None,
) -> str:
    """Deprecated wrapper that delegates to ``generate_index_key()``.

    Kept so that existing callers of the old name keep working.

    Args:
        tool_id (str): Accepted for backward compatibility only; it is not
            forwarded and does not contribute to the generated key.
        vector_db (str): UUID of the vector DB adapter
        embedding (str): UUID of the embedding adapter
        x2text (str): UUID of the X2Text adapter
        chunk_size (str): Forwarded unchanged to ``generate_index_key()``
        chunk_overlap (str): Forwarded unchanged to ``generate_index_key()``
        file_path (Optional[str]): Forwarded unchanged to
            ``generate_index_key()``. Defaults to None.
        file_hash (Optional[str]): Forwarded unchanged to
            ``generate_index_key()``. Defaults to None.

    Returns:
        str: The key produced by ``generate_index_key()``
    """
    # The result must be returned: without it this wrapper always yields
    # None, which breaks every caller still relying on the old name.
    return self.generate_index_key(
        vector_db,
        embedding,
        x2text,
        chunk_size,
        chunk_overlap,
        file_path,
        file_hash,
    )
408+
392409
def index_file(
393410
self,
394411
tool_id: str,

0 commit comments

Comments
 (0)