Skip to content

Commit 5fd0928

Browse files
fixes in structure tool and bumped sdk versions (#876)
* fixes in structure tool and bumped sdk versions * Commit pdm.lock changes * Merge conflicts * Commit pdm.lock changes * updated pdm dependencies * Commit pdm.lock changes --------- Co-authored-by: muhammad-ali-e <[email protected]>
1 parent 51195d9 commit 5fd0928

File tree

12 files changed

+3812
-70
lines changed

12 files changed

+3812
-70
lines changed

backend/pdm.lock

Lines changed: 59 additions & 55 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

backend/pyproject.toml

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,7 @@ dependencies = [
3737
# Indirect local dependencies usually need to be added in their own projects
3838
# as: https://pdm-project.org/latest/usage/dependency/#local-dependencies.
3939
#
40-
# However, such indirect local dependencies which are not direct depedency of
40+
# However, such indirect local dependencies which are not direct dependencies of
4141
# main project appear as absolute paths in pdm.lock of main project, making it
4242
# impossible to check in the lock file.
4343
#
@@ -77,6 +77,7 @@ dev = [
7777
"-e unstract-tool-registry @ file:///${PROJECT_ROOT}/../unstract/tool-registry",
7878
"-e unstract-tool-sandbox @ file:///${PROJECT_ROOT}/../unstract/tool-sandbox",
7979
"-e unstract-workflow-execution @ file:///${PROJECT_ROOT}/../unstract/workflow-execution",
80+
"-e unstract-filesystem @ file:///${PROJECT_ROOT}/../unstract/filesystem",
8081
]
8182

8283
[tool.pytest.ini_options]

docker/scripts/pdm-lock-gen/pdm-lock.sh

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -73,6 +73,7 @@ directories=(
7373
"backend"
7474
"prompt-service"
7575
"worker"
76+
"unstract/filesystem"
7677
"unstract/core"
7778
"unstract/flags"
7879
"platform-service"

pdm.lock

Lines changed: 3 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

tools/classifier/requirements.txt

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,6 @@
11
# Add your dependencies here
22

33
# Required for all unstract tools
4-
unstract-sdk~=0.53.1
4+
unstract-sdk~=0.54.0rc5
5+
# TODO: remove once it is added to the SDK
6+
s3fs[boto3]==2024.6.0

tools/structure/requirements.txt

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,6 @@
11
# Add your dependencies here
22

33
# Required for all unstract tools
4-
unstract-sdk~=0.53.2
4+
unstract-sdk~=0.54.0rc5
5+
# TODO: remove once it is added to the SDK
6+
s3fs[boto3]==2024.6.0

tools/structure/src/main.py

Lines changed: 19 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
from typing import Any, Callable, Optional
77

88
from constants import SettingsKeys # type: ignore [attr-defined]
9-
from unstract.sdk.constants import LogLevel, LogState, MetadataKey
9+
from unstract.sdk.constants import LogLevel, LogState, MetadataKey, ToolEnv
1010
from unstract.sdk.index import Index
1111
from unstract.sdk.prompt import PromptTool
1212
from unstract.sdk.tool.base import BaseTool
@@ -97,7 +97,10 @@ def run(
9797
_, file_name = os.path.split(input_file)
9898
if summarize_as_source:
9999
file_name = SettingsKeys.SUMMARIZE
100-
tool_data_dir = Path(self.get_env_or_die(SettingsKeys.TOOL_DATA_DIR))
100+
if hasattr(self, "workflow_filestorage"):
101+
tool_data_dir = Path(self.get_env_or_die(ToolEnv.EXECUTION_DATA_DIR))
102+
else:
103+
tool_data_dir = Path(self.get_env_or_die(SettingsKeys.TOOL_DATA_DIR))
101104
execution_run_data_folder = Path(
102105
self.get_env_or_die(SettingsKeys.EXECUTION_RUN_DATA_FOLDER)
103106
)
@@ -268,8 +271,13 @@ def run(
268271
self.stream_log("Writing parsed output...")
269272
source_name = self.get_exec_metadata.get(MetadataKey.SOURCE_NAME)
270273
output_path = Path(output_dir) / f"{Path(source_name).stem}.json"
271-
with open(output_path, "w", encoding="utf-8") as f:
272-
f.write(structured_output)
274+
if hasattr(self, "workflow_filestorage"):
275+
self.workflow_filestorage.json_dump(
276+
path=output_path, data=structured_output_dict
277+
)
278+
else:
279+
with open(output_path, "w", encoding="utf-8") as f:
280+
f.write(structured_output)
273281
except OSError as e:
274282
self.stream_error_and_exit(f"Error creating output file: {e}")
275283
except json.JSONDecodeError as e:
@@ -351,8 +359,13 @@ def _summarize_and_index(
351359
structure_output = json.loads(response[SettingsKeys.STRUCTURE_OUTPUT])
352360
summarized_context = structure_output.get(SettingsKeys.DATA, "")
353361
self.stream_log("Writing summarized context to a file")
354-
with open(summarize_file_path, "w", encoding="utf-8") as f:
355-
f.write(summarized_context)
362+
if hasattr(self, "workflow_filestorage"):
363+
self.workflow_filestorage.write(
364+
path=summarize_file_path, mode="w", data=summarized_context
365+
)
366+
else:
367+
with open(summarize_file_path, "w", encoding="utf-8") as f:
368+
f.write(summarized_context)
356369

357370
self.stream_log("Indexing summarized context")
358371
summarize_file_hash: str = ToolUtils.get_hash_from_file(

tools/text_extractor/requirements.txt

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,6 @@
11
# Add your dependencies here
22

33
# Required for all unstract tools
4-
unstract-sdk~=0.53.1
4+
unstract-sdk~=0.54.0rc5
5+
# TODO: remove once it is added to the SDK
6+
s3fs[boto3]==2024.6.0

0 commit comments

Comments
 (0)