Skip to content

update bohrium-sdk version 0.6.0 #520

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 6 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
78 changes: 67 additions & 11 deletions dpdispatcher/contexts/openapi_context.py
Original file line number Diff line number Diff line change
@@ -1,18 +1,20 @@
import glob
import os
import shutil
import uuid
from zipfile import ZipFile

import tqdm

try:
from bohriumsdk.client import Client
from bohriumsdk.job import Job
from bohriumsdk.storage import Storage
from bohriumsdk.util import Util
except ModuleNotFoundError:
from bohrium import Bohrium
from bohrium.resources import Job, Tiefblue

Check warning on line 11 in dpdispatcher/contexts/openapi_context.py

View check run for this annotation

Codecov / codecov/patch

dpdispatcher/contexts/openapi_context.py#L11

Added line #L11 was not covered by tests
except ModuleNotFoundError as e:
found_bohriumsdk = False
import_bohrium_error = e
else:
found_bohriumsdk = True
import_bohrium_error = None

Check warning on line 17 in dpdispatcher/contexts/openapi_context.py

View check run for this annotation

Codecov / codecov/patch

dpdispatcher/contexts/openapi_context.py#L17

Added line #L17 was not covered by tests

from dpdispatcher.base_context import BaseContext
from dpdispatcher.dlog import dlog
Expand All @@ -23,6 +25,36 @@
)


def unzip_file(zip_file, out_dir="./"):
obj = ZipFile(zip_file, "r")
for item in obj.namelist():
obj.extract(item, out_dir)

Check warning on line 31 in dpdispatcher/contexts/openapi_context.py

View check run for this annotation

Codecov / codecov/patch

dpdispatcher/contexts/openapi_context.py#L29-L31

Added lines #L29 - L31 were not covered by tests

Comment on lines +28 to +32
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🛠️ Refactor suggestion

Same Zip concerns as in machines/openapi.py

Replicate the safer, context-managed implementation suggested earlier to avoid handle leaks and Zip-Slip.

🧰 Tools
🪛 Pylint (3.3.7)

[refactor] 27-27: Consider using 'with' for resource-allocating operations

(R1732)

🤖 Prompt for AI Agents
In dpdispatcher/contexts/openapi_context.py around lines 26 to 30, the
unzip_file function opens the ZipFile without using a context manager and
extracts files without validating paths, risking resource leaks and Zip-Slip
attacks. Refactor the function to use a with statement to open the ZipFile,
ensuring it is properly closed, and add path validation to prevent extraction
outside the target directory, mitigating Zip-Slip vulnerabilities.


def zip_file_list(root_path, zip_filename, file_list=[]):
out_zip_file = os.path.join(root_path, zip_filename)

Check warning on line 35 in dpdispatcher/contexts/openapi_context.py

View check run for this annotation

Codecov / codecov/patch

dpdispatcher/contexts/openapi_context.py#L35

Added line #L35 was not covered by tests
# print('debug: file_list', file_list)
zip_obj = ZipFile(out_zip_file, "w")
for f in file_list:
matched_files = os.path.join(root_path, f)
for ii in glob.glob(matched_files):

Check warning on line 40 in dpdispatcher/contexts/openapi_context.py

View check run for this annotation

Codecov / codecov/patch

dpdispatcher/contexts/openapi_context.py#L37-L40

Added lines #L37 - L40 were not covered by tests
# print('debug: matched_files:ii', ii)
if os.path.isdir(ii):
arcname = os.path.relpath(ii, start=root_path)
zip_obj.write(ii, arcname)
for root, dirs, files in os.walk(ii):
for file in files:
filename = os.path.join(root, file)
arcname = os.path.relpath(filename, start=root_path)

Check warning on line 48 in dpdispatcher/contexts/openapi_context.py

View check run for this annotation

Codecov / codecov/patch

dpdispatcher/contexts/openapi_context.py#L42-L48

Added lines #L42 - L48 were not covered by tests
# print('debug: filename:arcname:root_path', filename, arcname, root_path)
zip_obj.write(filename, arcname)

Check warning on line 50 in dpdispatcher/contexts/openapi_context.py

View check run for this annotation

Codecov / codecov/patch

dpdispatcher/contexts/openapi_context.py#L50

Added line #L50 was not covered by tests
else:
arcname = os.path.relpath(ii, start=root_path)
zip_obj.write(ii, arcname)
zip_obj.close()
return out_zip_file

Check warning on line 55 in dpdispatcher/contexts/openapi_context.py

View check run for this annotation

Codecov / codecov/patch

dpdispatcher/contexts/openapi_context.py#L52-L55

Added lines #L52 - L55 were not covered by tests
Comment on lines +34 to +55
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue

zip_file_list – avoid mutable default & close the archive properly

Two issues:

  1. file_list=[] is a mutable default shared between calls.
  2. ZipFile is not used in a with block; handle remains open on exceptions.
-from zipfile import ZipFile
+from zipfile import ZipFile, ZIP_DEFLATED
@@
-def zip_file_list(root_path, zip_filename, file_list=[]):
+def zip_file_list(root_path, zip_filename, file_list=None):
+    if file_list is None:
+        file_list = []
@@
-    zip_obj = ZipFile(out_zip_file, "w")
-    for f in file_list:
+    with ZipFile(out_zip_file, "w", compression=ZIP_DEFLATED) as zip_obj:
+        for f in file_list:
@@
-                zip_obj.write(ii, arcname)
-    zip_obj.close()
-    return out_zip_file
+                zip_obj.write(ii, arcname)
+    return out_zip_file

This removes the shared-state bug, guarantees the file is flushed/closed, and activates compression for a smaller upload.

📝 Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
def zip_file_list(root_path, zip_filename, file_list=[]):
out_zip_file = os.path.join(root_path, zip_filename)
# print('debug: file_list', file_list)
zip_obj = ZipFile(out_zip_file, "w")
for f in file_list:
matched_files = os.path.join(root_path, f)
for ii in glob.glob(matched_files):
# print('debug: matched_files:ii', ii)
if os.path.isdir(ii):
arcname = os.path.relpath(ii, start=root_path)
zip_obj.write(ii, arcname)
for root, dirs, files in os.walk(ii):
for file in files:
filename = os.path.join(root, file)
arcname = os.path.relpath(filename, start=root_path)
# print('debug: filename:arcname:root_path', filename, arcname, root_path)
zip_obj.write(filename, arcname)
else:
arcname = os.path.relpath(ii, start=root_path)
zip_obj.write(ii, arcname)
zip_obj.close()
return out_zip_file
from zipfile import ZipFile, ZIP_DEFLATED
import os
import glob
def zip_file_list(root_path, zip_filename, file_list=None):
if file_list is None:
file_list = []
out_zip_file = os.path.join(root_path, zip_filename)
with ZipFile(out_zip_file, "w", compression=ZIP_DEFLATED) as zip_obj:
for f in file_list:
matched_files = os.path.join(root_path, f)
for ii in glob.glob(matched_files):
if os.path.isdir(ii):
arcname = os.path.relpath(ii, start=root_path)
zip_obj.write(ii, arcname)
for root, dirs, files in os.walk(ii):
for file in files:
filename = os.path.join(root, file)
arcname = os.path.relpath(filename, start=root_path)
zip_obj.write(filename, arcname)
else:
arcname = os.path.relpath(ii, start=root_path)
zip_obj.write(ii, arcname)
return out_zip_file
🧰 Tools
🪛 Pylint (3.3.7)

[refactor] 35-35: Consider using 'with' for resource-allocating operations

(R1732)

🤖 Prompt for AI Agents
In dpdispatcher/contexts/openapi_context.py around lines 32 to 53, the function
zip_file_list uses a mutable default argument file_list=[] which can cause
shared state bugs across calls, and it opens the ZipFile without a with
statement, risking the file not being closed properly on exceptions. To fix
this, change the default value of file_list to None and initialize it to an
empty list inside the function if None. Also, open the ZipFile using a with
statement to ensure it is properly closed after writing, and enable compression
by specifying the compression mode when creating the ZipFile.



class OpenAPIContext(BaseContext):
def __init__(
self,
Expand All @@ -35,15 +67,39 @@
if not found_bohriumsdk:
raise ModuleNotFoundError(
"bohriumsdk not installed. Install dpdispatcher with `pip install dpdispatcher[bohrium]`"
)
) from import_bohrium_error
self.init_local_root = local_root
self.init_remote_root = remote_root
self.temp_local_root = os.path.abspath(local_root)
self.remote_profile = remote_profile
self.client = Client()
self.storage = Storage(client=self.client)
access_key = (

Check warning on line 75 in dpdispatcher/contexts/openapi_context.py

View check run for this annotation

Codecov / codecov/patch

dpdispatcher/contexts/openapi_context.py#L75

Added line #L75 was not covered by tests
remote_profile.get("access_key", None)
or os.getenv("BOHRIUM_ACCESS_KEY", None)
or os.getenv("ACCESS_KEY", None)
)
project_id = (

Check warning on line 80 in dpdispatcher/contexts/openapi_context.py

View check run for this annotation

Codecov / codecov/patch

dpdispatcher/contexts/openapi_context.py#L80

Added line #L80 was not covered by tests
remote_profile.get("project_id", None)
or os.getenv("BOHRIUM_PROJECT_ID", None)
or os.getenv("PROJECT_ID", None)
)
app_key = (

Check warning on line 85 in dpdispatcher/contexts/openapi_context.py

View check run for this annotation

Codecov / codecov/patch

dpdispatcher/contexts/openapi_context.py#L85

Added line #L85 was not covered by tests
remote_profile.get("app_key", None)
or os.getenv("BOHRIUM_APP_KEY", None)
or os.getenv("APP_KEY", None)
)
if access_key is None:
raise ValueError(

Check warning on line 91 in dpdispatcher/contexts/openapi_context.py

View check run for this annotation

Codecov / codecov/patch

dpdispatcher/contexts/openapi_context.py#L90-L91

Added lines #L90 - L91 were not covered by tests
"remote_profile must contain 'access_key' or set environment variable 'BOHRIUM_ACCESS_KEY'"
)
if project_id is None:
raise ValueError(

Check warning on line 95 in dpdispatcher/contexts/openapi_context.py

View check run for this annotation

Codecov / codecov/patch

dpdispatcher/contexts/openapi_context.py#L94-L95

Added lines #L94 - L95 were not covered by tests
"remote_profile must contain 'project_id' or set environment variable 'BOHRIUM_PROJECT_ID'"
)
self.client = Bohrium(

Check warning on line 98 in dpdispatcher/contexts/openapi_context.py

View check run for this annotation

Codecov / codecov/patch

dpdispatcher/contexts/openapi_context.py#L98

Added line #L98 was not covered by tests
access_key=access_key, project_id=project_id, app_key=app_key
)
self.storage = Tiefblue()

Check warning on line 101 in dpdispatcher/contexts/openapi_context.py

View check run for this annotation

Codecov / codecov/patch

dpdispatcher/contexts/openapi_context.py#L101

Added line #L101 was not covered by tests
self.job = Job(client=self.client)
self.util = Util()
self.jgid = None

@classmethod
Expand Down Expand Up @@ -97,7 +153,7 @@
for file in task.forward_files:
upload_file_list.append(os.path.join(task.task_work_path, file))

upload_zip = Util.zip_file_list(
upload_zip = zip_file_list(

Check warning on line 156 in dpdispatcher/contexts/openapi_context.py

View check run for this annotation

Codecov / codecov/patch

dpdispatcher/contexts/openapi_context.py#L156

Added line #L156 was not covered by tests
self.local_root, zip_task_file, file_list=upload_file_list
)
project_id = self.remote_profile.get("project_id", 0)
Expand Down Expand Up @@ -189,7 +245,7 @@
):
continue
self.storage.download_from_url(info["resultUrl"], target_result_zip)
Util.unzip_file(target_result_zip, out_dir=self.local_root)
unzip_file(target_result_zip, out_dir=self.local_root)

Check warning on line 248 in dpdispatcher/contexts/openapi_context.py

View check run for this annotation

Codecov / codecov/patch

dpdispatcher/contexts/openapi_context.py#L248

Added line #L248 was not covered by tests
self._backup(self.local_root, target_result_zip)
self._clean_backup(
self.local_root, keep_backup=self.remote_profile.get("keep_backup", True)
Expand Down
46 changes: 38 additions & 8 deletions dpdispatcher/machines/openapi.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,13 @@
import os
import shutil
import time
from zipfile import ZipFile

from dpdispatcher.utils.utils import customized_script_header_template

try:
from bohriumsdk.client import Client
from bohriumsdk.job import Job
from bohriumsdk.storage import Storage
from bohriumsdk.util import Util
from bohrium import Bohrium
from bohrium.resources import Job, Tiefblue

Check warning on line 10 in dpdispatcher/machines/openapi.py

View check run for this annotation

Codecov / codecov/patch

dpdispatcher/machines/openapi.py#L10

Added line #L10 was not covered by tests
except ModuleNotFoundError:
found_bohriumsdk = False
else:
Expand All @@ -23,6 +22,12 @@
"""


def unzip_file(zip_file, out_dir="./"):
obj = ZipFile(zip_file, "r")
for item in obj.namelist():
obj.extract(item, out_dir)

Check warning on line 28 in dpdispatcher/machines/openapi.py

View check run for this annotation

Codecov / codecov/patch

dpdispatcher/machines/openapi.py#L26-L28

Added lines #L26 - L28 were not covered by tests


Comment on lines +25 to +30
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue

Use a context-manager & path-sanitisation in unzip_file

The helper leaks the ZipFile handle and is vulnerable to Zip-Slip because arbitrary paths from the archive are blindly extracted.

-def unzip_file(zip_file, out_dir="./"):
-    obj = ZipFile(zip_file, "r")
-    for item in obj.namelist():
-        obj.extract(item, out_dir)
+def unzip_file(zip_file, out_dir: str = "./"):
+    """Safely extract *zip_file* into *out_dir*, guarding against Zip-Slip."""
+    abs_out = os.path.abspath(out_dir)
+    with ZipFile(zip_file, "r") as zf:
+        for member in zf.namelist():
+            target = os.path.abspath(os.path.join(abs_out, member))
+            if not target.startswith(abs_out + os.sep):
+                raise ValueError(f"Unsafe path detected in zip: {member}")
+        zf.extractall(abs_out)

Closes the file handle automatically and blocks directory-traversal attacks.

📝 Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
def unzip_file(zip_file, out_dir="./"):
obj = ZipFile(zip_file, "r")
for item in obj.namelist():
obj.extract(item, out_dir)
def unzip_file(zip_file, out_dir: str = "./"):
"""Safely extract *zip_file* into *out_dir*, guarding against Zip-Slip."""
abs_out = os.path.abspath(out_dir)
with ZipFile(zip_file, "r") as zf:
for member in zf.namelist():
target = os.path.abspath(os.path.join(abs_out, member))
if not target.startswith(abs_out + os.sep):
raise ValueError(f"Unsafe path detected in zip: {member}")
zf.extractall(abs_out)
🧰 Tools
🪛 Pylint (3.3.7)

[refactor] 26-26: Consider using 'with' for resource-allocating operations

(R1732)

🤖 Prompt for AI Agents
In dpdispatcher/machines/openapi.py around lines 25 to 30, the unzip_file
function currently opens a ZipFile without using a context manager, causing a
file handle leak, and extracts files without sanitizing paths, making it
vulnerable to Zip-Slip attacks. Fix this by using a with statement to open the
ZipFile, ensuring it closes automatically, and sanitize each extracted file path
to prevent directory traversal before extraction.

class OpenAPI(Machine):
def __init__(self, context):
if not found_bohriumsdk:
Expand All @@ -35,9 +40,35 @@
self.grouped = self.remote_profile.get("grouped", True)
self.retry_count = self.remote_profile.get("retry_count", 3)
self.ignore_exit_code = context.remote_profile.get("ignore_exit_code", True)
self.client = Client()

access_key = (

Check warning on line 44 in dpdispatcher/machines/openapi.py

View check run for this annotation

Codecov / codecov/patch

dpdispatcher/machines/openapi.py#L44

Added line #L44 was not covered by tests
self.remote_profile.get("access_key", None)
or os.getenv("BOHRIUM_ACCESS_KEY", None)
or os.getenv("ACCESS_KEY", None)
)
project_id = (

Check warning on line 49 in dpdispatcher/machines/openapi.py

View check run for this annotation

Codecov / codecov/patch

dpdispatcher/machines/openapi.py#L49

Added line #L49 was not covered by tests
self.remote_profile.get("project_id", None)
or os.getenv("BOHRIUM_PROJECT_ID", None)
or os.getenv("PROJECT_ID", None)
)
app_key = (

Check warning on line 54 in dpdispatcher/machines/openapi.py

View check run for this annotation

Codecov / codecov/patch

dpdispatcher/machines/openapi.py#L54

Added line #L54 was not covered by tests
self.remote_profile.get("app_key", None)
or os.getenv("BOHRIUM_APP_KEY", None)
or os.getenv("APP_KEY", None)
)
if access_key is None:
raise ValueError(

Check warning on line 60 in dpdispatcher/machines/openapi.py

View check run for this annotation

Codecov / codecov/patch

dpdispatcher/machines/openapi.py#L59-L60

Added lines #L59 - L60 were not covered by tests
"remote_profile must contain 'access_key' or set environment variable 'BOHRIUM_ACCESS_KEY'"
)
if project_id is None:
raise ValueError(

Check warning on line 64 in dpdispatcher/machines/openapi.py

View check run for this annotation

Codecov / codecov/patch

dpdispatcher/machines/openapi.py#L63-L64

Added lines #L63 - L64 were not covered by tests
"remote_profile must contain 'project_id' or set environment variable 'BOHRIUM_PROJECT_ID'"
)
self.client = Bohrium(

Check warning on line 67 in dpdispatcher/machines/openapi.py

View check run for this annotation

Codecov / codecov/patch

dpdispatcher/machines/openapi.py#L67

Added line #L67 was not covered by tests
access_key=access_key, project_id=project_id, app_key=app_key
)
self.storage = Tiefblue()

Check warning on line 70 in dpdispatcher/machines/openapi.py

View check run for this annotation

Codecov / codecov/patch

dpdispatcher/machines/openapi.py#L70

Added line #L70 was not covered by tests
self.job = Job(client=self.client)
self.storage = Storage(client=self.client)
self.group_id = None

def gen_script(self, job):
Expand Down Expand Up @@ -102,7 +133,6 @@
}
if job.job_state == JobStatus.unsubmitted:
openapi_params["job_id"] = job.job_id

data = self.job.insert(**openapi_params)

job.job_id = data.get("jobId", 0) # type: ignore
Expand Down Expand Up @@ -170,7 +200,7 @@
result_filename = job_hash + "_back.zip"
target_result_zip = os.path.join(self.context.local_root, result_filename)
self.storage.download_from_url(job_url, target_result_zip)
Util.unzip_file(target_result_zip, out_dir=self.context.local_root)
unzip_file(target_result_zip, out_dir=self.context.local_root)

Check warning on line 203 in dpdispatcher/machines/openapi.py

View check run for this annotation

Codecov / codecov/patch

dpdispatcher/machines/openapi.py#L203

Added line #L203 was not covered by tests
try:
os.makedirs(os.path.join(self.context.local_root, "backup"), exist_ok=True)
shutil.move(
Expand Down
Loading