Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Ae jfw #9

Open
wants to merge 6 commits into
base: AE
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion dmpy/__init__.py
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
from dmpy.dmpy import state, list_files, get_file_content, archive_preview, stream_text_from_archive, upload_data
from dmpy.dmpy import state, list_files, get_file_content, archive_preview, stream_text_from_archive, upload_data, stream_data_from_archive
from dmpy.dmpy import get_study_fields, create_new_field, get_data_records, upload_data_in_array, delete_study_field
2 changes: 1 addition & 1 deletion dmpy/connections.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,7 @@ def upload_file(self, file_name: str, file_content: bytes, variables: any):
'fileName': (file_name, file_content, 'application/octet-stream'),
}

response: requests.Response = requests.post(self._host_graphql, data=data, files=files)
response: requests.Response = requests.post(self._host_graphql, data=data, files=files, cookies=self._cookies)

response.raise_for_status() # Ensure we got a successful response

Expand Down
85 changes: 71 additions & 14 deletions dmpy/dmpy.py
Original file line number Diff line number Diff line change
Expand Up @@ -151,9 +151,12 @@ def archive_preview(file_id: str, file_name: str):
print(name)
return [name for name in z.getnames()]



def stream_text_from_archive(file_id, file_name):
    """Stream the members of an archive in text mode.

    Thin wrapper kept for callers that predate ``stream_data_from_archive``;
    it forwards both arguments unchanged and pins ``data_type`` to ``'text'``.

    Args:
        file_id: Identifier passed through to ``stream_data_from_archive``.
        file_name: Archive file name passed through to
            ``stream_data_from_archive``.

    Returns:
        Whatever ``stream_data_from_archive`` returns for ``data_type='text'``.
    """
    return stream_data_from_archive(
        file_id=file_id,
        file_name=file_name,
        data_type='text',
    )


def _decode_archive_member(raw, member_name, data_type):
    """Turn one archive member's raw bytes into the payload handed to callers.

    Args:
        raw: The member's complete contents as ``bytes``.
        member_name: Name of the member inside the archive (used in messages).
        data_type: ``'binary'`` to return the bytes untouched, ``'text'`` to
            return a ``StringIO`` holding the decoded text.

    Returns:
        ``raw`` itself for ``'binary'``; a ``StringIO`` for ``'text'``.
    """
    if data_type == 'binary':
        return raw
    try:
        return StringIO(raw.decode('utf-8'))
    except UnicodeDecodeError:
        print(f'Could not decode file {member_name} in UTF-8')
        # ISO-8859-1 maps every byte value to a code point, so this fallback
        # cannot itself raise a decode error.
        return StringIO(raw.decode('ISO-8859-1'))


def stream_data_from_archive(file_id, file_name, data_type='text'):
    """Yield ``(member_name, data)`` for every file stored in an archive.

    The archive is fetched with ``get_file_content(file_id)`` and its format is
    chosen from ``get_file_type(file_name)``. Directory entries are skipped.

    Args:
        file_id: Identifier passed to ``get_file_content``.
        file_name: Archive file name; only used to detect the archive type.
        data_type: ``'text'`` (default) yields a ``StringIO`` decoded as UTF-8,
            falling back to ISO-8859-1; ``'binary'`` yields the raw ``bytes``.

    Yields:
        Tuples of the member's name inside the archive and its contents.

    Raises:
        ValueError: If ``data_type`` is neither ``'text'`` nor ``'binary'``
            (raised when iteration starts, since this is a generator).
    """
    # Local import: the module-level import block is outside the reviewed diff.
    import zipfile

    if data_type not in ('text', 'binary'):
        raise ValueError(
            f"Unsupported data_type {data_type!r}; expected 'text' or 'binary'"
        )

    file_type = get_file_type(file_name)
    compressed_data = BytesIO(get_file_content(file_id))

    # NOTE(review): the head of the zip branch is hidden by a diff hunk gap;
    # the 'zip' literal and the ZipFile iteration mirror the rar branch below.
    # Confirm against the full file.
    if file_type == 'zip':
        with zipfile.ZipFile(compressed_data, 'r') as zf:
            for file_info in zf.infolist():
                if file_info.filename.endswith('/'):
                    continue
                with zf.open(file_info, 'r') as member:
                    raw = member.read()
                yield file_info.filename, _decode_archive_member(
                    raw, file_info.filename, data_type)
    elif file_type == 'tar.gz':
        with tarfile.open(fileobj=compressed_data, mode='r:gz') as tar:
            for tar_info in tar:
                if not tar_info.isfile():
                    continue
                raw = tar.extractfile(tar_info).read()
                # Use tar_info here: the previous fallback path referenced the
                # zip loop variable, which does not exist in this branch.
                yield tar_info.name, _decode_archive_member(
                    raw, tar_info.name, data_type)
    elif file_type == '7z':
        with py7zr.SevenZipFile(compressed_data, mode='r') as z:
            # readall() maps member names to in-memory file objects; the
            # previous code treated the plain strings from getnames() as
            # objects with a .filename attribute and yielded a non-existent
            # z.data attribute.
            # NOTE(review): readall() is not available in every py7zr release;
            # confirm against the pinned version.
            for member_name, member in (z.readall() or {}).items():
                if member_name.endswith('/'):
                    continue
                yield member_name, _decode_archive_member(
                    member.read(), member_name, data_type)
    elif file_type == 'rar':
        with rarfile.RarFile(compressed_data) as rf:
            for file_info in rf.infolist():
                if file_info.filename.endswith('/'):
                    continue
                with rf.open(file_info, 'r') as member:
                    raw = member.read()
                yield file_info.filename, _decode_archive_member(
                    raw, file_info.filename, data_type)

def stream_text_from_specific_archive_file(file_id, file_name, sub_file_name: str = None):
file_type = get_file_type(file_name)
Expand All @@ -217,7 +253,12 @@ def stream_text_from_specific_archive_file(file_id, file_name, sub_file_name: st
data = StringIO(file.read().decode('utf-8')).getvalue()
return file_info.filename, data
except UnicodeDecodeError:
return (f'Could not decode file {file_info.filename} in UTF-8')
try:
file.seek(0)
data = StringIO(file.read().decode('ISO-8859-1'))
yield file_info.filename, data
except Exception as e:
return (f'Could not decode file {file_info.filename} in UTF-8 or ISO-8859-1')
elif file_type == 'tar.gz':
with tarfile.open(fileobj=compressed_data, mode='r:gz') as tar:
for tar_info in tar:
Expand All @@ -229,7 +270,12 @@ def stream_text_from_specific_archive_file(file_id, file_name, sub_file_name: st
data = StringIO(file.read().decode('utf-8')).getvalue()
return tar_info.name, data
except UnicodeDecodeError:
return (f'Could not decode file {tar_info.name} in UTF-8')
try:
file.seek(0)
data = StringIO(file.read().decode('ISO-8859-1'))
yield file_info.filename, data
except Exception as e:
return (f'Could not decode file {file_info.filename} in UTF-8 or ISO-8859-1')
elif file_type == '7z':
with py7zr.SevenZipFile(compressed_data, mode='r') as z:
for file_info in z.getnames():
Expand All @@ -240,7 +286,12 @@ def stream_text_from_specific_archive_file(file_id, file_name, sub_file_name: st
data = StringIO(file.read().decode('utf-8')).getvalue()
return file_info, z.data
except UnicodeDecodeError:
return (f'Could not decode file {file_info.filename} in UTF-8')
try:
file.seek(0)
data = StringIO(file.read().decode('ISO-8859-1'))
yield file_info.filename, data
except Exception as e:
return (f'Could not decode file {file_info.filename} in UTF-8 or ISO-8859-1')
elif file_type == 'rar':
with rarfile.RarFile(compressed_data) as rf:
for file_info in rf.infolist():
Expand All @@ -251,7 +302,12 @@ def stream_text_from_specific_archive_file(file_id, file_name, sub_file_name: st
data = StringIO(file.read().decode('utf-8')).getvalue()
return file_info.filename, data
except UnicodeDecodeError:
print(f'Could not decode file {file_info.filename} in UTF-8')
try:
file.seek(0)
data = StringIO(file.read().decode('ISO-8859-1'))
yield file_info.filename, data
except Exception as e:
return (f'Could not decode file {file_info.filename} in UTF-8 or ISO-8859-1')

return sub_file_name

Expand Down Expand Up @@ -368,6 +424,7 @@ def upload_data_in_array(study_id: str, data: List[dict]):
except Exception as e:
print(f"{Fore.LIGHTRED_EX}Error uploading data: {e}{Fore.RESET}")


def delete_study_field(study_id: str, field_id: str):
conn = DMPConnection()
variables = {
Expand All @@ -376,4 +433,4 @@ def delete_study_field(study_id: str, field_id: str):
}
response = conn.graphql_request('deleteField', variables)
print(response)
return response['data']['deleteField']
return response['data']['deleteField']