Skip to content

Commit

Permalink
Propagate uri to compression_wrapper (#842)
Browse files Browse the repository at this point in the history
* Add ftp compression test

* Propagate uri to compression_wrapper

* Prefer name attribute in smart_open_lib

* Amend test to actually check for gzip bytes

* Add comments
  • Loading branch information
ddelange authored Oct 30, 2024
1 parent c0cb405 commit 60976d0
Show file tree
Hide file tree
Showing 2 changed files with 47 additions and 1 deletion.
34 changes: 34 additions & 0 deletions integration-tests/test_ftp.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
from __future__ import unicode_literals
import gzip
import pytest
from smart_open import open
import ssl
Expand Down Expand Up @@ -52,6 +53,39 @@ def test_binary(server_info):
read_contents = f.read()
assert read_contents == file_contents + appended_content1

def test_compression(server_info):
    """Round-trip text through a ``.gz`` path on the server and verify it is gzip on the wire.

    ``server_info`` is indexed positionally: element 0 is used as the URL
    scheme and element 1 as the port (presumably supplied by a pytest
    fixture defined elsewhere in this file -- confirm against the fixture).
    """
    url = f"{server_info[0]}://user:123@localhost:{server_info[1]}/file.gz"
    initial_text = "Test Test \n new test \n another tests"
    appended_text = "Added \n to end"
    expected_text = initial_text + appended_text

    # Write, then read back through the transparent (de)compression layer.
    with open(url, "w") as writer:
        writer.write(initial_text)

    with open(url, "r") as reader:
        round_tripped = reader.read()
        assert round_tripped == initial_text

    # Append more text and confirm the combined contents read back intact.
    with open(url, "a") as appender:
        appender.write(appended_text)

    with open(url, "r") as reader:
        round_tripped = reader.read()
        assert round_tripped == expected_text

    # The checks above are not sufficient on their own: the file object
    # returned by the ftp socket's makefile has a ``name`` attribute equal to
    # fileno(), an int, which cannot be used to infer the compression
    # extension. If compression were silently skipped for both reading and
    # writing, the round trips would still pass, and pytest suppresses the
    # logging.warning('unable to transparently decompress...') that would
    # otherwise give it away. So fetch the raw bytes with compression disabled
    # and check explicitly that what is stored on the server is gzip data.
    with open(url, "rb", compression='disable') as raw:
        decoded_text = gzip.decompress(raw.read()).decode()
        assert decoded_text == expected_text

def test_line_endings_non_binary(server_info):
server_type = server_info[0]
port_num = server_info[1]
Expand Down
14 changes: 13 additions & 1 deletion smart_open/smart_open_lib.py
Original file line number Diff line number Diff line change
Expand Up @@ -222,7 +222,19 @@ def open(
raise NotImplementedError(ve.args[0])

binary = _open_binary_stream(uri, binary_mode, transport_params)
decompressed = so_compression.compression_wrapper(binary, binary_mode, compression)
filename = (
binary.name
# if name attribute is not string-like (e.g. ftp socket fileno)...
if isinstance(getattr(binary, "name", None), (str, bytes))
# ...fall back to uri
else uri
)
decompressed = so_compression.compression_wrapper(
binary,
binary_mode,
compression,
filename=filename,
)

if 'b' not in mode or explicit_encoding is not None:
decoded = _encoding_wrapper(
Expand Down

0 comments on commit 60976d0

Please sign in to comment.