Skip to content

Commit

Permalink
Merge branch 'release-7.1.0'
Browse files Browse the repository at this point in the history
  • Loading branch information
mpenkov committed Dec 17, 2024
2 parents 497541c + 3569c17 commit e040efd
Show file tree
Hide file tree
Showing 15 changed files with 169 additions and 63 deletions.
29 changes: 14 additions & 15 deletions .github/workflows/python-package.yml
Original file line number Diff line number Diff line change
Expand Up @@ -14,9 +14,6 @@ jobs:
with:
python-version: "3.11"

- name: Update pip
run: pip install -U pip

- name: Install dependencies
run: pip install flake8

Expand All @@ -34,22 +31,21 @@ jobs:
- {python-version: '3.10', os: ubuntu-20.04}
- {python-version: '3.11', os: ubuntu-20.04}
- {python-version: '3.12', os: ubuntu-20.04}
- {python-version: '3.13', os: ubuntu-20.04}

- {python-version: '3.8', os: windows-2019}
- {python-version: '3.9', os: windows-2019}
- {python-version: '3.10', os: windows-2019}
- {python-version: '3.11', os: windows-2019}
- {python-version: '3.12', os: windows-2019}
- {python-version: '3.13', os: windows-2019}
steps:
- uses: actions/checkout@v2

- uses: actions/setup-python@v2
with:
python-version: ${{ matrix.python-version }}

- name: Update pip
run: pip install -U pip

- name: Install smart_open without dependencies
run: pip install -e .

Expand All @@ -73,6 +69,7 @@ jobs:
- {python-version: '3.10', os: ubuntu-20.04}
- {python-version: '3.11', os: ubuntu-20.04}
- {python-version: '3.12', os: ubuntu-20.04}
- {python-version: '3.13', os: ubuntu-20.04}

#
# Some of the doctests don't pass on Windows because of Windows-specific
Expand All @@ -82,6 +79,9 @@ jobs:
# - {python-version: '3.8', os: windows-2019}
# - {python-version: '3.9', os: windows-2019}
# - {python-version: '3.10', os: windows-2019}
# - {python-version: '3.11', os: windows-2019}
# - {python-version: '3.12', os: windows-2019}
# - {python-version: '3.13', os: windows-2019}

steps:
- uses: actions/checkout@v2
Expand All @@ -90,9 +90,6 @@ jobs:
with:
python-version: ${{ matrix.python-version }}

- name: Update pip
run: pip install -U pip

- name: Install smart_open and its dependencies
run: pip install -e .[test]

Expand All @@ -113,13 +110,17 @@ jobs:
- {python-version: '3.10', os: ubuntu-20.04}
- {python-version: '3.11', os: ubuntu-20.04}
- {python-version: '3.12', os: ubuntu-20.04}
- {python-version: '3.13', os: ubuntu-20.04}

# Not sure why we exclude these, perhaps for historical reasons?
#
# - {python-version: '3.7', os: windows-2019}
# - {python-version: '3.8', os: windows-2019}
# - {python-version: '3.9', os: windows-2019}
# - {python-version: '3.10', os: windows-2019}
# - {python-version: '3.11', os: windows-2019}
# - {python-version: '3.12', os: windows-2019}
# - {python-version: '3.13', os: windows-2019}

steps:
- uses: actions/checkout@v2
Expand All @@ -128,9 +129,6 @@ jobs:
with:
python-version: ${{ matrix.python-version }}

- name: Update pip
run: pip install -U pip

- name: Install smart_open and its dependencies
run: pip install -e .[test]

Expand Down Expand Up @@ -165,11 +163,15 @@ jobs:
- {python-version: '3.10', os: ubuntu-20.04}
- {python-version: '3.11', os: ubuntu-20.04}
- {python-version: '3.12', os: ubuntu-20.04}
- {python-version: '3.13', os: ubuntu-20.04}

# - {python-version: '3.7', os: windows-2019}
# - {python-version: '3.8', os: windows-2019}
# - {python-version: '3.9', os: windows-2019}
# - {python-version: '3.10', os: windows-2019}
# - {python-version: '3.11', os: windows-2019}
# - {python-version: '3.12', os: windows-2019}
# - {python-version: '3.13', os: windows-2019}

steps:
- uses: actions/checkout@v2
Expand All @@ -178,9 +180,6 @@ jobs:
with:
python-version: ${{ matrix.python-version }}

- name: Update pip
run: pip install -U pip

- name: Install smart_open and its dependencies
run: pip install -e .[test]

Expand Down
5 changes: 5 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,8 @@
# 7.1.0, 2024-12-17

- Add support for python 3.13 (PR [#847](https://github.com/piskvorky/smart_open/pull/847), [@ddelange](https://github.com/ddelange))
- Propagate uri to compression_wrapper (PR [#842](https://github.com/piskvorky/smart_open/pull/842), [@ddelange](https://github.com/ddelange))

# 7.0.5, 2024-10-04

- Fix zstd compression in ab mode (PR [#833](https://github.com/piskvorky/smart_open/pull/833), [@ddelange](https://github.com/ddelange))
Expand Down
1 change: 1 addition & 0 deletions README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -227,6 +227,7 @@ The supported values for this parameter are:
- ``disable``
- ``.gz``
- ``.bz2``
- ``.zst``

By default, ``smart_open`` determines the compression algorithm to use based on the file extension.

Expand Down
34 changes: 34 additions & 0 deletions integration-tests/test_ftp.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
from __future__ import unicode_literals
import gzip
import pytest
from smart_open import open
import ssl
Expand Down Expand Up @@ -52,6 +53,39 @@ def test_binary(server_info):
read_contents = f.read()
assert read_contents == file_contents + appended_content1

def test_compression(server_info):
server_type = server_info[0]
port_num = server_info[1]
file_contents = "Test Test \n new test \n another tests"
appended_content1 = "Added \n to end"

with open(f"{server_type}://user:123@localhost:{port_num}/file.gz", "w") as f:
f.write(file_contents)

with open(f"{server_type}://user:123@localhost:{port_num}/file.gz", "r") as f:
read_contents = f.read()
assert read_contents == file_contents

with open(f"{server_type}://user:123@localhost:{port_num}/file.gz", "a") as f:
f.write(appended_content1)

with open(f"{server_type}://user:123@localhost:{port_num}/file.gz", "r") as f:
read_contents = f.read()
assert read_contents == file_contents + appended_content1

# ftp socket makefile returns a file whose name attribute is fileno() which is int
# that can't be used to infer compression extension, so the calls above would
# silently not use any compression (neither reading nor writing) so they would pass
# pytest suppresses the logging.warning('unable to transparently decompress...')
# so check here explicitly that the bytes on server are gzip compressed
with open(
f"{server_type}://user:123@localhost:{port_num}/file.gz",
"rb",
compression='disable',
) as f:
read_contents = gzip.decompress(f.read()).decode()
assert read_contents == file_contents + appended_content1

def test_line_endings_non_binary(server_info):
server_type = server_info[0]
port_num = server_info[1]
Expand Down
2 changes: 2 additions & 0 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -102,6 +102,8 @@ def read(fname):
'Programming Language :: Python :: 3.9',
'Programming Language :: Python :: 3.10',
'Programming Language :: Python :: 3.11',
'Programming Language :: Python :: 3.12',
'Programming Language :: Python :: 3.13',
'Topic :: System :: Distributed Computing',
'Topic :: Database :: Front-Ends',
],
Expand Down
52 changes: 32 additions & 20 deletions smart_open/azure.py
Original file line number Diff line number Diff line change
Expand Up @@ -195,8 +195,9 @@ class Reader(io.BufferedIOBase):
Implements the io.BufferedIOBase interface of the standard library.
:raises azure.core.exceptions.ResourceNotFoundError: Raised when the blob to read from does not exist.
"""
_blob = None # so `closed` property works in case __init__ fails and __del__ is called

def __init__(
self,
container,
Expand All @@ -207,9 +208,10 @@ def __init__(
max_concurrency=DEFAULT_MAX_CONCURRENCY,
):
self._container_name = container
self._blob_name = blob

self._blob = _get_blob_client(client, container, blob)
# type: azure.storage.blob.BlobClient
self._blob = _get_blob_client(client, container, blob)

if self._blob is None:
raise azure.core.exceptions.ResourceNotFoundError(
Expand All @@ -236,8 +238,13 @@ def __init__(
def close(self):
"""Flush and close this stream."""
logger.debug("close: called")
self._blob = None
self._raw_reader = None
if not self.closed:
self._blob = None
self._raw_reader = None

@property
def closed(self):
return self._blob is None

def readable(self):
"""Return True if the stream can be read from."""
Expand Down Expand Up @@ -369,20 +376,26 @@ def __exit__(self, exc_type, exc_val, exc_tb):
self.close()

def __str__(self):
return "(%s, %r, %r)" % (self.__class__.__name__,
self._container_name,
self._blob.blob_name)
return "(%s, %r, %r)" % (
self.__class__.__name__,
self._container_name,
self._blob_name
)

def __repr__(self):
return "%s(container=%r, blob=%r)" % (
self.__class__.__name__, self._container_name, self._blob.blob_name,
self.__class__.__name__,
self._container_name,
self._blob_name,
)


class Writer(io.BufferedIOBase):
"""Writes bytes to Azure Blob Storage.
Implements the io.BufferedIOBase interface of the standard library."""
Implements the io.BufferedIOBase interface of the standard library.
"""
_blob = None # so `closed` property works in case __init__ fails and __del__ is called

def __init__(
self,
Expand All @@ -392,21 +405,19 @@ def __init__(
blob_kwargs=None,
min_part_size=_DEFAULT_MIN_PART_SIZE,
):
self._is_closed = False
self._container_name = container

self._blob = _get_blob_client(client, container, blob)
self._blob_name = blob
self._blob_kwargs = blob_kwargs or {}
# type: azure.storage.blob.BlobClient

self._min_part_size = min_part_size

self._total_size = 0
self._total_parts = 0
self._bytes_uploaded = 0
self._current_part = io.BytesIO()
self._block_list = []

# type: azure.storage.blob.BlobClient
self._blob = _get_blob_client(client, container, blob)

#
# This member is part of the io.BufferedIOBase interface.
#
Expand All @@ -424,25 +435,26 @@ def terminate(self):
logger.debug('%s: terminating multipart upload', self)
if not self.closed:
self._block_list = []
self._is_closed = True
self._blob = None
logger.debug('%s: terminated multipart upload', self)

#
# Override some methods from io.IOBase.
#
def close(self):
logger.debug("close: called")
if not self.closed:
logger.debug('%s: completing multipart upload', self)
if self._current_part.tell() > 0:
self._upload_part()
self._blob.commit_block_list(self._block_list, **self._blob_kwargs)
self._block_list = []
self._is_closed = True
self._blob = None
logger.debug('%s: completed multipart upload', self)

@property
def closed(self):
return self._is_closed
return self._blob is None

def writable(self):
"""Return True if the stream supports writing."""
Expand Down Expand Up @@ -528,13 +540,13 @@ def __str__(self):
return "(%s, %r, %r)" % (
self.__class__.__name__,
self._container_name,
self._blob.blob_name
self._blob_name
)

def __repr__(self):
return "%s(container=%r, blob=%r, min_part_size=%r)" % (
self.__class__.__name__,
self._container_name,
self._blob.blob_name,
self._blob_name,
self._min_part_size
)
25 changes: 20 additions & 5 deletions smart_open/hdfs.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,7 @@ class CliRawInputBase(io.RawIOBase):
Implements the io.RawIOBase interface of the standard library.
"""
_sub = None # so `closed` property works in case __init__ fails and __del__ is called

def __init__(self, uri):
self._uri = uri
Expand All @@ -84,8 +85,13 @@ def __init__(self, uri):
def close(self):
"""Flush and close this stream."""
logger.debug("close: called")
self._sub.terminate()
self._sub = None
if not self.closed:
self._sub.terminate()
self._sub = None

@property
def closed(self):
return self._sub is None

def readable(self):
"""Return True if the stream can be read from."""
Expand Down Expand Up @@ -125,6 +131,8 @@ class CliRawOutputBase(io.RawIOBase):
Implements the io.RawIOBase interface of the standard library.
"""
_sub = None # so `closed` property works in case __init__ fails and __del__ is called

def __init__(self, uri):
self._uri = uri
self._sub = subprocess.Popen(["hdfs", "dfs", '-put', '-f', '-', self._uri],
Expand All @@ -136,9 +144,16 @@ def __init__(self, uri):
self.raw = None

def close(self):
self.flush()
self._sub.stdin.close()
self._sub.wait()
logger.debug("close: called")
if not self.closed:
self.flush()
self._sub.stdin.close()
self._sub.wait()
self._sub = None

@property
def closed(self):
return self._sub is None

def flush(self):
self._sub.stdin.flush()
Expand Down
Loading

0 comments on commit e040efd

Please sign in to comment.