Skip to content

Commit 7dd8597

Browse files
committed
fix: fail-fast import guard for ArrowSerializer when pyarrow missing
The import guard in auto_serializer.py was ineffective because the ArrowSerializer class was always defined and importable: the ImportError only fired in __init__(), after the guard had already passed.

Fix: raise ImportError at module level when pyarrow isn't installed, so the import guard actually catches it.

Before (broken):
  auto_serializer.py: from .arrow_serializer import ArrowSerializer ✓
  auto_serializer.py: HAS_ARROW_SERIALIZER = True  # Wrong!
  ArrowSerializer.__init__(): raise ImportError  # Too late

After (fixed):
  arrow_serializer.py: import pyarrow → ImportError
  auto_serializer.py: from .arrow_serializer... → ImportError caught
  auto_serializer.py: HAS_ARROW_SERIALIZER = False  # Correct!

Adds test coverage for the import guard by mocking pyarrow unavailability.
1 parent bc80fe9 commit 7dd8597

File tree

2 files changed

+54
-13
lines changed

2 files changed

+54
-13
lines changed

src/cachekit/serializers/arrow_serializer.py

Lines changed: 6 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -15,8 +15,9 @@
1515
- Requires: pip install 'cachekit[data]' (includes pyarrow, pandas)
1616
1717
Type Checking Note:
18-
Optional imports (pyarrow, pandas) are guarded at runtime by HAS_PYARROW, HAS_PANDAS flags.
19-
Type checker cannot statically verify these; suppressed via pyright config comments above.
18+
pandas is guarded at runtime by HAS_PANDAS flag. pyarrow is required at import time
19+
(module fails to load without it, enabling proper import guards in auto_serializer).
20+
Type checker cannot statically verify optional imports; suppressed via pyright config comments above.
2021
"""
2122

2223
from __future__ import annotations
@@ -38,15 +39,12 @@
3839
HAS_PANDAS = False
3940
pd = None # type: ignore[assignment]
4041

41-
# Optional dependency: pyarrow
42+
# Required dependency: pyarrow (fail-fast at module level for import guard in auto_serializer)
4243
try:
4344
import pyarrow as pa
4445
import pyarrow.ipc # noqa: F401 (used via pa.ipc.new_file and pa.ipc.open_file)
45-
46-
HAS_PYARROW = True
47-
except ImportError:
48-
HAS_PYARROW = False
49-
pa = None # type: ignore[assignment]
46+
except ImportError as e:
47+
raise ImportError("pyarrow is not installed. ArrowSerializer requires the [data] extra: pip install 'cachekit[data]'") from e
5048

5149
# Standard dependency: xxhash (always available)
5250
import xxhash
@@ -124,13 +122,8 @@ def __init__(self, return_format: str = "pandas", enable_integrity_checking: boo
124122
When False: No checksum (faster, use for @cache.minimal speed-first scenarios)
125123
126124
Raises:
127-
ImportError: If required optional dependencies are not installed
128125
ValueError: If return_format is not one of the valid options
129126
"""
130-
if not HAS_PYARROW:
131-
raise ImportError(
132-
"pyarrow is not installed. ArrowSerializer requires the [data] extra: pip install 'cachekit[data]'"
133-
)
134127
if return_format not in ("pandas", "polars", "arrow"):
135128
raise ValueError(f"Invalid return_format: '{return_format}'. Valid options: 'pandas', 'polars', 'arrow'")
136129
self.return_format = return_format

tests/unit/test_arrow_serializer.py

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -375,3 +375,51 @@ def test_polars_serialization_requires_arrow_c_stream(self):
375375
# are detected via __arrow_c_stream__ interface (zero-copy).
376376
# We can't test this without polars installed, but the code path is covered.
377377
pass
378+
379+
380+
class TestImportGuard:
381+
"""Test module-level import guard for pyarrow dependency."""
382+
383+
def test_import_fails_without_pyarrow(self):
384+
"""ArrowSerializer module raises ImportError when pyarrow is not installed.
385+
386+
This tests the fail-fast import guard that enables auto_serializer.py
387+
to correctly detect when ArrowSerializer is unavailable.
388+
"""
389+
import builtins
390+
import importlib
391+
import sys
392+
393+
# Save original modules and import function
394+
original_import = builtins.__import__
395+
original_modules = {}
396+
397+
# Collect all modules we need to temporarily remove
398+
modules_to_remove = [
399+
key
400+
for key in list(sys.modules.keys())
401+
if key == "pyarrow" or key.startswith("pyarrow.") or key == "cachekit.serializers.arrow_serializer"
402+
]
403+
404+
for mod in modules_to_remove:
405+
original_modules[mod] = sys.modules.pop(mod)
406+
407+
def blocking_import(name, globals=None, locals=None, fromlist=(), level=0):
408+
if name == "pyarrow" or name.startswith("pyarrow."):
409+
raise ImportError(f"No module named '{name}'")
410+
return original_import(name, globals, locals, fromlist, level)
411+
412+
builtins.__import__ = blocking_import
413+
414+
try:
415+
# Now importing arrow_serializer should fail with our custom message
416+
with pytest.raises(ImportError) as exc_info:
417+
importlib.import_module("cachekit.serializers.arrow_serializer")
418+
419+
assert "pyarrow is not installed" in str(exc_info.value)
420+
assert "pip install 'cachekit[data]'" in str(exc_info.value)
421+
finally:
422+
# Restore everything
423+
builtins.__import__ = original_import
424+
for mod, module in original_modules.items():
425+
sys.modules[mod] = module

0 commit comments

Comments
 (0)