Skip to content

Commit

Permalink
wip
Browse files Browse the repository at this point in the history
  • Loading branch information
Marigold committed Nov 5, 2024
1 parent 41614cd commit 25ce71c
Show file tree
Hide file tree
Showing 2 changed files with 5 additions and 2 deletions.
5 changes: 3 additions & 2 deletions lib/repack/owid/repack/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,8 @@ def to_int(s: pd.Series) -> pd.Series:
# values could be integers or strings
v = s.astype("float64").astype("int64[pyarrow]")

if not series_eq(v, s, cast="float64[pyarrow]"):
# compare using the builtin float cast: it parses numeric strings (e.g. "4") into floats, whereas casting to float64[pyarrow] does not
if not series_eq(v, s, cast=float):
raise ValueError()

# it's an integer, now pack it smaller
Expand Down Expand Up @@ -145,7 +146,7 @@ def series_eq(lhs: pd.Series, rhs: pd.Series, cast: Any, rtol: float = 1e-5, ato
return False

# improve performance by calling native astype method
if cast == "float64[pyarrow]":
if cast in {float, "float", "float64[pyarrow]"}:
func = lambda s: s.astype(cast) # noqa: E731
else:
raise NotImplementedError()
Expand Down
2 changes: 2 additions & 0 deletions lib/repack/tests/test_repack.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ def test_repack_object_columns():
"myint": [1, 2, None, 3],
"myfloat": [1.2, 2.0, 3.0, None],
"mycat": ["a", None, "b", "c"],
"myintstr": [1, 2, 3, "4"],
},
dtype="object",
)
Expand All @@ -37,6 +38,7 @@ def test_repack_object_columns():
assert df_repack.myint.dtype.name == "UInt8"
assert df_repack.myfloat.dtype.name == "float32"
assert df_repack.mycat.dtype.name == "category"
assert df_repack.myintstr.dtype.name == "UInt8"


def test_repack_frame_with_index():
Expand Down

0 comments on commit 25ce71c

Please sign in to comment.