Skip to content

Commit

Permalink
extract helper functions
Browse files Browse the repository at this point in the history
  • Loading branch information
johnkerl committed Jan 29, 2024
1 parent 4b47a8c commit bc0644f
Show file tree
Hide file tree
Showing 6 changed files with 95 additions and 120 deletions.
27 changes: 27 additions & 0 deletions apis/python/src/tiledbsoma/_util.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
from itertools import zip_longest
from typing import Any, Optional, Tuple, Type, TypeVar

import pandas as pd
import pyarrow as pa
import somacore
from somacore import options
Expand Down Expand Up @@ -284,3 +285,29 @@ def pa_types_is_string_or_bytes(dtype: pa.DataType) -> bool:
or pa.types.is_string(dtype)
or pa.types.is_binary(dtype)
)


def anndata_dataframe_unmodified(old: pd.DataFrame, new: pd.DataFrame) -> bool:
    """
    Checks that we didn't mutate the object while ingesting. Intended for unit tests.

    Args:
        old: Copy of the dataframe taken before the operation under test.
        new: The dataframe as it is after the operation under test.

    Returns:
        True if every cell of ``old`` compares equal to the corresponding cell
        of ``new``. False if any cell differs, or if the comparison raises
        ValueError (as pandas does when the frames are not identically labeled).

    Note:
        NaN never compares equal to itself, so frames that contain NaN report
        False even when untouched; use anndata_dataframe_unmodified_nan_safe
        for such data.
    """
    try:
        # The pandas reduction yields numpy.bool_; coerce it so the declared
        # ``bool`` return type actually holds.
        return bool((old == new).all().all())
    except ValueError:
        # Can be thrown when columns don't match -- which is what we check for
        return False

Check warning on line 298 in apis/python/src/tiledbsoma/_util.py

View check run for this annotation

Codecov / codecov/patch

apis/python/src/tiledbsoma/_util.py#L298

Added line #L298 was not covered by tests


def anndata_dataframe_unmodified_nan_safe(old: pd.DataFrame, new: pd.DataFrame) -> bool:
    """
    Variant of anndata_dataframe_unmodified that works with NaN data.
    A key property of NaN is it's not equal to itself: x != x.

    Only the structure of the two frames is compared: the index name, the
    number of rows, and the column labels (in order). Cell values are not
    compared, which is what makes the check insensitive to NaN.

    Args:
        old: Copy of the dataframe taken before the operation under test.
        new: The dataframe as it is after the operation under test.

    Returns:
        True if index name, row count and column labels all match, else False.
    """
    if old.index.name != new.index.name:
        return False
    if len(old) != len(new):
        return False

    old_columns = old.keys()
    new_columns = new.keys()
    # Element-wise Index comparison raises ValueError when the lengths differ,
    # so a differing column count must be rejected before comparing labels.
    if len(old_columns) != len(new_columns):
        return False
    if (old_columns != new_columns).any():
        return False
    return True
72 changes: 26 additions & 46 deletions apis/python/tests/test_basic_anndata_io.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,10 @@
import tiledbsoma
import tiledbsoma.io
from tiledbsoma import _constants, _factory
from tiledbsoma._util import (
anndata_dataframe_unmodified,
anndata_dataframe_unmodified_nan_safe,
)

HERE = Path(__file__).parent

Expand Down Expand Up @@ -89,30 +93,6 @@ def adata(h5ad_file):
return anndata.read_h5ad(h5ad_file)


def _anndata_dataframe_unmodified(old, new):
"""
Checks that we didn't mutate the object while ingesting.

Returns the element-wise equality of ``old`` and ``new`` reduced over all
cells, or False when that comparison raises ValueError.
"""
try:
return (old == new).all().all()
except ValueError:
# Can be thrown when columns don't match -- which is what we check for
return False


def _anndata_dataframe_unmodified_nan_safe(old, new):
"""
Variant of _anndata_dataframe_unmodified that works with NaN data.
A key property of NaN is it's not equal to itself: x != x.

Only the index name, the length, and the keys of the two objects are
compared; cell values are not inspected.
"""

if old.index.name != new.index.name:
return False
if len(old) != len(new):
return False
if any(old.keys() != new.keys()):
return False
return True


@pytest.mark.parametrize(
"ingest_modes",
[
Expand Down Expand Up @@ -158,8 +138,8 @@ def test_import_anndata(adata, ingest_modes, X_kind):
if ingest_mode != "schema_only":
have_ingested = True

assert _anndata_dataframe_unmodified(original.obs, adata.obs)
assert _anndata_dataframe_unmodified(original.var, adata.var)
assert anndata_dataframe_unmodified(original.obs, adata.obs)
assert anndata_dataframe_unmodified(original.var, adata.var)

exp = tiledbsoma.Experiment.open(uri)

Expand Down Expand Up @@ -447,8 +427,8 @@ def test_ingest_uns(tmp_path: pathlib.Path, h5ad_file_extended, ingest_uns_keys)
uns_keys=ingest_uns_keys,
)

assert _anndata_dataframe_unmodified(original.obs, adata.obs)
assert _anndata_dataframe_unmodified(original.var, adata.var)
assert anndata_dataframe_unmodified(original.obs, adata.obs)
assert anndata_dataframe_unmodified(original.var, adata.var)

with tiledbsoma.Experiment.open(uri) as exp:
uns = exp.ms["hello"]["uns"]
Expand Down Expand Up @@ -517,8 +497,8 @@ def test_add_matrix_to_collection(adata):

uri = tiledbsoma.io.from_anndata(output_path, adata, measurement_name="RNA")

assert _anndata_dataframe_unmodified(original.obs, adata.obs)
assert _anndata_dataframe_unmodified(original.var, adata.var)
assert anndata_dataframe_unmodified(original.obs, adata.obs)
assert anndata_dataframe_unmodified(original.var, adata.var)

exp = tiledbsoma.Experiment.open(uri)
with _factory.open(output_path) as exp_r:
Expand Down Expand Up @@ -644,8 +624,8 @@ def add_matrix_to_collection(

uri = tiledbsoma.io.from_anndata(output_path, adata, measurement_name="RNA")

assert _anndata_dataframe_unmodified(original.obs, adata.obs)
assert _anndata_dataframe_unmodified(original.var, adata.var)
assert anndata_dataframe_unmodified(original.obs, adata.obs)
assert anndata_dataframe_unmodified(original.var, adata.var)

exp = tiledbsoma.Experiment.open(uri)
with _factory.open(output_path) as exp_r:
Expand Down Expand Up @@ -703,8 +683,8 @@ def test_export_anndata(adata):

tiledbsoma.io.from_anndata(output_path, adata, measurement_name="RNA")

assert _anndata_dataframe_unmodified(original.obs, adata.obs)
assert _anndata_dataframe_unmodified(original.var, adata.var)
assert anndata_dataframe_unmodified(original.obs, adata.obs)
assert anndata_dataframe_unmodified(original.var, adata.var)

with _factory.open(output_path) as exp:
with pytest.raises(ValueError):
Expand Down Expand Up @@ -753,8 +733,8 @@ def test_null_obs(adata, tmp_path: Path):
uri = tiledbsoma.io.from_anndata(
output_path, adata, "RNA", ingest_mode="write", X_kind=tiledbsoma.SparseNDArray
)
assert _anndata_dataframe_unmodified_nan_safe(original.obs, adata.obs)
assert _anndata_dataframe_unmodified_nan_safe(original.var, adata.var)
assert anndata_dataframe_unmodified_nan_safe(original.obs, adata.obs)
assert anndata_dataframe_unmodified_nan_safe(original.var, adata.var)

exp = tiledbsoma.Experiment.open(uri)
with tiledb.open(exp.obs.uri, "r") as obs:
Expand Down Expand Up @@ -782,8 +762,8 @@ def test_export_obsm_with_holes(h5ad_file_with_obsm_holes, tmp_path):
output_path = tmp_path.as_posix()
tiledbsoma.io.from_anndata(output_path, adata, "RNA")

assert _anndata_dataframe_unmodified(original.obs, adata.obs)
assert _anndata_dataframe_unmodified(original.var, adata.var)
assert anndata_dataframe_unmodified(original.obs, adata.obs)
assert anndata_dataframe_unmodified(original.var, adata.var)

exp = tiledbsoma.Experiment.open(output_path)

Expand Down Expand Up @@ -929,8 +909,8 @@ def test_id_names(tmp_path, obs_id_name, var_id_name, indexify_obs, indexify_var
obs_id_name=obs_id_name,
var_id_name=var_id_name,
)
assert _anndata_dataframe_unmodified(original.obs, adata.obs)
assert _anndata_dataframe_unmodified(original.var, adata.var)
assert anndata_dataframe_unmodified(original.obs, adata.obs)
assert anndata_dataframe_unmodified(original.var, adata.var)

with tiledbsoma.Experiment.open(uri) as exp:
assert obs_id_name in exp.obs.keys()
Expand Down Expand Up @@ -1015,8 +995,8 @@ def test_uns_io(tmp_path, outgest_uns_keys):
soma_uri = tmp_path.as_posix()

tiledbsoma.io.from_anndata(soma_uri, adata, measurement_name="RNA")
assert _anndata_dataframe_unmodified(original.obs, adata.obs)
assert _anndata_dataframe_unmodified(original.var, adata.var)
assert anndata_dataframe_unmodified(original.obs, adata.obs)
assert anndata_dataframe_unmodified(original.var, adata.var)

with tiledbsoma.Experiment.open(soma_uri) as exp:
bdata = tiledbsoma.io.to_anndata(
Expand Down Expand Up @@ -1067,8 +1047,8 @@ def test_string_nan_columns(tmp_path, adata, write_index):
uri = tmp_path.as_posix()
original = adata.copy()
tiledbsoma.io.from_anndata(uri, adata, measurement_name="RNA")
assert _anndata_dataframe_unmodified_nan_safe(original.obs, adata.obs)
assert _anndata_dataframe_unmodified_nan_safe(original.var, adata.var)
assert anndata_dataframe_unmodified_nan_safe(original.obs, adata.obs)
assert anndata_dataframe_unmodified_nan_safe(original.var, adata.var)

# Step 3
with tiledbsoma.open(uri, "r") as exp:
Expand Down Expand Up @@ -1126,8 +1106,8 @@ def test_index_names_io(tmp_path, obs_index_name, var_index_name):

original = adata.copy()
tiledbsoma.io.from_anndata(soma_uri, adata, measurement_name)
assert _anndata_dataframe_unmodified(original.obs, adata.obs)
assert _anndata_dataframe_unmodified(original.var, adata.var)
assert anndata_dataframe_unmodified(original.obs, adata.obs)
assert anndata_dataframe_unmodified(original.var, adata.var)

with tiledbsoma.Experiment.open(soma_uri) as exp:
bdata = tiledbsoma.io.to_anndata(exp, measurement_name)
Expand Down
14 changes: 3 additions & 11 deletions apis/python/tests/test_platform_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
import tiledbsoma
import tiledbsoma.io
import tiledbsoma.options._tiledb_create_options as tco
from tiledbsoma._util import anndata_dataframe_unmodified

HERE = Path(__file__).parent

Expand All @@ -25,15 +26,6 @@ def adata(h5ad_file):
return anndata.read_h5ad(h5ad_file)


def _anndata_dataframe_unmodified(old, new):
"""
Checks that we didn't mutate the object while ingesting.

Returns the element-wise equality of ``old`` and ``new`` reduced over all
cells, or False when that comparison raises ValueError.
"""
try:
return (old == new).all().all()
except ValueError:
# Can be thrown when columns don't match -- which is what we check for
return False


def test_platform_config(adata):
# Set up anndata input path and tiledb-group output path
original = adata.copy()
Expand Down Expand Up @@ -63,8 +55,8 @@ def test_platform_config(adata):
}
},
)
assert _anndata_dataframe_unmodified(original.obs, adata.obs)
assert _anndata_dataframe_unmodified(original.var, adata.var)
assert anndata_dataframe_unmodified(original.obs, adata.obs)
assert anndata_dataframe_unmodified(original.var, adata.var)

with tiledbsoma.Experiment.open(output_path) as exp:
x_data = exp.ms["RNA"].X["data"]
Expand Down
18 changes: 5 additions & 13 deletions apis/python/tests/test_registration_mappings.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,15 +12,7 @@

import tiledbsoma.io
import tiledbsoma.io._registration as registration


def _anndata_dataframe_unmodified(old, new):
"""
Checks that we didn't mutate the object while ingesting.

Returns the element-wise equality of ``old`` and ``new`` reduced over all
cells, or False when that comparison raises ValueError.
"""
try:
return (old == new).all().all()
except ValueError:
# Can be thrown when columns don't match -- which is what we check for
return False
from tiledbsoma._util import anndata_dataframe_unmodified


def _create_anndata(
Expand Down Expand Up @@ -755,8 +747,8 @@ def test_append_items_with_experiment(soma1, h5ad2):
registration_mapping=rd,
)

assert _anndata_dataframe_unmodified(original.obs, adata2.obs)
assert _anndata_dataframe_unmodified(original.var, adata2.var)
assert anndata_dataframe_unmodified(original.obs, adata2.obs)
assert anndata_dataframe_unmodified(original.var, adata2.var)

expect_obs_soma_joinids = list(range(6))
expect_var_soma_joinids = list(range(5))
Expand Down Expand Up @@ -858,8 +850,8 @@ def test_append_with_disjoint_measurements(
registration_mapping=rd,
)

assert _anndata_dataframe_unmodified(original.obs, anndata2.obs)
assert _anndata_dataframe_unmodified(original.var, anndata2.var)
assert anndata_dataframe_unmodified(original.obs, anndata2.obs)
assert anndata_dataframe_unmodified(original.var, anndata2.var)

# exp/obs, use_same_cells=True: exp/obs, use_same_cells=False:
# soma_joinid obs_id cell_type is_primary_data soma_joinid obs_id cell_type is_primary_data
Expand Down
30 changes: 11 additions & 19 deletions apis/python/tests/test_registration_signatures.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@

import tiledbsoma.io
import tiledbsoma.io._registration.signatures as signatures
from tiledbsoma._util import anndata_dataframe_unmodified

HERE = Path(__file__).parent

Expand All @@ -21,15 +22,6 @@ def canned_anndata(canned_h5ad_file):
return ad.read_h5ad(canned_h5ad_file)


def _anndata_dataframe_unmodified(old, new):
"""
Checks that we didn't mutate the object while ingesting.

Returns the element-wise equality of ``old`` and ``new`` reduced over all
cells, or False when that comparison raises ValueError.
"""
try:
return (old == new).all().all()
except ValueError:
# Can be thrown when columns don't match -- which is what we check for
return False


def test_signature_serdes(canned_h5ad_file, canned_anndata):
sig = signatures.Signature.from_h5ad(canned_h5ad_file.as_posix())
text1 = sig.to_json()
Expand All @@ -39,8 +31,8 @@ def test_signature_serdes(canned_h5ad_file, canned_anndata):

original = canned_anndata.copy()
sig = signatures.Signature.from_anndata(canned_anndata)
assert _anndata_dataframe_unmodified(original.obs, canned_anndata.obs)
assert _anndata_dataframe_unmodified(original.var, canned_anndata.var)
assert anndata_dataframe_unmodified(original.obs, canned_anndata.obs)
assert anndata_dataframe_unmodified(original.var, canned_anndata.var)

text2 = sig.to_json()
assert sig == signatures.Signature.from_json(text2)
Expand All @@ -51,8 +43,8 @@ def test_signature_serdes(canned_h5ad_file, canned_anndata):
output_path = tempdir.name

uri = tiledbsoma.io.from_anndata(output_path, canned_anndata, "RNA")
assert _anndata_dataframe_unmodified(original.obs, canned_anndata.obs)
assert _anndata_dataframe_unmodified(original.var, canned_anndata.var)
assert anndata_dataframe_unmodified(original.obs, canned_anndata.obs)
assert anndata_dataframe_unmodified(original.var, canned_anndata.var)

sig = signatures.Signature.from_soma_experiment(uri)
text3 = sig.to_json()
Expand All @@ -67,14 +59,14 @@ def test_compatible(canned_anndata):

original = canned_anndata.copy()
sig1 = signatures.Signature.from_anndata(canned_anndata)
assert _anndata_dataframe_unmodified(original.obs, canned_anndata.obs)
assert _anndata_dataframe_unmodified(original.var, canned_anndata.var)
assert anndata_dataframe_unmodified(original.obs, canned_anndata.obs)
assert anndata_dataframe_unmodified(original.var, canned_anndata.var)

tempdir = tempfile.TemporaryDirectory()
output_path = tempdir.name
uri = tiledbsoma.io.from_anndata(output_path, canned_anndata, "RNA")
assert _anndata_dataframe_unmodified(original.obs, canned_anndata.obs)
assert _anndata_dataframe_unmodified(original.var, canned_anndata.var)
assert anndata_dataframe_unmodified(original.obs, canned_anndata.obs)
assert anndata_dataframe_unmodified(original.var, canned_anndata.var)
sig2 = signatures.Signature.from_soma_experiment(uri)

# Check that single inputs result in zero incompatibility
Expand All @@ -98,8 +90,8 @@ def test_compatible(canned_anndata):

original = adata3.copy()
sig3 = signatures.Signature.from_anndata(adata3)
assert _anndata_dataframe_unmodified(original.obs, adata3.obs)
assert _anndata_dataframe_unmodified(original.var, adata3.var)
assert anndata_dataframe_unmodified(original.obs, adata3.obs)
assert anndata_dataframe_unmodified(original.var, adata3.var)

with pytest.raises(ValueError):
signatures.Signature.check_compatible({"orig": sig1, "anndata3": sig3})
Loading

0 comments on commit bc0644f

Please sign in to comment.