Skip to content

Commit

Permalink
tests
Browse files Browse the repository at this point in the history
  • Loading branch information
johnkerl committed Jan 29, 2024
1 parent c051512 commit 4b47a8c
Show file tree
Hide file tree
Showing 5 changed files with 176 additions and 2 deletions.
73 changes: 71 additions & 2 deletions apis/python/tests/test_basic_anndata_io.py
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,30 @@ def adata(h5ad_file):
return anndata.read_h5ad(h5ad_file)


def _anndata_dataframe_unmodified(old, new):
"""Checks that we didn't mutate the object while ingesting"""
try:
return (old == new).all().all()
except ValueError:
# Can be thrown when columns don't match -- which is what we check for
return False


def _anndata_dataframe_unmodified_nan_safe(old, new):
"""
Same as _anndata_dataframe_unmodified, except it works with NaN data.
A key property of NaN is it's not equal to itself: x != x.
"""

if old.index.name != new.index.name:
return False
if len(old) != len(new):
return False
if any(old.keys() != new.keys()):
return False
return True


@pytest.mark.parametrize(
"ingest_modes",
[
Expand All @@ -109,6 +133,7 @@ def adata(h5ad_file):
[tiledbsoma.SparseNDArray, tiledbsoma.DenseNDArray],
)
def test_import_anndata(adata, ingest_modes, X_kind):
original = adata.copy()
adata = adata.copy()

have_ingested = False
Expand All @@ -133,6 +158,9 @@ def test_import_anndata(adata, ingest_modes, X_kind):
if ingest_mode != "schema_only":
have_ingested = True

assert _anndata_dataframe_unmodified(original.obs, adata.obs)
assert _anndata_dataframe_unmodified(original.var, adata.var)

exp = tiledbsoma.Experiment.open(uri)

assert exp.metadata[metakey] == "SOMAExperiment"
Expand Down Expand Up @@ -411,13 +439,17 @@ def test_ingest_relative(h5ad_file_extended, use_relative_uri):
def test_ingest_uns(tmp_path: pathlib.Path, h5ad_file_extended, ingest_uns_keys):
tmp_uri = tmp_path.as_uri()
original = anndata.read(h5ad_file_extended)
adata = anndata.read(h5ad_file_extended)
uri = tiledbsoma.io.from_anndata(
tmp_uri,
original,
adata,
measurement_name="hello",
uns_keys=ingest_uns_keys,
)

assert _anndata_dataframe_unmodified(original.obs, adata.obs)
assert _anndata_dataframe_unmodified(original.var, adata.var)

with tiledbsoma.Experiment.open(uri) as exp:
uns = exp.ms["hello"]["uns"]
assert isinstance(uns, tiledbsoma.Collection)
Expand Down Expand Up @@ -446,7 +478,7 @@ def test_ingest_uns(tmp_path: pathlib.Path, h5ad_file_extended, ingest_uns_keys)
assert isinstance(random_state, tiledbsoma.DenseNDArray)
assert np.array_equal(random_state.read().to_numpy(), np.array([0]))
got_pca_variance = uns["pca"]["variance"].read().to_numpy()
assert np.array_equal(got_pca_variance, original.uns["pca"]["variance"])
assert np.array_equal(got_pca_variance, adata.uns["pca"]["variance"])
else:
assert set(uns) == set(ingest_uns_keys)

Expand Down Expand Up @@ -481,7 +513,13 @@ def test_add_matrix_to_collection(adata):
tempdir = tempfile.TemporaryDirectory()
output_path = tempdir.name

original = adata.copy()

uri = tiledbsoma.io.from_anndata(output_path, adata, measurement_name="RNA")

assert _anndata_dataframe_unmodified(original.obs, adata.obs)
assert _anndata_dataframe_unmodified(original.var, adata.var)

exp = tiledbsoma.Experiment.open(uri)
with _factory.open(output_path) as exp_r:
assert list(exp_r.ms["RNA"].X.keys()) == ["data"]
Expand Down Expand Up @@ -602,8 +640,13 @@ def add_matrix_to_collection(

tempdir = tempfile.TemporaryDirectory()
output_path = tempdir.name
original = adata.copy()

uri = tiledbsoma.io.from_anndata(output_path, adata, measurement_name="RNA")

assert _anndata_dataframe_unmodified(original.obs, adata.obs)
assert _anndata_dataframe_unmodified(original.var, adata.var)

exp = tiledbsoma.Experiment.open(uri)
with _factory.open(output_path) as exp_r:
assert list(exp_r.ms["RNA"].X.keys()) == ["data"]
Expand Down Expand Up @@ -656,8 +699,13 @@ def test_export_anndata(adata):
tempdir = tempfile.TemporaryDirectory()
output_path = tempdir.name

original = adata.copy()

tiledbsoma.io.from_anndata(output_path, adata, measurement_name="RNA")

assert _anndata_dataframe_unmodified(original.obs, adata.obs)
assert _anndata_dataframe_unmodified(original.var, adata.var)

with _factory.open(output_path) as exp:
with pytest.raises(ValueError):
tiledbsoma.io.to_anndata(
Expand Down Expand Up @@ -700,9 +748,14 @@ def test_null_obs(adata, tmp_path: Path):
# Create column of partially-null values
rng = np.random.RandomState(seed)
adata.obs["empty_partial"] = rng.choice((np.NaN, 1.0), adata.n_obs, True)

original = adata.copy()
uri = tiledbsoma.io.from_anndata(
output_path, adata, "RNA", ingest_mode="write", X_kind=tiledbsoma.SparseNDArray
)
assert _anndata_dataframe_unmodified_nan_safe(original.obs, adata.obs)
assert _anndata_dataframe_unmodified_nan_safe(original.var, adata.var)

exp = tiledbsoma.Experiment.open(uri)
with tiledb.open(exp.obs.uri, "r") as obs:
# Explicitly check columns created above
Expand All @@ -717,6 +770,7 @@ def test_null_obs(adata, tmp_path: Path):

def test_export_obsm_with_holes(h5ad_file_with_obsm_holes, tmp_path):
adata = anndata.read_h5ad(h5ad_file_with_obsm_holes.as_posix())
original = adata.copy()
assert 1 == 1

# This data file is prepared such that obsm["X_pca"] has shape (2638, 50)
Expand All @@ -728,6 +782,9 @@ def test_export_obsm_with_holes(h5ad_file_with_obsm_holes, tmp_path):
output_path = tmp_path.as_posix()
tiledbsoma.io.from_anndata(output_path, adata, "RNA")

assert _anndata_dataframe_unmodified(original.obs, adata.obs)
assert _anndata_dataframe_unmodified(original.var, adata.var)

exp = tiledbsoma.Experiment.open(output_path)

# Verify the bounding box on the SOMA SparseNDArray
Expand Down Expand Up @@ -860,6 +917,7 @@ def test_id_names(tmp_path, obs_id_name, var_id_name, indexify_obs, indexify_var
X[i, j] = 100 + 10 * i + j

adata = anndata.AnnData(X=X, obs=obs, var=var, dtype=X.dtype)
original = adata.copy()

uri = tmp_path.as_posix()

Expand All @@ -871,6 +929,8 @@ def test_id_names(tmp_path, obs_id_name, var_id_name, indexify_obs, indexify_var
obs_id_name=obs_id_name,
var_id_name=var_id_name,
)
assert _anndata_dataframe_unmodified(original.obs, adata.obs)
assert _anndata_dataframe_unmodified(original.var, adata.var)

with tiledbsoma.Experiment.open(uri) as exp:
assert obs_id_name in exp.obs.keys()
Expand Down Expand Up @@ -950,10 +1010,13 @@ def test_uns_io(tmp_path, outgest_uns_keys):
uns=uns,
dtype=X.dtype,
)
original = adata.copy()

soma_uri = tmp_path.as_posix()

tiledbsoma.io.from_anndata(soma_uri, adata, measurement_name="RNA")
assert _anndata_dataframe_unmodified(original.obs, adata.obs)
assert _anndata_dataframe_unmodified(original.var, adata.var)

with tiledbsoma.Experiment.open(soma_uri) as exp:
bdata = tiledbsoma.io.to_anndata(
Expand Down Expand Up @@ -1002,7 +1065,10 @@ def test_string_nan_columns(tmp_path, adata, write_index):

# Step 2
uri = tmp_path.as_posix()
original = adata.copy()
tiledbsoma.io.from_anndata(uri, adata, measurement_name="RNA")
assert _anndata_dataframe_unmodified_nan_safe(original.obs, adata.obs)
assert _anndata_dataframe_unmodified_nan_safe(original.var, adata.var)

# Step 3
with tiledbsoma.open(uri, "r") as exp:
Expand Down Expand Up @@ -1058,7 +1124,10 @@ def test_index_names_io(tmp_path, obs_index_name, var_index_name):

soma_uri = tmp_path.as_posix()

original = adata.copy()
tiledbsoma.io.from_anndata(soma_uri, adata, measurement_name)
assert _anndata_dataframe_unmodified(original.obs, adata.obs)
assert _anndata_dataframe_unmodified(original.var, adata.var)

with tiledbsoma.Experiment.open(soma_uri) as exp:
bdata = tiledbsoma.io.to_anndata(exp, measurement_name)
Expand Down
12 changes: 12 additions & 0 deletions apis/python/tests/test_platform_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,18 @@ def adata(h5ad_file):
return anndata.read_h5ad(h5ad_file)


def _anndata_dataframe_unmodified(old, new):
"""Checks that we didn't mutate the object while ingesting"""
try:
return (old == new).all().all()
except ValueError:
# Can be thrown when columns don't match -- which is what we check for
return False


def test_platform_config(adata):
# Set up anndata input path and tiledb-group output path
original = adata.copy()
with tempfile.TemporaryDirectory() as output_path:
# Ingest
tiledbsoma.io.from_anndata(
Expand All @@ -53,6 +63,8 @@ def test_platform_config(adata):
}
},
)
assert _anndata_dataframe_unmodified(original.obs, adata.obs)
assert _anndata_dataframe_unmodified(original.var, adata.var)

with tiledbsoma.Experiment.open(output_path) as exp:
x_data = exp.ms["RNA"].X["data"]
Expand Down
19 changes: 19 additions & 0 deletions apis/python/tests/test_registration_mappings.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,15 @@
import tiledbsoma.io._registration as registration


def _anndata_dataframe_unmodified(old, new):
"""Checks that we didn't mutate the object while ingesting"""
try:
return (old == new).all().all()
except ValueError:
# Can be thrown when columns don't match -- which is what we check for
return False


def _create_anndata(
*,
obs_ids: Sequence[str],
Expand Down Expand Up @@ -720,6 +729,8 @@ def test_append_items_with_experiment(soma1, h5ad2):

adata2 = ad.read_h5ad(h5ad2)

original = adata2.copy()

with tiledbsoma.Experiment.open(soma1, "w") as exp1:
tiledbsoma.io.append_obs(
exp1,
Expand All @@ -744,6 +755,9 @@ def test_append_items_with_experiment(soma1, h5ad2):
registration_mapping=rd,
)

assert _anndata_dataframe_unmodified(original.obs, adata2.obs)
assert _anndata_dataframe_unmodified(original.var, adata2.var)

expect_obs_soma_joinids = list(range(6))
expect_var_soma_joinids = list(range(5))

Expand Down Expand Up @@ -827,6 +841,8 @@ def test_append_with_disjoint_measurements(

anndata2 = anndata1 if use_same_cells else anndata4

original = anndata2.copy()

rd = tiledbsoma.io.register_anndatas(
soma_uri,
[anndata2],
Expand All @@ -842,6 +858,9 @@ def test_append_with_disjoint_measurements(
registration_mapping=rd,
)

assert _anndata_dataframe_unmodified(original.obs, anndata2.obs)
assert _anndata_dataframe_unmodified(original.var, anndata2.var)

# exp/obs, use_same_cells=True: exp/obs, use_same_cells=False:
# soma_joinid obs_id cell_type is_primary_data soma_joinid obs_id cell_type is_primary_data
# 0 0 AAAT B cell 1 0 0 AAAT B cell 1
Expand Down
27 changes: 27 additions & 0 deletions apis/python/tests/test_registration_signatures.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,22 +21,39 @@ def canned_anndata(canned_h5ad_file):
return ad.read_h5ad(canned_h5ad_file)


def _anndata_dataframe_unmodified(old, new):
"""Checks that we didn't mutate the object while ingesting"""
try:
return (old == new).all().all()
except ValueError:
# Can be thrown when columns don't match -- which is what we check for
return False


def test_signature_serdes(canned_h5ad_file, canned_anndata):
sig = signatures.Signature.from_h5ad(canned_h5ad_file.as_posix())
text1 = sig.to_json()
assert "obs_schema" in text1
assert "var_schema" in text1
assert sig == signatures.Signature.from_json(text1)

original = canned_anndata.copy()
sig = signatures.Signature.from_anndata(canned_anndata)
assert _anndata_dataframe_unmodified(original.obs, canned_anndata.obs)
assert _anndata_dataframe_unmodified(original.var, canned_anndata.var)

text2 = sig.to_json()
assert sig == signatures.Signature.from_json(text2)

assert text1 == text2

tempdir = tempfile.TemporaryDirectory()
output_path = tempdir.name

uri = tiledbsoma.io.from_anndata(output_path, canned_anndata, "RNA")
assert _anndata_dataframe_unmodified(original.obs, canned_anndata.obs)
assert _anndata_dataframe_unmodified(original.var, canned_anndata.var)

sig = signatures.Signature.from_soma_experiment(uri)
text3 = sig.to_json()
assert sig == signatures.Signature.from_json(text3)
Expand All @@ -48,11 +65,16 @@ def test_compatible(canned_anndata):
# Check that zero inputs result in zero incompatibility
signatures.Signature.check_compatible({})

original = canned_anndata.copy()
sig1 = signatures.Signature.from_anndata(canned_anndata)
assert _anndata_dataframe_unmodified(original.obs, canned_anndata.obs)
assert _anndata_dataframe_unmodified(original.var, canned_anndata.var)

tempdir = tempfile.TemporaryDirectory()
output_path = tempdir.name
uri = tiledbsoma.io.from_anndata(output_path, canned_anndata, "RNA")
assert _anndata_dataframe_unmodified(original.obs, canned_anndata.obs)
assert _anndata_dataframe_unmodified(original.var, canned_anndata.var)
sig2 = signatures.Signature.from_soma_experiment(uri)

# Check that single inputs result in zero incompatibility
Expand All @@ -73,6 +95,11 @@ def test_compatible(canned_anndata):
# Check incompatibility of modified AnnData
adata3 = canned_anndata
del adata3.obs["groups"]

original = adata3.copy()
sig3 = signatures.Signature.from_anndata(adata3)
assert _anndata_dataframe_unmodified(original.obs, adata3.obs)
assert _anndata_dataframe_unmodified(original.var, adata3.var)

with pytest.raises(ValueError):
signatures.Signature.check_compatible({"orig": sig1, "anndata3": sig3})
Loading

0 comments on commit 4b47a8c

Please sign in to comment.