
Many test warnings
Signed-off-by: zethson <[email protected]>
Zethson committed Nov 15, 2023
1 parent 6a22b55 commit b981e1e
Showing 10 changed files with 33 additions and 32 deletions.
2 changes: 1 addition & 1 deletion ehrapy/anndata/anndata_ext.py
@@ -303,7 +303,7 @@ def move_to_x(adata: AnnData, to_x: list[str] | str) -> AnnData:
)

if cols_not_in_x:
- new_adata = concat([adata, AnnData(adata.obs[cols_not_in_x], dtype="object")], axis=1)
+ new_adata = concat([adata, AnnData(adata.obs[cols_not_in_x])], axis=1)
new_adata.obs = adata.obs[adata.obs.columns[~adata.obs.columns.isin(cols_not_in_x)]]
# update uns (copy maybe: could be a costly operation but reduces reference cycles)
# users might save those as separate AnnData object and this could be unexpected behaviour if we dont copy
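
This change drops the `dtype` argument when wrapping the obs columns in a new AnnData object; recent anndata releases deprecate that constructor argument and warn about it, which is presumably among the warnings this commit addresses. A minimal sketch of the pattern, assuming anndata >= 0.9 (the example DataFrame is made up):

    import pandas as pd
    from anndata import AnnData

    df = pd.DataFrame({"b12_values": [1.0, 2.0]}, index=["patient_1", "patient_2"])

    # Old pattern, now warns on recent anndata: AnnData(df, dtype="object")
    # New pattern: let AnnData infer the dtype from the DataFrame
    adata = AnnData(df)
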
1 change: 0 additions & 1 deletion ehrapy/core/meta_information.py
@@ -76,7 +76,6 @@ def print_header(*, file=None): # pragma: no cover
("sklearn", "scikit-learn"),
"statsmodels",
("igraph", "python-igraph"),
"louvain",
"leidenalg",
"pynndescent",
]
4 changes: 3 additions & 1 deletion ehrapy/io/_read.py
@@ -334,7 +334,9 @@ def _do_read_h5ad(file_path: Path | Iterator[str]) -> AnnData:
Returns:
An AnnData object.
"""
- adata = read_h5(file_path)
+ import anndata as ad
+
+ adata = ad.read_h5ad(file_path)
if "ehrapy_dummy_encoding" in adata.uns.keys():
# if dummy encoding was needed, the original dtype of X could not be numerical, so cast it to object
adata.X = adata.X.astype("object")
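
Here the loader switches from the `read_h5` helper to calling anndata's `anndata.read_h5ad` directly. A minimal sketch of the new read path under that assumption (the file name is hypothetical):

    import anndata as ad
    import numpy as np

    adata = ad.AnnData(np.ones((2, 2)))
    adata.write("example.h5ad")  # hypothetical local file

    loaded = ad.read_h5ad("example.h5ad")
    # ehrapy then restores the object dtype if its dummy-encoding flag is present:
    if "ehrapy_dummy_encoding" in loaded.uns.keys():
        loaded.X = loaded.X.astype("object")
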
24 changes: 11 additions & 13 deletions tests/anndata/test_anndata_ext.py
@@ -119,17 +119,17 @@ def test_move_to_x_move_to_obs(self):
# case 1: move some column from obs to X and this col was copied previously from X to obs
move_to_obs(adata, ["name"], copy_obs=True)
adata = move_to_x(adata, ["name"])
assert {"name"}.issubset(set(adata.var_names)) # check if the copied column is still in X
assert adata.X.shape == adata_dim_old # the shape of X should be the same as previously
assert "name" in [item for sublist in adata.uns.values() for item in sublist] # check if the column in in uns
delete_from_obs(adata, ["name"]) # delete the column from obs to restore the original adata state
assert {"name"}.issubset(set(adata.var_names))
assert adata.X.shape == adata_dim_old
assert "name" in [item for sublist in adata.uns.values() for item in sublist]
delete_from_obs(adata, ["name"])

# case 2: move some column from obs to X and this col was previously moved inplace from X to obs
move_to_obs(adata, ["clinic_id"], copy_obs=False)
adata = move_to_x(adata, ["clinic_id"])
assert not {"clinic_id"}.issubset(set(adata.obs.columns)) # check if the copied column was removed from obs
assert {"clinic_id"}.issubset(set(adata.var_names)) # check if the copied column is now in X
assert adata.X.shape == adata_dim_old # the shape of X should be the same as previously
assert not {"clinic_id"}.issubset(set(adata.obs.columns))
assert {"clinic_id"}.issubset(set(adata.var_names))
assert adata.X.shape == adata_dim_old
assert "clinic_id" in [
item for sublist in adata.uns.values() for item in sublist
] # check if the column in in uns
@@ -142,12 +142,10 @@ def test_move_to_x_move_to_obs(self):
assert not {"los_days"}.issubset(
set(adata.obs.columns)
) # check if the copied column was removed from obs by delete_from_obs()
assert not {"b12_values"}.issubset(set(adata.obs.columns)) # check if the moved column was removed from obs
assert {"los_days", "b12_values"}.issubset(set(adata.var_names)) # check if the copied column is now in X
assert adata.X.shape == adata_dim_old # the shape of X should be the same as previously
assert {"los_days", "b12_values"}.issubset(
{item for sublist in adata.uns.values() for item in sublist}
) # check if the column in in uns
assert not {"b12_values"}.issubset(set(adata.obs.columns))
assert {"los_days", "b12_values"}.issubset(set(adata.var_names))
assert adata.X.shape == adata_dim_old
assert {"los_days", "b12_values"}.issubset({item for sublist in adata.uns.values() for item in sublist})

def test_delete_from_obs(self):
adata = ep.io.read_csv(CUR_DIR / "../io/test_data_io/dataset_move_obs_mix.csv")
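
For context, this test round-trips columns between obs and X with ehrapy's helpers. A rough usage sketch, assuming `move_to_obs`, `move_to_x`, and `delete_from_obs` are importable from `ehrapy.anndata` (they live in ehrapy/anndata/anndata_ext.py); the CSV path is hypothetical:

    import ehrapy as ep
    from ehrapy.anndata import delete_from_obs, move_to_obs, move_to_x

    adata = ep.io.read_csv("dataset_move_obs_mix.csv")  # hypothetical local file

    move_to_obs(adata, ["name"], copy_obs=True)   # copy: "name" stays in X and is mirrored in obs
    adata = move_to_x(adata, ["name"])            # ensure the column is (still) in X
    delete_from_obs(adata, ["name"])              # drop the leftover obs copy
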
Empty file removed tests/io/__init__.py
3 changes: 2 additions & 1 deletion tests/io/test_read.py
@@ -3,6 +3,7 @@
import numpy as np
import pandas as pd
import pytest
+ from pandas import CategoricalDtype

from ehrapy.io._read import read_csv, read_fhir, read_h5ad

@@ -135,7 +136,7 @@ def test_read_csv_with_bools_and_cats_obs_only(self):
assert set(adata.obs.columns) == {"b12_values", "survival", "name"}
assert pd.api.types.is_bool_dtype(adata.obs["survival"].dtype)
assert pd.api.types.is_numeric_dtype(adata.obs["b12_values"].dtype)
assert pd.api.types.is_categorical_dtype(adata.obs["name"].dtype)
assert isinstance(adata.obs["name"].dtype, CategoricalDtype)

def test_set_default_index(self):
adata = read_csv(dataset_path=f"{_TEST_PATH}/dataset_index.csv")
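
The dtype assertion swap follows pandas' deprecation of `pd.api.types.is_categorical_dtype` in the 2.x line; the suggested replacement is an `isinstance` check against `CategoricalDtype`. A minimal sketch, assuming pandas >= 2.1:

    import pandas as pd
    from pandas import CategoricalDtype

    s = pd.Series(["a", "b", "a"], dtype="category")

    # Old check, now emits a deprecation warning:
    # pd.api.types.is_categorical_dtype(s.dtype)

    # Replacement without the warning:
    assert isinstance(s.dtype, CategoricalDtype)
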
Empty file removed tests/preprocessing/__init__.py
11 changes: 6 additions & 5 deletions tests/preprocessing/test_encode.py
@@ -2,6 +2,7 @@

import pandas as pd
import pytest
+ from pandas import CategoricalDtype

from ehrapy.io._read import read_csv
from ehrapy.preprocessing._encode import DuplicateColumnEncodingError, _reorder_encodings, encode
@@ -70,7 +71,7 @@ def test_autodetect_encode():
]
)
assert pd.api.types.is_bool_dtype(encoded_ann_data.obs["survival"].dtype)
- assert pd.api.types.is_categorical_dtype(encoded_ann_data.obs["clinic_day"].dtype)
+ assert isinstance(encoded_ann_data.obs["clinic_day"].dtype, CategoricalDtype)


def test_autodetect_num_only(capfd):
@@ -113,7 +114,7 @@ def test_autodetect_custom_mode():
for column in ["ehrapycat_survival", "ehrapycat_clinic_day"]
)
assert pd.api.types.is_bool_dtype(encoded_ann_data.obs["survival"].dtype)
- assert pd.api.types.is_categorical_dtype(encoded_ann_data.obs["clinic_day"].dtype)
+ assert isinstance(encoded_ann_data.obs["clinic_day"].dtype, CategoricalDtype)


def test_autodetect_encode_again():
@@ -169,7 +170,7 @@ def test_custom_encode():
]
)
assert pd.api.types.is_bool_dtype(encoded_ann_data.obs["survival"].dtype)
- assert pd.api.types.is_categorical_dtype(encoded_ann_data.obs["clinic_day"].dtype)
+ assert isinstance(encoded_ann_data.obs["clinic_day"].dtype, CategoricalDtype)


def test_custom_encode_again_single_columns_encoding():
@@ -199,7 +200,7 @@ def test_custom_encode_again_single_columns_encoding():
}
assert id(encoded_ann_data_again.X) != id(encoded_ann_data_again.layers["original"])
assert pd.api.types.is_bool_dtype(encoded_ann_data.obs["survival"].dtype)
- assert pd.api.types.is_categorical_dtype(encoded_ann_data.obs["clinic_day"].dtype)
+ assert isinstance(encoded_ann_data.obs["clinic_day"].dtype, CategoricalDtype)


def test_custom_encode_again_multiple_columns_encoding():
@@ -233,7 +234,7 @@ def test_custom_encode_again_multiple_columns_encoding():
}
assert id(encoded_ann_data_again.X) != id(encoded_ann_data_again.layers["original"])
assert pd.api.types.is_bool_dtype(encoded_ann_data.obs["survival"].dtype)
- assert pd.api.types.is_categorical_dtype(encoded_ann_data.obs["clinic_day"].dtype)
+ assert isinstance(encoded_ann_data.obs["clinic_day"].dtype, CategoricalDtype)


def test_update_encoding_scheme_1():
12 changes: 6 additions & 6 deletions tests/preprocessing/test_quality_control.py
@@ -86,19 +86,19 @@ def test_obs_nan_qc_metrics():
adata.X[0][4] = np.nan
adata2 = encode(adata, encodings={"one-hot": ["clinic_day"]})
obs_metrics = _obs_qc_metrics(adata2)
- assert obs_metrics.iloc[0][0] == 1
+ assert obs_metrics.iloc[0].iloc[0] == 1


def test_var_nan_qc_metrics():
adata = read_csv(dataset_path=f"{_TEST_PATH_ENCODE}/dataset1.csv")
adata.X[0][4] = np.nan
adata2 = encode(adata, encodings={"one-hot": ["clinic_day"]})
var_metrics = _var_qc_metrics(adata2)
- assert var_metrics.iloc[0][0] == 1
- assert var_metrics.iloc[1][0] == 1
- assert var_metrics.iloc[2][0] == 1
- assert var_metrics.iloc[3][0] == 1
- assert var_metrics.iloc[4][0] == 1
+ assert var_metrics.iloc[0].iloc[0] == 1
+ assert var_metrics.iloc[1].iloc[0] == 1
+ assert var_metrics.iloc[2].iloc[0] == 1
+ assert var_metrics.iloc[3].iloc[0] == 1
+ assert var_metrics.iloc[4].iloc[0] == 1


def test_calculate_qc_metrics(missing_values_adata):
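
These assertions previously chained `.iloc[0][0]`, where the second `[0]` is a positional lookup through `Series.__getitem__`; newer pandas releases warn that integer keys will eventually be treated as labels, so the second access also moves to `.iloc`. A minimal sketch with a made-up metrics frame (the column name is illustrative, not the actual QC metric name):

    import pandas as pd

    metrics = pd.DataFrame({"missing_values_abs": [1, 0]}, index=["row_1", "row_2"])

    row = metrics.iloc[0]      # first row as a Series
    # value = row[0]           # old: positional __getitem__, warns on recent pandas
    value = row.iloc[0]        # new: explicit positional access
    assert value == 1
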
8 changes: 4 additions & 4 deletions tests/tools/test_sa.py
@@ -12,8 +12,8 @@ def test_ols(self):
formula = "tco2_first ~ pco2_first"
var_names = ["tco2_first", "pco2_first"]
ols = ep.tl.ols(adata, var_names, formula, missing="drop")
- s = ols.fit().params[1]
- i = ols.fit().params[0]
+ s = ols.fit().params.iloc[1]
+ i = ols.fit().params.iloc[0]
assert isinstance(ols, statsmodels.regression.linear_model.OLS)
assert 0.18857179158259973 == pytest.approx(s)
assert 16.210859352601442 == pytest.approx(i)
@@ -24,8 +24,8 @@ def test_glm(self):
var_names = ["day_28_flg", "age"]
family = "Binomial"
glm = ep.tl.glm(adata, var_names, formula, family, missing="drop", as_continuous=["age"])
- Intercept = glm.fit().params[0]
- age = glm.fit().params[1]
+ Intercept = glm.fit().params.iloc[0]
+ age = glm.fit().params.iloc[1]
assert isinstance(glm, statsmodels.genmod.generalized_linear_model.GLM)
assert 5.778006344870297 == pytest.approx(Intercept)
assert -0.06523274132877163 == pytest.approx(age)
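
Same pandas idiom here: `fit().params` is a pandas Series, so `params[0]` relies on positional `__getitem__` and now warns; `.iloc` makes the positional intent explicit. A small sketch using plain statsmodels rather than `ep.tl.ols` (the numbers are made up):

    import pandas as pd
    import statsmodels.formula.api as smf

    df = pd.DataFrame({"tco2_first": [21.0, 22.5, 24.0, 25.5], "pco2_first": [40.0, 45.0, 50.0, 55.0]})
    params = smf.ols("tco2_first ~ pco2_first", data=df).fit().params

    intercept = params.iloc[0]   # positional access without the deprecation warning
    slope = params.iloc[1]
    # label-based access (params["pco2_first"]) also avoids the warning
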
