Merge branch 'main' into enhancement/issue-743

theislab · Dec 11, 2024 · aca8220 · aca8220
2 parents 74bab7c + 419f2b2
commit aca8220
Show file tree

Hide file tree

Showing 39 changed files with 911 additions and 473 deletions.
diff --git a/.github/release-drafter.yml b/.github/release-drafter.yml
@@ -1,5 +1,5 @@
-name-template: "0.9.0 🌈"
-tag-template: 0.9.0
+name-template: "0.11.0 🌈"
+tag-template: 0.11.0
 exclude-labels:
     - "skip-changelog"
 

diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
@@ -16,12 +16,10 @@ jobs:
         steps:
             - uses: actions/checkout@v3
 
-            - name: Set up Python 3.11
+            - name: Set up Python 3.12
               uses: actions/setup-python@v5
               with:
-                  python-version: "3.11"
-                  cache: "pip"
-                  cache-dependency-path: "**/pyproject.toml"
+                  python-version: "3.12"
 
             - name: Install build dependencies
               run: python -m pip install --upgrade pip wheel twine build

diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml
@@ -13,10 +13,10 @@ jobs:
             - name: Checkout code
               uses: actions/checkout@v3
 
-            - name: Set up Python 3.11
+            - name: Set up Python 3.12
               uses: actions/setup-python@v5
               with:
-                  python-version: "3.11"
+                  python-version: "3.12"
 
             - name: Install hatch
               run: pip install hatch

diff --git a/.github/workflows/run_notebooks.yml b/.github/workflows/run_notebooks.yml
@@ -13,7 +13,7 @@ jobs:
                         "docs/tutorials/notebooks/ehrapy_introduction.ipynb",
                         "docs/tutorials/notebooks/mimic_2_introduction.ipynb",
                         "docs/tutorials/notebooks/mimic_2_survival_analysis.ipynb",
-                        "docs/tutorials/notebooks/mimic_2_fate.ipynb",
+                        # "docs/tutorials/notebooks/mimic_2_fate.ipynb",  # https://github.com/theislab/cellrank/issues/1235
                         "docs/tutorials/notebooks/mimic_2_causal_inference.ipynb",
                         # "docs/tutorials/notebooks/mimic_3_demo.ipynb",
                         # "docs/tutorials/notebooks/medcat.ipynb",
@@ -26,13 +26,16 @@ jobs:
             - name: Set up Python
               uses: actions/setup-python@v5
               with:
-                  python-version: "3.11"
+                  python-version: "3.12"
 
             - name: Install UV
               run: pip install uv
 
             - name: Install ehrapy and additional dependencies
-              run: uv pip install --system . cellrank nbconvert ipykernel
+              run: uv pip install --system . cellrank nbconvert ipykernel graphviz
+
+            - name: Install scvelo from Github
+              run: uv pip install --system git+https://github.com/theislab/scvelo.git
 
             - name: Run ${{ matrix.notebook }} Notebook
               run: jupyter nbconvert --to notebook --execute ${{ matrix.notebook }}
diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
@@ -24,9 +24,9 @@ jobs:
                     - os: ubuntu-latest
                       python: "3.10"
                     - os: ubuntu-latest
-                      python: "3.11"
+                      python: "3.12"
                     - os: ubuntu-latest
-                      python: "3.11"
+                      python: "3.12"
                       pip-flags: "--pre"
 
         env:

diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
@@ -11,7 +11,7 @@ repos:
       hooks:
           - id: prettier
     - repo: https://github.com/astral-sh/ruff-pre-commit
-      rev: v0.7.3
+      rev: v0.8.2
       hooks:
           - id: ruff
             args: [--fix, --exit-non-zero-on-fix, --unsafe-fixes]

diff --git a/.readthedocs.yml b/.readthedocs.yml
@@ -1,18 +1,15 @@
 version: 2
 build:
-    os: ubuntu-22.04
+    os: ubuntu-24.04
     tools:
-        python: "3.11"
-    jobs:
-        pre_build:
-            - python -c "import ehrapy"
-            - pip freeze
-        post_create_environment:
-            - pip install uv
-        post_install:
-            # VIRTUAL_ENV needs to be set manually for now.
-            # See https://github.com/readthedocs/readthedocs.org/pull/11152/
-            - VIRTUAL_ENV=$READTHEDOCS_VIRTUALENV_PATH pip install .[docs]
+        python: "3.12"
+    commands:
+        - asdf plugin add uv
+        - asdf install uv latest
+        - asdf global uv latest
+        - uv venv
+        - uv pip install .[docs]
+        - .venv/bin/python -m sphinx -T -b html -d docs/_build/doctrees -D language=en docs $READTHEDOCS_OUTPUT/html
 sphinx:
     configuration: docs/conf.py
     fail_on_warning: false

diff --git a/docs/_ext/edit_on_github.py b/docs/_ext/edit_on_github.py
@@ -20,7 +20,7 @@ def get_github_repo(app: Sphinx, path: str) -> str:
 
 
 def _html_page_context(
-    app: Sphinx, _pagename: str, templatename: str, context: dict[str, Any], doctree: Optional[Any]
+    app: Sphinx, _pagename: str, templatename: str, context: dict[str, Any], doctree: Any | None
 ) -> None:
     # doctree is None - otherwise viewcode fails
     if templatename != "page.html" or doctree is None:

diff --git a/docs/conf.py b/docs/conf.py
@@ -96,6 +96,7 @@
     "flax": ("https://flax.readthedocs.io/en/latest/", None),
     "jax": ("https://jax.readthedocs.io/en/latest/", None),
     "lamin": ("https://lamin.ai/docs", None),
+    "lifelines": ("https://lifelines.readthedocs.io/en/latest/", None),
 }
 
 language = "en"

diff --git a/docs/contributing.md b/docs/contributing.md
@@ -51,7 +51,7 @@ and [prettier][prettier-editors].
 ## Writing tests
 
 ```{note}
-Remember to first install the package with `pip install -e "[dev,test,docs]"`
+Remember to first install the package with `pip install -e ".[dev,test,docs]"`
 ```
 
 This package uses the [pytest][] for automated testing. Please [write tests][scanpy-test-docs] for every function added

diff --git a/docs/usage/usage.md b/docs/usage/usage.md
@@ -226,7 +226,7 @@ In contrast to a preprocessing function, a tool usually adds an easily interpret
 
     tools.ols
     tools.glm
-    tools.kmf
+    tools.kaplan_meier
     tools.test_kmf_logrank
     tools.test_nested_f_statistic
     tools.cox_ph
@@ -368,7 +368,7 @@ Methods that extract and visualize tool-specific annotation in an AnnData object
     :nosignatures:
 
     plot.ols
-    plot.kmf
+    plot.kaplan_meier
 ```
 
 ### Causal Inference

diff --git a/ehrapy/_settings.py b/ehrapy/_settings.py
@@ -53,7 +53,7 @@ def __init__(
         figdir: str | Path = "./figures/",
         cache_compression: str | None = "lzf",
         max_memory=15,
-        n_jobs: int = 1,
+        n_jobs: int = -1,
         logfile: str | Path | None = None,
         categories_to_ignore: Iterable[str] = ("N/A", "dontknow", "no_gate", "?"),
         _frameon: bool = True,

diff --git a/ehrapy/core/_tool_available.py → ehrapy/_utils_available.py b/ehrapy/core/_tool_available.py → ehrapy/_utils_available.py
@@ -4,7 +4,7 @@
 from subprocess import PIPE, Popen
 
 
-def _check_module_importable(package: str) -> bool:  # pragma: no cover
+def _check_module_importable(package: str) -> bool:
     """Checks whether a module is installed and can be loaded.
 
     Args:
@@ -19,7 +19,7 @@ def _check_module_importable(package: str) -> bool:  # pragma: no cover
     return module_available
 
 
-def _shell_command_accessible(command: list[str]) -> bool:  # pragma: no cover
+def _shell_command_accessible(command: list[str]) -> bool:
     """Checks whether the provided command is accessible in the current shell.
 
     Args:
@@ -29,7 +29,7 @@ def _shell_command_accessible(command: list[str]) -> bool:  # pragma: no cover
         True if the command is accessible, False otherwise.
     """
     command_accessible = Popen(command, stdout=PIPE, stderr=PIPE, universal_newlines=True, shell=True)
-    (commmand_stdout, command_stderr) = command_accessible.communicate()
+    command_accessible.communicate()
     if command_accessible.returncode != 0:
         return False
 

diff --git a/ehrapy/_doc_util.py → ehrapy/_utils_doc.py b/ehrapy/_doc_util.py → ehrapy/_utils_doc.py
@@ -1,9 +1,9 @@
 import inspect
+from collections.abc import Callable
 from textwrap import dedent
-from typing import Callable, Optional, Union
 
 
-def getdoc(c_or_f: Union[Callable, type]) -> Optional[str]:  # pragma: no cover
+def getdoc(c_or_f: Callable | type) -> str | None:  # pragma: no cover
     if getattr(c_or_f, "__doc__", None) is None:
         return None
     doc = inspect.getdoc(c_or_f)

diff --git a/ehrapy/_utils_rendering.py b/ehrapy/_utils_rendering.py
@@ -0,0 +1,21 @@
+import functools
+
+from rich.progress import Progress, SpinnerColumn
+
+
+def spinner(message: str = "Running task"):
+    def wrap(func):
+        @functools.wraps(func)
+        def wrapped_f(*args, **kwargs):
+            with Progress(
+                "[progress.description]{task.description}",
+                SpinnerColumn(),
+                refresh_per_second=1500,
+            ) as progress:
+                progress.add_task(f"[blue]{message}", total=1)
+                result = func(*args, **kwargs)
+            return result
+
+        return wrapped_f
+
+    return wrap
diff --git a/ehrapy/anndata/anndata_ext.py b/ehrapy/anndata/anndata_ext.py
@@ -3,7 +3,7 @@
 import random
 from collections import OrderedDict
 from string import ascii_letters
-from typing import TYPE_CHECKING, NamedTuple
+from typing import TYPE_CHECKING, Any, NamedTuple
 
 import numpy as np
 import pandas as pd
@@ -252,13 +252,13 @@ def delete_from_obs(adata: AnnData, to_delete: list[str]) -> AnnData:
     return adata
 
 
-def move_to_x(adata: AnnData, to_x: list[str] | str) -> AnnData:
+def move_to_x(adata: AnnData, to_x: list[str] | str, copy_x: bool = False) -> AnnData:
     """Move features from obs to X inplace.
 
     Args:
         adata: The AnnData object
         to_x: The columns to move to X
-        copy: Whether to return a copy or not
+        copy_x: The values are copied to X (and therefore kept in obs) instead of moved completely
 
     Returns:
         A new AnnData object with moved columns from obs to X. This should not be used for datetime columns currently.
@@ -292,7 +292,10 @@ def move_to_x(adata: AnnData, to_x: list[str] | str) -> AnnData:
 
     if cols_not_in_x:
         new_adata = concat([adata, AnnData(adata.obs[cols_not_in_x])], axis=1)
-        new_adata.obs = adata.obs[adata.obs.columns[~adata.obs.columns.isin(cols_not_in_x)]]
+        if copy_x:
+            new_adata.obs = adata.obs
+        else:
+            new_adata.obs = adata.obs[adata.obs.columns[~adata.obs.columns.isin(cols_not_in_x)]]
 
         # AnnData's concat discards var if they don't match in their keys, so we need to create a new var
         created_var = pd.DataFrame(index=cols_not_in_x)
@@ -303,7 +306,7 @@ def move_to_x(adata: AnnData, to_x: list[str] | str) -> AnnData:
     return new_adata
 
 
-def _get_column_indices(adata: AnnData, col_names: str | Iterable[str]) -> list[int]:
+def get_column_indices(adata: AnnData, col_names: str | Iterable[str]) -> list[int]:
     """Fetches the column indices in X for a given list of column names
 
     Args:
@@ -383,7 +386,10 @@ def set_numeric_vars(
     if copy:
         adata = adata.copy()
 
-    vars_idx = _get_column_indices(adata, vars)
+    vars_idx = get_column_indices(adata, vars)
+
+    # if e.g. adata.X is of type int64, and values of dtype float64, the floats will be cast to int
+    adata.X = adata.X.astype(values.dtype)
 
     adata.X[:, vars_idx] = values
 
@@ -404,7 +410,7 @@ def _detect_binary_columns(df: pd.DataFrame, numerical_columns: list[str]) -> li
     for column in numerical_columns:
         # checking for float and int as well as NaNs (this is safe since checked columns are numericals only)
         # only columns that contain at least one 0 and one 1 are counted as binary (or 0.0/1.0)
-        if df[column].isin([0.0, 1.0, np.NaN, 0, 1]).all() and df[column].nunique() == 2:
+        if df[column].isin([0.0, 1.0, np.nan, 0, 1]).all() and df[column].nunique() == 2:
             binary_columns.append(column)
 
     return binary_columns
@@ -423,7 +429,7 @@ def _cast_obs_columns(obs: pd.DataFrame) -> pd.DataFrame:
     # type cast each non-numerical column to either bool (if possible) or category else
     obs[object_columns] = obs[object_columns].apply(
         lambda obs_name: obs_name.astype("category")
-        if not set(pd.unique(obs_name)).issubset({False, True, np.NaN})
+        if not set(pd.unique(obs_name)).issubset({False, True, np.nan})
         else obs_name.astype("bool"),
         axis=0,
     )
@@ -663,3 +669,49 @@ def get_rank_features_df(
 
 class NotEncodedError(AssertionError):
     pass
+
+
+def _are_ndarrays_equal(arr1: np.ndarray, arr2: np.ndarray) -> np.bool_:
+    """Check if two arrays are equal member-wise.
+
+    Note: Two NaNs are considered equal.
+
+    Args:
+        arr1: First array to compare
+        arr2: Second array to compare
+
+    Returns:
+        True if the two arrays are equal member-wise
+    """
+    return np.all(np.equal(arr1, arr2, dtype=object) | ((arr1 != arr1) & (arr2 != arr2)))
+
+
+def _is_val_missing(data: np.ndarray) -> np.ndarray[Any, np.dtype[np.bool_]]:
+    """Check if values in an AnnData matrix are missing.
+
+    Args:
+        data: The AnnData matrix to check
+
+    Returns:
+        An array of bool representing the missingness of the original data, with the same shape
+    """
+    return np.isin(data, [None, ""]) | (data != data)
+
+
+def _to_dense_matrix(adata: AnnData, layer: str | None = None) -> np.ndarray:  # pragma: no cover
+    """Extract a layer from an AnnData object and convert it to a dense matrix if required.
+
+    Args:
+        adata: The AnnData where to extract the layer from.
+        layer: Name of the layer to extract. If omitted, X is considered.
+
+    Returns:
+        The layer as a dense matrix. If a conversion was required, this function returns a copy of the original layer,
+        otherwise this function returns a reference.
+    """
+    from scipy.sparse import issparse
+
+    if layer is None:
+        return adata.X.toarray() if issparse(adata.X) else adata.X
+    else:
+        return adata.layers[layer].toarray() if issparse(adata.layers[layer]) else adata.layers[layer]
diff --git a/ehrapy/data/_datasets.py b/ehrapy/data/_datasets.py
@@ -743,7 +743,7 @@ def synthea_1k_sample(
 
     df = anndata_to_df(adata)
     df.drop(
-        columns=[col for col in df.columns if any(isinstance(x, (list, dict)) for x in df[col].dropna())], inplace=True
+        columns=[col for col in df.columns if any(isinstance(x, list | dict) for x in df[col].dropna())], inplace=True
     )
     df.drop(columns=df.columns[df.isna().all()], inplace=True)
     adata = df_to_anndata(df, index_column="id")

diff --git a/ehrapy/plot/__init__.py b/ehrapy/plot/__init__.py
@@ -2,6 +2,6 @@
 from ehrapy.plot._colormaps import *  # noqa: F403
 from ehrapy.plot._missingno_pl_api import *  # noqa: F403
 from ehrapy.plot._scanpy_pl_api import *  # noqa: F403
-from ehrapy.plot._survival_analysis import kmf, ols, coxph_forestplot
+from ehrapy.plot._survival_analysis import kaplan_meier, ols, coxph_forestplot
 from ehrapy.plot.causal_inference._dowhy import causal_effect
 from ehrapy.plot.feature_ranking._feature_importances import rank_features_supervised