Add FHIR tutorial and simplify

Signed-off-by: zethson <[email protected]>
theislab · Dec 18, 2023 · 8ff74b2 · 8ff74b2
2 parents 8c8fa09 + 94f257c
commit 8ff74b2
Show file tree

Hide file tree

Showing 21 changed files with 837 additions and 823 deletions.
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
@@ -10,17 +10,11 @@ repos:
       rev: v3.1.0
       hooks:
           - id: prettier
-            # Newer versions of node don't work on systems that have an older version of GLIBC
-            # (in particular Ubuntu 18.04 and Centos 7)
-            # EOL of Centos 7 is in 2024-06, we can probably get rid of this then.
-            # See https://github.com/scverse/cookiecutter-scverse/issues/143 and
-            # https://github.com/jupyterlab/jupyterlab/issues/12675
-            language_version: "17.9.1"
     - repo: https://github.com/astral-sh/ruff-pre-commit
       rev: v0.1.6
       hooks:
           - id: ruff
-            args: [--fix, --exit-non-zero-on-fix]
+            args: [--fix, --exit-non-zero-on-fix, --unsafe-fixes]
           - id: ruff-format
     - repo: https://github.com/pre-commit/pre-commit-hooks
       rev: v4.5.0
@@ -33,7 +27,7 @@ repos:
           - id: trailing-whitespace
           - id: check-case-conflict
     - repo: https://github.com/pre-commit/mirrors-mypy
-      rev: v1.7.0
+      rev: v1.7.1
       hooks:
           - id: mypy
             args: [--no-strict-optional, --ignore-missing-imports]

diff --git a/README.md b/README.md
@@ -12,7 +12,7 @@
 
 # ehrapy overview
 
-![fig_1](https://github.com/theislab/ehrapy/assets/99650244/257d29ae-79bc-4101-b1ea-612fb9f3ed9a)
+![fig1](https://github.com/theislab/ehrapy/assets/99650244/aad523a3-b4f9-4a7a-bb61-612af9a6874c)
 
 ## Features
 
@@ -36,3 +36,9 @@ Please have a look at the [Usage documentation][usage] and the [tutorials][tutor
 ```python
 import ehrapy as ep
 ```
+
+## Citation
+
+[Exploratory electronic health record analysis with ehrapy.
+Lukas Heumos, Philipp Ehmele, Tim Treis, Julius Upmeier zu Belzen, Altana Namsaraeva, Nastassya Horlava, Vladimir A. Shitov, Xinyue Zhang, Luke Zappia, Rainer Knoll, Niklas J. Lang, Leon Hetzel, Isaac Virshup, Lisa Sikkema, Eljas Roellin, Fabiola Curion, Roland Eils, Herbert B. Schiller, Anne Hilgendorff, Fabian J. Theis.
+medRxiv 2023.12.11.23299816; doi: https://doi.org/10.1101/2023.12.11.23299816](https://www.medrxiv.org/content/10.1101/2023.12.11.23299816v1).
diff --git a/docs/_static/tutorials/fhir.jpg b/docs/_static/tutorials/fhir.jpg
diff --git a/docs/conf.py b/docs/conf.py
@@ -11,7 +11,7 @@
 
 # General information about the project.
 project = "ehrapy"
-copyright = "2021, Lukas Heumos, Theislab"
+copyright = "2021-2024, Lukas Heumos, Theislab"
 author = "Lukas Heumos"
 github_repo = "ehrapy"
 
@@ -181,4 +181,5 @@
     "tutorials/notebooks/medcat": "_static/tutorials/nlp.png",
     "tutorials/notebooks/ml_usecases": "_static/tutorials/machine_learning.png",
     "tutorials/notebooks/ontology_mapping": "_static/tutorials/ontology.png",
+    "tutorials/notebooks/fhir": "_static/tutorials/fhir.jpg",
 }
diff --git a/docs/tutorials/index.md b/docs/tutorials/index.md
@@ -25,6 +25,7 @@ For questions about the usage of ehrapy use [Github Discussions].
    notebooks/medcat
    notebooks/ml_usecases
    notebooks/ontology_mapping
+   notebooks/fhir
 
 ```
 

diff --git a/docs/tutorials/notebooks b/docs/tutorials/notebooks
diff --git a/docs/usage/usage.md b/docs/usage/usage.md
@@ -224,10 +224,9 @@ In contrast to a preprocessing function, a tool usually adds an easily interpret
     :nosignatures:
 
     tools.Translator
-    tools.MedCAT
-    tools.mc.run_unsupervised_training
-    tools.mc.annotate_text
-    tools.mc.get_annotation_overview
+    tools.annotate_text
+    tools.get_medcat_annotation_overview
+    tools.add_medcat_annotation_to_obs
 ```
 
 ### Survival Analysis

diff --git a/ehrapy/io/_read.py b/ehrapy/io/_read.py
@@ -362,11 +362,18 @@ def read_fhir(
     Uses https://github.com/dermatologist/fhiry to read the FHIR file into a Pandas DataFrame
     which is subsequently transformed into an AnnData object.
 
+    Be aware that FHIR data can be nested and contain lists or dictionaries as values.
+    In such cases, one can either:
+    1. Transform the data into an awkward array and flatten it when needed.
+    2. Extract values from all lists and dictionaries to store single values in the fields.
+    3. Remove all lists and dictionaries. Only do this if the information is not relevant to you.
+
     Args:
         dataset_path: Path to one or multiple FHIR files.
         format: The file format of the FHIR data. One of 'json' or 'ndjson'. Defaults to 'json'.
         columns_obs_only: These columns will be added to obs only and not X.
-        columns_x_only: These columns will be added to X only and all remaining columns to obs. Note that datetime columns will always be added to .obs though.
+        columns_x_only: These columns will be added to X only and all remaining columns to obs.
+                        Note that datetime columns will always be added to .obs though.
         return_df: Whether to return one or several Pandas DataFrames.
         cache: Whether to write to cache when reading or not. Defaults to False.
         download_dataset_name: Name of the file or directory in case the dataset is downloaded
@@ -379,6 +386,12 @@ def read_fhir(
     Examples:
         >>> import ehrapy as ep
         >>> adata = ep.io.read_fhir("/path/to/fhir/resources")
+
+        Be aware that most FHIR datasets have nested data that might need to be removed.
+        In such cases consider working with DataFrames.
+        >>> df = ep.io.read_fhir("/path/to/fhir/resources", return_df=True)
+        >>> df.drop(columns=[col for col in df.columns if any(isinstance(x, (list, dict)) for x in df[col].dropna())], inplace=True)
+        >>> df.drop(columns=df.columns[df.isna().all()], inplace=True)
     """
     _check_columns_only_params(columns_obs_only, columns_x_only)
     file_path: Path = Path(dataset_path)
+4 −0		.gitignore
+5 −6		.pre-commit-config.yaml
+1,219 −0		fhir.ipynb
+395 −359		medcat.ipynb