Merge pull request #112 from theislab/enhance_plots

AnnaChristina · web-flow · commit f309341001ec · 2022-08-08T15:25:08.000+02:00
Tutorial for deconvoluted visium
diff --git a/.cookietemple.yml b/.cookietemple.yml
@@ -15,5 +15,5 @@ full_name: Anna Schaar
 email: anna.schaar@helmholtz-muenchen.de
 project_name: ncem
 project_short_description: ncem.  Learning cell communication from spatial graphs of cells.
-version: 0.1.1
+version: 0.1.4
 license: BSD-3-Clause
diff --git a/.github/release-drafter.yml b/.github/release-drafter.yml
@@ -1,5 +1,5 @@
-name-template: "0.1.1 🌈" # <<COOKIETEMPLE_FORCE_BUMP>>
-tag-template: 0.1.1 # <<COOKIETEMPLE_FORCE_BUMP>>
+name-template: "0.1.4 🌈" # <<COOKIETEMPLE_FORCE_BUMP>>
+tag-template: 0.1.4 # <<COOKIETEMPLE_FORCE_BUMP>>
 categories:
     - title: "🚀 Features"
       labels:
diff --git a/cookietemple.cfg b/cookietemple.cfg
@@ -1,5 +1,5 @@
 [bumpversion]
-current_version = 0.1.1
+current_version = 0.1.4
 
 [bumpversion_files_whitelisted]
 init_file = ncem/__init__.py
diff --git a/docs/conf.py b/docs/conf.py
@@ -48,10 +48,10 @@
 intersphinx_mapping = dict(
     anndata=("https://anndata.readthedocs.io/en/latest/", None),
     scanpy=("https://scanpy.readthedocs.io/en/latest/", None),
-    numpy=("https://docs.scipy.org/doc/numpy/", None),
-    pandas=("http://pandas.pydata.org/pandas-docs/stable/", None),
-    python=("https://docs.python.org/3", None),
-    scipy=("https://docs.scipy.org/doc/scipy/reference/", None),
+    #numpy=("https://docs.scipy.org/doc/numpy/", None),
+    #pandas=("http://pandas.pydata.org/pandas-docs/stable/", None),
+    #python=("https://docs.python.org/3", None),
+    #scipy=("https://docs.scipy.org/doc/scipy/reference/", None),
 )
 
 # Add any paths that contain templates here, relative to this directory.
@@ -73,9 +73,9 @@
 # the built documents.
 #
 # The short X.Y version.
-version = "0.1.1"
+version = "0.1.4"
 # The full version, including alpha/beta/rc tags.
-release = "0.1.1"
+release = "0.1.4"
 
 # The language for content autogenerated by Sphinx. Refer to documentation
 # for a list of supported languages.
diff --git a/docs/requirements.txt b/docs/requirements.txt
@@ -1,6 +1,7 @@
+ncem>=0.1.2
 Sphinx>=4.0.1
 sphinx_rtd_theme>=0.5.2
 sphinx-rtd-dark-mode>=1.2.1
 sphinx-automodapi>=0.13
 sphinx_click>=3.0.0
-click>=8.0.1
+click>=7.1.2
diff --git a/ncem/__init__.py b/ncem/__init__.py
@@ -8,4 +8,4 @@
 __maintainer__ = ", ".join(["Anna C. Schaar", "David S. Fischer"])
 __author__ = ", ".join(["Anna C. Schaar", "David S. Fischer"])
 __email__ = ", ".join(["anna.schaar@helmholtz-muenchen.de", "david.fischer@helmholtz-muenchen.de"])
-__version__ = "0.1.1"
+__version__ = "0.1.4"
diff --git a/ncem/api/train/__init__.py b/ncem/api/train/__init__.py
@@ -1,4 +1,5 @@
 """Initializes a train object in api."""
+import numpy as np
 from ncem.estimators import (Estimator, EstimatorCVAE, EstimatorCVAEncem,
                              EstimatorED, EstimatorEDncem, EstimatorEdNcemNeighborhood, EstimatorGraph,
                              EstimatorInteractions, EstimatorLinear,
diff --git a/ncem/data.py b/ncem/data.py
@@ -21,6 +21,53 @@
 from tqdm import tqdm
 
 
+def get_data_custom(interpreter, deconvolution: bool = False, n_eval_nodes_per_graph: int=10):
+    interpreter.undefined_node_types = None
+    interpreter.img_to_patient_dict = interpreter.data.celldata.uns["img_to_patient_dict"]
+    interpreter.complete_img_keys = list(interpreter.data.img_celldata.keys())
+
+    interpreter.a = {k: adata.obsp["adjacency_matrix_connectivities"] for k, adata in interpreter.data.img_celldata.items()}
+    interpreter.h_0 = {k: adata.obsm["node_types"] for k, adata in interpreter.data.img_celldata.items()}
+    interpreter.h_1 = {k: adata.X for k, adata in interpreter.data.img_celldata.items()}
+    interpreter.node_types = {k: adata.obsm["node_types"] for k, adata in interpreter.data.img_celldata.items()}
+    interpreter.node_type_names = interpreter.data.celldata.uns["node_type_names"]
+    interpreter.n_features_type = list(interpreter.node_types.values())[0].shape[1]
+    interpreter.n_features_standard = interpreter.data.celldata.shape[1]
+    interpreter.node_feature_names = list(interpreter.data.celldata.var_names)
+    interpreter.size_factors = interpreter.data.size_factors()
+
+    # Add covariates:
+    # Add graph-level covariate information
+    interpreter.graph_covar_names = interpreter.data.celldata.uns["graph_covariates"]["label_names"]
+
+    interpreter.graph_covar = {k: np.array([], ndmin=1) for k, adata in interpreter.data.img_celldata.items()}
+    # Add node-level conditional information
+    interpreter.node_covar = {k: np.empty((adata.shape[0], 0)) for k, adata in interpreter.data.img_celldata.items()}
+
+    # Set selection-specific tensor dimensions:
+    interpreter.n_features_0 = list(interpreter.h_0.values())[0].shape[1]
+    interpreter.n_features_1 = list(interpreter.h_1.values())[0].shape[1]
+    interpreter.n_graph_covariates = list(interpreter.graph_covar.values())[0].shape[0]
+    interpreter.n_node_covariates = list(interpreter.node_covar.values())[0].shape[1]
+    interpreter.max_nodes = max([interpreter.a[i].shape[0] for i in interpreter.complete_img_keys])
+
+    interpreter.domains = {key: i for i, key in enumerate(interpreter.complete_img_keys)}
+    interpreter.n_domains = len(np.unique(list(interpreter.domains.values())))
+
+    # Report summary statistics of loaded graph:
+    print(
+        "Mean of mean node degree per images across images: %f"
+        % np.mean([np.mean(v.sum(axis=1)) for k, v in interpreter.a.items()])
+    )
+    
+    # splitting data into test and validation sets, can be ignored for non sender-receiver focused analysis
+    interpreter.split_data_node(0.1, 0.1)
+    interpreter.n_eval_nodes_per_graph = n_eval_nodes_per_graph
+    interpreter.cell_names = list(interpreter.data.celldata.uns['node_type_names'].values())
+    if deconvolution:
+        interpreter.proportions = {k: adata.obsm["proportions"] for k, adata in interpreter.data.img_celldata.items()}
+    
+    
 class GraphTools:
     """GraphTools class."""
 
@@ -756,7 +803,7 @@ def compute_cluster_enrichment(
         adata, adata_substates, log_pval, fold_change
         """
         titles = list(self.celldata.uns["node_type_names"].values())
-        sorce_type_names = [f"source type {x.replace('_', ' ')}" for x in titles]
+        sorce_type_names = [f"source type {x}" for x in titles]
 
         pbar_total = len(self.img_celldata.keys()) + len(self.img_celldata.keys()) + len(titles)
         with tqdm(total=pbar_total) as pbar:
@@ -1761,6 +1808,223 @@ def size_factors(self):
     @property
     def var_names(self):
         return self.celldata.var_names
+    
+
+class customLoader(DataLoader):
+    
+    def __init__(
+        self,
+        adata, 
+        cluster, 
+        patient, 
+        library_id,
+        radius,
+        coord_type='generic',
+        n_rings=1,
+        n_top_genes=None,
+        label_selection=None
+    ):
+        self.adata = adata.copy()
+        self.cluster = cluster
+        self.patient = patient
+        self.library_id = library_id
+
+        print("Loading data from raw files")
+        self.register_celldata(n_top_genes=n_top_genes)
+        self.register_img_celldata()
+        self.register_graph_features(label_selection=label_selection)
+        self.compute_adjacency_matrices(radius=radius, coord_type=coord_type, n_rings=n_rings)
+        self.radius = radius
+
+        print(
+            "Loaded %i images with complete data from %i patients "
+            "over %i cells with %i cell features and %i distinct celltypes."
+            % (
+                len(self.img_celldata),
+                len(self.patients),
+                self.celldata.shape[0],
+                self.celldata.shape[1],
+                len(self.celldata.uns["node_type_names"]),
+            )
+        )
+    
+    def _register_celldata(self, n_top_genes):
+        
+        metadata = {
+            "cluster_col_preprocessed": self.cluster,
+            "image_col": self.library_id
+        }
+        
+        celldata = self.adata.copy()
+        celldata.X = celldata.X.toarray()
+        celldata.uns["metadata"] = metadata
+        del celldata.uns['spatial']
+
+        # register node type names
+        node_type_names = list(np.unique(celldata.obs[self.cluster]))
+        celldata.uns["node_type_names"] = {x: x for x in node_type_names}
+        node_types = np.zeros((celldata.shape[0], len(node_type_names)))
+        node_type_idx = np.array(
+            [
+                node_type_names.index(x) for x in celldata.obs[self.cluster].values
+            ]  # index in encoding vector
+        )
+        node_types[np.arange(0, node_type_idx.shape[0]), node_type_idx] = 1
+        celldata.obsm["node_types"] = node_types
+
+        if self.patient:
+            img_to_patient_dict = {}
+            for p in np.unique(celldata.obs[self.patient]):
+                for i in np.unique(celldata.obs[celldata.obs[self.patient] == p][self.library_id]):
+                    img_to_patient_dict[i] = p
+        else:
+            img_to_patient_dict = {"image": "patient"}
+        celldata.uns["img_to_patient_dict"] = img_to_patient_dict
+        self.img_to_patient_dict = img_to_patient_dict
+
+        self.celldata = celldata
+        
+    def _register_img_celldata(self):
+        """Load dictionary of of image-wise celldata objects with {imgage key : anndata object of image}."""
+        img_celldata = {}
+        if self.library_id:
+            for k in np.unique(self.celldata.obs[self.library_id]):
+                img_celldata[str(k)] = self.celldata[self.celldata.obs[self.library_id] == k].copy()
+            self.img_celldata = img_celldata
+        else:
+            self.img_celldata = {"image": self.celldata}
+
+    def _register_graph_features(self, label_selection):
+        """Load graph level covariates.
+
+        Parameters
+        ----------
+        label_selection
+            Label selection.
+        """
+        # Save processed data to attributes.
+        for adata in self.img_celldata.values():
+            graph_covariates = {
+                "label_names": {},
+                "label_tensors": {},
+                "label_selection": [],
+                "continuous_mean": {},
+                "continuous_std": {},
+                "label_data_types": {},
+            }
+            adata.uns["graph_covariates"] = graph_covariates
+
+        graph_covariates = {
+            "label_names": {},
+            "label_selection": [],
+            "continuous_mean": {},
+            "continuous_std": {},
+            "label_data_types": {},
+        }
+        self.celldata.uns["graph_covariates"] = graph_covariates
+
+        
+class customLoaderDeconvolution(DataLoader):
+    
+    def __init__(
+        self,
+        adata, 
+        patient, 
+        library_id,
+        radius,
+        coord_type='generic',
+        n_rings=1,
+        n_top_genes=None,
+        label_selection=None
+    ):
+        self.adata = adata.copy()
+        self.patient = patient
+        self.library_id = library_id
+
+        print("Loading data from raw files")
+        self.register_celldata(n_top_genes=n_top_genes)
+        self.register_img_celldata()
+        self.register_graph_features(label_selection=label_selection)
+        self.compute_adjacency_matrices(radius=radius, coord_type=coord_type, n_rings=n_rings)
+        self.radius = radius
+
+        print(
+            "Loaded %i images with complete data from %i patients "
+            "over %i cells with %i cell features and %i distinct celltypes."
+            % (
+                len(self.img_celldata),
+                len(self.patients),
+                self.celldata.shape[0],
+                self.celldata.shape[1],
+                len(self.celldata.uns["node_type_names"]),
+            )
+        )
+    
+    def _register_celldata(self, n_top_genes):
+        
+        metadata = {
+            #"cluster_col_preprocessed": self.cluster,
+            "image_col": self.library_id
+        }
+        
+        celldata = self.adata.copy()
+        celldata.uns["metadata"] = metadata
+
+        if self.patient:
+            img_to_patient_dict = {}
+            for p in np.unique(celldata.obs[self.patient]):
+                for i in np.unique(celldata.obs[celldata.obs[self.patient] == p][self.library_id]):
+                    img_to_patient_dict[i] = p
+        else:
+            if self.library_id:
+                img_to_patient_dict = {}
+                for img in np.unique(celldata.obs[self.library_id]):
+                    img_to_patient_dict[str(img)] = "patient"
+            else:
+                img_to_patient_dict = {"image": "patient"}
+        celldata.uns["img_to_patient_dict"] = img_to_patient_dict
+        self.img_to_patient_dict = img_to_patient_dict
+
+        self.celldata = celldata
+        
+    def _register_img_celldata(self):
+        """Load dictionary of of image-wise celldata objects with {imgage key : anndata object of image}."""
+        img_celldata = {}
+        if self.library_id:
+            for k in np.unique(self.celldata.obs[self.library_id]):
+                img_celldata[str(k)] = self.celldata[self.celldata.obs[self.library_id] == k].copy()
+            self.img_celldata = img_celldata
+        else:
+            self.img_celldata = {"image": self.celldata}
+
+    def _register_graph_features(self, label_selection):
+        """Load graph level covariates.
+
+        Parameters
+        ----------
+        label_selection
+            Label selection.
+        """
+        # Save processed data to attributes.
+        for adata in self.img_celldata.values():
+            graph_covariates = {
+                "label_names": {},
+                "label_tensors": {},
+                "label_selection": [],
+                "continuous_mean": {},
+                "continuous_std": {},
+                "label_data_types": {},
+            }
+            adata.uns["graph_covariates"] = graph_covariates
+
+        graph_covariates = {
+            "label_names": {},
+            "label_selection": [],
+            "continuous_mean": {},
+            "continuous_std": {},
+            "label_data_types": {},
+        }
+        self.celldata.uns["graph_covariates"] = graph_covariates
 
 
 class DataLoaderZhang(DataLoader):
diff --git a/ncem/interpretation/interpreter.py b/ncem/interpretation/interpreter.py
diff --git a/ncem/utils/wald_test.py b/ncem/utils/wald_test.py
diff --git a/pyproject.toml b/pyproject.toml

Original file line number	Diff line number	Diff line change
`@@ -1,4 +1,5 @@`
`1`	`1`	`"""Initializes a train object in api."""`
	`2`	`+import numpy as np`
`2`	`3`	`from ncem.estimators import (Estimator, EstimatorCVAE, EstimatorCVAEncem,`
`3`	`4`	`EstimatorED, EstimatorEDncem, EstimatorEdNcemNeighborhood, EstimatorGraph,`
`4`	`5`	`EstimatorInteractions, EstimatorLinear,`