Skip to content

Commit f309341

Browse files
Merge pull request #112 from theislab/enhance_plots
Tutorial for deconvoluted visium
2 parents 762cca3 + 45ffab2 commit f309341

File tree

11 files changed

+315
-45
lines changed

11 files changed

+315
-45
lines changed

.cookietemple.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,5 +15,5 @@ full_name: Anna Schaar
1515
1616
project_name: ncem
1717
project_short_description: ncem. Learning cell communication from spatial graphs of cells.
18-
version: 0.1.1
18+
version: 0.1.4
1919
license: BSD-3-Clause

.github/release-drafter.yml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
1-
name-template: "0.1.1 🌈" # <<COOKIETEMPLE_FORCE_BUMP>>
2-
tag-template: 0.1.1 # <<COOKIETEMPLE_FORCE_BUMP>>
1+
name-template: "0.1.4 🌈" # <<COOKIETEMPLE_FORCE_BUMP>>
2+
tag-template: 0.1.4 # <<COOKIETEMPLE_FORCE_BUMP>>
33
categories:
44
- title: "🚀 Features"
55
labels:

cookietemple.cfg

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
[bumpversion]
2-
current_version = 0.1.1
2+
current_version = 0.1.4
33

44
[bumpversion_files_whitelisted]
55
init_file = ncem/__init__.py

docs/conf.py

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -48,10 +48,10 @@
4848
intersphinx_mapping = dict(
4949
anndata=("https://anndata.readthedocs.io/en/latest/", None),
5050
scanpy=("https://scanpy.readthedocs.io/en/latest/", None),
51-
numpy=("https://docs.scipy.org/doc/numpy/", None),
52-
pandas=("http://pandas.pydata.org/pandas-docs/stable/", None),
53-
python=("https://docs.python.org/3", None),
54-
scipy=("https://docs.scipy.org/doc/scipy/reference/", None),
51+
#numpy=("https://docs.scipy.org/doc/numpy/", None),
52+
#pandas=("http://pandas.pydata.org/pandas-docs/stable/", None),
53+
#python=("https://docs.python.org/3", None),
54+
#scipy=("https://docs.scipy.org/doc/scipy/reference/", None),
5555
)
5656

5757
# Add any paths that contain templates here, relative to this directory.
@@ -73,9 +73,9 @@
7373
# the built documents.
7474
#
7575
# The short X.Y version.
76-
version = "0.1.1"
76+
version = "0.1.4"
7777
# The full version, including alpha/beta/rc tags.
78-
release = "0.1.1"
78+
release = "0.1.4"
7979

8080
# The language for content autogenerated by Sphinx. Refer to documentation
8181
# for a list of supported languages.

docs/requirements.txt

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
1+
ncem>=0.1.2
12
Sphinx>=4.0.1
23
sphinx_rtd_theme>=0.5.2
34
sphinx-rtd-dark-mode>=1.2.1
45
sphinx-automodapi>=0.13
56
sphinx_click>=3.0.0
6-
click>=8.0.1
7+
click>=7.1.2

ncem/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,4 +8,4 @@
88
__maintainer__ = ", ".join(["Anna C. Schaar", "David S. Fischer"])
99
__author__ = ", ".join(["Anna C. Schaar", "David S. Fischer"])
1010
__email__ = ", ".join(["[email protected]", "[email protected]"])
11-
__version__ = "0.1.1"
11+
__version__ = "0.1.4"

ncem/api/train/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
"""Initializes a train object in api."""
2+
import numpy as np
23
from ncem.estimators import (Estimator, EstimatorCVAE, EstimatorCVAEncem,
34
EstimatorED, EstimatorEDncem, EstimatorEdNcemNeighborhood, EstimatorGraph,
45
EstimatorInteractions, EstimatorLinear,

ncem/data.py

Lines changed: 265 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,53 @@
2121
from tqdm import tqdm
2222

2323

24+
def get_data_custom(interpreter, deconvolution: bool = False, n_eval_nodes_per_graph: int = 10):
    """Populate ``interpreter`` with graph tensors read from ``interpreter.data``.

    The function only sets attributes on ``interpreter`` (and prints one summary
    line); it returns nothing.

    Parameters
    ----------
    interpreter
        Object exposing ``data`` (with ``celldata``, ``img_celldata`` and
        ``size_factors()``) and a ``split_data_node`` method.
    deconvolution : bool
        If True, additionally collect ``obsm["proportions"]`` of every image
        into ``interpreter.proportions``.
    n_eval_nodes_per_graph : int
        Stored unchanged as ``interpreter.n_eval_nodes_per_graph``.
    """

    def first_value(mapping):
        # All per-image entries are assumed to share their feature dimension,
        # so the first image is used as the representative one.
        return list(mapping.values())[0]

    interpreter.undefined_node_types = None

    celldata = interpreter.data.celldata
    img_celldata = interpreter.data.img_celldata

    interpreter.img_to_patient_dict = celldata.uns["img_to_patient_dict"]
    interpreter.complete_img_keys = list(img_celldata.keys())

    # Per-image adjacency, input features (node types) and output features (X).
    interpreter.a = {
        img_key: img_adata.obsp["adjacency_matrix_connectivities"]
        for img_key, img_adata in img_celldata.items()
    }
    interpreter.h_0 = {img_key: img_adata.obsm["node_types"] for img_key, img_adata in img_celldata.items()}
    interpreter.h_1 = {img_key: img_adata.X for img_key, img_adata in img_celldata.items()}
    interpreter.node_types = {
        img_key: img_adata.obsm["node_types"] for img_key, img_adata in img_celldata.items()
    }
    interpreter.node_type_names = celldata.uns["node_type_names"]
    interpreter.n_features_type = first_value(interpreter.node_types).shape[1]
    interpreter.n_features_standard = celldata.shape[1]
    interpreter.node_feature_names = list(celldata.var_names)
    interpreter.size_factors = interpreter.data.size_factors()

    # Graph-level covariates: only the names are taken from the data, the
    # per-image covariate vectors are left empty.
    interpreter.graph_covar_names = celldata.uns["graph_covariates"]["label_names"]
    interpreter.graph_covar = {img_key: np.array([], ndmin=1) for img_key in img_celldata.keys()}
    # Node-level covariates: an (n_nodes, 0) placeholder per image.
    interpreter.node_covar = {
        img_key: np.empty((img_adata.shape[0], 0)) for img_key, img_adata in img_celldata.items()
    }

    # Tensor dimensions derived from the dictionaries assembled above.
    interpreter.n_features_0 = first_value(interpreter.h_0).shape[1]
    interpreter.n_features_1 = first_value(interpreter.h_1).shape[1]
    interpreter.n_graph_covariates = first_value(interpreter.graph_covar).shape[0]
    interpreter.n_node_covariates = first_value(interpreter.node_covar).shape[1]
    interpreter.max_nodes = max(interpreter.a[img_key].shape[0] for img_key in interpreter.complete_img_keys)

    # Every image is treated as its own domain, numbered in key order.
    interpreter.domains = {img_key: idx for idx, img_key in enumerate(interpreter.complete_img_keys)}
    interpreter.n_domains = len(np.unique(list(interpreter.domains.values())))

    # Report summary statistics of the loaded graphs.
    mean_degree = np.mean([np.mean(adjacency.sum(axis=1)) for adjacency in interpreter.a.values()])
    print("Mean of mean node degree per images across images: %f" % mean_degree)

    # Split nodes into test and validation sets; can be ignored for analyses
    # that are not sender-receiver focused.
    interpreter.split_data_node(0.1, 0.1)
    interpreter.n_eval_nodes_per_graph = n_eval_nodes_per_graph
    interpreter.cell_names = list(interpreter.data.celldata.uns["node_type_names"].values())
    if deconvolution:
        interpreter.proportions = {
            img_key: img_adata.obsm["proportions"]
            for img_key, img_adata in interpreter.data.img_celldata.items()
        }
69+
70+
2471
class GraphTools:
2572
"""GraphTools class."""
2673

@@ -756,7 +803,7 @@ def compute_cluster_enrichment(
756803
adata, adata_substates, log_pval, fold_change
757804
"""
758805
titles = list(self.celldata.uns["node_type_names"].values())
759-
sorce_type_names = [f"source type {x.replace('_', ' ')}" for x in titles]
806+
sorce_type_names = [f"source type {x}" for x in titles]
760807

761808
pbar_total = len(self.img_celldata.keys()) + len(self.img_celldata.keys()) + len(titles)
762809
with tqdm(total=pbar_total) as pbar:
@@ -1761,6 +1808,223 @@ def size_factors(self):
17611808
@property
17621809
def var_names(self):
17631810
return self.celldata.var_names
1811+
1812+
1813+
class customLoader(DataLoader):
    """Data loader built from an in-memory AnnData-like object with cluster labels.

    Node types are derived as a one-hot encoding of the ``cluster`` column in
    ``adata.obs``; images are defined by the ``library_id`` column and patients
    by the ``patient`` column (both optional).
    """

    def __init__(
        self,
        adata,
        cluster,
        patient,
        library_id,
        radius,
        coord_type='generic',
        n_rings=1,
        n_top_genes=None,
        label_selection=None
    ):
        """Register the data and compute the spatial adjacency matrices.

        Parameters
        ----------
        adata
            AnnData-like object; a copy is stored, the input is not modified here.
        cluster
            Name of the ``obs`` column holding the cluster label of each cell.
        patient
            Name of the ``obs`` column holding the patient label, or a falsy
            value if there is no patient annotation.
        library_id
            Name of the ``obs`` column identifying the image of each cell, or a
            falsy value to treat all cells as a single image.
        radius
            Forwarded to ``compute_adjacency_matrices``.
        coord_type
            Forwarded to ``compute_adjacency_matrices``.
        n_rings
            Forwarded to ``compute_adjacency_matrices``.
        n_top_genes
            Forwarded to ``register_celldata``.
        label_selection
            Forwarded to ``register_graph_features``.
        """
        self.adata = adata.copy()
        self.cluster = cluster
        self.patient = patient
        self.library_id = library_id

        print("Loading data from raw files")
        self.register_celldata(n_top_genes=n_top_genes)
        self.register_img_celldata()
        self.register_graph_features(label_selection=label_selection)
        self.compute_adjacency_matrices(radius=radius, coord_type=coord_type, n_rings=n_rings)
        self.radius = radius

        print(
            "Loaded %i images with complete data from %i patients "
            "over %i cells with %i cell features and %i distinct celltypes."
            % (
                len(self.img_celldata),
                len(self.patients),
                self.celldata.shape[0],
                self.celldata.shape[1],
                len(self.celldata.uns["node_type_names"]),
            )
        )

    def _register_celldata(self, n_top_genes):
        """Build ``self.celldata`` with metadata, one-hot node types and the image-to-patient map.

        Parameters
        ----------
        n_top_genes
            Not used by this method.
        """
        metadata = {
            "cluster_col_preprocessed": self.cluster,
            "image_col": self.library_id
        }

        celldata = self.adata.copy()
        # Densify only when X is sparse; a dense array has no ``toarray``.
        if hasattr(celldata.X, "toarray"):
            celldata.X = celldata.X.toarray()
        celldata.uns["metadata"] = metadata
        # The 'spatial' entry is dropped if present; its absence is not an error.
        if "spatial" in celldata.uns:
            del celldata.uns["spatial"]

        # Register node type names and the one-hot node type encoding.
        cluster_labels = celldata.obs[self.cluster]
        node_type_names = list(np.unique(cluster_labels))
        celldata.uns["node_type_names"] = {x: x for x in node_type_names}
        # Dictionary lookup instead of ``list.index`` per cell.
        name_to_idx = {name: idx for idx, name in enumerate(node_type_names)}
        node_type_idx = np.array(
            [name_to_idx[x] for x in cluster_labels.values],  # index in encoding vector
            dtype=int,  # keeps the index array integer-typed even when there are no cells
        )
        node_types = np.zeros((celldata.shape[0], len(node_type_names)))
        node_types[np.arange(0, node_type_idx.shape[0]), node_type_idx] = 1
        celldata.obsm["node_types"] = node_types

        if self.patient:
            # NOTE(review): keys are the raw library ids here, whereas
            # ``_register_img_celldata`` keys images by ``str(id)`` - confirm
            # that library ids are always strings.
            img_to_patient_dict = {}
            for p in np.unique(celldata.obs[self.patient]):
                for i in np.unique(celldata.obs[celldata.obs[self.patient] == p][self.library_id]):
                    img_to_patient_dict[i] = p
        elif self.library_id:
            # Without patient annotation every image maps to one placeholder
            # patient; keys must match those created in ``_register_img_celldata``.
            img_to_patient_dict = {
                str(img): "patient" for img in np.unique(celldata.obs[self.library_id])
            }
        else:
            img_to_patient_dict = {"image": "patient"}
        celldata.uns["img_to_patient_dict"] = img_to_patient_dict
        self.img_to_patient_dict = img_to_patient_dict

        self.celldata = celldata

    def _register_img_celldata(self):
        """Load dictionary of image-wise celldata objects with {image key : anndata object of image}."""
        img_celldata = {}
        if self.library_id:
            for k in np.unique(self.celldata.obs[self.library_id]):
                img_celldata[str(k)] = self.celldata[self.celldata.obs[self.library_id] == k].copy()
            self.img_celldata = img_celldata
        else:
            # Single image: the full celldata object is used directly (not copied).
            self.img_celldata = {"image": self.celldata}

    def _register_graph_features(self, label_selection):
        """Load graph level covariates.

        This loader registers no graph-level covariates; it only writes empty
        placeholder dictionaries into ``uns["graph_covariates"]``.

        Parameters
        ----------
        label_selection
            Label selection. Not used by this method.
        """
        # Save processed data to attributes.
        for adata in self.img_celldata.values():
            graph_covariates = {
                "label_names": {},
                "label_tensors": {},
                "label_selection": [],
                "continuous_mean": {},
                "continuous_std": {},
                "label_data_types": {},
            }
            adata.uns["graph_covariates"] = graph_covariates

        # The global entry carries no "label_tensors" key.
        graph_covariates = {
            "label_names": {},
            "label_selection": [],
            "continuous_mean": {},
            "continuous_std": {},
            "label_data_types": {},
        }
        self.celldata.uns["graph_covariates"] = graph_covariates
1925+
1926+
1927+
class customLoaderDeconvolution(DataLoader):
    """Data loader built from an in-memory AnnData-like object without a cluster column.

    ``_register_celldata`` does not derive ``node_type_names`` itself, although
    the constructor reads ``celldata.uns["node_type_names"]`` for its summary;
    presumably the input ``adata`` is expected to carry it already - confirm
    against callers.
    """

    def __init__(
        self,
        adata,
        patient,
        library_id,
        radius,
        coord_type='generic',
        n_rings=1,
        n_top_genes=None,
        label_selection=None
    ):
        """Register the data and compute the spatial adjacency matrices.

        Parameters
        ----------
        adata
            AnnData-like object; a copy of it is stored.
        patient
            Name of the ``obs`` column holding the patient label, or a falsy value.
        library_id
            Name of the ``obs`` column identifying the image of each cell, or a
            falsy value to treat all cells as a single image.
        radius
            Forwarded to ``compute_adjacency_matrices``.
        coord_type
            Forwarded to ``compute_adjacency_matrices``.
        n_rings
            Forwarded to ``compute_adjacency_matrices``.
        n_top_genes
            Forwarded to ``register_celldata``.
        label_selection
            Forwarded to ``register_graph_features``.
        """
        self.adata = adata.copy()
        self.patient = patient
        self.library_id = library_id

        print("Loading data from raw files")
        self.register_celldata(n_top_genes=n_top_genes)
        self.register_img_celldata()
        self.register_graph_features(label_selection=label_selection)
        self.compute_adjacency_matrices(radius=radius, coord_type=coord_type, n_rings=n_rings)
        self.radius = radius

        summary = (
            len(self.img_celldata),
            len(self.patients),
            self.celldata.shape[0],
            self.celldata.shape[1],
            len(self.celldata.uns["node_type_names"]),
        )
        print(
            "Loaded %i images with complete data from %i patients "
            "over %i cells with %i cell features and %i distinct celltypes." % summary
        )

    def _register_celldata(self, n_top_genes):
        """Build ``self.celldata`` with metadata and the image-to-patient map.

        Parameters
        ----------
        n_top_genes
            Not used by this method.
        """
        celldata = self.adata.copy()
        # Only the image column is recorded; no cluster column is registered.
        celldata.uns["metadata"] = {"image_col": self.library_id}

        obs = celldata.obs
        if self.patient:
            # Map each image found for a patient to that patient.
            img_to_patient_dict = {
                img: pat
                for pat in np.unique(obs[self.patient])
                for img in np.unique(obs[obs[self.patient] == pat][self.library_id])
            }
        elif self.library_id:
            # No patient annotation: every image maps to one placeholder patient.
            img_to_patient_dict = {str(img): "patient" for img in np.unique(obs[self.library_id])}
        else:
            img_to_patient_dict = {"image": "patient"}

        celldata.uns["img_to_patient_dict"] = img_to_patient_dict
        self.img_to_patient_dict = img_to_patient_dict
        self.celldata = celldata

    def _register_img_celldata(self):
        """Build the dictionary of image-wise celldata objects, {image key: anndata object of image}."""
        if not self.library_id:
            # Single image: the full celldata object is used directly (not copied).
            self.img_celldata = {"image": self.celldata}
            return

        image_ids = self.celldata.obs[self.library_id]
        self.img_celldata = {
            str(key): self.celldata[image_ids == key].copy() for key in np.unique(image_ids)
        }

    def _register_graph_features(self, label_selection):
        """Write empty graph-level covariate placeholders.

        Parameters
        ----------
        label_selection
            Label selection. Not used by this method.
        """
        # Each image receives its own fresh set of empty containers.
        for img_adata in self.img_celldata.values():
            img_adata.uns["graph_covariates"] = {
                "label_names": {},
                "label_tensors": {},
                "label_selection": [],
                "continuous_mean": {},
                "continuous_std": {},
                "label_data_types": {},
            }

        # The global entry carries no "label_tensors" key.
        self.celldata.uns["graph_covariates"] = {
            "label_names": {},
            "label_selection": [],
            "continuous_mean": {},
            "continuous_std": {},
            "label_data_types": {},
        }
17642028

17652029

17662030
class DataLoaderZhang(DataLoader):

0 commit comments

Comments
 (0)