|
21 | 21 | from tqdm import tqdm
|
22 | 22 |
|
23 | 23 |
|
def get_data_custom(interpreter, deconvolution: bool = False, n_eval_nodes_per_graph: int=10):
    """Copy the graph data held by ``interpreter.data`` onto ``interpreter``.

    Every per-image attribute set here is a dictionary keyed like
    ``interpreter.data.img_celldata``. The function mutates ``interpreter``
    in place and returns ``None``.

    Parameters
    ----------
    interpreter
        Object exposing a ``data`` attribute (with ``celldata``,
        ``img_celldata`` and ``size_factors()``) and a
        ``split_data_node`` method.
    deconvolution : bool
        If True, additionally collect ``obsm["proportions"]`` of every image
        into ``interpreter.proportions``.
    n_eval_nodes_per_graph : int
        Stored unchanged as ``interpreter.n_eval_nodes_per_graph``.
    """
    loader = interpreter.data
    pooled = loader.celldata

    interpreter.undefined_node_types = None
    interpreter.img_to_patient_dict = pooled.uns["img_to_patient_dict"]

    per_image = loader.img_celldata

    def collect(getter):
        # One entry per image, in the iteration order of img_celldata.
        return {key: getter(img) for key, img in per_image.items()}

    def first_value(mapping):
        # All images are expected to share feature dimensions, so the first one is used.
        return list(mapping.values())[0]

    interpreter.complete_img_keys = list(per_image.keys())

    # Adjacency matrices, node inputs and node-type encodings per image.
    interpreter.a = collect(lambda img: img.obsp["adjacency_matrix_connectivities"])
    interpreter.h_0 = collect(lambda img: img.obsm["node_types"])
    interpreter.h_1 = collect(lambda img: img.X)
    interpreter.node_types = collect(lambda img: img.obsm["node_types"])
    interpreter.node_type_names = pooled.uns["node_type_names"]
    interpreter.n_features_type = first_value(interpreter.node_types).shape[1]
    interpreter.n_features_standard = pooled.shape[1]
    interpreter.node_feature_names = list(pooled.var_names)
    interpreter.size_factors = loader.size_factors()

    # Covariates: names come from celldata, but the per-image graph- and
    # node-level covariate arrays are deliberately left empty.
    interpreter.graph_covar_names = pooled.uns["graph_covariates"]["label_names"]
    interpreter.graph_covar = collect(lambda img: np.array([], ndmin=1))
    interpreter.node_covar = collect(lambda img: np.empty((img.shape[0], 0)))

    # Tensor dimensions derived from the first image of each dictionary.
    interpreter.n_features_0 = first_value(interpreter.h_0).shape[1]
    interpreter.n_features_1 = first_value(interpreter.h_1).shape[1]
    interpreter.n_graph_covariates = first_value(interpreter.graph_covar).shape[0]
    interpreter.n_node_covariates = first_value(interpreter.node_covar).shape[1]
    interpreter.max_nodes = max(
        interpreter.a[key].shape[0] for key in interpreter.complete_img_keys
    )

    # Each image is its own domain, numbered in key order.
    interpreter.domains = {key: idx for idx, key in enumerate(interpreter.complete_img_keys)}
    interpreter.n_domains = len(np.unique(list(interpreter.domains.values())))

    # Report summary statistics of the loaded graphs.
    mean_degrees = [np.mean(adj.sum(axis=1)) for adj in interpreter.a.values()]
    print(
        "Mean of mean node degree per images across images: %f"
        % np.mean(mean_degrees)
    )

    # Split into test and validation sets; per the original note this can be
    # ignored for analyses that are not sender-receiver focused.
    interpreter.split_data_node(0.1, 0.1)
    interpreter.n_eval_nodes_per_graph = n_eval_nodes_per_graph
    interpreter.cell_names = list(pooled.uns['node_type_names'].values())
    if deconvolution:
        interpreter.proportions = collect(lambda img: img.obsm["proportions"])
| 69 | + |
| 70 | + |
24 | 71 | class GraphTools:
|
25 | 72 | """GraphTools class."""
|
26 | 73 |
|
@@ -756,7 +803,7 @@ def compute_cluster_enrichment(
|
756 | 803 | adata, adata_substates, log_pval, fold_change
|
757 | 804 | """
|
758 | 805 | titles = list(self.celldata.uns["node_type_names"].values())
|
759 |
| - sorce_type_names = [f"source type {x.replace('_', ' ')}" for x in titles] |
| 806 | + sorce_type_names = [f"source type {x}" for x in titles] |
760 | 807 |
|
761 | 808 | pbar_total = len(self.img_celldata.keys()) + len(self.img_celldata.keys()) + len(titles)
|
762 | 809 | with tqdm(total=pbar_total) as pbar:
|
@@ -1761,6 +1808,223 @@ def size_factors(self):
|
@property
def var_names(self):
    """Return the ``var_names`` of ``self.celldata``."""
    # NOTE(review): the enclosing class header is outside this view; this is
    # presumably a read-only property of the loader base class.
    celldata = self.celldata
    return celldata.var_names
|
| 1811 | + |
| 1812 | + |
class customLoader(DataLoader):
    """Loader that builds per-image cell data from an in-memory annotated data object.

    Cell-type labels are read from an ``obs`` column and one-hot encoded into
    ``obsm["node_types"]``. The ``register_*`` and ``compute_adjacency_matrices``
    methods called from ``__init__`` are not defined in this class; they are
    presumably inherited from ``DataLoader`` and dispatch to the ``_register_*``
    hooks below (verify against the base class).
    """

    def __init__(
        self,
        adata,
        cluster,
        patient,
        library_id,
        radius,
        coord_type='generic',
        n_rings=1,
        n_top_genes=None,
        label_selection=None
    ):
        """Register cell data, split it per image and compute adjacency matrices.

        Parameters
        ----------
        adata
            Annotated data object; it is copied, the caller's object is not modified.
        cluster
            Name of the ``obs`` column holding the cell-type label of each cell.
        patient
            Name of the ``obs`` column holding the patient of each cell, or a
            falsy value if there is no patient annotation.
        library_id
            Name of the ``obs`` column holding the image identifier of each
            cell, or a falsy value to treat all cells as one image.
        radius
            Forwarded to ``compute_adjacency_matrices`` and stored as ``self.radius``.
        coord_type
            Forwarded to ``compute_adjacency_matrices``.
        n_rings
            Forwarded to ``compute_adjacency_matrices``.
        n_top_genes
            Forwarded to ``register_celldata``; unused by ``_register_celldata`` here.
        label_selection
            Forwarded to ``register_graph_features``; unused by
            ``_register_graph_features`` here.
        """
        self.adata = adata.copy()
        self.cluster = cluster
        self.patient = patient
        self.library_id = library_id

        print("Loading data from raw files")
        self.register_celldata(n_top_genes=n_top_genes)
        self.register_img_celldata()
        self.register_graph_features(label_selection=label_selection)
        self.compute_adjacency_matrices(radius=radius, coord_type=coord_type, n_rings=n_rings)
        self.radius = radius

        print(
            "Loaded %i images with complete data from %i patients "
            "over %i cells with %i cell features and %i distinct celltypes."
            % (
                len(self.img_celldata),
                len(self.patients),
                self.celldata.shape[0],
                self.celldata.shape[1],
                len(self.celldata.uns["node_type_names"]),
            )
        )

    def _register_celldata(self, n_top_genes):
        """Build ``self.celldata`` with metadata, one-hot node types and the image-to-patient map.

        Parameters
        ----------
        n_top_genes
            Unused; accepted for signature compatibility with the caller.
        """
        celldata = self.adata.copy()
        # Densify only when X is sparse: a dense array has no ``toarray`` method.
        if hasattr(celldata.X, "toarray"):
            celldata.X = celldata.X.toarray()
        celldata.uns["metadata"] = {
            "cluster_col_preprocessed": self.cluster,
            "image_col": self.library_id,
        }
        # Drop the 'spatial' entry from the working copy; tolerate inputs without it.
        celldata.uns.pop("spatial", None)

        # Register node type names and the one-hot encoding of each cell's type.
        # ``return_inverse`` yields every cell's index into the sorted unique labels
        # in one pass instead of a linear list search per cell.
        labels = np.asarray(celldata.obs[self.cluster])
        unique_labels, label_idx = np.unique(labels, return_inverse=True)
        label_idx = label_idx.ravel()
        node_type_names = list(unique_labels)
        celldata.uns["node_type_names"] = {x: x for x in node_type_names}
        node_types = np.zeros((celldata.shape[0], len(node_type_names)))
        node_types[np.arange(label_idx.shape[0]), label_idx] = 1
        celldata.obsm["node_types"] = node_types

        # Keys are stringified so they match the keys of ``self.img_celldata``,
        # which ``_register_img_celldata`` builds with ``str(k)``.
        if self.patient:
            img_to_patient_dict = {}
            for p in np.unique(celldata.obs[self.patient]):
                patient_rows = celldata.obs[celldata.obs[self.patient] == p]
                for img in np.unique(patient_rows[self.library_id]):
                    img_to_patient_dict[str(img)] = p
        elif self.library_id:
            # No patient annotation: every image maps to one placeholder patient.
            img_to_patient_dict = {
                str(img): "patient" for img in np.unique(celldata.obs[self.library_id])
            }
        else:
            img_to_patient_dict = {"image": "patient"}
        celldata.uns["img_to_patient_dict"] = img_to_patient_dict
        self.img_to_patient_dict = img_to_patient_dict

        self.celldata = celldata

    def _register_img_celldata(self):
        """Build the dictionary of image-wise cell data objects, {image key: data object of image}."""
        if not self.library_id:
            # Without an image column all cells form a single image.
            self.img_celldata = {"image": self.celldata}
            return
        image_ids = self.celldata.obs[self.library_id]
        self.img_celldata = {
            str(k): self.celldata[image_ids == k].copy() for k in np.unique(image_ids)
        }

    def _register_graph_features(self, label_selection):
        """Attach empty graph-level covariate containers to every image and to ``celldata``.

        Parameters
        ----------
        label_selection
            Unused; accepted for signature compatibility with the caller.
        """
        # Each image gets its own fresh dictionary so that images never share state.
        for adata in self.img_celldata.values():
            adata.uns["graph_covariates"] = {
                "label_names": {},
                "label_tensors": {},
                "label_selection": [],
                "continuous_mean": {},
                "continuous_std": {},
                "label_data_types": {},
            }

        # The pooled object carries the same keys except "label_tensors".
        self.celldata.uns["graph_covariates"] = {
            "label_names": {},
            "label_selection": [],
            "continuous_mean": {},
            "continuous_std": {},
            "label_data_types": {},
        }
| 1925 | + |
| 1926 | + |
class customLoaderDeconvolution(DataLoader):
    """Loader for an in-memory annotated data object that takes no cluster column.

    Unlike a cluster-based loader, ``_register_celldata`` here does not create
    ``uns["node_type_names"]``, although ``__init__`` reads it for its summary
    message; it must therefore come from the input ``adata`` or from inherited
    code not visible here (verify against the base class). The ``register_*``
    and ``compute_adjacency_matrices`` methods called from ``__init__`` are not
    defined in this class and are presumably inherited from ``DataLoader``.
    """

    def __init__(
        self,
        adata,
        patient,
        library_id,
        radius,
        coord_type='generic',
        n_rings=1,
        n_top_genes=None,
        label_selection=None
    ):
        """Register cell data, split it per image and compute adjacency matrices.

        Parameters
        ----------
        adata
            Annotated data object; it is copied, the caller's object is not modified.
        patient
            Name of the ``obs`` column holding the patient of each observation,
            or a falsy value if there is no patient annotation.
        library_id
            Name of the ``obs`` column holding the image identifier of each
            observation, or a falsy value to treat everything as one image.
        radius
            Forwarded to ``compute_adjacency_matrices`` and stored as ``self.radius``.
        coord_type
            Forwarded to ``compute_adjacency_matrices``.
        n_rings
            Forwarded to ``compute_adjacency_matrices``.
        n_top_genes
            Forwarded to ``register_celldata``; unused by ``_register_celldata`` here.
        label_selection
            Forwarded to ``register_graph_features``; unused by
            ``_register_graph_features`` here.
        """
        self.adata = adata.copy()
        self.patient = patient
        self.library_id = library_id

        print("Loading data from raw files")
        self.register_celldata(n_top_genes=n_top_genes)
        self.register_img_celldata()
        self.register_graph_features(label_selection=label_selection)
        self.compute_adjacency_matrices(radius=radius, coord_type=coord_type, n_rings=n_rings)
        self.radius = radius

        print(
            "Loaded %i images with complete data from %i patients "
            "over %i cells with %i cell features and %i distinct celltypes."
            % (
                len(self.img_celldata),
                len(self.patients),
                self.celldata.shape[0],
                self.celldata.shape[1],
                len(self.celldata.uns["node_type_names"]),
            )
        )

    def _register_celldata(self, n_top_genes):
        """Build ``self.celldata`` with metadata and the image-to-patient map.

        Parameters
        ----------
        n_top_genes
            Unused; accepted for signature compatibility with the caller.
        """
        celldata = self.adata.copy()
        # Only the image column is recorded: this loader has no cluster column.
        celldata.uns["metadata"] = {"image_col": self.library_id}

        # Keys are stringified in every branch so they match the keys of
        # ``self.img_celldata``, which ``_register_img_celldata`` builds with ``str(k)``.
        if self.patient:
            img_to_patient_dict = {}
            for p in np.unique(celldata.obs[self.patient]):
                patient_rows = celldata.obs[celldata.obs[self.patient] == p]
                for img in np.unique(patient_rows[self.library_id]):
                    img_to_patient_dict[str(img)] = p
        elif self.library_id:
            # No patient annotation: every image maps to one placeholder patient.
            img_to_patient_dict = {
                str(img): "patient" for img in np.unique(celldata.obs[self.library_id])
            }
        else:
            img_to_patient_dict = {"image": "patient"}
        celldata.uns["img_to_patient_dict"] = img_to_patient_dict
        self.img_to_patient_dict = img_to_patient_dict

        self.celldata = celldata

    def _register_img_celldata(self):
        """Build the dictionary of image-wise cell data objects, {image key: data object of image}."""
        if not self.library_id:
            # Without an image column everything forms a single image.
            self.img_celldata = {"image": self.celldata}
            return
        image_ids = self.celldata.obs[self.library_id]
        self.img_celldata = {
            str(k): self.celldata[image_ids == k].copy() for k in np.unique(image_ids)
        }

    def _register_graph_features(self, label_selection):
        """Attach empty graph-level covariate containers to every image and to ``celldata``.

        Parameters
        ----------
        label_selection
            Unused; accepted for signature compatibility with the caller.
        """
        # Each image gets its own fresh dictionary so that images never share state.
        for adata in self.img_celldata.values():
            adata.uns["graph_covariates"] = {
                "label_names": {},
                "label_tensors": {},
                "label_selection": [],
                "continuous_mean": {},
                "continuous_std": {},
                "label_data_types": {},
            }

        # The pooled object carries the same keys except "label_tensors".
        self.celldata.uns["graph_covariates"] = {
            "label_names": {},
            "label_selection": [],
            "continuous_mean": {},
            "continuous_std": {},
            "label_data_types": {},
        }
1764 | 2028 |
|
1765 | 2029 |
|
1766 | 2030 | class DataLoaderZhang(DataLoader):
|
|
0 commit comments