1
1
from ....data .sample import load_sample_data
2
2
from ....tools .decorators import dataset
3
3
from .utils import filter_celltypes
4
+ from .utils import precompute_hvg
4
5
6
+ import numbers
5
7
import numpy as np
6
8
7
9
MIN_CELLS_PER_CELLTYPE = 50
10
+ N_HVG_UNINT = 2000
8
11
9
12
10
13
def check_neighbors (adata , neighbors_key , connectivities_key , distances_key ):
@@ -15,7 +18,12 @@ def check_neighbors(adata, neighbors_key, connectivities_key, distances_key):
15
18
assert distances_key in adata .obsp
16
19
17
20
18
- def check_dataset (adata , do_check_pca = False , do_check_neighbors = False ):
21
+ def check_dataset (
22
+ adata ,
23
+ do_check_pca = False ,
24
+ do_check_neighbors = False ,
25
+ do_check_hvg = False ,
26
+ ):
19
27
"""Check that dataset output fits expected API."""
20
28
21
29
assert "batch" in adata .obs
@@ -28,12 +36,21 @@ def check_dataset(adata, do_check_pca=False, do_check_neighbors=False):
28
36
assert adata .var_names .is_unique
29
37
assert adata .obs_names .is_unique
30
38
39
+ assert "n_genes_pre" in adata .uns
40
+ assert isinstance (adata .uns ["n_genes_pre" ], numbers .Integral )
41
+ assert adata .uns ["n_genes_pre" ] == adata .n_vars
42
+
31
43
assert "organism" in adata .uns
32
44
assert adata .uns ["organism" ] in ["mouse" , "human" ]
33
45
34
46
if do_check_pca :
35
47
assert "X_uni_pca" in adata .obsm
36
48
49
+ if do_check_hvg :
50
+ assert "hvg_unint" in adata .uns
51
+ assert len (adata .uns ["hvg_unint" ]) == min (N_HVG_UNINT , adata .n_vars )
52
+ assert np .all (np .isin (adata .uns ["hvg_unint" ], adata .var .index ))
53
+
37
54
if do_check_neighbors :
38
55
check_neighbors (adata , "uni" , "uni_connectivities" , "uni_distances" )
39
56
@@ -58,6 +75,10 @@ def sample_dataset(run_pca: bool = False, run_neighbors: bool = False):
58
75
adata .obs ["batch" ] = np .random .choice (2 , adata .shape [0 ], replace = True ).astype (str )
59
76
adata .obs ["labels" ] = np .random .choice (3 , adata .shape [0 ], replace = True ).astype (str )
60
77
adata = filter_celltypes (adata )
78
+
79
+ adata .uns ["hvg_unint" ] = precompute_hvg (adata )
80
+ adata .uns ["n_genes_pre" ] = adata .n_vars
81
+
61
82
if run_pca :
62
83
adata .obsm ["X_uni_pca" ] = sc .pp .pca (adata .X )
63
84
if run_neighbors :
0 commit comments