diff --git a/bin/scdrs b/bin/scdrs index 52f174e..f5b3a64 100644 --- a/bin/scdrs +++ b/bin/scdrs @@ -167,6 +167,21 @@ def compute_score( ) print("First 3 cells: %s" % (str(list(adata.obs_names[:3])))) print("First 5 genes: %s" % (str(list(adata.var_names[:5])))) + if ADJ_PROP is not None: + err_msg = "'adj_prop'=%s not in 'adata.obs.columns'" % ADJ_PROP + assert ADJ_PROP in adata.obs, err_msg + temp_df = adata.obs[[ADJ_PROP]].groupby(ADJ_PROP).agg({ADJ_PROP: len}) + temp_df.columns = ["n"] + temp_df.sort_values(by="n", ascending=False, inplace=True) + print("--adj-prop: %d categories" % temp_df.shape[0]) + msg = ", ".join( + ["%s (%d)" % (x, temp_df.loc[x, "n"]) for x in temp_df.index[:3]] + ) + print("Top 3: %s" % msg) + msg = ", ".join( + ["%s (%d)" % (x, temp_df.loc[x, "n"]) for x in temp_df.index[-3:]] + ) + print("Bottom 3: %s" % msg) # Load .cov file if COV_FILE is not None: diff --git a/scdrs/pp.py b/scdrs/pp.py index d7f5459..094c75d 100644 --- a/scdrs/pp.py +++ b/scdrs/pp.py @@ -4,6 +4,7 @@ import pandas as pd from skmisc.loess import loess from typing import List +import warnings def category2dummy( @@ -373,6 +374,10 @@ def compute_stats( # Borrowed from scanpy _highly_variable_genes_seurat_v3 not_const = df_gene["ct_var"].values > 0 + if (df_gene["ct_mean"].values <= 0).sum() > 0: + # Exclude genes with negative values (usually small) + warnings.warn("%d genes with ct_mean<0" % (df_gene["ct_mean"].values < 0).sum()) + not_const = not_const & (df_gene["ct_mean"].values > 0) estimat_var = np.zeros(adata.shape[1], dtype=np.float64) y = np.log10(df_gene["ct_var"].values[not_const]) x = np.log10(df_gene["ct_mean"].values[not_const]) diff --git a/scdrs/version.py b/scdrs/version.py index bd94f5f..e8d28c1 100644 --- a/scdrs/version.py +++ b/scdrs/version.py @@ -2,5 +2,5 @@ # 1) we don't load dependencies by storing it in __init__.py # 2) we can import it in setup.py for the same reason # 3) we can import it into your module module -__version__ = '1.0.2' +__version__ = '1.0.3' __version_info__ = tuple([ int(num) for num in __version__.split('.')]) \ No newline at end of file