Skip to content
This repository was archived by the owner on Dec 30, 2023. It is now read-only.

Commit b88266a

Browse files
committed
replace GSETS with iGSETS
1 parent 288c42e commit b88266a

20 files changed

+301
-284
lines changed

R/pgx-api.R

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -125,13 +125,14 @@ pgx.getMarkerGenes <- function(pgx, n=10, dir=0, sym=FALSE, filt=NULL) {
125125
pgx.getFamilies <- function(pgx, nmin=10, extended=FALSE) {
126126
if(extended) {
127127
fam <- grep("^[<].*|^FAMILY|^TISSUE|^COMPARTMENT|^CELLTYPE|^GOCC|^DISEASE|^CUSTOM",
128-
names(GSETS),value=TRUE)
129-
fam <- grep("^[<].*|^FAMILY|^COMPARTMENT|^CUSTOM",names(GSETS),value=TRUE)
128+
names(iGSETS),value=TRUE)
129+
fam <- grep("^[<].*|^FAMILY|^COMPARTMENT|^CUSTOM",names(iGSETS),value=TRUE)
130130
} else {
131-
fam <- grep("^[<].*|^FAMILY|^CUSTOM",names(GSETS),value=TRUE)
131+
fam <- grep("^[<].*|^FAMILY|^CUSTOM",names(iGSETS),value=TRUE)
132132
}
133133
xgenes <- toupper(rownames(pgx$X))
134134
xgenes <- toupper(pgx$genes$gene_name)
135-
jj <- which(sapply(GSETS[fam],function(x) sum(x %in% xgenes)) >= nmin)
135+
gg <- getGSETS(fam)
136+
jj <- which(sapply(gg,function(x) sum(x %in% xgenes)) >= nmin)
136137
sort(fam[jj])
137138
}

R/pgx-correct.R

Lines changed: 12 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -323,14 +323,14 @@ pgx.superBatchCorrect <- function(X, pheno, model.par, partype=NULL,
323323
n.sv
324324
}
325325
cX1 <- Matrix::head(cX[order(-apply(cX,1,sd)),],1000) ## top 1000 genes only (faster)
326-
sv <- try( sva(cX1, mod1x, mod0=mod0x, n.sv=n.sv)$sv )
326+
sv <- try( sva::sva(cX1, mod1x, mod0=mod0x, n.sv=n.sv)$sv )
327327
##sv <- SmartSVA::smartsva.cpp(cX, mod1x, mod0=mod0x, n.sv=n.sv)$sv
328328
if(any(class(sv)=="try-error")) {
329329
## try again with little bit of noise...
330330
a <- 0.01*mean(apply(cX,1,sd))
331331
cX1 <- cX + a*matrix(rnorm(length(cX)),nrow(cX),ncol(cX))
332332
cX1 <- Matrix::head(cX1[order(-apply(cX1,1,sd)),],1000) ## top 1000 genes only (faster)
333-
sv <- try( sva(cX1, mod1x, mod0=mod0x, n.sv=pmax(n.sv-1,1))$sv )
333+
sv <- try( sva::sva(cX1, mod1x, mod0=mod0x, n.sv=pmax(n.sv-1,1))$sv )
334334
}
335335
if(!any(class(sv)=="try-error")) {
336336
message("[pgx.superBatchCorrect] Performing SVA correction...")
@@ -509,7 +509,7 @@ pgx.PC_correlation <- function(X, pheno, nv=3, stat="F", plot=TRUE, main=NULL) {
509509
tt0 <- c("PC correlation","PC variation")[1 + 1*(stat=="F")]
510510
if(is.null(main)) main <- tt0
511511
## R <- R[,ncol(R):1]
512-
plt <- ggpubr::ggbarplot(t(R), ylab=stat0, srt=45, group.name="") +
512+
plt <- plot.ggbarplot(t(R), ylab=stat0, srt=45, group.name="") +
513513
## ggplot2::theme(
514514
## legend.key.size = grid::unit(0.65,"lines"),
515515
## legend.key.height = grid::unit(0.35,"lines"),
@@ -683,7 +683,7 @@ pgx.performBatchCorrection <- function(ngs, zx, batchparams,
683683
## mod1 <- ngs$model.parameters$design
684684
mod0 <- cbind(mod1[,1])
685685
##mod0 = model.matrix( ~ 1, data=ngs$samples)
686-
sv <- sva( 0.0001+zx, mod1, mod0, n.sv=NULL)$sv
686+
sv <- sva::sva( 0.0001+zx, mod1, mod0, n.sv=NULL)$sv
687687
##sv <- svaseq( 2**zx, mod1, mod0, n.sv=NULL)$sv
688688
zx <- limma::removeBatchEffect(zx, covariates=sv, design=mod1)
689689
} else if(batchpar=="<NNM>") {
@@ -778,7 +778,7 @@ pgx.removeBatchEffect <- function(X, batch, model.vars=NULL,
778778
X <- limma::removeBatchEffect(X, batch=batch0)
779779
} else if(method=="ComBat") {
780780

781-
X <- ComBat(X, batch = batch0)
781+
X <- sva::ComBat(X, batch = batch0)
782782
} else if(method=="BMC") {
783783
## batch mean center
784784
matlist <- tapply(1:ncol(X), batch0, function(i) X[,i,drop=FALSE])
@@ -885,13 +885,10 @@ pgx.computeBiologicalEffects <- function(X, is.count=FALSE)
885885
return(pheno)
886886
}
887887

888-
nmax=-1
888+
##nmax=-1
889889
pgx.svaCorrect <- function(X, pheno, nmax=-1) {
890890
##
891-
## IK: not sure about this SVA correction stuff...
892-
893-
894-
891+
## IK: not sure about this SVA correction stuff...
895892
if(NCOL(pheno)==1) {
896893
pheno <- data.frame(pheno=pheno)
897894
}
@@ -915,24 +912,23 @@ pgx.svaCorrect <- function(X, pheno, nmax=-1) {
915912
##df <- data.frame(var=y)
916913
##mod1x = model.matrix( ~var, data=df)
917914
##mod0x = model.matrix( ~1, data=df)
918-
919915
mod1x <- cbind(1, mod1)
920916
mod0x <- mod1x[,1,drop=FALSE]
921917
##mod0 = NULL
922918

923919
message("Estimating number of surrogate variables...")
924920
if(0) {
925921
## original method using SVA
926-
n.sv = num.sv(X, mod1x, method="be")
922+
n.sv = sva::num.sv(X, mod1x, method="be")
927923
n.sv
928924
} else {
929925
## fast method using SmartSVA
930926
##X.r <- t(resid(lm(t(X) ~ var, data=df)))
931927
pp <- paste0(colnames(pheno),collapse="+")
932928
pp
933929
lm.expr <- paste0("lm(t(X) ~ ",pp,", data=pheno)")
934-
X.r <- t(resid(eval(parse(text=lm.expr))))
935-
n.sv <- EstDimRMT(X.r, FALSE)$dim + 1
930+
X.r <- t(stats::resid(eval(parse(text=lm.expr))))
931+
n.sv <- isva::EstDimRMT(X.r, FALSE)$dim + 1
936932
n.sv
937933
}
938934
n.sv <- min(n.sv, min(table(y)))
@@ -943,7 +939,7 @@ pgx.svaCorrect <- function(X, pheno, nmax=-1) {
943939
if(nmax>0) {
944940
vX = Matrix::head(X[order(-apply(X,1,sd)),],nmax)
945941
}
946-
sv <- sva(vX, mod1x, mod0x, n.sv=n.sv)$sv
942+
sv <- sva::sva(vX, mod1x, mod0x, n.sv=n.sv)$sv
947943
##sv <- SmartSVA::smartsva.cpp(X, mod1x, mod0=mod0x, n.sv=n.sv)$sv
948944

949945
message("Perform batch correction...")
@@ -1056,7 +1052,7 @@ pgx._runComputeNumSig <- function(ngs, parcomb, contrast, resample=-1,
10561052
mod0 = cbind(mod1[,1])
10571053
logcpm <- edgeR::cpm(ngs$counts, log=TRUE) ## perform SVA on logCPM
10581054
log <- capture.output({
1059-
suppressWarnings(sv <- sva(logcpm, mod1, mod0, n.sv=NULL)$sv)
1055+
suppressWarnings(sv <- sva::sva(logcpm, mod1, mod0, n.sv=NULL)$sv)
10601056
suppressWarnings(aX <- limma::removeBatchEffect( logcpm, covariates=sv, design=mod1))
10611057
})
10621058
dim(aX)

R/pgx-init.R

Lines changed: 26 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,11 @@
99
##-----------------------------------------------------------------------------
1010
## GLOBAL variables
1111
##-----------------------------------------------------------------------------
12-
12+
if(0) {
13+
RDIR='~/Playground/omicsplayground/R'
14+
FILES='~/Playground/omicsplayground/lib'
15+
source(file.path(RDIR,"pgx-include.R"),local=TRUE) ## pass local vars
16+
}
1317
source(file.path(RDIR,"pgx-functions.R"),local=TRUE) ## pass local vars
1418

1519
## Caching the init files
@@ -20,23 +24,24 @@ INIT.FILE
2024

2125
file.exists(INIT.FILE)
2226

23-
init.start_time = Sys.time()
24-
2527
if(1 && file.exists(INIT.FILE)) {
2628

2729
message("[INIT] loading cached INIT file ",INIT.FILE)
28-
t <- Sys.time()
30+
t0 <- Sys.time()
2931
load(INIT.FILE, verbose=1)
30-
message("Loading cache took: ", round(Sys.time() - t), " seconds")
32+
message("Loading cache took: ", round(Sys.time() - t0), " seconds")
3133

3234
} else {
3335

3436
message("[INIT] no INIT file! building INIT from scratch.")
37+
message("[INIT] INIT.FILE = ", INIT.FILE)
38+
t0 <- Sys.time()
39+
3540
oldvars <- ls()
3641

3742
## All gene families in Human UPPER CASE
3843
GENE.TITLE = unlist(as.list(org.Hs.eg.db::org.Hs.egGENENAME))
39-
GENE.SYMBOL = unlist(as.list(org.hs.eg.db::org.Hs.egSYMBOL))
44+
GENE.SYMBOL = unlist(as.list(org.Hs.eg.db::org.Hs.egSYMBOL))
4045
names(GENE.TITLE) = GENE.SYMBOL
4146
##GSET.PREFIX.REGEX = paste(paste0("^",GSET.PREFIXES,"_"),collapse="|")
4247
GSET.PREFIX.REGEX="^BIOCARTA_|^C2_|^C3_|^C7_|^CHEA_|^GOBP_|^GOCC_|^GOMF_|^HALLMARK_|^KEA_|^KEGG_|^PID_|^REACTOME_|^ST_"
@@ -49,7 +54,6 @@ if(1 && file.exists(INIT.FILE)) {
4954
GSETS = gmt.all;remove(gmt.all)
5055

5156
message("[INIT] parsing gene families...")
52-
5357
FAMILIES <- pgx.getGeneFamilies(GENE.SYMBOL, FILES=FILES, min.size=10, max.size=9999)
5458
fam.file <- file.path(FILES,"custom-families.gmt")
5559
if(file.exists(fam.file)) {
@@ -63,11 +67,20 @@ if(1 && file.exists(INIT.FILE)) {
6367
names(f1) <- sub("FAMILY:<all>","<all>",names(f1))
6468
GSETS <- c(GSETS,f1)
6569

70+
## convert to integer list (more efficient)
71+
message("[INIT] converting GSETS to list of integers...")
72+
GSET.GENES <- sort(unique(unlist(GSETS))) ## slow...
73+
iGSETS <- parallel::mclapply(GSETS, function(a) match(a,GSET.GENES)) ## slow...
74+
names(iGSETS) <- names(GSETS)
75+
getGSETS <- function(gs) {
76+
lapply(iGSETS[gs],function(i) GSET.GENES[i])
77+
}
78+
6679
message("[INIT] parsing collections...")
6780
COLLECTIONS <- pgx.getGeneSetCollections(names(GSETS), min.size=10, max.size=99999)
6881
COLLECTIONS <- COLLECTIONS[order(names(COLLECTIONS))]
6982

70-
remove(list=c("custom.gmt","f1"))
83+
remove(list=c("custom.gmt","f1","GSETS"))
7184

7285
##-----------------------------------------------------------------------------
7386
## TISSUE/REFERENCE data sets
@@ -100,8 +113,7 @@ if(1 && file.exists(INIT.FILE)) {
100113
##-----------------------------------------------------------------------------
101114
## Colors
102115
##-----------------------------------------------------------------------------
103-
104-
116+
105117
COLORS = rep(RColorBrewer::brewer.pal(8,"Set2"),99)
106118
COLORS = rep(c(ggsci::pal_npg("nrc", alpha = 0.7)(10),
107119
ggsci::pal_aaas("default", alpha = 0.7)(10),
@@ -117,14 +129,12 @@ if(1 && file.exists(INIT.FILE)) {
117129
newvars <- setdiff(ls(), oldvars)
118130
newvars
119131

120-
## message("[INIT] saving INIT file ", INIT.FILE)
121-
## save( list=newvars, file=INIT.FILE)
132+
message("Creating global init took: ", round(Sys.time() - t0), " seconds")
133+
message("[INIT] saving INIT file ", INIT.FILE)
134+
save( list=newvars, file=INIT.FILE)
135+
122136
}
123137

124-
init.load_time = round( Sys.time() - init.start_time, digits=3)
125-
message("[INIT] init load time = ",init.load_time, " ",attr(init.load_time,"units"))
126-
127-
128138
pgx.initialize <- function(pgx) {
129139

130140
##---------------------------------------------------------------------

0 commit comments

Comments
 (0)