Skip to content

Commit

Permalink
Merge pull request #42 from genesofeve/39-errors-in-projectr-method-f…
Browse files Browse the repository at this point in the history
…or-sparse-matrix-data

39 errors in projectr method for sparse matrix data
  • Loading branch information
dimalvovs authored Nov 14, 2024
2 parents 2e0788c + c3fdb7a commit 754af18
Show file tree
Hide file tree
Showing 5 changed files with 70 additions and 62 deletions.
5 changes: 3 additions & 2 deletions DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ Package: projectR
Type: Package
Title: Functions for the projection of weights from PCA, CoGAPS, NMF,
correlation, and clustering
Version: 1.19.2
Version: 1.23.1
Author: Gaurav Sharma, Charles Shin, Jared Slosberg, Loyal Goff, Genevieve Stein-O'Brien
Authors@R: c(
person("Gaurav", "Sharma", role = c("aut")),
Expand All @@ -15,6 +15,7 @@ Authors@R: c(
Description: Functions for the projection of data into the spaces defined by PCA, CoGAPS, NMF, correlation, and clustering.
License: GPL (==2)
Imports:
SingleCellExperiment,
methods,
cluster,
stats,
Expand Down Expand Up @@ -50,7 +51,7 @@ Suggests:
SeuratObject
LazyData: TRUE
LazyDataCompression: gzip
RoxygenNote: 7.3.1
RoxygenNote: 7.3.2
Encoding: UTF-8
VignetteBuilder: knitr
biocViews: FunctionalPrediction, GeneRegulation, BiologicalQuestion, Software
Expand Down
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ exportClasses(correlateR)
exportClasses(rotatoR)
import(MatrixModels)
import(RColorBrewer)
import(SingleCellExperiment)
import(cluster)
import(dplyr)
import(fgsea)
Expand Down
93 changes: 40 additions & 53 deletions R/projectR.R
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@ setOldClass("prcomp")
#' @param NP vector of integers indicating which columns of loadings object to use. The default of NP=NA will use entire matrix.
#' @param full logical indicating whether to return the full model solution. By default only the new pattern object is returned.
#' @param model Optional arguements to choose method for projection
#' @param family VGAM family function for model fitting (default: "gaussianff")
#' @param bootstrapPval logical to indicate whether to generate p-values using bootstrap, not available for prcomp and rotatoR objects
#' @param bootIter number of bootstrap iterations, default = 1000
#' @rdname projectR-methods
Expand All @@ -21,13 +20,12 @@ setMethod("projectR",signature(data="matrix",loadings="matrix"),function(
loadingsNames = NULL, # a vector with names of loadings rows
NP=NA, # vector of integers indicating which columns of loadings object to use. The default of NP=NA will use entire matrix.
full=FALSE, # logical indicating whether to return the full model solution. By default only the new pattern object is returned.
family="gaussianff", # VGAM family function (default: "gaussianff")
bootstrapPval=FALSE, # logical to indicate whether to generate p-values using bootstrap
bootIter=1e3 # No of bootstrap iterations
){

ifelse(!is.na(NP),loadings<-loadings[,NP],loadings<-loadings)
#if(!is.na(NP)){loadings<-loadings[,NP]} was giving warning with subset of patterns

#match genes in data sets
if(is.null(dataNames)){
dataNames <- rownames(data)
Expand All @@ -45,16 +43,8 @@ setMethod("projectR",signature(data="matrix",loadings="matrix"),function(
projectionPatterns <- t(projection$coefficients)
projection.ts<-t(projection$coefficients/projection$stdev.unscaled/projection$sigma)

#projection<-vglm(dataM$data2 ~ 0 + dataM$data1,family=family)
#projectionPatterns<-coefvlm(projection,matrix.out=TRUE)

#For VGAM
#pval.matrix<-matrix(2*pnorm(-abs(summary(projection)@coef3[,3])),nrow=5,byrow=TRUE)

#For limma
pval.matrix<-2*pnorm(-abs(projection.ts))
#colnames(pval.matrix)<-colnames(projectionPatterns)
#rownames(pval.matrix)<-rownames(projectionPatterns)

if(bootstrapPval){
boots <- lapply(1:bootIter,function(x){
Expand All @@ -66,7 +56,6 @@ setMethod("projectR",signature(data="matrix",loadings="matrix"),function(
}

if(full & bootstrapPval){
#projectionFit <- list('projection'=projectionPatterns, 'fit'=projection,'pval'=pval.matrix)
projectionFit <- list('projection'=projectionPatterns, 'pval'=pval.matrix, 'bootstrapPval' = bootPval)
return(projectionFit)
} else if(full){
Expand All @@ -82,60 +71,61 @@ setMethod("projectR",signature(data="matrix",loadings="matrix"),function(
#' @param NP vector of integers indicating which columns of loadings object to use. The default of NP=NA will use entire matrix.
#' @param full logical indicating whether to return the full model solution. By default only the new pattern object is returned.
#' @param model Optional arguements to choose method for projection
#' @param family VGAM family function for model fitting (default: "gaussianff")
#' @rdname projectR-methods
#' @aliases projectR
setMethod("projectR",signature(data="dgCMatrix",loadings="matrix"),function(
data, # a dataset to be projected onto
loadings, # a matrix of continous values to be projected with unique rownames
dataNames = NULL, # a vector with names of data rows
loadingsNames = NULL, # a vector with names of loadings rows
NP=NA, # vector of integers indicating which columns of loadings object to use. The default of NP=NA will use entire matrix.
full=FALSE, # logical indicating whether to return the full model solution. By default only the new pattern object is returned.
family="gaussianff" # VGAM family function (default: "gaussianff")
NP=NULL, # vector of integers indicating which columns of loadings object to use. The default of NP=NA will use entire matrix.
full=FALSE # logical indicating whether to return the full model solution. By default only the new pattern object is returned.
){

ifelse(!is.na(NP),loadings<-loadings[,NP],loadings<-loadings)
#if(!is.na(NP)){loadings<-loadings[,NP]} was giving warning with subset of patterns
#match genes in data sets
if(is.null(dataNames)){
dataNames <- rownames(data)
if(!is.null(NP)) {
loadings<-loadings[,NP]
}
if(is.null(loadingsNames)){
loadingsNames <- rownames(loadings)
}
dataM<-geneMatchR(data1=data, data2=loadings, data1Names=dataNames, data2Names=loadingsNames, merge=FALSE)
print(paste(as.character(dim(dataM[[2]])[1]),'row names matched between data and loadings'))
print(paste('Updated dimension of data:',as.character(paste(dim(dataM[[2]]), collapse = ' '))))
# do projection
Design <- model.matrix(~0 + dataM[[1]])
colnames(Design) <- colnames(dataM[[1]])
projection <- MatrixModels:::lm.fit.sparse(t(dataM[[2]]),dataM[[1]])
projectionPatterns <- t(projection$coefficients)
projection.ts<-t(projection$coefficients/projection$stdev.unscaled/projection$sigma)

#projection<-vglm(dataM$data2 ~ 0 + dataM$data1,family=family)
#projectionPatterns<-coefvlm(projection,matrix.out=TRUE)

#For VGAM
#pval.matrix<-matrix(2*pnorm(-abs(summary(projection)@coef3[,3])),nrow=5,byrow=TRUE)

#For limma
pval.matrix<-2*pnorm(-abs(projection.ts))
#colnames(pval.matrix)<-colnames(projectionPatterns)
#rownames(pval.matrix)<-rownames(projectionPatterns)
print("dgCMatrix detected, projecting in chunks.")
#columns of dgcMatrix are LHS for stats::lm, and columns of loadings are the
#dense RHS (predictors). sometimes dgcMatrix is too big to fit RAM, so we
#just fit chunks of lm models as supported by stats::lm/limma::lmFit

chop <- function(sparsematrix) {
coln <- ncol(sparsematrix)
bins <- seq(1, coln, by = 1000)
lapply(seq_along(bins), function(i) {
start <- bins[i]
end <- ifelse(i < length(bins), bins[i + 1] - 1, coln)
return(start:end)
})
}
#discard print statements projectR generates each time a chunk is called
w <- invisible(capture.output(
projectionList <- lapply(chop(data), function(i) {
projectR(as.matrix(data[,i]), loadings, full=full)
})
))
#since chopping by columns, it's enough to print matching rows only once
print(w[1])

if(full==TRUE){
#projectionFit <- list('projection'=projectionPatterns, 'fit'=projection,'pval'=pval.matrix)
projectionFit <- list('projection'=projectionPatterns, 'pval'=pval.matrix)
return(projectionFit)
if(full==TRUE) {
if(length(projectionList)==1) {#if only one chunk
res <- projectionList[[1]]
} else {
projectionFit <- lapply(projectionList, function(x) do.call(cbind, x))
res <- projectionFit
}
} else {
res <- do.call(cbind, projectionList)
}
else{return(projectionPatterns)}
return(res)
})


#######################################################################################################################################
#' @import limma
#' @import SingleCellExperiment
#' @importFrom NMF fcnnls
#' @examples
#' library("CoGAPS")
Expand All @@ -153,7 +143,6 @@ setMethod("projectR",signature(data="matrix",loadings="LinearEmbeddingMatrix"),f
NP=NA, # vector of integers indicating which columns of loadings object to use. The default of NP=NA will use entire matrix.
full=FALSE, # logical indicating whether to return the full model solution. By default only the new pattern object is returned.
model=NA, # optional arguements to choose method for projection
family="gaussianff", # VGAM family function (default: "gaussianff")
bootstrapPval=FALSE, # logical to indicate whether to generate p-values using bootstrap
bootIter=1e3 # No of bootstrap iterations
){
Expand Down Expand Up @@ -256,10 +245,8 @@ setMethod("projectR",signature(data="matrix",loadings="rotatoR"),function(
Eigenvalues<-eigen(cov(projectionPatterns))$values
PercentVariance<-round(Eigenvalues/sum(Eigenvalues) * 100, digits = 2)

#PercentVariance<-apply(projectionPatterns,2, function(x) 100*var(x)/sum(apply(p2P,2,var)))

projectionFit <- list(t(projectionPatterns), PercentVariance)
return(projectionFit)
projectionFit <- list(t(projectionPatterns), PercentVariance)
return(projectionFit)
}
else{return(t(projectionPatterns))}

Expand Down
9 changes: 2 additions & 7 deletions man/projectR-methods.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

24 changes: 24 additions & 0 deletions tests/testthat/test_projectR.R
Original file line number Diff line number Diff line change
Expand Up @@ -200,4 +200,28 @@ test_that("results are correctly formatted for P value mode",{

})

test_that("projection works on sparse data matrix", {
dense <- as.matrix(p.RNAseq6l3c3t)
sparse <- as(p.RNAseq6l3c3t, "sparseMatrix")
loadings <- CR.RNAseq6l3c3t@featureLoadings

expect_true("dgCMatrix" %in% class(sparse))
expect_no_error(projectR(sparse, loadings))

pdense <- projectR(dense, loadings)
psparse <- projectR(sparse, loadings)
expect_identical(pdense, psparse)
})

test_that("projection works on sparse data matrix with full=TRUE", {
dense <- as.matrix(p.RNAseq6l3c3t)
sparse <- as(p.RNAseq6l3c3t, "sparseMatrix")
loadings <- CR.RNAseq6l3c3t@featureLoadings

expect_true("dgCMatrix" %in% class(sparse))
expect_no_error(projectR(sparse, loadings))

pdense <- projectR(dense, loadings, full=TRUE)
psparse <- projectR(sparse, loadings, full=TRUE)
expect_identical(pdense, psparse)
})

0 comments on commit 754af18

Please sign in to comment.