-
Notifications
You must be signed in to change notification settings - Fork 66
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
First version on GitHub
- Loading branch information
Showing
100 changed files
with
36,558 additions
and
1 deletion.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,14 @@ | ||
Package: SARTools | ||
Type: Package | ||
Title: Statistical Analysis of RNA-Seq Tools | ||
Version: 1.0.0 | ||
Date: 2014-12-10 | ||
Author: Marie-Agnes Dillies and Hugo Varet | ||
Maintainer: Hugo Varet <[email protected]> | ||
Depends: R (>= 3.1.0), DESeq2 (>= 1.6.0), edgeR (>= 3.8.0), xtable | ||
Suggests: genefilter (>= 1.44.0), BiocStyle | ||
Imports: knitr, GenomicRanges, S4Vectors, limma | ||
VignetteBuilder: knitr | ||
Encoding: latin1 | ||
Description: SARTools provides R tools and an environment for the statistical analysis of RNA-Seq projects: load and clean data, produce figures, perform statistical analysis/testing with DESeq2 or edgeR, export results and create final report | ||
License: GPL-2 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,11 @@ | ||
# Generated by roxygen2 (4.0.2): do not edit by hand | ||
|
||
exportPattern("^[a-zA-Z]") | ||
import(DESeq2) | ||
import(edgeR) | ||
import(xtable) | ||
importFrom(GenomicRanges,assay) | ||
importFrom(GenomicRanges,colData) | ||
importFrom(S4Vectors,mcols) | ||
importFrom(knitr,knit2html) | ||
importFrom(limma,plotMDS) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,18 @@ | ||
CHANGES IN VERSION 1.0.0 | ||
------------------------ | ||
o First version publicly available on GitHub | ||
|
||
CHANGES IN VERSION 0.99.6 | ||
------------------------- | ||
o Added batch argument to loadTargetFile() | ||
o Simplified the R script templates with wrappers | ||
|
||
CHANGES IN VERSION 0.99.5 | ||
------------------------- | ||
o Now check the format/validity of the parameters at the beginning of the script | ||
o Added functions checkParameters.DESeq2() and checkParameters.edgeR() | ||
o Added small precisions in the vignette | ||
|
||
CHANGES IN VERSION 0.99.4 | ||
------------------------- | ||
o First version of the package |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,13 @@ | ||
#' BCV plot (for edgeR dispersions) | ||
#' | ||
#' Biological Coefficient of Variation plot (for edgeR objects) | ||
#' | ||
#' @param dge a \code{DGEList} object | ||
#' @return A file named BCV.png in the figures directory with a BCV plot produced by the \code{plotBCV()} function of the edgeR package | ||
#' @author Marie-Agnes Dillies and Hugo Varet | ||
|
||
BCVPlot <- function(dge){ | ||
png(filename="figures/BCV.png", width=400, height=400) | ||
plotBCV(dge, las = 1, main = "BCV plot") | ||
dev.off() | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,33 @@ | ||
#' MA-plots | ||
#' | ||
#' MA-plot for each comparison: log2(FC) vs mean of normalized counts with one dot per feature (red dot for a differentially expressed feature, black dot otherwise) | ||
#' | ||
#' @param complete A \code{list} of \code{data.frame} containing features results (from \code{exportResults.DESeq2()} or \code{exportResults.edgeR()}) | ||
#' @param alpha cut-off to apply on each adjusted p-value | ||
#' @return A file named MAPlot.png in the figures directory containing one MA-plot per comparison | ||
#' @author Marie-Agnes Dillies and Hugo Varet | ||
|
||
MAPlot <- function(complete, alpha=0.05){ | ||
nrow <- ceiling(sqrt(length(complete))) | ||
ncol <- ceiling(length(complete)/nrow) | ||
png(filename="figures/MAPlot.png", width=400*max(ncol,nrow), height=400*min(ncol,nrow)) | ||
par(mfrow=sort(c(nrow,ncol))) | ||
for (name in names(complete)){ | ||
complete.name <- complete[[name]] | ||
complete.name <- complete.name[complete.name$baseMean>0,] | ||
complete.name$padj <- ifelse(is.na(complete.name$padj),1,complete.name$padj) | ||
log2FC <- complete.name$log2FoldChange | ||
ylim = 1.1 * c(-1,1) * quantile(abs(log2FC[is.finite(log2FC)]), probs=0.99) | ||
plot(complete.name$baseMean, pmax(ylim[1], pmin(ylim[2], log2FC)), | ||
log = "x", cex=0.45, las = 1, ylim = ylim, | ||
col = ifelse(complete.name[,"padj"] < alpha, "red", "black"), | ||
pch = ifelse(log2FC<ylim[1], 6, ifelse(log2FC>ylim[2], 2, 20)), | ||
xlab = "Mean of normalized counts", | ||
ylab = expression(log[2]~fold~change), | ||
main = paste0("MA-plot - ",gsub("_"," ",name))) | ||
abline(h=0, lwd=1, col="lightgray") | ||
} | ||
dev.off() | ||
} | ||
|
||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,25 @@ | ||
#' MDS plot (for edgeR objects) | ||
#' | ||
#' Multi-Dimensional Scaling plot of samples based on the 500 most variant features (for edgeR analyses) | ||
#' | ||
#' @param dge a \code{DGEList} object | ||
#' @param group vector of the condition from which each sample belongs | ||
#' @param n number of features to keep among the most variant | ||
#' @param gene.selection \code{"pairwise"} to choose the top features separately for each pairwise comparison between the samples or \code{"common"} to select the same features for all comparisons. Only used when \code{method="logFC"} | ||
#' @param col colors to use (one per biological condition) | ||
#' @return A file named MDS.png in the figures directory | ||
#' @author Marie-Agnes Dillies and Hugo Varet | ||
|
||
MDSPlot <- function(dge, group, n=500, gene.selection=c("pairwise", "common"), | ||
col=c("lightblue","orange","MediumVioletRed","SpringGreen")){ | ||
png(filename="figures/MDS.png", width=400, height=400) | ||
coord <- plotMDS(dge, top=n, method="logFC", gene.selection=gene.selection[1]) | ||
abs=range(coord$x); abs=abs(abs[2]-abs[1])/25; | ||
ord=range(coord$y); ord=abs(ord[2]-ord[1])/25; | ||
plot(coord$x,coord$y, col=col[as.integer(group)], las=1, main="Multi-Dimensional Scaling plot", | ||
xlab="Leading logFC dimension 1", ylab="Leading logFC dimension 2", cex=2, pch=16) | ||
abline(h=0,v=0,lty=2,col="lightgray") | ||
text(coord$x - ifelse(coord$x>0,abs,-abs), coord$y - ifelse(coord$y>0,ord,-ord), | ||
colnames(dge$counts), col=col[as.integer(group)]) | ||
dev.off() | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,9 @@ | ||
#' @exportPattern ^[a-zA-Z] | ||
#' @import DESeq2 | ||
#' @import edgeR | ||
#' @import xtable | ||
#' @importFrom knitr knit2html | ||
#' @importFrom GenomicRanges colData assay | ||
#' @importFrom S4Vectors mcols | ||
#' @importFrom limma plotMDS | ||
NULL |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,49 @@ | ||
#' PCA of samples (if use of DESeq2) | ||
#' | ||
#' Principal Component Analysis of samples based on the 500 most variant features on VST- or rlog-counts (if use of DESeq2) | ||
#' | ||
#' @param counts.trans a matrix a transformed counts (VST- or rlog-counts) | ||
#' @param group factor vector of the condition from which each sample belongs | ||
#' @param n number of features to keep among the most variant | ||
#' @param col colors to use (one per biological condition) | ||
#' @return A file named PCA.png in the figures directory with a pairwise plot of the three first principal components | ||
#' @author Marie-Agnes Dillies and Hugo Varet | ||
|
||
PCAPlot <- function(counts.trans, group, n=500, col=c("lightblue","orange","MediumVioletRed","SpringGreen")){ | ||
# PCA on the 500 most variables features | ||
rv = apply(counts.trans, 1, var, na.rm=TRUE) | ||
select = order(rv, decreasing = TRUE)[1:n] | ||
pca = prcomp(t(counts.trans[select, ])) | ||
prp <- pca$sdev^2 * 100 / sum(pca$sdev^2) | ||
prp <- round(prp[1:3],2) | ||
|
||
# create figure | ||
png(filename="figures/PCA.png",width=400*2,height=400) | ||
par(mfrow=c(1,2)) | ||
# axes 1 et 2 | ||
abs=range(pca$x[,1]); abs=abs(abs[2]-abs[1])/25; | ||
ord=range(pca$x[,2]); ord=abs(ord[2]-ord[1])/25; | ||
plot(pca$x[,1], pca$x[,2], | ||
las = 1, cex = 2, pch = 16, col = col[as.integer(group)], | ||
xlab = paste0("PC1 (",prp[1],"%)"), | ||
ylab = paste0("PC2 (",prp[2],"%)"), | ||
main = "Principal Component Analysis - Axes 1 and 2") | ||
abline(h=0,v=0,lty=2,col="lightgray") | ||
text(pca$x[,1] - ifelse(pca$x[,1]>0,abs,-abs), pca$x[,2] - ifelse(pca$x[,2]>0,ord,-ord), | ||
colnames(counts.trans), col=col[as.integer(group)]) | ||
|
||
# axes 1 et 3 | ||
abs=range(pca$x[,1]); abs=abs(abs[2]-abs[1])/25; | ||
ord=range(pca$x[,3]); ord=abs(ord[2]-ord[1])/25; | ||
plot(pca$x[,1], pca$x[,3], | ||
las = 1, cex = 2, pch = 16, col = col[as.integer(group)], | ||
xlab = paste0("PC1 (",prp[1],"%)"), | ||
ylab = paste0("PC3 (",prp[3],"%)"), | ||
main = "Principal Component Analysis - Axes 1 and 3") | ||
abline(h=0,v=0,lty=2,col="lightgray") | ||
text(pca$x[,1] - ifelse(pca$x[,1]>0,abs,-abs), pca$x[,3] - ifelse(pca$x[,3]>0,ord,-ord), | ||
colnames(counts.trans), col=col[as.integer(group)]) | ||
dev.off() | ||
|
||
return(invisible(pca$x)) | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,6 @@ | ||
#' SARTools provides R tools and an environment for the statistical analysis of RNA-Seq projects: load and clean data, produce figures, perform statistical analysis/testing with DESeq2 or edgeR, export results and create final report | ||
#' @title Statistical Analysis of RNA-Seq Tools | ||
#' @author Marie-Agnes Dillies and Hugo Varet | ||
#' @docType package | ||
#' @name SARTools-package | ||
NULL |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,28 @@ | ||
#' Pairwise SERE for two samples | ||
#' | ||
#' Compute the SERE coefficient for two samples | ||
#' | ||
#' @param observed \code{matrix} with two columns containing observed counts of two samples | ||
#' @return The SERE coefficient for the two samples | ||
#' @references Schulze, Kanwar, Golzenleuchter et al, SERE: Single-parameter quality control and sample comparison for RNA-Seq, BMC Genomics, 2012 | ||
#' @author See paper published | ||
|
||
SERE <- function(observed){ | ||
#calculate lambda and expected values | ||
laneTotals <- colSums(observed) | ||
total <- sum(laneTotals) | ||
fullObserved <- observed[rowSums(observed)>0,] | ||
fullLambda <- rowSums(fullObserved)/total | ||
fullLhat <- fullLambda > 0 | ||
fullExpected<- outer(fullLambda, laneTotals) | ||
|
||
#keep values | ||
fullKeep <- which(fullExpected > 0) | ||
|
||
#calculate degrees of freedom (nrow*(ncol -1) >> number of parameters - calculated (just lamda is calculated >> thats why minus 1) | ||
#calculate pearson and deviance for all values | ||
oeFull <- (fullObserved[fullKeep] - fullExpected[fullKeep])^2/ fullExpected[fullKeep] # pearson chisq test | ||
dfFull <- length(fullKeep) - sum(fullLhat!=0) | ||
|
||
sqrt(sum(oeFull)/dfFull) | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,23 @@ | ||
#' Percentage of null counts per sample | ||
#' | ||
#' Bar plot of the percentage of null counts per sample | ||
#' | ||
#' @param counts \code{matrix} of counts | ||
#' @param group factor vector of the condition from which each sample belongs | ||
#' @param col colors of the bars (one color per biological condition) | ||
#' @return A file named barplotNull.png in the figures directory | ||
#' @author Marie-Agnes Dillies and Hugo Varet | ||
|
||
barplotNull <- function(counts, group, col=c("lightblue","orange","MediumVioletRed","SpringGreen")){ | ||
png(filename="figures/barplotNull.png",width=400,height=400) | ||
percentage <- apply(counts, 2, function(x){sum(x == 0)})*100/nrow(counts) | ||
percentage.allNull <- (nrow(counts) - nrow(removeNull(counts)))*100/nrow(counts) | ||
barplot(percentage, las = 2, | ||
col = col[as.integer(group)], | ||
ylab = "Proportion of null counts", | ||
main = "Proportion of null counts per sample", | ||
ylim = c(0,1.2*max(percentage))) | ||
abline(h = percentage.allNull, lty = 2, lwd = 2) | ||
legend("topright", levels(group), fill=col[1:nlevels(group)], bty="n") | ||
dev.off() | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,21 @@ | ||
#' Total number of reads per sample | ||
#' | ||
#' Bar plot of the total number of reads per sample | ||
#' | ||
#' @param counts \code{matrix} of counts | ||
#' @param group factor vector of the condition from which each sample belongs | ||
#' @param col colors of the bars (one color per biological condition) | ||
#' @return A file named barplotTotal.png in the figures directory | ||
#' @author Marie-Agnes Dillies and Hugo Varet | ||
|
||
barplotTotal <- function(counts, group, col=c("lightblue","orange","MediumVioletRed","SpringGreen")){ | ||
png(filename="figures/barplotTotal.png",width=400,height=400) | ||
barplot(colSums(counts), | ||
main = "Total read count per sample", | ||
ylab = "Total read count", | ||
ylim = c(0, max(colSums(counts))*1.2), | ||
col = col[as.integer(group)], | ||
las = 2) | ||
legend("topright", levels(group), fill=col[1:nlevels(group)], bty="n") | ||
dev.off() | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,108 @@ | ||
#' Check the parameters (when using DESeq2) | ||
#' | ||
#' Check the format and the validity of the parameters which will be used for the analysis with DESeq2. | ||
# For example, it is important that $alpha$ be a numeric of length 1 between 0 and 1. This function avoid | ||
# potential stupid bugs when running the suite of the script. | ||
#' | ||
#' @param projectName name of the project | ||
#' @param author author of the statistical analysis/report | ||
#' @param targetFile path to the design/target file | ||
#' @param rawDir path to the directory containing raw counts files | ||
#' @param featuresToRemove names of the features to be removed | ||
#' @param varInt factor of interest | ||
#' @param condRef reference biological condition | ||
#' @param batch blocking factor in the design | ||
#' @param fitType mean-variance relationship: "parametric" (default) or "local" | ||
#' @param cooksCutoff outliers detection threshold (NULL to let DESeq2 choosing it) | ||
#' @param independentFiltering TRUE/FALSE to perform independent filtering | ||
#' @param alpha threshold of statistical significance | ||
#' @param pAdjustMethod p-value adjustment method: "BH" (default) or "BY" for example | ||
#' @param typeTrans transformation for PCA/clustering: "VST" ou "rlog" | ||
#' @param locfunc "median" (default) or "shorth" to estimate the size factors | ||
#' @param colors vector of colors of each biological condition on the plots | ||
#' @return A boolean indicating if there is a problem in the parameters | ||
#' @author Hugo Varet | ||
|
||
checkParameters.DESeq2 <- function(projectName,author,targetFile,rawDir, | ||
featuresToRemove,varInt,condRef,batch,fitType, | ||
cooksCutoff,independentFiltering,alpha,pAdjustMethod, | ||
typeTrans,locfunc,colors){ | ||
problem <- FALSE | ||
if (!is.character(projectName) | length(projectName)!=1){ | ||
print("projectName must be a character vector of length 1") | ||
problem <- TRUE | ||
} | ||
if (!is.character(author) | length(author)!=1){ | ||
print("author must be a character vector of length 1") | ||
problem <- TRUE | ||
} | ||
if (!is.character(targetFile) | length(targetFile)!=1 || !file.exists(targetFile)){ | ||
print("targetFile must be a character vector of length 1 specifying an accessible file") | ||
problem <- TRUE | ||
} | ||
if (!is.character(rawDir) | length(rawDir)!=1 || is.na(file.info(rawDir)[1,"isdir"]) | !file.info(rawDir)[1,"isdir"]){ | ||
print("rawDir must be a character vector of length 1 specifying an accessible directory") | ||
problem <- TRUE | ||
} | ||
if (!is.character(featuresToRemove)){ | ||
print("featuresToRemove must be a character vector") | ||
problem <- TRUE | ||
} | ||
if (!is.character(varInt) | length(varInt)!=1){ | ||
print("varInt must be a character vector of length 1") | ||
problem <- TRUE | ||
} | ||
if (!is.character(condRef) | length(condRef)!=1){ | ||
print("condRef must be a character vector of length 1") | ||
problem <- TRUE | ||
} | ||
if (!is.null(batch) && I(!is.character(batch) | length(batch)!=1)){ | ||
print("batch must be NULL or a character vector of length 1") | ||
problem <- TRUE | ||
} | ||
if (!is.character(fitType) | length(fitType)!=1 || !I(fitType %in% c("parametric","local"))){ | ||
print("fitType must be equal to 'parametric' or 'local'") | ||
problem <- TRUE | ||
} | ||
if (!is.null(cooksCutoff) && I(!is.numeric(cooksCutoff) | length(cooksCutoff)!=1 || cooksCutoff<=0)){ | ||
print("cooksCutoff must be NULL or a numeric vector of length 1 with a positive value") | ||
problem <- TRUE | ||
} | ||
if (!is.logical(independentFiltering) | length(independentFiltering)!=1){ | ||
print("independentFiltering must be a boolean vector of length 1") | ||
problem <- TRUE | ||
} | ||
if (!is.numeric(alpha) | length(alpha)!=1 || I(alpha<=0 | alpha>=1)){ | ||
print("alpha must be a numeric vector of length 1 with a value between 0 and 1") | ||
problem <- TRUE | ||
} | ||
if (!is.character(pAdjustMethod) | length(pAdjustMethod)!=1 || !I(pAdjustMethod %in% p.adjust.methods)){ | ||
print(paste("pAdjustMethod must be a value in", paste(p.adjust.methods, collapse=", "))) | ||
problem <- TRUE | ||
} | ||
if (!is.character(typeTrans) | length(typeTrans)!=1 || !I(typeTrans %in% c("VST","rlog"))){ | ||
print("typeTrans must be equal to 'VST' or 'rlog'") | ||
problem <- TRUE | ||
} | ||
if (!is.character(locfunc) | length(locfunc)!=1 || !I(locfunc %in% c("median","shorth"))){ | ||
print("locfunc must be equal to 'median' or 'shorth'") | ||
problem <- TRUE | ||
} else{ | ||
if (locfunc=="shorth" & !I("genefilter" %in% installed.packages()[,"Package"])){ | ||
print("Package genefilter is needed if using locfunc='shorth'") | ||
problem <- TRUE | ||
} | ||
} | ||
areColors <- function(col){ | ||
sapply(col, function(X){tryCatch(is.matrix(col2rgb(X)), error=function(e){FALSE})}) | ||
} | ||
if (!is.vector(colors) || !all(areColors(colors))){ | ||
print("colors must be a vector of colors") | ||
problem <- TRUE | ||
} | ||
|
||
if (!problem){ | ||
print("All the parameters are correct") | ||
} | ||
return(invisible(problem)) | ||
} |
Oops, something went wrong.