Skip to content

Commit

Permalink
Merge pull request #69 from PF2-pasteur-fr/development
Browse files Browse the repository at this point in the history
Development
  • Loading branch information
hvaret authored Sep 16, 2019
2 parents e2efcdf + 6bcec04 commit 944ced8
Show file tree
Hide file tree
Showing 25 changed files with 409 additions and 207 deletions.
31 changes: 26 additions & 5 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,16 +1,37 @@
Package: SARTools
Type: Package
Title: Statistical Analysis of RNA-Seq Tools
Version: 1.6.9
Date: 2019-07-10
Version: 1.7.0
Date: 2019-09-16
Author: Marie-Agnes Dillies and Hugo Varet
Maintainer: Hugo Varet <[email protected]>
Depends: R (>= 3.3.0), DESeq2 (>= 1.12.0), edgeR (>= 3.12.0), xtable
Imports: stats, utils, graphics, grDevices, knitr, rmarkdown (>= 1.4), SummarizedExperiment, S4Vectors, limma, genefilter (>= 1.44.0)
Depends: R (>= 3.3.0),
DESeq2 (>= 1.12.0),
edgeR (>= 3.12.0),
ggplot2,
kableExtra
Imports: genefilter (>= 1.44.0),
GGally,
ggrepel,
ggdendro,
graphics,
grDevices,
grid,
gridExtra,
knitr,
limma,
RGraphics,
rlang,
rmarkdown (>= 1.4),
S4Vectors,
scales,
stats,
SummarizedExperiment,
utils
Suggests: optparse
biocViews: Software
VignetteBuilder: knitr, rmarkdown
Encoding: latin1
Description: Provide R tools and an environment for the statistical analysis of RNA-Seq projects: load and clean data, produce figures, perform statistical analysis/testing with DESeq2 or edgeR, export results and create final report.
License: GPL-2
RoxygenNote: 6.0.1
RoxygenNote: 6.1.1
21 changes: 20 additions & 1 deletion NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,19 @@
exportPattern("^[a-zA-Z]")
import(DESeq2)
import(edgeR)
import(xtable)
import(ggplot2)
import(kableExtra)
importFrom(GGally,ggally_points)
importFrom(GGally,ggpairs)
importFrom(GGally,wrap)
importFrom(RGraphics,splitTextGrob)
importFrom(S4Vectors,mcols)
importFrom(S4Vectors,metadata)
importFrom(SummarizedExperiment,assay)
importFrom(SummarizedExperiment,colData)
importFrom(genefilter,shorth)
importFrom(ggdendro,ggdendrogram)
importFrom(ggrepel,geom_text_repel)
importFrom(grDevices,col2rgb)
importFrom(grDevices,dev.off)
importFrom(grDevices,png)
Expand All @@ -24,8 +31,19 @@ importFrom(graphics,par)
importFrom(graphics,plot)
importFrom(graphics,points)
importFrom(graphics,text)
importFrom(grid,gpar)
importFrom(grid,textGrob)
importFrom(gridExtra,grid.arrange)
importFrom(limma,plotMDS)
importFrom(rlang,.data)
importFrom(rmarkdown,render)
importFrom(scales,log10_trans)
importFrom(scales,log_breaks)
importFrom(scales,math_format)
importFrom(scales,trans_breaks)
importFrom(scales,trans_format)
importFrom(scales,trans_new)
importFrom(stats,coefficients)
importFrom(stats,density)
importFrom(stats,dist)
importFrom(stats,dnorm)
Expand All @@ -44,5 +62,6 @@ importFrom(utils,head)
importFrom(utils,installed.packages)
importFrom(utils,packageVersion)
importFrom(utils,read.table)
importFrom(utils,stack)
importFrom(utils,tail)
importFrom(utils,write.table)
13 changes: 10 additions & 3 deletions NEWS
Original file line number Diff line number Diff line change
@@ -1,3 +1,10 @@
CHANGES IN VERSION 1.7.0
------------------------
o plots are now generated using ggplot2
o pairwise scatter plot is no longer produced with more than 12 samples
o PCA and MDS labels are placed using geom_text_repel()
o tables in the HTML report are produced with kable() instead of xtable()

CHANGES IN VERSION 1.6.9
------------------------
o new vignette style
Expand All @@ -11,7 +18,7 @@ CHANGES IN VERSION 1.6.8

CHANGES IN VERSION 1.6.7
------------------------
o modified the installation guidelines for the Bioconductor packages
o modified the installation guidelines for the Bioconductor packages
o new parameters in the MAPlot() and volcanoPlot() functions thanks to Ken Field suggestion on GitHub

CHANGES IN VERSION 1.6.6
Expand All @@ -20,11 +27,11 @@ CHANGES IN VERSION 1.6.6

CHANGES IN VERSION 1.6.5
------------------------
o added the "stat" column of the DESeq2 results to the exported tables
o added the "stat" column of the DESeq2 results to the exported tables

CHANGES IN VERSION 1.6.4
------------------------
o fixed a bug when in loadCountData() if gene IDs contain only numeric values
o fixed a bug when in loadCountData() if gene IDs contain only numeric values

CHANGES IN VERSION 1.6.3
------------------------
Expand Down
21 changes: 19 additions & 2 deletions R/BCVPlot.R
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,24 @@
#' @author Marie-Agnes Dillies and Hugo Varet

BCVPlot <- function(dge, outfile=TRUE){
if (outfile) png(filename="figures/BCV.png", width=1800, height=1800, res=300)
plotBCV(dge, las = 1, main = "BCV plot")
if (outfile) png(filename="figures/BCV.png", width=2100, height=1800, res=300)
A <- dge$AveLogCPM
if (is.null(A)) A <- aveLogCPM(dge$counts, offset = getOffset(dge))
disp <- getDispersion(dge)
if (is.null(disp)) stop("No dispersions to plot")
if (attr(disp, "type") == "common") disp <- rep(disp, length = length(A))
d <- data.frame(A=A,
sqrtdisp=sqrt(disp),
sqrttagwise=sqrt(dge$tagwise.dispersion),
sqrttrended=sqrt(dge$trended.dispersion),
sqrtcommon=sqrt(dge$common.dispersion))
print(ggplot() +
scale_color_manual("", values=c("black", "blue", "red"), labels=c("Tagwise", "Trend", "Common")) +
geom_point(data=d, mapping=aes(x=.data$A, y=.data$sqrttagwise, color="a"), size=0.5, alpha=0.5) +
geom_line(data=d, mapping=aes(x=.data$A, y=.data$sqrttrended, color="b")) +
geom_hline(data=d, aes(yintercept=.data$sqrtcommon, color="c")) +
xlab("Average log CPM") +
ylab("Biological coefficient of variation") +
ggtitle("BCV plot"))
if (outfile) dev.off()
}
52 changes: 29 additions & 23 deletions R/MAPlot.R
Original file line number Diff line number Diff line change
Expand Up @@ -10,30 +10,36 @@
#' @author Marie-Agnes Dillies and Hugo Varet

MAPlot <- function(complete, alpha=0.05, outfile=TRUE, log2FClim=NULL){
ncol <- ifelse(length(complete)<=4, ceiling(sqrt(length(complete))), 3)
ncol <- min(2, length(complete))
nrow <- ceiling(length(complete)/ncol)
if (outfile) png(filename="figures/MAPlot.png", width=cairoSizeWrapper(1800*ncol), height=cairoSizeWrapper(1800*nrow), res=300)
par(mfrow=c(nrow,ncol))
for (name in names(complete)){
complete.name <- complete[[name]]
complete.name <- complete.name[complete.name$baseMean>0,]
complete.name$padj <- ifelse(is.na(complete.name$padj),1,complete.name$padj)
log2FC <- complete.name$log2FoldChange
if (is.null(log2FClim)){
ylim <- 1.1 * c(-1,1) * quantile(abs(log2FC[is.finite(log2FC)]), probs=0.99)
} else{
ylim <- log2FClim
}
plot(complete.name$baseMean, pmax(ylim[1], pmin(ylim[2], log2FC)),
log = "x", cex=0.45, las = 1, ylim = ylim,
col = ifelse(complete.name[,"padj"] < alpha, "red", "black"),
pch = ifelse(log2FC<ylim[1], 6, ifelse(log2FC>ylim[2], 2, 20)),
xlab = "Mean of normalized counts",
ylab = expression(log[2]~fold~change),
main = paste0("MA-plot - ",gsub("_"," ",name)))
abline(h=0, lwd=1, col="lightgray")
}
p <- list()
for (name in names(complete)){
complete.name <- complete[[name]]
complete.name <- complete.name[which(complete.name$baseMean>0),]
complete.name$padj <- ifelse(is.na(complete.name$padj), 1, complete.name$padj)
complete.name$DE <- factor(ifelse(complete.name$padj <= alpha, "yes", "no"), levels=c("no", "yes"))
py <- complete.name$log2FoldChange
if (is.null(log2FClim)) ymax <- quantile(abs(py[is.finite(py)]), probs=0.99) else ymax <- log2FClim
complete.name$log2FoldChange[which(py > ymax)] <- ymax
complete.name$log2FoldChange[which(py < -ymax)] <- -ymax
complete.name$outfield <- factor(ifelse(py > ymax, "top", ifelse(py < -ymax, "bottom", "in")),
levels=c("bottom", "in", "top"))
p[[name]] <- ggplot(data=complete.name,
aes(x=.data$baseMean, y=.data$log2FoldChange, color=.data$DE, fill=.data$DE, shape=.data$outfield)) +
scale_x_continuous(trans = log10_trans(),
breaks = trans_breaks("log10", function(x) 10^x),
labels = trans_format("log10", math_format(~10^.x))) +
geom_point(show.legend=FALSE, alpha=0.5, size=0.8) +
scale_colour_manual(values=c("no"="black", "yes"="red"), drop=FALSE) +
scale_shape_manual(values=c("bottom"=25, "in"=21, "top"=24), drop=FALSE) +
scale_fill_manual(values=c("no"="black", "yes"="red"), drop=FALSE) +
scale_y_continuous(expand=expand_scale(mult=c(0.03, 0.03))) +
xlab("Mean of normalized counts") +
ylab(expression(log[2]~fold~change)) +
ggtitle(paste0("MA-plot - ", gsub("_"," ",name)))
}
tmpfun <- function(...) grid.arrange(..., nrow=nrow, ncol=ncol)
do.call(tmpfun, p)
if (outfile) dev.off()
}


21 changes: 13 additions & 8 deletions R/MDSPlot.R
Original file line number Diff line number Diff line change
Expand Up @@ -11,16 +11,21 @@
#' @return A file named MDS.png in the figures directory
#' @author Marie-Agnes Dillies and Hugo Varet

MDSPlot <- function(dge, group, n=min(500,nrow(dge$counts)), gene.selection=c("pairwise", "common"),
MDSPlot <- function(dge, group, n=min(500, nrow(dge$counts)), gene.selection=c("pairwise", "common"),
col=c("lightblue","orange","MediumVioletRed","SpringGreen"), outfile=TRUE){
if (outfile) png(filename="figures/MDS.png", width=1800, height=1800, res=300)
coord <- plotMDS(dge, top=n, method="logFC", gene.selection=gene.selection[1], plot=FALSE)
abs=range(coord$x); abs=abs(abs[2]-abs[1])/25;
ord=range(coord$y); ord=abs(ord[2]-ord[1])/25;
plot(coord$x,coord$y, col=col[as.integer(group)], las=1, main="Multi-Dimensional Scaling plot",
xlab="Leading logFC dimension 1", ylab="Leading logFC dimension 2", cex=2, pch=16)
abline(h=0,v=0,lty=2,col="lightgray")
text(coord$x - ifelse(coord$x>0,abs,-abs), coord$y - ifelse(coord$y>0,ord,-ord),
colnames(dge$counts), col=col[as.integer(group)])
coord <- as.data.frame(coord)
d <- data.frame(coord[,c("x", "y")],
group=group,
sample=factor(row.names(coord), levels=row.names(coord)))
print(ggplot(data=d, aes(x=.data$x, y=.data$y, color=group, label=sample)) +
geom_point(show.legend=TRUE, size=3) +
labs(color="") +
scale_colour_manual(values=col) +
geom_text_repel(show.legend=FALSE, size=5, point.padding=0.2) +
xlab("Leading logFC dimension 1") +
ylab("Leading logFC dimension 2") +
ggtitle("Multi-Dimensional Scaling plot"))
if (outfile) dev.off()
}
23 changes: 16 additions & 7 deletions R/NAMESPACE.R
Original file line number Diff line number Diff line change
@@ -1,14 +1,23 @@
#' @exportPattern ^[a-zA-Z]
#' @import DESeq2
#' @import edgeR
#' @import xtable
#' @importFrom grDevices col2rgb dev.off png
#' @import ggplot2
#' @import kableExtra
#' @importFrom genefilter shorth
#' @importFrom GGally ggpairs ggally_points wrap
#' @importFrom ggdendro ggdendrogram
#' @importFrom ggrepel geom_text_repel
#' @importFrom graphics abline barplot boxplot curve hist legend lines pairs par plot points text
#' @importFrom stats density dist dnorm formula hclust lm model.matrix p.adjust.methods prcomp quantile relevel sd var
#' @importFrom utils combn head installed.packages packageVersion read.table tail write.table
#' @importFrom grid textGrob gpar
#' @importFrom gridExtra grid.arrange
#' @importFrom grDevices col2rgb dev.off png
#' @importFrom limma plotMDS
#' @importFrom RGraphics splitTextGrob
#' @importFrom rlang .data
#' @importFrom rmarkdown render
#' @importFrom SummarizedExperiment assay colData
#' @importFrom S4Vectors mcols metadata
#' @importFrom limma plotMDS
#' @importFrom genefilter shorth
#' @importFrom scales trans_breaks log10_trans trans_new trans_format log_breaks math_format
#' @importFrom SummarizedExperiment assay colData
#' @importFrom stats coefficients density dist dnorm formula hclust lm model.matrix p.adjust.methods prcomp quantile relevel sd var
#' @importFrom utils stack combn head installed.packages packageVersion read.table tail write.table
NULL
47 changes: 21 additions & 26 deletions R/PCAPlot.R
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
#' @return A file named PCA.png in the figures directory with a pairwise plot of the three first principal components
#' @author Marie-Agnes Dillies and Hugo Varet

PCAPlot <- function(counts.trans, group, n=min(500,nrow(counts.trans)),
PCAPlot <- function(counts.trans, group, n=min(500, nrow(counts.trans)),
col=c("lightblue","orange","MediumVioletRed","SpringGreen"),
outfile=TRUE){
# PCA on the 500 most variables features
Expand All @@ -20,31 +20,26 @@ PCAPlot <- function(counts.trans, group, n=min(500,nrow(counts.trans)),
prp <- round(prp[1:3],2)

# create figure
if (outfile) png(filename="figures/PCA.png",width=cairoSizeWrapper(1800*2),height=cairoSizeWrapper(1800),res=300)
par(mfrow=c(1,2))
# axes 1 et 2
abs=range(pca$x[,1]); abs=abs(abs[2]-abs[1])/25;
ord=range(pca$x[,2]); ord=abs(ord[2]-ord[1])/25;
plot(pca$x[,1], pca$x[,2],
las = 1, cex = 2, pch = 16, col = col[as.integer(group)],
xlab = paste0("PC1 (",prp[1],"%)"),
ylab = paste0("PC2 (",prp[2],"%)"),
main = "Principal Component Analysis - Axes 1 and 2")
abline(h=0,v=0,lty=2,col="lightgray")
text(pca$x[,1] - ifelse(pca$x[,1]>0,abs,-abs), pca$x[,2] - ifelse(pca$x[,2]>0,ord,-ord),
colnames(counts.trans), col=col[as.integer(group)])

# axes 1 et 3
abs=range(pca$x[,1]); abs=abs(abs[2]-abs[1])/25;
ord=range(pca$x[,3]); ord=abs(ord[2]-ord[1])/25;
plot(pca$x[,1], pca$x[,3],
las = 1, cex = 2, pch = 16, col = col[as.integer(group)],
xlab = paste0("PC1 (",prp[1],"%)"),
ylab = paste0("PC3 (",prp[3],"%)"),
main = "Principal Component Analysis - Axes 1 and 3")
abline(h=0,v=0,lty=2,col="lightgray")
text(pca$x[,1] - ifelse(pca$x[,1]>0,abs,-abs), pca$x[,3] - ifelse(pca$x[,3]>0,ord,-ord),
colnames(counts.trans), col=col[as.integer(group)])
if (outfile) png(filename="figures/PCA.png", width=cairoSizeWrapper(1900*2), height=cairoSizeWrapper(1800), res=300)

tmpFunction <- function(axes=c(1, 2)){
index1 <- axes[1]
index2 <- axes[2]
d <- data.frame(x=pca$x[,index1], y=pca$x[,index2],
group=group, sample=factor(rownames(pca$x), levels=rownames(pca$x)))
ggplot(data=d, aes(x=.data$x, y=.data$y, color=group, label=sample)) +
geom_point(show.legend=TRUE, size=3) +
labs(color="") +
scale_colour_manual(values=col) +
geom_text_repel(show.legend=FALSE, size=5, point.padding=0.2) +
xlab(paste0("PC", index1, " (",prp[index1],"%)")) +
ylab(paste0("PC", index2, " (",prp[index2],"%)"))
}
p1 <- tmpFunction(c(1, 2))
p2 <- tmpFunction(c(1, 3))
grid.arrange(p1, p2, nrow=1, ncol=2,
top=textGrob("Principal Component Analysis", x=0.01, hjust=0, gp=gpar(fontsize=15)))

if (outfile) dev.off()

return(invisible(pca$x))
Expand Down
20 changes: 12 additions & 8 deletions R/barplotNull.R
Original file line number Diff line number Diff line change
Expand Up @@ -10,15 +10,19 @@
#' @author Marie-Agnes Dillies and Hugo Varet

barplotNull <- function(counts, group, col=c("lightblue","orange","MediumVioletRed","SpringGreen"), outfile=TRUE){
if (outfile) png(filename="figures/barplotNull.png",width=min(3600,1800+800*ncol(counts)/10),height=1800,res=300)
if (outfile) png(filename="figures/barplotNull.png", width=min(3600, 1800+800*ncol(counts)/10), height=1800, res=300)
percentage <- apply(counts, 2, function(x){sum(x == 0)})*100/nrow(counts)
percentage.allNull <- (nrow(counts) - nrow(removeNull(counts)))*100/nrow(counts)
barplot(percentage, las = 2,
col = col[as.integer(group)],
ylab = "Percentage of null counts",
main = "Percentage of null counts per sample",
ylim = c(0,1.2*ifelse(max(percentage)==0,1,max(percentage))))
abline(h = percentage.allNull, lty = 2, lwd = 2)
legend("topright", levels(group), fill=col[1:nlevels(group)], bty="n")
d <- data.frame(percentage=percentage, sample=factor(names(percentage), levels=names(percentage)), group)
print(ggplot(d, aes(x=.data$sample, y=.data$percentage, fill=.data$group)) +
geom_bar(stat="identity", show.legend=TRUE) +
labs(fill="") +
scale_fill_manual(values=col) +
xlab("Samples") +
ylab("Percentage of null counts") +
scale_y_continuous(expand=expand_scale(mult=c(0.01, 0.05))) +
ggtitle("Percentage of null counts per sample") +
theme(axis.text.x=element_text(angle=90, hjust=1, vjust=0.5)) +
geom_hline(yintercept=percentage.allNull, linetype="dashed", color="black", size=1))
if (outfile) dev.off()
}
20 changes: 11 additions & 9 deletions R/barplotTotal.R
Original file line number Diff line number Diff line change
Expand Up @@ -10,14 +10,16 @@
#' @author Marie-Agnes Dillies and Hugo Varet

barplotTotal <- function(counts, group, col=c("lightblue","orange","MediumVioletRed","SpringGreen"), outfile=TRUE){
if (outfile) png(filename="figures/barplotTotal.png",width=min(3600,1800+800*ncol(counts)/10),height=1800,res=300)
libsize <- colSums(counts)/1e6
barplot(libsize,
main = "Total read count per sample (million)",
ylab = "Total read count (million)",
ylim = c(0, max(libsize)*1.2),
col = col[as.integer(group)],
las = 2)
legend("topright", levels(group), fill=col[1:nlevels(group)], bty="n")
if (outfile) png(filename="figures/barplotTotal.png", width=min(3600, 1800+800*ncol(counts)/10), height=1800, res=300)
d <- data.frame(tc=colSums(counts)/1e6, sample=factor(colnames(counts), colnames(counts)), group)
print(ggplot(d, aes(x=.data$sample, y=.data$tc, fill=.data$group)) +
geom_bar(stat="identity", show.legend=TRUE) +
labs(fill="") +
scale_fill_manual(values=col) +
xlab("Samples") +
ylab("Total read count (million)") +
scale_y_continuous(expand=expand_scale(mult=c(0.01, 0.05))) +
ggtitle("Total read count per sample (million)") +
theme(axis.text.x=element_text(angle=90, hjust=1, vjust=0.5)))
if (outfile) dev.off()
}
Loading

0 comments on commit 944ced8

Please sign in to comment.