Skip to content

Commit

Permalink
Updated projectionDriveR function and added volcano plotting function
Browse files Browse the repository at this point in the history
Updated projectionDriveR function with mode argument, user specifies generation of confidence intervals or pvalues

Updated plotting.R with volcano plotting function

Added tests for updated functions
  • Loading branch information
rpalaganas committed Jan 9, 2024
1 parent 5144edc commit a38a12c
Show file tree
Hide file tree
Showing 3 changed files with 420 additions and 188 deletions.
170 changes: 166 additions & 4 deletions R/plotting.R
Original file line number Diff line number Diff line change
Expand Up @@ -9,10 +9,6 @@
# superheat(test$projection,row.dendrogram=TRUE, pretty.order.cols = TRUE,
# heat.pal.values = c(0, 0.5, 1),yt=colSums(test$projection),yt.plot.type='scatterline',yt.axis.name="Sum of\nProjections",X.text=tmp,X.text.size=8,bottom.label.text.angle = 90)
#




#######################################################################################################################################
#'
#' plotConfidenceIntervals
Expand Down Expand Up @@ -137,3 +133,169 @@ plotConfidenceIntervals <- function(
"weights_heatmap" = wt_heatmap))
}

#######################################################################################################################################
#' pdVolcano
#'
#' Generate volcano plot and gate genes based on fold change and pvalue, includes vectors that can be used with fast gene set enrichment (fgsea)
#' @param result result output from projectionDriveR function with PI method selected
#' @param FC fold change threshold, default at 0.2
#' @param pvalue significance threshold, default set to pvalue stored in projectionDriveR output
#' @param subset vector of gene names to subset the plot by
#' @param filter.inf remove genes that have pvalues below machine double minimum value
#' @param label.no Number of genes to label on either side of the volcano plot, default 5
#' @importFrom stats var
#' @importFrom ggrepel geom_label_repel
#' @importFrom ggrepel geom_text_repel
#' @import dplyr
#plot FC, weighted and unweighted. Designed for use with the output of projectionDriveRs
pdVolcano <- function(result,
FC = 0.2,
pvalue = NULL,
subset = NULL,
filter.inf = FALSE,
label.num = 5) {

#if a genelist is provided, use them to subset the output of projectiondrivers
if(!is.null(subset)){
#subset the mean_stats object by provided gene list
result$mean_stats <- result$mean_stats[(which(rownames(result$mean_stats) %in% subset)),]
#subset the weighted_mean_stats object by provided gene list
result$weighted_mean_stats <- result$weighted_mean_stats[(which(rownames(result$weighted_mean_stats) %in% subset)),]

}

if(filter.inf == TRUE){
#remove p values below the machine limit representation for plotting purposes
cat("Filtering", length(which(result$mean_stats$welch_padj <= .Machine$double.xmin)),"unweighted genes and",
length(which(result$weighted_mean_stats$welch_padj <= .Machine$double.xmin)), "weighted genes", "\n")
result$mean_stats <- subset(result$mean_stats, welch_padj > .Machine$double.xmin)
result$weighted_mean_stats <- subset(result$weighted_mean_stats, welch_padj > .Machine$double.xmin)
}

if(is.numeric(FC) == FALSE){
stop('FC must be a number')
}

if(is.null(pvalue) == FALSE) {
pvalue = pvalue
} else {
pvalue <- result$meta_data$pvalue
}

#extract object meta data
metadata <- result$meta_data

#volcano plotting unweighted
#extract unweighted confidence intervals / statistics
mean_stats <- result$mean_stats
#fold change / significance calls
mean_stats$Color <- paste("NS or FC", FC)
mean_stats$Color[mean_stats$welch_padj < pvalue & mean_stats$mean_diff > FC] <- paste("Enriched in", metadata$test_matrix)
mean_stats$Color[mean_stats$welch_padj < pvalue & mean_stats$mean_diff < -FC] <- paste("Enriched in", metadata$reference_matrix)
mean_stats$Color <- factor(mean_stats$Color,
levels = c(paste("NS or FC <", FC), paste("Enriched in", metadata$reference_matrix), paste("Enriched in", metadata$test_matrix)))

#label the most significant genes for enrichment
mean_stats$invert_P <- (-log10(mean_stats$welch_padj)) * (mean_stats$mean_diff)

top_indices <- order(mean_stats$invert_P, decreasing = TRUE)[1:label.num]
bottom_indices <- order(mean_stats$invert_P)[1:label.num]

# Add labels to the dataframe
mean_stats$label <- NA
mean_stats$label[top_indices] <- paste(rownames(mean_stats)[top_indices])
mean_stats$label[bottom_indices] <- paste(rownames(mean_stats)[bottom_indices])

#set custom colors
myColors <- c("gray","red","dodgerblue")
names(myColors) <- levels(mean_stats$Color)
custom_colors <- scale_color_manual(values = myColors, drop = FALSE)

#plot
unweightedvolcano = ggplot(data = mean_stats, aes(x = mean_diff, y = -log10(welch_padj), color = Color, label = label)) +
geom_vline(xintercept = c(FC, -FC), lty = "dashed") +
geom_hline(yintercept = -log10(pvalue), lty = "dashed") +
geom_point(na.rm = TRUE) +
custom_colors +
coord_cartesian(ylim = c(0, 350), xlim = c(-2, 2)) +
ggrepel::geom_text_repel(size = 3, point.padding = 1, color = "black",
min.segment.length = .1, box.padding = 0.15,
max.overlaps = Inf, na.rm = TRUE) +
labs(x = "FC",
y = "Significance (-log10pval)",
color = NULL) +
ggtitle("Differential Expression") +
theme_bw() +
theme(plot.title = element_text(size = 16),
legend.position = "bottom",
axis.title=element_text(size=14),
legend.text = element_text(size=12))

#weighted volcano plot
weighted_mean_stats <- result$weighted_mean_stats
weighted_mean_stats$Color <- paste("NS or FC <", FC)
weighted_mean_stats$Color[weighted_mean_stats$welch_padj < pvalue & weighted_mean_stats$mean_diff > FC] <- paste("Enriched in", metadata$test_matrix)
weighted_mean_stats$Color[weighted_mean_stats$welch_padj < pvalue & weighted_mean_stats$mean_diff < -FC] <- paste("Enriched in", metadata$reference_matrix)
weighted_mean_stats$Color <- factor(weighted_mean_stats$Color,
levels = c(paste("NS or FC <", FC), paste("Enriched in", metadata$reference_matrix), paste("Enriched in", metadata$test_matrix)))

weighted_mean_stats$invert_P <- (-log10(weighted_mean_stats$welch_padj)) * (weighted_mean_stats$mean_diff)


top_indices <- order(weighted_mean_stats$invert_P, decreasing = TRUE)[1:label.num]
bottom_indices <- order(weighted_mean_stats$invert_P)[1:label.num]

# Add labels to the dataframe
weighted_mean_stats$label <- NA
weighted_mean_stats$label[top_indices] <- paste(rownames(weighted_mean_stats)[top_indices])
weighted_mean_stats$label[bottom_indices] <- paste(rownames(weighted_mean_stats)[bottom_indices])

myColors <- c("gray","red","dodgerblue")
names(myColors) <- levels(weighted_mean_stats$Color)
custom_colors <- scale_color_manual(values = myColors, drop = FALSE)

weightedvolcano = ggplot(data = weighted_mean_stats, aes(x = mean_diff, y = -log10(welch_padj), color = Color, label = label)) +
geom_vline(xintercept = c(FC, -FC), lty = "dashed") +
geom_hline(yintercept = -log10(pvalue), lty = "dashed") +
geom_point(na.rm = TRUE) +
custom_colors +
coord_cartesian(ylim = c(0, 350), xlim = c(-2, 2)) +
ggrepel::geom_text_repel(size = 3, point.padding = 1, color = "black",
min.segment.length = .1, box.padding = 0.15,
max.overlaps = Inf, na.rm = TRUE) +
labs(x = "FC",
y = "Significance (-log10pval)",
color = NULL) +
ggtitle("Weighted Differential Expression") +
theme_bw() +
theme(plot.title = element_text(size = 16),
legend.position = "bottom",
axis.title=element_text(size=14),
legend.text = element_text(size=12))

plt <- ggpubr::ggarrange(unweightedvolcano, weightedvolcano, common.legend = TRUE, legend = "bottom")
print(plt)

#return a list of genes that can be used as input to fgsea
difexdf <- subset(mean_stats, Color == paste("Enriched in", metadata$reference_matrix) | Color == paste("Enriched in", metadata$test_matrix))
vec <- difexdf$estimate
names(vec) <- rownames(difexdf)

weighted_difexdf <- subset(weighted_mean_stats, Color == paste("Enriched in", metadata$reference_matrix) | Color == paste("Enriched in", metadata$test_matrix))
weighted_vec <- weighted_difexdf$estimate
names(weighted_vec) <- rownames(weighted_difexdf)
names(vec) <- rownames(difexdf)
vol_result <- list(mean_stats = mean_stats,
weighted_mean_stats = weighted_mean_stats,
sig_genes = result$sig_genes,
difexpgenes = difexdf,
weighted_difexpgenes = weighted_difexdf,
fgseavecs = list(unweightedvec = vec,
weightedvec = weighted_vec),
meta_data = metadata,
plt = plt)
return(vol_result)
}



Loading

0 comments on commit a38a12c

Please sign in to comment.