diff --git a/.DS_Store b/.DS_Store
new file mode 100644
index 0000000..dd0c4df
Binary files /dev/null and b/.DS_Store differ
diff --git a/dev_environment.yml b/dev_environment.yml
index 82a5bd1..1031776 100644
--- a/dev_environment.yml
+++ b/dev_environment.yml
@@ -16,5 +16,7 @@ dependencies:
 - r-ggplot2==3.0.*
 - r-base==3.5.1
 - r-scico=1.1.0
+- r-dplyr=0.8.0.1
+- r-tidyr=0.8.3
 - r-jsonlite=1.5.*
 - libiconv 1.15.*
diff --git a/metaquantome/.DS_Store b/metaquantome/.DS_Store
new file mode 100644
index 0000000..6db9df3
Binary files /dev/null and b/metaquantome/.DS_Store differ
diff --git a/metaquantome/cli.py b/metaquantome/cli.py
index ede63a0..4ed4afa 100755
--- a/metaquantome/cli.py
+++ b/metaquantome/cli.py
@@ -216,7 +216,7 @@ def parse_args_cli():
                              help='Perform paired tests.')
 
     # ---- METAQUANTOME VIZ ---- #
-    parser_viz.add_argument('--plottype', '-p', required=True, choices=['bar', 'volcano', 'heatmap', 'pca', 'ft_dist'],
+    parser_viz.add_argument('--plottype', '-p', required=True, choices=['bar', 'volcano', 'heatmap', 'pca', 'ft_dist', 'stacked_bar'],
                             help="Select the type of plot to generate.")
     parser_viz.add_argument('--img', required=True,
                             help='Path to the PNG image file (must end in ".png").')
@@ -231,19 +231,19 @@ def parse_args_cli():
     parser_viz.add_argument('--tabfile', default=None,
                             help="Optional. File to write plot table to.")
 
-    bar = parser_viz.add_argument_group('Arguments for barplots - both total taxonomy peptide intensity ("bar") and ' +
-                                        'function-taxonomy interaction distributions ("ft_dist")')
+    bar = parser_viz.add_argument_group('Arguments for barplots - including total taxonomy peptide intensity ("bar"), function-taxonomy ' +
+                                        'interaction distributions ("ft_dist"), and stacked taxonomy bar plots ("stacked_bar")')
     bar.add_argument('--meancol',
                      help="(Tax bar and FT dist). Mean intensity column name for desired experimental conditio.")
     bar.add_argument('--nterms', default='5',
-                     help="(Tax bar and FT dist). Number of taxa or functional terms to display. The default is 5.")
+                     help="(Tax bar, FT dist, and stacked bar). Number of taxa or functional terms to display. The default is 5.")
     bar.add_argument('--barcol', type=check_col_range, default="6",
                      help="(Tax bar and FT dist). Color for the bar fill. The color vector in R is " +
                           'c("dodgerblue", "darkorange", "yellow2", "red2", "darkviolet", "black"), ' +
                           ' so providing a 1 will give the "dodgerblue" color. These same colors are also used in the ' +
                           ' heatmap and PCA plot, so the colors can be tweaked to match. ')
     bar.add_argument('--target_rank',
-                     help="(Tax bar and FT dist). Taxonomic rank to restrict to in the plot. ")
+                     help="(Tax bar, FT dist, and stacked bar). Taxonomic rank to restrict to in the plot. ")
     bar.add_argument('--target_onto', choices=["mf", "bp", "cc"],
                      help="(Function and FT dist bar only) " +
                           "Ontology to restrict to, for function distribution.")
diff --git a/metaquantome/modules/run_viz.py b/metaquantome/modules/run_viz.py
index f139a5b..f26ab15 100644
--- a/metaquantome/modules/run_viz.py
+++ b/metaquantome/modules/run_viz.py
@@ -7,7 +7,7 @@
 
 
 def run_viz(plottype, img, infile, strip=None,
-            mode=None, meancol=None, nterms='5', target_rank=None, barcol=6,  # barplot
+            mode=None, meancol=None, nterms='5', target_rank=None, barcol=6,  # barplot, stacked_bar
             textannot=None, fc_name=None, flip_fc=False, gosplit=False,  # volcano
             sinfo=None, filter_to_sig=False, alpha='0.05',  # heatmap
             calculate_sep=False,  # pca
@@ -35,11 +35,16 @@ def run_viz(plottype, img, infile, strip=None,
         all_intcols_str = ','.join(samp_grps.all_intcols)
         json_dump = json.dumps(samp_grps.sample_names)
         cmd += [all_intcols_str, json_dump, calculate_sep, width, height, strip]
-    if plottype == "ft_dist":
+    elif plottype == "ft_dist":
         cmd += [whichway, name, id, meancol, nterms, width, height,
                 target_rank, target_onto, barcol, tabfile]
+    elif plottype == "stacked_bar":
+        samp_grps = SampleGroups(sinfo)
+        all_intcols_str = ','.join(samp_grps.all_intcols)
+        json_dump = json.dumps(samp_grps.sample_names)
+        cmd += [all_intcols_str, json_dump, nterms, target_rank, width, height, tabfile]
     else:
-        ValueError("Wrong plot type. Must be bar, volcano, heatmap, ft_dist, or pca.")
+        raise ValueError("Wrong plot type. Must be bar, volcano, heatmap, ft_dist, stacked_bar, or pca.")
 
     # ensure that all elements are strings (even booleans, etc)
     cmd_string = [str(elem) for elem in cmd]
diff --git a/metaquantome/modules/viz.R b/metaquantome/modules/viz.R
index 5596dcf..909bdcd 100644
--- a/metaquantome/modules/viz.R
+++ b/metaquantome/modules/viz.R
@@ -4,8 +4,11 @@ options(stringsAsFactors = FALSE, message=FALSE, warnings=FALSE)
 ####### ==================== #######
 # LIBRARIES #
 ####### ==================== #######
+suppressMessages(library(dplyr))
+suppressMessages(library(tidyr))
 suppressWarnings(suppressMessages(library(ggplot2)))
 suppressMessages(library(gplots))
+suppressMessages(library(RColorBrewer))
 suppressMessages(library(jsonlite))
 suppressMessages(library(stringr))
 
@@ -626,6 +629,109 @@ ft_dist_cli <- function(args){
                int_barcol=barcol, tabfile=tabfile)
 }
 
+####### ==================== #######
+# STACKED BAR #
+####### ==================== #######
+
+mq_stacked <- function(img, df, all_intcols, json_dump, nterms, target_rank, width, height, tabfile){
+    # Stacked bar plot of the relative abundance of taxa in each sample,
+    # with one facet per sample group.
+    #   img: path of the image file to write
+    #   df: the dataframe after stat; the id, name and rank columns and the
+    #       intensity columns are used
+    #   all_intcols: names of the intensity columns (character vector)
+    #   json_dump: JSON object mapping each sample group to its sample columns
+    #   nterms: the number of taxa to show; the rest are pooled into "Other"
+    #   target_rank: value of the rank column to restrict the plot to
+    #   width, height: size of the image, in inches
+    #   tabfile: file to write the plotted table to, or "None" to skip it
+
+    grp_list <- fromJSON(json_dump)
+
+    # long lookup table from sample to sample group; rep()/unlist() also
+    # handle sample groups that differ in their number of samples
+    grp_df <- data.frame(samplegroup = rep(names(grp_list), lengths(grp_list)),
+                         sample = unlist(grp_list, use.names = FALSE),
+                         stringsAsFactors = FALSE)
+
+    # parse out sample groups, exponentiate, calculate relative abundance
+    # (gather() is used because pivot_longer() needs tidyr >= 1.0.0, while
+    # dev_environment.yml pins tidyr 0.8.3)
+    dat <- df %>%
+        gather(key = "sample", value = "abundance", one_of(all_intcols)) %>%
+        full_join(grp_df, by = "sample") %>%
+        mutate(replicate = str_replace(string = sample, pattern = samplegroup, replacement = "")) %>%
+        # exponentiate before filling, so a missing intensity becomes 0 rather than 2^0
+        mutate(abundance = 2^abundance) %>%
+        replace_na(list(abundance = 0)) %>%
+        select(sample, samplegroup, replicate, id, name, rank, abundance) %>%
+        filter(rank == target_rank) %>%
+        group_by(sample) %>%
+        mutate(abundance = 100*abundance/sum(abundance))
+
+    # reorder taxa levels for plotting
+    taxa_levels <- names(sort(tapply(dat$abundance, dat$name, sum)))
+
+    # collapse less abundant terms into "Other" if terms exceed desired terms
+    if(length(unique(dat$name)) > nterms){
+        topn <- tail(taxa_levels, nterms)
+        dat <- dat %>%
+            mutate(name = ifelse(name %in% topn, name, "Other")) %>%
+            group_by(sample, samplegroup, replicate, name) %>%
+            summarise(abundance = sum(abundance)) %>%
+            group_by(sample)
+        taxa_levels <- names(sort(tapply(dat$abundance, dat$name, sum)))
+        # reorder taxa levels for plotting, to have "Other" on top; setdiff()
+        # also works when "Other" is the first or the last level
+        taxa_levels <- c("Other", setdiff(taxa_levels, "Other"))
+    }
+
+    # the Set1 palette has 9 colors: interpolate it when more are needed
+    if(length(taxa_levels) > 9){
+        fill_scale <- scale_fill_manual(name = "Taxa",
+            values = colorRampPalette(brewer.pal(9, "Set1"))(length(taxa_levels)))
+    } else {
+        fill_scale <- scale_fill_brewer(name = "Taxa", palette = "Set1")
+    }
+
+    # make stacked bar plot
+    fig <- dat %>%
+        ggplot(aes(x=replicate, y=abundance, fill=factor(name, levels = taxa_levels))) +
+        geom_bar(position="stack", stat = "identity") +
+        facet_grid(cols = vars(samplegroup)) +
+        labs(x= "Sample", y="Relative Abundance") +
+        fill_scale
+
+    # write tabular file (tab-separated, as taxon names can contain spaces),
+    # unless no file was requested: run_viz.py passes None as the string "None"
+    if(!is.na(tabfile) && tabfile != "None"){
+        write.table(x = dat, file = tabfile, sep = "\t", quote = FALSE, row.names = FALSE)
+    }
+
+    # save plot, passed explicitly rather than through last_plot()
+    ggsave(filename=img, plot=fig, width=width, height=height, units="in", dpi=300)
+}
+
+stacked_cli <- function(args){
+    # Parse the command line arguments for the stacked bar plot, then draw it
+
+    img <- args[2]
+    infile <- args[3]
+    df <- read_result(infile)
+
+    # split all_intcols from SampleGroups(), for samp_columns vector
+    all_intcols <- unlist(strsplit(get_all_intcols(args[4]), split=","))
+
+    # other args
+    json_dump <- args[5]
+    nterms <- as.numeric(args[6])
+    target_rank <- toString(args[7])
+    width <- as.numeric(args[8])
+    height <- as.numeric(args[9])
+    tabfile <- args[10]
+
+    mq_stacked(img, df, all_intcols, json_dump, nterms, target_rank, width, height, tabfile)
+}
 ####### ==================== #######
 # MAIN #