From bd1fe130913ef45f2766fbf17edf5e89a2552c8a Mon Sep 17 00:00:00 2001 From: mariecrane Date: Thu, 23 Jul 2020 15:10:02 -0500 Subject: [PATCH 1/2] Add stacked bar plot to viz module Updated viz.R, run_viz.py, and cli.py to include a stacked bar plot option --- .DS_Store | Bin 0 -> 6148 bytes dev_environment.yml | 2 + metaquantome/.DS_Store | Bin 0 -> 6148 bytes metaquantome/cli.py | 10 ++-- metaquantome/modules/run_viz.py | 11 +++-- metaquantome/modules/viz.R | 81 ++++++++++++++++++++++++++++++++ 6 files changed, 96 insertions(+), 8 deletions(-) create mode 100644 .DS_Store create mode 100644 metaquantome/.DS_Store diff --git a/.DS_Store b/.DS_Store new file mode 100644 index 0000000000000000000000000000000000000000..dd0c4dfea3ef781e2ec06b440c82e9568a0a3b47 GIT binary patch literal 6148 zcmeHK%Wl&^6upzC)aD@~fz&R&L1Ix=ZPZAu5RyrgMWlidMz8>s+SsC&Tu+E6jY0_W zhL30MY z=?hjfEwYoTF%rt@lu{ZJUtrq?tAJHt*%Xl9?j4FL!FSt7`TP6L=|t59ZP5gA*30=kwV5zve~b zVqmWBTQtbhLTqoJv)W31_1c=_x{iCxeG?q>aUeo5D%#=jm8?At#c^cf=bUH1;xKxV zCgXnN?g7t5nC8h)C8u!$%IlYD9`kXVkMg)s;{>`xP1J8}OsBi8y-jbo)tYU3)BU|B z;%0L;bDi~jI}dwDCzB7;kF!r-q;6O{v3M%GZSW_2MR2K5r@=7Kc>WF?neWmwNdJgB z^cc7WDdUQipTYX`fjLy*0Sm}K0DnM}3jd|L%<&BR21||Tfiay5)TzuIF_=zA zyJvX5!BV46CuR;G%*@Qp35BWIF}|nh#C(mmvvyO*E;=aX#BDqt1( zuM}YQPS9!NmdxI|aC366_25TvX_8lJR4FLTcB}%q6>q?mq0f^6&^K6WL<`LR2q+nB KVHLPi1%3d8`lvzx literal 0 HcmV?d00001 diff --git a/dev_environment.yml b/dev_environment.yml index 82a5bd1..1031776 100644 --- a/dev_environment.yml +++ b/dev_environment.yml @@ -16,5 +16,7 @@ dependencies: - r-ggplot2==3.0.* - r-base==3.5.1 - r-scico=1.1.0 +- r-dplyr=0.8.0.1 +- r-tidyr=0.8.3 - r-jsonlite=1.5.* - libiconv 1.15.* diff --git a/metaquantome/.DS_Store b/metaquantome/.DS_Store new file mode 100644 index 0000000000000000000000000000000000000000..6db9df334153aaff198372f41b14ac91cdf72ada GIT binary patch literal 6148 zcmeHK!EVz)5S>j;W4Efvp;CMF1&KpdwNWD#p-NULhu)Bk-~d%>*HLT9^+vJNsDvP2 z_z3s|K7|uUK7L`<^aVBFh)bj8Y{`e;w zBBC~(QbrLKxQ0~J*k5YwWoDqM5#w^5-T1J~L zNFr;3nK#(2=Ib{)3A)nNzsqSZ&d9? zRG!Bseko=4JC5R4>2NyetUr@^8KwDf%*kmo1m*4PG*9HTFDH3Y@HnCF;2RA(o3q(o zZ-2|*>-FYa{_M$q7jd^cpL_1cquqm}lZ&f&v-k54pYTX9f#sR)p}}kTjG!jUWjM|= znV+FYWjPS5#EX$F{ur%q=)3tkpla=YDPUd-clCZv+s@W1U=_Fn1=RZB!xaVwD~-H# zpx`3_&_OdBV)bW1<@g2zgOx_~z=Tc(>QrHl7(%C`-8a0zV5L!~lQ4%5VeTx<2}SVT zF}}}q5`jirS_P~EiwZ31YDew=&;NY>Ure$!tAJJDzfwRrhv8u#k7V}N#>1(-)`y?K ql__4OkyB8Z?U)0#6>q}X(C4cF7#OTHq6KFE2(S#cunOF%0>1zqf~q?J literal 0 HcmV?d00001 diff --git a/metaquantome/cli.py b/metaquantome/cli.py index ede63a0..4ed4afa 100755 --- a/metaquantome/cli.py +++ b/metaquantome/cli.py @@ -216,7 +216,7 @@ def parse_args_cli(): help='Perform paired tests.') # ---- METAQUANTOME VIZ ---- # - parser_viz.add_argument('--plottype', '-p', required=True, choices=['bar', 'volcano', 'heatmap', 'pca', 'ft_dist'], + parser_viz.add_argument('--plottype', '-p', required=True, choices=['bar', 'volcano', 'heatmap', 'pca', 'ft_dist', 'stacked_bar'], help="Select the type of plot to generate.") parser_viz.add_argument('--img', required=True, help='Path to the PNG image file (must end in ".png").') @@ -231,19 +231,19 @@ def parse_args_cli(): parser_viz.add_argument('--tabfile', default=None, help="Optional. File to write plot table to.") - bar = parser_viz.add_argument_group('Arguments for barplots - both total taxonomy peptide intensity ("bar") and ' + - 'function-taxonomy interaction distributions ("ft_dist")') + bar = parser_viz.add_argument_group('Arguments for barplots - including total taxonomy peptide intensity ("bar"), function-taxonomy ' + + 'interaction distributions ("ft_dist"), and stacked taxonomy bar plots ("stacked_bar")') bar.add_argument('--meancol', help="(Tax bar and FT dist). Mean intensity column name for desired experimental conditio.") bar.add_argument('--nterms', default='5', - help="(Tax bar and FT dist). Number of taxa or functional terms to display. The default is 5.") + help="(Tax bar, FT dist, and stacked bar). Number of taxa or functional terms to display. The default is 5.") bar.add_argument('--barcol', type=check_col_range, default="6", help="(Tax bar and FT dist). Color for the bar fill. The color vector in R is " + 'c("dodgerblue", "darkorange", "yellow2", "red2", "darkviolet", "black"), ' + ' so providing a 1 will give the "dodgerblue" color. These same colors are also used in the ' + ' heatmap and PCA plot, so the colors can be tweaked to match. ') bar.add_argument('--target_rank', - help="(Tax bar and FT dist). Taxonomic rank to restrict to in the plot. ") + help="(Tax bar, FT dist, and stacked bar). Taxonomic rank to restrict to in the plot. ") bar.add_argument('--target_onto', choices=["mf", "bp", "cc"], help="(Function and FT dist bar only) " + "Ontology to restrict to, for function distribution.") diff --git a/metaquantome/modules/run_viz.py b/metaquantome/modules/run_viz.py index f139a5b..f26ab15 100644 --- a/metaquantome/modules/run_viz.py +++ b/metaquantome/modules/run_viz.py @@ -7,7 +7,7 @@ def run_viz(plottype, img, infile, strip=None, - mode=None, meancol=None, nterms='5', target_rank=None, barcol=6, # barplot + mode=None, meancol=None, nterms='5', target_rank=None, barcol=6, # barplot, stacked_bar textannot=None, fc_name=None, flip_fc=False, gosplit=False, # volcano sinfo=None, filter_to_sig=False, alpha='0.05', # heatmap calculate_sep=False, # pca @@ -35,11 +35,16 @@ def run_viz(plottype, img, infile, strip=None, all_intcols_str = ','.join(samp_grps.all_intcols) json_dump = json.dumps(samp_grps.sample_names) cmd += [all_intcols_str, json_dump, calculate_sep, width, height, strip] - if plottype == "ft_dist": + elif plottype == "ft_dist": cmd += [whichway, name, id, meancol, nterms, width, height, target_rank, target_onto, barcol, tabfile] + if plottype == "stacked_bar": + samp_grps = SampleGroups(sinfo) + all_intcols_str = ','.join(samp_grps.all_intcols) + json_dump = json.dumps(samp_grps.sample_names) + cmd += [all_intcols_str, json_dump, nterms, target_rank, width, height, tabfile] else: - ValueError("Wrong plot type. Must be bar, volcano, heatmap, ft_dist, or pca.") + ValueError("Wrong plot type. Must be bar, volcano, heatmap, ft_dist, stacked_bar, or pca.") # ensure that all elements are strings (even booleans, etc) cmd_string = [str(elem) for elem in cmd] diff --git a/metaquantome/modules/viz.R b/metaquantome/modules/viz.R index 5596dcf..909bdcd 100644 --- a/metaquantome/modules/viz.R +++ b/metaquantome/modules/viz.R @@ -4,8 +4,11 @@ options(stringsAsFactors = FALSE, message=FALSE, warnings=FALSE) ####### ==================== ####### # LIBRARIES # ####### ==================== ####### +suppressMessages(library(dplyr)) +suppressMessages(library(tidyr)) suppressWarnings(suppressMessages(library(ggplot2))) suppressMessages(library(gplots)) +suppressMessages(library(RColorBrewer)) suppressMessages(library(jsonlite)) suppressMessages(library(stringr)) @@ -626,6 +629,84 @@ ft_dist_cli <- function(args){ int_barcol=barcol, tabfile=tabfile) } +####### ==================== ####### +# STACKED BAR # +####### ==================== ####### + +mq_stacked <- function(img, df, all_intcols, json_dump, nterms, target_rank, width, height, tabfile){ + # df is the dataframe after stat + # nterms is the number of taxa to show + + grp_list <- fromJSON(json_dump) + + grp_df <- grp_list %>% + as.data.frame() %>% + pivot_longer(cols = 1:ncol(.), names_to = "samplegroup", values_to = "sample") + + # parse out sample groups, exponentiate, calculate relative abundance + dat <- df %>% + pivot_longer(all_intcols, names_to = "sample", values_to = "abundance") %>% + full_join(grp_df) %>% + mutate(replicate = str_replace(string = sample, pattern = samplegroup, replacement = "")) %>% + replace_na(list(abundance = 0)) %>% + mutate(abundance = 2^abundance) %>% + select(sample, samplegroup, replicate, id, name, rank, abundance) %>% + filter(rank == target_rank) %>% + group_by(sample) %>% + mutate(abundance = 100*abundance/sum(abundance)) + + # reorder taxa levels for plotting + taxa_levels <- names(sort(tapply(dat$abundance, dat$name, sum))) + + # collapse less abundang terms into "Other" if terms exceed desired terms + if(length(unique(dat$name))>nterms){ + topn <- tail(taxa_levels, nterms) + dat<-dat %>% + mutate(name = factor(ifelse(name %in% topn, name, "Other"))) %>% + group_by(sample, samplegroup, replicate, name) %>% + summarise(abundance = sum(abundance)) %>% + group_by(sample) + taxa_levels <- names(sort(tapply(dat$abundance, dat$name, sum))) + # reorder taxa levels for plotting, to have "Other" on top + other_index <- as.numeric(which(taxa_levels == "Other")) + taxa_levels <- c("Other", taxa_levels[1:(other_index-1)], taxa_levels[(other_index+1):length(taxa_levels)]) + } + + # make stacked bar plot + fig <- dat %>% + ggplot(aes(x=replicate, y=abundance, fill=factor(name, levels = taxa_levels)))+ + geom_bar(position="stack", stat = "identity") + + facet_grid(cols = vars(samplegroup)) + + labs(x= "Sample", y="Relative Abundance")+ + scale_fill_brewer(name="Taxa", palette = "Set1") + + # write tabular file + write.table(x = dat, file = tabfile, quote = FALSE, row.names = FALSE) + + # save plot + ggsave(file=img, width=width, height=height, units="in", dpi=300) + +} + +stacked_cli <- function(args){ + + img <- args[2] + infile <- args[3] + df <- read_result(infile) + + # split all_intcols from SampleGroups(), for samp_columns vector + all_intcols <- unlist(strsplit(get_all_intcols(args[4]), split=",")) + + # other args + json_dump <- args[5] + nterms <- as.numeric(args[6]) + target_rank <- toString(args[7]) + width <- as.numeric(args[8]) + height <- as.numeric(args[9]) + tabfile <- args[10] + + mq_stacked(img, df, all_intcols, json_dump, nterms, target_rank, width, height, tabfile) +} ####### ==================== ####### # MAIN # From b80af6eba280634811860517718eec3653497994 Mon Sep 17 00:00:00 2001 From: mariecrane Date: Wed, 29 Jul 2020 10:05:45 -0500 Subject: [PATCH 2/2] Update setup.py Bumped version to 1.1.0 --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index d829d8f..04c6216 100644 --- a/setup.py +++ b/setup.py @@ -1,6 +1,6 @@ from setuptools import setup, find_packages -VERSION = '1.0.0' +VERSION = '1.1.0' URL = 'https://github.com/galaxyproteomics/metaquantome' AUTHOR = 'Caleb Easterly' AUTHOR_EMAIL = 'caleb.easterly@gmail.com'