references.bib

% Encoding: UTF-8


@Article{angel_total_2012,
  Title                    = {Total Nucleic Acid Extraction from Soil},
  Author                   = {Angel, Roey},

  Abstract                 = {The following protocol is intended for the simultaneous extraction of {DNA} and {RNA} from various soil samples along with suggestions on how to tweak the protocol for soil with higher humic content. The protocol has been used by many and results in very high yields of nucleic acids, typically much more than commercial kits. For buffers and solutions used in this protocol, please see accompanying document Buffers and Solutions for {TNA} Extractions.pdf.},
  Date                     = {2012-10-23},
  Doi                      = {10.1038/protex.2012.046},
  ISSN                     = {2043-0116},
  Journaltitle             = {Protocol Exchange},
  Keywords                 = {Humic acids; {mRNA}; {RNA}; Soil},
  Url                      = {http://dx.doi.org/10.1038/protex.2012.046}
}

@Article{chen_gmpr:_2017,
  Title                    = {{GMPR}: A novel normalization method for microbiome sequencing data},
  Author                   = {Chen, Jun and Chen, Li},
  Pages                    = {112565},

  Abstract                 = {Normalization is the first and a critical step in microbiome sequencing (microbiome-Seq) data analysis to account for variable library sizes. Though {RNA}-Seq based normalization methods have been adapted for microbiome-Seq data, they fail to consider the unique characteristics of microbiome-Seq data, which contain a vast number of zeros due to the physical absence or undersampling of the microbes. Normalization methods that specifically address the zero-inflation remain largely undeveloped. Here we propose {GMPR} - a simple but effective normalization method - for zero-inflated sequencing data such as microbiome-Seq data. Simulation studies and analyses of 38 real gut microbiome datasets from 16S {rRNA} gene amplicon sequencing demonstrated the superior performance of the proposed method.},
  Date                     = {2017-02-28},
  Doi                      = {10.1101/112565},
  File                     = {Chen\_Chen\_2017\_.pdf:/home/angel/Work/Literature/PDFs/Chen\_Chen\_2017\_.pdf:application/pdf;Chen\_Chen\_2017\_supp.pdf:/home/angel/Work/Literature/PDFs/Chen\_Chen\_2017\_supp.pdf:application/pdf},
  Journaltitle             = {{bioRxiv}},
  Keywords                 = {{HT} seq; Methods; Bioinfo; Diversity; Stat/Model},
  Langid                   = {english},
  Rights                   = {© 2017, Posted by Cold Spring Harbor Laboratory. This pre-print is available under a Creative Commons License (Attribution-{NonCommercial} 4.0 International), {CC} {BY}-{NC} 4.0, as described at http://creativecommons.org/licenses/by-nc/4.0/},
  Shorttitle               = {{GMPR}},
  Url                      = {http://biorxiv.org/content/early/2017/02/28/112565},
  Urldate                  = {2017-03-06}
}

@Article{davis_simple_2017,
  Title                    = {Simple statistical identification and removal of contaminant sequences in marker-gene and metagenomics data},
  Author                   = {Davis, Nicole M. and Proctor, Diana and Holmes, Susan P. and Relman, David A. and Callahan, Benjamin J.},
  Pages                    = {221499},

  Abstract                 = {The accuracy of microbial community surveys based on marker-gene and metagenomic sequencing ({MGS}) suffers from the presence of contaminants - {DNA} sequences not truly present in the sample. Contaminants come from a variety of sources, including reagents. Appropriate laboratory practices can reduce contamination in {MGS} data, but do not eliminate it. Here we introduce decontam (https://github.com/benjjneb/decontam), an open-source R package which implements a statistical classification procedure for identifying contaminants in {MGS} data. Contaminants are identified on the basis of two widely reproduced signatures: contaminants are more frequent in low-concentration samples, and are often found in negative controls. In a dataset from the human oral microbiome, the classification of amplicon sequence variants by decontam was strongly consistent with prior microscopic observations of microbial taxa in that environment. In both metagenomics and marker-gene measurements of a mock community dilution series, the removal of contaminants identified by decontam substantially reduced technical variation due to differences in reagents and sequencing centers. The application of decontam to two recently published datasets corroborated and extended their conclusions that little evidence existed for an indigenous placenta microbiome, and that some low-frequency taxa seemingly associated with preterm birth were run-specific contaminants. decontam integrates easily with existing {MGS} workflows, and allows researchers to generate more accurate profiles of microbial community composition at little to no additional cost.},
  Date                     = {2017-11-17},
  Doi                      = {10.1101/221499},
  File                     = {Davis\_et\_al\_2017\_.pdf:/home/angel/Documents/Work/Literature/PDFs/Davis\_et\_al\_2017\_.pdf:application/pdf},
  Journaltitle             = {{bioRxiv}},
  Keywords                 = {{HT} seq; {PCR}; Methods; Bioinfo; Diversity; Stat/Model},
  Langid                   = {english},
  Rights                   = {© 2017, Posted by Cold Spring Harbor Laboratory. This pre-print is available under a Creative Commons License (Attribution 4.0 International), {CC} {BY} 4.0, as described at http://creativecommons.org/licenses/by/4.0/},
  Url                      = {https://www.biorxiv.org/content/early/2017/11/17/221499},
  Urldate                  = {2017-12-06}
}

@Article{herbold_flexible_2015,
  Title                    = {A flexible and economical barcoding approach for highly multiplexed amplicon sequencing of diverse target genes},
  Author                   = {Herbold, Craig W. and Pelikan, Claus and Kuzyk, Orest and Hausmann, Bela and Angel, Roey and Berry, David and Loy, Alexander},
  Pages                    = {731},
  Volume                   = {6},

  Abstract                 = {High throughput sequencing of phylogenetic and functional gene amplicons provides tremendous insight into the structure and functional potential of complex microbial communities. Here, we introduce a highly adaptable and economical {PCR} approach to barcoding and pooling libraries of numerous target genes. In this approach, we replace gene- and sequencing platform-specific fusion primers with general, interchangeable barcoding primers, enabling nearly limitless customized barcode-primer combinations. Compared to barcoding with long fusion primers, our multiple-target gene approach is more economical because it overall requires lower number of primers and is based on short primers with generally lower synthesis and purification costs. To highlight our approach, we pooled over 900 different small-subunit {rRNA} and functional gene amplicon libraries obtained from various environmental or host-associated microbial community samples into a single, paired-end Illumina {MiSeq} run. Although the amplicon regions ranged in size from approximately 290 to 720 bp, we found no significant systematic sequencing bias related to amplicon length or gene target. Our results indicate that this flexible multiplexing approach produces large, diverse, and high quality sets of amplicon sequence data for modern studies in microbial ecology.},
  Date                     = {2015},
  Doi                      = {10.3389/fmicb.2015.00731},
  File                     = {Herbold\_et\_al\_2015\_Front\_Microbiol.pdf:/home/angel/Dropbox/Shared/Work/Literature/PDFs/Herbold\_et\_al\_2015\_Front\_Microbiol.pdf:application/pdf},
  Journaltitle             = {Frontiers in Microbiology},
  Shortjournal             = {Front Microbiol},
  Url                      = {http://journal.frontiersin.org/article/10.3389/fmicb.2015.00731/abstract},
  Urldate                  = {2015-07-23}
}

@Article{paulson_differential_2013,
  Title                    = {Differential abundance analysis for microbial marker-gene surveys},
  Author                   = {Paulson, Joseph N. and Stine, O. Colin and Corrada Bravo, H{\'e}ctor and Pop, Mihai},
  Number                   = {12},
  Pages                    = {1200--1202},
  Volume                   = {10},

  Abstract                 = {We introduce a methodology to assess differential abundance in sparse high-throughput microbial marker-gene survey data. Our approach, implemented in the {metagenomeSeq} Bioconductor package, relies on a novel normalization technique and a statistical model that accounts for undersampling---a common feature of large-scale marker-gene studies. Using simulated data and several published microbiota data sets, we show that {metagenomeSeq} outperforms the tools currently used in this field.},
  Date                     = {2013-12},
  Doi                      = {10.1038/nmeth.2658},
  File                     = {Paulson\_et\_al\_2013\_Nat\_Meth.pdf:/home/angel/Work/Literature/PDFs/Paulson\_et\_al\_2013\_Nat\_Meth.pdf:application/pdf},
  ISSN                     = {1548-7091},
  Journaltitle             = {Nature Methods},
  Langid                   = {english},
  Rights                   = {© 2013 Nature Publishing Group, a division of Macmillan Publishers Limited. All Rights Reserved.},
  Shortjournal             = {Nat Meth},
  Url                      = {http://www.nature.com/nmeth/journal/v10/n12/full/nmeth.2658.html},
  Urldate                  = {2016-07-26}
}

@Article{walters_improved_2016,
  Title                    = {Improved bacterial {16S} {rRNA} gene (v4 and v4-5) and fungal internal transcribed spacer marker gene primers for microbial community surveys},
  Author                   = {Walters, William and Hyde, Embriette R. and Berg-Lyons, Donna and Ackermann, Gail and Humphrey, Greg and Parada, Alma and Gilbert, Jack A. and Jansson, Janet K. and Caporaso, J. Gregory and Fuhrman, Jed A. and Apprill, Amy and Knight, Rob},
  Number                   = {1},
  Pages                    = {e00009-15},
  Volume                   = {1},

  Abstract                 = {Designing primers for {PCR}-based taxonomic surveys that amplify a broad range of phylotypes in varied community samples is a difficult challenge, and the comparability of data sets amplified with varied primers requires attention. Here, we examined the performance of modified 16S {rRNA} gene and internal transcribed spacer ({ITS}) primers for archaea/bacteria and fungi, respectively, with nonaquatic samples. We moved primer bar codes to the 5′ end, allowing for a range of different 3′ primer pairings, such as the 515f/926r primer pair, which amplifies variable regions 4 and 5 of the 16S {rRNA} gene. We additionally demonstrated that modifications to the 515f/806r (variable region 4) 16S primer pair, which improves detection of Thaumarchaeota and clade {SAR}11 in marine samples, do not degrade performance on taxa already amplified effectively by the original primer set. Alterations to the fungal {ITS} primers did result in differential but overall improved performance compared to the original primers. In both cases, the improved primers should be widely adopted for amplicon studies. {IMPORTANCE} We continue to uncover a wealth of information connecting microbes in important ways to human and environmental ecology. As our scientific knowledge and technical abilities improve, the tools used for microbiome surveys can be modified to improve the accuracy of our techniques, ensuring that we can continue to identify groundbreaking connections between microbes and the ecosystems they populate, from ice caps to the human body. It is important to confirm that modifications to these tools do not cause new, detrimental biases that would inhibit the field rather than continue to move it forward. We therefore demonstrated that two recently modified primer pairs that target taxonomically discriminatory regions of bacterial and fungal genomic {DNA} do not introduce new biases when used on a variety of sample types, from soil to human skin. 
This confirms the utility of these primers for maintaining currently recommended microbiome research techniques as the state of the art.},
  Date                     = {2016-02-23},
  Doi                      = {10.1128/mSystems.00009-15},
  File                     = {Walters\_et\_al\_2016\_mSystems.pdf:/home/angel/Dropbox/Shared/Work/Literature/PDFs/Walters\_et\_al\_2016\_mSystems.pdf:application/pdf},
  ISSN                     = {2379-5077},
  Journaltitle             = {{mSystems}},
  Keywords                 = {Diversity; {HT} seq; Methods},
  Langid                   = {english},
  Rights                   = {Copyright © 2015 Walters et al.. This is an open-access article distributed under the terms of the Creative Commons Attribution 4.0 International license.},
  Shortjournal             = {{mSystems}},
  Url                      = {http://msystems.asm.org/content/1/1/e00009-15},
  Urldate                  = {2016-02-25}
}

@Article{callahan_bioconductor_2016,
  title        = {Bioconductor workflow for microbiome data analysis: from raw reads to community analyses},
  shorttitle   = {Bioconductor {{Workflow}} for {{Microbiome Data Analysis}}},
  author       = {Callahan, Ben J. and Sankaran, Kris and Fukuyama, Julia A. and McMurdie, Paul J. and Holmes, Susan P.},
  journaltitle = {F1000Research},
  shortjournal = {F1000Research},
  date         = {2016-11-02},
  volume       = {5},
  pages        = {1492},
  doi          = {10.12688/f1000research.8986.2},
  issn         = {2046-1402},
  url          = {https://f1000research.com/articles/5-1492/v2},
  urldate      = {2017-10-11},
  langid       = {english},
  keywords     = {HT seq,Methods,Bioinfo,Review,Diversity,Stat/Model},
  file         = {Callahan_et_al_2016_F1000Research.pdf:/home/angel/Work/Literature/PDFs/Callahan_et_al_2016_F1000Research.pdf:application/pdf},
  timestamp    = {2018-01-24T13:03:33Z},
}

@InCollection{magurran_chapter_2011,
  author     = {Jost, Lou and Chao, Anne and Chazdon, Robin L.},
  editor     = {Magurran, Anne E. and McGill, Brian J.},
  title      = {Compositional Similarity and Beta Diversity},
  booktitle  = {Biological Diversity: Frontiers in Measurement and Assessment},
  chapter    = {6},
  pages      = {66--84},
  publisher  = {{Oxford University Press}},
  isbn       = {978-0-19-958066-8},
  abstract   = {Biological Diversity provides an up to date, authoritative review of the methods of measuring and assessing biological diversity, together with their application. The book's emphasis is on quantifying the variety, abundance, and occurrence of taxa, and on providing objective and clear guidance for both scientists and managers. This is a fast-moving field and one that is the focus of intense research interest. However the rapid development of new methods, the inconsistent and sometimes confusing application of old ones, and the lack of consensus in the literature about the best approach, means that there is a real need for a current synthesis. Biological Diversity covers fundamental measurement issues such as sampling, re-examines familiar diversity metrics (including species richness, diversity statistics, and estimates of spatial and temporal turnover), discusses species abundance distributions and how best to fit them, explores species occurrence and the spatial structure of biodiversity, and investigates alternative approaches used to assess trait, phylogenetic, and genetic diversity. The final section of the book turns to a selection of contemporary challenges such as measuring microbial diversity, evaluating the impact of disturbance, assessing biodiversity in managed landscapes, measuring diversity in the imperfect fossil record, and using species density estimates in management and conservation.},
  date       = {2011},
  eprinttype = {googlebooks},
  file       = {Chapter6_Compositional_similarity_and_beta_diversity.pdf:/home/angel/Work/Literature/PDFs/Chapter6_Compositional_similarity_and_beta_diversity.pdf:application/pdf},
  keywords   = {Ecology,Methods,Diversity,Stat/Model},
  langid     = {english},
  timestamp  = {2018-05-22T09:45:36Z},
}

@Article{mcmurdie_phyloseq_2013,
  author       = {McMurdie, Paul J. and Holmes, Susan},
  title        = {{phyloseq}: An {R} Package for Reproducible Interactive Analysis and Graphics of Microbiome Census Data},
  date         = {2013-04-22},
  volume       = {8},
  number       = {4},
  pages        = {e61217},
  issn         = {1932-6203},
  abstract     = {Background The analysis of microbial communities through DNA sequencing brings many challenges: the integration of different types of data with methods from ecology, genetics, phylogenetics, multivariate statistics, visualization and testing. With the increased breadth of experimental designs now being pursued, project-specific statistical analyses are often needed, and these analyses are often difficult (or impossible) for peer researchers to independently reproduce. The vast majority of the requisite tools for performing these analyses reproducibly are already implemented in R and its extensions (packages), but with limited support for high throughput microbiome census data. Results Here we describe a software project, phyloseq, dedicated to the object-oriented representation and analysis of microbiome census data in R. It supports importing data from a variety of common formats, as well as many analysis techniques. These include calibration, filtering, subsetting, agglomeration, multi-table comparisons, diversity analysis, parallelized Fast UniFrac, ordination methods, and production of publication-quality graphics; all in a manner that is easy to document, share, and modify. We show how to apply functions from other R packages to phyloseq-represented data, illustrating the availability of a large number of open source analysis techniques. We discuss the use of phyloseq with tools for reproducible research, a practice common in other fields but still rare in the analysis of highly parallel microbiome census data. We have made available all of the materials necessary to completely reproduce the analysis and figures included in this article, an example of best practices for reproducible research. Conclusions The phyloseq project for R is a new open-source software package, freely available on the web from both GitHub and Bioconductor.},
  doi          = {10.1371/journal.pone.0061217},
  file         = {McMurdie_Holmes_2013_PLOS_ONE.pdf:/home/angel/Documents/Work/Literature/PDFs/McMurdie_Holmes_2013_PLOS_ONE.pdf:application/pdf},
  journaltitle = {PLOS ONE},
  keywords     = {Bioinfo,Diversity,Meta-omics,Methods,Stat/Model},
  langid       = {english},
  shortjournal = {PLOS ONE},
  shorttitle   = {{phyloseq}},
  timestamp    = {2018-07-10T11:29:05Z},
  url          = {http://journals.plos.org/plosone/article?id=10.1371/journal.pone.0061217},
  urldate      = {2018-07-10},
}

@Article{fernandes_anova-like_2013,
  author       = {Fernandes, Andrew D. and Macklaim, Jean M. and Linn, Thomas G. and Reid, Gregor and Gloor, Gregory B.},
  title        = {{ANOVA}-Like Differential Expression ({ALDEx}) Analysis for Mixed Population {RNA-Seq}},
  date         = {2013-07-02},
  volume       = {8},
  number       = {7},
  pages        = {e67019},
  issn         = {1932-6203},
  abstract     = {Experimental variance is a major challenge when dealing with high-throughput sequencing data. This variance has several sources: sampling replication, technical replication, variability within biological conditions, and variability between biological conditions. The high per-sample cost of RNA-Seq often precludes the large number of experiments needed to partition observed variance into these categories as per standard ANOVA models. We show that the partitioning of within-condition to between-condition variation cannot reasonably be ignored, whether in single-organism RNA-Seq or in Meta-RNA-Seq experiments, and further find that commonly-used RNA-Seq analysis tools, as described in the literature, do not enforce the constraint that the sum of relative expression levels must be one, and thus report expression levels that are systematically distorted. These two factors lead to misleading inferences if not properly accommodated. As it is usually only the biological between-condition and within-condition differences that are of interest, we developed ALDEx, an ANOVA-like differential expression procedure, to identify genes with greater between- to within-condition differences. We show that the presence of differential expression and the magnitude of these comparative differences can be reasonably estimated with even very small sample sizes.},
  doi          = {10.1371/journal.pone.0067019},
  file         = {ALDEx2-vignette:/home/angel/Documents/Work/Literature/PDFs/ALDEx2-vignette.pdf:application/pdf;Fernandes_et_al_2013_PLOS_ONE.pdf:/home/angel/Documents/Work/Literature/PDFs/Fernandes_et_al_2013_PLOS_ONE.pdf:application/pdf},
  journaltitle = {PLOS ONE},
  keywords     = {RNA,Methods,mRNA,Diversity,Stat/Model,Meta-omics},
  langid       = {english},
  shortjournal = {PLOS ONE},
  timestamp    = {2018-06-07T13:19:45Z},
  url          = {http://journals.plos.org/plosone/article?id=10.1371/journal.pone.0067019},
  urldate      = {2018-06-07},
}

@Comment{jabref-meta: databaseType:biblatex;}