diff --git a/.dockerignore b/.dockerignore index 3f58cbc..a25fbb2 100644 --- a/.dockerignore +++ b/.dockerignore @@ -3,4 +3,5 @@ rsconnect archives sandbox renv -R_paper \ No newline at end of file +R_paper +R_scripts \ No newline at end of file diff --git a/R_paper/PR2 Primers 18S.Rmd b/R_paper/PR2 Primers 18S.Rmd index 1c2d6a0..c92ff16 100644 --- a/R_paper/PR2 Primers 18S.Rmd +++ b/R_paper/PR2 Primers 18S.Rmd @@ -473,6 +473,8 @@ ncol = 8 fig3 <- list() +fig1 <- list() + ``` @@ -766,13 +768,12 @@ Comments * Five more V4 primer sets are suitable for Illumina 3x250 (max amplicon size = 550 bp) -### Plot only V4 and V9 - For Fig. 1 +### Plot only V4 and V9 - For Fig. 2 ```{r , fig.height=10, fig.width=6} -fig1 <- list() for (one_kingdom in kingdoms){ @@ -886,7 +887,7 @@ for (one_kingdom in kingdoms){ } ``` -### Plot all with panel general and specific +### Plot all with panel general and specific - For Fig S1 ```{r , fig.height=10, fig.width=6} @@ -947,7 +948,7 @@ for (one_kingdom in kingdoms){ ``` -## Mismatches +## Mismatches - for Fig.S1 ### Number of mismatches @@ -1043,7 +1044,7 @@ for (one_kingdom in kingdoms){ facet_wrap(vars(primer_set_label_long), ncol = ncol) ``` -## By supergroup (only for Eukaryota) +## By supergroup (only for Eukaryota) - For Fig. S2 and S5 Comments: @@ -1115,7 +1116,7 @@ for (specific_one in c("general", "specific")) { } ``` -### Number of mismatches (Only Eukaryotes) +### Number of mismatches (Only Eukaryotes) - Fig. S2 and S5 ```{r fig.height=20, fig.width=20} @@ -1164,7 +1165,7 @@ for (specific_one in c("general", "specific")) { ``` -## By class for autotrophs (Only EUkaryotes) +## By class for autotrophs (Only EUkaryotes) - Fig. 
S6 ```{r graph_class, fig.height=30, fig.width=20} @@ -1506,15 +1507,15 @@ reorder_primer<- function (x, y) {x+0.01*(y-x)} fig <- ggplot(filter(primer_sets, !is.na(fwd_start))) + geom_segment(aes( - x = fwd_start_yeast, - xend = rev_end_yeast, + x = fwd_start, + xend = rev_end, y = forcats::fct_reorder2(primer_set_name, fwd_start,rev_end, reorder_primer), yend = forcats::fct_reorder2(primer_set_name, fwd_start,rev_end, reorder_primer), color = specific ), size = 3 ) + - geom_text(aes(x = rev_end_yeast + 50, + geom_text(aes(x = rev_end + 50, y = forcats::fct_reorder2(primer_set_name, fwd_start,rev_end, reorder_primer), label = str_c(primer_set_id, gene_region, str_replace_all(primer_set_name, c("_" = " ", "V4"="", "V9"="")), str_replace_na(specificity, ""), sep = " ")), size = 3, hjust = 0 @@ -1534,7 +1535,7 @@ fig <- ggplot(filter(primer_sets, !is.na(fwd_start))) + y = "", legend = "" ) + - xlim(0,2100) + + xlim(0,2200) + coord_cartesian(clip = "off") fig diff --git a/docs/PR2_Primers.html b/docs/PR2_Primers.html index e48e6d5..757990c 100644 --- a/docs/PR2_Primers.html +++ b/docs/PR2_Primers.html @@ -1,35 +1,32 @@ - + + + + - - - - - - - - - -Testing primers against PR2 + + + + Testing primers against PR2 - - - - - - - - - - - - - - - - + + + + + + + + + + + + + - - + + - - - -
- - - + - - - - - - - - -

3 Constants

-
max_mismatch = 2
-gene_selected = "18S_rRNA"  # Can be 18S_rRNA or 16_rRNA
-rda_file_label = "_all"  # This label is added at the end of the rda file name
-gene_regions = c("V4", "V9")
-kingdoms = c("Eukaryota", "Bacteria", "Archaea")
-
-# These primers should not be included (semi nested PCR or amplify very few sequences)
-primer_sets_excluded = c(73, 74, 37, 67, 91, 68)
-
-sequence_length_min = 1350
-sequence_length_min_V9 = 1650
-sequence_length_min_V4 = 1200
+
file_param <- "../R_scripts/param_pr2_primers.R"
+source(file_param)
+cat(readChar(file_param, 1e+05))
+
max_mismatch = 2
+gene_selected = "18S_rRNA"  # Can be 18S_rRNA or 16S_rRNA
+rda_file_label = "_all"  # This label is added at the end of the rda file name
+gene_regions = c("V4", "V9")
+kingdoms = c("Eukaryota", "Bacteria", "Archaea")
+
+# These primers should not be included (semi nested PCR or amplify very few sequences)
+primer_sets_excluded = c(73, 74, 37, 67, 91, 68) 
+
+sequence_length_min = 1350
+sequence_length_min_V9 = 1650 
+sequence_length_min_V4 = 1200 

4 Read primer file

Only keep the 18S primers with V region

-
primers <- import("../shiny/data/primers.rds")
-primer_sets <- import("../shiny/data/primer_sets.rds")
-
-
-if (gene_selected == "18S_rRNA") {
-    
-    primers <- primers %>% filter(gene == "18S rRNA", !is.na(doi))
-    
-    
-    primer_sets <- primer_sets %>% filter(gene == "18S rRNA", !is.na(doi), !str_detect(gene_region, "ITS|cloning|full"))
-    
-    
-    gene_regions_all = unique(primer_sets$gene_region)
-    print(gene_regions_all)
-    
-} else if (gene_selected == "16S_rRNA") {
-    primer_sets <- primer_sets %>% filter(specificity == "plastid" | (gene == "18S rRNA" & specificity == 
-        "universal"))
-}
-
 [1] "37F-41F"          "Helix 37 and 37f" "V1-V2"            "V2"               "V2-V3"           
- [6] "V3"               "V3-V4"            "V4"               "V4-V5"            "V5"              
-[11] "V6"               "V6-V8"            "V7"               "V7-V8"            "V7-V9"           
-[16] "V8-V9"            "V9"              
-
primer_sets <- primer_sets %>% filter(!(primer_set_id %in% primer_sets_excluded)) %>% mutate(specific = ifelse(is.na(specificity), 
-    "general", "specific")) %>% relocate(specific, .before = specificity)
-
-
-file_name = str_c("../output/Table_primers_", gene_selected, ".xlsx")
-export(primer_sets, file = file_name, firstActiveRow = 2)
-
-n_primers <- list()
-n_primers[["general"]] <- nrow(filter(primer_sets, specific == "general"))
-n_primers[["specific"]] <- nrow(filter(primer_sets, specific == "specific"))
-n_primers
+
primers <- import("../data/primers.rds")
+primer_sets <- import("../data/primer_sets.rds")
+
+
+if (gene_selected == "18S_rRNA") {
+    
+    primers <- primers %>% filter(gene == "18S rRNA", !is.na(doi))
+    
+    
+    primer_sets <- primer_sets %>% filter(gene == "18S rRNA", !is.na(doi), !str_detect(gene_region, "ITS|cloning|full"))
+    
+    
+    gene_regions_all = unique(primer_sets$gene_region)
+    print(gene_regions_all)
+    
+} else if (gene_selected == "16S_rRNA") {
+    primer_sets <- primer_sets %>% filter(specificity == "plastid" | (gene == "18S rRNA" & specificity == "universal"))
+}
+
 [1] "37F-41F"          "Helix 37 and 37f" "V1-V2"            "V2"               "V2-V3"            "V3"               "V3-V4"           
+ [8] "V4"               "V4-V5"            "V5"               "V6"               "V6-V8"            "V7"               "V7-V8"           
+[15] "V7-V9"            "V8-V9"            "V9"              
+
primer_sets <- primer_sets %>% filter(!(primer_set_id %in% primer_sets_excluded)) %>% mutate(specific = ifelse(is.na(specificity), "general", "specific")) %>% 
+    relocate(specific, .before = specificity)
+
+
+# file_name = str_c('output/Table_primers_',gene_selected, '.xlsx') export(primer_sets, file = file_name, firstActiveRow = 2)
+
+n_primers <- list()
+n_primers[["general"]] <- nrow(filter(primer_sets, specific == "general"))
+n_primers[["specific"]] <- nrow(filter(primer_sets, specific == "specific"))
+n_primers
$general
 [1] 44
 
 $specific
-[1] 24
-
knitr::kable(select(primer_sets, primer_set_name, primer_set_id, fwd_name, rev_name, gene_region, amplicon_size)) %>% 
-    kableExtra::kable_styling()
+[1] 25 +
knitr::kable(select(primer_sets, primer_set_name, primer_set_id, fwd_name, rev_name, gene_region, amplicon_size)) %>% kableExtra::kable_styling()
@@ -2387,26 +2411,6 @@

4 Read primer file

- - - - - - - - + + + + + + + + + + + + + + + +
-UNonMet - -35 - -EUK581-F - -EUK1134-R - -V4 - -578 -
Needham @@ -2887,6 +2891,26 @@

4 Read primer file

+Bass_2020 + +119 + +574*f + +UNonMet DB + +V4 + +566 +
Hugerth_1 @@ -3127,6 +3151,25 @@

4 Read primer file

+UNonMet + +35 + +EUK581-F + +EUK1134-R + +V4 + +
Stokes_2002 @@ -3434,83 +3477,76 @@

5 Computing the matches

6 Read the file for all primer sets and filter

This file is created by the R script script_update files.R

-
# For testing load(file=str_c('../output/pr2_match_', gene_selected ,'_test_mismatches_', max_mismatch,
-# '.rda'))
-
-pr2_match_final <- readRDS(file = str_c("../output/pr2_match_", gene_selected, "_mismatches_", max_mismatch, 
-    ".rds"))
-
-print(str_c("Before filtration: ", nrow(pr2_match_final)))
-
[1] "Before filtration: 11033461"
-
# Replace the gene_region and primer_set_label_short since they can be changed in the database
-primer_sets_labels <- primer_sets %>% mutate(primer_set_label_short = str_c(str_sub(gene_region, 1, 2), sprintf("%03d", 
-    primer_set_id), str_sub(str_replace_na(specificity, replacement = ""), 1, 3), sep = " "), primer_set_label_long = str_c(gene_region, 
-    primer_set_name, "-", str_replace_na(specificity, "general"), sep = " ")) %>% # Remove the last underscore if left by itself
-mutate(primer_set_label_short = str_replace(primer_set_label_short, " $", "")) %>% select(primer_set_id, 
-    primer_set_label_short, primer_set_label_long, gene_region, specific, specificity)
-
-pr2_match_final <- pr2_match_final %>% left_join(primer_sets_labels) %>% # Remove sequences for which the introns have been removed
-filter(!str_detect(pr2_accession, "_UC")) %>% # Remove sequence that are shorter
-filter((str_detect(gene_region, "V4") & sequence_length >= sequence_length_min_V4) | (str_detect(gene_region, 
-    "V9") & sequence_length >= sequence_length_min_V9) | (!str_detect(gene_region, "V4|V9") & sequence_length >= 
-    sequence_length_min)) %>% mutate(mismatch_number = fwd_mismatch_number + rev_mismatch_number) %>% # Only keep the selected primers
-filter(primer_set_id %in% primer_sets$primer_set_id)
-
-print(str_c("After filtration: ", nrow(pr2_match_final)))
-
[1] "After filtration: 6791270"
+
# For testing
+
+pr2_match_final <- readRDS(file = str_c("output/pr2_match_", gene_selected, "_mismatches_", max_mismatch, ".rds"))
+
+print(str_c("Before filtration: ", nrow(pr2_match_final)))
+
[1] "Before filtration: 11053938"
+
# Replace the gene_region and primer_set_label_short since they can be changed in the database
+primer_sets_labels <- primer_sets %>% mutate(primer_set_label_short = str_c(str_sub(gene_region, 1, 2), sprintf("%03d", primer_set_id), str_sub(str_replace_na(specificity, 
+    replacement = ""), 1, 3), sep = " "), primer_set_label_long = str_c(gene_region, primer_set_name, "-", str_replace_na(specificity, "general"), 
+    sep = " ")) %>% # Remove the last underscore if left by itself
+mutate(primer_set_label_short = str_replace(primer_set_label_short, " $", "")) %>% select(primer_set_id, primer_set_label_short, primer_set_label_long, 
+    gene_region, specific, specificity)
+
+pr2_match_final <- pr2_match_final %>% left_join(primer_sets_labels) %>% # Remove sequences for which the introns have been removed
+filter(!str_detect(pr2_accession, "_UC")) %>% # Remove sequence that are shorter
+filter((str_detect(gene_region, "V4") & sequence_length >= sequence_length_min_V4) | (str_detect(gene_region, "V9") & sequence_length >= sequence_length_min_V9) | 
+    (!str_detect(gene_region, "V4|V9") & sequence_length >= sequence_length_min)) %>% mutate(mismatch_number = fwd_mismatch_number + rev_mismatch_number) %>% 
+    # Only keep the selected primers
+filter(primer_set_id %in% primer_sets$primer_set_id)
+
+print(str_c("After filtration: ", nrow(pr2_match_final)))
+
[1] "After filtration: 6807694"

7 Read the summarized tables

-
pr2_match_summary_primer_set <- readRDS(file = str_c("../output/pr2_match_", gene_selected, "_mismatches_", 
-    max_mismatch, "_summary.rds"))
-pr2_match_summary_primer_set_sg <- readRDS(file = str_c("../output/pr2_match_", gene_selected, "_mismatches_", 
-    max_mismatch, "_summary_sg.rds"))
-pr2_match_summary_primer_set_class <- readRDS(file = str_c("../output/pr2_match_", gene_selected, "_mismatches_", 
-    max_mismatch, "_summary_class.rds"))
-
-
-# Long form for Number of sequences
-
-# This dataframe is used to re-order the bars correctly
-pct_category_order <- data.frame(pct_category = c("ampli_pct", "fwd_pct", "rev_pct"), pct_category_order = c(1, 
-    3, 2))
-
-pr2_match_summary_primer_set_long <- pr2_match_summary_primer_set %>% tidyr::pivot_longer(names_to = "pct_category", 
-    values_to = "pct_seq", cols = fwd_pct:ampli_pct) %>% left_join(pct_category_order)
+
pr2_match_summary_primer_set <- readRDS(file = str_c("output/pr2_match_", gene_selected, "_mismatches_", max_mismatch, "_summary.rds"))
+pr2_match_summary_primer_set_sg <- readRDS(file = str_c("output/pr2_match_", gene_selected, "_mismatches_", max_mismatch, "_summary_sg.rds"))
+pr2_match_summary_primer_set_class <- readRDS(file = str_c("output/pr2_match_", gene_selected, "_mismatches_", max_mismatch, "_summary_class.rds"))
+
+
+# Long form for Number of sequences
+
+# This dataframe is used to re-order the bars correctly
+pct_category_order <- data.frame(pct_category = c("ampli_pct", "fwd_pct", "rev_pct"), pct_category_order = c(1, 3, 2))
+
+pr2_match_summary_primer_set_long <- pr2_match_summary_primer_set %>% tidyr::pivot_longer(names_to = "pct_category", values_to = "pct_seq", cols = fwd_pct:ampli_pct) %>% 
+    left_join(pct_category_order)

8 Tables

8.1 Path for tables

-
table_path = "C:/daniel.vaulot@gmail.com/Papers/2020 Vaulot primers/paper-primers-overleaf-1.0/tables/"
+
table_path = "C:/daniel.vaulot@gmail.com/Papers/2020 Vaulot primers/paper-primers-overleaf-1.0/tables/"

8.2 Primers

-
table_primers <- primers %>% mutate(specific = ifelse(is.na(specificity), "general", "specific")) %>% filter(!str_detect(specificity, 
-    "blocking") | is.na(specificity)) %>% relocate(specific, .before = specificity) %>% group_by(direction, 
-    specific) %>% count() %>% arrange(-n) %>% tidyr::pivot_wider(names_from = specific, values_from = n)
+
table_primers <- primers %>% mutate(specific = ifelse(is.na(specificity), "general", "specific")) %>% filter(!str_detect(specificity, "blocking") | 
+    is.na(specificity)) %>% relocate(specific, .before = specificity) %>% group_by(direction, specific) %>% count() %>% arrange(-n) %>% tidyr::pivot_wider(names_from = specific, 
+    values_from = n)

8.3 Primers sets

-
table_primer_sets <- primer_sets %>% group_by(gene_region, specific) %>% count() %>% arrange(gene_region) %>% 
-    tidyr::pivot_wider(names_from = specific, values_from = n) %>% relocate(general, .before = specific)
+
table_primer_sets <- primer_sets %>% group_by(gene_region, specific) %>% count() %>% arrange(gene_region) %>% tidyr::pivot_wider(names_from = specific, 
+    values_from = n) %>% relocate(general, .before = specific)

8.4 Primer sets statistics

-
table_primer_sets_ampli <- pr2_match_summary_primer_set %>% filter(kingdom == "Eukaryota") %>% group_by(specific) %>% 
-    summarise(across(.cols = all_of(c("fwd_pct", "rev_pct", "ampli_pct", "ampli_size_mean")), .fns = list(min = min, 
-        mean = mean, max = max))) %>% tidyr::pivot_longer(cols = matches("min|max|mean"), names_to = "parameter", 
-    values_to = "values") %>% tidyr::pivot_wider(names_from = specific, values_from = values)
+
table_primer_sets_ampli <- pr2_match_summary_primer_set %>% filter(kingdom == "Eukaryota") %>% group_by(specific) %>% summarise(across(.cols = all_of(c("fwd_pct", 
+    "rev_pct", "ampli_pct", "ampli_size_mean")), .fns = list(min = min, mean = mean, max = max))) %>% tidyr::pivot_longer(cols = matches("min|max|mean"), 
+    names_to = "parameter", values_to = "values") %>% tidyr::pivot_wider(names_from = specific, values_from = values)

8.5 Save to Excel file

-
onglets = list(table_primers = table_primers, table_primer_sets = table_primer_sets, table_primer_sets_ampli = table_primer_sets_ampli)
-file_name <- str_c(table_path, "tables_R.xlsx")
-openxlsx::write.xlsx(onglets, file_name, zoom = 90, firstRow = TRUE, firstCol = TRUE)
+
onglets = list(table_primers = table_primers, table_primer_sets = table_primer_sets, table_primer_sets_ampli = table_primer_sets_ampli)
+file_name <- str_c(table_path, "tables_R.xlsx")
+openxlsx::write.xlsx(onglets, file_name, zoom = 90, firstRow = TRUE, firstCol = TRUE)

8.6 Export to Latex tables

-
library(xtable)
+
library(xtable)

8.6.1 Define function to format the tables

    @@ -3519,153 +3555,152 @@

    8.6.1 Define function to format t
  • “X” = “\\cellcolor{gray}”
-
sanitize.italics <- function(str) {
-  str_replace_all(str, c("_" = " ", 
-                         # "ital\\{" = "\\\\textit{", 
-                         "°" = "\\\\degree",
-                         "±"="$\\pm$"))
-}
-
-
-# See: https://www.rdocumentation.org/packages/xtable/versions/1.8-4/topics/print.xtable
-
-latex_table <- function(table, caption, label, file, align, 
-                        scalebox = getOption("xtable.scalebox", NULL), 
-                        digits = 0, 
-                        add.to.row = getOption("xtable.add.to.row", NULL),
-                        include.colnames = TRUE, 
-                        tabular.environment = getOption("xtable.tabular.environment", "tabular") ) {
-  
-    table <- xtable::xtable(table,
-                            label=label,
-                            caption=caption,
-                            align = align,
-                            digits=digits)
-    print(table,  scalebox = scalebox,
-          caption.placement = "top",
-          include.rownames = FALSE,
-          add.to.row = add.to.row,
-          include.colnames = include.colnames, 
-          tabular.environment = tabular.environment,
-          file=file,
-          sanitize.text.function = sanitize.italics)
-    }
+
sanitize.italics <- function(str) {
+  # Text sanitizer handed to print() in latex_table(): turns underscores into
+  # spaces and maps the degree and plus-minus signs to LaTeX commands.
+  #   str : character vector of cell contents
+  # Returns the character vector with the replacements applied.
+  #
+  # stringr treats a single backslash inside a replacement as an escape
+  # character, so a literal LaTeX backslash must be written with four
+  # backslashes in the R source (as the degree rule already does).
+  str_replace_all(str, c("_" = " ", 
+                         # "ital\\{" = "\\\\textit{", 
+                         "°" = "\\\\degree",
+                         "±" = "$\\\\pm$"))
+}
+
+
+# See: https://www.rdocumentation.org/packages/xtable/versions/1.8-4/topics/print.xtable
+
+latex_table <- function(table, caption, label, file, align, 
+                        scalebox = getOption("xtable.scalebox", NULL), 
+                        digits = 0, 
+                        add.to.row = getOption("xtable.add.to.row", NULL),
+                        include.colnames = TRUE, 
+                        tabular.environment = getOption("xtable.tabular.environment", "tabular") ) {
+    # Write `table` to `file` as a LaTeX table.
+    #   table, caption, label, align, digits : forwarded to xtable::xtable()
+    #   scalebox, add.to.row, include.colnames, tabular.environment, file :
+    #       forwarded to print(); the caption is placed on top, row names are
+    #       not written and cell text is passed through sanitize.italics()
+    # Returns whatever print() returns for the xtable object.
+
+    xt <- xtable::xtable(table,
+                         label = label, caption = caption,
+                         align = align, digits = digits)
+
+    print(xt,
+          file = file,
+          scalebox = scalebox,
+          caption.placement = "top",
+          include.rownames = FALSE,
+          include.colnames = include.colnames,
+          add.to.row = add.to.row,
+          tabular.environment = tabular.environment,
+          sanitize.text.function = sanitize.italics)
+    }

8.6.2 File path

-
full_file_name <- function(file_name) {
-    str_c(table_path, file_name)
-}
+
full_file_name <- function(file_name) {
+    # Prefix a bare file name with the table_path set earlier in this document
+    full_path <- str_c(table_path, file_name)
+    full_path
+}

8.6.3 Table - Primers

-
latex_table(table = table_primers, caption = "Type of primers listed in the pr2-primer database. General primers target all eukaryotes and specific only certain taxonomic groups.", 
-    label = "tab:primers", file = full_file_name("table_primers.tex"), align = c("l", "c", "c", "c"), scalebox = 1, 
-    digits = 0)
+
latex_table(table = table_primers, caption = "Type of primers listed in the pr2-primers database. General primers target all eukaryotes and specific primers only certain taxonomic groups.", 
+    label = "tab:primers", file = full_file_name("table_primers.tex"), align = c("l", "c", "c", "c"), scalebox = 1, digits = 0)

8.6.4 Table - Primer sets

-
latex_table(table = table_primer_sets, caption = "Regions of the 18S rRNA gene targeted by the primer sets from the pr2-primer database.", 
-    label = "tab:primer_sets", file = full_file_name("table_primer_sets.tex"), align = c("l", "l", "c", "c"), 
-    scalebox = 1, digits = 0)
+
latex_table(table = table_primer_sets, caption = "Regions of the 18S rRNA gene targeted by the primer sets from the pr2-primers database.", label = "tab:primer_sets", 
+    file = full_file_name("table_primer_sets.tex"), align = c("l", "l", "c", "c"), scalebox = 1, digits = 0)

8.6.5 Table - Primer sets stats

-
latex_table(table = table_primer_sets_ampli, caption = "Overall characteristics of primer sets listed in the PR2 primer database.", 
-    label = "tab:primer_sets_ampli", file = full_file_name("table_primer_sets_ampli.tex"), align = c("l", 
-        "l", "c", "c"), scalebox = 1, digits = 1)
+
latex_table(table = table_primer_sets_ampli, caption = "Overall characteristics of primer sets listed in the pr2-primers database.", label = "tab:primer_sets_ampli", 
+    file = full_file_name("table_primer_sets_ampli.tex"), align = c("l", "l", "c", "c"), scalebox = 1, digits = 1)

8.7 Supplementary Table - Primers

-
table <- primers %>% 
-  select(primer_id, name, sequence, direction, start_yeast, specificity, doi)
- 
-add.to.row <- list()
-add.to.row$pos <- list(0, 0, 0, 0, 0)
-add.to.row$command <- c( 
-  # Header for first page
-  "id & Name & Sequence & Direction & Start (yeast) & Specificity & DOI \\\\\n",
-  "\\endfirsthead \n \\hline \n",
-  
-  # Header for other pages
-  "id & Name & Sequence & Direction & Start (yeast) & Specificity & DOI \\\\\n",
-  "\\hline \n \\endhead \n",
-  
-  # Footers
-  "\\hline \n \\endfoot \n  \\endlastfoot \n"
-)
-
-
-latex_table(table = table,
-            caption = "List of primers in the pr2-primer database ordered by start position relative to the sequence of the yeast \textit{Saccharomyces cerevisiae} (FU970071).",
-            label = "tabsup:primers",
-            file = full_file_name("table_sup_primers.tex"),
-            align = c("l","l","l", "l","l","l","l", "l" ),
-            digits= 0,
-            add.to.row = add.to.row,
-            include.colnames = FALSE, 
-            tabular.environment = "longtable"
-)
+
table <- primers %>% 
+  # Columns of the supplementary primers table, in the same order as the header below
+  select(primer_id, name, sequence, direction, start_yeast, specificity, doi)
+# NOTE(review): the caption below says the rows are ordered by start position,
+# but nothing in this chunk sorts them - confirm `primers` is already sorted.
+ 
+# Column names are switched off in the latex_table() call (include.colnames = FALSE),
+# so the longtable header/footer is supplied by hand: five LaTeX snippets, all
+# inserted at row position 0.
+add.to.row <- list()
+add.to.row$pos <- list(0, 0, 0, 0, 0)
+add.to.row$command <- c( 
+  # Header for first page
+  "id & Name & Sequence & Direction & Start (yeast) & Specificity & DOI \\\\\n",
+  "\\endfirsthead \n \\hline \n",
+  
+  # Header for other pages
+  "id & Name & Sequence & Direction & Start (yeast) & Specificity & DOI \\\\\n",
+  "\\hline \n \\endhead \n",
+  
+  # Footers
+  "\\hline \n \\endfoot \n  \\endlastfoot \n"
+)
+
+
+# The backslash of \textit must be doubled: in an R string a single "\t" is the
+# tab escape, which would turn the LaTeX command into <TAB>extit{...}.
+latex_table(table = table,
+            caption = "List of primers in the pr2-primers database ordered by start position relative to the sequence of the yeast \\textit{Saccharomyces cerevisiae} (FU970071).",
+            label = "tabsup:primers",
+            file = full_file_name("table_sup_primers.tex"),
+            align = c("l","l","l", "l","l","l","l", "l" ),
+            digits= 0,
+            add.to.row = add.to.row,
+            include.colnames = FALSE, 
+            tabular.environment = "longtable"
+)

8.8 Supplementary Table - Primer sets

-
table <- primer_sets %>% 
-  select(primer_set_id, primer_set_name, fwd_name, rev_name, gene_region, specificity, doi) %>% 
-  arrange(primer_set_id)
- 
-add.to.row <- list()
-add.to.row$pos <- list(0, 0, 0, 0, 0)
-add.to.row$command <- c( 
-  # Header for first page
-  "id & Name & Primer fwd & Primer rev & Region & Specificity & DOI \\\\\n",
-  "\\endfirsthead \n \\hline \n",
-  
-  # Header for other pages
-  "id & Name & Primer fwd & Primer rev & Region & Specificity & DOI \\\\\n",
-  "\\hline \n \\endhead \n",
-  
-  # Footers
-  "\\hline \n \\endfoot \n  \\endlastfoot \n"
-)
-
-
-latex_table(table = table,
-            caption = "List of primer sets in the pr2-primer database.",
-            label = "tabsup:primer_sets",
-            file = full_file_name("table_sup_primer_sets.tex"),
-            align = c("l","l","l", "l","l","l","l", "l" ),
-            digits= 0,
-            add.to.row = add.to.row,
-            include.colnames = FALSE, 
-            tabular.environment = "longtable"
-)
+
table <- primer_sets %>% 
+  select(primer_set_id, primer_set_name, fwd_name, rev_name, gene_region, specificity, doi) %>% 
+  arrange(primer_set_id)
+ 
+add.to.row <- list()
+add.to.row$pos <- list(0, 0, 0, 0, 0)
+add.to.row$command <- c( 
+  # Header for first page
+  "id & Name & Primer fwd & Primer rev & Region & Specificity & DOI \\\\\n",
+  "\\endfirsthead \n \\hline \n",
+  
+  # Header for other pages
+  "id & Name & Primer fwd & Primer rev & Region & Specificity & DOI \\\\\n",
+  "\\hline \n \\endhead \n",
+  
+  # Footers
+  "\\hline \n \\endfoot \n  \\endlastfoot \n"
+)
+
+
+latex_table(table = table,
+            caption = "List of primer sets in the pr2-primers database.",
+            label = "tabsup:primer_sets",
+            file = full_file_name("table_sup_primer_sets.tex"),
+            align = c("l","l","l", "l","l","l","l", "l" ),
+            digits= 0,
+            add.to.row = add.to.row,
+            include.colnames = FALSE, 
+            tabular.environment = "longtable"
+)

9 Graphics

9.1 Common parameters

-
ncol = 8
-
-fig3 <- list()
+
ncol = 8
+
+fig3 <- list()
+
+fig1 <- list()

9.2 Amplicon length

9.2.1 Scatter

-
ggplot(filter(pr2_match_final, kingdom == "Eukaryota"), aes(x = primer_set_label_short, y = ampli_size, group = primer_set_id, 
-    color = as.factor(mismatch_number))) + geom_point(size = 3) + theme(axis.text.x = element_text(angle = 45, 
-    hjust = 1)) + labs(x = "Primer set") + scale_color_viridis_d() + facet_wrap(vars(specific), nrow = 2, 
-    scales = "free_x")
+
ggplot(filter(pr2_match_final, kingdom == "Eukaryota"), aes(x = primer_set_label_short, y = ampli_size, group = primer_set_id, 
+    color = as.factor(mismatch_number))) + geom_point(size = 3) + theme(axis.text.x = element_text(angle = 45, 
+    hjust = 1)) + labs(x = "Primer set") + scale_color_viridis_d() + facet_wrap(vars(specific), nrow = 2, 
+    scales = "free_x")

9.2.2 Average size

-
ggplot(filter(pr2_match_final, kingdom == "Eukaryota"), aes(x = primer_set_label_short, y = ampli_size, group = primer_set_id, 
-    fill = as.factor(mismatch_number))) + geom_boxplot(outlier.alpha = 0.3) + theme(axis.text.x = element_text(angle = 45, 
-    hjust = 1)) + labs(x = "Primer set") + scale_fill_viridis_d() + facet_wrap(vars(specific), nrow = 2, 
-    scales = "free_x")
+
ggplot(filter(pr2_match_final, kingdom == "Eukaryota"), aes(x = primer_set_label_short, y = ampli_size, group = primer_set_id, 
+    fill = as.factor(mismatch_number))) + geom_boxplot(outlier.alpha = 0.3) + theme(axis.text.x = element_text(angle = 45, 
+    hjust = 1)) + labs(x = "Primer set") + scale_fill_viridis_d() + facet_wrap(vars(specific), nrow = 2, 
+    scales = "free_x")

@@ -3673,142 +3708,137 @@

9.2.2 Average size

9.3 Example with sets V4 and V9

9.3.1 Plot an example of amplicon distribution (Fig. 3)

-
for (one_primer_set in c(8, 27)) {
-    
-    if (one_primer_set == 8) {
-        xmin = 400
-        xmax = 450
-        xmax2 = 2000
-    } else {
-        xmin = 140
-        xmax = 190
-        xmax2 = 1000
-        
-    }
-    
-    pr2_match_final_one <- pr2_match_final %>% filter(primer_set_id == one_primer_set) %>% filter(kingdom == 
-        "Eukaryota") %>% filter(!(supergroup %in% c("Apusozoa", "Eukaryota_X", "Protoalveoalata")))
-    
-    primer_set_label_long = pr2_match_final_one$primer_set_label_long[1]
-    
-    
-    g <- ggplot(pr2_match_final_one, aes(x = ampli_size)) + geom_density(fill = "blue", alpha = 0.9) + xlab("Amplicon size") + 
-        ggtitle(str_c("Primer set - ", primer_set_label_long)) + xlim(xmin, xmax)
-    
-    print(g)
-    
-    g <- ggplot(pr2_match_final_one, aes(x = ampli_size, fill = supergroup)) + geom_density(alpha = 0.9) + 
-        theme_bw() + # theme(legend.position = 'none') +
-    guides(fill = guide_legend(nrow = 3)) + theme(legend.position = "top", legend.box = "horizontal") + scale_fill_viridis_d(option = "inferno") + 
-        labs(x = "Amplicon size (bp)", y = "Density", fill = "Supergroup") + # ggtitle(str_c('Primer set - ', primer_set_label_long)) +
-    xlim(xmin, xmax)
-    
-    print(g)
-    fig3[[str_c(one_primer_set, " size_distri")]] <- g
-    
-    g <- ggplot(pr2_match_final_one, aes(x = supergroup, y = ampli_size)) + geom_boxplot(outlier.alpha = 0.3) + 
-        theme_bw() + coord_flip() + # theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
-    xlab("Supergroup") + ylab("Amplicon size (bp)") + ylim(0, xmax2) + geom_hline(yintercept = c(450, 550), 
-        linetype = c(2, 3))
-    
-    print(g)
-    fig3[[str_c(one_primer_set, " size")]] <- g
-    
-    g <- ggplot(pr2_match_final_one, aes(x = supergroup, y = ampli_size)) + geom_violin() + coord_flip() + 
-        # theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
-    xlab("Supergroup") + ylab("Amplicon size (bp)")
-    print(g)
-    
-}
-

-
-
-

9.3.2 Plot an example of percent amplification

for (one_primer_set in c(8, 27)) {
     
-    pr2_match_summary_filtered <- pr2_match_summary_primer_set_sg %>% filter((n_seq > 20) & (primer_set_id == 
-        one_primer_set) & (kingdom == "Eukaryota") & !(supergroup %in% c("Apusozoa", "Eukaryota_X")))
-    
-    
-    
-    g <- ggplot(pr2_match_summary_filtered) + geom_col(data = pr2_match_summary_filtered, aes(x = str_c(supergroup, 
-        " - n = ", n_seq), y = ampli_pct, fill = supergroup), position = "dodge") + theme_bw() + # theme(legend.position = 'none') +
-    guides(fill = guide_legend(nrow = 2)) + theme(legend.position = "top", legend.box = "horizontal") + scale_fill_viridis_d(option = "inferno") + 
-        coord_flip() + labs(x = "% of sequences amplified", y = "", legend = "Supergroup") + scale_y_continuous(minor_breaks = seq(0, 
-        100, by = 10), breaks = seq(0, 100, by = 20), limits = c(0, 100))
+    if (one_primer_set == 8) {
+        xmin = 400
+        xmax = 450
+        xmax2 = 2000
+    } else {
+        xmin = 140
+        xmax = 190
+        xmax2 = 1000
+        
+    }
     
-    # ggtitle (str_c('Set - ', one_primer_set, ' - % amplified per Supergroup') )
-    
-    print(g)
-    fig3[[str_c(one_primer_set, " pct")]] <- g
+    pr2_match_final_one <- pr2_match_final %>% filter(primer_set_id == one_primer_set) %>% filter(kingdom == 
+        "Eukaryota") %>% filter(!(supergroup %in% c("Apusozoa", "Eukaryota_X", "Protoalveoalata")))
+    
+    primer_set_label_long = pr2_match_final_one$primer_set_label_long[1]
     
-    g <- ggplot(filter(pr2_match_summary_filtered, !is.nan(ampli_size_mean))) + geom_point(aes(x = str_c(supergroup, 
-        " - n = ", n_seq), y = ampli_size_mean), colour = "black") + coord_flip() + geom_errorbar(aes(x = str_c(supergroup, 
-        " - n = ", n_seq), ymax = ampli_size_mean + ampli_size_sd, ymin = ampli_size_mean - ampli_size_sd)) + 
-        xlab("Supergroup") + ylab("Amplicon size (bp)")  # +
-    # scale_y_continuous(breaks = (1:8)*200, limits = c(0,1500)) + ggtitle (str_c('Set - ', one_primer_set, '
-    # - Amplicon sizes') ) + geom_hline(yintercept = c(450,550) , linetype= 2)
-    
-    print(g)
-    
-    
-    
+    
+    g <- ggplot(pr2_match_final_one, aes(x = ampli_size)) + geom_density(fill = "blue", alpha = 0.9) + xlab("Amplicon size") + 
+        ggtitle(str_c("Primer set - ", primer_set_label_long)) + xlim(xmin, xmax)
+    
+    print(g)
+    
+    g <- ggplot(pr2_match_final_one, aes(x = ampli_size, fill = supergroup)) + geom_density(alpha = 0.9) + 
+        theme_bw() + # theme(legend.position = 'none') +
+    guides(fill = guide_legend(nrow = 3)) + theme(legend.position = "top", legend.box = "horizontal") + scale_fill_viridis_d(option = "inferno") + 
+        labs(x = "Amplicon size (bp)", y = "Density", fill = "Supergroup") + # ggtitle(str_c('Primer set - ', primer_set_label_long)) +
+    xlim(xmin, xmax)
     
-}
-

+ print(g) + fig3[[str_c(one_primer_set, " size_distri")]] <- g + + g <- ggplot(pr2_match_final_one, aes(x = supergroup, y = ampli_size)) + geom_boxplot(outlier.alpha = 0.3) + + theme_bw() + coord_flip() + # theme(axis.text.x = element_text(angle = 45, hjust = 1)) + + xlab("Supergroup") + ylab("Amplicon size (bp)") + ylim(0, xmax2) + geom_hline(yintercept = c(450, 550), + linetype = c(2, 3)) + + print(g) + fig3[[str_c(one_primer_set, " size")]] <- g + + g <- ggplot(pr2_match_final_one, aes(x = supergroup, y = ampli_size)) + geom_violin() + coord_flip() + + # theme(axis.text.x = element_text(angle = 45, hjust = 1)) + + xlab("Supergroup") + ylab("Amplicon size (bp)") + print(g) + +}
+

-
-

9.3.3 Plot an example of mismatches

+
+

9.3.2 Plot an example of percent amplification

for (one_primer_set in c(8, 27)) {
     
-    pr2_mismatches <- pr2_match_summary_primer_set_sg %>% tidyr::pivot_longer(names_to = "mismatch_number", 
-        values_to = "mismatch_pct", cols = contains("ampli_mismatch"), names_prefix = "ampli_mismatch_") %>% 
-        select(-contains("ampli_mismatch")) %>% mutate(mismatch_number = str_sub(mismatch_number, 1, 1)) %>% 
-        mutate(mismatch_number = str_replace(mismatch_number, "5", "5+")) %>% filter((n_seq > 20) & (primer_set_id == 
-        one_primer_set) & (kingdom == "Eukaryota") & !(supergroup %in% c("Apusozoa", "Eukaryota_X")))
-    
-    g <- ggplot(pr2_mismatches, aes(x = str_c(supergroup, " - n = ", n_seq), y = mismatch_pct, group = primer_set_id, 
-        fill = as.factor(mismatch_number))) + geom_col() + theme_bw() + theme(axis.text.y = element_text(angle = 0, 
-        hjust = 0, vjust = 0)) + labs(x = "Supergroup", y = "% of sequences with mismatches", fill = "Mismatches") + 
-        scale_fill_viridis_d(direction = -1, alpha = 0.85) + guides(fill = guide_legend(nrow = 1)) + theme(legend.position = "top", 
-        legend.box = "horizontal") + coord_flip()
-    
-    print(g)
-    
-    fig3[[str_c(one_primer_set, "mismatches", sep = " ")]] <- g
+    pr2_match_summary_filtered <- pr2_match_summary_primer_set_sg %>% filter((n_seq > 20) & (primer_set_id == one_primer_set) & (kingdom == "Eukaryota") & 
+        !(supergroup %in% c("Apusozoa", "Eukaryota_X")))
+    
+    
+    
+    g <- ggplot(pr2_match_summary_filtered) + geom_col(data = pr2_match_summary_filtered, aes(x = str_c(supergroup, " - n = ", n_seq), y = ampli_pct, 
+        fill = supergroup), position = "dodge") + theme_bw() + # theme(legend.position = 'none') +
+    guides(fill = guide_legend(nrow = 2)) + theme(legend.position = "top", legend.box = "horizontal") + scale_fill_viridis_d(option = "inferno") + 
+        coord_flip() + labs(x = "% of sequences amplified", y = "", legend = "Supergroup") + scale_y_continuous(minor_breaks = seq(0, 100, by = 10), 
+        breaks = seq(0, 100, by = 20), limits = c(0, 100))
+    
+    # ggtitle (str_c('Set - ', one_primer_set, ' - % amplified per Supergroup') )
+    
+    print(g)
+    fig3[[str_c(one_primer_set, " pct")]] <- g
     
-    
-    print(g)
-    
-    
-}
-

+ g <- ggplot(filter(pr2_match_summary_filtered, !is.nan(ampli_size_mean))) + geom_point(aes(x = str_c(supergroup, " - n = ", n_seq), y = ampli_size_mean), + colour = "black") + coord_flip() + geom_errorbar(aes(x = str_c(supergroup, " - n = ", n_seq), ymax = ampli_size_mean + ampli_size_sd, ymin = ampli_size_mean - + ampli_size_sd)) + xlab("Supergroup") + ylab("Amplicon size (bp)") # + + # scale_y_continuous(breaks = (1:8)*200, limits = c(0,1500)) + ggtitle (str_c('Set - ', one_primer_set, ' - Amplicon sizes') ) + + # geom_hline(yintercept = c(450,550) , linetype= 2) + + print(g) + + + + +}
+

-
-

9.3.4 Plot an example of mismatches positions

+
+

9.3.3 Plot an example of mismatches

for (one_primer_set in c(8, 27)) {
     
-    df <- pr2_match_final %>% filter(primer_set_id == one_primer_set) %>% filter(kingdom == "Eukaryota") %>% 
-        filter(!(supergroup %in% c("Apusozoa", "Eukaryota_X")))
-    
-    g <- ggplot() + geom_histogram(data = filter(df, !is.na(fwd_mismatch_primer_position_5prime)), aes(x = fwd_mismatch_primer_position_5prime, 
-        fill = supergroup), binwidth = 1, stat = "bin", alpha = 1) + scale_fill_viridis_d(option = "inferno") + 
-        theme_bw() + scale_x_continuous(minor_breaks = 0:30) + labs(y = "Density", x = "Position of mismatches from 5' end", 
-        title = "fwd")
-    
-    print(g)
-    fig3[[str_c(one_primer_set, "mismatches positions fwd", sep = " ")]] <- g
-    
-    g <- ggplot() + geom_histogram(data = filter(df, !is.na(rev_mismatch_primer_position_5prime)), aes(x = rev_mismatch_primer_position_5prime, 
-        fill = supergroup), binwidth = 1, stat = "bin", alpha = 1) + scale_fill_viridis_d(option = "inferno") + 
-        theme_bw() + scale_x_continuous(minor_breaks = 0:30) + labs(y = "Density", x = "Position of mismatches from 5' end", 
-        title = "rev")
-    
-    print(g)
-    fig3[[str_c(one_primer_set, "mismatches positions rev", sep = " ")]] <- g
-    
-}
-

+ pr2_mismatches <- pr2_match_summary_primer_set_sg %>% tidyr::pivot_longer(names_to = "mismatch_number", values_to = "mismatch_pct", cols = contains("ampli_mismatch"), + names_prefix = "ampli_mismatch_") %>% select(-contains("ampli_mismatch")) %>% mutate(mismatch_number = str_sub(mismatch_number, 1, 1)) %>% + mutate(mismatch_number = str_replace(mismatch_number, "5", "5+")) %>% filter((n_seq > 20) & (primer_set_id == one_primer_set) & (kingdom == + "Eukaryota") & !(supergroup %in% c("Apusozoa", "Eukaryota_X"))) + + g <- ggplot(pr2_mismatches, aes(x = str_c(supergroup, " - n = ", n_seq), y = mismatch_pct, group = primer_set_id, fill = as.factor(mismatch_number))) + + geom_col() + theme_bw() + theme(axis.text.y = element_text(angle = 0, hjust = 0, vjust = 0)) + labs(x = "Supergroup", y = "% of sequences with mismatches", + fill = "Mismatches") + scale_fill_viridis_d(direction = -1, alpha = 0.85) + guides(fill = guide_legend(nrow = 1)) + theme(legend.position = "top", + legend.box = "horizontal") + coord_flip() + + print(g) + + fig3[[str_c(one_primer_set, "mismatches", sep = " ")]] <- g + + + print(g) + + +}
+

+
+
+

9.3.4 Plot an example of mismatches positions

+
for (one_primer_set in c(8, 27)) {
+    
+    df <- pr2_match_final %>% filter(primer_set_id == one_primer_set) %>% filter(kingdom == "Eukaryota") %>% filter(!(supergroup %in% c("Apusozoa", 
+        "Eukaryota_X")))
+    
+    g <- ggplot() + geom_histogram(data = filter(df, !is.na(fwd_mismatch_primer_position_5prime)), aes(x = fwd_mismatch_primer_position_5prime, fill = supergroup), 
+        binwidth = 1, stat = "bin", alpha = 1) + scale_fill_viridis_d(option = "inferno") + theme_bw() + scale_x_continuous(minor_breaks = 0:30) + 
+        labs(y = "Density", x = "Position of mismatches from 5' end", title = "fwd")
+    
+    print(g)
+    fig3[[str_c(one_primer_set, "mismatches positions fwd", sep = " ")]] <- g
+    
+    g <- ggplot() + geom_histogram(data = filter(df, !is.na(rev_mismatch_primer_position_5prime)), aes(x = rev_mismatch_primer_position_5prime, fill = supergroup), 
+        binwidth = 1, stat = "bin", alpha = 1) + scale_fill_viridis_d(option = "inferno") + theme_bw() + scale_x_continuous(minor_breaks = 0:30) + 
+        labs(y = "Density", x = "Position of mismatches from 5' end", title = "rev")
+    
+    print(g)
+    fig3[[str_c(one_primer_set, "mismatches positions rev", sep = " ")]] <- g
+    
+}
+

@@ -3829,174 +3859,150 @@

9.4 All Eukaryotes

  • Five more V4 primer sets are suitable for Illumina 3x250 (max amplicon size = 550 bp)
  • -
    -

    9.4.1 Plot only V4 and V9

    -
    fig1 <- list()
    -
    -for (one_kingdom in kingdoms) {
    -    
    -    for (one_region in gene_regions) {
    -        
    -        pr2_match_summary_primer_set_region_long <- pr2_match_summary_primer_set_long %>% filter(kingdom == 
    -            one_kingdom) %>% filter(gene_region == one_region) %>% # Remove the group specific primers
    -        filter(specific == "general") %>% filter(pct_category %in% c("ampli_pct", "fwd_pct", "rev_pct"))
    -        
    -        pr2_match_summary_primer_set_region <- pr2_match_summary_primer_set %>% filter(kingdom == one_kingdom) %>% 
    -            filter(gene_region == one_region) %>% # Remove the group specific primers)
    -        filter(specific == "general")
    -        
    -        pr2_match_region <- pr2_match_final %>% filter(kingdom == one_kingdom) %>% filter(gene_region == 
    -            one_region) %>% # Remove the group specific primers
    -        filter(specific == "general")
    -        
    -        # % Ampli
    -        
    -        g <- ggplot(pr2_match_summary_primer_set_region_long) + geom_col(aes(x = str_replace(str_c(sprintf("%03d", 
    -            primer_set_id), primer_set_label_long, sep = " - "), " - general", ""), y = pct_seq, fill = fct_reorder(pct_category, 
    -            pct_category_order)), width = 0.7, position = "dodge") + theme_bw() + xlab("Primer set") + ylab("% of sequences amplified") + 
    -            scale_fill_manual(name = "", values = c(ampli_pct = "black", fwd_pct = "blue", rev_pct = "red"), 
    -                labels = c("Amplicons", "rev", "fwd")) + ggtitle(str_c(one_kingdom, one_region, "Percentage of sequences recovered", 
    -            sep = " - ")) + theme(axis.text.x = element_text(angle = 0, hjust = 1)) + theme(axis.text.y = element_text(angle = 0, 
    -            hjust = 0, vjust = 0)) + ylim(0, 100) + coord_flip() + guides(fill = guide_legend(nrow = 1)) + 
    -            theme(legend.position = "top", legend.box = "horizontal")
    -        
    -        print(g)
    -        
    -        # Size
    -        
    -        fig1[[str_c(one_kingdom, one_region, "pct", sep = " ")]] <- g
    -        
    -        
    -        g <- ggplot(filter(pr2_match_summary_primer_set_region, !is.nan(ampli_size_mean))) + geom_point(aes(x = str_c(sprintf("%03d", 
    -            primer_set_id), primer_set_label_long, sep = " - "), y = ampli_size_mean), colour = "black") + 
    -            geom_errorbar(aes(x = str_c(sprintf("%03d", primer_set_id), primer_set_label_long, sep = " - "), 
    -                ymax = ampli_size_mean + ampli_size_sd, ymin = ampli_size_mean - ampli_size_sd)) + theme_bw() + 
    -            xlab("Primer set") + ylab("Amplicon size (bp)") + # scale_y_continuous(breaks = (1:8)*200, limits = c(0,1500)) +
    -        ggtitle(str_c(one_kingdom, one_region, "Amplicon size", sep = " - ")) + geom_hline(yintercept = c(450, 
    -            550), linetype = c(2, 3)) + ylim(0, 850) + theme(axis.text.x = element_text(angle = 0, hjust = 1)) + 
    -            theme(axis.text.y = element_text(angle = 0, hjust = 0, vjust = 0)) + coord_flip()
    -        
    -        print(g)
    -        
    -        fig1[[str_c(one_kingdom, one_region, "size", sep = " ")]] <- g
    -        
    -        # Mismatches
    -        
    -        pr2_mismatches <- pr2_match_summary_primer_set %>% filter(kingdom == one_kingdom) %>% tidyr::pivot_longer(names_to = "mismatch_number", 
    -            values_to = "mismatch_pct", cols = contains("ampli_mismatch"), names_prefix = "ampli_mismatch_") %>% 
    -            select(-contains("ampli_mismatch")) %>% mutate(mismatch_number = str_sub(mismatch_number, 1, 
    -            1)) %>% mutate(mismatch_number = str_replace(mismatch_number, "5", "5+")) %>% filter(gene_region == 
    -            one_region) %>% # Remove the group specific primers
    -        filter(specific == "general")
    -        
    -        g <- ggplot(pr2_mismatches, aes(x = str_c(sprintf("%03d", primer_set_id), primer_set_label_long, 
    -            sep = " - "), y = mismatch_pct, group = primer_set_id, fill = as.factor(mismatch_number))) + 
    -            geom_col() + theme_bw() + theme(axis.text.y = element_text(angle = 0, hjust = 0, vjust = 0)) + 
    -            labs(x = "Primer set", y = "% of sequences with mismatches", fill = "Mismatches", title = str_c(one_kingdom, 
    -                one_region, sep = " - ")) + scale_fill_viridis_d(direction = -1, alpha = 0.85) + guides(fill = guide_legend(nrow = 1)) + 
    -            theme(legend.position = "top", legend.box = "horizontal") + coord_flip()
    -        
    -        print(g)
    -        
    -        fig1[[str_c(one_kingdom, one_region, "mismatches", sep = " ")]] <- g
    -        
    -        
    -    }
    -}
    -

    -
    -
    -

    9.4.2 Plot all with panel general and specific

    +
    +

    9.4.1 Plot only V4 and V9 - For Fig. 2

    for (one_kingdom in kingdoms) {
         
    -    for (specific_one in c("general", "specific")) {
    +    for (one_region in gene_regions) {
             
    -        df <- pr2_match_summary_primer_set_long %>% filter(kingdom == one_kingdom) %>% filter(pct_category %in% 
    -            c("ampli_pct", "fwd_pct", "rev_pct")) %>% filter(specific == specific_one)
    -        
    -        g <- ggplot(df) + geom_col(aes(x = str_replace(str_c(primer_set_label_long, sprintf("%03d", primer_set_id), 
    -            sep = " - "), " - general", ""), y = pct_seq, fill = fct_reorder(pct_category, pct_category_order)), 
    -            width = 0.7, position = "dodge") + theme_bw() + xlab("Primer set") + ylab("% of sequences amplified") + 
    -            scale_fill_manual(name = "", values = c(ampli_pct = "black", fwd_pct = "blue", rev_pct = "red"), 
    -                labels = c("Amplicons", "rev", "fwd")) + ggtitle(str_c(one_kingdom, specific_one, "Percentage of sequences recovered", 
    -            sep = " - ")) + theme(axis.text.y = element_text(angle = 0, hjust = 0, vjust = 0)) + ylim(0, 
    -            100) + coord_flip() + guides(fill = guide_legend(nrow = 1)) + theme(legend.position = "top", 
    -            legend.box = "horizontal")
    -        
    +        pr2_match_summary_primer_set_region_long <- pr2_match_summary_primer_set_long %>% filter(kingdom == one_kingdom) %>% filter(gene_region == 
    +            one_region) %>% # Remove the group specific primers
    +        filter(specific == "general") %>% filter(pct_category %in% c("ampli_pct", "fwd_pct", "rev_pct"))
    +        
    +        pr2_match_summary_primer_set_region <- pr2_match_summary_primer_set %>% filter(kingdom == one_kingdom) %>% filter(gene_region == one_region) %>% 
    +            # Remove the group specific primers)
    +        filter(specific == "general")
    +        
    +        pr2_match_region <- pr2_match_final %>% filter(kingdom == one_kingdom) %>% filter(gene_region == one_region) %>% # Remove the group specific primers
    +        filter(specific == "general")
    +        
    +        # % Ampli
             
    -        print(g)
    -        
    -        fig1[[str_c(one_kingdom, specific_one, "pct", sep = " ")]] <- g
    -        
    -        df <- pr2_match_summary_primer_set %>% filter(kingdom == one_kingdom) %>% filter(!is.nan(ampli_size_mean)) %>% 
    -            filter(specific == specific_one)
    +        g <- ggplot(pr2_match_summary_primer_set_region_long) + geom_col(aes(x = str_replace(str_c(sprintf("%03d", primer_set_id), primer_set_label_long, 
    +            sep = " - "), " - general", ""), y = pct_seq, fill = fct_reorder(pct_category, pct_category_order)), width = 0.7, position = "dodge") + 
    +            theme_bw() + xlab("Primer set") + ylab("% of sequences amplified") + scale_fill_manual(name = "", values = c(ampli_pct = "black", fwd_pct = "blue", 
    +            rev_pct = "red"), labels = c("Amplicons", "rev", "fwd")) + ggtitle(str_c(one_kingdom, one_region, "Percentage of sequences recovered", 
    +            sep = " - ")) + theme(axis.text.x = element_text(angle = 0, hjust = 1)) + theme(axis.text.y = element_text(angle = 0, hjust = 0, vjust = 0)) + 
    +            ylim(0, 100) + coord_flip() + guides(fill = guide_legend(nrow = 1)) + theme(legend.position = "top", legend.box = "horizontal")
             
    -        g <- ggplot(df) + geom_point(aes(x = str_c(primer_set_label_long, sprintf("%03d", primer_set_id), 
    -            sep = " - "), y = ampli_size_mean), colour = "black") + geom_errorbar(aes(x = str_c(primer_set_label_long, 
    -            sprintf("%03d", primer_set_id), sep = " - "), ymax = ampli_size_mean + ampli_size_sd, ymin = ampli_size_mean - 
    -            ampli_size_sd)) + theme_bw() + xlab("Primer set") + ylab("Amplicon size (bp)") + # scale_y_continuous(breaks = (1:8)*200, limits = c(0,1500)) +
    -        ggtitle(str_c(one_kingdom, specific_one, " - Amplicon size", sep = " - ")) + geom_hline(yintercept = c(450, 
    -            550), linetype = c(2, 3)) + ylim(0, 900) + theme(axis.text.y = element_text(angle = 0, hjust = 0)) + 
    -            coord_flip()
    -        
    -        print(g)
    -        
    -        fig1[[str_c(one_kingdom, specific_one, "size", sep = " ")]] <- g
    -        
    -    }
    -}
    -

    -
    + print(g) + + # Size + + fig1[[str_c(one_kingdom, one_region, "pct", sep = " ")]] <- g + + + g <- ggplot(filter(pr2_match_summary_primer_set_region, !is.nan(ampli_size_mean))) + geom_point(aes(x = str_c(sprintf("%03d", primer_set_id), + primer_set_label_long, sep = " - "), y = ampli_size_mean), colour = "black") + geom_errorbar(aes(x = str_c(sprintf("%03d", primer_set_id), + primer_set_label_long, sep = " - "), ymax = ampli_size_mean + ampli_size_sd, ymin = ampli_size_mean - ampli_size_sd)) + theme_bw() + xlab("Primer set") + + ylab("Amplicon size (bp)") + # scale_y_continuous(breaks = (1:8)*200, limits = c(0,1500)) + + ggtitle(str_c(one_kingdom, one_region, "Amplicon size", sep = " - ")) + geom_hline(yintercept = c(450, 550), linetype = c(2, 3)) + ylim(0, + 850) + theme(axis.text.x = element_text(angle = 0, hjust = 1)) + theme(axis.text.y = element_text(angle = 0, hjust = 0, vjust = 0)) + coord_flip() + + print(g) + + fig1[[str_c(one_kingdom, one_region, "size", sep = " ")]] <- g + + # Mismatches + + pr2_mismatches <- pr2_match_summary_primer_set %>% filter(kingdom == one_kingdom) %>% tidyr::pivot_longer(names_to = "mismatch_number", values_to = "mismatch_pct", + cols = contains("ampli_mismatch"), names_prefix = "ampli_mismatch_") %>% select(-contains("ampli_mismatch")) %>% mutate(mismatch_number = str_sub(mismatch_number, + 1, 1)) %>% mutate(mismatch_number = str_replace(mismatch_number, "5", "5+")) %>% filter(gene_region == one_region) %>% # Remove the group specific primers + filter(specific == "general") + + g <- ggplot(pr2_mismatches, aes(x = str_c(sprintf("%03d", primer_set_id), primer_set_label_long, sep = " - "), y = mismatch_pct, group = primer_set_id, + fill = as.factor(mismatch_number))) + geom_col() + theme_bw() + theme(axis.text.y = element_text(angle = 0, hjust = 0, vjust = 0)) + labs(x = "Primer set", + y = "% of sequences with mismatches", fill = "Mismatches", title = str_c(one_kingdom, one_region, sep = " - ")) + 
scale_fill_viridis_d(direction = -1, + alpha = 0.85) + guides(fill = guide_legend(nrow = 1)) + theme(legend.position = "top", legend.box = "horizontal") + coord_flip() + + print(g) + + fig1[[str_c(one_kingdom, one_region, "mismatches", sep = " ")]] <- g + + + } +}
    +

    -
    -

    9.5 Mismatches

    -
    -

    9.5.1 Number of mismatches

    +
    +

    9.4.2 Plot all with panel general and specific - For Fig S1

    for (one_kingdom in kingdoms) {
         
         for (specific_one in c("general", "specific")) {
             
    -        pr2_mismatches <- pr2_match_summary_primer_set %>% tidyr::pivot_longer(names_to = "mismatch_number", 
    -            values_to = "mismatch_pct", cols = contains("ampli_mismatch"), names_prefix = "ampli_mismatch_") %>% 
    -            select(-contains("ampli_mismatch")) %>% mutate(mismatch_number = str_sub(mismatch_number, 1, 
    -            1)) %>% mutate(mismatch_number = str_replace(mismatch_number, "5", "5+")) %>% filter(specific == 
    -            specific_one) %>% filter(kingdom == one_kingdom)
    -        
    -        g <- ggplot(pr2_mismatches, aes(x = str_c(primer_set_label_long, sprintf("%03d", primer_set_id), 
    -            sep = " - "), y = mismatch_pct, group = primer_set_id, fill = as.factor(mismatch_number))) + 
    -            geom_col() + theme_bw() + theme(axis.text.y = element_text(angle = 0, hjust = 0, vjust = 0)) + 
    -            labs(x = "Primer set", y = "% of sequences with mismatches", fill = "Mismatches", title = str_c(one_kingdom, 
    -                specific_one, sep = " - ")) + scale_fill_viridis_d(direction = -1, alpha = 0.85) + guides(fill = guide_legend(nrow = 1)) + 
    -            theme(legend.position = "top", legend.box = "horizontal") + coord_flip()
    -        
    -        print(g)
    -        
    -        fig1[[str_c(one_kingdom, specific_one, "mismatches", sep = " ")]] <- g
    -        
    -        
    -    }
    -}
    -

    + df <- pr2_match_summary_primer_set_long %>% filter(kingdom == one_kingdom) %>% filter(pct_category %in% c("ampli_pct", "fwd_pct", "rev_pct")) %>% + filter(specific == specific_one) + + g <- ggplot(df) + geom_col(aes(x = str_replace(str_c(primer_set_label_long, sprintf("%03d", primer_set_id), sep = " - "), " - general", ""), + y = pct_seq, fill = fct_reorder(pct_category, pct_category_order)), width = 0.7, position = "dodge") + theme_bw() + xlab("Primer set") + + ylab("% of sequences amplified") + scale_fill_manual(name = "", values = c(ampli_pct = "black", fwd_pct = "blue", rev_pct = "red"), labels = c("Amplicons", + "rev", "fwd")) + ggtitle(str_c(one_kingdom, specific_one, "Percentage of sequences recovered", sep = " - ")) + theme(axis.text.y = element_text(angle = 0, + hjust = 0, vjust = 0)) + ylim(0, 100) + coord_flip() + guides(fill = guide_legend(nrow = 1)) + theme(legend.position = "top", legend.box = "horizontal") + + + print(g) + + fig1[[str_c(one_kingdom, specific_one, "pct", sep = " ")]] <- g + + df <- pr2_match_summary_primer_set %>% filter(kingdom == one_kingdom) %>% filter(!is.nan(ampli_size_mean)) %>% filter(specific == specific_one) + + g <- ggplot(df) + geom_point(aes(x = str_c(primer_set_label_long, sprintf("%03d", primer_set_id), sep = " - "), y = ampli_size_mean), colour = "black") + + geom_errorbar(aes(x = str_c(primer_set_label_long, sprintf("%03d", primer_set_id), sep = " - "), ymax = ampli_size_mean + ampli_size_sd, + ymin = ampli_size_mean - ampli_size_sd)) + theme_bw() + xlab("Primer set") + ylab("Amplicon size (bp)") + # scale_y_continuous(breaks = (1:8)*200, limits = c(0,1500)) + + ggtitle(str_c(one_kingdom, specific_one, " - Amplicon size", sep = " - ")) + geom_hline(yintercept = c(450, 550), linetype = c(2, 3)) + ylim(0, + 900) + theme(axis.text.y = element_text(angle = 0, hjust = 0)) + coord_flip() + + print(g) + + fig1[[str_c(one_kingdom, specific_one, "size", sep = " ")]] <- g + + } +}
    +

    +
    +
    +
    +

    9.5 Mismatches - for Fig.S1

    +
    +

    9.5.1 Number of mismatches

    +
    for (one_kingdom in kingdoms) {
    +    
    +    for (specific_one in c("general", "specific")) {
    +        
    +        pr2_mismatches <- pr2_match_summary_primer_set %>% tidyr::pivot_longer(names_to = "mismatch_number", values_to = "mismatch_pct", cols = contains("ampli_mismatch"), 
    +            names_prefix = "ampli_mismatch_") %>% select(-contains("ampli_mismatch")) %>% mutate(mismatch_number = str_sub(mismatch_number, 1, 1)) %>% 
    +            mutate(mismatch_number = str_replace(mismatch_number, "5", "5+")) %>% filter(specific == specific_one) %>% filter(kingdom == one_kingdom)
    +        
    +        g <- ggplot(pr2_mismatches, aes(x = str_c(primer_set_label_long, sprintf("%03d", primer_set_id), sep = " - "), y = mismatch_pct, group = primer_set_id, 
    +            fill = as.factor(mismatch_number))) + geom_col() + theme_bw() + theme(axis.text.y = element_text(angle = 0, hjust = 0, vjust = 0)) + labs(x = "Primer set", 
    +            y = "% of sequences with mismatches", fill = "Mismatches", title = str_c(one_kingdom, specific_one, sep = " - ")) + scale_fill_viridis_d(direction = -1, 
    +            alpha = 0.85) + guides(fill = guide_legend(nrow = 1)) + theme(legend.position = "top", legend.box = "horizontal") + coord_flip()
    +        
    +        print(g)
    +        
    +        fig1[[str_c(one_kingdom, specific_one, "mismatches", sep = " ")]] <- g
    +        
    +        
    +    }
    +}
    +

    9.5.2 Position of mismatches (only for Eukaryota)

    -
    ggplot(filter(pr2_match_final, kingdom == "Eukaryota"), aes(x = primer_set_label_long)) + geom_boxplot(aes(y = fwd_mismatch_primer_position_5prime), 
    -    outlier.alpha = 0.3, color = "blue") + coord_flip() + theme_bw() + theme(axis.text.y = element_text(angle = 0, 
    -    vjust = 0, hjust = 0)) + scale_y_continuous(minor_breaks = 0:25, breaks = seq(0, 25, by = 5), limits = c(0, 
    -    25)) + labs(x = "Primer set", y = "Position of mismatches", title = "Primer fwd") + facet_wrap(vars(specific), 
    -    nrow = 2, scales = "free_y")
    -

    -
    ggplot(filter(pr2_match_final, kingdom == "Eukaryota"), aes(x = primer_set_label_long)) + geom_boxplot(aes(y = rev_mismatch_primer_position_5prime), 
    -    outlier.alpha = 0.3, color = "red") + coord_flip() + theme_bw() + theme(axis.text.y = element_text(angle = 0, 
    -    vjust = 0, hjust = 0)) + scale_y_continuous(minor_breaks = 0:25, breaks = seq(0, 25, by = 5), limits = c(0, 
    -    25)) + labs(x = "Primer set", y = "Position of mismatches", title = "Primer rev") + facet_wrap(vars(specific), 
    -    nrow = 2, scales = "free_y")
    -

    -
    ggplot(filter(pr2_match_final, !is.na(rev_mismatch_primer_position_5prime), kingdom == "Eukaryota")) + geom_histogram(aes(x = rev_mismatch_primer_position_5prime), 
    -    color = "red", binwidth = 1, stat = "density") + theme_bw() + scale_x_continuous(minor_breaks = 0:30, 
    -    breaks = seq(0, 30, by = 5), limits = c(0, 30)) + labs(y = "Primer set", x = "Position of mismatches", 
    -    title = "Primer rev") + facet_wrap(vars(primer_set_label_long), ncol = ncol)
    -

    ## By supergroup (only for Eukaryota)

    +
    ggplot(filter(pr2_match_final, kingdom == "Eukaryota"), aes(x = primer_set_label_long)) + geom_boxplot(aes(y = fwd_mismatch_primer_position_5prime), 
    +    outlier.alpha = 0.3, color = "blue") + coord_flip() + theme_bw() + theme(axis.text.y = element_text(angle = 0, vjust = 0, hjust = 0)) + scale_y_continuous(minor_breaks = 0:25, 
    +    breaks = seq(0, 25, by = 5), limits = c(0, 25)) + labs(x = "Primer set", y = "Position of mismatches", title = "Primer fwd") + facet_wrap(vars(specific), 
    +    nrow = 2, scales = "free_y")
    +

    +
    ggplot(filter(pr2_match_final, kingdom == "Eukaryota"), aes(x = primer_set_label_long)) + geom_boxplot(aes(y = rev_mismatch_primer_position_5prime), 
    +    outlier.alpha = 0.3, color = "red") + coord_flip() + theme_bw() + theme(axis.text.y = element_text(angle = 0, vjust = 0, hjust = 0)) + scale_y_continuous(minor_breaks = 0:25, 
    +    breaks = seq(0, 25, by = 5), limits = c(0, 25)) + labs(x = "Primer set", y = "Position of mismatches", title = "Primer rev") + facet_wrap(vars(specific), 
    +    nrow = 2, scales = "free_y")
    +

    +
    ggplot(filter(pr2_match_final, !is.na(rev_mismatch_primer_position_5prime), kingdom == "Eukaryota")) + geom_histogram(aes(x = rev_mismatch_primer_position_5prime), 
    +    color = "red", binwidth = 1, stat = "density") + theme_bw() + scale_x_continuous(minor_breaks = 0:30, breaks = seq(0, 30, by = 5), limits = c(0, 
    +    30)) + labs(y = "Primer set", x = "Position of mismatches", title = "Primer rev") + facet_wrap(vars(primer_set_label_long), ncol = ncol)
    +

    ## By supergroup (only for Eukaryota) - For Fig. S2 and S5

    Comments:

    • Excavata have a very different patterns from the rest of the group. They are not amplified by quite a few primer sets. They have also bigger amplicons
    • @@ -4005,111 +4011,109 @@

      9.5.2 Position of mismatches (onl

    9.5.3 % of Ampli and Amplicon size

    -
    fig_supergroup <- list()
    -ncol = 8
    -
    -for (specific_one in c("general", "specific")) {
    -    
    -    pr2_match_summary_primer_set_sg_region <- pr2_match_summary_primer_set_sg %>% filter(kingdom == "Eukaryota") %>% 
    -        filter(specific == specific_one) %>% filter((n_seq > 20)) %>% filter(!(supergroup %in% c("Apusozoa", 
    -        "Eukaryota_X")))
    -    
    -    
    -    
    -    g <- ggplot(pr2_match_summary_primer_set_sg_region) + geom_col(aes(x = supergroup, y = ampli_pct, fill = supergroup), 
    -        position = "dodge") + theme_bw() + coord_flip() + ylab("% of sequences amplified") + xlab("Supergroup") + 
    -        ggtitle(str_c(specific_one, " primers")) + scale_fill_viridis_d(option = "inferno") + ylim(0, 100) + 
    -        facet_wrap(~primer_set_label_short, scales = "fixed", ncol = ncol) + guides(fill = guide_legend(nrow = 1)) + 
    -        theme(legend.position = "top", legend.box = "horizontal") + theme(legend.position = "none")
    -    
    -    print(g)
    -    
    -    list_label <- str_c("pct", specific_one, sep = " ")
    -    fig_supergroup[[list_label]] <- g
    -    
    -    
    -    
    -    g <- ggplot(filter(pr2_match_summary_primer_set_sg_region, !is.nan(ampli_size_mean))) + geom_point(aes(x = supergroup, 
    -        y = ampli_size_mean), colour = "black") + theme_bw() + coord_flip() + geom_errorbar(aes(x = supergroup, 
    -        ymax = ampli_size_mean + ampli_size_sd, ymin = ampli_size_mean - ampli_size_sd)) + xlab("Supergroup") + 
    -        ylab("Amplicon size (bp)") + ggtitle(str_c(specific_one, " primers")) + geom_hline(yintercept = c(450, 
    -        550), linetype = 2) + facet_wrap(~primer_set_label_short, scales = "fixed", ncol = ncol)
    -    
    -    print(g)
    -    
    -    list_label <- str_c("size", specific_one, sep = " ")
    -    
    -    fig_supergroup[[list_label]] <- g
    -    
    -}
    -

    -
    -
    -

    9.5.4 Number of mismatches (Only Eukaryotes)

    -
    ncol = 8
    -
    -pr2_mismatches_sg <- pr2_match_summary_primer_set_sg %>% filter(kingdom == "Eukaryota") %>% tidyr::pivot_longer(names_to = "mismatch_number", 
    -    values_to = "mismatch_pct", cols = contains("ampli_mismatch"), names_prefix = "ampli_mismatch_") %>% 
    -    select(-contains("ampli_mismatch")) %>% mutate(mismatch_number = str_sub(mismatch_number, 1, 1)) %>% 
    -    mutate(mismatch_number = str_replace(mismatch_number, "5", "5+")) %>% filter((n_seq > 20)) %>% filter(!(supergroup %in% 
    -    c("Apusozoa", "Eukaryota_X")))
    -
    -for (specific_one in c("general", "specific")) {
    +
    fig_supergroup <- list()
    +ncol = 8
    +
    +for (specific_one in c("general", "specific")) {
    +    
    +    pr2_match_summary_primer_set_sg_region <- pr2_match_summary_primer_set_sg %>% filter(kingdom == "Eukaryota") %>% 
    +        filter(specific == specific_one) %>% filter((n_seq > 20)) %>% filter(!(supergroup %in% c("Apusozoa", 
    +        "Eukaryota_X")))
    +    
         
    -    pr2_mismatches_sg_one <- pr2_mismatches_sg %>% filter(specific == specific_one)
    -    
    -    g <- ggplot(pr2_mismatches_sg_one, aes(x = supergroup, y = mismatch_pct, fill = fct_rev(mismatch_number))) + 
    -        geom_col() + theme_bw() + theme(axis.text.x = element_text(angle = 0, hjust = 1)) + labs(x = "Group", 
    -        y = "% of sequences with mismatches", title = str_c(specific_one, " primers"), fill = "Mismatches") + 
    -        scale_fill_viridis_d(direction = 1, alpha = 0.85) + guides(fill = guide_legend(nrow = 1)) + theme(legend.position = "top", 
    -        legend.box = "horizontal") + coord_flip() + facet_wrap(vars(primer_set_label_short), ncol = ncol)
    -    
    -    print(g)
    -    
    -    fig_supergroup[[str_c("mismatches", specific_one, sep = " ")]] <- g
    -}
    -

    -
    + + g <- ggplot(pr2_match_summary_primer_set_sg_region) + geom_col(aes(x = supergroup, y = ampli_pct, fill = supergroup), + position = "dodge") + theme_bw() + coord_flip() + ylab("% of sequences amplified") + xlab("Supergroup") + + ggtitle(str_c(specific_one, " primers")) + scale_fill_viridis_d(option = "inferno") + ylim(0, 100) + + facet_wrap(~primer_set_label_short, scales = "fixed", ncol = ncol) + guides(fill = guide_legend(nrow = 1)) + + theme(legend.position = "top", legend.box = "horizontal") + theme(legend.position = "none") + + print(g) + + list_label <- str_c("pct", specific_one, sep = " ") + fig_supergroup[[list_label]] <- g + + + + g <- ggplot(filter(pr2_match_summary_primer_set_sg_region, !is.nan(ampli_size_mean))) + geom_point(aes(x = supergroup, + y = ampli_size_mean), colour = "black") + theme_bw() + coord_flip() + geom_errorbar(aes(x = supergroup, + ymax = ampli_size_mean + ampli_size_sd, ymin = ampli_size_mean - ampli_size_sd)) + xlab("Supergroup") + + ylab("Amplicon size (bp)") + ggtitle(str_c(specific_one, " primers")) + geom_hline(yintercept = c(450, + 550), linetype = 2) + facet_wrap(~primer_set_label_short, scales = "fixed", ncol = ncol) + + print(g) + + list_label <- str_c("size", specific_one, sep = " ") + + fig_supergroup[[list_label]] <- g + +}
    +

    -
    -

    9.6 By class for autotrophs (Only EUkaryotes)

    -
    fig_class <- list()
    -ncol = 8
    -
    -for (specific_one in c("general", "specific")) {
    -    
    -    pr2_match_summary_filtered_one <- pr2_match_summary_primer_set_class %>% filter(n_seq > 20) %>% filter(division %in% 
    -        c("Haptophyta", "Dinoflagellata", "Chlorophyta", "Ochrophyta", "Cryptophyta")) %>% filter(specific == 
    -        specific_one)
    +
    +

    9.5.4 Number of mismatches (Only Eukaryotes) - Fig. S2 and S5

    +
    ncol = 8
    +
    +pr2_mismatches_sg <- pr2_match_summary_primer_set_sg %>% filter(kingdom == "Eukaryota") %>% tidyr::pivot_longer(names_to = "mismatch_number", values_to = "mismatch_pct", 
    +    cols = contains("ampli_mismatch"), names_prefix = "ampli_mismatch_") %>% select(-contains("ampli_mismatch")) %>% mutate(mismatch_number = str_sub(mismatch_number, 
    +    1, 1)) %>% mutate(mismatch_number = str_replace(mismatch_number, "5", "5+")) %>% filter((n_seq > 20)) %>% filter(!(supergroup %in% c("Apusozoa", 
    +    "Eukaryota_X")))
    +
    +for (specific_one in c("general", "specific")) {
         
    -    
    -    g <- ggplot(pr2_match_summary_filtered_one) + geom_col(aes(x = str_c(str_trunc(division, 20, ellipsis = ""), 
    -        "-", class), y = ampli_pct, fill = division), position = "dodge") + theme_bw() + coord_flip() + ylab("% of sequences amplified") + 
    -        xlab("Class") + theme(axis.text.y = element_text(angle = 0, hjust = 0, vjust = 0)) + # scale_fill_viridis_d(option = 'plasma') +
    -    scale_fill_brewer(palette = "Accent") + ylim(0, 100) + facet_wrap(vars(primer_set_label_short), scales = "fixed", 
    -        ncol = ncol) + guides(fill = guide_legend(nrow = 1)) + theme(legend.position = "top", legend.box = "horizontal")
    +    pr2_mismatches_sg_one <- pr2_mismatches_sg %>% filter(specific == specific_one)
    +    
    +    g <- ggplot(pr2_mismatches_sg_one, aes(x = supergroup, y = mismatch_pct, fill = fct_rev(mismatch_number))) + geom_col() + theme_bw() + theme(axis.text.x = element_text(angle = 0, 
    +        hjust = 1)) + labs(x = "Group", y = "% of sequences with mismatches", title = str_c(specific_one, " primers"), fill = "Mismatches") + scale_fill_viridis_d(direction = 1, 
    +        alpha = 0.85) + guides(fill = guide_legend(nrow = 1)) + theme(legend.position = "top", legend.box = "horizontal") + coord_flip() + facet_wrap(vars(primer_set_label_short), 
    +        ncol = ncol)
         
         print(g)
         
    -    list_label <- str_c("pct", specific_one, sep = " ")
    -    
    -    fig_class[[list_label]] <- g
    -    
    -    g <- ggplot(filter(pr2_match_summary_filtered_one, !is.nan(ampli_size_mean))) + geom_point(aes(x = str_c(str_trunc(division, 
    -        3, ellipsis = ""), "-", class), y = ampli_size_mean), colour = "black") + geom_errorbar(aes(x = str_c(str_trunc(division, 
    -        3, ellipsis = ""), "-", class), ymax = ampli_size_mean + ampli_size_sd, ymin = ampli_size_mean - 
    -        ampli_size_sd)) + theme_bw() + coord_flip() + theme(axis.text.y = element_text(angle = 0, hjust = 0, 
    -        vjust = 0)) + xlab("Class") + ylab("Amplicon size (bp)") + # scale_y_continuous(breaks = (1:8)*200, limits = c(0,1500)) + ggtitle (str_c('Set -', one_primer_set, '
    -    # - Amplicon size - Lines correspond to limits for Illumina 2x250 and 2x300 respectively') ) +
    -    geom_hline(yintercept = c(450, 550), linetype = 2) + facet_wrap(~primer_set_label_short, scales = "fixed", 
    -        ncol = ncol)
    -    
    -    print(g)
    -    
    -    list_label <- str_c("size", specific_one, sep = " ")
    -    
    -    fig_class[[list_label]] <- g
    -}
    + fig_supergroup[[str_c("mismatches", specific_one, sep = " ")]] <- g +}
    +

    +
    + +
    +

    9.6 By class for autotrophs (Only EUkaryotes) - Fig. S6

    +
    fig_class <- list()
    +ncol = 8
    +
    +for (specific_one in c("general", "specific")) {
    +    
    +    pr2_match_summary_filtered_one <- pr2_match_summary_primer_set_class %>% filter(n_seq > 20) %>% filter(division %in% 
    +        c("Haptophyta", "Dinoflagellata", "Chlorophyta", "Ochrophyta", "Cryptophyta")) %>% filter(specific == 
    +        specific_one)
    +    
    +    
    +    g <- ggplot(pr2_match_summary_filtered_one) + geom_col(aes(x = str_c(str_trunc(division, 20, ellipsis = ""), 
    +        "-", class), y = ampli_pct, fill = division), position = "dodge") + theme_bw() + coord_flip() + ylab("% of sequences amplified") + 
    +        xlab("Class") + theme(axis.text.y = element_text(angle = 0, hjust = 0, vjust = 0)) + # scale_fill_viridis_d(option = 'plasma') +
    +    scale_fill_brewer(palette = "Accent") + ylim(0, 100) + facet_wrap(vars(primer_set_label_short), scales = "fixed", 
    +        ncol = ncol) + guides(fill = guide_legend(nrow = 1)) + theme(legend.position = "top", legend.box = "horizontal")
    +    
    +    print(g)
    +    
    +    list_label <- str_c("pct", specific_one, sep = " ")
    +    
    +    fig_class[[list_label]] <- g
    +    
    +    g <- ggplot(filter(pr2_match_summary_filtered_one, !is.nan(ampli_size_mean))) + geom_point(aes(x = str_c(str_trunc(division, 
    +        3, ellipsis = ""), "-", class), y = ampli_size_mean), colour = "black") + geom_errorbar(aes(x = str_c(str_trunc(division, 
    +        3, ellipsis = ""), "-", class), ymax = ampli_size_mean + ampli_size_sd, ymin = ampli_size_mean - 
    +        ampli_size_sd)) + theme_bw() + coord_flip() + theme(axis.text.y = element_text(angle = 0, hjust = 0, 
    +        vjust = 0)) + xlab("Class") + ylab("Amplicon size (bp)") + # scale_y_continuous(breaks = (1:8)*200, limits = c(0,1500)) + ggtitle (str_c('Set -', one_primer_set, '
    +    # - Amplicon size - Lines correspond to limits for Illumina 2x250 and 2x300 respectively') ) +
    +    geom_hline(yintercept = c(450, 550), linetype = 2) + facet_wrap(~primer_set_label_short, scales = "fixed", 
    +        ncol = ncol)
    +    
    +    print(g)
    +    
    +    list_label <- str_c("size", specific_one, sep = " ")
    +    
    +    fig_class[[list_label]] <- g
    +}

    @@ -4122,73 +4126,65 @@

    10.1 Specific et sets for Opistho
  • 16 Piredda
  • 17 Comeau
  • -
    for (one_primer_set in c(16, 17, 35)) {
    -    
    -    pr2_match_summary_filtered <- filter(pr2_match_summary_primer_set_class, (n_seq > 20) & (supergroup %in% 
    -        c("Opisthokonta")) & (primer_set_id == one_primer_set))
    -    
    -    g <- ggplot(pr2_match_summary_filtered) + geom_col(data = pr2_match_summary_filtered, aes(x = str_c(division, 
    -        "-", class, " - n= ", n_seq), y = ampli_pct), fill = "grey", position = "dodge") + theme(axis.text.y = element_text(angle = 0, 
    -        hjust = 0, vjust = 0)) + theme_bw() + coord_flip() + ylab("% of sequences amplified") + xlab("Class") + 
    -        ggtitle(str_c("Set -", one_primer_set, " - % amplified per Class"))
    -    
    -    print(g)
    -    
    -    g <- ggplot(filter(pr2_match_summary_filtered, !is.nan(ampli_size_mean))) + geom_point(aes(x = str_c(division, 
    -        "-", class, " - n= ", n_seq), y = ampli_size_mean), colour = "black") + geom_errorbar(aes(x = str_c(division, 
    -        "-", class, " - n= ", n_seq), ymax = ampli_size_mean + ampli_size_sd, ymin = ampli_size_mean - ampli_size_sd)) + 
    -        theme(axis.text.y = element_text(angle = 0, hjust = 0, vjust = 0)) + coord_flip() + xlab("Class") + 
    -        ylab("Amplicon size (bp)") + # scale_y_continuous(breaks = (1:8)*200, limits = c(0,1500)) +
    -    ggtitle(str_c("Set -", one_primer_set, " - Amplicon size - Lines correspond to limits for Illumina 2x250 and 2x300 respectively")) + 
    -        geom_hline(yintercept = c(450, 550), linetype = 2)
    -    
    -    print(g)
    -    
    -}
    +
    for (one_primer_set in c(16, 17, 35)) {
    +    
    +    pr2_match_summary_filtered <- filter(pr2_match_summary_primer_set_class, (n_seq > 20) & (supergroup %in% 
    +        c("Opisthokonta")) & (primer_set_id == one_primer_set))
    +    
    +    g <- ggplot(pr2_match_summary_filtered) + geom_col(data = pr2_match_summary_filtered, aes(x = str_c(division, 
    +        "-", class, " - n= ", n_seq), y = ampli_pct), fill = "grey", position = "dodge") + theme(axis.text.y = element_text(angle = 0, 
    +        hjust = 0, vjust = 0)) + theme_bw() + coord_flip() + ylab("% of sequences amplified") + xlab("Class") + 
    +        ggtitle(str_c("Set -", one_primer_set, " - % amplified per Class"))
    +    
    +    print(g)
    +    
    +    g <- ggplot(filter(pr2_match_summary_filtered, !is.nan(ampli_size_mean))) + geom_point(aes(x = str_c(division, 
    +        "-", class, " - n= ", n_seq), y = ampli_size_mean), colour = "black") + geom_errorbar(aes(x = str_c(division, 
    +        "-", class, " - n= ", n_seq), ymax = ampli_size_mean + ampli_size_sd, ymin = ampli_size_mean - ampli_size_sd)) + 
    +        theme(axis.text.y = element_text(angle = 0, hjust = 0, vjust = 0)) + coord_flip() + xlab("Class") + 
    +        ylab("Amplicon size (bp)") + # scale_y_continuous(breaks = (1:8)*200, limits = c(0,1500)) +
    +    ggtitle(str_c("Set -", one_primer_set, " - Amplicon size - Lines correspond to limits for Illumina 2x250 and 2x300 respectively")) + 
    +        geom_hline(yintercept = c(450, 550), linetype = 2)
    +    
    +    print(g)
    +    
    +}

    11 Figures

    -
    library(patchwork)
    -
    -

    11.1 Fig. 1 - V4 and V9 - Amplification % and size

    -
    -

    11.1.1 Eukaryota

    -
    fig_1 <- (fig1[["Eukaryota V4 pct"]] + ggtitle("") + ylab("") + fig1[["Eukaryota V4 mismatches"]] + ggtitle("") + 
    -    xlab("") + ylab("") + theme(axis.text.y = element_blank()) + fig1[["Eukaryota V4 size"]] + ggtitle("") + 
    -    xlab("") + ylab("") + theme(axis.text.y = element_blank()))/(fig1[["Eukaryota V9 pct"]] + ggtitle("") + 
    -    theme(legend.position = "none") + fig1[["Eukaryota V9 mismatches"]] + ggtitle("") + theme(legend.position = "none") + 
    -    xlab("") + theme(axis.text.y = element_blank()) + fig1[["Eukaryota V9 size"]] + ggtitle("") + xlab("") + 
    -    theme(axis.text.y = element_blank())) + plot_layout(heights = c(20, 4))
    -fig_1
    -
    -ggsave(plot = fig_1, filename = "../figs/fig_pct_sizes_mismatches_V4_V9_euk.pdf", width = 17, height = 12, 
    -    scale = 2, units = "cm", useDingbats = FALSE)
    +
    +

    11.1 Fig. 2, S3 and S4 - V4 and V9 - Amplification % and size

    +
    +

    11.1.1 Eukaryota - Fig. 2

    +
    fig_1 <- (fig1[["Eukaryota V4 pct"]] + ggtitle("") + ylab("") + fig1[["Eukaryota V4 mismatches"]] + ggtitle("") + xlab("") + ylab("") + theme(axis.text.y = element_blank()) + 
    +    fig1[["Eukaryota V4 size"]] + ggtitle("") + xlab("") + ylab("") + theme(axis.text.y = element_blank()))/(fig1[["Eukaryota V9 pct"]] + ggtitle("") + 
    +    theme(legend.position = "none") + fig1[["Eukaryota V9 mismatches"]] + ggtitle("") + theme(legend.position = "none") + xlab("") + theme(axis.text.y = element_blank()) + 
    +    fig1[["Eukaryota V9 size"]] + ggtitle("") + xlab("") + theme(axis.text.y = element_blank())) + plot_layout(heights = c(20, 4))
    +fig_1
    +
    +ggsave(plot = fig_1, filename = "figs/fig_pct_sizes_mismatches_V4_V9_euk.pdf", width = 17, height = 12, scale = 2, units = "cm", useDingbats = FALSE)
    -
    -

    11.1.2 Bacteria

    -
    fig_1_bact <- (fig1[["Bacteria V4 pct"]] + ggtitle("") + ylab("") + fig1[["Bacteria V4 mismatches"]] + ggtitle("") + 
    -    xlab("") + ylab("") + theme(axis.text.y = element_blank()) + fig1[["Bacteria V4 size"]] + ggtitle("") + 
    -    xlab("") + ylab("") + theme(axis.text.y = element_blank()))
    -fig_1_bact
    -
    -ggsave(plot = fig_1_bact, filename = "../figs/fig_pct_sizes_mismatches_V4_bact.pdf", width = 17, height = 10, 
    -    scale = 2, units = "cm", useDingbats = FALSE)
    +
    +

    11.1.2 Bacteria - Fig. S3

    +
    fig_1_bact <- (fig1[["Bacteria V4 pct"]] + ggtitle("") + ylab("") + fig1[["Bacteria V4 mismatches"]] + ggtitle("") + xlab("") + ylab("") + theme(axis.text.y = element_blank()) + 
    +    fig1[["Bacteria V4 size"]] + ggtitle("") + xlab("") + ylab("") + theme(axis.text.y = element_blank()))
    +fig_1_bact
    +
    +ggsave(plot = fig_1_bact, filename = "figs/fig_pct_sizes_mismatches_V4_bact.pdf", width = 17, height = 10, scale = 2, units = "cm", useDingbats = FALSE)
    -
    -

    11.1.3 Archaea

    -
    fig_1_arch <- (fig1[["Archaea V4 pct"]] + ggtitle("") + ylab("") + fig1[["Archaea V4 mismatches"]] + ggtitle("") + 
    -    xlab("") + ylab("") + theme(axis.text.y = element_blank()) + fig1[["Archaea V4 size"]] + ggtitle("") + 
    -    xlab("") + ylab("") + theme(axis.text.y = element_blank()))
    -fig_1_arch
    -
    -ggsave(plot = fig_1_arch, filename = "../figs/fig_pct_sizes_mismatches_V4_arch.pdf", width = 17, height = 10, 
    -    scale = 2, units = "cm", useDingbats = FALSE)
    +
    +

    11.1.3 Archaea - Fig. S4

    +
    fig_1_arch <- (fig1[["Archaea V4 pct"]] + ggtitle("") + ylab("") + fig1[["Archaea V4 mismatches"]] + ggtitle("") + xlab("") + ylab("") + theme(axis.text.y = element_blank()) + 
    +    fig1[["Archaea V4 size"]] + ggtitle("") + xlab("") + ylab("") + theme(axis.text.y = element_blank()))
    +fig_1_arch
    +
    +ggsave(plot = fig_1_arch, filename = "figs/fig_pct_sizes_mismatches_V4_arch.pdf", width = 17, height = 10, scale = 2, units = "cm", useDingbats = FALSE)
    -
    -

    11.2 Fig. 1 - All - Amplification % and size and mismatches

    +
    +

    11.2 Fig. S1 - All - Amplification % and size and mismatches

    legend_pct <- cowplot::get_legend( fig1[["Eukaryota general pct"]] + 
                           # create some space to the left of the legend    
                           theme(legend.box.margin = margin(0, 0, 0, 20))
    @@ -4217,11 +4213,11 @@ 

    11.2 Fig. 1 - All - Amplification -ggsave(plot= fig_1 , filename="../figs/fig_pct_sizes_mismatches_all.pdf", +ggsave(plot= fig_1 , filename="figs/fig_pct_sizes_mismatches_all.pdf", width = 19 , height = 17, scale=2, units="cm", useDingbats=FALSE)

    -
    -

    11.3 Fig. - Example of V4 and V9

    +
    +

    11.3 Fig. 3 - Example of V4 and V9

    legend_mismatches <- cowplot::get_legend( fig3[["8 mismatches"]] + 
                           # create some space to the left of the legend    
                           theme(legend.box.margin = margin(t = 20, r = 0, b = 20, l = 20))
    @@ -4261,37 +4257,33 @@ 

    11.3 Fig. - Example of V4 and V9< fig -ggsave(plot= fig , filename="../figs/fig_examples_V4_V9.pdf", +ggsave(plot= fig , filename="figs/fig_examples_V4_V9.pdf", width = 20 , height = 25, scale=2.5, units="cm", useDingbats=FALSE)

    -
    -

    11.4 Fig. 3 - Supergroup analysis

    +
    +

    11.4 Fig. S2 and S5 - Supergroup analysis

    row_height = 5
     
     for (specific_one in c("general", "specific")) {
         
         
    -    fig_3 <- cowplot::plot_grid(fig_supergroup[[str_c("mismatches", specific_one, sep = " ")]], NULL, fig_supergroup[[str_c("size", 
    -        specific_one, sep = " ")]], NULL, labels = c("A", "", "B", ""), ncol = 2, nrow = 2, align = "v", 
    -        rel_widths = c(13, 0.2))
    -    
    -    print(fig_3)
    -    
    -    height <- row_height * (ceiling(n_primers[[specific_one]]/ncol))
    -    
    -    ggsave(plot = fig_3, filename = str_c("../figs/fig_supergroup_", specific_one, ".pdf"), width = 20, height = height, 
    -        scale = 1.75, units = "cm", useDingbats = FALSE)
    -    
    -    # ggsave(plot= fig_supergroup[[str_c('mismatches', specific_one, sep = ' ')]] ,
    -    # filename=str_c('../figs/fig_supergroup_', specific_one, '_A.pdf'), width = 14 , height = height/2,
    -    # scale=1.75, units='cm', useDingbats=FALSE) ggsave(plot= fig_supergroup[[str_c('size', specific_one, sep
    -    # = ' ')]] , filename=str_c('../figs/fig_supergroup_', specific_one, '_B.pdf'), width = 14 , height =
    -    # height/2, scale=1.75, units='cm', useDingbats=FALSE)
    -    
    -}
    + fig_3 <- cowplot::plot_grid(fig_supergroup[[str_c("mismatches", specific_one, sep = " ")]], NULL, fig_supergroup[[str_c("size", specific_one, sep = " ")]], + NULL, labels = c("A", "", "B", ""), ncol = 2, nrow = 2, align = "v", rel_widths = c(13, 0.2)) + + print(fig_3) + + height <- row_height * (ceiling(n_primers[[specific_one]]/ncol)) + + ggsave(plot = fig_3, filename = str_c("figs/fig_supergroup_", specific_one, ".pdf"), width = 20, height = height, scale = 1.75, units = "cm", useDingbats = FALSE) + + # ggsave(plot= fig_supergroup[[str_c('mismatches', specific_one, sep = ' ')]] , filename=str_c('figs/fig_supergroup_', specific_one, '_A.pdf'), + # width = 14 , height = height/2, scale=1.75, units='cm', useDingbats=FALSE) ggsave(plot= fig_supergroup[[str_c('size', specific_one, sep = ' ')]] + # , filename=str_c('figs/fig_supergroup_', specific_one, '_B.pdf'), width = 14 , height = height/2, scale=1.75, units='cm', useDingbats=FALSE) + +}
    -
    -

    11.5 Fig. 4 - Class analysis

    +
    +

    11.5 Fig. S6 - Class analysis

    row_height = 3.5
     
     for (specific_one in c("general", "specific")) {
    @@ -4302,58 +4294,57 @@ 

    11.5 Fig. 4 - Class analysis

    height <- row_height * (trunc(n_primers[[specific_one]]/ncol)) - ggsave(plot = fig_4, filename = str_c("../figs/fig_class_", specific_one, ".pdf"), width = 10, height = height, - scale = 3, units = "cm", useDingbats = FALSE) + ggsave(plot = fig_4, filename = str_c("figs/fig_class_", specific_one, ".pdf"), width = 10, height = height, scale = 3, units = "cm", useDingbats = FALSE) + - -}
    +}
    -
    -

    11.6 Fig - Primer sets position

    +
    +

    11.6 Fig 1 - Primer sets position

    reorder_primer <- function(x, y) {
         x + 0.01 * (y - x)
     }
     
    -fig <- ggplot(filter(primer_sets, !is.na(fwd_start_yeast))) + geom_segment(aes(x = fwd_start_yeast, xend = rev_end_yeast, 
    -    y = forcats::fct_reorder2(primer_set_name, fwd_start_yeast, rev_end_yeast, reorder_primer), yend = forcats::fct_reorder2(primer_set_name, 
    -        fwd_start_yeast, rev_end_yeast, reorder_primer), color = specific), size = 3) + geom_text(aes(x = rev_end_yeast + 
    -    50, y = forcats::fct_reorder2(primer_set_name, fwd_start_yeast, rev_end_yeast, reorder_primer), label = str_c(primer_set_id, 
    -    gene_region, str_replace_all(primer_set_name, c(`_` = " ", V4 = "", V9 = "")), str_replace_na(specificity, 
    -        ""), sep = " ")), size = 3, hjust = 0) + scale_color_manual(name = "", values = c(general = "gray30", 
    -    specific = "grey70")) + guides(fill = guide_legend(nrow = 1)) + theme(legend.position = "top", legend.box = "horizontal", 
    -    panel.grid.major.y = element_blank(), axis.text.y = element_blank(), axis.line.y = element_blank(), axis.ticks.y = element_blank(), 
    -    panel.border = element_blank()) + 
    -labs(x = "Position on 18S rRNA", y = "", legend = "") + xlim(0, 2100) + coord_cartesian(clip = "off")
    +fig <- ggplot(filter(primer_sets, !is.na(fwd_start))) + geom_segment(aes(x = fwd_start, xend = rev_end, y = forcats::fct_reorder2(primer_set_name, 
    +    fwd_start, rev_end, reorder_primer), yend = forcats::fct_reorder2(primer_set_name, fwd_start, rev_end, reorder_primer), color = specific), size = 3) + 
    +    geom_text(aes(x = rev_end + 50, y = forcats::fct_reorder2(primer_set_name, fwd_start, rev_end, reorder_primer), label = str_c(primer_set_id, gene_region, 
    +        str_replace_all(primer_set_name, c(`_` = " ", V4 = "", V9 = "")), str_replace_na(specificity, ""), sep = " ")), size = 3, hjust = 0) + scale_color_manual(name = "", 
    +    values = c(general = "gray30", specific = "grey70")) + guides(fill = guide_legend(nrow = 1)) + theme(legend.position = "top", legend.box = "horizontal", 
    +    panel.grid.major.y = element_blank(), axis.text.y = element_blank(), axis.line.y = element_blank(), axis.ticks.y = element_blank(), panel.border = element_blank()) + 
    +    
    +labs(x = "Position on 18S rRNA", y = "", legend = "") + xlim(0, 2200) + coord_cartesian(clip = "off")
    +
    +fig
     
    -fig
    -
    -ggsave(plot = fig, filename = str_c("../figs/fig_primer_sets_position.pdf"), width = 10, height = 15, scale = 2, 
    -    units = "cm", useDingbats = FALSE)
    +ggsave(plot = fig, filename = str_c("figs/fig_primer_sets_position.pdf"), width = 10, height = 15, scale = 2, units = "cm", useDingbats = FALSE)
    + + + + -
    - + - - - - - + + + diff --git a/docs/PR2_Primers_files/figure-html/unnamed-chunk-61-1.png b/docs/PR2_Primers_files/figure-html/unnamed-chunk-61-1.png new file mode 100644 index 0000000..15ff5a7 Binary files /dev/null and b/docs/PR2_Primers_files/figure-html/unnamed-chunk-61-1.png differ diff --git a/docs/PR2_Primers_files/figure-html/unnamed-chunk-61-2.png b/docs/PR2_Primers_files/figure-html/unnamed-chunk-61-2.png new file mode 100644 index 0000000..649d7f7 Binary files /dev/null and b/docs/PR2_Primers_files/figure-html/unnamed-chunk-61-2.png differ diff --git a/docs/PR2_Primers_files/figure-html/unnamed-chunk-61-3.png b/docs/PR2_Primers_files/figure-html/unnamed-chunk-61-3.png new file mode 100644 index 0000000..80635d1 Binary files /dev/null and b/docs/PR2_Primers_files/figure-html/unnamed-chunk-61-3.png differ diff --git a/docs/PR2_Primers_files/figure-html/unnamed-chunk-61-4.png b/docs/PR2_Primers_files/figure-html/unnamed-chunk-61-4.png new file mode 100644 index 0000000..5b09bf2 Binary files /dev/null and b/docs/PR2_Primers_files/figure-html/unnamed-chunk-61-4.png differ diff --git a/docs/PR2_Primers_files/figure-html/unnamed-chunk-62-1.png b/docs/PR2_Primers_files/figure-html/unnamed-chunk-62-1.png new file mode 100644 index 0000000..05afcb9 Binary files /dev/null and b/docs/PR2_Primers_files/figure-html/unnamed-chunk-62-1.png differ diff --git a/docs/PR2_Primers_files/figure-html/unnamed-chunk-62-2.png b/docs/PR2_Primers_files/figure-html/unnamed-chunk-62-2.png new file mode 100644 index 0000000..05afcb9 Binary files /dev/null and b/docs/PR2_Primers_files/figure-html/unnamed-chunk-62-2.png differ diff --git a/docs/PR2_Primers_files/figure-html/unnamed-chunk-62-3.png b/docs/PR2_Primers_files/figure-html/unnamed-chunk-62-3.png new file mode 100644 index 0000000..e2e7b45 Binary files /dev/null and b/docs/PR2_Primers_files/figure-html/unnamed-chunk-62-3.png differ diff --git a/docs/PR2_Primers_files/figure-html/unnamed-chunk-62-4.png 
b/docs/PR2_Primers_files/figure-html/unnamed-chunk-62-4.png new file mode 100644 index 0000000..e2e7b45 Binary files /dev/null and b/docs/PR2_Primers_files/figure-html/unnamed-chunk-62-4.png differ diff --git a/docs/PR2_Primers_files/figure-html/unnamed-chunk-63-1.png b/docs/PR2_Primers_files/figure-html/unnamed-chunk-63-1.png new file mode 100644 index 0000000..6000cde Binary files /dev/null and b/docs/PR2_Primers_files/figure-html/unnamed-chunk-63-1.png differ diff --git a/docs/PR2_Primers_files/figure-html/unnamed-chunk-63-2.png b/docs/PR2_Primers_files/figure-html/unnamed-chunk-63-2.png new file mode 100644 index 0000000..9a949de Binary files /dev/null and b/docs/PR2_Primers_files/figure-html/unnamed-chunk-63-2.png differ diff --git a/docs/PR2_Primers_files/figure-html/unnamed-chunk-63-3.png b/docs/PR2_Primers_files/figure-html/unnamed-chunk-63-3.png new file mode 100644 index 0000000..9000a0e Binary files /dev/null and b/docs/PR2_Primers_files/figure-html/unnamed-chunk-63-3.png differ diff --git a/docs/PR2_Primers_files/figure-html/unnamed-chunk-63-4.png b/docs/PR2_Primers_files/figure-html/unnamed-chunk-63-4.png new file mode 100644 index 0000000..52119f7 Binary files /dev/null and b/docs/PR2_Primers_files/figure-html/unnamed-chunk-63-4.png differ diff --git a/docs/PR2_Primers_files/figure-html/unnamed-chunk-64-1.png b/docs/PR2_Primers_files/figure-html/unnamed-chunk-64-1.png new file mode 100644 index 0000000..01cc91c Binary files /dev/null and b/docs/PR2_Primers_files/figure-html/unnamed-chunk-64-1.png differ diff --git a/docs/PR2_Primers_files/figure-html/unnamed-chunk-64-10.png b/docs/PR2_Primers_files/figure-html/unnamed-chunk-64-10.png new file mode 100644 index 0000000..f3ea03c Binary files /dev/null and b/docs/PR2_Primers_files/figure-html/unnamed-chunk-64-10.png differ diff --git a/docs/PR2_Primers_files/figure-html/unnamed-chunk-64-11.png b/docs/PR2_Primers_files/figure-html/unnamed-chunk-64-11.png new file mode 100644 index 0000000..011ab52 Binary 
files /dev/null and b/docs/PR2_Primers_files/figure-html/unnamed-chunk-64-11.png differ diff --git a/docs/PR2_Primers_files/figure-html/unnamed-chunk-64-12.png b/docs/PR2_Primers_files/figure-html/unnamed-chunk-64-12.png new file mode 100644 index 0000000..65eaef8 Binary files /dev/null and b/docs/PR2_Primers_files/figure-html/unnamed-chunk-64-12.png differ diff --git a/docs/PR2_Primers_files/figure-html/unnamed-chunk-64-13.png b/docs/PR2_Primers_files/figure-html/unnamed-chunk-64-13.png new file mode 100644 index 0000000..8ba7c86 Binary files /dev/null and b/docs/PR2_Primers_files/figure-html/unnamed-chunk-64-13.png differ diff --git a/docs/PR2_Primers_files/figure-html/unnamed-chunk-64-14.png b/docs/PR2_Primers_files/figure-html/unnamed-chunk-64-14.png new file mode 100644 index 0000000..98ec791 Binary files /dev/null and b/docs/PR2_Primers_files/figure-html/unnamed-chunk-64-14.png differ diff --git a/docs/PR2_Primers_files/figure-html/unnamed-chunk-64-15.png b/docs/PR2_Primers_files/figure-html/unnamed-chunk-64-15.png new file mode 100644 index 0000000..b0c59f0 Binary files /dev/null and b/docs/PR2_Primers_files/figure-html/unnamed-chunk-64-15.png differ diff --git a/docs/PR2_Primers_files/figure-html/unnamed-chunk-64-16.png b/docs/PR2_Primers_files/figure-html/unnamed-chunk-64-16.png new file mode 100644 index 0000000..d129aaf Binary files /dev/null and b/docs/PR2_Primers_files/figure-html/unnamed-chunk-64-16.png differ diff --git a/docs/PR2_Primers_files/figure-html/unnamed-chunk-64-17.png b/docs/PR2_Primers_files/figure-html/unnamed-chunk-64-17.png new file mode 100644 index 0000000..ee621fd Binary files /dev/null and b/docs/PR2_Primers_files/figure-html/unnamed-chunk-64-17.png differ diff --git a/docs/PR2_Primers_files/figure-html/unnamed-chunk-64-18.png b/docs/PR2_Primers_files/figure-html/unnamed-chunk-64-18.png new file mode 100644 index 0000000..a9b056c Binary files /dev/null and b/docs/PR2_Primers_files/figure-html/unnamed-chunk-64-18.png differ diff 
--git a/docs/PR2_Primers_files/figure-html/unnamed-chunk-64-2.png b/docs/PR2_Primers_files/figure-html/unnamed-chunk-64-2.png new file mode 100644 index 0000000..8b15fc0 Binary files /dev/null and b/docs/PR2_Primers_files/figure-html/unnamed-chunk-64-2.png differ diff --git a/docs/PR2_Primers_files/figure-html/unnamed-chunk-64-3.png b/docs/PR2_Primers_files/figure-html/unnamed-chunk-64-3.png new file mode 100644 index 0000000..8ff7b84 Binary files /dev/null and b/docs/PR2_Primers_files/figure-html/unnamed-chunk-64-3.png differ diff --git a/docs/PR2_Primers_files/figure-html/unnamed-chunk-64-4.png b/docs/PR2_Primers_files/figure-html/unnamed-chunk-64-4.png new file mode 100644 index 0000000..8dd0c5b Binary files /dev/null and b/docs/PR2_Primers_files/figure-html/unnamed-chunk-64-4.png differ diff --git a/docs/PR2_Primers_files/figure-html/unnamed-chunk-64-5.png b/docs/PR2_Primers_files/figure-html/unnamed-chunk-64-5.png new file mode 100644 index 0000000..0eebb82 Binary files /dev/null and b/docs/PR2_Primers_files/figure-html/unnamed-chunk-64-5.png differ diff --git a/docs/PR2_Primers_files/figure-html/unnamed-chunk-64-6.png b/docs/PR2_Primers_files/figure-html/unnamed-chunk-64-6.png new file mode 100644 index 0000000..a90d68d Binary files /dev/null and b/docs/PR2_Primers_files/figure-html/unnamed-chunk-64-6.png differ diff --git a/docs/PR2_Primers_files/figure-html/unnamed-chunk-64-7.png b/docs/PR2_Primers_files/figure-html/unnamed-chunk-64-7.png new file mode 100644 index 0000000..5350974 Binary files /dev/null and b/docs/PR2_Primers_files/figure-html/unnamed-chunk-64-7.png differ diff --git a/docs/PR2_Primers_files/figure-html/unnamed-chunk-64-8.png b/docs/PR2_Primers_files/figure-html/unnamed-chunk-64-8.png new file mode 100644 index 0000000..6b8ea46 Binary files /dev/null and b/docs/PR2_Primers_files/figure-html/unnamed-chunk-64-8.png differ diff --git a/docs/PR2_Primers_files/figure-html/unnamed-chunk-64-9.png 
b/docs/PR2_Primers_files/figure-html/unnamed-chunk-64-9.png new file mode 100644 index 0000000..f5a0dc3 Binary files /dev/null and b/docs/PR2_Primers_files/figure-html/unnamed-chunk-64-9.png differ diff --git a/docs/PR2_Primers_files/figure-html/unnamed-chunk-65-1.png b/docs/PR2_Primers_files/figure-html/unnamed-chunk-65-1.png new file mode 100644 index 0000000..ee33a26 Binary files /dev/null and b/docs/PR2_Primers_files/figure-html/unnamed-chunk-65-1.png differ diff --git a/docs/PR2_Primers_files/figure-html/unnamed-chunk-65-10.png b/docs/PR2_Primers_files/figure-html/unnamed-chunk-65-10.png new file mode 100644 index 0000000..95d0e31 Binary files /dev/null and b/docs/PR2_Primers_files/figure-html/unnamed-chunk-65-10.png differ diff --git a/docs/PR2_Primers_files/figure-html/unnamed-chunk-65-11.png b/docs/PR2_Primers_files/figure-html/unnamed-chunk-65-11.png new file mode 100644 index 0000000..ef968ff Binary files /dev/null and b/docs/PR2_Primers_files/figure-html/unnamed-chunk-65-11.png differ diff --git a/docs/PR2_Primers_files/figure-html/unnamed-chunk-65-12.png b/docs/PR2_Primers_files/figure-html/unnamed-chunk-65-12.png new file mode 100644 index 0000000..14eb7ee Binary files /dev/null and b/docs/PR2_Primers_files/figure-html/unnamed-chunk-65-12.png differ diff --git a/docs/PR2_Primers_files/figure-html/unnamed-chunk-65-2.png b/docs/PR2_Primers_files/figure-html/unnamed-chunk-65-2.png new file mode 100644 index 0000000..732b079 Binary files /dev/null and b/docs/PR2_Primers_files/figure-html/unnamed-chunk-65-2.png differ diff --git a/docs/PR2_Primers_files/figure-html/unnamed-chunk-65-3.png b/docs/PR2_Primers_files/figure-html/unnamed-chunk-65-3.png new file mode 100644 index 0000000..b18a293 Binary files /dev/null and b/docs/PR2_Primers_files/figure-html/unnamed-chunk-65-3.png differ diff --git a/docs/PR2_Primers_files/figure-html/unnamed-chunk-65-4.png b/docs/PR2_Primers_files/figure-html/unnamed-chunk-65-4.png new file mode 100644 index 0000000..6b42f62 
Binary files /dev/null and b/docs/PR2_Primers_files/figure-html/unnamed-chunk-65-4.png differ diff --git a/docs/PR2_Primers_files/figure-html/unnamed-chunk-65-5.png b/docs/PR2_Primers_files/figure-html/unnamed-chunk-65-5.png new file mode 100644 index 0000000..f856489 Binary files /dev/null and b/docs/PR2_Primers_files/figure-html/unnamed-chunk-65-5.png differ diff --git a/docs/PR2_Primers_files/figure-html/unnamed-chunk-65-6.png b/docs/PR2_Primers_files/figure-html/unnamed-chunk-65-6.png new file mode 100644 index 0000000..e5c2dfd Binary files /dev/null and b/docs/PR2_Primers_files/figure-html/unnamed-chunk-65-6.png differ diff --git a/docs/PR2_Primers_files/figure-html/unnamed-chunk-65-7.png b/docs/PR2_Primers_files/figure-html/unnamed-chunk-65-7.png new file mode 100644 index 0000000..74fc34f Binary files /dev/null and b/docs/PR2_Primers_files/figure-html/unnamed-chunk-65-7.png differ diff --git a/docs/PR2_Primers_files/figure-html/unnamed-chunk-65-8.png b/docs/PR2_Primers_files/figure-html/unnamed-chunk-65-8.png new file mode 100644 index 0000000..fb96b9b Binary files /dev/null and b/docs/PR2_Primers_files/figure-html/unnamed-chunk-65-8.png differ diff --git a/docs/PR2_Primers_files/figure-html/unnamed-chunk-65-9.png b/docs/PR2_Primers_files/figure-html/unnamed-chunk-65-9.png new file mode 100644 index 0000000..90be909 Binary files /dev/null and b/docs/PR2_Primers_files/figure-html/unnamed-chunk-65-9.png differ diff --git a/docs/PR2_Primers_files/figure-html/unnamed-chunk-66-1.png b/docs/PR2_Primers_files/figure-html/unnamed-chunk-66-1.png new file mode 100644 index 0000000..a3a012d Binary files /dev/null and b/docs/PR2_Primers_files/figure-html/unnamed-chunk-66-1.png differ diff --git a/docs/PR2_Primers_files/figure-html/unnamed-chunk-66-2.png b/docs/PR2_Primers_files/figure-html/unnamed-chunk-66-2.png new file mode 100644 index 0000000..61e9b23 Binary files /dev/null and b/docs/PR2_Primers_files/figure-html/unnamed-chunk-66-2.png differ diff --git 
a/docs/PR2_Primers_files/figure-html/unnamed-chunk-66-3.png b/docs/PR2_Primers_files/figure-html/unnamed-chunk-66-3.png new file mode 100644 index 0000000..e817069 Binary files /dev/null and b/docs/PR2_Primers_files/figure-html/unnamed-chunk-66-3.png differ diff --git a/docs/PR2_Primers_files/figure-html/unnamed-chunk-66-4.png b/docs/PR2_Primers_files/figure-html/unnamed-chunk-66-4.png new file mode 100644 index 0000000..87adb97 Binary files /dev/null and b/docs/PR2_Primers_files/figure-html/unnamed-chunk-66-4.png differ diff --git a/docs/PR2_Primers_files/figure-html/unnamed-chunk-66-5.png b/docs/PR2_Primers_files/figure-html/unnamed-chunk-66-5.png new file mode 100644 index 0000000..facc634 Binary files /dev/null and b/docs/PR2_Primers_files/figure-html/unnamed-chunk-66-5.png differ diff --git a/docs/PR2_Primers_files/figure-html/unnamed-chunk-66-6.png b/docs/PR2_Primers_files/figure-html/unnamed-chunk-66-6.png new file mode 100644 index 0000000..aa44dcf Binary files /dev/null and b/docs/PR2_Primers_files/figure-html/unnamed-chunk-66-6.png differ diff --git a/docs/PR2_Primers_files/figure-html/unnamed-chunk-67-1.png b/docs/PR2_Primers_files/figure-html/unnamed-chunk-67-1.png new file mode 100644 index 0000000..7d7b18b Binary files /dev/null and b/docs/PR2_Primers_files/figure-html/unnamed-chunk-67-1.png differ diff --git a/docs/PR2_Primers_files/figure-html/unnamed-chunk-67-2.png b/docs/PR2_Primers_files/figure-html/unnamed-chunk-67-2.png new file mode 100644 index 0000000..842f046 Binary files /dev/null and b/docs/PR2_Primers_files/figure-html/unnamed-chunk-67-2.png differ diff --git a/docs/PR2_Primers_files/figure-html/unnamed-chunk-67-3.png b/docs/PR2_Primers_files/figure-html/unnamed-chunk-67-3.png new file mode 100644 index 0000000..e52d58f Binary files /dev/null and b/docs/PR2_Primers_files/figure-html/unnamed-chunk-67-3.png differ diff --git a/docs/PR2_Primers_files/figure-html/unnamed-chunk-68-1.png 
b/docs/PR2_Primers_files/figure-html/unnamed-chunk-68-1.png new file mode 100644 index 0000000..00393f7 Binary files /dev/null and b/docs/PR2_Primers_files/figure-html/unnamed-chunk-68-1.png differ diff --git a/docs/PR2_Primers_files/figure-html/unnamed-chunk-68-2.png b/docs/PR2_Primers_files/figure-html/unnamed-chunk-68-2.png new file mode 100644 index 0000000..9421a82 Binary files /dev/null and b/docs/PR2_Primers_files/figure-html/unnamed-chunk-68-2.png differ