From 609f01c4f2984cd9f6f2677b94e5a017ccfaabe8 Mon Sep 17 00:00:00 2001
From: pdiakumis <peterdiakumis@gmail.com>
Date: Sun, 27 Oct 2024 19:07:40 +1100
Subject: [PATCH 1/8] handle apgi wgs/wts meta

---
 R/meta_wgs_tumor_normal.R |  7 ++++++-
 R/meta_wts_tumor_only.R   | 11 ++++++++---
 2 files changed, 14 insertions(+), 4 deletions(-)

diff --git a/R/meta_wgs_tumor_normal.R b/R/meta_wgs_tumor_normal.R
index 7b73add..1487ed0 100644
--- a/R/meta_wgs_tumor_normal.R
+++ b/R/meta_wgs_tumor_normal.R
@@ -60,7 +60,12 @@ meta_wgs_tumor_normal <- function(pmeta, status = "Succeeded") {
       gds_outfile_dragen_somatic_snv_vcf = purrr::map_chr(.data$output, list("somatic_snv_vcf_out", "location"), .default = NA),
       gds_outfile_dragen_somatic_snv_vcf_hardfilt = purrr::map_chr(.data$output, list("somatic_snv_vcf_hard_filtered_out", "location"), .default = NA),
       gds_outfile_dragen_somatic_sv_vcf = purrr::map_chr(.data$output, list("somatic_structural_vcf_out", "location"), .default = NA),
-      SubjectID = sub("umccr__automated__wgs_tumor_normal__(SBJ.....)__L.*", "\\1", .data$wfr_name) # infer from wfr name
+      SubjectID = sub("umccr__automated__wgs_tumor_normal__(SBJ.....)__L.*", "\\1", .data$wfr_name),
+      SubjectID = ifelse(
+        !grepl("external_apgi", .data$wfr_name),
+        .data$SubjectID,
+        sub("umccr__external_apgi__wgs_tumor_normal__(.*)", "\\1", .data$wfr_name)
+      )
     )
   d |>
     dplyr::select(
diff --git a/R/meta_wts_tumor_only.R b/R/meta_wts_tumor_only.R
index 91ac3a4..8d23c2a 100644
--- a/R/meta_wts_tumor_only.R
+++ b/R/meta_wts_tumor_only.R
@@ -28,8 +28,8 @@ meta_wts_tumor_only <- function(pmeta, status = "Succeeded") {
     meta_io_fromjson() |>
     dplyr::mutate(
       # input
-      rglb = purrr::map_chr(.data$input, \(x) unique(x[["fastq_list_rows"]][["rglb"]])),
-      rgsm = purrr::map_chr(.data$input, \(x) unique(x[["fastq_list_rows"]][["rgsm"]])),
+      rglb = purrr::map_chr(.data$input, \(x) unique(x[["fastq_list_rows"]][["rglb"]]) %||% NA),
+      rgsm = purrr::map_chr(.data$input, \(x) unique(x[["fastq_list_rows"]][["rgsm"]]) %||% NA),
       lane = purrr::map_chr(.data$input, \(x) paste(x[["fastq_list_rows"]][["lane"]], collapse = ",")),
       lane = as.character(.data$lane),
       # output
@@ -37,7 +37,12 @@ meta_wts_tumor_only <- function(pmeta, status = "Succeeded") {
       gds_outdir_multiqc = purrr::map_chr(.data$output, list("multiqc_output_directory", "location"), .default = NA),
       gds_outdir_arriba = purrr::map_chr(.data$output, list("arriba_output_directory", "location"), .default = NA),
       gds_outdir_qualimap = purrr::map_chr(.data$output, list("qualimap_output_directory", "location"), .default = NA),
-      SubjectID = sub("umccr__.*__wts_tumor_only__(SBJ.*)__L.*", "\\1", .data$wfr_name)
+      SubjectID = sub("umccr__.*__wts_tumor_only__(SBJ.*)__L.*", "\\1", .data$wfr_name),
+      SubjectID = ifelse(
+        !grepl("external_apgi", .data$wfr_name),
+        .data$SubjectID,
+        sub("umccr__external_apgi__wts_tumor_only__(.*)", "\\1", .data$wfr_name)
+      )
     )
   d |>
     dplyr::select(

From 85259d9e22f8c89c0c84f65f2a4f82a61ac3513a Mon Sep 17 00:00:00 2001
From: pdiakumis <peterdiakumis@gmail.com>
Date: Sun, 27 Oct 2024 22:08:52 +1100
Subject: [PATCH 2/8] add year, durationMin in meta

---
 R/meta_bcl_convert.R                     | 7 ++++++-
 R/meta_oncoanalyser_wgs.R                | 6 +++++-
 R/meta_oncoanalyser_wgts_existing_both.R | 6 +++++-
 R/meta_oncoanalyser_wts.R                | 6 +++++-
 R/meta_rnasum.R                          | 4 ++++
 R/meta_sash.R                            | 6 +++++-
 R/meta_star_alignment.R                  | 6 +++++-
 R/meta_tso_ctdna_tumor_only.R            | 6 +++++-
 R/meta_umccrise.R                        | 6 +++++-
 R/meta_wgs_alignment_qc.R                | 4 ++++
 R/meta_wgs_tumor_normal.R                | 6 +++++-
 R/meta_wts_alignment_qc.R                | 4 ++++
 R/meta_wts_tumor_only.R                  | 6 +++++-
 13 files changed, 63 insertions(+), 10 deletions(-)

diff --git a/R/meta_bcl_convert.R b/R/meta_bcl_convert.R
index 5d2c94b..420fb9d 100644
--- a/R/meta_bcl_convert.R
+++ b/R/meta_bcl_convert.R
@@ -61,9 +61,14 @@ meta_bcl_convert <- function(pmeta, status = "Succeeded") {
   d |>
     tidyr::separate_wider_regex("sample", c(sampleid = ".*", "_", libid1 = "L.*"), cols_remove = FALSE) |>
     tidyr::separate_wider_regex("libid1", c(libid2 = ".*", "_", topup_or_rerun = ".*"), cols_remove = FALSE, too_few = "align_start") |>
-    dplyr::mutate(gds_outdir_reports = file.path(dirname(.data$gds_outdir_multiqc), .data$batch_name, "Reports")) |>
+    dplyr::mutate(
+      gds_outdir_reports = file.path(dirname(.data$gds_outdir_multiqc), .data$batch_name, "Reports"),
+      year = as.character(lubridate::year(.data$start)),
+      durationMin = round(as.numeric(difftime(end, start, units = "mins")))
+    ) |>
     dplyr::select(
       dplyr::all_of(meta_main_cols()),
+      "year", "durationMin",
       -dplyr::any_of(c("batch_run")), # NA for bcl_convert
       SampleID = "sampleid",
       LibraryID = "libid2",
diff --git a/R/meta_oncoanalyser_wgs.R b/R/meta_oncoanalyser_wgs.R
index 1931bff..e56764a 100644
--- a/R/meta_oncoanalyser_wgs.R
+++ b/R/meta_oncoanalyser_wgs.R
@@ -36,11 +36,15 @@ meta_oncoanalyser_wgs <- function(pmeta, status = "Succeeded") {
       gds_bam_tumor = purrr::map_chr(.data$input, "tumor_wgs_bam", .default = NA),
       gds_bam_normal = purrr::map_chr(.data$input, "normal_wgs_bam", .default = NA),
       # output
-      s3_outdir_oncoanalyser = purrr::map_chr(.data$output, "output_directory", .default = NA)
+      s3_outdir_oncoanalyser = purrr::map_chr(.data$output, "output_directory", .default = NA),
+      # other
+      year = as.character(lubridate::year(.data$start)),
+      durationMin = round(as.numeric(difftime(end, start, units = "mins")))
     )
   d |>
     dplyr::select(
       dplyr::all_of(meta_main_cols()),
+      "year", "durationMin",
       "SubjectID",
       "LibraryID_tumor",
       "LibraryID_normal",
diff --git a/R/meta_oncoanalyser_wgts_existing_both.R b/R/meta_oncoanalyser_wgts_existing_both.R
index 1337558..2869557 100644
--- a/R/meta_oncoanalyser_wgts_existing_both.R
+++ b/R/meta_oncoanalyser_wgts_existing_both.R
@@ -41,11 +41,15 @@ meta_oncoanalyser_wgts_existing_both <- function(pmeta, status = "Succeeded") {
       s3_indir_oncoanalyser_wgs = purrr::map_chr(.data$input, "existing_wgs_dir", .default = NA),
       s3_indir_oncoanalyser_wts = purrr::map_chr(.data$input, "existing_wts_dir", .default = NA),
       # output
-      s3_outdir_oncoanalyser = purrr::map_chr(.data$output, "output_directory", .default = NA)
+      s3_outdir_oncoanalyser = purrr::map_chr(.data$output, "output_directory", .default = NA),
+      # other
+      year = as.character(lubridate::year(.data$start)),
+      durationMin = round(as.numeric(difftime(end, start, units = "mins")))
     )
   d |>
     dplyr::select(
       dplyr::all_of(meta_main_cols()),
+      "year", "durationMin",
       "SubjectID",
       "LibraryID_tumor_wgs",
       "LibraryID_normal_wgs",
diff --git a/R/meta_oncoanalyser_wts.R b/R/meta_oncoanalyser_wts.R
index b7259d1..051bf7e 100644
--- a/R/meta_oncoanalyser_wts.R
+++ b/R/meta_oncoanalyser_wts.R
@@ -34,11 +34,15 @@ meta_oncoanalyser_wts <- function(pmeta, status = "Succeeded") {
       LibraryID = purrr::map_chr(.data$input, "tumor_wts_library_id", .default = NA),
       s3_bam = purrr::map_chr(.data$input, "tumor_wts_bam", .default = NA),
       # output
-      s3_outdir_oncoanalyser = purrr::map_chr(.data$output, "output_directory", .default = NA)
+      s3_outdir_oncoanalyser = purrr::map_chr(.data$output, "output_directory", .default = NA),
+      # other
+      year = as.character(lubridate::year(.data$start)),
+      durationMin = round(as.numeric(difftime(end, start, units = "mins")))
     )
   d |>
     dplyr::select(
       dplyr::all_of(meta_main_cols()),
+      "year", "durationMin",
       "SubjectID",
       "LibraryID",
       "SampleID",
diff --git a/R/meta_rnasum.R b/R/meta_rnasum.R
index 49ec1e6..9212db6 100644
--- a/R/meta_rnasum.R
+++ b/R/meta_rnasum.R
@@ -63,11 +63,15 @@ meta_rnasum <- function(pmeta, status = "Succeeded") {
       # output
       gds_outfile_rnasum_html = purrr::map_chr(.data$output, list("rnasum_html", "location"), .default = NA),
       gds_outdir_rnasum = purrr::map_chr(.data$output, list("rnasum_output_directory", "location"), .default = NA),
+      # other
+      year = as.character(lubridate::year(.data$start)),
+      durationMin = round(as.numeric(difftime(end, start, units = "mins")))
     )
   d |>
     dplyr::select(
       dplyr::all_of(meta_main_cols()),
       -dplyr::any_of(c("sequence_run", "batch_run")), # NA for rnasum
+      "year", "durationMin",
       SubjectID = "sbjid1",
       LibraryID = "libid1",
       SampleID = "rnasum_sample_name",
diff --git a/R/meta_sash.R b/R/meta_sash.R
index 30a6337..fe4e4c8 100644
--- a/R/meta_sash.R
+++ b/R/meta_sash.R
@@ -37,11 +37,15 @@ meta_sash <- function(pmeta, status = "Succeeded") {
       gds_indir_dragen_germline = purrr::map_chr(.data$input, "dragen_germline_dir", .default = NA),
       s3_indir_oncoanalyser = purrr::map_chr(.data$input, "oncoanalyser_dir", .default = NA),
       # output
-      s3_outdir_sash = purrr::map_chr(.data$output, "output_directory", .default = NA)
+      s3_outdir_sash = purrr::map_chr(.data$output, "output_directory", .default = NA),
+      # other
+      year = as.character(lubridate::year(.data$start)),
+      durationMin = round(as.numeric(difftime(end, start, units = "mins")))
     )
   d |>
     dplyr::select(
       dplyr::all_of(meta_main_cols()),
+      "year", "durationMin",
       "SubjectID",
       "LibraryID_tumor",
       "LibraryID_normal",
diff --git a/R/meta_star_alignment.R b/R/meta_star_alignment.R
index 08d994b..728203c 100644
--- a/R/meta_star_alignment.R
+++ b/R/meta_star_alignment.R
@@ -34,11 +34,15 @@ meta_star_alignment <- function(pmeta, status = "Succeeded") {
       gds_fq_fwd = purrr::map_chr(.data$input, "fastq_fwd", .default = NA),
       gds_fq_rev = purrr::map_chr(.data$input, "fastq_rev", .default = NA),
       # output
-      s3_outdir_star = purrr::map_chr(.data$output, "output_directory", .default = NA)
+      s3_outdir_star = purrr::map_chr(.data$output, "output_directory", .default = NA),
+      # other
+      year = as.character(lubridate::year(.data$start)),
+      durationMin = round(as.numeric(difftime(end, start, units = "mins")))
     )
   d |>
     dplyr::select(
       dplyr::all_of(meta_main_cols()),
+      "year", "durationMin",
       "SubjectID",
       "LibraryID",
       "SampleID",
diff --git a/R/meta_tso_ctdna_tumor_only.R b/R/meta_tso_ctdna_tumor_only.R
index 3e1b1cc..864e473 100644
--- a/R/meta_tso_ctdna_tumor_only.R
+++ b/R/meta_tso_ctdna_tumor_only.R
@@ -35,11 +35,15 @@ meta_tso_ctdna_tumor_only <- function(pmeta, status = c("Succeeded")) {
       libid1 = sub(".*_(L.*)", "\\1", .data$sample_id),
       rerun = grepl("rerun", .data$libid1),
       subjectid = sub("umccr__automated__tso_ctdna_tumor_only__(SBJ.*)__L.*", "\\1", .data$wfr_name),
-      libid = sub("umccr__automated__tso_ctdna_tumor_only__SBJ.*__(L.*)__.*", "\\1", .data$wfr_name) # equal to libid1 wo _rerun
+      libid = sub("umccr__automated__tso_ctdna_tumor_only__SBJ.*__(L.*)__.*", "\\1", .data$wfr_name), # equal to libid1 wo _rerun
+      # other
+      year = as.character(lubridate::year(.data$start)),
+      durationMin = round(as.numeric(difftime(end, start, units = "mins")))
     )
   d |>
     dplyr::select(
       dplyr::all_of(meta_main_cols()),
+      "year", "durationMin",
       SubjectID = "subjectid",
       LibraryID = "libid",
       SampleID = "sample_name2",
diff --git a/R/meta_umccrise.R b/R/meta_umccrise.R
index 2ceabfd..ec87099 100644
--- a/R/meta_umccrise.R
+++ b/R/meta_umccrise.R
@@ -76,12 +76,16 @@ meta_umccrise <- function(pmeta, status = "Succeeded") {
       gds_outdir_umccrise1 = purrr::map_chr(.data$output, list("umccrise_output_directory", "location"), .default = NA),
       gds_outdir_umccrise = dplyr::if_else(
         is.na(.data$gds_outdir_umccrise1), .data$gds_outdir_umccrise2, .data$gds_outdir_umccrise1
-      )
+      ),
+      # other
+      year = as.character(lubridate::year(.data$start)),
+      durationMin = round(as.numeric(difftime(end, start, units = "mins")))
     )
   d |>
     dplyr::select(
       dplyr::all_of(meta_main_cols()),
       -dplyr::any_of(c("sequence_run", "batch_run")), # NA for umccrise
+      "year", "durationMin",
       "SubjectID",
       "LibraryID_tumor",
       "LibraryID_normal",
diff --git a/R/meta_wgs_alignment_qc.R b/R/meta_wgs_alignment_qc.R
index 1bf927e..0aeea07 100644
--- a/R/meta_wgs_alignment_qc.R
+++ b/R/meta_wgs_alignment_qc.R
@@ -39,6 +39,9 @@ meta_wgs_alignment_qc <- function(pmeta, status = "Succeeded") {
       gds_outdir_dragen = purrr::map_chr(.data$output, list("dragen_alignment_output_directory", "location"), .default = NA),
       gds_outdir_multiqc = purrr::map_chr(.data$output, list("multiqc_output_directory", "location"), .default = NA),
       SubjectID = sub("umccr__.*__wgs_alignment_qc__(SBJ.*)__L.*", "\\1", .data$wfr_name),
+      # other
+      year = as.character(lubridate::year(.data$start)),
+      durationMin = round(as.numeric(difftime(end, start, units = "mins")))
     ) |>
     tidyr::separate_wider_delim(
       cols = "rgid", delim = ".",
@@ -48,6 +51,7 @@ meta_wgs_alignment_qc <- function(pmeta, status = "Succeeded") {
   d |>
     dplyr::select(
       dplyr::all_of(meta_main_cols()),
+      "year", "durationMin",
       "SubjectID",
       LibraryID = "rglb",
       SampleID = "rgsm",
diff --git a/R/meta_wgs_tumor_normal.R b/R/meta_wgs_tumor_normal.R
index 1487ed0..4b03588 100644
--- a/R/meta_wgs_tumor_normal.R
+++ b/R/meta_wgs_tumor_normal.R
@@ -65,12 +65,16 @@ meta_wgs_tumor_normal <- function(pmeta, status = "Succeeded") {
         !grepl("external_apgi", .data$wfr_name),
         .data$SubjectID,
         sub("umccr__external_apgi__wgs_tumor_normal__(.*)", "\\1", .data$wfr_name)
-      )
+      ),
+      # other
+      year = as.character(lubridate::year(.data$start)),
+      durationMin = round(as.numeric(difftime(end, start, units = "mins")))
     )
   d |>
     dplyr::select(
       dplyr::all_of(meta_main_cols()),
       -dplyr::any_of(c("sequence_run", "batch_run")), # NA for wgs_tumor_normal
+      "year", "durationMin",
       "SubjectID",
       "LibraryID_tumor",
       "LibraryID_normal",
diff --git a/R/meta_wts_alignment_qc.R b/R/meta_wts_alignment_qc.R
index 164e398..baf9d29 100644
--- a/R/meta_wts_alignment_qc.R
+++ b/R/meta_wts_alignment_qc.R
@@ -39,6 +39,9 @@ meta_wts_alignment_qc <- function(pmeta, status = "Succeeded") {
       gds_outdir_dragen = purrr::map_chr(.data$output, list("dragen_alignment_output_directory", "location"), .default = NA),
       gds_outdir_multiqc = purrr::map_chr(.data$output, list("multiqc_output_directory", "location"), .default = NA),
       SubjectID = sub("umccr__.*__wts_alignment_qc__(SBJ.*)__L.*", "\\1", .data$wfr_name),
+      # other
+      year = as.character(lubridate::year(.data$start)),
+      durationMin = round(as.numeric(difftime(end, start, units = "mins")))
     ) |>
     tidyr::separate_wider_delim(
       cols = "rgid", delim = ".",
@@ -48,6 +51,7 @@ meta_wts_alignment_qc <- function(pmeta, status = "Succeeded") {
   d |>
     dplyr::select(
       dplyr::all_of(meta_main_cols()),
+      "year", "durationMin",
       "SubjectID",
       LibraryID = "rglb",
       SampleID = "rgsm",
diff --git a/R/meta_wts_tumor_only.R b/R/meta_wts_tumor_only.R
index 8d23c2a..2b1cc16 100644
--- a/R/meta_wts_tumor_only.R
+++ b/R/meta_wts_tumor_only.R
@@ -42,11 +42,15 @@ meta_wts_tumor_only <- function(pmeta, status = "Succeeded") {
         !grepl("external_apgi", .data$wfr_name),
         .data$SubjectID,
         sub("umccr__external_apgi__wts_tumor_only__(.*)", "\\1", .data$wfr_name)
-      )
+      ),
+      # other
+      year = as.character(lubridate::year(.data$start)),
+      durationMin = round(as.numeric(difftime(end, start, units = "mins")))
     )
   d |>
     dplyr::select(
       dplyr::all_of(meta_main_cols()),
+      "year", "durationMin",
       "SubjectID",
       LibraryID = "rglb",
       SampleID = "rgsm",

From 70739a380ae043b23e3f5a5a292d830290b7822e Mon Sep 17 00:00:00 2001
From: pdiakumis <peterdiakumis@gmail.com>
Date: Mon, 28 Oct 2024 07:45:01 +1100
Subject: [PATCH 3/8] init mega seqrunsum

---
 inst/reports/mega_seqrunsum/.gitignore  |   4 +
 inst/reports/mega_seqrunsum/README.md   |  13 +
 inst/reports/mega_seqrunsum/_quarto.yml |   2 +
 inst/reports/mega_seqrunsum/funcs.R     |  43 +++
 inst/reports/mega_seqrunsum/render.sh   |   5 +
 inst/reports/mega_seqrunsum/report.qmd  | 396 ++++++++++++++++++++++++
 6 files changed, 463 insertions(+)
 create mode 100644 inst/reports/mega_seqrunsum/.gitignore
 create mode 100644 inst/reports/mega_seqrunsum/README.md
 create mode 100644 inst/reports/mega_seqrunsum/_quarto.yml
 create mode 100644 inst/reports/mega_seqrunsum/funcs.R
 create mode 100644 inst/reports/mega_seqrunsum/render.sh
 create mode 100644 inst/reports/mega_seqrunsum/report.qmd

diff --git a/inst/reports/mega_seqrunsum/.gitignore b/inst/reports/mega_seqrunsum/.gitignore
new file mode 100644
index 0000000..b457f40
--- /dev/null
+++ b/inst/reports/mega_seqrunsum/.gitignore
@@ -0,0 +1,4 @@
+/.quarto/
+nogit
+*html
+report_files
diff --git a/inst/reports/mega_seqrunsum/README.md b/inst/reports/mega_seqrunsum/README.md
new file mode 100644
index 0000000..96b6086
--- /dev/null
+++ b/inst/reports/mega_seqrunsum/README.md
@@ -0,0 +1,13 @@
+# Mega Sequencing Run Summary
+
+Same as a typical Sequencing Run Summary, but for a large number of samples over a long period of time.
+
+**Contents**
+
+- Visualisation of workflow runtimes based on the PortalDB Workflow table.
+- Summary of sample metadata and workflow input/output paths.
+
+**Inputs**
+
+- PortalDB `workflow` slice for a given timeframe.
+- PortalDB `limsrow` slice for a given set of library IDs.
diff --git a/inst/reports/mega_seqrunsum/_quarto.yml b/inst/reports/mega_seqrunsum/_quarto.yml
new file mode 100644
index 0000000..00b55d8
--- /dev/null
+++ b/inst/reports/mega_seqrunsum/_quarto.yml
@@ -0,0 +1,2 @@
+project:
+  title: "Sequencing Run Summary"
diff --git a/inst/reports/mega_seqrunsum/funcs.R b/inst/reports/mega_seqrunsum/funcs.R
new file mode 100644
index 0000000..a47591b
--- /dev/null
+++ b/inst/reports/mega_seqrunsum/funcs.R
@@ -0,0 +1,43 @@
+funcs <- list(
+  #----#
+  dt_view = function(x, id, height = 500, ...) {
+    htmltools::browsable(
+      htmltools::tagList(
+        htmltools::tags$button(
+          htmltools::tagList(fontawesome::fa("download"), "CSV"),
+          onclick = glue("Reactable.downloadDataCSV('{id}', '{id}.csv')")
+        ),
+        x |>
+          reactable::reactable(
+            bordered = TRUE,
+            filterable = TRUE,
+            fullWidth = TRUE,
+            height = height,
+            highlight = TRUE,
+            pagination = FALSE,
+            resizable = TRUE,
+            searchable = TRUE,
+            sortable = TRUE,
+            striped = TRUE,
+            wrap = FALSE,
+            elementId = id,
+            ...
+          )
+      )
+    )
+  },
+  func_eval = function(f) {
+    eval(parse(text = f))
+  },
+  #----#
+  get_ids = function(d, id) {
+    .get_ids <- function(tbl, id) {
+      tbl |>
+        select(contains(id)) |>
+        unlist() |>
+        unique()
+    }
+    d |>
+      mutate(ids = list(.get_ids(.data$tidy_meta, {{ id }})))
+  }
+)
diff --git a/inst/reports/mega_seqrunsum/render.sh b/inst/reports/mega_seqrunsum/render.sh
new file mode 100644
index 0000000..222a896
--- /dev/null
+++ b/inst/reports/mega_seqrunsum/render.sh
@@ -0,0 +1,5 @@
+out="mega_seqrunsum.html"
+
+quarto render report.qmd \
+    -o ${out} \
+    --output-dir nogit
diff --git a/inst/reports/mega_seqrunsum/report.qmd b/inst/reports/mega_seqrunsum/report.qmd
new file mode 100644
index 0000000..cca4885
--- /dev/null
+++ b/inst/reports/mega_seqrunsum/report.qmd
@@ -0,0 +1,396 @@
+---
+title: "{{< meta params.title >}}"
+subtitle: "Period: 2022-01-01 to 2024-10-27"
+author: "UMCCR - Genomics Platform Group"
+date: now
+date-format: "YYYY-MM-DD HH:mm Z"
+execute:
+  echo: false
+  eval: true
+format: revealjs
+params:
+  title: "UMCCR PortalDB Workflow Summary"
+---
+
+```{r}
+#| label: pkg_load
+#| message: false
+{
+  require(assertthat, include.only = "assert_that")
+  require(dplyr)
+  require(dracarys, include.only = "session_info_kable")
+  require(glue, include.only = "glue")
+  require(here, include.only = "here")
+  require(jsonlite, include.only = "fromJSON")
+  require(kableExtra, include.only = "kbl")
+  require(purrr, include.only = "map")
+  require(rportal, include.only = "portaldb_query_workflow")
+  require(readr, include.only = "cols")
+  require(tibble, include.only = "as_tibble")
+  require(tidyr, include.only = "unnest")
+  require(knitr, include.only = "kable")
+  require(ggplot2)
+}
+set.seed(42)
+```
+
+```{r}
+#| label: envvars
+#| eval: false
+# make sure you have logged into AWS and ICA
+c("AWS_ACCESS_KEY_ID", "AWS_SECRET_ACCESS_KEY", "AWS_REGION") |>
+  rportal::envvar_defined() |>
+  stopifnot()
+```
+
+```{r funcs_source}
+source(here("inst/reports/mega_seqrunsum/funcs.R"))
+```
+
+```{r}
+#| label: vars
+#| eval: false
+wf_fun <- tibble::tribble(
+  ~name,
+  "bcl_convert",
+  "tso_ctdna_tumor_only",
+  "wgs_alignment_qc",
+  "wts_alignment_qc",
+  "wts_tumor_only",
+  "wgs_tumor_normal",
+  "umccrise",
+  "rnasum",
+  "star_alignment",
+  "oncoanalyser_wts",
+  "oncoanalyser_wgs",
+  "oncoanalyser_wgts_existing_both",
+  "sash"
+) |>
+  mutate(func = glue("rportal::meta_{name}"))
+```
+
+```{r}
+#| label: query_workflow_table
+#| message: false
+#| eval: false
+# Grab all rows from workflow table
+# query_wf <- glue('ORDER BY "start" DESC;')
+# wf_raw <- rportal::portaldb_query_workflow(query_wf)
+rds_wf <- here("inst/reports/mega_seqrunsum/nogit", "wf_2024-10-25.rds")
+# saveRDS(wf_raw, rds_wf)
+wf_raw <- readRDS(rds_wf)
+```
+
+
+```{r}
+#| label: tidy_workflow_table
+#| eval: false
+# date_cutoff <- as.POSIXct("2022-01-01 00:00:01 AEDT")
+# wf <- wf_raw |>
+#   filter(start > date_cutoff) |>
+#   filter(end_status == "Succeeded")
+# list of tidy tbls per wf
+# wf_tidy <- wf_fun |>
+#   rowwise() |>
+#   mutate(tidy_meta = list(funcs$func_eval(.data$func)(wf))) |>
+#   select("name", "tidy_meta")
+rds_wf_tidy <- here("inst/reports/mega_seqrunsum/nogit", "wf_tidy_2024-10-27.rds")
+# saveRDS(wf_tidy, rds_wf_tidy)
+wf_tidy <- readRDS(rds_wf_tidy)
+# grab all libids/sbjids involved in any of the workflows
+# libids <- funcs$get_ids(wf_tidy, "LibraryID")
+```
+
+```{r}
+#| label: query_limsrow_table
+#| eval: false
+# query_lims <- glue('ORDER BY "library_id" DESC;')
+# lims_raw <- rportal::portaldb_query_limsrow(query_lims)
+rds_lims <- here("inst/reports/mega_seqrunsum/nogit", "lims_2024-10-27.rds")
+# saveRDS(lims_raw, rds_lims)
+lims_raw <- readRDS(rds_lims)
+lims_tidy <- lims_raw |>
+  tidyr::separate_wider_delim(
+    library_id,
+    delim = "_", names = c("library_id", "topup_or_rerun"), too_few = "align_start"
+  ) |>
+  select(
+    SubjectID = "subject_id",
+    SampleID = "sample_id",
+    LibraryID = "library_id",
+    ExternalSubjectID = "external_subject_id",
+    ProjectOwner = "project_owner",
+    ProjectName = "project_name",
+    Type = "type",
+    Phenotype = "phenotype",
+    Topup = "topup",
+    Workflow = "workflow",
+    Assay = "assay",
+    Source = "source",
+  ) |>
+  distinct() |>
+  filter(
+    !(LibraryID %in% "L2300925" & SampleID %in% "PTC_HCC1395"),
+    !(LibraryID %in% "L2400357" & Workflow %in% "clinical"),
+    !(LibraryID %in% "L2300952" & SampleID %in% "PTC_HCC1143"),
+    !(LibraryID %in% paste0("L2000", 431:437) & ProjectName %in% "Testing"),
+    !(LibraryID %in% "LPRJ230021" & SubjectID %in% NA),
+    !(LibraryID %in% "L2101562" & Workflow %in% "research"),
+    !(LibraryID %in% "L2000251" & Workflow %in% "research"),
+    !(LibraryID %in% "LPRJ210843" & Phenotype %in% NA),
+    !(LibraryID %in% "L2101737" & ProjectName %in% "KRAS-WT"),
+    !(LibraryID %in% "LPRJ210514" & Workflow %in% "qc"),
+    !(LibraryID %in% "LPRJ210515" & Workflow %in% "qc"),
+    !(LibraryID %in% "LPRJ210517" & Workflow %in% "qc"),
+    !(
+      LibraryID %in% c(
+        "L1900180",
+        paste0("L1900", c(820:827, 833:836)),
+        paste0("L2000", c("069", 172:183, 185:196, 198:210, 212, 411:416, 568, 647:652))
+      ) & Workflow %in% NA)
+  )
+rds_lims_tidy <- here("inst/reports/mega_seqrunsum/nogit", "lims_tidy_2024-10-27.rds")
+saveRDS(lims_tidy, rds_lims_tidy)
+```
+
+```{r}
+#| label: load_data
+wf <- here("inst/reports/mega_seqrunsum/nogit", "wf_tidy_2024-10-27.rds") |>
+  readRDS() |>
+  ungroup()
+lims <- here("inst/reports/mega_seqrunsum/nogit", "lims_tidy_2024-10-27.rds") |>
+  readRDS()
+```
+
+
+# Workflow Counts
+
+```{r}
+tot_wf_counts <- wf |>
+  mutate(
+    main = purrr::map(.data$tidy_meta, \(tbl) {
+      tbl |>
+        select("year", "start", "durationMin", "portal_run_id") |>
+        distinct()
+    })
+  ) |>
+  select(name, main) |>
+  tidyr::unnest(main)
+```
+
+## Total
+
+```{r}
+tot_wf_counts |>
+  count(name, year) |>
+  arrange(name, year) |>
+  reactable::reactable(
+    bordered = TRUE,
+    filterable = TRUE,
+    height = 600,
+    fullWidth = TRUE,
+    highlight = TRUE,
+    pagination = FALSE,
+    resizable = TRUE,
+    searchable = TRUE,
+    sortable = TRUE,
+    striped = TRUE,
+    wrap = FALSE
+  )
+```
+
+---
+
+```{r}
+ggplot2::theme_set(ggplot2::theme_bw())
+p <- tot_wf_counts |>
+  ggplot(aes(x = name)) +
+  geom_bar(aes(fill = year), position = position_dodge(preserve = "single")) +
+  scale_y_continuous(breaks = scales::pretty_breaks(20)) +
+  coord_flip() +
+  ggtitle("Total number of workflows per year")
+plotly::ggplotly(p)
+```
+
+```{r}
+wf_name <- function(x) {
+  wf |>
+    filter(.data$name == x) |>
+    select(-"name") |>
+    tidyr::unnest(tidy_meta)
+}
+```
+
+## bcl_convert
+
+Assay types and number of libraries loaded on each run:
+
+```{r}
+wf_name("bcl_convert") |>
+  select(year, start, durationMin, portal_run_id, batch_name, runfolder_name) |>
+  mutate(nlib = n(), .by = c("portal_run_id", "batch_name")) |>
+  mutate(batch_name_nlib = glue("{batch_name} ({nlib})")) |>
+  distinct() |>
+  mutate(batches_nlib = paste(batch_name_nlib, collapse = ", "), .by = "portal_run_id") |>
+  select(-c(batch_name, nlib, batch_name_nlib)) |>
+  distinct() |>
+  reactable::reactable(
+    height = 800,
+    width = 2000,
+    pagination = FALSE,
+    fullWidth = TRUE,
+    bordered = TRUE,
+    groupBy = "year",
+    onClick = "expand",
+    rowStyle = list(cursor = "pointer"),
+    columns = list(
+      year = reactable::colDef(align = "left")
+    ),
+    theme = reactable::reactableTheme(
+      borderColor = "#dfe2e5",
+      stripedColor = "#f6f8fa",
+      highlightColor = "#f0f5f9",
+      style = list(
+        fontFamily = "Monaco",
+        fontSize = "0.875rem"
+      )
+    )
+  )
+```
+
+## Assays
+
+```{r}
+bcl <- wf_name("bcl_convert") |>
+  select(portal_run_id, start, year, durationMin, LibraryID, batch_name, runfolder_name) |>
+  distinct() |>
+  left_join(lims, by = "LibraryID")
+p <- bcl |>
+  mutate(Assay = ifelse(grepl("10X", .data$Assay), "10X", .data$Assay)) |>
+  ggplot(aes(x = Assay)) +
+  geom_bar(aes(fill = year), position = position_dodge(preserve = "single")) +
+  scale_y_continuous(breaks = scales::pretty_breaks(20)) +
+  coord_flip() +
+  ggtitle("Library assays per year")
+plotly::ggplotly(p)
+```
+
+## ProjectOwner
+
+```{r}
+p <- bcl |>
+  # filter(year != "2022") |>
+  ggplot(aes(x = ProjectOwner)) +
+  geom_bar(aes(fill = year), position = position_dodge(preserve = "single")) +
+  scale_y_continuous(breaks = scales::pretty_breaks(20)) +
+  coord_flip() +
+  ggtitle("Libraries per ProjectOwner")
+plotly::ggplotly(p)
+```
+
+
+# Duration
+
+---
+
+```{r}
+p <- tot_wf_counts |>
+  ggplot(aes(x = name, y = durationMin)) +
+  geom_bar(aes(fill = year), position = position_dodge(preserve = "single"), stat = "summary", fun = "mean") +
+  scale_y_continuous(breaks = scales::pretty_breaks(20)) +
+  coord_flip() +
+  ggtitle("**Average** Runtime Per Workflow (Minutes)")
+plotly::ggplotly(p)
+```
+
+---
+
+```{r}
+p <- tot_wf_counts |>
+  ggplot(aes(x = name, y = durationMin)) +
+  geom_bar(aes(fill = year), position = position_dodge(preserve = "single"), stat = "summary", fun = "median") +
+  scale_y_continuous(breaks = scales::pretty_breaks(20)) +
+  coord_flip() +
+  ggtitle("**Median** Runtime Per Workflow (Minutes)")
+plotly::ggplotly(p)
+```
+
+
+# Report Return Dates
+
+```{r}
+reports1 <- here("inst/reports/mega_seqrunsum/nogit/report_return_dates_2023-2024.csv") |>
+  readr::read_csv(col_types = readr::cols(.default = "c")) |>
+  rename(
+    ExternalSubjectID = "External ID",
+    URN_CUP = "URN of CUPs",
+    SubjectID = "SBJ ID",
+    Assay = "assay",
+    DateReported = "date of report return"
+  ) |>
+  mutate(
+    DateReported = as.Date(.data$DateReported, format = "%d/%m/%Y")
+  ) |>
+  left_join(
+    tibble::tribble(
+      ~SubjectID, ~ExternalSubjectID_portal,
+      "SBJ01670", "UR2501514",
+      "SBJ04381", "SID-00009-Lot00500",
+      "SBJ04411", "IMPARP F-B",
+      "SBJ04416", "SN-PMC-126",
+      "SBJ04430", "IMPARP C-H",
+      "SBJ04752", "905579",
+      "SBJ04774", "4138985",
+      "SBJ04778", "PPGL29",
+      "SBJ04782", "2260976",
+      "SBJ04783", "2136896",
+      "SBJ04784", "10070134",
+      "SBJ04787", "10063388",
+      "SBJ04788", "10033903",
+      "SBJ04789", "9645642",
+      "SBJ04790", "173728",
+      "SBJ04868", "PMEX159074",
+      "SBJ04878", "9080989",
+      "SBJ04882", "PMEX159439 / UR10063958",
+      "SBJ04885", "PMEX159637 / UR5130743",
+      "SBJ04893", "10083361 GL0196",
+      "SBJ04894", "859363 HA0004",
+      "SBJ04899", "PM9696198",
+      "SBJ04966", "PMEX160574 / 10072166",
+      "SBJ04968", "PMEX160569 / 4233320",
+      "SBJ04969", "PMEX160653 / 10037062",
+      "SBJ05035", "IMPARP-AUS M-C",
+      "SBJ05081", "PMEX161353 / UR 50601716",
+      "SBJ05082", "PM9401040 / UR 13066986",
+      "SBJ05083", "PMEX161346 / UR 11190942",
+      "SBJ05084", "PM10081348 / UR 0204405",
+      "SBJ05085", "PM10063184 / UR 0120835",
+      "SBJ05086", "PMEX161344 / UR 2643844",
+      "SBJ05087", "PMEX161505 / UR 8770669",
+      "SBJ05572", "SN-PMC-169",
+      "SBJ05692", "IMPARP-AUS A-J"
+    ),
+    by = "SubjectID"
+  )
+reports <- reports1 |>
+  mutate(
+    ExternalSubjectID = ifelse(is.na(ExternalSubjectID_portal), ExternalSubjectID, ExternalSubjectID_portal),
+    in_lims = ExternalSubjectID %in% lims$ExternalSubjectID,
+    ExternalSubjectID = ifelse(
+      !in_lims,
+      sub("^PM|^UR", "", .data$ExternalSubjectID),
+      .data$ExternalSubjectID
+    ),
+    in_lims = ExternalSubjectID %in% lims$ExternalSubjectID,
+    ExternalSubjectID = ifelse(
+      !in_lims,
+      sub("^GL.*_UR(.*)", "\\1", .data$ExternalSubjectID),
+      .data$ExternalSubjectID
+    ),
+    in_lims = ExternalSubjectID %in% lims$ExternalSubjectID
+  ) |>
+  select(ExternalSubjectID, URN_CUP, SubjectID, Assay, DateReported, libid)
+rep_cttso <- reports |>
+  filter(Assay == "ctTSO")
+```
+

From 1db93d3276abef70e8f58f1f19123e934a5e5fdb Mon Sep 17 00:00:00 2001
From: pdiakumis <peterdiakumis@gmail.com>
Date: Mon, 28 Oct 2024 11:48:37 +1100
Subject: [PATCH 4/8] mega seqrunsum fixes

---
 inst/reports/mega_seqrunsum/report.qmd | 133 ++++++++++++++++++++++++-
 1 file changed, 132 insertions(+), 1 deletion(-)

diff --git a/inst/reports/mega_seqrunsum/report.qmd b/inst/reports/mega_seqrunsum/report.qmd
index cca4885..fc3ecbd 100644
--- a/inst/reports/mega_seqrunsum/report.qmd
+++ b/inst/reports/mega_seqrunsum/report.qmd
@@ -262,7 +262,8 @@ wf_name("bcl_convert") |>
 
 ```{r}
 bcl <- wf_name("bcl_convert") |>
-  select(portal_run_id, start, year, durationMin, LibraryID, batch_name, runfolder_name) |>
+  # select(portal_run_id, start, year, durationMin, LibraryID, batch_name, runfolder_name) |>
+  select(portal_run_id, start, year, durationMin, LibraryID, batch_name, runfolder_name, topup_or_rerun) |>
   distinct() |>
   left_join(lims, by = "LibraryID")
 p <- bcl |>
@@ -392,5 +393,135 @@ reports <- reports1 |>
   select(ExternalSubjectID, URN_CUP, SubjectID, Assay, DateReported, libid)
 rep_cttso <- reports |>
   filter(Assay == "ctTSO")
+rep_cttso_done <- rep_cttso |>
+  filter(!is.na(libid))
+rep_cttso_nolibid <- rep_cttso |>
+  filter(is.na(libid))
+lims_cttso <- lims |>
+  filter(SubjectID %in% rep_cttso_nolibid$SubjectID) |>
+  filter(Type %in% c("ctDNA"), Assay %in% c("ctTSO"))
+rep_cttso_all <- rep_cttso_nolibid |>
+  left_join(lims_cttso, by = c("SubjectID", "ExternalSubjectID", "Assay")) |>
+  mutate(libid = LibraryID) |>
+  select(ExternalSubjectID, URN_CUP, SubjectID, Assay, DateReported, libid) |>
+  bind_rows(rep_cttso_done)
+
+rep_wgts <- reports |>
+  filter(Assay != "ctTSO")
+rep_wgts_done <- rep_wgts |>
+  filter(!is.na(libid))
+rep_wgts_nolibid <- rep_wgts |>
+  filter(is.na(libid))
+lims_wgts <- lims |>
+  filter(SubjectID %in% rep_wgts_nolibid$SubjectID) |>
+  filter(Type == "WGS", !Assay %in% "ctTSO", Phenotype == "tumor")
+rep_wgts_all <- rep_wgts_nolibid |>
+  left_join(lims_wgts |> select(SubjectID, LibraryID), by = c("SubjectID")) |>
+  mutate(libid = LibraryID) |>
+  select(ExternalSubjectID, URN_CUP, SubjectID, Assay, DateReported, libid) |>
+  bind_rows(rep_wgts_done)
+rep_all <- bind_rows(rep_wgts_all, rep_cttso_all) |>
+  rename(LibraryID = "libid") |>
+  select(SubjectID, LibraryID, Assay, DateReported)
+# rep_all |>
+#   filter(!LibraryID %in% bcl$LibraryID)
+```
+
+```{r}
+# bcl |>
+#   filter(LibraryID %in% rep_all$LibraryID)
+# c("L2400522" "L2400397" "L2400346")
+bcl_topups <- bcl |>
+  filter(LibraryID %in% rep_all$LibraryID) |>
+  filter(!is.na(topup_or_rerun))
+```
+
+---
+
+## Umccrise
+
+```{r}
+um <- wf_name("umccrise")
+# filter(LibraryID_tumor %in% rep_all$LibraryID) |>
+# select(portal_run_id, start, end, durationMin, year, SubjectID, LibraryID_tumor)
+# all reported tumor libs have umccrise runs
+# rep_all |>
+#   filter(Assay == "WGTS") |>
+#   filter(!LibraryID %in% um$LibraryID_tumor)
+
+# some libs have multiple umccrise runs
+um_notok <- um |>
+  filter(LibraryID_tumor %in% rep_all$LibraryID) |>
+  group_by(LibraryID_tumor) |>
+  filter(n() > 1) |>
+  ungroup() |>
+  arrange(LibraryID_tumor) |>
+  select(portal_run_id, start, end, year, SubjectID, LibraryID_tumor)
+um_ok <- um |>
+  filter(LibraryID_tumor %in% rep_all$LibraryID) |>
+  group_by(LibraryID_tumor) |>
+  filter(n() == 1) |>
+  ungroup() |>
+  select(portal_run_id, start, end, year, SubjectID, LibraryID_tumor)
+um_notok_fixed <- tibble::tribble(
+  ~LibraryID_tumor, ~portal_run_id,
+  "L2301133", "202309234afd5c1b",
+  "L2301146", "20230929e4ba1e4c",
+  "L2301151", "2023092377ecefc3",
+  "L2301198", "20230929bcb6bec3",
+  "L2301265", "20231021ae547543",
+  "L2301358", "202311111a93c4ad",
+  "L2400346", "202403273f3c403e",
+  "L2400397", "20240415777c0407",
+  "L2400522", "20240519e7f8df41"
+) |>
+  left_join(um_notok, by = c("portal_run_id", "LibraryID_tumor")) |>
+  select(portal_run_id, start, end, year, SubjectID, LibraryID_tumor)
+um_rep <- bind_rows(
+  um_notok_fixed,
+  um_ok
+) |>
+  rename(LibraryID = "LibraryID_tumor") |>
+  left_join(rep_all, by = c("SubjectID", "LibraryID")) |>
+  mutate(
+    end_year = lubridate::year(end),
+    end_month = lubridate::month(end),
+    end_day = lubridate::day(end),
+    AnalysisEnd = as.character(glue::glue("{end_year}-{end_month}-{end_day}")),
+    AnalysisEnd = lubridate::ymd(AnalysisEnd),
+    TotalDays = as.integer(DateReported - AnalysisEnd)
+  )
+p <- um_rep |>
+  ggplot(aes(x = DateReported, y = TotalDays)) +
+  geom_point()
+plotly::ggplotly(p)
+summary(um_rep$TotalDays)
+```
+
+## ctTSO
+
+```{r}
+tso <- wf_name("tso_ctdna_tumor_only")
+# all reported tumor libs have tso runs
+# rep_all |>
+#   filter(Assay == "ctTSO") |>
+#   filter(!LibraryID %in% tso$LibraryID)
+tso_rep <- tso |>
+  filter(LibraryID %in% rep_all$LibraryID) |>
+  select(portal_run_id, start, end, year, SubjectID, LibraryID) |>
+  left_join(rep_all, by = c("SubjectID", "LibraryID")) |>
+  mutate(
+    end_year = lubridate::year(end),
+    end_month = lubridate::month(end),
+    end_day = lubridate::day(end),
+    AnalysisEnd = as.character(glue::glue("{end_year}-{end_month}-{end_day}")),
+    AnalysisEnd = lubridate::ymd(AnalysisEnd),
+    TotalDays = as.integer(DateReported - AnalysisEnd)
+  )
+p <- tso_rep |>
+  ggplot(aes(x = DateReported, y = TotalDays)) +
+  geom_point()
+plotly::ggplotly(p)
+summary(tso_rep$TotalDays)
 ```
 

From 0b349713c4cbf4c72e033de6d31ac8639709f232 Mon Sep 17 00:00:00 2001
From: pdiakumis <peterdiakumis@gmail.com>
Date: Mon, 28 Oct 2024 12:34:52 +1100
Subject: [PATCH 5/8] use flat plots instead

---
 inst/reports/mega_seqrunsum/report.qmd | 63 ++++++++++++++++----------
 1 file changed, 40 insertions(+), 23 deletions(-)

diff --git a/inst/reports/mega_seqrunsum/report.qmd b/inst/reports/mega_seqrunsum/report.qmd
index fc3ecbd..66d3eb5 100644
--- a/inst/reports/mega_seqrunsum/report.qmd
+++ b/inst/reports/mega_seqrunsum/report.qmd
@@ -178,7 +178,22 @@ tot_wf_counts <- wf |>
   tidyr::unnest(main)
 ```
 
-## Total
+---
+
+```{r}
+#| fig-height: 7.5
+ggplot2::theme_set(ggplot2::theme_bw())
+p <- tot_wf_counts |>
+  ggplot(aes(x = name)) +
+  geom_bar(aes(fill = year), position = position_dodge(preserve = "single")) +
+  scale_y_continuous(breaks = scales::pretty_breaks(20)) +
+  coord_flip() +
+  ggtitle("Total number of workflows per year")
+p
+# plotly::ggplotly(p)
+```
+
+---
 
 ```{r}
 tot_wf_counts |>
@@ -187,7 +202,7 @@ tot_wf_counts |>
   reactable::reactable(
     bordered = TRUE,
     filterable = TRUE,
-    height = 600,
+    height = 500,
     fullWidth = TRUE,
     highlight = TRUE,
     pagination = FALSE,
@@ -199,18 +214,6 @@ tot_wf_counts |>
   )
 ```
 
----
-
-```{r}
-ggplot2::theme_set(ggplot2::theme_bw())
-p <- tot_wf_counts |>
-  ggplot(aes(x = name)) +
-  geom_bar(aes(fill = year), position = position_dodge(preserve = "single")) +
-  scale_y_continuous(breaks = scales::pretty_breaks(20)) +
-  coord_flip() +
-  ggtitle("Total number of workflows per year")
-plotly::ggplotly(p)
-```
 
 ```{r}
 wf_name <- function(x) {
@@ -258,9 +261,11 @@ wf_name("bcl_convert") |>
   )
 ```
 
-## Assays
+---
 
 ```{r}
+#| fig-width: 10
+#| fig-height: 7.5
 bcl <- wf_name("bcl_convert") |>
   # select(portal_run_id, start, year, durationMin, LibraryID, batch_name, runfolder_name) |>
   select(portal_run_id, start, year, durationMin, LibraryID, batch_name, runfolder_name, topup_or_rerun) |>
@@ -271,49 +276,57 @@ p <- bcl |>
   ggplot(aes(x = Assay)) +
   geom_bar(aes(fill = year), position = position_dodge(preserve = "single")) +
   scale_y_continuous(breaks = scales::pretty_breaks(20)) +
+  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
   coord_flip() +
   ggtitle("Library assays per year")
-plotly::ggplotly(p)
+p
+# plotly::ggplotly(p)
 ```
 
-## ProjectOwner
+---
 
 ```{r}
+#| fig-height: 7.5
 p <- bcl |>
   # filter(year != "2022") |>
   ggplot(aes(x = ProjectOwner)) +
   geom_bar(aes(fill = year), position = position_dodge(preserve = "single")) +
   scale_y_continuous(breaks = scales::pretty_breaks(20)) +
+  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
   coord_flip() +
   ggtitle("Libraries per ProjectOwner")
-plotly::ggplotly(p)
+p
+# plotly::ggplotly(p)
 ```
 
-
 # Duration
 
 ---
 
 ```{r}
+#| fig-height: 7.5
 p <- tot_wf_counts |>
   ggplot(aes(x = name, y = durationMin)) +
   geom_bar(aes(fill = year), position = position_dodge(preserve = "single"), stat = "summary", fun = "mean") +
   scale_y_continuous(breaks = scales::pretty_breaks(20)) +
   coord_flip() +
   ggtitle("**Average** Runtime Per Workflow (Minutes)")
-plotly::ggplotly(p)
+p
+# plotly::ggplotly(p)
 ```
 
 ---
 
 ```{r}
+#| fig-height: 7.5
 p <- tot_wf_counts |>
   ggplot(aes(x = name, y = durationMin)) +
   geom_bar(aes(fill = year), position = position_dodge(preserve = "single"), stat = "summary", fun = "median") +
   scale_y_continuous(breaks = scales::pretty_breaks(20)) +
   coord_flip() +
   ggtitle("**Median** Runtime Per Workflow (Minutes)")
-plotly::ggplotly(p)
+p
+# plotly::ggplotly(p)
 ```
 
 
@@ -441,6 +454,7 @@ bcl_topups <- bcl |>
 ## Umccrise
 
 ```{r}
+#| fig-height: 5
 um <- wf_name("umccrise")
 # filter(LibraryID_tumor %in% rep_all$LibraryID) |>
 # select(portal_run_id, start, end, durationMin, year, SubjectID, LibraryID_tumor)
@@ -494,13 +508,15 @@ um_rep <- bind_rows(
 p <- um_rep |>
   ggplot(aes(x = DateReported, y = TotalDays)) +
   geom_point()
-plotly::ggplotly(p)
+p
+# plotly::ggplotly(p)
 summary(um_rep$TotalDays)
 ```
 
 ## ctTSO
 
 ```{r}
+#| fig-height: 5
 tso <- wf_name("tso_ctdna_tumor_only")
 # all reported tumor libs have tso runs
 # rep_all |>
@@ -521,7 +537,8 @@ tso_rep <- tso |>
 p <- tso_rep |>
   ggplot(aes(x = DateReported, y = TotalDays)) +
   geom_point()
-plotly::ggplotly(p)
+p
+# plotly::ggplotly(p)
 summary(tso_rep$TotalDays)
 ```
 

From ea1bf9faa31420a453e0f4ffc084b8c6215fa954 Mon Sep 17 00:00:00 2001
From: pdiakumis <peterdiakumis@gmail.com>
Date: Mon, 28 Oct 2024 13:19:27 +1100
Subject: [PATCH 6/8] jitter ftw

---
 inst/reports/mega_seqrunsum/report.qmd | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/inst/reports/mega_seqrunsum/report.qmd b/inst/reports/mega_seqrunsum/report.qmd
index 66d3eb5..550c71d 100644
--- a/inst/reports/mega_seqrunsum/report.qmd
+++ b/inst/reports/mega_seqrunsum/report.qmd
@@ -507,7 +507,8 @@ um_rep <- bind_rows(
   )
 p <- um_rep |>
   ggplot(aes(x = DateReported, y = TotalDays)) +
-  geom_point()
+  geom_jitter(width = 0.8, alpha = 0.5) +
+  ggtitle(glue("{nrow(um_rep)} libraries"))
 p
 # plotly::ggplotly(p)
 summary(um_rep$TotalDays)
@@ -536,7 +537,8 @@ tso_rep <- tso |>
   )
 p <- tso_rep |>
   ggplot(aes(x = DateReported, y = TotalDays)) +
-  geom_point()
+  geom_jitter(width = 0.8, alpha = 0.5) +
+  ggtitle(glue("{nrow(tso_rep)} libraries"))
 p
 # plotly::ggplotly(p)
 summary(tso_rep$TotalDays)

From b0c16f43aa5a234aace43ec5b5ee5744d71c4fb3 Mon Sep 17 00:00:00 2001
From: pdiakumis <peterdiakumis@gmail.com>
Date: Mon, 28 Oct 2024 13:35:41 +1100
Subject: [PATCH 7/8] fix crancheck

---
 .Rbuildignore                            | 3 +++
 R/meta_bcl_convert.R                     | 2 +-
 R/meta_oncoanalyser_wgs.R                | 2 +-
 R/meta_oncoanalyser_wgts_existing_both.R | 2 +-
 R/meta_oncoanalyser_wts.R                | 2 +-
 R/meta_rnasum.R                          | 2 +-
 R/meta_sash.R                            | 2 +-
 R/meta_star_alignment.R                  | 2 +-
 R/meta_tso_ctdna_tumor_only.R            | 2 +-
 R/meta_umccrise.R                        | 2 +-
 R/meta_wgs_alignment_qc.R                | 2 +-
 R/meta_wgs_tumor_normal.R                | 2 +-
 R/meta_wts_alignment_qc.R                | 2 +-
 R/meta_wts_tumor_only.R                  | 2 +-
 14 files changed, 16 insertions(+), 13 deletions(-)

diff --git a/.Rbuildignore b/.Rbuildignore
index eee388e..50bd05c 100644
--- a/.Rbuildignore
+++ b/.Rbuildignore
@@ -17,5 +17,8 @@
 ^nogit$
 ^vignettes$
 inst/reports/datashare/nogit
+inst/reports/mega_seqrunsum/.quarto
+inst/reports/mega_seqrunsum/nogit
+inst/reports/mega_seqrunsum/report_files
 inst/reports/seqrunsum/.quarto
 inst/reports/seqrunsum/nogit
diff --git a/R/meta_bcl_convert.R b/R/meta_bcl_convert.R
index 420fb9d..97774f8 100644
--- a/R/meta_bcl_convert.R
+++ b/R/meta_bcl_convert.R
@@ -64,7 +64,7 @@ meta_bcl_convert <- function(pmeta, status = "Succeeded") {
     dplyr::mutate(
       gds_outdir_reports = file.path(dirname(.data$gds_outdir_multiqc), .data$batch_name, "Reports"),
       year = as.character(lubridate::year(.data$start)),
-      durationMin = round(as.numeric(difftime(end, start, units = "mins")))
+      durationMin = round(as.numeric(difftime(.data$end, .data$start, units = "mins")))
     ) |>
     dplyr::select(
       dplyr::all_of(meta_main_cols()),
diff --git a/R/meta_oncoanalyser_wgs.R b/R/meta_oncoanalyser_wgs.R
index e56764a..11f2176 100644
--- a/R/meta_oncoanalyser_wgs.R
+++ b/R/meta_oncoanalyser_wgs.R
@@ -39,7 +39,7 @@ meta_oncoanalyser_wgs <- function(pmeta, status = "Succeeded") {
       s3_outdir_oncoanalyser = purrr::map_chr(.data$output, "output_directory", .default = NA),
       # other
       year = as.character(lubridate::year(.data$start)),
-      durationMin = round(as.numeric(difftime(end, start, units = "mins")))
+      durationMin = round(as.numeric(difftime(.data$end, .data$start, units = "mins")))
     )
   d |>
     dplyr::select(
diff --git a/R/meta_oncoanalyser_wgts_existing_both.R b/R/meta_oncoanalyser_wgts_existing_both.R
index 2869557..df48c6f 100644
--- a/R/meta_oncoanalyser_wgts_existing_both.R
+++ b/R/meta_oncoanalyser_wgts_existing_both.R
@@ -44,7 +44,7 @@ meta_oncoanalyser_wgts_existing_both <- function(pmeta, status = "Succeeded") {
       s3_outdir_oncoanalyser = purrr::map_chr(.data$output, "output_directory", .default = NA),
       # other
       year = as.character(lubridate::year(.data$start)),
-      durationMin = round(as.numeric(difftime(end, start, units = "mins")))
+      durationMin = round(as.numeric(difftime(.data$end, .data$start, units = "mins")))
     )
   d |>
     dplyr::select(
diff --git a/R/meta_oncoanalyser_wts.R b/R/meta_oncoanalyser_wts.R
index 051bf7e..9828cbe 100644
--- a/R/meta_oncoanalyser_wts.R
+++ b/R/meta_oncoanalyser_wts.R
@@ -37,7 +37,7 @@ meta_oncoanalyser_wts <- function(pmeta, status = "Succeeded") {
       s3_outdir_oncoanalyser = purrr::map_chr(.data$output, "output_directory", .default = NA),
       # other
       year = as.character(lubridate::year(.data$start)),
-      durationMin = round(as.numeric(difftime(end, start, units = "mins")))
+      durationMin = round(as.numeric(difftime(.data$end, .data$start, units = "mins")))
     )
   d |>
     dplyr::select(
diff --git a/R/meta_rnasum.R b/R/meta_rnasum.R
index 9212db6..4865a27 100644
--- a/R/meta_rnasum.R
+++ b/R/meta_rnasum.R
@@ -65,7 +65,7 @@ meta_rnasum <- function(pmeta, status = "Succeeded") {
       gds_outdir_rnasum = purrr::map_chr(.data$output, list("rnasum_output_directory", "location"), .default = NA),
       # other
       year = as.character(lubridate::year(.data$start)),
-      durationMin = round(as.numeric(difftime(end, start, units = "mins")))
+      durationMin = round(as.numeric(difftime(.data$end, .data$start, units = "mins")))
     )
   d |>
     dplyr::select(
diff --git a/R/meta_sash.R b/R/meta_sash.R
index fe4e4c8..1dbe9ea 100644
--- a/R/meta_sash.R
+++ b/R/meta_sash.R
@@ -40,7 +40,7 @@ meta_sash <- function(pmeta, status = "Succeeded") {
       s3_outdir_sash = purrr::map_chr(.data$output, "output_directory", .default = NA),
       # other
       year = as.character(lubridate::year(.data$start)),
-      durationMin = round(as.numeric(difftime(end, start, units = "mins")))
+      durationMin = round(as.numeric(difftime(.data$end, .data$start, units = "mins")))
     )
   d |>
     dplyr::select(
diff --git a/R/meta_star_alignment.R b/R/meta_star_alignment.R
index 728203c..96fe87c 100644
--- a/R/meta_star_alignment.R
+++ b/R/meta_star_alignment.R
@@ -37,7 +37,7 @@ meta_star_alignment <- function(pmeta, status = "Succeeded") {
       s3_outdir_star = purrr::map_chr(.data$output, "output_directory", .default = NA),
       # other
       year = as.character(lubridate::year(.data$start)),
-      durationMin = round(as.numeric(difftime(end, start, units = "mins")))
+      durationMin = round(as.numeric(difftime(.data$end, .data$start, units = "mins")))
     )
   d |>
     dplyr::select(
diff --git a/R/meta_tso_ctdna_tumor_only.R b/R/meta_tso_ctdna_tumor_only.R
index 864e473..b0f7b62 100644
--- a/R/meta_tso_ctdna_tumor_only.R
+++ b/R/meta_tso_ctdna_tumor_only.R
@@ -38,7 +38,7 @@ meta_tso_ctdna_tumor_only <- function(pmeta, status = c("Succeeded")) {
       libid = sub("umccr__automated__tso_ctdna_tumor_only__SBJ.*__(L.*)__.*", "\\1", .data$wfr_name), # equal to libid1 wo _rerun
       # other
       year = as.character(lubridate::year(.data$start)),
-      durationMin = round(as.numeric(difftime(end, start, units = "mins")))
+      durationMin = round(as.numeric(difftime(.data$end, .data$start, units = "mins")))
     )
   d |>
     dplyr::select(
diff --git a/R/meta_umccrise.R b/R/meta_umccrise.R
index ec87099..9592b8e 100644
--- a/R/meta_umccrise.R
+++ b/R/meta_umccrise.R
@@ -79,7 +79,7 @@ meta_umccrise <- function(pmeta, status = "Succeeded") {
       ),
       # other
       year = as.character(lubridate::year(.data$start)),
-      durationMin = round(as.numeric(difftime(end, start, units = "mins")))
+      durationMin = round(as.numeric(difftime(.data$end, .data$start, units = "mins")))
     )
   d |>
     dplyr::select(
diff --git a/R/meta_wgs_alignment_qc.R b/R/meta_wgs_alignment_qc.R
index 0aeea07..c858ccc 100644
--- a/R/meta_wgs_alignment_qc.R
+++ b/R/meta_wgs_alignment_qc.R
@@ -41,7 +41,7 @@ meta_wgs_alignment_qc <- function(pmeta, status = "Succeeded") {
       SubjectID = sub("umccr__.*__wgs_alignment_qc__(SBJ.*)__L.*", "\\1", .data$wfr_name),
       # other
       year = as.character(lubridate::year(.data$start)),
-      durationMin = round(as.numeric(difftime(end, start, units = "mins")))
+      durationMin = round(as.numeric(difftime(.data$end, .data$start, units = "mins")))
     ) |>
     tidyr::separate_wider_delim(
       cols = "rgid", delim = ".",
diff --git a/R/meta_wgs_tumor_normal.R b/R/meta_wgs_tumor_normal.R
index 4b03588..08f9c0e 100644
--- a/R/meta_wgs_tumor_normal.R
+++ b/R/meta_wgs_tumor_normal.R
@@ -68,7 +68,7 @@ meta_wgs_tumor_normal <- function(pmeta, status = "Succeeded") {
       ),
       # other
       year = as.character(lubridate::year(.data$start)),
-      durationMin = round(as.numeric(difftime(end, start, units = "mins")))
+      durationMin = round(as.numeric(difftime(.data$end, .data$start, units = "mins")))
     )
   d |>
     dplyr::select(
diff --git a/R/meta_wts_alignment_qc.R b/R/meta_wts_alignment_qc.R
index baf9d29..74ed756 100644
--- a/R/meta_wts_alignment_qc.R
+++ b/R/meta_wts_alignment_qc.R
@@ -41,7 +41,7 @@ meta_wts_alignment_qc <- function(pmeta, status = "Succeeded") {
       SubjectID = sub("umccr__.*__wts_alignment_qc__(SBJ.*)__L.*", "\\1", .data$wfr_name),
       # other
       year = as.character(lubridate::year(.data$start)),
-      durationMin = round(as.numeric(difftime(end, start, units = "mins")))
+      durationMin = round(as.numeric(difftime(.data$end, .data$start, units = "mins")))
     ) |>
     tidyr::separate_wider_delim(
       cols = "rgid", delim = ".",
diff --git a/R/meta_wts_tumor_only.R b/R/meta_wts_tumor_only.R
index 2b1cc16..1fc57e2 100644
--- a/R/meta_wts_tumor_only.R
+++ b/R/meta_wts_tumor_only.R
@@ -45,7 +45,7 @@ meta_wts_tumor_only <- function(pmeta, status = "Succeeded") {
       ),
       # other
       year = as.character(lubridate::year(.data$start)),
-      durationMin = round(as.numeric(difftime(end, start, units = "mins")))
+      durationMin = round(as.numeric(difftime(.data$end, .data$start, units = "mins")))
     )
   d |>
     dplyr::select(

From d97255ea3d9950722da26a3d3b4f8ba90d146811 Mon Sep 17 00:00:00 2001
From: pdiakumis <peterdiakumis@gmail.com>
Date: Mon, 28 Oct 2024 13:36:00 +1100
Subject: [PATCH 8/8] add lubridate dependency

---
 DESCRIPTION            | 1 +
 conda/recipe/meta.yaml | 2 ++
 2 files changed, 3 insertions(+)

diff --git a/DESCRIPTION b/DESCRIPTION
index 68eb031..5b1cd2b 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -32,6 +32,7 @@ Imports:
     httr2,
     jose,
     jsonlite,
+    lubridate,
     optparse,
     paws,
     purrr,
diff --git a/conda/recipe/meta.yaml b/conda/recipe/meta.yaml
index 2d93f24..7081687 100644
--- a/conda/recipe/meta.yaml
+++ b/conda/recipe/meta.yaml
@@ -29,6 +29,7 @@ requirements:
     - r-httr2
     - r-jose
     - r-jsonlite
+    - r-lubridate
     - r-optparse
     - r-paws
     - r-purrr
@@ -52,6 +53,7 @@ requirements:
     - r-httr2
     - r-jose
     - r-jsonlite
+    - r-lubridate
     - r-optparse
     - r-paws
     - r-purrr