Skip to content

Commit

Permalink
Merge pull request #26 from umccr/annual_summary
Browse files Browse the repository at this point in the history
Annual workflow summary
  • Loading branch information
pdiakumis authored Oct 28, 2024
2 parents 5ea17ae + d97255e commit bf2861d
Show file tree
Hide file tree
Showing 22 changed files with 694 additions and 12 deletions.
3 changes: 3 additions & 0 deletions .Rbuildignore
Original file line number Diff line number Diff line change
Expand Up @@ -17,5 +17,8 @@
^nogit$
^vignettes$
inst/reports/datashare/nogit
inst/reports/mega_seqrunsum/.quarto
inst/reports/mega_seqrunsum/nogit
inst/reports/mega_seqrunsum/report_files
inst/reports/seqrunsum/.quarto
inst/reports/seqrunsum/nogit
1 change: 1 addition & 0 deletions DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ Imports:
httr2,
jose,
jsonlite,
lubridate,
optparse,
paws,
purrr,
Expand Down
7 changes: 6 additions & 1 deletion R/meta_bcl_convert.R
Original file line number Diff line number Diff line change
Expand Up @@ -61,9 +61,14 @@ meta_bcl_convert <- function(pmeta, status = "Succeeded") {
d |>
tidyr::separate_wider_regex("sample", c(sampleid = ".*", "_", libid1 = "L.*"), cols_remove = FALSE) |>
tidyr::separate_wider_regex("libid1", c(libid2 = ".*", "_", topup_or_rerun = ".*"), cols_remove = FALSE, too_few = "align_start") |>
dplyr::mutate(gds_outdir_reports = file.path(dirname(.data$gds_outdir_multiqc), .data$batch_name, "Reports")) |>
dplyr::mutate(
gds_outdir_reports = file.path(dirname(.data$gds_outdir_multiqc), .data$batch_name, "Reports"),
year = as.character(lubridate::year(.data$start)),
durationMin = round(as.numeric(difftime(.data$end, .data$start, units = "mins")))
) |>
dplyr::select(
dplyr::all_of(meta_main_cols()),
"year", "durationMin",
-dplyr::any_of(c("batch_run")), # NA for bcl_convert
SampleID = "sampleid",
LibraryID = "libid2",
Expand Down
6 changes: 5 additions & 1 deletion R/meta_oncoanalyser_wgs.R
Original file line number Diff line number Diff line change
Expand Up @@ -36,11 +36,15 @@ meta_oncoanalyser_wgs <- function(pmeta, status = "Succeeded") {
gds_bam_tumor = purrr::map_chr(.data$input, "tumor_wgs_bam", .default = NA),
gds_bam_normal = purrr::map_chr(.data$input, "normal_wgs_bam", .default = NA),
# output
s3_outdir_oncoanalyser = purrr::map_chr(.data$output, "output_directory", .default = NA)
s3_outdir_oncoanalyser = purrr::map_chr(.data$output, "output_directory", .default = NA),
# other
year = as.character(lubridate::year(.data$start)),
durationMin = round(as.numeric(difftime(.data$end, .data$start, units = "mins")))
)
d |>
dplyr::select(
dplyr::all_of(meta_main_cols()),
"year", "durationMin",
"SubjectID",
"LibraryID_tumor",
"LibraryID_normal",
Expand Down
6 changes: 5 additions & 1 deletion R/meta_oncoanalyser_wgts_existing_both.R
Original file line number Diff line number Diff line change
Expand Up @@ -41,11 +41,15 @@ meta_oncoanalyser_wgts_existing_both <- function(pmeta, status = "Succeeded") {
s3_indir_oncoanalyser_wgs = purrr::map_chr(.data$input, "existing_wgs_dir", .default = NA),
s3_indir_oncoanalyser_wts = purrr::map_chr(.data$input, "existing_wts_dir", .default = NA),
# output
s3_outdir_oncoanalyser = purrr::map_chr(.data$output, "output_directory", .default = NA)
s3_outdir_oncoanalyser = purrr::map_chr(.data$output, "output_directory", .default = NA),
# other
year = as.character(lubridate::year(.data$start)),
durationMin = round(as.numeric(difftime(.data$end, .data$start, units = "mins")))
)
d |>
dplyr::select(
dplyr::all_of(meta_main_cols()),
"year", "durationMin",
"SubjectID",
"LibraryID_tumor_wgs",
"LibraryID_normal_wgs",
Expand Down
6 changes: 5 additions & 1 deletion R/meta_oncoanalyser_wts.R
Original file line number Diff line number Diff line change
Expand Up @@ -34,11 +34,15 @@ meta_oncoanalyser_wts <- function(pmeta, status = "Succeeded") {
LibraryID = purrr::map_chr(.data$input, "tumor_wts_library_id", .default = NA),
s3_bam = purrr::map_chr(.data$input, "tumor_wts_bam", .default = NA),
# output
s3_outdir_oncoanalyser = purrr::map_chr(.data$output, "output_directory", .default = NA)
s3_outdir_oncoanalyser = purrr::map_chr(.data$output, "output_directory", .default = NA),
# other
year = as.character(lubridate::year(.data$start)),
durationMin = round(as.numeric(difftime(.data$end, .data$start, units = "mins")))
)
d |>
dplyr::select(
dplyr::all_of(meta_main_cols()),
"year", "durationMin",
"SubjectID",
"LibraryID",
"SampleID",
Expand Down
4 changes: 4 additions & 0 deletions R/meta_rnasum.R
Original file line number Diff line number Diff line change
Expand Up @@ -63,11 +63,15 @@ meta_rnasum <- function(pmeta, status = "Succeeded") {
# output
gds_outfile_rnasum_html = purrr::map_chr(.data$output, list("rnasum_html", "location"), .default = NA),
gds_outdir_rnasum = purrr::map_chr(.data$output, list("rnasum_output_directory", "location"), .default = NA),
# other
year = as.character(lubridate::year(.data$start)),
durationMin = round(as.numeric(difftime(.data$end, .data$start, units = "mins")))
)
d |>
dplyr::select(
dplyr::all_of(meta_main_cols()),
-dplyr::any_of(c("sequence_run", "batch_run")), # NA for rnasum
"year", "durationMin",
SubjectID = "sbjid1",
LibraryID = "libid1",
SampleID = "rnasum_sample_name",
Expand Down
6 changes: 5 additions & 1 deletion R/meta_sash.R
Original file line number Diff line number Diff line change
Expand Up @@ -37,11 +37,15 @@ meta_sash <- function(pmeta, status = "Succeeded") {
gds_indir_dragen_germline = purrr::map_chr(.data$input, "dragen_germline_dir", .default = NA),
s3_indir_oncoanalyser = purrr::map_chr(.data$input, "oncoanalyser_dir", .default = NA),
# output
s3_outdir_sash = purrr::map_chr(.data$output, "output_directory", .default = NA)
s3_outdir_sash = purrr::map_chr(.data$output, "output_directory", .default = NA),
# other
year = as.character(lubridate::year(.data$start)),
durationMin = round(as.numeric(difftime(.data$end, .data$start, units = "mins")))
)
d |>
dplyr::select(
dplyr::all_of(meta_main_cols()),
"year", "durationMin",
"SubjectID",
"LibraryID_tumor",
"LibraryID_normal",
Expand Down
6 changes: 5 additions & 1 deletion R/meta_star_alignment.R
Original file line number Diff line number Diff line change
Expand Up @@ -34,11 +34,15 @@ meta_star_alignment <- function(pmeta, status = "Succeeded") {
gds_fq_fwd = purrr::map_chr(.data$input, "fastq_fwd", .default = NA),
gds_fq_rev = purrr::map_chr(.data$input, "fastq_rev", .default = NA),
# output
s3_outdir_star = purrr::map_chr(.data$output, "output_directory", .default = NA)
s3_outdir_star = purrr::map_chr(.data$output, "output_directory", .default = NA),
# other
year = as.character(lubridate::year(.data$start)),
durationMin = round(as.numeric(difftime(.data$end, .data$start, units = "mins")))
)
d |>
dplyr::select(
dplyr::all_of(meta_main_cols()),
"year", "durationMin",
"SubjectID",
"LibraryID",
"SampleID",
Expand Down
6 changes: 5 additions & 1 deletion R/meta_tso_ctdna_tumor_only.R
Original file line number Diff line number Diff line change
Expand Up @@ -35,11 +35,15 @@ meta_tso_ctdna_tumor_only <- function(pmeta, status = c("Succeeded")) {
libid1 = sub(".*_(L.*)", "\\1", .data$sample_id),
rerun = grepl("rerun", .data$libid1),
subjectid = sub("umccr__automated__tso_ctdna_tumor_only__(SBJ.*)__L.*", "\\1", .data$wfr_name),
libid = sub("umccr__automated__tso_ctdna_tumor_only__SBJ.*__(L.*)__.*", "\\1", .data$wfr_name) # equal to libid1 wo _rerun
libid = sub("umccr__automated__tso_ctdna_tumor_only__SBJ.*__(L.*)__.*", "\\1", .data$wfr_name), # equal to libid1 wo _rerun
# other
year = as.character(lubridate::year(.data$start)),
durationMin = round(as.numeric(difftime(.data$end, .data$start, units = "mins")))
)
d |>
dplyr::select(
dplyr::all_of(meta_main_cols()),
"year", "durationMin",
SubjectID = "subjectid",
LibraryID = "libid",
SampleID = "sample_name2",
Expand Down
6 changes: 5 additions & 1 deletion R/meta_umccrise.R
Original file line number Diff line number Diff line change
Expand Up @@ -76,12 +76,16 @@ meta_umccrise <- function(pmeta, status = "Succeeded") {
gds_outdir_umccrise1 = purrr::map_chr(.data$output, list("umccrise_output_directory", "location"), .default = NA),
gds_outdir_umccrise = dplyr::if_else(
is.na(.data$gds_outdir_umccrise1), .data$gds_outdir_umccrise2, .data$gds_outdir_umccrise1
)
),
# other
year = as.character(lubridate::year(.data$start)),
durationMin = round(as.numeric(difftime(.data$end, .data$start, units = "mins")))
)
d |>
dplyr::select(
dplyr::all_of(meta_main_cols()),
-dplyr::any_of(c("sequence_run", "batch_run")), # NA for umccrise
"year", "durationMin",
"SubjectID",
"LibraryID_tumor",
"LibraryID_normal",
Expand Down
4 changes: 4 additions & 0 deletions R/meta_wgs_alignment_qc.R
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,9 @@ meta_wgs_alignment_qc <- function(pmeta, status = "Succeeded") {
gds_outdir_dragen = purrr::map_chr(.data$output, list("dragen_alignment_output_directory", "location"), .default = NA),
gds_outdir_multiqc = purrr::map_chr(.data$output, list("multiqc_output_directory", "location"), .default = NA),
SubjectID = sub("umccr__.*__wgs_alignment_qc__(SBJ.*)__L.*", "\\1", .data$wfr_name),
# other
year = as.character(lubridate::year(.data$start)),
durationMin = round(as.numeric(difftime(.data$end, .data$start, units = "mins")))
) |>
tidyr::separate_wider_delim(
cols = "rgid", delim = ".",
Expand All @@ -48,6 +51,7 @@ meta_wgs_alignment_qc <- function(pmeta, status = "Succeeded") {
d |>
dplyr::select(
dplyr::all_of(meta_main_cols()),
"year", "durationMin",
"SubjectID",
LibraryID = "rglb",
SampleID = "rgsm",
Expand Down
11 changes: 10 additions & 1 deletion R/meta_wgs_tumor_normal.R
Original file line number Diff line number Diff line change
Expand Up @@ -60,12 +60,21 @@ meta_wgs_tumor_normal <- function(pmeta, status = "Succeeded") {
gds_outfile_dragen_somatic_snv_vcf = purrr::map_chr(.data$output, list("somatic_snv_vcf_out", "location"), .default = NA),
gds_outfile_dragen_somatic_snv_vcf_hardfilt = purrr::map_chr(.data$output, list("somatic_snv_vcf_hard_filtered_out", "location"), .default = NA),
gds_outfile_dragen_somatic_sv_vcf = purrr::map_chr(.data$output, list("somatic_structural_vcf_out", "location"), .default = NA),
SubjectID = sub("umccr__automated__wgs_tumor_normal__(SBJ.....)__L.*", "\\1", .data$wfr_name) # infer from wfr name
SubjectID = sub("umccr__automated__wgs_tumor_normal__(SBJ.....)__L.*", "\\1", .data$wfr_name),
SubjectID = ifelse(
!grepl("external_apgi", .data$wfr_name),
.data$SubjectID,
sub("umccr__external_apgi__wgs_tumor_normal__(.*)", "\\1", .data$wfr_name)
),
# other
year = as.character(lubridate::year(.data$start)),
durationMin = round(as.numeric(difftime(.data$end, .data$start, units = "mins")))
)
d |>
dplyr::select(
dplyr::all_of(meta_main_cols()),
-dplyr::any_of(c("sequence_run", "batch_run")), # NA for wgs_tumor_normal
"year", "durationMin",
"SubjectID",
"LibraryID_tumor",
"LibraryID_normal",
Expand Down
4 changes: 4 additions & 0 deletions R/meta_wts_alignment_qc.R
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,9 @@ meta_wts_alignment_qc <- function(pmeta, status = "Succeeded") {
gds_outdir_dragen = purrr::map_chr(.data$output, list("dragen_alignment_output_directory", "location"), .default = NA),
gds_outdir_multiqc = purrr::map_chr(.data$output, list("multiqc_output_directory", "location"), .default = NA),
SubjectID = sub("umccr__.*__wts_alignment_qc__(SBJ.*)__L.*", "\\1", .data$wfr_name),
# other
year = as.character(lubridate::year(.data$start)),
durationMin = round(as.numeric(difftime(.data$end, .data$start, units = "mins")))
) |>
tidyr::separate_wider_delim(
cols = "rgid", delim = ".",
Expand All @@ -48,6 +51,7 @@ meta_wts_alignment_qc <- function(pmeta, status = "Succeeded") {
d |>
dplyr::select(
dplyr::all_of(meta_main_cols()),
"year", "durationMin",
"SubjectID",
LibraryID = "rglb",
SampleID = "rgsm",
Expand Down
15 changes: 12 additions & 3 deletions R/meta_wts_tumor_only.R
Original file line number Diff line number Diff line change
Expand Up @@ -28,20 +28,29 @@ meta_wts_tumor_only <- function(pmeta, status = "Succeeded") {
meta_io_fromjson() |>
dplyr::mutate(
# input
rglb = purrr::map_chr(.data$input, \(x) unique(x[["fastq_list_rows"]][["rglb"]])),
rgsm = purrr::map_chr(.data$input, \(x) unique(x[["fastq_list_rows"]][["rgsm"]])),
rglb = purrr::map_chr(.data$input, \(x) unique(x[["fastq_list_rows"]][["rglb"]]) %||% NA),
rgsm = purrr::map_chr(.data$input, \(x) unique(x[["fastq_list_rows"]][["rgsm"]]) %||% NA),
lane = purrr::map_chr(.data$input, \(x) paste(x[["fastq_list_rows"]][["lane"]], collapse = ",")),
lane = as.character(.data$lane),
# output
gds_outdir_dragen = purrr::map_chr(.data$output, list("dragen_transcriptome_output_directory", "location"), .default = NA),
gds_outdir_multiqc = purrr::map_chr(.data$output, list("multiqc_output_directory", "location"), .default = NA),
gds_outdir_arriba = purrr::map_chr(.data$output, list("arriba_output_directory", "location"), .default = NA),
gds_outdir_qualimap = purrr::map_chr(.data$output, list("qualimap_output_directory", "location"), .default = NA),
SubjectID = sub("umccr__.*__wts_tumor_only__(SBJ.*)__L.*", "\\1", .data$wfr_name)
SubjectID = sub("umccr__.*__wts_tumor_only__(SBJ.*)__L.*", "\\1", .data$wfr_name),
SubjectID = ifelse(
!grepl("external_apgi", .data$wfr_name),
.data$SubjectID,
sub("umccr__external_apgi__wts_tumor_only__(.*)", "\\1", .data$wfr_name)
),
# other
year = as.character(lubridate::year(.data$start)),
durationMin = round(as.numeric(difftime(.data$end, .data$start, units = "mins")))
)
d |>
dplyr::select(
dplyr::all_of(meta_main_cols()),
"year", "durationMin",
"SubjectID",
LibraryID = "rglb",
SampleID = "rgsm",
Expand Down
2 changes: 2 additions & 0 deletions conda/recipe/meta.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ requirements:
- r-httr2
- r-jose
- r-jsonlite
- r-lubridate
- r-optparse
- r-paws
- r-purrr
Expand All @@ -52,6 +53,7 @@ requirements:
- r-httr2
- r-jose
- r-jsonlite
- r-lubridate
- r-optparse
- r-paws
- r-purrr
Expand Down
4 changes: 4 additions & 0 deletions inst/reports/mega_seqrunsum/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
/.quarto/
nogit
*html
report_files
13 changes: 13 additions & 0 deletions inst/reports/mega_seqrunsum/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
# Mega Sequencing Run Summary

This report is the same as a typical Sequencing Run Summary, but it covers a large number of samples over a long period of time.

**Contents**

- Visualisation of workflow runtimes based on the PortalDB Workflow table.
- Summary of sample metadata and workflow input/output paths.

**Inputs**

- PortalDB `workflow` slice for a given timeframe.
- PortalDB `limsrow` slice for a given set of library IDs.
2 changes: 2 additions & 0 deletions inst/reports/mega_seqrunsum/_quarto.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
project:
title: "Sequencing Run Summary"
43 changes: 43 additions & 0 deletions inst/reports/mega_seqrunsum/funcs.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
# Helper functions for the mega_seqrunsum report, bundled in one list so they
# are called as `funcs$<name>()`. Every package function is namespace-qualified
# so the helpers do not depend on which packages the caller has attached.
funcs <- list(
  #----#
  # Render `x` as a reactable table preceded by a "CSV" download button.
  #
  # x:      data passed to `reactable::reactable()`.
  # id:     string used as the table's `elementId`; the download button
  #         targets this id and saves the data as `<id>.csv`.
  # height: table height forwarded to `reactable::reactable()`.
  # ...:    further arguments forwarded to `reactable::reactable()`.
  #
  # Returns the button and table as a tag list wrapped in
  # `htmltools::browsable()`.
  dt_view = function(x, id, height = 500, ...) {
    htmltools::browsable(
      htmltools::tagList(
        htmltools::tags$button(
          htmltools::tagList(fontawesome::fa("download"), "CSV"),
          # `{id}` is interpolated from this function's `id` argument.
          onclick = glue::glue("Reactable.downloadDataCSV('{id}', '{id}.csv')")
        ),
        x |>
          reactable::reactable(
            bordered = TRUE,
            filterable = TRUE,
            fullWidth = TRUE,
            height = height,
            highlight = TRUE,
            pagination = FALSE,
            resizable = TRUE,
            searchable = TRUE,
            sortable = TRUE,
            striped = TRUE,
            wrap = FALSE,
            elementId = id,
            ...
          )
      )
    )
  },
  # Parse the string `f` as R code, evaluate it and return the result.
  #
  # NOTE(review): `eval(parse(text = ...))` runs arbitrary code, so `f` must
  # only ever come from trusted, in-repo strings - confirm against callers.
  func_eval = function(f) {
    eval(parse(text = f))
  },
  #----#
  # Add an `ids` list-column to `d` holding the unique values found in the
  # columns of `tidy_meta` whose names contain `id`.
  #
  # d:  data frame with a `tidy_meta` column.
  # id: string matched against the column names of `tidy_meta`.
  #
  # NOTE(review): `.data$tidy_meta` is handed to the helper as a whole, so this
  # presumably expects `d` to be rowwise (or single-row) with `tidy_meta`
  # resolving to a data frame - confirm against the caller.
  get_ids = function(d, id) {
    # Collapse every column whose name contains `id` into one unique vector.
    .get_ids <- function(tbl, id) {
      tbl |>
        dplyr::select(dplyr::contains(id)) |>
        unlist() |>
        unique()
    }
    d |>
      dplyr::mutate(ids = list(.get_ids(.data$tidy_meta, {{ id }})))
  }
)
5 changes: 5 additions & 0 deletions inst/reports/mega_seqrunsum/render.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
# Render report.qmd (resolved relative to the current directory) with Quarto.
# The output file name is set by `out`; the output directory is `nogit`.
out="mega_seqrunsum.html"

# Quote the expansion so the file name is always passed as a single argument.
quarto render report.qmd \
  -o "${out}" \
  --output-dir nogit
Loading

0 comments on commit bf2861d

Please sign in to comment.