Skip to content

Commit

Permalink
Merge branch 'develop' into lk-warp-doc-updates
Browse files Browse the repository at this point in the history
  • Loading branch information
ekiernan committed Sep 12, 2024
2 parents 6ede17f + 07f8b2f commit dc7e04a
Show file tree
Hide file tree
Showing 24 changed files with 238 additions and 74 deletions.
16 changes: 8 additions & 8 deletions pipeline_versions.txt
Original file line number Diff line number Diff line change
@@ -1,15 +1,15 @@
Pipeline Name Version Date of Last Commit
MultiSampleSmartSeq2SingleNucleus 1.4.2 2024-08-25-02
MultiSampleSmartSeq2 2.2.21 2023-04-19
PairedTag 1.6.0 2024-08-02
Optimus 7.6.0 2024-08-06
atac 2.3.0 2024-08-29
MultiSampleSmartSeq2SingleNucleus 2.0.0 2024-09-11
MultiSampleSmartSeq2 2.2.22 2024-09-11
PairedTag 1.6.1 2024-09-11
Optimus 7.6.1 2024-09-11
atac 2.3.1 2024-09-11
snm3C 4.0.4 2024-08-06
SmartSeq2SingleSample 5.1.20 2023-04-19
Multiome 5.6.0 2024-08-02
SmartSeq2SingleSample 5.1.21 2024-09-11
Multiome 5.6.1 2024-09-11
scATAC 1.3.2 2023-08-03
BuildIndices 3.0.0 2023-12-06
SlideSeq 3.4.0 2024-08-06
SlideSeq 3.4.1 2024-09-11
BuildCembaReferences 1.0.0 2020-11-15
CEMBA 1.1.7 2024-09-06
GDCWholeGenomeSomaticSingleSample 1.3.3 2024-09-06
Expand Down
5 changes: 5 additions & 0 deletions pipelines/skylab/atac/atac.changelog.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,8 @@
# 2.3.1
2024-09-11 (Date of Last Commit)

* Updated warp-tools docker which added create_h5ad_snss2.py to the docker image. This change does not affect the atac pipeline

# 2.3.0
2024-08-29 (Date of Last Commit)

Expand Down
2 changes: 1 addition & 1 deletion pipelines/skylab/atac/atac.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ workflow ATAC {
String adapter_seq_read3 = "TCGTCGGCAGCGTCAGATGTGTATAAGAGACAG"
}

String pipeline_version = "2.3.0"
String pipeline_version = "2.3.1"

# Determine docker prefix based on cloud provider
String gcr_docker_prefix = "us.gcr.io/broad-gotc-prod/"
Expand Down
4 changes: 4 additions & 0 deletions pipelines/skylab/multiome/Multiome.changelog.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,7 @@
# 5.6.1
2024-09-11 (Date of Last Commit)
* Updated warp-tools docker which added create_h5ad_snss2.py to the docker image. This change does not affect the Multiome pipeline

# 5.6.0
2024-08-02 (Date of Last Commit)

Expand Down
2 changes: 1 addition & 1 deletion pipelines/skylab/multiome/Multiome.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ import "../../../tasks/broad/Utilities.wdl" as utils

workflow Multiome {

String pipeline_version = "5.6.0"
String pipeline_version = "5.6.1"


input {
Expand Down
4 changes: 4 additions & 0 deletions pipelines/skylab/optimus/Optimus.changelog.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,7 @@
# 7.6.1
2024-09-11 (Date of Last Commit)
* Updated warp-tools docker which added create_h5ad_snss2.py to the docker image. This change does not affect the Optimus pipeline

# 7.6.0
2024-08-06 (Date of Last Commit)

Expand Down
4 changes: 2 additions & 2 deletions pipelines/skylab/optimus/Optimus.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@ workflow Optimus {
# version of this pipeline

String pipeline_version = "7.6.0"
String pipeline_version = "7.6.1"


# this is used to scatter matched [r1_fastq, r2_fastq, i1_fastq] arrays
Expand All @@ -91,7 +91,7 @@ workflow Optimus {
String pytools_docker = "pytools:1.0.0-1661263730"
String empty_drops_docker = "empty-drops:1.0.1-4.2"
String star_docker = "star:1.0.1-2.7.11a-1692706072"
String warp_tools_docker_2_2_0 = "warp-tools:2.2.0"
String warp_tools_docker_2_2_0 = "warp-tools:2.3.0"
String star_merge_docker = "star-merge-npz:1.2"

#TODO how do we handle these?
Expand Down
4 changes: 4 additions & 0 deletions pipelines/skylab/paired_tag/PairedTag.changelog.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,7 @@
# 1.6.1
2024-09-11 (Date of Last Commit)
* Updated warp-tools docker which added create_h5ad_snss2.py to the docker image. This change does not affect the PairedTag pipeline

# 1.6.0
2024-08-02 (Date of Last Commit)

Expand Down
2 changes: 1 addition & 1 deletion pipelines/skylab/paired_tag/PairedTag.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ import "../../../tasks/broad/Utilities.wdl" as utils

workflow PairedTag {

String pipeline_version = "1.6.0"
String pipeline_version = "1.6.1"


input {
Expand Down
5 changes: 5 additions & 0 deletions pipelines/skylab/slideseq/SlideSeq.changelog.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,8 @@
# 3.4.1
2024-09-11 (Date of Last Commit)

* Updated warp-tools docker which added create_h5ad_snss2.py to the docker image. This change does not affect the SlideSeq pipeline

# 3.4.0
2024-08-06 (Date of Last Commit)

Expand Down
4 changes: 2 additions & 2 deletions pipelines/skylab/slideseq/SlideSeq.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ import "../../../tasks/broad/Utilities.wdl" as utils

workflow SlideSeq {

String pipeline_version = "3.4.0"
String pipeline_version = "3.4.1"

input {
Array[File] r1_fastq
Expand All @@ -48,7 +48,7 @@ workflow SlideSeq {
# docker images
String pytools_docker = "pytools:1.0.0-1661263730"
String picard_cloud_docker = "picard-cloud:2.26.10"
String warp_tools_docker_2_2_0 = "warp-tools:2.2.0"
String warp_tools_docker_2_2_0 = "warp-tools:2.3.0"
String star_merge_docker = "star-merge-npz:1.2"

String ubuntu_docker = "ubuntu_16_0_4@sha256:025124e2f1cf4d29149958f17270596bffe13fc6acca6252977c572dd5ba01bf"
Expand Down
Original file line number Diff line number Diff line change
@@ -1,3 +1,8 @@
# 2.2.22
2024-09-11 (Date of Last Commit)

* Updated warp-tools docker which added create_h5ad_snss2.py to the docker image. This change does not affect the MultiSampleSmartSeq2 pipeline

# 2.2.21
2023-04-19 (Date of Last Commit)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ workflow MultiSampleSmartSeq2 {
Boolean paired_end
}
# Version of this pipeline
String pipeline_version = "2.2.21"
String pipeline_version = "2.2.22"

if (false) {
String? none = "None"
Expand Down
Original file line number Diff line number Diff line change
@@ -1,3 +1,8 @@
# 2.0.0
2024-09-11 (Date of Last Commit)

* Added h5ad as a format option for the cell by gene matrix output. The h5ad has the same layers and global attributes (unstructured data in h5ad) as the previous Loom output

# 1.4.2
2024-08-25-02 (Date of Last Commit)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ import "../../../tasks/skylab/TrimAdapters.wdl" as TrimAdapters
import "../../../tasks/skylab/StarAlign.wdl" as StarAlign
import "../../../tasks/skylab/Picard.wdl" as Picard
import "../../../tasks/skylab/FeatureCounts.wdl" as CountAlignments
import "../../../tasks/skylab/LoomUtils.wdl" as LoomUtils
import "../../../tasks/skylab/H5adUtils.wdl" as H5adUtils
import "../../../tasks/broad/Utilities.wdl" as utils

workflow MultiSampleSmartSeq2SingleNucleus {
Expand Down Expand Up @@ -57,7 +57,7 @@ workflow MultiSampleSmartSeq2SingleNucleus {
}

# Version of this pipeline
String pipeline_version = "1.4.2"
String pipeline_version = "2.0.0"

if (false) {
String? none = "None"
Expand Down Expand Up @@ -129,7 +129,7 @@ workflow MultiSampleSmartSeq2SingleNucleus {
annotation_gtf = annotations_gtf
}

call LoomUtils.SingleNucleusSmartSeq2LoomOutput as LoomOutput {
call H5adUtils.SingleNucleusSmartSeq2H5adOutput as H5adOutput {
input:
input_ids = input_ids,
input_names = input_names,
Expand All @@ -144,28 +144,22 @@ workflow MultiSampleSmartSeq2SingleNucleus {
annotation_introns_added_gtf = annotations_gtf
}

### Aggregate the Loom Files Directly ###
call LoomUtils.AggregateSmartSeq2Loom as AggregateLoom {
### Aggregate the H5ad Files Directly ###
call H5adUtils.AggregateSmartSeq2H5ad as AggregateH5ad {
input:
loom_input = LoomOutput.loom_output,
batch_id = batch_id,
batch_name = batch_name,
project_id = if defined(project_id) then select_first([project_id])[0] else none,
project_name = if defined(project_name) then select_first([project_name])[0] else none,
library = if defined(library) then select_first([library])[0] else none,
species = if defined(species) then select_first([species])[0] else none,
organ = if defined(organ) then select_first([organ])[0] else none,
pipeline_version = "MultiSampleSmartSeq2SingleNucleus_v~{pipeline_version}"
h5ad_input = H5adOutput.h5ad_output,
pipeline_version = pipeline_version,
batch_id = batch_id
}



### Pipeline output ###
output {
# loom output, exon/intron count tsv files and the aligned bam files
File loom_output = AggregateLoom.loom_output_file
# h5ad output, exon/intron count tsv files and the aligned bam files
File h5ad_output = AggregateH5ad.h5ad_output_file
File genomic_reference_version = ReferenceCheck.genomic_ref_version
Array[File] exon_intron_count_files = LoomOutput.exon_intron_counts
Array[File] exon_intron_count_files = H5adOutput.exon_intron_counts
Array[File] bam_files = RemoveDuplicatesFromBam.output_bam
String pipeline_version_out = pipeline_version
}
Expand Down
Original file line number Diff line number Diff line change
@@ -1,3 +1,8 @@
# 5.1.21
2024-09-11 (Date of Last Commit)

* Updated warp-tools docker which added create_h5ad_snss2.py to the docker image. This change does not affect the SmartSeq2SingleSample pipeline

# 5.1.20
2023-04-19 (Date of Last Commit)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ workflow SmartSeq2SingleSample {
}

# version of this pipeline
String pipeline_version = "5.1.20"
String pipeline_version = "5.1.21"

parameter_meta {
genome_ref_fasta: "Genome reference in fasta format"
Expand Down
2 changes: 1 addition & 1 deletion tasks/skylab/FastqProcessing.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -138,7 +138,7 @@ task FastqProcessingSlidSeq {
# Runtime attributes
String docker = "us.gcr.io/broad-gotc-prod/warp-tools:2.0.0"
String docker = "us.gcr.io/broad-gotc-prod/warp-tools:2.3.0"
Int cpu = 16
Int machine_mb = 40000
Int disk = ceil(size(r1_fastq, "GiB")*3 + size(r2_fastq, "GiB")*3) + 50
Expand Down
134 changes: 134 additions & 0 deletions tasks/skylab/H5adUtils.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -552,4 +552,138 @@ task SingleNucleusSlideseqH5adOutput {
output {
File h5ad_output = "~{input_id}.h5ad"
}
}
task SingleNucleusSmartSeq2H5adOutput {
  # For every sample (one entry per index of the input arrays) this task:
  #   1. builds a per-gene exon/intron counts table,
  #   2. groups the Picard QC metrics files into a single file,
  #   3. writes an h5ad file from the counts table and the grouped QC file.
  # All per-sample arrays are indexed in parallel, so they must be given in
  # the same sample order and have the same length.
  input {
    # runtime values
    String docker = "us.gcr.io/broad-gotc-prod/warp-tools:2.3.0"

    # per-sample Picard metrics files (grouped together by GroupQCs)
    Array[File] alignment_summary_metrics
    Array[File] dedup_metrics
    Array[File] gc_bias_summary_metrics
    # introns counts
    Array[File] introns_counts
    # exons counts
    Array[File] exons_counts
    # annotation file
    File annotation_introns_added_gtf
    # name of the sample; also used as the prefix of every per-sample output file
    Array[String] input_ids
    # NOTE(review): input_names is accepted but not referenced by the command below
    Array[String]? input_names
    String? input_id_metadata_field
    String? input_name_metadata_field
    String pipeline_version

    Int preemptible = 3
    Int disk = 200
    Int machine_mem_mb = 8000
    Int cpu = 4
  }

  meta {
    description: "This task will convert output from the SmartSeq2SingleNucleus pipeline into an h5ad file. Contrary to the SmartSeq2 single cell where there is only RSEM counts, here we have intronic and exonic counts per gene name"
  }

  parameter_meta {
    preemptible: "(optional) if non-zero, request a pre-emptible instance and allow for this number of preemptions before running the task on a non preemptible machine"
  }

  command <<<
    set -euo pipefail

    # Materialize the WDL arrays as bash arrays so they can be indexed in parallel.
    declare -a introns_counts_files=(~{sep=' ' introns_counts})
    declare -a exons_counts_files=(~{sep=' ' exons_counts})
    declare -a output_prefix=(~{sep=' ' input_ids})
    declare -a alignment_summary_metrics_list=(~{sep=' 'alignment_summary_metrics})
    declare -a dedup_metrics_list=(~{sep=' 'dedup_metrics})
    declare -a gc_bias_summary_metrics_list=(~{sep=' 'gc_bias_summary_metrics})

    for (( i=0; i<${#introns_counts_files[@]}; ++i));
    do
      # creates a table with gene_id, gene_name, intron and exon counts
      echo "Running create_snss2_counts_csv."
      python /warptools/scripts/create_snss2_counts_csv.py \
        --in-gtf "~{annotation_introns_added_gtf}" \
        --intron-counts "${introns_counts_files[$i]}" \
        --exon-counts "${exons_counts_files[$i]}" \
        -o "${output_prefix[$i]}.exon_intron_counts.tsv"
      echo "Success create_snss2_counts_csv."

      # groups the QC file into one file
      # (the next step reads "<prefix>.Picard_group.csv", so GroupQCs presumably
      # appends ".csv" to the -o value -- TODO confirm)
      echo "Running GroupQCs"
      GroupQCs -f "${alignment_summary_metrics_list[$i]}" "${dedup_metrics_list[$i]}" "${gc_bias_summary_metrics_list[$i]}" \
        -t Picard -o "${output_prefix[$i]}.Picard_group"
      echo "Success GroupQCs"

      # create the h5ad file
      # (the output section globs "*.h5ad", so the script presumably appends
      # ".h5ad" to --output_h5ad_path -- TODO confirm)
      echo "Running create_h5ad_snss2."
      python3 /warptools/scripts/create_h5ad_snss2.py \
        --qc_files "${output_prefix[$i]}.Picard_group.csv" \
        --count_results "${output_prefix[$i]}.exon_intron_counts.tsv" \
        --output_h5ad_path "${output_prefix[$i]}" \
        --input_id "${output_prefix[$i]}" \
        ~{"--input_id_metadata_field " + input_id_metadata_field} \
        ~{"--input_name_metadata_field " + input_name_metadata_field} \
        --pipeline_version "~{pipeline_version}"
      echo "Success create_h5ad_snss2"
    done;
  >>>

  runtime {
    docker: docker
    cpu: cpu
    memory: "~{machine_mem_mb} MiB"
    disks: "local-disk ~{disk} HDD"
    disk: disk + " GB" # TES
    preemptible: preemptible
  }

  output {
    # every h5ad / counts table written to the working directory by the loop above
    Array[File] h5ad_output = glob("*.h5ad")
    Array[File] exon_intron_counts = glob("*exon_intron_counts.tsv")
  }
}
task AggregateSmartSeq2H5ad {
  # Merges the per-sample h5ad files of one batch into a single
  # "<batch_id>.h5ad" file by calling ss2_h5ad_merge.py.
  input {
    # h5ad files to merge
    Array[File] h5ad_input
    # batch identifier; passed to the merge script and used as the output file name
    String batch_id
    String pipeline_version

    # runtime values
    String docker = "us.gcr.io/broad-gotc-prod/warp-tools:2.3.0"
    Int disk = 200
    Int machine_mem_mb = 4000
    Int cpu = 1
    # overridable like in the other SmartSeq2 h5ad task; default keeps the previous fixed value
    Int preemptible = 3
  }

  meta {
    description: "aggregate the H5AD output"
  }

  parameter_meta {
    preemptible: "(optional) if non-zero, request a pre-emptible instance and allow for this number of preemptions before running the task on a non preemptible machine"
  }

  command <<<
    set -euo pipefail

    # Merge the h5ad files
    # batch_id and pipeline_version are quoted so that values containing
    # whitespace or shell metacharacters reach the script as a single argument.
    python3 /warptools/scripts/ss2_h5ad_merge.py \
      --input-h5ad-files ~{sep=' ' h5ad_input} \
      --output-h5ad-file "~{batch_id}.h5ad" \
      --batch_id "~{batch_id}" \
      --pipeline_version "~{pipeline_version}"
  >>>

  output {
    File h5ad_output_file = "~{batch_id}.h5ad"
  }

  runtime {
    docker: docker
    cpu: cpu
    memory: "~{machine_mem_mb} MiB"
    disks: "local-disk ~{disk} HDD"
    disk: disk + " GB" # TES
    preemptible: preemptible
    maxRetries: 1
  }
}
Loading

0 comments on commit dc7e04a

Please sign in to comment.