diff --git a/.pullapprove.yml b/.pullapprove.yml
index 0b9e694ebf..315688e47c 100644
--- a/.pullapprove.yml
+++ b/.pullapprove.yml
@@ -47,6 +47,7 @@ groups:
'tasks/broad/IlluminaGenotypingArrayTasks.wdl' in files or
'tasks/broad/InternalArraysTasks.wdl' in files or
'tasks/broad/InternalTasks.wdl' in files or
+ 'tasks/broad/Qc.wdl' in files or
'tasks/broad/Utilities.wdl' in files or
'verification/VerifyArrays.wdl' in files or
'verification/VerifyIlluminaGenotypingArray.wdl' in files or
@@ -138,6 +139,8 @@ groups:
'tasks/broad/BamProcessing.wdl' in files or
'tasks/broad/BamToCram.wdl' in files or
'tasks/broad/CopyFilesFromCloudToCloud.wdl' in files or
+ 'tasks/broad/DragenTasks.wdl' in files or
+ 'tasks/broad/DragmapAlignment.wdl' in files or
'tasks/broad/GermlineVariantDiscovery.wdl' in files or
'tasks/broad/Qc.wdl' in files or
'tasks/broad/SplitLargeReadGroup.wdl' in files or
@@ -148,6 +151,7 @@ groups:
'verification/VerifyReprocessing.wdl' in files or
'verification/VerifyTasks.wdl' in files or
'pipelines/broad/dna_seq/germline/single_sample/exome' in files or
+ 'pipelines/broad/dna_seq/germline/single_sample/ugwgs' in files or
'pipelines/broad/dna_seq/germline/single_sample/wgs' in files or
'pipelines/broad/reprocessing/cram_to_unmapped_bams' in files or
'pipelines/broad/reprocessing/exome' in files or
@@ -160,7 +164,7 @@ groups:
request: 2
reviewers:
users:
- - ldgauthier # Laura Gauthier
+ - samuelklee # Samuel Lee
- kachulis # Chris Kachulis
scientific_owners_joint_genotyping:
@@ -173,13 +177,19 @@ groups:
'pipelines/broad/dna_seq/germline/joint_genotyping/by_chromosome/JointGenotypingByChromosomePartOne.wdl' in files or
'pipelines/broad/dna_seq/germline/joint_genotyping/by_chromosome/JointGenotypingByChromosomePartTwo.wdl' in files or
'pipelines/broad/dna_seq/germline/joint_genotyping/reblocking/ReblockGVCF.wdl' in files or
+ 'tasks/broad/GermlineVariantDiscovery.wdl' in files or
'tasks/broad/JointGenotypingTasks.wdl' in files or
+ 'tasks/broad/Qc.wdl' in files or
'pipelines/broad/dna_seq/germline/joint_genotyping/JointGenotyping.changelog.md' in files or
'pipelines/broad/dna_seq/germline/joint_genotyping/JointGenotyping.options.json' in files or
'pipelines/broad/dna_seq/germline/joint_genotyping/JointGenotyping.wdl' in files or
+ 'pipelines/broad/dna_seq/germline/joint_genotyping/JointGenotypingOnReblockedValidate.md' in files or
+ 'pipelines/broad/dna_seq/germline/joint_genotyping/UltimaGenomics' in files or
'pipelines/broad/dna_seq/germline/joint_genotyping/by_chromosome' in files or
'pipelines/broad/dna_seq/germline/joint_genotyping/exome' in files or
'pipelines/broad/dna_seq/germline/joint_genotyping/reblocking' in files or
+ 'pipelines/broad/dna_seq/germline/joint_genotyping/test_data_overview.md' in files or
+ 'pipelines/broad/dna_seq/germline/joint_genotyping/test_inputs' in files or
'pipelines/broad/dna_seq/germline/joint_genotyping/wgs' in files
reviews:
@@ -189,7 +199,7 @@ groups:
request_order: given
reviewers:
users:
- - ldgauthier # Laura Gauthier
+ - samuelklee # Samuel Lee
scientific_owners_somatic_single_sample:
conditions:
@@ -203,6 +213,7 @@ groups:
'tasks/broad/Alignment.wdl' in files or
'tasks/broad/BamProcessing.wdl' in files or
'tasks/broad/BamToCram.wdl' in files or
+ 'tasks/broad/DragmapAlignment.wdl' in files or
'tasks/broad/Qc.wdl' in files or
'tasks/broad/SplitLargeReadGroup.wdl' in files or
'tasks/broad/UnmappedBamToAlignedBam.wdl' in files or
@@ -235,7 +246,8 @@ groups:
'pipelines/broad/arrays/imputation/Imputation.changelog.md' in files or
'pipelines/broad/arrays/imputation/Imputation.options.json' in files or
'pipelines/broad/arrays/imputation/Imputation.wdl' in files or
- 'pipelines/broad/arrays/imputation/example_inputs.json' in files
+ 'pipelines/broad/arrays/imputation/example_inputs.json' in files or
+ 'pipelines/broad/arrays/imputation/test_inputs' in files
reviews:
required: 1
@@ -262,6 +274,8 @@ groups:
'tasks/broad/Alignment.wdl' in files or
'tasks/broad/BamProcessing.wdl' in files or
'tasks/broad/BamToCram.wdl' in files or
+ 'tasks/broad/DragenTasks.wdl' in files or
+ 'tasks/broad/DragmapAlignment.wdl' in files or
'tasks/broad/GermlineVariantDiscovery.wdl' in files or
'tasks/broad/Qc.wdl' in files or
'tasks/broad/SplitLargeReadGroup.wdl' in files or
diff --git a/beta-pipelines/skylab/ATAC/ATAC.wdl b/beta-pipelines/skylab/ATAC/ATAC.wdl
index c1c791e1ba..d61d8ba9de 100644
--- a/beta-pipelines/skylab/ATAC/ATAC.wdl
+++ b/beta-pipelines/skylab/ATAC/ATAC.wdl
@@ -230,7 +230,7 @@ task BWAPairedEndAlignment {
String read_group_sample_name
Int cpu
String output_base_name
- String docker_image = "quay.io/humancellatlas/snaptools:0.0.1"
+ String docker_image = "us.gcr.io/broad-gotc-prod/bwa:1.0.0-0.7.17-1660770463"
}
parameter_meta {
@@ -241,7 +241,7 @@ task BWAPairedEndAlignment {
read_group_sample_name: "the read group sample to be added upon alignment"
cpu: "the number of cpu cores to use during alignment"
output_base_name: "basename to be used for the output of the task"
- docker_image: "the docker image using BWA to be used (default: quay.io/humancellatlas/snaptools:0.0.1)"
+ docker_image: "the docker image using BWA to be used (default: us.gcr.io/broad-gotc-prod/bwa:1.0.0-0.7.17-1660770463)"
}
# runtime requirements based upon input file size
@@ -580,7 +580,7 @@ task SnapPre {
String genome_name
Int max_fragment_length
File genome_size_file
- String docker_image = "quay.io/humancellatlas/snaptools:0.0.1"
+ String docker_image = "us.gcr.io/broad-gotc-prod/snaptools-bwa:1.0.0-1.4.8-0.7.17-1660844602"
}
parameter_meta {
@@ -589,7 +589,7 @@ task SnapPre {
genome_name: "the name of the genome being analyzed"
max_fragment_length: "the maximum fragment length for filtering out reads by snap-pre (snaptools task)"
genome_size_file: "size for the chromoomes for the genome; ex: mm10.chrom.size"
- docker_image: "the docker image using snaptools to be used (default: quay.io/humancellatlas/snaptools:0.0.1)"
+ docker_image: "the docker image using snaptools to be used (default: us.gcr.io/broad-gotc-prod/snaptools-bwa:1.0.0-1.4.8-0.7.17-1660844602)"
}
String snap_file_output_name = output_base_name + ".snap"
@@ -635,14 +635,14 @@ task SnapCellByBin {
File snap_input
String bin_size_list
String snap_output_name = "output.snap"
- String docker_image = "quay.io/humancellatlas/snaptools:0.0.1"
+ String docker_image = "us.gcr.io/broad-gotc-prod/snaptools-bwa:1.0.0-1.4.8-0.7.17-1660844602"
}
parameter_meta {
snap_input: "the bam to passed into snaptools tools"
bin_size_list: "space separated list of bins to generate"
snap_output_name: "output.snap"
- docker_image: "the docker image to be used (default: quay.io/humancellatlas/snaptools:0.0.1)"
+ docker_image: "the docker image to be used (default: us.gcr.io/broad-gotc-prod/snaptools-bwa:1.0.0-1.4.8-0.7.17-1660844602)"
}
Int num_threads = 1
@@ -673,13 +673,13 @@ task MakeCompliantBAM {
input {
File bam_input
String output_base_name
- String docker_image = "quay.io/humancellatlas/snaptools:0.0.1"
+ String docker_image = "us.gcr.io/broad-gotc-prod/pytools:1.0.0-1661263730"
}
parameter_meta {
bam_input: "the bam with barcodes in the read ids that need to be converted to barcodes in bam tags"
output_base_name: "base name to be used for the output of the task"
- docker_image: "the docker image using the python script to convert the bam barcodes/read ids (default: quay.io/humancellatlas/snaptools:0.0.1)"
+ docker_image: "the docker image using the python script to convert the bam barcodes/read ids (default: us.gcr.io/broad-gotc-prod/pytools:1.0.0-1661263730)"
}
Int disk_size = ceil(2.5 * (if size(bam_input, "GiB") < 1 then 1 else size(bam_input, "GiB")))
@@ -687,7 +687,7 @@ task MakeCompliantBAM {
String compliant_bam_output_name = output_base_name + ".compliant.bam"
command {
- makeCompliantBAM.py \
+ /usr/gitc/makeCompliantBAM.py \
--input-bam ~{bam_input} \
--output-bam ~{compliant_bam_output_name}
}
@@ -707,7 +707,7 @@ task MakeCompliantBAM {
task BreakoutSnap {
input {
File snap_input
- String docker_image = "quay.io/humancellatlas/snap-breakout:0.0.1"
+ String docker_image = "us.gcr.io/broad-gotc-prod/pytools:1.0.0-1661263730"
String bin_size_list
}
Int num_threads = 1
@@ -715,7 +715,7 @@ task BreakoutSnap {
command {
set -euo pipefail
mkdir output
- breakoutSnap.py --input ~{snap_input} \
+ /usr/gitc/breakoutSnap.py --input ~{snap_input} \
--output-prefix output/
}
output {
diff --git a/dockers/README.md b/dockers/README.md
index 7a9df45686..e8c9ed7da0 100644
--- a/dockers/README.md
+++ b/dockers/README.md
@@ -8,6 +8,7 @@ This style guide provides formatting guidelines and best practices for writing D
* [Goals](#goals)
* [Small images](#small)
* [Alpine base](#alpine)
+ * [Specifying image platform](#platform)
* [Minimal RUN steps](#minimal-run)
* [Publicly accessible](#publicly)
* [Image scanning](#scanning)
@@ -15,7 +16,7 @@ This style guide provides formatting guidelines and best practices for writing D
* [Proper process reaping](#process)
* [Build Scripts and README](#build)
* [Formatting](#formatting)
-* [Troubleshooting](#trouble)
+* [Troubleshooting and running standalone](#trouble)
## Overview
WARP maintains a collection of docker images which are used as execution environments for various cloud-optimized data processing pipelines. Many of these image require specific sets of tools and dependencies to run and can be thought of as _custom_ images rather than traditional application images.
@@ -35,7 +36,7 @@ The easiest way to have a small image is to use an [Alpine](https://alpinelinux.
Along with being a small base, Alpine also has built in deletion of package index and provides [tini](https://github.com/krallin/tini) natively through APK.
-There are some instances where a Debian base image is unavoidable, specifically in the case where dependencies don't exists in APK. It is suggested that you only go to a Debian base as a last resort.
+There are some instances where a Debian base image is unavoidable, specifically in the case where dependencies don't exist in APK. It is suggested that you only go to a Debian base as a last resort.
##### :eyes: Example
@@ -62,12 +63,23 @@ RUN set -eux; \
bash \
```
+#### Specifying image platform
+
+Docker images built on ARM-based machines such as the new M-series Macs may run into execution issues with our automated PR test suite.
+One way to avoid these issues is to use a `linux/amd64` base image by including the `--platform="linux/amd64"` flag after the `FROM` keyword.
+
+##### :eyes: Example
+```dockerfile
+# Use the amd64 version of alpine
+FROM --platform="linux/amd64" alpine
+```
+
#### Minimal RUN steps
Having minimal `RUN`steps (ideally one) is another highly effective way to reduce the size of your image. Each instruction in a Dockerfile creates a [layer](https://docs.docker.com/storage/storagedriver/) and these layers are what add up to build the final image.
-When you use multple `RUN` steps it creates additional unnecessary layers and bloats your image.
+When you use multiple `RUN` steps it creates additional unnecessary layers and bloats your image.
-An alternative to having a single `RUN` step is to use [multi-stage builds](https://docs.docker.com/develop/develop-images/multistage-build/) which are effective when the application your are containerizing is just a statically linked binary.
+An alternative to having a single `RUN` step is to use [multi-stage builds](https://docs.docker.com/develop/develop-images/multistage-build/) which are effective when the application you are containerizing is just a statically linked binary.
Just to note, many of the images maintained in WARP require a handful of system-level dependencies and custom packages so multi-stages builds are typically not used.
##### :eyes: Example
@@ -84,7 +96,7 @@ RUN set -eux; \
apk add --no-cache \
curl \
bash \
- ; \
+ ; \
wget https://www.somezipfile.com/zip; \
unzip zip
```
@@ -96,22 +108,22 @@ The pipelines that we maintain in WARP are designed for public use, ideally we w
* Anybody can pull our images
* Anybody can build our images
-For anybody to be able to pull our images they must be hosted on a public container registry, we host all of our images in publics repos on GCR (our 'official' location) and Quay (for discoverability).
+For anybody to be able to pull our images they must be hosted on a public container registry, we host all of our images in public repos on GCR (our 'official' location) and Quay (for discoverability).
* GCR - `us.gcr.io/broad-gotc-prod`
* Quay - `quay.io/broadinstitute/broad-gotc-prod`
-For anybody to be able to build our images all of the functionality should be encapsulated in the Dockerfile. Any custom software packages, dependencies etc. have to be downloaded from public links within the Dockerfile, this obviously means that we should not be copying files from within the Broad network infrastucture into our images.
+For anybody to be able to build our images, all functionality should be encapsulated in the Dockerfile. Any custom software packages, dependencies etc. have to be downloaded from public links within the Dockerfile, this obviously means that we should not be copying files from within the Broad network infrastructure into our images.
### Image scanning
-All of the images that we build are scanned for critical vulnerabilities on every pull request. For this we use a github-action that leverages [trivy](https://github.com/aquasecurity/trivy) for scanning. If you build a new image please add it to the action [here](../.github/workflows/trivy.yml).
+All images that we build are scanned for critical vulnerabilities on every pull request. For this we use a github-action that leverages [trivy](https://github.com/aquasecurity/trivy) for scanning. If you build a new image please add it to the action [here](../.github/workflows/trivy.yml).
### Semantic tagging
-We recommend against using rolling tags like `master` or `latest` when building images. Rolling tags make it hard to track down versions of images since the underlying image hash and content could be different across the same tags. Instead we ask that you use a semantic tag that follows the convention below:
+We recommend against using rolling tags like `master` or `latest` when building images. Rolling tags make it hard to track down versions of images since the underlying image hash and content could be different across the same tags. Instead, we ask that you use a semantic tag that follows the convention below:
##### `us.gcr.io/broad-gotc-prod/samtools:--`
@@ -120,7 +132,7 @@ This example is for an image we use containing `samtools`. The 'image-version' i
### Proper process reaping
-Classic init systems like systemd are used to reap orphaned, zombie processes. Typically these orphaned processes are reattached to the process at PID 1 which will reap them when they die. In a container this responsibility falls to process at PID 1 which is by default `/bin/sh`...this obviously will not handle process reaping. Because of this you run the risk of expending excess memory or resources within your container. A simple solution to this is to use `tini` in all of our images, a lengthy explanation of what this package does can be found [here](https://github.com/krallin/tini/issues/8).
+Classic init systems like systemd are used to reap orphaned, zombie processes. Typically, these orphaned processes are reattached to the process at PID 1 which will reap them when they die. In a container this responsibility falls to process at PID 1 which is by default `/bin/sh`...this obviously will not handle process reaping. Because of this you run the risk of expending excess memory or resources within your container. A simple solution to this is to use `tini` in all of our images, a lengthy explanation of what this package does can be found [here](https://github.com/krallin/tini/issues/8).
Luckily `tini` is available natively through APK so all you have to do is install it and set it as the default entrypoint!
@@ -129,7 +141,7 @@ Luckily `tini` is available natively through APK so all you have to do is instal
FROM alpine:3.9
-RUN set -eux;
+RUN set -eux; \
apk add --no-cache \
tini
@@ -146,7 +158,7 @@ See the examples for samtools([docker_build](./broad/samtools/docker_build.sh),
## Formatting
-Formatting our Dockerfiles consistenty helps improve readability and eases maintenance headaches down the road. The following are a couple of tenants that we follow when writing our Dockerfiles:
+Formatting our Dockerfiles consistently helps improve readability and eases maintenance headaches down the road. The following are a couple of tenets that we follow when writing our Dockerfiles:
* ARGS, ENV, LABEL in that order
* Always add versions of tools in the LABEL
@@ -154,7 +166,7 @@ Formatting our Dockerfiles consistenty helps improve readability and eases maint
* Alphabetize package install
* Clean up package index cache
* Use ; instead of && for line continuation
-* Logically seperate steps within RUN
+* Logically separate steps within RUN
* Four spaces per tab indent
* Short comments to describe each step
* tini is always default entrypoint
@@ -180,13 +192,13 @@ WORKDIR /usr/gitc
# Install dependencies
RUN set -eux; \
apt-get update; \
- apt-get install -y \
+ apt-get install -y \
autoconf \
cmake \
g++ \
gcc \
git \
- libbz2-dev \
+ libbz2-dev \
libcurl4-openssl-dev \
libhts-dev \
libssl-dev \
@@ -222,6 +234,12 @@ RUN set -eux; \
ENTRYPOINT [ "/sbin/tini", "--" ]
```
-## Troubleshooting
+## Troubleshooting and running standalone
+
+The WARP dockers are designed to be run from their respective WDL pipelines. However, if you need to run a Docker independently of a WDL for testing or troubleshooting, you'll likely need to explicitly instruct it to run a `bash` shell in the `run` command. An example of this is shown in the terminal command below:
+
+```bash
docker run -it --rm <image> bash
+```
If you have any questions or would like some more guidance on writing Dockerfiles please file a [GitHub issue in WARP](https://github.com/broadinstitute/warp/issues/new).
diff --git a/dockers/broad/imputation/bcftools_vcftools/Dockerfile b/dockers/broad/imputation/bcftools_vcftools/Dockerfile
index 843e5992ec..7385a5fefd 100644
--- a/dockers/broad/imputation/bcftools_vcftools/Dockerfile
+++ b/dockers/broad/imputation/bcftools_vcftools/Dockerfile
@@ -1,4 +1,4 @@
-FROM python:3.8-alpine
+FROM --platform=linux/amd64 python:3.8-alpine
ARG BCFTOOLS_VERSION=1.10.2 \
VCFTOOLS_VERSION=0.1.16
@@ -70,7 +70,7 @@ RUN set -eux; \
./configure; \
make; \
make install; \
- \
+ \
cd ../..; \
rm -r samtools-1.10; \
rm samtools-1.10.tar.bz2
diff --git a/dockers/broad/imputation/bcftools_vcftools/README.md b/dockers/broad/imputation/bcftools_vcftools/README.md
index 5bfc4fa396..ec914326ce 100644
--- a/dockers/broad/imputation/bcftools_vcftools/README.md
+++ b/dockers/broad/imputation/bcftools_vcftools/README.md
@@ -4,7 +4,7 @@
Copy and paste to pull this image
-#### `us.gcr.io/broad-gotc-prod/imputation-bcf-vcf:1.0.5-1.10.2-0.1.16-1649948623`
+#### `us.gcr.io/broad-gotc-prod/imputation-bcf-vcf:1.0.6-1.10.2-0.1.16-1663946207`
- __What is this image:__ This image is a lightweight alpine-based image for running BCFtools and VCFtools for the [Imputation pipeline](../../../../pipelines/broad/arrays/imputation/Imputation.wdl).
- __What are BFCtools and VCFtools:__ BCFtools and VCFtools are a suite of tools for variant calling and manipulating BCFs and VCFs. See [here](https://github.com/samtools/vcftools) and [here](https://vcftools.github.io/man_latest.html) more information.
@@ -21,8 +21,8 @@ We keep track of all past versions in [docker_versions](docker_versions.tsv) wit
You can see more information about the image, including the tool versions, by running the following command:
```bash
-$ docker pull us.gcr.io/broad-gotc-prod/imputation-bcf-vcf:1.0.5-1.10.2-0.1.16-1649948623
-$ docker inspect us.gcr.io/broad-gotc-prod/imputation-bcf-vcf:1.0.5-1.10.2-0.1.16-1649948623
+$ docker pull us.gcr.io/broad-gotc-prod/imputation-bcf-vcf:1.0.6-1.10.2-0.1.16-1663946207
+$ docker inspect us.gcr.io/broad-gotc-prod/imputation-bcf-vcf:1.0.6-1.10.2-0.1.16-1663946207
```
## Usage
@@ -31,12 +31,12 @@ $ docker inspect us.gcr.io/broad-gotc-prod/imputation-bcf-vcf:1.0.5-1.10.2-0.1.1
```bash
$ docker run --rm -it \
- us.gcr.io/broad-gotc-prod/imputation-bcf-vcf:1.0.5-1.10.2-0.1.16-1649948623 bcftools
+ us.gcr.io/broad-gotc-prod/imputation-bcf-vcf:1.0.6-1.10.2-0.1.16-1663946207 bcftools
```
### Display VCFtools default menu
```bash
$ docker run --rm -it \
- us.gcr.io/broad-gotc-prod/imputation-bcf-vcf:1.0.5-1.10.2-0.1.16-1649948623 vcftools
+ us.gcr.io/broad-gotc-prod/imputation-bcf-vcf:1.0.6-1.10.2-0.1.16-1663946207 vcftools
```
diff --git a/dockers/broad/imputation/bcftools_vcftools/docker_build.sh b/dockers/broad/imputation/bcftools_vcftools/docker_build.sh
index 5ab6ba8906..11c6ba2634 100755
--- a/dockers/broad/imputation/bcftools_vcftools/docker_build.sh
+++ b/dockers/broad/imputation/bcftools_vcftools/docker_build.sh
@@ -2,13 +2,13 @@
set -e
# Update version when changes to Dockerfile are made
-DOCKER_IMAGE_VERSION=1.0.5
+DOCKER_IMAGE_VERSION=1.0.6
TIMESTAMP=$(date +"%s")
DIR=$(cd $(dirname $0) && pwd)
# Registries and tags
GCR_URL="us.gcr.io/broad-gotc-prod/imputation-bcf-vcf"
-QUAY_URL="quay.io/broadinstitute/gotc-prod-imputation_bcf_vcf"
+# QUAY_URL="quay.io/broadinstitute/gotc-prod-imputation_bcf_vcf"
#BCFTOOLS version
BCFTOOLS_VERSION="1.10.2"
@@ -72,9 +72,9 @@ function main(){
--no-cache $DIR
docker push "$GCR_URL:$IMAGE_TAG"
- echo "tagging and pushing Quay Image"
- docker tag "$GCR_URL:$IMAGE_TAG" "$QUAY_URL:$IMAGE_TAG"
- docker push "$QUAY_URL:$IMAGE_TAG"
+# echo "tagging and pushing Quay Image"
+# docker tag "$GCR_URL:$IMAGE_TAG" "$QUAY_URL:$IMAGE_TAG"
+# docker push "$QUAY_URL:$IMAGE_TAG"
echo -e "$GCR_URL:$IMAGE_TAG" >> "$DIR/docker_versions.tsv"
echo "done"
diff --git a/dockers/broad/imputation/bcftools_vcftools/docker_versions.tsv b/dockers/broad/imputation/bcftools_vcftools/docker_versions.tsv
index 08e715f92b..09e858eb30 100644
--- a/dockers/broad/imputation/bcftools_vcftools/docker_versions.tsv
+++ b/dockers/broad/imputation/bcftools_vcftools/docker_versions.tsv
@@ -6,3 +6,4 @@ us.gcr.io/broad-gotc-prod/imputation-bcf-vcf:1.0.2-1.10.2-0.1.16-1642608127
us.gcr.io/broad-gotc-prod/imputation-bcf-vcf:1.0.3-1.10.2-0.1.16-1644255588
us.gcr.io/broad-gotc-prod/imputation-bcf-vcf:1.0.4-1.10.2-0.1.16-1646091598
us.gcr.io/broad-gotc-prod/imputation-bcf-vcf:1.0.5-1.10.2-0.1.16-1649948623
+us.gcr.io/broad-gotc-prod/imputation-bcf-vcf:1.0.6-1.10.2-0.1.16-1663946207
diff --git a/dockers/broad/imputation/minimac4/Dockerfile b/dockers/broad/imputation/minimac4/Dockerfile
index 0ae8020a38..c71139c592 100644
--- a/dockers/broad/imputation/minimac4/Dockerfile
+++ b/dockers/broad/imputation/minimac4/Dockerfile
@@ -1,5 +1,5 @@
# Minimac image uses bcftools
-FROM us.gcr.io/broad-gotc-prod/imputation-bcf-vcf:1.0.5-1.10.2-0.1.16-1649948623
+FROM --platform=linux/amd64 us.gcr.io/broad-gotc-prod/imputation-bcf-vcf:1.0.6-1.10.2-0.1.16-1663946207
ARG MINIMAC4_VERSION=1.0.2
diff --git a/dockers/broad/imputation/minimac4/README.md b/dockers/broad/imputation/minimac4/README.md
index 7bdddb3dfe..b37c3fadd0 100644
--- a/dockers/broad/imputation/minimac4/README.md
+++ b/dockers/broad/imputation/minimac4/README.md
@@ -4,7 +4,7 @@
Copy and paste to pull this image
-#### `us.gcr.io/broad-gotc-prod/imputation-minimac4:1.0.5-1.0.2-1649949471`
+#### `us.gcr.io/broad-gotc-prod/imputation-minimac4:1.0.6-1.0.2-1663948783`
- __What is this image:__ This image is a lightweight alpine-based image for running Minimac4 in the [Imputation pipeline](../../../../pipelines/broad/arrays/imputation/Imputation.wdl).
- __What is Minimac4:__ Minimac4 is a low-memory and computationally efficient piece of software for genotype imputation. See [here](https://github.com/statgen/Minimac4) more information.
@@ -21,8 +21,8 @@ We keep track of all past versions in [docker_versions](docker_versions.tsv) wit
You can see more information about the image, including the tool versions, by running the following command:
```bash
-$ docker pull us.gcr.io/broad-gotc-prod/imputation-minimac4:1.0.5-1.0.2-1649949471
-$ docker inspect us.gcr.io/broad-gotc-prod/imputation-minimac4:1.0.5-1.0.2-1649949471
+$ docker pull us.gcr.io/broad-gotc-prod/imputation-minimac4:1.0.6-1.0.2-1663948783
+$ docker inspect us.gcr.io/broad-gotc-prod/imputation-minimac4:1.0.6-1.0.2-1663948783
```
## Usage
@@ -31,5 +31,5 @@ $ docker inspect us.gcr.io/broad-gotc-prod/imputation-minimac4:1.0.5-1.0.2-16499
```bash
$ docker run --rm -it \
- us.gcr.io/broad-gotc-prod/imputation-minimac4:1.0.5-1.0.2-1649949471 /usr/gitc/minimac4
+ us.gcr.io/broad-gotc-prod/imputation-minimac4:1.0.6-1.0.2-1663948783 /usr/gitc/minimac4
```
\ No newline at end of file
diff --git a/dockers/broad/imputation/minimac4/docker_build.sh b/dockers/broad/imputation/minimac4/docker_build.sh
index 0cac15aaa1..1ad3e1bacb 100755
--- a/dockers/broad/imputation/minimac4/docker_build.sh
+++ b/dockers/broad/imputation/minimac4/docker_build.sh
@@ -2,13 +2,13 @@
set -e
# Update version when changes to Dockerfile are made
-DOCKER_IMAGE_VERSION=1.0.5
+DOCKER_IMAGE_VERSION=1.0.6
TIMESTAMP=$(date +"%s")
DIR=$(cd $(dirname $0) && pwd)
# Registries and tags
GCR_URL="us.gcr.io/broad-gotc-prod/imputation-minimac4"
-QUAY_URL="quay.io/broadinstitute/gotc-prod-imputation_minimac"
+#QUAY_URL="quay.io/broadinstitute/gotc-prod-imputation_minimac"
# Minimac4 version
MINIMAC4_VERSION="1.0.2"
@@ -62,9 +62,9 @@ function main(){
--no-cache $DIR
docker push "$GCR_URL:$IMAGE_TAG"
- echo "tagging and pushing Quay Image"
- docker tag "$GCR_URL:$IMAGE_TAG" "$QUAY_URL:$IMAGE_TAG"
- docker push "$QUAY_URL:$IMAGE_TAG"
+# echo "tagging and pushing Quay Image"
+# docker tag "$GCR_URL:$IMAGE_TAG" "$QUAY_URL:$IMAGE_TAG"
+# docker push "$QUAY_URL:$IMAGE_TAG"
echo -e "$GCR_URL:$IMAGE_TAG" >> "$DIR/docker_versions.tsv"
echo "done"
diff --git a/dockers/broad/imputation/minimac4/docker_versions.tsv b/dockers/broad/imputation/minimac4/docker_versions.tsv
index a255f4e95d..4802e23227 100644
--- a/dockers/broad/imputation/minimac4/docker_versions.tsv
+++ b/dockers/broad/imputation/minimac4/docker_versions.tsv
@@ -6,3 +6,4 @@ us.gcr.io/broad-gotc-prod/imputation-minimac4:1.0.2-1.0.2-1642621010
us.gcr.io/broad-gotc-prod/imputation-minimac4:1.0.3-1.0.2-1644331595
us.gcr.io/broad-gotc-prod/imputation-minimac4:1.0.4-1.0.2-1646143013
us.gcr.io/broad-gotc-prod/imputation-minimac4:1.0.5-1.0.2-1649949471
+us.gcr.io/broad-gotc-prod/imputation-minimac4:1.0.6-1.0.2-1663948783
diff --git a/dockers/broad/picard_python/Dockerfile b/dockers/broad/picard_python/Dockerfile
index e53faa0cc8..5f6855f669 100644
--- a/dockers/broad/picard_python/Dockerfile
+++ b/dockers/broad/picard_python/Dockerfile
@@ -1,4 +1,4 @@
-FROM python:3.8-alpine
+FROM --platform=linux/amd64 python:3.8-alpine
ARG PICARD_PUBLIC_VERSION=2.26.10
@@ -12,8 +12,9 @@ WORKDIR /usr/gitc
# Install dependencies
RUN set -eux; \
+ apk upgrade; \
apk add --no-cache \
- bash \
+ bash \
gcompat \
libc6-compat \
openjdk8 \
diff --git a/dockers/broad/picard_python/README.md b/dockers/broad/picard_python/README.md
index 15f04d3879..b73c2a3855 100644
--- a/dockers/broad/picard_python/README.md
+++ b/dockers/broad/picard_python/README.md
@@ -4,8 +4,7 @@
Copy and paste to pull this image
-#### `docker pull us.gcr.io/broad-gotc-prod/picard-python:1.0.0-2.26.10-1647265026`
-`
+#### `docker pull us.gcr.io/broad-gotc-prod/picard-python:1.0.0-2.26.10-1663951039`
- __What is this image:__ This image is a lightweight alpine-based custom image for running Picard and Python, it uses `python:3.8-alpine` as a base image.
- __What is Picard:__ Picard is a set of command line tools for manipulating high-throughput sequencing (HTS) data and formats, [more info](https://github.com/broadinstitute/picard).
@@ -15,15 +14,15 @@ Copy and paste to pull this image
Picard_Python uses the following convention for verisoning:
-#### `us.gcr.io/broad-gotc-prod/picard-python:1.0.0-2.26.10-1647265026`
+#### `us.gcr.io/broad-gotc-prod/picard-python:1.0.0-2.26.10-1663951039`
We keep track of all past versions in [docker_versions](docker_versions.tsv) with the last image listed being the currently used version in WARP.
You can see more information about the image, including the tool versions, by running the following command:
```bash
-$ docker pull us.gcr.io/broad-gotc-prod/picard-python:1.0.0-2.26.10-1647265026
-$ docker inspect us.gcr.io/broad-gotc-prod/picard-python:1.0.0-2.26.10-1647265026
+$ docker pull us.gcr.io/broad-gotc-prod/picard-python:1.0.0-2.26.10-1663951039
+$ docker inspect us.gcr.io/broad-gotc-prod/picard-python:1.0.0-2.26.10-1663951039
```
## Usage
diff --git a/dockers/broad/picard_python/docker_versions.tsv b/dockers/broad/picard_python/docker_versions.tsv
index 99dfae8d7e..2f3c475665 100644
--- a/dockers/broad/picard_python/docker_versions.tsv
+++ b/dockers/broad/picard_python/docker_versions.tsv
@@ -1,2 +1,2 @@
DOCKER_VERSION
-us.gcr.io/broad-gotc-prod/picard-python:1.0.0-2.26.10-1647265026
\ No newline at end of file
+us.gcr.io/broad-gotc-prod/picard-python:1.0.0-2.26.10-1663951039
diff --git a/dockers/broad/samtools_star/README.md b/dockers/broad/samtools_star/README.md
index f40f6256d6..686a77ee84 100644
--- a/dockers/broad/samtools_star/README.md
+++ b/dockers/broad/samtools_star/README.md
@@ -33,7 +33,7 @@ $ docker inspect us.gcr.io/broad-gotc-prod/samtools-star:1.0.0-1.11-2.7.10a-1642
```bash
$ docker run --rm -it \
- us.gcr.io/broad-gotc-prod/samtools-picard-bwa:1.0.0-0.7.15-2.23.8-1626449438 STAR
+ us.gcr.io/broad-gotc-prod/samtools-star:1.0.0-1.11-2.7.10a-1642556627 STAR
```
### Samtools
diff --git a/dockers/skylab/HCA_post_processing/Dockerfile b/dockers/skylab/HCA_post_processing/Dockerfile
deleted file mode 100644
index 36dfe2d0f3..0000000000
--- a/dockers/skylab/HCA_post_processing/Dockerfile
+++ /dev/null
@@ -1,17 +0,0 @@
-FROM gcr.io/google.com/cloudsdktool/cloud-sdk
-
-LABEL maintainer="Lantern Team "
-
-RUN pip3 install --upgrade pip
-
-COPY requirements.txt .
-RUN pip3 install numpy==1.17.0
-RUN pip3 install cython==0.29.15
-RUN pip3 install -r requirements.txt
-
-RUN mkdir /tools
-WORKDIR /tools
-
-COPY HCA_create_adapter_json.py .
-COPY file_utils.sh .
-COPY create_input_metadata_json.py .
diff --git a/dockers/skylab/HCA_post_processing/HCA_create_adapter_json.py b/dockers/skylab/HCA_post_processing/HCA_create_adapter_json.py
deleted file mode 100755
index 12b91de859..0000000000
--- a/dockers/skylab/HCA_post_processing/HCA_create_adapter_json.py
+++ /dev/null
@@ -1,290 +0,0 @@
-#!/usr/bin/env python3
-
-import argparse
-import json
-import uuid
-import re
-import os
-import subprocess
-
-NAMESPACE = uuid.UUID('c6591d1d-27bc-4c94-bd54-1b51f8a2456c')
-
-
-def get_uuid5(value_to_hash):
- return str(uuid.uuid5(NAMESPACE, value_to_hash))
-
-
-def get_analysis_workflow_id(analysis_output_path):
- """Parse the analysis workflow id from one of its output paths, and write the id to a file so that it is available
- outside of the get_analysis task.
- Args:
- analysis_output_path (str): path to workflow output file.
- Returns:
- workflow_id (str): string giving Cromwell UUID of the workflow.
- """
- # Get the last match for UUID prior to the file name (in case the file is
- # named with a UUID) to ensure it is the subworkflow id
- url = analysis_output_path.rsplit('/', 1)[0]
- uuid_regex = r"([a-z0-9]{8}-[a-z0-9]{4}-[a-z0-9]{4}-[a-z0-9]{4}-[a-z0-9]{12})"
- workflow_id = re.findall(uuid_regex, url)[-1]
- print('Got analysis workflow UUID: {0}'.format(workflow_id))
- return workflow_id
-
-
-def main():
- description = """Creates json files needed for HCA DCP2 MVP"""
- parser = argparse.ArgumentParser(description=description)
- parser.add_argument('--project-loom-file',
- dest='project_loom_file',
- required=True,
- help="Path to project loom file")
- parser.add_argument('--crc32c',
- dest='crc32c',
- required=True,
- help="crc32c of the loom file")
- parser.add_argument('--version-timestamp',
- dest='version_timestamp',
- required=True,
- help="A version for the output files in the form of a timestamp")
- parser.add_argument('--project-id',
- dest='project_id',
- required=True,
- help="project id of the loom file")
- parser.add_argument('--project-stratum-string',
- dest='project_stratum_string',
- required=True,
- help="project id of the loom file")
- parser.add_argument('--sha256',
- dest='sha256',
- required=True,
- help="sha256 of the loom file")
- parser.add_argument('--size',
- dest='size',
- required=True,
- help="Size of the loom file in bytes")
- parser.add_argument('--staging-bucket',
- dest='staging_bucket',
- help="Path to staging bucket")
- parser.add_argument('--input-metadata-json',
- dest='inputs_json',
- required=True,
- help="Json file with inputs metadata")
- parser.add_argument('--loom-timestamp',
- dest='loom_timestamp',
- required=True,
- help="The timestamp for the stratified project matrix loom file")
- parser.add_argument('--pipeline-version',
- dest='pipeline_version',
- required=True,
- help="The version of the pipeline used to create the stratified project matrix")
-
- args = parser.parse_args()
-
- project_loom_file = args.project_loom_file
- crc32c = args.crc32c
- file_version = args.version_timestamp
- loom_timestamp = args.loom_timestamp
- loom_version = loom_timestamp.replace('Z', '.000000Z')
- project_id = args.project_id
- project_stratum_string = args.project_stratum_string
- sha256 = args.sha256
- size = int(args.size)
- staging_bucket = args.staging_bucket
- pipeline_version = args.pipeline_version
- with open(args.inputs_json, "r") as i:
- inputs_dict = json.load(i) # this should be a list of dictionaries
- inputs = inputs_dict['inputs']
-
- analysis_type = "run"
- if "cacheCopy" in str(project_loom_file):
- analysis_type = "copy-forward"
-
- # Generate additional data from args
- file_name = os.path.basename(project_loom_file)
- process_id = get_analysis_workflow_id(project_loom_file)
-
- # Create UUIDs
- links_id = get_uuid5(project_stratum_string) # v5 UUID of project id and the values the data are stratified by
- matrix_entity_id = get_uuid5(str(links_id + "analysis_file" + "loom")) # v5 UUID of the links_id
- matrix_file_id = get_uuid5(matrix_entity_id) # v5 UUID of the matrix_entity_id
-
- analysis_file_dict = {
- "describedBy": "https://schema.humancellatlas.org/type/file/6.2.0/analysis_file",
- "file_core": {
- "file_name": file_name,
- "format": "loom",
- "content_description": [{
- "text": "DCP/2-generated matrix",
- "ontology": "data:3917",
- "ontology_label": "Count Matrix"
- }]
- },
- "provenance": {
- "document_id": matrix_entity_id,
- "submission_date": file_version,
- "submitter_id": "e67aaabe-93ea-564a-aa66-31bc0857b707"
- },
- "schema_type": "file"
- }
-
- analysis_process_dict = {
- "describedBy": "https://schema.humancellatlas.org/type/process/analysis/12.0.0/analysis_process",
- "schema_type": "process",
- "process_core": {
- "process_id": process_id
- },
- "type": {
- "text": "analysis; merge matrices"
- },
- "reference_files": [],
- "timestamp_start_utc": loom_version, # string;
- # Initial start time of the full pipeline in UTC.
- # format: yyyy-mm-ddThh:mm:ssZ
- "timestamp_stop_utc": loom_version, # string;
- # Terminal stop time of the full pipeline in UTC.
- # format: yyyy-mm-ddThh:mm:ssZ
- "tasks": [
- # {
- # "task_name": "", # string; Name of the task.
- # # example: CollectDuplicationMetrics; RSEMExpression
- # "start_time": "", # string; Date and time when the task started.
- # # Enter the time in date-time format: yyyy-mm-ddThh:mm:ssZ
- # "stop_time": "", # string; Date and time when the task finished.
- # # Enter the time in date-time format: yyyy-mm-ddThh:mm:ssZ
- # "disk_size": "", # string; Name of the disk volume mounted to the VM for the task.
- # # Indicate both disk type and disk size. example: local-disk 11 HDD
- # "docker_image": "",# string;
- # # Name of docker image where the task is stored and executed.
- # # us.gcr.io/broad-gotc-prod/picard-cloud:2.26.10
- # "cpus": 0, # integer; Number of CPUs used to run this task.
- # "memory": "", # string; Amount of memory allocated for this task. example: 7.5 GB
- # "zone": "" # string Name of the Google Cloud zone where the task was run.
- # #example: us-central1-b; europe-north1-a
- # }
- ],
- "inputs": [
- # {
- # "parameter_name": "", # string; Name of parameter. example: stranded; rsem_ref_index
- # "parameter_value": "" # string; Path to file for or value of parameter.
- # # example: NONE;
- # # gs://hca-dcp-mint-test-data/../gencode_v27_primary.tar"
- # } # Input parameters used in the pipeline run.
- ],
- "analysis_run_type": analysis_type,
- "provenance": {
- "document_id": process_id,
- "submission_date": file_version,
- },
- }
-
- analysis_protocol_dict = {
- "describedBy": "https://schema.humancellatlas.org/type/protocol/analysis/9.1.0/analysis_protocol",
- "schema_type": "protocol",
- "protocol_core": {
- "protocol_id": pipeline_version
- },
- "computational_method": pipeline_version, # string; A URI to a versioned workflow and
- # versioned execution environment in a
- # GA4GH-compliant repository.
- # example: SmartSeq2SingleCell; 10x
- "type": {
- "text": "analysis; merge matrices"
- }
- }
- analysis_protocol_string = json.dumps(analysis_protocol_dict, sort_keys=True)
- analysis_protocol_entity_id = get_uuid5(analysis_protocol_string)
- analysis_protocol_dict['provenance'] = {
- 'document_id': analysis_protocol_entity_id,
- 'submission_date': file_version,
- 'update_date': file_version
- }
-
- file_descriptor_dict = {
- "crc32c": crc32c,
- "content_type": "application/vnd.loom",
- "describedBy": "https://schema.humancellatlas.org/system/2.0.0/file_descriptor",
- "file_id": matrix_file_id,
- "file_name": file_name,
- "file_version": loom_version,
- "schema_type": "file_descriptor",
- "schema_version": "2.0.0",
- "sha256": sha256,
- "size": size,
- }
-
- links_dict = {
- "describedBy": "https://schema.humancellatlas.org/system/2.1.1/links",
- "links": [
- {
- "inputs": inputs,
- "link_type": "process_link",
- "outputs": [
- {
- "output_id": matrix_entity_id,
- "output_type": "analysis_file"
- }
- ],
- "process_id": process_id,
- "process_type": "analysis_process",
- "protocols": [
- {
- "protocol_id": analysis_protocol_entity_id,
- "protocol_type": "analysis_protocol"
- }
- ]
- }
- ],
- "schema_type": "links",
- "schema_version": "2.1.1"
- }
-
- # filenames for staging directories
- analysis_file_basename = "{}_{}.json".format(matrix_entity_id, file_version)
- analysis_protocol_basename = "{}_{}.json".format(analysis_protocol_entity_id, file_version)
- analysis_process_basename = "{}_{}.json".format(process_id, file_version)
- links_basename = "{}_{}_{}.json".format(links_id, file_version, project_id)
-
- # files created in output directory for output
- analysis_file_json_file_name = "outputs/analysis_file_{}".format(analysis_file_basename)
- analysis_process_json_file_name = "outputs/analysis_process_{}".format(analysis_process_basename)
- analysis_protocol_json_file_name = "outputs/analysis_protocol_{}".format(analysis_protocol_basename)
- file_descriptor_json_file_name = "outputs/file_descriptor_{}".format(analysis_file_basename)
- links_json_file_name = "outputs/links_{}".format(links_basename)
-
- with open(analysis_file_json_file_name, "w") as f:
- json.dump(analysis_file_dict, f, sort_keys=True, indent=2)
-
- with open(analysis_process_json_file_name, "w") as f:
- json.dump(analysis_process_dict, f, sort_keys=True, indent=2)
-
- with open(analysis_protocol_json_file_name, "w") as f:
- json.dump(analysis_protocol_dict, f, sort_keys=True, indent=2)
-
- with open(file_descriptor_json_file_name, "w") as f:
- json.dump(file_descriptor_dict, f, sort_keys=True, indent=2)
-
- with open(links_json_file_name, "w") as f:
- json.dump(links_dict, f, sort_keys=True, indent=2)
-
- # Copy json files into the staging bucket
- subprocess.run('gsutil cp {0} {1}data/{2}'.format(project_loom_file, staging_bucket, file_name), shell=True)
- subprocess.run('gsutil cp {0} {1}metadata/analysis_file/{2}'.format(analysis_file_json_file_name,
- staging_bucket,
- analysis_file_basename), shell=True)
- subprocess.run('gsutil cp {0} {1}metadata/analysis_process/{2}'.format(analysis_process_json_file_name,
- staging_bucket,
- analysis_process_basename), shell=True)
- subprocess.run('gsutil cp {0} {1}metadata/analysis_protocol/{2}'.format(analysis_protocol_json_file_name,
- staging_bucket,
- analysis_protocol_basename), shell=True)
- subprocess.run('gsutil cp {0} {1}descriptors/analysis_file/{2}'.format(file_descriptor_json_file_name,
- staging_bucket,
- analysis_file_basename), shell=True)
- subprocess.run('gsutil cp {0} {1}links/{2}'.format(links_json_file_name,
- staging_bucket,
- links_basename), shell=True)
-
-
-if __name__ == '__main__':
- main()
-
diff --git a/dockers/skylab/HCA_post_processing/build.sh b/dockers/skylab/HCA_post_processing/build.sh
deleted file mode 100755
index ec0d2953bc..0000000000
--- a/dockers/skylab/HCA_post_processing/build.sh
+++ /dev/null
@@ -1,13 +0,0 @@
-#!/bin/bash
-
-tag=$1
-
-if [ -z $tag ]; then
- echo -e "\nYou must provide a tag"
- echo -e "\nUsage: bash build_docker.sh TAG\n"
- exit 1
-fi
-
-docker build . --tag=quay.io/humancellatlas/hca_post_processing:$tag
-
-echo "You can now push with docker push quay.io/humancellatlas/hca_post_processing:$tag"
diff --git a/dockers/skylab/HCA_post_processing/create_input_metadata_json.py b/dockers/skylab/HCA_post_processing/create_input_metadata_json.py
deleted file mode 100755
index c4d8075bca..0000000000
--- a/dockers/skylab/HCA_post_processing/create_input_metadata_json.py
+++ /dev/null
@@ -1,39 +0,0 @@
-#!/usr/bin/env python3
-
-import json
-import argparse
-
-
-def main():
- description = """Collects input metadata from individual analysis file jsons """
- parser = argparse.ArgumentParser(description=description)
- parser.add_argument('--input-json-files',
- dest='input_files',
- nargs="+",
- required=True,
- help="List of son files")
- parser.add_argument('--output',
- dest='output',
- required=True,
- help="Name of output file")
-
- args = parser.parse_args()
-
- analysis_files = args.input_files
-
- inputs = {"inputs": []}
-
- for analysis_file in analysis_files:
- with open(analysis_file, "r") as f:
- analysis_metadata = json.load(f)
- if analysis_metadata["file_core"]["file_name"].endswith(".loom"):
- input_uuid = analysis_metadata["provenance"]["document_id"]
- inputs["inputs"].append({"input_id": input_uuid, "input_type": "analysis_file"})
-
- with open(args.output, "w") as f:
- json.dump(inputs, f)
-
-
-if __name__ == '__main__':
- main()
-
diff --git a/dockers/skylab/HCA_post_processing/file_utils.sh b/dockers/skylab/HCA_post_processing/file_utils.sh
deleted file mode 100755
index 645369104c..0000000000
--- a/dockers/skylab/HCA_post_processing/file_utils.sh
+++ /dev/null
@@ -1,19 +0,0 @@
-#!/bin/bash
-
-function get_timestamp() {
- local -r input_file=${1}
- timestamp=$(gsutil ls -l ${input_file} | egrep -o "([0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}:[0-9]{2}:[0-9]{2}Z)")
- echo ${timestamp}
-}
-
-function get_crc() {
- local -r input_file=${1}
- crc=$(gsutil hash -h ${input_file} | awk '/crc32c/ { print $3 }')
- echo ${crc}
-}
-
-function get_size() {
- local -r input_file=${1}
- size=$(gsutil stat ${input_file} | awk '/Content-Length/ { print $2 }')
- echo ${size}
-}
diff --git a/dockers/skylab/HCA_post_processing/requirements.txt b/dockers/skylab/HCA_post_processing/requirements.txt
deleted file mode 100644
index 3bef2eebc9..0000000000
--- a/dockers/skylab/HCA_post_processing/requirements.txt
+++ /dev/null
@@ -1,5 +0,0 @@
-numpy==1.21.0
-scipy==1.4.1
-pandas==1.0.3
-loompy==3.0.6
-h5py==2.10.0
diff --git a/dockers/skylab/HCA_post_processing/testdata/a.loom b/dockers/skylab/HCA_post_processing/testdata/a.loom
deleted file mode 100644
index 4e2243e1da..0000000000
Binary files a/dockers/skylab/HCA_post_processing/testdata/a.loom and /dev/null differ
diff --git a/dockers/skylab/HCA_post_processing/testdata/b.loom b/dockers/skylab/HCA_post_processing/testdata/b.loom
deleted file mode 100644
index df22d1d5f9..0000000000
Binary files a/dockers/skylab/HCA_post_processing/testdata/b.loom and /dev/null differ
diff --git a/dockers/skylab/HCA_post_processing/testdata/c.loom b/dockers/skylab/HCA_post_processing/testdata/c.loom
deleted file mode 100644
index dbd77094ff..0000000000
Binary files a/dockers/skylab/HCA_post_processing/testdata/c.loom and /dev/null differ
diff --git a/dockers/skylab/HCA_post_processing/testdata/d.loom b/dockers/skylab/HCA_post_processing/testdata/d.loom
deleted file mode 100644
index 838ee16f16..0000000000
Binary files a/dockers/skylab/HCA_post_processing/testdata/d.loom and /dev/null differ
diff --git a/dockers/skylab/snss2-build-indices/Biotypes.tsv b/dockers/skylab/build-indices/Biotypes.tsv
similarity index 100%
rename from dockers/skylab/snss2-build-indices/Biotypes.tsv
rename to dockers/skylab/build-indices/Biotypes.tsv
diff --git a/dockers/skylab/build-indices/Dockerfile b/dockers/skylab/build-indices/Dockerfile
new file mode 100644
index 0000000000..7f8d70752b
--- /dev/null
+++ b/dockers/skylab/build-indices/Dockerfile
@@ -0,0 +1,43 @@
+# Adding a platform tag to ensure that images built on ARM-based machines (ex. M-series macs) won't cause issues with our automated PR test suite.
+# However, this is not relevant for automated builds in a CI/CD pipeline that is AMD-based.
+# Also upgraded to a more recent Debian version (Bullseye) so tini is part of apt.
+FROM --platform="linux/amd64" python:3.6-bullseye
+
+# Add metadata labels
+LABEL maintainer="Broad Institute DSDE --`
+
+We keep track of all past versions in [docker_versions](docker_versions.tsv) with the last image listed being the currently used version in WARP.
+
+You can see more information about the image, including the tool versions, by running the following command:
+
+```bash
+$ docker pull us.gcr.io/broad-gotc-prod/build-indices:1.0.0-2.7.10a-1663605340
+$ docker inspect us.gcr.io/broad-gotc-prod/build-indices:1.0.0-2.7.10a-1663605340
+```
+
+## Usage
+
+### Build_indices
+
+```bash
+$ docker run --rm -it \
+ us.gcr.io/broad-gotc-prod/build-indices:1.0.0-2.7.10a-1663605340 \
+ build-indices bash
+```
+
+Then you can exec into the container and use STAR or any of the scripts accordingly. Alternatively, you can run one-off commands by passing the command as a docker run parameter.
\ No newline at end of file
diff --git a/dockers/skylab/snss2-build-indices/add-introns-to-gtf.py b/dockers/skylab/build-indices/add-introns-to-gtf.py
similarity index 100%
rename from dockers/skylab/snss2-build-indices/add-introns-to-gtf.py
rename to dockers/skylab/build-indices/add-introns-to-gtf.py
diff --git a/dockers/skylab/build-indices/docker_build.sh b/dockers/skylab/build-indices/docker_build.sh
new file mode 100644
index 0000000000..212e01d20b
--- /dev/null
+++ b/dockers/skylab/build-indices/docker_build.sh
@@ -0,0 +1,74 @@
+#!/bin/bash
+#fail-fast
+set -e
+
+# Update version when changes to Dockerfile are made
+DOCKER_IMAGE_VERSION=1.0.0
+TIMESTAMP=$(date +"%s")
+DIR=$(cd "$(dirname "$0")" && pwd)
+
+# Registries and tags
+GCR_URL="us.gcr.io/broad-gotc-prod/build-indices"
+#QUAY_URL="quay.io/humancellatlas/build-indices"
+#Image path before getting renamed: quay.io/humancellatlas/snss2-indices:1.2.0
+
+#STAR version
+STAR_VERSION="2.7.10a"
+
+# Necessary tools and help text
+TOOLS=(docker gcloud)
+HELP="$(basename "$0") [-h|--help] [-t|tools] -- script to build the build-indices image and push to GCR
+where:
+ -h|--help Show help text
+ -s|--star_version Version of STAR to use (default: $STAR_VERSION)
+ -t|--tools Show tools needed to run script
+ "
+
+function main(){
+ for t in "${TOOLS[@]}"; do which "$t" >/dev/null || ok=no; done
+ if [[ $ok == no ]]; then
+ echo "Missing one of the following tools: "
+ for t in "${TOOLS[@]}"; do echo "$t"; done
+ exit 1
+ fi
+
+ while [[ $# -gt 0 ]]
+ do
+ key="$1"
+ case $key in
+ -h|--help)
+ echo "$HELP"
+ exit 0
+ ;;
+ -s|--star_version)
+ STAR_VERSION="$2"
+ shift
+ shift
+ ;;
+ -t|--tools)
+ for t in "${TOOLS[@]}"; do echo "$t"; done
+ exit 0
+ ;;
+ *)
+ shift
+ ;;
+ esac
+ done
+
+ IMAGE_TAG="$DOCKER_IMAGE_VERSION-$STAR_VERSION-$TIMESTAMP"
+
+ echo "building and pushing GCR Image - $GCR_URL:$IMAGE_TAG"
+ docker build -t "$GCR_URL:$IMAGE_TAG" \
+ --build-arg STAR_VERSION="$STAR_VERSION" \
+ --no-cache "$DIR"
+ docker push "$GCR_URL:$IMAGE_TAG"
+
+# echo "tagging and pushing Quay Image"
+# docker tag "$GCR_URL:$IMAGE_TAG" "$QUAY_URL:$IMAGE_TAG"
+# docker push "$QUAY_URL:$IMAGE_TAG"
+
+ echo -e "$GCR_URL:$IMAGE_TAG" >> "$DIR/docker_versions.tsv"
+ echo "done"
+}
+
+main "$@"
\ No newline at end of file
diff --git a/dockers/skylab/build-indices/docker_versions.tsv b/dockers/skylab/build-indices/docker_versions.tsv
new file mode 100644
index 0000000000..a2aa9eace9
--- /dev/null
+++ b/dockers/skylab/build-indices/docker_versions.tsv
@@ -0,0 +1,2 @@
+DOCKER_VERSION
+us.gcr.io/broad-gotc-prod/build-indices:1.0.0-2.7.10a-1663605340
diff --git a/dockers/skylab/snss2-build-indices/modify_gtf.py b/dockers/skylab/build-indices/modify_gtf.py
similarity index 100%
rename from dockers/skylab/snss2-build-indices/modify_gtf.py
rename to dockers/skylab/build-indices/modify_gtf.py
diff --git a/dockers/skylab/featureCounts/requirements.txt b/dockers/skylab/build-indices/requirements.txt
similarity index 100%
rename from dockers/skylab/featureCounts/requirements.txt
rename to dockers/skylab/build-indices/requirements.txt
diff --git a/dockers/skylab/bwa/Dockerfile b/dockers/skylab/bwa/Dockerfile
new file mode 100644
index 0000000000..b70372dd20
--- /dev/null
+++ b/dockers/skylab/bwa/Dockerfile
@@ -0,0 +1,38 @@
+FROM --platform=linux/amd64 ubuntu:16.04
+
+ARG BWA_VERSION=0.7.17
+
+ENV TERM=xterm-256color \
+ TINI_VERSION=v0.19.0 \
+ BWA_URL=https://sourceforge.net/projects/bio-bwa/files/bwa-${BWA_VERSION}.tar.bz2/download
+
+LABEL MAINTAINER="Broad Institute DSDE --`
+
+We keep track of all past versions in [docker_versions](docker_versions.tsv) with the last image listed being the currently used version in WARP.
+
+You can see more information about the image, including the tool versions, by running the following command:
+
+```bash
+$ docker pull us.gcr.io/broad-gotc-prod/bwa:1.0.0-0.7.17-1660770463
+$ docker inspect us.gcr.io/broad-gotc-prod/bwa:1.0.0-0.7.17-1660770463
+```
+
+## Usage
+
+```bash
+$ docker run --rm -it \
+ us.gcr.io/broad-gotc-prod/bwa:1.0.0-0.7.17-1660770463 \
+ bwa
+```
\ No newline at end of file
diff --git a/dockers/skylab/bwa/docker_build.sh b/dockers/skylab/bwa/docker_build.sh
new file mode 100644
index 0000000000..6acaf9d5ff
--- /dev/null
+++ b/dockers/skylab/bwa/docker_build.sh
@@ -0,0 +1,71 @@
+#!/bin/bash
+set -e
+
+# Update version when changes to Dockerfile are made
+DOCKER_IMAGE_VERSION=1.0.0
+TIMESTAMP=$(date +"%s")
+DIR=$(cd "$(dirname "$0")" && pwd)
+
+# Registries and tags
+GCR_URL="us.gcr.io/broad-gotc-prod/bwa"
+#QUAY_URL="quay.io/broadinstitute/gotc-prod-bwa" # Update and uncomment push block below after setting up quay repo
+
+# BWA version
+BWA_VERSION="0.7.17"
+
+# Necessary tools and help text
+TOOLS=(docker gcloud)
+HELP="$(basename "$0") [-h|--help] [-v|--version] [-t|tools] -- script to build the bwa image and push to GCR
+where:
+ -h|--help Show help text
+ -v|--version Version of BWA to use (default: $BWA_VERSION)
+ -t|--tools Show tools needed to run script
+ "
+
+function main(){
+ for t in "${TOOLS[@]}"; do which "$t" >/dev/null || ok=no; done
+ if [[ $ok == no ]]; then
+ echo "Missing one of the following tools: "
+ for t in "${TOOLS[@]}"; do echo "$t"; done
+ exit 1
+ fi
+
+ while [[ $# -gt 0 ]]
+ do
+ key="$1"
+ case $key in
+ -v|--version)
+ BWA_VERSION="$2"
+ shift
+ shift
+ ;;
+ -h|--help)
+ echo "$HELP"
+ exit 0
+ ;;
+ -t|--tools)
+ for t in "${TOOLS[@]}"; do echo "$t"; done
+ exit 0
+ ;;
+ *)
+ shift
+ ;;
+ esac
+ done
+
+ IMAGE_TAG="$DOCKER_IMAGE_VERSION-$BWA_VERSION-$TIMESTAMP"
+
+ echo "building and pushing GCR Image - $GCR_URL:$IMAGE_TAG"
+ docker build --no-cache -t "$GCR_URL:$IMAGE_TAG" \
+ --build-arg BWA_VERSION="$BWA_VERSION" "$DIR"
+ docker push "$GCR_URL:$IMAGE_TAG"
+
+# echo "tagging and pushing Quay Image"
+# docker tag "$GCR_URL:$IMAGE_TAG" "$QUAY_URL:$IMAGE_TAG"
+# docker push "$QUAY_URL:$IMAGE_TAG"
+
+ echo -e "$GCR_URL:$IMAGE_TAG" >> "$DIR/docker_versions.tsv"
+ echo "done"
+}
+
+main "$@"
\ No newline at end of file
diff --git a/dockers/skylab/bwa/docker_versions.tsv b/dockers/skylab/bwa/docker_versions.tsv
new file mode 100644
index 0000000000..ba49567301
--- /dev/null
+++ b/dockers/skylab/bwa/docker_versions.tsv
@@ -0,0 +1,2 @@
+DOCKER_VERSION
+us.gcr.io/broad-gotc-prod/bwa:1.0.0-0.7.17-1660770463
diff --git a/dockers/skylab/create-npz-output/Dockerfile b/dockers/skylab/create-npz-output/Dockerfile
deleted file mode 100644
index 2efcd1908a..0000000000
--- a/dockers/skylab/create-npz-output/Dockerfile
+++ /dev/null
@@ -1,16 +0,0 @@
-FROM python:3.7.2
-
-LABEL maintainer="Lantern Team "
-
-RUN pip install --upgrade pip
-
-RUN apt-get update
-
-COPY requirements.txt .
-RUN pip3 install -r requirements.txt
-
-RUN mkdir /tools
-WORKDIR /tools
-
-COPY create-npz-output.py .
-COPY create-merged-npz-output.py .
diff --git a/dockers/skylab/create-npz-output/create-npz-output.py b/dockers/skylab/create-npz-output/create-npz-output.py
deleted file mode 100755
index c32cc6c70d..0000000000
--- a/dockers/skylab/create-npz-output/create-npz-output.py
+++ /dev/null
@@ -1,66 +0,0 @@
-#!/usr/bin/env python3
-
-import argparse
-import gzip
-import numpy as np
-
-import scipy.io
-import scipy.sparse
-
-
-def main():
- description = """Create npz, npy file from the mtx files produced by STARsolo"""
- parser = argparse.ArgumentParser(description=description)
- parser.add_argument('--barcodes',
- dest='barcodes',
- required=True,
- help="The barcodes file")
-
- parser.add_argument('--features',
- dest='features',
- required=True,
- help="The features file")
-
- parser.add_argument('--matrix',
- dest='matrix',
- required=True,
- help="The matrix file")
-
- args = parser.parse_args()
-
- # covert the mtx file to the matrix
- matrix = scipy.io.mmread(args.matrix).transpose().tocsr()
- nonzero_row_indices, _ = matrix.nonzero()
- unique_nonzero_row_indices = np.sort(np.unique(nonzero_row_indices))
- # we need to keep only those rows that have non-zero reads/counts
- scipy.sparse.save_npz("sparse_counts.npz", matrix[unique_nonzero_row_indices, :], compressed=True)
-
- # read the barcodes file and create the barcode to index
- barcodes = []
- with gzip.open(args.barcodes, 'rt') if args.barcodes.endswith('.gz') else \
- open(args.barcodes, 'r') as fin:
- for line in fin:
- if line.startswith(r'^#'): # skip comments
- continue
- fields = line.strip().split('\t')
- barcodes.append(fields[0])
-
- row_index = np.asarray(barcodes)
- # we need to keep only those barcodes that have non-zero reads/counts
- np.save("sparse_counts_row_index.npy", row_index[unique_nonzero_row_indices])
-
- # read the features file and create the feature to index map
- features = []
- with gzip.open(args.features, 'rt') if args.features.endswith('.gz') else \
- open(args.features, 'r') as fin:
- for line in fin:
- if line.startswith(r'^#'): # skip comments
- continue
- fields = line.strip().split('\t')
- features.append(fields[0])
-
- row_index = np.asarray(features)
- np.save("sparse_counts_col_index.npy", row_index)
-
-if __name__ == '__main__':
- main()
diff --git a/dockers/skylab/create-npz-output/requirements.txt b/dockers/skylab/create-npz-output/requirements.txt
deleted file mode 100644
index 6bad10388e..0000000000
--- a/dockers/skylab/create-npz-output/requirements.txt
+++ /dev/null
@@ -1,2 +0,0 @@
-numpy
-scipy
diff --git a/dockers/skylab/dropseq_tools/Dockerfile b/dockers/skylab/dropseq_tools/Dockerfile
deleted file mode 100644
index d671e352f5..0000000000
--- a/dockers/skylab/dropseq_tools/Dockerfile
+++ /dev/null
@@ -1,17 +0,0 @@
-FROM openjdk:8-jre
-
-LABEL maintainer="Ambrose J. Carr " \
- software="dropseqtools" \
- version="1.12" \
- description="tools for manipulation of drop-seq data and BAM files" \
- website="http://mccarrolllab.com/dropseq/"
-
-RUN apt update && apt install -y \
- curl \
- unzip
-
-RUN apt install -y python
-
-RUN curl http://mccarrolllab.com/download/922/ >> Drop-seq_tools-1.12.zip && \
- unzip Drop-seq_tools-1.12.zip && \
- cp -r Drop-seq_tools-1.12/* /usr/local/bin/
diff --git a/dockers/skylab/dropseq_tools_v2/Dockerfile b/dockers/skylab/dropseq_tools_v2/Dockerfile
deleted file mode 100644
index bffe832b6b..0000000000
--- a/dockers/skylab/dropseq_tools_v2/Dockerfile
+++ /dev/null
@@ -1,20 +0,0 @@
-FROM openjdk:8-jre
-
-LABEL maintainer="Lantern Team " \
- software="dropseqtools" \
- version="2.3.0" \
- description="tools for manipulation of drop-seq data and BAM files" \
- website="http://mccarrolllab.com/dropseq/"
-
-RUN apt update && apt install -y \
- curl \
- unzip
-
-RUN apt install -y python
-
-RUN curl -L https://github.com/broadinstitute/Drop-seq/releases/download/v2.3.0/Drop-seq_tools-2.3.0.zip > Drop-seq_tools-2.3.0.zip && \
- unzip Drop-seq_tools-2.3.0.zip && \
- chmod +x Drop-seq_tools-2.3.0/* && \
- cp -r Drop-seq_tools-2.3.0/* /usr/local/bin/ && \
- rm Drop-seq_tools-2.3.0.zip && \
- rm -rf Drop-seq_tools-2.3.0/
diff --git a/dockers/skylab/dropseq_tools_v2/build.sh b/dockers/skylab/dropseq_tools_v2/build.sh
deleted file mode 100755
index 1e28bddeed..0000000000
--- a/dockers/skylab/dropseq_tools_v2/build.sh
+++ /dev/null
@@ -1,14 +0,0 @@
-#!/bin/bash
-
-tag=2.3.0
-image="quay.io/humancellatlas/secondary-analysis-dropseqtools"
-
-if [ -z $tag ]; then
- echo -e "\nYou must provide a tag"
- echo -e "\nUsage: bash build_docker.sh TAG\n"
- exit 1
-fi
-
-docker build -t $image:$tag .
-
-echo "You can now push with docker push $image:$tag"
diff --git a/dockers/skylab/ea-utils/Dockerfile b/dockers/skylab/ea-utils/Dockerfile
new file mode 100644
index 0000000000..e6ae626832
--- /dev/null
+++ b/dockers/skylab/ea-utils/Dockerfile
@@ -0,0 +1,38 @@
+FROM --platform=linux/amd64 ubuntu:16.04
+
+ARG EAUTILS_VERSION=1.04.807
+
+ENV TERM=xterm-256color \
+ EAUTILS_URL=https://github.com/ExpressionAnalysis/ea-utils/archive/refs/tags/${EAUTILS_VERSION}.tar.gz \
+ EAUTILS_VERSION=${EAUTILS_VERSION} \
+ TINI_VERSION=v0.19.0 \
+ PATH="$PATH:/usr/gitc/ea-utils-${EAUTILS_VERSION}/clipper/"
+
+LABEL MAINTAINER="Broad Institute DSDE --`
+
+
+We keep track of all past versions in [docker_versions](docker_versions.tsv) with the last image listed being the currently used version in WARP.
+
+You can see more information about the image, including the tool versions, by running the following command:
+
+```bash
+$ docker pull us.gcr.io/broad-gotc-prod/ea-utils:1.0.0-1.04.807-1659990665
+$ docker inspect us.gcr.io/broad-gotc-prod/ea-utils:1.0.0-1.04.807-1659990665
+```
+
+## Usage
+
+Show the `fastq-mcf` help screen:
+
+```bash
+$ docker run --rm -it \
+ us.gcr.io/broad-gotc-prod/ea-utils:1.0.0-1.04.807-1659990665 fastq-mcf -h
+```
\ No newline at end of file
diff --git a/dockers/skylab/ea-utils/docker_build.sh b/dockers/skylab/ea-utils/docker_build.sh
new file mode 100755
index 0000000000..c0c07f1684
--- /dev/null
+++ b/dockers/skylab/ea-utils/docker_build.sh
@@ -0,0 +1,73 @@
+#!/bin/bash
+
+set -e
+
+# Update version when changes to Dockerfile are made
+DOCKER_IMAGE_VERSION=1.0.0
+TIMESTAMP=$(date +"%s")
+DIR=$(cd $(dirname $0) && pwd)
+
+# Registries and tags
+GCR_URL="us.gcr.io/broad-gotc-prod/ea-utils"
+#QUAY_URL="quay.io/humancellatlas/ea-utils"
+
+# ea-utils version
+EAUTILS_VERSION="1.04.807"
+
+# Necessary tools and help text
+TOOLS=(docker gcloud)
+HELP="$(basename "$0") [-h|--help] [-t|--tools] -- script to build the ea-utils image and push to GCR & Quay
+
+where:
+ -h|--help Show help text
+ -v|--version Version of ea-utils to use (default: $EAUTILS_VERSION)
+ -t|--tools Show tools needed to run script
+ "
+
+function main(){
+ for t in "${TOOLS[@]}"; do which $t >/dev/null || ok=no; done
+ if [[ $ok == no ]]; then
+ echo "Missing one of the following tools: "
+ for t in "${TOOLS[@]}"; do echo "$t"; done
+ exit 1
+ fi
+
+ while [[ $# -gt 0 ]]
+ do
+ key="$1"
+ case $key in
+ -v|--version)
+ EAUTILS_VERSION="$2"
+ shift
+ shift
+ ;;
+ -h|--help)
+ echo "$HELP"
+ exit 0
+ ;;
+ -t|--tools)
+ for t in "${TOOLS[@]}"; do echo $t; done
+ exit 0
+ ;;
+ *)
+ shift
+ ;;
+ esac
+ done
+
+ IMAGE_TAG="$DOCKER_IMAGE_VERSION-$EAUTILS_VERSION-$TIMESTAMP"
+
+ echo "building and pushing GCR Image - $GCR_URL:$IMAGE_TAG"
+ docker build --no-cache -t "$GCR_URL:$IMAGE_TAG" \
+ --build-arg EAUTILS_VERSION="$EAUTILS_VERSION" "$DIR"
+ docker push "$GCR_URL:$IMAGE_TAG"
+
+ #echo "tagging and pushing Quay Image"
+ #docker tag "$GCR_URL:$IMAGE_TAG" "$QUAY_URL:$IMAGE_TAG"
+ #docker push "$QUAY_URL:$IMAGE_TAG"
+
+ echo "$GCR_URL:$IMAGE_TAG" >> "$DIR/docker_versions.tsv"
+ echo "done"
+}
+
+main "$@"
diff --git a/dockers/skylab/ea-utils/docker_versions.tsv b/dockers/skylab/ea-utils/docker_versions.tsv
new file mode 100644
index 0000000000..c7391feef4
--- /dev/null
+++ b/dockers/skylab/ea-utils/docker_versions.tsv
@@ -0,0 +1,2 @@
+DOCKER_VERSION
+us.gcr.io/broad-gotc-prod/ea-utils:1.0.0-1.04.807-1659990665
diff --git a/dockers/skylab/featureCounts/Dockerfile b/dockers/skylab/featureCounts/Dockerfile
deleted file mode 100644
index f6656bc1e1..0000000000
--- a/dockers/skylab/featureCounts/Dockerfile
+++ /dev/null
@@ -1,29 +0,0 @@
-FROM python:3.6.2
-
-LABEL maintainer="Lantern Team " \
- software="subread package" \
- version="2.0.1" \
- description="RNA-seq high-performance read alignment, quantification and mutation discovery" \
- website="http://subread.sourceforge.net/"
-
-# Install compiler
-RUN apt-get update --fix-missing && apt-get install -y wget
-
-COPY requirements.txt .
-RUN pip3 install -r requirements.txt
-
-# Install subread
-WORKDIR /usr/local/
-ENV VERSION="2.0.1"
-RUN wget "https://downloads.sourceforge.net/project/subread/subread-${VERSION}/subread-${VERSION}-source.tar.gz" \
- && tar -xzvf subread-${VERSION}-source.tar.gz
-WORKDIR /usr/local/subread-${VERSION}-source/src
-RUN make -f Makefile.Linux
-ENV PATH /usr/local/subread-${VERSION}-source/bin/:$PATH
-# Cleanup
-RUN apt-get clean
-
-# copy the script that removes alignments spanning intron-exon junctions
-RUN mkdir /tools
-WORKDIR /tools
-COPY remove-reads-on-junctions.py .
diff --git a/dockers/skylab/hisat2/Dockerfile b/dockers/skylab/hisat2/Dockerfile
index ce9d8d9b29..05a372f821 100644
--- a/dockers/skylab/hisat2/Dockerfile
+++ b/dockers/skylab/hisat2/Dockerfile
@@ -1,62 +1,68 @@
-FROM ubuntu:16.04
-LABEL MAINTAINER="Jishu Xu "
-LABEL software="HISAT2"
-LABEL version="2-2.1.0"
-LABEL description="RNA-seq aligner"
-LABEL website="https://ccb.jhu.edu/software/hisat2/index.shtml"
+FROM --platform=linux/amd64 ubuntu:16.04
-RUN mkdir -p /opt/tools/
-WORKDIR /opt/tools
+ENV TERM=xterm-256color \
+ PATH="$PATH:/usr/gitc/hisat2-2.1.0" \
+ TINI_VERSION=v0.19.0
-RUN \
- apt update && \
- apt install -y \
- liblzma-dev \
- libbz2-dev \
- cmake \
- automake \
- curl \
- libboost-all-dev \
- libcurl4-openssl-dev \
- wget \
- build-essential \
- gcc-multilib \
- zlib1g-dev \
- libxml2-dev \
- libncurses5-dev \
- zip unzip \
- git \
- r-base \
- r-base-core \
- r-base-dev
-
-RUN \
- wget -c ftp://ftp.ccb.jhu.edu/pub/infphilo/hisat2/downloads/hisat2-2.1.0-source.zip && \
- unzip hisat2-2.1.0-source.zip && \
- cd hisat2-2.1.0 && \
- make && \
- cp hisat2* /usr/local/bin
+LABEL MAINTAINER="Broad Institute DSDE -`
+
+
+We keep track of all past versions in [docker_versions](docker_versions.tsv) with the last image listed being the currently used version in WARP.
+
+You can see more information about the image, including the tool versions, by running the following command:
+
+```bash
+$ docker pull us.gcr.io/broad-gotc-prod/hisat2:1.0.0-1662998171
+$ docker inspect us.gcr.io/broad-gotc-prod/hisat2:1.0.0-1662998171
+```
+
+## Usage
+
+```bash
+$ docker run --rm -it \
+ us.gcr.io/broad-gotc-prod/hisat2:1.0.0-1662998171 hisat2
+```
\ No newline at end of file
diff --git a/dockers/skylab/hisat2/docker_build.sh b/dockers/skylab/hisat2/docker_build.sh
new file mode 100644
index 0000000000..4a2087576b
--- /dev/null
+++ b/dockers/skylab/hisat2/docker_build.sh
@@ -0,0 +1,61 @@
+#!/bin/bash
+set -e
+
+# Update version when changes to Dockerfile are made
+DOCKER_IMAGE_VERSION=1.0.0
+TIMESTAMP=$(date +"%s")
+DIR=$(cd "$(dirname "$0")" && pwd)
+
+# Registries and tags
+GCR_URL="us.gcr.io/broad-gotc-prod/hisat2"
+#QUAY_URL="quay.io/broadinstitute/gotc-prod-hisat2" # Update and uncomment push block below after setting up quay repo
+
+# Necessary tools and help text
+TOOLS=(docker gcloud)
+HELP="$(basename "$0") [-h|--help] [-t|tools] -- script to build the hisat2 image and push to GCR
+where:
+ -h|--help Show help text
+ -t|--tools Show tools needed to run script
+ "
+
+function main(){
+ for t in "${TOOLS[@]}"; do which "$t" >/dev/null || ok=no; done
+ if [[ $ok == no ]]; then
+ echo "Missing one of the following tools: "
+ for t in "${TOOLS[@]}"; do echo "$t"; done
+ exit 1
+ fi
+
+ while [[ $# -gt 0 ]]
+ do
+ key="$1"
+ case $key in
+ -h|--help)
+ echo "$HELP"
+ exit 0
+ ;;
+ -t|--tools)
+ for t in "${TOOLS[@]}"; do echo "$t"; done
+ exit 0
+ ;;
+ *)
+ shift
+ ;;
+ esac
+ done
+
+ IMAGE_TAG="$DOCKER_IMAGE_VERSION-$TIMESTAMP"
+
+ echo "building and pushing GCR Image - $GCR_URL:$IMAGE_TAG"
+ docker build --no-cache -t "$GCR_URL:$IMAGE_TAG" "$DIR"
+ docker push "$GCR_URL:$IMAGE_TAG"
+
+# echo "tagging and pushing Quay Image"
+# docker tag "$GCR_URL:$IMAGE_TAG" "$QUAY_URL:$IMAGE_TAG"
+# docker push "$QUAY_URL:$IMAGE_TAG"
+
+ echo -e "$GCR_URL:$IMAGE_TAG" >> "$DIR/docker_versions.tsv"
+ echo "done"
+}
+
+main "$@"
\ No newline at end of file
diff --git a/dockers/skylab/hisat2/docker_versions.tsv b/dockers/skylab/hisat2/docker_versions.tsv
new file mode 100644
index 0000000000..c9e47ddd63
--- /dev/null
+++ b/dockers/skylab/hisat2/docker_versions.tsv
@@ -0,0 +1,2 @@
+DOCKER_VERSION
+us.gcr.io/broad-gotc-prod/hisat2:1.0.0-1662998171
diff --git a/dockers/skylab/loom-output/Dockerfile b/dockers/skylab/loom-output/Dockerfile
deleted file mode 100644
index 0446f36abc..0000000000
--- a/dockers/skylab/loom-output/Dockerfile
+++ /dev/null
@@ -1,24 +0,0 @@
-FROM python:3.7.2
-
-LABEL maintainer="Lantern Team "
-
-RUN pip install --upgrade pip
-
-RUN apt-get update && apt-get install wget
-
-RUN python -m pip install git+https://github.com/HumanCellAtlas/sctools.git#egg=sctools
-
-COPY requirements.txt .
-RUN pip3 install -r requirements.txt
-
-RUN mkdir /tools
-WORKDIR /tools
-
-COPY create_loom_optimus.py .
-COPY create_loom_ss2.py .
-COPY loomCompare.py .
-COPY ss2_loom_merge.py .
-COPY create_snss2_counts_csv.py .
-COPY create_loom_snss2.py .
-COPY create_snrna_optimus.py .
-COPY create_snrna_optimus_counts.py .
diff --git a/dockers/skylab/loom-output/build.sh b/dockers/skylab/loom-output/build.sh
deleted file mode 100755
index e5a6c23fa9..0000000000
--- a/dockers/skylab/loom-output/build.sh
+++ /dev/null
@@ -1,13 +0,0 @@
-#!/bin/bash
-
-tag=$1
-
-if [ -z $tag ]; then
- echo -e "\nYou must provide a tag"
- echo -e "\nUsage: bash build_docker.sh TAG\n"
- exit 1
-fi
-
-docker build -t quay.io/humancellatlas/secondary-analysis-loom-output:$tag .
-
-echo "You can now push with docker push quay.io/humancellatlas/secondary-analysis-loom-output:$tag"
diff --git a/dockers/skylab/loom-output/requirements.txt b/dockers/skylab/loom-output/requirements.txt
deleted file mode 100644
index 08493c4ca5..0000000000
--- a/dockers/skylab/loom-output/requirements.txt
+++ /dev/null
@@ -1,2 +0,0 @@
-loompy==3.0.6
-h5py==2.10.0
diff --git a/dockers/skylab/picard/Dockerfile b/dockers/skylab/picard/Dockerfile
deleted file mode 100644
index 9d6c703637..0000000000
--- a/dockers/skylab/picard/Dockerfile
+++ /dev/null
@@ -1,25 +0,0 @@
-FROM openjdk:8-jre
-
-LABEL maintainer="Green Lantern " \
- software="Picard" \
- version="2.26.10" \
- description="A set of command line tools (in Java) for manipulating high-throughput sequencing (HTS) data and formats such as SAM/BAM/CRAM and VCF." \
- website="http://broadinstitute.github.io/picard"
-
-# Please follow the below instructions to invoke picard when you are using this docker image:
-# java jvm-args -jar /usr/picard/picard.jar PicardToolName OPTION1=value1 OPTION2=value2...
-ENV version 2.26.10
-WORKDIR /usr/picard
-ADD https://github.com/broadinstitute/picard/releases/download/${version}/picard.jar ./picard.jar
-
-# Install R dependencies for Picard
-RUN apt-get update && \
- apt-get upgrade -y && \
- apt-get install -y r-base
-
-RUN echo "r <- getOption('repos'); r['CRAN'] <- 'http://cran.us.r-project.org'; options(repos = r);" > ~/.Rprofile
-RUN Rscript -e "install.packages('reshape')"
-RUN Rscript -e "install.packages('gplots')"
-RUN Rscript -e "install.packages('ggplot2')"
-
-#TODO: did not find any task using the R scripts contained in this docker. Switched all tasks to cloud docker image. Test and remove if tests look OK
diff --git a/dockers/skylab/pytools/Dockerfile b/dockers/skylab/pytools/Dockerfile
new file mode 100644
index 0000000000..28c2e4214e
--- /dev/null
+++ b/dockers/skylab/pytools/Dockerfile
@@ -0,0 +1,29 @@
+FROM --platform=linux/amd64 python:3.7.2
+
+ENV TERM=xterm-256color \
+ TINI_VERSION=v0.19.0\
+ PATH=$PATH:/usr/gitc
+
+LABEL MAINTAINER="Broad Institute DSDE -`
+
+
+We keep track of all past versions in [docker_versions](docker_versions.tsv) with the last image listed being the currently used version in WARP.
+
+You can see more information about the image, including the tool versions, by running the following command:
+
+```bash
+$ docker pull us.gcr.io/broad-gotc-prod/pytools:1.0.0-1661263730
+$ docker inspect us.gcr.io/broad-gotc-prod/pytools:1.0.0-1661263730
+```
+
+## Usage
+
+```bash
+$ docker run --rm -it \
+ us.gcr.io/broad-gotc-prod/pytools:1.0.0-1661263730
+```
+
+## Scripts
+
+This image contains the following scripts:
+
+* `breakoutSnap.py` extracts the data in a snap file as csv files
+* `create-merged-npz-output.py` takes a barcode.tsv, feature.tsv and matrix.mtx from STAR alignment outputs and creates 2 npy files and an npz file for row_index, col_index and the matrix. These files are required in the empty_drop step.
+* `create_snss2_counts_csv.py` creates a csv file containing intron and exon counts from the Single Nucleus Smart-Seq2 pipeline
+* `loomCompare.py` compares differences between loom files
+* `ss2_loom_merge.py` creates a single loom file from multiple single sample loom files
+* `makeCompliantBAM.py` make a BAM file with cellular barcodes in the read names compliant by moving them to the CB tag
+
+The following scripts create a loom file from counts, metadata, and metrics from each pipeline:
+* `create_loom_optimus.py` for Optimus pipeline
+* `create_loom_snss2.py` for Single Nucleus Smart-Seq2 pipeline
+* `create_snrna_optimus.py` for Optimus in `sn_rna` mode with `count_exons=false`
+* `create_snrna_optimus_counts.py` for Optimus in `sn_rna` mode with `count_exons=true`
+
diff --git a/dockers/skylab/pytools/docker_build.sh b/dockers/skylab/pytools/docker_build.sh
new file mode 100644
index 0000000000..aa744a271a
--- /dev/null
+++ b/dockers/skylab/pytools/docker_build.sh
@@ -0,0 +1,61 @@
+#!/bin/bash
+set -e
+
+# Update version when changes to Dockerfile are made
+DOCKER_IMAGE_VERSION=1.0.0
+TIMESTAMP=$(date +"%s")
+DIR=$(cd "$(dirname "$0")" && pwd)
+
+# Registries and tags
+GCR_URL="us.gcr.io/broad-gotc-prod/pytools"
+#QUAY_URL="quay.io/broadinstitute/gotc-prod-pytools" # Update and uncomment push block below after setting up quay repo
+
+# Necessary tools and help text
+TOOLS=(docker gcloud)
+HELP="$(basename "$0") [-h|--help] [-t|tools] -- script to build the pytools image and push to GCR
+where:
+ -h|--help Show help text
+ -t|--tools Show tools needed to run script
+ "
+
+function main(){
+ for t in "${TOOLS[@]}"; do which "$t" >/dev/null || ok=no; done
+ if [[ $ok == no ]]; then
+ echo "Missing one of the following tools: "
+ for t in "${TOOLS[@]}"; do echo "$t"; done
+ exit 1
+ fi
+
+ while [[ $# -gt 0 ]]
+ do
+ key="$1"
+ case $key in
+ -h|--help)
+ echo "$HELP"
+ exit 0
+ ;;
+ -t|--tools)
+ for t in "${TOOLS[@]}"; do echo "$t"; done
+ exit 0
+ ;;
+ *)
+ shift
+ ;;
+ esac
+ done
+
+ IMAGE_TAG="$DOCKER_IMAGE_VERSION-$TIMESTAMP"
+
+ echo "building and pushing GCR Image - $GCR_URL:$IMAGE_TAG"
+ docker build --no-cache -t "$GCR_URL:$IMAGE_TAG" "$DIR"
+ docker push "$GCR_URL:$IMAGE_TAG"
+
+# echo "tagging and pushing Quay Image"
+# docker tag "$GCR_URL:$IMAGE_TAG" "$QUAY_URL:$IMAGE_TAG"
+# docker push "$QUAY_URL:$IMAGE_TAG"
+
+ echo -e "$GCR_URL:$IMAGE_TAG" >> "$DIR/docker_versions.tsv"
+ echo "done"
+}
+
+main "$@"
\ No newline at end of file
diff --git a/dockers/skylab/pytools/docker_versions.tsv b/dockers/skylab/pytools/docker_versions.tsv
new file mode 100644
index 0000000000..f877b400b4
--- /dev/null
+++ b/dockers/skylab/pytools/docker_versions.tsv
@@ -0,0 +1,2 @@
+DOCKER_VERSION
+us.gcr.io/broad-gotc-prod/pytools:1.0.0-1661263730
diff --git a/dockers/skylab/pytools/requirements.txt b/dockers/skylab/pytools/requirements.txt
new file mode 100644
index 0000000000..72d747ee04
--- /dev/null
+++ b/dockers/skylab/pytools/requirements.txt
@@ -0,0 +1,4 @@
+numpy
+scipy
+loompy==3.0.6
+h5py==2.10.0
\ No newline at end of file
diff --git a/dockers/skylab/snap-breakout/breakoutSnap.py b/dockers/skylab/pytools/tools/breakoutSnap.py
similarity index 100%
rename from dockers/skylab/snap-breakout/breakoutSnap.py
rename to dockers/skylab/pytools/tools/breakoutSnap.py
diff --git a/dockers/skylab/create-npz-output/create-merged-npz-output.py b/dockers/skylab/pytools/tools/create-merged-npz-output.py
similarity index 100%
rename from dockers/skylab/create-npz-output/create-merged-npz-output.py
rename to dockers/skylab/pytools/tools/create-merged-npz-output.py
diff --git a/dockers/skylab/loom-output/create_loom_optimus.py b/dockers/skylab/pytools/tools/create_loom_optimus.py
similarity index 100%
rename from dockers/skylab/loom-output/create_loom_optimus.py
rename to dockers/skylab/pytools/tools/create_loom_optimus.py
diff --git a/dockers/skylab/loom-output/create_loom_snss2.py b/dockers/skylab/pytools/tools/create_loom_snss2.py
similarity index 100%
rename from dockers/skylab/loom-output/create_loom_snss2.py
rename to dockers/skylab/pytools/tools/create_loom_snss2.py
diff --git a/dockers/skylab/loom-output/create_loom_ss2.py b/dockers/skylab/pytools/tools/create_loom_ss2.py
similarity index 100%
rename from dockers/skylab/loom-output/create_loom_ss2.py
rename to dockers/skylab/pytools/tools/create_loom_ss2.py
diff --git a/dockers/skylab/loom-output/create_snrna_optimus.py b/dockers/skylab/pytools/tools/create_snrna_optimus.py
similarity index 97%
rename from dockers/skylab/loom-output/create_snrna_optimus.py
rename to dockers/skylab/pytools/tools/create_snrna_optimus.py
index fa6ebd28b0..571b1d971f 100755
--- a/dockers/skylab/loom-output/create_snrna_optimus.py
+++ b/dockers/skylab/pytools/tools/create_snrna_optimus.py
@@ -23,7 +23,7 @@ def create_gene_id_name_map(gtf_file):
# loop through the lines and find the gene_id and gene_name pairs
with gzip.open(gtf_file, "rt") if gtf_file.endswith(".gz") else open(
- gtf_file, "r"
+ gtf_file, "r"
) as fpin:
for _line in fpin:
line = _line.strip()
@@ -109,7 +109,7 @@ def generate_row_attr(args):
gene_metrics_data =np.array(gene_metric_values)
numeric_field_names = gene_metrics[0][1:]
- for i in range(len(numeric_field_names)):
+ for i in range(0, len(numeric_field_names)):
name = numeric_field_names[i]
data = gene_metrics_data[:, i]
row_attrs[name] = data
@@ -162,7 +162,7 @@ def generate_col_attr(args):
"reads_mapped_too_many_loci",
"n_genes",
"genes_detected_multiple_observations"
- ]
+ ]
FloatColumnNames = [ # Float32
"molecule_barcode_fraction_bases_above_30_mean",
@@ -199,18 +199,18 @@ def generate_col_attr(args):
bool_field_names = final_df_bool_column_names
# Create metadata tables and their headers for bool
- for i in range(bool_field_names.shape[0]):
+ for i in range(0, bool_field_names.shape[0]):
name = bool_field_names[i]
data = final_df_bool[:, i]
col_attrs[name] = data
-
+
# Create metadata tables and their headers for float
float_field_names = list(final_df_non_boolean.columns)
for i in range(len(float_field_names)):
name = float_field_names[i]
data = final_df_non_boolean[name].to_numpy()
- col_attrs[name] = data
+ col_attrs[name] = data
if args.verbose:
logging.info(
@@ -238,8 +238,9 @@ def generate_matrix(args):
nrows, ncols = csr_exp_counts.shape
expr_sp = sc.sparse.coo_matrix((nrows, ncols), np.float32)
- xcoord = ycoord = value = []
-
+ xcoord = []
+ ycoord = []
+ value = []
chunk_row_size = 10000
chunk_col_size = 10000
@@ -273,7 +274,7 @@ def generate_matrix(args):
def create_loom_files(args):
"""This function creates the loom file or folder structure in output_loom_path in format file_format,
with input_id from the input folder analysis_output_path
-
+
Args:
args (argparse.Namespace): input arguments for the run
"""
@@ -281,14 +282,14 @@ def create_loom_files(args):
# generate a dictionary of row attributes
- row_attrs = generate_row_attr(args)
-
+ row_attrs = generate_row_attr(args)
+
# generate a dictionarty of column attributes
- col_attrs = generate_col_attr(args)
+ col_attrs = generate_col_attr(args)
# add the expression count matrix data
expr_sp_t = generate_matrix(args)
-
+
# add input_id to col_attrs
col_attrs['input_id'] = np.repeat(args.input_id, expr_sp_t.shape[1])
@@ -304,7 +305,7 @@ def create_loom_files(args):
if args.input_name_metadata_field is not None:
attrDict['input_name_metadata_field'] = args.input_name_metadata_field
attrDict['pipeline_version'] = args.pipeline_version
- #generate loom file
+ #generate loom file
loompy.create(args.output_loom_path, expr_sp_t, row_attrs, col_attrs, file_attrs=attrDict)
def main():
@@ -395,7 +396,7 @@ def main():
action="store_true",
help="whether to output verbose debugging messages",
)
-
+
parser.add_argument(
"--expression_data_type",
dest="expression_data_type",
@@ -416,4 +417,4 @@ def main():
create_loom_files(args)
if __name__ == "__main__":
- main()
+ main()
\ No newline at end of file
diff --git a/dockers/skylab/loom-output/create_snrna_optimus_counts.py b/dockers/skylab/pytools/tools/create_snrna_optimus_counts.py
similarity index 100%
rename from dockers/skylab/loom-output/create_snrna_optimus_counts.py
rename to dockers/skylab/pytools/tools/create_snrna_optimus_counts.py
diff --git a/dockers/skylab/loom-output/create_snss2_counts_csv.py b/dockers/skylab/pytools/tools/create_snss2_counts_csv.py
similarity index 100%
rename from dockers/skylab/loom-output/create_snss2_counts_csv.py
rename to dockers/skylab/pytools/tools/create_snss2_counts_csv.py
diff --git a/dockers/skylab/loom-output/loomCompare.py b/dockers/skylab/pytools/tools/loomCompare.py
similarity index 100%
rename from dockers/skylab/loom-output/loomCompare.py
rename to dockers/skylab/pytools/tools/loomCompare.py
diff --git a/dockers/skylab/snaptools/makeCompliantBAM.py b/dockers/skylab/pytools/tools/makeCompliantBAM.py
similarity index 98%
rename from dockers/skylab/snaptools/makeCompliantBAM.py
rename to dockers/skylab/pytools/tools/makeCompliantBAM.py
index cecbfd260b..4211281e2f 100755
--- a/dockers/skylab/snaptools/makeCompliantBAM.py
+++ b/dockers/skylab/pytools/tools/makeCompliantBAM.py
@@ -34,7 +34,7 @@ def checkArgs(args):
for read in bamfile:
counter += 1
if (counter % 100000 == 0):
- print counter
+ print(counter)
qname = str(read.qname)
i = qname.find(':')
cb, qn = qname[:i], qname[i+1:]
diff --git a/dockers/skylab/loom-output/ss2_loom_merge.py b/dockers/skylab/pytools/tools/ss2_loom_merge.py
similarity index 100%
rename from dockers/skylab/loom-output/ss2_loom_merge.py
rename to dockers/skylab/pytools/tools/ss2_loom_merge.py
diff --git a/dockers/skylab/rsem/Dockerfile b/dockers/skylab/rsem/Dockerfile
new file mode 100644
index 0000000000..77074c07aa
--- /dev/null
+++ b/dockers/skylab/rsem/Dockerfile
@@ -0,0 +1,60 @@
+FROM --platform=linux/amd64 ubuntu:16.04
+
+ENV TERM=xterm-256color \
+ RSEM_VERSION=1.3.0 \
+ STAR_VERSION=2.5.3a \
+ TINI_VERSION=v0.19.0
+
+LABEL MAINTAINER="Broad Institute DSDE -`
+
+
+We keep track of all past versions in [docker_versions](docker_versions.tsv) with the last image listed being the currently used version in WARP.
+
+You can see more information about the image, including the tool versions, by running the following command:
+
+```bash
+$ docker pull us.gcr.io/broad-gotc-prod/rsem:1.0.0-1663016024
+$ docker inspect us.gcr.io/broad-gotc-prod/rsem:1.0.0-1663016024
+```
+
+## Usage
+
+```bash
+$ docker run --rm -it \
+ us.gcr.io/broad-gotc-prod/rsem:1.0.0-1663016024 rsem-prepare-reference --help
+```
\ No newline at end of file
diff --git a/dockers/skylab/rsem/docker_build.sh b/dockers/skylab/rsem/docker_build.sh
new file mode 100644
index 0000000000..a5515739b8
--- /dev/null
+++ b/dockers/skylab/rsem/docker_build.sh
@@ -0,0 +1,61 @@
+#!/bin/bash
+set -e
+
+# Update version when changes to Dockerfile are made
+DOCKER_IMAGE_VERSION=1.0.0
+TIMESTAMP=$(date +"%s")
+DIR=$(cd "$(dirname "$0")" && pwd)
+
+# Registries and tags
+GCR_URL="us.gcr.io/broad-gotc-prod/rsem"
+#QUAY_URL="quay.io/broadinstitute/gotc-prod-rsem" # Update and uncomment push block below after setting up quay repo
+
+# Necessary tools and help text
+TOOLS=(docker gcloud)
+HELP="$(basename "$0") [-h|--help] [-t|tools] -- script to build the RSEM image and push to GCR
+where:
+ -h|--help Show help text
+ -t|--tools Show tools needed to run script
+ "
+
+function main(){
+ for t in "${TOOLS[@]}"; do which "$t" >/dev/null || ok=no; done
+ if [[ $ok == no ]]; then
+ echo "Missing one of the following tools: "
+ for t in "${TOOLS[@]}"; do echo "$t"; done
+ exit 1
+ fi
+
+ while [[ $# -gt 0 ]]
+ do
+ key="$1"
+ case $key in
+ -h|--help)
+ echo "$HELP"
+ exit 0
+ ;;
+ -t|--tools)
+ for t in "${TOOLS[@]}"; do echo "$t"; done
+ exit 0
+ ;;
+ *)
+ shift
+ ;;
+ esac
+ done
+
+ IMAGE_TAG="$DOCKER_IMAGE_VERSION-$TIMESTAMP"
+
+ echo "building and pushing GCR Image - $GCR_URL:$IMAGE_TAG"
+ docker build --no-cache -t "$GCR_URL:$IMAGE_TAG" "$DIR"
+ docker push "$GCR_URL:$IMAGE_TAG"
+
+# echo "tagging and pushing Quay Image"
+# docker tag "$GCR_URL:$IMAGE_TAG" "$QUAY_URL:$IMAGE_TAG"
+# docker push "$QUAY_URL:$IMAGE_TAG"
+
+ echo -e "$GCR_URL:$IMAGE_TAG" >> "$DIR/docker_versions.tsv"
+ echo "done"
+}
+
+main "$@"
\ No newline at end of file
diff --git a/dockers/skylab/rsem/docker_versions.tsv b/dockers/skylab/rsem/docker_versions.tsv
new file mode 100644
index 0000000000..6ab703c6a7
--- /dev/null
+++ b/dockers/skylab/rsem/docker_versions.tsv
@@ -0,0 +1,2 @@
+DOCKER_VERSION
+us.gcr.io/broad-gotc-prod/rsem:1.0.0-1663016024
diff --git a/dockers/skylab/samtools/Dockerfile b/dockers/skylab/samtools/Dockerfile
deleted file mode 100644
index e684fd2c02..0000000000
--- a/dockers/skylab/samtools/Dockerfile
+++ /dev/null
@@ -1,34 +0,0 @@
-FROM ubuntu:16.04
-
-LABEL maintainer="Ambrose J. Carr " \
- software="samtools" \
- version="1.6" \
- description="processing sequence alignments in SAM and BAM formats" \
- website="https://samtools.github.io"
-
-RUN apt update && \
- apt install -y \
- wget \
- bzip2 \
- g++ \
- cmake \
- curl \
- libncurses5-dev \
- zlib1g-dev \
- libbz2-dev \
- zip \
- unzip \
- liblzma-dev \
- openssl \
- libcurl4-openssl-dev \
- libssl-dev
-
-WORKDIR /usr/local/samtools
-ADD https://github.com/samtools/samtools/releases/download/1.6/samtools-1.6.tar.bz2 .
-
-RUN tar -xvf samtools-1.6.tar.bz2 && \
- rm samtools-1.6.tar.bz2 && \
- cd samtools-1.6 && \
- ./configure --prefix=/usr && \
- make && \
- make install
diff --git a/dockers/skylab/snap-breakout/Dockerfile b/dockers/skylab/snap-breakout/Dockerfile
deleted file mode 100644
index 43e4b0eba8..0000000000
--- a/dockers/skylab/snap-breakout/Dockerfile
+++ /dev/null
@@ -1,13 +0,0 @@
-FROM python:3.6.2
-
-LABEL maintainer="Lantern Team " \
- software="python for breakout snap step" \
- description="python for exporting snap files into csv"
-
-RUN pip install \
- pandas==0.20.3 \
- h5py==2.9.0
-
-RUN mkdir /tools/
-COPY breakoutSnap.py /tools/
-ENV PATH=/tools/:$PATH
diff --git a/dockers/skylab/snap-breakout/build.sh b/dockers/skylab/snap-breakout/build.sh
deleted file mode 100755
index 35241cb16b..0000000000
--- a/dockers/skylab/snap-breakout/build.sh
+++ /dev/null
@@ -1,4 +0,0 @@
-#!/bin/bash
-
-docker build -t quay.io/humancellatlas/snap-breakout:0.0.1 .
-docker push quay.io/humancellatlas/snap-breakout:0.0.1
diff --git a/dockers/skylab/snaptools-bwa/Dockerfile b/dockers/skylab/snaptools-bwa/Dockerfile
new file mode 100644
index 0000000000..3c6ffcef30
--- /dev/null
+++ b/dockers/skylab/snaptools-bwa/Dockerfile
@@ -0,0 +1,39 @@
+FROM --platform=linux/amd64 python:3.7.2
+
+ARG BWA_VERSION=0.7.17
+
+ENV TERM=xterm-256color \
+ BWA_URL=https://sourceforge.net/projects/bio-bwa/files/bwa-${BWA_VERSION}.tar.bz2/download \
+ TINI_VERSION=v0.19.0 \
+ PATH=$PATH:/usr/gitc
+
+LABEL MAINTAINER="Broad Institute DSDE ---`
+
+We keep track of all past versions in [docker_versions](docker_versions.tsv) with the last image listed being the currently used version in WARP.
+
+You can see more information about the image, including the tool versions, by running the following command:
+
+```bash
+$ docker pull us.gcr.io/broad-gotc-prod/snaptools-bwa:1.0.0-1.4.8-0.7.17-1660844602
+$ docker inspect us.gcr.io/broad-gotc-prod/snaptools-bwa:1.0.0-1.4.8-0.7.17-1660844602
+```
+
+## Usage
+
+### BWA
+
+```bash
+$ docker run --rm -it \
+ us.gcr.io/broad-gotc-prod/snaptools-bwa:1.0.0-1.4.8-0.7.17-1660844602 \
+ bwa
+```
+
+### Snaptools
+
+See Snaptools GitHub for [more info](https://github.com/r3fang/SnapTools).
\ No newline at end of file
diff --git a/dockers/skylab/snaptools-bwa/docker_build.sh b/dockers/skylab/snaptools-bwa/docker_build.sh
new file mode 100755
index 0000000000..64c743f016
--- /dev/null
+++ b/dockers/skylab/snaptools-bwa/docker_build.sh
@@ -0,0 +1,76 @@
+#!/bin/bash
+set -e
+
+# Update version when changes to Dockerfile are made
+DOCKER_IMAGE_VERSION=1.0.0
+TIMESTAMP=$(date +"%s")
+DIR=$(cd "$(dirname "$0")" && pwd)
+
+# Registries and tags
+GCR_URL="us.gcr.io/broad-gotc-prod/snaptools-bwa"
+#QUAY_URL="quay.io/broadinstitute/gotc-prod-snaptools-bwa" # Update and uncomment push block below after setting up quay repo
+
+# BWA version
+BWA_VERSION=0.7.17
+
+# Snaptools version
+SNAPTOOLS_VERSION=1.4.8
+
+# Necessary tools and help text
+TOOLS=(docker gcloud)
+HELP="$(basename "$0") [-h|--help] [-t|tools] -- script to build the snaptools-bwa image and push to GCR
+where:
+ -h|--help Show help text
+ -s|--snaptools_version Version of snaptools to use (default: SNAPTOOLS_VERSION=$SNAPTOOLS_VERSION)
+ -t|--tools Show tools needed to run script
+ "
+
+function main(){
+ for t in "${TOOLS[@]}"; do which "$t" >/dev/null || ok=no; done
+ if [[ $ok == no ]]; then
+ echo "Missing one of the following tools: "
+ for t in "${TOOLS[@]}"; do echo "$t"; done
+ exit 1
+ fi
+
+ while [[ $# -gt 0 ]]
+ do
+ key="$1"
+ case $key in
+ -h|--help)
+ echo "$HELP"
+ exit 0
+ ;;
+ -s|--snaptools_version)
+ SNAPTOOLS_VERSION="$2"
+ shift
+ shift
+ ;;
+ -t|--tools)
+ for t in "${TOOLS[@]}"; do echo "$t"; done
+ exit 0
+ ;;
+ *)
+ shift
+ ;;
+ esac
+ done
+
+ IMAGE_TAG="$DOCKER_IMAGE_VERSION-$SNAPTOOLS_VERSION-$BWA_VERSION-$TIMESTAMP"
+
+ echo "building and pushing GCR Image - $GCR_URL:$IMAGE_TAG"
+ docker build -t "$GCR_URL:$IMAGE_TAG" \
+ --build-arg SNAPTOOLS_VERSION="$SNAPTOOLS_VERSION" \
+ --build-arg BWA_VERSION="$BWA_VERSION" \
+ --no-cache "$DIR"
+ docker push "$GCR_URL:$IMAGE_TAG"
+
+# echo "tagging and pushing Quay Image"
+# docker tag "$GCR_URL:$IMAGE_TAG" "$QUAY_URL:$IMAGE_TAG"
+# docker push "$QUAY_URL:$IMAGE_TAG"
+
+ echo -e "$GCR_URL:$IMAGE_TAG" >> "$DIR/docker_versions.tsv"
+ echo "done"
+}
+
+main "$@"
\ No newline at end of file
diff --git a/dockers/skylab/snaptools-bwa/docker_versions.tsv b/dockers/skylab/snaptools-bwa/docker_versions.tsv
new file mode 100644
index 0000000000..ab9d94cae2
--- /dev/null
+++ b/dockers/skylab/snaptools-bwa/docker_versions.tsv
@@ -0,0 +1,2 @@
+DOCKER_VERSION
+us.gcr.io/broad-gotc-prod/snaptools-bwa:1.0.0-1.4.8-0.7.17-1660844602
diff --git a/dockers/skylab/snaptools/Dockerfile b/dockers/skylab/snaptools/Dockerfile
deleted file mode 100644
index 76c0514981..0000000000
--- a/dockers/skylab/snaptools/Dockerfile
+++ /dev/null
@@ -1,29 +0,0 @@
-FROM ubuntu:18.04
-
-# Install system requirements
-RUN apt update && \
- apt install -y git python python-pip zlib1g zlib1g-dev wget
-
-# Install SnapTools
-RUN mkdir /install && \
- cd /install && \
- git clone https://github.com/r3fang/SnapTools.git && \
- cd SnapTools && \
- pip install .
-
-RUN cd /install && \
- wget -O "bwa-0.7.17.tar.bz2" "https://sourceforge.net/projects/bio-bwa/files/bwa-0.7.17.tar.bz2/download" && \
- tar xvjf bwa-0.7.17.tar.bz2 && \
- cd bwa-0.7.17 && \
- make && \
- mkdir /tools/ && \
- cp bwa /tools/
-
-# This fixes an error in the code of snaptools that
-# references an absolute path for the python binary
-RUN mkdir -p /home/r3fang/anaconda2/bin/ && \
- ln -s `which python` /home/r3fang/anaconda2/bin/python
-
-ENV PATH /tools/:$PATH
-
-COPY makeCompliantBAM.py /tools/
diff --git a/dockers/skylab/snaptools/build.sh b/dockers/skylab/snaptools/build.sh
deleted file mode 100755
index 3b638a19f3..0000000000
--- a/dockers/skylab/snaptools/build.sh
+++ /dev/null
@@ -1,6 +0,0 @@
-#!/bin/bash
-
-docker build . -t quay.io/humancellatlas/snaptools:0.0.1
-
-echo You can now push to quay.io using the following command
-echo docker push quay.io/humancellatlas/snaptools:0.0.1
diff --git a/dockers/skylab/snss2-build-indices/Dockerfile b/dockers/skylab/snss2-build-indices/Dockerfile
deleted file mode 100644
index 78860c3186..0000000000
--- a/dockers/skylab/snss2-build-indices/Dockerfile
+++ /dev/null
@@ -1,29 +0,0 @@
-FROM python:3.6.2
-
-LABEL maintainer="Farzaneh Khajouei " \
- software="subread package" \
- version="2.0.2" \
- description="RNA-seq high-performance read alignment, quantification and mutation discovery" \
- website="http://subread.sourceforge.net/"
-
-# Install compiler
-RUN apt-get update --fix-missing && apt-get install -y \
- wget
-
-COPY requirements.txt .
-RUN pip3 install -r requirements.txt
-
-# Install subread
-WORKDIR /usr/local/
-RUN wget https://github.com/alexdobin/STAR/archive/refs/tags/2.7.10a.tar.gz && \
- tar -xf 2.7.10a.tar.gz
-RUN chmod +x /usr/local/STAR-2.7.10a/bin/Linux_x86_64_static/STAR
-ENV PATH /usr/local/STAR-2.7.10a/bin/Linux_x86_64_static/:$PATH
-
-
-# copy the script that removes alignments spanning intron-exon junctions
-RUN mkdir /script
-WORKDIR /script
-COPY add-introns-to-gtf.py .
-COPY modify_gtf.py .
-ENV PATH /script/:$PATH
diff --git a/dockers/skylab/star/Dockerfile b/dockers/skylab/star/Dockerfile
index 1bc8cb2459..377cb3e4fb 100644
--- a/dockers/skylab/star/Dockerfile
+++ b/dockers/skylab/star/Dockerfile
@@ -1,38 +1,33 @@
-FROM ubuntu:16.04
-LABEL maintainer="Kylee Degatano "\
- software="STAR" \
- version="2.7.9a" \
- description="RNA-seq aligner" \
- website="https://github.com/alexdobin/STAR"
-RUN mkdir build
-WORKDIR build
-# install additional python packages
-#Install wget, unzip
-RUN apt update && apt install -y \
- liblzma-dev \
- libbz2-dev \
- cmake automake \
- curl \
- libboost-all-dev \
- wget \
- build-essential \
- gcc-multilib \
- zlib1g-dev \
- libxml2-dev \
- libncurses5-dev \
- r-base \
- r-base-core \
- r-base-dev
-WORKDIR /usr/local/
+FROM --platform=linux/amd64 alpine:latest
-RUN wget https://github.com/alexdobin/STAR/archive/refs/tags/2.7.9a.tar.gz && \
- tar -xf 2.7.9a.tar.gz
-RUN chmod +x /usr/local/STAR-2.7.9a/bin/Linux_x86_64_static/STAR
-ENV PATH /usr/local/STAR-2.7.9a/bin/Linux_x86_64_static/:$PATH
-RUN cp /usr/local/STAR-2.7.9a/bin/Linux_x86_64_static/STAR /usr/local/bin
+ARG STAR_VERSION=2.7.9a
+ENV TERM=xterm-256color \
+ STAR_URL=https://github.com/alexdobin/STAR/archive/${STAR_VERSION}.tar.gz
-WORKDIR /
-RUN rm -rf /build
-COPY create-npz-output.py .
+LABEL MAINTAINER="Broad Institute DSDE --`
+
+
+We keep track of all past versions in [docker_versions](docker_versions.tsv) with the last image listed being the currently used version in WARP.
+
+You can see more information about the image, including the tool versions, by running the following command:
+
+```bash
+$ docker pull us.gcr.io/broad-gotc-prod/star:1.0.0-2.7.9a-1658334187
+$ docker inspect us.gcr.io/broad-gotc-prod/star:1.0.0-2.7.9a-1658334187
+```
+
+## Usage
+
+```bash
+$ docker run --rm -it \
+ us.gcr.io/broad-gotc-prod/star:1.0.0-2.7.9a-1658334187 STAR
+```
\ No newline at end of file
diff --git a/dockers/skylab/star/docker_build.sh b/dockers/skylab/star/docker_build.sh
new file mode 100644
index 0000000000..02e1ba8933
--- /dev/null
+++ b/dockers/skylab/star/docker_build.sh
@@ -0,0 +1,71 @@
+#!/bin/bash
+set -e
+
+# Update version when changes to Dockerfile are made
+DOCKER_IMAGE_VERSION=1.0.0
+TIMESTAMP=$(date +"%s")
+DIR=$(cd "$(dirname "$0")" && pwd)
+
+# Registries and tags
+GCR_URL="us.gcr.io/broad-gotc-prod/star"
+#QUAY_URL="quay.io/broadinstitute/gotc-prod-star" # Update and uncomment push block below after setting up quay repo
+
+# STAR version
+STAR_VERSION="2.7.9a"
+
+# Necessary tools and help text
+TOOLS=(docker gcloud)
+HELP="$(basename "$0") [-h|--help] [-v|--version] [-t|tools] -- script to build the star image and push to GCR
+where:
+ -h|--help Show help text
+ -v|--version Version of STAR to use (default: $STAR_VERSION)
+ -t|--tools Show tools needed to run script
+ "
+
+function main(){
+ for t in "${TOOLS[@]}"; do which "$t" >/dev/null || ok=no; done
+ if [[ $ok == no ]]; then
+ echo "Missing one of the following tools: "
+ for t in "${TOOLS[@]}"; do echo "$t"; done
+ exit 1
+ fi
+
+ while [[ $# -gt 0 ]]
+ do
+ key="$1"
+ case $key in
+ -v|--version)
+ STAR_VERSION="$2"
+ shift
+ shift
+ ;;
+ -h|--help)
+ echo "$HELP"
+ exit 0
+ ;;
+ -t|--tools)
+ for t in "${TOOLS[@]}"; do echo "$t"; done
+ exit 0
+ ;;
+ *)
+ shift
+ ;;
+ esac
+ done
+
+ IMAGE_TAG="$DOCKER_IMAGE_VERSION-$STAR_VERSION-$TIMESTAMP"
+
+ echo "building and pushing GCR Image - $GCR_URL:$IMAGE_TAG"
+ docker build --no-cache -t "$GCR_URL:$IMAGE_TAG" \
+ --build-arg STAR_VERSION="$STAR_VERSION" "$DIR"
+ docker push "$GCR_URL:$IMAGE_TAG"
+
+# echo "tagging and pushing Quay Image"
+# docker tag "$GCR_URL:$IMAGE_TAG" "$QUAY_URL:$IMAGE_TAG"
+# docker push "$QUAY_URL:$IMAGE_TAG"
+
+ echo -e "$GCR_URL:$IMAGE_TAG" >> "$DIR/docker_versions.tsv"
+ echo "done"
+}
+
+main "$@"
\ No newline at end of file
diff --git a/dockers/skylab/star/docker_versions.tsv b/dockers/skylab/star/docker_versions.tsv
new file mode 100644
index 0000000000..14ec79567f
--- /dev/null
+++ b/dockers/skylab/star/docker_versions.tsv
@@ -0,0 +1,2 @@
+DOCKER_VERSION
+us.gcr.io/broad-gotc-prod/star:1.0.0-2.7.9a-1658781884
diff --git a/dockers/skylab/subread/Dockerfile b/dockers/skylab/subread/Dockerfile
index 99473a24a7..bb4a7909fb 100644
--- a/dockers/skylab/subread/Dockerfile
+++ b/dockers/skylab/subread/Dockerfile
@@ -1,34 +1,38 @@
-FROM ubuntu:16.04
-LABEL maintainer=" Jishu Xu " \
- software="subread package" \
- version="1.6.0" \
- description="RNA-seq high-performance read alignment, quantification and mutation discovery" \
- website="http://subread.sourceforge.net/"
+FROM --platform=linux/amd64 python:3.6.2
-# Install compiler
-RUN apt-get update --fix-missing && apt-get install -y \
- build-essential \
- gcc-multilib \
- apt-utils \
- zlib1g-dev \
- libxml2-dev \
- curl \
- wget \
- libbz2-dev \
- cmake automake \
- libboost-all-dev \
- libncurses5-dev \
- r-base \
- r-base-core \
- r-base-dev
-
-# Install subread
-WORKDIR /usr/local/
-ENV VERSION="1.6.0"
-RUN wget "https://downloads.sourceforge.net/project/subread/subread-${VERSION}/subread-${VERSION}-source.tar.gz"
-RUN tar -xzvf subread-${VERSION}-source.tar.gz
-WORKDIR /usr/local/subread-${VERSION}-source/src
-RUN make -f Makefile.Linux
-ENV PATH /usr/local/subread-${VERSION}-source/bin/:$PATH
-# Cleanup
-RUN apt-get clean
+ARG SUBREAD_VERSION="2.0.1"
+
+ENV TERM=xterm-256color \
+ SUBREAD_URL="https://downloads.sourceforge.net/project/subread/subread-${SUBREAD_VERSION}/subread-${SUBREAD_VERSION}-source.tar.gz" \
+ TINI_VERSION=v0.19.0 \
+ PATH=$PATH:/usr/gitc/subread-${SUBREAD_VERSION}-source/bin
+
+LABEL MAINTAINER="Broad Institute DSDE " \
+ SUBREAD_VERSION=${SUBREAD_VERSION}
+
+WORKDIR /usr/gitc
+
+COPY requirements.txt .
+COPY remove-reads-on-junctions.py .
+
+RUN set -eux; \
+ apt-get update --fix-missing; \
+ apt-get install -y wget; \
+ python3 -m pip install --upgrade pip; \
+ pip3 install -r requirements.txt \
+ ; \
+# Install subread
+ wget --no-check-certificate ${SUBREAD_URL}; \
+ tar -xzvf subread-${SUBREAD_VERSION}-source.tar.gz; \
+ cd /usr/gitc/subread-${SUBREAD_VERSION}-source/src; \
+ make -f Makefile.Linux; \
+# Install TINI \
+ cd /usr/gitc; \
+ mkdir temp; \
+ cd temp; \
+ wget https://github.com/krallin/tini/releases/download/${TINI_VERSION}/tini -O /sbin/tini; \
+ chmod +x /sbin/tini; \
+ rm -r /usr/gitc/temp;
+
+# Set tini as default entrypoint
+ENTRYPOINT ["/sbin/tini", "--" ]
\ No newline at end of file
diff --git a/dockers/skylab/subread/README.md b/dockers/skylab/subread/README.md
new file mode 100644
index 0000000000..9f6bdd8986
--- /dev/null
+++ b/dockers/skylab/subread/README.md
@@ -0,0 +1,38 @@
+# Subread
+
+## Quick reference
+
+Copy and paste to pull this image
+
+#### `docker pull us.gcr.io/broad-gotc-prod/subread:1.0.0-2.0.1-1662044537`
+
+
+- __What is this image:__ This image is a lightweight custom image (based on `python:3.6.2`) for running the Subread sequencing data processing suite.
+- __What is Subread:__ Subread contains a suite of high-performance software programs for processing next-generation sequencing data, [more info](http://subread.sourceforge.net).
+- __How to see tool version used in image:__ Please see below.
+
+## Versioning
+
+Subread uses the following convention for versioning:
+
+#### `us.gcr.io/broad-gotc-prod/subread:<image-version>-<subread-version>-<unix-timestamp>`
+
+
+We keep track of all past versions in [docker_versions](docker_versions.tsv) with the last image listed being the currently used version in WARP.
+
+You can see more information about the image, including the tool versions, by running the following command:
+
+```bash
+$ docker pull us.gcr.io/broad-gotc-prod/subread:1.0.0-2.0.1-1662044537
+$ docker inspect us.gcr.io/broad-gotc-prod/subread:1.0.0-2.0.1-1662044537
+```
+
+## Usage
+
+This image contains several tools. See [here](http://subread.sourceforge.net/subread.html) for a sample workflow.
+To show the `featureCounts` help page, for example:
+
+```bash
+$ docker run --rm -it \
+ us.gcr.io/broad-gotc-prod/subread:1.0.0-2.0.1-1662044537 featureCounts
+```
\ No newline at end of file
diff --git a/dockers/skylab/subread/docker_build.sh b/dockers/skylab/subread/docker_build.sh
new file mode 100644
index 0000000000..7b00a973fd
--- /dev/null
+++ b/dockers/skylab/subread/docker_build.sh
@@ -0,0 +1,71 @@
+#!/bin/bash
+set -e
+
+# Update version when changes to Dockerfile are made
+DOCKER_IMAGE_VERSION=1.0.0
+TIMESTAMP=$(date +"%s")
+DIR=$(cd "$(dirname "$0")" && pwd)
+
+# Registries and tags
+GCR_URL="us.gcr.io/broad-gotc-prod/subread"
+#QUAY_URL="quay.io/broadinstitute/gotc-prod-subread" # Update and uncomment push block below after setting up quay repo
+
+# Subread version
+SUBREAD_VERSION="2.0.1"
+
+# Necessary tools and help text
+TOOLS=(docker gcloud)
+HELP="$(basename "$0") [-h|--help] [-v|--version] [-t|--tools] -- script to build the subread image and push to GCR
+where:
+ -h|--help Show help text
+ -v|--version Version of subread to use (default: $SUBREAD_VERSION)
+ -t|--tools Show tools needed to run script
+ "
+
+function main(){
+ for t in "${TOOLS[@]}"; do which "$t" >/dev/null || ok=no; done
+ if [[ $ok == no ]]; then
+ echo "Missing one of the following tools: "
+ for t in "${TOOLS[@]}"; do echo "$t"; done
+ exit 1
+ fi
+
+ while [[ $# -gt 0 ]]
+ do
+ key="$1"
+ case $key in
+ -v|--version)
+ SUBREAD_VERSION="$2"
+ shift
+ shift
+ ;;
+ -h|--help)
+ echo "$HELP"
+ exit 0
+ ;;
+ -t|--tools)
+ for t in "${TOOLS[@]}"; do echo "$t"; done
+ exit 0
+ ;;
+ *)
+ shift
+ ;;
+ esac
+ done
+
+ IMAGE_TAG="$DOCKER_IMAGE_VERSION-$SUBREAD_VERSION-$TIMESTAMP"
+
+ echo "building and pushing GCR Image - $GCR_URL:$IMAGE_TAG"
+ docker build --no-cache -t "$GCR_URL:$IMAGE_TAG" \
+ --build-arg SUBREAD_VERSION="$SUBREAD_VERSION" "$DIR"
+ docker push "$GCR_URL:$IMAGE_TAG"
+
+# echo "tagging and pushing Quay Image"
+# docker tag "$GCR_URL:$IMAGE_TAG" "$QUAY_URL:$IMAGE_TAG"
+# docker push "$QUAY_URL:$IMAGE_TAG"
+
+ echo -e "$GCR_URL:$IMAGE_TAG" >> "$DIR/docker_versions.tsv"
+ echo "done"
+}
+
+main "$@"
\ No newline at end of file
diff --git a/dockers/skylab/subread/docker_versions.tsv b/dockers/skylab/subread/docker_versions.tsv
new file mode 100644
index 0000000000..54ff55e7a6
--- /dev/null
+++ b/dockers/skylab/subread/docker_versions.tsv
@@ -0,0 +1,2 @@
+DOCKER_VERSION
+us.gcr.io/broad-gotc-prod/subread:1.0.0-2.0.1-1662044537
diff --git a/dockers/skylab/featureCounts/remove-reads-on-junctions.py b/dockers/skylab/subread/remove-reads-on-junctions.py
similarity index 100%
rename from dockers/skylab/featureCounts/remove-reads-on-junctions.py
rename to dockers/skylab/subread/remove-reads-on-junctions.py
diff --git a/dockers/skylab/snss2-build-indices/requirements.txt b/dockers/skylab/subread/requirements.txt
similarity index 100%
rename from dockers/skylab/snss2-build-indices/requirements.txt
rename to dockers/skylab/subread/requirements.txt
diff --git a/dockers/skylab/subset-fastq-dataset/Dockerfile b/dockers/skylab/subset-fastq-dataset/Dockerfile
deleted file mode 100644
index a22a8c12a0..0000000000
--- a/dockers/skylab/subset-fastq-dataset/Dockerfile
+++ /dev/null
@@ -1,36 +0,0 @@
-FROM python:3.7.2
-
-LABEL maintainer="Mint Team " \
- software="python 3.6.2" \
- description="python 3.6.2 with pysam, sctools, requests, and a basic science stack"
-
-RUN pip3 install \
- Click==7.0 \
- numpy==1.16.2 \
- pysam==0.15.2 \
- biopython==1.73
-
-## Install software
-RUN apt-get update && \
- apt-get install -y lsb-release && \
- export CLOUD_SDK_REPO="cloud-sdk-$(lsb_release -c -s)" && \
- echo "deb http://packages.cloud.google.com/apt $CLOUD_SDK_REPO main" | tee -a /etc/apt/sources.list.d/google-cloud-sdk.list && \
- curl https://packages.cloud.google.com/apt/doc/apt-key.gpg | apt-key add - && \
- apt-get update && \
- apt-get install -y google-cloud-sdk
-
-## Install latest samtools from source
-RUN mkdir /tools && \
- cd /tools && \
- wget https://github.com/samtools/samtools/releases/download/1.9/samtools-1.9.tar.bz2 && tar xvjf samtools-1.9.tar.bz2 && \
- cd samtools-1.9 && \
- ./configure && \
- make -j 4 && \
- cp samtools ..
-
-## Append tools to path
-ENV PATH=/tools/:${PATH}
-
-## Copy Scripts
-COPY filterFastqByReadName.py /tools/
-
diff --git a/dockers/skylab/subset-fastq-dataset/build.sh b/dockers/skylab/subset-fastq-dataset/build.sh
deleted file mode 100755
index 73a71beb41..0000000000
--- a/dockers/skylab/subset-fastq-dataset/build.sh
+++ /dev/null
@@ -1,13 +0,0 @@
-#!/bin/bash
-
-tag=$1
-
-if [ -z $tag ]; then
- echo -e "\nYou must provide a tag"
- echo -e "\nUsage: bash build_docker.sh TAG\n"
- exit 1
-fi
-
-docker build -t quay.io/humancellatlas/secondary-analysis-subset-fastq:$tag .
-
-echo You can now push with docker push quay.io/humancellatlas/secondary-analysis-subset-fastq:$tag
diff --git a/dockers/skylab/subset-fastq-dataset/filterFastqByReadName.py b/dockers/skylab/subset-fastq-dataset/filterFastqByReadName.py
deleted file mode 100755
index dafc25497a..0000000000
--- a/dockers/skylab/subset-fastq-dataset/filterFastqByReadName.py
+++ /dev/null
@@ -1,36 +0,0 @@
-#!/usr/bin/env python3
-
-import click
-import sys
-from Bio import SeqIO
-import gzip
-
-@click.command()
-@click.option('--in-fastq-gz', help='gz compressed input fastq file')
-@click.option('--out-fastq-gz', help='gz compressed output fastq file')
-@click.option('--keep-reads', help='gz compressed file with read names to keep')
-@click.option('--verbose', help='verbose', default=False, is_flag=True, flag_value=True)
-def filter_by_read_name(in_fastq_gz, out_fastq_gz, keep_reads, verbose):
- # Put reads that are in the keep list in the output file
- keep_read_set = set(line.decode('ascii').rstrip() for line in open(keep_reads, 'rb'))
- if verbose:
- print('Done loading keep read list', file=sys.stderr)
-
- # Loop over input and filter reads
- with gzip.open(out_fastq_gz, 'wt') as output_file:
- with gzip.open(in_fastq_gz, 'rt') as input_file:
- counter = 0
- for rec in SeqIO.parse(input_file, 'fastq'):
- counter += 1
- if verbose and counter % 1e5 == 0:
- print('Processed {} reads'.format(counter))
- if rec.id in keep_read_set:
- SeqIO.write(rec, output_file, 'fastq')
-
- # Print completed message
- if verbose:
- print('Completed', file=sys.stderr)
-
-
-if __name__ == '__main__':
- filter_by_read_name()
diff --git a/dockers/skylab/trim_adapters/Dockerfile b/dockers/skylab/trim_adapters/Dockerfile
deleted file mode 100644
index 76f349fc02..0000000000
--- a/dockers/skylab/trim_adapters/Dockerfile
+++ /dev/null
@@ -1,25 +0,0 @@
-FROM ubuntu:16.04
-LABEL software="ea-utils package" \
- version="1.04.807" \
- description="Command-line tools for processing biological sequencing data. Barcode demultiplexing, adapter trimming, etc. Primarily written to support an Illumina based pipeline - but should work with any FASTQs." \
- website="http://expressionanalysis.github.io/ea-utils/"
-
-# Install compiler
-RUN apt update && apt install -y \
- zlib1g-dev \
- cmake \
- automake \
- wget \
- build-essential \
- libgsl0-dev
-
-# Install ea-utils
-WORKDIR /usr/local/
-RUN wget "https://github.com/ExpressionAnalysis/ea-utils/archive/refs/tags/1.04.807.tar.gz"
-RUN tar -xzvf 1.04.807.tar.gz
-WORKDIR /usr/local/ea-utils-1.04.807/clipper
-RUN make
-RUN make install
-ENV PATH /usr/local/ea-utils-1.04.807/clipper/:$PATH
-# Cleanup
-RUN apt-get clean
diff --git a/dockers/skylab/trim_adapters/build.sh b/dockers/skylab/trim_adapters/build.sh
deleted file mode 100644
index 85893f42e7..0000000000
--- a/dockers/skylab/trim_adapters/build.sh
+++ /dev/null
@@ -1,13 +0,0 @@
-#!/bin/bash
-
-tag=$1
-
-if [ -z $tag ]; then
- echo -e "\nYou must provide a tag"
- echo -e "\nUsage: bash build_docker.sh TAG\n"
- exit 1
-fi
-
-docker build -t quay.io/humancellatlas/snss2-trim-adapters:$tag .
-
-echo "You can now push with docker push quay.io/humancellatlas/snss2-trim-adapters:$tag"
diff --git a/dockers/skylab/umi-tools/Dockerfile b/dockers/skylab/umi-tools/Dockerfile
deleted file mode 100644
index ce530e864b..0000000000
--- a/dockers/skylab/umi-tools/Dockerfile
+++ /dev/null
@@ -1,44 +0,0 @@
-FROM python:3.6.2
-
-LABEL maintainer="Nick Barkas " \
- software="umi_tools" \
- version="0.5.5" \
- description="tools for extraction correction, deduplication and counting of UMIs" \
- website="https://github.com/CGATOxford/UMI-tools"
-
-RUN git clone https://github.com/CGATOxford/UMI-tools.git
-WORKDIR UMI-tools
-RUN git checkout tags/0.5.5
-RUN pip install .
-
-RUN mkdir /root/tools
-COPY getUntaggedReads /root/tools
-
-ENV PATH="/root/tools/:$PATH"
-
-RUN apt update && \
- apt install -y \
- wget \
- bzip2 \
- g++ \
- cmake \
- curl \
- libncurses5-dev \
- zlib1g-dev \
- libbz2-dev \
- zip \
- unzip \
- liblzma-dev \
- openssl \
- libcurl4-openssl-dev \
- libssl-dev
-
-WORKDIR /usr/local/samtools
-ADD https://github.com/samtools/samtools/releases/download/1.6/samtools-1.6.tar.bz2 .
-
-RUN tar -xvf samtools-1.6.tar.bz2 && \
- rm samtools-1.6.tar.bz2 && \
- cd samtools-1.6 && \
- ./configure --prefix=/usr && \
- make && \
- make install
\ No newline at end of file
diff --git a/dockers/skylab/umi-tools/build.sh b/dockers/skylab/umi-tools/build.sh
deleted file mode 100755
index 2f3be5936c..0000000000
--- a/dockers/skylab/umi-tools/build.sh
+++ /dev/null
@@ -1,14 +0,0 @@
-#!/bin/bash
-
-tag=$1
-
-if [ -z $tag ]; then
- echo -e "\nYou must provide a tag"
- echo -e "\nUsage: bash build_docker.sh TAG\n"
- exit 1
-fi
-
-docker build -t quay.io/humancellatlas/secondary-analysis-umitools:$tag .
-
-echo You can now push with
-echo docker push quay.io/humancellatlas/secondary-analysis-umitools:$tag
\ No newline at end of file
diff --git a/dockers/skylab/umi-tools/getUntaggedReads b/dockers/skylab/umi-tools/getUntaggedReads
deleted file mode 100755
index 978071b832..0000000000
--- a/dockers/skylab/umi-tools/getUntaggedReads
+++ /dev/null
@@ -1,30 +0,0 @@
-#!/usr/bin/env python
-
-import argparse
-import pysam
-
-parser = argparse.ArgumentParser(description="Extract all the reads from a bam file that do not have CB or GE tags")
-parser.add_argument('--in-bam-file', dest='inbam', help='input bam file')
-parser.add_argument('--out-bam-file', dest='outbam', help='output bam file')
-
-args = parser.parse_args()
-
-
-def getUntaggedReads(in_bam_file,out_bam_file,verbose=True):
- inbam=pysam.AlignmentFile(in_bam_file,'rb')
- outbam=pysam.AlignmentFile(out_bam_file,'wb',template=inbam)
- counter=0
- for read in inbam:
- counter+=1
- try:
- GE_tag=read.get_tag('GE');
- CB_tag=read.get_tag('CB');
- except KeyError:
- outbam.write(read)
- pass
- if(counter % 1e5 == 0):
- print('Processed {} reads'.format(counter));
- outbam.close()
- inbam.close()
-
-getUntaggedReads(args.inbam,args.outbam)
diff --git a/pipelines/broad/arrays/imputation/Imputation.changelog.md b/pipelines/broad/arrays/imputation/Imputation.changelog.md
index 8889d65e0f..9af896d368 100644
--- a/pipelines/broad/arrays/imputation/Imputation.changelog.md
+++ b/pipelines/broad/arrays/imputation/Imputation.changelog.md
@@ -1,3 +1,24 @@
+# 1.1.5
+2022-09-30 (Date of Last Commit)
+
+* Updated BCFTools/VCFTools and Minimac4 Docker images to fix vulnerabilities.
+* Updated tasks FormatImputationOutputs, FormatImputationWideOutputs, and IngestOutputsToTDR with GCR images instead of Dockerhub.
+
+# 1.1.4
+2022-08-23 (Date of Last Commit)
+
+* Updated BCFTools/VCFTools docker image
+
+# 1.1.3
+2022-08-03 (Date of Last Commit)
+
+* Updated BCFTools/VCFTools Minimac4 Docker images
+
+# 1.1.2
+2022-07-15 (Date of Last Commit)
+
+* Updated task MakeOptionalOutputBam in Utilities.wdl, this update has no effect on this pipeline
+
# 1.1.1
2022-06-01 (Date of Last Commit)
diff --git a/pipelines/broad/arrays/imputation/Imputation.wdl b/pipelines/broad/arrays/imputation/Imputation.wdl
index d35b91fde5..7e0490fd04 100644
--- a/pipelines/broad/arrays/imputation/Imputation.wdl
+++ b/pipelines/broad/arrays/imputation/Imputation.wdl
@@ -6,7 +6,7 @@ import "../../../../tasks/broad/Utilities.wdl" as utils
workflow Imputation {
- String pipeline_version = "1.1.1"
+ String pipeline_version = "1.1.5"
input {
Int chunkLength = 25000000
diff --git a/pipelines/broad/arrays/single_sample/Arrays.changelog.md b/pipelines/broad/arrays/single_sample/Arrays.changelog.md
index 3dffc24897..546e8621a6 100644
--- a/pipelines/broad/arrays/single_sample/Arrays.changelog.md
+++ b/pipelines/broad/arrays/single_sample/Arrays.changelog.md
@@ -1,3 +1,16 @@
+# 2.6.15
+2022-09-30 (Date of Last Commit)
+
+* Updated Picard-Python Docker image in Utilities.wdl to fix vulnerabilities.
+* Updated tasks FormatArraysOutputs and IngestOutputsToTDR with GCR images instead of Dockerhub.
+
+# 2.6.14
+2022-09-07 (Date of Last Commit)
+
+* Updated task MakeOptionalOutputBam in Utilities.wdl, this update has no effect on this pipeline
+* Updated task FormatArraysOutputs in InternalArrraysTasks.wdl with new docker tag to accommodate changes for BroadInternalArrays pipeline. Change has no effect on this pipeline.
+* Updated task IngestOutputsToTDR in InternalTasks.wdl with new docker tag to accommodate changes for BroadInternalArrays pipeline. Change has no effect on this pipeline.
+
# 2.6.13
2022-06-21 (Date of Last Commit)
diff --git a/pipelines/broad/arrays/single_sample/Arrays.wdl b/pipelines/broad/arrays/single_sample/Arrays.wdl
index 5145beef60..c3654c3819 100644
--- a/pipelines/broad/arrays/single_sample/Arrays.wdl
+++ b/pipelines/broad/arrays/single_sample/Arrays.wdl
@@ -23,7 +23,7 @@ import "../../../../tasks/broad/Utilities.wdl" as utils
workflow Arrays {
- String pipeline_version = "2.6.13"
+ String pipeline_version = "2.6.15"
input {
String chip_well_barcode
diff --git a/pipelines/broad/arrays/validate_chip/ValidateChip.changelog.md b/pipelines/broad/arrays/validate_chip/ValidateChip.changelog.md
index 50f5c41634..5f5c743439 100644
--- a/pipelines/broad/arrays/validate_chip/ValidateChip.changelog.md
+++ b/pipelines/broad/arrays/validate_chip/ValidateChip.changelog.md
@@ -1,3 +1,9 @@
+# 1.16.0
+2022-09-30 (Date of Last Commit)
+
+* Updated task IngestOutputsToTDR in InternalTasks.wdl with new docker tag to accommodate changes for BroadInternalArrays pipeline. Change has no effect on this pipeline.
+* Updated task IngestOutputsToTDR with GCR images instead of Dockerhub.
+
# 1.15.9
2022-06-15 (Date of Last Commit)
diff --git a/pipelines/broad/arrays/validate_chip/ValidateChip.wdl b/pipelines/broad/arrays/validate_chip/ValidateChip.wdl
index 4798126a05..79a05c1e72 100644
--- a/pipelines/broad/arrays/validate_chip/ValidateChip.wdl
+++ b/pipelines/broad/arrays/validate_chip/ValidateChip.wdl
@@ -21,7 +21,7 @@ import "../../../../tasks/broad/InternalArraysTasks.wdl" as InternalTasks
workflow ValidateChip {
- String pipeline_version = "1.15.9"
+ String pipeline_version = "1.16.0"
input {
String sample_alias
diff --git a/pipelines/broad/dna_seq/germline/single_sample/exome/ExomeGermlineSingleSample.changelog.md b/pipelines/broad/dna_seq/germline/single_sample/exome/ExomeGermlineSingleSample.changelog.md
index 913a5195d9..f3fcb9c7d0 100644
--- a/pipelines/broad/dna_seq/germline/single_sample/exome/ExomeGermlineSingleSample.changelog.md
+++ b/pipelines/broad/dna_seq/germline/single_sample/exome/ExomeGermlineSingleSample.changelog.md
@@ -1,3 +1,13 @@
+# 3.1.7
+2022-09-23 (Date of Last Commit)
+
+* Updated Picard-Python Docker image in Utilities.wdl to fix vulnerabilities.
+
+# 3.1.6
+2022-07-15 (Date of Last Commit)
+
+* Updated task MakeOptionalOutputBam in Utilities.wdl, this update has no effect on this pipeline
+
# 3.1.5
2022-07-12 (Date of Last Commit)
diff --git a/pipelines/broad/dna_seq/germline/single_sample/exome/ExomeGermlineSingleSample.wdl b/pipelines/broad/dna_seq/germline/single_sample/exome/ExomeGermlineSingleSample.wdl
index 1cf517f75d..959652fab1 100644
--- a/pipelines/broad/dna_seq/germline/single_sample/exome/ExomeGermlineSingleSample.wdl
+++ b/pipelines/broad/dna_seq/germline/single_sample/exome/ExomeGermlineSingleSample.wdl
@@ -39,7 +39,7 @@ import "../../../../../../structs/dna_seq/DNASeqStructs.wdl"
# WORKFLOW DEFINITION
workflow ExomeGermlineSingleSample {
- String pipeline_version = "3.1.5"
+ String pipeline_version = "3.1.7"
input {
diff --git a/pipelines/broad/dna_seq/germline/single_sample/ugwgs/UltimaGenomicsWholeGenomeGermline.changelog.md b/pipelines/broad/dna_seq/germline/single_sample/ugwgs/UltimaGenomicsWholeGenomeGermline.changelog.md
index fba658513f..73ccc94bf2 100644
--- a/pipelines/broad/dna_seq/germline/single_sample/ugwgs/UltimaGenomicsWholeGenomeGermline.changelog.md
+++ b/pipelines/broad/dna_seq/germline/single_sample/ugwgs/UltimaGenomicsWholeGenomeGermline.changelog.md
@@ -1,3 +1,20 @@
+# 1.0.4
+2022-09-30 (Date of Last Commit)
+
+* Updated Picard-Python Docker image in Utilities.wdl to fix vulnerabilities.
+* Updated task IngestOutputsToTDR with GCR images instead of Dockerhub.
+
+# 1.0.3
+2022-09-20 (Date of Last Commit)
+
+* Removed /cromwell_root/ prefix for output file paths in FilterVCF and TrainModel tasks.
+
+# 1.0.2
+2022-09-07 (Date of Last Commit)
+
+* Increased disk space in the MakeOptionalOutputBam task in Utilities.wdl
+* Updated task IngestOutputsToTDR in InternalTasks.wdl with new docker tag to accommodate changes for BroadInternalArrays pipeline. Change has no effect on this pipeline.
+
# 1.0.1
2022-06-21 (Date of Last Commit)
diff --git a/pipelines/broad/dna_seq/germline/single_sample/ugwgs/UltimaGenomicsWholeGenomeGermline.wdl b/pipelines/broad/dna_seq/germline/single_sample/ugwgs/UltimaGenomicsWholeGenomeGermline.wdl
index 3e750bf3a1..478f80f3e2 100644
--- a/pipelines/broad/dna_seq/germline/single_sample/ugwgs/UltimaGenomicsWholeGenomeGermline.wdl
+++ b/pipelines/broad/dna_seq/germline/single_sample/ugwgs/UltimaGenomicsWholeGenomeGermline.wdl
@@ -50,7 +50,7 @@ workflow UltimaGenomicsWholeGenomeGermline {
filtering_model_no_gt_name: "String describing the optional filtering model; default set to rf_model_ignore_gt_incl_hpol_runs"
}
- String pipeline_version = "1.0.1"
+ String pipeline_version = "1.0.4"
References references = alignment_references.references
diff --git a/pipelines/broad/dna_seq/germline/single_sample/wgs/WholeGenomeGermlineSingleSample.changelog.md b/pipelines/broad/dna_seq/germline/single_sample/wgs/WholeGenomeGermlineSingleSample.changelog.md
index 719fd7acf9..05a43b8916 100644
--- a/pipelines/broad/dna_seq/germline/single_sample/wgs/WholeGenomeGermlineSingleSample.changelog.md
+++ b/pipelines/broad/dna_seq/germline/single_sample/wgs/WholeGenomeGermlineSingleSample.changelog.md
@@ -1,3 +1,13 @@
+# 3.1.8
+2022-09-23 (Date of Last Commit)
+
+* Updated Picard-Python Docker image in Utilities.wdl to fix vulnerabilities.
+
+# 3.1.7
+2022-07-15 (Date of Last Commit)
+
+* Updated task MakeOptionalOutputBam in Utilities.wdl, this update has no effect on this pipeline
+
# 3.1.6
2022-07-12 (Date of Last Commit)
diff --git a/pipelines/broad/dna_seq/germline/single_sample/wgs/WholeGenomeGermlineSingleSample.wdl b/pipelines/broad/dna_seq/germline/single_sample/wgs/WholeGenomeGermlineSingleSample.wdl
index 1b52023908..92df16f290 100644
--- a/pipelines/broad/dna_seq/germline/single_sample/wgs/WholeGenomeGermlineSingleSample.wdl
+++ b/pipelines/broad/dna_seq/germline/single_sample/wgs/WholeGenomeGermlineSingleSample.wdl
@@ -40,7 +40,7 @@ import "../../../../../../structs/dna_seq/DNASeqStructs.wdl"
workflow WholeGenomeGermlineSingleSample {
- String pipeline_version = "3.1.6"
+ String pipeline_version = "3.1.8"
input {
diff --git a/pipelines/broad/dna_seq/germline/single_sample/wgs/input_files/WholeGenomeGermlineSingleSample.inputs.plumbing.dragen_mode_best_results.json b/pipelines/broad/dna_seq/germline/single_sample/wgs/input_files/WholeGenomeGermlineSingleSample.inputs.plumbing.dragen_mode_best_results.json
deleted file mode 100644
index 6812111404..0000000000
--- a/pipelines/broad/dna_seq/germline/single_sample/wgs/input_files/WholeGenomeGermlineSingleSample.inputs.plumbing.dragen_mode_best_results.json
+++ /dev/null
@@ -1,65 +0,0 @@
-{
- "WholeGenomeGermlineSingleSample.sample_and_unmapped_bams": {
- "sample_name": "NA12878 PLUMBING",
- "base_file_name": "NA12878_PLUMBING",
- "flowcell_unmapped_bams": [
- "gs://broad-public-datasets/NA12878_downsampled_for_testing/unmapped/H06HDADXX130110.1.ATCACGAT.20k_reads.bam",
- "gs://broad-public-datasets/NA12878_downsampled_for_testing/unmapped/H06HDADXX130110.2.ATCACGAT.20k_reads.bam",
- "gs://broad-public-datasets/NA12878_downsampled_for_testing/unmapped/H06JUADXX130110.1.ATCACGAT.20k_reads.bam"
- ],
- "final_gvcf_base_name": "NA12878_PLUMBING",
- "unmapped_bam_suffix": ".bam"
- },
-
- "WholeGenomeGermlineSingleSample.references": {
- "contamination_sites_ud": "gs://gcp-public-data--broad-references/hg38/v0/contamination-resources/1000g/1000g.phase3.100k.b38.vcf.gz.dat.UD",
- "contamination_sites_bed": "gs://gcp-public-data--broad-references/hg38/v0/contamination-resources/1000g/1000g.phase3.100k.b38.vcf.gz.dat.bed",
- "contamination_sites_mu": "gs://gcp-public-data--broad-references/hg38/v0/contamination-resources/1000g/1000g.phase3.100k.b38.vcf.gz.dat.mu",
- "calling_interval_list": "gs://gcp-public-data--broad-references/hg38/v0/wgs_calling_regions.hg38.interval_list",
- "reference_fasta" : {
- "ref_dict": "gs://gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.dict",
- "ref_fasta": "gs://gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.fasta",
- "ref_fasta_index": "gs://gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.fasta.fai",
- "ref_alt": "gs://gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.fasta.64.alt",
- "ref_sa": "gs://gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.fasta.64.sa",
- "ref_amb": "gs://gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.fasta.64.amb",
- "ref_bwt": "gs://gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.fasta.64.bwt",
- "ref_ann": "gs://gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.fasta.64.ann",
- "ref_pac": "gs://gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.fasta.64.pac",
- "ref_str": "gs://gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.str"
- },
- "known_indels_sites_vcfs": [
- "gs://gcp-public-data--broad-references/hg38/v0/Mills_and_1000G_gold_standard.indels.hg38.vcf.gz",
- "gs://gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.known_indels.vcf.gz"
- ],
- "known_indels_sites_indices": [
- "gs://gcp-public-data--broad-references/hg38/v0/Mills_and_1000G_gold_standard.indels.hg38.vcf.gz.tbi",
- "gs://gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.known_indels.vcf.gz.tbi"
- ],
- "dbsnp_vcf": "gs://gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.dbsnp138.vcf",
- "dbsnp_vcf_index": "gs://gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.dbsnp138.vcf.idx",
- "evaluation_interval_list": "gs://gcp-public-data--broad-references/hg38/v0/wgs_evaluation_regions.hg38.interval_list",
- "haplotype_database_file": "gs://gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.haplotype_database.txt"
- },
- "WholeGenomeGermlineSingleSample.dragmap_reference": {
- "reference_bin": "gs://gcp-public-data--broad-references/hg38/v0/dragen_reference/reference.bin",
- "hash_table_cfg_bin": "gs://gcp-public-data--broad-references/hg38/v0/dragen_reference/hash_table.cfg.bin",
- "hash_table_cmp": "gs://gcp-public-data--broad-references/hg38/v0/dragen_reference/hash_table.cmp"
- },
-
- "WholeGenomeGermlineSingleSample.scatter_settings": {
- "haplotype_scatter_count": 10,
- "break_bands_at_multiples_of": 100000
- },
-
- "WholeGenomeGermlineSingleSample.fingerprint_genotypes_file": "gs://broad-gotc-test-storage/single_sample/plumbing/bams/G96830.NA12878/G96830.NA12878.hg38.reference.fingerprint.vcf.gz",
- "WholeGenomeGermlineSingleSample.fingerprint_genotypes_index": "gs://broad-gotc-test-storage/single_sample/plumbing/bams/G96830.NA12878/G96830.NA12878.hg38.reference.fingerprint.vcf.gz.tbi",
- "WholeGenomeGermlineSingleSample.wgs_coverage_interval_list": "gs://gcp-public-data--broad-references/hg38/v0/wgs_coverage_regions.hg38.interval_list",
-
- "WholeGenomeGermlineSingleSample.papi_settings": {
- "preemptible_tries": 3,
- "agg_preemptible_tries": 3
- },
-
- "WholeGenomeGermlineSingleSample.dragen_maximum_quality_mode": true
-}
diff --git a/pipelines/broad/dna_seq/germline/single_sample/wgs/input_files/WholeGenomeGermlineSingleSample.inputs.plumbing.dragen_mode_functional_equivalence.json b/pipelines/broad/dna_seq/germline/single_sample/wgs/input_files/WholeGenomeGermlineSingleSample.inputs.plumbing.dragen_mode_functional_equivalence.json
deleted file mode 100644
index 8b6fbb39d8..0000000000
--- a/pipelines/broad/dna_seq/germline/single_sample/wgs/input_files/WholeGenomeGermlineSingleSample.inputs.plumbing.dragen_mode_functional_equivalence.json
+++ /dev/null
@@ -1,65 +0,0 @@
-{
- "WholeGenomeGermlineSingleSample.sample_and_unmapped_bams": {
- "sample_name": "NA12878 PLUMBING",
- "base_file_name": "NA12878_PLUMBING",
- "flowcell_unmapped_bams": [
- "gs://broad-public-datasets/NA12878_downsampled_for_testing/unmapped/H06HDADXX130110.1.ATCACGAT.20k_reads.bam",
- "gs://broad-public-datasets/NA12878_downsampled_for_testing/unmapped/H06HDADXX130110.2.ATCACGAT.20k_reads.bam",
- "gs://broad-public-datasets/NA12878_downsampled_for_testing/unmapped/H06JUADXX130110.1.ATCACGAT.20k_reads.bam"
- ],
- "final_gvcf_base_name": "NA12878_PLUMBING",
- "unmapped_bam_suffix": ".bam"
- },
-
- "WholeGenomeGermlineSingleSample.references": {
- "contamination_sites_ud": "gs://gcp-public-data--broad-references/hg38/v0/contamination-resources/1000g/1000g.phase3.100k.b38.vcf.gz.dat.UD",
- "contamination_sites_bed": "gs://gcp-public-data--broad-references/hg38/v0/contamination-resources/1000g/1000g.phase3.100k.b38.vcf.gz.dat.bed",
- "contamination_sites_mu": "gs://gcp-public-data--broad-references/hg38/v0/contamination-resources/1000g/1000g.phase3.100k.b38.vcf.gz.dat.mu",
- "calling_interval_list": "gs://gcp-public-data--broad-references/hg38/v0/wgs_calling_regions.hg38.interval_list",
- "reference_fasta" : {
- "ref_dict": "gs://gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.dict",
- "ref_fasta": "gs://gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.fasta",
- "ref_fasta_index": "gs://gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.fasta.fai",
- "ref_alt": "gs://gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.fasta.64.alt",
- "ref_sa": "gs://gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.fasta.64.sa",
- "ref_amb": "gs://gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.fasta.64.amb",
- "ref_bwt": "gs://gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.fasta.64.bwt",
- "ref_ann": "gs://gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.fasta.64.ann",
- "ref_pac": "gs://gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.fasta.64.pac",
- "ref_str": "gs://gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.str"
- },
- "known_indels_sites_vcfs": [
- "gs://gcp-public-data--broad-references/hg38/v0/Mills_and_1000G_gold_standard.indels.hg38.vcf.gz",
- "gs://gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.known_indels.vcf.gz"
- ],
- "known_indels_sites_indices": [
- "gs://gcp-public-data--broad-references/hg38/v0/Mills_and_1000G_gold_standard.indels.hg38.vcf.gz.tbi",
- "gs://gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.known_indels.vcf.gz.tbi"
- ],
- "dbsnp_vcf": "gs://gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.dbsnp138.vcf",
- "dbsnp_vcf_index": "gs://gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.dbsnp138.vcf.idx",
- "evaluation_interval_list": "gs://gcp-public-data--broad-references/hg38/v0/wgs_evaluation_regions.hg38.interval_list",
- "haplotype_database_file": "gs://gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.haplotype_database.txt"
- },
- "WholeGenomeGermlineSingleSample.dragmap_reference": {
- "reference_bin": "gs://gcp-public-data--broad-references/hg38/v0/dragen_reference/reference.bin",
- "hash_table_cfg_bin": "gs://gcp-public-data--broad-references/hg38/v0/dragen_reference/hash_table.cfg.bin",
- "hash_table_cmp": "gs://gcp-public-data--broad-references/hg38/v0/dragen_reference/hash_table.cmp"
- },
-
- "WholeGenomeGermlineSingleSample.scatter_settings": {
- "haplotype_scatter_count": 10,
- "break_bands_at_multiples_of": 100000
- },
-
- "WholeGenomeGermlineSingleSample.fingerprint_genotypes_file": "gs://broad-gotc-test-storage/single_sample/plumbing/bams/G96830.NA12878/G96830.NA12878.hg38.reference.fingerprint.vcf.gz",
- "WholeGenomeGermlineSingleSample.fingerprint_genotypes_index": "gs://broad-gotc-test-storage/single_sample/plumbing/bams/G96830.NA12878/G96830.NA12878.hg38.reference.fingerprint.vcf.gz.tbi",
- "WholeGenomeGermlineSingleSample.wgs_coverage_interval_list": "gs://gcp-public-data--broad-references/hg38/v0/wgs_coverage_regions.hg38.interval_list",
-
- "WholeGenomeGermlineSingleSample.papi_settings": {
- "preemptible_tries": 3,
- "agg_preemptible_tries": 3
- },
-
- "WholeGenomeGermlineSingleSample.dragen_functional_equivalence_mode": true
-}
diff --git a/pipelines/broad/dna_seq/germline/single_sample/wgs/input_files/WholeGenomeGermlineSingleSample.inputs.plumbing.json b/pipelines/broad/dna_seq/germline/single_sample/wgs/input_files/WholeGenomeGermlineSingleSample.inputs.plumbing.json
deleted file mode 100644
index 3be62aa814..0000000000
--- a/pipelines/broad/dna_seq/germline/single_sample/wgs/input_files/WholeGenomeGermlineSingleSample.inputs.plumbing.json
+++ /dev/null
@@ -1,57 +0,0 @@
-{
- "WholeGenomeGermlineSingleSample.sample_and_unmapped_bams": {
- "sample_name": "NA12878 PLUMBING",
- "base_file_name": "NA12878_PLUMBING",
- "flowcell_unmapped_bams": [
- "gs://broad-public-datasets/NA12878_downsampled_for_testing/unmapped/H06HDADXX130110.1.ATCACGAT.20k_reads.bam",
- "gs://broad-public-datasets/NA12878_downsampled_for_testing/unmapped/H06HDADXX130110.2.ATCACGAT.20k_reads.bam",
- "gs://broad-public-datasets/NA12878_downsampled_for_testing/unmapped/H06JUADXX130110.1.ATCACGAT.20k_reads.bam"
- ],
- "final_gvcf_base_name": "NA12878_PLUMBING",
- "unmapped_bam_suffix": ".bam"
- },
-
- "WholeGenomeGermlineSingleSample.references": {
- "contamination_sites_ud": "gs://gcp-public-data--broad-references/hg38/v0/contamination-resources/1000g/1000g.phase3.100k.b38.vcf.gz.dat.UD",
- "contamination_sites_bed": "gs://gcp-public-data--broad-references/hg38/v0/contamination-resources/1000g/1000g.phase3.100k.b38.vcf.gz.dat.bed",
- "contamination_sites_mu": "gs://gcp-public-data--broad-references/hg38/v0/contamination-resources/1000g/1000g.phase3.100k.b38.vcf.gz.dat.mu",
- "calling_interval_list": "gs://gcp-public-data--broad-references/hg38/v0/wgs_calling_regions.hg38.interval_list",
- "reference_fasta" : {
- "ref_dict": "gs://gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.dict",
- "ref_fasta": "gs://gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.fasta",
- "ref_fasta_index": "gs://gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.fasta.fai",
- "ref_alt": "gs://gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.fasta.64.alt",
- "ref_sa": "gs://gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.fasta.64.sa",
- "ref_amb": "gs://gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.fasta.64.amb",
- "ref_bwt": "gs://gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.fasta.64.bwt",
- "ref_ann": "gs://gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.fasta.64.ann",
- "ref_pac": "gs://gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.fasta.64.pac"
- },
- "known_indels_sites_vcfs": [
- "gs://gcp-public-data--broad-references/hg38/v0/Mills_and_1000G_gold_standard.indels.hg38.vcf.gz",
- "gs://gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.known_indels.vcf.gz"
- ],
- "known_indels_sites_indices": [
- "gs://gcp-public-data--broad-references/hg38/v0/Mills_and_1000G_gold_standard.indels.hg38.vcf.gz.tbi",
- "gs://gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.known_indels.vcf.gz.tbi"
- ],
- "dbsnp_vcf": "gs://gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.dbsnp138.vcf",
- "dbsnp_vcf_index": "gs://gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.dbsnp138.vcf.idx",
- "evaluation_interval_list": "gs://gcp-public-data--broad-references/hg38/v0/wgs_evaluation_regions.hg38.interval_list",
- "haplotype_database_file": "gs://gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.haplotype_database.txt"
- },
-
- "WholeGenomeGermlineSingleSample.scatter_settings": {
- "haplotype_scatter_count": 10,
- "break_bands_at_multiples_of": 100000
- },
-
- "WholeGenomeGermlineSingleSample.fingerprint_genotypes_file": "gs://broad-gotc-test-storage/single_sample/plumbing/bams/G96830.NA12878/G96830.NA12878.hg38.reference.fingerprint.vcf.gz",
- "WholeGenomeGermlineSingleSample.fingerprint_genotypes_index": "gs://broad-gotc-test-storage/single_sample/plumbing/bams/G96830.NA12878/G96830.NA12878.hg38.reference.fingerprint.vcf.gz.tbi",
- "WholeGenomeGermlineSingleSample.wgs_coverage_interval_list": "gs://gcp-public-data--broad-references/hg38/v0/wgs_coverage_regions.hg38.interval_list",
-
- "WholeGenomeGermlineSingleSample.papi_settings": {
- "preemptible_tries": 3,
- "agg_preemptible_tries": 3
- }
-}
diff --git a/pipelines/broad/dna_seq/germline/single_sample/wgs/test_inputs/Plumbing/G96830.NA12878.json b/pipelines/broad/dna_seq/germline/single_sample/wgs/test_inputs/Plumbing/G96830.NA12878.json
index 44ddf82824..772ee521b8 100644
--- a/pipelines/broad/dna_seq/germline/single_sample/wgs/test_inputs/Plumbing/G96830.NA12878.json
+++ b/pipelines/broad/dna_seq/germline/single_sample/wgs/test_inputs/Plumbing/G96830.NA12878.json
@@ -46,8 +46,8 @@
"break_bands_at_multiples_of": 100000
},
- "WholeGenomeGermlineSingleSample.fingerprint_genotypes_file": "gs://broad-gotc-test-storage/germline_single_sample/wgs/plumbing/bams/NA12878_PLUMBING.hg38.reference.fingerprint.vcf.gz",
- "WholeGenomeGermlineSingleSample.fingerprint_genotypes_index": "gs://broad-gotc-test-storage/germline_single_sample/wgs/plumbing/bams/NA12878_PLUMBING.hg38.reference.fingerprint.vcf.gz.tbi",
+ "WholeGenomeGermlineSingleSample.fingerprint_genotypes_file": "gs://broad-public-datasets/NA12878_downsampled_for_testing/unmapped/NA12878_PLUMBING.hg38.reference.fingerprint.vcf.gz",
+ "WholeGenomeGermlineSingleSample.fingerprint_genotypes_index": "gs://broad-public-datasets/NA12878_downsampled_for_testing/unmapped/NA12878_PLUMBING.hg38.reference.fingerprint.vcf.gz.tbi",
"WholeGenomeGermlineSingleSample.wgs_coverage_interval_list": "gs://gcp-public-data--broad-references/hg38/v0/wgs_coverage_regions.hg38.interval_list",
diff --git a/pipelines/broad/dna_seq/germline/variant_calling/VariantCalling.changelog.md b/pipelines/broad/dna_seq/germline/variant_calling/VariantCalling.changelog.md
index b4328759e7..2f4fe5a840 100644
--- a/pipelines/broad/dna_seq/germline/variant_calling/VariantCalling.changelog.md
+++ b/pipelines/broad/dna_seq/germline/variant_calling/VariantCalling.changelog.md
@@ -1,3 +1,13 @@
+# 2.1.7
+2022-09-23 (Date of Last Commit)
+
+* Updated Picard-Python Docker image in Utilities.wdl to fix vulnerabilities.
+
+# 2.1.6
+2022-07-15 (Date of Last Commit)
+
+* Updated task MakeOptionalOutputBam in Utilities.wdl, this update has no effect on this pipeline
+
# 2.1.5
2022-07-12 (Date of Last Commit)
diff --git a/pipelines/broad/dna_seq/germline/variant_calling/VariantCalling.wdl b/pipelines/broad/dna_seq/germline/variant_calling/VariantCalling.wdl
index 08e191217c..6546254903 100644
--- a/pipelines/broad/dna_seq/germline/variant_calling/VariantCalling.wdl
+++ b/pipelines/broad/dna_seq/germline/variant_calling/VariantCalling.wdl
@@ -9,7 +9,7 @@ import "../../../../../tasks/broad/DragenTasks.wdl" as DragenTasks
workflow VariantCalling {
- String pipeline_version = "2.1.5"
+ String pipeline_version = "2.1.7"
input {
diff --git a/pipelines/broad/dna_seq/somatic/single_sample/ugwgs/UltimaGenomicsWholeGenomeCramOnly.changelog.md b/pipelines/broad/dna_seq/somatic/single_sample/ugwgs/UltimaGenomicsWholeGenomeCramOnly.changelog.md
index 8b972ff8fe..4a437c2ae8 100644
--- a/pipelines/broad/dna_seq/somatic/single_sample/ugwgs/UltimaGenomicsWholeGenomeCramOnly.changelog.md
+++ b/pipelines/broad/dna_seq/somatic/single_sample/ugwgs/UltimaGenomicsWholeGenomeCramOnly.changelog.md
@@ -1,3 +1,20 @@
+# 1.0.4
+2022-09-30 (Date of Last Commit)
+
+* Updated Picard-Python Docker image in Utilities.wdl to fix vulnerabilities.
+* Updated task IngestOutputsToTDR with GCR images instead of Dockerhub.
+
+# 1.0.3
+2022-09-20 (Date of Last Commit)
+
+* Removed /cromwell_root/ prefix for output file paths in FilterVCF and TrainModel tasks.
+
+# 1.0.2
+2022-09-07 (Date of Last Commit)
+
+* Increased disk space in the MakeOptionalOutputBam task in Utilities.wdl
+* Updated task IngestOutputsToTDR in InternalTasks.wdl with new docker tag to accommodate changes for BroadInternalArrays pipeline. Change has no effect on this pipeline.
+
# 1.0.1
2022-06-21 (Date of Last Commit)
diff --git a/pipelines/broad/dna_seq/somatic/single_sample/ugwgs/UltimaGenomicsWholeGenomeCramOnly.wdl b/pipelines/broad/dna_seq/somatic/single_sample/ugwgs/UltimaGenomicsWholeGenomeCramOnly.wdl
index fdc89ac17e..17a5a96053 100644
--- a/pipelines/broad/dna_seq/somatic/single_sample/ugwgs/UltimaGenomicsWholeGenomeCramOnly.wdl
+++ b/pipelines/broad/dna_seq/somatic/single_sample/ugwgs/UltimaGenomicsWholeGenomeCramOnly.wdl
@@ -43,7 +43,7 @@ workflow UltimaGenomicsWholeGenomeCramOnly {
save_bam_file: "If true, then save intermeidate ouputs used by germline pipeline (such as the output BAM) otherwise they won't be kept as outputs."
}
- String pipeline_version = "1.0.1"
+ String pipeline_version = "1.0.4"
References references = alignment_references.references
diff --git a/pipelines/broad/internal/arrays/imputation/BroadInternalImputation.changelog.md b/pipelines/broad/internal/arrays/imputation/BroadInternalImputation.changelog.md
index cc8649a5b8..f81da4b687 100644
--- a/pipelines/broad/internal/arrays/imputation/BroadInternalImputation.changelog.md
+++ b/pipelines/broad/internal/arrays/imputation/BroadInternalImputation.changelog.md
@@ -1,3 +1,40 @@
+# 1.1.1
+2022-09-30 (Date of Last Commit)
+
+* Updated Picard-Python, BCFTools/VCFTools, and Minimac4 Docker images to fix vulnerabilities.
+* Updated tasks FormatImputationOutputs, FormatImputationWideOutputs, and IngestOutputsToTDR with GCR images instead of Dockerhub.
+
+# 1.1.0
+2022-09-20 (Date of Last Commit)
+
+* Updated call to IngestOutputsToTDR to remove 'prefix_column'. Python script has been updated and no longer requires this input parameter.
+* Update task IngestOutputsToTDR to not require 'prefix_column'. Python script has been updated and no longer requires this input parameter.
+
+* Update task FormatImputationOutputs with new docker tag.
+* Update task FormatImputationWideOutputs with new docker tag.
+* Update task IngestOutputsToTDR with new docker tag.
+* Update tasks FormatImputationOutputs, FormatImputationWideOutputs, and IngestOutputsToTDR with GCR image instead of DockerHub image.
+
+# 1.0.9
+2022-08-29 (Date of Last Commit)
+
+* Updated call to IngestOutputsToTDR to pass in column names to be used for user action in command block. Python script in task was updated to a new version containing a new required command line parameter, 'prefix_column'
+
+# 1.0.8
+2022-08-23 (Date of Last Commit)
+
+* Updated BCFTools/VCFTools docker image
+
+# 1.0.7
+2022-08-03 (Date of Last Commit)
+
+* Updated BCFTools/VCFTools Minimac4 Docker images
+
+# 1.0.6
+2022-07-15 (Date of Last Commit)
+
+* Updated task MakeOptionalOutputBam in Utilities.wdl, this update has no effect on this pipeline
+
# 1.0.5
2022-06-10 (Date of Last Commit)
diff --git a/pipelines/broad/internal/arrays/imputation/BroadInternalImputation.wdl b/pipelines/broad/internal/arrays/imputation/BroadInternalImputation.wdl
index ed50f8b2ce..89b4714161 100644
--- a/pipelines/broad/internal/arrays/imputation/BroadInternalImputation.wdl
+++ b/pipelines/broad/internal/arrays/imputation/BroadInternalImputation.wdl
@@ -8,7 +8,7 @@ workflow BroadInternalImputation {
meta {
description: "Push outputs of Imputation.wdl to TDR dataset table ImputationOutputsTable and split out Imputation arrays into ImputationWideOutputsTable."
}
- String pipeline_version = "1.0.5"
+ String pipeline_version = "1.1.1"
input {
# inputs to wrapper task
diff --git a/pipelines/broad/internal/arrays/single_sample/BroadInternalArrays.changelog.md b/pipelines/broad/internal/arrays/single_sample/BroadInternalArrays.changelog.md
index bc31e72c5e..ef3909e77e 100644
--- a/pipelines/broad/internal/arrays/single_sample/BroadInternalArrays.changelog.md
+++ b/pipelines/broad/internal/arrays/single_sample/BroadInternalArrays.changelog.md
@@ -1,3 +1,29 @@
+# 1.0.7
+2022-09-30 (Date of Last Commit)
+
+* Updated Picard-Python Docker image in Utilities.wdl to fix vulnerabilities.
+* Updated tasks FormatArraysOutputs and IngestOutputsToTDR with GCR images instead of Dockerhub.
+
+# 1.0.6
+2022-09-20 (Date of Last Commit)
+
+* Updated call to IngestOutputsToTDR to remove 'prefix_column'. Python script has been updated and no longer requires this input parameter.
+* Update task IngestOutputsToTDR to not require 'prefix_column'. Python script has been updated and no longer requires this input parameter.
+
+* Update task FormatArraysOutputs with new docker tag.
+* Update task IngestOutputsToTDR with new docker tag.
+* Update tasks FormatArraysOutputs and IngestOutputsToTDR with GCR image instead of DockerHub image.
+
+# 1.0.5
+2022-08-29 (Date of Last Commit)
+
+* Updated call to IngestOutputsToTDR to pass in column names to be used for user action in command block. Python script in task was updated to a new version containing a new required command line parameter, 'prefix_column'
+
+# 1.0.4
+2022-07-15 (Date of Last Commit)
+
+* Updated task MakeOptionalOutputBam in Utilities.wdl, this update has no effect on this pipeline
+
# 1.0.3
2022-06-21 (Date of Last Commit)
diff --git a/pipelines/broad/internal/arrays/single_sample/BroadInternalArrays.wdl b/pipelines/broad/internal/arrays/single_sample/BroadInternalArrays.wdl
index 68a52d1e4d..062a3f5dac 100644
--- a/pipelines/broad/internal/arrays/single_sample/BroadInternalArrays.wdl
+++ b/pipelines/broad/internal/arrays/single_sample/BroadInternalArrays.wdl
@@ -9,7 +9,7 @@ workflow BroadInternalArrays {
description: "Push outputs of Arrays.wdl to TDR dataset table ArraysOutputsTable."
}
- String pipeline_version = "1.0.3"
+ String pipeline_version = "1.0.7"
input {
# inputs to wrapper task
diff --git a/pipelines/broad/internal/dna_seq/germline/single_sample/UltimaGenomics/BroadInternalUltimaGenomics.changelog.md b/pipelines/broad/internal/dna_seq/germline/single_sample/UltimaGenomics/BroadInternalUltimaGenomics.changelog.md
index 0a1ba1167e..77c154b608 100644
--- a/pipelines/broad/internal/dna_seq/germline/single_sample/UltimaGenomics/BroadInternalUltimaGenomics.changelog.md
+++ b/pipelines/broad/internal/dna_seq/germline/single_sample/UltimaGenomics/BroadInternalUltimaGenomics.changelog.md
@@ -1,3 +1,20 @@
+# 1.0.5
+2022-09-30 (Date of Last Commit)
+
+* Updated Picard-Python Docker image in Utilities.wdl to fix vulnerabilities.
+* Updated task IngestOutputsToTDR with GCR images instead of Dockerhub.
+
+# 1.0.4
+2022-09-20 (Date of Last Commit)
+
+* Removed /cromwell_root/ prefix for output file paths in FilterVCF and TrainModel tasks.
+
+# 1.0.3
+2022-09-07 (Date of Last Commit)
+
+* Increased disk space in the MakeOptionalOutputBam task in Utilities.wdl
+* Updated task IngestOutputsToTDR in InternalTasks.wdl with new docker tag to accommodate changes for BroadInternalArrays pipeline. Change has no effect on this pipeline.
+
# 1.0.2
2022-07-07 (Date of Last Commit)
diff --git a/pipelines/broad/internal/dna_seq/germline/single_sample/UltimaGenomics/BroadInternalUltimaGenomics.wdl b/pipelines/broad/internal/dna_seq/germline/single_sample/UltimaGenomics/BroadInternalUltimaGenomics.wdl
index e9bfea1576..9214d8a103 100644
--- a/pipelines/broad/internal/dna_seq/germline/single_sample/UltimaGenomics/BroadInternalUltimaGenomics.wdl
+++ b/pipelines/broad/internal/dna_seq/germline/single_sample/UltimaGenomics/BroadInternalUltimaGenomics.wdl
@@ -6,7 +6,7 @@ import "../../../../../../../pipelines/broad/qc/CheckFingerprint.wdl" as FP
workflow BroadInternalUltimaGenomics {
- String pipeline_version = "1.0.2"
+ String pipeline_version = "1.0.5"
input {
diff --git a/pipelines/broad/internal/rna_seq/BroadInternalRNAWithUMIs.changelog.md b/pipelines/broad/internal/rna_seq/BroadInternalRNAWithUMIs.changelog.md
index 5130065c36..c86855ce0b 100644
--- a/pipelines/broad/internal/rna_seq/BroadInternalRNAWithUMIs.changelog.md
+++ b/pipelines/broad/internal/rna_seq/BroadInternalRNAWithUMIs.changelog.md
@@ -1,3 +1,21 @@
+# 1.0.18
+2022-09-30 (Date of Last Commit)
+
+* Updated Picard-Python Docker image in Utilities.wdl to fix vulnerabilities.
+* Updated task IngestOutputsToTDR with GCR images instead of Dockerhub.
+
+# 1.0.17
+2022-09-07 (Date of Last Commit)
+
+* Update TDR ingest script task and docker to remove staging bucket, specify timestamp fields, and use merge ingest strategy
+* Remove transcriptome bam index from output
+* Updated task IngestOutputsToTDR in InternalTasks.wdl with new docker tag to accommodate changes for BroadInternalArrays pipeline. Change has no effect on this pipeline.
+
+# 1.0.16
+2022-07-15 (Date of Last Commit)
+
+* Updated task MakeOptionalOutputBam in Utilities.wdl, this update has no effect on this pipeline
+
# 1.0.15
2022-06-21 (Date of Last Commit)
diff --git a/pipelines/broad/internal/rna_seq/BroadInternalRNAWithUMIs.wdl b/pipelines/broad/internal/rna_seq/BroadInternalRNAWithUMIs.wdl
index db9b76b4aa..b2742fb046 100644
--- a/pipelines/broad/internal/rna_seq/BroadInternalRNAWithUMIs.wdl
+++ b/pipelines/broad/internal/rna_seq/BroadInternalRNAWithUMIs.wdl
@@ -7,7 +7,7 @@ import "../../../../tasks/broad/Utilities.wdl" as utils
workflow BroadInternalRNAWithUMIs {
- String pipeline_version = "1.0.15"
+ String pipeline_version = "1.0.18"
input {
@@ -32,7 +32,6 @@ workflow BroadInternalRNAWithUMIs {
# Terra Data Repo dataset information
String? tdr_dataset_uuid
String? tdr_sample_id
- String? tdr_staging_bucket
String environment
File vault_token_path
@@ -66,8 +65,7 @@ workflow BroadInternalRNAWithUMIs {
environment: "The environment (dev or prod) used for determining which service to use to retrieve Mercury fingerprints"
vault_token_path: "The path to the vault token used for accessing the Mercury Fingerprint Store"
tdr_dataset_uuid: "Optional string used to define the Terra Data Repo (TDR) dataset to which outputs will be ingested"
- tdr_sample_id: "Optional string used to identify the sample being processed; this is the primary key in the TDR dataset"
- tdr_staging_bucket: "Optional string defining the GCS bucket to use to stage files for loading to TDR; the workspace bucket is recommended"
+ tdr_sample_id: "Optional string used to identify the sample being processed; this must be the primary key in the TDR dataset"
}
# make sure either hg19 or hg38 is supplied as reference_build input
@@ -129,12 +127,11 @@ workflow BroadInternalRNAWithUMIs {
output_basename = RNAWithUMIs.sample_name
}
- if (defined(tdr_dataset_uuid) && defined(tdr_sample_id) && defined(tdr_staging_bucket)) {
+ if (defined(tdr_dataset_uuid) && defined(tdr_sample_id)) {
call tasks.formatPipelineOutputs {
input:
sample_id = select_first([tdr_sample_id, ""]),
transcriptome_bam = RNAWithUMIs.transcriptome_bam,
- transcriptome_bam_index = RNAWithUMIs.transcriptome_bam_index,
transcriptome_duplicate_metrics = RNAWithUMIs.transcriptome_duplicate_metrics,
output_bam = RNAWithUMIs.output_bam,
output_bam_index = RNAWithUMIs.output_bam_index,
@@ -166,19 +163,18 @@ workflow BroadInternalRNAWithUMIs {
call tasks.updateOutputsInTDR {
input:
tdr_dataset_uuid = select_first([tdr_dataset_uuid, ""]),
- outputs_json = formatPipelineOutputs.pipeline_outputs_json,
- sample_id = select_first([tdr_sample_id, ""]),
- staging_bucket = select_first([tdr_staging_bucket, ""])
+ outputs_json = formatPipelineOutputs.pipeline_outputs_json
}
}
output {
File transcriptome_bam = RNAWithUMIs.transcriptome_bam
- File transcriptome_bam_index = RNAWithUMIs.transcriptome_bam_index
- File transcriptome_duplicate_metrics = RNAWithUMIs.transcriptome_duplicate_metrics
File output_bam = RNAWithUMIs.output_bam
File output_bam_index = RNAWithUMIs.output_bam_index
+
File duplicate_metrics = RNAWithUMIs.duplicate_metrics
+ File transcriptome_duplicate_metrics = RNAWithUMIs.transcriptome_duplicate_metrics
+
File rnaseqc2_gene_tpm = RNAWithUMIs.rnaseqc2_gene_tpm
File rnaseqc2_gene_counts = RNAWithUMIs.rnaseqc2_gene_counts
File rnaseqc2_exon_counts = RNAWithUMIs.rnaseqc2_exon_counts
diff --git a/pipelines/broad/qc/CheckFingerprint.changelog.md b/pipelines/broad/qc/CheckFingerprint.changelog.md
index 971143f491..ee17bab988 100644
--- a/pipelines/broad/qc/CheckFingerprint.changelog.md
+++ b/pipelines/broad/qc/CheckFingerprint.changelog.md
@@ -1,3 +1,15 @@
+# 1.0.9
+2022-09-30 (Date of Last Commit)
+
+* Updated Picard-Python Docker image in Utilities.wdl to fix vulnerabilities.
+* Updated task IngestOutputsToTDR with GCR images instead of Dockerhub.
+
+# 1.0.8
+2022-09-07 (Date of Last Commit)
+
+* Updated task MakeOptionalOutputBam in Utilities.wdl, this update has no effect on this pipeline
+* Updated task IngestOutputsToTDR in InternalTasks.wdl with new docker tag to accommodate changes for BroadInternalArrays pipeline. Change has no effect on this pipeline.
+
# 1.0.7
2022-06-16 (Date of Last Commit)
diff --git a/pipelines/broad/qc/CheckFingerprint.wdl b/pipelines/broad/qc/CheckFingerprint.wdl
index 0078c8cc53..80670abce3 100644
--- a/pipelines/broad/qc/CheckFingerprint.wdl
+++ b/pipelines/broad/qc/CheckFingerprint.wdl
@@ -24,7 +24,7 @@ import "../../../tasks/broad/Qc.wdl" as Qc
workflow CheckFingerprint {
- String pipeline_version = "1.0.7"
+ String pipeline_version = "1.0.9"
input {
File? input_vcf
diff --git a/pipelines/broad/reprocessing/exome/ExomeReprocessing.changelog.md b/pipelines/broad/reprocessing/exome/ExomeReprocessing.changelog.md
index c9b7a8dc36..4f9a3bc6d3 100644
--- a/pipelines/broad/reprocessing/exome/ExomeReprocessing.changelog.md
+++ b/pipelines/broad/reprocessing/exome/ExomeReprocessing.changelog.md
@@ -1,3 +1,13 @@
+# 3.1.7
+2022-09-23 (Date of Last Commit)
+
+* Updated Picard-Python Docker image in Utilities.wdl to fix vulnerabilities.
+
+# 3.1.6
+2022-07-15 (Date of Last Commit)
+
+* Updated task MakeOptionalOutputBam in Utilities.wdl, this update has no effect on this pipeline
+
# 3.1.5
2022-07-12 (Date of Last Commit)
diff --git a/pipelines/broad/reprocessing/exome/ExomeReprocessing.wdl b/pipelines/broad/reprocessing/exome/ExomeReprocessing.wdl
index b971fc2043..914e0909d6 100644
--- a/pipelines/broad/reprocessing/exome/ExomeReprocessing.wdl
+++ b/pipelines/broad/reprocessing/exome/ExomeReprocessing.wdl
@@ -7,7 +7,7 @@ import "../../../../structs/dna_seq/DNASeqStructs.wdl"
workflow ExomeReprocessing {
- String pipeline_version = "3.1.5"
+ String pipeline_version = "3.1.7"
input {
File? input_cram
diff --git a/pipelines/broad/reprocessing/external/exome/ExternalExomeReprocessing.changelog.md b/pipelines/broad/reprocessing/external/exome/ExternalExomeReprocessing.changelog.md
index 108e4c3091..626256fe10 100644
--- a/pipelines/broad/reprocessing/external/exome/ExternalExomeReprocessing.changelog.md
+++ b/pipelines/broad/reprocessing/external/exome/ExternalExomeReprocessing.changelog.md
@@ -1,3 +1,13 @@
+# 3.1.9
+2022-09-23 (Date of Last Commit)
+
+* Updated Picard-Python Docker image in Utilities.wdl to fix vulnerabilities.
+
+# 3.1.8
+2022-07-15 (Date of Last Commit)
+
+* Updated task MakeOptionalOutputBam in Utilities.wdl, this update has no effect on this pipeline
+
# 3.1.7
2022-07-12 (Date of Last Commit)
diff --git a/pipelines/broad/reprocessing/external/exome/ExternalExomeReprocessing.wdl b/pipelines/broad/reprocessing/external/exome/ExternalExomeReprocessing.wdl
index 58d4c7fc9c..88df545d86 100644
--- a/pipelines/broad/reprocessing/external/exome/ExternalExomeReprocessing.wdl
+++ b/pipelines/broad/reprocessing/external/exome/ExternalExomeReprocessing.wdl
@@ -5,7 +5,7 @@ import "../../../../../tasks/broad/CopyFilesFromCloudToCloud.wdl" as Copy
workflow ExternalExomeReprocessing {
- String pipeline_version = "3.1.7"
+ String pipeline_version = "3.1.9"
input {
diff --git a/pipelines/broad/reprocessing/external/wgs/ExternalWholeGenomeReprocessing.changelog.md b/pipelines/broad/reprocessing/external/wgs/ExternalWholeGenomeReprocessing.changelog.md
index 3e0a437989..2b34773730 100644
--- a/pipelines/broad/reprocessing/external/wgs/ExternalWholeGenomeReprocessing.changelog.md
+++ b/pipelines/broad/reprocessing/external/wgs/ExternalWholeGenomeReprocessing.changelog.md
@@ -1,3 +1,13 @@
+# 2.1.9
+2022-09-23 (Date of Last Commit)
+
+* Updated Picard-Python Docker image in Utilities.wdl to fix vulnerabilities.
+
+# 2.1.8
+2022-07-15 (Date of Last Commit)
+
+* Updated task MakeOptionalOutputBam in Utilities.wdl, this update has no effect on this pipeline
+
# 2.1.7
2022-07-12 (Date of Last Commit)
diff --git a/pipelines/broad/reprocessing/external/wgs/ExternalWholeGenomeReprocessing.wdl b/pipelines/broad/reprocessing/external/wgs/ExternalWholeGenomeReprocessing.wdl
index 4e1b65b828..6801a66f46 100644
--- a/pipelines/broad/reprocessing/external/wgs/ExternalWholeGenomeReprocessing.wdl
+++ b/pipelines/broad/reprocessing/external/wgs/ExternalWholeGenomeReprocessing.wdl
@@ -6,7 +6,7 @@ import "../../../../../tasks/broad/CopyFilesFromCloudToCloud.wdl" as Copy
workflow ExternalWholeGenomeReprocessing {
- String pipeline_version = "2.1.7"
+ String pipeline_version = "2.1.9"
input {
File? input_cram
diff --git a/pipelines/broad/reprocessing/wgs/WholeGenomeReprocessing.changelog.md b/pipelines/broad/reprocessing/wgs/WholeGenomeReprocessing.changelog.md
index c5466945db..52672aa1ef 100644
--- a/pipelines/broad/reprocessing/wgs/WholeGenomeReprocessing.changelog.md
+++ b/pipelines/broad/reprocessing/wgs/WholeGenomeReprocessing.changelog.md
@@ -1,3 +1,13 @@
+# 3.1.8
+2022-09-23 (Date of Last Commit)
+
+* Updated Picard-Python Docker image in Utilities.wdl to fix vulnerabilities.
+
+# 3.1.7
+2022-07-15 (Date of Last Commit)
+
+* Updated task MakeOptionalOutputBam in Utilities.wdl, this update has no effect on this pipeline
+
# 3.1.6
2022-07-12 (Date of Last Commit)
diff --git a/pipelines/broad/reprocessing/wgs/WholeGenomeReprocessing.wdl b/pipelines/broad/reprocessing/wgs/WholeGenomeReprocessing.wdl
index df7884245b..e4c17f298c 100644
--- a/pipelines/broad/reprocessing/wgs/WholeGenomeReprocessing.wdl
+++ b/pipelines/broad/reprocessing/wgs/WholeGenomeReprocessing.wdl
@@ -6,7 +6,7 @@ import "../../../../structs/dna_seq/DNASeqStructs.wdl"
workflow WholeGenomeReprocessing {
- String pipeline_version = "3.1.6"
+ String pipeline_version = "3.1.8"
input {
File? input_cram
diff --git a/pipelines/broad/rna_seq/RNAWithUMIsPipeline.changelog.md b/pipelines/broad/rna_seq/RNAWithUMIsPipeline.changelog.md
index bc2e5d6d18..682e752a19 100644
--- a/pipelines/broad/rna_seq/RNAWithUMIsPipeline.changelog.md
+++ b/pipelines/broad/rna_seq/RNAWithUMIsPipeline.changelog.md
@@ -1,3 +1,13 @@
+# 1.0.8
+2022-07-29 (Date of Last Commit)
+
+* Specify the RSEM post-processed transcriptome bam as output
+* Dynamically allocate memory in Fastp task, increase fixed memory to 8gb in RNASeQC2, and increased fixed memory to 64gb in GroupByUMI
+* Remove transcriptome bam index from output
+* Add monitoring script to fastp and GroupByUMI tasks during soft-launch/continuous improvement
+* Add maxRetries to Fastp, GroupByUMI, and RNASeQC2. Multiplier = 2 is set elsewhere.
+
+
# 1.0.7
2022-04-26 (Date of Last Commit)
diff --git a/pipelines/broad/rna_seq/RNAWithUMIsPipeline.wdl b/pipelines/broad/rna_seq/RNAWithUMIsPipeline.wdl
index b29e3d5a19..8ded234317 100644
--- a/pipelines/broad/rna_seq/RNAWithUMIsPipeline.wdl
+++ b/pipelines/broad/rna_seq/RNAWithUMIsPipeline.wdl
@@ -20,7 +20,7 @@ import "../../../tasks/broad/RNAWithUMIsTasks.wdl" as tasks
workflow RNAWithUMIsPipeline {
- String pipeline_version = "1.0.7"
+ String pipeline_version = "1.0.8"
input {
File? bam
@@ -173,7 +173,7 @@ workflow RNAWithUMIsPipeline {
call tasks.PostprocessTranscriptomeForRSEM {
input:
- prefix = output_basename + ".transcriptome.RSEM",
+ prefix = output_basename + ".transcriptome",
input_bam = UMIAwareDuplicateMarkingTranscriptome.duplicate_marked_bam
}
@@ -226,8 +226,7 @@ workflow RNAWithUMIsPipeline {
output {
String sample_name = GetSampleName.sample_name
- File transcriptome_bam = UMIAwareDuplicateMarkingTranscriptome.duplicate_marked_bam
- File transcriptome_bam_index = UMIAwareDuplicateMarkingTranscriptome.duplicate_marked_bam_index
+ File transcriptome_bam = PostprocessTranscriptomeForRSEM.output_bam
File transcriptome_duplicate_metrics = UMIAwareDuplicateMarkingTranscriptome.duplicate_metrics
File output_bam = UMIAwareDuplicateMarking.duplicate_marked_bam
File output_bam_index = UMIAwareDuplicateMarking.duplicate_marked_bam_index
diff --git a/pipelines/skylab/build_indices/BuildIndices.changelog.md b/pipelines/skylab/build_indices/BuildIndices.changelog.md
index bbc085a859..f03a678571 100644
--- a/pipelines/skylab/build_indices/BuildIndices.changelog.md
+++ b/pipelines/skylab/build_indices/BuildIndices.changelog.md
@@ -1,3 +1,11 @@
+# 1.0.1
+
+2022-09-21 (Date of Last Commit)
+
+* Docker image follows our guidelines
+* Changed the type of biotypes from String to File so it localizes properly
+* Changed the genome_fa to use the reference’s value instead of a modified_genome_fa that didn’t exist (which STAR was looking for and was then failing)
+
# 1.0.0
2022-02-01 (Date of Last Commit)
diff --git a/pipelines/skylab/build_indices/BuildIndices.wdl b/pipelines/skylab/build_indices/BuildIndices.wdl
index 258122c671..30b44c4a7c 100644
--- a/pipelines/skylab/build_indices/BuildIndices.wdl
+++ b/pipelines/skylab/build_indices/BuildIndices.wdl
@@ -91,7 +91,7 @@ task BuildStarSingleNucleus {
String organism
String organism_prefix
References references
- String? biotypes
+ File biotypes
}
meta {
@@ -99,7 +99,6 @@ task BuildStarSingleNucleus {
}
String ref_name = "star_primary_gencode_~{organism}_v~{gtf_version}"
String star_index_name = "modified_~{ref_name}.tar"
- String genome_fa_modified = "modified_GRC~{organism_prefix}38.primary_assembly.genome.fa"
String annotation_gtf_modified = "modified_gencode.v~{gtf_version}.primary_assembly.annotation.gtf"
String annotation_gtf_introns = "introns_modified_gencode.v~{gtf_version}.primary_assembly.annotation.gtf"
@@ -114,7 +113,7 @@ task BuildStarSingleNucleus {
mkdir star
STAR --runMode genomeGenerate \
--genomeDir star \
- --genomeFastaFiles ~{genome_fa_modified} \
+ --genomeFastaFiles ~{references.genome_fa} \
--sjdbGTFfile ~{annotation_gtf_modified} \
--sjdbOverhang 100 \
--runThreadN 16
@@ -128,13 +127,13 @@ task BuildStarSingleNucleus {
File star_index = star_index_name
File annotation_gtf_modified_introns = annotation_gtf_introns
References modified_references = object {
- genome_fa: genome_fa_modified,
+ genome_fa: references.genome_fa,
annotation_gtf: annotation_gtf_modified
}
}
-
+
runtime {
- docker: "quay.io/humancellatlas/snss2-indices:1.2.0 "
+ docker: "us.gcr.io/broad-gotc-prod/build-indices:1.0.0-2.7.10a-1663605340"
memory: "50 GiB"
disks :"local-disk 100 HDD"
cpu:"16"
@@ -374,12 +373,12 @@ workflow BuildIndices {
String organism_prefix
String genome_short_string
String dbsnp_version
- String? biotypes
+ File biotypes
}
# version of this pipeline
- String pipeline_version = "1.0.0"
+ String pipeline_version = "1.0.1"
parameter_meta {
gtf_version: "the actual number of gencode, ex. 27"
diff --git a/pipelines/skylab/optimus/Optimus.changelog.md b/pipelines/skylab/optimus/Optimus.changelog.md
index 142c7a97ef..507f31841e 100644
--- a/pipelines/skylab/optimus/Optimus.changelog.md
+++ b/pipelines/skylab/optimus/Optimus.changelog.md
@@ -1,18 +1,47 @@
+# 5.5.5
+2022-09-20 (Date of Last Commit)
+
+* Updated tasks in StarAlign.wdl to use an updated STAR docker image.
+
+# 5.5.4
+2022-09-01 (Date of Last Commit)
+
+* Updated CheckInputs.wdl to use a lightweight alpine-bash image.
+
+# 5.5.3
+2022-08-23 (Date of Last Commit)
+
+* Removed an unused script in pytools docker image and removed unused ConvertStarOutputs task.
+
+# 5.5.2
+2022-08-16 (Date of Last Commit)
+
+* Updated LoomUtils.wdl and StarAlign.wdl to use a rebuilt python utilities docker.
+
+# 5.5.1
+2022-07-21 (Date of Last Commit)
+
+* Updated STARsoloFastq runtime docker URL.
+
# 5.5.0
2022-05-18 (Date of Last Commit)
+
* Updated merge npz docker in StarAlign.wdl to fix a bug in the output loom matrix where gene names were inapporpriately assigned to counts. Any data previously processed with Optimus version 5.0.0 and above should be re-analyzed.
# 5.4.3
2022-04-22 (Date of Last Commit)
+
* Updated Optimus to not run emptydrop step in sn_rna mode.
# 5.4.2
2022-04-21 (Date of Last Commit)
+
* Updated to Picard version 2.26.10 and GATK version 4.2.6.1 to address log4j vulnerabilities
# 5.4.1
2022-04-21 (Date of Last Commit)
+
* Fixing syntax in changelog documentation
# 5.4.0
diff --git a/pipelines/skylab/optimus/Optimus.wdl b/pipelines/skylab/optimus/Optimus.wdl
index 80b045f1a2..d2e861cc3c 100644
--- a/pipelines/skylab/optimus/Optimus.wdl
+++ b/pipelines/skylab/optimus/Optimus.wdl
@@ -56,7 +56,7 @@ workflow Optimus {
# version of this pipeline
- String pipeline_version = "5.5.0"
+ String pipeline_version = "5.5.5"
# this is used to scatter matched [r1_fastq, r2_fastq, i1_fastq] arrays
Array[Int] indices = range(length(r1_fastq))
diff --git a/pipelines/skylab/scATAC/scATAC.changelog.md b/pipelines/skylab/scATAC/scATAC.changelog.md
index 44e26facef..38594eb9e4 100644
--- a/pipelines/skylab/scATAC/scATAC.changelog.md
+++ b/pipelines/skylab/scATAC/scATAC.changelog.md
@@ -1,3 +1,23 @@
+# 1.3.0
+2022-09-23 (Date of Last Commit)
+
+* Added disk, memory and cpu as task inputs. Added pipeline version as a string output.
+
+# 1.2.4
+2022-08-23 (Date of Last Commit)
+
+* Remove an unused script in pytools docker image.
+
+# 1.2.3
+2022-08-18 (Date of Last Commit)
+
+* Update AlignPairedEnd, SnapPre, SnapCellByBin tasks to use rebuilt snaptools docker image.
+
+# 1.2.2
+2022-08-16 (Date of Last Commit)
+
+* Update MakeCompliantBAM and BreakoutSnap tasks to use a consolidated python utilities docker image.
+
# 1.2.1
2021-11-15 (Date of Last Commit)
diff --git a/pipelines/skylab/scATAC/scATAC.wdl b/pipelines/skylab/scATAC/scATAC.wdl
index 856f8c9d13..697a2fd971 100644
--- a/pipelines/skylab/scATAC/scATAC.wdl
+++ b/pipelines/skylab/scATAC/scATAC.wdl
@@ -15,7 +15,7 @@ workflow scATAC {
String bin_size_list = "10000"
}
- String pipeline_version = "1.2.1"
+ String pipeline_version = "1.3.0"
parameter_meta {
input_fastq1: "read 1 input fastq, the read names must be tagged with the cellular barcodes"
@@ -72,6 +72,7 @@ workflow scATAC {
File breakout_binCoordinates = BreakoutSnap.binCoordinates
File breakout_binCounts = BreakoutSnap.binCounts
File breakout_barcodesSection = BreakoutSnap.barcodesSection
+ String output_pipeline_version = pipeline_version
}
}
@@ -84,7 +85,11 @@ task AlignPairedEnd {
String reference_unpack_name = "genome/genome.fa"
String output_bam
Int min_cov = 0
- String docker_image = "quay.io/humancellatlas/snaptools:0.0.1"
+ String docker_image = "us.gcr.io/broad-gotc-prod/snaptools-bwa:1.0.0-1.4.8-0.7.17-1660844602"
+ Int machine_mem_mb = 16000
+ Int cpu = 16
+ Int disk = ceil(2*(size(input_fastq1, "GiB") + size(input_fastq2, "GiB") + size(input_reference, "GiB"))) + 100
+ Int preemptible = 3
}
parameter_meta {
@@ -96,8 +101,6 @@ task AlignPairedEnd {
min_cov: "--min-cov parameter for snaptools align-paired-end (default: 0)"
}
- Int num_threads = 16
- Float input_size = size(input_fastq1, "GiB") + size(input_fastq2, "GiB") + size(input_reference, "GiB")
command {
set -euo pipefail
@@ -115,10 +118,10 @@ task AlignPairedEnd {
--input-fastq2=~{input_fastq2} \
--output-bam=~{output_bam} \
--aligner=bwa \
- --path-to-aligner=/tools/ \
+ --path-to-aligner=/usr/local/bin/ \
--read-fastq-command=zcat \
--min-cov=~{min_cov} \
- --num-threads=~{num_threads} \
+ --num-threads=~{cpu} \
--tmp-folder=$TEMP_DIR \
--overwrite=TRUE \
--if-sort=True
@@ -130,9 +133,10 @@ task AlignPairedEnd {
runtime {
docker: docker_image
- cpu: num_threads
- memory: "16 GB"
- disks: "local-disk " + ceil(10 * (if input_size < 1 then 1 else input_size )) + " HDD"
+ memory: "${machine_mem_mb} MiB"
+ disks: "local-disk ${disk} HDD"
+ cpu: cpu
+ preemptible: preemptible
}
}
@@ -142,8 +146,12 @@ task SnapPre {
String output_snap_basename
String genome_name
String genome_size_file = "genome/chrom.sizes"
- String docker_image = "quay.io/humancellatlas/snaptools:0.0.1"
+ String docker_image = "us.gcr.io/broad-gotc-prod/snaptools-bwa:1.0.0-1.4.8-0.7.17-1660844602"
File input_reference
+ Int cpu = 1
+ Int machine_mem_mb = 16000
+ Int disk = 500
+ Int preemptible = 3
}
parameter_meta {
@@ -155,7 +163,7 @@ task SnapPre {
input_reference: "input reference tar file"
}
- Int num_threads = 1
+
command {
set -euo pipefail
@@ -187,9 +195,10 @@ task SnapPre {
runtime {
docker: docker_image
- cpu: num_threads
- memory: "16 GB"
- disks: "local-disk 150 HDD"
+ cpu: cpu
+ memory: "${machine_mem_mb} MiB"
+ disks: "local-disk ${disk} HDD"
+ preemptible: preemptible
}
}
@@ -198,7 +208,11 @@ task SnapCellByBin {
File snap_input
String bin_size_list
String snap_output_name
- String docker_image = "quay.io/humancellatlas/snaptools:0.0.1"
+ String docker_image = "us.gcr.io/broad-gotc-prod/snaptools-bwa:1.0.0-1.4.8-0.7.17-1660844602"
+ Int cpu = 1
+ Int machine_mem_mb = 16000
+ Int disk = 500
+ Int preemptible = 3
}
parameter_meta {
@@ -208,8 +222,6 @@ task SnapCellByBin {
docker_image: "docker image to use"
}
- Int num_threads = 1
-
command {
set -euo pipefail
@@ -228,9 +240,10 @@ task SnapCellByBin {
runtime {
docker: docker_image
- cpu: num_threads
- memory: "16 GB"
- disks: "local-disk 150 HDD"
+ cpu: cpu
+ memory: "${machine_mem_mb} MiB"
+ disks: "local-disk ${disk} HDD"
+ preemptible: preemptible
}
}
@@ -238,7 +251,12 @@ task MakeCompliantBAM {
input {
File input_bam
String output_bam_filename
- String docker_image = "quay.io/humancellatlas/snaptools:0.0.1"
+ String docker_image = "us.gcr.io/broad-gotc-prod/pytools:1.0.0-1661263730"
+ Int cpu = 1
+ Int disk = ceil(3 * (size(input_bam, "GiB"))) + 100
+ Int machine_mem_mb = 4000
+ Int preemptible = 3
+
}
parameter_meta {
@@ -247,13 +265,10 @@ task MakeCompliantBAM {
docker_image: "docker image to use"
}
- Int num_threads = 1
- Float input_size = size(input_bam, "GiB")
-
command {
set -euo pipefail
- /tools/makeCompliantBAM.py --input-bam ~{input_bam} --output-bam ~{output_bam_filename}
+ /usr/gitc/makeCompliantBAM.py --input-bam ~{input_bam} --output-bam ~{output_bam_filename}
}
output {
@@ -262,18 +277,23 @@ task MakeCompliantBAM {
runtime {
docker: docker_image
- cpu: num_threads
- memory: "4 GB"
- disks: "local-disk " + ceil(2.5 * (if input_size < 1 then 1 else input_size )) + " HDD"
+ cpu: cpu
+ memory: "${machine_mem_mb} MiB"
+ disks: "local-disk ${disk} HDD"
+ preemptible: preemptible
}
}
task BreakoutSnap {
input {
File snap_input
- String docker_image = "quay.io/humancellatlas/snap-breakout:0.0.1"
+ String docker_image = "us.gcr.io/broad-gotc-prod/pytools:1.0.0-1661263730"
String bin_size_list
String input_id
+ Int preemptible = 3
+ Int disk = ceil(10 * (if size(snap_input, "GiB") < 1 then 1 else size(snap_input, "GiB") )) + 100
+ Int machine_mem_mb = 16000
+ Int cpu = 1
}
parameter_meta {
@@ -283,13 +303,10 @@ task BreakoutSnap {
input_id : "name of the sample, used to name the outputs"
}
- Int num_threads = 1
- Float input_size = size(snap_input, "GiB")
-
command {
set -euo pipefail
mkdir output
- python3 /tools/breakoutSnap.py --input ~{snap_input} \
+ python3 /usr/gitc/breakoutSnap.py --input ~{snap_input} \
--output-prefix output/~{input_id}_
}
@@ -303,8 +320,9 @@ task BreakoutSnap {
runtime {
docker: docker_image
- cpu: num_threads
- memory: "16 GB"
- disks: "local-disk " + ceil(10 * (if input_size < 1 then 1 else input_size )) + " HDD"
+ memory: "${machine_mem_mb} MiB"
+ disks: "local-disk ${disk} HDD"
+ cpu: cpu
+ preemptible: preemptible
}
}
diff --git a/pipelines/skylab/smartseq2_multisample/MultiSampleSmartSeq2.changelog.md b/pipelines/skylab/smartseq2_multisample/MultiSampleSmartSeq2.changelog.md
index 984de1b6c7..5020a70ff2 100644
--- a/pipelines/skylab/smartseq2_multisample/MultiSampleSmartSeq2.changelog.md
+++ b/pipelines/skylab/smartseq2_multisample/MultiSampleSmartSeq2.changelog.md
@@ -1,12 +1,32 @@
+# 2.2.16
+2022-09-13 (Date of Last Commit)
+
+* Update RSEM.wdl in the SmartSeq2SingleSample pipeline to use an updated RSEM docker image. This change does not affect the MultiSampleSmartSeq2 pipeline.
+
+# 2.2.15
+2022-09-12 (Date of Last Commit)
+
+* Update HISAT2.wdl in the SmartSeq2SingleSample pipeline to use an updated HISAT2 docker image. This change does not affect the MultiSampleSmartSeq2 pipeline.
+
+# 2.2.14
+2022-08-23 (Date of Last Commit)
+
+* Remove an unused script in pytools docker image.
+
+# 2.2.13
+2022-08-16 (Date of Last Commit)
+
+* Update LoomUtils.wdl to use updated docker images. This change does not affect the MultiSampleSmartSeq2 pipeline.
+
# 2.2.12
-2022-06-2022 (Date of Last Commit)
+2022-06-22 (Date of Last Commit)
* Updated main workflow name from SmartSeq2SingleCell to SmartSeq2SingleSample in the SS2 single sample pipeline. This allows the pipeline to run in the updated scala tests.
# 2.2.11
2022-04-22 (Date of Last Commit)
-* Updated LoomUtils.wdl for a task in the Optimus pipeline. This change does not affect the SmartSeq2SingleSample pipeline.
+* Updated LoomUtils.wdl for a task in the Optimus pipeline. This change does not affect the MultiSampleSmartSeq2 pipeline.
# 2.2.10
2022-04-14 (Date of Last Commit)
diff --git a/pipelines/skylab/smartseq2_multisample/MultiSampleSmartSeq2.wdl b/pipelines/skylab/smartseq2_multisample/MultiSampleSmartSeq2.wdl
index 6784510416..9f486528fd 100644
--- a/pipelines/skylab/smartseq2_multisample/MultiSampleSmartSeq2.wdl
+++ b/pipelines/skylab/smartseq2_multisample/MultiSampleSmartSeq2.wdl
@@ -40,7 +40,7 @@ workflow MultiSampleSmartSeq2 {
Boolean paired_end
}
# Version of this pipeline
- String pipeline_version = "2.2.12"
+ String pipeline_version = "2.2.16"
if (false) {
String? none = "None"
diff --git a/pipelines/skylab/smartseq2_single_nucleus_multisample/MultiSampleSmartSeq2SingleNucleus.changelog.md b/pipelines/skylab/smartseq2_single_nucleus_multisample/MultiSampleSmartSeq2SingleNucleus.changelog.md
index d5b18aac93..a1cce15e80 100644
--- a/pipelines/skylab/smartseq2_single_nucleus_multisample/MultiSampleSmartSeq2SingleNucleus.changelog.md
+++ b/pipelines/skylab/smartseq2_single_nucleus_multisample/MultiSampleSmartSeq2SingleNucleus.changelog.md
@@ -1,11 +1,48 @@
+# 1.2.14
+2022-09-20 (Date of Last Commit)
+
+* Updated tasks in StarAlign.wdl to use an updated STAR docker image.
+
+# 1.2.13
+
+2022-09-01 (Date of Last Commit)
+
+* Updated CheckInputs.wdl to use a lightweight alpine-bash image.
+
+# 1.2.12
+2022-08-31 (Date of Last Commit)
+
+* Updated CountAlignments to use an updated docker image.
+
+# 1.2.11
+2022-08-23 (Date of Last Commit)
+
+* Removed an unused script in pytools docker image.
+
+# 1.2.10
+2022-08-16 (Date of Last Commit)
+
+* Updated LoomUtils.wdl to use a consolidated python utilities docker image. This change does not affect the MultiSampleSmartSeq2SingleNucleus pipeline.
+
+# 1.2.9
+2022-08-08 (Date of Last Commit)
+
+* Updated TrimAdapters runtime docker URL.
+
+# 1.2.8
+2022-07-21 (Date of Last Commit)
+
+* Updated STARsoloFastq runtime docker URL.
+
# 1.2.7
2022-05-18 (Date of Last Commit)
+
* Updated merge npz docker in StarAlign.wdl
# 1.2.6
2022-04-22 (Date of Last Commit)
-* Updated LoomUtils.wdl for a task in the Optimus pipeline. This change does not affect the SmartSeq2SingleSample pipeline.
+* Updated LoomUtils.wdl for a task in the Optimus pipeline. This change does not affect the MultiSampleSmartSeq2SingleNucleus pipeline.
# 1.2.5
2022-04-19 (Date of Last Commit)
@@ -26,7 +63,7 @@
# 1.2.2
2022-02-10 (Date of Last Commit)
-* Rebuilt a docker to merge outputs of STAR in in StarAlign.wdl task and moved it to a public location.
+* Rebuilt a docker to merge outputs of STAR in StarAlign.wdl task and moved it to a public location.
# 1.2.1
2022-02-07 (Date of Last Commit)
diff --git a/pipelines/skylab/smartseq2_single_nucleus_multisample/MultiSampleSmartSeq2SingleNucleus.wdl b/pipelines/skylab/smartseq2_single_nucleus_multisample/MultiSampleSmartSeq2SingleNucleus.wdl
index 5b6227301a..79b97bf621 100644
--- a/pipelines/skylab/smartseq2_single_nucleus_multisample/MultiSampleSmartSeq2SingleNucleus.wdl
+++ b/pipelines/skylab/smartseq2_single_nucleus_multisample/MultiSampleSmartSeq2SingleNucleus.wdl
@@ -40,7 +40,7 @@ workflow MultiSampleSmartSeq2SingleNucleus {
String? input_id_metadata_field
}
# Version of this pipeline
- String pipeline_version = "1.2.7"
+ String pipeline_version = "1.2.14"
if (false) {
String? none = "None"
diff --git a/pipelines/skylab/smartseq2_single_sample/SmartSeq2SingleSample.changelog.md b/pipelines/skylab/smartseq2_single_sample/SmartSeq2SingleSample.changelog.md
index d36b66a53d..a3bdcb705b 100644
--- a/pipelines/skylab/smartseq2_single_sample/SmartSeq2SingleSample.changelog.md
+++ b/pipelines/skylab/smartseq2_single_sample/SmartSeq2SingleSample.changelog.md
@@ -1,3 +1,23 @@
+# 5.1.15
+2022-09-13 (Date of Last Commit)
+
+* Update RSEM.wdl to use an updated RSEM docker image. This change does not affect the SmartSeq2SingleSample pipeline.
+
+# 5.1.14
+2022-09-12 (Date of Last Commit)
+
+* Update HISAT2.wdl to use an updated HISAT2 docker image. This change does not affect the SmartSeq2SingleSample pipeline.
+
+# 5.1.13
+2022-08-23 (Date of Last Commit)
+
+* Remove an unused script in pytools docker image.
+
+# 5.1.12
+2022-08-16 (Date of Last Commit)
+
+* Updated LoomUtils.wdl to use a consolidated python utilities docker image. This change does not affect the SmartSeq2SingleSample pipeline.
+
# 5.1.11
2022-06-21 (Date of Last Commit)
diff --git a/pipelines/skylab/smartseq2_single_sample/SmartSeq2SingleSample.wdl b/pipelines/skylab/smartseq2_single_sample/SmartSeq2SingleSample.wdl
index de230d8b66..c9c36def38 100644
--- a/pipelines/skylab/smartseq2_single_sample/SmartSeq2SingleSample.wdl
+++ b/pipelines/skylab/smartseq2_single_sample/SmartSeq2SingleSample.wdl
@@ -36,7 +36,7 @@ workflow SmartSeq2SingleSample {
}
# version of this pipeline
- String pipeline_version = "5.1.11"
+ String pipeline_version = "5.1.15"
parameter_meta {
genome_ref_fasta: "Genome reference in fasta format"
diff --git a/pullapprove_template.yml b/pullapprove_template.yml
index a2ac46db78..38d5c9c7ee 100644
--- a/pullapprove_template.yml
+++ b/pullapprove_template.yml
@@ -91,7 +91,7 @@ groups:
request: 2
reviewers:
users:
- - ldgauthier # Laura Gauthier
+ - samuelklee # Samuel Lee
- kachulis # Chris Kachulis
scientific_owners_joint_genotyping:
@@ -108,7 +108,7 @@ groups:
request_order: given
reviewers:
users:
- - ldgauthier # Laura Gauthier
+ - samuelklee # Samuel Lee
scientific_owners_somatic_single_sample:
conditions:
diff --git a/scripts/get_changed_pipeline_worklow_test_args.sh b/scripts/get_changed_pipeline_worklow_test_args.sh
index e0aebedd0b..05eda96021 100755
--- a/scripts/get_changed_pipeline_worklow_test_args.sh
+++ b/scripts/get_changed_pipeline_worklow_test_args.sh
@@ -8,95 +8,118 @@ declare ALL_PIPELINES=($(get_versioned_pipelines))
function pipeline_to_args() {
local -r pipeline=${1}
- local -r env=${2}
- local -r test=${3}
- local -r truth=${4}
- local -r uncached=${5}
+ local -r test=${2}
- local -r common_args="--env ${env} -t ${test} -b ${truth} ${uncached}"
+ local -r common_args="${test}"
case ${pipeline} in
AnnotationFiltration)
- echo AnnotationFiltration -t ${test} --env ${env};;
+ continue;;
Arrays)
- echo Arrays -a Single ${common_args};;
- MultiSampleArrays)
- echo Arrays -a Multi ${common_args};;
+ echo Arrays ${common_args};;
BroadInternalRNAWithUMIs)
echo BroadInternalRNAWithUMIs ${common_args};;
BroadInternalUltimaGenomics)
echo BroadInternalUltimaGenomics ${common_args};;
+ # CEMBA)
+ # echo CEMBA ${common_args};;
CheckFingerprint)
echo CheckFingerprint ${common_args};;
+ CramToUnmappedBams)
+ echo CramToUnmappedBams ${common_args};;
+ ExternalExomeReprocessing)
+ if [[ "${test}" == "Scientific" ]]; then
+ echo ExternalExomeReprocessing Plumbing
+ else
+ continue
+ fi;;
ExomeGermlineSingleSample)
- echo GermlineSingleSample -d Exome ${common_args};;
+ echo ExomeGermlineSingleSample ${common_args};;
ExomeReprocessing)
if [[ "${test}" == "Scientific" ]]; then
- echo Reprocessing -d Exome --env ${env} -t Plumbing -b ${truth} ${uncached}
+ echo ExomeReprocessing Plumbing
else
continue
fi;;
- JointGenotyping)
- echo JointGenotyping -d Exome ${common_args} --papi-version PAPIv2;
- echo JointGenotyping -d WGS --env ${env} -t Plumbing -b ${truth} ${uncached} --papi-version PAPIv2;;
+ ExternalWholeGenomeReprocessing)
+ if [[ "${test}" == "Scientific" ]]; then
+ echo ExternalWholeGenomeReprocessing Plumbing
+ else
+ continue
+ fi;;
+ GDCWholeGenomeSomaticSingleSample)
+ echo GDCWholeGenomeSomaticSingleSample ${common_args};;
IlluminaGenotypingArray)
echo IlluminaGenotypingArray ${common_args};;
Imputation)
echo Imputation ${common_args};;
- ExternalExomeReprocessing)
+ JointGenotyping)
+ echo JointGenotyping ${common_args};;
+ JointGenotypingByChromosomePartOne)
+ continue;;
+ JointGenotypingByChromosomePartTwo)
+ continue;;
+ MultiSampleArrays)
+ echo MultiSampleArrays ${common_args};;
+ MultiSampleSmartSeq2)
if [[ "${test}" == "Scientific" ]]; then
- echo ExternalReprocessing -d Exome --env ${env} -t Plumbing -b ${truth} ${uncached}
+ echo MultiSampleSmartSeq2 Plumbing
else
- continue
+ echo MultiSampleSmartSeq2 ${common_args}
fi;;
- ExternalWholeGenomeReprocessing)
+ MultiSampleSmartSeq2SingleNucleus)
if [[ "${test}" == "Scientific" ]]; then
- echo ExternalReprocessing -d WGS --env ${env} -t Plumbing -b ${truth} ${uncached}
+ echo MultiSampleSmartSeq2SingleNucleus Plumbing
else
- continue
+ echo MultiSampleSmartSeq2SingleNucleus ${common_args}
+ fi;;
+ Optimus)
+ echo Optimus ${common_args};;
+ ReblockGVCF)
+ echo ReblockGvcf ${common_args};;
+ RNAWithUMIsPipeline)
+ echo RNAWithUMIsPipeline ${common_args};;
+ scATAC)
+ if [[ "${test}" == "Scientific" ]]; then
+ echo scATAC Plumbing
+ else
+ echo scATAC ${common_args}
+ fi;;
+ SmartSeq2SingleSample)
+ if [[ "${test}" == "Scientific" ]]; then
+ echo SmartSeq2SingleSample Plumbing
+ else
+ echo SmartSeq2SingleSample ${common_args}
+ fi;;
+ TargetedSomaticSingleSample)
+ continue;;
+ ValidateChip)
+ echo ValidateChip ${common_args};;
+ VariantCalling)
+ if [[ "${test}" == "Scientific" ]]; then
+ echo VariantCalling Plumbing
+ else
+ echo VariantCalling ${common_args}
fi;;
WholeGenomeGermlineSingleSample)
- echo GermlineSingleSample -d WGS ${common_args};;
+ echo WholeGenomeGermlineSingleSample ${common_args};;
WholeGenomeReprocessing)
if [[ "${test}" == "Scientific" ]]; then
- echo Reprocessing -d WGS --env ${env} -t Plumbing -b ${truth} ${uncached}
+ echo WholeGenomeReprocessing Plumbing
else
continue
fi;;
- ValidateChip)
- echo ValidateChip ${common_args};;
- ReblockGVCF)
- echo ReblockGvcf -d Exome ${common_args};
- echo ReblockGvcf -d WGS ${common_args};;
- RNAWithUMIsPipeline)
- echo RNAWithUMIs ${common_args};;
- TargetedSomaticSingleSample)
- echo SomaticSingleSample -d Targeted ${common_args};;
- CramToUnmappedBams)
- echo CramToUnmappedBams ${common_args};;
- JointGenotypingByChromosomePartOne)
- continue;;
- JointGenotypingByChromosomePartTwo)
- continue;;
- UltimaGenomicsGermlineSingleSample)
- echo UltimaGenomicsGermlineSingleSample ${common_args};;
+ UltimaGenomicsWholeGenomeGermline)
+ echo UltimaGenomicsWholeGenomeGermline ${common_args};;
UltimaGenomicsJointGenotyping)
echo UltimaGenomicsJointGenotyping ${common_args};;
- GDCWholeGenomeSomaticSingleSample)
- echo GDCWholeGenomeSomaticSingleSample -d WGS ${common_args};;
- VariantCalling)
- echo VariantCalling -d Exome -t Plumbing --env ${env} -b ${truth} ${uncached};
- echo VariantCalling -d WGS -t Plumbing --env ${env} -b ${truth} ${uncached};;
esac
}
function main() {
local -r gittish=${1}
local -r test_all=${2}
- local -r env=${3}
- local -r test=${4}
- local -r truth=${5}
- local -r uncached=${6}
+ local -r test=${3}
local -a changed_pipeline_paths=()
local -a args=()
@@ -109,7 +132,7 @@ function main() {
for changed_pipeline_path in ${changed_pipeline_paths[*]}; do
pipeline=$(basename ${changed_pipeline_path} .wdl)
- arg="$(pipeline_to_args ${pipeline} ${env} ${test} ${truth} ${uncached})"
+ arg="$(pipeline_to_args ${pipeline} ${test})"
if [[ -n ${arg} ]]; then
args+=("${arg}")
fi
@@ -120,4 +143,4 @@ function main() {
done
}
-main ${1} ${2} ${3} ${4} ${5} ${6}
+main ${1} ${2} ${3}
\ No newline at end of file
diff --git a/tasks/broad/ImputationTasks.wdl b/tasks/broad/ImputationTasks.wdl
index 1bc1ddfa7f..a7d9933349 100644
--- a/tasks/broad/ImputationTasks.wdl
+++ b/tasks/broad/ImputationTasks.wdl
@@ -148,7 +148,7 @@ task CheckChunks {
Int var_in_reference
Int disk_size_gb = ceil(2*size([vcf, vcf_index, panel_vcf, panel_vcf_index], "GiB"))
- String bcftools_docker = "us.gcr.io/broad-gotc-prod/imputation-bcf-vcf:1.0.5-1.10.2-0.1.16-1649948623"
+ String bcftools_docker = "us.gcr.io/broad-gotc-prod/imputation-bcf-vcf:1.0.6-1.10.2-0.1.16-1663946207"
Int cpu = 1
Int memory_mb = 4000
}
@@ -225,7 +225,7 @@ task Minimac4 {
Int end
Int window
- String minimac4_docker = "us.gcr.io/broad-gotc-prod/imputation-minimac4:1.0.5-1.0.2-1649949471"
+ String minimac4_docker = "us.gcr.io/broad-gotc-prod/imputation-minimac4:1.0.6-1.0.2-1663948783"
Int cpu = 1
Int memory_mb = 4000
Int disk_size_gb = ceil(size(ref_panel, "GiB") + 2*size(phased_vcf, "GiB")) + 50
@@ -304,7 +304,7 @@ task ReplaceHeader {
File vcf_to_replace_header
File vcf_with_new_header
- String bcftools_docker = "us.gcr.io/broad-gotc-prod/imputation-bcf-vcf:1.0.4-1.10.2-0.1.16-1646091598"
+ String bcftools_docker = "us.gcr.io/broad-gotc-prod/imputation-bcf-vcf:1.0.6-1.10.2-0.1.16-1663946207"
}
String output_name = basename(vcf_to_replace_header,".vcf.gz") + ".new_header.vcf.gz"
@@ -402,7 +402,7 @@ task SeparateMultiallelics {
String output_basename
Int disk_size_gb = ceil(2*(size(original_vcf, "GiB") + size(original_vcf_index, "GiB")))
- String bcftools_docker = "us.gcr.io/broad-gotc-prod/imputation-bcf-vcf:1.0.5-1.10.2-0.1.16-1649948623"
+ String bcftools_docker = "us.gcr.io/broad-gotc-prod/imputation-bcf-vcf:1.0.6-1.10.2-0.1.16-1663946207"
Int cpu = 1
Int memory_mb = 4000
}
@@ -432,7 +432,7 @@ task OptionalQCSites {
Float? optional_qc_max_missing
Float? optional_qc_hwe
- String bcftools_vcftools_docker = "us.gcr.io/broad-gotc-prod/imputation-bcf-vcf:1.0.5-1.10.2-0.1.16-1649948623"
+ String bcftools_vcftools_docker = "us.gcr.io/broad-gotc-prod/imputation-bcf-vcf:1.0.6-1.10.2-0.1.16-1663946207"
Int cpu = 1
Int memory_mb = 16000
Int disk_size_gb = ceil(2*(size(input_vcf, "GiB") + size(input_vcf_index, "GiB")))
@@ -465,7 +465,7 @@ task MergeSingleSampleVcfs {
Array[File] input_vcf_indices
String output_vcf_basename
- String bcftools_docker = "us.gcr.io/broad-gotc-prod/imputation-bcf-vcf:1.0.5-1.10.2-0.1.16-1649948623"
+ String bcftools_docker = "us.gcr.io/broad-gotc-prod/imputation-bcf-vcf:1.0.6-1.10.2-0.1.16-1663946207"
Int memory_mb = 2000
Int cpu = 1
Int disk_size_gb = 3 * ceil(size(input_vcfs, "GiB") + size(input_vcf_indices, "GiB")) + 20
@@ -504,7 +504,7 @@ task CountSamples {
input {
File vcf
- String bcftools_docker = "us.gcr.io/broad-gotc-prod/imputation-bcf-vcf:1.0.5-1.10.2-0.1.16-1649948623"
+ String bcftools_docker = "us.gcr.io/broad-gotc-prod/imputation-bcf-vcf:1.0.6-1.10.2-0.1.16-1663946207"
Int cpu = 1
Int memory_mb = 3000
Int disk_size_gb = 100 + ceil(size(vcf, "GiB"))
@@ -702,7 +702,7 @@ task SetIDs {
File vcf
String output_basename
- String bcftools_docker = "us.gcr.io/broad-gotc-prod/imputation-bcf-vcf:1.0.5-1.10.2-0.1.16-1649948623"
+ String bcftools_docker = "us.gcr.io/broad-gotc-prod/imputation-bcf-vcf:1.0.6-1.10.2-0.1.16-1663946207"
Int cpu = 1
Int memory_mb = 4000
Int disk_size_gb = 100 + ceil(2.2 * size(vcf, "GiB"))
@@ -730,7 +730,7 @@ task ExtractIDs {
String output_basename
Int disk_size_gb = 2*ceil(size(vcf, "GiB")) + 100
- String bcftools_docker = "us.gcr.io/broad-gotc-prod/imputation-bcf-vcf:1.0.5-1.10.2-0.1.16-1649948623"
+ String bcftools_docker = "us.gcr.io/broad-gotc-prod/imputation-bcf-vcf:1.0.6-1.10.2-0.1.16-1663946207"
Int cpu = 1
Int memory_mb = 4000
}
@@ -792,7 +792,7 @@ task RemoveAnnotations {
File vcf
String basename
- String bcftools_docker = "us.gcr.io/broad-gotc-prod/imputation-bcf-vcf:1.0.5-1.10.2-0.1.16-1649948623"
+ String bcftools_docker = "us.gcr.io/broad-gotc-prod/imputation-bcf-vcf:1.0.6-1.10.2-0.1.16-1663946207"
Int cpu = 1
Int memory_mb = 3000
Int disk_size_gb = ceil(2.2*size(vcf, "GiB")) + 100
@@ -874,7 +874,7 @@ task SplitMultiSampleVcf {
input {
File multiSampleVcf
- String bcftools_docker = "us.gcr.io/broad-gotc-prod/imputation-bcf-vcf:1.0.5-1.10.2-0.1.16-1649948623"
+ String bcftools_docker = "us.gcr.io/broad-gotc-prod/imputation-bcf-vcf:1.0.6-1.10.2-0.1.16-1663946207"
Int cpu = 1
Int memory_mb = 8000
Int disk_size_gb = ceil(3*size(multiSampleVcf, "GiB")) + 100
diff --git a/tasks/broad/InternalArraysTasks.wdl b/tasks/broad/InternalArraysTasks.wdl
index d6669ebf8d..7728c6c241 100644
--- a/tasks/broad/InternalArraysTasks.wdl
+++ b/tasks/broad/InternalArraysTasks.wdl
@@ -541,7 +541,7 @@ task FormatArraysOutputs {
>>>
runtime {
- docker: "broadinstitute/horsefish:eMerge_05192022"
+ docker: "gcr.io/emerge-production/emerge_wdls:emerge_09022022"
}
output {
diff --git a/tasks/broad/InternalImputationTasks.wdl b/tasks/broad/InternalImputationTasks.wdl
index e540fc6000..fb0d486a82 100644
--- a/tasks/broad/InternalImputationTasks.wdl
+++ b/tasks/broad/InternalImputationTasks.wdl
@@ -48,7 +48,7 @@ task FormatImputationOutputs {
>>>
runtime {
- docker: "broadinstitute/horsefish:eMerge_05192022"
+ docker: "gcr.io/emerge-production/emerge_wdls:emerge_09022022"
}
output {
@@ -115,7 +115,7 @@ task FormatImputationWideOutputs{
>>>
runtime {
- docker: "broadinstitute/horsefish:eMerge_05192022"
+ docker: "gcr.io/emerge-production/emerge_wdls:emerge_09022022"
}
output {
diff --git a/tasks/broad/InternalTasks.wdl b/tasks/broad/InternalTasks.wdl
index 04cb93bb3b..5bf3545e7a 100644
--- a/tasks/broad/InternalTasks.wdl
+++ b/tasks/broad/InternalTasks.wdl
@@ -193,7 +193,7 @@ task IngestOutputsToTDR {
}
runtime {
- docker: "broadinstitute/horsefish:eMerge_05192022"
+ docker: "gcr.io/emerge-production/emerge_wdls:emerge_09022022"
}
output {
diff --git a/tasks/broad/RNAWithUMIsTasks.wdl b/tasks/broad/RNAWithUMIsTasks.wdl
index f9e93556c2..a753d04236 100644
--- a/tasks/broad/RNAWithUMIsTasks.wdl
+++ b/tasks/broad/RNAWithUMIsTasks.wdl
@@ -98,11 +98,14 @@ task Fastp {
File adapter_fasta = "gs://gcp-public-data--broad-references/RNA/resources/Illumina_adapters.fasta"
String docker = "us.gcr.io/broad-gotc-prod/fastp:1.0.0-0.20.1-1649253500"
- Int memory_mb = "16384"
+ Int memory_mb = ceil(1.5*size(fastq1, "MiB")) + 8192 # Experimentally determined formula for memory allocation
Int disk_size_gb = 5*ceil(size(fastq1, "GiB")) + 128
+ File monitoring_script = "gs://broad-dsde-methods-monitoring/cromwell_monitoring_script.sh"
}
command {
+ bash ~{monitoring_script} > monitoring.log &
+
fastp --in1 ~{fastq1} --in2 ~{fastq2} --out1 ~{output_prefix}_read1.fastq.gz --out2 ~{output_prefix}_read2.fastq.gz \
--disable_quality_filtering \
--disable_length_filtering \
@@ -115,9 +118,11 @@ task Fastp {
memory: "~{memory_mb} MiB"
disks: "local-disk ~{disk_size_gb} HDD"
preemptible: 0
+ maxRetries: 2
}
output {
+ File monitoring_log = "monitoring.log"
File fastq1_clipped = output_prefix + "_read1.fastq.gz"
File fastq2_clipped = output_prefix + "_read2.fastq.gz"
}
@@ -305,7 +310,7 @@ task rnaseqc2 {
String docker = "us.gcr.io/broad-dsde-methods/ckachulis/rnaseqc:2.4.2"
Int cpu = 1
- Int memory_mb = 3500
+ Int memory_mb = 8000
Int disk_size_gb = ceil(size(bam_file, 'GiB') + size(genes_gtf, 'GiB') + size(exon_bed, 'GiB')) + 50
}
@@ -331,6 +336,7 @@ task rnaseqc2 {
cpu: cpu
memory: "~{memory_mb} MiB"
disks: "local-disk ~{disk_size_gb} HDD"
+ maxRetries: 2
}
}
@@ -600,17 +606,22 @@ task GroupByUMIs {
String docker = "us.gcr.io/broad-gotc-prod/umi_tools:1.0.0-1.1.1-1638821470"
Int cpu = 2
- Int memory_mb = 7500
+ Int memory_mb = 64000
Int disk_size_gb = ceil(2.2 * size([bam, bam_index], "GiB")) + 100
+
+ File monitoring_script = "gs://broad-dsde-methods-monitoring/cromwell_monitoring_script.sh"
}
command <<<
+ bash ~{monitoring_script} > monitoring.log &
+
umi_tools group -I ~{bam} --paired --no-sort-output --output-bam --stdout ~{output_bam_basename}.bam --umi-tag-delimiter "-" \
--extract-umi-method tag --umi-tag RX --unmapped-reads use
>>>
output {
File grouped_bam = "~{output_bam_basename}.bam"
+ File monitoring_log = "monitoring.log"
}
runtime {
@@ -618,6 +629,7 @@ task GroupByUMIs {
cpu: cpu
memory: "~{memory_mb} MiB"
disks: "local-disk ~{disk_size_gb} HDD"
+ maxRetries: 1
}
}
@@ -626,7 +638,7 @@ task MarkDuplicatesUMIAware {
File bam
String output_basename
Boolean remove_duplicates
-
+ Boolean use_umi
String docker = "us.gcr.io/broad-gotc-prod/picard-cloud:2.26.11"
Int cpu = 1
@@ -641,8 +653,9 @@ task MarkDuplicatesUMIAware {
INPUT=~{bam} \
OUTPUT=~{output_bam_basename}.bam \
METRICS_FILE=~{output_basename}.duplicate.metrics \
- READ_ONE_BARCODE_TAG=BX \
- REMOVE_DUPLICATES=~{remove_duplicates}
+ REMOVE_DUPLICATES=~{remove_duplicates} \
+ ~{true='READ_ONE_BARCODE_TAG=BX' false='' use_umi}
+
>>>
output {
@@ -662,7 +675,6 @@ task formatPipelineOutputs {
input {
String sample_id
String transcriptome_bam
- String transcriptome_bam_index
String transcriptome_duplicate_metrics
String output_bam
String output_bam_index
@@ -706,7 +718,6 @@ task formatPipelineOutputs {
# NOTE: we rename some field names to match the TDR schema
outputs_dict["sample_id"]="~{sample_id}" # primary key
outputs_dict["transcriptome_bam"]="~{transcriptome_bam}"
- outputs_dict["transcriptome_bam_index"]="~{transcriptome_bam_index}"
outputs_dict["transcriptome_duplicate_metrics_file"]="~{transcriptome_duplicate_metrics}"
outputs_dict["genome_bam"]="~{output_bam}"
outputs_dict["genome_bam_index"]="~{output_bam_index}"
@@ -750,7 +761,7 @@ task formatPipelineOutputs {
>>>
runtime {
- docker: "broadinstitute/horsefish:tdr_import_v1.1"
+ docker: "broadinstitute/horsefish:tdr_import_v1.4"
cpu: cpu
memory: "~{memory_mb} MiB"
disks: "local-disk ~{disk_size_gb} HDD"
@@ -763,30 +774,30 @@ task formatPipelineOutputs {
task updateOutputsInTDR {
input {
- String staging_bucket
String tdr_dataset_uuid
File outputs_json
- String sample_id
Int cpu = 1
Int memory_mb = 2000
Int disk_size_gb = 10
}
- String tdr_target_table = "sample"
-
command <<<
+ # input args:
+ # -d dataset uuid
+ # -t target table in dataset
+ # -o json of data to ingest
+ # -f field to populate with timestamp at ingest (can have multiple)
python -u /scripts/export_pipeline_outputs_to_tdr.py \
-d "~{tdr_dataset_uuid}" \
- -b "~{staging_bucket}" \
- -t "~{tdr_target_table}" \
+ -t "sample" \
-o "~{outputs_json}" \
- -k "sample_id" \
- -v "~{sample_id}"
+ -f "version_timestamp" \
+ -f "analysis_end_time"
>>>
runtime {
- docker: "broadinstitute/horsefish:twisttcap_scripts"
+ docker: "broadinstitute/horsefish:tdr_import_v1.4"
cpu: cpu
memory: "~{memory_mb} MiB"
disks: "local-disk ~{disk_size_gb} HDD"
diff --git a/tasks/broad/UMIAwareDuplicateMarking.wdl b/tasks/broad/UMIAwareDuplicateMarking.wdl
index e28427f164..1c865b7e54 100644
--- a/tasks/broad/UMIAwareDuplicateMarking.wdl
+++ b/tasks/broad/UMIAwareDuplicateMarking.wdl
@@ -85,7 +85,8 @@ workflow UMIAwareDuplicateMarking {
input:
bam = SortSamByQueryNameBeforeDuplicateMarking.output_bam,
output_basename = output_basename,
- remove_duplicates = remove_duplicates
+ remove_duplicates = remove_duplicates,
+ use_umi = true
}
if (coordinate_sort_output){
diff --git a/tasks/broad/UltimaGenomicsWholeGenomeGermlineTasks.wdl b/tasks/broad/UltimaGenomicsWholeGenomeGermlineTasks.wdl
index f7af1860a4..847a9eac3a 100644
--- a/tasks/broad/UltimaGenomicsWholeGenomeGermlineTasks.wdl
+++ b/tasks/broad/UltimaGenomicsWholeGenomeGermlineTasks.wdl
@@ -651,7 +651,7 @@ task FilterVCF {
--flow_order ~{used_flow_order} \
~{true="--blacklist_cg_insertions" false="" filter_cg_insertions} \
--annotate_intervals ~{sep=" --annotate_intervals " annotation_intervals} \
- --output_file /cromwell_root/~{final_vcf_base_name}.filtered.vcf.gz
+ --output_file ~{final_vcf_base_name}.filtered.vcf.gz
>>>
runtime {
@@ -710,7 +710,7 @@ task TrainModel {
~{"--exome_weight " + exome_weight} \
~{"--exome_weight_annotation " + exome_weight_annotation} \
--annotate_intervals ~{sep=" --annotate_intervals " annotation_intervals} \
- --output_file_prefix /cromwell_root/~{input_vcf_name}.model
+ --output_file_prefix ~{input_vcf_name}.model
>>>
runtime {
diff --git a/tasks/broad/Utilities.wdl b/tasks/broad/Utilities.wdl
index 7244e3b773..947c56ae01 100644
--- a/tasks/broad/Utilities.wdl
+++ b/tasks/broad/Utilities.wdl
@@ -110,7 +110,7 @@ task ScatterIntervalList {
Int interval_count = read_int(stdout())
}
runtime {
- docker: "us.gcr.io/broad-gotc-prod/picard-python:1.0.0-2.26.10-1647265026"
+ docker: "us.gcr.io/broad-gotc-prod/picard-python:1.0.0-2.26.10-1663951039"
memory: "2000 MiB"
}
}
@@ -308,7 +308,7 @@ task MakeOptionalOutputBam {
Boolean keep_inputs
Int preemptible_tries = 3
}
- Int disk_size = ceil(size(bam_input, "GiB")) + 5
+ Int disk_size = ceil(size(bam_input, "GiB")) + 15
String basename = basename(bam_input, ".bam")
command<<<
if [ ~{keep_inputs} = "true" ]
diff --git a/tasks/skylab/CheckInputs.wdl b/tasks/skylab/CheckInputs.wdl
index 9ff5187f32..a33bdf0811 100644
--- a/tasks/skylab/CheckInputs.wdl
+++ b/tasks/skylab/CheckInputs.wdl
@@ -41,7 +41,7 @@ task checkInputArrays {
}
runtime {
- docker: "ubuntu:18.04"
+ docker: "bashell/alpine-bash:latest"
cpu: 1
memory: "1 GiB"
disks: "local-disk 1 HDD"
@@ -111,7 +111,7 @@ task checkOptimusInput {
}
runtime {
- docker: "ubuntu:18.04"
+ docker: "bashell/alpine-bash:latest"
cpu: cpu
memory: "~{machine_mem_mb} GiB"
disks: "local-disk ~{disk} HDD"
diff --git a/tasks/skylab/FeatureCounts.wdl b/tasks/skylab/FeatureCounts.wdl
index bc260e465e..3530df3374 100644
--- a/tasks/skylab/FeatureCounts.wdl
+++ b/tasks/skylab/FeatureCounts.wdl
@@ -8,7 +8,7 @@ task CountAlignments {
File annotation_gtf
#runtime values
- String docker = "quay.io/humancellatlas/snss2-featurecount:0.1.0"
+ String docker = "us.gcr.io/broad-gotc-prod/subread:1.0.0-2.0.1-1662044537"
Int machine_mem_mb = 8250
Int cpu = 1
Int disk = ceil(size(aligned_bam_inputs,"Gi")*2) + 10
@@ -43,7 +43,7 @@ task CountAlignments {
-g gene_id
# create a new input bam where the alignemnts crossing intron-exon junctions are removed
- python3 /tools/remove-reads-on-junctions.py --input-gtf ~{annotation_gtf} \
+ python3 /usr/gitc/remove-reads-on-junctions.py --input-gtf ~{annotation_gtf} \
--input-bam "${bam_files[$i]}" --output-bam "${output_prefix[$i]}.input.nojunc.bam"
# counting the exons
diff --git a/tasks/skylab/HISAT2.wdl b/tasks/skylab/HISAT2.wdl
index 1000fcbdb6..e009c1346a 100644
--- a/tasks/skylab/HISAT2.wdl
+++ b/tasks/skylab/HISAT2.wdl
@@ -10,7 +10,7 @@ task HISAT2PairedEnd {
String input_id
# runtime values
- String docker = "quay.io/humancellatlas/secondary-analysis-hisat2:v0.2.2-2-2.1.0"
+ String docker = "us.gcr.io/broad-gotc-prod/hisat2:1.0.0-1662998171"
Int machine_mem_mb = 16500
Int cpu = 4
# Using (fastq1 + fastq2) x 100 gives factor of a few buffer. BAM can be up to ~5 x (fastq1 + fastq2).
@@ -136,7 +136,7 @@ task HISAT2RSEM {
String input_id
# runtime values
- String docker = "quay.io/humancellatlas/secondary-analysis-hisat2:v0.2.2-2-2.1.0"
+ String docker = "us.gcr.io/broad-gotc-prod/hisat2:1.0.0-1662998171"
Int machine_mem_mb = 16500
Int cpu = 4
# Using (fastq1 + fastq2) x 100 gives factor of a few buffer. BAM can be up to ~5 x (fastq1 + fastq2).
@@ -267,7 +267,7 @@ input {
String input_id
# runtime values
- String docker = "quay.io/humancellatlas/secondary-analysis-hisat2:v0.2.2-2-2.1.0"
+ String docker = "us.gcr.io/broad-gotc-prod/hisat2:1.0.0-1662998171"
Int machine_mem_mb = 16500
Int cpu = 4
# Using fastq x 100 gives factor of a few buffer. BAM can be up to ~5 x fastq.
@@ -359,7 +359,7 @@ task HISAT2InspectIndex {
String ref_name
# runtime values
- String docker = "quay.io/humancellatlas/secondary-analysis-hisat2:v0.2.2-2-2.1.0"
+ String docker = "us.gcr.io/broad-gotc-prod/hisat2:1.0.0-1662998171"
Int machine_mem_mb = 3850
Int cpu = 1
# use provided disk number or dynamically size on our own, with 200GiB of additional disk
@@ -410,7 +410,7 @@ task HISAT2RSEMSingleEnd {
String input_id
# runtime values
- String docker = "quay.io/humancellatlas/secondary-analysis-hisat2:v0.2.2-2-2.1.0"
+ String docker = "us.gcr.io/broad-gotc-prod/hisat2:1.0.0-1662998171"
Int machine_mem_mb = 15000
Int cpu = 4
Int disk = ceil((size(fastq, "GiB")) * 100 + size(hisat2_ref, "GiB") * 2 + 200)
diff --git a/tasks/skylab/LoomUtils.wdl b/tasks/skylab/LoomUtils.wdl
index d4d50ce6d3..babde437d5 100644
--- a/tasks/skylab/LoomUtils.wdl
+++ b/tasks/skylab/LoomUtils.wdl
@@ -3,7 +3,7 @@ version 1.0
task SmartSeq2LoomOutput {
input {
#runtime values
- String docker = "quay.io/humancellatlas/secondary-analysis-loom-output:0.0.6-1"
+ String docker = "us.gcr.io/broad-gotc-prod/pytools:1.0.0-1661263730"
# the gene count file "_rsem.genes.results" in the task results folder call-RSEMExpression
File rsem_gene_results
# file named "_QCs.csv" in the folder "call-GroupQCOutputs/glob-*" of the the SS2 output
@@ -32,7 +32,7 @@ task SmartSeq2LoomOutput {
command {
set -euo pipefail
- python3 /tools/create_loom_ss2.py \
+ python3 /usr/gitc/create_loom_ss2.py \
--qc_files ~{sep=' ' smartseq_qc_files} \
--rsem_genes_results ~{rsem_gene_results} \
--output_loom_path "~{input_id}.loom" \
@@ -61,7 +61,7 @@ task OptimusLoomGeneration {
input {
#runtime values
- String docker = "quay.io/humancellatlas/secondary-analysis-loom-output:v1.3.0"
+ String docker = "us.gcr.io/broad-gotc-prod/pytools:1.0.0-1661263730"
# name of the sample
String input_id
# user provided id
@@ -104,7 +104,7 @@ task OptimusLoomGeneration {
set -euo pipefail
if [ "~{counting_mode}" == "sc_rna" ]; then
- python3 /tools/create_loom_optimus.py \
+ python3 /usr/gitc/create_loom_optimus.py \
--empty_drops_file ~{empty_drops_result} \
--add_emptydrops_data "yes" \
--annotation_file ~{annotation_file} \
@@ -121,7 +121,7 @@ task OptimusLoomGeneration {
--expression_data_type "exonic" \
--pipeline_version ~{pipeline_version}
else
- python3 /tools/create_snrna_optimus.py \
+ python3 /usr/gitc/create_snrna_optimus.py \
--annotation_file ~{annotation_file} \
--cell_metrics ~{cell_metrics} \
--gene_metrics ~{gene_metrics} \
@@ -163,7 +163,7 @@ task AggregateSmartSeq2Loom {
String? species
String? organ
String pipeline_version
- String docker = "quay.io/humancellatlas/secondary-analysis-loom-output:0.0.6-1"
+ String docker = "us.gcr.io/broad-gotc-prod/pytools:1.0.0-1661263730"
Int disk = 200
Int machine_mem_mb = 4
Int cpu = 1
@@ -177,7 +177,7 @@ task AggregateSmartSeq2Loom {
set -e
# Merge the loom files
- python3 /tools/ss2_loom_merge.py \
+ python3 /usr/gitc/ss2_loom_merge.py \
--input-loom-files ~{sep=' ' loom_input} \
--output-loom-file "~{batch_id}.loom" \
--batch_id ~{batch_id} \
@@ -211,7 +211,7 @@ task SingleNucleusOptimusLoomOutput {
input {
#runtime values
- String docker = "quay.io/humancellatlas/secondary-analysis-loom-output:v1.1.0"
+ String docker = "us.gcr.io/broad-gotc-prod/pytools:1.0.0-1661263730"
# name of the sample
String input_id
# user provided id
@@ -256,7 +256,7 @@ task SingleNucleusOptimusLoomOutput {
command {
set -euo pipefail
- python3 /tools/create_snrna_optimus_counts.py \
+ python3 /usr/gitc/create_snrna_optimus_counts.py \
--annotation_file ~{annotation_file} \
--cell_metrics ~{cell_metrics} \
--gene_metrics ~{gene_metrics} \
@@ -292,7 +292,7 @@ task SingleNucleusOptimusLoomOutput {
task SingleNucleusSmartSeq2LoomOutput {
input {
#runtime values
- String docker = "quay.io/humancellatlas/secondary-analysis-loom-output:0.0.8"
+ String docker = "us.gcr.io/broad-gotc-prod/pytools:1.0.0-1661263730"
Array[File] alignment_summary_metrics
Array[File] dedup_metrics
@@ -339,7 +339,7 @@ task SingleNucleusSmartSeq2LoomOutput {
do
# creates a table with gene_id, gene_name, intron and exon counts
echo "Running create_snss2_counts_csv."
- python /tools/create_snss2_counts_csv.py \
+ python /usr/gitc/create_snss2_counts_csv.py \
--in-gtf ~{annotation_introns_added_gtf} \
--intron-counts ${introns_counts_files[$i]} \
--exon-counts ${exons_counts_files[$i]} \
@@ -354,7 +354,7 @@ task SingleNucleusSmartSeq2LoomOutput {
# create the loom file
echo "Running create_loom_snss2."
- python3 /tools/create_loom_snss2.py \
+ python3 /usr/gitc/create_loom_snss2.py \
--qc_files "${output_prefix[$i]}.Picard_group.csv" \
--count_results "${output_prefix[$i]}.exon_intron_counts.tsv" \
--output_loom_path "${output_prefix[$i]}.loom" \
diff --git a/tasks/skylab/RSEM.wdl b/tasks/skylab/RSEM.wdl
index cd148448de..2f74bf4de3 100644
--- a/tasks/skylab/RSEM.wdl
+++ b/tasks/skylab/RSEM.wdl
@@ -8,7 +8,7 @@ task RSEMExpression {
Boolean is_paired
# runtime values
- String docker = "quay.io/humancellatlas/secondary-analysis-rsem:v0.2.2-1.3.0"
+ String docker = "us.gcr.io/broad-gotc-prod/rsem:1.0.0-1663016024"
Int machine_mem_mb = 32768
Int cpu = 4
# use provided disk number or dynamically size on our own, with 200GiB of additional disk
diff --git a/tasks/skylab/StarAlign.wdl b/tasks/skylab/StarAlign.wdl
index fd4bb79764..74f2042306 100644
--- a/tasks/skylab/StarAlign.wdl
+++ b/tasks/skylab/StarAlign.wdl
@@ -6,7 +6,7 @@ task StarAlignBamSingleEnd {
File tar_star_reference
# runtime values
- String docker = "quay.io/humancellatlas/secondary-analysis-star:v0.2.2-2.5.3a-40ead6e"
+ String docker = "us.gcr.io/broad-gotc-prod/star:1.0.0-2.7.9a-1658781884"
Int machine_mem_mb = ceil((size(tar_star_reference, "Gi")) + 6) * 1100
Int cpu = 16
# multiply input size by 2.2 to account for output bam file + 20% overhead, add size of reference.
@@ -72,7 +72,7 @@ task StarAlignFastqPairedEnd {
File tar_star_reference
# runtime values
- String docker = "quay.io/humancellatlas/secondary-analysis-star:v2.7.9a"
+ String docker = "us.gcr.io/broad-gotc-prod/star:1.0.0-2.7.9a-1658781884"
Int machine_mem_mb = ceil((size(tar_star_reference, "Gi")) + 6) * 1100
Int cpu = 16
# multiply input size by 2.2 to account for output bam file + 20% overhead, add size of reference.
@@ -137,7 +137,7 @@ task StarAlignFastqMultisample {
File tar_star_reference
# runtime values
- String docker = "quay.io/humancellatlas/secondary-analysis-star:v2.7.9a"
+ String docker = "us.gcr.io/broad-gotc-prod/star:1.0.0-2.7.9a-1658781884"
Int machine_mem_mb = ceil((size(tar_star_reference, "Gi")) + 6) * 1100
Int cpu = 16
# multiply input size by 2.2 to account for output bam file + 20% overhead, add size of reference.
@@ -221,7 +221,7 @@ task STARsoloFastq {
Boolean? count_exons
# runtime values
- String docker = "quay.io/humancellatlas/secondary-analysis-star:v2.7.9a"
+ String docker = "us.gcr.io/broad-gotc-prod/star:1.0.0-2.7.9a-1658781884"
Int machine_mem_mb = 64000
Int cpu = 8
# multiply input size by 2.2 to account for output bam file + 20% overhead, add size of reference.
@@ -378,60 +378,6 @@ task STARsoloFastq {
}
}
-task ConvertStarOutput {
-
- input {
- File barcodes
- File features
- File matrix
-
- #runtime values
- String docker = "quay.io/humancellatlas/secondary-analysis-python3-scientific:0.1.12"
- Int machine_mem_mb = 8250
- Int cpu = 1
- Int disk = ceil(size(matrix, "Gi") * 2) + 10
- Int preemptible = 3
- }
-
- meta {
- description: "Create three numpy formats for the barcodes, gene names and the count matrix from the STARSolo count matrix in mtx format."
- }
-
- parameter_meta {
- docker: "(optional) the docker image containing the runtime environment for this task"
- machine_mem_mb: "(optional) the amount of memory (MiB) to provision for this task"
- cpu: "(optional) the number of cpus to provision for this task"
- disk: "(optional) the amount of disk space (GiB) to provision for this task"
- preemptible: "(optional) if non-zero, request a pre-emptible instance and allow for this number of preemptions before running the task on a non preemptible machine"
- }
-
- command {
- set -e
-
- # create the compresed raw count matrix with the counts, gene names and the barcodes
- python3 /tools/create-npz-output.py \
- --barcodes ~{barcodes} \
- --features ~{features} \
- --matrix ~{matrix}
-
- }
-
- runtime {
- docker: docker
- memory: "${machine_mem_mb} MiB"
- disks: "local-disk ${disk} HDD"
- cpu: cpu
- preemptible: preemptible
- }
-
- output {
- File row_index = "sparse_counts_row_index.npy"
- File col_index = "sparse_counts_col_index.npy"
- File sparse_counts = "sparse_counts.npz"
- }
-}
-
-
task MergeStarOutput {
input {
@@ -441,7 +387,7 @@ task MergeStarOutput {
String input_id
#runtime values
- String docker = "quay.io/humancellatlas/secondary-analysis-star:merge-star-outputs-v1.1.9"
+ String docker = "us.gcr.io/broad-gotc-prod/pytools:1.0.0-1661263730"
Int machine_mem_mb = 8250
Int cpu = 1
Int disk = ceil(size(matrix, "Gi") * 2) + 10
@@ -466,7 +412,7 @@ task MergeStarOutput {
declare -a matrix_files=(~{sep=' ' matrix})
# create the compressed raw count matrix with the counts, gene names and the barcodes
- python3 /tools/create-merged-npz-output.py \
+ python3 /usr/gitc/create-merged-npz-output.py \
--barcodes ${barcodes_files[@]} \
--features ${features_files[@]} \
--matrix ${matrix_files[@]} \
diff --git a/tasks/skylab/TagGeneExon.wdl b/tasks/skylab/TagGeneExon.wdl
deleted file mode 100644
index daffec5da0..0000000000
--- a/tasks/skylab/TagGeneExon.wdl
+++ /dev/null
@@ -1,121 +0,0 @@
-version 1.0
-
-task TagGeneExon {
- input {
- File annotations_gtf
- File bam_input
-
- # runtime values
- String docker = "quay.io/humancellatlas/secondary-analysis-dropseqtools:v0.2.2-1.13"
- Int machine_mem_mb = 8250
- Int cpu = 1
- Int disk = ceil((size(bam_input, "Gi") + size(annotations_gtf, "Gi")) * 3) + 20
- Int preemptible = 3
- }
-
- meta {
- description: "Tags any read in bam_input that overlaps an intron or exon interval with the gene that those interals correspond to."
- }
-
- parameter_meta {
- annotations_gtf: "GTF annotation file for the species that bam input is derived from. Each record must have a gene_name and transcript_name in addition to a gene_id and transcript_id, no white space at the end of any record and must be in gtf format."
- bam_input: "Aligned bam file."
- docker: "(optional) the docker image containing the runtime environment for this task"
- machine_mem_mb: "(optional) the amount of memory (MiB) to provision for this task"
- cpu: "(optional) the number of cpus to provision for this task"
- disk: "(optional) the amount of disk space (GiB) to provision for this task"
- preemptible: "(optional) if non-zero, request a pre-emptible instance and allow for this number of preemptions before running the task on a non preemptible machine"
- }
-
- command {
- set -e
-
- TagReadWithGeneExon \
- INPUT=${bam_input} \
- OUTPUT=bam_with_gene_exon.bam \
- SUMMARY=gene_exon_tag_summary.log \
- TAG=GE \
- ANNOTATIONS_FILE=${annotations_gtf}
- }
-
- # Larger genomes (mouse-human) require a 7.5gb instance; single-organism genomes work with 3.75gb
- runtime {
- docker: docker
- memory: "${machine_mem_mb} MiB"
- disks: "local-disk ${disk} HDD"
- cpu: cpu
- preemptible: preemptible
- }
-
- output {
- File bam_output = "bam_with_gene_exon.bam"
- File log = "gene_exon_tag_summary.log"
- }
-}
-
-
-task TagReadWithGeneFunction {
- input {
- File annotations_gtf
- File bam_input
-
- String gene_name_tag = "gn"
- String gene_strand_tag = "gs"
- String gene_function_tag = "gf"
-
- String use_strand_info = "true"
-
- # runtime values
- String docker = "quay.io/humancellatlas/secondary-analysis-dropseqtools:2.3.0"
- Int machine_mem_mb = 8250
- Int cpu = 1
- Int disk = ceil((size(bam_input, "Gi") + size(annotations_gtf, "Gi")) * 3) + 20
- Int preemptible = 3
- }
-
- meta {
- description: "Tags any read in bam_input that overlaps an intron or exon interval with the gene that those interals correspond to."
- }
-
- parameter_meta {
- annotations_gtf: "GTF annotation file for the species that bam input is derived from. Each record must have a gene_name and transcript_name in addition to a gene_id and transcript_id, no white space at the end of any record and must be in gtf format."
- bam_input: "Aligned bam file."
- gene_name_tag: "the tag used to denote gene name in the bam (default: gn)"
- gene_strand_tag: "the tag used to denote gene strand in the bam (default: gs)"
- gene_function_tag: "the tag used to denote gene function (INTRONIC, EXONIC, ...) in the output bam (default: gf)"
-
- docker: "(optional) the docker image containing the runtime environment for this task"
- machine_mem_mb: "(optional) the amount of memory (MiB) to provision for this task"
- cpu: "(optional) the number of cpus to provision for this task"
- disk: "(optional) the amount of disk space (GiB) to provision for this task"
- preemptible: "(optional) if non-zero, request a pre-emptible instance and allow for this number of preemptions before running the task on a non preemptible machine"
- }
-
- command {
- set -e
-
- TagReadWithGeneFunction \
- INPUT=${bam_input} \
- OUTPUT=bam_with_gene_exon.bam \
- GENE_NAME_TAG=${gene_name_tag} \
- GENE_STRAND_TAG=${gene_strand_tag} \
- GENE_FUNCTION_TAG=${gene_function_tag} \
- SUMMARY=gene_exon_tag_summary.log \
- ANNOTATIONS_FILE=${annotations_gtf} \
- USE_STRAND_INFO=${use_strand_info}
- }
-
- # Larger genomes (mouse-human) require a 7.5gb instance; single-organism genomes work with 3.75gb
- runtime {
- docker: docker
- memory: "${machine_mem_mb} MiB"
- disks: "local-disk ${disk} HDD"
- cpu: cpu
- preemptible: preemptible
- }
-
- output {
- File bam_output = "bam_with_gene_exon.bam"
- File log = "gene_exon_tag_summary.log"
- }
-}
diff --git a/tasks/skylab/TrimAdapters.wdl b/tasks/skylab/TrimAdapters.wdl
index ed5a0067e7..c74f3fdcd9 100644
--- a/tasks/skylab/TrimAdapters.wdl
+++ b/tasks/skylab/TrimAdapters.wdl
@@ -9,7 +9,7 @@ task TrimAdapters {
Array[String] input_ids
#runtime values
- String docker = "quay.io/humancellatlas/snss2-trim-adapters:0.1.0"
+ String docker = "us.gcr.io/broad-gotc-prod/ea-utils:1.0.0-1.04.807-1659990665"
Int machine_mem_mb = 8250
Int cpu = 1
Int disk = ceil(2*(size(fastq1_input_files, "Gi") + size(fastq2_input_files, "Gi"))) + 10
diff --git a/tasks/skylab/accessory_workflows/build_bwa_reference/bwa-mk-index.wdl b/tasks/skylab/accessory_workflows/build_bwa_reference/bwa-mk-index.wdl
index 5f0f0a03d1..1e4d559be1 100644
--- a/tasks/skylab/accessory_workflows/build_bwa_reference/bwa-mk-index.wdl
+++ b/tasks/skylab/accessory_workflows/build_bwa_reference/bwa-mk-index.wdl
@@ -40,7 +40,7 @@ task BuildBWAreference {
>>>
runtime {
- docker: "quay.io/humancellatlas/snaptools:0.0.1"
+ docker: "us.gcr.io/broad-gotc-prod/bwa:1.0.0-0.7.17-1660770463"
memory: "96GB"
disks: "local-disk 100 HDD"
cpu: "4"
diff --git a/tests/broad/scala_test/src/main/scala/org/broadinstitute/dsp/pipelines/commandline/PipelineTestType.scala b/tests/broad/scala_test/src/main/scala/org/broadinstitute/dsp/pipelines/commandline/PipelineTestType.scala
index e68dcc2ca6..163957773d 100644
--- a/tests/broad/scala_test/src/main/scala/org/broadinstitute/dsp/pipelines/commandline/PipelineTestType.scala
+++ b/tests/broad/scala_test/src/main/scala/org/broadinstitute/dsp/pipelines/commandline/PipelineTestType.scala
@@ -119,7 +119,7 @@ object PipelineTestType extends Enum[PipelineTestType] {
extends PipelineTestType(
"TestUltimaGenomicsWholeGenomeGermline",
"UltimaGenomicsWholeGenomeGermline",
- "/broad/dna_seq/germline/single_sample/UGWGS/"
+ "/broad/dna_seq/germline/single_sample/ugwgs/"
)
case object ValidateChip
extends PipelineTestType(
diff --git a/tests/broad/scala_test/src/main/scala/org/broadinstitute/dsp/pipelines/tester/BroadInternalRNAWithUMIsTester.scala b/tests/broad/scala_test/src/main/scala/org/broadinstitute/dsp/pipelines/tester/BroadInternalRNAWithUMIsTester.scala
index 0a9e1ed220..c44f08eca0 100644
--- a/tests/broad/scala_test/src/main/scala/org/broadinstitute/dsp/pipelines/tester/BroadInternalRNAWithUMIsTester.scala
+++ b/tests/broad/scala_test/src/main/scala/org/broadinstitute/dsp/pipelines/tester/BroadInternalRNAWithUMIsTester.scala
@@ -110,9 +110,9 @@ class BroadInternalRNAWithUMIsTester(
truth_output_bam = truthCloudPath.resolve(
s"$outputBaseName.duplicate_marked.coordinate_sorted.bam"),
test_transcriptome_bam = resultsCloudPath.resolve(
- s"$outputBaseName.transcriptome.duplicate_marked.bam"),
+ s"$outputBaseName.transcriptome_RSEM_post_processed.bam"),
truth_transcriptome_bam = truthCloudPath.resolve(
- s"$outputBaseName.transcriptome.duplicate_marked.bam"),
+ s"$outputBaseName.transcriptome_RSEM_post_processed.bam"),
test_gene_tpm =
resultsCloudPath.resolve(s"$outputBaseName.gene_tpm.gct.gz"),
truth_gene_tpm =
diff --git a/tests/broad/scala_test/src/main/scala/org/broadinstitute/dsp/pipelines/tester/CloudWorkflowTester.scala b/tests/broad/scala_test/src/main/scala/org/broadinstitute/dsp/pipelines/tester/CloudWorkflowTester.scala
index f3982b3279..ba8a59a6a1 100644
--- a/tests/broad/scala_test/src/main/scala/org/broadinstitute/dsp/pipelines/tester/CloudWorkflowTester.scala
+++ b/tests/broad/scala_test/src/main/scala/org/broadinstitute/dsp/pipelines/tester/CloudWorkflowTester.scala
@@ -68,7 +68,10 @@ class CloudWorkflowTester(testerConfig: CloudWorkflowConfig)(
// All of our plumbing or scientific test inputs
protected lazy val inputFileNames: Seq[String] =
- workflowInputRoot.list.toSeq.map(_.name.toString)
+ workflowInputRoot.list
+ .filter(_.name.endsWith(".json"))
+ .toSeq
+ .map(_.name.toString)
// plumbing or scientific
protected val testTypeString: String =
@@ -129,7 +132,7 @@ class CloudWorkflowTester(testerConfig: CloudWorkflowConfig)(
* Generate the run parameters for each testing sample
*/
def generateRunParameters: Seq[WorkflowRunParameters] = {
- workflowInputRoot.list.toSeq.map(_.name.toString).map { fileName =>
+ inputFileNames.map { fileName =>
val inputsName = fileName.replace(".json", "")
val resultsPath = resultsPrefix.resolve(s"$inputsName/")
val truthPath = truthPrefix.resolve(s"$inputsName/")
@@ -190,7 +193,7 @@ class CloudWorkflowTester(testerConfig: CloudWorkflowConfig)(
/** Find any instance of the pipeline followed by . and replace with wrapper workflow
* e.g.
* Arrays. -> TestArrays.
- *
+ *
* This handles the case where the wrapper workflow is a substring of a nested input (CheckFingerprint CheckFingerprintTask)
*/
var inputsString = (workflowInputRoot / fileName).contentAsString
diff --git a/tests/broad/scala_test/src/main/scala/org/broadinstitute/dsp/pipelines/tester/RNAWithUMIsTester.scala b/tests/broad/scala_test/src/main/scala/org/broadinstitute/dsp/pipelines/tester/RNAWithUMIsTester.scala
index cdd2e63095..9815629c1c 100644
--- a/tests/broad/scala_test/src/main/scala/org/broadinstitute/dsp/pipelines/tester/RNAWithUMIsTester.scala
+++ b/tests/broad/scala_test/src/main/scala/org/broadinstitute/dsp/pipelines/tester/RNAWithUMIsTester.scala
@@ -90,9 +90,9 @@ class RNAWithUMIsTester(testerConfig: RNAWithUMIsConfig)(
truth_output_bam = truthCloudPath.resolve(
s"$outputBaseName.duplicate_marked.coordinate_sorted.bam"),
test_transcriptome_bam = resultsCloudPath.resolve(
- s"$outputBaseName.transcriptome.duplicate_marked.bam"),
+ s"$outputBaseName.transcriptome_RSEM_post_processed.bam"),
truth_transcriptome_bam = truthCloudPath.resolve(
- s"$outputBaseName.transcriptome.duplicate_marked.bam"),
+ s"$outputBaseName.transcriptome_RSEM_post_processed.bam"),
test_gene_tpm =
resultsCloudPath.resolve(s"$outputBaseName.gene_tpm.gct.gz"),
truth_gene_tpm =
diff --git a/tests/skylab/ATAC/pr/ValidateATAC.wdl b/tests/skylab/ATAC/pr/ValidateATAC.wdl
index 78292aec6f..9638c7e873 100644
--- a/tests/skylab/ATAC/pr/ValidateATAC.wdl
+++ b/tests/skylab/ATAC/pr/ValidateATAC.wdl
@@ -52,7 +52,7 @@ task ValidateATAC {
>>>
runtime {
- docker: "quay.io/humancellatlas/secondary-analysis-samtools:v0.2.2-1.6"
+ docker: "us.gcr.io/broad-gotc-prod/samtools:1.0.0-1.11-1624651616"
cpu: 1
memory: "3.75 GB"
disks: "local-disk ${required_disk} HDD"
diff --git a/tests/skylab/hca_adapter/pr/ValidateHcaAdapter.wdl b/tests/skylab/hca_adapter/pr/ValidateHcaAdapter.wdl
deleted file mode 100644
index 1ce70f5b53..0000000000
--- a/tests/skylab/hca_adapter/pr/ValidateHcaAdapter.wdl
+++ /dev/null
@@ -1,24 +0,0 @@
-version 1.0
-
-task CompareAdapterFiles {
- input {
- File test_json
- File truth_json
- }
- command <<<
- set -eo pipefail
- diff "~{test_json}" "~{truth_json}"
-
- if [ $? -ne 0 ];
- then
- echo "Error: ${test_json} and ${truth_json} differ"
- fi
- >>>
-
- runtime {
- docker: "quay.io/humancellatlas/secondary-analysis-samtools:v0.2.2-1.6"
- cpu: 1
- memory: "3.75 GiB"
- disks: "local-disk 10 HDD"
- }
-}
\ No newline at end of file
diff --git a/tests/skylab/optimus/4kpbmc/dependencies.json b/tests/skylab/optimus/4kpbmc/dependencies.json
deleted file mode 100644
index 376e9f186b..0000000000
--- a/tests/skylab/optimus/4kpbmc/dependencies.json
+++ /dev/null
@@ -1,21 +0,0 @@
-{
- "Optimus.wdl": "pipelines/skylab/optimus/Optimus.wdl",
- "ValidateOptimus.wdl": "tests/skylab/optimus/pr/ValidateOptimus.wdl",
- "StarAlignBamSingleEnd.wdl": "tasks/skylab/StarAlignBamSingleEnd.wdl",
- "FastqToUBam.wdl": "tasks/skylab/FastqToUBam.wdl",
- "Attach10xBarcodes.wdl": "tasks/skylab/Attach10xBarcodes.wdl",
- "SplitBamByCellBarcode.wdl": "tasks/skylab/SplitBamByCellBarcode.wdl",
- "TagGeneExon.wdl": "tasks/skylab/TagGeneExon.wdl",
- "CorrectUmiMarkDuplicates.wdl": "tasks/skylab/CorrectUmiMarkDuplicates.wdl",
- "MergeSortBam.wdl": "tasks/skylab/MergeSortBam.wdl",
- "CreateCountMatrix.wdl": "tasks/skylab/CreateCountMatrix.wdl",
- "SequenceDataWithMoleculeTagMetrics.wdl": "tasks/skylab/SequenceDataWithMoleculeTagMetrics.wdl",
- "TagSortBam.wdl": "tasks/skylab/TagSortBam.wdl",
- "RunEmptyDrops.wdl": "tasks/skylab/RunEmptyDrops.wdl",
- "LoomUtils.wdl": "tasks/skylab/LoomUtils.wdl",
- "Picard.wdl": "tasks/skylab/Picard.wdl",
- "UmiCorrection.wdl": "tasks/skylab/UmiCorrection.wdl",
- "ScatterBam.wdl": "tasks/skylab/ScatterBam.wdl",
- "ModifyGtf.wdl": "tasks/skylab/ModifyGtf.wdl",
- "OptimusInputChecks.wdl": "tasks/skylab/OptimusInputChecks.wdl"
-}
diff --git a/tests/skylab/scATAC/pr/ValidateSCATAC.wdl b/tests/skylab/scATAC/pr/ValidateSCATAC.wdl
index c005143b1d..deaa8dd945 100644
--- a/tests/skylab/scATAC/pr/ValidateSCATAC.wdl
+++ b/tests/skylab/scATAC/pr/ValidateSCATAC.wdl
@@ -48,7 +48,7 @@ task ValidateSCATAC {
>>>
runtime {
- docker: "quay.io/humancellatlas/secondary-analysis-samtools:v0.2.2-1.6"
+ docker: "us.gcr.io/broad-gotc-prod/samtools:1.0.0-1.11-1624651616"
cpu: 1
memory: "3.75 GB"
disks: "local-disk ${required_disk} HDD"
diff --git a/tests/skylab/smartseq2_multisample/pr/ValidateMultiSampleSmartSeq2.wdl b/tests/skylab/smartseq2_multisample/pr/ValidateMultiSampleSmartSeq2.wdl
index ad6d488dd4..cdfbbeb4ed 100644
--- a/tests/skylab/smartseq2_multisample/pr/ValidateMultiSampleSmartSeq2.wdl
+++ b/tests/skylab/smartseq2_multisample/pr/ValidateMultiSampleSmartSeq2.wdl
@@ -13,12 +13,12 @@ task ValidateSmartSeq2Plate {
# catch intermittent failures
set -eo pipefail
- python3 /tools/loomCompare.py --truth-loom ~{truth_loom} --check-loom ~{loom_output} --delta-cutoff 10
+ python3 /usr/gitc/loomCompare.py --truth-loom ~{truth_loom} --check-loom ~{loom_output} --delta-cutoff 10
>>>
runtime {
- docker: "quay.io/humancellatlas/secondary-analysis-loom-output:0.0.3-fk-2"
+ docker: "us.gcr.io/broad-gotc-prod/pytools:1.0.0-1661263730"
cpu: 1
memory: "8 GiB"
disks: "local-disk 1${disk_size} HDD"
diff --git a/tests/skylab/smartseq2_single_nucleus/pr/ValidateSmartSeq2SingleNucleus.wdl b/tests/skylab/smartseq2_single_nucleus/pr/ValidateSmartSeq2SingleNucleus.wdl
index 2de15c9ec7..80880c2b3e 100644
--- a/tests/skylab/smartseq2_single_nucleus/pr/ValidateSmartSeq2SingleNucleus.wdl
+++ b/tests/skylab/smartseq2_single_nucleus/pr/ValidateSmartSeq2SingleNucleus.wdl
@@ -16,7 +16,7 @@ task ValidateSnSmartSeq2 {
set -eo pipefail
#compare looms
- python3 /tools/loomCompare.py --truth-loom ~{truth_loom} --check-loom ~{loom_output} --delta-cutoff 10
+ python3 /usr/gitc/loomCompare.py --truth-loom ~{truth_loom} --check-loom ~{loom_output} --delta-cutoff 10
# calculate hashes; awk is used to extract the hash from the md5sum output that contains both
# a hash and the filename that was passed. We parse the first 7 columns because a bug in RSEM
@@ -32,7 +32,7 @@ task ValidateSnSmartSeq2 {
>>>
runtime {
- docker: "quay.io/humancellatlas/secondary-analysis-loom-output:0.0.3-fk-2"
+ docker: "us.gcr.io/broad-gotc-prod/pytools:1.0.0-1661263730"
cpu: 1
memory: "8 GB"
disks: "local-disk 1${disk_size} HDD"
diff --git a/verification/VerifyImputation.wdl b/verification/VerifyImputation.wdl
index ba36b60d7e..915044f1b2 100644
--- a/verification/VerifyImputation.wdl
+++ b/verification/VerifyImputation.wdl
@@ -43,7 +43,7 @@ workflow VerifyImputation {
Boolean? done
}
- String bcftools_docker_tag = "us.gcr.io/broad-gotc-prod/imputation-bcf-vcf:1.0.5-1.10.2-0.1.16-1649948623"
+ String bcftools_docker_tag = "us.gcr.io/broad-gotc-prod/imputation-bcf-vcf:1.0.6-1.10.2-0.1.16-1663946207"
scatter (idx in range(length(truth_metrics))) {
call CompareImputationMetrics {
diff --git a/verification/test-wdls/TestBroadInternalRNAWithUMIs.wdl b/verification/test-wdls/TestBroadInternalRNAWithUMIs.wdl
index 9e9ff34050..a073bbcecd 100644
--- a/verification/test-wdls/TestBroadInternalRNAWithUMIs.wdl
+++ b/verification/test-wdls/TestBroadInternalRNAWithUMIs.wdl
@@ -23,7 +23,6 @@ workflow TestBroadInternalRNAWithUMIs {
String sequencing_center = "BI"
String? tdr_dataset_uuid
String? tdr_sample_id
- String? tdr_staging_bucket
# These values will be determined and injected into the inputs by the scala test framework
String truth_path
@@ -55,7 +54,6 @@ workflow TestBroadInternalRNAWithUMIs {
sequencing_center = sequencing_center,
tdr_dataset_uuid = tdr_dataset_uuid,
tdr_sample_id = tdr_sample_id,
- tdr_staging_bucket = tdr_staging_bucket,
environment = environment,
vault_token_path = vault_token_path_arrays
@@ -76,7 +74,6 @@ workflow TestBroadInternalRNAWithUMIs {
BroadInternalRNAWithUMIs.rnaseqc2_gene_tpm,
BroadInternalRNAWithUMIs.output_bam_index,
BroadInternalRNAWithUMIs.output_bam,
- BroadInternalRNAWithUMIs.transcriptome_bam_index,
BroadInternalRNAWithUMIs.transcriptome_bam,
],
diff --git a/verification/test-wdls/TestRNAWithUMIsPipeline.wdl b/verification/test-wdls/TestRNAWithUMIsPipeline.wdl
index 6b47add22b..14ec5771f8 100644
--- a/verification/test-wdls/TestRNAWithUMIsPipeline.wdl
+++ b/verification/test-wdls/TestRNAWithUMIsPipeline.wdl
@@ -73,7 +73,6 @@ workflow TestRNAWithUMIsPipeline {
Array[String] pipeline_outputs = select_all([
RNAWithUMIsPipeline.transcriptome_bam,
- RNAWithUMIsPipeline.transcriptome_bam_index,
RNAWithUMIsPipeline.output_bam,
RNAWithUMIsPipeline.output_bam_index,
RNAWithUMIsPipeline.rnaseqc2_gene_tpm,
diff --git a/website/docs/Pipelines/CEMBA_MethylC_Seq_Pipeline/README.md b/website/docs/Pipelines/CEMBA_MethylC_Seq_Pipeline/README.md
index a2b032b6f8..61f80a0a89 100644
--- a/website/docs/Pipelines/CEMBA_MethylC_Seq_Pipeline/README.md
+++ b/website/docs/Pipelines/CEMBA_MethylC_Seq_Pipeline/README.md
@@ -6,7 +6,7 @@ sidebar_position: 1
| Pipeline Version | Date Updated | Documentation Author | Questions or Feedback |
| :----: | :---: | :----: | :--------------: |
-| [CEMBA_v1.1.0](https://github.com/broadinstitute/warp/releases) | February, 2021 | [Elizabeth Kiernan](mailto:ekiernan@broadinstitute.org) | Please file GitHub issues in warp or contact [Kylee Degatano](mailto:kdegatano@broadinstitute.org) |
+| [CEMBA_v1.1.0](https://github.com/broadinstitute/warp/releases) | February, 2021 | [Elizabeth Kiernan](mailto:ekiernan@broadinstitute.org) | Please file GitHub issues in warp or contact [the WARP team](mailto:warp-pipelines-help@broadinstitute.org) |
![CEMBA](./CEMBA.png)
@@ -183,11 +183,11 @@ Please identify the pipeline in your methods section using the CEMBA Pipeline's
## Consortia Support
This pipeline is supported and used by the [BRAIN Initiative Cell Census Network](https://biccn.org/) (BICCN).
-If your organization also uses this pipeline, we would love to list you! Please reach out to us by contacting [Kylee Degatano](mailto:kdegatano@broadinstitute.org).
+If your organization also uses this pipeline, we would love to list you! Please reach out to us by contacting [the WARP team](mailto:warp-pipelines-help@broadinstitute.org).
## Have Suggestions?
-Please help us make our tools better by contacting [Kylee Degatano](mailto:kdegatano@broadinstitute.org) for pipeline-related suggestions or questions.
+Please help us make our tools better by contacting [the WARP team](mailto:warp-pipelines-help@broadinstitute.org) for pipeline-related suggestions or questions.
diff --git a/website/docs/Pipelines/Exome_Germline_Single_Sample_Pipeline/README.md b/website/docs/Pipelines/Exome_Germline_Single_Sample_Pipeline/README.md
index f37867a91f..d13fd2f02d 100644
--- a/website/docs/Pipelines/Exome_Germline_Single_Sample_Pipeline/README.md
+++ b/website/docs/Pipelines/Exome_Germline_Single_Sample_Pipeline/README.md
@@ -6,7 +6,7 @@ sidebar_position: 1
| Pipeline Version | Date Updated | Documentation Author | Questions or Feedback |
| :----: | :---: | :----: | :--------------: |
-| [ExomeGermlineSingleSample_v3.0.0](https://github.com/broadinstitute/warp/releases?q=ExomeGermlineSingleSample_v3.0.0&expanded=true) | November, 2021 | [Elizabeth Kiernan](mailto:ekiernan@broadinstitute.org) | Please file GitHub issues in WARP or contact [Kylee Degatano](mailto:kdegatano@broadinstitute.org) |
+| [ExomeGermlineSingleSample_v3.0.0](https://github.com/broadinstitute/warp/releases?q=ExomeGermlineSingleSample_v3.0.0&expanded=true) | November, 2021 | [Elizabeth Kiernan](mailto:ekiernan@broadinstitute.org) | Please file GitHub issues in WARP or contact [the WARP team](mailto:warp-pipelines-help@broadinstitute.org) |
The Exome Germline Single Sample pipeline implements data pre-processing and initial variant calling according to the GATK Best Practices for germline SNP and Indel discovery in human exome sequencing data.
diff --git a/website/docs/Pipelines/Genomic_Data_Commons_Whole_Genome_Somatic/README.md b/website/docs/Pipelines/Genomic_Data_Commons_Whole_Genome_Somatic/README.md
index d29c436db1..b04313d299 100644
--- a/website/docs/Pipelines/Genomic_Data_Commons_Whole_Genome_Somatic/README.md
+++ b/website/docs/Pipelines/Genomic_Data_Commons_Whole_Genome_Somatic/README.md
@@ -6,7 +6,7 @@ sidebar_position: 1
| Pipeline Version | Date Updated | Documentation Author | Questions or Feedback |
| :----: | :---: | :----: | :--------------: |
-| [GDCWholeGenomeSomaticSingleSample_v1.0.1](https://github.com/broadinstitute/warp/releases) | January, 2021 | [Elizabeth Kiernan](mailto:ekiernan@broadinstitute.org) | Please file GitHub issues in warp or contact [Kylee Degatano](mailto:kdegatano@broadinstitute.org) |
+| [GDCWholeGenomeSomaticSingleSample_v1.0.1](https://github.com/broadinstitute/warp/releases) | January, 2021 | [Elizabeth Kiernan](mailto:ekiernan@broadinstitute.org) | Please file GitHub issues in warp or contact [the WARP team](mailto:warp-pipelines-help@broadinstitute.org) |
## Introduction to the GDC Whole Genome Somatic Single Sample pipeline
@@ -114,7 +114,7 @@ Alternatively, Cromwell allows you to specify an output directory using an optio
- Please visit the [GATK Technical Documentation](https://gatk.broadinstitute.org/hc/en-us/categories/360002310591) site for further documentation on GATK-related workflows and tools.
## Contact us
-Please help us make our tools better by contacting [Kylee Degatano](mailto:kdegatano@broadinstitute.org) for pipeline-related suggestions or questions.
+Please help us make our tools better by contacting [the WARP team](mailto:warp-pipelines-help@broadinstitute.org) for pipeline-related suggestions or questions.
## Licensing
diff --git a/website/docs/Pipelines/Illumina_Genotyping_Arrays_Pipeline/IlluminaGenotypingArray.documentation.md b/website/docs/Pipelines/Illumina_Genotyping_Arrays_Pipeline/IlluminaGenotypingArray.documentation.md
index 2e58261ae3..84e524016d 100644
--- a/website/docs/Pipelines/Illumina_Genotyping_Arrays_Pipeline/IlluminaGenotypingArray.documentation.md
+++ b/website/docs/Pipelines/Illumina_Genotyping_Arrays_Pipeline/IlluminaGenotypingArray.documentation.md
@@ -6,7 +6,7 @@ sidebar_position: 1
| Pipeline Version | Date Updated | Documentation Author | Questions or Feedback |
| :----: | :---: | :----: | :--------------: |
-| [Version 1.11.6](https://github.com/broadinstitute/warp/releases) | October, 2021 | [Elizabeth Kiernan](mailto:ekiernan@broadinstitute.org) | Please file GitHub issues in warp or contact [Kylee Degatano](mailto:kdegatano@broadinstitute.org) |
+| [Version 1.11.6](https://github.com/broadinstitute/warp/releases) | October, 2021 | [Elizabeth Kiernan](mailto:ekiernan@broadinstitute.org) | Please file GitHub issues in warp or contact [the WARP team](mailto:warp-pipelines-help@broadinstitute.org) |
![The Illumina Genotyping Array Pipeline](./IlluminaGenotyping.png)
@@ -238,7 +238,7 @@ The Illumina Genotyping Array Pipeline is available on the cloud-based platform
## Feedback and questions
-Please help us make our tools better by contacting [Kylee Degatano](mailto:kdegatano@broadinstitute.org) for pipeline-related suggestions or questions.
+Please help us make our tools better by contacting [the WARP team](mailto:warp-pipelines-help@broadinstitute.org) for pipeline-related suggestions or questions.
diff --git a/website/docs/Pipelines/Imputation_Pipeline/README.md b/website/docs/Pipelines/Imputation_Pipeline/README.md
index 900ac4251c..d0c24b1d9b 100644
--- a/website/docs/Pipelines/Imputation_Pipeline/README.md
+++ b/website/docs/Pipelines/Imputation_Pipeline/README.md
@@ -6,7 +6,7 @@ sidebar_position: 1
| Pipeline Version | Date Updated | Documentation Author | Questions or Feedback |
| :----: | :---: | :----: | :--------------: |
-| [Imputation_v1.0.0](https://github.com/broadinstitute/warp/releases?q=Imputation_v1.0.0&expanded=true) | August, 2021 | [Elizabeth Kiernan](mailto:ekiernan@broadinstitute.org) | Please file GitHub issues in warp or contact [Kylee Degatano](mailto:kdegatano@broadinstitute.org) |
+| [Imputation_v1.0.0](https://github.com/broadinstitute/warp/releases?q=Imputation_v1.0.0&expanded=true) | August, 2021 | [Elizabeth Kiernan](mailto:ekiernan@broadinstitute.org) | Please file GitHub issues in warp or contact [the WARP team](mailto:warp-pipelines-help@broadinstitute.org) |
## Introduction to the Imputation pipeline
The Imputation pipeline imputes missing genotypes from either a multi-sample VCF or an array of single sample VCFs using a large genomic reference panel. It is based on the [Michigan Imputation Server pipeline](https://imputationserver.readthedocs.io/en/latest/pipeline/). Overall, the pipeline filters, phases, and performs imputation on a multi-sample VCF. It outputs the imputed VCF along with key imputation metrics.
@@ -140,7 +140,7 @@ The pipeline is cost-optimized for between 100 and 1,000 samples, where the cost
## Contact us
-Help us make our tools better by contacting [Kylee Degatano](mailto:kdegatano@broadinstitute.org) for pipeline-related suggestions or questions.
+Help us make our tools better by contacting [the WARP team](mailto:warp-pipelines-help@broadinstitute.org) for pipeline-related suggestions or questions.
## Licensing
diff --git a/website/docs/Pipelines/Optimus_Pipeline/README.md b/website/docs/Pipelines/Optimus_Pipeline/README.md
index 574cf3b18d..6ac531aed9 100644
--- a/website/docs/Pipelines/Optimus_Pipeline/README.md
+++ b/website/docs/Pipelines/Optimus_Pipeline/README.md
@@ -6,7 +6,7 @@ sidebar_position: 1
| Pipeline Version | Date Updated | Documentation Author | Questions or Feedback |
| :----: | :---: | :----: | :--------------: |
-| [optimus_v5.4.0](https://github.com/broadinstitute/warp/releases?q=optimus&expanded=true) | February, 2022 | [Elizabeth Kiernan](mailto:ekiernan@broadinstitute.org) | Please file GitHub issues in warp or contact [Kylee Degatano](mailto:kdegatano@broadinstitute.org) |
+| [optimus_v5.4.0](https://github.com/broadinstitute/warp/releases?q=optimus&expanded=true) | February, 2022 | [Elizabeth Kiernan](mailto:ekiernan@broadinstitute.org) | Please file GitHub issues in warp or contact [the WARP team](mailto:warp-pipelines-help@broadinstitute.org) |
![Optimus_diagram](Optimus_diagram.png)
@@ -276,11 +276,11 @@ This pipeline is supported and used by the [Human Cell Atlas](https://www.humanc
Each consortia may use slightly different reference files for data analysis or have different post-processing steps. Learn more by reading the [Consortia Processing](./consortia-processing.md) overview.
-If your organization also uses this pipeline, we would like to list you! Please reach out to us by contacting [Kylee Degatano](mailto:kdegatano@broadinstitute.org).
+If your organization also uses this pipeline, we would like to list you! Please reach out to us by contacting [the WARP team](mailto:warp-pipelines-help@broadinstitute.org).
## Feedback
-Please help us make our tools better by contacting [Kylee Degatano](mailto:kdegatano@broadinstitute.org) for pipeline-related suggestions or questions.
+Please help us make our tools better by contacting [the WARP team](mailto:warp-pipelines-help@broadinstitute.org) for pipeline-related suggestions or questions.
## FAQs
diff --git a/website/docs/Pipelines/RNA_with_UMIs_Pipeline/README.md b/website/docs/Pipelines/RNA_with_UMIs_Pipeline/README.md
index c6c357f467..3d22604e04 100644
--- a/website/docs/Pipelines/RNA_with_UMIs_Pipeline/README.md
+++ b/website/docs/Pipelines/RNA_with_UMIs_Pipeline/README.md
@@ -6,7 +6,7 @@ sidebar_position: 1
| Pipeline Version | Date Updated | Documentation Authors | Questions or Feedback |
| :----: | :---: | :----: | :--------------: |
-| [RNAWithUMIsPipeline_v1.0.6](https://github.com/broadinstitute/warp/releases?q=RNAwithUMIs&expanded=true) | April, 2022 | [Elizabeth Kiernan](mailto:ekiernan@broadinstitute.org) & [Kaylee Mathews](mailto:kmathews@broadinstitute.org)| Please file GitHub issues in warp or contact [Kylee Degatano](mailto:kdegatano@broadinstitute.org) |
+| [RNAWithUMIsPipeline_v1.0.6](https://github.com/broadinstitute/warp/releases?q=RNAwithUMIs&expanded=true) | April, 2022 | [Elizabeth Kiernan](mailto:ekiernan@broadinstitute.org) & [Kaylee Mathews](mailto:kmathews@broadinstitute.org)| Please file GitHub issues in warp or contact [the WARP team](mailto:warp-pipelines-help@broadinstitute.org) |
![RNAWithUMIs_diagram](rna-with-umis_diagram.png)
@@ -270,6 +270,6 @@ All RNA with UMIs pipeline releases are documented in the [pipeline changelog](h
## Feedback
-Please help us make our tools better by contacting [Kylee Degatano](mailto:kdegatano@broadinstitute.org) for pipeline-related suggestions or questions.
+Please help us make our tools better by contacting [the WARP team](mailto:warp-pipelines-help@broadinstitute.org) for pipeline-related suggestions or questions.
\ No newline at end of file
diff --git a/website/docs/Pipelines/Single_Cell_ATAC_Seq_Pipeline/README.md b/website/docs/Pipelines/Single_Cell_ATAC_Seq_Pipeline/README.md
index 99351bb089..526f284b0a 100644
--- a/website/docs/Pipelines/Single_Cell_ATAC_Seq_Pipeline/README.md
+++ b/website/docs/Pipelines/Single_Cell_ATAC_Seq_Pipeline/README.md
@@ -6,7 +6,7 @@ sidebar_position: 1
| Pipeline Version | Date Updated | Documentation Author | Questions or Feedback |
| :----: | :---: | :----: | :--------------: |
-| [scATAC 1.2.0 ](https://github.com/broadinstitute/warp/releases) | January 04 2021 | [Elizabeth Kiernan](mailto:ekiernan@broadinstitute.org) | Please file GitHub issues in WARP or contact [Kylee Degatano](mailto:kdegatano@broadinstitute.org) |
+| [scATAC 1.2.0 ](https://github.com/broadinstitute/warp/releases) | January 04 2021 | [Elizabeth Kiernan](mailto:ekiernan@broadinstitute.org) | Please file GitHub issues in WARP or contact [the WARP team](mailto:warp-pipelines-help@broadinstitute.org) |
![scATAC_diagram](./scATAC_diagram.png)
@@ -162,8 +162,8 @@ Please identify the pipeline in your methods section using the scATAC Pipeline's
## Consortia Support
This pipeline is supported and used by the [BRAIN Initiative Cell Census Network](https://biccn.org/) (BICCN).
-If your organization also uses this pipeline, we would love to list you! Please reach out to us by contacting [Kylee Degatano](mailto:kdegatano@broadinstitute.org).
+If your organization also uses this pipeline, we would love to list you! Please reach out to us by contacting [the WARP team](mailto:warp-pipelines-help@broadinstitute.org).
## Pipeline Improvements
-Please help us make our tools better by contacting [Kylee Degatano](mailto:kdegatano@broadinstitute.org) for pipeline-related suggestions or questions.
+Please help us make our tools better by contacting [the WARP team](mailto:warp-pipelines-help@broadinstitute.org) for pipeline-related suggestions or questions.
diff --git a/website/docs/Pipelines/Smart-seq2_Multi_Sample_Pipeline/README.md b/website/docs/Pipelines/Smart-seq2_Multi_Sample_Pipeline/README.md
index 2a201d30d5..f6568530ad 100644
--- a/website/docs/Pipelines/Smart-seq2_Multi_Sample_Pipeline/README.md
+++ b/website/docs/Pipelines/Smart-seq2_Multi_Sample_Pipeline/README.md
@@ -6,7 +6,7 @@ sidebar_position: 1
| Pipeline Version | Date Updated | Documentation Author | Questions or Feedback |
| :----: | :---: | :----: | :--------------: |
-| [MultiSampleSmartSeq2_v2.2.1](https://github.com/broadinstitute/warp/releases) | May, 2021 | [Elizabeth Kiernan](mailto:ekiernan@broadinstitute.org) | Please file GitHub issues in WARP or contact [Kylee Degatano](mailto:kdegatano@broadinstitute.org) |
+| [MultiSampleSmartSeq2_v2.2.1](https://github.com/broadinstitute/warp/releases) | May, 2021 | [Elizabeth Kiernan](mailto:ekiernan@broadinstitute.org) | Please file GitHub issues in WARP or contact [the WARP team](mailto:warp-pipelines-help@broadinstitute.org) |
## Introduction
@@ -104,9 +104,9 @@ Please identify the pipeline in your methods section using the Smart-seq2 Multi-
## Consortia Support
This pipeline is supported and used by the [Human Cell Atlas](https://www.humancellatlas.org/) (HCA) project.
-If your organization also uses this pipeline, we would love to list you! Please reach out to us by contacting [Kylee Degatano](mailto:kdegatano@broadinstitute.org).
+If your organization also uses this pipeline, we would love to list you! Please reach out to us by contacting [the WARP team](mailto:warp-pipelines-help@broadinstitute.org).
## Have Suggestions?
-Please help us make our tools better by contacting [Kylee Degatano](mailto:kdegatano@broadinstitute.org) for pipeline-related suggestions or questions.
+Please help us make our tools better by contacting [the WARP team](mailto:warp-pipelines-help@broadinstitute.org) for pipeline-related suggestions or questions.
diff --git a/website/docs/Pipelines/Smart-seq2_Single_Nucleus_Multi_Sample_Pipeline/README.md b/website/docs/Pipelines/Smart-seq2_Single_Nucleus_Multi_Sample_Pipeline/README.md
index 147e23facc..14ee04fbf8 100644
--- a/website/docs/Pipelines/Smart-seq2_Single_Nucleus_Multi_Sample_Pipeline/README.md
+++ b/website/docs/Pipelines/Smart-seq2_Single_Nucleus_Multi_Sample_Pipeline/README.md
@@ -6,7 +6,7 @@ sidebar_position: 1
| Pipeline Version | Date Updated | Documentation Author | Questions or Feedback |
| :----: | :---: | :----: | :--------------: |
-| [MultiSampleSmartSeq2SingleNuclei_v1.2.2](https://github.com/broadinstitute/warp/releases) | February, 2022 | [Elizabeth Kiernan](mailto:ekiernan@broadinstitute.org) | Please file GitHub issues in WARP or contact [Kylee Degatano](mailto:kdegatano@broadinstitute.org) |
+| [MultiSampleSmartSeq2SingleNuclei_v1.2.2](https://github.com/broadinstitute/warp/releases) | February, 2022 | [Elizabeth Kiernan](mailto:ekiernan@broadinstitute.org) | Please file GitHub issues in WARP or contact [the WARP team](mailto:warp-pipelines-help@broadinstitute.org) |
![](./snSS2.png)
@@ -179,7 +179,7 @@ This pipeline is supported and used by the [BRAIN Initiative Cell Census Network
Each consortia may use slightly different reference files for data analysis or have different post-processing steps. Learn more by reading the [Consortia Processing](./consortia-processing.md) overview.
-If your organization also uses this pipeline, we would love to list you! Please reach out to us by contacting [Kylee Degatano](mailto:kdegatano@broadinstitute.org).
+If your organization also uses this pipeline, we would love to list you! Please reach out to us by contacting [the WARP team](mailto:warp-pipelines-help@broadinstitute.org).
## Feedback
-Please help us make our tools better by contacting [Kylee Degatano](mailto:kdegatano@broadinstitute.org) for pipeline-related suggestions or questions.
\ No newline at end of file
+Please help us make our tools better by contacting [the WARP team](mailto:warp-pipelines-help@broadinstitute.org) for pipeline-related suggestions or questions.
\ No newline at end of file
diff --git a/website/docs/Pipelines/Smart-seq2_Single_Sample_Pipeline/README.md b/website/docs/Pipelines/Smart-seq2_Single_Sample_Pipeline/README.md
index 5dfa83220e..5b27c8fa1e 100644
--- a/website/docs/Pipelines/Smart-seq2_Single_Sample_Pipeline/README.md
+++ b/website/docs/Pipelines/Smart-seq2_Single_Sample_Pipeline/README.md
@@ -6,7 +6,7 @@ sidebar_position: 1
| Pipeline Version | Date Updated | Documentation Author | Questions or Feedback |
| :----: | :---: | :----: | :--------------: |
-| [smartseq2_v5.1.1](https://github.com/broadinstitute/warp/releases) | December, 2020 | [Elizabeth Kiernan](mailto:ekiernan@broadinstitute.org) | Please file GitHub issues in WARP or contact [Kylee Degatano](mailto:kdegatano@broadinstitute.org) |
+| [smartseq2_v5.1.1](https://github.com/broadinstitute/warp/releases) | December, 2020 | [Elizabeth Kiernan](mailto:ekiernan@broadinstitute.org) | Please file GitHub issues in WARP or contact [the WARP team](mailto:warp-pipelines-help@broadinstitute.org) |
![](./smartseq_image.png)
@@ -216,7 +216,7 @@ Please identify the SS2 pipeline in your methods section using the Smart-seq2 Si
## Consortia Support
This pipeline is supported and used by the [Human Cell Atlas](https://www.humancellatlas.org/) (HCA) project.
-If your organization also uses this pipeline, we would love to list you! Please reach out to us by contacting [Kylee Degatano](mailto:kdegatano@broadinstitute.org).
+If your organization also uses this pipeline, we would love to list you! Please reach out to us by contacting [the WARP team](mailto:warp-pipelines-help@broadinstitute.org).
## Have Suggestions?
-Please help us make our tools better by contacting [Kylee Degatano](mailto:kdegatano@broadinstitute.org) for pipeline-related suggestions or questions.
+Please help us make our tools better by contacting [the WARP team](mailto:warp-pipelines-help@broadinstitute.org) for pipeline-related suggestions or questions.
diff --git a/website/docs/Pipelines/Ultima_Genomics_Whole_Genome_Germline_Pipeline/README.md b/website/docs/Pipelines/Ultima_Genomics_Whole_Genome_Germline_Pipeline/README.md
index 2bfde8ca34..5a66f8663c 100644
--- a/website/docs/Pipelines/Ultima_Genomics_Whole_Genome_Germline_Pipeline/README.md
+++ b/website/docs/Pipelines/Ultima_Genomics_Whole_Genome_Germline_Pipeline/README.md
@@ -6,7 +6,7 @@ sidebar_position: 1
| Pipeline Version | Date Updated | Documentation Authors | Questions or Feedback |
| :----: | :---: | :----: | :--------------: |
-| [UltimaGenomicsWholeGenomeGermline_v1.0.0](https://github.com/broadinstitute/warp/releases) | June, 2022 | [Elizabeth Kiernan](mailto:ekiernan@broadinstitute.org) & [Kaylee Mathews](mailto:kmathews@broadinstitute.org)| Please file GitHub issues in warp or contact [Kylee Degatano](mailto:kdegatano@broadinstitute.org) |
+| [UltimaGenomicsWholeGenomeGermline_v1.0.0](https://github.com/broadinstitute/warp/releases) | June, 2022 | [Elizabeth Kiernan](mailto:ekiernan@broadinstitute.org) & [Kaylee Mathews](mailto:kmathews@broadinstitute.org)| Please file GitHub issues in warp or contact [the WARP team](mailto:warp-pipelines-help@broadinstitute.org) |
![UG_diagram](ug_diagram.png)
@@ -275,7 +275,7 @@ All UG_WGS pipeline releases are documented in the [pipeline changelog](https://
## Feedback
-Please help us make our tools better by contacting [Kylee Degatano](mailto:kdegatano@broadinstitute.org) for pipeline-related suggestions or questions.
+Please help us make our tools better by contacting [the WARP team](mailto:warp-pipelines-help@broadinstitute.org) for pipeline-related suggestions or questions.
diff --git a/website/docs/Pipelines/Whole_Genome_Germline_Single_Sample_Pipeline/README.md b/website/docs/Pipelines/Whole_Genome_Germline_Single_Sample_Pipeline/README.md
index ead4636987..bcd06b1d32 100644
--- a/website/docs/Pipelines/Whole_Genome_Germline_Single_Sample_Pipeline/README.md
+++ b/website/docs/Pipelines/Whole_Genome_Germline_Single_Sample_Pipeline/README.md
@@ -6,9 +6,9 @@ sidebar_position: 1
| Pipeline Version | Date Updated | Documentation Author | Questions or Feedback |
| :----: | :---: | :----: | :--------------: |
-| WholeGenomeGermlineSingleSample_v3.1.2 (see [releases page](https://github.com/broadinstitute/warp/releases)) | June, 2022 | [Elizabeth Kiernan](mailto:ekiernan@broadinstitute.org) | Please file GitHub issues in WARP or contact [Kylee Degatano](mailto:kdegatano@broadinstitute.org) |
+| WholeGenomeGermlineSingleSample_v3.1.6 (see [releases page](https://github.com/broadinstitute/warp/releases)) | August, 2022 | [Elizabeth Kiernan](mailto:ekiernan@broadinstitute.org) | Please file GitHub issues in WARP or contact [the WARP team](mailto:warp-pipelines-help@broadinstitute.org) |
-## Introduction to the Whole Genome Germline Single Sample Pipeline
+## Introduction to the Whole Genome Germline Single Sample Pipeline
The Whole Genome Germline Single Sample (WGS) pipeline implements data pre-processing and initial variant calling according to the GATK Best Practices for germline SNP and Indel discovery in human whole-genome sequencing data. It includes the DRAGEN-GATK mode, which makes the pipeline functionally equivalent to DRAGEN’s analysis pipeline (read more in this [DRAGEN-GATK blog](https://gatk.broadinstitute.org/hc/en-us/articles/360039984151)).
@@ -76,7 +76,7 @@ The latest release of the workflow, example data, and dependencies are available
### Input descriptions
The tables below describe each of the WGS pipeline inputs and reference files.
-Examples of how to specify each input can be found in the example [input configuration files (JSONs)](https://github.com/broadinstitute/warp/tree/develop/pipelines/broad/dna_seq/germline/single_sample/wgs/input_files).
+Examples of how to specify each input can be found in the example [input configuration files (JSONs)](https://github.com/broadinstitute/warp/tree/master/pipelines/broad/dna_seq/germline/single_sample/wgs/input_files).
Multiple references are imported as part of a struct from the [DNASeqStruct WDL](https://github.com/broadinstitute/warp/blob/master/structs/dna_seq/DNASeqStructs.wdl), which is located in the WARP [structs library](https://github.com/broadinstitute/warp/tree/master/structs). For references that are part of a struct, the tables below list the relevant struct’s name.
@@ -91,7 +91,7 @@ Overall, the workflow has the following input requirements:
* Reference genome must be Hg38 with ALT contigs
#### Struct inputs
-The following table describes the inputs imported from a struct. Although these are specified in the WGS workflow using the struct name, the actual inputs for each struct are specified in the [example configuration files](https://github.com/broadinstitute/warp/tree/develop/pipelines/broad/dna_seq/germline/single_sample/wgs/input_files).
+The following table describes the inputs imported from a struct. Although these are specified in the WGS workflow using the struct name, the actual inputs for each struct are specified in the [example configuration files](https://github.com/broadinstitute/warp/tree/master/pipelines/broad/dna_seq/germline/single_sample/wgs/input_files).
| Input name | Struct name (alias) | Input description | Input type |
@@ -114,7 +114,7 @@ The following table describes the inputs imported from a struct. Although these
| agg_preemptible_tries | PapiSettings (papi_settings) | Number of preemtible machine tries for the BamtoCram task. | Int |
#### Additional inputs
-Additional inputs that are not contained in a struct are described in the table below. Similar to the struct inputs, these inputs are specified in the [example configuration files](https://github.com/broadinstitute/warp/tree/develop/pipelines/broad/dna_seq/germline/single_sample/wgs/input_files) or, when noted, are hardcoded into the WDL workflow.
+Additional inputs that are not contained in a struct are described in the table below. Similar to the struct inputs, these inputs are specified in the [example configuration files](https://github.com/broadinstitute/warp/tree/master/pipelines/broad/dna_seq/germline/single_sample/wgs/input_files) or, when noted, are hardcoded into the WDL workflow.
* Optional inputs, like the fingerprint_genotypes_file, need to match your input samples. For example, the fingerprint file in the workflow's [test input configuration JSON](https://github.com/broadinstitute/warp/blob/master/pipelines/broad/dna_seq/germline/single_sample/wgs/input_files/WholeGenomeGermlineSingleSample.inputs.plumbing.masked_reference.json) is set up to check fingerprints for the NA12878 Plumbing sample. The sample name in the VCF matches the name used for the `sample_name` input.
@@ -171,7 +171,7 @@ The table below details the subtasks called by the UnmappedBamToAlignedBam task,
| [Processing.SortSam](https://github.com/broadinstitute/warp/blob/master/tasks/broad/BamProcessing.wdl) | SortSam | Picard | Sorts the aggregated BAM by coordinate sort order. |
| [QC.CrossCheckFingerprints (CrossCheckFingerprints)](https://github.com/broadinstitute/warp/blob/master/tasks/broad/Qc.wdl) | CrosscheckFingerprints | Picard | Optionally checks fingerprints if haplotype database is provided. |
| [Utils.CreateSequenceGroupingTSV (CreateSequenceGroupingTSV)](https://github.com/broadinstitute/warp/blob/master/tasks/broad/Utilities.wdl) | --- | python | Creates the sequencing groupings used for BQSR and PrintReads Scatter. |
-| [Processing.CheckContamination](https://github.com/broadinstitute/warp/blob/master/tasks/broad/BamProcessing.wdl) | VerifyBamID | --- | Checks cross-sample contamination prior to variant calling. |
+| [Processing.CheckContamination](https://github.com/broadinstitute/warp/blob/master/tasks/broad/BamProcessing.wdl) | VerifyBamID2 | --- | Checks cross-sample contamination prior to variant calling. |
| [Processing.BaseRecalibrator (BaseRecalibrator)](https://github.com/broadinstitute/warp/blob/master/tasks/broad/BamProcessing.wdl) | BaseRecalibrator | GATK | If `perform_bqsr` is true, performs base recalibration by interval. When using the DRAGEN-GATK mode, `perform_bqsr` is optionally false as base calling errors are corrected in the DRAGEN variant calling step.|
| [Processing.GatherBqsrReports (GatherBqsrReports)](https://github.com/broadinstitute/warp/blob/master/tasks/broad/BamProcessing.wdl) | GatherBQSRReports | GATK | Merges the BQSR reports resulting from by-interval calibration. |
| [Processing.ApplyBQSR (ApplyBQSR)](https://github.com/broadinstitute/warp/blob/master/tasks/broad/BamProcessing.wdl) | ApplyBQSR | GATK | Applies the BQSR base recalibration model by interval. |
@@ -276,7 +276,7 @@ The table below describes the final workflow outputs. If running the workflow on
| read_group_gc_bias_pdf | PDF of the GC bias by readgroup for the aggregated BAM. | File |
| read_group_gc_bias_summary_metrics | GC bias summary metrics by readgroup for the aggregated BAM. | File |
| cross_check_fingerprints_metrics | Fingerprint metrics file if optional fingerprinting is performed. | File |
-| selfSM | Contamination estimate from VerifyBamID. | File |
+| selfSM | Contamination estimate from VerifyBamID2. | File |
| contamination | Estimated contamination from the CheckContamination task. | Float |
| calculate_read_group_checksum_md5 | MD5 checksum for aggregated BAM. | File |
| agg_alignment_summary_metrics | Alignment summary metrics for the aggregated BAM. | File |
@@ -318,7 +318,7 @@ As of November 2021, reblocking is a default task in the WGS pipeline. To skip r
"WholeGenomeGermlineSingleSample.BamToGvcf.skip_reblocking": true
```
-The [Reblocking task](https://github.com/broadinstitute/warp/blob/develop/tasks/broad/GermlineVariantDiscovery.wdl) uses the GATK ReblockGVCF tool with the arguments:
+The [Reblocking task](https://github.com/broadinstitute/warp/blob/master/tasks/broad/GermlineVariantDiscovery.wdl) uses the GATK ReblockGVCF tool with the arguments:
```WDL
-do-qual-approx -floor-blocks -GQB 20 -GQB 30 -GQB 40
@@ -371,7 +371,7 @@ The final CRAM files have base quality scores binned according to the [Functiona
## Contact us
-Please help us make our tools better by contacting [Kylee Degatano](mailto:kdegatano@broadinstitute.org) for pipeline-related suggestions or questions.
+Please help us make our tools better by contacting [the WARP team](mailto:warp-pipelines-help@broadinstitute.org) for pipeline-related suggestions or questions.
## Licensing
diff --git a/website/docs/Pipelines/Whole_Genome_Germline_Single_Sample_Pipeline/wgs.methods.md b/website/docs/Pipelines/Whole_Genome_Germline_Single_Sample_Pipeline/wgs.methods.md
index 255a5f87ae..dd19f5bd03 100644
--- a/website/docs/Pipelines/Whole_Genome_Germline_Single_Sample_Pipeline/wgs.methods.md
+++ b/website/docs/Pipelines/Whole_Genome_Germline_Single_Sample_Pipeline/wgs.methods.md
@@ -2,17 +2,17 @@
sidebar_position: 2
---
-# Whole Genome Germline Single Sample v3.0.0 Methods (Default workflow)
+# Whole Genome Germline Single Sample v3.1.6 Methods (Default workflow)
The following contains a detailed methods description outlining the pipeline’s process, software, and tools that can be modified for a publication methods section.
## Detailed methods for the default Whole Genome Germline Single Sample workflow
-Preprocessing and variant calling was performed using the WholeGenomeGermlineSingleSample 3.0.0 pipeline using Picard 2.23.8, GATK 4.2.2.0, and Samtools 1.11 with default tool parameters unless otherwise specified. All reference files are available in the public [Broad References Google Bucket](https://console.cloud.google.com/storage/browser/gcp-public-data--broad-references/hg38/v0). The pipeline follows GATK Best Practices as previously described ([Van der Auwera & O'Connor, 2020](https://www.oreilly.com/library/view/genomics-in-the/9781491975183/)) as well as the Functional Equivalence specification ([Regier et al., 2018](https://www.nature.com/articles/s41467-018-06159-4)).
+Preprocessing and variant calling was performed using the WholeGenomeGermlineSingleSample v3.1.6 pipeline using Picard v2.26.10, GATK v4.2.6.1, and Samtools v1.11 with default tool parameters unless otherwise specified. All reference files are available in the public [Broad References Google Bucket](https://console.cloud.google.com/storage/browser/gcp-public-data--broad-references/hg38/v0). The pipeline follows GATK Best Practices as previously described ([Van der Auwera & O'Connor, 2020](https://www.oreilly.com/library/view/genomics-in-the/9781491975183/)) as well as the Functional Equivalence specification ([Regier et al., 2018](https://www.nature.com/articles/s41467-018-06159-4)).
### Pre-processing and quality control metrics
-Whole genome paired-end reads in unmapped BAM (uBAM) format were first scattered to perform QC and alignment in parallel. Quality metrics were calculated using Picard CollectQualityYieldMetrics. uBAMs were converted to FASTQ using Picard SamToFastq and aligned to the Hg38 reference genome using BWA mem 0.7.15 with batch size set using -K 100000000. Metadata from the uBAMs was then merged with the aligned BAMs using Picard MergeBamAlignment with the parameters --SORT_ORDER="unsorted", allowing the data to be grouped by read name for efficient downstream marking of duplicates, and --UNMAP_CONTAMINANT_READS=true, to remove cross-species contamination.
+Whole genome paired-end reads in unmapped BAM (uBAM) format were first scattered to perform QC and alignment in parallel. Quality metrics were calculated using Picard CollectQualityYieldMetrics. uBAMs were converted to FASTQ using Picard SamToFastq and aligned to the Hg38 reference genome using BWA mem v0.7.15 with batch size set using -K 100000000. Metadata from the uBAMs was then merged with the aligned BAMs using Picard MergeBamAlignment with the parameters --SORT_ORDER="unsorted", allowing the data to be grouped by read name for efficient downstream marking of duplicates, and --UNMAP_CONTAMINANT_READS=true, to remove cross-species contamination.
QC metrics (base distribution by cycle, insert size metrics, mean quality by cycle, and quality score distribution) were collected for the aligned, unsorted read-groups using Picard CollectMultipleMetrics. The read-group specific aligned BAMs were then aggregated and duplicate reads were flagged using Picard MarkDuplicates assuming queryname-sorted order and the parameter --OPTICAL_DUPLICATE_PIXEL_DISTANCE=2500, which is appropriate for patterned flowcells.
@@ -34,7 +34,7 @@ The pipeline’s final outputs included metrics, validation reports, an aligned
## Detailed methods for the Functional Equivalence mode of the Whole Genome Germline Single Sample workflow
-Preprocessing and variant calling was performed using the WholeGenomeGermlineSingleSample 3.0.0 pipeline using Picard 2.23.8, GATK 4.2.2.0, and Samtools 1.11 with default tool parameters unless otherwise specified. All reference files are available in the public [Broad References Google Bucket](https://console.cloud.google.com/storage/browser/gcp-public-data--broad-references/hg38/v0). The pipeline is functionally equivalent (as described in GATK Support: https://gatk.broadinstitute.org/hc/en-us/articles/4410456501915) to DRAGEN version 3.4.12.
+Preprocessing and variant calling were performed using the WholeGenomeGermlineSingleSample v3.1.6 pipeline using Picard v2.26.10, GATK v4.2.6.1, and Samtools v1.11 with default tool parameters unless otherwise specified. All reference files are available in the public [Broad References Google Bucket](https://console.cloud.google.com/storage/browser/gcp-public-data--broad-references/hg38/v0). The pipeline is functionally equivalent (as described in GATK Support: https://gatk.broadinstitute.org/hc/en-us/articles/4410456501915) to DRAGEN v3.4.12.
### Pre-processing and quality control metrics
@@ -57,5 +57,6 @@ Prior to variant calling, the DRAGEN STR model was calibrated using the Calibrat
The pipeline’s final outputs included metrics, validation reports, an aligned CRAM with index, and a reblocked GVCF containing variant calls with an accompanying index.
## Previous methods documents
+- [WholeGenomeGermlineSingleSample_v3.0.0](https://github.com/broadinstitute/warp/blob/WholeGenomeGermlineSingleSample_v3.0.0/website/docs/Pipelines/Whole_Genome_Germline_Single_Sample_Pipeline/wgs.methods.md)
- [WholeGenomeGermlineSingleSample_v2.5.0](https://github.com/broadinstitute/warp/blob/WholeGenomeGermlineSingleSample_v2.5.0/website/docs/Pipelines/Whole_Genome_Germline_Single_Sample_Pipeline/wgs.methods.md)
- [WholeGenomeGermlineSingleSample_v2.3.7](https://github.com/broadinstitute/warp/blob/WholeGenomeGermlineSingleSample_v2.3.7/website/docs/Pipelines/Whole_Genome_Germline_Single_Sample_Pipeline/wgs.methods.md)