diff --git a/.pullapprove.yml b/.pullapprove.yml index 0b9e694ebf..315688e47c 100644 --- a/.pullapprove.yml +++ b/.pullapprove.yml @@ -47,6 +47,7 @@ groups: 'tasks/broad/IlluminaGenotypingArrayTasks.wdl' in files or 'tasks/broad/InternalArraysTasks.wdl' in files or 'tasks/broad/InternalTasks.wdl' in files or + 'tasks/broad/Qc.wdl' in files or 'tasks/broad/Utilities.wdl' in files or 'verification/VerifyArrays.wdl' in files or 'verification/VerifyIlluminaGenotypingArray.wdl' in files or @@ -138,6 +139,8 @@ groups: 'tasks/broad/BamProcessing.wdl' in files or 'tasks/broad/BamToCram.wdl' in files or 'tasks/broad/CopyFilesFromCloudToCloud.wdl' in files or + 'tasks/broad/DragenTasks.wdl' in files or + 'tasks/broad/DragmapAlignment.wdl' in files or 'tasks/broad/GermlineVariantDiscovery.wdl' in files or 'tasks/broad/Qc.wdl' in files or 'tasks/broad/SplitLargeReadGroup.wdl' in files or @@ -148,6 +151,7 @@ groups: 'verification/VerifyReprocessing.wdl' in files or 'verification/VerifyTasks.wdl' in files or 'pipelines/broad/dna_seq/germline/single_sample/exome' in files or + 'pipelines/broad/dna_seq/germline/single_sample/ugwgs' in files or 'pipelines/broad/dna_seq/germline/single_sample/wgs' in files or 'pipelines/broad/reprocessing/cram_to_unmapped_bams' in files or 'pipelines/broad/reprocessing/exome' in files or @@ -160,7 +164,7 @@ groups: request: 2 reviewers: users: - - ldgauthier # Laura Gauthier + - samuelklee # Samuel Lee - kachulis # Chris Kachulis scientific_owners_joint_genotyping: @@ -173,13 +177,19 @@ groups: 'pipelines/broad/dna_seq/germline/joint_genotyping/by_chromosome/JointGenotypingByChromosomePartOne.wdl' in files or 'pipelines/broad/dna_seq/germline/joint_genotyping/by_chromosome/JointGenotypingByChromosomePartTwo.wdl' in files or 'pipelines/broad/dna_seq/germline/joint_genotyping/reblocking/ReblockGVCF.wdl' in files or + 'tasks/broad/GermlineVariantDiscovery.wdl' in files or 'tasks/broad/JointGenotypingTasks.wdl' in files or + 'tasks/broad/Qc.wdl' in 
files or 'pipelines/broad/dna_seq/germline/joint_genotyping/JointGenotyping.changelog.md' in files or 'pipelines/broad/dna_seq/germline/joint_genotyping/JointGenotyping.options.json' in files or 'pipelines/broad/dna_seq/germline/joint_genotyping/JointGenotyping.wdl' in files or + 'pipelines/broad/dna_seq/germline/joint_genotyping/JointGenotypingOnReblockedValidate.md' in files or + 'pipelines/broad/dna_seq/germline/joint_genotyping/UltimaGenomics' in files or 'pipelines/broad/dna_seq/germline/joint_genotyping/by_chromosome' in files or 'pipelines/broad/dna_seq/germline/joint_genotyping/exome' in files or 'pipelines/broad/dna_seq/germline/joint_genotyping/reblocking' in files or + 'pipelines/broad/dna_seq/germline/joint_genotyping/test_data_overview.md' in files or + 'pipelines/broad/dna_seq/germline/joint_genotyping/test_inputs' in files or 'pipelines/broad/dna_seq/germline/joint_genotyping/wgs' in files reviews: @@ -189,7 +199,7 @@ groups: request_order: given reviewers: users: - - ldgauthier # Laura Gauthier + - samuelklee # Samuel Lee scientific_owners_somatic_single_sample: conditions: @@ -203,6 +213,7 @@ groups: 'tasks/broad/Alignment.wdl' in files or 'tasks/broad/BamProcessing.wdl' in files or 'tasks/broad/BamToCram.wdl' in files or + 'tasks/broad/DragmapAlignment.wdl' in files or 'tasks/broad/Qc.wdl' in files or 'tasks/broad/SplitLargeReadGroup.wdl' in files or 'tasks/broad/UnmappedBamToAlignedBam.wdl' in files or @@ -235,7 +246,8 @@ groups: 'pipelines/broad/arrays/imputation/Imputation.changelog.md' in files or 'pipelines/broad/arrays/imputation/Imputation.options.json' in files or 'pipelines/broad/arrays/imputation/Imputation.wdl' in files or - 'pipelines/broad/arrays/imputation/example_inputs.json' in files + 'pipelines/broad/arrays/imputation/example_inputs.json' in files or + 'pipelines/broad/arrays/imputation/test_inputs' in files reviews: required: 1 @@ -262,6 +274,8 @@ groups: 'tasks/broad/Alignment.wdl' in files or 'tasks/broad/BamProcessing.wdl' in 
files or 'tasks/broad/BamToCram.wdl' in files or + 'tasks/broad/DragenTasks.wdl' in files or + 'tasks/broad/DragmapAlignment.wdl' in files or 'tasks/broad/GermlineVariantDiscovery.wdl' in files or 'tasks/broad/Qc.wdl' in files or 'tasks/broad/SplitLargeReadGroup.wdl' in files or diff --git a/beta-pipelines/skylab/ATAC/ATAC.wdl b/beta-pipelines/skylab/ATAC/ATAC.wdl index c1c791e1ba..d61d8ba9de 100644 --- a/beta-pipelines/skylab/ATAC/ATAC.wdl +++ b/beta-pipelines/skylab/ATAC/ATAC.wdl @@ -230,7 +230,7 @@ task BWAPairedEndAlignment { String read_group_sample_name Int cpu String output_base_name - String docker_image = "quay.io/humancellatlas/snaptools:0.0.1" + String docker_image = "us.gcr.io/broad-gotc-prod/bwa:1.0.0-0.7.17-1660770463" } parameter_meta { @@ -241,7 +241,7 @@ task BWAPairedEndAlignment { read_group_sample_name: "the read group sample to be added upon alignment" cpu: "the number of cpu cores to use during alignment" output_base_name: "basename to be used for the output of the task" - docker_image: "the docker image using BWA to be used (default: quay.io/humancellatlas/snaptools:0.0.1)" + docker_image: "the docker image using BWA to be used (default: us.gcr.io/broad-gotc-prod/pytools:1.0.0-1661263730)" } # runtime requirements based upon input file size @@ -580,7 +580,7 @@ task SnapPre { String genome_name Int max_fragment_length File genome_size_file - String docker_image = "quay.io/humancellatlas/snaptools:0.0.1" + String docker_image = "us.gcr.io/broad-gotc-prod/snaptools-bwa:1.0.0-1.4.8-0.7.17-1660844602" } parameter_meta { @@ -589,7 +589,7 @@ task SnapPre { genome_name: "the name of the genome being analyzed" max_fragment_length: "the maximum fragment length for filtering out reads by snap-pre (snaptools task)" genome_size_file: "size for the chromoomes for the genome; ex: mm10.chrom.size" - docker_image: "the docker image using snaptools to be used (default: quay.io/humancellatlas/snaptools:0.0.1)" + docker_image: "the docker image using snaptools 
to be used (default: us.gcr.io/broad-gotc-prod/snaptools-bwa:1.0.0-1.4.8-0.7.17-1660844602)" } String snap_file_output_name = output_base_name + ".snap" @@ -635,14 +635,14 @@ task SnapCellByBin { File snap_input String bin_size_list String snap_output_name = "output.snap" - String docker_image = "quay.io/humancellatlas/snaptools:0.0.1" + String docker_image = "us.gcr.io/broad-gotc-prod/snaptools-bwa:1.0.0-1.4.8-0.7.17-1660844602" } parameter_meta { snap_input: "the bam to passed into snaptools tools" bin_size_list: "space separated list of bins to generate" snap_output_name: "output.snap" - docker_image: "the docker image to be used (default: quay.io/humancellatlas/snaptools:0.0.1)" + docker_image: "the docker image to be used (default: us.gcr.io/broad-gotc-prod/snaptools-bwa:1.0.0-1.4.8-0.7.17-1660844602)" } Int num_threads = 1 @@ -673,13 +673,13 @@ task MakeCompliantBAM { input { File bam_input String output_base_name - String docker_image = "quay.io/humancellatlas/snaptools:0.0.1" + String docker_image = "us.gcr.io/broad-gotc-prod/pytools:1.0.0-1661263730" } parameter_meta { bam_input: "the bam with barcodes in the read ids that need to be converted to barcodes in bam tags" output_base_name: "base name to be used for the output of the task" - docker_image: "the docker image using the python script to convert the bam barcodes/read ids (default: quay.io/humancellatlas/snaptools:0.0.1)" + docker_image: "the docker image using the python script to convert the bam barcodes/read ids (default: us.gcr.io/broad-gotc-prod/pytools:1.0.0-1661263730)" } Int disk_size = ceil(2.5 * (if size(bam_input, "GiB") < 1 then 1 else size(bam_input, "GiB"))) @@ -687,7 +687,7 @@ task MakeCompliantBAM { String compliant_bam_output_name = output_base_name + ".compliant.bam" command { - makeCompliantBAM.py \ + /usr/gitc/makeCompliantBAM.py \ --input-bam ~{bam_input} \ --output-bam ~{compliant_bam_output_name} } @@ -707,7 +707,7 @@ task MakeCompliantBAM { task BreakoutSnap { input { File 
snap_input - String docker_image = "quay.io/humancellatlas/snap-breakout:0.0.1" + String docker_image = "us.gcr.io/broad-gotc-prod/pytools:1.0.0-1661263730" String bin_size_list } Int num_threads = 1 @@ -715,7 +715,7 @@ task BreakoutSnap { command { set -euo pipefail mkdir output - breakoutSnap.py --input ~{snap_input} \ + /usr/gitc/breakoutSnap.py --input ~{snap_input} \ --output-prefix output/ } output { diff --git a/dockers/README.md b/dockers/README.md index 7a9df45686..e8c9ed7da0 100644 --- a/dockers/README.md +++ b/dockers/README.md @@ -8,6 +8,7 @@ This style guide provides formatting guidelines and best practices for writing D * [Goals](#goals) * [Small images](#small) * [Alpine base](#alpine) + * [Specifying image platform](#platform) * [Minimal RUN steps](#minimal-run) * [Publicly accessible](#publicly) * [Image scanning](#scanning) @@ -15,7 +16,7 @@ This style guide provides formatting guidelines and best practices for writing D * [Proper process reaping](#process) * [Build Scripts and README](#build) * [Formatting](#formatting) -* [Troubleshooting](#trouble) +* [Troubleshooting and running standalone](#trouble) ## Overview WARP maintains a collection of docker images which are used as execution environments for various cloud-optimized data processing pipelines. Many of these image require specific sets of tools and dependencies to run and can be thought of as _custom_ images rather than traditional application images. @@ -35,7 +36,7 @@ The easiest way to have a small image is to use an [Alpine](https://alpinelinux. Along with being a small base, Alpine also has built in deletion of package index and provides [tini](https://github.com/krallin/tini) natively through APK. -There are some instances where a Debian base image is unavoidable, specifically in the case where dependencies don't exists in APK. It is suggested that you only go to a Debian base as a last resort. 
+There are some instances where a Debian base image is unavoidable, specifically in the case where dependencies don't exist in APK. It is suggested that you only go to a Debian base as a last resort. ##### :eyes: Example @@ -62,12 +63,23 @@ RUN set -eux; \ bash \ ``` +#### Specifying image platform + +Docker images built on ARM-based machines such as the new M-series Macs may run into execution issues with our automated PR test suite. +One way to avoid these issues is to use a `linux/amd64` base image by including the `--platform="linux/amd64"` flag after the `FROM` keyword. + +##### :eyes: Example +```dockerfile +# Use the amd64 version of alpine +FROM --platform="linux/amd64" alpine +``` + #### Minimal RUN steps Having minimal `RUN`steps (ideally one) is another highly effective way to reduce the size of your image. Each instruction in a Dockerfile creates a [layer](https://docs.docker.com/storage/storagedriver/) and these layers are what add up to build the final image. -When you use multple `RUN` steps it creates additional unnecessary layers and bloats your image. +When you use multiple `RUN` steps it creates additional unnecessary layers and bloats your image. -An alternative to having a single `RUN` step is to use [multi-stage builds](https://docs.docker.com/develop/develop-images/multistage-build/) which are effective when the application your are containerizing is just a statically linked binary. +An alternative to having a single `RUN` step is to use [multi-stage builds](https://docs.docker.com/develop/develop-images/multistage-build/) which are effective when the application you are containerizing is just a statically linked binary. Just to note, many of the images maintained in WARP require a handful of system-level dependencies and custom packages so multi-stages builds are typically not used. 
##### :eyes: Example @@ -84,7 +96,7 @@ RUN set -eux; \ apk add --no-cache \ curl \ bash \ - ; \ + ; \ wget https://www.somezipfile.com/zip; \ unzip zip ``` @@ -96,22 +108,22 @@ The pipelines that we maintain in WARP are designed for public use, ideally we w * Anybody can pull our images * Anybody can build our images -For anybody to be able to pull our images they must be hosted on a public container registry, we host all of our images in publics repos on GCR (our 'official' location) and Quay (for discoverability). +For anybody to be able to pull our images they must be hosted on a public container registry, we host all of our images in public repos on GCR (our 'official' location) and Quay (for discoverability). * GCR - `us.gcr.io/broad-gotc-prod` * Quay - `quay.io/broadinstitute/broad-gotc-prod` -For anybody to be able to build our images all of the functionality should be encapsulated in the Dockerfile. Any custom software packages, dependencies etc. have to be downloaded from public links within the Dockerfile, this obviously means that we should not be copying files from within the Broad network infrastucture into our images. +For anybody to be able to build our images, all functionality should be encapsulated in the Dockerfile. Any custom software packages, dependencies etc. have to be downloaded from public links within the Dockerfile, this obviously means that we should not be copying files from within the Broad network infrastructure into our images. ### Image scanning -All of the images that we build are scanned for critical vulnerabilities on every pull request. For this we use a github-action that leverages [trivy](https://github.com/aquasecurity/trivy) for scanning. If you build a new image please add it to the action [here](../.github/workflows/trivy.yml). +All images that we build are scanned for critical vulnerabilities on every pull request. For this we use a github-action that leverages [trivy](https://github.com/aquasecurity/trivy) for scanning. 
If you build a new image please add it to the action [here](../.github/workflows/trivy.yml). ### Semantic tagging -We recommend against using rolling tags like `master` or `latest` when building images. Rolling tags make it hard to track down versions of images since the underlying image hash and content could be different across the same tags. Instead we ask that you use a semantic tag that follows the convention below: +We recommend against using rolling tags like `master` or `latest` when building images. Rolling tags make it hard to track down versions of images since the underlying image hash and content could be different across the same tags. Instead, we ask that you use a semantic tag that follows the convention below: ##### `us.gcr.io/broad-gotc-prod/samtools:--` @@ -120,7 +132,7 @@ This example is for an image we use containing `samtools`. The 'image-version' i ### Proper process reaping -Classic init systems like systemd are used to reap orphaned, zombie processes. Typically these orphaned processes are reattached to the process at PID 1 which will reap them when they die. In a container this responsibility falls to process at PID 1 which is by default `/bin/sh`...this obviously will not handle process reaping. Because of this you run the risk of expending excess memory or resources within your container. A simple solution to this is to use `tini` in all of our images, a lengthy explanation of what this package does can be found [here](https://github.com/krallin/tini/issues/8). +Classic init systems like systemd are used to reap orphaned, zombie processes. Typically, these orphaned processes are reattached to the process at PID 1 which will reap them when they die. In a container this responsibility falls to process at PID 1 which is by default `/bin/sh`...this obviously will not handle process reaping. Because of this you run the risk of expending excess memory or resources within your container. 
A simple solution to this is to use `tini` in all of our images, a lengthy explanation of what this package does can be found [here](https://github.com/krallin/tini/issues/8). Luckily `tini` is available natively through APK so all you have to do is install it and set it as the default entrypoint! @@ -129,7 +141,7 @@ Luckily `tini` is available natively through APK so all you have to do is instal FROM alpine:3.9 -RUN set -eux; +RUN set -eux; \ apk add --no-cache \ tini @@ -146,7 +158,7 @@ See the examples for samtools([docker_build](./broad/samtools/docker_build.sh), ## Formatting -Formatting our Dockerfiles consistenty helps improve readability and eases maintenance headaches down the road. The following are a couple of tenants that we follow when writing our Dockerfiles: +Formatting our Dockerfiles consistently helps improve readability and eases maintenance headaches down the road. The following are a couple of tenets that we follow when writing our Dockerfiles: * ARGS, ENV, LABEL in that order * Always add versions of tools in the LABEL @@ -154,7 +166,7 @@ Formatting our Dockerfiles consistenty helps improve readability and eases maint * Alphabetize package install * Clean up package index cache * Use ; instead of && for line continuation -* Logically seperate steps within RUN +* Logically separate steps within RUN * Four spaces per tab indent * Short comments to describe each step * tini is always default entrypoint @@ -180,13 +192,13 @@ WORKDIR /usr/gitc # Install dependencies RUN set -eux; \ apt-get update; \ - apt-get install -y \ + apt-get install -y \ autoconf \ cmake \ g++ \ gcc \ git \ - libbz2-dev \ + libbz2-dev \ libcurl4-openssl-dev \ libhts-dev \ libssl-dev \ @@ -222,6 +234,12 @@ RUN set -eux; \ ENTRYPOINT [ "/sbin/tini", "--" ] ``` -## Troubleshooting +## Troubleshooting and running standalone + +The WARP dockers are designed to be run from their respective WDL pipelines. 
However, if you need to run a Docker independent of a WDL for testing or troubleshooting, you'll likely need to explicitly instruct it to run a `bash` shell in the `run` command. An example of this is shown in the terminal command below: + +```bash +docker run -it --rm <docker-image> bash +``` If you have any questions or would like some more guidance on writing Dockerfiles please file a [GitHub issue in WARP](https://github.com/broadinstitute/warp/issues/new). diff --git a/dockers/broad/imputation/bcftools_vcftools/Dockerfile b/dockers/broad/imputation/bcftools_vcftools/Dockerfile index 843e5992ec..7385a5fefd 100644 --- a/dockers/broad/imputation/bcftools_vcftools/Dockerfile +++ b/dockers/broad/imputation/bcftools_vcftools/Dockerfile @@ -1,4 +1,4 @@ -FROM python:3.8-alpine +FROM --platform=linux/amd64 python:3.8-alpine ARG BCFTOOLS_VERSION=1.10.2 \ VCFTOOLS_VERSION=0.1.16 @@ -70,7 +70,7 @@ RUN set -eux; \ ./configure; \ make; \ make install; \ - \ + \ cd ../..; \ rm -r samtools-1.10; \ rm samtools-1.10.tar.bz2 diff --git a/dockers/broad/imputation/bcftools_vcftools/README.md b/dockers/broad/imputation/bcftools_vcftools/README.md index 5bfc4fa396..ec914326ce 100644 --- a/dockers/broad/imputation/bcftools_vcftools/README.md +++ b/dockers/broad/imputation/bcftools_vcftools/README.md @@ -4,7 +4,7 @@ Copy and paste to pull this image -#### `us.gcr.io/broad-gotc-prod/imputation-bcf-vcf:1.0.5-1.10.2-0.1.16-1649948623` +#### `us.gcr.io/broad-gotc-prod/imputation-bcf-vcf:1.0.6-1.10.2-0.1.16-1663946207` - __What is this image:__ This image is a lightweight alpine-based image for running BCFtools and VCFtools for the [Imputation pipeline](../../../../pipelines/broad/arrays/imputation/Imputation.wdl). - __What are BFCtools and VCFtools:__ BCFtools and VCFtools are a suite of tools for variant calling and manipulating BCFs and VCFs. See [here](https://github.com/samtools/vcftools) and [here](https://vcftools.github.io/man_latest.html) more information. 
@@ -21,8 +21,8 @@ We keep track of all past versions in [docker_versions](docker_versions.tsv) wit You can see more information about the image, including the tool versions, by running the following command: ```bash -$ docker pull us.gcr.io/broad-gotc-prod/imputation-bcf-vcf:1.0.5-1.10.2-0.1.16-1649948623 -$ docker inspect us.gcr.io/broad-gotc-prod/imputation-bcf-vcf:1.0.5-1.10.2-0.1.16-1649948623 +$ docker pull us.gcr.io/broad-gotc-prod/imputation-bcf-vcf:1.0.6-1.10.2-0.1.16-1663946207 +$ docker inspect us.gcr.io/broad-gotc-prod/imputation-bcf-vcf:1.0.6-1.10.2-0.1.16-1663946207 ``` ## Usage @@ -31,12 +31,12 @@ $ docker inspect us.gcr.io/broad-gotc-prod/imputation-bcf-vcf:1.0.5-1.10.2-0.1.1 ```bash $ docker run --rm -it \ - us.gcr.io/broad-gotc-prod/imputation-bcf-vcf:1.0.5-1.10.2-0.1.16-1649948623 bcftools + us.gcr.io/broad-gotc-prod/imputation-bcf-vcf:1.0.6-1.10.2-0.1.16-1663946207 bcftools ``` ### Display VCFtools default menu ```bash $ docker run --rm -it \ - us.gcr.io/broad-gotc-prod/imputation-bcf-vcf:1.0.5-1.10.2-0.1.16-1649948623 vcftools + us.gcr.io/broad-gotc-prod/imputation-bcf-vcf:1.0.6-1.10.2-0.1.16-1663946207 vcftools ``` diff --git a/dockers/broad/imputation/bcftools_vcftools/docker_build.sh b/dockers/broad/imputation/bcftools_vcftools/docker_build.sh index 5ab6ba8906..11c6ba2634 100755 --- a/dockers/broad/imputation/bcftools_vcftools/docker_build.sh +++ b/dockers/broad/imputation/bcftools_vcftools/docker_build.sh @@ -2,13 +2,13 @@ set -e # Update version when changes to Dockerfile are made -DOCKER_IMAGE_VERSION=1.0.5 +DOCKER_IMAGE_VERSION=1.0.6 TIMESTAMP=$(date +"%s") DIR=$(cd $(dirname $0) && pwd) # Registries and tags GCR_URL="us.gcr.io/broad-gotc-prod/imputation-bcf-vcf" -QUAY_URL="quay.io/broadinstitute/gotc-prod-imputation_bcf_vcf" +# QUAY_URL="quay.io/broadinstitute/gotc-prod-imputation_bcf_vcf" #BCFTOOLS version BCFTOOLS_VERSION="1.10.2" @@ -72,9 +72,9 @@ function main(){ --no-cache $DIR docker push "$GCR_URL:$IMAGE_TAG" - echo "tagging and 
pushing Quay Image" - docker tag "$GCR_URL:$IMAGE_TAG" "$QUAY_URL:$IMAGE_TAG" - docker push "$QUAY_URL:$IMAGE_TAG" +# echo "tagging and pushing Quay Image" +# docker tag "$GCR_URL:$IMAGE_TAG" "$QUAY_URL:$IMAGE_TAG" +# docker push "$QUAY_URL:$IMAGE_TAG" echo -e "$GCR_URL:$IMAGE_TAG" >> "$DIR/docker_versions.tsv" echo "done" diff --git a/dockers/broad/imputation/bcftools_vcftools/docker_versions.tsv b/dockers/broad/imputation/bcftools_vcftools/docker_versions.tsv index 08e715f92b..09e858eb30 100644 --- a/dockers/broad/imputation/bcftools_vcftools/docker_versions.tsv +++ b/dockers/broad/imputation/bcftools_vcftools/docker_versions.tsv @@ -6,3 +6,4 @@ us.gcr.io/broad-gotc-prod/imputation-bcf-vcf:1.0.2-1.10.2-0.1.16-1642608127 us.gcr.io/broad-gotc-prod/imputation-bcf-vcf:1.0.3-1.10.2-0.1.16-1644255588 us.gcr.io/broad-gotc-prod/imputation-bcf-vcf:1.0.4-1.10.2-0.1.16-1646091598 us.gcr.io/broad-gotc-prod/imputation-bcf-vcf:1.0.5-1.10.2-0.1.16-1649948623 +us.gcr.io/broad-gotc-prod/imputation-bcf-vcf:1.0.6-1.10.2-0.1.16-1663946207 diff --git a/dockers/broad/imputation/minimac4/Dockerfile b/dockers/broad/imputation/minimac4/Dockerfile index 0ae8020a38..c71139c592 100644 --- a/dockers/broad/imputation/minimac4/Dockerfile +++ b/dockers/broad/imputation/minimac4/Dockerfile @@ -1,5 +1,5 @@ # Minimac image uses bcftools -FROM us.gcr.io/broad-gotc-prod/imputation-bcf-vcf:1.0.5-1.10.2-0.1.16-1649948623 +FROM --platform=linux/amd64 us.gcr.io/broad-gotc-prod/imputation-bcf-vcf:1.0.6-1.10.2-0.1.16-1663946207 ARG MINIMAC4_VERSION=1.0.2 diff --git a/dockers/broad/imputation/minimac4/README.md b/dockers/broad/imputation/minimac4/README.md index 7bdddb3dfe..b37c3fadd0 100644 --- a/dockers/broad/imputation/minimac4/README.md +++ b/dockers/broad/imputation/minimac4/README.md @@ -4,7 +4,7 @@ Copy and paste to pull this image -#### `us.gcr.io/broad-gotc-prod/imputation-minimac4:1.0.5-1.0.2-1649949471` +#### `us.gcr.io/broad-gotc-prod/imputation-minimac4:1.0.6-1.0.2-1663948783` - __What is this 
image:__ This image is a lightweight alpine-based image for running Minimac4 in the [Imputation pipeline](../../../../pipelines/broad/arrays/imputation/Imputation.wdl). - __What is Minimac4:__ Minimac4 is a low-memory and computationally efficient piece of software for genotype imputation. See [here](https://github.com/statgen/Minimac4) more information. @@ -21,8 +21,8 @@ We keep track of all past versions in [docker_versions](docker_versions.tsv) wit You can see more information about the image, including the tool versions, by running the following command: ```bash -$ docker pull us.gcr.io/broad-gotc-prod/imputation-minimac4:1.0.5-1.0.2-1649949471 -$ docker inspect us.gcr.io/broad-gotc-prod/imputation-minimac4:1.0.5-1.0.2-1649949471 +$ docker pull us.gcr.io/broad-gotc-prod/imputation-minimac4:1.0.6-1.0.2-1663948783 +$ docker inspect us.gcr.io/broad-gotc-prod/imputation-minimac4:1.0.6-1.0.2-1663948783 ``` ## Usage @@ -31,5 +31,5 @@ $ docker inspect us.gcr.io/broad-gotc-prod/imputation-minimac4:1.0.5-1.0.2-16499 ```bash $ docker run --rm -it \ - us.gcr.io/broad-gotc-prod/imputation-minimac4:1.0.5-1.0.2-1649949471 /usr/gitc/minimac4 + us.gcr.io/broad-gotc-prod/imputation-minimac4:1.0.6-1.0.2-1663948783 /usr/gitc/minimac4 ``` \ No newline at end of file diff --git a/dockers/broad/imputation/minimac4/docker_build.sh b/dockers/broad/imputation/minimac4/docker_build.sh index 0cac15aaa1..1ad3e1bacb 100755 --- a/dockers/broad/imputation/minimac4/docker_build.sh +++ b/dockers/broad/imputation/minimac4/docker_build.sh @@ -2,13 +2,13 @@ set -e # Update version when changes to Dockerfile are made -DOCKER_IMAGE_VERSION=1.0.5 +DOCKER_IMAGE_VERSION=1.0.6 TIMESTAMP=$(date +"%s") DIR=$(cd $(dirname $0) && pwd) # Registries and tags GCR_URL="us.gcr.io/broad-gotc-prod/imputation-minimac4" -QUAY_URL="quay.io/broadinstitute/gotc-prod-imputation_minimac" +#QUAY_URL="quay.io/broadinstitute/gotc-prod-imputation_minimac" # Minimac4 version MINIMAC4_VERSION="1.0.2" @@ -62,9 +62,9 @@ 
function main(){ --no-cache $DIR docker push "$GCR_URL:$IMAGE_TAG" - echo "tagging and pushing Quay Image" - docker tag "$GCR_URL:$IMAGE_TAG" "$QUAY_URL:$IMAGE_TAG" - docker push "$QUAY_URL:$IMAGE_TAG" +# echo "tagging and pushing Quay Image" +# docker tag "$GCR_URL:$IMAGE_TAG" "$QUAY_URL:$IMAGE_TAG" +# docker push "$QUAY_URL:$IMAGE_TAG" echo -e "$GCR_URL:$IMAGE_TAG" >> "$DIR/docker_versions.tsv" echo "done" diff --git a/dockers/broad/imputation/minimac4/docker_versions.tsv b/dockers/broad/imputation/minimac4/docker_versions.tsv index a255f4e95d..4802e23227 100644 --- a/dockers/broad/imputation/minimac4/docker_versions.tsv +++ b/dockers/broad/imputation/minimac4/docker_versions.tsv @@ -6,3 +6,4 @@ us.gcr.io/broad-gotc-prod/imputation-minimac4:1.0.2-1.0.2-1642621010 us.gcr.io/broad-gotc-prod/imputation-minimac4:1.0.3-1.0.2-1644331595 us.gcr.io/broad-gotc-prod/imputation-minimac4:1.0.4-1.0.2-1646143013 us.gcr.io/broad-gotc-prod/imputation-minimac4:1.0.5-1.0.2-1649949471 +us.gcr.io/broad-gotc-prod/imputation-minimac4:1.0.6-1.0.2-1663948783 diff --git a/dockers/broad/picard_python/Dockerfile b/dockers/broad/picard_python/Dockerfile index e53faa0cc8..5f6855f669 100644 --- a/dockers/broad/picard_python/Dockerfile +++ b/dockers/broad/picard_python/Dockerfile @@ -1,4 +1,4 @@ -FROM python:3.8-alpine +FROM --platform=linux/amd64 python:3.8-alpine ARG PICARD_PUBLIC_VERSION=2.26.10 @@ -12,8 +12,9 @@ WORKDIR /usr/gitc # Install dependencies RUN set -eux; \ + apk upgrade; \ apk add --no-cache \ - bash \ + bash \ gcompat \ libc6-compat \ openjdk8 \ diff --git a/dockers/broad/picard_python/README.md b/dockers/broad/picard_python/README.md index 15f04d3879..b73c2a3855 100644 --- a/dockers/broad/picard_python/README.md +++ b/dockers/broad/picard_python/README.md @@ -4,8 +4,7 @@ Copy and paste to pull this image -#### `docker pull us.gcr.io/broad-gotc-prod/picard-python:1.0.0-2.26.10-1647265026` -` +#### `docker pull us.gcr.io/broad-gotc-prod/picard-python:1.0.0-2.26.10-1663951039` - 
__What is this image:__ This image is a lightweight alpine-based custom image for running Picard and Python, it uses `python:3.8-alpine` as a base image. - __What is Picard:__ Picard is a set of command line tools for manipulating high-throughput sequencing (HTS) data and formats, [more info](https://github.com/broadinstitute/picard). @@ -15,15 +14,15 @@ Copy and paste to pull this image Picard_Python uses the following convention for verisoning: -#### `us.gcr.io/broad-gotc-prod/picard-python:1.0.0-2.26.10-1647265026` +#### `us.gcr.io/broad-gotc-prod/picard-python:1.0.0-2.26.10-1663951039` We keep track of all past versions in [docker_versions](docker_versions.tsv) with the last image listed being the currently used version in WARP. You can see more information about the image, including the tool versions, by running the following command: ```bash -$ docker pull us.gcr.io/broad-gotc-prod/picard-python:1.0.0-2.26.10-1647265026 -$ docker inspect us.gcr.io/broad-gotc-prod/picard-python:1.0.0-2.26.10-1647265026 +$ docker pull us.gcr.io/broad-gotc-prod/picard-python:1.0.0-2.26.10-1663951039 +$ docker inspect us.gcr.io/broad-gotc-prod/picard-python:1.0.0-2.26.10-1663951039 ``` ## Usage diff --git a/dockers/broad/picard_python/docker_versions.tsv b/dockers/broad/picard_python/docker_versions.tsv index 99dfae8d7e..2f3c475665 100644 --- a/dockers/broad/picard_python/docker_versions.tsv +++ b/dockers/broad/picard_python/docker_versions.tsv @@ -1,2 +1,2 @@ DOCKER_VERSION -us.gcr.io/broad-gotc-prod/picard-python:1.0.0-2.26.10-1647265026 \ No newline at end of file +us.gcr.io/broad-gotc-prod/picard-python:1.0.0-2.26.10-1663951039 diff --git a/dockers/broad/samtools_star/README.md b/dockers/broad/samtools_star/README.md index f40f6256d6..686a77ee84 100644 --- a/dockers/broad/samtools_star/README.md +++ b/dockers/broad/samtools_star/README.md @@ -33,7 +33,7 @@ $ docker inspect us.gcr.io/broad-gotc-prod/samtools-star:1.0.0-1.11-2.7.10a-1642 ```bash $ docker run --rm -it \ - 
us.gcr.io/broad-gotc-prod/samtools-picard-bwa:1.0.0-0.7.15-2.23.8-1626449438 STAR + us.gcr.io/broad-gotc-prod/samtools-star:1.0.0-1.11-2.7.10a-1642556627 STAR ``` ### Samtools diff --git a/dockers/skylab/HCA_post_processing/Dockerfile b/dockers/skylab/HCA_post_processing/Dockerfile deleted file mode 100644 index 36dfe2d0f3..0000000000 --- a/dockers/skylab/HCA_post_processing/Dockerfile +++ /dev/null @@ -1,17 +0,0 @@ -FROM gcr.io/google.com/cloudsdktool/cloud-sdk - -LABEL maintainer="Lantern Team " - -RUN pip3 install --upgrade pip - -COPY requirements.txt . -RUN pip3 install numpy==1.17.0 -RUN pip3 install cython==0.29.15 -RUN pip3 install -r requirements.txt - -RUN mkdir /tools -WORKDIR /tools - -COPY HCA_create_adapter_json.py . -COPY file_utils.sh . -COPY create_input_metadata_json.py . diff --git a/dockers/skylab/HCA_post_processing/HCA_create_adapter_json.py b/dockers/skylab/HCA_post_processing/HCA_create_adapter_json.py deleted file mode 100755 index 12b91de859..0000000000 --- a/dockers/skylab/HCA_post_processing/HCA_create_adapter_json.py +++ /dev/null @@ -1,290 +0,0 @@ -#!/usr/bin/env python3 - -import argparse -import json -import uuid -import re -import os -import subprocess - -NAMESPACE = uuid.UUID('c6591d1d-27bc-4c94-bd54-1b51f8a2456c') - - -def get_uuid5(value_to_hash): - return str(uuid.uuid5(NAMESPACE, value_to_hash)) - - -def get_analysis_workflow_id(analysis_output_path): - """Parse the analysis workflow id from one of its output paths, and write the id to a file so that it is available - outside of the get_analysis task. - Args: - analysis_output_path (str): path to workflow output file. - Returns: - workflow_id (str): string giving Cromwell UUID of the workflow. 
- """ - # Get the last match for UUID prior to the file name (in case the file is - # named with a UUID) to ensure it is the subworkflow id - url = analysis_output_path.rsplit('/', 1)[0] - uuid_regex = r"([a-z0-9]{8}-[a-z0-9]{4}-[a-z0-9]{4}-[a-z0-9]{4}-[a-z0-9]{12})" - workflow_id = re.findall(uuid_regex, url)[-1] - print('Got analysis workflow UUID: {0}'.format(workflow_id)) - return workflow_id - - -def main(): - description = """Creates json files needed for HCA DCP2 MVP""" - parser = argparse.ArgumentParser(description=description) - parser.add_argument('--project-loom-file', - dest='project_loom_file', - required=True, - help="Path to project loom file") - parser.add_argument('--crc32c', - dest='crc32c', - required=True, - help="crc32c of the loom file") - parser.add_argument('--version-timestamp', - dest='version_timestamp', - required=True, - help="A version for the output files in the form of a timestamp") - parser.add_argument('--project-id', - dest='project_id', - required=True, - help="project id of the loom file") - parser.add_argument('--project-stratum-string', - dest='project_stratum_string', - required=True, - help="project id of the loom file") - parser.add_argument('--sha256', - dest='sha256', - required=True, - help="sha256 of the loom file") - parser.add_argument('--size', - dest='size', - required=True, - help="Size of the loom file in bytes") - parser.add_argument('--staging-bucket', - dest='staging_bucket', - help="Path to staging bucket") - parser.add_argument('--input-metadata-json', - dest='inputs_json', - required=True, - help="Json file with inputs metadata") - parser.add_argument('--loom-timestamp', - dest='loom_timestamp', - required=True, - help="The timestamp for the stratified project matrix loom file") - parser.add_argument('--pipeline-version', - dest='pipeline_version', - required=True, - help="The version of the pipeline used to create the stratified project matrix") - - args = parser.parse_args() - - project_loom_file = 
args.project_loom_file - crc32c = args.crc32c - file_version = args.version_timestamp - loom_timestamp = args.loom_timestamp - loom_version = loom_timestamp.replace('Z', '.000000Z') - project_id = args.project_id - project_stratum_string = args.project_stratum_string - sha256 = args.sha256 - size = int(args.size) - staging_bucket = args.staging_bucket - pipeline_version = args.pipeline_version - with open(args.inputs_json, "r") as i: - inputs_dict = json.load(i) # this should be a list of dictionaries - inputs = inputs_dict['inputs'] - - analysis_type = "run" - if "cacheCopy" in str(project_loom_file): - analysis_type = "copy-forward" - - # Generate additional data from args - file_name = os.path.basename(project_loom_file) - process_id = get_analysis_workflow_id(project_loom_file) - - # Create UUIDs - links_id = get_uuid5(project_stratum_string) # v5 UUID of project id and the values the data are stratified by - matrix_entity_id = get_uuid5(str(links_id + "analysis_file" + "loom")) # v5 UUID of the links_id - matrix_file_id = get_uuid5(matrix_entity_id) # v5 UUID of the matrix_entity_id - - analysis_file_dict = { - "describedBy": "https://schema.humancellatlas.org/type/file/6.2.0/analysis_file", - "file_core": { - "file_name": file_name, - "format": "loom", - "content_description": [{ - "text": "DCP/2-generated matrix", - "ontology": "data:3917", - "ontology_label": "Count Matrix" - }] - }, - "provenance": { - "document_id": matrix_entity_id, - "submission_date": file_version, - "submitter_id": "e67aaabe-93ea-564a-aa66-31bc0857b707" - }, - "schema_type": "file" - } - - analysis_process_dict = { - "describedBy": "https://schema.humancellatlas.org/type/process/analysis/12.0.0/analysis_process", - "schema_type": "process", - "process_core": { - "process_id": process_id - }, - "type": { - "text": "analysis; merge matrices" - }, - "reference_files": [], - "timestamp_start_utc": loom_version, # string; - # Initial start time of the full pipeline in UTC. 
- # format: yyyy-mm-ddThh:mm:ssZ - "timestamp_stop_utc": loom_version, # string; - # Terminal stop time of the full pipeline in UTC. - # format: yyyy-mm-ddThh:mm:ssZ - "tasks": [ - # { - # "task_name": "", # string; Name of the task. - # # example: CollectDuplicationMetrics; RSEMExpression - # "start_time": "", # string; Date and time when the task started. - # # Enter the time in date-time format: yyyy-mm-ddThh:mm:ssZ - # "stop_time": "", # string; Date and time when the task finished. - # # Enter the time in date-time format: yyyy-mm-ddThh:mm:ssZ - # "disk_size": "", # string; Name of the disk volume mounted to the VM for the task. - # # Indicate both disk type and disk size. example: local-disk 11 HDD - # "docker_image": "",# string; - # # Name of docker image where the task is stored and executed. - # # us.gcr.io/broad-gotc-prod/picard-cloud:2.26.10 - # "cpus": 0, # integer; Number of CPUs used to run this task. - # "memory": "", # string; Amount of memory allocated for this task. example: 7.5 GB - # "zone": "" # string Name of the Google Cloud zone where the task was run. - # #example: us-central1-b; europe-north1-a - # } - ], - "inputs": [ - # { - # "parameter_name": "", # string; Name of parameter. example: stranded; rsem_ref_index - # "parameter_value": "" # string; Path to file for or value of parameter. - # # example: NONE; - # # gs://hca-dcp-mint-test-data/../gencode_v27_primary.tar" - # } # Input parameters used in the pipeline run. - ], - "analysis_run_type": analysis_type, - "provenance": { - "document_id": process_id, - "submission_date": file_version, - }, - } - - analysis_protocol_dict = { - "describedBy": "https://schema.humancellatlas.org/type/protocol/analysis/9.1.0/analysis_protocol", - "schema_type": "protocol", - "protocol_core": { - "protocol_id": pipeline_version - }, - "computational_method": pipeline_version, # string; A URI to a versioned workflow and - # versioned execution environment in a - # GA4GH-compliant repository. 
- # example: SmartSeq2SingleCell; 10x - "type": { - "text": "analysis; merge matrices" - } - } - analysis_protocol_string = json.dumps(analysis_protocol_dict, sort_keys=True) - analysis_protocol_entity_id = get_uuid5(analysis_protocol_string) - analysis_protocol_dict['provenance'] = { - 'document_id': analysis_protocol_entity_id, - 'submission_date': file_version, - 'update_date': file_version - } - - file_descriptor_dict = { - "crc32c": crc32c, - "content_type": "application/vnd.loom", - "describedBy": "https://schema.humancellatlas.org/system/2.0.0/file_descriptor", - "file_id": matrix_file_id, - "file_name": file_name, - "file_version": loom_version, - "schema_type": "file_descriptor", - "schema_version": "2.0.0", - "sha256": sha256, - "size": size, - } - - links_dict = { - "describedBy": "https://schema.humancellatlas.org/system/2.1.1/links", - "links": [ - { - "inputs": inputs, - "link_type": "process_link", - "outputs": [ - { - "output_id": matrix_entity_id, - "output_type": "analysis_file" - } - ], - "process_id": process_id, - "process_type": "analysis_process", - "protocols": [ - { - "protocol_id": analysis_protocol_entity_id, - "protocol_type": "analysis_protocol" - } - ] - } - ], - "schema_type": "links", - "schema_version": "2.1.1" - } - - # filenames for staging directories - analysis_file_basename = "{}_{}.json".format(matrix_entity_id, file_version) - analysis_protocol_basename = "{}_{}.json".format(analysis_protocol_entity_id, file_version) - analysis_process_basename = "{}_{}.json".format(process_id, file_version) - links_basename = "{}_{}_{}.json".format(links_id, file_version, project_id) - - # files created in output directory for output - analysis_file_json_file_name = "outputs/analysis_file_{}".format(analysis_file_basename) - analysis_process_json_file_name = "outputs/analysis_process_{}".format(analysis_process_basename) - analysis_protocol_json_file_name = "outputs/analysis_protocol_{}".format(analysis_protocol_basename) - 
file_descriptor_json_file_name = "outputs/file_descriptor_{}".format(analysis_file_basename) - links_json_file_name = "outputs/links_{}".format(links_basename) - - with open(analysis_file_json_file_name, "w") as f: - json.dump(analysis_file_dict, f, sort_keys=True, indent=2) - - with open(analysis_process_json_file_name, "w") as f: - json.dump(analysis_process_dict, f, sort_keys=True, indent=2) - - with open(analysis_protocol_json_file_name, "w") as f: - json.dump(analysis_protocol_dict, f, sort_keys=True, indent=2) - - with open(file_descriptor_json_file_name, "w") as f: - json.dump(file_descriptor_dict, f, sort_keys=True, indent=2) - - with open(links_json_file_name, "w") as f: - json.dump(links_dict, f, sort_keys=True, indent=2) - - # Copy json files into the staging bucket - subprocess.run('gsutil cp {0} {1}data/{2}'.format(project_loom_file, staging_bucket, file_name), shell=True) - subprocess.run('gsutil cp {0} {1}metadata/analysis_file/{2}'.format(analysis_file_json_file_name, - staging_bucket, - analysis_file_basename), shell=True) - subprocess.run('gsutil cp {0} {1}metadata/analysis_process/{2}'.format(analysis_process_json_file_name, - staging_bucket, - analysis_process_basename), shell=True) - subprocess.run('gsutil cp {0} {1}metadata/analysis_protocol/{2}'.format(analysis_protocol_json_file_name, - staging_bucket, - analysis_protocol_basename), shell=True) - subprocess.run('gsutil cp {0} {1}descriptors/analysis_file/{2}'.format(file_descriptor_json_file_name, - staging_bucket, - analysis_file_basename), shell=True) - subprocess.run('gsutil cp {0} {1}links/{2}'.format(links_json_file_name, - staging_bucket, - links_basename), shell=True) - - -if __name__ == '__main__': - main() - diff --git a/dockers/skylab/HCA_post_processing/build.sh b/dockers/skylab/HCA_post_processing/build.sh deleted file mode 100755 index ec0d2953bc..0000000000 --- a/dockers/skylab/HCA_post_processing/build.sh +++ /dev/null @@ -1,13 +0,0 @@ -#!/bin/bash - -tag=$1 - -if [ -z $tag ]; 
then - echo -e "\nYou must provide a tag" - echo -e "\nUsage: bash build_docker.sh TAG\n" - exit 1 -fi - -docker build . --tag=quay.io/humancellatlas/hca_post_processing:$tag - -echo "You can now push with docker push quay.io/humancellatlas/hca_post_processing:$tag" diff --git a/dockers/skylab/HCA_post_processing/create_input_metadata_json.py b/dockers/skylab/HCA_post_processing/create_input_metadata_json.py deleted file mode 100755 index c4d8075bca..0000000000 --- a/dockers/skylab/HCA_post_processing/create_input_metadata_json.py +++ /dev/null @@ -1,39 +0,0 @@ -#!/usr/bin/env python3 - -import json -import argparse - - -def main(): - description = """Collects input metadata from individual analysis file jsons """ - parser = argparse.ArgumentParser(description=description) - parser.add_argument('--input-json-files', - dest='input_files', - nargs="+", - required=True, - help="List of son files") - parser.add_argument('--output', - dest='output', - required=True, - help="Name of output file") - - args = parser.parse_args() - - analysis_files = args.input_files - - inputs = {"inputs": []} - - for analysis_file in analysis_files: - with open(analysis_file, "r") as f: - analysis_metadata = json.load(f) - if analysis_metadata["file_core"]["file_name"].endswith(".loom"): - input_uuid = analysis_metadata["provenance"]["document_id"] - inputs["inputs"].append({"input_id": input_uuid, "input_type": "analysis_file"}) - - with open(args.output, "w") as f: - json.dump(inputs, f) - - -if __name__ == '__main__': - main() - diff --git a/dockers/skylab/HCA_post_processing/file_utils.sh b/dockers/skylab/HCA_post_processing/file_utils.sh deleted file mode 100755 index 645369104c..0000000000 --- a/dockers/skylab/HCA_post_processing/file_utils.sh +++ /dev/null @@ -1,19 +0,0 @@ -#!/bin/bash - -function get_timestamp() { - local -r input_file=${1} - timestamp=$(gsutil ls -l ${input_file} | egrep -o "([0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}:[0-9]{2}:[0-9]{2}Z)") - echo ${timestamp} -} - 
-function get_crc() { - local -r input_file=${1} - crc=$(gsutil hash -h ${input_file} | awk '/crc32c/ { print $3 }') - echo ${crc} -} - -function get_size() { - local -r input_file=${1} - size=$(gsutil stat ${input_file} | awk '/Content-Length/ { print $2 }') - echo ${size} -} diff --git a/dockers/skylab/HCA_post_processing/requirements.txt b/dockers/skylab/HCA_post_processing/requirements.txt deleted file mode 100644 index 3bef2eebc9..0000000000 --- a/dockers/skylab/HCA_post_processing/requirements.txt +++ /dev/null @@ -1,5 +0,0 @@ -numpy==1.21.0 -scipy==1.4.1 -pandas==1.0.3 -loompy==3.0.6 -h5py==2.10.0 diff --git a/dockers/skylab/HCA_post_processing/testdata/a.loom b/dockers/skylab/HCA_post_processing/testdata/a.loom deleted file mode 100644 index 4e2243e1da..0000000000 Binary files a/dockers/skylab/HCA_post_processing/testdata/a.loom and /dev/null differ diff --git a/dockers/skylab/HCA_post_processing/testdata/b.loom b/dockers/skylab/HCA_post_processing/testdata/b.loom deleted file mode 100644 index df22d1d5f9..0000000000 Binary files a/dockers/skylab/HCA_post_processing/testdata/b.loom and /dev/null differ diff --git a/dockers/skylab/HCA_post_processing/testdata/c.loom b/dockers/skylab/HCA_post_processing/testdata/c.loom deleted file mode 100644 index dbd77094ff..0000000000 Binary files a/dockers/skylab/HCA_post_processing/testdata/c.loom and /dev/null differ diff --git a/dockers/skylab/HCA_post_processing/testdata/d.loom b/dockers/skylab/HCA_post_processing/testdata/d.loom deleted file mode 100644 index 838ee16f16..0000000000 Binary files a/dockers/skylab/HCA_post_processing/testdata/d.loom and /dev/null differ diff --git a/dockers/skylab/snss2-build-indices/Biotypes.tsv b/dockers/skylab/build-indices/Biotypes.tsv similarity index 100% rename from dockers/skylab/snss2-build-indices/Biotypes.tsv rename to dockers/skylab/build-indices/Biotypes.tsv diff --git a/dockers/skylab/build-indices/Dockerfile b/dockers/skylab/build-indices/Dockerfile new file mode 100644 
index 0000000000..7f8d70752b --- /dev/null +++ b/dockers/skylab/build-indices/Dockerfile @@ -0,0 +1,43 @@ +# Adding a platform tag to ensure that images built on ARM-based machines (ex. M-series macs) won't cause issues with our automated PR test suite. +# However, this is not relevant for automated builds in a CI/CD pipeline that is AMD-based. +# Also upgraded to a more recent Debian version (Bullseye) so tini is part of apt. +FROM --platform="linux/amd64" python:3.6-bullseye + +# Add metadata labels +LABEL maintainer="Broad Institute DSDE --` + +We keep track of all past versions in [docker_versions](docker_versions.tsv) with the last image listed being the currently used version in WARP. + +You can see more information about the image, including the tool versions, by running the following command: + +```bash +$ docker pull us.gcr.io/broad-gotc-prod/build-indices:1.0.0-2.7.10a-1663605340 +$ docker inspect us.gcr.io/broad-gotc-prod/build-indices:1.0.0-2.7.10a-1663605340 +``` + +## Usage + +### Build_indices + +```bash +$ docker run --rm -it \ + us.gcr.io/broad-gotc-prod/build-indices:1.0.0-2.7.10a-1663605340 \ + build-indices bash +``` + +Then you can exec into the container and use STAR or any of the scripts accordingly. Alternatively, you can run one-off commands by passing the command as a docker run parameter. 
\ No newline at end of file diff --git a/dockers/skylab/snss2-build-indices/add-introns-to-gtf.py b/dockers/skylab/build-indices/add-introns-to-gtf.py similarity index 100% rename from dockers/skylab/snss2-build-indices/add-introns-to-gtf.py rename to dockers/skylab/build-indices/add-introns-to-gtf.py diff --git a/dockers/skylab/build-indices/docker_build.sh b/dockers/skylab/build-indices/docker_build.sh new file mode 100644 index 0000000000..212e01d20b --- /dev/null +++ b/dockers/skylab/build-indices/docker_build.sh @@ -0,0 +1,74 @@ +#!/bin/bash +#fail-fast +set -e + +# Update version when changes to Dockerfile are made +DOCKER_IMAGE_VERSION=1.0.0 +TIMESTAMP=$(date +"%s") +DIR=$(cd "$(dirname "$0")" && pwd) + +# Registries and tags +GCR_URL="us.gcr.io/broad-gotc-prod/build-indices" +#QUAY_URL="quay.io/humancellatlas/build-indices" +#Image path before getting renamed: quay.io/humancellatlas/snss2-indices:1.2.0 + +#STAR version +STAR_VERSION="2.7.10a" + +# Necessary tools and help text +TOOLS=(docker gcloud) +HELP="$(basename "$0") [-h|--help] [-t|tools] -- script to build the build-indices image and push to GCR +where: + -h|--help Show help text + -s|--star_version Version of STAR to use (default: $STAR_VERSION) + -t|--tools Show tools needed to run script + " + +function main(){ + for t in "${TOOLS[@]}"; do which "$t" >/dev/null || ok=no; done + if [[ $ok == no ]]; then + echo "Missing one of the following tools: " + for t in "${TOOLS[@]}"; do echo "$t"; done + exit 1 + fi + + while [[ $# -gt 0 ]] + do + key="$1" + case $key in + -h|--help) + echo "$HELP" + exit 0 + ;; + -s|--star_version) + STAR_VERSION="$2" + shift + shift + ;; + -t|--tools) + for t in "${TOOLS[@]}"; do echo "$t"; done + exit 0 + ;; + *) + shift + ;; + esac + done + + IMAGE_TAG="$DOCKER_IMAGE_VERSION-$STAR_VERSION-$TIMESTAMP" + + echo "building and pushing GCR Image - $GCR_URL:$IMAGE_TAG" + docker build -t "$GCR_URL:$IMAGE_TAG" \ + --build-arg STAR_VERSION="$STAR_VERSION" \ + --no-cache "$DIR" + 
docker push "$GCR_URL:$IMAGE_TAG" + +# echo "tagging and pushing Quay Image" +# docker tag "$GCR_URL:$IMAGE_TAG" "$QUAY_URL:$IMAGE_TAG" +# docker push "$QUAY_URL:$IMAGE_TAG" + + echo -e "$GCR_URL:$IMAGE_TAG" >> "$DIR/docker_versions.tsv" + echo "done" +} + +main "$@" \ No newline at end of file diff --git a/dockers/skylab/build-indices/docker_versions.tsv b/dockers/skylab/build-indices/docker_versions.tsv new file mode 100644 index 0000000000..a2aa9eace9 --- /dev/null +++ b/dockers/skylab/build-indices/docker_versions.tsv @@ -0,0 +1,2 @@ +DOCKER_VERSION +us.gcr.io/broad-gotc-prod/build-indices:1.0.0-2.7.10a-1663605340 diff --git a/dockers/skylab/snss2-build-indices/modify_gtf.py b/dockers/skylab/build-indices/modify_gtf.py similarity index 100% rename from dockers/skylab/snss2-build-indices/modify_gtf.py rename to dockers/skylab/build-indices/modify_gtf.py diff --git a/dockers/skylab/featureCounts/requirements.txt b/dockers/skylab/build-indices/requirements.txt similarity index 100% rename from dockers/skylab/featureCounts/requirements.txt rename to dockers/skylab/build-indices/requirements.txt diff --git a/dockers/skylab/bwa/Dockerfile b/dockers/skylab/bwa/Dockerfile new file mode 100644 index 0000000000..b70372dd20 --- /dev/null +++ b/dockers/skylab/bwa/Dockerfile @@ -0,0 +1,38 @@ +FROM --platform=linux/amd64 ubuntu:16.04 + +ARG BWA_VERSION=0.7.17 + +ENV TERM=xterm-256color \ + TINI_VERSION=v0.19.0 \ + BWA_URL=https://sourceforge.net/projects/bio-bwa/files/bwa-${BWA_VERSION}.tar.bz2/download + +LABEL MAINTAINER="Broad Institute DSDE --` + +We keep track of all past versions in [docker_versions](docker_versions.tsv) with the last image listed being the currently used version in WARP. 
+ +You can see more information about the image, including the tool versions, by running the following command: + +```bash +$ docker pull us.gcr.io/broad-gotc-prod/bwa:1.0.0-0.7.17-1660770463 +$ docker inspect us.gcr.io/broad-gotc-prod/bwa:1.0.0-0.7.17-1660770463 +``` + +## Usage + +```bash +$ docker run --rm -it \ + us.gcr.io/broad-gotc-prod/bwa:1.0.0-0.7.17-1660770463 \ + bwa +``` \ No newline at end of file diff --git a/dockers/skylab/bwa/docker_build.sh b/dockers/skylab/bwa/docker_build.sh new file mode 100644 index 0000000000..6acaf9d5ff --- /dev/null +++ b/dockers/skylab/bwa/docker_build.sh @@ -0,0 +1,71 @@ +#!/bin/bash +set -e + +# Update version when changes to Dockerfile are made +DOCKER_IMAGE_VERSION=1.0.0 +TIMESTAMP=$(date +"%s") +DIR=$(cd "$(dirname "$0")" && pwd) + +# Registries and tags +GCR_URL="us.gcr.io/broad-gotc-prod/bwa" +#QUAY_URL="quay.io/broadinstitute/gotc-prod-bwa" # Update and uncomment push block below after setting up quay repo + +# BWA version +BWA_VERSION="0.7.17" + +# Necessary tools and help text +TOOLS=(docker gcloud) +HELP="$(basename "$0") [-h|--help] [-v|--version] [-t|tools] -- script to build the bwa image and push to GCR +where: + -h|--help Show help text + -v|--version Version of BWA to use (default: $BWA_VERSION) + -t|--tools Show tools needed to run script + " + +function main(){ + for t in "${TOOLS[@]}"; do which "$t" >/dev/null || ok=no; done + if [[ $ok == no ]]; then + echo "Missing one of the following tools: " + for t in "${TOOLS[@]}"; do echo "$t"; done + exit 1 + fi + + while [[ $# -gt 0 ]] + do + key="$1" + case $key in + -v|--version) + BWA_VERSION="$2" + shift + shift + ;; + -h|--help) + echo "$HELP" + exit 0 + ;; + -t|--tools) + for t in "${TOOLS[@]}"; do echo "$t"; done + exit 0 + ;; + *) + shift + ;; + esac + done + + IMAGE_TAG="$DOCKER_IMAGE_VERSION-$BWA_VERSION-$TIMESTAMP" + + echo "building and pushing GCR Image - $GCR_URL:$IMAGE_TAG" + docker build --no-cache -t "$GCR_URL:$IMAGE_TAG" \ + --build-arg 
BWA_VERSION="$BWA_VERSION" "$DIR" + docker push "$GCR_URL:$IMAGE_TAG" + +# echo "tagging and pushing Quay Image" +# docker tag "$GCR_URL:$IMAGE_TAG" "$QUAY_URL:$IMAGE_TAG" +# docker push "$QUAY_URL:$IMAGE_TAG" + + echo -e "$GCR_URL:$IMAGE_TAG" >> "$DIR/docker_versions.tsv" + echo "done" +} + +main "$@" \ No newline at end of file diff --git a/dockers/skylab/bwa/docker_versions.tsv b/dockers/skylab/bwa/docker_versions.tsv new file mode 100644 index 0000000000..ba49567301 --- /dev/null +++ b/dockers/skylab/bwa/docker_versions.tsv @@ -0,0 +1,2 @@ +DOCKER_VERSION +us.gcr.io/broad-gotc-prod/bwa:1.0.0-0.7.17-1660770463 diff --git a/dockers/skylab/create-npz-output/Dockerfile b/dockers/skylab/create-npz-output/Dockerfile deleted file mode 100644 index 2efcd1908a..0000000000 --- a/dockers/skylab/create-npz-output/Dockerfile +++ /dev/null @@ -1,16 +0,0 @@ -FROM python:3.7.2 - -LABEL maintainer="Lantern Team " - -RUN pip install --upgrade pip - -RUN apt-get update - -COPY requirements.txt . -RUN pip3 install -r requirements.txt - -RUN mkdir /tools -WORKDIR /tools - -COPY create-npz-output.py . -COPY create-merged-npz-output.py . 
diff --git a/dockers/skylab/create-npz-output/create-npz-output.py b/dockers/skylab/create-npz-output/create-npz-output.py deleted file mode 100755 index c32cc6c70d..0000000000 --- a/dockers/skylab/create-npz-output/create-npz-output.py +++ /dev/null @@ -1,66 +0,0 @@ -#!/usr/bin/env python3 - -import argparse -import gzip -import numpy as np - -import scipy.io -import scipy.sparse - - -def main(): - description = """Create npz, npy file from the mtx files produced by STARsolo""" - parser = argparse.ArgumentParser(description=description) - parser.add_argument('--barcodes', - dest='barcodes', - required=True, - help="The barcodes file") - - parser.add_argument('--features', - dest='features', - required=True, - help="The features file") - - parser.add_argument('--matrix', - dest='matrix', - required=True, - help="The matrix file") - - args = parser.parse_args() - - # covert the mtx file to the matrix - matrix = scipy.io.mmread(args.matrix).transpose().tocsr() - nonzero_row_indices, _ = matrix.nonzero() - unique_nonzero_row_indices = np.sort(np.unique(nonzero_row_indices)) - # we need to keep only those rows that have non-zero reads/counts - scipy.sparse.save_npz("sparse_counts.npz", matrix[unique_nonzero_row_indices, :], compressed=True) - - # read the barcodes file and create the barcode to index - barcodes = [] - with gzip.open(args.barcodes, 'rt') if args.barcodes.endswith('.gz') else \ - open(args.barcodes, 'r') as fin: - for line in fin: - if line.startswith(r'^#'): # skip comments - continue - fields = line.strip().split('\t') - barcodes.append(fields[0]) - - row_index = np.asarray(barcodes) - # we need to keep only those barcodes that have non-zero reads/counts - np.save("sparse_counts_row_index.npy", row_index[unique_nonzero_row_indices]) - - # read the features file and create the feature to index map - features = [] - with gzip.open(args.features, 'rt') if args.features.endswith('.gz') else \ - open(args.features, 'r') as fin: - for line in fin: - if 
line.startswith(r'^#'): # skip comments - continue - fields = line.strip().split('\t') - features.append(fields[0]) - - row_index = np.asarray(features) - np.save("sparse_counts_col_index.npy", row_index) - -if __name__ == '__main__': - main() diff --git a/dockers/skylab/create-npz-output/requirements.txt b/dockers/skylab/create-npz-output/requirements.txt deleted file mode 100644 index 6bad10388e..0000000000 --- a/dockers/skylab/create-npz-output/requirements.txt +++ /dev/null @@ -1,2 +0,0 @@ -numpy -scipy diff --git a/dockers/skylab/dropseq_tools/Dockerfile b/dockers/skylab/dropseq_tools/Dockerfile deleted file mode 100644 index d671e352f5..0000000000 --- a/dockers/skylab/dropseq_tools/Dockerfile +++ /dev/null @@ -1,17 +0,0 @@ -FROM openjdk:8-jre - -LABEL maintainer="Ambrose J. Carr " \ - software="dropseqtools" \ - version="1.12" \ - description="tools for manipulation of drop-seq data and BAM files" \ - website="http://mccarrolllab.com/dropseq/" - -RUN apt update && apt install -y \ - curl \ - unzip - -RUN apt install -y python - -RUN curl http://mccarrolllab.com/download/922/ >> Drop-seq_tools-1.12.zip && \ - unzip Drop-seq_tools-1.12.zip && \ - cp -r Drop-seq_tools-1.12/* /usr/local/bin/ diff --git a/dockers/skylab/dropseq_tools_v2/Dockerfile b/dockers/skylab/dropseq_tools_v2/Dockerfile deleted file mode 100644 index bffe832b6b..0000000000 --- a/dockers/skylab/dropseq_tools_v2/Dockerfile +++ /dev/null @@ -1,20 +0,0 @@ -FROM openjdk:8-jre - -LABEL maintainer="Lantern Team " \ - software="dropseqtools" \ - version="2.3.0" \ - description="tools for manipulation of drop-seq data and BAM files" \ - website="http://mccarrolllab.com/dropseq/" - -RUN apt update && apt install -y \ - curl \ - unzip - -RUN apt install -y python - -RUN curl -L https://github.com/broadinstitute/Drop-seq/releases/download/v2.3.0/Drop-seq_tools-2.3.0.zip > Drop-seq_tools-2.3.0.zip && \ - unzip Drop-seq_tools-2.3.0.zip && \ - chmod +x Drop-seq_tools-2.3.0/* && \ - cp -r 
Drop-seq_tools-2.3.0/* /usr/local/bin/ && \ - rm Drop-seq_tools-2.3.0.zip && \ - rm -rf Drop-seq_tools-2.3.0/ diff --git a/dockers/skylab/dropseq_tools_v2/build.sh b/dockers/skylab/dropseq_tools_v2/build.sh deleted file mode 100755 index 1e28bddeed..0000000000 --- a/dockers/skylab/dropseq_tools_v2/build.sh +++ /dev/null @@ -1,14 +0,0 @@ -#!/bin/bash - -tag=2.3.0 -image="quay.io/humancellatlas/secondary-analysis-dropseqtools" - -if [ -z $tag ]; then - echo -e "\nYou must provide a tag" - echo -e "\nUsage: bash build_docker.sh TAG\n" - exit 1 -fi - -docker build -t $image:$tag . - -echo "You can now push with docker push $image:$tag" diff --git a/dockers/skylab/ea-utils/Dockerfile b/dockers/skylab/ea-utils/Dockerfile new file mode 100644 index 0000000000..e6ae626832 --- /dev/null +++ b/dockers/skylab/ea-utils/Dockerfile @@ -0,0 +1,38 @@ +FROM --platform=linux/amd64 ubuntu:16.04 + +ARG EAUTILS_VERSION=1.04.807 + +ENV TERM=xterm-256color \ + EAUTILS_URL=https://github.com/ExpressionAnalysis/ea-utils/archive/refs/tags/${EAUTILS_VERSION}.tar.gz \ + EAUTILS_VERSION=${EAUTILS_VERSION} \ + TINI_VERSION=v0.19.0 \ + PATH="$PATH:/usr/gitc/ea-utils-${EAUTILS_VERSION}/clipper/" + +LABEL MAINTAINER="Broad Institute DSDE --` + + +We keep track of all past versions in [docker_versions](docker_versions.tsv) with the last image listed being the currently used version in WARP. 
+ +You can see more information about the image, including the tool versions, by running the following command: + +```bash +$ docker pull us.gcr.io/broad-gotc-prod/ea-utils:1.0.0-1.04.807-1659990665 +$ docker inspect us.gcr.io/broad-gotc-prod/ea-utils:1.0.0-1.04.807-1659990665 +``` + +## Usage + +Show the `fastq-mcf` help screen: + +```bash +$ docker run --rm -it \ + us.gcr.io/broad-gotc-prod/ea-utils:1.0.0-1.04.807-1659990665 fastq-mcf -h +``` \ No newline at end of file diff --git a/dockers/skylab/ea-utils/docker_build.sh b/dockers/skylab/ea-utils/docker_build.sh new file mode 100755 index 0000000000..c0c07f1684 --- /dev/null +++ b/dockers/skylab/ea-utils/docker_build.sh @@ -0,0 +1,73 @@ +#!/bin/bash + +set -e + +# Update version when changes to Dockerfile are made +DOCKER_IMAGE_VERSION=1.0.0 +TIMESTAMP=$(date +"%s") +DIR=$(cd $(dirname $0) && pwd) + +# Registries and tags +GCR_URL="us.gcr.io/broad-gotc-prod/ea-utils" +#QUAY_URL="quay.io/humancellatlas/ea-utils" + +# ea-utils version +EAUTILS_VERSION="1.04.807" + +# Necessary tools and help text +TOOLS=(docker gcloud) +HELP="$(basename "$0") [-h|--help] [-t|--tools] -- script to build the ea-utils image and push to GCR & Quay + +where: + -h|--help Show help text + -v|--version Version of ea-utils to use (default: $EAUTILS_VERSION) + -t|--tools Show tools needed to run script + " + +function main(){ + for t in "${TOOLS[@]}"; do which $t >/dev/null || ok=no; done + if [[ $ok == no ]]; then + echo "Missing one of the following tools: " + for t in "${TOOLS[@]}"; do echo "$t"; done + exit 1 + fi + + while [[ $# -gt 0 ]] + do + key="$1" + case $key in + -v|--version) + EAUTILS_VERSION="$2" + shift + shift + ;; + -h|--help) + echo "$HELP" + exit 0 + ;; + -t|--tools) + for t in "${TOOLS[@]}"; do echo $t; done + exit 0 + ;; + *) + shift + ;; + esac + done + + IMAGE_TAG="$DOCKER_IMAGE_VERSION-$EAUTILS_VERSION-$TIMESTAMP" + + echo "building and pushing GCR Image - $GCR_URL:$IMAGE_TAG" + docker build --no-cache -t 
"$GCR_URL:$IMAGE_TAG" \ + --build-arg EAUTILS_VERSION="$EAUTILS_VERSION" "$DIR" + docker push "$GCR_URL:$IMAGE_TAG" + + #echo "tagging and pushing Quay Image" + #docker tag "$GCR_URL:$IMAGE_TAG" "$QUAY_URL:$IMAGE_TAG" + #docker push "$QUAY_URL:$IMAGE_TAG" + + echo "$GCR_URL:$IMAGE_TAG" >> "$DIR/docker_versions.tsv" + echo "done" +} + +main "$@" diff --git a/dockers/skylab/ea-utils/docker_versions.tsv b/dockers/skylab/ea-utils/docker_versions.tsv new file mode 100644 index 0000000000..c7391feef4 --- /dev/null +++ b/dockers/skylab/ea-utils/docker_versions.tsv @@ -0,0 +1,2 @@ +DOCKER_VERSION +us.gcr.io/broad-gotc-prod/ea-utils:1.0.0-1.04.807-1659990665 diff --git a/dockers/skylab/featureCounts/Dockerfile b/dockers/skylab/featureCounts/Dockerfile deleted file mode 100644 index f6656bc1e1..0000000000 --- a/dockers/skylab/featureCounts/Dockerfile +++ /dev/null @@ -1,29 +0,0 @@ -FROM python:3.6.2 - -LABEL maintainer="Lantern Team " \ - software="subread package" \ - version="2.0.1" \ - description="RNA-seq high-performance read alignment, quantification and mutation discovery" \ - website="http://subread.sourceforge.net/" - -# Install compiler -RUN apt-get update --fix-missing && apt-get install -y wget - -COPY requirements.txt . -RUN pip3 install -r requirements.txt - -# Install subread -WORKDIR /usr/local/ -ENV VERSION="2.0.1" -RUN wget "https://downloads.sourceforge.net/project/subread/subread-${VERSION}/subread-${VERSION}-source.tar.gz" \ - && tar -xzvf subread-${VERSION}-source.tar.gz -WORKDIR /usr/local/subread-${VERSION}-source/src -RUN make -f Makefile.Linux -ENV PATH /usr/local/subread-${VERSION}-source/bin/:$PATH -# Cleanup -RUN apt-get clean - -# copy the script that removes alignments spanning intron-exon junctions -RUN mkdir /tools -WORKDIR /tools -COPY remove-reads-on-junctions.py . 
diff --git a/dockers/skylab/hisat2/Dockerfile b/dockers/skylab/hisat2/Dockerfile index ce9d8d9b29..05a372f821 100644 --- a/dockers/skylab/hisat2/Dockerfile +++ b/dockers/skylab/hisat2/Dockerfile @@ -1,62 +1,68 @@ -FROM ubuntu:16.04 -LABEL MAINTAINER="Jishu Xu " -LABEL software="HISAT2" -LABEL version="2-2.1.0" -LABEL description="RNA-seq aligner" -LABEL website="https://ccb.jhu.edu/software/hisat2/index.shtml" +FROM --platform=linux/amd64 ubuntu:16.04 -RUN mkdir -p /opt/tools/ -WORKDIR /opt/tools +ENV TERM=xterm-256color \ + PATH="$PATH:/usr/gitc/hisat2-2.1.0" \ + TINI_VERSION=v0.19.0 -RUN \ - apt update && \ - apt install -y \ - liblzma-dev \ - libbz2-dev \ - cmake \ - automake \ - curl \ - libboost-all-dev \ - libcurl4-openssl-dev \ - wget \ - build-essential \ - gcc-multilib \ - zlib1g-dev \ - libxml2-dev \ - libncurses5-dev \ - zip unzip \ - git \ - r-base \ - r-base-core \ - r-base-dev - -RUN \ - wget -c ftp://ftp.ccb.jhu.edu/pub/infphilo/hisat2/downloads/hisat2-2.1.0-source.zip && \ - unzip hisat2-2.1.0-source.zip && \ - cd hisat2-2.1.0 && \ - make && \ - cp hisat2* /usr/local/bin +LABEL MAINTAINER="Broad Institute DSDE -` + + +We keep track of all past versions in [docker_versions](docker_versions.tsv) with the last image listed being the currently used version in WARP. 
+ +You can see more information about the image, including the tool versions, by running the following command: + +```bash +$ docker pull us.gcr.io/broad-gotc-prod/hisat2:1.0.0-1662998171 +$ docker inspect us.gcr.io/broad-gotc-prod/hisat2:1.0.0-1662998171 +``` + +## Usage + +```bash +$ docker run --rm -it \ + us.gcr.io/broad-gotc-prod/hisat2:1.0.0-1662998171 hisat2 +``` \ No newline at end of file diff --git a/dockers/skylab/hisat2/docker_build.sh b/dockers/skylab/hisat2/docker_build.sh new file mode 100644 index 0000000000..4a2087576b --- /dev/null +++ b/dockers/skylab/hisat2/docker_build.sh @@ -0,0 +1,61 @@ +#!/bin/bash +set -e + +# Update version when changes to Dockerfile are made +DOCKER_IMAGE_VERSION=1.0.0 +TIMESTAMP=$(date +"%s") +DIR=$(cd "$(dirname "$0")" && pwd) + +# Registries and tags +GCR_URL="us.gcr.io/broad-gotc-prod/hisat2" +#QUAY_URL="quay.io/broadinstitute/gotc-prod-hisat2" # Update and uncomment push block below after setting up quay repo + +# Necessary tools and help text +TOOLS=(docker gcloud) +HELP="$(basename "$0") [-h|--help] [-t|tools] -- script to build the hisat2 image and push to GCR +where: + -h|--help Show help text + -t|--tools Show tools needed to run script + " + +function main(){ + for t in "${TOOLS[@]}"; do which "$t" >/dev/null || ok=no; done + if [[ $ok == no ]]; then + echo "Missing one of the following tools: " + for t in "${TOOLS[@]}"; do echo "$t"; done + exit 1 + fi + + while [[ $# -gt 0 ]] + do + key="$1" + case $key in + -h|--help) + echo "$HELP" + exit 0 + ;; + -t|--tools) + for t in "${TOOLS[@]}"; do echo "$t"; done + exit 0 + ;; + *) + shift + ;; + esac + done + + IMAGE_TAG="$DOCKER_IMAGE_VERSION-$TIMESTAMP" + + echo "building and pushing GCR Image - $GCR_URL:$IMAGE_TAG" + docker build --no-cache -t "$GCR_URL:$IMAGE_TAG" "$DIR" + docker push "$GCR_URL:$IMAGE_TAG" + +# echo "tagging and pushing Quay Image" +# docker tag "$GCR_URL:$IMAGE_TAG" "$QUAY_URL:$IMAGE_TAG" +# docker push "$QUAY_URL:$IMAGE_TAG" + + echo -e 
"$GCR_URL:$IMAGE_TAG" >> "$DIR/docker_versions.tsv" + echo "done" +} + +main "$@" \ No newline at end of file diff --git a/dockers/skylab/hisat2/docker_versions.tsv b/dockers/skylab/hisat2/docker_versions.tsv new file mode 100644 index 0000000000..c9e47ddd63 --- /dev/null +++ b/dockers/skylab/hisat2/docker_versions.tsv @@ -0,0 +1,2 @@ +DOCKER_VERSION +us.gcr.io/broad-gotc-prod/hisat2:1.0.0-1662998171 diff --git a/dockers/skylab/loom-output/Dockerfile b/dockers/skylab/loom-output/Dockerfile deleted file mode 100644 index 0446f36abc..0000000000 --- a/dockers/skylab/loom-output/Dockerfile +++ /dev/null @@ -1,24 +0,0 @@ -FROM python:3.7.2 - -LABEL maintainer="Lantern Team " - -RUN pip install --upgrade pip - -RUN apt-get update && apt-get install wget - -RUN python -m pip install git+https://github.com/HumanCellAtlas/sctools.git#egg=sctools - -COPY requirements.txt . -RUN pip3 install -r requirements.txt - -RUN mkdir /tools -WORKDIR /tools - -COPY create_loom_optimus.py . -COPY create_loom_ss2.py . -COPY loomCompare.py . -COPY ss2_loom_merge.py . -COPY create_snss2_counts_csv.py . -COPY create_loom_snss2.py . -COPY create_snrna_optimus.py . -COPY create_snrna_optimus_counts.py . diff --git a/dockers/skylab/loom-output/build.sh b/dockers/skylab/loom-output/build.sh deleted file mode 100755 index e5a6c23fa9..0000000000 --- a/dockers/skylab/loom-output/build.sh +++ /dev/null @@ -1,13 +0,0 @@ -#!/bin/bash - -tag=$1 - -if [ -z $tag ]; then - echo -e "\nYou must provide a tag" - echo -e "\nUsage: bash build_docker.sh TAG\n" - exit 1 -fi - -docker build -t quay.io/humancellatlas/secondary-analysis-loom-output:$tag . 
- -echo "You can now push with docker push quay.io/humancellatlas/secondary-analysis-loom-output:$tag" diff --git a/dockers/skylab/loom-output/requirements.txt b/dockers/skylab/loom-output/requirements.txt deleted file mode 100644 index 08493c4ca5..0000000000 --- a/dockers/skylab/loom-output/requirements.txt +++ /dev/null @@ -1,2 +0,0 @@ -loompy==3.0.6 -h5py==2.10.0 diff --git a/dockers/skylab/picard/Dockerfile b/dockers/skylab/picard/Dockerfile deleted file mode 100644 index 9d6c703637..0000000000 --- a/dockers/skylab/picard/Dockerfile +++ /dev/null @@ -1,25 +0,0 @@ -FROM openjdk:8-jre - -LABEL maintainer="Green Lantern " \ - software="Picard" \ - version="2.26.10" \ - description="A set of command line tools (in Java) for manipulating high-throughput sequencing (HTS) data and formats such as SAM/BAM/CRAM and VCF." \ - website="http://broadinstitute.github.io/picard" - -# Please follow the below instructions to invoke picard when you are using this docker image: -# java jvm-args -jar /usr/picard/picard.jar PicardToolName OPTION1=value1 OPTION2=value2... -ENV version 2.26.10 -WORKDIR /usr/picard -ADD https://github.com/broadinstitute/picard/releases/download/${version}/picard.jar ./picard.jar - -# Install R dependencies for Picard -RUN apt-get update && \ - apt-get upgrade -y && \ - apt-get install -y r-base - -RUN echo "r <- getOption('repos'); r['CRAN'] <- 'http://cran.us.r-project.org'; options(repos = r);" > ~/.Rprofile -RUN Rscript -e "install.packages('reshape')" -RUN Rscript -e "install.packages('gplots')" -RUN Rscript -e "install.packages('ggplot2')" - -#TODO: did not find any task using the R scripts contained in this docker. Switched all tasks to cloud docker image. 
Test and remove if tests look OK diff --git a/dockers/skylab/pytools/Dockerfile b/dockers/skylab/pytools/Dockerfile new file mode 100644 index 0000000000..28c2e4214e --- /dev/null +++ b/dockers/skylab/pytools/Dockerfile @@ -0,0 +1,29 @@ +FROM --platform=linux/amd64 python:3.7.2 + +ENV TERM=xterm-256color \ + TINI_VERSION=v0.19.0\ + PATH=$PATH:/usr/gitc + +LABEL MAINTAINER="Broad Institute DSDE -` + + +We keep track of all past versions in [docker_versions](docker_versions.tsv) with the last image listed being the currently used version in WARP. + +You can see more information about the image, including the tool versions, by running the following command: + +```bash +$ docker pull us.gcr.io/broad-gotc-prod/pytools:1.0.0-1661263730 +$ docker inspect us.gcr.io/broad-gotc-prod/pytools:1.0.0-1661263730 +``` + +## Usage + +```bash +$ docker run --rm -it \ + us.gcr.io/broad-gotc-prod/pytools:1.0.0-1661263730 +``` + +## Scripts + +This image contains the following scripts: + +* `breakoutSnap.py` extracts the data in a snap file as csv files +* `create-merged-npz-output.py` takes a barcode.tsv, feature.tsv and matrix.mtx from STAR alignment outputs and creates 2 npy files and an npz file for row_index, col_index and the matrix. These files are required in the empty_drop step. 
+* `create_snss2_counts_csv.py` creates a csv file containing intron and exon counts from the Single Nucleus Smart-Seq2 pipeline +* `loomCompare.py` compares differences between loom files +* `ss2_loom_merge.py` creates a single loom file from multiple single sample loom files +* `makeCompliantBAM.py` make a BAM file with cellular barcodes in the read names compliant by moving them to the CB tag + +The following scripts create a loom file from counts, metadata, and metrics from each pipeline: +* `create_loom_optimus.py` for Optimus pipeline +* `create_loom_snss2.py` for Single Nucleus Smart-Seq2 pipeline +* `create_snrna_optimus.py` for Optimus in `sn_rna` mode with `count_exons=false` +* `create_snrna_optimus_counts.py` for Optimus in `sn_rna` mode with `count_exons=true` + diff --git a/dockers/skylab/pytools/docker_build.sh b/dockers/skylab/pytools/docker_build.sh new file mode 100644 index 0000000000..aa744a271a --- /dev/null +++ b/dockers/skylab/pytools/docker_build.sh @@ -0,0 +1,61 @@ +#!/bin/bash +set -e + +# Update version when changes to Dockerfile are made +DOCKER_IMAGE_VERSION=1.0.0 +TIMESTAMP=$(date +"%s") +DIR=$(cd "$(dirname "$0")" && pwd) + +# Registries and tags +GCR_URL="us.gcr.io/broad-gotc-prod/pytools" +#QUAY_URL="quay.io/broadinstitute/gotc-prod-pytools" # Update and uncomment push block below after setting up quay repo + +# Necessary tools and help text +TOOLS=(docker gcloud) +HELP="$(basename "$0") [-h|--help] [-t|tools] -- script to build the pytools image and push to GCR +where: + -h|--help Show help text + -t|--tools Show tools needed to run script + " + +function main(){ + for t in "${TOOLS[@]}"; do which "$t" >/dev/null || ok=no; done + if [[ $ok == no ]]; then + echo "Missing one of the following tools: " + for t in "${TOOLS[@]}"; do echo "$t"; done + exit 1 + fi + + while [[ $# -gt 0 ]] + do + key="$1" + case $key in + -h|--help) + echo "$HELP" + exit 0 + ;; + -t|--tools) + for t in "${TOOLS[@]}"; do echo "$t"; done + exit 0 + ;; + *) + 
shift + ;; + esac + done + + IMAGE_TAG="$DOCKER_IMAGE_VERSION-$TIMESTAMP" + + echo "building and pushing GCR Image - $GCR_URL:$IMAGE_TAG" + docker build --no-cache -t "$GCR_URL:$IMAGE_TAG" "$DIR" + docker push "$GCR_URL:$IMAGE_TAG" + +# echo "tagging and pushing Quay Image" +# docker tag "$GCR_URL:$IMAGE_TAG" "$QUAY_URL:$IMAGE_TAG" +# docker push "$QUAY_URL:$IMAGE_TAG" + + echo -e "$GCR_URL:$IMAGE_TAG" >> "$DIR/docker_versions.tsv" + echo "done" +} + +main "$@" \ No newline at end of file diff --git a/dockers/skylab/pytools/docker_versions.tsv b/dockers/skylab/pytools/docker_versions.tsv new file mode 100644 index 0000000000..f877b400b4 --- /dev/null +++ b/dockers/skylab/pytools/docker_versions.tsv @@ -0,0 +1,2 @@ +DOCKER_VERSION +us.gcr.io/broad-gotc-prod/pytools:1.0.0-1661263730 diff --git a/dockers/skylab/pytools/requirements.txt b/dockers/skylab/pytools/requirements.txt new file mode 100644 index 0000000000..72d747ee04 --- /dev/null +++ b/dockers/skylab/pytools/requirements.txt @@ -0,0 +1,4 @@ +numpy +scipy +loompy==3.0.6 +h5py==2.10.0 \ No newline at end of file diff --git a/dockers/skylab/snap-breakout/breakoutSnap.py b/dockers/skylab/pytools/tools/breakoutSnap.py similarity index 100% rename from dockers/skylab/snap-breakout/breakoutSnap.py rename to dockers/skylab/pytools/tools/breakoutSnap.py diff --git a/dockers/skylab/create-npz-output/create-merged-npz-output.py b/dockers/skylab/pytools/tools/create-merged-npz-output.py similarity index 100% rename from dockers/skylab/create-npz-output/create-merged-npz-output.py rename to dockers/skylab/pytools/tools/create-merged-npz-output.py diff --git a/dockers/skylab/loom-output/create_loom_optimus.py b/dockers/skylab/pytools/tools/create_loom_optimus.py similarity index 100% rename from dockers/skylab/loom-output/create_loom_optimus.py rename to dockers/skylab/pytools/tools/create_loom_optimus.py diff --git a/dockers/skylab/loom-output/create_loom_snss2.py b/dockers/skylab/pytools/tools/create_loom_snss2.py 
similarity index 100% rename from dockers/skylab/loom-output/create_loom_snss2.py rename to dockers/skylab/pytools/tools/create_loom_snss2.py diff --git a/dockers/skylab/loom-output/create_loom_ss2.py b/dockers/skylab/pytools/tools/create_loom_ss2.py similarity index 100% rename from dockers/skylab/loom-output/create_loom_ss2.py rename to dockers/skylab/pytools/tools/create_loom_ss2.py diff --git a/dockers/skylab/loom-output/create_snrna_optimus.py b/dockers/skylab/pytools/tools/create_snrna_optimus.py similarity index 97% rename from dockers/skylab/loom-output/create_snrna_optimus.py rename to dockers/skylab/pytools/tools/create_snrna_optimus.py index fa6ebd28b0..571b1d971f 100755 --- a/dockers/skylab/loom-output/create_snrna_optimus.py +++ b/dockers/skylab/pytools/tools/create_snrna_optimus.py @@ -23,7 +23,7 @@ def create_gene_id_name_map(gtf_file): # loop through the lines and find the gene_id and gene_name pairs with gzip.open(gtf_file, "rt") if gtf_file.endswith(".gz") else open( - gtf_file, "r" + gtf_file, "r" ) as fpin: for _line in fpin: line = _line.strip() @@ -109,7 +109,7 @@ def generate_row_attr(args): gene_metrics_data =np.array(gene_metric_values) numeric_field_names = gene_metrics[0][1:] - for i in range(len(numeric_field_names)): + for i in range(0, len(numeric_field_names)): name = numeric_field_names[i] data = gene_metrics_data[:, i] row_attrs[name] = data @@ -162,7 +162,7 @@ def generate_col_attr(args): "reads_mapped_too_many_loci", "n_genes", "genes_detected_multiple_observations" - ] + ] FloatColumnNames = [ # Float32 "molecule_barcode_fraction_bases_above_30_mean", @@ -199,18 +199,18 @@ def generate_col_attr(args): bool_field_names = final_df_bool_column_names # Create metadata tables and their headers for bool - for i in range(bool_field_names.shape[0]): + for i in range(0, bool_field_names.shape[0]): name = bool_field_names[i] data = final_df_bool[:, i] col_attrs[name] = data - + # Create metadata tables and their headers for float 
float_field_names = list(final_df_non_boolean.columns) for i in range(len(float_field_names)): name = float_field_names[i] data = final_df_non_boolean[name].to_numpy() - col_attrs[name] = data + col_attrs[name] = data if args.verbose: logging.info( @@ -238,8 +238,9 @@ def generate_matrix(args): nrows, ncols = csr_exp_counts.shape expr_sp = sc.sparse.coo_matrix((nrows, ncols), np.float32) - xcoord = ycoord = value = [] - + xcoord = [] + ycoord = [] + value = [] chunk_row_size = 10000 chunk_col_size = 10000 @@ -273,7 +274,7 @@ def generate_matrix(args): def create_loom_files(args): """This function creates the loom file or folder structure in output_loom_path in format file_format, with input_id from the input folder analysis_output_path - + Args: args (argparse.Namespace): input arguments for the run """ @@ -281,14 +282,14 @@ def create_loom_files(args): # generate a dictionary of row attributes - row_attrs = generate_row_attr(args) - + row_attrs = generate_row_attr(args) + # generate a dictionarty of column attributes - col_attrs = generate_col_attr(args) + col_attrs = generate_col_attr(args) # add the expression count matrix data expr_sp_t = generate_matrix(args) - + # add input_id to col_attrs col_attrs['input_id'] = np.repeat(args.input_id, expr_sp_t.shape[1]) @@ -304,7 +305,7 @@ def create_loom_files(args): if args.input_name_metadata_field is not None: attrDict['input_name_metadata_field'] = args.input_name_metadata_field attrDict['pipeline_version'] = args.pipeline_version - #generate loom file + #generate loom file loompy.create(args.output_loom_path, expr_sp_t, row_attrs, col_attrs, file_attrs=attrDict) def main(): @@ -395,7 +396,7 @@ def main(): action="store_true", help="whether to output verbose debugging messages", ) - + parser.add_argument( "--expression_data_type", dest="expression_data_type", @@ -416,4 +417,4 @@ def main(): create_loom_files(args) if __name__ == "__main__": - main() + main() \ No newline at end of file diff --git 
a/dockers/skylab/loom-output/create_snrna_optimus_counts.py b/dockers/skylab/pytools/tools/create_snrna_optimus_counts.py similarity index 100% rename from dockers/skylab/loom-output/create_snrna_optimus_counts.py rename to dockers/skylab/pytools/tools/create_snrna_optimus_counts.py diff --git a/dockers/skylab/loom-output/create_snss2_counts_csv.py b/dockers/skylab/pytools/tools/create_snss2_counts_csv.py similarity index 100% rename from dockers/skylab/loom-output/create_snss2_counts_csv.py rename to dockers/skylab/pytools/tools/create_snss2_counts_csv.py diff --git a/dockers/skylab/loom-output/loomCompare.py b/dockers/skylab/pytools/tools/loomCompare.py similarity index 100% rename from dockers/skylab/loom-output/loomCompare.py rename to dockers/skylab/pytools/tools/loomCompare.py diff --git a/dockers/skylab/snaptools/makeCompliantBAM.py b/dockers/skylab/pytools/tools/makeCompliantBAM.py similarity index 98% rename from dockers/skylab/snaptools/makeCompliantBAM.py rename to dockers/skylab/pytools/tools/makeCompliantBAM.py index cecbfd260b..4211281e2f 100755 --- a/dockers/skylab/snaptools/makeCompliantBAM.py +++ b/dockers/skylab/pytools/tools/makeCompliantBAM.py @@ -34,7 +34,7 @@ def checkArgs(args): for read in bamfile: counter += 1 if (counter % 100000 == 0): - print counter + print(counter) qname = str(read.qname) i = qname.find(':') cb, qn = qname[:i], qname[i+1:] diff --git a/dockers/skylab/loom-output/ss2_loom_merge.py b/dockers/skylab/pytools/tools/ss2_loom_merge.py similarity index 100% rename from dockers/skylab/loom-output/ss2_loom_merge.py rename to dockers/skylab/pytools/tools/ss2_loom_merge.py diff --git a/dockers/skylab/rsem/Dockerfile b/dockers/skylab/rsem/Dockerfile new file mode 100644 index 0000000000..77074c07aa --- /dev/null +++ b/dockers/skylab/rsem/Dockerfile @@ -0,0 +1,60 @@ +FROM --platform=linux/amd64 ubuntu:16.04 + +ENV TERM=xterm-256color \ + RSEM_VERSION=1.3.0 \ + STAR_VERSION=2.5.3a \ + TINI_VERSION=v0.19.0 + +LABEL MAINTAINER="Broad 
Institute DSDE -` + + +We keep track of all past versions in [docker_versions](docker_versions.tsv) with the last image listed being the currently used version in WARP. + +You can see more information about the image, including the tool versions, by running the following command: + +```bash +$ docker pull us.gcr.io/broad-gotc-prod/rsem:1.0.0-1663016024 +$ docker inspect us.gcr.io/broad-gotc-prod/rsem:1.0.0-1663016024 +``` + +## Usage + +```bash +$ docker run --rm -it \ + us.gcr.io/broad-gotc-prod/rsem:1.0.0-1663016024 rsem-prepare-reference --help +``` \ No newline at end of file diff --git a/dockers/skylab/rsem/docker_build.sh b/dockers/skylab/rsem/docker_build.sh new file mode 100644 index 0000000000..a5515739b8 --- /dev/null +++ b/dockers/skylab/rsem/docker_build.sh @@ -0,0 +1,61 @@ +#!/bin/bash +set -e + +# Update version when changes to Dockerfile are made +DOCKER_IMAGE_VERSION=1.0.0 +TIMESTAMP=$(date +"%s") +DIR=$(cd "$(dirname "$0")" && pwd) + +# Registries and tags +GCR_URL="us.gcr.io/broad-gotc-prod/rsem" +#QUAY_URL="quay.io/broadinstitute/gotc-prod-rsem" # Update and uncomment push block below after setting up quay repo + +# Necessary tools and help text +TOOLS=(docker gcloud) +HELP="$(basename "$0") [-h|--help] [-t|tools] -- script to build the RSEM image and push to GCR +where: + -h|--help Show help text + -t|--tools Show tools needed to run script + " + +function main(){ + for t in "${TOOLS[@]}"; do which "$t" >/dev/null || ok=no; done + if [[ $ok == no ]]; then + echo "Missing one of the following tools: " + for t in "${TOOLS[@]}"; do echo "$t"; done + exit 1 + fi + + while [[ $# -gt 0 ]] + do + key="$1" + case $key in + -h|--help) + echo "$HELP" + exit 0 + ;; + -t|--tools) + for t in "${TOOLS[@]}"; do echo "$t"; done + exit 0 + ;; + *) + shift + ;; + esac + done + + IMAGE_TAG="$DOCKER_IMAGE_VERSION-$TIMESTAMP" + + echo "building and pushing GCR Image - $GCR_URL:$IMAGE_TAG" + docker build --no-cache -t "$GCR_URL:$IMAGE_TAG" "$DIR" + docker push 
"$GCR_URL:$IMAGE_TAG" + +# echo "tagging and pushing Quay Image" +# docker tag "$GCR_URL:$IMAGE_TAG" "$QUAY_URL:$IMAGE_TAG" +# docker push "$QUAY_URL:$IMAGE_TAG" + + echo -e "$GCR_URL:$IMAGE_TAG" >> "$DIR/docker_versions.tsv" + echo "done" +} + +main "$@" \ No newline at end of file diff --git a/dockers/skylab/rsem/docker_versions.tsv b/dockers/skylab/rsem/docker_versions.tsv new file mode 100644 index 0000000000..6ab703c6a7 --- /dev/null +++ b/dockers/skylab/rsem/docker_versions.tsv @@ -0,0 +1,2 @@ +DOCKER_VERSION +us.gcr.io/broad-gotc-prod/rsem:1.0.0-1663016024 diff --git a/dockers/skylab/samtools/Dockerfile b/dockers/skylab/samtools/Dockerfile deleted file mode 100644 index e684fd2c02..0000000000 --- a/dockers/skylab/samtools/Dockerfile +++ /dev/null @@ -1,34 +0,0 @@ -FROM ubuntu:16.04 - -LABEL maintainer="Ambrose J. Carr " \ - software="samtools" \ - version="1.6" \ - description="processing sequence alignments in SAM and BAM formats" \ - website="https://samtools.github.io" - -RUN apt update && \ - apt install -y \ - wget \ - bzip2 \ - g++ \ - cmake \ - curl \ - libncurses5-dev \ - zlib1g-dev \ - libbz2-dev \ - zip \ - unzip \ - liblzma-dev \ - openssl \ - libcurl4-openssl-dev \ - libssl-dev - -WORKDIR /usr/local/samtools -ADD https://github.com/samtools/samtools/releases/download/1.6/samtools-1.6.tar.bz2 . 
- -RUN tar -xvf samtools-1.6.tar.bz2 && \ - rm samtools-1.6.tar.bz2 && \ - cd samtools-1.6 && \ - ./configure --prefix=/usr && \ - make && \ - make install diff --git a/dockers/skylab/snap-breakout/Dockerfile b/dockers/skylab/snap-breakout/Dockerfile deleted file mode 100644 index 43e4b0eba8..0000000000 --- a/dockers/skylab/snap-breakout/Dockerfile +++ /dev/null @@ -1,13 +0,0 @@ -FROM python:3.6.2 - -LABEL maintainer="Lantern Team " \ - software="python for breakout snap step" \ - description="python for exporting snap files into csv" - -RUN pip install \ - pandas==0.20.3 \ - h5py==2.9.0 - -RUN mkdir /tools/ -COPY breakoutSnap.py /tools/ -ENV PATH=/tools/:$PATH diff --git a/dockers/skylab/snap-breakout/build.sh b/dockers/skylab/snap-breakout/build.sh deleted file mode 100755 index 35241cb16b..0000000000 --- a/dockers/skylab/snap-breakout/build.sh +++ /dev/null @@ -1,4 +0,0 @@ -#!/bin/bash - -docker build -t quay.io/humancellatlas/snap-breakout:0.0.1 . -docker push quay.io/humancellatlas/snap-breakout:0.0.1 diff --git a/dockers/skylab/snaptools-bwa/Dockerfile b/dockers/skylab/snaptools-bwa/Dockerfile new file mode 100644 index 0000000000..3c6ffcef30 --- /dev/null +++ b/dockers/skylab/snaptools-bwa/Dockerfile @@ -0,0 +1,39 @@ +FROM --platform=linux/amd64 python:3.7.2 + +ARG BWA_VERSION=0.7.17 + +ENV TERM=xterm-256color \ + BWA_URL=https://sourceforge.net/projects/bio-bwa/files/bwa-${BWA_VERSION}.tar.bz2/download \ + TINI_VERSION=v0.19.0 \ + PATH=$PATH:/usr/gitc + +LABEL MAINTAINER="Broad Institute DSDE ---` + +We keep track of all past versions in [docker_versions](docker_versions.tsv) with the last image listed being the currently used version in WARP. 
+ +You can see more information about the image, including the tool versions, by running the following command: + +```bash +$ docker pull us.gcr.io/broad-gotc-prod/snaptools-bwa:1.0.0-1.4.8-0.7.17-1660844602 +$ docker inspect us.gcr.io/broad-gotc-prod/snaptools-bwa:1.0.0-1.4.8-0.7.17-1660844602 +``` + +## Usage + +### BWA + +```bash +$ docker run --rm -it \ + us.gcr.io/broad-gotc-prod/snaptools-bwa:1.0.0-1.4.8-0.7.17-1660844602 \ + bwa +``` + +### Snaptools + +See Snaptools GitHub for [more info](https://github.com/r3fang/SnapTools). \ No newline at end of file diff --git a/dockers/skylab/snaptools-bwa/docker_build.sh b/dockers/skylab/snaptools-bwa/docker_build.sh new file mode 100755 index 0000000000..64c743f016 --- /dev/null +++ b/dockers/skylab/snaptools-bwa/docker_build.sh @@ -0,0 +1,76 @@ +#!/bin/bash +set -e + +# Update version when changes to Dockerfile are made +DOCKER_IMAGE_VERSION=1.0.0 +TIMESTAMP=$(date +"%s") +DIR=$(cd "$(dirname "$0")" && pwd) + +# Registries and tags +GCR_URL="us.gcr.io/broad-gotc-prod/snaptools-bwa" +#QUAY_URL="quay.io/broadinstitute/gotc-prod-snaptools-bwa" # Update and uncomment push block below after setting up quay repo + +# BWA version +BWA_VERSION=0.7.17 + +# Snaptools version +SNAPTOOLS_VERSION=1.4.8 + +# Necessary tools and help text +TOOLS=(docker gcloud) +HELP="$(basename "$0") [-h|--help] [-t|tools] -- script to build the snaptools-bwa image and push to GCR +where: + -h|--help Show help text + -s|--snaptools_version Version of snaptools to use (default: SNAPTOOLS_VERSION=$SNAPTOOLS_VERSION) + -t|--tools Show tools needed to run script + " + +function main(){ + for t in "${TOOLS[@]}"; do which "$t" >/dev/null || ok=no; done + if [[ $ok == no ]]; then + echo "Missing one of the following tools: " + for t in "${TOOLS[@]}"; do echo "$t"; done + exit 1 + fi + + while [[ $# -gt 0 ]] + do + key="$1" + case $key in + -h|--help) + echo "$HELP" + exit 0 + ;; + -s|--snaptools_version) + SNAPTOOLS_VERSION="$2" + shift + shift + ;; + 
-t|--tools) + for t in "${TOOLS[@]}"; do echo "$t"; done + exit 0 + ;; + *) + shift + ;; + esac + done + + IMAGE_TAG="$DOCKER_IMAGE_VERSION-$SNAPTOOLS_VERSION-$BWA_VERSION-$TIMESTAMP" + + echo "building and pushing GCR Image - $GCR_URL:$IMAGE_TAG" + docker build -t "$GCR_URL:$IMAGE_TAG" \ + --build-arg SNAPTOOLS_VERSION="$SNAPTOOLS_VERSION" \ + --build-arg BWA_VERSION="$BWA_VERSION" \ + --no-cache "$DIR" + docker push "$GCR_URL:$IMAGE_TAG" + +# echo "tagging and pushing Quay Image" +# docker tag "$GCR_URL:$IMAGE_TAG" "$QUAY_URL:$IMAGE_TAG" +# docker push "$QUAY_URL:$IMAGE_TAG" + + echo -e "$GCR_URL:$IMAGE_TAG" >> "$DIR/docker_versions.tsv" + echo "done" +} + +main "$@" \ No newline at end of file diff --git a/dockers/skylab/snaptools-bwa/docker_versions.tsv b/dockers/skylab/snaptools-bwa/docker_versions.tsv new file mode 100644 index 0000000000..ab9d94cae2 --- /dev/null +++ b/dockers/skylab/snaptools-bwa/docker_versions.tsv @@ -0,0 +1,2 @@ +DOCKER_VERSION +us.gcr.io/broad-gotc-prod/snaptools-bwa:1.0.0-1.4.8-0.7.17-1660844602 diff --git a/dockers/skylab/snaptools/Dockerfile b/dockers/skylab/snaptools/Dockerfile deleted file mode 100644 index 76c0514981..0000000000 --- a/dockers/skylab/snaptools/Dockerfile +++ /dev/null @@ -1,29 +0,0 @@ -FROM ubuntu:18.04 - -# Install system requirements -RUN apt update && \ - apt install -y git python python-pip zlib1g zlib1g-dev wget - -# Install SnapTools -RUN mkdir /install && \ - cd /install && \ - git clone https://github.com/r3fang/SnapTools.git && \ - cd SnapTools && \ - pip install . 
- -RUN cd /install && \ - wget -O "bwa-0.7.17.tar.bz2" "https://sourceforge.net/projects/bio-bwa/files/bwa-0.7.17.tar.bz2/download" && \ - tar xvjf bwa-0.7.17.tar.bz2 && \ - cd bwa-0.7.17 && \ - make && \ - mkdir /tools/ && \ - cp bwa /tools/ - -# This fixes an error in the code of snaptools that -# references an absolute path for the python binary -RUN mkdir -p /home/r3fang/anaconda2/bin/ && \ - ln -s `which python` /home/r3fang/anaconda2/bin/python - -ENV PATH /tools/:$PATH - -COPY makeCompliantBAM.py /tools/ diff --git a/dockers/skylab/snaptools/build.sh b/dockers/skylab/snaptools/build.sh deleted file mode 100755 index 3b638a19f3..0000000000 --- a/dockers/skylab/snaptools/build.sh +++ /dev/null @@ -1,6 +0,0 @@ -#!/bin/bash - -docker build . -t quay.io/humancellatlas/snaptools:0.0.1 - -echo You can now push to quay.io using the following command -echo docker push quay.io/humancellatlas/snaptools:0.0.1 diff --git a/dockers/skylab/snss2-build-indices/Dockerfile b/dockers/skylab/snss2-build-indices/Dockerfile deleted file mode 100644 index 78860c3186..0000000000 --- a/dockers/skylab/snss2-build-indices/Dockerfile +++ /dev/null @@ -1,29 +0,0 @@ -FROM python:3.6.2 - -LABEL maintainer="Farzaneh Khajouei " \ - software="subread package" \ - version="2.0.2" \ - description="RNA-seq high-performance read alignment, quantification and mutation discovery" \ - website="http://subread.sourceforge.net/" - -# Install compiler -RUN apt-get update --fix-missing && apt-get install -y \ - wget - -COPY requirements.txt . -RUN pip3 install -r requirements.txt - -# Install subread -WORKDIR /usr/local/ -RUN wget https://github.com/alexdobin/STAR/archive/refs/tags/2.7.10a.tar.gz && \ - tar -xf 2.7.10a.tar.gz -RUN chmod +x /usr/local/STAR-2.7.10a/bin/Linux_x86_64_static/STAR -ENV PATH /usr/local/STAR-2.7.10a/bin/Linux_x86_64_static/:$PATH - - -# copy the script that removes alignments spanning intron-exon junctions -RUN mkdir /script -WORKDIR /script -COPY add-introns-to-gtf.py . 
-COPY modify_gtf.py . -ENV PATH /script/:$PATH diff --git a/dockers/skylab/star/Dockerfile b/dockers/skylab/star/Dockerfile index 1bc8cb2459..377cb3e4fb 100644 --- a/dockers/skylab/star/Dockerfile +++ b/dockers/skylab/star/Dockerfile @@ -1,38 +1,33 @@ -FROM ubuntu:16.04 -LABEL maintainer="Kylee Degatano "\ - software="STAR" \ - version="2.7.9a" \ - description="RNA-seq aligner" \ - website="https://github.com/alexdobin/STAR" -RUN mkdir build -WORKDIR build -# install additional python packages -#Install wget, unzip -RUN apt update && apt install -y \ - liblzma-dev \ - libbz2-dev \ - cmake automake \ - curl \ - libboost-all-dev \ - wget \ - build-essential \ - gcc-multilib \ - zlib1g-dev \ - libxml2-dev \ - libncurses5-dev \ - r-base \ - r-base-core \ - r-base-dev -WORKDIR /usr/local/ +FROM --platform=linux/amd64 alpine:latest -RUN wget https://github.com/alexdobin/STAR/archive/refs/tags/2.7.9a.tar.gz && \ - tar -xf 2.7.9a.tar.gz -RUN chmod +x /usr/local/STAR-2.7.9a/bin/Linux_x86_64_static/STAR -ENV PATH /usr/local/STAR-2.7.9a/bin/Linux_x86_64_static/:$PATH -RUN cp /usr/local/STAR-2.7.9a/bin/Linux_x86_64_static/STAR /usr/local/bin +ARG STAR_VERSION=2.7.9a +ENV TERM=xterm-256color \ + STAR_URL=https://github.com/alexdobin/STAR/archive/${STAR_VERSION}.tar.gz -WORKDIR / -RUN rm -rf /build -COPY create-npz-output.py . +LABEL MAINTAINER="Broad Institute DSDE --` + + +We keep track of all past versions in [docker_versions](docker_versions.tsv) with the last image listed being the currently used version in WARP. 
+ +You can see more information about the image, including the tool versions, by running the following command: + +```bash +$ docker pull us.gcr.io/broad-gotc-prod/star:1.0.0-2.7.9a-1658334187 +$ docker inspect us.gcr.io/broad-gotc-prod/star:1.0.0-2.7.9a-1658334187 +``` + +## Usage + +```bash +$ docker run --rm -it \ + us.gcr.io/broad-gotc-prod/star:1.0.0-2.7.9a-1658334187 STAR +``` \ No newline at end of file diff --git a/dockers/skylab/star/docker_build.sh b/dockers/skylab/star/docker_build.sh new file mode 100644 index 0000000000..02e1ba8933 --- /dev/null +++ b/dockers/skylab/star/docker_build.sh @@ -0,0 +1,71 @@ +#!/bin/bash +set -e + +# Update version when changes to Dockerfile are made +DOCKER_IMAGE_VERSION=1.0.0 +TIMESTAMP=$(date +"%s") +DIR=$(cd "$(dirname "$0")" && pwd) + +# Registries and tags +GCR_URL="us.gcr.io/broad-gotc-prod/star" +#QUAY_URL="quay.io/broadinstitute/gotc-prod-star" # Update and uncomment push block below after setting up quay repo + +# STAR version +STAR_VERSION="2.7.9a" + +# Necessary tools and help text +TOOLS=(docker gcloud) +HELP="$(basename "$0") [-h|--help] [-v|--version] [-t|tools] -- script to build the star image and push to GCR +where: + -h|--help Show help text + -v|--version Version of STAR to use (default: $STAR_VERSION) + -t|--tools Show tools needed to run script + " + +function main(){ + for t in "${TOOLS[@]}"; do which "$t" >/dev/null || ok=no; done + if [[ $ok == no ]]; then + echo "Missing one of the following tools: " + for t in "${TOOLS[@]}"; do echo "$t"; done + exit 1 + fi + + while [[ $# -gt 0 ]] + do + key="$1" + case $key in + -v|--version) + STAR_VERSION="$2" + shift + shift + ;; + -h|--help) + echo "$HELP" + exit 0 + ;; + -t|--tools) + for t in "${TOOLS[@]}"; do echo "$t"; done + exit 0 + ;; + *) + shift + ;; + esac + done + + IMAGE_TAG="$DOCKER_IMAGE_VERSION-$STAR_VERSION-$TIMESTAMP" + + echo "building and pushing GCR Image - $GCR_URL:$IMAGE_TAG" + docker build --no-cache -t "$GCR_URL:$IMAGE_TAG" \ + 
--build-arg STAR_VERSION="$STAR_VERSION" "$DIR" + docker push "$GCR_URL:$IMAGE_TAG" + +# echo "tagging and pushing Quay Image" +# docker tag "$GCR_URL:$IMAGE_TAG" "$QUAY_URL:$IMAGE_TAG" +# docker push "$QUAY_URL:$IMAGE_TAG" + + echo -e "$GCR_URL:$IMAGE_TAG" >> "$DIR/docker_versions.tsv" + echo "done" +} + +main "$@" \ No newline at end of file diff --git a/dockers/skylab/star/docker_versions.tsv b/dockers/skylab/star/docker_versions.tsv new file mode 100644 index 0000000000..14ec79567f --- /dev/null +++ b/dockers/skylab/star/docker_versions.tsv @@ -0,0 +1,2 @@ +DOCKER_VERSION +us.gcr.io/broad-gotc-prod/star:1.0.0-2.7.9a-1658781884 diff --git a/dockers/skylab/subread/Dockerfile b/dockers/skylab/subread/Dockerfile index 99473a24a7..bb4a7909fb 100644 --- a/dockers/skylab/subread/Dockerfile +++ b/dockers/skylab/subread/Dockerfile @@ -1,34 +1,38 @@ -FROM ubuntu:16.04 -LABEL maintainer=" Jishu Xu " \ - software="subread package" \ - version="1.6.0" \ - description="RNA-seq high-performance read alignment, quantification and mutation discovery" \ - website="http://subread.sourceforge.net/" +FROM --platform=linux/amd64 python:3.6.2 -# Install compiler -RUN apt-get update --fix-missing && apt-get install -y \ - build-essential \ - gcc-multilib \ - apt-utils \ - zlib1g-dev \ - libxml2-dev \ - curl \ - wget \ - libbz2-dev \ - cmake automake \ - libboost-all-dev \ - libncurses5-dev \ - r-base \ - r-base-core \ - r-base-dev - -# Install subread -WORKDIR /usr/local/ -ENV VERSION="1.6.0" -RUN wget "https://downloads.sourceforge.net/project/subread/subread-${VERSION}/subread-${VERSION}-source.tar.gz" -RUN tar -xzvf subread-${VERSION}-source.tar.gz -WORKDIR /usr/local/subread-${VERSION}-source/src -RUN make -f Makefile.Linux -ENV PATH /usr/local/subread-${VERSION}-source/bin/:$PATH -# Cleanup -RUN apt-get clean +ARG SUBREAD_VERSION="2.0.1" + +ENV TERM=xterm-256color \ + 
SUBREAD_URL="https://downloads.sourceforge.net/project/subread/subread-${SUBREAD_VERSION}/subread-${SUBREAD_VERSION}-source.tar.gz" \ + TINI_VERSION=v0.19.0 \ + PATH=$PATH:/usr/gitc/subread-${SUBREAD_VERSION}-source/bin + +LABEL MAINTANER="Broad Institute DSDE " \ + SUBREAD_VERSION=${SUBREAD_VERSION} + +WORKDIR /usr/gitc + +COPY requirements.txt . +COPY remove-reads-on-junctions.py . + +RUN set -eux; \ + apt-get update --fix-missing; \ + apt-get install -y wget; \ + python3 -m pip install --upgrade pip; \ + pip3 install -r requirements.txt \ + ; \ +# Install subread + wget --no-check-certificate ${SUBREAD_URL}; \ + tar -xzvf subread-${SUBREAD_VERSION}-source.tar.gz; \ + cd /usr/gitc/subread-${SUBREAD_VERSION}-source/src; \ + make -f Makefile.Linux; \ +# Install TINI \ + cd /usr/gitc; \ + mkdir temp; \ + cd temp; \ + wget https://github.com/krallin/tini/releases/download/${TINI_VERSION}/tini -O /sbin/tini; \ + chmod +x /sbin/tini; \ + rm -r /usr/gitc/temp; + +# Set tini as default entrypoint +ENTRYPOINT ["/sbin/tini", "--" ] \ No newline at end of file diff --git a/dockers/skylab/subread/README.md b/dockers/skylab/subread/README.md new file mode 100644 index 0000000000..9f6bdd8986 --- /dev/null +++ b/dockers/skylab/subread/README.md @@ -0,0 +1,38 @@ +# Subread + +## Quick reference + +Copy and paste to pull this image + +#### `docker pull us.gcr.io/broad-gotc-prod/subread:1.0.0-2.0.1-1662044537` + + +- __What is this image:__ This image is a lightweight alpine-based custom image for running the Subread sequencing data processing suite. +- __What is Subread:__ Subread contains a suite of high-performance software programs for processing next-generation sequencing data, [more info](http://subread.sourceforge.net). +- __How to see tool version used in image:__ Please see below. 
+ +## Versioning + +Subread uses the following convention for versioning: + +#### `us.gcr.io/broad-gotc-prod/subread:<image-version>-<subread-version>-<unix-timestamp>` + + +We keep track of all past versions in [docker_versions](docker_versions.tsv) with the last image listed being the currently used version in WARP. + +You can see more information about the image, including the tool versions, by running the following command: + +```bash +$ docker pull us.gcr.io/broad-gotc-prod/subread:1.0.0-2.0.1-1662044537 +$ docker inspect us.gcr.io/broad-gotc-prod/subread:1.0.0-2.0.1-1662044537 +``` + +## Usage + +This image contains several tools. See [here](http://subread.sourceforge.net/subread.html) for a sample workflow. +To show the `featureCounts` help page, for example: + +```bash +$ docker run --rm -it \ + us.gcr.io/broad-gotc-prod/subread:1.0.0-2.0.1-1662044537 featureCounts +``` \ No newline at end of file diff --git a/dockers/skylab/subread/docker_build.sh b/dockers/skylab/subread/docker_build.sh new file mode 100644 index 0000000000..7b00a973fd --- /dev/null +++ b/dockers/skylab/subread/docker_build.sh @@ -0,0 +1,71 @@ +#!/bin/bash +set -e + +# Update version when changes to Dockerfile are made +DOCKER_IMAGE_VERSION=1.0.0 +TIMESTAMP=$(date +"%s") +DIR=$(cd "$(dirname "$0")" && pwd) + +# Registries and tags +GCR_URL="us.gcr.io/broad-gotc-prod/subread" +#QUAY_URL="quay.io/broadinstitute/gotc-prod-subread" # Update and uncomment push block below after setting up quay repo + +# Subread version +SUBREAD_VERSION="2.0.1" + +# Necessary tools and help text +TOOLS=(docker gcloud) +HELP="$(basename "$0") [-h|--help] [-v|--version] [-t|--tools] -- script to build the subread image and push to GCR +where: + -h|--help Show help text + -v|--version Version of subread to use (default: $SUBREAD_VERSION) + -t|--tools Show tools needed to run script + " + +function main(){ + for t in "${TOOLS[@]}"; do which "$t" >/dev/null || ok=no; done + if [[ $ok == no ]]; then + echo "Missing one of the following tools: " + for t in 
"${TOOLS[@]}"; do echo "$t"; done + exit 1 + fi + + while [[ $# -gt 0 ]] + do + key="$1" + case $key in + -v|--version) + SUBREAD_VERSION="$2" + shift + shift + ;; + -h|--help) + echo "$HELP" + exit 0 + ;; + -t|--tools) + for t in "${TOOLS[@]}"; do echo "$t"; done + exit 0 + ;; + *) + shift + ;; + esac + done + + IMAGE_TAG="$DOCKER_IMAGE_VERSION-$SUBREAD_VERSION-$TIMESTAMP" + + echo "building and pushing GCR Image - $GCR_URL:$IMAGE_TAG" + docker build --no-cache -t "$GCR_URL:$IMAGE_TAG" \ + --build-arg SUBREAD_VERSION="$SUBREAD_VERSION" "$DIR" + docker push "$GCR_URL:$IMAGE_TAG" + +# echo "tagging and pushing Quay Image" +# docker tag "$GCR_URL:$IMAGE_TAG" "$QUAY_URL:$IMAGE_TAG" +# docker push "$QUAY_URL:$IMAGE_TAG" + + echo -e "$GCR_URL:$IMAGE_TAG" >> "$DIR/docker_versions.tsv" + echo "done" +} + +main "$@" \ No newline at end of file diff --git a/dockers/skylab/subread/docker_versions.tsv b/dockers/skylab/subread/docker_versions.tsv new file mode 100644 index 0000000000..54ff55e7a6 --- /dev/null +++ b/dockers/skylab/subread/docker_versions.tsv @@ -0,0 +1,2 @@ +DOCKER_VERSION +us.gcr.io/broad-gotc-prod/subread:1.0.0-2.0.1-1662044537 diff --git a/dockers/skylab/featureCounts/remove-reads-on-junctions.py b/dockers/skylab/subread/remove-reads-on-junctions.py similarity index 100% rename from dockers/skylab/featureCounts/remove-reads-on-junctions.py rename to dockers/skylab/subread/remove-reads-on-junctions.py diff --git a/dockers/skylab/snss2-build-indices/requirements.txt b/dockers/skylab/subread/requirements.txt similarity index 100% rename from dockers/skylab/snss2-build-indices/requirements.txt rename to dockers/skylab/subread/requirements.txt diff --git a/dockers/skylab/subset-fastq-dataset/Dockerfile b/dockers/skylab/subset-fastq-dataset/Dockerfile deleted file mode 100644 index a22a8c12a0..0000000000 --- a/dockers/skylab/subset-fastq-dataset/Dockerfile +++ /dev/null @@ -1,36 +0,0 @@ -FROM python:3.7.2 - -LABEL maintainer="Mint Team " \ - software="python 
3.6.2" \ - description="python 3.6.2 with pysam, sctools, requests, and a basic science stack" - -RUN pip3 install \ - Click==7.0 \ - numpy==1.16.2 \ - pysam==0.15.2 \ - biopython==1.73 - -## Install software -RUN apt-get update && \ - apt-get install -y lsb-release && \ - export CLOUD_SDK_REPO="cloud-sdk-$(lsb_release -c -s)" && \ - echo "deb http://packages.cloud.google.com/apt $CLOUD_SDK_REPO main" | tee -a /etc/apt/sources.list.d/google-cloud-sdk.list && \ - curl https://packages.cloud.google.com/apt/doc/apt-key.gpg | apt-key add - && \ - apt-get update && \ - apt-get install -y google-cloud-sdk - -## Install latest samtools from source -RUN mkdir /tools && \ - cd /tools && \ - wget https://github.com/samtools/samtools/releases/download/1.9/samtools-1.9.tar.bz2 && tar xvjf samtools-1.9.tar.bz2 && \ - cd samtools-1.9 && \ - ./configure && \ - make -j 4 && \ - cp samtools .. - -## Append tools to path -ENV PATH=/tools/:${PATH} - -## Copy Scripts -COPY filterFastqByReadName.py /tools/ - diff --git a/dockers/skylab/subset-fastq-dataset/build.sh b/dockers/skylab/subset-fastq-dataset/build.sh deleted file mode 100755 index 73a71beb41..0000000000 --- a/dockers/skylab/subset-fastq-dataset/build.sh +++ /dev/null @@ -1,13 +0,0 @@ -#!/bin/bash - -tag=$1 - -if [ -z $tag ]; then - echo -e "\nYou must provide a tag" - echo -e "\nUsage: bash build_docker.sh TAG\n" - exit 1 -fi - -docker build -t quay.io/humancellatlas/secondary-analysis-subset-fastq:$tag . 
- -echo You can now push with docker push quay.io/humancellatlas/secondary-analysis-subset-fastq:$tag diff --git a/dockers/skylab/subset-fastq-dataset/filterFastqByReadName.py b/dockers/skylab/subset-fastq-dataset/filterFastqByReadName.py deleted file mode 100755 index dafc25497a..0000000000 --- a/dockers/skylab/subset-fastq-dataset/filterFastqByReadName.py +++ /dev/null @@ -1,36 +0,0 @@ -#!/usr/bin/env python3 - -import click -import sys -from Bio import SeqIO -import gzip - -@click.command() -@click.option('--in-fastq-gz', help='gz compressed input fastq file') -@click.option('--out-fastq-gz', help='gz compressed output fastq file') -@click.option('--keep-reads', help='gz compressed file with read names to keep') -@click.option('--verbose', help='verbose', default=False, is_flag=True, flag_value=True) -def filter_by_read_name(in_fastq_gz, out_fastq_gz, keep_reads, verbose): - # Put reads that are in the keep list in the output file - keep_read_set = set(line.decode('ascii').rstrip() for line in open(keep_reads, 'rb')) - if verbose: - print('Done loading keep read list', file=sys.stderr) - - # Loop over input and filter reads - with gzip.open(out_fastq_gz, 'wt') as output_file: - with gzip.open(in_fastq_gz, 'rt') as input_file: - counter = 0 - for rec in SeqIO.parse(input_file, 'fastq'): - counter += 1 - if verbose and counter % 1e5 == 0: - print('Processed {} reads'.format(counter)) - if rec.id in keep_read_set: - SeqIO.write(rec, output_file, 'fastq') - - # Print completed message - if verbose: - print('Completed', file=sys.stderr) - - -if __name__ == '__main__': - filter_by_read_name() diff --git a/dockers/skylab/trim_adapters/Dockerfile b/dockers/skylab/trim_adapters/Dockerfile deleted file mode 100644 index 76f349fc02..0000000000 --- a/dockers/skylab/trim_adapters/Dockerfile +++ /dev/null @@ -1,25 +0,0 @@ -FROM ubuntu:16.04 -LABEL software="ea-utils package" \ -      version="1.04.807" \ -      description="Command-line tools for processing biological 
sequencing data. Barcode demultiplexing, adapter trimming, etc. Primarily written to support an Illumina based pipeline - but should work with any FASTQs." \ -      website="http://expressionanalysis.github.io/ea-utils/" - -# Install compiler  -RUN apt update && apt install -y \ - zlib1g-dev \ - cmake \ - automake \ - wget \ - build-essential \ - libgsl0-dev - -# Install ea-utils -WORKDIR /usr/local/ -RUN wget "https://github.com/ExpressionAnalysis/ea-utils/archive/refs/tags/1.04.807.tar.gz" -RUN tar -xzvf 1.04.807.tar.gz -WORKDIR /usr/local/ea-utils-1.04.807/clipper -RUN make -RUN make install -ENV PATH /usr/local/ea-utils-1.04.807/clipper/:$PATH -# Cleanup -RUN apt-get clean diff --git a/dockers/skylab/trim_adapters/build.sh b/dockers/skylab/trim_adapters/build.sh deleted file mode 100644 index 85893f42e7..0000000000 --- a/dockers/skylab/trim_adapters/build.sh +++ /dev/null @@ -1,13 +0,0 @@ -#!/bin/bash - -tag=$1 - -if [ -z $tag ]; then - echo -e "\nYou must provide a tag" - echo -e "\nUsage: bash build_docker.sh TAG\n" - exit 1 -fi - -docker build -t quay.io/humancellatlas/snss2-trim-adapters:$tag . - -echo "You can now push with docker push quay.io/humancellatlas/snss2-trim-adapters:$tag" diff --git a/dockers/skylab/umi-tools/Dockerfile b/dockers/skylab/umi-tools/Dockerfile deleted file mode 100644 index ce530e864b..0000000000 --- a/dockers/skylab/umi-tools/Dockerfile +++ /dev/null @@ -1,44 +0,0 @@ -FROM python:3.6.2 - -LABEL maintainer="Nick Barkas " \ - software="umi_tools" \ - version="0.5.5" \ - description="tools for extraction correction, deduplication and counting of UMIs" \ - website="https://github.com/CGATOxford/UMI-tools" - -RUN git clone https://github.com/CGATOxford/UMI-tools.git -WORKDIR UMI-tools -RUN git checkout tags/0.5.5 -RUN pip install . 
- -RUN mkdir /root/tools -COPY getUntaggedReads /root/tools - -ENV PATH="/root/tools/:$PATH" - -RUN apt update && \ - apt install -y \ - wget \ - bzip2 \ - g++ \ - cmake \ - curl \ - libncurses5-dev \ - zlib1g-dev \ - libbz2-dev \ - zip \ - unzip \ - liblzma-dev \ - openssl \ - libcurl4-openssl-dev \ - libssl-dev - -WORKDIR /usr/local/samtools -ADD https://github.com/samtools/samtools/releases/download/1.6/samtools-1.6.tar.bz2 . - -RUN tar -xvf samtools-1.6.tar.bz2 && \ - rm samtools-1.6.tar.bz2 && \ - cd samtools-1.6 && \ - ./configure --prefix=/usr && \ - make && \ - make install \ No newline at end of file diff --git a/dockers/skylab/umi-tools/build.sh b/dockers/skylab/umi-tools/build.sh deleted file mode 100755 index 2f3be5936c..0000000000 --- a/dockers/skylab/umi-tools/build.sh +++ /dev/null @@ -1,14 +0,0 @@ -#!/bin/bash - -tag=$1 - -if [ -z $tag ]; then - echo -e "\nYou must provide a tag" - echo -e "\nUsage: bash build_docker.sh TAG\n" - exit 1 -fi - -docker build -t quay.io/humancellatlas/secondary-analysis-umitools:$tag . 
- -echo You can now push with -echo docker push quay.io/humancellatlas/secondary-analysis-umitools:$tag \ No newline at end of file diff --git a/dockers/skylab/umi-tools/getUntaggedReads b/dockers/skylab/umi-tools/getUntaggedReads deleted file mode 100755 index 978071b832..0000000000 --- a/dockers/skylab/umi-tools/getUntaggedReads +++ /dev/null @@ -1,30 +0,0 @@ -#!/usr/bin/env python - -import argparse -import pysam - -parser = argparse.ArgumentParser(description="Extract all the reads from a bam file that do not have CB or GE tags") -parser.add_argument('--in-bam-file', dest='inbam', help='input bam file') -parser.add_argument('--out-bam-file', dest='outbam', help='output bam file') - -args = parser.parse_args() - - -def getUntaggedReads(in_bam_file,out_bam_file,verbose=True): - inbam=pysam.AlignmentFile(in_bam_file,'rb') - outbam=pysam.AlignmentFile(out_bam_file,'wb',template=inbam) - counter=0 - for read in inbam: - counter+=1 - try: - GE_tag=read.get_tag('GE'); - CB_tag=read.get_tag('CB'); - except KeyError: - outbam.write(read) - pass - if(counter % 1e5 == 0): - print('Processed {} reads'.format(counter)); - outbam.close() - inbam.close() - -getUntaggedReads(args.inbam,args.outbam) diff --git a/pipelines/broad/arrays/imputation/Imputation.changelog.md b/pipelines/broad/arrays/imputation/Imputation.changelog.md index 8889d65e0f..9af896d368 100644 --- a/pipelines/broad/arrays/imputation/Imputation.changelog.md +++ b/pipelines/broad/arrays/imputation/Imputation.changelog.md @@ -1,3 +1,24 @@ +# 1.1.5 +2022-09-30 (Date of Last Commit) + +* Updated BCFTools/VCFTools and Minimac4 Docker images to fix vulnerabilities. +* Updated tasks FormatImputationOutputs, FormatImputationWideOutputs, and IngestOutputsToTDR with GCR images instead of Dockerhub. 
+ +# 1.1.4 +2022-08-23 (Date of Last Commit) + +* Updated BCFTools/VCFTools docker image + +# 1.1.3 +2022-08-03 (Date of Last Commit) + +* Updated BCFTools/VCFTools Minimac4 Docker images + +# 1.1.2 +2022-07-15 (Date of Last Commit) + +* Updated task MakeOptionalOutputBam in Utilities.wdl, this update has no effect on this pipeline + # 1.1.1 2022-06-01 (Date of Last Commit) diff --git a/pipelines/broad/arrays/imputation/Imputation.wdl b/pipelines/broad/arrays/imputation/Imputation.wdl index d35b91fde5..7e0490fd04 100644 --- a/pipelines/broad/arrays/imputation/Imputation.wdl +++ b/pipelines/broad/arrays/imputation/Imputation.wdl @@ -6,7 +6,7 @@ import "../../../../tasks/broad/Utilities.wdl" as utils workflow Imputation { - String pipeline_version = "1.1.1" + String pipeline_version = "1.1.5" input { Int chunkLength = 25000000 diff --git a/pipelines/broad/arrays/single_sample/Arrays.changelog.md b/pipelines/broad/arrays/single_sample/Arrays.changelog.md index 3dffc24897..546e8621a6 100644 --- a/pipelines/broad/arrays/single_sample/Arrays.changelog.md +++ b/pipelines/broad/arrays/single_sample/Arrays.changelog.md @@ -1,3 +1,16 @@ +# 2.6.15 +2022-09-30 (Date of Last Commit) + +* Updated Picard-Python Docker image in Utilities.wdl to fix vulnerabilities. +* Updated tasks FormatArraysOutputs and IngestOutputsToTDR with GCR images instead of Dockerhub. + +# 2.6.14 +2022-09-07 (Date of Last Commit) + +* Updated task MakeOptionalOutputBam in Utilities.wdl, this update has no effect on this pipeline +* Updated task FormatArraysOutputs in InternalArrraysTasks.wdl with new docker tag to accommodate changes for BroadInternalArrays pipeline. Change has no effect on this pipeline. +* Updated task IngestOutputsToTDR in InternalTasks.wdl with new docker tag to accommodate changes for BroadInternalArrays pipeline. Change has no effect on this pipeline. 
+ # 2.6.13 2022-06-21 (Date of Last Commit) diff --git a/pipelines/broad/arrays/single_sample/Arrays.wdl b/pipelines/broad/arrays/single_sample/Arrays.wdl index 5145beef60..c3654c3819 100644 --- a/pipelines/broad/arrays/single_sample/Arrays.wdl +++ b/pipelines/broad/arrays/single_sample/Arrays.wdl @@ -23,7 +23,7 @@ import "../../../../tasks/broad/Utilities.wdl" as utils workflow Arrays { - String pipeline_version = "2.6.13" + String pipeline_version = "2.6.15" input { String chip_well_barcode diff --git a/pipelines/broad/arrays/validate_chip/ValidateChip.changelog.md b/pipelines/broad/arrays/validate_chip/ValidateChip.changelog.md index 50f5c41634..5f5c743439 100644 --- a/pipelines/broad/arrays/validate_chip/ValidateChip.changelog.md +++ b/pipelines/broad/arrays/validate_chip/ValidateChip.changelog.md @@ -1,3 +1,9 @@ +# 1.16.0 +2022-09-30 (Date of Last Commit) + +* Updated task IngestOutputsToTDR in InternalTasks.wdl with new docker tag to accommodate changes for BroadInternalArrays pipeline. Change has no effect on this pipeline. +* Updated task IngestOutputsToTDR with GCR images instead of Dockerhub. 
+ # 1.15.9 2022-06-15 (Date of Last Commit) diff --git a/pipelines/broad/arrays/validate_chip/ValidateChip.wdl b/pipelines/broad/arrays/validate_chip/ValidateChip.wdl index 4798126a05..79a05c1e72 100644 --- a/pipelines/broad/arrays/validate_chip/ValidateChip.wdl +++ b/pipelines/broad/arrays/validate_chip/ValidateChip.wdl @@ -21,7 +21,7 @@ import "../../../../tasks/broad/InternalArraysTasks.wdl" as InternalTasks workflow ValidateChip { - String pipeline_version = "1.15.9" + String pipeline_version = "1.16.0" input { String sample_alias diff --git a/pipelines/broad/dna_seq/germline/single_sample/exome/ExomeGermlineSingleSample.changelog.md b/pipelines/broad/dna_seq/germline/single_sample/exome/ExomeGermlineSingleSample.changelog.md index 913a5195d9..f3fcb9c7d0 100644 --- a/pipelines/broad/dna_seq/germline/single_sample/exome/ExomeGermlineSingleSample.changelog.md +++ b/pipelines/broad/dna_seq/germline/single_sample/exome/ExomeGermlineSingleSample.changelog.md @@ -1,3 +1,13 @@ +# 3.1.7 +2022-09-23 (Date of Last Commit) + +* Updated Picard-Python Docker image in Utilities.wdl to fix vulnerabilities. 
+ +# 3.1.6 +2022-07-15 (Date of Last Commit) + +* Updated task MakeOptionalOutputBam in Utilities.wdl, this update has no effect on this pipeline + # 3.1.5 2022-07-12 (Date of Last Commit) diff --git a/pipelines/broad/dna_seq/germline/single_sample/exome/ExomeGermlineSingleSample.wdl b/pipelines/broad/dna_seq/germline/single_sample/exome/ExomeGermlineSingleSample.wdl index 1cf517f75d..959652fab1 100644 --- a/pipelines/broad/dna_seq/germline/single_sample/exome/ExomeGermlineSingleSample.wdl +++ b/pipelines/broad/dna_seq/germline/single_sample/exome/ExomeGermlineSingleSample.wdl @@ -39,7 +39,7 @@ import "../../../../../../structs/dna_seq/DNASeqStructs.wdl" # WORKFLOW DEFINITION workflow ExomeGermlineSingleSample { - String pipeline_version = "3.1.5" + String pipeline_version = "3.1.7" input { diff --git a/pipelines/broad/dna_seq/germline/single_sample/ugwgs/UltimaGenomicsWholeGenomeGermline.changelog.md b/pipelines/broad/dna_seq/germline/single_sample/ugwgs/UltimaGenomicsWholeGenomeGermline.changelog.md index fba658513f..73ccc94bf2 100644 --- a/pipelines/broad/dna_seq/germline/single_sample/ugwgs/UltimaGenomicsWholeGenomeGermline.changelog.md +++ b/pipelines/broad/dna_seq/germline/single_sample/ugwgs/UltimaGenomicsWholeGenomeGermline.changelog.md @@ -1,3 +1,20 @@ +# 1.0.4 +2022-09-30 (Date of Last Commit) + +* Updated Picard-Python Docker image in Utilities.wdl to fix vulnerabilities. +* Updated task IngestOutputsToTDR with GCR images instead of Dockerhub. + +# 1.0.3 +2022-09-20 (Date of Last Commit) + +* Removed /cromwell_root/ prefix for output file paths in FilterVCF and TrainModel tasks. + +# 1.0.2 +2022-09-07 (Date of Last Commit) + +* Increased disk space in the MakeOptionalOutputBam task in Utilities.wdl +* Updated task IngestOutputsToTDR in InternalTasks.wdl with new docker tag to accommodate changes for BroadInternalArrays pipeline. Change has no effect on this pipeline. 
+ # 1.0.1 2022-06-21 (Date of Last Commit) diff --git a/pipelines/broad/dna_seq/germline/single_sample/ugwgs/UltimaGenomicsWholeGenomeGermline.wdl b/pipelines/broad/dna_seq/germline/single_sample/ugwgs/UltimaGenomicsWholeGenomeGermline.wdl index 3e750bf3a1..478f80f3e2 100644 --- a/pipelines/broad/dna_seq/germline/single_sample/ugwgs/UltimaGenomicsWholeGenomeGermline.wdl +++ b/pipelines/broad/dna_seq/germline/single_sample/ugwgs/UltimaGenomicsWholeGenomeGermline.wdl @@ -50,7 +50,7 @@ workflow UltimaGenomicsWholeGenomeGermline { filtering_model_no_gt_name: "String describing the optional filtering model; default set to rf_model_ignore_gt_incl_hpol_runs" } - String pipeline_version = "1.0.1" + String pipeline_version = "1.0.4" References references = alignment_references.references diff --git a/pipelines/broad/dna_seq/germline/single_sample/wgs/WholeGenomeGermlineSingleSample.changelog.md b/pipelines/broad/dna_seq/germline/single_sample/wgs/WholeGenomeGermlineSingleSample.changelog.md index 719fd7acf9..05a43b8916 100644 --- a/pipelines/broad/dna_seq/germline/single_sample/wgs/WholeGenomeGermlineSingleSample.changelog.md +++ b/pipelines/broad/dna_seq/germline/single_sample/wgs/WholeGenomeGermlineSingleSample.changelog.md @@ -1,3 +1,13 @@ +# 3.1.8 +2022-09-23 (Date of Last Commit) + +* Updated Picard-Python Docker image in Utilities.wdl to fix vulnerabilities. 
+ +# 3.1.7 +2022-07-15 (Date of Last Commit) + +* Updated task MakeOptionalOutputBam in Utilities.wdl, this update has no effect on this pipeline + # 3.1.6 2022-07-12 (Date of Last Commit) diff --git a/pipelines/broad/dna_seq/germline/single_sample/wgs/WholeGenomeGermlineSingleSample.wdl b/pipelines/broad/dna_seq/germline/single_sample/wgs/WholeGenomeGermlineSingleSample.wdl index 1b52023908..92df16f290 100644 --- a/pipelines/broad/dna_seq/germline/single_sample/wgs/WholeGenomeGermlineSingleSample.wdl +++ b/pipelines/broad/dna_seq/germline/single_sample/wgs/WholeGenomeGermlineSingleSample.wdl @@ -40,7 +40,7 @@ import "../../../../../../structs/dna_seq/DNASeqStructs.wdl" workflow WholeGenomeGermlineSingleSample { - String pipeline_version = "3.1.6" + String pipeline_version = "3.1.8" input { diff --git a/pipelines/broad/dna_seq/germline/single_sample/wgs/input_files/WholeGenomeGermlineSingleSample.inputs.plumbing.dragen_mode_best_results.json b/pipelines/broad/dna_seq/germline/single_sample/wgs/input_files/WholeGenomeGermlineSingleSample.inputs.plumbing.dragen_mode_best_results.json deleted file mode 100644 index 6812111404..0000000000 --- a/pipelines/broad/dna_seq/germline/single_sample/wgs/input_files/WholeGenomeGermlineSingleSample.inputs.plumbing.dragen_mode_best_results.json +++ /dev/null @@ -1,65 +0,0 @@ -{ - "WholeGenomeGermlineSingleSample.sample_and_unmapped_bams": { - "sample_name": "NA12878 PLUMBING", - "base_file_name": "NA12878_PLUMBING", - "flowcell_unmapped_bams": [ - "gs://broad-public-datasets/NA12878_downsampled_for_testing/unmapped/H06HDADXX130110.1.ATCACGAT.20k_reads.bam", - "gs://broad-public-datasets/NA12878_downsampled_for_testing/unmapped/H06HDADXX130110.2.ATCACGAT.20k_reads.bam", - "gs://broad-public-datasets/NA12878_downsampled_for_testing/unmapped/H06JUADXX130110.1.ATCACGAT.20k_reads.bam" - ], - "final_gvcf_base_name": "NA12878_PLUMBING", - "unmapped_bam_suffix": ".bam" - }, - - "WholeGenomeGermlineSingleSample.references": { - 
"contamination_sites_ud": "gs://gcp-public-data--broad-references/hg38/v0/contamination-resources/1000g/1000g.phase3.100k.b38.vcf.gz.dat.UD", - "contamination_sites_bed": "gs://gcp-public-data--broad-references/hg38/v0/contamination-resources/1000g/1000g.phase3.100k.b38.vcf.gz.dat.bed", - "contamination_sites_mu": "gs://gcp-public-data--broad-references/hg38/v0/contamination-resources/1000g/1000g.phase3.100k.b38.vcf.gz.dat.mu", - "calling_interval_list": "gs://gcp-public-data--broad-references/hg38/v0/wgs_calling_regions.hg38.interval_list", - "reference_fasta" : { - "ref_dict": "gs://gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.dict", - "ref_fasta": "gs://gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.fasta", - "ref_fasta_index": "gs://gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.fasta.fai", - "ref_alt": "gs://gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.fasta.64.alt", - "ref_sa": "gs://gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.fasta.64.sa", - "ref_amb": "gs://gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.fasta.64.amb", - "ref_bwt": "gs://gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.fasta.64.bwt", - "ref_ann": "gs://gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.fasta.64.ann", - "ref_pac": "gs://gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.fasta.64.pac", - "ref_str": "gs://gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.str" - }, - "known_indels_sites_vcfs": [ - "gs://gcp-public-data--broad-references/hg38/v0/Mills_and_1000G_gold_standard.indels.hg38.vcf.gz", - "gs://gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.known_indels.vcf.gz" - ], - "known_indels_sites_indices": [ - "gs://gcp-public-data--broad-references/hg38/v0/Mills_and_1000G_gold_standard.indels.hg38.vcf.gz.tbi", - 
"gs://gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.known_indels.vcf.gz.tbi" - ], - "dbsnp_vcf": "gs://gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.dbsnp138.vcf", - "dbsnp_vcf_index": "gs://gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.dbsnp138.vcf.idx", - "evaluation_interval_list": "gs://gcp-public-data--broad-references/hg38/v0/wgs_evaluation_regions.hg38.interval_list", - "haplotype_database_file": "gs://gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.haplotype_database.txt" - }, - "WholeGenomeGermlineSingleSample.dragmap_reference": { - "reference_bin": "gs://gcp-public-data--broad-references/hg38/v0/dragen_reference/reference.bin", - "hash_table_cfg_bin": "gs://gcp-public-data--broad-references/hg38/v0/dragen_reference/hash_table.cfg.bin", - "hash_table_cmp": "gs://gcp-public-data--broad-references/hg38/v0/dragen_reference/hash_table.cmp" - }, - - "WholeGenomeGermlineSingleSample.scatter_settings": { - "haplotype_scatter_count": 10, - "break_bands_at_multiples_of": 100000 - }, - - "WholeGenomeGermlineSingleSample.fingerprint_genotypes_file": "gs://broad-gotc-test-storage/single_sample/plumbing/bams/G96830.NA12878/G96830.NA12878.hg38.reference.fingerprint.vcf.gz", - "WholeGenomeGermlineSingleSample.fingerprint_genotypes_index": "gs://broad-gotc-test-storage/single_sample/plumbing/bams/G96830.NA12878/G96830.NA12878.hg38.reference.fingerprint.vcf.gz.tbi", - "WholeGenomeGermlineSingleSample.wgs_coverage_interval_list": "gs://gcp-public-data--broad-references/hg38/v0/wgs_coverage_regions.hg38.interval_list", - - "WholeGenomeGermlineSingleSample.papi_settings": { - "preemptible_tries": 3, - "agg_preemptible_tries": 3 - }, - - "WholeGenomeGermlineSingleSample.dragen_maximum_quality_mode": true -} diff --git a/pipelines/broad/dna_seq/germline/single_sample/wgs/input_files/WholeGenomeGermlineSingleSample.inputs.plumbing.dragen_mode_functional_equivalence.json 
b/pipelines/broad/dna_seq/germline/single_sample/wgs/input_files/WholeGenomeGermlineSingleSample.inputs.plumbing.dragen_mode_functional_equivalence.json deleted file mode 100644 index 8b6fbb39d8..0000000000 --- a/pipelines/broad/dna_seq/germline/single_sample/wgs/input_files/WholeGenomeGermlineSingleSample.inputs.plumbing.dragen_mode_functional_equivalence.json +++ /dev/null @@ -1,65 +0,0 @@ -{ - "WholeGenomeGermlineSingleSample.sample_and_unmapped_bams": { - "sample_name": "NA12878 PLUMBING", - "base_file_name": "NA12878_PLUMBING", - "flowcell_unmapped_bams": [ - "gs://broad-public-datasets/NA12878_downsampled_for_testing/unmapped/H06HDADXX130110.1.ATCACGAT.20k_reads.bam", - "gs://broad-public-datasets/NA12878_downsampled_for_testing/unmapped/H06HDADXX130110.2.ATCACGAT.20k_reads.bam", - "gs://broad-public-datasets/NA12878_downsampled_for_testing/unmapped/H06JUADXX130110.1.ATCACGAT.20k_reads.bam" - ], - "final_gvcf_base_name": "NA12878_PLUMBING", - "unmapped_bam_suffix": ".bam" - }, - - "WholeGenomeGermlineSingleSample.references": { - "contamination_sites_ud": "gs://gcp-public-data--broad-references/hg38/v0/contamination-resources/1000g/1000g.phase3.100k.b38.vcf.gz.dat.UD", - "contamination_sites_bed": "gs://gcp-public-data--broad-references/hg38/v0/contamination-resources/1000g/1000g.phase3.100k.b38.vcf.gz.dat.bed", - "contamination_sites_mu": "gs://gcp-public-data--broad-references/hg38/v0/contamination-resources/1000g/1000g.phase3.100k.b38.vcf.gz.dat.mu", - "calling_interval_list": "gs://gcp-public-data--broad-references/hg38/v0/wgs_calling_regions.hg38.interval_list", - "reference_fasta" : { - "ref_dict": "gs://gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.dict", - "ref_fasta": "gs://gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.fasta", - "ref_fasta_index": "gs://gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.fasta.fai", - "ref_alt": 
"gs://gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.fasta.64.alt", - "ref_sa": "gs://gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.fasta.64.sa", - "ref_amb": "gs://gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.fasta.64.amb", - "ref_bwt": "gs://gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.fasta.64.bwt", - "ref_ann": "gs://gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.fasta.64.ann", - "ref_pac": "gs://gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.fasta.64.pac", - "ref_str": "gs://gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.str" - }, - "known_indels_sites_vcfs": [ - "gs://gcp-public-data--broad-references/hg38/v0/Mills_and_1000G_gold_standard.indels.hg38.vcf.gz", - "gs://gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.known_indels.vcf.gz" - ], - "known_indels_sites_indices": [ - "gs://gcp-public-data--broad-references/hg38/v0/Mills_and_1000G_gold_standard.indels.hg38.vcf.gz.tbi", - "gs://gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.known_indels.vcf.gz.tbi" - ], - "dbsnp_vcf": "gs://gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.dbsnp138.vcf", - "dbsnp_vcf_index": "gs://gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.dbsnp138.vcf.idx", - "evaluation_interval_list": "gs://gcp-public-data--broad-references/hg38/v0/wgs_evaluation_regions.hg38.interval_list", - "haplotype_database_file": "gs://gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.haplotype_database.txt" - }, - "WholeGenomeGermlineSingleSample.dragmap_reference": { - "reference_bin": "gs://gcp-public-data--broad-references/hg38/v0/dragen_reference/reference.bin", - "hash_table_cfg_bin": "gs://gcp-public-data--broad-references/hg38/v0/dragen_reference/hash_table.cfg.bin", - "hash_table_cmp": "gs://gcp-public-data--broad-references/hg38/v0/dragen_reference/hash_table.cmp" - }, - - 
"WholeGenomeGermlineSingleSample.scatter_settings": { - "haplotype_scatter_count": 10, - "break_bands_at_multiples_of": 100000 - }, - - "WholeGenomeGermlineSingleSample.fingerprint_genotypes_file": "gs://broad-gotc-test-storage/single_sample/plumbing/bams/G96830.NA12878/G96830.NA12878.hg38.reference.fingerprint.vcf.gz", - "WholeGenomeGermlineSingleSample.fingerprint_genotypes_index": "gs://broad-gotc-test-storage/single_sample/plumbing/bams/G96830.NA12878/G96830.NA12878.hg38.reference.fingerprint.vcf.gz.tbi", - "WholeGenomeGermlineSingleSample.wgs_coverage_interval_list": "gs://gcp-public-data--broad-references/hg38/v0/wgs_coverage_regions.hg38.interval_list", - - "WholeGenomeGermlineSingleSample.papi_settings": { - "preemptible_tries": 3, - "agg_preemptible_tries": 3 - }, - - "WholeGenomeGermlineSingleSample.dragen_functional_equivalence_mode": true -} diff --git a/pipelines/broad/dna_seq/germline/single_sample/wgs/input_files/WholeGenomeGermlineSingleSample.inputs.plumbing.json b/pipelines/broad/dna_seq/germline/single_sample/wgs/input_files/WholeGenomeGermlineSingleSample.inputs.plumbing.json deleted file mode 100644 index 3be62aa814..0000000000 --- a/pipelines/broad/dna_seq/germline/single_sample/wgs/input_files/WholeGenomeGermlineSingleSample.inputs.plumbing.json +++ /dev/null @@ -1,57 +0,0 @@ -{ - "WholeGenomeGermlineSingleSample.sample_and_unmapped_bams": { - "sample_name": "NA12878 PLUMBING", - "base_file_name": "NA12878_PLUMBING", - "flowcell_unmapped_bams": [ - "gs://broad-public-datasets/NA12878_downsampled_for_testing/unmapped/H06HDADXX130110.1.ATCACGAT.20k_reads.bam", - "gs://broad-public-datasets/NA12878_downsampled_for_testing/unmapped/H06HDADXX130110.2.ATCACGAT.20k_reads.bam", - "gs://broad-public-datasets/NA12878_downsampled_for_testing/unmapped/H06JUADXX130110.1.ATCACGAT.20k_reads.bam" - ], - "final_gvcf_base_name": "NA12878_PLUMBING", - "unmapped_bam_suffix": ".bam" - }, - - "WholeGenomeGermlineSingleSample.references": { - 
"contamination_sites_ud": "gs://gcp-public-data--broad-references/hg38/v0/contamination-resources/1000g/1000g.phase3.100k.b38.vcf.gz.dat.UD", - "contamination_sites_bed": "gs://gcp-public-data--broad-references/hg38/v0/contamination-resources/1000g/1000g.phase3.100k.b38.vcf.gz.dat.bed", - "contamination_sites_mu": "gs://gcp-public-data--broad-references/hg38/v0/contamination-resources/1000g/1000g.phase3.100k.b38.vcf.gz.dat.mu", - "calling_interval_list": "gs://gcp-public-data--broad-references/hg38/v0/wgs_calling_regions.hg38.interval_list", - "reference_fasta" : { - "ref_dict": "gs://gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.dict", - "ref_fasta": "gs://gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.fasta", - "ref_fasta_index": "gs://gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.fasta.fai", - "ref_alt": "gs://gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.fasta.64.alt", - "ref_sa": "gs://gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.fasta.64.sa", - "ref_amb": "gs://gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.fasta.64.amb", - "ref_bwt": "gs://gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.fasta.64.bwt", - "ref_ann": "gs://gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.fasta.64.ann", - "ref_pac": "gs://gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.fasta.64.pac" - }, - "known_indels_sites_vcfs": [ - "gs://gcp-public-data--broad-references/hg38/v0/Mills_and_1000G_gold_standard.indels.hg38.vcf.gz", - "gs://gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.known_indels.vcf.gz" - ], - "known_indels_sites_indices": [ - "gs://gcp-public-data--broad-references/hg38/v0/Mills_and_1000G_gold_standard.indels.hg38.vcf.gz.tbi", - "gs://gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.known_indels.vcf.gz.tbi" - ], - "dbsnp_vcf": 
"gs://gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.dbsnp138.vcf", - "dbsnp_vcf_index": "gs://gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.dbsnp138.vcf.idx", - "evaluation_interval_list": "gs://gcp-public-data--broad-references/hg38/v0/wgs_evaluation_regions.hg38.interval_list", - "haplotype_database_file": "gs://gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.haplotype_database.txt" - }, - - "WholeGenomeGermlineSingleSample.scatter_settings": { - "haplotype_scatter_count": 10, - "break_bands_at_multiples_of": 100000 - }, - - "WholeGenomeGermlineSingleSample.fingerprint_genotypes_file": "gs://broad-gotc-test-storage/single_sample/plumbing/bams/G96830.NA12878/G96830.NA12878.hg38.reference.fingerprint.vcf.gz", - "WholeGenomeGermlineSingleSample.fingerprint_genotypes_index": "gs://broad-gotc-test-storage/single_sample/plumbing/bams/G96830.NA12878/G96830.NA12878.hg38.reference.fingerprint.vcf.gz.tbi", - "WholeGenomeGermlineSingleSample.wgs_coverage_interval_list": "gs://gcp-public-data--broad-references/hg38/v0/wgs_coverage_regions.hg38.interval_list", - - "WholeGenomeGermlineSingleSample.papi_settings": { - "preemptible_tries": 3, - "agg_preemptible_tries": 3 - } -} diff --git a/pipelines/broad/dna_seq/germline/single_sample/wgs/test_inputs/Plumbing/G96830.NA12878.json b/pipelines/broad/dna_seq/germline/single_sample/wgs/test_inputs/Plumbing/G96830.NA12878.json index 44ddf82824..772ee521b8 100644 --- a/pipelines/broad/dna_seq/germline/single_sample/wgs/test_inputs/Plumbing/G96830.NA12878.json +++ b/pipelines/broad/dna_seq/germline/single_sample/wgs/test_inputs/Plumbing/G96830.NA12878.json @@ -46,8 +46,8 @@ "break_bands_at_multiples_of": 100000 }, - "WholeGenomeGermlineSingleSample.fingerprint_genotypes_file": "gs://broad-gotc-test-storage/germline_single_sample/wgs/plumbing/bams/NA12878_PLUMBING.hg38.reference.fingerprint.vcf.gz", - "WholeGenomeGermlineSingleSample.fingerprint_genotypes_index": 
"gs://broad-gotc-test-storage/germline_single_sample/wgs/plumbing/bams/NA12878_PLUMBING.hg38.reference.fingerprint.vcf.gz.tbi", + "WholeGenomeGermlineSingleSample.fingerprint_genotypes_file": "gs://broad-public-datasets/NA12878_downsampled_for_testing/unmapped/NA12878_PLUMBING.hg38.reference.fingerprint.vcf.gz", + "WholeGenomeGermlineSingleSample.fingerprint_genotypes_index": "gs://broad-public-datasets/NA12878_downsampled_for_testing/unmapped/NA12878_PLUMBING.hg38.reference.fingerprint.vcf.gz.tbi", "WholeGenomeGermlineSingleSample.wgs_coverage_interval_list": "gs://gcp-public-data--broad-references/hg38/v0/wgs_coverage_regions.hg38.interval_list", diff --git a/pipelines/broad/dna_seq/germline/variant_calling/VariantCalling.changelog.md b/pipelines/broad/dna_seq/germline/variant_calling/VariantCalling.changelog.md index b4328759e7..2f4fe5a840 100644 --- a/pipelines/broad/dna_seq/germline/variant_calling/VariantCalling.changelog.md +++ b/pipelines/broad/dna_seq/germline/variant_calling/VariantCalling.changelog.md @@ -1,3 +1,13 @@ +# 2.1.7 +2022-09-23 (Date of Last Commit) + +* Updated Picard-Python Docker image in Utilities.wdl to fix vulnerabilities. 
+ +# 2.1.6 +2022-07-15 (Date of Last Commit) + +* Updated task MakeOptionalOutputBam in Utilities.wdl, this update has no effect on this pipeline + # 2.1.5 2022-07-12 (Date of Last Commit) diff --git a/pipelines/broad/dna_seq/germline/variant_calling/VariantCalling.wdl b/pipelines/broad/dna_seq/germline/variant_calling/VariantCalling.wdl index 08e191217c..6546254903 100644 --- a/pipelines/broad/dna_seq/germline/variant_calling/VariantCalling.wdl +++ b/pipelines/broad/dna_seq/germline/variant_calling/VariantCalling.wdl @@ -9,7 +9,7 @@ import "../../../../../tasks/broad/DragenTasks.wdl" as DragenTasks workflow VariantCalling { - String pipeline_version = "2.1.5" + String pipeline_version = "2.1.7" input { diff --git a/pipelines/broad/dna_seq/somatic/single_sample/ugwgs/UltimaGenomicsWholeGenomeCramOnly.changelog.md b/pipelines/broad/dna_seq/somatic/single_sample/ugwgs/UltimaGenomicsWholeGenomeCramOnly.changelog.md index 8b972ff8fe..4a437c2ae8 100644 --- a/pipelines/broad/dna_seq/somatic/single_sample/ugwgs/UltimaGenomicsWholeGenomeCramOnly.changelog.md +++ b/pipelines/broad/dna_seq/somatic/single_sample/ugwgs/UltimaGenomicsWholeGenomeCramOnly.changelog.md @@ -1,3 +1,20 @@ +# 1.0.4 +2022-09-30 (Date of Last Commit) + +* Updated Picard-Python Docker image in Utilities.wdl to fix vulnerabilities. +* Updated task IngestOutputsToTDR with GCR images instead of Dockerhub. + +# 1.0.3 +2022-09-20 (Date of Last Commit) + +* Removed /cromwell_root/ prefix for output file paths in FilterVCF and TrainModel tasks. + +# 1.0.2 +2022-09-07 (Date of Last Commit) + +* Increased disk space in the MakeOptionalOutputBam task in Utilities.wdl +* Updated task IngestOutputsToTDR in InternalTasks.wdl with new docker tag to accommodate changes for BroadInternalArrays pipeline. Change has no effect on this pipeline. 
+ # 1.0.1 2022-06-21 (Date of Last Commit) diff --git a/pipelines/broad/dna_seq/somatic/single_sample/ugwgs/UltimaGenomicsWholeGenomeCramOnly.wdl b/pipelines/broad/dna_seq/somatic/single_sample/ugwgs/UltimaGenomicsWholeGenomeCramOnly.wdl index fdc89ac17e..17a5a96053 100644 --- a/pipelines/broad/dna_seq/somatic/single_sample/ugwgs/UltimaGenomicsWholeGenomeCramOnly.wdl +++ b/pipelines/broad/dna_seq/somatic/single_sample/ugwgs/UltimaGenomicsWholeGenomeCramOnly.wdl @@ -43,7 +43,7 @@ workflow UltimaGenomicsWholeGenomeCramOnly { save_bam_file: "If true, then save intermeidate ouputs used by germline pipeline (such as the output BAM) otherwise they won't be kept as outputs." } - String pipeline_version = "1.0.1" + String pipeline_version = "1.0.4" References references = alignment_references.references diff --git a/pipelines/broad/internal/arrays/imputation/BroadInternalImputation.changelog.md b/pipelines/broad/internal/arrays/imputation/BroadInternalImputation.changelog.md index cc8649a5b8..f81da4b687 100644 --- a/pipelines/broad/internal/arrays/imputation/BroadInternalImputation.changelog.md +++ b/pipelines/broad/internal/arrays/imputation/BroadInternalImputation.changelog.md @@ -1,3 +1,40 @@ +# 1.1.1 +2022-09-30 (Date of Last Commit) + +* Updated Picard-Python, BCFTools/VCFTools, and Minimac4 Docker images to fix vulnerabilities. +* Updated tasks FormatImputationOutputs, FormatImputationWideOutputs, and IngestOutputsToTDR with GCR images instead of Dockerhub. + +# 1.1.0 +2022-09-20 (Date of Last Commit) + +* Updated call to IngestOutputsToTDR to remove 'prefix_column'. Python script has been updated and no longer requires this input parameter. +* Update task IngestOutputsToTDR to not require 'prefix_column'. Python script has been updated and no longer requires this input parameter. + +* Update task FormatImputationOutputs with new docker tag. +* Update task FormatImputationWideOutputs with new docker tag. +* Update task IngestOutputsToTDR with new docker tag.
+* Update tasks FormatImputationOutputs, FormatImputationWideOutputs, and IngestOutputsToTDR with GCR image instead of DockerHub image. + +# 1.0.9 +2022-08-29 (Date of Last Commit) + +* Updated call to IngestOutputsToTDR to pass in column names to be used for user action in command block. Python script in task was updated to a new version containing a new required command line parameter, 'prefix_column' + +# 1.0.8 +2022-08-23 (Date of Last Commit) + +* Updated BCFTools/VCFTools docker image + +# 1.0.7 +2022-08-03 (Date of Last Commit) + +* Updated BCFTools/VCFTools Minimac4 Docker images + +# 1.0.6 +2022-07-15 (Date of Last Commit) + +* Updated task MakeOptionalOutputBam in Utilities.wdl, this update has no effect on this pipeline + # 1.0.5 2022-06-10 (Date of Last Commit) diff --git a/pipelines/broad/internal/arrays/imputation/BroadInternalImputation.wdl b/pipelines/broad/internal/arrays/imputation/BroadInternalImputation.wdl index ed50f8b2ce..89b4714161 100644 --- a/pipelines/broad/internal/arrays/imputation/BroadInternalImputation.wdl +++ b/pipelines/broad/internal/arrays/imputation/BroadInternalImputation.wdl @@ -8,7 +8,7 @@ workflow BroadInternalImputation { meta { description: "Push outputs of Imputation.wdl to TDR dataset table ImputationOutputsTable and split out Imputation arrays into ImputationWideOutputsTable." } - String pipeline_version = "1.0.5" + String pipeline_version = "1.1.1" input { # inputs to wrapper task diff --git a/pipelines/broad/internal/arrays/single_sample/BroadInternalArrays.changelog.md b/pipelines/broad/internal/arrays/single_sample/BroadInternalArrays.changelog.md index bc31e72c5e..ef3909e77e 100644 --- a/pipelines/broad/internal/arrays/single_sample/BroadInternalArrays.changelog.md +++ b/pipelines/broad/internal/arrays/single_sample/BroadInternalArrays.changelog.md @@ -1,3 +1,29 @@ +# 1.0.7 +2022-09-30 (Date of Last Commit) + +* Updated Picard-Python Docker image in Utilities.wdl to fix vulnerabilities. 
+* Updated tasks FormatArraysOutputs and IngestOutputsToTDR with GCR images instead of Dockerhub. + +# 1.0.6 +2022-09-20 (Date of Last Commit) + +* Updated call to IngestOutputsToTDR to remove 'prefix_column'. Python script has been updated and no longer requires this input parameter. +* Update task IngestOutputsToTDR to not require 'prefix_column'. Python script has been updated and no longer requires this input parameter. + +* Update task FormatArraysOutputs with new docker tag. +* Update task IngestOutputsToTDR with new docker tag. +* Update tasks FormatArraysOutputs and IngestOutputsToTDR with GCR image instead of DockerHub image. + +# 1.0.5 +2022-08-29 (Date of Last Commit) + +* Updated call to IngestOutputsToTDR to pass in column names to be used for user action in command block. Python script in task was updated to a new version containing a new required command line parameter, 'prefix_column' + +# 1.0.4 +2022-07-15 (Date of Last Commit) + +* Updated task MakeOptionalOutputBam in Utilities.wdl, this update has no effect on this pipeline + # 1.0.3 2022-06-21 (Date of Last Commit) diff --git a/pipelines/broad/internal/arrays/single_sample/BroadInternalArrays.wdl b/pipelines/broad/internal/arrays/single_sample/BroadInternalArrays.wdl index 68a52d1e4d..062a3f5dac 100644 --- a/pipelines/broad/internal/arrays/single_sample/BroadInternalArrays.wdl +++ b/pipelines/broad/internal/arrays/single_sample/BroadInternalArrays.wdl @@ -9,7 +9,7 @@ workflow BroadInternalArrays { description: "Push outputs of Arrays.wdl to TDR dataset table ArraysOutputsTable."
} - String pipeline_version = "1.0.3" + String pipeline_version = "1.0.7" input { # inputs to wrapper task diff --git a/pipelines/broad/internal/dna_seq/germline/single_sample/UltimaGenomics/BroadInternalUltimaGenomics.changelog.md b/pipelines/broad/internal/dna_seq/germline/single_sample/UltimaGenomics/BroadInternalUltimaGenomics.changelog.md index 0a1ba1167e..77c154b608 100644 --- a/pipelines/broad/internal/dna_seq/germline/single_sample/UltimaGenomics/BroadInternalUltimaGenomics.changelog.md +++ b/pipelines/broad/internal/dna_seq/germline/single_sample/UltimaGenomics/BroadInternalUltimaGenomics.changelog.md @@ -1,3 +1,20 @@ +# 1.0.5 +2022-09-30 (Date of Last Commit) + +* Updated Picard-Python Docker image in Utilities.wdl to fix vulnerabilities. +* Updated task IngestOutputsToTDR with GCR images instead of Dockerhub. + +# 1.0.4 +2022-09-20 (Date of Last Commit) + +* Removed /cromwell_root/ prefix for output file paths in FilterVCF and TrainModel tasks. + +# 1.0.3 +2022-09-07 (Date of Last Commit) + +* Increased disk space in the MakeOptionalOutputBam task in Utilities.wdl +* Updated task IngestOutputsToTDR in InternalTasks.wdl with new docker tag to accommodate changes for BroadInternalArrays pipeline. Change has no effect on this pipeline. 
+ # 1.0.2 2022-07-07 (Date of Last Commit) diff --git a/pipelines/broad/internal/dna_seq/germline/single_sample/UltimaGenomics/BroadInternalUltimaGenomics.wdl b/pipelines/broad/internal/dna_seq/germline/single_sample/UltimaGenomics/BroadInternalUltimaGenomics.wdl index e9bfea1576..9214d8a103 100644 --- a/pipelines/broad/internal/dna_seq/germline/single_sample/UltimaGenomics/BroadInternalUltimaGenomics.wdl +++ b/pipelines/broad/internal/dna_seq/germline/single_sample/UltimaGenomics/BroadInternalUltimaGenomics.wdl @@ -6,7 +6,7 @@ import "../../../../../../../pipelines/broad/qc/CheckFingerprint.wdl" as FP workflow BroadInternalUltimaGenomics { - String pipeline_version = "1.0.2" + String pipeline_version = "1.0.5" input { diff --git a/pipelines/broad/internal/rna_seq/BroadInternalRNAWithUMIs.changelog.md b/pipelines/broad/internal/rna_seq/BroadInternalRNAWithUMIs.changelog.md index 5130065c36..c86855ce0b 100644 --- a/pipelines/broad/internal/rna_seq/BroadInternalRNAWithUMIs.changelog.md +++ b/pipelines/broad/internal/rna_seq/BroadInternalRNAWithUMIs.changelog.md @@ -1,3 +1,21 @@ +# 1.0.18 +2022-09-30 (Date of Last Commit) + +* Updated Picard-Python Docker image in Utilities.wdl to fix vulnerabilities. +* Updated task IngestOutputsToTDR with GCR images instead of Dockerhub. + +# 1.0.17 +2022-09-07 (Date of Last Commit) + +* Update TDR ingest script task and docker to remove staging bucket, specify timestamp fields, and use merge ingest strategy +* Remove transcriptome bam index from output +* Updated task IngestOutputsToTDR in InternalTasks.wdl with new docker tag to accommodate changes for BroadInternalArrays pipeline. Change has no effect on this pipeline. 
+ +# 1.0.16 +2022-07-15 (Date of Last Commit) + +* Updated task MakeOptionalOutputBam in Utilities.wdl, this update has no effect on this pipeline + # 1.0.15 2022-06-21 (Date of Last Commit) diff --git a/pipelines/broad/internal/rna_seq/BroadInternalRNAWithUMIs.wdl b/pipelines/broad/internal/rna_seq/BroadInternalRNAWithUMIs.wdl index db9b76b4aa..b2742fb046 100644 --- a/pipelines/broad/internal/rna_seq/BroadInternalRNAWithUMIs.wdl +++ b/pipelines/broad/internal/rna_seq/BroadInternalRNAWithUMIs.wdl @@ -7,7 +7,7 @@ import "../../../../tasks/broad/Utilities.wdl" as utils workflow BroadInternalRNAWithUMIs { - String pipeline_version = "1.0.15" + String pipeline_version = "1.0.18" input { @@ -32,7 +32,6 @@ workflow BroadInternalRNAWithUMIs { # Terra Data Repo dataset information String? tdr_dataset_uuid String? tdr_sample_id - String? tdr_staging_bucket String environment File vault_token_path @@ -66,8 +65,7 @@ workflow BroadInternalRNAWithUMIs { environment: "The environment (dev or prod) used for determining which service to use to retrieve Mercury fingerprints" vault_token_path: "The path to the vault token used for accessing the Mercury Fingerprint Store" tdr_dataset_uuid: "Optional string used to define the Terra Data Repo (TDR) dataset to which outputs will be ingested" - tdr_sample_id: "Optional string used to identify the sample being processed; this is the primary key in the TDR dataset" - tdr_staging_bucket: "Optional string defining the GCS bucket to use to stage files for loading to TDR; the workspace bucket is recommended" + tdr_sample_id: "Optional string used to identify the sample being processed; this must be the primary key in the TDR dataset" } # make sure either hg19 or hg38 is supplied as reference_build input @@ -129,12 +127,11 @@ workflow BroadInternalRNAWithUMIs { output_basename = RNAWithUMIs.sample_name } - if (defined(tdr_dataset_uuid) && defined(tdr_sample_id) && defined(tdr_staging_bucket)) { + if (defined(tdr_dataset_uuid) && 
defined(tdr_sample_id)) { call tasks.formatPipelineOutputs { input: sample_id = select_first([tdr_sample_id, ""]), transcriptome_bam = RNAWithUMIs.transcriptome_bam, - transcriptome_bam_index = RNAWithUMIs.transcriptome_bam_index, transcriptome_duplicate_metrics = RNAWithUMIs.transcriptome_duplicate_metrics, output_bam = RNAWithUMIs.output_bam, output_bam_index = RNAWithUMIs.output_bam_index, @@ -166,19 +163,18 @@ workflow BroadInternalRNAWithUMIs { call tasks.updateOutputsInTDR { input: tdr_dataset_uuid = select_first([tdr_dataset_uuid, ""]), - outputs_json = formatPipelineOutputs.pipeline_outputs_json, - sample_id = select_first([tdr_sample_id, ""]), - staging_bucket = select_first([tdr_staging_bucket, ""]) + outputs_json = formatPipelineOutputs.pipeline_outputs_json } } output { File transcriptome_bam = RNAWithUMIs.transcriptome_bam - File transcriptome_bam_index = RNAWithUMIs.transcriptome_bam_index - File transcriptome_duplicate_metrics = RNAWithUMIs.transcriptome_duplicate_metrics File output_bam = RNAWithUMIs.output_bam File output_bam_index = RNAWithUMIs.output_bam_index + File duplicate_metrics = RNAWithUMIs.duplicate_metrics + File transcriptome_duplicate_metrics = RNAWithUMIs.transcriptome_duplicate_metrics + File rnaseqc2_gene_tpm = RNAWithUMIs.rnaseqc2_gene_tpm File rnaseqc2_gene_counts = RNAWithUMIs.rnaseqc2_gene_counts File rnaseqc2_exon_counts = RNAWithUMIs.rnaseqc2_exon_counts diff --git a/pipelines/broad/qc/CheckFingerprint.changelog.md b/pipelines/broad/qc/CheckFingerprint.changelog.md index 971143f491..ee17bab988 100644 --- a/pipelines/broad/qc/CheckFingerprint.changelog.md +++ b/pipelines/broad/qc/CheckFingerprint.changelog.md @@ -1,3 +1,15 @@ +# 1.0.9 +2022-09-30 (Date of Last Commit) + +* Updated Picard-Python Docker image in Utilities.wdl to fix vulnerabilities. +* Updated task IngestOutputsToTDR with GCR images instead of Dockerhub. 
+ +# 1.0.8 +2022-09-07 (Date of Last Commit) + +* Updated task MakeOptionalOutputBam in Utilities.wdl, this update has no effect on this pipeline +* Updated task IngestOutputsToTDR in InternalTasks.wdl with new docker tag to accommodate changes for BroadInternalArrays pipeline. Change has no effect on this pipeline. + # 1.0.7 2022-06-16 (Date of Last Commit) diff --git a/pipelines/broad/qc/CheckFingerprint.wdl b/pipelines/broad/qc/CheckFingerprint.wdl index 0078c8cc53..80670abce3 100644 --- a/pipelines/broad/qc/CheckFingerprint.wdl +++ b/pipelines/broad/qc/CheckFingerprint.wdl @@ -24,7 +24,7 @@ import "../../../tasks/broad/Qc.wdl" as Qc workflow CheckFingerprint { - String pipeline_version = "1.0.7" + String pipeline_version = "1.0.9" input { File? input_vcf diff --git a/pipelines/broad/reprocessing/exome/ExomeReprocessing.changelog.md b/pipelines/broad/reprocessing/exome/ExomeReprocessing.changelog.md index c9b7a8dc36..4f9a3bc6d3 100644 --- a/pipelines/broad/reprocessing/exome/ExomeReprocessing.changelog.md +++ b/pipelines/broad/reprocessing/exome/ExomeReprocessing.changelog.md @@ -1,3 +1,13 @@ +# 3.1.7 +2022-09-23 (Date of Last Commit) + +* Updated Picard-Python Docker image in Utilities.wdl to fix vulnerabilities. + +# 3.1.6 +2022-07-15 (Date of Last Commit) + +* Updated task MakeOptionalOutputBam in Utilities.wdl, this update has no effect on this pipeline + # 3.1.5 2022-07-12 (Date of Last Commit) diff --git a/pipelines/broad/reprocessing/exome/ExomeReprocessing.wdl b/pipelines/broad/reprocessing/exome/ExomeReprocessing.wdl index b971fc2043..914e0909d6 100644 --- a/pipelines/broad/reprocessing/exome/ExomeReprocessing.wdl +++ b/pipelines/broad/reprocessing/exome/ExomeReprocessing.wdl @@ -7,7 +7,7 @@ import "../../../../structs/dna_seq/DNASeqStructs.wdl" workflow ExomeReprocessing { - String pipeline_version = "3.1.5" + String pipeline_version = "3.1.7" input { File? 
input_cram diff --git a/pipelines/broad/reprocessing/external/exome/ExternalExomeReprocessing.changelog.md b/pipelines/broad/reprocessing/external/exome/ExternalExomeReprocessing.changelog.md index 108e4c3091..626256fe10 100644 --- a/pipelines/broad/reprocessing/external/exome/ExternalExomeReprocessing.changelog.md +++ b/pipelines/broad/reprocessing/external/exome/ExternalExomeReprocessing.changelog.md @@ -1,3 +1,13 @@ +# 3.1.9 +2022-09-23 (Date of Last Commit) + +* Updated Picard-Python Docker image in Utilities.wdl to fix vulnerabilities. + +# 3.1.8 +2022-07-15 (Date of Last Commit) + +* Updated task MakeOptionalOutputBam in Utilities.wdl, this update has no effect on this pipeline + # 3.1.7 2022-07-12 (Date of Last Commit) diff --git a/pipelines/broad/reprocessing/external/exome/ExternalExomeReprocessing.wdl b/pipelines/broad/reprocessing/external/exome/ExternalExomeReprocessing.wdl index 58d4c7fc9c..88df545d86 100644 --- a/pipelines/broad/reprocessing/external/exome/ExternalExomeReprocessing.wdl +++ b/pipelines/broad/reprocessing/external/exome/ExternalExomeReprocessing.wdl @@ -5,7 +5,7 @@ import "../../../../../tasks/broad/CopyFilesFromCloudToCloud.wdl" as Copy workflow ExternalExomeReprocessing { - String pipeline_version = "3.1.7" + String pipeline_version = "3.1.9" input { diff --git a/pipelines/broad/reprocessing/external/wgs/ExternalWholeGenomeReprocessing.changelog.md b/pipelines/broad/reprocessing/external/wgs/ExternalWholeGenomeReprocessing.changelog.md index 3e0a437989..2b34773730 100644 --- a/pipelines/broad/reprocessing/external/wgs/ExternalWholeGenomeReprocessing.changelog.md +++ b/pipelines/broad/reprocessing/external/wgs/ExternalWholeGenomeReprocessing.changelog.md @@ -1,3 +1,13 @@ +# 2.1.9 +2022-09-23 (Date of Last Commit) + +* Updated Picard-Python Docker image in Utilities.wdl to fix vulnerabilities. 
+ +# 2.1.8 +2022-07-15 (Date of Last Commit) + +* Updated task MakeOptionalOutputBam in Utilities.wdl, this update has no effect on this pipeline + # 2.1.7 2022-07-12 (Date of Last Commit) diff --git a/pipelines/broad/reprocessing/external/wgs/ExternalWholeGenomeReprocessing.wdl b/pipelines/broad/reprocessing/external/wgs/ExternalWholeGenomeReprocessing.wdl index 4e1b65b828..6801a66f46 100644 --- a/pipelines/broad/reprocessing/external/wgs/ExternalWholeGenomeReprocessing.wdl +++ b/pipelines/broad/reprocessing/external/wgs/ExternalWholeGenomeReprocessing.wdl @@ -6,7 +6,7 @@ import "../../../../../tasks/broad/CopyFilesFromCloudToCloud.wdl" as Copy workflow ExternalWholeGenomeReprocessing { - String pipeline_version = "2.1.7" + String pipeline_version = "2.1.9" input { File? input_cram diff --git a/pipelines/broad/reprocessing/wgs/WholeGenomeReprocessing.changelog.md b/pipelines/broad/reprocessing/wgs/WholeGenomeReprocessing.changelog.md index c5466945db..52672aa1ef 100644 --- a/pipelines/broad/reprocessing/wgs/WholeGenomeReprocessing.changelog.md +++ b/pipelines/broad/reprocessing/wgs/WholeGenomeReprocessing.changelog.md @@ -1,3 +1,13 @@ +# 3.1.8 +2022-09-23 (Date of Last Commit) + +* Updated Picard-Python Docker image in Utilities.wdl to fix vulnerabilities. + +# 3.1.7 +2022-07-15 (Date of Last Commit) + +* Updated task MakeOptionalOutputBam in Utilities.wdl, this update has no effect on this pipeline + # 3.1.6 2022-07-12 (Date of Last Commit) diff --git a/pipelines/broad/reprocessing/wgs/WholeGenomeReprocessing.wdl b/pipelines/broad/reprocessing/wgs/WholeGenomeReprocessing.wdl index df7884245b..e4c17f298c 100644 --- a/pipelines/broad/reprocessing/wgs/WholeGenomeReprocessing.wdl +++ b/pipelines/broad/reprocessing/wgs/WholeGenomeReprocessing.wdl @@ -6,7 +6,7 @@ import "../../../../structs/dna_seq/DNASeqStructs.wdl" workflow WholeGenomeReprocessing { - String pipeline_version = "3.1.6" + String pipeline_version = "3.1.8" input { File? 
input_cram diff --git a/pipelines/broad/rna_seq/RNAWithUMIsPipeline.changelog.md b/pipelines/broad/rna_seq/RNAWithUMIsPipeline.changelog.md index bc2e5d6d18..682e752a19 100644 --- a/pipelines/broad/rna_seq/RNAWithUMIsPipeline.changelog.md +++ b/pipelines/broad/rna_seq/RNAWithUMIsPipeline.changelog.md @@ -1,3 +1,13 @@ +# 1.0.8 +2022-07-29 (Date of Last Commit) + +* Specify the RSEM post-processed transcriptome bam as output +* Dynamically allocate memory in Fastp task, increase fixed memory to 8gb in RNASeQC2, and increased fixed memory to 64gb in GroupByUMI +* Remove transcriptome bam index from output +* Add monitoring script to fastp and GroupByUMI tasks during soft-launch/continuous improvement +* Add maxRetries to Fastp, GroupByUMI, and RNASeQC2. Multiplier = 2 is set elsewhere. + + # 1.0.7 2022-04-26 (Date of Last Commit) diff --git a/pipelines/broad/rna_seq/RNAWithUMIsPipeline.wdl b/pipelines/broad/rna_seq/RNAWithUMIsPipeline.wdl index b29e3d5a19..8ded234317 100644 --- a/pipelines/broad/rna_seq/RNAWithUMIsPipeline.wdl +++ b/pipelines/broad/rna_seq/RNAWithUMIsPipeline.wdl @@ -20,7 +20,7 @@ import "../../../tasks/broad/RNAWithUMIsTasks.wdl" as tasks workflow RNAWithUMIsPipeline { - String pipeline_version = "1.0.7" + String pipeline_version = "1.0.8" input { File?
bam @@ -173,7 +173,7 @@ workflow RNAWithUMIsPipeline { call tasks.PostprocessTranscriptomeForRSEM { input: - prefix = output_basename + ".transcriptome.RSEM", + prefix = output_basename + ".transcriptome", input_bam = UMIAwareDuplicateMarkingTranscriptome.duplicate_marked_bam } @@ -226,8 +226,7 @@ workflow RNAWithUMIsPipeline { output { String sample_name = GetSampleName.sample_name - File transcriptome_bam = UMIAwareDuplicateMarkingTranscriptome.duplicate_marked_bam - File transcriptome_bam_index = UMIAwareDuplicateMarkingTranscriptome.duplicate_marked_bam_index + File transcriptome_bam = PostprocessTranscriptomeForRSEM.output_bam File transcriptome_duplicate_metrics = UMIAwareDuplicateMarkingTranscriptome.duplicate_metrics File output_bam = UMIAwareDuplicateMarking.duplicate_marked_bam File output_bam_index = UMIAwareDuplicateMarking.duplicate_marked_bam_index diff --git a/pipelines/skylab/build_indices/BuildIndices.changelog.md b/pipelines/skylab/build_indices/BuildIndices.changelog.md index bbc085a859..f03a678571 100644 --- a/pipelines/skylab/build_indices/BuildIndices.changelog.md +++ b/pipelines/skylab/build_indices/BuildIndices.changelog.md @@ -1,3 +1,11 @@ +# 1.0.1 + +2022-09-21 (Date of Last Commit) + +* Docker image follows our guidelines +* Changed the type of biotypes from String to File so it localizes properly +* Changed the genome_fa to use the reference’s value instead of a modified_genome_fa that didn’t exist (which STAR was looking for and was then failing) + # 1.0.0 2022-02-01 (Date of Last Commit) diff --git a/pipelines/skylab/build_indices/BuildIndices.wdl b/pipelines/skylab/build_indices/BuildIndices.wdl index 258122c671..30b44c4a7c 100644 --- a/pipelines/skylab/build_indices/BuildIndices.wdl +++ b/pipelines/skylab/build_indices/BuildIndices.wdl @@ -91,7 +91,7 @@ task BuildStarSingleNucleus { String organism String organism_prefix References references - String? 
biotypes + File biotypes } meta { @@ -99,7 +99,6 @@ task BuildStarSingleNucleus { } String ref_name = "star_primary_gencode_~{organism}_v~{gtf_version}" String star_index_name = "modified_~{ref_name}.tar" - String genome_fa_modified = "modified_GRC~{organism_prefix}38.primary_assembly.genome.fa" String annotation_gtf_modified = "modified_gencode.v~{gtf_version}.primary_assembly.annotation.gtf" String annotation_gtf_introns = "introns_modified_gencode.v~{gtf_version}.primary_assembly.annotation.gtf" @@ -114,7 +113,7 @@ task BuildStarSingleNucleus { mkdir star STAR --runMode genomeGenerate \ --genomeDir star \ - --genomeFastaFiles ~{genome_fa_modified} \ + --genomeFastaFiles ~{references.genome_fa} \ --sjdbGTFfile ~{annotation_gtf_modified} \ --sjdbOverhang 100 \ --runThreadN 16 @@ -128,13 +127,13 @@ task BuildStarSingleNucleus { File star_index = star_index_name File annotation_gtf_modified_introns = annotation_gtf_introns References modified_references = object { - genome_fa: genome_fa_modified, + genome_fa: references.genome_fa, annotation_gtf: annotation_gtf_modified } } - + runtime { - docker: "quay.io/humancellatlas/snss2-indices:1.2.0 " + docker: "us.gcr.io/broad-gotc-prod/build-indices:1.0.0-2.7.10a-1663605340" memory: "50 GiB" disks :"local-disk 100 HDD" cpu:"16" @@ -374,12 +373,12 @@ workflow BuildIndices { String organism_prefix String genome_short_string String dbsnp_version - String? biotypes + File biotypes } # version of this pipeline - String pipeline_version = "1.0.0" + String pipeline_version = "1.0.1" parameter_meta { gtf_version: "the actual number of gencode, ex. 27" diff --git a/pipelines/skylab/optimus/Optimus.changelog.md b/pipelines/skylab/optimus/Optimus.changelog.md index 142c7a97ef..507f31841e 100644 --- a/pipelines/skylab/optimus/Optimus.changelog.md +++ b/pipelines/skylab/optimus/Optimus.changelog.md @@ -1,18 +1,47 @@ +# 5.5.5 +2022-09-20 (Date of Last Commit) + +* Updated tasks in StarAlign.wdl to use an updated STAR docker image. 
+ +# 5.5.4 +2022-09-01 (Date of Last Commit) + +* Updated CheckInputs.wdl to use a lightweight alpine-bash image. + +# 5.5.3 +2022-08-23 (Date of Last Commit) + +* Removed an unused script in pytools docker image and removed unused ConvertStarOutputs task. + +# 5.5.2 +2022-08-16 (Date of Last Commit) + +* Updated LoomUtils.wdl and StarAlign.wdl to use a rebuilt python utilities docker. + +# 5.5.1 +2022-07-21 (Date of Last Commit) + +* Updated STARsoloFastq runtime docker URL. + # 5.5.0 2022-05-18 (Date of Last Commit) + * Updated merge npz docker in StarAlign.wdl to fix a bug in the output loom matrix where gene names were inapporpriately assigned to counts. Any data previously processed with Optimus version 5.0.0 and above should be re-analyzed. # 5.4.3 2022-04-22 (Date of Last Commit) + * Updated Optimus to not run emptydrop step in sn_rna mode. # 5.4.2 2022-04-21 (Date of Last Commit) + * Updated to Picard version 2.26.10 and GATK version 4.2.6.1 to address log4j vulnerabilities # 5.4.1 2022-04-21 (Date of Last Commit) + * Fixing syntax in changelog documentation # 5.4.0 diff --git a/pipelines/skylab/optimus/Optimus.wdl b/pipelines/skylab/optimus/Optimus.wdl index 80b045f1a2..d2e861cc3c 100644 --- a/pipelines/skylab/optimus/Optimus.wdl +++ b/pipelines/skylab/optimus/Optimus.wdl @@ -56,7 +56,7 @@ workflow Optimus { # version of this pipeline - String pipeline_version = "5.5.0" + String pipeline_version = "5.5.5" # this is used to scatter matched [r1_fastq, r2_fastq, i1_fastq] arrays Array[Int] indices = range(length(r1_fastq)) diff --git a/pipelines/skylab/scATAC/scATAC.changelog.md b/pipelines/skylab/scATAC/scATAC.changelog.md index 44e26facef..38594eb9e4 100644 --- a/pipelines/skylab/scATAC/scATAC.changelog.md +++ b/pipelines/skylab/scATAC/scATAC.changelog.md @@ -1,3 +1,23 @@ +# 1.3.0 +2022-09-23 (Date of Last Commit) + +* Added disk, memory and cpu as task inputs. Added pipeline version as a string output. 
+ +# 1.2.4 +2022-08-23 (Date of Last Commit) + +* Remove an unused script in pytools docker image. + +# 1.2.3 +2022-08-18 (Date of Last Commit) + +* Update AlignPairedEnd, SnapPre, SnapCellByBin tasks to use rebuilt snaptools docker image. + +# 1.2.2 +2022-08-16 (Date of Last Commit) + +* Update MakeCompliantBAM and BreakoutSnap tasks to use a consolidated python utilities docker image. + # 1.2.1 2021-11-15 (Date of Last Commit) diff --git a/pipelines/skylab/scATAC/scATAC.wdl b/pipelines/skylab/scATAC/scATAC.wdl index 856f8c9d13..697a2fd971 100644 --- a/pipelines/skylab/scATAC/scATAC.wdl +++ b/pipelines/skylab/scATAC/scATAC.wdl @@ -15,7 +15,7 @@ workflow scATAC { String bin_size_list = "10000" } - String pipeline_version = "1.2.1" + String pipeline_version = "1.3.0" parameter_meta { input_fastq1: "read 1 input fastq, the read names must be tagged with the cellular barcodes" @@ -72,6 +72,7 @@ workflow scATAC { File breakout_binCoordinates = BreakoutSnap.binCoordinates File breakout_binCounts = BreakoutSnap.binCounts File breakout_barcodesSection = BreakoutSnap.barcodesSection + String output_pipeline_version = pipeline_version } } @@ -84,7 +85,11 @@ task AlignPairedEnd { String reference_unpack_name = "genome/genome.fa" String output_bam Int min_cov = 0 - String docker_image = "quay.io/humancellatlas/snaptools:0.0.1" + String docker_image = "us.gcr.io/broad-gotc-prod/snaptools-bwa:1.0.0-1.4.8-0.7.17-1660844602" + Int machine_mem_mb = 16000 + Int cpu = 16 + Int disk = ceil(2*(size(input_fastq1, "GiB") + size(input_fastq2, "GiB") + size(input_reference, "GiB"))) + 100 + Int preemptible = 3 } parameter_meta { @@ -96,8 +101,6 @@ task AlignPairedEnd { min_cov: "--min-cov parameter for snaptools align-paired-end (default: 0)" } - Int num_threads = 16 - Float input_size = size(input_fastq1, "GiB") + size(input_fastq2, "GiB") + size(input_reference, "GiB") command { set -euo pipefail @@ -115,10 +118,10 @@ task AlignPairedEnd { --input-fastq2=~{input_fastq2} \ 
--output-bam=~{output_bam} \ --aligner=bwa \ - --path-to-aligner=/tools/ \ + --path-to-aligner=/usr/local/bin/ \ --read-fastq-command=zcat \ --min-cov=~{min_cov} \ - --num-threads=~{num_threads} \ + --num-threads=~{cpu} \ --tmp-folder=$TEMP_DIR \ --overwrite=TRUE \ --if-sort=True @@ -130,9 +133,10 @@ runtime { docker: docker_image - cpu: num_threads - memory: "16 GB" - disks: "local-disk " + ceil(10 * (if input_size < 1 then 1 else input_size )) + " HDD" + memory: "${machine_mem_mb} MiB" + disks: "local-disk ${disk} HDD" + cpu: cpu + preemptible: preemptible } } @@ -142,8 +146,12 @@ task SnapPre { input { String output_snap_basename String genome_name String genome_size_file = "genome/chrom.sizes" - String docker_image = "quay.io/humancellatlas/snaptools:0.0.1" + String docker_image = "us.gcr.io/broad-gotc-prod/snaptools-bwa:1.0.0-1.4.8-0.7.17-1660844602" File input_reference + Int cpu = 1 + Int machine_mem_mb = 16000 + Int disk = 500 + Int preemptible = 3 } parameter_meta { @@ -155,7 +163,7 @@ input_reference: "input reference tar file" } - Int num_threads = 1 + command { set -euo pipefail @@ -187,9 +195,10 @@ runtime { docker: docker_image - cpu: num_threads - memory: "16 GB" - disks: "local-disk 150 HDD" + cpu: cpu + memory: "${machine_mem_mb} MiB" + disks: "local-disk ${disk} HDD" + preemptible: preemptible } } @@ -198,7 +208,11 @@ task SnapCellByBin { input { File snap_input String bin_size_list String snap_output_name - String docker_image = "quay.io/humancellatlas/snaptools:0.0.1" + String docker_image = "us.gcr.io/broad-gotc-prod/snaptools-bwa:1.0.0-1.4.8-0.7.17-1660844602" + Int cpu = 1 + Int machine_mem_mb = 16000 + Int disk = 500 + Int preemptible = 3 } parameter_meta { @@ -208,8 +222,6 @@ docker_image: "docker image to use" } - Int num_threads = 1 - command { set -euo pipefail @@ -228,9 +240,10 @@ runtime { docker: docker_image - cpu: num_threads - memory: "16 GB" - 
disks: "local-disk 150 HDD" + cpu: cpu + memory: "${machine_mem_mb} MiB" + disks: "local-disk ${disk} HDD" + preemptible: preemptible } } @@ -238,7 +251,12 @@ task MakeCompliantBAM { input { File input_bam String output_bam_filename - String docker_image = "quay.io/humancellatlas/snaptools:0.0.1" + String docker_image = "us.gcr.io/broad-gotc-prod/pytools:1.0.0-1661263730" + Int cpu = 1 + Int disk = ceil(3 * (size(input_bam, "GiB"))) + 100 + Int machine_mem_mb = 4000 + Int preemptible = 3 + } parameter_meta { @@ -247,13 +265,10 @@ task MakeCompliantBAM { docker_image: "docker image to use" } - Int num_threads = 1 - Float input_size = size(input_bam, "GiB") - command { set -euo pipefail - /tools/makeCompliantBAM.py --input-bam ~{input_bam} --output-bam ~{output_bam_filename} + /usr/gitc/makeCompliantBAM.py --input-bam ~{input_bam} --output-bam ~{output_bam_filename} } output { @@ -262,18 +277,23 @@ task MakeCompliantBAM { runtime { docker: docker_image - cpu: num_threads - memory: "4 GB" - disks: "local-disk " + ceil(2.5 * (if input_size < 1 then 1 else input_size )) + " HDD" + cpu: cpu + memory: "${machine_mem_mb} MiB" + disks: "local-disk ${disk} HDD" + preemptible: preemptible } } task BreakoutSnap { input { File snap_input - String docker_image = "quay.io/humancellatlas/snap-breakout:0.0.1" + String docker_image = "us.gcr.io/broad-gotc-prod/pytools:1.0.0-1661263730" String bin_size_list String input_id + Int preemptible = 3 + Int disk = ceil(10 * (if size(snap_input, "GiB") < 1 then 1 else size(snap_input, "GiB") )) + 100 + Int machine_mem_mb = 16000 + Int cpu = 1 } parameter_meta { @@ -283,13 +303,10 @@ task BreakoutSnap { input_id : "name of the sample, used to name the outputs" } - Int num_threads = 1 - Float input_size = size(snap_input, "GiB") - command { set -euo pipefail mkdir output - python3 /tools/breakoutSnap.py --input ~{snap_input} \ + python3 /usr/gitc/breakoutSnap.py --input ~{snap_input} \ --output-prefix output/~{input_id}_ } @@ -303,8 +320,9 @@ 
task BreakoutSnap { runtime { docker: docker_image - cpu: num_threads - memory: "16 GB" - disks: "local-disk " + ceil(10 * (if input_size < 1 then 1 else input_size )) + " HDD" + memory: "${machine_mem_mb} MiB" + disks: "local-disk ${disk} HDD" + cpu: cpu + preemptible: preemptible } } diff --git a/pipelines/skylab/smartseq2_multisample/MultiSampleSmartSeq2.changelog.md b/pipelines/skylab/smartseq2_multisample/MultiSampleSmartSeq2.changelog.md index 984de1b6c7..5020a70ff2 100644 --- a/pipelines/skylab/smartseq2_multisample/MultiSampleSmartSeq2.changelog.md +++ b/pipelines/skylab/smartseq2_multisample/MultiSampleSmartSeq2.changelog.md @@ -1,12 +1,32 @@ +# 2.2.16 +2022-09-13 (Date of Last Commit) + +* Update RSEM.wdl in the SmartSeq2SingleSample pipeline to use an updated RSEM docker image. This change does not affect the MultiSampleSmartSeq2 pipeline. + +# 2.2.15 +2022-09-12 (Date of Last Commit) + +* Update HISAT2.wdl in the SmartSeq2SingleSample pipeline to use an updated HISAT2 docker image. This change does not affect the MultiSampleSmartSeq2 pipeline. + +# 2.2.14 +2022-08-23 (Date of Last Commit) + +* Remove an unused script in pytools docker image. + +# 2.2.13 +2022-08-16 (Date of Last Commit) + +* Update LoomUtils.wdl to use updated docker images. This change does not affect the MultiSampleSmartSeq2 pipeline. + # 2.2.12 -2022-06-2022 (Date of Last Commit) +2022-06-22 (Date of Last Commit) * Updated main workflow name from SmartSeq2SingleCell to SmartSeq2SingleSample in the SS2 single sample pipeline. This allows the pipeline to run in the updated scala tests. # 2.2.11 2022-04-22 (Date of Last Commit) -* Updated LoomUtils.wdl for a task in the Optimus pipeline. This change does not affect the SmartSeq2SingleSample pipeline. +* Updated LoomUtils.wdl for a task in the Optimus pipeline. This change does not affect the MultiSampleSmartSeq2 pipeline. 
# 2.2.10 2022-04-14 (Date of Last Commit) diff --git a/pipelines/skylab/smartseq2_multisample/MultiSampleSmartSeq2.wdl b/pipelines/skylab/smartseq2_multisample/MultiSampleSmartSeq2.wdl index 6784510416..9f486528fd 100644 --- a/pipelines/skylab/smartseq2_multisample/MultiSampleSmartSeq2.wdl +++ b/pipelines/skylab/smartseq2_multisample/MultiSampleSmartSeq2.wdl @@ -40,7 +40,7 @@ workflow MultiSampleSmartSeq2 { Boolean paired_end } # Version of this pipeline - String pipeline_version = "2.2.12" + String pipeline_version = "2.2.16" if (false) { String? none = "None" diff --git a/pipelines/skylab/smartseq2_single_nucleus_multisample/MultiSampleSmartSeq2SingleNucleus.changelog.md b/pipelines/skylab/smartseq2_single_nucleus_multisample/MultiSampleSmartSeq2SingleNucleus.changelog.md index d5b18aac93..a1cce15e80 100644 --- a/pipelines/skylab/smartseq2_single_nucleus_multisample/MultiSampleSmartSeq2SingleNucleus.changelog.md +++ b/pipelines/skylab/smartseq2_single_nucleus_multisample/MultiSampleSmartSeq2SingleNucleus.changelog.md @@ -1,11 +1,48 @@ +# 1.2.14 +2022-09-20 (Date of Last Commit) + +* Updated tasks in StarAlign.wdl to use an updated STAR docker image. + +# 1.2.13 + +2022-09-01 (Date of Last Commit) + +* Updated CheckInputs.wdl to use a lightweight alpine-bash image. + +# 1.2.12 +2022-08-31 (Date of Last Commit) + +* Updated CountAlignments to use an updated docker image. + +# 1.2.11 +2022-08-23 (Date of Last Commit) + +* Removed an unused script in pytools docker image. + +# 1.2.10 +2022-08-16 (Date of Last Commit) + +* Updated LoomUtils.wdl to use a consolidated python utilities docker image. This change does not affect the MultiSampleSmartSeq2SingleNucleus pipeline. + +# 1.2.9 +2022-08-08 (Date of Last Commit) + +* Updated TrimAdapters runtime docker URL. + +# 1.2.8 +2022-07-21 (Date of Last Commit) + +* Updated STARsoloFastq runtime docker URL. 
+ # 1.2.7 2022-05-18 (Date of Last Commit) + * Updated merge npz docker in StarAlign.wdl # 1.2.6 2022-04-22 (Date of Last Commit) -* Updated LoomUtils.wdl for a task in the Optimus pipeline. This change does not affect the SmartSeq2SingleSample pipeline. +* Updated LoomUtils.wdl for a task in the Optimus pipeline. This change does not affect the MultiSampleSmartSeq2SingleNucleus pipeline. # 1.2.5 2022-04-19 (Date of Last Commit) @@ -26,7 +63,7 @@ # 1.2.2 2022-02-10 (Date of Last Commit) -* Rebuilt a docker to merge outputs of STAR in in StarAlign.wdl task and moved it to a public location. +* Rebuilt a docker to merge outputs of STAR in StarAlign.wdl task and moved it to a public location. # 1.2.1 2022-02-07 (Date of Last Commit) diff --git a/pipelines/skylab/smartseq2_single_nucleus_multisample/MultiSampleSmartSeq2SingleNucleus.wdl b/pipelines/skylab/smartseq2_single_nucleus_multisample/MultiSampleSmartSeq2SingleNucleus.wdl index 5b6227301a..79b97bf621 100644 --- a/pipelines/skylab/smartseq2_single_nucleus_multisample/MultiSampleSmartSeq2SingleNucleus.wdl +++ b/pipelines/skylab/smartseq2_single_nucleus_multisample/MultiSampleSmartSeq2SingleNucleus.wdl @@ -40,7 +40,7 @@ workflow MultiSampleSmartSeq2SingleNucleus { String? input_id_metadata_field } # Version of this pipeline - String pipeline_version = "1.2.7" + String pipeline_version = "1.2.14" if (false) { String? none = "None" diff --git a/pipelines/skylab/smartseq2_single_sample/SmartSeq2SingleSample.changelog.md b/pipelines/skylab/smartseq2_single_sample/SmartSeq2SingleSample.changelog.md index d36b66a53d..a3bdcb705b 100644 --- a/pipelines/skylab/smartseq2_single_sample/SmartSeq2SingleSample.changelog.md +++ b/pipelines/skylab/smartseq2_single_sample/SmartSeq2SingleSample.changelog.md @@ -1,3 +1,23 @@ +# 5.1.15 +2022-09-13 (Date of Last Commit) + +* Update RSEM.wdl to use an updated RSEM docker image. This change does not affect the SmartSeq2SingleSample pipeline. 
+ +# 5.1.14 +2022-09-12 (Date of Last Commit) + +* Update HISAT2.wdl to use an updated HISAT2 docker image. This change does not affect the SmartSeq2SingleSample pipeline. + +# 5.1.13 +2022-08-23 (Date of Last Commit) + +* Remove an unused script in pytools docker image. + +# 5.1.12 +2022-08-16 (Date of Last Commit) + +* Updated LoomUtils.wdl to use a consolidated python utilities docker image. This change does not affect the SmartSeq2SingleSample pipeline. + # 5.1.11 2022-06-21 (Date of Last Commit) diff --git a/pipelines/skylab/smartseq2_single_sample/SmartSeq2SingleSample.wdl b/pipelines/skylab/smartseq2_single_sample/SmartSeq2SingleSample.wdl index de230d8b66..c9c36def38 100644 --- a/pipelines/skylab/smartseq2_single_sample/SmartSeq2SingleSample.wdl +++ b/pipelines/skylab/smartseq2_single_sample/SmartSeq2SingleSample.wdl @@ -36,7 +36,7 @@ workflow SmartSeq2SingleSample { } # version of this pipeline - String pipeline_version = "5.1.11" + String pipeline_version = "5.1.15" parameter_meta { genome_ref_fasta: "Genome reference in fasta format" diff --git a/pullapprove_template.yml b/pullapprove_template.yml index a2ac46db78..38d5c9c7ee 100644 --- a/pullapprove_template.yml +++ b/pullapprove_template.yml @@ -91,7 +91,7 @@ groups: request: 2 reviewers: users: - - ldgauthier # Laura Gauthier + - samuelklee # Samuel Lee - kachulis # Chris Kachulis scientific_owners_joint_genotyping: @@ -108,7 +108,7 @@ groups: request_order: given reviewers: users: - - ldgauthier # Laura Gauthier + - samuelklee # Samuel Lee scientific_owners_somatic_single_sample: conditions: diff --git a/scripts/get_changed_pipeline_worklow_test_args.sh b/scripts/get_changed_pipeline_worklow_test_args.sh index e0aebedd0b..05eda96021 100755 --- a/scripts/get_changed_pipeline_worklow_test_args.sh +++ b/scripts/get_changed_pipeline_worklow_test_args.sh @@ -8,95 +8,118 @@ declare ALL_PIPELINES=($(get_versioned_pipelines)) function pipeline_to_args() { local -r pipeline=${1} - local -r env=${2} - local -r 
test=${3} - local -r truth=${4} - local -r uncached=${5} + local -r test=${2} - local -r common_args="--env ${env} -t ${test} -b ${truth} ${uncached}" + local -r common_args="${test}" case ${pipeline} in AnnotationFiltration) - echo AnnotationFiltration -t ${test} --env ${env};; + continue;; Arrays) - echo Arrays -a Single ${common_args};; - MultiSampleArrays) - echo Arrays -a Multi ${common_args};; + echo Arrays ${common_args};; BroadInternalRNAWithUMIs) echo BroadInternalRNAWithUMIs ${common_args};; BroadInternalUltimaGenomics) echo BroadInternalUltimaGenomics ${common_args};; + # CEMBA) + # echo CEMBA ${common_args};; CheckFingerprint) echo CheckFingerprint ${common_args};; + CramToUnmappedBams) + echo CramToUnmappedBams ${common_args};; + ExternalExomeReprocessing) + if [[ "${test}" == "Scientific" ]]; then + echo ExternalExomeReprocessing Plumbing + else + continue + fi;; ExomeGermlineSingleSample) - echo GermlineSingleSample -d Exome ${common_args};; + echo ExomeGermlineSingleSample ${common_args};; ExomeReprocessing) if [[ "${test}" == "Scientific" ]]; then - echo Reprocessing -d Exome --env ${env} -t Plumbing -b ${truth} ${uncached} + echo ExomeReprocessing Plumbing else continue fi;; - JointGenotyping) - echo JointGenotyping -d Exome ${common_args} --papi-version PAPIv2; - echo JointGenotyping -d WGS --env ${env} -t Plumbing -b ${truth} ${uncached} --papi-version PAPIv2;; + ExternalWholeGenomeReprocessing) + if [[ "${test}" == "Scientific" ]]; then + echo ExternalWholeGenomeReprocessing Plumbing + else + continue + fi;; + GDCWholeGenomeSomaticSingleSample) + echo GDCWholeGenomeSomaticSingleSample ${common_args};; IlluminaGenotypingArray) echo IlluminaGenotypingArray ${common_args};; Imputation) echo Imputation ${common_args};; - ExternalExomeReprocessing) + JointGenotyping) + echo JointGenotyping ${common_args};; + JointGenotypingByChromosomePartOne) + continue;; + JointGenotypingByChromosomePartTwo) + continue;; + MultiSampleArrays) + echo 
MultiSampleArrays ${common_args};; + MultiSampleSmartSeq2) if [[ "${test}" == "Scientific" ]]; then - echo ExternalReprocessing -d Exome --env ${env} -t Plumbing -b ${truth} ${uncached} + echo MultiSampleSmartSeq2 Plumbing else - continue + echo MultiSampleSmartSeq2 ${common_args} fi;; - ExternalWholeGenomeReprocessing) + MultiSampleSmartSeq2SingleNucleus) if [[ "${test}" == "Scientific" ]]; then - echo ExternalReprocessing -d WGS --env ${env} -t Plumbing -b ${truth} ${uncached} + echo MultiSampleSmartSeq2SingleNucleus Plumbing else - continue + echo MultiSampleSmartSeq2SingleNucleus ${common_args} + fi;; + Optimus) + echo Optimus ${common_args};; + ReblockGVCF) + echo ReblockGvcf ${common_args};; + RNAWithUMIsPipeline) + echo RNAWithUMIsPipeline ${common_args};; + scATAC) + if [[ "${test}" == "Scientific" ]]; then + echo scATAC Plumbing + else + echo scATAC ${common_args} + fi;; + SmartSeq2SingleSample) + if [[ "${test}" == "Scientific" ]]; then + echo SmartSeq2SingleSample Plumbing + else + echo SmartSeq2SingleSample ${common_args} + fi;; + TargetedSomaticSingleSample) + continue;; + ValidateChip) + echo ValidateChip ${common_args};; + VariantCalling) + if [[ "${test}" == "Scientific" ]]; then + echo VariantCalling Plumbing + else + echo VariantCalling ${common_args} fi;; WholeGenomeGermlineSingleSample) - echo GermlineSingleSample -d WGS ${common_args};; + echo WholeGenomeGermlineSingleSample ${common_args};; WholeGenomeReprocessing) if [[ "${test}" == "Scientific" ]]; then - echo Reprocessing -d WGS --env ${env} -t Plumbing -b ${truth} ${uncached} + echo WholeGenomeReprocessing Plumbing else continue fi;; - ValidateChip) - echo ValidateChip ${common_args};; - ReblockGVCF) - echo ReblockGvcf -d Exome ${common_args}; - echo ReblockGvcf -d WGS ${common_args};; - RNAWithUMIsPipeline) - echo RNAWithUMIs ${common_args};; - TargetedSomaticSingleSample) - echo SomaticSingleSample -d Targeted ${common_args};; - CramToUnmappedBams) - echo CramToUnmappedBams 
${common_args};; - JointGenotypingByChromosomePartOne) - continue;; - JointGenotypingByChromosomePartTwo) - continue;; - UltimaGenomicsGermlineSingleSample) - echo UltimaGenomicsGermlineSingleSample ${common_args};; + UltimaGenomicsWholeGenomeGermline) + echo UltimaGenomicsWholeGenomeGermline ${common_args};; UltimaGenomicsJointGenotyping) echo UltimaGenomicsJointGenotyping ${common_args};; - GDCWholeGenomeSomaticSingleSample) - echo GDCWholeGenomeSomaticSingleSample -d WGS ${common_args};; - VariantCalling) - echo VariantCalling -d Exome -t Plumbing --env ${env} -b ${truth} ${uncached}; - echo VariantCalling -d WGS -t Plumbing --env ${env} -b ${truth} ${uncached};; esac } function main() { local -r gittish=${1} local -r test_all=${2} - local -r env=${3} - local -r test=${4} - local -r truth=${5} - local -r uncached=${6} + local -r test=${3} local -a changed_pipeline_paths=() local -a args=() @@ -109,7 +132,7 @@ function main() { for changed_pipeline_path in ${changed_pipeline_paths[*]}; do pipeline=$(basename ${changed_pipeline_path} .wdl) - arg="$(pipeline_to_args ${pipeline} ${env} ${test} ${truth} ${uncached})" + arg="$(pipeline_to_args ${pipeline} ${test})" if [[ -n ${arg} ]]; then args+=("${arg}") fi @@ -120,4 +143,4 @@ function main() { done } -main ${1} ${2} ${3} ${4} ${5} ${6} +main ${1} ${2} ${3} \ No newline at end of file diff --git a/tasks/broad/ImputationTasks.wdl b/tasks/broad/ImputationTasks.wdl index 1bc1ddfa7f..a7d9933349 100644 --- a/tasks/broad/ImputationTasks.wdl +++ b/tasks/broad/ImputationTasks.wdl @@ -148,7 +148,7 @@ task CheckChunks { Int var_in_reference Int disk_size_gb = ceil(2*size([vcf, vcf_index, panel_vcf, panel_vcf_index], "GiB")) - String bcftools_docker = "us.gcr.io/broad-gotc-prod/imputation-bcf-vcf:1.0.5-1.10.2-0.1.16-1649948623" + String bcftools_docker = "us.gcr.io/broad-gotc-prod/imputation-bcf-vcf:1.0.6-1.10.2-0.1.16-1663946207" Int cpu = 1 Int memory_mb = 4000 } @@ -225,7 +225,7 @@ task Minimac4 { Int end Int window - 
String minimac4_docker = "us.gcr.io/broad-gotc-prod/imputation-minimac4:1.0.5-1.0.2-1649949471" + String minimac4_docker = "us.gcr.io/broad-gotc-prod/imputation-minimac4:1.0.6-1.0.2-1663948783" Int cpu = 1 Int memory_mb = 4000 Int disk_size_gb = ceil(size(ref_panel, "GiB") + 2*size(phased_vcf, "GiB")) + 50 @@ -304,7 +304,7 @@ task ReplaceHeader { File vcf_to_replace_header File vcf_with_new_header - String bcftools_docker = "us.gcr.io/broad-gotc-prod/imputation-bcf-vcf:1.0.4-1.10.2-0.1.16-1646091598" + String bcftools_docker = "us.gcr.io/broad-gotc-prod/imputation-bcf-vcf:1.0.6-1.10.2-0.1.16-1663946207" } String output_name = basename(vcf_to_replace_header,".vcf.gz") + ".new_header.vcf.gz" @@ -402,7 +402,7 @@ task SeparateMultiallelics { String output_basename Int disk_size_gb = ceil(2*(size(original_vcf, "GiB") + size(original_vcf_index, "GiB"))) - String bcftools_docker = "us.gcr.io/broad-gotc-prod/imputation-bcf-vcf:1.0.5-1.10.2-0.1.16-1649948623" + String bcftools_docker = "us.gcr.io/broad-gotc-prod/imputation-bcf-vcf:1.0.6-1.10.2-0.1.16-1663946207" Int cpu = 1 Int memory_mb = 4000 } @@ -432,7 +432,7 @@ task OptionalQCSites { Float? optional_qc_max_missing Float? 
optional_qc_hwe - String bcftools_vcftools_docker = "us.gcr.io/broad-gotc-prod/imputation-bcf-vcf:1.0.5-1.10.2-0.1.16-1649948623" + String bcftools_vcftools_docker = "us.gcr.io/broad-gotc-prod/imputation-bcf-vcf:1.0.6-1.10.2-0.1.16-1663946207" Int cpu = 1 Int memory_mb = 16000 Int disk_size_gb = ceil(2*(size(input_vcf, "GiB") + size(input_vcf_index, "GiB"))) @@ -465,7 +465,7 @@ task MergeSingleSampleVcfs { Array[File] input_vcf_indices String output_vcf_basename - String bcftools_docker = "us.gcr.io/broad-gotc-prod/imputation-bcf-vcf:1.0.5-1.10.2-0.1.16-1649948623" + String bcftools_docker = "us.gcr.io/broad-gotc-prod/imputation-bcf-vcf:1.0.6-1.10.2-0.1.16-1663946207" Int memory_mb = 2000 Int cpu = 1 Int disk_size_gb = 3 * ceil(size(input_vcfs, "GiB") + size(input_vcf_indices, "GiB")) + 20 @@ -504,7 +504,7 @@ task CountSamples { input { File vcf - String bcftools_docker = "us.gcr.io/broad-gotc-prod/imputation-bcf-vcf:1.0.5-1.10.2-0.1.16-1649948623" + String bcftools_docker = "us.gcr.io/broad-gotc-prod/imputation-bcf-vcf:1.0.6-1.10.2-0.1.16-1663946207" Int cpu = 1 Int memory_mb = 3000 Int disk_size_gb = 100 + ceil(size(vcf, "GiB")) @@ -702,7 +702,7 @@ task SetIDs { File vcf String output_basename - String bcftools_docker = "us.gcr.io/broad-gotc-prod/imputation-bcf-vcf:1.0.5-1.10.2-0.1.16-1649948623" + String bcftools_docker = "us.gcr.io/broad-gotc-prod/imputation-bcf-vcf:1.0.6-1.10.2-0.1.16-1663946207" Int cpu = 1 Int memory_mb = 4000 Int disk_size_gb = 100 + ceil(2.2 * size(vcf, "GiB")) @@ -730,7 +730,7 @@ task ExtractIDs { String output_basename Int disk_size_gb = 2*ceil(size(vcf, "GiB")) + 100 - String bcftools_docker = "us.gcr.io/broad-gotc-prod/imputation-bcf-vcf:1.0.5-1.10.2-0.1.16-1649948623" + String bcftools_docker = "us.gcr.io/broad-gotc-prod/imputation-bcf-vcf:1.0.6-1.10.2-0.1.16-1663946207" Int cpu = 1 Int memory_mb = 4000 } @@ -792,7 +792,7 @@ task RemoveAnnotations { File vcf String basename - String bcftools_docker = 
"us.gcr.io/broad-gotc-prod/imputation-bcf-vcf:1.0.5-1.10.2-0.1.16-1649948623" + String bcftools_docker = "us.gcr.io/broad-gotc-prod/imputation-bcf-vcf:1.0.6-1.10.2-0.1.16-1663946207" Int cpu = 1 Int memory_mb = 3000 Int disk_size_gb = ceil(2.2*size(vcf, "GiB")) + 100 @@ -874,7 +874,7 @@ task SplitMultiSampleVcf { input { File multiSampleVcf - String bcftools_docker = "us.gcr.io/broad-gotc-prod/imputation-bcf-vcf:1.0.5-1.10.2-0.1.16-1649948623" + String bcftools_docker = "us.gcr.io/broad-gotc-prod/imputation-bcf-vcf:1.0.6-1.10.2-0.1.16-1663946207" Int cpu = 1 Int memory_mb = 8000 Int disk_size_gb = ceil(3*size(multiSampleVcf, "GiB")) + 100 diff --git a/tasks/broad/InternalArraysTasks.wdl b/tasks/broad/InternalArraysTasks.wdl index d6669ebf8d..7728c6c241 100644 --- a/tasks/broad/InternalArraysTasks.wdl +++ b/tasks/broad/InternalArraysTasks.wdl @@ -541,7 +541,7 @@ task FormatArraysOutputs { >>> runtime { - docker: "broadinstitute/horsefish:eMerge_05192022" + docker: "gcr.io/emerge-production/emerge_wdls:emerge_09022022" } output { diff --git a/tasks/broad/InternalImputationTasks.wdl b/tasks/broad/InternalImputationTasks.wdl index e540fc6000..fb0d486a82 100644 --- a/tasks/broad/InternalImputationTasks.wdl +++ b/tasks/broad/InternalImputationTasks.wdl @@ -48,7 +48,7 @@ task FormatImputationOutputs { >>> runtime { - docker: "broadinstitute/horsefish:eMerge_05192022" + docker: "gcr.io/emerge-production/emerge_wdls:emerge_09022022" } output { @@ -115,7 +115,7 @@ task FormatImputationWideOutputs{ >>> runtime { - docker: "broadinstitute/horsefish:eMerge_05192022" + docker: "gcr.io/emerge-production/emerge_wdls:emerge_09022022" } output { diff --git a/tasks/broad/InternalTasks.wdl b/tasks/broad/InternalTasks.wdl index 04cb93bb3b..5bf3545e7a 100644 --- a/tasks/broad/InternalTasks.wdl +++ b/tasks/broad/InternalTasks.wdl @@ -193,7 +193,7 @@ task IngestOutputsToTDR { } runtime { - docker: "broadinstitute/horsefish:eMerge_05192022" + docker: 
"gcr.io/emerge-production/emerge_wdls:emerge_09022022" } output { diff --git a/tasks/broad/RNAWithUMIsTasks.wdl b/tasks/broad/RNAWithUMIsTasks.wdl index f9e93556c2..a753d04236 100644 --- a/tasks/broad/RNAWithUMIsTasks.wdl +++ b/tasks/broad/RNAWithUMIsTasks.wdl @@ -98,11 +98,14 @@ task Fastp { File adapter_fasta = "gs://gcp-public-data--broad-references/RNA/resources/Illumina_adapters.fasta" String docker = "us.gcr.io/broad-gotc-prod/fastp:1.0.0-0.20.1-1649253500" - Int memory_mb = "16384" + Int memory_mb = ceil(1.5*size(fastq1, "MiB")) + 8192 # Experimentally determined formula for memory allocation Int disk_size_gb = 5*ceil(size(fastq1, "GiB")) + 128 + File monitoring_script = "gs://broad-dsde-methods-monitoring/cromwell_monitoring_script.sh" } command { + bash ~{monitoring_script} > monitoring.log & + fastp --in1 ~{fastq1} --in2 ~{fastq2} --out1 ~{output_prefix}_read1.fastq.gz --out2 ~{output_prefix}_read2.fastq.gz \ --disable_quality_filtering \ --disable_length_filtering \ @@ -115,9 +118,11 @@ task Fastp { memory: "~{memory_mb} MiB" disks: "local-disk ~{disk_size_gb} HDD" preemptible: 0 + maxRetries: 2 } output { + File monitoring_log = "monitoring.log" File fastq1_clipped = output_prefix + "_read1.fastq.gz" File fastq2_clipped = output_prefix + "_read2.fastq.gz" } @@ -305,7 +310,7 @@ task rnaseqc2 { String docker = "us.gcr.io/broad-dsde-methods/ckachulis/rnaseqc:2.4.2" Int cpu = 1 - Int memory_mb = 3500 + Int memory_mb = 8000 Int disk_size_gb = ceil(size(bam_file, 'GiB') + size(genes_gtf, 'GiB') + size(exon_bed, 'GiB')) + 50 } @@ -331,6 +336,7 @@ task rnaseqc2 { cpu: cpu memory: "~{memory_mb} MiB" disks: "local-disk ~{disk_size_gb} HDD" + maxRetries: 2 } } @@ -600,17 +606,22 @@ task GroupByUMIs { String docker = "us.gcr.io/broad-gotc-prod/umi_tools:1.0.0-1.1.1-1638821470" Int cpu = 2 - Int memory_mb = 7500 + Int memory_mb = 64000 Int disk_size_gb = ceil(2.2 * size([bam, bam_index], "GiB")) + 100 + + File monitoring_script = 
"gs://broad-dsde-methods-monitoring/cromwell_monitoring_script.sh" } command <<< + bash ~{monitoring_script} > monitoring.log & + umi_tools group -I ~{bam} --paired --no-sort-output --output-bam --stdout ~{output_bam_basename}.bam --umi-tag-delimiter "-" \ --extract-umi-method tag --umi-tag RX --unmapped-reads use >>> output { File grouped_bam = "~{output_bam_basename}.bam" + File monitoring_log = "monitoring.log" } runtime { @@ -618,6 +629,7 @@ task GroupByUMIs { cpu: cpu memory: "~{memory_mb} MiB" disks: "local-disk ~{disk_size_gb} HDD" + maxRetries: 1 } } @@ -626,7 +638,7 @@ task MarkDuplicatesUMIAware { File bam String output_basename Boolean remove_duplicates - + Boolean use_umi String docker = "us.gcr.io/broad-gotc-prod/picard-cloud:2.26.11" Int cpu = 1 @@ -641,8 +653,9 @@ task MarkDuplicatesUMIAware { INPUT=~{bam} \ OUTPUT=~{output_bam_basename}.bam \ METRICS_FILE=~{output_basename}.duplicate.metrics \ - READ_ONE_BARCODE_TAG=BX \ - REMOVE_DUPLICATES=~{remove_duplicates} + REMOVE_DUPLICATES=~{remove_duplicates} \ + ~{true='READ_ONE_BARCODE_TAG=BX' false='' use_umi} \ + >>> output { @@ -662,7 +675,6 @@ task formatPipelineOutputs { input { String sample_id String transcriptome_bam - String transcriptome_bam_index String transcriptome_duplicate_metrics String output_bam String output_bam_index @@ -706,7 +718,6 @@ task formatPipelineOutputs { # NOTE: we rename some field names to match the TDR schema outputs_dict["sample_id"]="~{sample_id}" # primary key outputs_dict["transcriptome_bam"]="~{transcriptome_bam}" - outputs_dict["transcriptome_bam_index"]="~{transcriptome_bam_index}" outputs_dict["transcriptome_duplicate_metrics_file"]="~{transcriptome_duplicate_metrics}" outputs_dict["genome_bam"]="~{output_bam}" outputs_dict["genome_bam_index"]="~{output_bam_index}" @@ -750,7 +761,7 @@ task formatPipelineOutputs { >>> runtime { - docker: "broadinstitute/horsefish:tdr_import_v1.1" + docker: "broadinstitute/horsefish:tdr_import_v1.4" cpu: cpu memory: "~{memory_mb} 
MiB" disks: "local-disk ~{disk_size_gb} HDD" @@ -763,30 +774,30 @@ task formatPipelineOutputs { task updateOutputsInTDR { input { - String staging_bucket String tdr_dataset_uuid File outputs_json - String sample_id Int cpu = 1 Int memory_mb = 2000 Int disk_size_gb = 10 } - String tdr_target_table = "sample" - command <<< + # input args: + # -d dataset uuid + # -t target table in dataset + # -o json of data to ingest + # -f field to populate with timestamp at ingest (can have multiple) python -u /scripts/export_pipeline_outputs_to_tdr.py \ -d "~{tdr_dataset_uuid}" \ - -b "~{staging_bucket}" \ - -t "~{tdr_target_table}" \ + -t "sample" \ -o "~{outputs_json}" \ - -k "sample_id" \ - -v "~{sample_id}" + -f "version_timestamp" \ + -f "analysis_end_time" >>> runtime { - docker: "broadinstitute/horsefish:twisttcap_scripts" + docker: "broadinstitute/horsefish:tdr_import_v1.4" cpu: cpu memory: "~{memory_mb} MiB" disks: "local-disk ~{disk_size_gb} HDD" diff --git a/tasks/broad/UMIAwareDuplicateMarking.wdl b/tasks/broad/UMIAwareDuplicateMarking.wdl index e28427f164..1c865b7e54 100644 --- a/tasks/broad/UMIAwareDuplicateMarking.wdl +++ b/tasks/broad/UMIAwareDuplicateMarking.wdl @@ -85,7 +85,8 @@ workflow UMIAwareDuplicateMarking { input: bam = SortSamByQueryNameBeforeDuplicateMarking.output_bam, output_basename = output_basename, - remove_duplicates = remove_duplicates + remove_duplicates = remove_duplicates, + use_umi = true } if (coordinate_sort_output){ diff --git a/tasks/broad/UltimaGenomicsWholeGenomeGermlineTasks.wdl b/tasks/broad/UltimaGenomicsWholeGenomeGermlineTasks.wdl index f7af1860a4..847a9eac3a 100644 --- a/tasks/broad/UltimaGenomicsWholeGenomeGermlineTasks.wdl +++ b/tasks/broad/UltimaGenomicsWholeGenomeGermlineTasks.wdl @@ -651,7 +651,7 @@ task FilterVCF { --flow_order ~{used_flow_order} \ ~{true="--blacklist_cg_insertions" false="" filter_cg_insertions} \ --annotate_intervals ~{sep=" --annotate_intervals " annotation_intervals} \ - --output_file 
/cromwell_root/~{final_vcf_base_name}.filtered.vcf.gz + --output_file ~{final_vcf_base_name}.filtered.vcf.gz >>> runtime { @@ -710,7 +710,7 @@ task TrainModel { ~{"--exome_weight " + exome_weight} \ ~{"--exome_weight_annotation " + exome_weight_annotation} \ --annotate_intervals ~{sep=" --annotate_intervals " annotation_intervals} \ - --output_file_prefix /cromwell_root/~{input_vcf_name}.model + --output_file_prefix ~{input_vcf_name}.model >>> runtime { diff --git a/tasks/broad/Utilities.wdl b/tasks/broad/Utilities.wdl index 7244e3b773..947c56ae01 100644 --- a/tasks/broad/Utilities.wdl +++ b/tasks/broad/Utilities.wdl @@ -110,7 +110,7 @@ task ScatterIntervalList { Int interval_count = read_int(stdout()) } runtime { - docker: "us.gcr.io/broad-gotc-prod/picard-python:1.0.0-2.26.10-1647265026" + docker: "us.gcr.io/broad-gotc-prod/picard-python:1.0.0-2.26.10-1663951039" memory: "2000 MiB" } } @@ -308,7 +308,7 @@ task MakeOptionalOutputBam { Boolean keep_inputs Int preemptible_tries = 3 } - Int disk_size = ceil(size(bam_input, "GiB")) + 5 + Int disk_size = ceil(size(bam_input, "GiB")) + 15 String basename = basename(bam_input, ".bam") command<<< if [ ~{keep_inputs} = "true" ] diff --git a/tasks/skylab/CheckInputs.wdl b/tasks/skylab/CheckInputs.wdl index 9ff5187f32..a33bdf0811 100644 --- a/tasks/skylab/CheckInputs.wdl +++ b/tasks/skylab/CheckInputs.wdl @@ -41,7 +41,7 @@ task checkInputArrays { } runtime { - docker: "ubuntu:18.04" + docker: "bashell/alpine-bash:latest" cpu: 1 memory: "1 GiB" disks: "local-disk 1 HDD" @@ -111,7 +111,7 @@ task checkOptimusInput { } runtime { - docker: "ubuntu:18.04" + docker: "bashell/alpine-bash:latest" cpu: cpu memory: "~{machine_mem_mb} GiB" disks: "local-disk ~{disk} HDD" diff --git a/tasks/skylab/FeatureCounts.wdl b/tasks/skylab/FeatureCounts.wdl index bc260e465e..3530df3374 100644 --- a/tasks/skylab/FeatureCounts.wdl +++ b/tasks/skylab/FeatureCounts.wdl @@ -8,7 +8,7 @@ task CountAlignments { File annotation_gtf #runtime values - String 
docker = "quay.io/humancellatlas/snss2-featurecount:0.1.0" + String docker = "us.gcr.io/broad-gotc-prod/subread:1.0.0-2.0.1-1662044537" Int machine_mem_mb = 8250 Int cpu = 1 Int disk = ceil(size(aligned_bam_inputs,"Gi")*2) + 10 @@ -43,7 +43,7 @@ task CountAlignments { -g gene_id # create a new input bam where the alignemnts crossing intron-exon junctions are removed - python3 /tools/remove-reads-on-junctions.py --input-gtf ~{annotation_gtf} \ + python3 /usr/gitc/remove-reads-on-junctions.py --input-gtf ~{annotation_gtf} \ --input-bam "${bam_files[$i]}" --output-bam "${output_prefix[$i]}.input.nojunc.bam" # counting the exons diff --git a/tasks/skylab/HISAT2.wdl b/tasks/skylab/HISAT2.wdl index 1000fcbdb6..e009c1346a 100644 --- a/tasks/skylab/HISAT2.wdl +++ b/tasks/skylab/HISAT2.wdl @@ -10,7 +10,7 @@ task HISAT2PairedEnd { String input_id # runtime values - String docker = "quay.io/humancellatlas/secondary-analysis-hisat2:v0.2.2-2-2.1.0" + String docker = "us.gcr.io/broad-gotc-prod/hisat2:1.0.0-1662998171" Int machine_mem_mb = 16500 Int cpu = 4 # Using (fastq1 + fastq2) x 100 gives factor of a few buffer. BAM can be up to ~5 x (fastq1 + fastq2). @@ -136,7 +136,7 @@ task HISAT2RSEM { String input_id # runtime values - String docker = "quay.io/humancellatlas/secondary-analysis-hisat2:v0.2.2-2-2.1.0" + String docker = "us.gcr.io/broad-gotc-prod/hisat2:1.0.0-1662998171" Int machine_mem_mb = 16500 Int cpu = 4 # Using (fastq1 + fastq2) x 100 gives factor of a few buffer. BAM can be up to ~5 x (fastq1 + fastq2). @@ -267,7 +267,7 @@ input { String input_id # runtime values - String docker = "quay.io/humancellatlas/secondary-analysis-hisat2:v0.2.2-2-2.1.0" + String docker = "us.gcr.io/broad-gotc-prod/hisat2:1.0.0-1662998171" Int machine_mem_mb = 16500 Int cpu = 4 # Using fastq x 100 gives factor of a few buffer. BAM can be up to ~5 x fastq. 
@@ -359,7 +359,7 @@ task HISAT2InspectIndex { String ref_name # runtime values - String docker = "quay.io/humancellatlas/secondary-analysis-hisat2:v0.2.2-2-2.1.0" + String docker = "us.gcr.io/broad-gotc-prod/hisat2:1.0.0-1662998171" Int machine_mem_mb = 3850 Int cpu = 1 # use provided disk number or dynamically size on our own, with 200GiB of additional disk @@ -410,7 +410,7 @@ task HISAT2RSEMSingleEnd { String input_id # runtime values - String docker = "quay.io/humancellatlas/secondary-analysis-hisat2:v0.2.2-2-2.1.0" + String docker = "us.gcr.io/broad-gotc-prod/hisat2:1.0.0-1662998171" Int machine_mem_mb = 15000 Int cpu = 4 Int disk = ceil((size(fastq, "GiB")) * 100 + size(hisat2_ref, "GiB") * 2 + 200) diff --git a/tasks/skylab/LoomUtils.wdl b/tasks/skylab/LoomUtils.wdl index d4d50ce6d3..babde437d5 100644 --- a/tasks/skylab/LoomUtils.wdl +++ b/tasks/skylab/LoomUtils.wdl @@ -3,7 +3,7 @@ version 1.0 task SmartSeq2LoomOutput { input { #runtime values - String docker = "quay.io/humancellatlas/secondary-analysis-loom-output:0.0.6-1" + String docker = "us.gcr.io/broad-gotc-prod/pytools:1.0.0-1661263730" # the gene count file "_rsem.genes.results" in the task results folder call-RSEMExpression File rsem_gene_results # file named "_QCs.csv" in the folder "call-GroupQCOutputs/glob-*" of the the SS2 output @@ -32,7 +32,7 @@ task SmartSeq2LoomOutput { command { set -euo pipefail - python3 /tools/create_loom_ss2.py \ + python3 /usr/gitc/create_loom_ss2.py \ --qc_files ~{sep=' ' smartseq_qc_files} \ --rsem_genes_results ~{rsem_gene_results} \ --output_loom_path "~{input_id}.loom" \ @@ -61,7 +61,7 @@ task OptimusLoomGeneration { input { #runtime values - String docker = "quay.io/humancellatlas/secondary-analysis-loom-output:v1.3.0" + String docker = "us.gcr.io/broad-gotc-prod/pytools:1.0.0-1661263730" # name of the sample String input_id # user provided id @@ -104,7 +104,7 @@ task OptimusLoomGeneration { set -euo pipefail if [ "~{counting_mode}" == "sc_rna" ]; then - python3 
/tools/create_loom_optimus.py \ + python3 /usr/gitc/create_loom_optimus.py \ --empty_drops_file ~{empty_drops_result} \ --add_emptydrops_data "yes" \ --annotation_file ~{annotation_file} \ @@ -121,7 +121,7 @@ task OptimusLoomGeneration { --expression_data_type "exonic" \ --pipeline_version ~{pipeline_version} else - python3 /tools/create_snrna_optimus.py \ + python3 /usr/gitc/create_snrna_optimus.py \ --annotation_file ~{annotation_file} \ --cell_metrics ~{cell_metrics} \ --gene_metrics ~{gene_metrics} \ @@ -163,7 +163,7 @@ task AggregateSmartSeq2Loom { String? species String? organ String pipeline_version - String docker = "quay.io/humancellatlas/secondary-analysis-loom-output:0.0.6-1" + String docker = "us.gcr.io/broad-gotc-prod/pytools:1.0.0-1661263730" Int disk = 200 Int machine_mem_mb = 4 Int cpu = 1 @@ -177,7 +177,7 @@ task AggregateSmartSeq2Loom { set -e # Merge the loom files - python3 /tools/ss2_loom_merge.py \ + python3 /usr/gitc/ss2_loom_merge.py \ --input-loom-files ~{sep=' ' loom_input} \ --output-loom-file "~{batch_id}.loom" \ --batch_id ~{batch_id} \ @@ -211,7 +211,7 @@ task SingleNucleusOptimusLoomOutput { input { #runtime values - String docker = "quay.io/humancellatlas/secondary-analysis-loom-output:v1.1.0" + String docker = "us.gcr.io/broad-gotc-prod/pytools:1.0.0-1661263730" # name of the sample String input_id # user provided id @@ -256,7 +256,7 @@ task SingleNucleusOptimusLoomOutput { command { set -euo pipefail - python3 /tools/create_snrna_optimus_counts.py \ + python3 /usr/gitc/create_snrna_optimus_counts.py \ --annotation_file ~{annotation_file} \ --cell_metrics ~{cell_metrics} \ --gene_metrics ~{gene_metrics} \ @@ -292,7 +292,7 @@ task SingleNucleusOptimusLoomOutput { task SingleNucleusSmartSeq2LoomOutput { input { #runtime values - String docker = "quay.io/humancellatlas/secondary-analysis-loom-output:0.0.8" + String docker = "us.gcr.io/broad-gotc-prod/pytools:1.0.0-1661263730" Array[File] alignment_summary_metrics Array[File] 
dedup_metrics @@ -339,7 +339,7 @@ task SingleNucleusSmartSeq2LoomOutput { do # creates a table with gene_id, gene_name, intron and exon counts echo "Running create_snss2_counts_csv." - python /tools/create_snss2_counts_csv.py \ + python /usr/gitc/create_snss2_counts_csv.py \ --in-gtf ~{annotation_introns_added_gtf} \ --intron-counts ${introns_counts_files[$i]} \ --exon-counts ${exons_counts_files[$i]} \ @@ -354,7 +354,7 @@ task SingleNucleusSmartSeq2LoomOutput { # create the loom file echo "Running create_loom_snss2." - python3 /tools/create_loom_snss2.py \ + python3 /usr/gitc/create_loom_snss2.py \ --qc_files "${output_prefix[$i]}.Picard_group.csv" \ --count_results "${output_prefix[$i]}.exon_intron_counts.tsv" \ --output_loom_path "${output_prefix[$i]}.loom" \ diff --git a/tasks/skylab/RSEM.wdl b/tasks/skylab/RSEM.wdl index cd148448de..2f74bf4de3 100644 --- a/tasks/skylab/RSEM.wdl +++ b/tasks/skylab/RSEM.wdl @@ -8,7 +8,7 @@ task RSEMExpression { Boolean is_paired # runtime values - String docker = "quay.io/humancellatlas/secondary-analysis-rsem:v0.2.2-1.3.0" + String docker = "us.gcr.io/broad-gotc-prod/rsem:1.0.0-1663016024" Int machine_mem_mb = 32768 Int cpu = 4 # use provided disk number or dynamically size on our own, with 200GiB of additional disk diff --git a/tasks/skylab/StarAlign.wdl b/tasks/skylab/StarAlign.wdl index fd4bb79764..74f2042306 100644 --- a/tasks/skylab/StarAlign.wdl +++ b/tasks/skylab/StarAlign.wdl @@ -6,7 +6,7 @@ task StarAlignBamSingleEnd { File tar_star_reference # runtime values - String docker = "quay.io/humancellatlas/secondary-analysis-star:v0.2.2-2.5.3a-40ead6e" + String docker = "us.gcr.io/broad-gotc-prod/star:1.0.0-2.7.9a-1658781884" Int machine_mem_mb = ceil((size(tar_star_reference, "Gi")) + 6) * 1100 Int cpu = 16 # multiply input size by 2.2 to account for output bam file + 20% overhead, add size of reference. 
@@ -72,7 +72,7 @@ task StarAlignFastqPairedEnd { File tar_star_reference # runtime values - String docker = "quay.io/humancellatlas/secondary-analysis-star:v2.7.9a" + String docker = "us.gcr.io/broad-gotc-prod/star:1.0.0-2.7.9a-1658781884" Int machine_mem_mb = ceil((size(tar_star_reference, "Gi")) + 6) * 1100 Int cpu = 16 # multiply input size by 2.2 to account for output bam file + 20% overhead, add size of reference. @@ -137,7 +137,7 @@ task StarAlignFastqMultisample { File tar_star_reference # runtime values - String docker = "quay.io/humancellatlas/secondary-analysis-star:v2.7.9a" + String docker = "us.gcr.io/broad-gotc-prod/star:1.0.0-2.7.9a-1658781884" Int machine_mem_mb = ceil((size(tar_star_reference, "Gi")) + 6) * 1100 Int cpu = 16 # multiply input size by 2.2 to account for output bam file + 20% overhead, add size of reference. @@ -221,7 +221,7 @@ task STARsoloFastq { Boolean? count_exons # runtime values - String docker = "quay.io/humancellatlas/secondary-analysis-star:v2.7.9a" + String docker = "us.gcr.io/broad-gotc-prod/star:1.0.0-2.7.9a-1658781884" Int machine_mem_mb = 64000 Int cpu = 8 # multiply input size by 2.2 to account for output bam file + 20% overhead, add size of reference. @@ -378,60 +378,6 @@ task STARsoloFastq { } } -task ConvertStarOutput { - - input { - File barcodes - File features - File matrix - - #runtime values - String docker = "quay.io/humancellatlas/secondary-analysis-python3-scientific:0.1.12" - Int machine_mem_mb = 8250 - Int cpu = 1 - Int disk = ceil(size(matrix, "Gi") * 2) + 10 - Int preemptible = 3 - } - - meta { - description: "Create three numpy formats for the barcodes, gene names and the count matrix from the STARSolo count matrix in mtx format." 
- } - - parameter_meta { - docker: "(optional) the docker image containing the runtime environment for this task" - machine_mem_mb: "(optional) the amount of memory (MiB) to provision for this task" - cpu: "(optional) the number of cpus to provision for this task" - disk: "(optional) the amount of disk space (GiB) to provision for this task" - preemptible: "(optional) if non-zero, request a pre-emptible instance and allow for this number of preemptions before running the task on a non preemptible machine" - } - - command { - set -e - - # create the compresed raw count matrix with the counts, gene names and the barcodes - python3 /tools/create-npz-output.py \ - --barcodes ~{barcodes} \ - --features ~{features} \ - --matrix ~{matrix} - - } - - runtime { - docker: docker - memory: "${machine_mem_mb} MiB" - disks: "local-disk ${disk} HDD" - cpu: cpu - preemptible: preemptible - } - - output { - File row_index = "sparse_counts_row_index.npy" - File col_index = "sparse_counts_col_index.npy" - File sparse_counts = "sparse_counts.npz" - } -} - - task MergeStarOutput { input { @@ -441,7 +387,7 @@ task MergeStarOutput { String input_id #runtime values - String docker = "quay.io/humancellatlas/secondary-analysis-star:merge-star-outputs-v1.1.9" + String docker = "us.gcr.io/broad-gotc-prod/pytools:1.0.0-1661263730" Int machine_mem_mb = 8250 Int cpu = 1 Int disk = ceil(size(matrix, "Gi") * 2) + 10 @@ -466,7 +412,7 @@ task MergeStarOutput { declare -a matrix_files=(~{sep=' ' matrix}) # create the compressed raw count matrix with the counts, gene names and the barcodes - python3 /tools/create-merged-npz-output.py \ + python3 /usr/gitc/create-merged-npz-output.py \ --barcodes ${barcodes_files[@]} \ --features ${features_files[@]} \ --matrix ${matrix_files[@]} \ diff --git a/tasks/skylab/TagGeneExon.wdl b/tasks/skylab/TagGeneExon.wdl deleted file mode 100644 index daffec5da0..0000000000 --- a/tasks/skylab/TagGeneExon.wdl +++ /dev/null @@ -1,121 +0,0 @@ -version 1.0 - -task 
TagGeneExon { - input { - File annotations_gtf - File bam_input - - # runtime values - String docker = "quay.io/humancellatlas/secondary-analysis-dropseqtools:v0.2.2-1.13" - Int machine_mem_mb = 8250 - Int cpu = 1 - Int disk = ceil((size(bam_input, "Gi") + size(annotations_gtf, "Gi")) * 3) + 20 - Int preemptible = 3 - } - - meta { - description: "Tags any read in bam_input that overlaps an intron or exon interval with the gene that those interals correspond to." - } - - parameter_meta { - annotations_gtf: "GTF annotation file for the species that bam input is derived from. Each record must have a gene_name and transcript_name in addition to a gene_id and transcript_id, no white space at the end of any record and must be in gtf format." - bam_input: "Aligned bam file." - docker: "(optional) the docker image containing the runtime environment for this task" - machine_mem_mb: "(optional) the amount of memory (MiB) to provision for this task" - cpu: "(optional) the number of cpus to provision for this task" - disk: "(optional) the amount of disk space (GiB) to provision for this task" - preemptible: "(optional) if non-zero, request a pre-emptible instance and allow for this number of preemptions before running the task on a non preemptible machine" - } - - command { - set -e - - TagReadWithGeneExon \ - INPUT=${bam_input} \ - OUTPUT=bam_with_gene_exon.bam \ - SUMMARY=gene_exon_tag_summary.log \ - TAG=GE \ - ANNOTATIONS_FILE=${annotations_gtf} - } - - # Larger genomes (mouse-human) require a 7.5gb instance; single-organism genomes work with 3.75gb - runtime { - docker: docker - memory: "${machine_mem_mb} MiB" - disks: "local-disk ${disk} HDD" - cpu: cpu - preemptible: preemptible - } - - output { - File bam_output = "bam_with_gene_exon.bam" - File log = "gene_exon_tag_summary.log" - } -} - - -task TagReadWithGeneFunction { - input { - File annotations_gtf - File bam_input - - String gene_name_tag = "gn" - String gene_strand_tag = "gs" - String gene_function_tag = "gf" - 
- String use_strand_info = "true" - - # runtime values - String docker = "quay.io/humancellatlas/secondary-analysis-dropseqtools:2.3.0" - Int machine_mem_mb = 8250 - Int cpu = 1 - Int disk = ceil((size(bam_input, "Gi") + size(annotations_gtf, "Gi")) * 3) + 20 - Int preemptible = 3 - } - - meta { - description: "Tags any read in bam_input that overlaps an intron or exon interval with the gene that those interals correspond to." - } - - parameter_meta { - annotations_gtf: "GTF annotation file for the species that bam input is derived from. Each record must have a gene_name and transcript_name in addition to a gene_id and transcript_id, no white space at the end of any record and must be in gtf format." - bam_input: "Aligned bam file." - gene_name_tag: "the tag used to denote gene name in the bam (default: gn)" - gene_strand_tag: "the tag used to denote gene strand in the bam (default: gs)" - gene_function_tag: "the tag used to denote gene function (INTRONIC, EXONIC, ...) in the output bam (default: gf)" - - docker: "(optional) the docker image containing the runtime environment for this task" - machine_mem_mb: "(optional) the amount of memory (MiB) to provision for this task" - cpu: "(optional) the number of cpus to provision for this task" - disk: "(optional) the amount of disk space (GiB) to provision for this task" - preemptible: "(optional) if non-zero, request a pre-emptible instance and allow for this number of preemptions before running the task on a non preemptible machine" - } - - command { - set -e - - TagReadWithGeneFunction \ - INPUT=${bam_input} \ - OUTPUT=bam_with_gene_exon.bam \ - GENE_NAME_TAG=${gene_name_tag} \ - GENE_STRAND_TAG=${gene_strand_tag} \ - GENE_FUNCTION_TAG=${gene_function_tag} \ - SUMMARY=gene_exon_tag_summary.log \ - ANNOTATIONS_FILE=${annotations_gtf} \ - USE_STRAND_INFO=${use_strand_info} - } - - # Larger genomes (mouse-human) require a 7.5gb instance; single-organism genomes work with 3.75gb - runtime { - docker: docker - memory: 
"${machine_mem_mb} MiB" - disks: "local-disk ${disk} HDD" - cpu: cpu - preemptible: preemptible - } - - output { - File bam_output = "bam_with_gene_exon.bam" - File log = "gene_exon_tag_summary.log" - } -} diff --git a/tasks/skylab/TrimAdapters.wdl b/tasks/skylab/TrimAdapters.wdl index ed5a0067e7..c74f3fdcd9 100644 --- a/tasks/skylab/TrimAdapters.wdl +++ b/tasks/skylab/TrimAdapters.wdl @@ -9,7 +9,7 @@ task TrimAdapters { Array[String] input_ids #runtime values - String docker = "quay.io/humancellatlas/snss2-trim-adapters:0.1.0" + String docker = "us.gcr.io/broad-gotc-prod/ea-utils:1.0.0-1.04.807-1659990665" Int machine_mem_mb = 8250 Int cpu = 1 Int disk = ceil(2*(size(fastq1_input_files, "Gi") + size(fastq2_input_files, "Gi"))) + 10 diff --git a/tasks/skylab/accessory_workflows/build_bwa_reference/bwa-mk-index.wdl b/tasks/skylab/accessory_workflows/build_bwa_reference/bwa-mk-index.wdl index 5f0f0a03d1..1e4d559be1 100644 --- a/tasks/skylab/accessory_workflows/build_bwa_reference/bwa-mk-index.wdl +++ b/tasks/skylab/accessory_workflows/build_bwa_reference/bwa-mk-index.wdl @@ -40,7 +40,7 @@ task BuildBWAreference { >>> runtime { - docker: "quay.io/humancellatlas/snaptools:0.0.1" + docker: "us.gcr.io/broad-gotc-prod/bwa:1.0.0-0.7.17-1660770463" memory: "96GB" disks: "local-disk 100 HDD" cpu: "4" diff --git a/tests/broad/scala_test/src/main/scala/org/broadinstitute/dsp/pipelines/commandline/PipelineTestType.scala b/tests/broad/scala_test/src/main/scala/org/broadinstitute/dsp/pipelines/commandline/PipelineTestType.scala index e68dcc2ca6..163957773d 100644 --- a/tests/broad/scala_test/src/main/scala/org/broadinstitute/dsp/pipelines/commandline/PipelineTestType.scala +++ b/tests/broad/scala_test/src/main/scala/org/broadinstitute/dsp/pipelines/commandline/PipelineTestType.scala @@ -119,7 +119,7 @@ object PipelineTestType extends Enum[PipelineTestType] { extends PipelineTestType( "TestUltimaGenomicsWholeGenomeGermline", "UltimaGenomicsWholeGenomeGermline", - 
"/broad/dna_seq/germline/single_sample/UGWGS/" + "/broad/dna_seq/germline/single_sample/ugwgs/" ) case object ValidateChip extends PipelineTestType( diff --git a/tests/broad/scala_test/src/main/scala/org/broadinstitute/dsp/pipelines/tester/BroadInternalRNAWithUMIsTester.scala b/tests/broad/scala_test/src/main/scala/org/broadinstitute/dsp/pipelines/tester/BroadInternalRNAWithUMIsTester.scala index 0a9e1ed220..c44f08eca0 100644 --- a/tests/broad/scala_test/src/main/scala/org/broadinstitute/dsp/pipelines/tester/BroadInternalRNAWithUMIsTester.scala +++ b/tests/broad/scala_test/src/main/scala/org/broadinstitute/dsp/pipelines/tester/BroadInternalRNAWithUMIsTester.scala @@ -110,9 +110,9 @@ class BroadInternalRNAWithUMIsTester( truth_output_bam = truthCloudPath.resolve( s"$outputBaseName.duplicate_marked.coordinate_sorted.bam"), test_transcriptome_bam = resultsCloudPath.resolve( - s"$outputBaseName.transcriptome.duplicate_marked.bam"), + s"$outputBaseName.transcriptome_RSEM_post_processed.bam"), truth_transcriptome_bam = truthCloudPath.resolve( - s"$outputBaseName.transcriptome.duplicate_marked.bam"), + s"$outputBaseName.transcriptome_RSEM_post_processed.bam"), test_gene_tpm = resultsCloudPath.resolve(s"$outputBaseName.gene_tpm.gct.gz"), truth_gene_tpm = diff --git a/tests/broad/scala_test/src/main/scala/org/broadinstitute/dsp/pipelines/tester/CloudWorkflowTester.scala b/tests/broad/scala_test/src/main/scala/org/broadinstitute/dsp/pipelines/tester/CloudWorkflowTester.scala index f3982b3279..ba8a59a6a1 100644 --- a/tests/broad/scala_test/src/main/scala/org/broadinstitute/dsp/pipelines/tester/CloudWorkflowTester.scala +++ b/tests/broad/scala_test/src/main/scala/org/broadinstitute/dsp/pipelines/tester/CloudWorkflowTester.scala @@ -68,7 +68,10 @@ class CloudWorkflowTester(testerConfig: CloudWorkflowConfig)( // All of our plumbing or scientific test inputs protected lazy val inputFileNames: Seq[String] = - workflowInputRoot.list.toSeq.map(_.name.toString) + 
workflowInputRoot.list + .filter(_.name.endsWith(".json")) + .toSeq + .map(_.name.toString) // plumbing or scientific protected val testTypeString: String = @@ -129,7 +132,7 @@ class CloudWorkflowTester(testerConfig: CloudWorkflowConfig)( * Generate the run parameters for each testing sample */ def generateRunParameters: Seq[WorkflowRunParameters] = { - workflowInputRoot.list.toSeq.map(_.name.toString).map { fileName => + inputFileNames.map { fileName => val inputsName = fileName.replace(".json", "") val resultsPath = resultsPrefix.resolve(s"$inputsName/") val truthPath = truthPrefix.resolve(s"$inputsName/") @@ -190,7 +193,7 @@ class CloudWorkflowTester(testerConfig: CloudWorkflowConfig)( /** Find any instance of the pipeline followed by . and replace with wrapper workflow * e.g. * Arrays. -> TestArrays. - * + * * This handles the case where the wrapper workflow is a substring of a nested input (CheckFingerprint CheckFingerprintTask) */ var inputsString = (workflowInputRoot / fileName).contentAsString diff --git a/tests/broad/scala_test/src/main/scala/org/broadinstitute/dsp/pipelines/tester/RNAWithUMIsTester.scala b/tests/broad/scala_test/src/main/scala/org/broadinstitute/dsp/pipelines/tester/RNAWithUMIsTester.scala index cdd2e63095..9815629c1c 100644 --- a/tests/broad/scala_test/src/main/scala/org/broadinstitute/dsp/pipelines/tester/RNAWithUMIsTester.scala +++ b/tests/broad/scala_test/src/main/scala/org/broadinstitute/dsp/pipelines/tester/RNAWithUMIsTester.scala @@ -90,9 +90,9 @@ class RNAWithUMIsTester(testerConfig: RNAWithUMIsConfig)( truth_output_bam = truthCloudPath.resolve( s"$outputBaseName.duplicate_marked.coordinate_sorted.bam"), test_transcriptome_bam = resultsCloudPath.resolve( - s"$outputBaseName.transcriptome.duplicate_marked.bam"), + s"$outputBaseName.transcriptome_RSEM_post_processed.bam"), truth_transcriptome_bam = truthCloudPath.resolve( - s"$outputBaseName.transcriptome.duplicate_marked.bam"), + 
s"$outputBaseName.transcriptome_RSEM_post_processed.bam"), test_gene_tpm = resultsCloudPath.resolve(s"$outputBaseName.gene_tpm.gct.gz"), truth_gene_tpm = diff --git a/tests/skylab/ATAC/pr/ValidateATAC.wdl b/tests/skylab/ATAC/pr/ValidateATAC.wdl index 78292aec6f..9638c7e873 100644 --- a/tests/skylab/ATAC/pr/ValidateATAC.wdl +++ b/tests/skylab/ATAC/pr/ValidateATAC.wdl @@ -52,7 +52,7 @@ task ValidateATAC { >>> runtime { - docker: "quay.io/humancellatlas/secondary-analysis-samtools:v0.2.2-1.6" + docker: "us.gcr.io/broad-gotc-prod/samtools:1.0.0-1.11-1624651616" cpu: 1 memory: "3.75 GB" disks: "local-disk ${required_disk} HDD" diff --git a/tests/skylab/hca_adapter/pr/ValidateHcaAdapter.wdl b/tests/skylab/hca_adapter/pr/ValidateHcaAdapter.wdl deleted file mode 100644 index 1ce70f5b53..0000000000 --- a/tests/skylab/hca_adapter/pr/ValidateHcaAdapter.wdl +++ /dev/null @@ -1,24 +0,0 @@ -version 1.0 - -task CompareAdapterFiles { - input { - File test_json - File truth_json - } - command <<< - set -eo pipefail - diff "~{test_json}" "~{truth_json}" - - if [ $? 
-ne 0 ]; - then - echo "Error: ${test_json} and ${truth_json} differ" - fi - >>> - - runtime { - docker: "quay.io/humancellatlas/secondary-analysis-samtools:v0.2.2-1.6" - cpu: 1 - memory: "3.75 GiB" - disks: "local-disk 10 HDD" - } -} \ No newline at end of file diff --git a/tests/skylab/optimus/4kpbmc/dependencies.json b/tests/skylab/optimus/4kpbmc/dependencies.json deleted file mode 100644 index 376e9f186b..0000000000 --- a/tests/skylab/optimus/4kpbmc/dependencies.json +++ /dev/null @@ -1,21 +0,0 @@ -{ - "Optimus.wdl": "pipelines/skylab/optimus/Optimus.wdl", - "ValidateOptimus.wdl": "tests/skylab/optimus/pr/ValidateOptimus.wdl", - "StarAlignBamSingleEnd.wdl": "tasks/skylab/StarAlignBamSingleEnd.wdl", - "FastqToUBam.wdl": "tasks/skylab/FastqToUBam.wdl", - "Attach10xBarcodes.wdl": "tasks/skylab/Attach10xBarcodes.wdl", - "SplitBamByCellBarcode.wdl": "tasks/skylab/SplitBamByCellBarcode.wdl", - "TagGeneExon.wdl": "tasks/skylab/TagGeneExon.wdl", - "CorrectUmiMarkDuplicates.wdl": "tasks/skylab/CorrectUmiMarkDuplicates.wdl", - "MergeSortBam.wdl": "tasks/skylab/MergeSortBam.wdl", - "CreateCountMatrix.wdl": "tasks/skylab/CreateCountMatrix.wdl", - "SequenceDataWithMoleculeTagMetrics.wdl": "tasks/skylab/SequenceDataWithMoleculeTagMetrics.wdl", - "TagSortBam.wdl": "tasks/skylab/TagSortBam.wdl", - "RunEmptyDrops.wdl": "tasks/skylab/RunEmptyDrops.wdl", - "LoomUtils.wdl": "tasks/skylab/LoomUtils.wdl", - "Picard.wdl": "tasks/skylab/Picard.wdl", - "UmiCorrection.wdl": "tasks/skylab/UmiCorrection.wdl", - "ScatterBam.wdl": "tasks/skylab/ScatterBam.wdl", - "ModifyGtf.wdl": "tasks/skylab/ModifyGtf.wdl", - "OptimusInputChecks.wdl": "tasks/skylab/OptimusInputChecks.wdl" -} diff --git a/tests/skylab/scATAC/pr/ValidateSCATAC.wdl b/tests/skylab/scATAC/pr/ValidateSCATAC.wdl index c005143b1d..deaa8dd945 100644 --- a/tests/skylab/scATAC/pr/ValidateSCATAC.wdl +++ b/tests/skylab/scATAC/pr/ValidateSCATAC.wdl @@ -48,7 +48,7 @@ task ValidateSCATAC { >>> runtime { - docker: 
"quay.io/humancellatlas/secondary-analysis-samtools:v0.2.2-1.6" + docker: "us.gcr.io/broad-gotc-prod/samtools:1.0.0-1.11-1624651616" cpu: 1 memory: "3.75 GB" disks: "local-disk ${required_disk} HDD" diff --git a/tests/skylab/smartseq2_multisample/pr/ValidateMultiSampleSmartSeq2.wdl b/tests/skylab/smartseq2_multisample/pr/ValidateMultiSampleSmartSeq2.wdl index ad6d488dd4..cdfbbeb4ed 100644 --- a/tests/skylab/smartseq2_multisample/pr/ValidateMultiSampleSmartSeq2.wdl +++ b/tests/skylab/smartseq2_multisample/pr/ValidateMultiSampleSmartSeq2.wdl @@ -13,12 +13,12 @@ task ValidateSmartSeq2Plate { # catch intermittent failures set -eo pipefail - python3 /tools/loomCompare.py --truth-loom ~{truth_loom} --check-loom ~{loom_output} --delta-cutoff 10 + python3 /usr/gitc/loomCompare.py --truth-loom ~{truth_loom} --check-loom ~{loom_output} --delta-cutoff 10 >>> runtime { - docker: "quay.io/humancellatlas/secondary-analysis-loom-output:0.0.3-fk-2" + docker: "us.gcr.io/broad-gotc-prod/pytools:1.0.0-1661263730" cpu: 1 memory: "8 GiB" disks: "local-disk 1${disk_size} HDD" diff --git a/tests/skylab/smartseq2_single_nucleus/pr/ValidateSmartSeq2SingleNucleus.wdl b/tests/skylab/smartseq2_single_nucleus/pr/ValidateSmartSeq2SingleNucleus.wdl index 2de15c9ec7..80880c2b3e 100644 --- a/tests/skylab/smartseq2_single_nucleus/pr/ValidateSmartSeq2SingleNucleus.wdl +++ b/tests/skylab/smartseq2_single_nucleus/pr/ValidateSmartSeq2SingleNucleus.wdl @@ -16,7 +16,7 @@ task ValidateSnSmartSeq2 { set -eo pipefail #compare looms - python3 /tools/loomCompare.py --truth-loom ~{truth_loom} --check-loom ~{loom_output} --delta-cutoff 10 + python3 /usr/gitc/loomCompare.py --truth-loom ~{truth_loom} --check-loom ~{loom_output} --delta-cutoff 10 # calculate hashes; awk is used to extract the hash from the md5sum output that contains both # a hash and the filename that was passed. 
We parse the first 7 columns because a bug in RSEM @@ -32,7 +32,7 @@ task ValidateSnSmartSeq2 { >>> runtime { - docker: "quay.io/humancellatlas/secondary-analysis-loom-output:0.0.3-fk-2" + docker: "us.gcr.io/broad-gotc-prod/pytools:1.0.0-1661263730" cpu: 1 memory: "8 GB" disks: "local-disk 1${disk_size} HDD" diff --git a/verification/VerifyImputation.wdl b/verification/VerifyImputation.wdl index ba36b60d7e..915044f1b2 100644 --- a/verification/VerifyImputation.wdl +++ b/verification/VerifyImputation.wdl @@ -43,7 +43,7 @@ workflow VerifyImputation { Boolean? done } - String bcftools_docker_tag = "us.gcr.io/broad-gotc-prod/imputation-bcf-vcf:1.0.5-1.10.2-0.1.16-1649948623" + String bcftools_docker_tag = "us.gcr.io/broad-gotc-prod/imputation-bcf-vcf:1.0.6-1.10.2-0.1.16-1663946207" scatter (idx in range(length(truth_metrics))) { call CompareImputationMetrics { diff --git a/verification/test-wdls/TestBroadInternalRNAWithUMIs.wdl b/verification/test-wdls/TestBroadInternalRNAWithUMIs.wdl index 9e9ff34050..a073bbcecd 100644 --- a/verification/test-wdls/TestBroadInternalRNAWithUMIs.wdl +++ b/verification/test-wdls/TestBroadInternalRNAWithUMIs.wdl @@ -23,7 +23,6 @@ workflow TestBroadInternalRNAWithUMIs { String sequencing_center = "BI" String? tdr_dataset_uuid String? tdr_sample_id - String? 
tdr_staging_bucket # These values will be determined and injected into the inputs by the scala test framework String truth_path @@ -55,7 +54,6 @@ workflow TestBroadInternalRNAWithUMIs { sequencing_center = sequencing_center, tdr_dataset_uuid = tdr_dataset_uuid, tdr_sample_id = tdr_sample_id, - tdr_staging_bucket = tdr_staging_bucket, environment = environment, vault_token_path = vault_token_path_arrays @@ -76,7 +74,6 @@ workflow TestBroadInternalRNAWithUMIs { BroadInternalRNAWithUMIs.rnaseqc2_gene_tpm, BroadInternalRNAWithUMIs.output_bam_index, BroadInternalRNAWithUMIs.output_bam, - BroadInternalRNAWithUMIs.transcriptome_bam_index, BroadInternalRNAWithUMIs.transcriptome_bam, ], diff --git a/verification/test-wdls/TestRNAWithUMIsPipeline.wdl b/verification/test-wdls/TestRNAWithUMIsPipeline.wdl index 6b47add22b..14ec5771f8 100644 --- a/verification/test-wdls/TestRNAWithUMIsPipeline.wdl +++ b/verification/test-wdls/TestRNAWithUMIsPipeline.wdl @@ -73,7 +73,6 @@ workflow TestRNAWithUMIsPipeline { Array[String] pipeline_outputs = select_all([ RNAWithUMIsPipeline.transcriptome_bam, - RNAWithUMIsPipeline.transcriptome_bam_index, RNAWithUMIsPipeline.output_bam, RNAWithUMIsPipeline.output_bam_index, RNAWithUMIsPipeline.rnaseqc2_gene_tpm, diff --git a/website/docs/Pipelines/CEMBA_MethylC_Seq_Pipeline/README.md b/website/docs/Pipelines/CEMBA_MethylC_Seq_Pipeline/README.md index a2b032b6f8..61f80a0a89 100644 --- a/website/docs/Pipelines/CEMBA_MethylC_Seq_Pipeline/README.md +++ b/website/docs/Pipelines/CEMBA_MethylC_Seq_Pipeline/README.md @@ -6,7 +6,7 @@ sidebar_position: 1 | Pipeline Version | Date Updated | Documentation Author | Questions or Feedback | | :----: | :---: | :----: | :--------------: | -| [CEMBA_v1.1.0](https://github.com/broadinstitute/warp/releases) | February, 2021 | [Elizabeth Kiernan](mailto:ekiernan@broadinstitute.org) | Please file GitHub issues in warp or contact [Kylee Degatano](mailto:kdegatano@broadinstitute.org) | +| 
[CEMBA_v1.1.0](https://github.com/broadinstitute/warp/releases) | February, 2021 | [Elizabeth Kiernan](mailto:ekiernan@broadinstitute.org) | Please file GitHub issues in warp or contact [the WARP team](mailto:warp-pipelines-help@broadinstitute.org) | ![CEMBA](./CEMBA.png) @@ -183,11 +183,11 @@ Please identify the pipeline in your methods section using the CEMBA Pipeline's ## Consortia Support This pipeline is supported and used by the [BRAIN Initiative Cell Census Network](https://biccn.org/) (BICCN). -If your organization also uses this pipeline, we would love to list you! Please reach out to us by contacting [Kylee Degatano](mailto:kdegatano@broadinstitute.org). +If your organization also uses this pipeline, we would love to list you! Please reach out to us by contacting [the WARP team](mailto:warp-pipelines-help@broadinstitute.org). ## Have Suggestions? -Please help us make our tools better by contacting [Kylee Degatano](mailto:kdegatano@broadinstitute.org) for pipeline-related suggestions or questions. +Please help us make our tools better by contacting [the WARP team](mailto:warp-pipelines-help@broadinstitute.org) for pipeline-related suggestions or questions. 
diff --git a/website/docs/Pipelines/Exome_Germline_Single_Sample_Pipeline/README.md b/website/docs/Pipelines/Exome_Germline_Single_Sample_Pipeline/README.md index f37867a91f..d13fd2f02d 100644 --- a/website/docs/Pipelines/Exome_Germline_Single_Sample_Pipeline/README.md +++ b/website/docs/Pipelines/Exome_Germline_Single_Sample_Pipeline/README.md @@ -6,7 +6,7 @@ sidebar_position: 1 | Pipeline Version | Date Updated | Documentation Author | Questions or Feedback | | :----: | :---: | :----: | :--------------: | -| [ExomeGermlineSingleSample_v3.0.0](https://github.com/broadinstitute/warp/releases?q=ExomeGermlineSingleSample_v3.0.0&expanded=true) | November, 2021 | [Elizabeth Kiernan](mailto:ekiernan@broadinstitute.org) | Please file GitHub issues in WARP or contact [Kylee Degatano](mailto:kdegatano@broadinstitute.org) | +| [ExomeGermlineSingleSample_v3.0.0](https://github.com/broadinstitute/warp/releases?q=ExomeGermlineSingleSample_v3.0.0&expanded=true) | November, 2021 | [Elizabeth Kiernan](mailto:ekiernan@broadinstitute.org) | Please file GitHub issues in WARP or contact [the WARP team](mailto:warp-pipelines-help@broadinstitute.org) | The Exome Germline Single Sample pipeline implements data pre-processing and initial variant calling according to the GATK Best Practices for germline SNP and Indel discovery in human exome sequencing data. 
diff --git a/website/docs/Pipelines/Genomic_Data_Commons_Whole_Genome_Somatic/README.md b/website/docs/Pipelines/Genomic_Data_Commons_Whole_Genome_Somatic/README.md index d29c436db1..b04313d299 100644 --- a/website/docs/Pipelines/Genomic_Data_Commons_Whole_Genome_Somatic/README.md +++ b/website/docs/Pipelines/Genomic_Data_Commons_Whole_Genome_Somatic/README.md @@ -6,7 +6,7 @@ sidebar_position: 1 | Pipeline Version | Date Updated | Documentation Author | Questions or Feedback | | :----: | :---: | :----: | :--------------: | -| [GDCWholeGenomeSomaticSingleSample_v1.0.1](https://github.com/broadinstitute/warp/releases) | January, 2021 | [Elizabeth Kiernan](mailto:ekiernan@broadinstitute.org) | Please file GitHub issues in warp or contact [Kylee Degatano](mailto:kdegatano@broadinstitute.org) | +| [GDCWholeGenomeSomaticSingleSample_v1.0.1](https://github.com/broadinstitute/warp/releases) | January, 2021 | [Elizabeth Kiernan](mailto:ekiernan@broadinstitute.org) | Please file GitHub issues in warp or contact [the WARP team](mailto:warp-pipelines-help@broadinstitute.org) | ## Introduction to the GDC Whole Genome Somatic Single Sample pipeline @@ -114,7 +114,7 @@ Alternatively, Cromwell allows you to specify an output directory using an optio - Please visit the [GATK Technical Documentation](https://gatk.broadinstitute.org/hc/en-us/categories/360002310591) site for further documentation on GATK-related workflows and tools. ## Contact us -Please help us make our tools better by contacting [Kylee Degatano](mailto:kdegatano@broadinstitute.org) for pipeline-related suggestions or questions. +Please help us make our tools better by contacting [the WARP team](mailto:warp-pipelines-help@broadinstitute.org) for pipeline-related suggestions or questions. 
## Licensing diff --git a/website/docs/Pipelines/Illumina_Genotyping_Arrays_Pipeline/IlluminaGenotypingArray.documentation.md b/website/docs/Pipelines/Illumina_Genotyping_Arrays_Pipeline/IlluminaGenotypingArray.documentation.md index 2e58261ae3..84e524016d 100644 --- a/website/docs/Pipelines/Illumina_Genotyping_Arrays_Pipeline/IlluminaGenotypingArray.documentation.md +++ b/website/docs/Pipelines/Illumina_Genotyping_Arrays_Pipeline/IlluminaGenotypingArray.documentation.md @@ -6,7 +6,7 @@ sidebar_position: 1 | Pipeline Version | Date Updated | Documentation Author | Questions or Feedback | | :----: | :---: | :----: | :--------------: | -| [Version 1.11.6](https://github.com/broadinstitute/warp/releases) | October, 2021 | [Elizabeth Kiernan](mailto:ekiernan@broadinstitute.org) | Please file GitHub issues in warp or contact [Kylee Degatano](mailto:kdegatano@broadinstitute.org) | +| [Version 1.11.6](https://github.com/broadinstitute/warp/releases) | October, 2021 | [Elizabeth Kiernan](mailto:ekiernan@broadinstitute.org) | Please file GitHub issues in warp or contact [the WARP team](mailto:warp-pipelines-help@broadinstitute.org) | ![The Illumina Genotyping Array Pipeline](./IlluminaGenotyping.png) @@ -238,7 +238,7 @@ The Illumina Genotyping Array Pipeline is available on the cloud-based platform ## Feedback and questions -Please help us make our tools better by contacting [Kylee Degatano](mailto:kdegatano@broadinstitute.org) for pipeline-related suggestions or questions. +Please help us make our tools better by contacting [the WARP team](mailto:warp-pipelines-help@broadinstitute.org) for pipeline-related suggestions or questions. 
diff --git a/website/docs/Pipelines/Imputation_Pipeline/README.md b/website/docs/Pipelines/Imputation_Pipeline/README.md index 900ac4251c..d0c24b1d9b 100644 --- a/website/docs/Pipelines/Imputation_Pipeline/README.md +++ b/website/docs/Pipelines/Imputation_Pipeline/README.md @@ -6,7 +6,7 @@ sidebar_position: 1 | Pipeline Version | Date Updated | Documentation Author | Questions or Feedback | | :----: | :---: | :----: | :--------------: | -| [Imputation_v1.0.0](https://github.com/broadinstitute/warp/releases?q=Imputation_v1.0.0&expanded=true) | August, 2021 | [Elizabeth Kiernan](mailto:ekiernan@broadinstitute.org) | Please file GitHub issues in warp or contact [Kylee Degatano](mailto:kdegatano@broadinstitute.org) | +| [Imputation_v1.0.0](https://github.com/broadinstitute/warp/releases?q=Imputation_v1.0.0&expanded=true) | August, 2021 | [Elizabeth Kiernan](mailto:ekiernan@broadinstitute.org) | Please file GitHub issues in warp or contact [the WARP team](mailto:warp-pipelines-help@broadinstitute.org) | ## Introduction to the Imputation pipeline The Imputation pipeline imputes missing genotypes from either a multi-sample VCF or an array of single sample VCFs using a large genomic reference panel. It is based on the [Michigan Imputation Server pipeline](https://imputationserver.readthedocs.io/en/latest/pipeline/). Overall, the pipeline filters, phases, and performs imputation on a multi-sample VCF. It outputs the imputed VCF along with key imputation metrics. @@ -140,7 +140,7 @@ The pipeline is cost-optimized for between 100 and 1,000 samples, where the cost ## Contact us -Help us make our tools better by contacting [Kylee Degatano](mailto:kdegatano@broadinstitute.org) for pipeline-related suggestions or questions. +Help us make our tools better by contacting [the WARP team](mailto:warp-pipelines-help@broadinstitute.org) for pipeline-related suggestions or questions. 
## Licensing diff --git a/website/docs/Pipelines/Optimus_Pipeline/README.md b/website/docs/Pipelines/Optimus_Pipeline/README.md index 574cf3b18d..6ac531aed9 100644 --- a/website/docs/Pipelines/Optimus_Pipeline/README.md +++ b/website/docs/Pipelines/Optimus_Pipeline/README.md @@ -6,7 +6,7 @@ sidebar_position: 1 | Pipeline Version | Date Updated | Documentation Author | Questions or Feedback | | :----: | :---: | :----: | :--------------: | -| [optimus_v5.4.0](https://github.com/broadinstitute/warp/releases?q=optimus&expanded=true) | February, 2022 | [Elizabeth Kiernan](mailto:ekiernan@broadinstitute.org) | Please file GitHub issues in warp or contact [Kylee Degatano](mailto:kdegatano@broadinstitute.org) | +| [optimus_v5.4.0](https://github.com/broadinstitute/warp/releases?q=optimus&expanded=true) | February, 2022 | [Elizabeth Kiernan](mailto:ekiernan@broadinstitute.org) | Please file GitHub issues in warp or contact [the WARP team](mailto:warp-pipelines-help@broadinstitute.org) | ![Optimus_diagram](Optimus_diagram.png) @@ -276,11 +276,11 @@ This pipeline is supported and used by the [Human Cell Atlas](https://www.humanc Each consortia may use slightly different reference files for data analysis or have different post-processing steps. Learn more by reading the [Consortia Processing](./consortia-processing.md) overview. -If your organization also uses this pipeline, we would like to list you! Please reach out to us by contacting [Kylee Degatano](mailto:kdegatano@broadinstitute.org). +If your organization also uses this pipeline, we would like to list you! Please reach out to us by contacting [the WARP team](mailto:warp-pipelines-help@broadinstitute.org). ## Feedback -Please help us make our tools better by contacting [Kylee Degatano](mailto:kdegatano@broadinstitute.org) for pipeline-related suggestions or questions. 
+Please help us make our tools better by contacting [the WARP team](mailto:warp-pipelines-help@broadinstitute.org) for pipeline-related suggestions or questions. ## FAQs diff --git a/website/docs/Pipelines/RNA_with_UMIs_Pipeline/README.md b/website/docs/Pipelines/RNA_with_UMIs_Pipeline/README.md index c6c357f467..3d22604e04 100644 --- a/website/docs/Pipelines/RNA_with_UMIs_Pipeline/README.md +++ b/website/docs/Pipelines/RNA_with_UMIs_Pipeline/README.md @@ -6,7 +6,7 @@ sidebar_position: 1 | Pipeline Version | Date Updated | Documentation Authors | Questions or Feedback | | :----: | :---: | :----: | :--------------: | -| [RNAWithUMIsPipeline_v1.0.6](https://github.com/broadinstitute/warp/releases?q=RNAwithUMIs&expanded=true) | April, 2022 | [Elizabeth Kiernan](mailto:ekiernan@broadinstitute.org) & [Kaylee Mathews](mailto:kmathews@broadinstitute.org)| Please file GitHub issues in warp or contact [Kylee Degatano](mailto:kdegatano@broadinstitute.org) | +| [RNAWithUMIsPipeline_v1.0.6](https://github.com/broadinstitute/warp/releases?q=RNAwithUMIs&expanded=true) | April, 2022 | [Elizabeth Kiernan](mailto:ekiernan@broadinstitute.org) & [Kaylee Mathews](mailto:kmathews@broadinstitute.org)| Please file GitHub issues in warp or contact [the WARP team](mailto:warp-pipelines-help@broadinstitute.org) | ![RNAWithUMIs_diagram](rna-with-umis_diagram.png) @@ -270,6 +270,6 @@ All RNA with UMIs pipeline releases are documented in the [pipeline changelog](h ## Feedback -Please help us make our tools better by contacting [Kylee Degatano](mailto:kdegatano@broadinstitute.org) for pipeline-related suggestions or questions. +Please help us make our tools better by contacting [the WARP team](mailto:warp-pipelines-help@broadinstitute.org) for pipeline-related suggestions or questions. 
\ No newline at end of file diff --git a/website/docs/Pipelines/Single_Cell_ATAC_Seq_Pipeline/README.md b/website/docs/Pipelines/Single_Cell_ATAC_Seq_Pipeline/README.md index 99351bb089..526f284b0a 100644 --- a/website/docs/Pipelines/Single_Cell_ATAC_Seq_Pipeline/README.md +++ b/website/docs/Pipelines/Single_Cell_ATAC_Seq_Pipeline/README.md @@ -6,7 +6,7 @@ sidebar_position: 1 | Pipeline Version | Date Updated | Documentation Author | Questions or Feedback | | :----: | :---: | :----: | :--------------: | -| [scATAC 1.2.0 ](https://github.com/broadinstitute/warp/releases) | January 04 2021 | [Elizabeth Kiernan](mailto:ekiernan@broadinstitute.org) | Please file GitHub issues in WARP or contact [Kylee Degatano](mailto:kdegatano@broadinstitute.org) | +| [scATAC 1.2.0 ](https://github.com/broadinstitute/warp/releases) | January 04 2021 | [Elizabeth Kiernan](mailto:ekiernan@broadinstitute.org) | Please file GitHub issues in WARP or contact [the WARP team](mailto:warp-pipelines-help@broadinstitute.org) | ![scATAC_diagram](./scATAC_diagram.png) @@ -162,8 +162,8 @@ Please identify the pipeline in your methods section using the scATAC Pipeline's ## Consortia Support This pipeline is supported and used by the [BRAIN Initiative Cell Census Network](https://biccn.org/) (BICCN). -If your organization also uses this pipeline, we would love to list you! Please reach out to us by contacting [Kylee Degatano](mailto:kdegatano@broadinstitute.org). +If your organization also uses this pipeline, we would love to list you! Please reach out to us by contacting [the WARP team](mailto:warp-pipelines-help@broadinstitute.org). ## Pipeline Improvements -Please help us make our tools better by contacting [Kylee Degatano](mailto:kdegatano@broadinstitute.org) for pipeline-related suggestions or questions. +Please help us make our tools better by contacting [the WARP team](mailto:warp-pipelines-help@broadinstitute.org) for pipeline-related suggestions or questions. 
diff --git a/website/docs/Pipelines/Smart-seq2_Multi_Sample_Pipeline/README.md b/website/docs/Pipelines/Smart-seq2_Multi_Sample_Pipeline/README.md index 2a201d30d5..f6568530ad 100644 --- a/website/docs/Pipelines/Smart-seq2_Multi_Sample_Pipeline/README.md +++ b/website/docs/Pipelines/Smart-seq2_Multi_Sample_Pipeline/README.md @@ -6,7 +6,7 @@ sidebar_position: 1 | Pipeline Version | Date Updated | Documentation Author | Questions or Feedback | | :----: | :---: | :----: | :--------------: | -| [MultiSampleSmartSeq2_v2.2.1](https://github.com/broadinstitute/warp/releases) | May, 2021 | [Elizabeth Kiernan](mailto:ekiernan@broadinstitute.org) | Please file GitHub issues in WARP or contact [Kylee Degatano](mailto:kdegatano@broadinstitute.org) | +| [MultiSampleSmartSeq2_v2.2.1](https://github.com/broadinstitute/warp/releases) | May, 2021 | [Elizabeth Kiernan](mailto:ekiernan@broadinstitute.org) | Please file GitHub issues in WARP or contact [the WARP team](mailto:warp-pipelines-help@broadinstitute.org) | ## Introduction @@ -104,9 +104,9 @@ Please identify the pipeline in your methods section using the Smart-seq2 Multi- ## Consortia Support This pipeline is supported and used by the [Human Cell Atlas](https://www.humancellatlas.org/) (HCA) project. -If your organization also uses this pipeline, we would love to list you! Please reach out to us by contacting [Kylee Degatano](mailto:kdegatano@broadinstitute.org). +If your organization also uses this pipeline, we would love to list you! Please reach out to us by contacting [the WARP team](mailto:warp-pipelines-help@broadinstitute.org). ## Have Suggestions? -Please help us make our tools better by contacting [Kylee Degatano](mailto:kdegatano@broadinstitute.org) for pipeline-related suggestions or questions. +Please help us make our tools better by contacting [the WARP team](mailto:warp-pipelines-help@broadinstitute.org) for pipeline-related suggestions or questions. 
diff --git a/website/docs/Pipelines/Smart-seq2_Single_Nucleus_Multi_Sample_Pipeline/README.md b/website/docs/Pipelines/Smart-seq2_Single_Nucleus_Multi_Sample_Pipeline/README.md index 147e23facc..14ee04fbf8 100644 --- a/website/docs/Pipelines/Smart-seq2_Single_Nucleus_Multi_Sample_Pipeline/README.md +++ b/website/docs/Pipelines/Smart-seq2_Single_Nucleus_Multi_Sample_Pipeline/README.md @@ -6,7 +6,7 @@ sidebar_position: 1 | Pipeline Version | Date Updated | Documentation Author | Questions or Feedback | | :----: | :---: | :----: | :--------------: | -| [MultiSampleSmartSeq2SingleNuclei_v1.2.2](https://github.com/broadinstitute/warp/releases) | February, 2022 | [Elizabeth Kiernan](mailto:ekiernan@broadinstitute.org) | Please file GitHub issues in WARP or contact [Kylee Degatano](mailto:kdegatano@broadinstitute.org) | +| [MultiSampleSmartSeq2SingleNuclei_v1.2.2](https://github.com/broadinstitute/warp/releases) | February, 2022 | [Elizabeth Kiernan](mailto:ekiernan@broadinstitute.org) | Please file GitHub issues in WARP or contact [the WARP team](mailto:warp-pipelines-help@broadinstitute.org) | ![](./snSS2.png) @@ -179,7 +179,7 @@ This pipeline is supported and used by the [BRAIN Initiative Cell Census Network Each consortia may use slightly different reference files for data analysis or have different post-processing steps. Learn more by reading the [Consortia Processing](./consortia-processing.md) overview. -If your organization also uses this pipeline, we would love to list you! Please reach out to us by contacting [Kylee Degatano](mailto:kdegatano@broadinstitute.org). +If your organization also uses this pipeline, we would love to list you! Please reach out to us by contacting [the WARP team](mailto:warp-pipelines-help@broadinstitute.org). ## Feedback -Please help us make our tools better by contacting [Kylee Degatano](mailto:kdegatano@broadinstitute.org) for pipeline-related suggestions or questions. 
\ No newline at end of file +Please help us make our tools better by contacting [the WARP team](mailto:warp-pipelines-help@broadinstitute.org) for pipeline-related suggestions or questions. \ No newline at end of file diff --git a/website/docs/Pipelines/Smart-seq2_Single_Sample_Pipeline/README.md b/website/docs/Pipelines/Smart-seq2_Single_Sample_Pipeline/README.md index 5dfa83220e..5b27c8fa1e 100644 --- a/website/docs/Pipelines/Smart-seq2_Single_Sample_Pipeline/README.md +++ b/website/docs/Pipelines/Smart-seq2_Single_Sample_Pipeline/README.md @@ -6,7 +6,7 @@ sidebar_position: 1 | Pipeline Version | Date Updated | Documentation Author | Questions or Feedback | | :----: | :---: | :----: | :--------------: | -| [smartseq2_v5.1.1](https://github.com/broadinstitute/warp/releases) | December, 2020 | [Elizabeth Kiernan](mailto:ekiernan@broadinstitute.org) | Please file GitHub issues in WARP or contact [Kylee Degatano](mailto:kdegatano@broadinstitute.org) | +| [smartseq2_v5.1.1](https://github.com/broadinstitute/warp/releases) | December, 2020 | [Elizabeth Kiernan](mailto:ekiernan@broadinstitute.org) | Please file GitHub issues in WARP or contact [the WARP team](mailto:warp-pipelines-help@broadinstitute.org) | ![](./smartseq_image.png) @@ -216,7 +216,7 @@ Please identify the SS2 pipeline in your methods section using the Smart-seq2 Si ## Consortia Support This pipeline is supported and used by the [Human Cell Atlas](https://www.humancellatlas.org/) (HCA) project. -If your organization also uses this pipeline, we would love to list you! Please reach out to us by contacting [Kylee Degatano](mailto:kdegatano@broadinstitute.org). +If your organization also uses this pipeline, we would love to list you! Please reach out to us by contacting [the WARP team](mailto:warp-pipelines-help@broadinstitute.org). ## Have Suggestions? -Please help us make our tools better by contacting [Kylee Degatano](mailto:kdegatano@broadinstitute.org) for pipeline-related suggestions or questions. 
+Please help us make our tools better by contacting [the WARP team](mailto:warp-pipelines-help@broadinstitute.org) for pipeline-related suggestions or questions. diff --git a/website/docs/Pipelines/Ultima_Genomics_Whole_Genome_Germline_Pipeline/README.md b/website/docs/Pipelines/Ultima_Genomics_Whole_Genome_Germline_Pipeline/README.md index 2bfde8ca34..5a66f8663c 100644 --- a/website/docs/Pipelines/Ultima_Genomics_Whole_Genome_Germline_Pipeline/README.md +++ b/website/docs/Pipelines/Ultima_Genomics_Whole_Genome_Germline_Pipeline/README.md @@ -6,7 +6,7 @@ sidebar_position: 1 | Pipeline Version | Date Updated | Documentation Authors | Questions or Feedback | | :----: | :---: | :----: | :--------------: | -| [UltimaGenomicsWholeGenomeGermline_v1.0.0](https://github.com/broadinstitute/warp/releases) | June, 2022 | [Elizabeth Kiernan](mailto:ekiernan@broadinstitute.org) & [Kaylee Mathews](mailto:kmathews@broadinstitute.org)| Please file GitHub issues in warp or contact [Kylee Degatano](mailto:kdegatano@broadinstitute.org) | +| [UltimaGenomicsWholeGenomeGermline_v1.0.0](https://github.com/broadinstitute/warp/releases) | June, 2022 | [Elizabeth Kiernan](mailto:ekiernan@broadinstitute.org) & [Kaylee Mathews](mailto:kmathews@broadinstitute.org)| Please file GitHub issues in warp or contact [the WARP team](mailto:warp-pipelines-help@broadinstitute.org) | ![UG_diagram](ug_diagram.png) @@ -275,7 +275,7 @@ All UG_WGS pipeline releases are documented in the [pipeline changelog](https:// ## Feedback -Please help us make our tools better by contacting [Kylee Degatano](mailto:kdegatano@broadinstitute.org) for pipeline-related suggestions or questions. +Please help us make our tools better by contacting [the WARP team](mailto:warp-pipelines-help@broadinstitute.org) for pipeline-related suggestions or questions. 
diff --git a/website/docs/Pipelines/Whole_Genome_Germline_Single_Sample_Pipeline/README.md b/website/docs/Pipelines/Whole_Genome_Germline_Single_Sample_Pipeline/README.md index ead4636987..bcd06b1d32 100644 --- a/website/docs/Pipelines/Whole_Genome_Germline_Single_Sample_Pipeline/README.md +++ b/website/docs/Pipelines/Whole_Genome_Germline_Single_Sample_Pipeline/README.md @@ -6,9 +6,9 @@ sidebar_position: 1 | Pipeline Version | Date Updated | Documentation Author | Questions or Feedback | | :----: | :---: | :----: | :--------------: | -| WholeGenomeGermlineSingleSample_v3.1.2 (see [releases page](https://github.com/broadinstitute/warp/releases)) | June, 2022 | [Elizabeth Kiernan](mailto:ekiernan@broadinstitute.org) | Please file GitHub issues in WARP or contact [Kylee Degatano](mailto:kdegatano@broadinstitute.org) | +| WholeGenomeGermlineSingleSample_v3.1.6 (see [releases page](https://github.com/broadinstitute/warp/releases)) | August, 2022 | [Elizabeth Kiernan](mailto:ekiernan@broadinstitute.org) | Please file GitHub issues in WARP or contact [the WARP team](mailto:warp-pipelines-help@broadinstitute.org) | -## Introduction to the Whole Genome Germline Single Sample Pipeline +## Introduction to the Whole Genome Germline Single Sample Pipeline The Whole Genome Germline Single Sample (WGS) pipeline implements data pre-processing and initial variant calling according to the GATK Best Practices for germline SNP and Indel discovery in human whole-genome sequencing data. It includes the DRAGEN-GATK mode, which makes the pipeline functionally equivalent to DRAGEN’s analysis pipeline (read more in this [DRAGEN-GATK blog](https://gatk.broadinstitute.org/hc/en-us/articles/360039984151)). @@ -76,7 +76,7 @@ The latest release of the workflow, example data, and dependencies are available ### Input descriptions The tables below describe each of the WGS pipeline inputs and reference files. 
-Examples of how to specify each input can be found in the example [input configuration files (JSONs)](https://github.com/broadinstitute/warp/tree/develop/pipelines/broad/dna_seq/germline/single_sample/wgs/input_files). +Examples of how to specify each input can be found in the example [input configuration files (JSONs)](https://github.com/broadinstitute/warp/tree/master/pipelines/broad/dna_seq/germline/single_sample/wgs/input_files). Multiple references are imported as part of a struct from the [DNASeqStruct WDL](https://github.com/broadinstitute/warp/blob/master/structs/dna_seq/DNASeqStructs.wdl), which is located in the WARP [structs library](https://github.com/broadinstitute/warp/tree/master/structs). For references that are part of a struct, the tables below list the relevant struct’s name. @@ -91,7 +91,7 @@ Overall, the workflow has the following input requirements: * Reference genome must be Hg38 with ALT contigs #### Struct inputs -The following table describes the inputs imported from a struct. Although these are specified in the WGS workflow using the struct name, the actual inputs for each struct are specified in the [example configuration files](https://github.com/broadinstitute/warp/tree/develop/pipelines/broad/dna_seq/germline/single_sample/wgs/input_files). +The following table describes the inputs imported from a struct. Although these are specified in the WGS workflow using the struct name, the actual inputs for each struct are specified in the [example configuration files](https://github.com/broadinstitute/warp/tree/master/pipelines/broad/dna_seq/germline/single_sample/wgs/input_files). | Input name | Struct name (alias) | Input description | Input type | @@ -114,7 +114,7 @@ The following table describes the inputs imported from a struct. Although these | agg_preemptible_tries | PapiSettings (papi_settings) | Number of preemtible machine tries for the BamtoCram task. 
| Int | #### Additional inputs -Additional inputs that are not contained in a struct are described in the table below. Similar to the struct inputs, these inputs are specified in the [example configuration files](https://github.com/broadinstitute/warp/tree/develop/pipelines/broad/dna_seq/germline/single_sample/wgs/input_files) or, when noted, are hardcoded into the WDL workflow. +Additional inputs that are not contained in a struct are described in the table below. Similar to the struct inputs, these inputs are specified in the [example configuration files](https://github.com/broadinstitute/warp/tree/master/pipelines/broad/dna_seq/germline/single_sample/wgs/input_files) or, when noted, are hardcoded into the WDL workflow. * Optional inputs, like the fingerprint_genotypes_file, need to match your input samples. For example, the fingerprint file in the workflow's [test input configuration JSON](https://github.com/broadinstitute/warp/blob/master/pipelines/broad/dna_seq/germline/single_sample/wgs/input_files/WholeGenomeGermlineSingleSample.inputs.plumbing.masked_reference.json) is set up to check fingerprints for the NA12878 Plumbing sample. The sample name in the VCF matches the name used for the `sample_name` input. @@ -171,7 +171,7 @@ The table below details the subtasks called by the UnmappedBamToAlignedBam task, | [Processing.SortSam](https://github.com/broadinstitute/warp/blob/master/tasks/broad/BamProcessing.wdl) | SortSam | Picard | Sorts the aggregated BAM by coordinate sort order. | | [QC.CrossCheckFingerprints (CrossCheckFingerprints)](https://github.com/broadinstitute/warp/blob/master/tasks/broad/Qc.wdl) | CrosscheckFingerprints | Picard | Optionally checks fingerprints if haplotype database is provided. | | [Utils.CreateSequenceGroupingTSV (CreateSequenceGroupingTSV)](https://github.com/broadinstitute/warp/blob/master/tasks/broad/Utilities.wdl) | --- | python | Creates the sequencing groupings used for BQSR and PrintReads Scatter. 
| -| [Processing.CheckContamination](https://github.com/broadinstitute/warp/blob/master/tasks/broad/BamProcessing.wdl) | VerifyBamID | --- | Checks cross-sample contamination prior to variant calling. | +| [Processing.CheckContamination](https://github.com/broadinstitute/warp/blob/master/tasks/broad/BamProcessing.wdl) | VerifyBamID2 | --- | Checks cross-sample contamination prior to variant calling. | | [Processing.BaseRecalibrator (BaseRecalibrator)](https://github.com/broadinstitute/warp/blob/master/tasks/broad/BamProcessing.wdl) | BaseRecalibrator | GATK | If `perform_bqsr` is true, performs base recalibration by interval. When using the DRAGEN-GATK mode, `perform_bqsr` is optionally false as base calling errors are corrected in the DRAGEN variant calling step.| | [Processing.GatherBqsrReports (GatherBqsrReports)](https://github.com/broadinstitute/warp/blob/master/tasks/broad/BamProcessing.wdl) | GatherBQSRReports | GATK | Merges the BQSR reports resulting from by-interval calibration. | | [Processing.ApplyBQSR (ApplyBQSR)](https://github.com/broadinstitute/warp/blob/master/tasks/broad/BamProcessing.wdl) | ApplyBQSR | GATK | Applies the BQSR base recalibration model by interval. | @@ -276,7 +276,7 @@ The table below describes the final workflow outputs. If running the workflow on | read_group_gc_bias_pdf | PDF of the GC bias by readgroup for the aggregated BAM. | File | | read_group_gc_bias_summary_metrics | GC bias summary metrics by readgroup for the aggregated BAM. | File | | cross_check_fingerprints_metrics | Fingerprint metrics file if optional fingerprinting is performed. | File | -| selfSM | Contamination estimate from VerifyBamID. | File | +| selfSM | Contamination estimate from VerifyBamID2. | File | | contamination | Estimated contamination from the CheckContamination task. | Float | | calculate_read_group_checksum_md5 | MD5 checksum for aggregated BAM. | File | | agg_alignment_summary_metrics | Alignment summary metrics for the aggregated BAM. 
| File | @@ -318,7 +318,7 @@ As of November 2021, reblocking is a default task in the WGS pipeline. To skip r "WholeGenomeGermlineSingleSample.BamToGvcf.skip_reblocking": true ``` -The [Reblocking task](https://github.com/broadinstitute/warp/blob/develop/tasks/broad/GermlineVariantDiscovery.wdl) uses the GATK ReblockGVCF tool with the arguments: +The [Reblocking task](https://github.com/broadinstitute/warp/blob/master/tasks/broad/GermlineVariantDiscovery.wdl) uses the GATK ReblockGVCF tool with the arguments: ```WDL -do-qual-approx -floor-blocks -GQB 20 -GQB 30 -GQB 40 @@ -371,7 +371,7 @@ The final CRAM files have base quality scores binned according to the [Functiona ## Contact us -Please help us make our tools better by contacting [Kylee Degatano](mailto:kdegatano@broadinstitute.org) for pipeline-related suggestions or questions. +Please help us make our tools better by contacting [the WARP team](mailto:warp-pipelines-help@broadinstitute.org) for pipeline-related suggestions or questions. ## Licensing diff --git a/website/docs/Pipelines/Whole_Genome_Germline_Single_Sample_Pipeline/wgs.methods.md b/website/docs/Pipelines/Whole_Genome_Germline_Single_Sample_Pipeline/wgs.methods.md index 255a5f87ae..dd19f5bd03 100644 --- a/website/docs/Pipelines/Whole_Genome_Germline_Single_Sample_Pipeline/wgs.methods.md +++ b/website/docs/Pipelines/Whole_Genome_Germline_Single_Sample_Pipeline/wgs.methods.md @@ -2,17 +2,17 @@ sidebar_position: 2 --- -# Whole Genome Germline Single Sample v3.0.0 Methods (Default workflow) +# Whole Genome Germline Single Sample v3.1.6 Methods (Default workflow) The following contains a detailed methods description outlining the pipeline’s process, software, and tools that can be modified for a publication methods section. 
## Detailed methods for the default Whole Genome Germline Single Sample workflow -Preprocessing and variant calling was performed using the WholeGenomeGermlineSingleSample 3.0.0 pipeline using Picard 2.23.8, GATK 4.2.2.0, and Samtools 1.11 with default tool parameters unless otherwise specified. All reference files are available in the public [Broad References Google Bucket](https://console.cloud.google.com/storage/browser/gcp-public-data--broad-references/hg38/v0). The pipeline follows GATK Best Practices as previously described ([Van der Auwera & O'Connor, 2020](https://www.oreilly.com/library/view/genomics-in-the/9781491975183/)) as well as the Functional Equivalence specification ([Regier et al., 2018](https://www.nature.com/articles/s41467-018-06159-4)). +Preprocessing and variant calling was performed using the WholeGenomeGermlineSingleSample v3.1.6 pipeline using Picard v2.26.10, GATK v4.2.6.1, and Samtools v1.11 with default tool parameters unless otherwise specified. All reference files are available in the public [Broad References Google Bucket](https://console.cloud.google.com/storage/browser/gcp-public-data--broad-references/hg38/v0). The pipeline follows GATK Best Practices as previously described ([Van der Auwera & O'Connor, 2020](https://www.oreilly.com/library/view/genomics-in-the/9781491975183/)) as well as the Functional Equivalence specification ([Regier et al., 2018](https://www.nature.com/articles/s41467-018-06159-4)). ### Pre-processing and quality control metrics -Whole genome paired-end reads in unmapped BAM (uBAM) format were first scattered to perform QC and alignment in parallel. Quality metrics were calculated using Picard CollectQualityYieldMetrics. uBAMs were converted to FASTQ using Picard SamToFastq and aligned to the Hg38 reference genome using BWA mem 0.7.15 with batch size set using -K 100000000. 
Metadata from the uBAMs was then merged with the aligned BAMs using Picard MergeBamAlignment with the parameters --SORT_ORDER="unsorted", allowing the data to be grouped by read name for efficient downstream marking of duplicates, and --UNMAP_CONTAMINANT_READS=true, to remove cross-species contamination. +Whole genome paired-end reads in unmapped BAM (uBAM) format were first scattered to perform QC and alignment in parallel. Quality metrics were calculated using Picard CollectQualityYieldMetrics. uBAMs were converted to FASTQ using Picard SamToFastq and aligned to the Hg38 reference genome using BWA mem v0.7.15 with batch size set using -K 100000000. Metadata from the uBAMs was then merged with the aligned BAMs using Picard MergeBamAlignment with the parameters --SORT_ORDER="unsorted", allowing the data to be grouped by read name for efficient downstream marking of duplicates, and --UNMAP_CONTAMINANT_READS=true, to remove cross-species contamination. QC metrics (base distribution by cycle, insert size metrics, mean quality by cycle, and quality score distribution) were collected for the aligned, unsorted read-groups using Picard CollectMultipleMetrics. The read-group specific aligned BAMs were then aggregated and duplicate reads were flagged using Picard MarkDuplicates assuming queryname-sorted order and the parameter --OPTICAL_DUPLICATE_PIXEL_DISTANCE=2500, which is appropriate for patterned flowcells. @@ -34,7 +34,7 @@ The pipeline’s final outputs included metrics, validation reports, an aligned ## Detailed methods for the Functional Equivalence mode of the Whole Genome Germline Single Sample workflow -Preprocessing and variant calling was performed using the WholeGenomeGermlineSingleSample 3.0.0 pipeline using Picard 2.23.8, GATK 4.2.2.0, and Samtools 1.11 with default tool parameters unless otherwise specified. 
All reference files are available in the public [Broad References Google Bucket](https://console.cloud.google.com/storage/browser/gcp-public-data--broad-references/hg38/v0). The pipeline is functionally equivalent (as described in GATK Support: https://gatk.broadinstitute.org/hc/en-us/articles/4410456501915) to DRAGEN version 3.4.12. +Preprocessing and variant calling was performed using the WholeGenomeGermlineSingleSample v3.1.6 pipeline using Picard v2.26.10, GATK v4.2.6.1, and Samtools v1.11 with default tool parameters unless otherwise specified. All reference files are available in the public [Broad References Google Bucket](https://console.cloud.google.com/storage/browser/gcp-public-data--broad-references/hg38/v0). The pipeline is functionally equivalent (as described in GATK Support: https://gatk.broadinstitute.org/hc/en-us/articles/4410456501915) to DRAGEN v3.4.12. ### Pre-processing and quality control metrics @@ -57,5 +57,6 @@ Prior to variant calling, the DRAGEN STR model was calibrated using the Calibrat The pipeline’s final outputs included metrics, validation reports, an aligned CRAM with index, and a reblocked GVCF containing variant calls with an accompanying index. ## Previous methods documents +- [WholeGenomeGermlineSingleSample_v3.0.0](https://github.com/broadinstitute/warp/blob/WholeGenomeGermlineSingleSample_v3.0.0/website/docs/Pipelines/Whole_Genome_Germline_Single_Sample_Pipeline/wgs.methods.md) - [WholeGenomeGermlineSingleSample_v2.5.0](https://github.com/broadinstitute/warp/blob/WholeGenomeGermlineSingleSample_v2.5.0/website/docs/Pipelines/Whole_Genome_Germline_Single_Sample_Pipeline/wgs.methods.md) - [WholeGenomeGermlineSingleSample_v2.3.7](https://github.com/broadinstitute/warp/blob/WholeGenomeGermlineSingleSample_v2.3.7/website/docs/Pipelines/Whole_Genome_Germline_Single_Sample_Pipeline/wgs.methods.md)