add workflow: fetch_sra_bams_for_genbank_accession #475

Status: Draft
Wants to merge 30 commits into base: master

Commits (30):
d6eeae2
WIP commit of fetch_sra_runs_for_genbank_accession task and associate…
tomkinsc May 19, 2023
720a47e
pluralize output of fetch_sra_bams_for_genbank_accession workflow
tomkinsc Jun 1, 2023
74d0b79
update ncbi-tools docker image 2.10.8 -> 2.11
tomkinsc Jul 6, 2023
076fbc1
pin ncbi-tools docker image to 2.11.0
tomkinsc Jul 14, 2023
42d4355
update retrieval of SRA accessions given a GenBank accession
tomkinsc Jul 17, 2023
f1cb9dc
bump ncbi-tools 2.11.0 -> 2.11.1
tomkinsc Jul 18, 2023
2e4099c
add workflow to fetch sequences from GenBank: fetch_fasta_for_genbank…
tomkinsc Jul 18, 2023
b1bf8ef
update NCBI esearch parameter name ("-q" -> "-query") to reflect API …
tomkinsc Jul 18, 2023
c26c920
pin broadinstitute/qiime2=latest
tomkinsc Jul 18, 2023
b9d683f
pin docker image to latest in other tasks of tasts_16S_amplicon.wdl
tomkinsc Jul 18, 2023
cb69a63
bump womtool and cromwell 61->85
tomkinsc Jul 18, 2023
cc9524b
do not quit commands in tasks_16S_amplicon on pipefail
tomkinsc Jul 18, 2023
d277bdd
condense GitHub actions script flags
tomkinsc Jul 18, 2023
6015636
bump setup-buildx-action GitHub action version v1->v2
tomkinsc Jul 18, 2023
50e402c
pin quay.io/broadinstitute/qiime2 to specific build hash
tomkinsc Jul 18, 2023
3f42c91
chmod 644 two qiime-related WDLs
tomkinsc Jul 18, 2023
4c8bd10
increase default mem for nextstrain_build_subsample task 50->96GB
tomkinsc Jul 18, 2023
4164239
WIP config to reduce verbosity of cromwell logging
tomkinsc Jul 18, 2023
e274a4f
list on dockstore: fetch_sra_bams_for_genbank_accession, fetch_fasta_…
tomkinsc Jul 19, 2023
64a7d73
cleanup cromwell test dir on failure or exit (unless KEEP_OUTPUT=true)
tomkinsc Jul 19, 2023
a986189
bugfix
tomkinsc Jul 19, 2023
ea4f193
only disable miniwdl post-execution output chown on macOS
tomkinsc Jul 19, 2023
f3b9eb2
add docker action for tests-cromwell; cruft removal;
tomkinsc Jul 19, 2023
8eed3b5
delay cd to test_dir until after jar copy
tomkinsc Jul 19, 2023
1bb6e03
roll back version of ncbi-tools pin
tomkinsc Jul 19, 2023
a624343
Merge branch 'master' into ct-fetch-sra-bams-for-genbank-accession
tomkinsc Jul 19, 2023
9a05bff
(debugging) roll back cromwell test config
tomkinsc Jul 19, 2023
8e1d45e
roll back cromwell 85 -> 61
tomkinsc Jul 19, 2023
e42bf47
DRY: use Fetch_SRA_to_BAM to fetch (and reheader) SRA bams
tomkinsc Jul 21, 2023
8ba6bbc
Merge branch 'master' into ct-fetch-sra-bams-for-genbank-accession
tomkinsc Jul 26, 2023
10 changes: 10 additions & 0 deletions .dockstore.yml
@@ -154,6 +154,16 @@ workflows:
primaryDescriptorPath: /pipes/WDL/workflows/fetch_sra_to_bam.wdl
testParameterFiles:
- empty.json
- name: fetch_sra_bams_for_genbank_accession
subclass: WDL
primaryDescriptorPath: /pipes/WDL/workflows/fetch_sra_bams_for_genbank_accession.wdl
testParameterFiles:
- empty.json
- name: fetch_fasta_for_genbank_accessions
subclass: WDL
primaryDescriptorPath: /pipes/WDL/workflows/fetch_fasta_for_genbank_accessions.wdl
testParameterFiles:
- empty.json
- name: filter_classified_bam_to_taxa
subclass: WDL
primaryDescriptorPath: /pipes/WDL/workflows/filter_classified_bam_to_taxa.wdl
8 changes: 5 additions & 3 deletions .github/workflows/build.yml
@@ -189,7 +189,7 @@ jobs:
run: git fetch --prune --unshallow --tags
- name: Programmatic environment setup
run: |
set -e -x
set -ex
# $GITHUB_ENV is available for subsequent steps
GITHUB_ACTIONS_TAG=$(git describe --tags --exact-match && sed 's/^v//g' || echo '')
echo "GITHUB_ACTIONS_TAG=$GITHUB_ACTIONS_TAG" >> $GITHUB_ENV
@@ -218,6 +218,8 @@ jobs:
shell: bash
run: |
github_actions_ci/install-wdl.sh
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v2
- name: test with cromwell
shell: bash
run: |
@@ -237,7 +239,7 @@
run: git fetch --prune --unshallow --tags
- name: Programmatic environment setup
run: |
set -e -x
set -ex
# $GITHUB_ENV is available for subsequent steps
GITHUB_ACTIONS_TAG=$(git describe --tags --exact-match && sed 's/^v//g' || echo '')
echo "GITHUB_ACTIONS_TAG=$GITHUB_ACTIONS_TAG" >> $GITHUB_ENV
@@ -269,7 +271,7 @@ jobs:
run: |
pip3 install miniwdl docker[tls] six
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v1
uses: docker/setup-buildx-action@v2
- name: test with miniwdl
shell: bash
run: |
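The setup steps above hand values to later steps by appending `KEY=value` lines to the file named by `$GITHUB_ENV`; a minimal local sketch (a temp file stands in for the runner-provided path, and the pipe into `sed` is an assumed reading of the step above):

```shell
#!/bin/bash
set -e
# Simulate the GITHUB_ENV handoff: one step appends KEY=value lines,
# and the Actions runner exports them into later steps' environments.
GITHUB_ENV="$(mktemp)"

# "step 1": derive a version tag; empty when HEAD is not exactly on a tag
GITHUB_ACTIONS_TAG=$(git describe --tags --exact-match 2>/dev/null | sed 's/^v//g' || echo '')
echo "GITHUB_ACTIONS_TAG=$GITHUB_ACTIONS_TAG" >> "$GITHUB_ENV"

# "step 2": read back what step 1 exported
grep '^GITHUB_ACTIONS_TAG=' "$GITHUB_ENV"
```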
7 changes: 6 additions & 1 deletion github_actions_ci/check-wdl-runtimes.sh
@@ -4,13 +4,18 @@

echo "Checking wdl container versions against ${MODULE_VERSIONS}"


# this is the newer script that simply validates existing version strings
should_error=false
for task_file in $(ls -1 pipes/WDL/tasks/*.wdl); do
echo "Checking ${task_file}"
while IFS='=' read module version; do
OLD_TAG=$module
NEW_TAG="$module:$version"
if ! grep -q "sha256" <<< "$version"; then
NEW_TAG="$module:$version"
else
NEW_TAG="$module@$version"
fi

offending_lines="$(grep -nE "^[^#]*$OLD_TAG" "${task_file}" | grep -v $NEW_TAG)"

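The tag-versus-digest branch added above can be exercised on its own; a small sketch (module names and the digest are illustrative):

```shell
#!/bin/bash
# Build a docker image reference from a module=version pair, as the
# loop above does: plain versions become tags (image:version), while
# sha256 digests become pinned references (image@sha256:...).
image_ref() {
  local module="$1" version="$2"
  if ! grep -q "sha256" <<< "$version"; then
    echo "$module:$version"
  else
    echo "$module@$version"
  fi
}

image_ref "quay.io/broadinstitute/viral-core" "2.1.33"
image_ref "quay.io/broadinstitute/qiime2" "sha256:b1b8824516dc8b2d"
```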
47 changes: 41 additions & 6 deletions github_actions_ci/tests-cromwell.sh
@@ -1,10 +1,43 @@
#!/bin/bash
set -e # intentionally allow for pipe failures below

mkdir -p workflows
cp *.jar pipes/WDL/workflows/*.wdl pipes/WDL/tasks/*.wdl workflows
cp -r test workflows/
cd workflows
# increase docker timeouts to allow for staging of larger images (seconds)
export DOCKER_CLIENT_TIMEOUT=240
export COMPOSE_HTTP_TIMEOUT=240

starting_dir="$(pwd)"
test_dir="cromwell_testing"

function cleanup(){
echo "Cleaning up from cromwell run; exit code: $?"
cd "$starting_dir"
if [ -d "$test_dir" ] && [[ $KEEP_OUTPUT != "true" ]]; then
rm -r "$test_dir"
fi
}
trap cleanup EXIT SIGINT SIGQUIT SIGTERM

mkdir -p ${test_dir}
cp pipes/WDL/workflows/*.wdl pipes/WDL/tasks/*.wdl $test_dir
sed -i -- 's|import \"../tasks/|import \"|g' ${test_dir}/*.wdl
cp -r test ${test_dir}/

CROMWELL_LOG_LEVEL="${CROMWELL_LOG_LEVEL:=WARN}"

# if "cromwell" exists on the PATH (no .jar file extension suffix)
# it means it was installed from bioconda
if hash cromwell &>/dev/null; then
echo "conda cromwell present";
# this is the bioconda java-launching script
JAVA_ENTRYPOINT="cromwell"
else
# otherwise if cromwell is not installed via conda, call java
JAVA_ENTRYPOINT="java"
cp *.jar ${test_dir}
CROMWELL_JAR_ARG="-jar cromwell.jar"
fi

cd ${test_dir}

for workflow in ../pipes/WDL/workflows/*.wdl; do
workflow_name=$(basename $workflow .wdl)
@@ -13,8 +46,10 @@ for workflow in ../pipes/WDL/workflows/*.wdl; do
date
echo "Executing $workflow_name using Cromwell on local instance"
# the "cat" is to allow a pipe failure (otherwise it halts because of set -e)
java -Dconfig.file=../pipes/cromwell/cromwell.local-github_actions.conf \
-jar cromwell.jar run \
${JAVA_ENTRYPOINT} -Dconfig.file=../pipes/cromwell/cromwell.local-github_actions.conf \
-DLOG_MODE=pretty \
-DLOG_LEVEL=${CROMWELL_LOG_LEVEL} \
${CROMWELL_JAR_ARG} run \
$workflow_name.wdl \
-i $input_json | tee cromwell.out
if [ ${PIPESTATUS[0]} -gt 0 ]; then
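The `tee`/`PIPESTATUS` check in the loop above relies on a bash-specific behavior worth spelling out; a minimal sketch:

```shell
#!/bin/bash
set -e   # intentionally no -o pipefail, matching the script above

# Under plain `set -e` a pipeline's status is that of its LAST command,
# so `workflow_cmd | tee log` never trips -e even when the workflow
# fails. bash's PIPESTATUS array still records each element's code.
sh -c 'exit 3' | tee /dev/null
first=${PIPESTATUS[0]}
echo "workflow exit code: $first"   # prints: workflow exit code: 3
```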
13 changes: 12 additions & 1 deletion github_actions_ci/tests-miniwdl.sh
@@ -1,13 +1,17 @@
#!/bin/bash
set -ex -o pipefail

# increase docker timeouts to allow for staging of larger images (seconds)
export DOCKER_CLIENT_TIMEOUT=240
export COMPOSE_HTTP_TIMEOUT=240

starting_dir="$(pwd)"
test_dir="miniwdl_testing"

function cleanup(){
echo "Cleaning up from miniwdl run; exit code: $?"
cd "$starting_dir"
if [ -d "$test_dir" ]; then
if [ -d "$test_dir" ] && [[ $KEEP_OUTPUT != "true" ]]; then
rm -r "$test_dir"
fi
}
@@ -19,6 +23,13 @@ cd $test_dir

docker --version

if [ "$(uname)" == "Darwin" ]; then
# miniwdl tries to chown output files to the UID
# of the user executing miniwdl, but this can cause problems
# when docker is itself running in a virtualized environment (macOS)
export MINIWDL__FILE_IO__CHOWN=false
fi

# make sure our system has everything it needs to perform "miniwdl run" (e.g. docker swarm works)
miniwdl run_self_test

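Both test scripts share this trap-based cleanup pattern; a self-contained sketch (the `demo_testing` dir name is illustrative):

```shell
#!/bin/bash
# Scratch output is removed on any exit unless KEEP_OUTPUT=true,
# mirroring the cleanup() trap in tests-miniwdl.sh and tests-cromwell.sh.
run_in_scratch() (
  starting_dir="$(pwd)"
  test_dir="demo_testing"

  cleanup() {
    cd "$starting_dir"
    if [ -d "$test_dir" ] && [[ "${KEEP_OUTPUT:-}" != "true" ]]; then
      rm -r "$test_dir"
    fi
  }
  trap cleanup EXIT SIGINT SIGQUIT SIGTERM

  mkdir -p "$test_dir"
  cd "$test_dir"
  echo scratch > scratch.txt   # stand-in for real test work
)

run_in_scratch   # the subshell's EXIT trap fires here
[ -d demo_testing ] && echo "kept" || echo "cleaned"
```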
11 changes: 9 additions & 2 deletions github_actions_ci/version-wdl-runtimes.sh
@@ -3,11 +3,18 @@
# use sed to replace version strings of docker images based on versions defined in txt file

# requires $MODULE_VERSIONS to be set to point to a text file with equal-sign-separated values
# export MODULE_VERSIONS="./requirements-modules.txt" && ./github_actions_ci/check-wdl-runtimes.sh
# export MODULE_VERSIONS="./requirements-modules.txt" && ./github_actions_ci/version-wdl-runtimes.sh

while IFS='=' read module version; do
OLD_TAG=$module
NEW_TAG="$module:$version"
if ! grep -q "sha256" <<< "$version"; then
echo "$module is specified using image tag"
NEW_TAG="$module:$version"
else
echo "$module is specified using image build hash"
NEW_TAG="$module@$version"
fi
echo Replacing $OLD_TAG with $NEW_TAG in all task WDL files
sed -i '' "s|$OLD_TAG[^\"\']*|$NEW_TAG|g" pipes/WDL/tasks/*.wdl

done < $MODULE_VERSIONS
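The rewrite above can be tried on a scratch file. Note `sed -i ''` is the BSD/macOS form of in-place editing; the sketch below uses the GNU form without the empty suffix, with an illustrative digest:

```shell
#!/bin/bash
set -e
# Rewrite an image reference in a WDL-like scratch file, mirroring the
# loop above: match the bare image name and replace the whole
# name[:tag|@digest] token with the pinned reference.
echo 'String docker = "quay.io/broadinstitute/qiime2:old"' > scratch.wdl

OLD_TAG="quay.io/broadinstitute/qiime2"
NEW_TAG="quay.io/broadinstitute/qiime2@sha256:deadbeef"   # illustrative digest
sed -i "s|$OLD_TAG[^\"\']*|$NEW_TAG|g" scratch.wdl

cat scratch.wdl
```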
24 changes: 12 additions & 12 deletions pipes/WDL/tasks/tasks_16S_amplicon.wdl
100755 → 100644
@@ -9,7 +9,7 @@ task qiime_import_from_bam {
Int memory_mb = 7000
Int cpu = 5
Int disk_size_gb = ceil(2*20) + 5
String docker = "quay.io/broadinstitute/qiime2"
String docker = "quay.io/broadinstitute/qiime2@sha256:b1b8824516dc8b2d829cf562d4525d87f0ba5aec0a08a4c63d640eff5f91978b"
}
parameter_meta {
reads_bam: {
@@ -27,7 +27,7 @@ task qiime_import_from_bam {
}

command <<<
set -ex -o pipefail
set -ex

#Part 1A | BAM -> FASTQ [Simple samtools command]
manifest_TSV=manifest.tsv
@@ -86,7 +86,7 @@ task trim_reads {
Int memory_mb = 2000
Int cpu = 4
Int disk_size_gb = ceil(2*size(reads_qza, "GiB")) + 5
String docker = "quay.io/broadinstitute/qiime2"
String docker = "quay.io/broadinstitute/qiime2@sha256:b1b8824516dc8b2d829cf562d4525d87f0ba5aec0a08a4c63d640eff5f91978b"
}
parameter_meta {
reads_qza: {
@@ -119,7 +119,7 @@ task trim_reads {
}
}
command <<<
set -ex -o pipefail
set -ex
qiime cutadapt trim-paired \
--i-demultiplexed-sequences "~{reads_qza}" \
--p-front-f "~{forward_adapter}" \
@@ -160,7 +160,7 @@ task join_paired_ends {
Int memory_mb = 2000
Int cpu = 1
Int disk_size_gb = ceil(2*size(trimmed_reads_qza, "GiB")) + 50
String docker = "quay.io/broadinstitute/qiime2"
String docker = "quay.io/broadinstitute/qiime2@sha256:b1b8824516dc8b2d829cf562d4525d87f0ba5aec0a08a4c63d640eff5f91978b"
}
parameter_meta{
trimmed_reads_qza: {
@@ -177,7 +177,7 @@ task join_paired_ends {
}
}
command <<<
set -ex -o pipefail
set -ex
qiime vsearch join-pairs \
--i-demultiplexed-seqs ~{trimmed_reads_qza} \
--o-joined-sequences "joined.qza"
@@ -210,7 +210,7 @@ task deblur {
Int memory_mb = 2000
Int cpu = 1
Int disk_size_gb = ceil(2*size(joined_end_reads_qza, "GiB")) + 5
String docker = "quay.io/broadinstitute/qiime2"
String docker = "quay.io/broadinstitute/qiime2@sha256:b1b8824516dc8b2d829cf562d4525d87f0ba5aec0a08a4c63d640eff5f91978b"
}
parameter_meta {
joined_end_reads_qza: {
@@ -239,7 +239,7 @@ task deblur {
}
}
command <<<
set -ex -o pipefail
set -ex

qiime deblur denoise-16S \
--i-demultiplexed-seqs ~{joined_end_reads_qza}\
@@ -288,7 +288,7 @@ task train_classifier {
Int memory_mb = 2000
Int cpu = 1
Int disk_size_gb = ceil(2*size(otu_ref, "GiB")) + 5
String docker = "quay.io/broadinstitute/qiime2"
String docker = "quay.io/broadinstitute/qiime2@sha256:b1b8824516dc8b2d829cf562d4525d87f0ba5aec0a08a4c63d640eff5f91978b"
}
parameter_meta{
otu_ref: {
@@ -322,7 +322,7 @@ task train_classifier {
}

command <<<
set -ex -o pipefail
set -ex
CONDA_ENV_NAME=$(conda info --envs -q | awk -F" " '/qiime.*/{ print $1 }')
conda activate ${CONDA_ENV_NAME}

@@ -372,7 +372,7 @@ task tax_analysis {
Int memory_mb = 5
Int cpu = 1
Int disk_size_gb = 375
String docker = "quay.io/broadinstitute/qiime2"
String docker = "quay.io/broadinstitute/qiime2@sha256:b1b8824516dc8b2d829cf562d4525d87f0ba5aec0a08a4c63d640eff5f91978b"
}
parameter_meta{
trained_classifier: {
@@ -397,7 +397,7 @@ task tax_analysis {
}
}
command <<<
set -ex -o pipefail
set -ex
qiime feature-classifier classify-sklearn \
--i-classifier ~{trained_classifier} \
--i-reads ~{representative_seqs_qza} \
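The `train_classifier` task above locates its conda environment by parsing `conda info --envs`; the awk selection can be sanity-checked against a canned listing (the sample environments are illustrative, and the real script pipes `conda info --envs -q` instead):

```shell
#!/bin/bash
# Pick the environment whose name starts with "qiime" out of a
# `conda info --envs`-style listing (canned here for illustration).
envs_listing=$'base                  /opt/conda\nqiime2-2023.2         /opt/conda/envs/qiime2-2023.2'
CONDA_ENV_NAME=$(awk -F" " '/qiime.*/{ print $1 }' <<< "$envs_listing")
echo "$CONDA_ENV_NAME"   # prints: qiime2-2023.2
```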