From 84aeaacac1405b9d64ca9e8f20371150c9f1ab2b Mon Sep 17 00:00:00 2001 From: Robert Kalescky Date: Wed, 25 Jan 2023 10:56:28 -0600 Subject: [PATCH 1/9] Initial setup for biostats_mcgee This is for STAT 6358: Statistical Methods for High Throughput Biological Assays taught by Dr. Monnie McGee. --- biostats_mcgee/Dockerfile | 12 ++++++++++++ biostats_mcgee/install_packages.R | 18 ++++++++++++++++++ 2 files changed, 30 insertions(+) create mode 100644 biostats_mcgee/Dockerfile create mode 100644 biostats_mcgee/install_packages.R diff --git a/biostats_mcgee/Dockerfile b/biostats_mcgee/Dockerfile new file mode 100644 index 00000000..1db74ec5 --- /dev/null +++ b/biostats_mcgee/Dockerfile @@ -0,0 +1,12 @@ +# docker build -t biostats:latest . +# docker build --platform linux/amd64 -t biostats:latest . +# docker build --no-cache --progress=plain -t biostats:latest . +# docker run --entrypoint /bin/bash -it biostats:latest + +FROM rocker/rstudio:4.2.2 +LABEL maintainer "Robert Kalescky " + +COPY install_packages.R / + +RUN Rscript /install_packages.R + diff --git a/biostats_mcgee/install_packages.R b/biostats_mcgee/install_packages.R new file mode 100644 index 00000000..3e41672e --- /dev/null +++ b/biostats_mcgee/install_packages.R @@ -0,0 +1,18 @@ +#!/usr/bin/env Rscript + +install.packages("BiocManager") +BiocManager::install(version = "3.16") + +packages = c( + "FastQC", + "cutadapt", + "TrimGalore!", + "STAR", + "RSEM", + "edgeR", + "AnnotationDbi", + "org.Hs.eg.db" +) + +BiocManager::install(packages) + From cdf5876d897b5ba074607e063ad828c7fd4738b6 Mon Sep 17 00:00:00 2001 From: Robert Kalescky Date: Wed, 25 Jan 2023 11:57:52 -0600 Subject: [PATCH 2/9] Missing needed zlib.h. 
--- biostats_mcgee/Dockerfile | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/biostats_mcgee/Dockerfile b/biostats_mcgee/Dockerfile index 1db74ec5..0517f965 100644 --- a/biostats_mcgee/Dockerfile +++ b/biostats_mcgee/Dockerfile @@ -6,6 +6,12 @@ FROM rocker/rstudio:4.2.2 LABEL maintainer "Robert Kalescky " +ENV DEBIAN_FRONTEND noninteractive + +RUN apt-get update &&\ + apt-get install -y\ + libz-dev + COPY install_packages.R / RUN Rscript /install_packages.R From 43f30803050fa9bbd99c493ac05bda5e1982e56d Mon Sep 17 00:00:00 2001 From: Robert Kalescky Date: Wed, 25 Jan 2023 13:43:55 -0600 Subject: [PATCH 3/9] =?UTF-8?q?Bioconductor=20doesn=E2=80=99t=20install=20?= =?UTF-8?q?CRAN=20packages.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- biostats_mcgee/install_packages.R | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/biostats_mcgee/install_packages.R b/biostats_mcgee/install_packages.R index 3e41672e..a47e69fa 100644 --- a/biostats_mcgee/install_packages.R +++ b/biostats_mcgee/install_packages.R @@ -1,18 +1,22 @@ #!/usr/bin/env Rscript -install.packages("BiocManager") -BiocManager::install(version = "3.16") - -packages = c( +packages_cran = c( "FastQC", "cutadapt", "TrimGalore!", "STAR", "RSEM", + "BiocManager" +) + +bioc_version = "3.16" +packages_bioc = c( "edgeR", "AnnotationDbi", "org.Hs.eg.db" ) -BiocManager::install(packages) +install.packages(packages_cran) +BiocManager::install(version = bioc_version) +BiocManager::install(packages_bioc) From 1c0f4a503610211866eec8f3d123f3ce561997b4 Mon Sep 17 00:00:00 2001 From: Robert Kalescky Date: Fri, 27 Jan 2023 13:48:52 -0600 Subject: [PATCH 4/9] Initial container finished. Some apps were previously mistaken as R packages. 
--- biostats_mcgee/Dockerfile | 39 +++++++++++++++++++++++++++---- biostats_mcgee/install_packages.R | 29 ++++++++++------------- 2 files changed, 47 insertions(+), 21 deletions(-) diff --git a/biostats_mcgee/Dockerfile b/biostats_mcgee/Dockerfile index 0517f965..57c21c4c 100644 --- a/biostats_mcgee/Dockerfile +++ b/biostats_mcgee/Dockerfile @@ -4,15 +4,46 @@ # docker run --entrypoint /bin/bash -it biostats:latest FROM rocker/rstudio:4.2.2 -LABEL maintainer "Robert Kalescky " +LABEL maintainer="Robert Kalescky " ENV DEBIAN_FRONTEND noninteractive RUN apt-get update &&\ - apt-get install -y\ - libz-dev + apt-get install -y\ + libz-dev\ + default-jre\ + default-jdk\ + python3-pip -COPY install_packages.R / +# FastQC +RUN git clone --depth 1 --branch v0.11.9\ + https://github.com/s-andrews/FastQC.git /opt/FastQC &&\ + ln -s /opt/FastQC/fastqc /usr/local/bin/fastqc + +# Cutadapt (https://cutadapt.readthedocs.io) +RUN pip3 install cutadapt + +# TrimGalore! +RUN git clone --depth 1 --branch 0.6.7\ + https://github.com/FelixKrueger/TrimGalore.git /opt/TrimGalore &&\ + ln -s /opt/TrimGalore/trim_galore /usr/local/bin/trim_galore +# STAR +RUN git clone --depth 1 --branch 2.7.10b\ + https://github.com/alexdobin/STAR.git /opt/STAR &&\ + cd /opt/STAR/source &&\ + make STAR &&\ + mv /opt/STAR/bin/Linux_x86_64_static/STAR* /usr/local/bin/ &&\ + rm -rf /opt/STAR + +# RSEM +RUN git clone --depth 1 --branch v1.3.3\ + https://github.com/deweylab/RSEM.git /opt/RSEM &&\ + cd /opt/RSEM &&\ + make -j install &&\ + rm -rf /opt/RSEM + +# Install R Packages +COPY install_packages.R / RUN Rscript /install_packages.R diff --git a/biostats_mcgee/install_packages.R b/biostats_mcgee/install_packages.R index a47e69fa..3df6cd2d 100644 --- a/biostats_mcgee/install_packages.R +++ b/biostats_mcgee/install_packages.R @@ -1,22 +1,17 @@ -#!/usr/bin/env Rscript - -packages_cran = c( - "FastQC", - "cutadapt", - "TrimGalore!", - "STAR", - "RSEM", - "BiocManager" +cran = list( + packages = c( + 
"BiocManager") ) -bioc_version = "3.16" -packages_bioc = c( - "edgeR", - "AnnotationDbi", - "org.Hs.eg.db" +bioc = list( + packages = c( + "edgeR", + "AnnotationDbi", + "org.Hs.eg.db"), + version = "3.16" ) -install.packages(packages_cran) -BiocManager::install(version = bioc_version) -BiocManager::install(packages_bioc) +install.packages(cran$packages) +BiocManager::install(version = bioc$version) +BiocManager::install(bioc$packages) From 75fdf3f3212f33747a5bb3899dd3f08ec5a17b9c Mon Sep 17 00:00:00 2001 From: Robert Kalescky Date: Fri, 27 Jan 2023 14:25:54 -0600 Subject: [PATCH 5/9] Build and module scripts. The build script builds the Docker image, converts it to a Singularity image, and updates the module file with the images file name. The module file defines shell functions for specific exectuables inside the image. --- biostats_mcgee/biostats.lua | 55 ++++++++++++++++++++++++++++++++++ biostats_mcgee/build_images.sh | 35 ++++++++++++++++++++++ 2 files changed, 90 insertions(+) create mode 100644 biostats_mcgee/biostats.lua create mode 100755 biostats_mcgee/build_images.sh diff --git a/biostats_mcgee/biostats.lua b/biostats_mcgee/biostats.lua new file mode 100644 index 00000000..7035e818 --- /dev/null +++ b/biostats_mcgee/biostats.lua @@ -0,0 +1,55 @@ +load("singularity") + +local img_name = 'biostats_latest-2023-01-27-60654e0170ff.simg' +local img_directory = '/hpc/applications/containers/' +local img_path = pathJoin(img_directory, img_name) + +function build_command(cmd) + local cmd_beginning = 'singularity exec ' + local cmd_ending = img_path .. ' ' .. cmd + local sh_ending = ' "$@"' + local csh_ending = ' $*' + local sh_cmd = cmd_beginning .. cmd_ending .. sh_ending + local csh_cmd = cmd_beginning .. cmd_ending .. 
csh_ending + set_shell_function(cmd, sh_cmd, csh_cmd) +end + +local executables = { + 'python3', + 'R', + 'Rscript', + 'rserver', + 'rstudio-server', + 'fastqc', + 'cutadapt', + 'STAR', + 'STARlong', + 'rsem-bam2readdepth', + 'rsem-get-unique', + 'rsem-run-em', + 'rsem-bam2wig', + 'rsem-gff3-to-gtf', + 'rsem-run-gibbs', + 'rsem-build-read-index', + 'rsem-parse-alignments', + 'rsem-sam-validator', + 'rsem-calculate-credibility-intervals', + 'rsem-plot-model', + 'rsem-scan-for-paired-end-reads', + 'rsem-calculate-expression', + 'rsem-plot-transcript-wiggles', + 'rsem-simulate-reads', + 'rsem-extract-reference-transcripts', + 'rsem-prepare-reference', + 'rsem-synthesis-reference-transcripts', + 'rsem-generate-data-matrix', + 'rsem-preref', + 'rsem-tbam2gbam', + 'rsem-gen-transcript-plots', + 'rsem-refseq-extract-primary-assembly' +} + +for _, executable in ipairs(executables) do + build_command(executable) +done + diff --git a/biostats_mcgee/build_images.sh b/biostats_mcgee/build_images.sh new file mode 100755 index 00000000..a221a7b3 --- /dev/null +++ b/biostats_mcgee/build_images.sh @@ -0,0 +1,35 @@ +#!/usr/bin/env zsh + +# Set version +version="latest" + +# Set platform +#if [ -z "$1" ]; then +case $1 in +m[2,3,p]) + platform="linux/amd64" + ;; +*) + platform="linux/$(uname -m)" + ;; +esac + +# Build container with Docker +docker build --platform ${platform} -t biostats:${version} . 
+ +# Convert Docker image to Singularity image +docker run -v /var/run/docker.sock:/var/run/docker.sock\ + -v $PWD:/output --privileged -t --rm\ + singularityware/docker2singularity biostats:${version} | tee build.log + +# Get Singularity image name +img_name=$(grep "Build complete:" build.log | tr -s ' ' | cut -d' ' -f5 | cut -d'/' -f3 | tr -d '\b\r') + +# Change Singularity image permissions +if [[ $(uname -s) == "Linux" ]]; then + sudo chown $USER:$USER $img_name +fi + +# Update module file with new Singularity image name +sed -i'' -e "s/^local img_name.*/local img_name = \'${img_name}\'/g" biostats.lua + From 2404d7d71b021443bacb15fec4e2774c267604f0 Mon Sep 17 00:00:00 2001 From: Robert Kalescky Date: Fri, 27 Jan 2023 14:43:50 -0600 Subject: [PATCH 6/9] Fixed Lua loop bug. --- biostats_mcgee/biostats.lua | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/biostats_mcgee/biostats.lua b/biostats_mcgee/biostats.lua index 7035e818..b2805293 100644 --- a/biostats_mcgee/biostats.lua +++ b/biostats_mcgee/biostats.lua @@ -51,5 +51,5 @@ local executables = { for _, executable in ipairs(executables) do build_command(executable) -done +end From dbb83fa4fc48bb792e24e93b5341906f5435c149 Mon Sep 17 00:00:00 2001 From: Robert Kalescky Date: Mon, 6 Feb 2023 11:07:56 -0600 Subject: [PATCH 7/9] Added custom R_LIBS_USER. Users can install to container-specific isolated directory. 
--- biostats_mcgee/biostats.lua | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/biostats_mcgee/biostats.lua b/biostats_mcgee/biostats.lua index b2805293..c7b25e45 100644 --- a/biostats_mcgee/biostats.lua +++ b/biostats_mcgee/biostats.lua @@ -5,7 +5,10 @@ local img_directory = '/hpc/applications/containers/' local img_path = pathJoin(img_directory, img_name) function build_command(cmd) - local cmd_beginning = 'singularity exec ' + local home = os.getenv("HOME") + local user_libs = pathJoin(home, 'R/portal/rocker-images/biostats_0') + local exec_prefix = 'singularity exec --nv ' + local cmd_beginning = exec_prefix .. '--env R_LIBS_USER=' .. user_libs .. ' ' local cmd_ending = img_path .. ' ' .. cmd local sh_ending = ' "$@"' local csh_ending = ' $*' From 70586150ff1be40b3e451d6209e995186f57a80e Mon Sep 17 00:00:00 2001 From: Robert Kalescky Date: Mon, 6 Feb 2023 12:50:06 -0600 Subject: [PATCH 8/9] Use *.sif extension for Singularity images. The Portal needs *.sif as the extension. Also simplified extracting the image name from the build log. 
--- biostats_mcgee/build_images.sh | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/biostats_mcgee/build_images.sh b/biostats_mcgee/build_images.sh index a221a7b3..fd59ea3d 100755 --- a/biostats_mcgee/build_images.sh +++ b/biostats_mcgee/build_images.sh @@ -23,13 +23,16 @@ docker run -v /var/run/docker.sock:/var/run/docker.sock\ singularityware/docker2singularity biostats:${version} | tee build.log # Get Singularity image name -img_name=$(grep "Build complete:" build.log | tr -s ' ' | cut -d' ' -f5 | cut -d'/' -f3 | tr -d '\b\r') +simg=$(basename $(awk '/Build complete/{print $NF}' build.log) | tr -d '\b\r') +img="${simg%.*}.sif" +mv $simg $img # Change Singularity image permissions if [[ $(uname -s) == "Linux" ]]; then - sudo chown $USER:$USER $img_name + sudo chown $USER:$USER $img fi # Update module file with new Singularity image name -sed -i'' -e "s/^local img_name.*/local img_name = \'${img_name}\'/g" biostats.lua +sed -i'' -e "s/^local img_name.*/local img_name = \'${img}\'/g"\ + biostats.lua From bd3ce4f7fa4381dd628e7fe529fa1d1d842e9a06 Mon Sep 17 00:00:00 2001 From: Robert Kalescky Date: Mon, 6 Feb 2023 21:42:21 -0600 Subject: [PATCH 9/9] Added Salmon --- biostats_mcgee/Dockerfile | 50 +++++++++++++++++++++++++++++++++---- biostats_mcgee/biostats.lua | 2 +- 2 files changed, 46 insertions(+), 6 deletions(-) diff --git a/biostats_mcgee/Dockerfile b/biostats_mcgee/Dockerfile index 57c21c4c..87470048 100644 --- a/biostats_mcgee/Dockerfile +++ b/biostats_mcgee/Dockerfile @@ -9,12 +9,38 @@ LABEL maintainer="Robert Kalescky " ENV DEBIAN_FRONTEND noninteractive RUN apt-get update &&\ - apt-get install -y\ - libz-dev\ - default-jre\ + apt-get remove -y libcurl4 &&\ + apt-get upgrade -y &&\ + apt-get install -y --no-install-recommends\ + apt-transport-https\ + autoconf\ + ca-certificates\ + cmake\ + curl\ default-jdk\ - python3-pip - + default-jre\ + g++\ + gcc\ + git\ + git-lfs\ + gnupg\ + libboost-all-dev\ + libbz2-dev\ + libcereal-dev\ + 
libcurl4-openssl-dev\ + libgff-dev\ + libjemalloc-dev\ + liblzma-dev\ + libtbb-dev\ + libz-dev\ + make\ + ninja-build\ + python3-pip\ + software-properties-common\ + unzip\ + wget\ + zlib1g-dev + # FastQC RUN git clone --depth 1 --branch v0.11.9\ https://github.com/s-andrews/FastQC.git /opt/FastQC &&\ @@ -47,3 +73,17 @@ RUN git clone --depth 1 --branch v1.3.3\ COPY install_packages.R / RUN Rscript /install_packages.R +# GATK4 +RUN git clone --depth 1 --branch 4.2.6.1\ + https://github.com/broadinstitute/gatk.git /opt/gatk &&\ + cd /opt/gatk &&\ + ./gradlew installAll + +# Salmon +RUN git clone --depth 1 --branch v1.9.0\ + https://github.com/COMBINE-lab/salmon.git /opt/salmon &&\ + mkdir /opt/salmon/build &&\ + cd /opt/salmon/build &&\ + cmake .. &&\ + make install + diff --git a/biostats_mcgee/biostats.lua b/biostats_mcgee/biostats.lua index c7b25e45..03007a73 100644 --- a/biostats_mcgee/biostats.lua +++ b/biostats_mcgee/biostats.lua @@ -1,6 +1,6 @@ load("singularity") -local img_name = 'biostats_latest-2023-01-27-60654e0170ff.simg' +local img_name = 'biostats_latest-2023-02-07-858e2914ccfc.sif' local img_directory = '/hpc/applications/containers/' local img_path = pathJoin(img_directory, img_name)