Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

PD-2501 StarSolo on one machine #1194

Open
wants to merge 4 commits into
base: develop
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion tasks/skylab/FastqProcessing.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -102,7 +102,7 @@ task FastqProcessing {
fi

fastqprocess \
--bam-size 30.0 \
--num-output-files 1 \
--sample-id "~{sample_id}" \
$FASTQS \
--white-list "~{whitelist}" \
Expand Down
86 changes: 27 additions & 59 deletions tasks/skylab/StarAlign.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -227,12 +227,15 @@ task STARsoloFastq {

# runtime values
String docker = "us.gcr.io/broad-gotc-prod/star:1.0.1-2.7.11a-1692706072"
Int machine_mem_mb = 64000
Int cpu = 8

String cpu_platform = "Intel Ice Lake"
Int machine_mem_mb = 512000
Int cpu = 128
Int disk = 2000
# multiply input size by 2.2 to account for output bam file + 20% overhead, add size of reference.
Int disk = ceil((size(tar_star_reference, "Gi") * 3)) + ceil(size(r1_fastq, "Gi") * 20) + ceil(size(r2_fastq, "Gi") * 20)
# Int disk = ceil((size(tar_star_reference, "Gi") * 3)) + ceil(size(r1_fastq, "Gi") * 20) + ceil(size(r2_fastq, "Gi") * 20)
# by default request non preemptible machine to make sure the slow star alignment step completes
Int preemptible = 3
Int preemptible = 1
}

meta {
Expand Down Expand Up @@ -292,7 +295,23 @@ task STARsoloFastq {
## single cell or whole cell
COUNTING_MODE="Gene"
echo "Running in ~{counting_mode} mode. The Star parameter --soloFeatures will be set to $COUNTING_MODE"
STAR \
elif [[ "~{counting_mode}" == "sn_rna" ]]
then
## single nuclei
if [[ ~{count_exons} == false ]]
then
COUNTING_MODE="GeneFull_Ex50pAS"
echo "Running in ~{counting_mode} mode. Count_exons is false and the Star parameter --soloFeatures will be set to $COUNTING_MODE"
else
COUNTING_MODE="GeneFull_Ex50pAS Gene"
echo "Running in ~{counting_mode} mode. Count_exons is true and the Star parameter --soloFeatures will be set to $COUNTING_MODE"
fi
else
echo Error: unknown counting mode: "$counting_mode". Should be either sn_rna or sc_rna.
exit 1;
fi

STAR \
--soloType Droplet \
--soloStrand ~{star_strand_mode} \
--runThreadN ~{cpu} \
Expand All @@ -311,62 +330,10 @@ task STARsoloFastq {
--soloBarcodeReadLength 0 \
--soloCellReadStats Standard \
~{"--soloMultiMappers " + soloMultiMappers}
elif [[ "~{counting_mode}" == "sn_rna" ]]
then
## single nuclei
if [[ ~{count_exons} == false ]]
then
COUNTING_MODE="GeneFull_Ex50pAS"
echo "Running in ~{counting_mode} mode. Count_exons is false and the Star parameter --soloFeatures will be set to $COUNTING_MODE"
STAR \
--soloType Droplet \
--soloStrand ~{star_strand_mode} \
--runThreadN ~{cpu} \
--genomeDir genome_reference \
--readFilesIn "~{sep=',' r2_fastq}" "~{sep=',' r1_fastq}" \
--readFilesCommand "gunzip -c" \
--soloCBwhitelist ~{white_list} \
--soloUMIlen $UMILen --soloCBlen $CBLen \
--soloFeatures $COUNTING_MODE \
--clipAdapterType CellRanger4 \
--outFilterScoreMin 30 \
--soloCBmatchWLtype 1MM_multi_Nbase_pseudocounts \
--soloUMIdedup 1MM_Directional_UMItools \
--outSAMtype BAM SortedByCoordinate \
--outSAMattributes UB UR UY CR CB CY NH GX GN sF \
--soloBarcodeReadLength 0 \
--soloCellReadStats Standard \
~{"--soloMultiMappers " + soloMultiMappers}
else
COUNTING_MODE="GeneFull_Ex50pAS Gene"
echo "Running in ~{counting_mode} mode. Count_exons is true and the Star parameter --soloFeatures will be set to $COUNTING_MODE"
STAR \
--soloType Droplet \
--soloStrand ~{star_strand_mode} \
--runThreadN ~{cpu} \
--genomeDir genome_reference \
--readFilesIn "~{sep=',' r2_fastq}" "~{sep=',' r1_fastq}" \
--readFilesCommand "gunzip -c" \
--soloCBwhitelist ~{white_list} \
--soloUMIlen $UMILen --soloCBlen $CBLen \
--soloFeatures $COUNTING_MODE \
--clipAdapterType CellRanger4 \
--outFilterScoreMin 30 \
--soloCBmatchWLtype 1MM_multi_Nbase_pseudocounts \
--soloUMIdedup 1MM_Directional_UMItools \
--outSAMtype BAM SortedByCoordinate \
--outSAMattributes UB UR UY CR CB CY NH GX GN sF \
--soloBarcodeReadLength 0 \
--soloCellReadStats Standard \
~{"--soloMultiMappers " + soloMultiMappers}
fi
else
echo Error: unknown counting mode: "$counting_mode". Should be either sn_rna or sc_rna.
exit 1;
fi

echo "UMI LEN " $UMILen

# why is this here?
touch barcodes_sn_rna.tsv
touch features_sn_rna.tsv
touch matrix_sn_rna.mtx
Expand Down Expand Up @@ -434,10 +401,11 @@ task STARsoloFastq {
runtime {
docker: docker
memory: "~{machine_mem_mb} MiB"
disks: "local-disk ~{disk} HDD"
disks: "local-disk ~{disk} SSD"
disk: disk + " GB" # TES
cpu: cpu
preemptible: preemptible
cpuPlatform: cpu_platform
}

output {
Expand Down
Loading