-
Notifications
You must be signed in to change notification settings - Fork 24
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
add shapeit4 pipeline for hidive experiment
- Loading branch information
Showing
1 changed file
with
179 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,179 @@ | ||
version 1.0 | ||
|
||
# Phase a joint VCF with SHAPEIT4, one scatter shard per region, then ligate
# the per-region phased BCFs into a single bgzipped, tabix-indexed VCF.
#
# Inputs:
#   joint_vcf / joint_vcf_tbi         - VCF to phase and its index.
#   genetic_mapping_tsv_for_shapeit4  - two-column TSV read with read_map();
#                                       it is looked up by chromosome name and
#                                       the value is passed to Shapeit4 as the
#                                       genetic map file.
#   regionlist                        - one region string per line; each line
#                                       becomes one Shapeit4 shard.
#   chromosomelist                    - one chromosome name per line, parallel
#                                       to regionlist (line j names the
#                                       chromosome of region j).
#   shapeit4_num_threads              - threads (and CPU cores) per shard.
#   shapeit4_memory                   - memory per shard, in GiB.
#   shapeit4_extra_args               - extra arguments appended verbatim to
#                                       the shapeit4 command line.
#   output_prefix                     - prefix for all output file names.
#
# Outputs:
#   phased_vcf_gz / phased_vcf_gz_tbi - ligated phased VCF and its index.
workflow Shapeit4Hidive {

    input {
        File joint_vcf
        File joint_vcf_tbi
        File genetic_mapping_tsv_for_shapeit4
        File regionlist
        File chromosomelist
        Int shapeit4_num_threads
        Int shapeit4_memory
        String shapeit4_extra_args
        String output_prefix
    }

    Map[String, String] genetic_mapping_dict = read_map(genetic_mapping_tsv_for_shapeit4)

    Array[String] region_list = read_lines(regionlist)

    # NOTE: must contain at least as many lines as regionlist; the scatter
    # below indexes it by region position and fails on a shorter list.
    Array[String] chromosome_list = read_lines(chromosomelist)

    scatter (j in range(length(region_list))) {
        String chromosome = chromosome_list[j]
        call Shapeit4 { input:
            vcf_input = joint_vcf,
            vcf_index = joint_vcf_tbi,
            mappingfile = genetic_mapping_dict[chromosome],
            region = region_list[j],
            prefix = output_prefix + "." + chromosome + ".shard-" + j + ".phased",
            num_threads = shapeit4_num_threads,
            memory = shapeit4_memory,
            extra_args = shapeit4_extra_args
        }
    }

    call LigateVcfs { input:
        vcfs = Shapeit4.phased_bcf,
        # Single separator dot: the previous `"." + ".phased.ligated"`
        # produced "<prefix>..phased.ligated".
        prefix = output_prefix + ".phased.ligated"
    }

    output {
        File phased_vcf_gz = LigateVcfs.ligated_vcf_gz
        File phased_vcf_gz_tbi = LigateVcfs.ligated_vcf_gz_tbi
    }
}
|
||
# Optional per-task runtime overrides. Every member is optional; the tasks in
# this file fall back to their own defaults for any member left unset.
struct RuntimeAttr {
    Float?  mem_gb              # memory; tasks append " GiB" to form `memory`
    Int?    cpu_cores           # mapped to the `cpu` runtime attribute
    Int?    disk_gb             # rendered as "local-disk <disk_gb> SSD"
    Int?    boot_disk_gb        # mapped to `bootDiskSizeGb`
    Int?    preemptible_tries   # mapped to `preemptible`
    Int?    max_retries         # mapped to `maxRetries`
    String? docker              # container image for the task
}
|
||
# NOTE(review): not referenced by any workflow or task visible in this file;
# presumably kept for parity with other pipelines -- confirm before removing.
struct DataTypeParameters {
    Int    num_shards   # presumably a shard count -- TODO confirm with users of this struct
    String map_preset   # presumably an aligner preset name -- TODO confirm
}
|
||
|
||
# Ligate a list of phased VCF/BCF chunks into one bgzipped VCF and tabix-index
# the result.
#
# Inputs:
#   vcfs                  - chunks to ligate, in the order they are given.
#   vcf_idxs              - optional indexes for `vcfs`; when omitted, each
#                           chunk is indexed with `bcftools index` first.
#   prefix                - output base name (".vcf.gz" is appended).
#   runtime_attr_override - optional runtime overrides.
#
# Outputs:
#   ligated_vcf_gz / ligated_vcf_gz_tbi - ligated VCF and its tabix index.
task LigateVcfs {

    input {
        Array[File] vcfs
        Array[File]? vcf_idxs
        String prefix

        RuntimeAttr? runtime_attr_override
    }

    # Twice the (rounded-up) total input size, plus 1 GB of headroom.
    Int scratch_disk_gb = 1 + 2 * ceil(size(vcfs, "GB"))

    command <<<
        set -euxo pipefail

        # defined(...) renders as the shell builtin `true` or `false`.
        if ! ~{defined(vcf_idxs)}; then
            for ff in ~{sep=' ' vcfs}; do
                bcftools index $ff
            done
        fi

        # The ligate binary is fetched at run time; the task needs network access.
        wget https://github.com/odelaneau/shapeit5/releases/download/v5.1.1/ligate_static
        chmod +x ligate_static

        ./ligate_static --input ~{write_lines(vcfs)} --output ~{prefix}.vcf.gz
        bcftools index -t ~{prefix}.vcf.gz
    >>>

    output {
        File ligated_vcf_gz = "~{prefix}.vcf.gz"
        File ligated_vcf_gz_tbi = "~{prefix}.vcf.gz.tbi"
    }

    #########################
    RuntimeAttr fallback = object {
        cpu_cores: 2,
        mem_gb: 8,
        disk_gb: scratch_disk_gb,
        boot_disk_gb: 10,
        preemptible_tries: 2,
        max_retries: 1,
        docker: "us.gcr.io/broad-dsp-lrma/lr-gcloud-samtools:0.1.2"
    }
    # Use the caller's override struct when supplied; members it leaves unset
    # are filled from `fallback` attribute by attribute below.
    RuntimeAttr chosen = select_first([runtime_attr_override, fallback])

    runtime {
        cpu:            select_first([chosen.cpu_cores, fallback.cpu_cores])
        memory:         select_first([chosen.mem_gb, fallback.mem_gb]) + " GiB"
        disks:          "local-disk " + select_first([chosen.disk_gb, fallback.disk_gb]) + " SSD"
        bootDiskSizeGb: select_first([chosen.boot_disk_gb, fallback.boot_disk_gb])
        preemptible:    select_first([chosen.preemptible_tries, fallback.preemptible_tries])
        maxRetries:     select_first([chosen.max_retries, fallback.max_retries])
        docker:         select_first([chosen.docker, fallback.docker])
    }
}
# Phase one region of a VCF with shapeit4.2 and emit the result as a BCF.
#
# Inputs:
#   vcf_input             - VCF to phase.
#   vcf_index             - index for vcf_input; not referenced in the command,
#                           presumably declared so it is localized next to the
#                           VCF for region access -- confirm.
#   mappingfile           - genetic map passed to --map.
#   region                - region string passed to --region.
#   prefix                - output base name (".bcf" is appended).
#   num_threads           - passed to --thread and used as the default CPU count.
#   memory                - default memory in GiB.
#   extra_args            - appended verbatim (unquoted, so it word-splits) to
#                           the shapeit4 command line.
#   runtime_attr_override - optional runtime overrides.
#   zones                 - space-separated zones passed to the `zones`
#                           runtime attribute.
#
# Outputs:
#   phased_bcf - phased BCF for the requested region.
task Shapeit4 {
    input {
        File vcf_input
        File vcf_index
        File mappingfile
        String region
        String prefix
        Int num_threads
        Int memory
        String extra_args
        RuntimeAttr? runtime_attr_override
        String zones = "us-central1-a us-central1-b us-central1-c us-central1-f"
    }

    command <<<
        # Fail fast and trace commands, matching LigateVcfs.
        set -euxo pipefail

        # Resource monitoring (disabled):
        # export MONITOR_MOUNT_POINT="/cromwell_root/"
        # bash /opt/vm_local_monitoring_script.sh &> resources.log &
        # job_id=$(ps -aux | grep -F 'vm_local_monitoring_script.sh' | head -1 | awk '{print $2}')

        shapeit4.2 --input ~{vcf_input} \
            --map ~{mappingfile} \
            --region ~{region} \
            --sequencing \
            --output ~{prefix}.bcf \
            --thread ~{num_threads} \
            ~{extra_args}

        # if ps -p "${job_id}" > /dev/null; then kill "${job_id}"; fi
    >>>

    output {
        # File resource_monitor_log = "resources.log"
        File phased_bcf = "~{prefix}.bcf"
    }

    # Disk is a fixed 100 GB by default; an input-size-based estimate is kept
    # here for reference but is not in use.
    #Int disk_size = 100 + ceil(2 * size(vcf_input, "GiB"))
    #########################
    RuntimeAttr default_attr = object {
        cpu_cores: num_threads,
        mem_gb: memory,
        disk_gb: 100,
        boot_disk_gb: 100,
        preemptible_tries: 0,
        max_retries: 0,
        docker: "hangsuunc/shapeit4:v1"
    }
    # Use the caller's override struct when supplied; members it leaves unset
    # are filled from default_attr attribute by attribute below.
    RuntimeAttr runtime_attr = select_first([runtime_attr_override, default_attr])

    runtime {
        cpu: select_first([runtime_attr.cpu_cores, default_attr.cpu_cores])
        memory: select_first([runtime_attr.mem_gb, default_attr.mem_gb]) + " GiB"
        disks: "local-disk " + select_first([runtime_attr.disk_gb, default_attr.disk_gb]) + " SSD"
        zones: zones
        bootDiskSizeGb: select_first([runtime_attr.boot_disk_gb, default_attr.boot_disk_gb])
        preemptible: select_first([runtime_attr.preemptible_tries, default_attr.preemptible_tries])
        maxRetries: select_first([runtime_attr.max_retries, default_attr.max_retries])
        docker: select_first([runtime_attr.docker, default_attr.docker])
    }
}