Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
  • Loading branch information
ngaddis committed Mar 9, 2020
2 parents c0f3511 + 8814af3 commit 83691df
Show file tree
Hide file tree
Showing 9 changed files with 498 additions and 48 deletions.
105 changes: 105 additions & 0 deletions association_testing/rvtests/rvtests_framomni_kin_wf.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,105 @@
{
"rvtests_gwas_wf.study_output_basename": "fram_omni_kin_rvtests_gwas",

"rvtests_gwas_wf.chrs": [
"1", "2", "3", "4", "5", "6", "7", "8", "9", "10", "11",
"12", "13", "14", "15", "16", "17", "18", "19", "20", "21", "22", "X"],

"rvtests_gwas_wf.vcfs_in": [
"s3://rti-biocloud-test-data/vcfs/FramOmni/chr1.dose.vcf.gz",
"s3://rti-biocloud-test-data/vcfs/FramOmni/chr2.dose.vcf.gz",
"s3://rti-biocloud-test-data/vcfs/FramOmni/chr3.dose.vcf.gz",
"s3://rti-biocloud-test-data/vcfs/FramOmni/chr4.dose.vcf.gz",
"s3://rti-biocloud-test-data/vcfs/FramOmni/chr5.dose.vcf.gz",
"s3://rti-biocloud-test-data/vcfs/FramOmni/chr6.dose.vcf.gz",
"s3://rti-biocloud-test-data/vcfs/FramOmni/chr7.dose.vcf.gz",
"s3://rti-biocloud-test-data/vcfs/FramOmni/chr8.dose.vcf.gz",
"s3://rti-biocloud-test-data/vcfs/FramOmni/chr9.dose.vcf.gz",
"s3://rti-biocloud-test-data/vcfs/FramOmni/chr10.dose.vcf.gz",
"s3://rti-biocloud-test-data/vcfs/FramOmni/chr11.dose.vcf.gz",
"s3://rti-biocloud-test-data/vcfs/FramOmni/chr12.dose.vcf.gz",
"s3://rti-biocloud-test-data/vcfs/FramOmni/chr13.dose.vcf.gz",
"s3://rti-biocloud-test-data/vcfs/FramOmni/chr14.dose.vcf.gz",
"s3://rti-biocloud-test-data/vcfs/FramOmni/chr15.dose.vcf.gz",
"s3://rti-biocloud-test-data/vcfs/FramOmni/chr16.dose.vcf.gz",
"s3://rti-biocloud-test-data/vcfs/FramOmni/chr17.dose.vcf.gz",
"s3://rti-biocloud-test-data/vcfs/FramOmni/chr18.dose.vcf.gz",
"s3://rti-biocloud-test-data/vcfs/FramOmni/chr19.dose.vcf.gz",
"s3://rti-biocloud-test-data/vcfs/FramOmni/chr20.dose.vcf.gz",
"s3://rti-biocloud-test-data/vcfs/FramOmni/chr21.dose.vcf.gz",
"s3://rti-biocloud-test-data/vcfs/FramOmni/chr22.dose.vcf.gz",
"s3://rti-biocloud-test-data/vcfs/FramOmni/chrX.dose.vcf.gz"],

"rvtests_gwas_wf.infos_in": [
"s3://rti-biocloud-test-data/vcfs/FramOmni/chr1.info.gz",
"s3://rti-biocloud-test-data/vcfs/FramOmni/chr2.info.gz",
"s3://rti-biocloud-test-data/vcfs/FramOmni/chr3.info.gz",
"s3://rti-biocloud-test-data/vcfs/FramOmni/chr4.info.gz",
"s3://rti-biocloud-test-data/vcfs/FramOmni/chr5.info.gz",
"s3://rti-biocloud-test-data/vcfs/FramOmni/chr6.info.gz",
"s3://rti-biocloud-test-data/vcfs/FramOmni/chr7.info.gz",
"s3://rti-biocloud-test-data/vcfs/FramOmni/chr8.info.gz",
"s3://rti-biocloud-test-data/vcfs/FramOmni/chr9.info.gz",
"s3://rti-biocloud-test-data/vcfs/FramOmni/chr10.info.gz",
"s3://rti-biocloud-test-data/vcfs/FramOmni/chr11.info.gz",
"s3://rti-biocloud-test-data/vcfs/FramOmni/chr12.info.gz",
"s3://rti-biocloud-test-data/vcfs/FramOmni/chr13.info.gz",
"s3://rti-biocloud-test-data/vcfs/FramOmni/chr14.info.gz",
"s3://rti-biocloud-test-data/vcfs/FramOmni/chr15.info.gz",
"s3://rti-biocloud-test-data/vcfs/FramOmni/chr16.info.gz",
"s3://rti-biocloud-test-data/vcfs/FramOmni/chr17.info.gz",
"s3://rti-biocloud-test-data/vcfs/FramOmni/chr18.info.gz",
"s3://rti-biocloud-test-data/vcfs/FramOmni/chr19.info.gz",
"s3://rti-biocloud-test-data/vcfs/FramOmni/chr20.info.gz",
"s3://rti-biocloud-test-data/vcfs/FramOmni/chr21.info.gz",
"s3://rti-biocloud-test-data/vcfs/FramOmni/chr22.info.gz",
"s3://rti-biocloud-test-data/vcfs/FramOmni/chrX.info.gz"],

"rvtests_gwas_wf.maf_population": "EUR",
"rvtests_gwas_wf.pop_maf_files": [
"s3://rti-biocloud-ref/ref_panels/1000g/phase3/2.0.0/pop_mafs/eur/chr1.tsv.gz",
"s3://rti-biocloud-ref/ref_panels/1000g/phase3/2.0.0/pop_mafs/eur/chr2.tsv.gz",
"s3://rti-biocloud-ref/ref_panels/1000g/phase3/2.0.0/pop_mafs/eur/chr3.tsv.gz",
"s3://rti-biocloud-ref/ref_panels/1000g/phase3/2.0.0/pop_mafs/eur/chr4.tsv.gz",
"s3://rti-biocloud-ref/ref_panels/1000g/phase3/2.0.0/pop_mafs/eur/chr5.tsv.gz",
"s3://rti-biocloud-ref/ref_panels/1000g/phase3/2.0.0/pop_mafs/eur/chr6.tsv.gz",
"s3://rti-biocloud-ref/ref_panels/1000g/phase3/2.0.0/pop_mafs/eur/chr7.tsv.gz",
"s3://rti-biocloud-ref/ref_panels/1000g/phase3/2.0.0/pop_mafs/eur/chr8.tsv.gz",
"s3://rti-biocloud-ref/ref_panels/1000g/phase3/2.0.0/pop_mafs/eur/chr9.tsv.gz",
"s3://rti-biocloud-ref/ref_panels/1000g/phase3/2.0.0/pop_mafs/eur/chr10.tsv.gz",
"s3://rti-biocloud-ref/ref_panels/1000g/phase3/2.0.0/pop_mafs/eur/chr11.tsv.gz",
"s3://rti-biocloud-ref/ref_panels/1000g/phase3/2.0.0/pop_mafs/eur/chr12.tsv.gz",
"s3://rti-biocloud-ref/ref_panels/1000g/phase3/2.0.0/pop_mafs/eur/chr13.tsv.gz",
"s3://rti-biocloud-ref/ref_panels/1000g/phase3/2.0.0/pop_mafs/eur/chr14.tsv.gz",
"s3://rti-biocloud-ref/ref_panels/1000g/phase3/2.0.0/pop_mafs/eur/chr15.tsv.gz",
"s3://rti-biocloud-ref/ref_panels/1000g/phase3/2.0.0/pop_mafs/eur/chr16.tsv.gz",
"s3://rti-biocloud-ref/ref_panels/1000g/phase3/2.0.0/pop_mafs/eur/chr17.tsv.gz",
"s3://rti-biocloud-ref/ref_panels/1000g/phase3/2.0.0/pop_mafs/eur/chr18.tsv.gz",
"s3://rti-biocloud-ref/ref_panels/1000g/phase3/2.0.0/pop_mafs/eur/chr19.tsv.gz",
"s3://rti-biocloud-ref/ref_panels/1000g/phase3/2.0.0/pop_mafs/eur/chr20.tsv.gz",
"s3://rti-biocloud-ref/ref_panels/1000g/phase3/2.0.0/pop_mafs/eur/chr21.tsv.gz",
"s3://rti-biocloud-ref/ref_panels/1000g/phase3/2.0.0/pop_mafs/eur/chr22.tsv.gz",
"s3://rti-biocloud-ref/ref_panels/1000g/phase3/2.0.0/pop_mafs/eur/chrX.tsv.gz"],

"rvtests_gwas_wf.is_continuous": false,
"rvtests_gwas_wf.is_related": true,

"rvtests_gwas_wf.pheno_file": "s3://rti-biocloud-test-data/vcfs/FramOmni/fram_omni.var.ped",
"rvtests_gwas_wf.pheno_name": "handedness",
"rvtests_gwas_wf.covar_file": "s3://rti-biocloud-test-data/vcfs/FramOmni/fram_omni.covar.ped",
"rvtests_gwas_wf.covars": ["sex", "age", "EV3"],
"rvtests_gwas_wf.dosage": "DS",

"rvtests_gwas_wf.min_rsq": 0.3,
"rvtests_gwas_wf.sig_alpha": 0.000005,
"rvtests_gwas_wf.filter_by_pop_maf": false,
"rvtests_gwas_wf.filter_by_sample_maf": true,
"rvtests_gwas_wf.maf_cutoffs": [0.005, 0.01, 0.05],

"rvtests_gwas_wf.split_vcfs": false,
"rvtests_gwas_wf.split_vcf_cpus": 16,
"rvtests_gwas_wf.records_per_split": 1000000,
"rvtests_gwas_wf.rvtests_cpu_per_split": 8,
"rvtests_gwas_wf.rvtests_mem_gb_per_split": 8

}
96 changes: 96 additions & 0 deletions association_testing/rvtests/rvtests_framomni_wf.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,96 @@
{
"rvtests_gwas_wf.vcfs_in": [
"s3://rti-biocloud-test-data/vcfs/FramOmni/chr1.dose.vcf.gz",
"s3://rti-biocloud-test-data/vcfs/FramOmni/chr2.dose.vcf.gz",
"s3://rti-biocloud-test-data/vcfs/FramOmni/chr3.dose.vcf.gz",
"s3://rti-biocloud-test-data/vcfs/FramOmni/chr4.dose.vcf.gz",
"s3://rti-biocloud-test-data/vcfs/FramOmni/chr5.dose.vcf.gz",
"s3://rti-biocloud-test-data/vcfs/FramOmni/chr6.dose.vcf.gz",
"s3://rti-biocloud-test-data/vcfs/FramOmni/chr7.dose.vcf.gz",
"s3://rti-biocloud-test-data/vcfs/FramOmni/chr8.dose.vcf.gz",
"s3://rti-biocloud-test-data/vcfs/FramOmni/chr9.dose.vcf.gz",
"s3://rti-biocloud-test-data/vcfs/FramOmni/chr10.dose.vcf.gz",
"s3://rti-biocloud-test-data/vcfs/FramOmni/chr11.dose.vcf.gz",
"s3://rti-biocloud-test-data/vcfs/FramOmni/chr12.dose.vcf.gz",
"s3://rti-biocloud-test-data/vcfs/FramOmni/chr13.dose.vcf.gz",
"s3://rti-biocloud-test-data/vcfs/FramOmni/chr14.dose.vcf.gz",
"s3://rti-biocloud-test-data/vcfs/FramOmni/chr15.dose.vcf.gz",
"s3://rti-biocloud-test-data/vcfs/FramOmni/chr16.dose.vcf.gz",
"s3://rti-biocloud-test-data/vcfs/FramOmni/chr17.dose.vcf.gz",
"s3://rti-biocloud-test-data/vcfs/FramOmni/chr18.dose.vcf.gz",
"s3://rti-biocloud-test-data/vcfs/FramOmni/chr19.dose.vcf.gz",
"s3://rti-biocloud-test-data/vcfs/FramOmni/chr20.dose.vcf.gz",
"s3://rti-biocloud-test-data/vcfs/FramOmni/chr21.dose.vcf.gz",
"s3://rti-biocloud-test-data/vcfs/FramOmni/chr22.dose.vcf.gz",
"s3://rti-biocloud-test-data/vcfs/FramOmni/chrX.dose.vcf.gz"],

"rvtests_gwas_wf.infos_in": [
"s3://rti-biocloud-test-data/vcfs/FramOmni/chr1.info.gz",
"s3://rti-biocloud-test-data/vcfs/FramOmni/chr2.info.gz",
"s3://rti-biocloud-test-data/vcfs/FramOmni/chr3.info.gz",
"s3://rti-biocloud-test-data/vcfs/FramOmni/chr4.info.gz",
"s3://rti-biocloud-test-data/vcfs/FramOmni/chr5.info.gz",
"s3://rti-biocloud-test-data/vcfs/FramOmni/chr6.info.gz",
"s3://rti-biocloud-test-data/vcfs/FramOmni/chr7.info.gz",
"s3://rti-biocloud-test-data/vcfs/FramOmni/chr8.info.gz",
"s3://rti-biocloud-test-data/vcfs/FramOmni/chr9.info.gz",
"s3://rti-biocloud-test-data/vcfs/FramOmni/chr10.info.gz",
"s3://rti-biocloud-test-data/vcfs/FramOmni/chr11.info.gz",
"s3://rti-biocloud-test-data/vcfs/FramOmni/chr12.info.gz",
"s3://rti-biocloud-test-data/vcfs/FramOmni/chr13.info.gz",
"s3://rti-biocloud-test-data/vcfs/FramOmni/chr14.info.gz",
"s3://rti-biocloud-test-data/vcfs/FramOmni/chr15.info.gz",
"s3://rti-biocloud-test-data/vcfs/FramOmni/chr16.info.gz",
"s3://rti-biocloud-test-data/vcfs/FramOmni/chr17.info.gz",
"s3://rti-biocloud-test-data/vcfs/FramOmni/chr18.info.gz",
"s3://rti-biocloud-test-data/vcfs/FramOmni/chr19.info.gz",
"s3://rti-biocloud-test-data/vcfs/FramOmni/chr20.info.gz",
"s3://rti-biocloud-test-data/vcfs/FramOmni/chr21.info.gz",
"s3://rti-biocloud-test-data/vcfs/FramOmni/chr22.info.gz",
"s3://rti-biocloud-test-data/vcfs/FramOmni/chrX.info.gz"],

"rvtests_gwas_wf.pop_maf_files": [
"s3://rti-biocloud-ref/ref_panels/1000g/phase3/2.0.0/pop_mafs/eur/chr1.tsv.gz",
"s3://rti-biocloud-ref/ref_panels/1000g/phase3/2.0.0/pop_mafs/eur/chr2.tsv.gz",
"s3://rti-biocloud-ref/ref_panels/1000g/phase3/2.0.0/pop_mafs/eur/chr3.tsv.gz",
"s3://rti-biocloud-ref/ref_panels/1000g/phase3/2.0.0/pop_mafs/eur/chr4.tsv.gz",
"s3://rti-biocloud-ref/ref_panels/1000g/phase3/2.0.0/pop_mafs/eur/chr5.tsv.gz",
"s3://rti-biocloud-ref/ref_panels/1000g/phase3/2.0.0/pop_mafs/eur/chr6.tsv.gz",
"s3://rti-biocloud-ref/ref_panels/1000g/phase3/2.0.0/pop_mafs/eur/chr7.tsv.gz",
"s3://rti-biocloud-ref/ref_panels/1000g/phase3/2.0.0/pop_mafs/eur/chr8.tsv.gz",
"s3://rti-biocloud-ref/ref_panels/1000g/phase3/2.0.0/pop_mafs/eur/chr9.tsv.gz",
"s3://rti-biocloud-ref/ref_panels/1000g/phase3/2.0.0/pop_mafs/eur/chr10.tsv.gz",
"s3://rti-biocloud-ref/ref_panels/1000g/phase3/2.0.0/pop_mafs/eur/chr11.tsv.gz",
"s3://rti-biocloud-ref/ref_panels/1000g/phase3/2.0.0/pop_mafs/eur/chr12.tsv.gz",
"s3://rti-biocloud-ref/ref_panels/1000g/phase3/2.0.0/pop_mafs/eur/chr13.tsv.gz",
"s3://rti-biocloud-ref/ref_panels/1000g/phase3/2.0.0/pop_mafs/eur/chr14.tsv.gz",
"s3://rti-biocloud-ref/ref_panels/1000g/phase3/2.0.0/pop_mafs/eur/chr15.tsv.gz",
"s3://rti-biocloud-ref/ref_panels/1000g/phase3/2.0.0/pop_mafs/eur/chr16.tsv.gz",
"s3://rti-biocloud-ref/ref_panels/1000g/phase3/2.0.0/pop_mafs/eur/chr17.tsv.gz",
"s3://rti-biocloud-ref/ref_panels/1000g/phase3/2.0.0/pop_mafs/eur/chr18.tsv.gz",
"s3://rti-biocloud-ref/ref_panels/1000g/phase3/2.0.0/pop_mafs/eur/chr19.tsv.gz",
"s3://rti-biocloud-ref/ref_panels/1000g/phase3/2.0.0/pop_mafs/eur/chr20.tsv.gz",
"s3://rti-biocloud-ref/ref_panels/1000g/phase3/2.0.0/pop_mafs/eur/chr21.tsv.gz",
"s3://rti-biocloud-ref/ref_panels/1000g/phase3/2.0.0/pop_mafs/eur/chr22.tsv.gz",
"s3://rti-biocloud-ref/ref_panels/1000g/phase3/2.0.0/pop_mafs/eur/chrX.tsv.gz"],

"rvtests_gwas_wf.pheno_file": "s3://rti-biocloud-test-data/vcfs/FramOmni/fram_omni.var.ped",
"rvtests_gwas_wf.pheno_name": "handedness",
"rvtests_gwas_wf.covar_file": "s3://rti-biocloud-test-data/vcfs/FramOmni/fram_omni.covar.ped",
"rvtests_gwas_wf.covars": ["sex", "age", "EV3"],
"rvtests_gwas_wf.dosage": "DS",
"rvtests_gwas_wf.study_output_basename": "fram_omni_rvtests_gwas",
"rvtests_gwas_wf.maf_population": "EUR",
"rvtests_gwas_wf.is_continuous": false,
"rvtests_gwas_wf.min_rsq": 0.3,
"rvtests_gwas_wf.sig_alpha": 0.000005,
"rvtests_gwas_wf.filter_by_pop_maf": false,
"rvtests_gwas_wf.filter_by_sample_maf": true,
"rvtests_gwas_wf.maf_cutoffs": [0.005, 0.01, 0.05],
"rvtests_gwas_wf.split_vcfs": false,
"rvtests_gwas_wf.split_vcf_cpus": 16,
"rvtests_gwas_wf.records_per_split": 1000000,
"rvtests_gwas_wf.rvtests_cpu_per_split": 8,
"rvtests_gwas_wf.rvtests_mem_gb_per_split": 8

}
73 changes: 46 additions & 27 deletions association_testing/rvtests/rvtests_gwas_chr_wf.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -48,15 +48,16 @@ workflow rvtests_gwas_chr_wf{
String pheno_name
File? covar_file
Array[String]? covars
File? kinship_matrix

String dosage

# For annotating with population MAF info
File pop_maf_file
String maf_population

# Optional for X chr
File? xHemiKinship_matrix
File? kinship
File? xHemiKinship
Boolean? xHemi

Array[String]? singleTestsMaybe
Expand All @@ -71,40 +72,49 @@ workflow rvtests_gwas_chr_wf{
Boolean? qtl
Boolean? multipleAllele
String? xLabel
Int rvtests_cpu = 1
Int rvtests_mem_gb = 2
# Number of records per VCF split
Int records_per_split = 50000
Int split_vcf_cpu = 8
Boolean split_vcf_records = true
# Set output basename
String? user_output_basename
String default_output_basename = basename(sub(vcf_in, "\\.gz$",""), ".vcf")
String output_basename = select_first([user_output_basename, default_output_basename])
# Chunk VCF and INFO files for parallel processing
call SPLIT.split_vcf as split_vcf{
input:
input_vcf = vcf_in,
records_per_split = records_per_split,
output_basename = output_basename
# Optionally chunk VCF and INFO files for parallel processing
if(split_vcf_records){
call SPLIT.split_vcf as split_vcf{
input:
input_vcf = vcf_in,
records_per_split = records_per_split,
output_basename = output_basename,
cpu = split_vcf_cpu
}
}

# Loop through splits and do association testing on each
scatter(split_index in range(length(split_vcf.split_vcfs))){
Array[File] split_vcfs = select_first([split_vcf.split_vcfs, [vcf_in]])

String split_output_basename = basename(sub(split_vcf.split_vcfs[split_index], "\\.gz$",""), ".vcf")
scatter(split_index in range(length(split_vcfs))){

String split_output_basename = basename(sub(split_vcfs[split_index], "\\.gz$",""), ".vcf")


# Run rvtests for association
call RV.rvtests{
input:
inVCF = split_vcf.split_vcfs[split_index],
inVCF = split_vcfs[split_index],
phenoFile = pheno_file,
output_basename = split_output_basename,
covarFile = covar_file,
phenoName = pheno_name,
covarsMaybe = covars,
kinship = kinship_matrix,
xHemiKinship = xHemiKinship_matrix,
kinship = kinship,
xHemiKinship = xHemiKinship,
xHemi = xHemi,
singleTestsMaybe = singleTestsMaybe,
burdenTestsMaybe = burdenTestsMaybe,
Expand All @@ -116,7 +126,10 @@ workflow rvtests_gwas_chr_wf{
sex = sex,
qtl = qtl,
multipleAllele = multipleAllele,
xLabel = xLabel
xLabel = xLabel,
dosage = dosage,
cpu = rvtests_cpu,
mem_gb = rvtests_mem_gb
}
# Remove header from association output file
Expand All @@ -128,24 +141,30 @@ workflow rvtests_gwas_chr_wf{
}
}

# Collect chunked sumstats files into single zip folder
call COLLECT.collect_large_file_list_wf as collect_sumstats{
input:
# input_files = flatten_sumstats.flat_array,
input_files = strip_rvtests_headers.sumstats_out,
output_dir_name = output_basename + ".rvtests_output"
}
# Gather chunked RVTests output if > 1 split
if(length(split_vcfs) > 1){
# Collect chunked sumstats files into single zip folder
call COLLECT.collect_large_file_list_wf as collect_rvtests_sumstats{
input:
input_files = strip_rvtests_headers.sumstats_out,
output_dir_name = output_basename + ".rvtests_output"
}

# Concat all sumstats files into single sumstat file
call TSV.tsv_append as cat_sumstats{
input:
tsv_inputs_tarball = collect_sumstats.output_dir,
output_filename = output_basename + ".rvtests.MetaAssoc.tsv"
# Concat all sumstats files into single sumstat file
call TSV.tsv_append as cat_rvtests_sumstats{
input:
tsv_inputs_tarball = collect_rvtests_sumstats.output_dir,
output_filename = output_basename + ".rvtests.MetaAssoc.tsv"
}
}

# Use the combined sumstats file if more than one split was merged; otherwise use the output of the strip_rvtests_headers call directly
File full_sumstats = select_first([cat_rvtests_sumstats.tsv_output, strip_rvtests_headers.sumstats_out[0]])

# Annotate sumstats with features from info file (R2, MAF) and pop MAF file (MAF from pop of interest)
call STAT.make_gwas_summary_stats as annotate_sumstats{
input:
file_in_summary_stats = cat_sumstats.tsv_output,
file_in_summary_stats = full_sumstats,
file_in_info = info_in,
file_in_pop_mafs = pop_maf_file,
file_in_summary_stats_format = "rvtests",
Expand Down
Loading

0 comments on commit 83691df

Please sign in to comment.