|
| 1 | +version 1.0 |
| 2 | + |
# Runs `pvacsplice run` inside the griffithlab/pvactools container and collects
# the per-class (MHC_Class_I, MHC_Class_II, combined) result files written under
# the `pvacsplice_predictions` output directory.
#
# Required inputs: the VCF and its tabix index, the sample name, the HLA alleles
# and the prediction algorithms. Every other input is optional and is only
# turned into a command-line flag when it is supplied (or, for Booleans, true).
#
# NOTE(review): the set of flags and positional arguments below should be
# confirmed against `pvacsplice run --help` for the pinned image version.
task pvacsplice {
  input {
    Int n_threads = 8
    File input_vcf
    File input_vcf_tbi
    String sample_name
    Array[String] alleles
    Array[String] prediction_algorithms
    File? peptide_fasta

    Array[Int]? epitope_lengths_class_i
    Array[Int]? epitope_lengths_class_ii
    Int? binding_threshold
    Int? percentile_threshold
    Int? iedb_retries

    String? normal_sample_name
    String? net_chop_method  # enum [cterm , 20s]
    String? top_score_metric  # enum [lowest, median]
    Float? net_chop_threshold
    String? additional_report_columns  # enum [sample_name]
    Int? fasta_size
    Int? downstream_sequence_length
    Boolean exclude_nas = false
    File? phased_proximal_variants_vcf
    File? phased_proximal_variants_vcf_tbi
    Float? minimum_fold_change
    Int? normal_cov
    Int? tdna_cov
    Int? trna_cov
    Float? normal_vaf
    Float? tdna_vaf
    Float? trna_vaf
    Float? expn_val
    Int? maximum_transcript_support_level  # enum [1, 2, 3, 4, 5]
    Int? aggregate_inclusion_binding_threshold
    Array[String]? problematic_amino_acids
    Float? anchor_contribution_threshold

    Boolean allele_specific_binding_thresholds = false
    Boolean keep_tmp_files = false
    Boolean netmhc_stab = false
    Boolean run_reference_proteome_similarity = false
    Boolean allele_specific_anchors = false

    Float? tumor_purity
  }

  # Disk request: 10 GB of headroom plus the (rounded) size of the input VCFs
  # and their indexes. The phased VCF pair is optional; size() accepts File?.
  Float input_size = size([input_vcf, input_vcf_tbi], "GB")
  Float phased_variants_size = size([phased_proximal_variants_vcf, phased_proximal_variants_vcf_tbi], "GB")
  Int space_needed_gb = 10 + round(input_size + phased_variants_size)
  runtime {
    preemptible: 1
    maxRetries: 2
    memory: "32GB"
    cpu: n_threads
    docker: "griffithlab/pvactools:4.4.1"
    disks: "local-disk ~{space_needed_gb} HDD"
  }

  # explicit typing required, don't inline
  # (an absent optional array becomes an empty array so length()/sep can be used)
  Array[Int] epitope_i = select_first([epitope_lengths_class_i, []])
  Array[Int] epitope_ii = select_first([epitope_lengths_class_ii, []])
  Array[String] problematic_aa = select_first([problematic_amino_acids, []])
  command <<<
    # touch each tbi to ensure they have a timestamp after the vcf
    # the phased index is optional: only touch it when it was provided, so an
    # absent file does not turn into a bare `touch` with no operand
    ~{if defined(phased_proximal_variants_vcf_tbi) then "touch ~{phased_proximal_variants_vcf_tbi}" else ""}
    touch ~{input_vcf_tbi}

    # expose the scratch directory under the fixed path /tmp/pvacsplice, then run
    # the tool; --additional-report-columns is spelled out in long form so it
    # cannot collide with -m (top score metric)
    ln -s "$TMPDIR" /tmp/pvacsplice && export TMPDIR=/tmp/pvacsplice && \
    /usr/local/bin/pvacsplice run --iedb-install-directory /opt/iedb \
    --pass-only \
    ~{if defined(tumor_purity) then "--tumor-purity " + select_first([tumor_purity]) else ""} \
    ~{if length(epitope_i ) > 0 then "-e1 " else ""} ~{sep="," epitope_i} \
    ~{if length(epitope_ii) > 0 then "-e2 " else ""} ~{sep="," epitope_ii} \
    ~{if defined(binding_threshold) then "-b ~{binding_threshold}" else ""} \
    ~{if defined(percentile_threshold) then "--percentile-threshold ~{percentile_threshold}" else ""} \
    ~{if allele_specific_binding_thresholds then "--allele-specific-binding-thresholds" else ""} \
    ~{if defined(aggregate_inclusion_binding_threshold) then "--aggregate-inclusion-binding-threshold ~{aggregate_inclusion_binding_threshold}" else ""} \
    ~{if defined(iedb_retries) then "-r ~{iedb_retries}" else ""} \
    ~{if keep_tmp_files then "-k" else ""} \
    ~{if defined(normal_sample_name) then "--normal-sample-name ~{normal_sample_name}" else ""} \
    ~{if defined(net_chop_method) then "--net-chop-method ~{net_chop_method}" else ""} \
    ~{if netmhc_stab then "--netmhc-stab" else ""} \
    ~{if run_reference_proteome_similarity then "--run-reference-proteome-similarity" else ""} \
    ~{if defined(peptide_fasta) then "--peptide-fasta ~{peptide_fasta}" else ""} \
    ~{if defined(top_score_metric) then "-m ~{top_score_metric}" else ""} \
    ~{if defined(net_chop_threshold) then "--net-chop-threshold ~{net_chop_threshold}" else ""} \
    ~{if defined(additional_report_columns) then "--additional-report-columns ~{additional_report_columns}" else ""} \
    ~{if defined(fasta_size) then "-s ~{fasta_size}" else ""} \
    ~{if defined(downstream_sequence_length) then "-d ~{downstream_sequence_length}" else ""} \
    ~{if exclude_nas then "--exclude-NAs" else ""} \
    ~{if defined(phased_proximal_variants_vcf) then "-p ~{phased_proximal_variants_vcf}" else ""} \
    ~{if defined(minimum_fold_change) then "-c ~{minimum_fold_change}" else ""} \
    ~{if defined(normal_cov) then "--normal-cov ~{normal_cov}" else ""} \
    ~{if defined(tdna_cov) then "--tdna-cov ~{tdna_cov}" else ""} \
    ~{if defined(trna_cov) then "--trna-cov ~{trna_cov}" else ""} \
    ~{if defined(normal_vaf) then "--normal-vaf ~{normal_vaf}" else ""} \
    ~{if defined(tdna_vaf) then "--tdna-vaf ~{tdna_vaf}" else ""} \
    ~{if defined(trna_vaf) then "--trna-vaf ~{trna_vaf}" else ""} \
    ~{if defined(expn_val) then "--expn-val ~{expn_val}" else ""} \
    ~{if defined(maximum_transcript_support_level) then "--maximum-transcript-support-level ~{maximum_transcript_support_level}" else ""} \
    ~{if length(problematic_aa) > 0 then "--problematic-amino-acids" else ""} ~{sep="," problematic_aa} \
    ~{if allele_specific_anchors then "--allele-specific-anchors" else ""} \
    ~{if defined(anchor_contribution_threshold) then "--anchor-contribution-threshold ~{anchor_contribution_threshold}" else ""} \
    --n-threads ~{n_threads} \
    ~{input_vcf} ~{sample_name} ~{sep="," alleles} ~{sep=" " prediction_algorithms} \
    pvacsplice_predictions
  >>>

  output {
    # Named result files are declared File? so a missing file does not fail the task.
    File? mhc_i_all_epitopes = "pvacsplice_predictions/MHC_Class_I/~{sample_name}.all_epitopes.tsv"
    File? mhc_i_aggregated_report = "pvacsplice_predictions/MHC_Class_I/~{sample_name}.all_epitopes.aggregated.tsv"
    File? mhc_i_filtered_epitopes = "pvacsplice_predictions/MHC_Class_I/~{sample_name}.filtered.tsv"
    File? mhc_i_aggregated_metrics_file = "pvacsplice_predictions/MHC_Class_I/" + sample_name + ".all_epitopes.aggregated.metrics.json"
    File? mhc_ii_all_epitopes = "pvacsplice_predictions/MHC_Class_II/~{sample_name}.all_epitopes.tsv"
    File? mhc_ii_aggregated_report = "pvacsplice_predictions/MHC_Class_II/~{sample_name}.all_epitopes.aggregated.tsv"
    File? mhc_ii_filtered_epitopes = "pvacsplice_predictions/MHC_Class_II/~{sample_name}.filtered.tsv"
    File? mhc_ii_aggregated_metrics_file = "pvacsplice_predictions/MHC_Class_II/" + sample_name + ".all_epitopes.aggregated.metrics.json"
    File? combined_all_epitopes = "pvacsplice_predictions/combined/~{sample_name}.all_epitopes.tsv"
    File? combined_aggregated_report = "pvacsplice_predictions/combined/~{sample_name}.all_epitopes.aggregated.tsv"
    File? combined_filtered_epitopes = "pvacsplice_predictions/combined/~{sample_name}.filtered.tsv"
    File? combined_aggregated_metrics_file = "pvacsplice_predictions/combined/" + sample_name + ".all_epitopes.aggregated.metrics.json"

    # glob documentation
    # https://github.com/openwdl/wdl/blob/main/versions/1.0/SPEC.md#globs
    # Everything written to each class directory, including files not named above.
    Array[File] mhc_i = glob("pvacsplice_predictions/MHC_Class_I/*")
    Array[File] mhc_ii = glob("pvacsplice_predictions/MHC_Class_II/*")
    Array[File] combined = glob("pvacsplice_predictions/combined/*")
  }
}
| 135 | + |
# Thin wrapper workflow that forwards its inputs one-to-one to the pvacsplice
# task so the task can be run on its own.
#
# Optional inputs that are left unset are passed through as undefined.
# NOTE(review): for task inputs that declare a default (n_threads and the
# Boolean switches), an unset optional here is expected to fall back to the
# task default; confirm for the execution engine in use.
workflow wf {
  input {
    Int? n_threads
    File input_vcf
    File input_vcf_tbi
    String sample_name
    Array[String] alleles
    Array[String] prediction_algorithms
    File? peptide_fasta

    Array[Int]? epitope_lengths_class_i
    Array[Int]? epitope_lengths_class_ii
    Int? binding_threshold
    Int? percentile_threshold
    Int? iedb_retries

    String? normal_sample_name
    String? net_chop_method  # enum [cterm , 20s]
    String? top_score_metric  # enum [lowest, median]
    Float? net_chop_threshold
    String? additional_report_columns  # enum [sample_name]
    Int? fasta_size
    Int? downstream_sequence_length
    Boolean exclude_nas = false
    File? phased_proximal_variants_vcf
    File? phased_proximal_variants_vcf_tbi
    Float? minimum_fold_change
    Int? normal_cov
    Int? tdna_cov
    Int? trna_cov
    Float? normal_vaf
    Float? tdna_vaf
    Float? trna_vaf
    Float? expn_val
    # Int? to match the type of the task input this value is forwarded to
    Int? maximum_transcript_support_level  # enum [1, 2, 3, 4, 5]
    Int? aggregate_inclusion_binding_threshold
    Array[String]? problematic_amino_acids
    Float? anchor_contribution_threshold

    Boolean? allele_specific_binding_thresholds
    Boolean? keep_tmp_files
    Boolean? netmhc_stab
    Boolean? run_reference_proteome_similarity
    Boolean allele_specific_anchors = false

    Float? tumor_purity
  }
  call pvacsplice {
    input:
    n_threads=n_threads,
    input_vcf=input_vcf,
    input_vcf_tbi=input_vcf_tbi,
    sample_name=sample_name,
    alleles=alleles,
    prediction_algorithms=prediction_algorithms,
    peptide_fasta=peptide_fasta,
    epitope_lengths_class_i=epitope_lengths_class_i,
    epitope_lengths_class_ii=epitope_lengths_class_ii,
    binding_threshold=binding_threshold,
    percentile_threshold=percentile_threshold,
    aggregate_inclusion_binding_threshold=aggregate_inclusion_binding_threshold,
    iedb_retries=iedb_retries,
    normal_sample_name=normal_sample_name,
    net_chop_method=net_chop_method,
    top_score_metric=top_score_metric,
    net_chop_threshold=net_chop_threshold,
    additional_report_columns=additional_report_columns,
    fasta_size=fasta_size,
    downstream_sequence_length=downstream_sequence_length,
    exclude_nas=exclude_nas,
    phased_proximal_variants_vcf=phased_proximal_variants_vcf,
    phased_proximal_variants_vcf_tbi=phased_proximal_variants_vcf_tbi,
    minimum_fold_change=minimum_fold_change,
    normal_cov=normal_cov,
    tdna_cov=tdna_cov,
    trna_cov=trna_cov,
    normal_vaf=normal_vaf,
    tdna_vaf=tdna_vaf,
    trna_vaf=trna_vaf,
    expn_val=expn_val,
    maximum_transcript_support_level=maximum_transcript_support_level,
    allele_specific_binding_thresholds=allele_specific_binding_thresholds,
    problematic_amino_acids=problematic_amino_acids,
    allele_specific_anchors=allele_specific_anchors,
    anchor_contribution_threshold=anchor_contribution_threshold,
    keep_tmp_files=keep_tmp_files,
    netmhc_stab=netmhc_stab,
    run_reference_proteome_similarity=run_reference_proteome_similarity,
    tumor_purity=tumor_purity
  }
}
0 commit comments