Skip to content

Commit 71af733

Browse files
committed
modify tools/pvacsplice and regtools wdl
1 parent f359b30 commit 71af733

File tree

2 files changed

+79
-99
lines changed

2 files changed

+79
-99
lines changed

definitions/tools/pvacsplice.wdl

Lines changed: 20 additions & 99 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ task pvacsplice {
66
File input_vcf
77
File input_vcf_tbi
88
File input_regtools_tsv
9-
File input_reference_fasta
9+
File input_reference_dna_fasta
1010
File input_reference_gtf
1111
String sample_name
1212
Array[String] alleles
@@ -36,6 +36,13 @@ task pvacsplice {
3636
Int? maximum_transcript_support_level # enum [1, 2, 3, 4, 5]
3737
Int? aggregate_inclusion_binding_threshold
3838
Array[String]? problematic_amino_acids
39+
Array[String] biotypes_list = select_first([biotypes, []])
40+
Int? aggregate_inclusion_count_limit
41+
42+
Int? juction_score
43+
Int? variant_distance
44+
Boolean save_gtf = false
45+
Array[String]? junction_anchor_types
3946

4047
Boolean allele_specific_binding_thresholds = false
4148
Boolean keep_tmp_files = false
@@ -45,7 +52,7 @@ task pvacsplice {
4552
Float? tumor_purity
4653
}
4754

48-
Float input_size = size([input_vcf, input_vcf_tbi, input_regtools_tsv, input_reference_fasta,input_reference_gtf], "GB") #input files: annotated vcf, regtools tsv, reference fasta, reference gtf
55+
Float input_size = size([input_vcf, input_vcf_tbi, input_regtools_tsv, input_reference_dna_fasta,input_reference_gtf], "GB") #input files: annotated vcf, regtools tsv, reference fasta, reference gtf
4956
Int space_needed_gb = 10 + round(input_size)
5057
runtime {
5158
preemptible: 1
@@ -67,13 +74,14 @@ task pvacsplice {
6774
ln -s "$TMPDIR" /tmp/pvacsplice && export TMPDIR=/tmp/pvacsplice && \
6875
/usr/local/bin/pvacsplice run --iedb-install-directory /opt/iedb \
6976
--pass-only \
70-
#~{if defined(tumor_purity) then "--tumor-purity " + select_first([tumor_purity]) else ""} \
77+
~{if defined(tumor_purity) then "--tumor-purity " + select_first([tumor_purity]) else ""} \
7178
~{if length(epitope_i ) > 0 then "-e1 " else ""} ~{sep="," epitope_i} \
7279
~{if length(epitope_ii) > 0 then "-e2 " else ""} ~{sep="," epitope_ii} \
7380
~{if defined(binding_threshold) then "-b ~{binding_threshold}" else ""} \
7481
~{if defined(percentile_threshold) then "--percentile-threshold ~{percentile_threshold}" else ""} \
7582
~{if allele_specific_binding_thresholds then "--allele-specific-binding-thresholds" else ""} \
7683
~{if defined(aggregate_inclusion_binding_threshold) then "--aggregate-inclusion-binding-threshold ~{aggregate_inclusion_binding_threshold}" else ""} \
84+
~{if defined(aggregate_inclusion_count_limit) then "--aggregate-inclusion-count-limit ~{aggregate_inclusion_count_limit}" else ""} \
7785
~{if defined(iedb_retries) then "-r ~{iedb_retries}" else ""} \
7886
~{if keep_tmp_files then "-k" else ""} \
7987
~{if defined(normal_sample_name) then "--normal-sample-name ~{normal_sample_name}" else ""} \
@@ -93,28 +101,28 @@ task pvacsplice {
93101
~{if defined(tdna_vaf) then "--tdna-vaf ~{tdna_vaf}" else ""} \
94102
~{if defined(trna_vaf) then "--trna-vaf ~{trna_vaf}" else ""} \
95103
~{if defined(expn_val) then "--expn-val ~{expn_val}" else ""} \
96-
#~{if defined(maximum_transcript_support_level) then "--maximum-transcript-support-level ~{maximum_transcript_support_level}" else ""} \
97-
#~{if length(problematic_aa) > 0 then "--problematic-amino-acids" else ""} ~{sep="," problematic_aa} \
98-
#~{if allele_specific_anchors then "--allele-specific-anchors" else ""} \
99-
#~{if defined(anchor_contribution_threshold) then "--anchor-contribution-threshold ~{anchor_contribution_threshold}" else ""} \
104+
~{if defined(maximum_transcript_support_level) then "--maximum-transcript-support-level ~{maximum_transcript_support_level}" else ""} \
105+
~{if length(problematic_aa) > 0 then "--problematic-amino-acids" else ""} ~{sep="," problematic_aa} \
106+
~{if length(biotypes_list) > 0 then "--biotypes" else ""} ~{sep="," biotypes_list} \
107+
~{if defined(juction_score) then "--junction-score ~{junction_score}" else ""} \
108+
~{if defined(variant_distance) then "--variant-distance ~{variant_distance}" else ""} \
109+
~{if save_gtf then "-g" else ""} \
110+
~{if length(junction_anchor_types) > 0 then "--anchor-types" else ""} ~{sep="," junction_anchor_types}
100111
--n-threads ~{n_threads} \
101-
~{input_vcf} ~{sample_name} ~{sep="," alleles} ~{sep=" " prediction_algorithms} \
102-
pvacsplice_predictions
112+
~{input_regtools_tsv} ~{sample_name} ~{sep="," alleles} ~{sep=" " prediction_algorithms} \
113+
pvacsplice_predictions ~{input_vcf} ~{input_reference_dna_fasta} ~{input_reference_gtf}
103114
>>>
104115

105116
output {
106117
File? mhc_i_all_epitopes = "pvacsplice_predictions/MHC_Class_I/~{sample_name}.all_epitopes.tsv"
107118
File? mhc_i_aggregated_report = "pvacsplice_predictions/MHC_Class_I/~{sample_name}.all_epitopes.aggregated.tsv"
108119
File? mhc_i_filtered_epitopes = "pvacsplice_predictions/MHC_Class_I/~{sample_name}.filtered.tsv"
109-
File? mhc_i_aggregated_metrics_file = "pvacsplice_predictions/MHC_Class_I/" + sample_name + ".all_epitopes.aggregated.metrics.json"
110120
File? mhc_ii_all_epitopes = "pvacsplice_predictions/MHC_Class_II/~{sample_name}.all_epitopes.tsv"
111121
File? mhc_ii_aggregated_report = "pvacsplice_predictions/MHC_Class_II/~{sample_name}.all_epitopes.aggregated.tsv"
112122
File? mhc_ii_filtered_epitopes = "pvacsplice_predictions/MHC_Class_II/~{sample_name}.filtered.tsv"
113-
File? mhc_ii_aggregated_metrics_file = "pvacsplice_predictions/MHC_Class_II/" + sample_name + ".all_epitopes.aggregated.metrics.json"
114123
File? combined_all_epitopes = "pvacsplice_predictions/combined/~{sample_name}.all_epitopes.tsv"
115124
File? combined_aggregated_report = "pvacsplice_predictions/combined/~{sample_name}.all_epitopes.aggregated.tsv"
116125
File? combined_filtered_epitopes = "pvacsplice_predictions/combined/~{sample_name}.filtered.tsv"
117-
File? combined_aggregated_metrics_file = "pvacsplice_predictions/combined/" + sample_name + ".all_epitopes.aggregated.metrics.json"
118126

119127
# glob documentations
120128
# https://github.com/openwdl/wdl/blob/main/versions/1.0/SPEC.md#globs
@@ -124,90 +132,3 @@ task pvacsplice {
124132

125133
}
126134
}
127-
############################### may delete these######
128-
workflow wf {
129-
input {
130-
Int? n_threads
131-
File input_vcf
132-
File input_vcf_tbi
133-
String sample_name
134-
Array[String] alleles
135-
Array[String] prediction_algorithms
136-
137-
Array[Int]? epitope_lengths_class_i
138-
Array[Int]? epitope_lengths_class_ii
139-
Int? binding_threshold
140-
Int? percentile_threshold
141-
Int? iedb_retries
142-
143-
String? normal_sample_name
144-
String? net_chop_method # enum [cterm , 20s]
145-
String? top_score_metric # enum [lowest, median]
146-
Float? net_chop_threshold
147-
String? additional_report_columns # enum [sample_name]
148-
Int? fasta_size
149-
Int? downstream_sequence_length
150-
Boolean exclude_nas = false
151-
File? phased_proximal_variants_vcf
152-
File? phased_proximal_variants_vcf_tbi
153-
Float? minimum_fold_change
154-
Int? normal_cov
155-
Int? tdna_cov
156-
Int? trna_cov
157-
Float? normal_vaf
158-
Float? tdna_vaf
159-
Float? trna_vaf
160-
Float? expn_val
161-
String? maximum_transcript_support_level # enum [1, 2, 3, 4, 5]
162-
Int? aggregate_inclusion_binding_threshold
163-
Array[String]? problematic_amino_acids
164-
Float? anchor_contribution_threshold
165-
166-
Boolean? allele_specific_binding_thresholds
167-
Boolean? keep_tmp_files
168-
Boolean? netmhc_stab
169-
Boolean? run_reference_proteome_similarity
170-
Boolean allele_specific_anchors = false
171-
}
172-
call pvacsplice {
173-
input:
174-
n_threads=n_threads,
175-
input_vcf=input_vcf,
176-
input_vcf_tbi=input_vcf_tbi,
177-
sample_name=sample_name,
178-
alleles=alleles,
179-
prediction_algorithms=prediction_algorithms,
180-
epitope_lengths_class_i=epitope_lengths_class_i,
181-
epitope_lengths_class_ii=epitope_lengths_class_ii,
182-
binding_threshold=binding_threshold,
183-
percentile_threshold=percentile_threshold,
184-
aggregate_inclusion_binding_threshold=aggregate_inclusion_binding_threshold,
185-
iedb_retries=iedb_retries,
186-
normal_sample_name=normal_sample_name,
187-
net_chop_method=net_chop_method,
188-
top_score_metric=top_score_metric,
189-
net_chop_threshold=net_chop_threshold,
190-
additional_report_columns=additional_report_columns,
191-
fasta_size=fasta_size,
192-
downstream_sequence_length=downstream_sequence_length,
193-
exclude_nas=exclude_nas,
194-
phased_proximal_variants_vcf=phased_proximal_variants_vcf,
195-
phased_proximal_variants_vcf_tbi=phased_proximal_variants_vcf_tbi,
196-
minimum_fold_change=minimum_fold_change,
197-
normal_cov=normal_cov,
198-
tdna_cov=tdna_cov,
199-
trna_cov=trna_cov,
200-
normal_vaf=normal_vaf,
201-
tdna_vaf=tdna_vaf,
202-
trna_vaf=trna_vaf,
203-
expn_val=expn_val,
204-
maximum_transcript_support_level=maximum_transcript_support_level,
205-
allele_specific_binding_thresholds=allele_specific_binding_thresholds,
206-
problematic_amino_acids=problematic_amino_acids,
207-
allele_specific_anchors=allele_specific_anchors,
208-
anchor_contribution_threshold=anchor_contribution_threshold,
209-
keep_tmp_files=keep_tmp_files,
210-
netmhc_stab=netmhc_stab,
211-
run_reference_proteome_similarity=run_reference_proteome_similarity
212-
}
213-
}

definitions/tools/regtools.wdl

Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,59 @@
1+
version 1.0
2+
3+
# Runs `regtools cis-splice-effects identify` on a VCF and an alignment file,
# writing a splice-junction TSV (and, when the optional filenames are defined,
# a VCF and a BED file as well).
#
# Only the TSV is exposed as a task output; the VCF/BED files are written to
# the working directory but not collected.
task regtools {
  input {
    # Output file names, passed to regtools as -o / -v / -j respectively.
    String output_filename_tsv = "splice_junction.tsv"
    String? output_filename_vcf = "splice_junction.vcf"
    String? output_filename_bed = "splice_junction.bed"

    String strand  # enum [XS, RF, FR]; passed as -s
    Int? window_size  # passed as -w when defined
    Int? max_distance_exon  # max distance from exon/intron boundary to annotate a variant in exonic region as splicing variant
    Int? max_distance_intron  # passed as -i when defined; presumably the intronic counterpart of max_distance_exon

    # Boolean switches; each one adds a single flag to the command line when true.
    Boolean annotate_intronic_variant = false  # -I
    Boolean annotate_exonic_variant = false  # -E
    Boolean not_skipping_single_exon_transcripts = false  # -S
    Boolean singecell_barcode = false  # -b
    Boolean intron_motif_priority = false  # -C

    File input_vcf
    # NOTE(review): no index file is declared for the BAM/CRAM, so nothing here
    # guarantees an index is localized next to it - confirm against callers.
    File input_bam  # indexed,aligned (and preferably sorted) bam or cram
    File input_reference_dna_fasta
    File input_reference_gtf

    # Number of CPUs requested in the runtime block. Previously `n_threads` was
    # referenced by `cpu:` without being declared anywhere in this task, which
    # makes the task fail validation. It is not passed to the regtools command.
    Int n_threads = 1
  }

  # Disk request: total size of the input files plus 10 GB of headroom.
  Float input_size = size([input_vcf, input_bam, input_reference_dna_fasta, input_reference_gtf], "GB")
  Int space_needed_gb = 10 + round(input_size)

  runtime {
    preemptible: 1
    maxRetries: 2
    memory: "32GB"
    cpu: n_threads
    docker: "griffithlab/regtools:release-1.0.0"
    disks: "local-disk ~{space_needed_gb} HDD"
  }

  command <<<
    /regtools/build/regtools cis-splice-effects identify \
    -o ~{output_filename_tsv} \
    ~{if defined(output_filename_vcf) then "-v ~{output_filename_vcf}" else ""} \
    ~{if defined(output_filename_bed) then "-j ~{output_filename_bed}" else ""} \
    -s ~{strand} \
    ~{if defined(window_size) then "-w ~{window_size}" else ""} \
    ~{if defined(max_distance_exon) then "-e ~{max_distance_exon}" else ""} \
    ~{if defined(max_distance_intron) then "-i ~{max_distance_intron}" else ""} \
    ~{if annotate_intronic_variant then "-I" else ""} \
    ~{if annotate_exonic_variant then "-E" else ""} \
    ~{if not_skipping_single_exon_transcripts then "-S" else ""} \
    ~{if singecell_barcode then "-b" else ""} \
    ~{if intron_motif_priority then "-C" else ""} \
    ~{input_vcf} ~{input_bam} ~{input_reference_dna_fasta} ~{input_reference_gtf}
  >>>

  output {
    # Optional: the file is looked up under the name given by output_filename_tsv.
    File? output_splice_junction_tsv = output_filename_tsv
  }
}
59+

0 commit comments

Comments
 (0)