@@ -54,7 +54,7 @@ rule merge_samples_01:
54
54
shell ("ls {sv_ref} >> {output.vcf_list}; " )
55
55
56
56
# Select the SV merging tool: Jasmine when useJAMINE is True, SURVIVOR otherwise.
57
- useJAMINE = True
57
+ useJAMINE = False
58
58
if (useJAMINE ):
59
59
checkpoint merge_samples_02 :
60
60
input :
@@ -69,6 +69,7 @@ if (useJAMINE):
69
69
vcf_list = OUT_FOLDER + "/merged_cohort/vcf.list" ,
70
70
shell :
71
71
"jasmine file_list={params.vcf_list} out_file={output.vcf}; "
72
+ "jasmine --dup_to_ins --postprocess_only out_file={output.vcf}; "
72
73
"bcftools sort -Oz -o {output.vcfgz} {output.vcf}; "
73
74
"tabix -p vcf {output.vcfgz}; "
74
75
else :
83
84
SNAKEDIR + "envs/survivor.yaml"
84
85
params :
85
86
vcf_list = OUT_FOLDER + "/merged_cohort/vcf.list" ,
86
- breakpoint_dist = "500 " ,
87
+ breakpoint_dist = "100 " ,
87
88
min_num_calls = "1" ,
88
89
use_type = "1" ,
89
90
use_strand = "1" ,
@@ -146,14 +147,18 @@ rule genotype_02:
146
147
output :
147
148
vcf = OUT_FOLDER + "/sv_genotyping/{sample}/{sample}.vcf" ,
148
149
vcfgz = OUT_FOLDER + "/sv_genotyping/{sample}/{sample}.vcf.gz" ,
149
- tbi = OUT_FOLDER + "/sv_genotyping/{sample}/{sample}.vcf.gz.tbi"
150
+ tbi = OUT_FOLDER + "/sv_genotyping/{sample}/{sample}.vcf.gz.tbi" ,
151
+ filt_vcfgz = OUT_FOLDER + "/sv_genotyping/{sample}/{sample}.filt.vcf.gz" ,
152
+ filt_tbi = OUT_FOLDER + "/sv_genotyping/{sample}/{sample}.filt.vcf.gz.tbi"
150
153
conda :
151
154
SNAKEDIR + "envs/graphtyper.yaml"
152
155
priority : 0
153
156
shell :
154
- "graphtyper vcf_concatenate {input.vcf} | bcftools sort > {output.vcf}; "
157
+ "graphtyper vcf_concatenate {input.vcf} | bcftools view --include \" SVMODEL='AGGREGATED' \" | bcftools sort > {output.vcf}; "
155
158
"bgzip -c {output.vcf} > {output.vcfgz}; "
156
159
"tabix -p vcf {output.vcfgz}; "
160
+ "bcftools view -e QUAL==0 -Oz -o {output.filt_vcfgz} {output.vcfgz}; "
161
+ "tabix -p vcf {output.filt_vcfgz}; "
157
162
158
163
rule merge_genotyped_samples :
159
164
input :
@@ -168,11 +173,10 @@ rule merge_genotyped_samples:
168
173
conda :
169
174
SNAKEDIR + "envs/graphtyper.yaml"
170
175
shell :
171
- "graphtyper vcf_merge {input.vcf} --sv | \
172
- grep -E -v \" BREAKPOINT|COVERAGE\" | grep -E -v \" VarType=XG|VarType=IG\" > {output.vcf_full}; "
176
+ "graphtyper vcf_merge {input.vcf} --sv | grep -E -v \" VarType=XG|VarType=IG\" > {output.vcf_full}; "
173
177
"bcftools sort -Oz -o {output.vcfgz_full} {output.vcf_full}; "
174
178
"tabix -p vcf {output.vcfgz_full}; "
175
- "bcftools view -f PASS {output.vcfgz_full} > {output.vcf}; "
179
+ "bcftools view -e QUAL==0 {output.vcfgz_full} > {output.vcf}; "
176
180
"bgzip -c {output.vcf} > {output.vcfgz}; "
177
181
"tabix -p vcf {output.vcfgz}; "
178
182
@@ -207,6 +211,6 @@ rule genotype_discovery:
207
211
--output={params.outdir}; \
208
212
done; "
209
213
"graphtyper vcf_concatenate --no_sort {params.outdir}/*/*.vcf.gz | \
210
- grep -E -v \" BREAKPOINT|COVERAGE \" | bcftools sort > {output.vcf};"
214
+ bcftools view --include \" SVMODEL='AGGREGATED' \" | bcftools view -e QUAL==0 | bcftools sort > {output.vcf};"
211
215
"bgzip -c {output.vcf} > {output.vcfgz}; "
212
216
"tabix -p vcf {output.vcfgz}; "
0 commit comments