@@ -43,7 +43,7 @@ def merge_results(outdir, varsim_tp, varsim_fn, vcfeval_tp,
43
43
44
44
45
45
class VCFComparator (object ):
46
- def __init__ (self , prefix , true_vcf , reference , regions , sample , vcfs , exclude_filtered , match_geno , log_to_file , opts ):
46
+ def __init__ (self , prefix , true_vcf , reference , regions , sample , vcfs , exclude_filtered , match_geno , log_to_file , opts , java = "java" ):
47
47
self .prefix = prefix
48
48
self .true_vcf = true_vcf
49
49
self .reference = reference
@@ -55,6 +55,7 @@ def __init__(self, prefix, true_vcf, reference, regions, sample, vcfs, exclude_f
55
55
self .regions = regions
56
56
self .opts = opts #additional options
57
57
self .tp ,self .tp_predict ,self .fp ,self .fn = None , None , None , None
58
+ self .java = java
58
59
59
60
def run (self ):
60
61
'''
@@ -96,8 +97,8 @@ def get_fn(self):
96
97
return self .fn
97
98
98
99
class VarSimVCFComparator (VCFComparator ):
99
- def __init__ (self , prefix , true_vcf , reference , regions , sample , vcfs , exclude_filtered , disallow_partial_fp , match_geno , log_to_file , opts ):
100
- VCFComparator .__init__ (self , prefix , true_vcf , reference , regions , sample , vcfs , exclude_filtered , match_geno , log_to_file , opts )
100
+ def __init__ (self , prefix , true_vcf , reference , regions , sample , vcfs , exclude_filtered , disallow_partial_fp , match_geno , log_to_file , opts , java = 'java' ):
101
+ VCFComparator .__init__ (self , prefix , true_vcf , reference , regions , sample , vcfs , exclude_filtered , match_geno , log_to_file , opts , java )
101
102
self .disallow_partial_fp = disallow_partial_fp
102
103
def get_tp_predict (self ):
103
104
'''
@@ -111,7 +112,7 @@ def run(self):
111
112
112
113
:return:
113
114
'''
114
- cmd = [' java' , utils .JAVA_XMX , '-jar' , utils .VARSIMJAR , 'vcfcompare' ,
115
+ cmd = [self . java , utils .JAVA_XMX , '-jar' , utils .VARSIMJAR , 'vcfcompare' ,
115
116
'-prefix' , self .prefix , '-true_vcf' ,
116
117
self .true_vcf ,
117
118
'-reference' , self .reference ,
@@ -154,7 +155,7 @@ def run(self):
154
155
#command example
155
156
#rtg-tools-3.8.4-bdba5ea_install/rtg vcfeval --baseline truth.vcf.gz \
156
157
#--calls compare1.vcf.gz -o vcfeval_split_snp -t ref.sdf --output-mode=annotate --sample xx --squash-ploidy --regions ?? \
157
- cmd = [' java' , utils .JAVA_XMX , '-jar' , utils .RTGJAR , 'vcfeval' ,
158
+ cmd = [self . java , utils .JAVA_XMX , '-jar' , utils .RTGJAR , 'vcfeval' ,
158
159
'-o' , self .prefix , '--baseline' ,
159
160
self .true_vcf ,
160
161
'-t' , self .reference ,
@@ -211,7 +212,7 @@ def run(self):
211
212
raise Exception ('{0} was not generated by vcfeval. Please check and rerun.' .format (i ))
212
213
self .tp , self .tp_predict , self .fn , self .fp = tp , tp_predict , fn , fp
213
214
214
- def generate_sdf (reference , log ):
215
+ def generate_sdf (reference , log , java = 'java' ):
215
216
'''
216
217
take reference and generate SDF
217
218
:param reference:
@@ -222,7 +223,7 @@ def generate_sdf(reference, log):
222
223
LOGGER .info ('{0} exists, doing nothing' .format (sdf ))
223
224
LOGGER .info ('to rerun SDF generation, please remove or rename {0}' .format (sdf ))
224
225
return sdf
225
- cmd = [' java' , utils .JAVA_XMX , '-jar' ,utils .RTGJAR ,'format' ,
226
+ cmd = [java , utils .JAVA_XMX , '-jar' ,utils .RTGJAR ,'format' ,
226
227
'-o' , sdf , reference ]
227
228
if log :
228
229
with utils .versatile_open (log , 'a' ) as logout :
@@ -237,6 +238,8 @@ def process(args):
237
238
:param args:
238
239
:return:
239
240
'''
241
+ args .java = utils .get_java (args .java )
242
+ utils .check_java (args .java )
240
243
241
244
# Setup logging
242
245
FORMAT = '%(levelname)s %(asctime)-15s %(name)-20s %(message)s'
@@ -264,7 +267,7 @@ def process(args):
264
267
sample = args .sample , vcfs = args .vcfs ,
265
268
exclude_filtered = args .exclude_filtered ,
266
269
disallow_partial_fp = args .disallow_partial_fp ,
267
- match_geno = args .match_geno , log_to_file = args .log_to_file , opts = args .vcfcompare_options )
270
+ match_geno = args .match_geno , log_to_file = args .log_to_file , opts = args .vcfcompare_options , java = args . java )
268
271
varsim_tp , varsim_fn , varsim_fp = varsim_comparator .get_tp (), varsim_comparator .get_fn (), varsim_comparator .get_fp ()
269
272
varsim_tp = utils .sort_and_compress (varsim_tp )
270
273
varsim_fn = utils .sort_and_compress (varsim_fn )
@@ -273,7 +276,7 @@ def process(args):
273
276
sdf = args .sdf
274
277
if not sdf :
275
278
LOGGER .info ("user did not supply SDF-formatted reference, trying to generate one..." )
276
- sdf = generate_sdf (args .reference , args .log_to_file )
279
+ sdf = generate_sdf (args .reference , args .log_to_file , java = args . java )
277
280
278
281
'''for vcfeval
279
282
sample column must be present, and not empty
@@ -290,25 +293,25 @@ def process(args):
290
293
sample = args .sample , vcfs = [varsim_fp ],
291
294
exclude_filtered = args .exclude_filtered ,
292
295
match_geno = args .match_geno , log_to_file = args .log_to_file ,
293
- opts = args .vcfeval_options )
296
+ opts = args .vcfeval_options , java = args . java )
294
297
vcfeval_tp , vcfeval_tp_predict = vcfeval_comparator .get_tp (), vcfeval_comparator .get_tp_predict ()
295
298
augmented_tp , augmented_fn , augmented_fp , augmented_t = merge_results (
296
299
outdir = args .out_dir ,
297
300
varsim_tp = varsim_tp , varsim_fn = varsim_fn ,
298
301
vcfeval_tp = vcfeval_tp , varsim_fp = varsim_fp , vcfeval_tp_predict = vcfeval_tp_predict )
299
302
augmented_tp , augmented_fn , augmented_fp , augmented_t = summarize_results (os .path .join (args .out_dir ,"augmented" ), augmented_tp , augmented_fn , augmented_fp , augmented_t ,
300
- var_types = args .var_types , sv_length = args .sv_length , regions = args .regions , bed_either = args .bed_either )
303
+ var_types = args .var_types , sv_length = args .sv_length , regions = args .regions , bed_either = args .bed_either , java = args . java )
301
304
302
305
303
306
if args .master_vcf and args .call_vcf :
304
- match_false (augmented_fp , [args .call_vcf , args .master_vcf , augmented_fn ], args .out_dir , args .sample , args .log_to_file , args .vcfeval_options , sdf )
305
- match_false (augmented_fn , [args .call_vcf ], args .out_dir , args .sample , args .log_to_file , args .vcfeval_options , sdf )
307
+ match_false (augmented_fp , [args .call_vcf , args .master_vcf , augmented_fn ], args .out_dir , args .sample , args .log_to_file , args .vcfeval_options , sdf , args . java )
308
+ match_false (augmented_fn , [args .call_vcf ], args .out_dir , args .sample , args .log_to_file , args .vcfeval_options , sdf , args . java )
306
309
307
310
LOGGER .info ("Variant comparison done.\n True positive: {0}\n False negative: {1}\n False positive: {2}\n " .
308
311
format (augmented_tp , augmented_fn , augmented_fp ))
309
312
310
313
311
- def match_false (augmented_file , files_to_pair_with , out_dir , sample , log_to_file , vcfeval_options , sdf ):
314
+ def match_false (augmented_file , files_to_pair_with , out_dir , sample , log_to_file , vcfeval_options , sdf , java = "java" ):
312
315
"""Try to pair up each false call in a file (augmented_file) with a variant in the other files provided in a list (files_to_pair_with) to create an annotated version of the first file.
313
316
By default the the first variant in the list is provided to get an AF, the 2nd to determine the simulated variant (for false positives) and the 3rd to determine if a false positive is
314
317
a pure false positive (not simulated) or not (wrong genotype)"""
@@ -350,7 +353,7 @@ def match_false(augmented_file, files_to_pair_with, out_dir, sample, log_to_file
350
353
exclude_filtered = False ,
351
354
match_geno = False ,
352
355
log_to_file = log_to_file ,
353
- opts = vcfeval_options )
356
+ opts = vcfeval_options , java = java )
354
357
355
358
equivalent_variant = utils .get_equivalent_variant (line_split , vcfeval_comparator .get_tp ())
356
359
@@ -443,7 +446,7 @@ def parse_jsons(jsonfile, stats, count_sv = False, count_all = False):
443
446
print ("error in {}. No {} field" .format (jsonfile , err ))
444
447
stats [vt ][mt ] += 0
445
448
446
- def summarize_results (prefix , tp , fn , fp , t , var_types , sv_length = 100 , regions = None , bed_either = False ):
449
+ def summarize_results (prefix , tp , fn , fp , t , var_types , sv_length = 100 , regions = None , bed_either = False , java = 'java' ):
447
450
'''
448
451
count variants by type and tabulate
449
452
:param augmented_tp:
@@ -452,7 +455,7 @@ def summarize_results(prefix, tp, fn, fp, t, var_types, sv_length = 100, regions
452
455
:param augmented_t:
453
456
:return:
454
457
'''
455
- cmd = [' java' , utils .JAVA_XMX , '-jar' , utils .VARSIMJAR , 'vcfcompareresultsparser' ,
458
+ cmd = [java , utils .JAVA_XMX , '-jar' , utils .VARSIMJAR , 'vcfcompareresultsparser' ,
456
459
'-prefix' , prefix , '-tp' ,tp ,
457
460
'-fn' , fn , '-fp' , fp ,
458
461
'-t' , t ,
@@ -492,8 +495,6 @@ def summarize_results(prefix, tp, fn, fp, t, var_types, sv_length = 100, regions
492
495
493
496
494
497
if __name__ == "__main__" :
495
- utils .check_java ()
496
-
497
498
main_parser = argparse .ArgumentParser (description = "VarSim: A high-fidelity simulation validation framework" ,
498
499
formatter_class = argparse .ArgumentDefaultsHelpFormatter )
499
500
main_parser .add_argument ("--reference" , metavar = "FASTA" , help = "reference filename" , required = True , type = str )
@@ -520,6 +521,7 @@ def summarize_results(prefix, tp, fn, fp, t, var_types, sv_length = 100, regions
520
521
main_parser .add_argument ("--vcfeval_options" , metavar = "OPT" , help = "additional options for RTG vcfeval" , default = "" , type = str )
521
522
main_parser .add_argument ("--bed_either" , action = 'store_true' , help = "Use either break-end of the variant for filtering instead of both" )
522
523
main_parser .add_argument ("--java_max_mem" , metavar = "XMX" , help = "max java memory" , default = "10g" , type = str )
524
+ main_parser .add_argument ("--java" , metavar = "PATH" , help = "path to java" , default = "java" , type = str )
523
525
524
526
args = main_parser .parse_args ()
525
527
process (args )
0 commit comments