Skip to content

Commit 631be05

Browse files
committed
enable specification of java; utilize default java when available
1 parent 8e676a0 commit 631be05

File tree

6 files changed

+87
-67
lines changed

6 files changed

+87
-67
lines changed

compare_vcf.py

Lines changed: 21 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,7 @@ def merge_results(outdir, varsim_tp, varsim_fn, vcfeval_tp,
4343

4444

4545
class VCFComparator(object):
46-
def __init__(self, prefix, true_vcf, reference, regions, sample, vcfs, exclude_filtered, match_geno, log_to_file, opts):
46+
def __init__(self, prefix, true_vcf, reference, regions, sample, vcfs, exclude_filtered, match_geno, log_to_file, opts, java = "java"):
4747
self.prefix = prefix
4848
self.true_vcf = true_vcf
4949
self.reference = reference
@@ -55,6 +55,7 @@ def __init__(self, prefix, true_vcf, reference, regions, sample, vcfs, exclude_f
5555
self.regions = regions
5656
self.opts = opts #additional options
5757
self.tp,self.tp_predict,self.fp,self.fn = None, None, None, None
58+
self.java = java
5859

5960
def run(self):
6061
'''
@@ -96,8 +97,8 @@ def get_fn(self):
9697
return self.fn
9798

9899
class VarSimVCFComparator(VCFComparator):
99-
def __init__(self, prefix, true_vcf, reference, regions, sample, vcfs, exclude_filtered, disallow_partial_fp, match_geno, log_to_file, opts):
100-
VCFComparator.__init__(self, prefix, true_vcf, reference, regions, sample, vcfs, exclude_filtered, match_geno, log_to_file, opts)
100+
def __init__(self, prefix, true_vcf, reference, regions, sample, vcfs, exclude_filtered, disallow_partial_fp, match_geno, log_to_file, opts, java = 'java'):
101+
VCFComparator.__init__(self, prefix, true_vcf, reference, regions, sample, vcfs, exclude_filtered, match_geno, log_to_file, opts, java)
101102
self.disallow_partial_fp = disallow_partial_fp
102103
def get_tp_predict(self):
103104
'''
@@ -111,7 +112,7 @@ def run(self):
111112
112113
:return:
113114
'''
114-
cmd = ['java', utils.JAVA_XMX, '-jar', utils.VARSIMJAR, 'vcfcompare',
115+
cmd = [self.java, utils.JAVA_XMX, '-jar', utils.VARSIMJAR, 'vcfcompare',
115116
'-prefix', self.prefix, '-true_vcf',
116117
self.true_vcf,
117118
'-reference', self.reference,
@@ -154,7 +155,7 @@ def run(self):
154155
#command example
155156
#rtg-tools-3.8.4-bdba5ea_install/rtg vcfeval --baseline truth.vcf.gz \
156157
#--calls compare1.vcf.gz -o vcfeval_split_snp -t ref.sdf --output-mode=annotate --sample xx --squash-ploidy --regions ?? \
157-
cmd = ['java', utils.JAVA_XMX, '-jar', utils.RTGJAR, 'vcfeval',
158+
cmd = [self.java, utils.JAVA_XMX, '-jar', utils.RTGJAR, 'vcfeval',
158159
'-o', self.prefix, '--baseline',
159160
self.true_vcf,
160161
'-t', self.reference,
@@ -211,7 +212,7 @@ def run(self):
211212
raise Exception('{0} was not generated by vcfeval. Please check and rerun.'.format(i))
212213
self.tp, self.tp_predict, self.fn, self.fp = tp, tp_predict, fn, fp
213214

214-
def generate_sdf(reference, log):
215+
def generate_sdf(reference, log, java = 'java'):
215216
'''
216217
take reference and generate SDF
217218
:param reference:
@@ -222,7 +223,7 @@ def generate_sdf(reference, log):
222223
LOGGER.info('{0} exists, doing nothing'.format(sdf))
223224
LOGGER.info('to rerun SDF generation, please remove or rename {0}'.format(sdf))
224225
return sdf
225-
cmd = ['java', utils.JAVA_XMX, '-jar',utils.RTGJAR,'format',
226+
cmd = [java, utils.JAVA_XMX, '-jar',utils.RTGJAR,'format',
226227
'-o', sdf, reference]
227228
if log:
228229
with utils.versatile_open(log, 'a') as logout:
@@ -237,6 +238,8 @@ def process(args):
237238
:param args:
238239
:return:
239240
'''
241+
args.java = utils.get_java(args.java)
242+
utils.check_java(args.java)
240243

241244
# Setup logging
242245
FORMAT = '%(levelname)s %(asctime)-15s %(name)-20s %(message)s'
@@ -264,7 +267,7 @@ def process(args):
264267
sample = args.sample, vcfs = args.vcfs,
265268
exclude_filtered = args.exclude_filtered,
266269
disallow_partial_fp = args.disallow_partial_fp,
267-
match_geno = args.match_geno, log_to_file= args.log_to_file, opts = args.vcfcompare_options)
270+
match_geno = args.match_geno, log_to_file= args.log_to_file, opts = args.vcfcompare_options, java = args.java)
268271
varsim_tp, varsim_fn, varsim_fp = varsim_comparator.get_tp(), varsim_comparator.get_fn(), varsim_comparator.get_fp()
269272
varsim_tp = utils.sort_and_compress(varsim_tp)
270273
varsim_fn = utils.sort_and_compress(varsim_fn)
@@ -273,7 +276,7 @@ def process(args):
273276
sdf = args.sdf
274277
if not sdf:
275278
LOGGER.info("user did not supply SDF-formatted reference, trying to generate one...")
276-
sdf = generate_sdf(args.reference, args.log_to_file)
279+
sdf = generate_sdf(args.reference, args.log_to_file, java = args.java)
277280

278281
'''for vcfeval
279282
sample column must be present, and not empty
@@ -290,25 +293,25 @@ def process(args):
290293
sample = args.sample, vcfs = [varsim_fp],
291294
exclude_filtered = args.exclude_filtered,
292295
match_geno = args.match_geno, log_to_file= args.log_to_file,
293-
opts = args.vcfeval_options)
296+
opts = args.vcfeval_options, java = args.java)
294297
vcfeval_tp, vcfeval_tp_predict = vcfeval_comparator.get_tp(), vcfeval_comparator.get_tp_predict()
295298
augmented_tp, augmented_fn, augmented_fp, augmented_t = merge_results(
296299
outdir = args.out_dir,
297300
varsim_tp = varsim_tp, varsim_fn = varsim_fn,
298301
vcfeval_tp = vcfeval_tp, varsim_fp = varsim_fp, vcfeval_tp_predict = vcfeval_tp_predict)
299302
augmented_tp, augmented_fn, augmented_fp, augmented_t = summarize_results(os.path.join(args.out_dir,"augmented"), augmented_tp, augmented_fn, augmented_fp, augmented_t,
300-
var_types= args.var_types, sv_length= args.sv_length, regions = args.regions, bed_either = args.bed_either)
303+
var_types= args.var_types, sv_length= args.sv_length, regions = args.regions, bed_either = args.bed_either, java = args.java)
301304

302305

303306
if args.master_vcf and args.call_vcf:
304-
match_false(augmented_fp, [args.call_vcf, args.master_vcf, augmented_fn], args.out_dir, args.sample, args.log_to_file, args.vcfeval_options, sdf)
305-
match_false(augmented_fn, [args.call_vcf], args.out_dir, args.sample, args.log_to_file, args.vcfeval_options, sdf)
307+
match_false(augmented_fp, [args.call_vcf, args.master_vcf, augmented_fn], args.out_dir, args.sample, args.log_to_file, args.vcfeval_options, sdf, args.java)
308+
match_false(augmented_fn, [args.call_vcf], args.out_dir, args.sample, args.log_to_file, args.vcfeval_options, sdf, args.java)
306309

307310
LOGGER.info("Variant comparison done.\nTrue positive: {0}\nFalse negative: {1}\nFalse positive: {2}\n".
308311
format(augmented_tp, augmented_fn, augmented_fp))
309312

310313

311-
def match_false(augmented_file, files_to_pair_with, out_dir, sample, log_to_file, vcfeval_options, sdf):
314+
def match_false(augmented_file, files_to_pair_with, out_dir, sample, log_to_file, vcfeval_options, sdf, java = "java"):
312315
"""Try to pair up each false call in a file (augmented_file) with a variant in the other files provided in a list (files_to_pair_with) to create an annotated version of the first file.
313316
By default the first variant in the list is provided to get an AF, the 2nd to determine the simulated variant (for false positives) and the 3rd to determine if a false positive is
314317
a pure false positive (not simulated) or not (wrong genotype)"""
@@ -350,7 +353,7 @@ def match_false(augmented_file, files_to_pair_with, out_dir, sample, log_to_file
350353
exclude_filtered = False,
351354
match_geno = False,
352355
log_to_file= log_to_file,
353-
opts = vcfeval_options)
356+
opts = vcfeval_options, java = java)
354357

355358
equivalent_variant = utils.get_equivalent_variant(line_split, vcfeval_comparator.get_tp())
356359

@@ -443,7 +446,7 @@ def parse_jsons(jsonfile, stats, count_sv = False, count_all = False):
443446
print ("error in {}. No {} field".format(jsonfile, err))
444447
stats[vt][mt] += 0
445448

446-
def summarize_results(prefix, tp, fn, fp, t, var_types, sv_length = 100, regions = None, bed_either = False):
449+
def summarize_results(prefix, tp, fn, fp, t, var_types, sv_length = 100, regions = None, bed_either = False, java = 'java'):
447450
'''
448451
count variants by type and tabulate
449452
:param augmented_tp:
@@ -452,7 +455,7 @@ def summarize_results(prefix, tp, fn, fp, t, var_types, sv_length = 100, regions
452455
:param augmented_t:
453456
:return:
454457
'''
455-
cmd = ['java', utils.JAVA_XMX, '-jar', utils.VARSIMJAR, 'vcfcompareresultsparser',
458+
cmd = [java, utils.JAVA_XMX, '-jar', utils.VARSIMJAR, 'vcfcompareresultsparser',
456459
'-prefix', prefix, '-tp',tp,
457460
'-fn', fn, '-fp', fp,
458461
'-t', t,
@@ -492,8 +495,6 @@ def summarize_results(prefix, tp, fn, fp, t, var_types, sv_length = 100, regions
492495

493496

494497
if __name__ == "__main__":
495-
utils.check_java()
496-
497498
main_parser = argparse.ArgumentParser(description="VarSim: A high-fidelity simulation validation framework",
498499
formatter_class=argparse.ArgumentDefaultsHelpFormatter)
499500
main_parser.add_argument("--reference", metavar="FASTA", help="reference filename", required=True, type=str)
@@ -520,6 +521,7 @@ def summarize_results(prefix, tp, fn, fp, t, var_types, sv_length = 100, regions
520521
main_parser.add_argument("--vcfeval_options", metavar="OPT", help="additional options for RTG vcfeval", default="", type = str)
521522
main_parser.add_argument("--bed_either", action = 'store_true', help="Use either break-end of the variant for filtering instead of both")
522523
main_parser.add_argument("--java_max_mem", metavar="XMX", help="max java memory", default="10g", type = str)
524+
main_parser.add_argument("--java", metavar="PATH", help="path to java", default="java", type = str)
523525

524526
args = main_parser.parse_args()
525527
process(args)

utils.py

Lines changed: 15 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -11,11 +11,20 @@
1111
SORT_VCF = os.path.realpath(os.path.join(MY_DIR, "src","sort_vcf.sh"))
1212
BGZIP = os.path.realpath(os.path.join(MY_DIR, "opt","htslib-1.9_install/bin/bgzip"))
1313
JAVA_XMX = "-Xmx"
14-
14+
DEFAULT_JAVA = os.path.realpath(os.path.join(MY_DIR, "opt",
15+
"jdk1.8.0_131", "bin", "java"))
1516
COMBINE_KEEP_ALL_DUPLICATE = 1
1617
COMBINE_KEEP_FIRST_DUPLICATE = 2
1718
COMBINE_KEEP_NO_DUPLICATE = 3
1819

20+
def get_java(java = "java"):
21+
'''
22+
return default java if it exists, otherwise use user-specified version
23+
'''
24+
if os.path.isfile(DEFAULT_JAVA):
25+
return DEFAULT_JAVA
26+
return java
27+
1928
def count_variants(vcf):
2029
'''
2130
count number of variants
@@ -29,10 +38,10 @@ def count_variants(vcf):
2938
count += 1
3039
return count
3140

32-
def check_java():
41+
def check_java(java="java"):
3342
logger = logging.getLogger(check_java.__name__)
3443
try:
35-
jv = subprocess.check_output("java -Xmx100m -version", stderr=subprocess.STDOUT, shell=True)
44+
jv = subprocess.check_output("{} -Xmx100m -version".format(java), stderr=subprocess.STDOUT, shell=True)
3645
if "openjdk" in jv or "OpenJDK" in jv:
3746
raise EnvironmentError("Please replace OpenJDK with Oracle JDK")
3847
jv = filter(lambda x: x.startswith("java version"), jv.split("\n"))[0].split()[2].replace("\"", "")
@@ -42,8 +51,9 @@ def check_java():
4251
except subprocess.CalledProcessError:
4352
raise EnvironmentError("No java (>=1.8) found")
4453

45-
def get_version():
46-
return subprocess.check_output("java -jar {} -version".format(VARSIMJAR), shell=True).strip()
54+
def get_version(java="java"):
55+
java = get_java(java)
56+
return subprocess.check_output("{} -jar {} -version".format(java, VARSIMJAR), shell=True).strip()
4757

4858
def run_shell_command(cmd, cmd_stdout, cmd_stderr, cmd_dir="."):
4959
'''

0 commit comments

Comments
 (0)