22
33import argparse
44import os
5+ import shlex
56import shutil
7+ import subprocess
68import sys
79from importlib .metadata import version
810from pathlib import Path
11+ from typing import Any
912
1013import numpy as np
1114
@@ -41,30 +44,32 @@ def get_binary_path(name: str) -> str:
4144 if system_binary :
4245 return system_binary
4346
44- raise FileNotFoundError (
45- f"Binary '{ name } ' not found. Please ensure smudgeplot is properly installed. "
47+ msg = (
48+ f"Binary '{ name } ' not found. Please ensure smudgeplot is properly installed.\n "
4649 f"Checked locations:\n "
47- f" - Package: { bundled_binary } \n "
48- f" - System PATH: (not found) \n "
50+ f" - Package: { bundled_binary . parent } \n "
51+ f" - System PATH: { os . get_exec_path () } \n "
4952 f"\n You may need to reinstall smudgeplot or install the binaries manually."
5053 )
54+ raise FileNotFoundError (msg )
5155
5256
def run_binary(name: str, args: list[Any]) -> None:
    """
    Run a binary with the given arguments.

    The command is executed directly as an argument vector (no shell is
    involved), so arguments containing spaces or shell metacharacters are
    handed to the binary unchanged.

    Args:
        name: Name of the binary
        args: List of arguments; each one is converted with str() before use

    Raises:
        FileNotFoundError: propagated from get_binary_path when the binary
            cannot be located
        subprocess.CalledProcessError: on non-zero exit of the command
    """
    cmd_line = [get_binary_path(name), *(str(arg) for arg in args)]
    # shlex.join quotes every argument, so the logged line is unambiguous
    # and can be pasted back into a shell to reproduce the call.
    sys.stderr.write(f"Calling: {shlex.join(cmd_line)}\n")
    # check=True turns a non-zero exit status into CalledProcessError
    # instead of letting the failure pass silently.
    subprocess.run(cmd_line, check=True)
6873
6974
7075class Parser :
@@ -332,41 +337,38 @@ def main():
332337 fin ()
333338
334339 if _parser .task == "hetmers" :
335- # PloidyPlot is expected to be installed in the system as well as the R library supporting it
336- plot_args = " -o" + str (args .o )
337- plot_args += " -e" + str (args .L )
338- plot_args += " -T" + str (args .t )
340+ hetmer_args = [
341+ f"-o{ args .o } " ,
342+ f"-e{ args .L } " ,
343+ f"-T{ args .t } " ,
344+ ]
339345 if args .verbose :
340- plot_args += " -v"
346+ hetmer_args . append ( " -v")
341347 if args .tmp != "." :
342- plot_args += " -P" + args .tmp
343- plot_args += " " + args .infile
348+ hetmer_args . append ( f"-P { args .tmp } " )
349+ hetmer_args . append ( args .infile )
344350
345- run_binary ("hetmers" , plot_args )
351+ run_binary ("hetmers" , hetmer_args )
346352
347353 fin ()
348354
349355 if _parser .task == "extract" :
350- plot_args = " -o" + str (args .o )
351- plot_args += " -T" + str (args .t )
356+ extract_args = [
357+ f"-o{ args .o } " ,
358+ f"-T{ args .t } " ,
359+ ]
352360 if args .verbose :
353- plot_args += " -v"
361+ extract_args . append ( " -v")
354362 if args .tmp != "." :
355- plot_args += " -P" + args .tmp
356- plot_args += " " + args .infile
357- if args .sma .endswith (".sma" ):
358- plot_args += " " + args .sma .removesuffix (".sma" )
359- else :
360- plot_args += " " + args .sma
363+ extract_args .append (f"-P{ args .tmp } " )
364+ extract_args .append (args .infile )
365+ extract_args .append (args .sma .removesuffix (".sma" ))
361366
362- run_binary ("extract_kmer_pairs" , plot_args )
367+ run_binary ("extract_kmer_pairs" , extract_args )
363368
364369 fin ()
365370
366- if args .title :
367- title = args .title
368- else :
369- title = "." .join (args .infile .split ("/" )[- 1 ].split ("." )[0 :2 ])
371+ title = args .title or str (Path (args .infile ).with_suffix ("" ).name )
370372
371373 if _parser .task == "plot" :
372374 smudge_tab = smg .read_csv (args .smudgefile , sep = "\t " , names = ["structure" , "size" , "rel_size" ])
@@ -396,12 +398,11 @@ def main():
396398 coverages .local_aggregation (distance = args .d , noise_filter = 1000 , mask_errors = True )
397399 coverages .count_kmers ()
398400 sys .stderr .write (
399- f"\t \
400- Total kmers: { coverages .total_kmers } \n \t \
401- Genomic kmers: { coverages .total_genomic_kmers } \n \t \
402- Genomic kmers in smudges: { coverages .total_genomic_kmers_in_smudges } \n \t \
403- Sequencing errors: { coverages .total_error_kmers } \n \t \
404- Fraction of errors: { round (coverages .total_error_kmers / coverages .total_kmers , 3 )} "
401+ f"\n Total kmers: { coverages .total_kmers } \n "
402+ f"Genomic kmers: { coverages .total_genomic_kmers } \n "
403+ f"Genomic kmers in smudges: { coverages .total_genomic_kmers_in_smudges } \n "
404+ f"Sequencing errors: { coverages .total_error_kmers } \n "
405+ f"Fraction of errors: { coverages .error_fraction :.3f} \n "
405406 )
406407
407408 smudge_size_cutoff = (
@@ -420,11 +421,7 @@ def main():
420421 delimiter = "\t " ,
421422 )
422423
423- limit = 0.7
424- if coverages .error_fraction < limit :
425- cov = smudges .cov
426- else :
427- cov = 0
424+ cov = smudges .cov if coverages .error_fraction < 0.7 else 0
428425
429426 sys .stderr .write ("\n Creating centrality plot\n " )
430427 smudges .centrality_plot (args .o , args .format )
@@ -459,10 +456,11 @@ def main():
459456 json_report = args .json_report ,
460457 input_params = vars (args ),
461458 palette = args .col_ramp ,
462- invert_cols = args .invert_cols
459+ invert_cols = args .invert_cols ,
463460 )
464461
465462 fin ()
466463
464+
467465if __name__ == "__main__" :
468466 main ()
0 commit comments