Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/docker-image.yml
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ jobs:
-
name: Test
run: |
docker run --rm -i -v $PWD/tests:/input -v $PWD/tests/:/reads -v $PWD/output:/out -v $PWD/run:/run ${{ env.TEST_TAG }} --tree --standalone_path /input/marker_genes --dna_reference /input/dna_ref.fa --reads /reads/sample_1.fastq --output_path /out
docker run --rm -i -v $PWD/tests:/input -v $PWD/tests/:/reads -v $PWD/output:/out -v $PWD/run:/run ${{ env.TEST_TAG }} --tree --standalone_path /input/marker_genes --dna_reference /input/dna_ref.fa --reads /reads/sample_1.fastq --output_path /out/output
if [ ! -f output/tree_sample_1.nwk ] ; then exit 1; fi
-
name: Login to DockerHub
Expand Down
10 changes: 9 additions & 1 deletion read2tree/Aligner.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,10 @@
from .wrappers.aligners import Mafft, DataType
from .utils.seq_utils import concatenate


logger = logging.getLogger(__name__)


class Aligner(object):

def __init__(self, args, og_set=None, step=None):
Expand All @@ -33,6 +35,11 @@ def __init__(self, args, og_set=None, step=None):
self.mapped_aligns = {}
self.elapsed_time = 0

# # Configure logging based on provided args
# if self.args.debug:
# logger.setLevel(logging.DEBUG)
# logger.debug("Debug mode is on")

self._reads = self.args.reads
self._species_name = self.args.species_name

Expand Down Expand Up @@ -285,7 +292,8 @@ def _align_worker(self, og_set):
mafft_wrapper = Mafft(value.aa, datatype=DataType.PROTEIN)
mafft_wrapper.options.options['--localpair'].set_value(True)
mafft_wrapper.options.options['--maxiterate'].set_value(1000)
logger.debug("aligning OG {} with {} proteins".format(key, len(value.aa)))
mafft_wrapper.options.options['--thread'].set_value(self.args.threads)
logger.info("aligning OG {} with {} proteins".format(key, len(value.aa)))
alignment = mafft_wrapper()
codons = self._get_codon_dict_og(value)
align = Alignment()
Expand Down
62 changes: 35 additions & 27 deletions read2tree/Mapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,10 @@ def __init__(self, args, ref_set=None, og_set=None, species_name=None, step=None
self.elapsed_time = 0

self.logger = logging.getLogger(__name__)
if self.args.debug:
self.logger.setLevel(logging.DEBUG)
self.logger.debug("Debug mode is on")


self._reads = self.args.reads
if not species_name:
Expand Down Expand Up @@ -126,7 +130,7 @@ def _call_wrapper(self, ref_file_handle, reads, tmp_output_folder):
line_minimap= minimap2_ex +" "+ minimap_argm + " -t " + str(self.args.threads) +" "+ ref_file_handle + " " + reads_str + " > " + sam_file
#self._rm_file(ref_file_handle + "-enc.2.ngm", ignore_error=True)
self._output_shell(line_minimap)
self.logger.info('mapping with ' + line_minimap)
self.logger.debug('mapping with ' + line_minimap)

# if len(self._reads) == 2:
# ngm_wrapper = NGM(ref_file_handle, reads, tmp_output_folder)
Expand Down Expand Up @@ -180,19 +184,20 @@ def _read_mapping_from_folder(self, mapping_name=None, ref_records=None):
map_reads_species = {}
if not mapping_name:
mapping_name = self._mapping_name
self.logger.debug('--- Retrieve mapped consensus sequences ---'+str(mapping_name))
self.logger.info('--- Retrieve mapped consensus sequences ---'+str(mapping_name))
in_folder = os.path.join(self.args.output_path, "04_mapping_"+mapping_name)

bam_files = glob.glob(os.path.join(in_folder, "*.bam"))
if self.args.min_cons_coverage >= 2 and bam_files: # default self.args.min_cons_coverage is 1
self.logger.debug('Generating consensus from bam files' )
self.logger.info('Generating consensus from bam files' )
for file in tqdm(bam_files, desc='Generating consensus from bam files ', unit=' species'):
species = file.split("/")[-1].split("_")[0]
ref_file = os.path.join(self.args.output_path, '02_ref_dna',species+'_OGs.fa')
map_reads_species[species] = Reference()
line_samtools_index = samtools+' index -@ ' + str(self.args.threads) + ' ' +file
self._output_shell(line_samtools_index)
self.logger.debug(line_samtools_index)
self._output_shell(line_samtools_index)


consensus = self._build_consensus_seq_v2(ref_file, file)
records = []
Expand All @@ -213,7 +218,7 @@ def _read_mapping_from_folder(self, mapping_name=None, ref_records=None):
seqC.write_seq_completeness(os.path.join(in_folder,species + "_OGs_sc.txt"))
self.all_sc.update(seqC.seq_completeness)
else:
self.logger.debug('Loading consensus read mappings '+in_folder+ "*_consensus.fa")
self.logger.info('Loading consensus read mappings '+in_folder+ "*_consensus.fa")
for file in tqdm(glob.glob(os.path.join(in_folder, "*_consensus.fa")), desc='Loading consensus read mappings ', unit=' species'):
species = file.split("/")[-1].split("_")[0]
map_reads_species[species] = Reference()
Expand Down Expand Up @@ -266,7 +271,7 @@ def _make_tmpdir(self):
# .format(self._species_name))
# else:
# tmp_output_folder = tempfile.TemporaryDirectory(prefix='ngm_')
# self.logger.debug('--- Creating tmp directory on local node ---')
# self.logger.info('--- Creating tmp directory on local node ---')
tmp_output_folder=self.args.output_path
return tmp_output_folder

Expand Down Expand Up @@ -340,14 +345,14 @@ def _map_reads_to_references(self, ref):
species+"_OGs_sc.txt"))
self.all_sc.update(seqC.seq_completeness)
except AttributeError as a:
self.logger.debug('Reads not properly processed for further steps.')
self.logger.debug('AttributeError: {}'.format(a))
self.logger.info('Reads not properly processed for further steps.')
self.logger.info('AttributeError: {}'.format(a))
except ValueError as v:
self.logger.debug('Reads not properly processed for further steps.')
self.logger.debug('ValueError: {}'.format(v))
self.logger.info('Reads not properly processed for further steps.')
self.logger.info('ValueError: {}'.format(v))
except TypeError as t:
self.logger.debug('Reads not properly processed for further steps.')
self.logger.debug('TypeError: {}'.format(t))
self.logger.info('Reads not properly processed for further steps.')
self.logger.info('TypeError: {}'.format(t))
else:
mapped_reads = []

Expand Down Expand Up @@ -462,7 +467,7 @@ def _get_mapping_stats(self, bam_file):
# """
# bam_file = sam_file_base+ '_sorted.bam'
#
# self.logger.debug("{}: --- Binning reads ---".format(self._species_name))
# self.logger.info("{}: --- Binning reads ---".format(self._species_name))
# output_folder = os.path.join(self.args.output_path, "04_read_ogs_" +
# self._species_name)
# if not os.path.exists(output_folder):
Expand Down Expand Up @@ -574,38 +579,41 @@ def _post_process_read_mapping(self, ref_file, sam_file):
# ref_file.split('/')[-1].split('.')[0] +
# "_post")
# if self.args.single_mapping:
# self.logger.debug("{}: --- POSTPROCESSING MAPPING ---".format(self._species_name))
# self.logger.info("{}: --- POSTPROCESSING MAPPING ---".format(self._species_name))

# ngmlr has no option to write its output directly as a BAM file
#if 'sam' in bam_file.split(".")[-1]:
# sam_file = bam_file
sam_file_base = sam_file[:-4]
if os.path.exists(sam_file):
line_samtools_view = samtools+' view -F 4 -bh -S -@ ' + str(self.args.threads) +' -o ' + sam_file_base + ".bam " + sam_file
self._output_shell(line_samtools_view)
self.logger.debug(line_samtools_view)
self._output_shell(line_samtools_view)

else:
self.logger.debug("Sam file is not generated", sam_file)
self.logger.info("Sam file is not generated", sam_file)

# if self.args.single_mapping:
# self.logger.error("single mapping is not tested with this version ")
#self.logger.debug("{}: ---- Samtools view completed".format(self._species_name))
#self.logger.info("{}: ---- Samtools view completed".format(self._species_name))

if os.path.exists(sam_file_base+".bam"):

line_samtools_sort = samtools+' sort -@ ' + str(self.args.threads) + ' -o ' + sam_file_base + "_sorted.bam " + sam_file_base+".bam"
self.logger.debug("Running "+ line_samtools_sort)
self._output_shell(line_samtools_sort)
self.logger.debug("running "+ line_samtools_sort)

else:
self.logger.debug("bam file is not generated", sam_file_base+".bam")
self.logger.info("BAM file is not generated", sam_file_base+".bam")

# if self.args.single_mapping:
# self.logger.debug("{}: ---- Samtools sort completed".format(self._species_name))
# self.logger.info("{}: ---- Samtools sort completed".format(self._species_name))

if os.path.exists(sam_file_base + "_sorted.bam"):
line_samtools_index= samtools+' index -@ ' + str(self.args.threads) + ' ' + sam_file_base + "_sorted.bam"
self._output_shell(line_samtools_index)
self.logger.debug(line_samtools_index)
self._output_shell(line_samtools_index)


self._rm_file(sam_file_base + ".bam", ignore_error=True)
self._rm_file(sam_file_base + ".sam", ignore_error=True)
Expand Down Expand Up @@ -682,27 +690,27 @@ def _output_shell(self, line):
shell_command = subprocess.Popen( line, stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=True)

#except:
# self.logger.debug("Shell command failed to execute by running ")
# self.logger.info("Shell command failed to execute by running ")
# return None

# try:
# self.logger.debug("Running " + line)
# self.logger.info("Running " + line)
# shell_command = subprocess.Popen(
# line, stdout=subprocess.PIPE, stderr=subprocess.PIPE,
# shell=True)
# except:
# self.logger.debug("Shell command failed to execute by running ")
# self.logger.info("Shell command failed to execute by running ")
# return None

(output, err) = shell_command.communicate()
if output:
self.logger.debug("Shell output: "+ str(output))
print("Shell output: " + str(output))
#print("Shell output: " + str(output))
if err:
self.logger.debug("Shell err: " + str(err))
print("Shell err: " + str(err))
#print("Shell err: " + str(err))
# if err:
# self.logger.debug("Shell err: " + str(err))
# self.logger.info("Shell err: " + str(err))

shell_command.wait()
if shell_command.returncode != 0:
Expand Down
2 changes: 1 addition & 1 deletion read2tree/TreeInference.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
class TreeInference(object):

def __init__(self, args, concat_alignment=None):
print('--- Tree inference ---')
#print('--- Tree inference ---')

self.args = args

Expand Down
4 changes: 2 additions & 2 deletions read2tree/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,8 @@
from pkg_resources import resource_string
logging.getLogger(__name__).addHandler(logging.NullHandler())

__version__ = '1.5.3'
__copyright__ = 'read2tree (C) 2017-{:d} David Dylus ' \
__version__ = '2.0.0'
__copyright__ = 'read2tree (C) 2017-{:d} David Dylus, Adrian M. Altenhoff, Sina Majidian ' \
.format(date.today().year)


Expand Down
12 changes: 7 additions & 5 deletions read2tree/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,12 +34,10 @@
COPYRIGHT = '(C) 2017-{:d} David V Dylus'.format(date.today().year)

# logger = logging.getLogger(__name__)
logger_level = "DEBUG" # DEBUG INFO # TRACE DEBUG INFO WARN ERROR FATAL

logging.basicConfig(format='%(asctime)s %(levelname)-8s %(message)s', level=logging.INFO, datefmt='%Y-%m-%d %H:%M:%S')
logger = logging.getLogger(__name__)

if logger_level == "INFO":
logger.setLevel(logging.INFO)

# logger.disabled = True

Expand Down Expand Up @@ -78,7 +76,7 @@ def parse_args(argv, exe_name, desc):
'Examples: -ax sr , -ax map-hifi , -ax map-pb or -ax map-ont ')

arg_parser.add_argument('--threads', type=int, default=1,
help='[Default is 1] Number of threads for the mapping ')
help='[Default is 1] Number of threads for gene marker alignment (mafft) and read mapping (minimap2) and tree inference (iqtree) ')

# arg_parser.add_argument('--split_reads', action='store_true',
# help='[Default is off] Splits reads as defined by split_len (200) '
Expand Down Expand Up @@ -213,6 +211,10 @@ def parse_args(argv, exe_name, desc):
# Parse the arguments.
args = arg_parser.parse_args(argv)

if args.debug: # "DEBUG" # DEBUG INFO # TRACE DEBUG INFO WARN ERROR FATAL
logger.setLevel(logging.DEBUG)
logger.debug("Debug mode is on")

_reads = ""
_species_name = ""

Expand Down Expand Up @@ -331,7 +333,7 @@ def main(argv, exe_name, desc=''):
logger.error("read file doesn't exist")
sys.exit()

if args.step == "all":
if args.step == "all":
logger.info('{}: ------- NEW RUN -------'.format(args.species_name))
oma_output = OMAOutputParser(args)
args.oma_output_path = oma_output.oma_output_path
Expand Down
2 changes: 2 additions & 0 deletions read2tree/wrappers/aligners/mafft.py
Original file line number Diff line number Diff line change
Expand Up @@ -181,6 +181,8 @@ def get_default_options():
# --fastapair or --blastpair is selected. Default: 2.7
FloatOption('--weighti', 2.7, active=False),

FloatOption('--thread', 1, active=False),

# Guide tree is built number times in the progressive stage. Valid with 6mer
# distance. Default: 2
IntegerOption('--retree', 2, active=False),
Expand Down
Loading