Skip to content

Commit

Permalink
Merge pull request #404 from PoonLab/dev
Browse files Browse the repository at this point in the history
Dev
  • Loading branch information
ArtPoon authored Jun 7, 2022
2 parents ac00d0c + 9125219 commit 5ed3683
Show file tree
Hide file tree
Showing 2 changed files with 23 additions and 21 deletions.
33 changes: 17 additions & 16 deletions covizu/treetime.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,8 @@
import covizu
from covizu.utils.seq_utils import *
from covizu.utils.progress_utils import Callback
#from covizu.utils.batch_utils import unpack_records
import covizu.utils.batch_utils


def fasttree(fasta, binpath='fasttree2', seed=1, gtr=True, collapse=True):
Expand Down Expand Up @@ -41,12 +43,14 @@ def fasttree(fasta, binpath='fasttree2', seed=1, gtr=True, collapse=True):
stdout, stderr = p.communicate(input=in_str.encode('utf-8'))
nwk = stdout.decode('utf-8')

# collapse low support nodes into polytomies
# parse Newick tree string from output
phy = Phylo.read(StringIO(nwk), format='newick')
phy.root_with_outgroup('reference') # issue #396
phy.prune('reference')
if collapse:
phy = Phylo.read(StringIO(nwk), format='newick')
phy.collapse_all(lambda x: x.confidence is not None and
x.confidence < 0.5)
nwk = phy.format('newick')
# collapse low support nodes into polytomies
phy.collapse_all(lambda x: x.confidence is not None and x.confidence < 0.5)
nwk = phy.format('newick')
return nwk


Expand Down Expand Up @@ -80,7 +84,8 @@ def treetime(nwk, fasta, outdir, binpath='treetime', clock=None, verbosity=1):
'--outdir', outdir, '--verbose', str(verbosity),
'--plot-rtt', 'none', # see issue #66
'--clock-filter', '0', # issue #245
'--keep-polytomies' # issue #339
'--keep-polytomies', # issue #339
'--keep-root' # issue 396
]
if clock:
call.extend(['--clock-rate', str(clock)])
Expand Down Expand Up @@ -118,6 +123,8 @@ def parse_nexus(nexus_file, fasta, callback=None):
"""
coldates = {}
for h, _ in fasta.items():
if 'reference' in h:
continue
_, accn, coldate = h.split('|')
coldates.update({accn: date2float(coldate)})

Expand Down Expand Up @@ -170,7 +177,7 @@ def parse_nexus(nexus_file, fasta, callback=None):
def retrieve_genomes(by_lineage, known_seqs, ref_file, earliest=True, callback=None):
"""
Identify most recent sampled genome sequence for each Pangolin lineage.
Export as FASTA for TreeTime analysis.
Export as FASTA for TreeTime analysis, including reference genome.
:param by_lineage: dict, return value from gisaid_utils::sort_by_lineage
:param known_seqs: dict, sequences used in Pango lineage designations
Expand All @@ -185,9 +192,9 @@ def retrieve_genomes(by_lineage, known_seqs, ref_file, earliest=True, callback=N
_, refseq = convert_fasta(handle)[0]

# allocate lists
coldates = []
lineages = []
seqs = []
coldates = [None]
lineages = ['reference']
seqs = [refseq]

# retrieve unaligned genomes from database
for lineage, records in by_lineage.items():
Expand Down Expand Up @@ -286,12 +293,6 @@ def parse_args():
cb.callback("Identifying lineage representative genomes")
fasta = retrieve_genomes(by_lineage, known_seqs=lineages, ref_file=args.ref, earliest=args.earliest,
callback=cb.callback)
outfile = open("iss385.fasta", 'w')
for header, seq in fasta.items():
outfile.write(f">{header}\n{seq}\n")
outfile.close()

sys.exit()

cb.callback("Reconstructing tree with {}".format(args.ft2bin))
nwk = fasttree(fasta, binpath=args.ft2bin)
Expand Down
11 changes: 6 additions & 5 deletions covizu/utils/batch_utils.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
import subprocess
from Bio import Phylo
from covizu import clustering, treetime, beadplot
from covizu import clustering, beadplot
import sys
import json
import covizu.treetime


def unpack_records(records):
Expand Down Expand Up @@ -55,20 +56,20 @@ def build_timetree(by_lineage, args, callback=None):

if callback:
callback("Identifying lineage representative genomes")
fasta = treetime.retrieve_genomes(by_lineage, known_seqs=lineages, ref_file=args.ref,
fasta = covizu.treetime.retrieve_genomes(by_lineage, known_seqs=lineages, ref_file=args.ref,
earliest=True)

if callback:
callback("Reconstructing tree with {}".format(args.ft2bin))
nwk = treetime.fasttree(fasta, binpath=args.ft2bin)
nwk = covizu.treetime.fasttree(fasta, binpath=args.ft2bin)

if callback:
callback("Reconstructing time-scaled tree with {}".format(args.ttbin))
nexus_file = treetime.treetime(nwk, fasta, outdir=args.outdir, binpath=args.ttbin,
nexus_file = covizu.treetime.treetime(nwk, fasta, outdir=args.outdir, binpath=args.ttbin,
clock=args.clock, verbosity=0)

# writes output to treetime.nwk at `nexus_file` path
return treetime.parse_nexus(nexus_file, fasta)
return covizu.treetime.parse_nexus(nexus_file, fasta)


def beadplot_serial(lineage, features, args, callback=None):
Expand Down

0 comments on commit 5ed3683

Please sign in to comment.