@@ -1747,108 +1747,32 @@ cdef class Tree:
1747
1747
"""
1748
1748
ops.resolve_polytomy(self , descendants)
1749
1749
1750
- def cophenetic_matrix (self ):
1751
- """ Return a cophenetic distance matrix of the tree.
1752
-
1753
- The `cophenetic matrix
1754
- <https://en.wikipedia.org/wiki/Cophenetic>`_ is a matrix
1755
- representation of the distance between each node.
1756
-
1757
- If we have a tree like::
1758
-
1759
- ╭╴A
1760
- ╭╴y╶┤
1761
- │ ╰╴B
1762
- ╴z╶┤
1763
- │ ╭╴C
1764
- ╰╴x╶┤
1765
- │ ╭╴D
1766
- ╰╴w╶┤
1767
- ╰╴E
1768
-
1769
- where w, x, y, z are internal nodes, then::
1770
-
1771
- d(A,B) = d(y,A) + d(y,B)
1772
-
1773
- and::
1774
-
1775
- d(A,E) = d(z,A) + d(z,E)
1776
- = (d(z,y) + d(y,A)) + (d(z,x) + d(x,w) + d(w,E))
1777
-
1778
- To compute it, we use an idea from
1779
- https://gist.github.com/jhcepas/279f9009f46bf675e3a890c19191158b
1780
-
1781
- First, for each node we find its path to the root. For example::
1782
-
1783
- A -> A, y, z
1784
- E -> E, w, x, z
1785
-
1786
- and make these orderless sets. Then we XOR the two sets to
1787
- only find the elements that are in one or other sets but not
1788
- both. In this case A, E, y, x, w.
1789
-
1790
- The distance between the two nodes is the sum of the distances
1791
- from each of those nodes to the parent
1792
-
1793
- One more optimization: since the distances are symmetric, and
1794
- distance to itself is zero we user itertools.combinations
1795
- rather than itertools.permutations. This cuts our computes
1796
- from theta(n^2) 1/2n^2 - n (= O(n^2), which is still not
1797
- great, but in reality speeds things up for large trees).
1750
+ def distance_matrix (self , selector = None , topological = False , squared = False ):
1751
+ """ Return a matrix of paired distances between all the selected leaves.
1798
1752
1799
- For this tree, we will return the two dimensional array::
1800
-
1801
- A B C D E
1802
- A 0 d(A,y) + d(B,y) d(A,z) + d(C,z) d(A,z) + d(D,z) d(A,z) + d(E,z)
1803
- B d(B,y) + d(A,y) 0 d(B,z) + d(C,z) d(B,z) + d(D,z) d(B,z) + d(E,z)
1804
- C d(C,z) + d(A,z) d(C,z) + d(B,z) 0 d(C,x) + d(D,x) d(C,x) + d(E,x)
1805
- D d(D,z) + d(A,z) d(D,z) + d(B,z) d(D,x) + d(C,x) 0 d(D,w) + d(E,w)
1806
- E d(E,z) + d(A,z) d(E,z) + d(B,z) d(E,x) + d(C,x) d(E,w) + d(D,w) 0
1807
-
1808
- We will also return the one dimensional array with the leaves
1809
- in the order in which they appear in the matrix (i.e. the
1810
- column and/or row headers).
1753
+ :param selector: Function that returns True for the selected leaves.
1754
+ If None, all leaves will be selected.
1755
+ :param topological: If True, the distance between nodes will be the
1756
+ number of nodes between them (instead of the sum of branch lenghts).
1757
+ :param squared: If True, the output matrix will be squared and symmetrical.
1758
+ Otherwise, only the upper triangle is returned (to save memory).
1811
1759
"""
1812
- # TODO: Consider naming this distance_matrix(), and also writing it
1813
- # more clearly (and much faster, and with more options) as:
1814
- # return ops.distance_matrix(self, ...)
1760
+ return ops.distance_matrix(self , selector, topological, squared)
1815
1761
1816
- leaves = list (self .leaves())
1817
- paths = {x: set () for x in leaves}
1818
-
1819
- # get the paths going up the tree
1820
- # we get all the nodes up to the last one and store them in a set
1821
-
1822
- for n in leaves:
1823
- if n.is_root:
1824
- continue
1825
- movingnode = n
1826
- while not movingnode.is_root:
1827
- paths[n].add(movingnode)
1828
- movingnode = movingnode.up
1829
-
1830
- # now we want to get all pairs of nodes using itertools combinations. We need AB AC etc but don't need BA CA
1831
-
1832
- leaf_distances = {x.name: {} for x in leaves}
1833
-
1834
- for (leaf1, leaf2) in itertools.combinations(leaves, 2 ):
1835
- # figure out the unique nodes in the path
1836
- uniquenodes = paths[leaf1] ^ paths[leaf2]
1837
- distance = sum (x.dist for x in uniquenodes)
1838
- leaf_distances[leaf1.name][leaf2.name] = leaf_distances[leaf2.name][leaf1.name] = distance
1762
+ def cophenetic_matrix (self ):
1763
+ """ Return a cophenetic matrix, and an array with the leaf names.
1839
1764
1840
- allleaves = sorted (leaf_distances.keys()) # the leaves in order that we will return
1765
+ The `cophenetic matrix <https://en.wikipedia.org/wiki/Cophenetic>`_ is
1766
+ a matrix where each element is the distance between two leaves.
1841
1767
1842
- output = [] # the two dimensional array that we will return
1768
+ The matrix elements correspond to the order of the leaf names array.
1769
+ """
1770
+ from warnings import warn
1771
+ warn(' Use distance_matrix() instead' , DeprecationWarning )
1843
1772
1844
- for i, n in enumerate (allleaves):
1845
- output.append([])
1846
- for m in allleaves:
1847
- if m == n:
1848
- output[i].append(0 ) # distance to ourself = 0
1849
- else :
1850
- output[i].append(leaf_distances[n][m])
1851
- return output, allleaves
1773
+ matrix = ops.distance_matrix(self , squared = True )
1774
+ names = [leaf.name for leaf in self .leaves()]
1775
+ return matrix, names
1852
1776
1853
1777
# TODO: The next two functions should really be parsers (which may
1854
1778
# be also used when calling __init__().
0 commit comments