Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Refactor bpt canonical #222

Merged
merged 4 commits into from
Nov 30, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
508 changes: 508 additions & 0 deletions doc/source/fig/canonical_binary_partition_tree_example.svg
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
18 changes: 11 additions & 7 deletions doc/source/python/graph_core.rst
Original file line number Diff line number Diff line change
Expand Up @@ -7,24 +7,28 @@ Algorithms for graphs

.. autosummary::

ultrametric_open
labelisation_2_graph_cut
adjacency_matrix_2_undirected_graph
graph_cut_2_labelisation
labelisation_2_graph_cut
make_graph_from_points
minimum_spanning_tree
subgraph
ultrametric_open
undirected_graph_2_adjacency_matrix
adjacency_matrix_2_undirected_graph
make_graph_from_points

.. autofunction:: higra.ultrametric_open

.. autofunction:: higra.labelisation_2_graph_cut
.. autofunction:: higra.adjacency_matrix_2_undirected_graph

.. autofunction:: higra.graph_cut_2_labelisation

.. autofunction:: higra.labelisation_2_graph_cut

.. autofunction:: higra.make_graph_from_points

.. autofunction:: higra.minimum_spanning_tree

.. autofunction:: higra.subgraph

.. autofunction:: higra.undirected_graph_2_adjacency_matrix

.. autofunction:: higra.adjacency_matrix_2_undirected_graph
.. autofunction:: higra.ultrametric_open
6 changes: 6 additions & 0 deletions doc/source/python/hg_utils.rst
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@ Utility functions

.. autosummary::

arg_sort
sort
is_iterable
extend_class
normalize_shape
Expand All @@ -22,6 +24,10 @@ Utility functions
get_lib_include
get_lib_cmake

.. autofunction:: higra.arg_sort

.. autofunction:: higra.sort

.. autofunction:: higra.is_iterable

.. autofunction:: higra.extend_class
Expand Down
4 changes: 3 additions & 1 deletion higra/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -14,11 +14,13 @@ set(PY_FILES
__init__.py
concept.py
data_cache.py
hg_utils.py)
hg_utils.py
sorting.py)

REGISTER_PYTHON_MODULE_FILES("${PY_FILES}")

set(PYMODULE_COMPONENTS
py_sorting.cpp
pymodule.cpp)

add_subdirectory(accumulator)
Expand Down
1 change: 1 addition & 0 deletions higra/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
data_cache._data_cache__init()

# normal modules
from .sorting import *
from .accumulator import *
from .algo import *
from .assessment import *
Expand Down
68 changes: 68 additions & 0 deletions higra/algo/graph_core.py
Original file line number Diff line number Diff line change
Expand Up @@ -268,3 +268,71 @@ def symmetrization_fun(A):
raise ValueError("Unknown graph_type: " + str(graph_type))

return g, edge_weights


def subgraph(graph, edge_indices, spanning=True, return_vertex_map=False):
"""
Extract a subgraph of the input graph. Let :math:`G=(V,E)` be the graph :attr:`graph` and let :math:`E^*`
be a subset of :math:`E`. The subgraph of :math:`G` induced by :math:`E^*` is equal to:

- :math:`(V, E^*)` is :attr:`spanning` is ``True``; and
- :math:`(\\bigcup E^*, E^*)` otherwise (the set of vertices of the subgraph is equal to the set of vertices present at
an extremity of an edge in :math:`E^*`).

The array :attr:`edge_indices` contains the indices of the edges in the set :math:`E^*`. The edges in the subgraph
are in the same order as the edges in the array :attr:`edge_indices`.

If :attr:`spanning` is ``False``, the subgraph may contain less vertices than the input graph. In such case, the
optional array result :math:`vertex\_map` (returned if :attr:`return_vertex_map` is ``True``) indicates for each
vertex :math:`i` of the subgraph, its corresponding index in the input graph.

:Example:

>>> # linear graph with 6 vertices
>>> graph = hg.UndirectedGraph(6)
>>> graph.add_edges(np.arange(5), np.arange(1, 6))
>>>
>>> # select edges (4, 5), (0, 1), and (3, 4), note that vertex 2 is not in any edge
>>> edge_indices = np.asarray((4, 0, 3))
>>> subgraph, vertex_map = hg.subgraph(graph, edge_indices, spanning=False, return_vertex_map=True)
>>>
>>> subgraph.num_vertices()
5
>>> vertex_map
[0 1 3 4 5]
>>> subgraph.edge_list()
([3 0 2], [4 1 3])
>>> vertex_map
[0 1 3 4 5]

:param graph: input graph.
:param edge_indices: an array of edge indices of the input graph.
:param spanning: if ``True``, the subgraph has the same vertex set as the input graph.
:param return_vertex_map: if ``True``, also returns an array mapping each vertex of the current to its corresponding
vertex in the input graph.
:return: a subgraph and, if :attr:`return_vertex_map` is ``True``, a vertex map
"""
if spanning:
subgraph = hg.UndirectedGraph(graph.num_vertices())
sources, targets = graph.edge_list()
subgraph.add_edges(sources[edge_indices], targets[edge_indices])

if return_vertex_map:
vertex_map = np.arange(graph.num_vertices())
else:
sources, targets = graph.edge_list()
sources = sources[edge_indices]
targets = targets[edge_indices]
all_vertices = np.concatenate((sources, targets))
vertex_map, inverse = np.unique(all_vertices, return_inverse=True)

sources = inverse[:edge_indices.size]
targets = inverse[edge_indices.size:]

subgraph = hg.UndirectedGraph(vertex_map.size)
subgraph.add_edges(sources, targets)

if return_vertex_map:
return subgraph, vertex_map
else:
return subgraph
1 change: 1 addition & 0 deletions higra/all.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -20,3 +20,4 @@
#include "interop/all.hpp"
#include "io_utils/all.hpp"
#include "structure/all.hpp"
#include "py_sorting.hpp"
30 changes: 27 additions & 3 deletions higra/concept.py
Original file line number Diff line number Diff line change
Expand Up @@ -249,17 +249,34 @@ class CptBinaryHierarchy(CptHierarchy):
_description = "A binary tree representing a hierarchy with an associated " \
"minimum spanning tree on the hierarchy leaf graph."
_data_elements = {"tree": ("a hierarchy h", None),
"mst": ("a minimum spanning tree of the leaf graph of the hierarchy", "mst")}
"mst": ("a minimum spanning tree of the leaf graph of the hierarchy", "mst"),
"mst_edge_map": (
"Map each internal vertex i of the tree to the edge 'mst_edge_map[i - tree.num_leaves()]' of the"
"leaf graph of the hierarchy corresponding to a minimum spanning tree edge.",
"mst_edge_map")}
_canonical_data_element = "tree"

def __init__(self, **kwargs):
super(CptBinaryHierarchy, self).__init__(**kwargs)

@staticmethod
def link(hierarchy, mst):
def link(hierarchy, mst_edge_map, mst):
hg.add_tag(hierarchy, CptBinaryHierarchy)
hg.set_attribute(hierarchy, "mst_edge_map", mst_edge_map)
hg.set_attribute(hierarchy, "mst", mst)

@staticmethod
def get_mst_edge_map(tree):
"""
The :math:`mst\_edge\_map` of the :attr:`tree` is an array that maps each internal vertex :math:`i` of the tree
to the edge :math:`mst\_edge\_map[i - tree.num_leaves()]` of the leaf graph of the hierarchy corresponding
to a minimum spanning tree edge.

:param tree:
:return:
"""
return hg.get_attribute(tree, "mst_edge_map")

@staticmethod
def get_mst(tree):
"""
Expand All @@ -268,7 +285,14 @@ def get_mst(tree):
:param tree:
:return:
"""
return hg.get_attribute(tree, "mst")
mst = hg.get_attribute(tree, "mst")
if mst is None:
mst_edge_map = hg.CptBinaryHierarchy.get_mst_edge_map(tree)
leaf_graph = hg.CptHierarchy.get_leaf_graph(tree)
mst = hg.subgraph(leaf_graph, mst_edge_map, spanning=True)
hg.CptMinimumSpanningTree.link(mst, leaf_graph, mst_edge_map)
hg.set_attribute(tree, "mst", mst)
return mst


@auto_concept_docstring
Expand Down
156 changes: 142 additions & 14 deletions higra/hierarchy/hierarchy_core.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,31 +12,159 @@
import numpy as np


def bpt_canonical(graph, edge_weights):
def bpt_canonical(graph, edge_weights=None, sorted_edge_indices=None, return_altitudes=True, compute_mst=True):
"""
Computes the canonical binary partition tree (binary tree by altitude ordering) of the given weighted graph.
This is also known as single/min linkage clustering.
Computes the *canonical binary partition tree*, also called *binary partition tree by altitude ordering* or
*connectivity constrained single min/linkage clustering* of the given graph.

:param graph: input graph
:param edge_weights: edge weights of the input graph
:return: a tree (Concept :class:`~higra.CptBinaryHierarchy`) and its node altitudes
:Definition:

The following definition is adapted from:

Cousty, Jean, Laurent Najman, Yukiko Kenmochi, and Silvio Guimarães.
`"Hierarchical segmentations with graphs: quasi-flat zones, minimum spanning trees, and saliency maps."
<https://hal.archives-ouvertes.fr/hal-01344727/document>`_
Journal of Mathematical Imaging and Vision 60, no. 4 (2018): 479-502.

Let :math:`G=(V,E)` be an undirected graph, let :math:`\\prec` be a total order on :math:`E`, and let :math:`e_k` be
the edge in :math:`E` that has exactly :math:`k` smaller edges according to :math:`\\prec`: we then say that :math:`k`
is the rank of :math:`e_k` (for :math:`\\prec`).
The *canonical binary partition hierarchy* of :math:`G` for :math:`\\prec` is defined as the sequence of nested partitions:

- :math:`P_0 = \{\{v\}, v\in V\}`, the finest partion is composed of every singleton of :math:`V`; and
- :math:`P_n = (P_{n-1} \\backslash \{P_{n-1}^x, P_{n-1}^y\}) \cup (P_{n-1}^x \cup P_{n-1}^y)` where :math:`e_n=\{x,y\}`
and :math:`P_{n-1}^x` and :math:`P_{n-1}^y` are the regions of :math:`P_{n-1}` that contain :math:`x` and :math:`y`
respectively.

At the step :math:`n`, we remove the regions at the two extremities of the :math:`n`-th smallest edge
and we add their union. Note that we may have :math:`P_n = P_{n-1}` if both extremities of the edge :math:`e_n`
were in a same region of :math:`P_{n-1}`. Otherwise, :math:`P_n` is obtained by merging two regions of :math:`P_{n-1}`.

The *canonical binary partition tree* is then the tree representing the merging steps in this sequence,
it is thus binary. Each merging step, and thus each non leaf node of the tree, is furthermore associated to a
specific edge of the graph, called *a building edge* that led to this merge. It can be shown that the set of all
building edges associated to a canonical binary partition tree is a minimum spanning tree of the graph for the given
edge ordering :math:`\\prec`.

The map that associates every non leaf node of the canonical binary partition tree to its building edge is called
the *mst_edge_map*. In practice this map is represented by an array of size :math:`tree.num\_vertices() - tree.num\_leaves()`
and, for any internal node :math:`i` of the tree, :math:`mst\_edge\_map[i - tree.num\_leaves()]` is equal to the index
of the building edge in :math:`G` associated to :math:`i`.

The ordering :math:`\\prec` can be specified explicitly by providing the array of indices :attr:`sorted_edge_indices`
that sort the edges, or implicitly by providing the array of edge weights :attr:`edge_weights`. In this case,
:attr:`sorted_edge_indices` is set equal to ``hg.arg_sort(edge_weights, stable=True)``. If :attr:`edge_weights`
is an array with more than 1 dimension, a lexicographic ordering is used.

If requested, altitudes associated to the nodes of the canonical binary partition tree are computed as follows:

- if :attr:`edge_weights` are provided, the altitude of a non-leaf node is equal to the edge weight of its
building edge; and
- otherwise, the altitude of a non-leaf node is equal to the rank of its building edge.

The altitude of a leaf node is always equal to 0.

:Example:

.. figure:: /fig/canonical_binary_partition_tree_example.svg
:alt: Example of a binary partition tree by altitude ordering
:align: center

Given an edge weighted graph :math:`G`, the binary partition tree by altitude ordering :math:`T` (in blue) is
associated to a minimum spanning tree :math:`S` of :math:`G` (whose edges are thick and gray). Each leaf node of
the tree corresponds to a vertex of :math:`G` while each non-leaf node :math:`n_i` of :math:`T` corresponds to a
building edge of :math:`T` which belongs to the minimum spanning tree :math:`S`. The association between the non-leaf
nodes and the minimum spanning tree edges, called *mst_edge_map*, is depicted by green arrows .


The above figure corresponds to the following code (note that vertex indices start at 0 in the code):

>>> g = hg.UndirectedGraph(5)
>>> g.add_edges((0, 0, 1, 1, 1, 2, 3),
>>> (1, 2, 2, 3, 4, 4, 4))
>>> edge_weights = np.asarray((4, 6, 3, 7, 11, 8, 5))
>>> tree, altitudes = hg.bpt_canonical(g, edge_weights)
>>> tree.parents()
array([6, 5, 5, 7, 7, 6, 8, 8, 8])
>>> altitudes
array([0, 0, 0, 0, 0, 3, 4, 5, 7])
>>> tree.mst_edge_map
array([2, 0, 6, 3])
>>> tree.mst.edge_list()
(array([1, 0, 3, 1]), array([2, 1, 4, 3]))

:Complexity:

The algorithm used is based on Kruskal's minimum spanning tree algorithm and is described in:

Laurent Najman, Jean Cousty, Benjamin Perret.
`Playing with Kruskal: Algorithms for Morphological Trees in Edge-Weighted Graphs
<https://hal.archives-ouvertes.fr/file/index/docid/798621/filename/ismm2013-algo.pdf>`_.
ISMM 2013: 135-146.

If :attr:`sorted_edge_indices` is provided the algorithm runs in quasi linear :math:`\mathcal{O}(n \\alpha(n))`,
with :math:`n` the number of elements in the graph and with :math`\\alpha` the inverse of the Ackermann function.
Otherwise, the computation time is dominated by the sorting of the edge weights which is performed in linearithmic
:math:`\mathcal{O}(n \log(n))` time.

:param graph: input graph.
:param edge_weights: edge weights of the input graph (may be omitted if :attr:`sorted_edge_indices` is given).
:param sorted_edge_indices: array of indices that sort the edges of the input graph by increasing weight (may be
omitted if :attr:`edge_weights` is given).
:param return_altitudes: if ``True`` an array representing the altitudes of the tree vertices is returned.
(default: ``True``).
:param compute_mst: if ``True`` computes an explicit undirected graph representing the minimum spanning tree
associated to the hierarchy, accessible through the :class:`~higra.CptBinaryHierarchy` Concept
(e.g. with ``tree.mst``). (default: ``True``).
:return: a tree (Concept :class:`~higra.CptBinaryHierarchy`), and, if :attr:`return_altitudes` is ``True``, its node
altitudes
"""
res = hg.cpp._bpt_canonical(graph, edge_weights)
tree = res.tree()
altitudes = res.altitudes()
mst = res.mst()
mst_edge_map = res.mst_edge_map()

if edge_weights is None and sorted_edge_indices is None:
raise ValueError("edge_weights and sorted_edge_indices cannot be both equal to None.")

if sorted_edge_indices is None:
if edge_weights.ndim > 2:
tmp_edge_weights = edge_weights.reshape((edge_weights.shape[0], -1))
else:
tmp_edge_weights = edge_weights
sorted_edge_indices = hg.arg_sort(tmp_edge_weights, stable=True)

parents, mst_edge_map = hg.cpp._bpt_canonical(graph, sorted_edge_indices)
tree = hg.Tree(parents)

if return_altitudes:
if edge_weights is None:
edge_weights = np.empty_like(sorted_edge_indices)
edge_weights[sorted_edge_indices] = np.arange(sorted_edge_indices.size)

if edge_weights.ndim == 1:
altitudes = np.zeros((tree.num_vertices(),), dtype=edge_weights.dtype)
altitudes[graph.num_vertices():] = edge_weights[mst_edge_map]
else:
shape = [tree.num_vertices()] + list(edge_weights.shape[1:])
altitudes = np.zeros(shape, dtype=edge_weights.dtype)
altitudes[graph.num_vertices():, ...] = edge_weights[mst_edge_map, ...]

# if the base graph is itself a mst, we take the base graph of this mst as the new base graph
if hg.CptMinimumSpanningTree.validate(graph):
leaf_graph = hg.CptMinimumSpanningTree.construct(graph)["base_graph"]
else:
leaf_graph = graph

hg.CptMinimumSpanningTree.link(mst, leaf_graph, mst_edge_map)
if compute_mst:
mst = hg.subgraph(graph, mst_edge_map)
hg.CptMinimumSpanningTree.link(mst, leaf_graph, mst_edge_map)
else:
mst = None

hg.CptHierarchy.link(tree, leaf_graph)
hg.CptBinaryHierarchy.link(tree, mst)
hg.CptBinaryHierarchy.link(tree, mst_edge_map, mst)

return tree, altitudes
if not return_altitudes:
return tree
else:
return (tree, altitudes)


def quasi_flat_zone_hierarchy(graph, edge_weights):
Expand Down
4 changes: 2 additions & 2 deletions higra/hierarchy/py_common.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -33,8 +33,8 @@ struct def_node_weighted_tree {
(std::string("NodeWeightedTree_") + typeid(class_t).name()).c_str(),
"A simple structure to hold the result of hierarchy construction algorithms, "
"namely a tree and its associated node altitude array.");
c.def("tree", [](class_t &self) -> tree_t& {return self.tree;}, "The tree!");
c.def("altitudes", [](class_t &self) -> array_1d<value_t>& {return self.altitudes;}, "An array of tree node altitude.");
c.def("tree", [](class_t &self) -> tree_t& {return self.tree;}, "The tree!", py::return_value_policy::reference_internal);
c.def("altitudes", [](class_t &self) -> array_1d<value_t>& {return self.altitudes;}, "An array of tree node altitude.", py::return_value_policy::reference_internal);
}
};

Expand Down
Loading