jakarto3d · YertleTurtleGit · May 23, 2023 · May 23, 2023 · May 23, 2023 · May 23, 2023
diff --git a/jakteristics/extension.pyx b/jakteristics/extension.pyx
@@ -4,31 +4,35 @@
 import numpy as np
 import multiprocessing
 
-from cpython.mem cimport PyMem_Malloc, PyMem_Realloc, PyMem_Free
+from cpython.mem cimport PyMem_Malloc, PyMem_Free
+from libc.stdlib cimport malloc, free # Is needed to ensure thread-safety.
 from libc.string cimport memset
-from libcpp cimport bool
 cimport openmp
 cimport cython
-from cython.parallel import prange, parallel
+from cython.parallel import prange
 from libc.math cimport fabs, pow, log, sqrt
 cimport numpy as np
 from libcpp.vector cimport vector
 from libcpp.map cimport map as cppmap
 from libcpp.string cimport string
-from libc.stdint cimport uintptr_t, uint32_t, int8_t, uint8_t, int64_t
+from libc.stdint cimport uint32_t, uint8_t, int64_t
 
-from .ckdtree.ckdtree cimport cKDTree, ckdtree, query_ball_point
+from .ckdtree.ckdtree cimport cKDTree, query_ball_point
 from . cimport utils
 from .constants import FEATURE_NAMES
 
+cdef double INFINITY
+INFINITY = np.inf
+
 
 @cython.boundscheck(False)
 @cython.wraparound(False)
 @cython.initializedcheck(False)
 @cython.cdivision(True)
 def compute_features(
     double [:, ::1] points,
-    float search_radius,
+    double search_radius,
+    double max_graph_edge_length=INFINITY,
     *,
     cKDTree kdtree=None,
     int num_threads=-1,
@@ -54,10 +58,16 @@ def compute_features(
         float [:, :] features = np.full((n_points, len(feature_names)), float("NaN"), dtype=np.float32)
 
         const np.float64_t[:, ::1] radius_vector
-        np.float64_t p = 2 if euclidean_distance else 1
-        np.float64_t eps_scipy = 0.0
+        np.float64_t p = <np.float64_t> float(2 if euclidean_distance else 1)
+        np.float64_t eps_scipy = <np.float64_t> float(0)
         vector[np.intp_t] *** threaded_vvres
-        int return_length = <int> False
+        int return_length = int(False)
+
+        bint compute_graph_distance
+        unsigned int start_node_id, max_edge_weight_count, neighbor_point_id_offset, row, column, coordinate_index, edge_weight_id, threshold_length, path_index
+        double edge_weight
+        double [:, ::1] edge_weights, edge_vector, shortest_edge_weights, new_array
+        unsigned int [:, ::1] over_threshold_indices
 
     if not points.shape[1] == 3:
         raise ValueError("You must provide an (n x 3) numpy array.")
@@ -78,11 +88,27 @@ def compute_features(
     eigenvectors = np.zeros([3, 3 * num_threads], dtype=np.float64, order="F")
     eigenvalues = np.zeros(3 * num_threads, dtype=np.float64)
 
+    compute_graph_distance = <bint> (max_graph_edge_length < search_radius)
+
+    if compute_graph_distance:
+        # max edge weight count equals (n²-n)/2 (similar to nth triangular number), where n is the number of points, because
+        # n² : every node has an edge with every node
+        # -n : edge between point a-a, b-b et cetera is always zero and redundant
+        # /2 : only one weight per edge, undirected graph a->b == b<-a
+        max_edge_weight_count = (max_k_neighbors * max_k_neighbors - max_k_neighbors) // 2
+
+        edge_weights = np.empty((num_threads, max_edge_weight_count), dtype=np.float64)
+        edge_vector = np.empty((num_threads, 3), dtype=np.float64)
+        shortest_edge_weights = np.empty((num_threads, max_k_neighbors), dtype=np.float64)
+        over_threshold_indices = np.empty((num_threads, max_k_neighbors), dtype=np.uint32)
+        new_array = np.empty((num_threads, max_k_neighbors), dtype=np.float64) # TODO Find better name
+
     threaded_vvres = init_result_vectors(num_threads)
 
     try:
         for i in prange(n_points, nogil=True, num_threads=num_threads):
             thread_id = openmp.omp_get_thread_num()
+            neighbor_point_id_offset = thread_id * max_k_neighbors
 
             threaded_vvres[thread_id][0].clear()
             query_ball_point(
@@ -107,10 +133,60 @@ def compute_features(
             for j in range(n_neighbors_at_id):
                 neighbor_id = threaded_vvres[thread_id][0][0][j]
                 for k in range(3):
-                    neighbor_points[k, thread_id * max_k_neighbors + j] = kdtree.cself.raw_data[neighbor_id * 3 + k]
+                    neighbor_points[k, neighbor_point_id_offset + j] = kdtree.cself.raw_data[neighbor_id * 3 + k]
+
+            if compute_graph_distance:
+                for row in range(n_neighbors_at_id):
+                    for column in range(n_neighbors_at_id):
+
+                        if column >= row:
+                            continue
+
+                        for coordinate_index in range(3):
+                            edge_vector[thread_id, coordinate_index] = (neighbor_points[coordinate_index, neighbor_point_id_offset + row]
+                                                                        - neighbor_points[coordinate_index, neighbor_point_id_offset + column])
+
+                        if euclidean_distance:
+                            edge_weight = sqrt(edge_vector[thread_id, 0] * edge_vector[thread_id, 0]
+                                                + edge_vector[thread_id, 1] * edge_vector[thread_id, 1]
+                                                + edge_vector[thread_id, 2] * edge_vector[thread_id, 2])
+                        else: # Manhattan distance.
+                            edge_weight = (fabs(edge_vector[thread_id, 0])
+                                            + fabs(edge_vector[thread_id, 1])
+                                            + fabs(edge_vector[thread_id, 2]))
+
+                        if edge_weight > max_graph_edge_length:
+                            edge_weight = INFINITY
+
+                        edge_weight_id = (row * (row + 1)) // 2 + (column - row) # sum of an arithmetic series        
+                        edge_weights[thread_id, edge_weight_id] = edge_weight
+
+                # TODO This is weird. Would be great if [0] is the start node in the first place.
+                for start_node_id in range(n_neighbors_at_id):
+                    if (neighbor_points[0, neighbor_point_id_offset + start_node_id] == points[i, 0] and
+                        neighbor_points[1, neighbor_point_id_offset + start_node_id] == points[i, 1] and
+                        neighbor_points[2, neighbor_point_id_offset + start_node_id] == points[i, 2]):
+                        break
+
+                shortest_edge_weights[thread_id, :n_neighbors_at_id] = dijkstra_all_shortest_edge_paths_weights(start_node_id, edge_weights[thread_id, :], n_neighbors_at_id)
+
+                threshold_length = 0
+                for path_index in range(n_neighbors_at_id):
+                    if shortest_edge_weights[thread_id, path_index] > search_radius:
+                        over_threshold_indices[thread_id, threshold_length] = path_index
+                        threshold_length = threshold_length + 1
+
+                n_neighbors_at_id = n_neighbors_at_id - threshold_length
+                number_of_neighbors = number_of_neighbors - threshold_length
+
+                for k in range(3):
+                    neighbor_points[k, neighbor_point_id_offset : neighbor_point_id_offset + n_neighbors_at_id] = move_to_end(
+                        new_array[thread_id, :n_neighbors_at_id],
+                        neighbor_points[k, neighbor_point_id_offset : neighbor_point_id_offset + n_neighbors_at_id + threshold_length],
+                        over_threshold_indices[thread_id, :threshold_length])
 
             utils.c_covariance(
-                neighbor_points[:, thread_id * max_k_neighbors:thread_id * max_k_neighbors + n_neighbors_at_id],
+                neighbor_points[:, neighbor_point_id_offset : neighbor_point_id_offset + n_neighbors_at_id],
                 eigenvectors[:, thread_id * 3:(thread_id + 1) * 3],
             )
             utils.c_eigenvectors(
@@ -129,7 +205,87 @@ def compute_features(
     finally:
         free_result_vectors(threaded_vvres, num_threads)
 
-    return np.asarray(features)
+    return np.array(features)
+
+
+@cython.boundscheck(False)
+@cython.wraparound(False)
+@cython.initializedcheck(False)
+@cython.cdivision(True)
+cdef inline double[:] move_to_end(double[:] new_array, double[:] array, unsigned int[:] indices_to_move) nogil:
+    cdef bint move
+    cdef unsigned int array_length, indices_length, new_array_index
+    array_length = array.shape[0]
+    indices_length = indices_to_move.shape[0]
+    new_array_index = 0
+
+    for array_index in range(array_length):
+
+        move = <bint> False
+        for indices_index in range(indices_length):
+            if array_index == indices_to_move[indices_index]:
+                move = <bint> True
+                break
+
+        if not move:
+            new_array[new_array_index] = array[array_index]
+            new_array_index = new_array_index + 1
+
+    return new_array
+
+
+@cython.boundscheck(False)
+@cython.wraparound(False)
+@cython.initializedcheck(False)
+@cython.cdivision(True)
+cdef inline double[:] dijkstra_all_shortest_edge_paths_weights(unsigned int start_node_id, double[:] edge_weights, unsigned int node_count) nogil:
+    cdef double* shortest_edges_weights
+    shortest_edges_weights = <double*> malloc(<size_t>(node_count * sizeof(double)))
+    cdef bint* queue
+    queue = <bint*> malloc(<size_t>(node_count * sizeof(bint)))
+    cdef unsigned int queue_size
+    queue_size = node_count
+
+    cdef double candidate_weight, candidate_distance
+    cdef unsigned int node_id, queue_node_id, candidate_node_id, row, column, edge_weight_id, row_index
+
+    for node_id in range(node_count):
+        shortest_edges_weights[node_id] = INFINITY
+        queue[node_id] = True
+    shortest_edges_weights[start_node_id] = 0
+
+    while queue_size > 0:
+
+        min_distance = INFINITY
+        for node_id in range(node_count):
+            candidate_distance = shortest_edges_weights[node_id]
+            if queue[node_id] and candidate_distance <= min_distance:
+                queue_node_id = node_id
+                min_distance = candidate_distance
+
+        queue[queue_node_id] = False
+        queue_size -= 1
+
+        for candidate_node_id in range(node_count):
+            if queue[candidate_node_id] and queue_node_id != candidate_node_id:
+
+                row = max(candidate_node_id, queue_node_id)
+                column = min(candidate_node_id, queue_node_id)
+                edge_weight_id = (row * (row + 1)) // 2 + (column - row) # sum of an arithmetic series
+
+                candidate_weight = shortest_edges_weights[queue_node_id] + edge_weights[edge_weight_id]
+                if candidate_weight < shortest_edges_weights[candidate_node_id]:
+                    shortest_edges_weights[candidate_node_id] = candidate_weight
+
+    free(queue)
+
+    # Reusing the allocated memory.
+    # TODO Check whether this is a good 'Cythonic' practice.
+    for i in range(node_count):
+        edge_weights[i] = shortest_edges_weights[i]
+    free(shortest_edges_weights)
+    return edge_weights[:node_count]
+
 
 @cython.boundscheck(False)
 @cython.wraparound(False)

diff --git a/jakteristics/main.py b/jakteristics/main.py
@@ -12,6 +12,7 @@
 def compute_features(
     points: np.ndarray,
     search_radius: float,
+    max_graph_edge_length: float = np.inf,
     *,
     kdtree: ckdtree.cKDTree = None,
     num_threads: int = -1,
@@ -28,6 +29,12 @@ def compute_features(
             A contiguous (n, 3) array of xyz coordinates to query.
         search_radius:
             The radius to query neighbors at each point.
+        max_graph_edge_length:
+            The maximum single edge length for computing the geodesic graph distance.
+            The geodesic graph distance will only be calculated if
+            `max_graph_edge_length < search_radius`; otherwise only "classic" Euclidean
+            or Manhattan distance is used for the neighborhood search (can be changed with
+            parameter `euclidean_distance`).
         kdtree:
             If None, the kdtree is computed from the list of points.
             Must be an instance of `jakteristics.cKDTree`
@@ -71,6 +78,7 @@ def compute_features(
     return jakteristics.extension.compute_features(
         points,
         search_radius,
+        max_graph_edge_length=max_graph_edge_length,
         kdtree=kdtree,
         num_threads=num_threads,
         max_k_neighbors=max_k_neighbors,

diff --git a/tests/test_features.py b/tests/test_features.py
@@ -79,6 +79,17 @@ def test_compute_features():
     assert features.shape == (n_points, len(FEATURE_NAMES))
 
 
+def test_compute_features_graph_geodesic():
+    n_points = 1000
+    points = np.random.random((n_points, 3)) * 10
+
+    features = extension.compute_features(
+        points, 0.15, max_graph_edge_length=0.5, feature_names=FEATURE_NAMES
+    )
+
+    assert features.shape == (n_points, len(FEATURE_NAMES))
+
+
 def test_compute_some_features():
     input_path = data_dir / "test_0.02_seconde.las"
     xyz = las_utils.read_las_xyz(input_path)