Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

geodesic graph distance #11

Open
wants to merge 8 commits into
base: master
Choose a base branch
from
180 changes: 168 additions & 12 deletions jakteristics/extension.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -4,31 +4,35 @@
import numpy as np
import multiprocessing

from cpython.mem cimport PyMem_Malloc, PyMem_Realloc, PyMem_Free
from cpython.mem cimport PyMem_Malloc, PyMem_Free
from libc.stdlib cimport malloc, free # Is needed to ensure thread-safety.
from libc.string cimport memset
from libcpp cimport bool
cimport openmp
cimport cython
from cython.parallel import prange, parallel
from cython.parallel import prange
from libc.math cimport fabs, pow, log, sqrt
cimport numpy as np
from libcpp.vector cimport vector
from libcpp.map cimport map as cppmap
from libcpp.string cimport string
from libc.stdint cimport uintptr_t, uint32_t, int8_t, uint8_t, int64_t
from libc.stdint cimport uint32_t, uint8_t, int64_t

from .ckdtree.ckdtree cimport cKDTree, ckdtree, query_ball_point
from .ckdtree.ckdtree cimport cKDTree, query_ball_point
from . cimport utils
from .constants import FEATURE_NAMES

cdef double INFINITY
INFINITY = np.inf


@cython.boundscheck(False)
@cython.wraparound(False)
@cython.initializedcheck(False)
@cython.cdivision(True)
def compute_features(
double [:, ::1] points,
float search_radius,
double search_radius,
double max_graph_edge_length=INFINITY,
*,
cKDTree kdtree=None,
int num_threads=-1,
Expand All @@ -54,10 +58,16 @@ def compute_features(
float [:, :] features = np.full((n_points, len(feature_names)), float("NaN"), dtype=np.float32)

const np.float64_t[:, ::1] radius_vector
np.float64_t p = 2 if euclidean_distance else 1
np.float64_t eps_scipy = 0.0
np.float64_t p = <np.float64_t> float(2 if euclidean_distance else 1)
np.float64_t eps_scipy = <np.float64_t> float(0)
vector[np.intp_t] *** threaded_vvres
int return_length = <int> False
int return_length = int(False)

bint compute_graph_distance
unsigned int start_node_id, max_edge_weight_count, neighbor_point_id_offset, row, column, coordinate_index, edge_weight_id, threshold_length, path_index
double edge_weight
double [:, ::1] edge_weights, edge_vector, shortest_edge_weights, new_array
unsigned int [:, ::1] over_threshold_indices

if not points.shape[1] == 3:
raise ValueError("You must provide an (n x 3) numpy array.")
Expand All @@ -78,11 +88,27 @@ def compute_features(
eigenvectors = np.zeros([3, 3 * num_threads], dtype=np.float64, order="F")
eigenvalues = np.zeros(3 * num_threads, dtype=np.float64)

compute_graph_distance = <bint> (max_graph_edge_length < search_radius)

if compute_graph_distance:
# max edge weight count equals (n²-n)/2 (similar to nth triangular number), where n is the number of points, because
# n² : every node has an edge with every node
# -n : edge between point a-a, b-b et cetera is always zero and redundant
# /2 : only one weight per edge, undirected graph a->b == b<-a
max_edge_weight_count = (max_k_neighbors * max_k_neighbors - max_k_neighbors) // 2

edge_weights = np.empty((num_threads, max_edge_weight_count), dtype=np.float64)
edge_vector = np.empty((num_threads, 3), dtype=np.float64)
shortest_edge_weights = np.empty((num_threads, max_k_neighbors), dtype=np.float64)
over_threshold_indices = np.empty((num_threads, max_k_neighbors), dtype=np.uint32)
new_array = np.empty((num_threads, max_k_neighbors), dtype=np.float64) # TODO Find better name

threaded_vvres = init_result_vectors(num_threads)

try:
for i in prange(n_points, nogil=True, num_threads=num_threads):
thread_id = openmp.omp_get_thread_num()
neighbor_point_id_offset = thread_id * max_k_neighbors

threaded_vvres[thread_id][0].clear()
query_ball_point(
Expand All @@ -107,10 +133,60 @@ def compute_features(
for j in range(n_neighbors_at_id):
neighbor_id = threaded_vvres[thread_id][0][0][j]
for k in range(3):
neighbor_points[k, thread_id * max_k_neighbors + j] = kdtree.cself.raw_data[neighbor_id * 3 + k]
neighbor_points[k, neighbor_point_id_offset + j] = kdtree.cself.raw_data[neighbor_id * 3 + k]

if compute_graph_distance:
for row in range(n_neighbors_at_id):
for column in range(n_neighbors_at_id):

if column >= row:
continue

for coordinate_index in range(3):
edge_vector[thread_id, coordinate_index] = (neighbor_points[coordinate_index, neighbor_point_id_offset + row]
- neighbor_points[coordinate_index, neighbor_point_id_offset + column])

if euclidean_distance:
edge_weight = sqrt(edge_vector[thread_id, 0] * edge_vector[thread_id, 0]
+ edge_vector[thread_id, 1] * edge_vector[thread_id, 1]
+ edge_vector[thread_id, 2] * edge_vector[thread_id, 2])
else: # Manhattan distance.
edge_weight = (fabs(edge_vector[thread_id, 0])
+ fabs(edge_vector[thread_id, 1])
+ fabs(edge_vector[thread_id, 2]))

if edge_weight > max_graph_edge_length:
edge_weight = INFINITY

edge_weight_id = (row * (row + 1)) // 2 + (column - row) # sum of an arithmetic series
edge_weights[thread_id, edge_weight_id] = edge_weight

# TODO This is weird. Would be great if [0] is the start node in the first place.
for start_node_id in range(n_neighbors_at_id):
if (neighbor_points[0, neighbor_point_id_offset + start_node_id] == points[i, 0] and
neighbor_points[1, neighbor_point_id_offset + start_node_id] == points[i, 1] and
neighbor_points[2, neighbor_point_id_offset + start_node_id] == points[i, 2]):
break

shortest_edge_weights[thread_id, :n_neighbors_at_id] = dijkstra_all_shortest_edge_paths_weights(start_node_id, edge_weights[thread_id, :], n_neighbors_at_id)

threshold_length = 0
for path_index in range(n_neighbors_at_id):
if shortest_edge_weights[thread_id, path_index] > search_radius:
over_threshold_indices[thread_id, threshold_length] = path_index
threshold_length = threshold_length + 1

n_neighbors_at_id = n_neighbors_at_id - threshold_length
number_of_neighbors = number_of_neighbors - threshold_length

for k in range(3):
neighbor_points[k, neighbor_point_id_offset : neighbor_point_id_offset + n_neighbors_at_id] = move_to_end(
new_array[thread_id, :n_neighbors_at_id],
neighbor_points[k, neighbor_point_id_offset : neighbor_point_id_offset + n_neighbors_at_id + threshold_length],
over_threshold_indices[thread_id, :threshold_length])

utils.c_covariance(
neighbor_points[:, thread_id * max_k_neighbors:thread_id * max_k_neighbors + n_neighbors_at_id],
neighbor_points[:, neighbor_point_id_offset : neighbor_point_id_offset + n_neighbors_at_id],
eigenvectors[:, thread_id * 3:(thread_id + 1) * 3],
)
utils.c_eigenvectors(
Expand All @@ -129,7 +205,87 @@ def compute_features(
finally:
free_result_vectors(threaded_vvres, num_threads)

return np.asarray(features)
return np.array(features)


@cython.boundscheck(False)
@cython.wraparound(False)
@cython.initializedcheck(False)
@cython.cdivision(True)
cdef inline double[:] move_to_end(double[:] new_array, double[:] array, unsigned int[:] indices_to_move) nogil:
cdef bint move
cdef unsigned int array_length, indices_length, new_array_index
array_length = array.shape[0]
indices_length = indices_to_move.shape[0]
new_array_index = 0

for array_index in range(array_length):

move = <bint> False
for indices_index in range(indices_length):
if array_index == indices_to_move[indices_index]:
move = <bint> True
break

if not move:
new_array[new_array_index] = array[array_index]
new_array_index = new_array_index + 1

return new_array


@cython.boundscheck(False)
@cython.wraparound(False)
@cython.initializedcheck(False)
@cython.cdivision(True)
cdef inline double[:] dijkstra_all_shortest_edge_paths_weights(unsigned int start_node_id, double[:] edge_weights, unsigned int node_count) nogil:
cdef double* shortest_edges_weights
shortest_edges_weights = <double*> malloc(<size_t>(node_count * sizeof(double)))
cdef bint* queue
queue = <bint*> malloc(<size_t>(node_count * sizeof(bint)))
cdef unsigned int queue_size
queue_size = node_count

cdef double candidate_weight, candidate_distance
cdef unsigned int node_id, queue_node_id, candidate_node_id, row, column, edge_weight_id, row_index

for node_id in range(node_count):
shortest_edges_weights[node_id] = INFINITY
queue[node_id] = True
shortest_edges_weights[start_node_id] = 0

while queue_size > 0:

min_distance = INFINITY
for node_id in range(node_count):
candidate_distance = shortest_edges_weights[node_id]
if queue[node_id] and candidate_distance <= min_distance:
queue_node_id = node_id
min_distance = candidate_distance

queue[queue_node_id] = False
queue_size -= 1

for candidate_node_id in range(node_count):
if queue[candidate_node_id] and queue_node_id != candidate_node_id:

row = max(candidate_node_id, queue_node_id)
column = min(candidate_node_id, queue_node_id)
edge_weight_id = (row * (row + 1)) // 2 + (column - row) # sum of an arithmetic series

candidate_weight = shortest_edges_weights[queue_node_id] + edge_weights[edge_weight_id]
if candidate_weight < shortest_edges_weights[candidate_node_id]:
shortest_edges_weights[candidate_node_id] = candidate_weight

free(queue)

# Reusing the allocated memory.
# TODO Check whether this is a good 'Cythonic' practice.
for i in range(node_count):
edge_weights[i] = shortest_edges_weights[i]
free(shortest_edges_weights)
return edge_weights[:node_count]


@cython.boundscheck(False)
@cython.wraparound(False)
Expand Down
8 changes: 8 additions & 0 deletions jakteristics/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
def compute_features(
points: np.ndarray,
search_radius: float,
max_graph_edge_length: float = np.inf,
*,
kdtree: ckdtree.cKDTree = None,
num_threads: int = -1,
Expand All @@ -28,6 +29,12 @@ def compute_features(
A contiguous (n, 3) array of xyz coordinates to query.
search_radius:
The radius to query neighbors at each point.
max_graph_edge_length:
The maximum single edge length for computing the geodesic graph distance.
The geodesic graph distance will only be calculated if
`max_graph_edge_length < search_radius`; otherwise only "classic" Euclidean
or Manhattan distance is used for the neighborhood search (can be changed with
parameter `euclidean_distance`).
kdtree:
If None, the kdtree is computed from the list of points.
Must be an instance of `jakteristics.cKDTree`
Expand Down Expand Up @@ -71,6 +78,7 @@ def compute_features(
return jakteristics.extension.compute_features(
points,
search_radius,
max_graph_edge_length=max_graph_edge_length,
kdtree=kdtree,
num_threads=num_threads,
max_k_neighbors=max_k_neighbors,
Expand Down
11 changes: 11 additions & 0 deletions tests/test_features.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,17 @@ def test_compute_features():
assert features.shape == (n_points, len(FEATURE_NAMES))


def test_compute_features_graph_geodesic():
n_points = 1000
points = np.random.random((n_points, 3)) * 10

features = extension.compute_features(
points, 0.15, max_graph_edge_length=0.5, feature_names=FEATURE_NAMES
)

assert features.shape == (n_points, len(FEATURE_NAMES))


def test_compute_some_features():
input_path = data_dir / "test_0.02_seconde.las"
xyz = las_utils.read_las_xyz(input_path)
Expand Down