Skip to content

Commit

Permalink
feat: Add support for Euclidean distance for KNNClassifier
Browse files Browse the repository at this point in the history
  • Loading branch information
cmnemoi committed Nov 14, 2023
1 parent 82a8f31 commit b4def1f
Show file tree
Hide file tree
Showing 2 changed files with 32 additions and 9 deletions.
14 changes: 10 additions & 4 deletions cmnemoi_learn/classification/_knn_classifier.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
"""
File defining a K-Nearest Neighbors classifier model.
"""
from typing import Self
from typing import Callable, Self
import numpy as np

from ._abstract_classifier import AbstractClassifier
Expand All @@ -19,16 +19,22 @@ class KNNClassifier(AbstractClassifier):
a specific norm (L1, L2,...) and returns
the majority label as prediction.
(For the moment only the L2 norm, ie. Euclidian distance, is available)
For the moment only L1 and L2 norms are available,
via the `metrics` module.
Args:
k (int): The number of neighbors to evaluate.
distance (Callable): The function to use to compute the distance
between points. By default: Manhattan distance
"""

def __init__(self, k: int) -> None:
def __init__(
self, k: int, distance: Callable[[np.ndarray, np.ndarray], float] = manhattan_distance
) -> None:
super().__init__()
self.k = k
self.dataset = np.array([])
self.distance = distance

def fit(self, X: np.ndarray, y: np.ndarray) -> Self:
self.dataset = np.concatenate([X, self._reshape_ndarray(y)], axis=1)
Expand All @@ -40,7 +46,7 @@ def predict(self, X: np.ndarray) -> np.ndarray:
distances_between_input_and_dataset = np.full((nb_rows, self.dataset.shape[0]), np.inf)
for i, x_i in enumerate(X):
for j, x_j in enumerate(self.dataset[:, :max_column]):
distances_between_input_and_dataset[i, j] = manhattan_distance(x_i, x_j)
distances_between_input_and_dataset[i, j] = self.distance(x_i, x_j)

# get K nearest neighbors and associate them their labels
nearest_neighbor_indexes = np.full((nb_rows, self.k), 0)
Expand Down
27 changes: 22 additions & 5 deletions tests/classification/test_knn_classifier.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
np.random.seed(RANDOM_STATE)


@pytest.mark.parametrize("k", [1, 3, 10])
@pytest.mark.parametrize("k", [1, 3, 7])
def test_predict_moons_dataset(classification_moons_dataset: np.ndarray, k: int) -> None:
"""
Test `predict` on a two-moons pattern dataset.
Expand All @@ -33,7 +33,7 @@ def test_predict_moons_dataset(classification_moons_dataset: np.ndarray, k: int)
assert np.array_equal(cmnemoi_prediction, sklearn_prediction)


@pytest.mark.parametrize("k", [1, 3, 10])
@pytest.mark.parametrize("k", [1, 3, 7])
def test_predict_linear_dataset(classification_linear_dataset: np.ndarray, k: int) -> None:
"""
Test `predict` on a linearly separable dataset.
Expand All @@ -42,7 +42,7 @@ def test_predict_linear_dataset(classification_linear_dataset: np.ndarray, k: in
cmnemoi_model = KNNClassifier(k=k)
cmnemoi_model = cmnemoi_model.fit(X, y)

sklearn_model = SklearnKNNClassifier(n_neighbors=k, p=1)
sklearn_model = SklearnKNNClassifier(n_neighbors=k, p=1) # p = 1 for Manhattan distance
sklearn_model = sklearn_model.fit(X, y)

cmnemoi_prediction = cmnemoi_model.predict(X)
Expand All @@ -51,7 +51,7 @@ def test_predict_linear_dataset(classification_linear_dataset: np.ndarray, k: in
assert np.array_equal(cmnemoi_prediction, sklearn_prediction)


@pytest.mark.parametrize("k", [1, 3, 10])
@pytest.mark.parametrize("k", [1, 3, 7])
def test_predict_iris_dataset(k: int) -> None:
"""
Test `predict` on Iris dataset.
Expand All @@ -68,8 +68,25 @@ def test_predict_iris_dataset(k: int) -> None:

assert np.array_equal(cmnemoi_prediction, sklearn_prediction)

@pytest.mark.parametrize("k", [1, 3, 7])
def test_predict_with_l2_norm(k: int) -> None:
    """
    Test `predict` on Iris dataset using the L2 norm (Euclidean distance).

    Predictions are compared against scikit-learn's KNN classifier
    configured with `p=2`.
    """
    X, y = load_iris(return_X_y=True)
    # KNNClassifier defaults to the Manhattan distance, so the Euclidean
    # distance has to be passed explicitly for this test to cover the L2 path.
    # NOTE(review): swap the lambda for the package's own Euclidean helper
    # from the `metrics` module if it is importable here.
    cmnemoi_model = KNNClassifier(k=k, distance=lambda a, b: float(np.linalg.norm(a - b)))
    cmnemoi_model = cmnemoi_model.fit(X, y)

    sklearn_model = SklearnKNNClassifier(n_neighbors=k, p=2)  # p = 2 for Euclidean distance
    sklearn_model = sklearn_model.fit(X, y)

    cmnemoi_prediction = cmnemoi_model.predict(X)
    sklearn_prediction = sklearn_model.predict(X)

    assert np.array_equal(cmnemoi_prediction, sklearn_prediction)


@pytest.mark.parametrize("k", [1, 3, 10])
@pytest.mark.parametrize("k", [1, 3, 7])
def test_score(classification_linear_dataset: np.ndarray, k: int) -> None:
"""Test `score` method against sklearn implementation.
Expand Down

0 comments on commit b4def1f

Please sign in to comment.