diff --git a/cmnemoi_learn/classification/_knn_classifier.py b/cmnemoi_learn/classification/_knn_classifier.py
index 2a2d3ad..19e7352 100644
--- a/cmnemoi_learn/classification/_knn_classifier.py
+++ b/cmnemoi_learn/classification/_knn_classifier.py
@@ -1,7 +1,7 @@
 """
 File defining a K-Nearest Neighbors classifier model.
 """
-from typing import Self
+from typing import Callable, Self
 
 import numpy as np
 from ._abstract_classifier import AbstractClassifier
@@ -19,16 +19,22 @@ class KNNClassifier(AbstractClassifier):
     a specific norm (L1, L2,...) and returns
     the majority label as prediction.
 
-    (For the moment only the L2 norm, ie. Euclidian distance, is available)
+    For the moment, only the L1 and L2 norms are available
+    via the `metrics` module.
 
     Args:
         k (int): The number of neighbors to evaluate.
+        distance (Callable): The function used to compute the distance
+            between two points. Defaults to the Manhattan (L1) distance.
     """
 
-    def __init__(self, k: int) -> None:
+    def __init__(
+        self, k: int, distance: Callable[[np.ndarray, np.ndarray], float] = manhattan_distance
+    ) -> None:
         super().__init__()
         self.k = k
         self.dataset = np.array([])
+        self.distance = distance
 
     def fit(self, X: np.ndarray, y: np.ndarray) -> Self:
         self.dataset = np.concatenate([X, self._reshape_ndarray(y)], axis=1)
@@ -40,7 +46,7 @@ def predict(self, X: np.ndarray) -> np.ndarray:
         distances_between_input_and_dataset = np.full((nb_rows, self.dataset.shape[0]), np.inf)
         for i, x_i in enumerate(X):
             for j, x_j in enumerate(self.dataset[:, :max_column]):
-                distances_between_input_and_dataset[i, j] = manhattan_distance(x_i, x_j)
+                distances_between_input_and_dataset[i, j] = self.distance(x_i, x_j)
 
         # get K nearest neighbors and associate them their labels
         nearest_neighbor_indexes = np.full((nb_rows, self.k), 0)
diff --git a/tests/classification/test_knn_classifier.py b/tests/classification/test_knn_classifier.py
index 51cd44e..0ad6dca 100644
--- a/tests/classification/test_knn_classifier.py
+++ b/tests/classification/test_knn_classifier.py
@@ -15,7 +15,7 @@
 np.random.seed(RANDOM_STATE)
 
 
-@pytest.mark.parametrize("k", [1, 3, 10])
+@pytest.mark.parametrize("k", [1, 3, 7])
 def test_predict_moons_dataset(classification_moons_dataset: np.ndarray, k: int) -> None:
     """
     Test `predict` on a circle pattern dataset.
@@ -33,7 +33,7 @@ def test_predict_moons_dataset(classification_moons_dataset: np.ndarray, k: int)
     assert np.array_equal(cmnemoi_prediction, sklearn_prediction)
 
 
-@pytest.mark.parametrize("k", [1, 3, 10])
+@pytest.mark.parametrize("k", [1, 3, 7])
 def test_predict_linear_dataset(classification_linear_dataset: np.ndarray, k: int) -> None:
     """
     Test `predict` on a linearly separable dataset.
@@ -42,7 +42,7 @@ def test_predict_linear_dataset(classification_linear_dataset: np.ndarray, k: in
     cmnemoi_model = KNNClassifier(k=k)
     cmnemoi_model = cmnemoi_model.fit(X, y)
 
-    sklearn_model = SklearnKNNClassifier(n_neighbors=k, p=1)
+    sklearn_model = SklearnKNNClassifier(n_neighbors=k, p=1)  # p = 1 for Manhattan distance
     sklearn_model = sklearn_model.fit(X, y)
 
     cmnemoi_prediction = cmnemoi_model.predict(X)
@@ -51,7 +51,7 @@ def test_predict_linear_dataset(classification_linear_dataset: np.ndarray, k: in
     assert np.array_equal(cmnemoi_prediction, sklearn_prediction)
 
 
-@pytest.mark.parametrize("k", [1, 3, 10])
+@pytest.mark.parametrize("k", [1, 3, 7])
 def test_predict_iris_dataset(k: int) -> None:
     """
     Test `predict` on Iris dataset.
@@ -68,8 +68,25 @@ def test_predict_iris_dataset(k: int) -> None:
     assert np.array_equal(cmnemoi_prediction, sklearn_prediction)
 
 
+@pytest.mark.parametrize("k", [1, 3, 7])
+def test_predict_with_l2_norm(k: int) -> None:
+    """
+    Test `predict` with the L2 norm (Euclidean distance) on the Iris dataset.
+    """
+    X, y = load_iris(return_X_y=True)
+    cmnemoi_model = KNNClassifier(k=k, distance=euclidean_distance)  # assumes a `euclidean_distance` helper in `metrics`
+    cmnemoi_model = cmnemoi_model.fit(X, y)
+
+    sklearn_model = SklearnKNNClassifier(n_neighbors=k, p=2)  # p = 2 for Euclidean distance
+    sklearn_model = sklearn_model.fit(X, y)
+
+    cmnemoi_prediction = cmnemoi_model.predict(X)
+    sklearn_prediction = sklearn_model.predict(X)
+
+    assert np.array_equal(cmnemoi_prediction, sklearn_prediction)
+
-@pytest.mark.parametrize("k", [1, 3, 10])
+@pytest.mark.parametrize("k", [1, 3, 7])
 def test_score(classification_linear_dataset: np.ndarray, k: int) -> None:
     """Test `score` method against sklearn implementation.
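Below the patch, a minimal usage sketch of the new `distance` parameter. It is not part of the diff and assumes `KNNClassifier` is importable from `cmnemoi_learn.classification` and that the `metrics` module exposes a `euclidean_distance` helper alongside `manhattan_distance` (both import paths and the L2 helper's name are assumptions). Any callable taking two 1-D arrays and returning a float fits the `Callable[[np.ndarray, np.ndarray], float]` signature.

import numpy as np

from cmnemoi_learn.classification import KNNClassifier  # assumed import path
from cmnemoi_learn.metrics import euclidean_distance    # assumed location/name of the L2 helper

# Tiny two-class toy dataset
X = np.array([[0.0, 0.0], [1.0, 1.0], [5.0, 5.0], [6.0, 5.0]])
y = np.array([0, 0, 1, 1])

# Default behaviour: neighbors ranked with the Manhattan (L1) distance
model_l1 = KNNClassifier(k=3).fit(X, y)

# Same model, but neighbors ranked with the Euclidean (L2) distance
model_l2 = KNNClassifier(k=3, distance=euclidean_distance).fit(X, y)

print(model_l1.predict(np.array([[0.5, 0.5]])))  # majority label of the 3 nearest points: 0
print(model_l2.predict(np.array([[5.5, 5.0]])))  # majority label of the 3 nearest points: 1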