From ac0366c456d07325a95c6334b2fc6380b82e669b Mon Sep 17 00:00:00 2001 From: Charles-Meldhine Madi Mnemoi <63333367+cmnemoi@users.noreply.github.com> Date: Sun, 6 Aug 2023 03:04:53 +0200 Subject: [PATCH] feat: Add Linear Regression (#4) * style: fix linter * test: add tests * feat: add LinearRegression .fit and .predict method * chore: bump version to 0.2 * refactor: put fixtures parameters into constants * fix: add a bias column to the LinearRegression model * style: apply linter fixes --- cmnemoi_learn/linear_regression.py | 31 ++++++++++++--- mypy.ini | 2 + pyproject.toml | 2 +- tests/__init__.py | 0 tests/conftest.py | 63 ++++++++++++++++++++++++++++++ tests/test_linear_regression.py | 55 ++++++++++++++++++++++++-- 6 files changed, 143 insertions(+), 10 deletions(-) create mode 100644 mypy.ini create mode 100644 tests/__init__.py create mode 100644 tests/conftest.py diff --git a/cmnemoi_learn/linear_regression.py b/cmnemoi_learn/linear_regression.py index cb15b16..83f2bdc 100644 --- a/cmnemoi_learn/linear_regression.py +++ b/cmnemoi_learn/linear_regression.py @@ -4,6 +4,7 @@ from typing import Self import numpy as np +from numpy.linalg import inv class LinearRegression: @@ -15,9 +16,13 @@ class LinearRegression: def __init__(self) -> None: self.X = np.array([]) self.y = np.array([]) + self.theta = np.array([]) def fit(self, X: np.ndarray, y: np.ndarray) -> Self: - """Fit the Linear Regression model + """Fit the Linear Regression model with normal equations solution. + + The optimal parameters `theta` of the model are the ones which minimize + Residuals Sum of Squares : `RSS = Sum(y - X.theta)**2`. Args: X (np.ndarray): Inputs @@ -26,12 +31,13 @@ def fit(self, X: np.ndarray, y: np.ndarray) -> Self: Returns: LinearRegression: Fitted Linear Regression model. """ - self.X = X + self.X = self._get_inputs_with_bias_column(X) self.y = y + self.theta = inv(self.X.T @ self.X) @ (self.X.T @ self.y) return self - def predict(self, X: np.ndarray) -> Self: - """Predict new values with the Linear Regression model for the inputs given on arguments. + def predict(self, X: np.ndarray) -> np.ndarray: + """Predict new values with the Linear Regression model for the inputs given on argument. Args: X (np.ndarray): New inputs on which to predict. @@ -39,5 +45,18 @@ def predict(self, X: np.ndarray) -> Self: Returns: LinearRegression: Linear Regression model used to predict. """ - print(X) - return self + X = self._get_inputs_with_bias_column(X) + return X @ self.theta + + def _get_inputs_with_bias_column(self, X: np.ndarray) -> np.ndarray: + """Returns the inputs `X` with a `1`-filled bias column. + + Args: + X (np.ndarray): Model inputs + + Returns: + np.ndarray: New inputs with a bias column. + """ + number_of_rows = X.shape[0] + bias_column = np.ones((number_of_rows, 1)) + return np.hstack((bias_column, X)) diff --git a/mypy.ini b/mypy.ini new file mode 100644 index 0000000..5d5a0ca --- /dev/null +++ b/mypy.ini @@ -0,0 +1,2 @@ +[mypy-sklearn.*] +ignore_missing_imports = True \ No newline at end of file diff --git a/pyproject.toml b/pyproject.toml index 41f4b30..29d9a90 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "cmnemoi-learn" -version = "0.1.0" +version = "0.2.0" description = "Machine Learning from scratch by Charles-Meldhine Madi Mnemoi" authors = ["Charles-Meldhine Madi Mnemoi "] license = "MIT" diff --git a/tests/__init__.py b/tests/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/conftest.py b/tests/conftest.py new file mode 100644 index 0000000..6be2d49 --- /dev/null +++ b/tests/conftest.py @@ -0,0 +1,63 @@ +""" +Fixtures for unit tests +""" + +import numpy as np +from sklearn.datasets import make_regression, make_circles +import pytest + +BIAS = 5 +NOISE = 2 +NUMBER_OF_FEATURES = 2 +NUMBER_OF_SAMPLES = 50 +RANDOM_STATE = 42 + + +@pytest.fixture +def regression_circle_dataset() -> np.ndarray: + """Regression dataset which follows circles pattern + `X, y = regression_circle_dataset` to use + + Returns: + np.ndarray: The dataset + """ + return make_circles( + n_samples=NUMBER_OF_SAMPLES, shuffle=False, random_state=RANDOM_STATE + ) + + +@pytest.fixture +def regression_linear_dataset() -> np.ndarray: + """Regression dataset which follows a linear pattern + `X, y = regression_circle_dataset` to use + + Returns: + np.ndarray: The dataset + """ + return make_regression( + n_samples=NUMBER_OF_SAMPLES, + n_features=NUMBER_OF_FEATURES, + n_informative=NUMBER_OF_FEATURES, + bias=BIAS, + shuffle=False, + random_state=RANDOM_STATE, + ) + + +@pytest.fixture +def regression_linear_dataset_with_noise() -> np.ndarray: + """Regression dataset which follows a linear pattern + `X, y = regression_circle_dataset` to use + + Returns: + np.ndarray: The dataset + """ + return make_regression( + n_samples=NUMBER_OF_SAMPLES, + n_features=NUMBER_OF_FEATURES, + n_informative=NUMBER_OF_FEATURES, + bias=BIAS, + noise=NOISE, + shuffle=False, + random_state=RANDOM_STATE, + ) diff --git a/tests/test_linear_regression.py b/tests/test_linear_regression.py index cc00abb..70cf2f9 100644 --- a/tests/test_linear_regression.py +++ b/tests/test_linear_regression.py @@ -1,12 +1,61 @@ """ Unit tests for Linear Regression model """ +import numpy as np + +from sklearn.linear_model import LinearRegression as SklearnLinearRegression from cmnemoi_learn.linear_regression import LinearRegression +np.random.seed(42) + + +def test_linear_predict(regression_linear_dataset) -> None: + """ + Test `predict` against sklearn implementation. + """ + X, y = regression_linear_dataset + cmnemoi_model = LinearRegression() + cmnemoi_model = cmnemoi_model.fit(X, y) + + sklearn_model = SklearnLinearRegression() + sklearn_model = sklearn_model.fit(X, y) + + cmnemoi_prediction = cmnemoi_model.predict(X) + sklearn_prediction = sklearn_model.predict(X) + + assert np.allclose(cmnemoi_prediction, sklearn_prediction) -def test_predict() -> None: + +def test_linear_with_noise_predict(regression_linear_dataset_with_noise) -> None: """ - Test `predict` againt sklearn implementation. + Test `predict` against sklearn implementation. """ - LinearRegression() + X, y = regression_linear_dataset_with_noise + cmnemoi_model = LinearRegression() + cmnemoi_model = cmnemoi_model.fit(X, y) + + sklearn_model = SklearnLinearRegression() + sklearn_model = sklearn_model.fit(X, y) + + cmnemoi_prediction = cmnemoi_model.predict(X) + sklearn_prediction = sklearn_model.predict(X) + + assert np.allclose(cmnemoi_prediction, sklearn_prediction) + + +def test_circle_predict(regression_circle_dataset) -> None: + """ + Test `predict` against sklearn implementation. + """ + X, y = regression_circle_dataset + cmnemoi_model = LinearRegression() + cmnemoi_model = cmnemoi_model.fit(X, y) + + sklearn_model = SklearnLinearRegression() + sklearn_model = sklearn_model.fit(X, y) + + cmnemoi_prediction = cmnemoi_model.predict(X) + sklearn_prediction = sklearn_model.predict(X) + + assert np.allclose(cmnemoi_prediction, sklearn_prediction)