Skip to content

Commit

Permalink
feat: Add Linear Regression (#4)
Browse files Browse the repository at this point in the history
* style: fix linter

* test: add tests

* feat: add LinearRegression .fit and .predict method

* chore: bump version to 0.2

* refactor: put fixtures parameters into constants

* fix: add a bias column to the LinearRegression model

* style: apply linter fixes
  • Loading branch information
cmnemoi authored Aug 6, 2023
1 parent 1b0b237 commit ac0366c
Show file tree
Hide file tree
Showing 6 changed files with 143 additions and 10 deletions.
31 changes: 25 additions & 6 deletions cmnemoi_learn/linear_regression.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@

from typing import Self
import numpy as np
from numpy.linalg import inv


class LinearRegression:
Expand All @@ -15,9 +16,13 @@ class LinearRegression:
def __init__(self) -> None:
self.X = np.array([])
self.y = np.array([])
self.theta = np.array([])

def fit(self, X: np.ndarray, y: np.ndarray) -> Self:
"""Fit the Linear Regression model
"""Fit the Linear Regression model with normal equations solution.
The optimal parameters `theta` of the model are the ones which minimize
the Residual Sum of Squares: `RSS = Sum((y - X @ theta)**2)`.
Args:
X (np.ndarray): Inputs
Expand All @@ -26,18 +31,32 @@ def fit(self, X: np.ndarray, y: np.ndarray) -> Self:
Returns:
LinearRegression: Fitted Linear Regression model.
"""
self.X = X
self.X = self._get_inputs_with_bias_column(X)
self.y = y
self.theta = inv(self.X.T @ self.X) @ (self.X.T @ self.y)
return self

def predict(self, X: np.ndarray) -> Self:
"""Predict new values with the Linear Regression model for the inputs given on arguments.
def predict(self, X: np.ndarray) -> np.ndarray:
"""Predict new values with the Linear Regression model for the inputs given as argument.
Args:
X (np.ndarray): New inputs on which to predict.
Returns:
np.ndarray: Values predicted by the model for the inputs `X`.
"""
print(X)
return self
X = self._get_inputs_with_bias_column(X)
return X @ self.theta

def _get_inputs_with_bias_column(self, X: np.ndarray) -> np.ndarray:
"""Returns the inputs `X` with a `1`-filled bias column.
Args:
X (np.ndarray): Model inputs
Returns:
np.ndarray: New inputs with a bias column.
"""
number_of_rows = X.shape[0]
bias_column = np.ones((number_of_rows, 1))
return np.hstack((bias_column, X))
2 changes: 2 additions & 0 deletions mypy.ini
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
[mypy-sklearn.*]
ignore_missing_imports = True
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "cmnemoi-learn"
version = "0.1.0"
version = "0.2.0"
description = "Machine Learning from scratch by Charles-Meldhine Madi Mnemoi"
authors = ["Charles-Meldhine Madi Mnemoi <[email protected]>"]
license = "MIT"
Expand Down
Empty file added tests/__init__.py
Empty file.
63 changes: 63 additions & 0 deletions tests/conftest.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
"""
Fixtures for unit tests
"""

import numpy as np
from sklearn.datasets import make_regression, make_circles
import pytest

# Parameters shared by the dataset fixtures below.
BIAS = 5  # `bias` passed to `make_regression`
NOISE = 2  # `noise` passed to `make_regression` by the noisy fixture
NUMBER_OF_FEATURES = 2  # used for both `n_features` and `n_informative`
NUMBER_OF_SAMPLES = 50  # `n_samples` of every generated dataset
RANDOM_STATE = 42  # fixed `random_state` passed to the dataset generators


@pytest.fixture
def regression_circle_dataset() -> tuple[np.ndarray, np.ndarray]:
    """Regression dataset which follows a circles pattern.

    Use it as `X, y = regression_circle_dataset`.

    Returns:
        tuple[np.ndarray, np.ndarray]: The inputs `X` and the targets `y`,
            as generated by `make_circles`.
    """
    return make_circles(
        n_samples=NUMBER_OF_SAMPLES, shuffle=False, random_state=RANDOM_STATE
    )


@pytest.fixture
def regression_linear_dataset() -> tuple[np.ndarray, np.ndarray]:
    """Regression dataset which follows a linear pattern.

    Use it as `X, y = regression_linear_dataset`.

    Returns:
        tuple[np.ndarray, np.ndarray]: The inputs `X` and the targets `y`,
            as generated by `make_regression`.
    """
    return make_regression(
        n_samples=NUMBER_OF_SAMPLES,
        n_features=NUMBER_OF_FEATURES,
        n_informative=NUMBER_OF_FEATURES,
        bias=BIAS,
        shuffle=False,
        random_state=RANDOM_STATE,
    )


@pytest.fixture
def regression_linear_dataset_with_noise() -> tuple[np.ndarray, np.ndarray]:
    """Regression dataset which follows a linear pattern, with noise added.

    Use it as `X, y = regression_linear_dataset_with_noise`.

    Returns:
        tuple[np.ndarray, np.ndarray]: The inputs `X` and the targets `y`,
            as generated by `make_regression` with `noise=NOISE`.
    """
    return make_regression(
        n_samples=NUMBER_OF_SAMPLES,
        n_features=NUMBER_OF_FEATURES,
        n_informative=NUMBER_OF_FEATURES,
        bias=BIAS,
        noise=NOISE,
        shuffle=False,
        random_state=RANDOM_STATE,
    )
55 changes: 52 additions & 3 deletions tests/test_linear_regression.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,61 @@
"""
Unit tests for Linear Regression model
"""
import numpy as np

from sklearn.linear_model import LinearRegression as SklearnLinearRegression

from cmnemoi_learn.linear_regression import LinearRegression

# Seed NumPy's global random number generator at import time.
np.random.seed(42)


def test_linear_predict(regression_linear_dataset) -> None:
    """Check `predict` against the sklearn implementation on the linear dataset."""
    X, y = regression_linear_dataset

    # Both `fit` methods return the fitted model, so the calls can be chained.
    cmnemoi_prediction = LinearRegression().fit(X, y).predict(X)
    sklearn_prediction = SklearnLinearRegression().fit(X, y).predict(X)

    assert np.allclose(cmnemoi_prediction, sklearn_prediction)

def test_predict() -> None:

def test_linear_with_noise_predict(regression_linear_dataset_with_noise) -> None:
"""
Test `predict` againt sklearn implementation.
Test `predict` against sklearn implementation.
"""
LinearRegression()
X, y = regression_linear_dataset_with_noise
cmnemoi_model = LinearRegression()
cmnemoi_model = cmnemoi_model.fit(X, y)

sklearn_model = SklearnLinearRegression()
sklearn_model = sklearn_model.fit(X, y)

cmnemoi_prediction = cmnemoi_model.predict(X)
sklearn_prediction = sklearn_model.predict(X)

assert np.allclose(cmnemoi_prediction, sklearn_prediction)


def test_circle_predict(regression_circle_dataset) -> None:
    """Check `predict` against the sklearn implementation on the circles dataset."""
    X, y = regression_circle_dataset

    # Both `fit` methods return the fitted model, so the calls can be chained.
    cmnemoi_prediction = LinearRegression().fit(X, y).predict(X)
    sklearn_prediction = SklearnLinearRegression().fit(X, y).predict(X)

    assert np.allclose(cmnemoi_prediction, sklearn_prediction)

0 comments on commit ac0366c

Please sign in to comment.