diff --git a/skfda/misc/scoring.py b/skfda/misc/scoring.py index 7491b92ff..2c1546e1d 100644 --- a/skfda/misc/scoring.py +++ b/skfda/misc/scoring.py @@ -11,7 +11,7 @@ from typing_extensions import Literal, Protocol from .._utils import nquad_vec -from ..representation import FData, FDataBasis, FDataGrid +from ..representation import FData, FDataBasis, FDataGrid, FDataIrregular from ..representation._functional_data import EvalPointsType from ..typing._numpy import NDArrayFloat @@ -125,6 +125,37 @@ def _multioutput_score_grid( return float(np.mean(score.integrate()[0]) / _domain_measure(score)) +def _integral_average_fdatairregular( + score: FDataIrregular, + squared: bool = True, + weights: NDArrayFloat | None = None, +) -> float: + """Calculate the weighted average of the normalized integrals of the score. + + The integral of the score is normalized because each integral is divided by + the length of the curve's domain. + + If the score is vector-valued, then the mean of each codimension integral + is calculated for every functional observation. + + Args: + score: Score of the functions. + squared: If False, the square root is taken. + weights: Weights for the mean. + """ + if score.dim_domain != 1: + raise ValueError( + "Only univariate FDataIrregular objects are supported", + ) + if not squared: + score = np.sqrt(score) + + integrals = np.mean(score.integrate(), axis=1) + lebesgue_measures = np.diff(score.sample_range, axis=-1).reshape(-1) + normalized_integrals = integrals / lebesgue_measures + return np.average(normalized_integrals, weights=weights) + + @overload def explained_variance_score( y_true: DataType, @@ -361,8 +392,9 @@ def mean_absolute_error( where :math:`D` is the function domain and :math:`V` the volume of that domain. - For :class:`~skfda.representation.FDataBasis` only - 'uniform_average' is available. + For :class:`~skfda.representation.FDataBasis` and + :class:`~skfda.representation.FDataIrregular` only 'uniform_average' is + available. 
If :math:`y\_true` and :math:`y\_pred` are numpy arrays, sklearn function is called. @@ -378,8 +410,10 @@ def mean_absolute_error( Mean absolute error. If multioutput = 'uniform_average' or - :math:`y\_pred` and :math:`y\_true` are - :class:`~skfda.representation.FDataBasis` objects, float is returned. + :math:`y\_pred` and :math:`y\_true` are both + :class:`~skfda.representation.FDataBasis` or both + :class:`~skfda.representation.FDataIrregular` objects, float is + returned. If both :math:`y\_pred` and :math:`y\_true` are :class:`~skfda.representation.FDataGrid` @@ -412,6 +446,20 @@ def _mean_absolute_error_fdatagrid( return _multioutput_score_grid(error, multioutput) +@mean_absolute_error.register # type: ignore[attr-defined, misc] +def _mean_absolute_error_fdatairregular( + y_true: FDataIrregular, + y_pred: FDataIrregular, + *, + sample_weight: Optional[NDArrayFloat] = None, + multioutput: MultiOutputType = 'uniform_average', +) -> float: + return _integral_average_fdatairregular( + np.abs(y_true - y_pred), + weights=sample_weight, + ) + + @mean_absolute_error.register # type: ignore[attr-defined, misc] def _mean_absolute_error_fdatabasis( y_true: FDataBasis, @@ -491,8 +539,9 @@ def mean_absolute_percentage_error( where :math:`D` is the function domain and :math:`V` the volume of that domain. - For :class:`~skfda.representation.FDataBasis` only - 'uniform_average' is available. + For :class:`~skfda.representation.FDataBasis` and + :class:`~skfda.representation.FDataIrregular` only 'uniform_average' is + available. If :math:`y\_true` and :math:`y\_pred` are numpy arrays, sklearn function is called. @@ -511,8 +560,10 @@ def mean_absolute_percentage_error( Mean absolute percentage error. If multioutput = 'uniform_average' or - :math:`y\_pred` and :math:`y\_true` are - :class:`~skfda.representation.FDataBasis` objects, float is returned. 
+ :math:`y\_pred` and :math:`y\_true` are both + :class:`~skfda.representation.FDataBasis` or both + :class:`~skfda.representation.FDataIrregular` objects, float is + returned. If both :math:`y\_pred` and :math:`y\_true` are :class:`~skfda.representation.FDataGrid` @@ -554,6 +605,23 @@ def _mean_absolute_percentage_error_fdatagrid( return _multioutput_score_grid(error, multioutput) +@mean_absolute_percentage_error.register # type: ignore[attr-defined, misc] +def _mean_absolute_percentage_error_fdatairregular( + y_true: FDataIrregular, + y_pred: FDataIrregular, + *, + sample_weight: Optional[NDArrayFloat] = None, + multioutput: MultiOutputType = 'uniform_average', +) -> float: + epsilon = np.finfo(np.float64).eps + + if np.any(np.abs(y_true.values) < epsilon): + warnings.warn('Zero denominator', RuntimeWarning) + + mape = np.abs(y_pred - y_true) / np.maximum(np.abs(y_true), epsilon) + return _integral_average_fdatairregular(mape, weights=sample_weight) + + @mean_absolute_percentage_error.register # type: ignore[attr-defined, misc] def _mean_absolute_percentage_error_fdatabasis( y_true: FDataBasis, @@ -644,8 +712,9 @@ def mean_squared_error( where :math:`D` is the function domain and :math:`V` the volume of that domain. - For :class:`~skfda.representation.FDataBasis` only - 'uniform_average' is available. + For :class:`~skfda.representation.FDataBasis` and + :class:`~skfda.representation.FDataIrregular` only 'uniform_average' is + available. If :math:`y\_true` and :math:`y\_pred` are numpy arrays, sklearn function is called. @@ -662,8 +731,10 @@ def mean_squared_error( Mean squared error. If multioutput = 'uniform_average' or - :math:`y\_pred` and :math:`y\_true` are - :class:`~skfda.representation.FDataBasis` objects, float is returned. + :math:`y\_pred` and :math:`y\_true` are both + :class:`~skfda.representation.FDataBasis` or both + :class:`~skfda.representation.FDataIrregular` objects, float is + returned. 
If both :math:`y\_pred` and :math:`y\_true` are :class:`~skfda.representation.FDataGrid` @@ -702,6 +773,22 @@ def _mean_squared_error_fdatagrid( return _multioutput_score_grid(error, multioutput, squared=squared) +@mean_squared_error.register # type: ignore[attr-defined, misc] +def _mean_squared_error_fdatairregular( + y_true: FDataIrregular, + y_pred: FDataIrregular, + *, + sample_weight: Optional[NDArrayFloat] = None, + multioutput: MultiOutputType = 'uniform_average', + squared: bool = True, +) -> float: + return _integral_average_fdatairregular( + np.power(y_true - y_pred, 2), + weights=sample_weight, + squared=squared, + ) + + @mean_squared_error.register # type: ignore[attr-defined, misc] def _mean_squared_error_fdatabasis( y_true: FDataBasis, @@ -791,8 +878,9 @@ def mean_squared_log_error( where :math:`D` is the function domain and :math:`V` the volume of that domain. - For :class:`~skfda.representation.FDataBasis` only - 'uniform_average' is available. + For :class:`~skfda.representation.FDataBasis` and + :class:`~skfda.representation.FDataIrregular` only 'uniform_average' is + available. If :math:`y\_true` and :math:`y\_pred` are numpy arrays, sklearn function is called. @@ -812,8 +900,10 @@ def mean_squared_log_error( Mean squared log error. If multioutput = 'uniform_average' or - :math:`y\_pred` and :math:`y\_true` are - :class:`~skfda.representation.FDataBasis` objects, float is returned. + :math:`y\_pred` and :math:`y\_true` are both + :class:`~skfda.representation.FDataBasis` or both + :class:`~skfda.representation.FDataIrregular` objects, float is + returned. 
If both :math:`y\_pred` and :math:`y\_true` are :class:`~skfda.representation.FDataGrid` @@ -860,6 +950,30 @@ def _mean_squared_log_error_fdatagrid( ) +@mean_squared_log_error.register # type: ignore[attr-defined, misc] +def _mean_squared_log_error_fdatairregular( + y_true: FDataIrregular, + y_pred: FDataIrregular, + *, + sample_weight: Optional[NDArrayFloat] = None, + multioutput: MultiOutputType = 'uniform_average', + squared: bool = True, +) -> float: + if np.any(y_true.values < 0) or np.any(y_pred.values < 0): + raise ValueError( + "Mean Squared Logarithmic Error cannot be used when " + "targets functions have negative values.", + ) + + return mean_squared_error( + np.log1p(y_true), + np.log1p(y_pred), + sample_weight=sample_weight, + multioutput=multioutput, + squared=squared, + ) + + @mean_squared_log_error.register # type: ignore[attr-defined, misc] def _mean_squared_log_error_fdatabasis( y_true: FDataBasis, diff --git a/skfda/tests/test_scoring.py b/skfda/tests/test_scoring.py index bdc288551..ac4a17750 100644 --- a/skfda/tests/test_scoring.py +++ b/skfda/tests/test_scoring.py @@ -4,9 +4,10 @@ from typing import Any, Optional, Sequence, Tuple import numpy as np +import pytest import sklearn.metrics -from skfda import FDataBasis, FDataGrid +from skfda import FDataBasis, FDataGrid, FDataIrregular from skfda.misc.scoring import ( ScoreFunction, explained_variance_score, @@ -32,6 +33,13 @@ r2_score, ) +irregular_score_functions: Sequence[ScoreFunction] = ( + mean_absolute_error, + mean_absolute_percentage_error, + mean_squared_error, + mean_squared_log_error, +) + def _create_data_basis() -> Tuple[FDataBasis, FDataBasis]: coef_true = [[1, 2, 3], [4, 5, 6]] @@ -461,3 +469,101 @@ def test_negative_msle(self) -> None: y_true_grid, y_pred_grid, ) + + +############### Test irregular data scoring #################### + + +@pytest.fixture(params=irregular_score_functions) +def irregular_score_function(request) -> ScoreFunction: + """Fixture to test score functions 
with irregular data.""" + return request.param + + +_y_true_grid, _y_pred_grid = _create_data_grid() +_y_true_irregular = FDataIrregular.from_fdatagrid(_y_true_grid) +_y_pred_irregular = FDataIrregular.from_fdatagrid(_y_pred_grid) + + +@pytest.fixture +def y_true_grid() -> FDataGrid: + """Fixture with FDataGrid true representation.""" + return _y_true_grid + + +@pytest.fixture +def y_pred_grid() -> FDataGrid: + """Fixture with FDataGrid prediction representation.""" + return _y_pred_grid + + +@pytest.fixture +def y_true_irregular() -> FDataIrregular: + """Fixture with FDataIrregular true representation. + + Same data as y_true_grid. + """ + return _y_true_irregular + + +@pytest.fixture +def y_pred_irregular() -> FDataIrregular: + """Fixture with FDataIrregular prediction representation. + + Same data as y_pred_grid. + """ + return _y_pred_irregular + + +def _cmp_score_functions( + y_true_grid: FDataGrid, + y_pred_grid: FDataGrid, + y_true_irregular: FDataIrregular, + y_pred_irregular: FDataIrregular, + irregular_score_function: ScoreFunction, + **kwargs: Any, +) -> None: + score_grid = irregular_score_function( + y_true_grid, + y_pred_grid, + **kwargs, + ) + score_irregular = irregular_score_function( + y_true_irregular, + y_pred_irregular, + **kwargs, + ) + np.testing.assert_allclose( + score_grid, score_irregular, + ) + + +def test_score_functions_irregular( + y_true_grid: FDataGrid, + y_pred_grid: FDataGrid, + y_true_irregular: FDataIrregular, + y_pred_irregular: FDataIrregular, + irregular_score_function: ScoreFunction, +) -> None: + """Test score functions with irregular data.""" + weight = np.array([3, 1]) + + try: + _cmp_score_functions( + y_true_grid, + y_pred_grid, + y_true_irregular, + y_pred_irregular, + irregular_score_function, + sample_weight=weight, + ) + except TypeError: + pass + + _cmp_score_functions( + y_true_grid, + y_pred_grid, + y_true_irregular, + y_pred_irregular, + irregular_score_function, + )