Skip to content

Commit

Permalink
Implement scores for FDataIrregular objects as described in #609
Browse files Browse the repository at this point in the history
(testing included to assert equality with the `FDataGrid` case)
  • Loading branch information
pcuestas committed Apr 1, 2024
1 parent 9730d51 commit 951dea3
Show file tree
Hide file tree
Showing 2 changed files with 238 additions and 18 deletions.
148 changes: 131 additions & 17 deletions skfda/misc/scoring.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
from typing_extensions import Literal, Protocol

from .._utils import nquad_vec
from ..representation import FData, FDataBasis, FDataGrid
from ..representation import FData, FDataBasis, FDataGrid, FDataIrregular
from ..representation._functional_data import EvalPointsType
from ..typing._numpy import NDArrayFloat

Expand Down Expand Up @@ -125,6 +125,37 @@ def _multioutput_score_grid(
return float(np.mean(score.integrate()[0]) / _domain_measure(score))


def _integral_average_fdatairregular(
    score: FDataIrregular,
    squared: bool = True,
    weights: NDArrayFloat | None = None,
) -> float:
    """Calculate the weighted average of the normalized integrals of the score.

    The integral of the score is normalized because each integral is divided
    by the length of the curve's domain (taken from ``score.sample_range``).

    If the score is vector-valued, then the mean of each codimension integral
    is calculated for every functional observation.

    Args:
        score: Score of the functions.
        squared: If False, the square root of the score is taken before it
            is integrated.
        weights: Weights for the mean. They are forwarded unchanged to
            :func:`numpy.average`.

    Returns:
        Weighted average of the normalized integrals, as a built-in float.

    Raises:
        ValueError: If ``score.dim_domain`` is not 1.

    """
    if score.dim_domain != 1:
        raise ValueError(
            "Only univariate FDataIrregular objects are supported",
        )

    if not squared:
        score = np.sqrt(score)

    # Collapse the second axis of the integrals so that a single value
    # remains per functional observation.
    integrals = np.mean(score.integrate(), axis=1)

    # Each observation is normalized by the length of its own range.
    lebesgue_measures = np.diff(score.sample_range, axis=-1).reshape(-1)
    normalized_integrals = integrals / lebesgue_measures

    # ``np.average`` yields a NumPy scalar; convert it so that the result
    # matches the annotated return type and ``_multioutput_score_grid``.
    return float(np.average(normalized_integrals, weights=weights))


@overload
def explained_variance_score(
y_true: DataType,
Expand Down Expand Up @@ -361,8 +392,9 @@ def mean_absolute_error(
where :math:`D` is the function domain and :math:`V` the volume of that
domain.
For :class:`~skfda.representation.FDataBasis` only
'uniform_average' is available.
For :class:`~skfda.representation.FDataBasis` and
:class:`~skfda.representation.FDataIrregular` only 'uniform_average' is
available.
If :math:`y\_true` and :math:`y\_pred` are numpy arrays, sklearn function
is called.
Expand All @@ -378,8 +410,10 @@ def mean_absolute_error(
Mean absolute error.
If multioutput = 'uniform_average' or
:math:`y\_pred` and :math:`y\_true` are
:class:`~skfda.representation.FDataBasis` objects, float is returned.
:math:`y\_pred` and :math:`y\_true` are both
:class:`~skfda.representation.FDataBasis` or both
:class:`~skfda.representation.FDataIrregular` objects, float is
returned.
If both :math:`y\_pred` and :math:`y\_true` are
:class:`~skfda.representation.FDataGrid`
Expand Down Expand Up @@ -412,6 +446,20 @@ def _mean_absolute_error_fdatagrid(
return _multioutput_score_grid(error, multioutput)


@mean_absolute_error.register  # type: ignore[attr-defined, misc]
def _mean_absolute_error_fdatairregular(
    y_true: FDataIrregular,
    y_pred: FDataIrregular,
    *,
    sample_weight: Optional[NDArrayFloat] = None,
    multioutput: MultiOutputType = 'uniform_average',
) -> float:
    """Mean absolute error between two ``FDataIrregular`` objects.

    The pointwise absolute difference is reduced with
    ``_integral_average_fdatairregular``, using ``sample_weight`` as the
    weights. ``multioutput`` is accepted but not used here.
    """
    absolute_error = np.abs(y_true - y_pred)
    return _integral_average_fdatairregular(
        absolute_error,
        weights=sample_weight,
    )


@mean_absolute_error.register # type: ignore[attr-defined, misc]
def _mean_absolute_error_fdatabasis(
y_true: FDataBasis,
Expand Down Expand Up @@ -491,8 +539,9 @@ def mean_absolute_percentage_error(
where :math:`D` is the function domain and :math:`V` the volume of that
domain.
For :class:`~skfda.representation.FDataBasis` only
'uniform_average' is available.
For :class:`~skfda.representation.FDataBasis` and
:class:`~skfda.representation.FDataIrregular` only 'uniform_average' is
available.
If :math:`y\_true` and :math:`y\_pred` are numpy arrays, sklearn function
is called.
Expand All @@ -511,8 +560,10 @@ def mean_absolute_percentage_error(
Mean absolute percentage error.
If multioutput = 'uniform_average' or
:math:`y\_pred` and :math:`y\_true` are
:class:`~skfda.representation.FDataBasis` objects, float is returned.
:math:`y\_pred` and :math:`y\_true` are both
:class:`~skfda.representation.FDataBasis` or both
:class:`~skfda.representation.FDataIrregular` objects, float is
returned.
If both :math:`y\_pred` and :math:`y\_true` are
:class:`~skfda.representation.FDataGrid`
Expand Down Expand Up @@ -554,6 +605,23 @@ def _mean_absolute_percentage_error_fdatagrid(
return _multioutput_score_grid(error, multioutput)


@mean_absolute_percentage_error.register  # type: ignore[attr-defined, misc]
def _mean_absolute_percentage_error_fdatairregular(
    y_true: FDataIrregular,
    y_pred: FDataIrregular,
    *,
    sample_weight: Optional[NDArrayFloat] = None,
    multioutput: MultiOutputType = 'uniform_average',
) -> float:
    """Mean absolute percentage error between two ``FDataIrregular`` objects.

    A ``RuntimeWarning`` is issued when any value of ``y_true`` is smaller
    in magnitude than the float64 machine epsilon; the denominator is
    clipped from below at that epsilon. ``multioutput`` is accepted but not
    used here.
    """
    eps = np.finfo(np.float64).eps

    tiny_denominator = np.any(np.abs(y_true.values) < eps)
    if tiny_denominator:
        warnings.warn('Zero denominator', RuntimeWarning)

    numerator = np.abs(y_pred - y_true)
    denominator = np.maximum(np.abs(y_true), eps)
    return _integral_average_fdatairregular(
        numerator / denominator,
        weights=sample_weight,
    )


@mean_absolute_percentage_error.register # type: ignore[attr-defined, misc]
def _mean_absolute_percentage_error_fdatabasis(
y_true: FDataBasis,
Expand Down Expand Up @@ -644,8 +712,9 @@ def mean_squared_error(
where :math:`D` is the function domain and :math:`V` the volume of that
domain.
For :class:`~skfda.representation.FDataBasis` only
'uniform_average' is available.
For :class:`~skfda.representation.FDataBasis` and
:class:`~skfda.representation.FDataIrregular` only 'uniform_average' is
available.
If :math:`y\_true` and :math:`y\_pred` are numpy arrays, sklearn function
is called.
Expand All @@ -662,8 +731,10 @@ def mean_squared_error(
Mean squared error.
If multioutput = 'uniform_average' or
:math:`y\_pred` and :math:`y\_true` are
:class:`~skfda.representation.FDataBasis` objects, float is returned.
:math:`y\_pred` and :math:`y\_true` are both
:class:`~skfda.representation.FDataBasis` or both
:class:`~skfda.representation.FDataIrregular` objects, float is
returned.
If both :math:`y\_pred` and :math:`y\_true` are
:class:`~skfda.representation.FDataGrid`
Expand Down Expand Up @@ -702,6 +773,22 @@ def _mean_squared_error_fdatagrid(
return _multioutput_score_grid(error, multioutput, squared=squared)


@mean_squared_error.register  # type: ignore[attr-defined, misc]
def _mean_squared_error_fdatairregular(
    y_true: FDataIrregular,
    y_pred: FDataIrregular,
    *,
    sample_weight: Optional[NDArrayFloat] = None,
    multioutput: MultiOutputType = 'uniform_average',
    squared: bool = True,
) -> float:
    """Mean squared error between two ``FDataIrregular`` objects.

    The pointwise squared difference is reduced with
    ``_integral_average_fdatairregular``, to which ``squared`` and
    ``sample_weight`` are forwarded. ``multioutput`` is accepted but not
    used here.
    """
    squared_error = np.power(y_true - y_pred, 2)
    return _integral_average_fdatairregular(
        squared_error,
        squared=squared,
        weights=sample_weight,
    )


@mean_squared_error.register # type: ignore[attr-defined, misc]
def _mean_squared_error_fdatabasis(
y_true: FDataBasis,
Expand Down Expand Up @@ -791,8 +878,9 @@ def mean_squared_log_error(
where :math:`D` is the function domain and :math:`V` the volume of that
domain.
For :class:`~skfda.representation.FDataBasis` only
'uniform_average' is available.
For :class:`~skfda.representation.FDataBasis` and
:class:`~skfda.representation.FDataIrregular` only 'uniform_average' is
available.
If :math:`y\_true` and :math:`y\_pred` are numpy arrays, sklearn function
is called.
Expand All @@ -812,8 +900,10 @@ def mean_squared_log_error(
Mean squared log error.
If multioutput = 'uniform_average' or
:math:`y\_pred` and :math:`y\_true` are
:class:`~skfda.representation.FDataBasis` objects, float is returned.
:math:`y\_pred` and :math:`y\_true` are both
:class:`~skfda.representation.FDataBasis` or both
:class:`~skfda.representation.FDataIrregular` objects, float is
returned.
If both :math:`y\_pred` and :math:`y\_true` are
:class:`~skfda.representation.FDataGrid`
Expand Down Expand Up @@ -860,6 +950,30 @@ def _mean_squared_log_error_fdatagrid(
)


@mean_squared_log_error.register  # type: ignore[attr-defined, misc]
def _mean_squared_log_error_fdatairregular(
    y_true: FDataIrregular,
    y_pred: FDataIrregular,
    *,
    sample_weight: Optional[NDArrayFloat] = None,
    multioutput: MultiOutputType = 'uniform_average',
    squared: bool = True,
) -> float:
    """Mean squared logarithmic error between two ``FDataIrregular`` objects.

    Both inputs are transformed with ``np.log1p`` and the result is
    delegated to ``mean_squared_error`` with the remaining arguments.

    Raises:
        ValueError: If ``y_true`` or ``y_pred`` contain negative values.
    """
    # Checked in the same order as the arguments: targets, then predictions.
    for fdata in (y_true, y_pred):
        if np.any(fdata.values < 0):
            raise ValueError(
                "Mean Squared Logarithmic Error cannot be used when "
                "targets functions have negative values.",
            )

    log_true = np.log1p(y_true)
    log_pred = np.log1p(y_pred)
    return mean_squared_error(
        log_true,
        log_pred,
        sample_weight=sample_weight,
        multioutput=multioutput,
        squared=squared,
    )


@mean_squared_log_error.register # type: ignore[attr-defined, misc]
def _mean_squared_log_error_fdatabasis(
y_true: FDataBasis,
Expand Down
108 changes: 107 additions & 1 deletion skfda/tests/test_scoring.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,10 @@
from typing import Any, Optional, Sequence, Tuple

import numpy as np
import pytest
import sklearn.metrics

from skfda import FDataBasis, FDataGrid
from skfda import FDataBasis, FDataGrid, FDataIrregular
from skfda.misc.scoring import (
ScoreFunction,
explained_variance_score,
Expand All @@ -32,6 +33,13 @@
r2_score,
)

# Score functions used to parametrize the irregular-data scoring tests.
irregular_score_functions: Sequence[ScoreFunction] = (
mean_absolute_error,
mean_absolute_percentage_error,
mean_squared_error,
mean_squared_log_error,
)


def _create_data_basis() -> Tuple[FDataBasis, FDataBasis]:
coef_true = [[1, 2, 3], [4, 5, 6]]
Expand Down Expand Up @@ -461,3 +469,101 @@ def test_negative_msle(self) -> None:
y_true_grid,
y_pred_grid,
)


############### Test irregular data scoring ####################


@pytest.fixture(params=irregular_score_functions)
def irregular_score_function(request) -> ScoreFunction:
    """Provide, one at a time, each score function in the fixture params."""
    score_function: ScoreFunction = request.param
    return score_function


# Shared reference data, built once at import time. The irregular objects
# are derived from the grid ones with ``FDataIrregular.from_fdatagrid``.
_y_true_grid, _y_pred_grid = _create_data_grid()
_y_true_irregular = FDataIrregular.from_fdatagrid(_y_true_grid)
_y_pred_irregular = FDataIrregular.from_fdatagrid(_y_pred_grid)


@pytest.fixture
def y_true_grid() -> FDataGrid:
    """Return the module-level true values as an FDataGrid."""
    true_grid: FDataGrid = _y_true_grid
    return true_grid


@pytest.fixture
def y_pred_grid() -> FDataGrid:
    """Return the module-level predictions as an FDataGrid."""
    pred_grid: FDataGrid = _y_pred_grid
    return pred_grid


@pytest.fixture
def y_true_irregular() -> FDataIrregular:
    """Return the module-level true values as an FDataIrregular.

    Holds the same data as the ``y_true_grid`` fixture.
    """
    true_irregular: FDataIrregular = _y_true_irregular
    return true_irregular


@pytest.fixture
def y_pred_irregular() -> FDataIrregular:
    """Return the module-level predictions as an FDataIrregular.

    Holds the same data as the ``y_pred_grid`` fixture.
    """
    pred_irregular: FDataIrregular = _y_pred_irregular
    return pred_irregular


def _cmp_score_functions(
    y_true_grid: FDataGrid,
    y_pred_grid: FDataGrid,
    y_true_irregular: FDataIrregular,
    y_pred_irregular: FDataIrregular,
    irregular_score_function: ScoreFunction,
    **kwargs: Any,
) -> None:
    """Assert that a score agrees on grid and irregular inputs.

    The score function is called once with the FDataGrid pair and once with
    the FDataIrregular pair, passing ``kwargs`` to both calls, and the two
    results are compared with ``np.testing.assert_allclose``.
    """
    grid_result = irregular_score_function(y_true_grid, y_pred_grid, **kwargs)
    irregular_result = irregular_score_function(
        y_true_irregular,
        y_pred_irregular,
        **kwargs,
    )
    np.testing.assert_allclose(grid_result, irregular_result)


def test_score_functions_irregular(
    y_true_grid: FDataGrid,
    y_pred_grid: FDataGrid,
    y_true_irregular: FDataIrregular,
    y_pred_irregular: FDataIrregular,
    irregular_score_function: ScoreFunction,
) -> None:
    """Test score functions with irregular data.

    The score computed on the FDataIrregular inputs must match the one
    computed on the FDataGrid inputs, both with and without sample weights.
    """
    weight = np.array([3, 1])

    # The weighted comparison is not wrapped in ``try/except TypeError``:
    # swallowing that error would let a broken ``sample_weight`` path pass
    # silently, so any exception raised here must fail the test.
    _cmp_score_functions(
        y_true_grid,
        y_pred_grid,
        y_true_irregular,
        y_pred_irregular,
        irregular_score_function,
        sample_weight=weight,
    )

    _cmp_score_functions(
        y_true_grid,
        y_pred_grid,
        y_true_irregular,
        y_pred_irregular,
        irregular_score_function,
    )

0 comments on commit 951dea3

Please sign in to comment.