Implement scores for FDataIrregular objects as described in #609

(testing included to assert equality with the `FDataGrid` case)
GAA-UAM · Apr 1, 2024 · 951dea3 · 951dea3
1 parent 9730d51
commit 951dea3
Show file tree

Hide file tree

Showing 2 changed files with 238 additions and 18 deletions.
diff --git a/skfda/misc/scoring.py b/skfda/misc/scoring.py
@@ -11,7 +11,7 @@
 from typing_extensions import Literal, Protocol
 
 from .._utils import nquad_vec
-from ..representation import FData, FDataBasis, FDataGrid
+from ..representation import FData, FDataBasis, FDataGrid, FDataIrregular
 from ..representation._functional_data import EvalPointsType
 from ..typing._numpy import NDArrayFloat
 
@@ -125,6 +125,37 @@ def _multioutput_score_grid(
     return float(np.mean(score.integrate()[0]) / _domain_measure(score))
 
 
+def _integral_average_fdatairregular(
+    score: FDataIrregular,
+    squared: bool = True,
+    weights: NDArrayFloat | None = None,
+) -> float:
+    """Calculate the weighted average of the normalized integrals of the score.
+
+    The integral of each observation is normalized by dividing it by the
+    length of that observation's own sample range (``score.sample_range``).
+
+    If the score is vector-valued, then the mean of each codimension integral
+    is calculated for every functional observation.
+
+    Args:
+        score: Score of the functions.
+        squared: If False, the square root of the score is taken before it
+            is integrated.
+        weights: Weights for the mean over the observations. They are
+            forwarded unchanged to :func:`numpy.average`.
+
+    Returns:
+        Weighted average of the normalized integrals, as a built-in float.
+
+    Raises:
+        ValueError: If the domain of ``score`` is not one-dimensional.
+    """
+    if score.dim_domain != 1:
+        raise ValueError(
+            "Only univariate FDataIrregular objects are supported",
+        )
+    if not squared:
+        score = np.sqrt(score)
+
+    # One value per observation: average of the codimension integrals.
+    integrals = np.mean(score.integrate(), axis=1)
+    # Length of the sample range of each observation, flattened to 1-D.
+    lebesgue_measures = np.diff(score.sample_range, axis=-1).reshape(-1)
+    normalized_integrals = integrals / lebesgue_measures
+    # ``np.average`` yields a NumPy scalar; cast it so the result matches
+    # the annotated return type, as the FDataGrid helper does.
+    return float(np.average(normalized_integrals, weights=weights))
+
+
 @overload
 def explained_variance_score(
     y_true: DataType,
@@ -361,8 +392,9 @@ def mean_absolute_error(
     where :math:`D` is the function domain and :math:`V` the volume of that
     domain.
 
-    For :class:`~skfda.representation.FDataBasis` only
-    'uniform_average' is available.
+    For :class:`~skfda.representation.FDataBasis` and
+    :class:`~skfda.representation.FDataIrregular` only 'uniform_average' is
+    available.
 
     If :math:`y\_true` and :math:`y\_pred` are numpy arrays, sklearn function
     is called.
@@ -378,8 +410,10 @@ def mean_absolute_error(
         Mean absolute error.
 
         If multioutput = 'uniform_average' or
-        :math:`y\_pred` and :math:`y\_true` are
-        :class:`~skfda.representation.FDataBasis` objects, float is returned.
+        :math:`y\_pred` and :math:`y\_true` are both
+        :class:`~skfda.representation.FDataBasis` or both
+        :class:`~skfda.representation.FDataIrregular` objects, float is
+        returned.
 
         If both :math:`y\_pred` and :math:`y\_true` are
         :class:`~skfda.representation.FDataGrid`
@@ -412,6 +446,20 @@ def _mean_absolute_error_fdatagrid(
     return _multioutput_score_grid(error, multioutput)
 
 
+@mean_absolute_error.register  # type: ignore[attr-defined, misc]
+def _mean_absolute_error_fdatairregular(
+    y_true: FDataIrregular,
+    y_pred: FDataIrregular,
+    *,
+    sample_weight: Optional[NDArrayFloat] = None,
+    multioutput: MultiOutputType = 'uniform_average',
+) -> float:
+    """Mean absolute error between two FDataIrregular objects.
+
+    The pointwise absolute difference is averaged with
+    ``_integral_average_fdatairregular``, using ``sample_weight`` as the
+    weights. ``multioutput`` is accepted but not read by this
+    implementation.
+    """
+    absolute_error = np.abs(y_true - y_pred)
+    return _integral_average_fdatairregular(
+        absolute_error,
+        weights=sample_weight,
+    )
+
+
 @mean_absolute_error.register  # type: ignore[attr-defined, misc]
 def _mean_absolute_error_fdatabasis(
     y_true: FDataBasis,
@@ -491,8 +539,9 @@ def mean_absolute_percentage_error(
     where :math:`D` is the function domain and :math:`V` the volume of that
     domain.
 
-    For :class:`~skfda.representation.FDataBasis` only
-    'uniform_average' is available.
+    For :class:`~skfda.representation.FDataBasis` and
+    :class:`~skfda.representation.FDataIrregular` only 'uniform_average' is
+    available.
 
     If :math:`y\_true` and :math:`y\_pred` are numpy arrays, sklearn function
     is called.
@@ -511,8 +560,10 @@ def mean_absolute_percentage_error(
         Mean absolute percentage error.
 
         If multioutput = 'uniform_average' or
-        :math:`y\_pred` and :math:`y\_true` are
-        :class:`~skfda.representation.FDataBasis` objects, float is returned.
+        :math:`y\_pred` and :math:`y\_true` are both
+        :class:`~skfda.representation.FDataBasis` or both
+        :class:`~skfda.representation.FDataIrregular` objects, float is
+        returned.
 
         If both :math:`y\_pred` and :math:`y\_true` are
         :class:`~skfda.representation.FDataGrid`
@@ -554,6 +605,23 @@ def _mean_absolute_percentage_error_fdatagrid(
     return _multioutput_score_grid(error, multioutput)
 
 
+@mean_absolute_percentage_error.register  # type: ignore[attr-defined, misc]
+def _mean_absolute_percentage_error_fdatairregular(
+    y_true: FDataIrregular,
+    y_pred: FDataIrregular,
+    *,
+    sample_weight: Optional[NDArrayFloat] = None,
+    multioutput: MultiOutputType = 'uniform_average',
+) -> float:
+    """Mean absolute percentage error between two FDataIrregular objects.
+
+    A ``RuntimeWarning`` is issued when any value of ``y_true`` is smaller
+    in magnitude than the float64 machine epsilon. The denominator is
+    bounded below by that epsilon. ``multioutput`` is accepted but not read
+    by this implementation.
+    """
+    tiny = np.finfo(np.float64).eps
+
+    has_near_zero_target = np.any(np.abs(y_true.values) < tiny)
+    if has_near_zero_target:
+        warnings.warn('Zero denominator', RuntimeWarning)
+
+    absolute_error = np.abs(y_pred - y_true)
+    # Bound the denominator away from zero to avoid dividing by zero.
+    denominator = np.maximum(np.abs(y_true), tiny)
+    return _integral_average_fdatairregular(
+        absolute_error / denominator,
+        weights=sample_weight,
+    )
+
+
 @mean_absolute_percentage_error.register  # type: ignore[attr-defined, misc]
 def _mean_absolute_percentage_error_fdatabasis(
     y_true: FDataBasis,
@@ -644,8 +712,9 @@ def mean_squared_error(
     where :math:`D` is the function domain and :math:`V` the volume of that
     domain.
 
-    For :class:`~skfda.representation.FDataBasis` only
-    'uniform_average' is available.
+    For :class:`~skfda.representation.FDataBasis` and
+    :class:`~skfda.representation.FDataIrregular` only 'uniform_average' is
+    available.
 
     If :math:`y\_true` and :math:`y\_pred` are numpy arrays, sklearn function
     is called.
@@ -662,8 +731,10 @@ def mean_squared_error(
         Mean squared error.
 
         If multioutput = 'uniform_average' or
-        :math:`y\_pred` and :math:`y\_true` are
-        :class:`~skfda.representation.FDataBasis` objects, float is returned.
+        :math:`y\_pred` and :math:`y\_true` are both
+        :class:`~skfda.representation.FDataBasis` or both
+        :class:`~skfda.representation.FDataIrregular` objects, float is
+        returned.
 
         If both :math:`y\_pred` and :math:`y\_true` are
         :class:`~skfda.representation.FDataGrid`
@@ -702,6 +773,22 @@ def _mean_squared_error_fdatagrid(
     return _multioutput_score_grid(error, multioutput, squared=squared)
 
 
+@mean_squared_error.register  # type: ignore[attr-defined, misc]
+def _mean_squared_error_fdatairregular(
+    y_true: FDataIrregular,
+    y_pred: FDataIrregular,
+    *,
+    sample_weight: Optional[NDArrayFloat] = None,
+    multioutput: MultiOutputType = 'uniform_average',
+    squared: bool = True,
+) -> float:
+    """Mean squared error between two FDataIrregular objects.
+
+    The pointwise squared difference is passed to
+    ``_integral_average_fdatairregular`` together with ``sample_weight``
+    and ``squared``. ``multioutput`` is accepted but not read by this
+    implementation.
+    """
+    squared_error = np.power(y_true - y_pred, 2)
+    return _integral_average_fdatairregular(
+        squared_error,
+        squared=squared,
+        weights=sample_weight,
+    )
+
+
 @mean_squared_error.register  # type: ignore[attr-defined, misc]
 def _mean_squared_error_fdatabasis(
     y_true: FDataBasis,
@@ -791,8 +878,9 @@ def mean_squared_log_error(
     where :math:`D` is the function domain and :math:`V` the volume of that
     domain.
 
-    For :class:`~skfda.representation.FDataBasis` only
-    'uniform_average' is available.
+    For :class:`~skfda.representation.FDataBasis` and
+    :class:`~skfda.representation.FDataIrregular` only 'uniform_average' is
+    available.
 
     If :math:`y\_true` and :math:`y\_pred` are numpy arrays, sklearn function
     is called.
@@ -812,8 +900,10 @@ def mean_squared_log_error(
         Mean squared log error.
 
         If multioutput = 'uniform_average' or
-        :math:`y\_pred` and :math:`y\_true` are
-        :class:`~skfda.representation.FDataBasis` objects, float is returned.
+        :math:`y\_pred` and :math:`y\_true` are both
+        :class:`~skfda.representation.FDataBasis` or both
+        :class:`~skfda.representation.FDataIrregular` objects, float is
+        returned.
 
         If both :math:`y\_pred` and :math:`y\_true` are
         :class:`~skfda.representation.FDataGrid`
@@ -860,6 +950,30 @@ def _mean_squared_log_error_fdatagrid(
     )
 
 
+@mean_squared_log_error.register  # type: ignore[attr-defined, misc]
+def _mean_squared_log_error_fdatairregular(
+    y_true: FDataIrregular,
+    y_pred: FDataIrregular,
+    *,
+    sample_weight: Optional[NDArrayFloat] = None,
+    multioutput: MultiOutputType = 'uniform_average',
+    squared: bool = True,
+) -> float:
+    """Mean squared logarithmic error between two FDataIrregular objects.
+
+    Both inputs are transformed with ``np.log1p`` and the result is
+    delegated to ``mean_squared_error`` with the remaining arguments
+    passed through.
+
+    Raises:
+        ValueError: If ``y_true`` or ``y_pred`` contains negative values.
+    """
+    # ``y_true`` is checked first, then ``y_pred``.
+    for fdata in (y_true, y_pred):
+        if np.any(fdata.values < 0):
+            raise ValueError(
+                "Mean Squared Logarithmic Error cannot be used when "
+                "targets functions have negative values.",
+            )
+
+    log_true = np.log1p(y_true)
+    log_pred = np.log1p(y_pred)
+    return mean_squared_error(
+        log_true,
+        log_pred,
+        sample_weight=sample_weight,
+        multioutput=multioutput,
+        squared=squared,
+    )
+
+
 @mean_squared_log_error.register  # type: ignore[attr-defined, misc]
 def _mean_squared_log_error_fdatabasis(
     y_true: FDataBasis,

diff --git a/skfda/tests/test_scoring.py b/skfda/tests/test_scoring.py
@@ -4,9 +4,10 @@
 from typing import Any, Optional, Sequence, Tuple
 
 import numpy as np
+import pytest
 import sklearn.metrics
 
-from skfda import FDataBasis, FDataGrid
+from skfda import FDataBasis, FDataGrid, FDataIrregular
 from skfda.misc.scoring import (
     ScoreFunction,
     explained_variance_score,
@@ -32,6 +33,13 @@
     r2_score,
 )
 
+# Score functions used to parametrize the ``irregular_score_function``
+# fixture of the irregular-data tests.
+irregular_score_functions: Sequence[ScoreFunction] = (
+    mean_absolute_error,
+    mean_absolute_percentage_error,
+    mean_squared_error,
+    mean_squared_log_error,
+)
+
 
 def _create_data_basis() -> Tuple[FDataBasis, FDataBasis]:
     coef_true = [[1, 2, 3], [4, 5, 6]]
@@ -461,3 +469,101 @@ def test_negative_msle(self) -> None:
             y_true_grid,
             y_pred_grid,
         )
+
+
+############### Test irregular data scoring ####################
+
+
+@pytest.fixture(params=irregular_score_functions)
+def irregular_score_function(request) -> ScoreFunction:
+    """Fixture to test score functions with irregular data.
+
+    Parametrized over ``irregular_score_functions``; returns the score
+    function selected for the current parametrization.
+    """
+    return request.param
+
+
+# Built once at import time. The irregular objects are converted from the
+# grid ones, and the fixtures below return these same module-level objects.
+_y_true_grid, _y_pred_grid = _create_data_grid()
+_y_true_irregular = FDataIrregular.from_fdatagrid(_y_true_grid)
+_y_pred_irregular = FDataIrregular.from_fdatagrid(_y_pred_grid)
+
+
+@pytest.fixture
+def y_true_grid() -> FDataGrid:
+    """Fixture with FDataGrid true representation.
+
+    Returns the module-level ``_y_true_grid`` object (not a copy).
+    """
+    return _y_true_grid
+
+
+@pytest.fixture
+def y_pred_grid() -> FDataGrid:
+    """Fixture with FDataGrid prediction representation.
+
+    Returns the module-level ``_y_pred_grid`` object (not a copy).
+    """
+    return _y_pred_grid
+
+
+@pytest.fixture
+def y_true_irregular() -> FDataIrregular:
+    """Fixture with FDataIrregular true representation.
+
+    Same data as y_true_grid. Returns the module-level
+    ``_y_true_irregular`` object (not a copy).
+    """
+    return _y_true_irregular
+
+
+@pytest.fixture
+def y_pred_irregular() -> FDataIrregular:
+    """Fixture with FDataIrregular prediction representation.
+
+    Same data as y_pred_grid. Returns the module-level
+    ``_y_pred_irregular`` object (not a copy).
+    """
+    return _y_pred_irregular
+
+
+def _cmp_score_functions(
+    y_true_grid: FDataGrid,
+    y_pred_grid: FDataGrid,
+    y_true_irregular: FDataIrregular,
+    y_pred_irregular: FDataIrregular,
+    irregular_score_function: ScoreFunction,
+    **kwargs: Any,
+) -> None:
+    """Assert that a score agrees between grid and irregular data.
+
+    The score function is evaluated first on the FDataGrid pair and then
+    on the FDataIrregular pair, with the same keyword arguments, and both
+    results are compared with ``np.testing.assert_allclose``.
+    """
+    score_on_grid, score_on_irregular = (
+        irregular_score_function(y_true, y_pred, **kwargs)
+        for y_true, y_pred in (
+            (y_true_grid, y_pred_grid),
+            (y_true_irregular, y_pred_irregular),
+        )
+    )
+    np.testing.assert_allclose(score_on_grid, score_on_irregular)
+
+
+def test_score_functions_irregular(
+    y_true_grid: FDataGrid,
+    y_pred_grid: FDataGrid,
+    y_true_irregular: FDataIrregular,
+    y_pred_irregular: FDataIrregular,
+    irregular_score_function: ScoreFunction,
+) -> None:
+    """Test score functions with irregular data.
+
+    The grid/irregular comparison is run with sample weights and then
+    again without them.
+    """
+    sample_weight = np.array([3, 1])
+    compared_data = (
+        y_true_grid,
+        y_pred_grid,
+        y_true_irregular,
+        y_pred_irregular,
+    )
+
+    # NOTE(review): a TypeError raised by the weighted comparison is
+    # silently ignored; presumably meant for score functions that do not
+    # accept ``sample_weight`` -- confirm, since it can also hide real
+    # failures.
+    try:
+        _cmp_score_functions(
+            *compared_data,
+            irregular_score_function,
+            sample_weight=sample_weight,
+        )
+    except TypeError:
+        pass
+
+    _cmp_score_functions(*compared_data, irregular_score_function)