From d064916c6ea31773fd2adea154f51796b406a096 Mon Sep 17 00:00:00 2001
From: Michael Van de Steene <124588413+michael-nml@users.noreply.github.com>
Date: Thu, 2 May 2024 12:38:03 +0200
Subject: [PATCH] Fix handling single class in chunk for CBPE (#384)

* Fix handling single class in CBPE fitting

The `confusion_matrix` function used in various CBPE metrics returns
values only for the classes/labels actually present in the input. For
binary classification this means we expect 4 values (TN, FP, FN, TP).
However, if only one class is represented in the input, the function
returns a single value, so unpacking four values from it fails (see the
sketch below).

This commit addresses that failure case by explicitly providing the
expected labels to the `confusion_matrix` function. These values are
currently hard-coded for binary classification, but we may want to
derive them from the input later on if we were to support string-based
pass/fail classes.
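For illustration, a minimal standalone sketch of the failure mode (not
part of the committed changes; it calls scikit-learn directly):

    from sklearn.metrics import confusion_matrix

    y_true = [0, 0, 0]
    y_pred = [0, 0, 0]

    # Only class 0 is present, so the matrix is 1x1 and unpacking it
    # as `tn, fp, fn, tp = cm.ravel()` raises a ValueError.
    print(confusion_matrix(y_true, y_pred).ravel())  # [3]

    # Passing the expected labels restores the full 2x2 matrix.
    print(confusion_matrix(y_true, y_pred, labels=[0, 1]).ravel())  # [3 0 0 0]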
* Add test case for single class CBPE fitting

* Fix F1 sampling error when no positive cases

If the reference data contains no true positives, false positives or
false negatives, the F1 sampling error components are now reported as
NaN instead of failing with a division by zero (see the sketch below).
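A minimal sketch of this previously failing case (not part of the
committed changes; it assumes `f1_sampling_error_components` takes just
the two reference series shown in the hunk header below):

    import pandas as pd

    from nannyml.sampling_error.binary_classification import (
        f1_sampling_error_components,
    )

    # Reference data without a single positive case: TP, FP and FN are
    # all empty, so the correcting factor's denominator would be zero.
    y_true = pd.Series([0] * 100)
    y_pred = pd.Series([0] * 100)

    # With this fix the components come back as (nan, 0) instead of a
    # ZeroDivisionError being raised.
    components = f1_sampling_error_components(
        y_true_reference=y_true, y_pred_reference=y_pred
    )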
---
 .../confidence_based/metrics.py               | 29 +++++++++++++++----
 .../sampling_error/binary_classification.py   |  4 +++
 .../performance_estimation/CBPE/test_cbpe.py  | 28 ++++++++++++++++++
 3 files changed, 55 insertions(+), 6 deletions(-)

diff --git a/nannyml/performance_estimation/confidence_based/metrics.py b/nannyml/performance_estimation/confidence_based/metrics.py
index 23a70913..eb35c04e 100644
--- a/nannyml/performance_estimation/confidence_based/metrics.py
+++ b/nannyml/performance_estimation/confidence_based/metrics.py
@@ -997,6 +997,9 @@ def __init__(
         # sampling error
         self._sampling_error_components: Tuple = ()
 
+        # Set labels expected in y_true/y_pred. Currently hard-coded to 0, 1 for binary classification
+        self._labels = [0, 1]
+
     def _fit(self, reference_data: pd.DataFrame):
         self._sampling_error_components = bse.specificity_sampling_error_components(
             y_true_reference=reference_data[self.y_true],
@@ -1039,7 +1042,7 @@ def _realized_performance(self, data: pd.DataFrame) -> float:
             warnings.warn(f"Not enough data to compute estimated {self.display_name}.")
             return np.NaN
         y_pred, y_true = _dat
-        tn, fp, fn, tp = confusion_matrix(y_true, y_pred).ravel()
+        tn, fp, fn, tp = confusion_matrix(y_true, y_pred, labels=self._labels).ravel()
         denominator = tn + fp
         if denominator == 0:
             return np.NaN
@@ -1213,6 +1216,9 @@ def __init__(
         self.false_negative_lower_threshold: Optional[float] = None
         self.false_negative_upper_threshold: Optional[float] = None
 
+        # Set labels expected in y_true/y_pred. Currently hard-coded to 0, 1 for binary classification
+        self._labels = [0, 1]
+
     def fit(self, reference_data: pd.DataFrame):  # override the superclass fit method
         """Fits a Metric on reference data.
@@ -1348,7 +1354,9 @@ def _true_positive_realized_performance(self, data: pd.DataFrame) -> float:
             return np.NaN
         y_pred, y_true = _dat
 
-        _, _, _, tp = confusion_matrix(y_true, y_pred, normalize=self.normalize_confusion_matrix).ravel()
+        _, _, _, tp = confusion_matrix(
+            y_true, y_pred, labels=self._labels, normalize=self.normalize_confusion_matrix
+        ).ravel()
         return tp
 
     def _true_negative_realized_performance(self, data: pd.DataFrame) -> float:
@@ -1368,7 +1376,9 @@ def _true_negative_realized_performance(self, data: pd.DataFrame) -> float:
 
         y_pred, y_true = _dat
 
-        tn, _, _, _ = confusion_matrix(y_true, y_pred, normalize=self.normalize_confusion_matrix).ravel()
+        tn, _, _, _ = confusion_matrix(
+            y_true, y_pred, labels=self._labels, normalize=self.normalize_confusion_matrix
+        ).ravel()
         return tn
 
     def _false_positive_realized_performance(self, data: pd.DataFrame) -> float:
@@ -1387,7 +1397,9 @@ def _false_positive_realized_performance(self, data: pd.DataFrame) -> float:
             return np.NaN
         y_pred, y_true = _dat
 
-        _, fp, _, _ = confusion_matrix(y_true, y_pred, normalize=self.normalize_confusion_matrix).ravel()
+        _, fp, _, _ = confusion_matrix(
+            y_true, y_pred, labels=self._labels, normalize=self.normalize_confusion_matrix
+        ).ravel()
         return fp
 
     def _false_negative_realized_performance(self, data: pd.DataFrame) -> float:
@@ -1406,7 +1418,9 @@ def _false_negative_realized_performance(self, data: pd.DataFrame) -> float:
             return np.NaN
         y_pred, y_true = _dat
 
-        _, _, fn, _ = confusion_matrix(y_true, y_pred, normalize=self.normalize_confusion_matrix).ravel()
+        _, _, fn, _ = confusion_matrix(
+            y_true, y_pred, labels=self._labels, normalize=self.normalize_confusion_matrix
+        ).ravel()
         return fn
 
     def get_true_positive_estimate(self, chunk_data: pd.DataFrame) -> float:
@@ -1907,6 +1921,9 @@ def __init__(
         self.business_value_matrix = business_value_matrix
         self.normalize_business_value: Optional[str] = normalize_business_value
 
+        # Set labels expected in y_true/y_pred. Currently hard-coded to 0, 1 for binary classification
+        self._labels = [0, 1]
+
         # self.lower_threshold: Optional[float] = 0
         # self.upper_threshold: Optional[float] = 1
@@ -1940,7 +1957,7 @@ def _realized_performance(self, data: pd.DataFrame) -> float:
         fn_value = self.business_value_matrix[1, 0]
         bv_array = np.array([[tn_value, fp_value], [fn_value, tp_value]])
 
-        cm = confusion_matrix(y_true, y_pred)
+        cm = confusion_matrix(y_true, y_pred, labels=self._labels)
         if self.normalize_business_value == 'per_prediction':
             with np.errstate(all="ignore"):
                 cm = cm / cm.sum(axis=0, keepdims=True)

diff --git a/nannyml/sampling_error/binary_classification.py b/nannyml/sampling_error/binary_classification.py
index e8232ce9..bcd4a0f6 100644
--- a/nannyml/sampling_error/binary_classification.py
+++ b/nannyml/sampling_error/binary_classification.py
@@ -178,6 +178,10 @@ def f1_sampling_error_components(y_true_reference: pd.Series, y_pred_reference:
 
     tp_fp_fn = np.concatenate([TP, FN, FP])
 
+    # If there's no true positives, false negatives or false positives, sampling error is NaN
+    if tp_fp_fn.size == 0:
+        return np.nan, 0
+
     correcting_factor = len(tp_fp_fn) / ((len(FN) + len(FP)) * 0.5 + len(TP))
     obs_level_f1 = tp_fp_fn * correcting_factor
     fraction_of_relevant = len(tp_fp_fn) / len(y_pred_reference)

diff --git a/tests/performance_estimation/CBPE/test_cbpe.py b/tests/performance_estimation/CBPE/test_cbpe.py
index 3436723a..1748fe37 100644
--- a/tests/performance_estimation/CBPE/test_cbpe.py
+++ b/tests/performance_estimation/CBPE/test_cbpe.py
@@ -721,3 +721,31 @@ def test_cbpe_without_predictions():
         _ = cbpe.estimate(ana_df)
     except Exception as exc:
         pytest.fail(f'unexpected exception: {exc}')
+
+
+@pytest.mark.filterwarnings("ignore:Too few unique values", "ignore:'y_true' contains a single class")
+def test_cbpe_fitting_does_not_generate_error_when_single_class_present():
+    ref_df = pd.DataFrame({
+        'y_true': [0] * 1000,
+        'y_pred': [0] * 1000,
+        'y_pred_proba': [0.5] * 1000,
+    })
+    sut = CBPE(
+        y_true='y_true',
+        y_pred='y_pred',
+        y_pred_proba='y_pred_proba',
+        problem_type='classification_binary',
+        metrics=[
+            'roc_auc',
+            'f1',
+            'precision',
+            'recall',
+            'specificity',
+            'accuracy',
+            'confusion_matrix',
+            'business_value',
+        ],
+        chunk_size=100,
+        business_value_matrix=[[1, -1], [-1, 1]]
+    )
+    sut.fit(ref_df)