Skip to content

Commit d431b9d

Browse files
authored
FIX raise informative error message when SV are all noise (#1016)
1 parent a8e44ae commit d431b9d

File tree

3 files changed

+34
-0
lines changed

3 files changed

+34
-0
lines changed

doc/whats_new/v0.11.rst

+4
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,10 @@ Bug fixes
1414
they are plugged into a Euclidean distance computation.
1515
:pr:`1014` by :user:`Guillaume Lemaitre <glemaitre>`.
1616

17+
- Raise an informative error message when all support vectors are tagged as noise in
18+
:class:`~imblearn.over_sampling.SVMSMOTE`.
19+
:pr:`1016` by :user:`Guillaume Lemaitre <glemaitre>`.
20+
1721
- Fix a bug in :class:`~imblearn.over_sampling.SMOTENC` where the median of standard
1822
deviation of the continuous features was only computed on the minority class. Now,
1923
we are computing this statistic for each class that is up-sampled.

imblearn/over_sampling/_smote/filter.py

+5
Original file line numberDiff line numberDiff line change
@@ -495,6 +495,11 @@ def _fit_resample(self, X, y):
495495
support_vector = _safe_indexing(
496496
support_vector, np.flatnonzero(np.logical_not(noise_bool))
497497
)
498+
if support_vector.shape[0] == 0:
499+
raise ValueError(
500+
"All support vectors are considered as noise. SVM-SMOTE is not "
501+
"adapted to your dataset. Try another SMOTE variant."
502+
)
498503
danger_bool = self._in_danger_noise(
499504
self.nn_m_, support_vector, class_sample, y, kind="danger"
500505
)

imblearn/over_sampling/_smote/tests/test_svm_smote.py

+25
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
import numpy as np
22
import pytest
3+
from sklearn.datasets import make_classification
34
from sklearn.linear_model import LogisticRegression
45
from sklearn.neighbors import NearestNeighbors
56
from sklearn.svm import SVC
@@ -61,3 +62,27 @@ def test_svm_smote_not_svm(data):
6162
err_msg = "`svm_estimator` is required to exposed a `support_` fitted attribute."
6263
with pytest.raises(RuntimeError, match=err_msg):
6364
SVMSMOTE(svm_estimator=LogisticRegression()).fit_resample(*data)
65+
66+
67+
def test_svm_smote_all_noise(data):
    """Ensure SVMSMOTE fails loudly when every support vector is noise.

    A three-class dataset is generated with a tiny minority class
    (weight 0.004 out of 1000 samples) and a very small ``class_sep``.
    Resampling it with ``SVMSMOTE`` is expected to raise a ``ValueError``
    telling the user that SVM-SMOTE is not adapted to the dataset.

    The ``data`` fixture is requested by the signature but is not used in
    the body of this test.

    Non-regression test for:
    https://github.com/scikit-learn-contrib/imbalanced-learn/issues/742
    """
    # Parameters of the synthetic dataset that triggers the failure.
    dataset_params = {
        "n_classes": 3,
        "class_sep": 0.001,
        "weights": [0.004, 0.451, 0.545],
        "n_informative": 3,
        "n_redundant": 0,
        "flip_y": 0,
        "n_features": 3,
        "n_clusters_per_class": 2,
        "n_samples": 1000,
        "random_state": 10,
    }
    X, y = make_classification(**dataset_params)

    # Only a fragment of the full message is matched.
    expected_msg = "SVM-SMOTE is not adapted to your dataset"
    with pytest.raises(ValueError, match=expected_msg):
        SVMSMOTE(k_neighbors=4, random_state=42).fit_resample(X, y)

0 commit comments

Comments
 (0)