automl · eddiebergman · Apr 16, 2024 · Apr 16, 2024 · Apr 16, 2024 · Apr 16, 2024
diff --git a/src/ConfigSpace/api/types/categorical.py b/src/ConfigSpace/api/types/categorical.py
@@ -5,6 +5,7 @@
 from typing_extensions import TypeAlias
 
 from ConfigSpace.hyperparameters import CategoricalHyperparameter, OrdinalHyperparameter
+from ConfigSpace.types import NotSet, _NotSet
 
 # We only accept these types in `items`
 T: TypeAlias = Union[str, int, float]
@@ -16,7 +17,7 @@ def Categorical(
     name: str,
     items: Sequence[T],
     *,
-    default: T | None = None,
+    default: T | _NotSet = NotSet,
     weights: Sequence[float] | None = None,
     ordered: Literal[False],
     meta: dict | None = None,
@@ -29,7 +30,7 @@ def Categorical(
     name: str,
     items: Sequence[T],
     *,
-    default: T | None = None,
+    default: T | _NotSet = NotSet,
     weights: Sequence[float] | None = None,
     ordered: Literal[True],
     meta: dict | None = None,
@@ -42,7 +43,7 @@ def Categorical(
     name: str,
     items: Sequence[T],
     *,
-    default: T | None = None,
+    default: T | _NotSet = NotSet,
     weights: Sequence[float] | None = None,
     ordered: bool = ...,
     meta: dict | None = None,
@@ -53,7 +54,7 @@ def Categorical(
     name: str,
     items: Sequence[T],
     *,
-    default: T | None = None,
+    default: T | _NotSet = NotSet,
     weights: Sequence[float] | None = None,
     ordered: bool = False,
     meta: dict | None = None,

diff --git a/src/ConfigSpace/conditions.py b/src/ConfigSpace/conditions.py
@@ -37,21 +37,13 @@
 
 import numpy as np
 
-from ConfigSpace.types import f64
+from ConfigSpace.types import NotSet, f64
 
 if TYPE_CHECKING:
     from ConfigSpace.hyperparameters.hyperparameter import Hyperparameter
     from ConfigSpace.types import Array, Mask
 
 
-class _NotSet:
-    def __repr__(self):
-        return "ValueNotSetObject"
-
-
-NotSet = _NotSet()  # Sentinal value for unset values
-
-
 class Condition(ABC):
     def __init__(
         self,

diff --git a/src/ConfigSpace/configuration.py b/src/ConfigSpace/configuration.py
@@ -6,10 +6,9 @@
 
 import numpy as np
 
-from ConfigSpace.conditions import NotSet
 from ConfigSpace.exceptions import IllegalValueError
 from ConfigSpace.hyperparameters import FloatHyperparameter
-from ConfigSpace.types import f64
+from ConfigSpace.types import NotSet, f64
 
 if TYPE_CHECKING:
     from ConfigSpace.configuration_space import ConfigurationSpace

diff --git a/src/ConfigSpace/hyperparameters/_hp_components.py b/src/ConfigSpace/hyperparameters/_hp_components.py
@@ -56,7 +56,7 @@ def __call__(
 @dataclass
 class TransformerSeq(_Transformer[Any]):
     lower_vectorized: ClassVar[i64] = i64(0)
-    seq: Array[Any]
+    seq: Array[Any] | list[Any]  # If `list`, assumed to contain sequence objects
     _lookup: dict[Any, int] | None = field(init=False)
 
     def __post_init__(self) -> None:
@@ -88,18 +88,37 @@ def to_value(self, vector: Array[f64]) -> Array[Any]:
                 f" representation into a value in {self.seq}."
                 f"Expected integers but got {vector} (dtype: {vector.dtype})",
             )
-        indices = np.rint(vector).astype(i64)
-        return self.seq[indices]
+        if isinstance(self.seq, np.ndarray):
+            indices = np.rint(vector).astype(i64)
+            return self.seq[indices]
+
+        items = [self.seq[int(np.rint(i))] for i in vector]
+        if isinstance(self.seq, list):
+            # We have to convert it into a numpy array of objects carefully
+            # https://stackoverflow.com/a/47389566/5332072
+            _v = np.empty(len(items), dtype=object)
+            _v[:] = items
+            return _v
+
+        return np.array(items, dtype=object)
 
     def to_vector(self, value: Array[Any]) -> Array[f64]:
         if self._lookup is not None:
             return np.array([self._lookup[v] for v in value], dtype=f64)
-        return np.flatnonzero(np.isin(self.seq, value)).astype(f64)
+
+        if isinstance(self.seq, np.ndarray):
+            return np.flatnonzero(np.isin(self.seq, value)).astype(f64)
+
+        return np.array([self.seq.index(v) for v in value], dtype=f64)
 
     def legal_value(self, value: Array[Any]) -> Mask:
         if self._lookup is not None:
             return np.array([v in self._lookup for v in value], dtype=np.bool_)
-        return np.isin(value, self.seq)
+
+        if isinstance(self.seq, np.ndarray):
+            return np.isin(value, self.seq)
+
+        return np.array([v in self.seq for v in value], dtype=np.bool_)
 
     def legal_vector(self, vector: Array[f64]) -> Mask:
         return (

diff --git a/src/ConfigSpace/hyperparameters/categorical.py b/src/ConfigSpace/hyperparameters/categorical.py
@@ -3,8 +3,9 @@
 from collections import Counter
 from collections.abc import Hashable, Mapping, Sequence
 from dataclasses import dataclass, field
+from itertools import product
 from typing import TYPE_CHECKING, Any, ClassVar, Set
-from typing_extensions import deprecated
+from typing_extensions import deprecated, override
 
 import numpy as np
 
@@ -14,7 +15,7 @@
 )
 from ConfigSpace.hyperparameters._hp_components import TransformerSeq, _Neighborhood
 from ConfigSpace.hyperparameters.hyperparameter import Hyperparameter
-from ConfigSpace.types import Array, f64
+from ConfigSpace.types import Array, Mask, NotSet, _NotSet, f64
 
 if TYPE_CHECKING:
     from ConfigSpace.types import Array
@@ -85,36 +86,43 @@ class CategoricalHyperparameter(Hyperparameter[Any, Any]):
     meta: Mapping[Hashable, Any] | None
     size: int
 
+    _contains_sequence_as_value: bool
+
     def __init__(
         self,
         name: str,
         choices: Sequence[Any],
-        default_value: Any | None = None,
+        default_value: Any | _NotSet = NotSet,
         meta: Mapping[Hashable, Any] | None = None,
         weights: Sequence[float] | Array[np.number] | None = None,
     ) -> None:
-        # TODO: We can allow for None but we need to be sure it doesn't break
-        # anything elsewhere.
-        if any(choice is None for choice in choices):
-            raise TypeError("Choice 'None' is not supported")
-
         if isinstance(choices, Set):
             raise TypeError(
                 "Using a set of choices is prohibited as it can result in "
                 "non-deterministic behavior. Please use a list or a tuple.",
             )
 
-        # TODO:For now we assume hashable for choices to make the below check with
-        # Counter work. We can probably relax this assumption
         choices = tuple(choices)
-        counter = Counter(choices)
-        for choice, count in counter.items():
-            if count > 1:
-                raise ValueError(
-                    f"Choices for categorical hyperparameters {name} contain"
-                    f" choice `{choice}` {count} times, while only a single oocurence"
-                    " is allowed.",
-                )
+
+        # We first try the fast route if it's Hashable, otherwise we resort to doing
+        # an N^2 check.
+        try:
+            counter = Counter(choices)
+            for choice, count in counter.items():
+                if count > 1:
+                    raise ValueError(
+                        f"Choices for categorical hyperparameters {name} contain"
+                        f" choice `{choice}` {count} times, while only a single"
+                        " occurence is allowed.",
+                    )
+        except TypeError:
+            for a, b in product(choices, choices):
+                if a is not b and a == b:
+                    raise ValueError(  # noqa: B904
+                        f"Choices for categorical hyperparameters {name} contain"
+                        f" choice `{a}` multiple times, while only a single occurence"
+                        " is allowed.",
+                    )
 
         if isinstance(weights, set):
             raise TypeError(
@@ -146,7 +154,7 @@ def __init__(
         else:
             tupled_weights = None
 
-        if default_value is not None and default_value not in choices:
+        if default_value is not NotSet and default_value not in choices:
             raise ValueError(
                 "The default value has to be one of the choices. "
                 f"Got {default_value!r} which is not in {choices}.",
@@ -159,9 +167,9 @@ def __init__(
             _weights = np.asarray(weights, dtype=np.float64)
             probabilities = _weights / np.sum(_weights)
 
-        if default_value is None and weights is None:
+        if default_value is NotSet and weights is None:
             default_value = choices[0]
-        elif default_value is None:
+        elif default_value is NotSet:
             highest_prob_index = np.argmax(probabilities)
             default_value = choices[highest_prob_index]
         elif default_value in choices:
@@ -178,14 +186,29 @@ def __init__(
         else:
             vector_dist = UniformIntegerDistribution(size=size)
 
-        # NOTE: Unfortunatly, numpy will promote number types to str
-        # if there are string types in the array, where we'd rather
-        # stick to object type in that case. Hence the manual...
-        seq_choices = np.asarray(choices)
-        if seq_choices.dtype.kind in {"U", "S"} and not all(
-            isinstance(choice, str) for choice in choices
-        ):
-            seq_choices = np.asarray(choices, dtype=object)
+        try:
+            # This can fail with a ValueError if the choices contain arbitrary objects
+            # that are list like.
+            seq_choices = np.asarray(choices)
+
+            # NOTE: Unfortunately, numpy will promote number types to str
+            # if there are string types in the array, where we'd rather
+            # stick to object type in that case. Hence the manual...
+            if seq_choices.dtype.kind in {"U", "S"} and not all(
+                isinstance(choice, str) for choice in choices
+            ):
+                seq_choices = np.array(choices, dtype=object)
+
+        except ValueError:
+            seq_choices = list(choices)
+
+        # If the Hyperparameter receives a Sequence during legality checks or
+        # conversions, we need to inform it that one of the values is a Sequence itself,
+        # i.e. we should treat it as a single value and not a list of multiple values
+        self._contains_sequence_as_value = any(
+            isinstance(choice, Sequence) and not isinstance(choice, str)
+            for choice in choices
+        )
 
         self.probabilities = probabilities
         self.choices = choices
@@ -236,8 +259,8 @@ def __eq__(self, other: Any) -> bool:
 
         return True
 
-    def _neighborhood_size(self, value: Any | None) -> int:
-        if value is None or value not in self.choices:
+    def _neighborhood_size(self, value: Any | _NotSet) -> int:
+        if value is NotSet or value not in self.choices:
             return self.size
         return self.size - 1
 
@@ -262,3 +285,73 @@ def __str__(self) -> str:
             parts.append(f"Probabilities: {self.probabilities}")
 
         return ", ".join(parts)
+
+    @override
+    def to_vector(self, value: Any | Sequence[Any] | Array[Any]) -> f64 | Array[f64]:
+        if isinstance(value, np.ndarray):
+            return self._transformer.to_vector(value)
+
+        if isinstance(value, str):
+            return self._transformer.to_vector(np.array([value]))[0]
+
+        # A non-str Sequence is ambiguous: it is either several values, or one value
+        # that is itself a sequence, e.g. the tuple (1, 2) as a single choice.
+        # `_contains_sequence_as_value` settles it: when set, the whole Sequence is
+        # taken as ONE value and boxed so numpy does not unpack its elements.
+        if isinstance(value, Sequence):
+            if self._contains_sequence_as_value:
+                # Boxing trick: https://stackoverflow.com/a/47389566/5332072
+                _v = np.empty(1, dtype=object)
+                _v[0] = value
+                return self._transformer.to_vector(_v)[0]
+
+            # Flag unset: the Sequence holds several values, one per element
+            return self._transformer.to_vector(np.asarray(value))
+
+        # A lone, non-Sequence value: wrap it, convert, then unwrap the result
+        return self._transformer.to_vector(np.array([value]))[0]
+
+    @override
+    def legal_value(self, value: Any | Sequence[Any] | Array[Any]) -> bool | Mask:
+        if isinstance(value, np.ndarray):
+            return self._transformer.legal_value(value)
+
+        if isinstance(value, str):
+            return self._transformer.legal_value(np.array([value]))[0]
+
+        # A non-str Sequence is ambiguous: it is either several values, or one value
+        # that is itself a sequence, e.g. the tuple (1, 2) as a single choice.
+        # `_contains_sequence_as_value` settles it: when set, the whole Sequence is
+        # taken as ONE value and boxed so numpy does not unpack its elements.
+        if isinstance(value, Sequence):
+            if self._contains_sequence_as_value:
+                # Boxing trick: https://stackoverflow.com/a/47389566/5332072
+                _v = np.empty(1, dtype=object)
+                _v[0] = value
+                return self._transformer.legal_value(_v)[0]
+
+            # Flag unset: the Sequence holds several values, one per element
+            return self._transformer.legal_value(np.asarray(value))
+
+        # A lone, non-Sequence value: wrap it, check it, then unwrap the result
+        return self._transformer.legal_value(np.array([value]))[0]
+
+    @override
+    def pdf_values(self, values: Sequence[Any] | Array[Any]) -> Array[f64]:
+        if isinstance(values, np.ndarray):
+            if values.ndim != 1:
+                raise ValueError("Method pdf expects a one-dimensional numpy array")
+
+            vector = self.to_vector(values)  # type: ignore
+            return self.pdf_vector(vector)
+
+        if self._contains_sequence_as_value:
+            # Keep each element of `values` as one object (choices may be sequences):
+            # https://stackoverflow.com/a/47389566/5332072
+            _v = np.empty(len(values), dtype=object)
+            _v[:] = values
+            _vector: Array[f64] = self.to_vector(_v)  # type: ignore
+            return self.pdf_vector(_vector)
+
+        vector: Array[f64] = self.to_vector(values)  # type: ignore
+        return self.pdf_vector(vector)