From 67be7bd51b433d1bdd09045d10b7f5b749c19712 Mon Sep 17 00:00:00 2001 From: Maximilian Date: Tue, 6 Feb 2024 11:44:38 +0100 Subject: [PATCH 1/2] updated tests --- tests/test_integration_import_all.py | 14 ++++----- .../test_approximator_base_approximator.py | 30 +++++++++++++++++++ 2 files changed, 37 insertions(+), 7 deletions(-) create mode 100644 tests/tests_approximators/test_approximator_base_approximator.py diff --git a/tests/test_integration_import_all.py b/tests/test_integration_import_all.py index 448ba68c..43439074 100644 --- a/tests/test_integration_import_all.py +++ b/tests/test_integration_import_all.py @@ -1,4 +1,4 @@ -"""This integration test module contains all tests regarding the importability of the package. +"""This integration test module contains all tests regarding the import-ability of the package. (I.e. it checks whether all modules can be imported without errors.)""" import importlib @@ -7,12 +7,12 @@ import pytest import shapiq -import approximator as approximator -import explainer as explainer -import games as games -import utils as utils -import plot as plot -import datasets as datasets +from shapiq import approximator +from shapiq import explainer +from shapiq import games +from shapiq import utils +from shapiq import plot +from shapiq import datasets @pytest.mark.parametrize( diff --git a/tests/tests_approximators/test_approximator_base_approximator.py b/tests/tests_approximators/test_approximator_base_approximator.py new file mode 100644 index 00000000..d1f6e656 --- /dev/null +++ b/tests/tests_approximators/test_approximator_base_approximator.py @@ -0,0 +1,30 @@ +"""This test module contains all tests regarding the base approximator class.""" +import pytest + +from shapiq.approximator._base import Approximator + + +def concreter(abclass): + """Class decorator to create a concrete class from an abstract class. 
+ Directly taken from https://stackoverflow.com/a/37574495.""" + + class concreteCls(abclass): + pass + + concreteCls.__abstractmethods__ = frozenset() + return type("DummyConcrete" + abclass.__name__, (concreteCls,), {}) + + +def test_approximator(): + approx = concreter(Approximator)(n=7, max_order=2, index="SII", top_order=False) + assert approx.n == 7 + assert approx.max_order == 2 + assert approx.index == "SII" + assert approx.top_order is False + + with pytest.raises(NotImplementedError): + approx.approximate(budget=100, game=lambda x: x) + + wrong_index = "something" + with pytest.raises(ValueError): + approx = concreter(Approximator)(n=7, max_order=2, index=wrong_index, top_order=False) From d3545d686dadc6fa00b2d438ae1e825de11650d4 Mon Sep 17 00:00:00 2001 From: Maximilian Date: Tue, 6 Feb 2024 13:50:51 +0100 Subject: [PATCH 2/2] renamed nSII to k-SII and closes #37 --- README.md | 14 +- shapiq/approximator/__init__.py | 4 +- shapiq/approximator/_base.py | 504 +----------------- shapiq/approximator/_config.py | 1 + shapiq/approximator/_interaction_values.py | 145 +++++ shapiq/approximator/_utils.py | 21 + shapiq/approximator/k_sii.py | 167 ++++++ shapiq/approximator/permutation/sii.py | 18 +- shapiq/approximator/permutation/sti.py | 3 +- shapiq/approximator/regression/_base.py | 4 +- shapiq/approximator/regression/fsi.py | 4 +- shapiq/approximator/regression/sii.py | 4 +- shapiq/approximator/sampling.py | 181 +++++++ shapiq/approximator/shapiq/shapiq.py | 15 +- shapiq/explainer/_base.py | 2 +- shapiq/explainer/interaction.py | 15 +- shapiq/explainer/tree.py | 2 +- shapiq/plot/network.py | 2 +- ...st_approximator_base_interaction_values.py | 4 +- .../test_approximator_ksii_estimation.py | 72 +++ .../test_approximator_nsii_estimation.py | 69 --- .../test_approximator_permutation_sii.py | 4 +- .../test_approximator_permutation_sti.py | 2 +- .../test_approximator_regression_fsi.py | 2 +- .../test_approximator_regression_sii.py | 6 +- 
.../test_approximator_regression_sv.py | 2 +- .../test_approximator_shapiq.py | 2 +- .../test_explainer_interaction.py | 6 +- tests/tests_plots/test_network_plot.py | 4 +- 29 files changed, 656 insertions(+), 623 deletions(-) create mode 100644 shapiq/approximator/_config.py create mode 100644 shapiq/approximator/_interaction_values.py create mode 100644 shapiq/approximator/_utils.py create mode 100644 shapiq/approximator/k_sii.py create mode 100644 shapiq/approximator/sampling.py create mode 100644 tests/tests_approximators/test_approximator_ksii_estimation.py delete mode 100644 tests/tests_approximators/test_approximator_nsii_estimation.py diff --git a/README.md b/README.md index aa4775ba..42a143cc 100644 --- a/README.md +++ b/README.md @@ -62,9 +62,9 @@ You can use `shapiq` in different ways. If you have a trained model you can rely If you are interested in the underlying game theoretic algorithms, then check out the `shapiq.approximator` modules. You can also plot and visualize your interaction scores with `shapiq.plot`. 
-## 📈 Compute n-SII values +## 📈 Compute k-SII values -Explain your models with Shapley interaction values like the n-SII values: +Explain your models with Shapley interaction values like the k-SII values: ```python # train a model @@ -72,19 +72,19 @@ from sklearn.ensemble import RandomForestRegressor model = RandomForestRegressor(n_estimators=50, random_state=42) model.fit(x_train, y_train) -# explain with nSII interaction scores +# explain with k-SII interaction scores from shapiq import InteractionExplainer explainer = InteractionExplainer( model=model.predict, background_data=x_train, - index="nSII", + index="k-SII", max_order=2 ) interaction_values = explainer.explain(x_explain, budget=2000) print(interaction_values) >>> InteractionValues( ->>> index=nSII, max_order=2, min_order=1, estimated=True, estimation_budget=2000, +>>> index=k-SII, max_order=2, min_order=1, estimated=True, estimation_budget=2000, >>> values={ >>> (0,): -91.0403, # main effect for feature 0 >>> (1,): 4.1264, # main effect for feature 1 @@ -110,8 +110,8 @@ attribution scores and interaction scores, respectively. 
from shapiq.plot import network_plot network_plot( - first_order_values=n_sii_first_order, # first order n-SII values - second_order_values=n_sii_second_order # second order n-SII values + first_order_values=k_sii_first_order, # first order k-SII values + second_order_values=k_sii_second_order # second order k-SII values ) ``` diff --git a/shapiq/approximator/__init__.py b/shapiq/approximator/__init__.py index f05bbeec..b4743192 100644 --- a/shapiq/approximator/__init__.py +++ b/shapiq/approximator/__init__.py @@ -1,5 +1,5 @@ """This module contains the approximators to estimate the Shapley interaction values.""" -from ._base import convert_nsii_into_one_dimension, transforms_sii_to_nsii # TODO add to tests +from .k_sii import convert_ksii_into_one_dimension, transforms_sii_to_ksii from .permutation.sii import PermutationSamplingSII from .permutation.sti import PermutationSamplingSTI from .regression import RegressionSII, RegressionFSI, KernelSHAP @@ -12,6 +12,4 @@ "RegressionFSI", "RegressionSII", "ShapIQ", - "transforms_sii_to_nsii", - "convert_nsii_into_one_dimension", ] diff --git a/shapiq/approximator/_base.py b/shapiq/approximator/_base.py index 18bd0130..d0708d4a 100644 --- a/shapiq/approximator/_base.py +++ b/shapiq/approximator/_base.py @@ -1,162 +1,18 @@ """This module contains the base approximator classes for the shapiq package.""" -import copy from abc import ABC, abstractmethod -from dataclasses import dataclass -from typing import Callable, Optional, Union +from typing import Callable, Optional import numpy as np -from scipy.special import binom, bernoulli -from utils import get_explicit_subsets, powerset, split_subsets_budget - -AVAILABLE_INDICES = {"SII", "nSII", "STI", "FSI", "SV"} +from approximator._config import AVAILABLE_INDICES +from approximator._interaction_values import InteractionValues +from approximator._utils import _generate_interaction_lookup __all__ = [ - "InteractionValues", "Approximator", - "ShapleySamplingMixin", - 
"NShapleyMixin", - "transforms_sii_to_nsii", - "convert_nsii_into_one_dimension", ] -@dataclass -class InteractionValues: - """This class contains the interaction values as estimated by an approximator. - - Attributes: - values: The interaction values of the model in vectorized form. - index: The interaction index estimated. Available indices are 'SII', 'nSII', 'STI', and - 'FSI'. - max_order: The order of the approximation. - min_order: The minimum order of the approximation. - n_players: The number of players. - interaction_lookup: A dictionary that maps interactions to their index in the values - vector. If `interaction_lookup` is not provided, it is computed from the `n_players`, - `min_order`, and `max_order` parameters. Defaults to `None`. - estimated: Whether the interaction values are estimated or not. Defaults to `True`. - estimation_budget: The budget used for the estimation. Defaults to `None`. - """ - - values: np.ndarray[float] - index: str - max_order: int - min_order: int - n_players: int - interaction_lookup: dict[tuple[int], int] = None - estimated: bool = True - estimation_budget: Optional[int] = None - - def __post_init__(self) -> None: - """Checks if the index is valid.""" - if self.index not in AVAILABLE_INDICES: - raise ValueError( - f"Index {self.index} is not valid. " f"Available indices are {AVAILABLE_INDICES}." 
- ) - if self.interaction_lookup is None: - self.interaction_lookup = _generate_interaction_lookup( - self.n_players, self.min_order, self.max_order - ) - - def __repr__(self) -> str: - """Returns the representation of the InteractionValues object.""" - representation = "InteractionValues(\n" - representation += ( - f" index={self.index}, max_order={self.max_order}, min_order={self.min_order}" - f", estimated={self.estimated}, estimation_budget={self.estimation_budget},\n" - ) + " values={\n" - for interaction in powerset( - set(range(self.n_players)), min_size=1, max_size=self.max_order - ): - representation += f" {interaction}: " - interaction_value = str(round(self[interaction], 4)) - interaction_value = interaction_value.replace("-0.0", "0.0").replace(" 0.0", " 0") - interaction_value = interaction_value.replace("0.0 ", "0 ") - representation += f"{interaction_value},\n" - representation = representation[:-2] # remove last "," and add closing bracket - representation += "\n }\n)" - return representation - - def __str__(self) -> str: - """Returns the string representation of the InteractionValues object.""" - return self.__repr__() - - def __getitem__(self, item: tuple[int, ...]) -> float: - """Returns the score for the given interaction. - - Args: - item: The interaction for which to return the score. - - Returns: - The interaction value. - """ - item = tuple(sorted(item)) - return float(self.values[self.interaction_lookup[item]]) - - def __eq__(self, other: object) -> bool: - """Checks if two InteractionValues objects are equal. - - Args: - other: The other InteractionValues object. - - Returns: - True if the two objects are equal, False otherwise. 
- """ - if not isinstance(other, InteractionValues): - raise TypeError("Cannot compare InteractionValues with other types.") - if ( - self.index != other.index - or self.max_order != other.max_order - or self.n_players != other.n_players - ): - return False - if not np.allclose(self.values, other.values): - return False - return True - - def __ne__(self, other: object) -> bool: - """Checks if two InteractionValues objects are not equal. - - Args: - other: The other InteractionValues object. - - Returns: - True if the two objects are not equal, False otherwise. - """ - return not self.__eq__(other) - - def __hash__(self) -> int: - """Returns the hash of the InteractionValues object.""" - return hash((self.index, self.max_order, tuple(self.values.flatten()))) - - def __copy__(self) -> "InteractionValues": - """Returns a copy of the InteractionValues object.""" - return InteractionValues( - values=copy.deepcopy(self.values), - index=self.index, - max_order=self.max_order, - estimated=self.estimated, - estimation_budget=self.estimation_budget, - n_players=self.n_players, - interaction_lookup=copy.deepcopy(self.interaction_lookup), - min_order=self.min_order, - ) - - def __deepcopy__(self, memo) -> "InteractionValues": - """Returns a deep copy of the InteractionValues object.""" - return InteractionValues( - values=copy.deepcopy(self.values), - index=self.index, - max_order=self.max_order, - estimated=self.estimated, - estimation_budget=self.estimation_budget, - n_players=self.n_players, - interaction_lookup=copy.deepcopy(self.interaction_lookup), - min_order=self.min_order, - ) - - class Approximator(ABC): """This class is the base class for all approximators. @@ -167,7 +23,7 @@ class Approximator(ABC): Args: n: The number of players. max_order: The interaction order of the approximation. - index: The interaction index to be estimated. Available indices are 'SII', 'nSII', 'STI', + index: The interaction index to be estimated. 
Available indices are 'SII', 'k-SII', 'STI', and 'FSI'. top_order: If True, the approximation is performed only for the top order interactions. If False, the approximation is performed for all orders up to the specified order. @@ -269,7 +125,7 @@ def _finalize_result( result: The result dictionary. estimated: Whether the interaction values are estimated or not. Defaults to True. budget: The budget used for the estimation. Defaults to None. - index: The interaction index estimated. Available indices are 'SII', 'nSII', 'STI', and + index: The interaction index estimated. Available indices are 'SII', 'k-SII', 'STI', and 'FSI'. Defaults to None (i.e., the index of the approximator is used). Returns: @@ -350,351 +206,3 @@ def __hash__(self) -> int: @property def interaction_lookup(self): return self._interaction_lookup - - -class ShapleySamplingMixin(ABC): - """Mixin class for the computation of Shapley weights. - - Provides the common functionality for regression-based approximators like - :class:`~shapiq.approximators.RegressionFSI`. The class offers computation of Shapley weights - and the corresponding sampling weights for the KernelSHAP-like estimation approaches. - """ - - def _init_ksh_sampling_weights( - self: Union[Approximator, "ShapleySamplingMixin"] - ) -> np.ndarray[float]: - """Initializes the weights for sampling subsets. - - The sampling weights are of size n + 1 and indexed by the size of the subset. The edges - (the first, empty coalition, and the last element, full coalition) are set to 0. - - Returns: - The weights for sampling subsets of size s in shape (n + 1,). 
- """ - - weight_vector = np.zeros(shape=self.n - 1, dtype=float) - for subset_size in range(1, self.n): - weight_vector[subset_size - 1] = (self.n - 1) / (subset_size * (self.n - subset_size)) - sampling_weight = (np.asarray([0] + [*weight_vector] + [0])) / sum(weight_vector) - return sampling_weight - - def _get_ksh_subset_weights( - self: Union[Approximator, "ShapleySamplingMixin"], subsets: np.ndarray[bool] - ) -> np.ndarray[float]: - """Computes the KernelSHAP regression weights for the given subsets. - - The weights for the subsets of size s are set to ksh_weights[s] / binom(n, s). The weights - for the empty and full sets are set to a big number. - - Args: - subsets: one-hot matrix of subsets for which to compute the weights in shape - (n_subsets, n). - - Returns: - The KernelSHAP regression weights in shape (n_subsets,). - """ - # set the weights for each subset to ksh_weights[|S|] / binom(n, |S|) - ksh_weights = self._init_ksh_sampling_weights() # indexed by subset size - subset_sizes = np.sum(subsets, axis=1) - weights = ksh_weights[subset_sizes] # set the weights for each subset size - weights /= binom(self.n, subset_sizes) # divide by the number of subsets of the same size - - # set the weights for the empty and full sets to big M - weights[np.logical_not(subsets).all(axis=1)] = float(1_000_000) - weights[subsets.all(axis=1)] = float(1_000_000) - return weights - - def _sample_subsets( - self: Union[Approximator, "ShapleySamplingMixin"], - budget: int, - sampling_weights: np.ndarray[float], - replacement: bool = False, - pairing: bool = True, - ) -> np.ndarray[bool]: - """Samples subsets with the given budget. - - Args: - budget: budget for the sampling. - sampling_weights: weights for sampling subsets of certain sizes and indexed by the size. - The shape is expected to be (n + 1,). A size that is not to be sampled has weight 0. - pairing: whether to use pairing (`True`) sampling or not (`False`). Defaults to `False`. - - Returns: - sampled subsets. 
- """ - # sanitize input parameters - sampling_weights = copy.copy(sampling_weights) - sampling_weights /= np.sum(sampling_weights) - - # adjust budget for paired sampling - if pairing: - budget = budget - budget % 2 # must be even for pairing - budget = int(budget / 2) - - # create storage array for given budget - subset_matrix = np.zeros(shape=(budget, self.n), dtype=bool) - - # sample subsets - sampled_sizes = self._rng.choice(self.N_arr, size=budget, p=sampling_weights).astype(int) - if replacement: # sample subsets with replacement - permutations = np.tile(np.arange(self.n), (budget, 1)) - self._rng.permuted(permutations, axis=1, out=permutations) - for i, subset_size in enumerate(sampled_sizes): - subset = permutations[i, :subset_size] - subset_matrix[i, subset] = True - else: # sample subsets without replacement - sampled_subsets, n_sampled = set(), 0 # init sampling variables - while n_sampled < budget: - subset_size = sampled_sizes[n_sampled] - subset = tuple(sorted(self._rng.choice(np.arange(0, self.n), size=subset_size))) - sampled_subsets.add(subset) - if len(sampled_subsets) != n_sampled: # subset was not already sampled - subset_matrix[n_sampled, subset] = True - n_sampled += 1 # continue sampling - - if pairing: - subset_matrix = np.repeat(subset_matrix, repeats=2, axis=0) # extend the subset matrix - subset_matrix[1::2] = np.logical_not(subset_matrix[1::2]) # flip sign of paired subsets - - return subset_matrix - - def _generate_shapley_dataset( - self: Union[Approximator, "ShapleySamplingMixin"], - budget: int, - pairing: bool = True, - replacement: bool = False, - ) -> tuple[np.ndarray[bool], bool, int]: - """Generates the two-part dataset containing explicit and sampled subsets. - - The first part of the dataset contains all explicit subsets. The second half contains the - sampled subsets. The parts can be determined by the `n_explicit_subsets` parameter. 
- - Args: - budget: The budget for the approximation (i.e., the number of allowed game evaluations). - pairing: Whether to use pairwise sampling (`True`) or not (`False`). Defaults to `True`. - Paired sampling can increase the approximation quality. - replacement: Whether to sample with replacement (`True`) or without replacement - (`False`). Defaults to `False`. - - Returns: - - The dataset containing explicit and sampled subsets. The dataset is a 2D array of - shape (n_subsets, n_players) where each row is a subset. - - A flag indicating whether the approximation is estimated (`True`) or exact (`False`). - - The number of explicit subsets. - """ - estimation_flag = True - # create storage array for given budget - all_subsets: np.ndarray[bool] = np.zeros(shape=(budget, self.n), dtype=bool) - n_subsets = 0 - # split the subset sizes into explicit and sampling parts - sampling_weights: np.ndarray[float] = self._init_ksh_sampling_weights() - explicit_sizes, sampling_sizes, remaining_budget = split_subsets_budget( - order=1, n=self.n, budget=budget, sampling_weights=sampling_weights - ) - # enumerate all explicit subsets - explicit_subsets: np.ndarray[bool] = get_explicit_subsets(self.n, explicit_sizes) - n_explicit_subsets = explicit_subsets.shape[0] - all_subsets[:n_explicit_subsets] = explicit_subsets - n_subsets += n_explicit_subsets - sampling_weights[explicit_sizes] = 0.0 # zero out sampling weights for explicit sizes - # sample the remaining subsets with the remaining budget - if len(sampling_sizes) > 0: - if remaining_budget > 0: - sampling_subsets: np.ndarray[bool] = self._sample_subsets( - budget=remaining_budget, - sampling_weights=sampling_weights, - replacement=replacement, - pairing=pairing, - ) - n_subsets += sampling_subsets.shape[0] - all_subsets[n_explicit_subsets:n_subsets] = sampling_subsets - all_subsets = all_subsets[:n_subsets] # remove unnecessary rows - else: - estimation_flag = False # no sampling needed computation is exact - all_subsets = 
all_subsets[:n_explicit_subsets] # remove unnecessary rows - # add empty and full set to all_subsets in the beginning - all_subsets = np.concatenate( - ( - np.zeros(shape=(1, self.n), dtype=bool), # empty set - np.ones(shape=(1, self.n), dtype=bool), # full set - all_subsets, # explicit and sampled subsets - ) - ) - n_explicit_subsets += 2 # add empty and full set - return all_subsets, estimation_flag, n_explicit_subsets - - -class NShapleyMixin: - """Mixin class for the computation of n-Shapley values from SII estimators. - - Provides the common functionality for SII-based approximators like `PermutationSamplingSII` or - `ShapIQ` for SII to transform their interaction scores into nSII values. The nSII values are - proposed in this `paper`_. - """ - - def transforms_sii_to_nsii( - self: Approximator, - sii_values: Union[np.ndarray[float], InteractionValues], - ) -> Union[np.ndarray[float], InteractionValues]: - """Transforms the SII values into nSII values. - - Args: - sii_values: The SII values to transform. Can be either a numpy array or an - InteractionValues object. The output will be of the same type. - - Returns: - The nSII values in the same format as the input. - """ - return transforms_sii_to_nsii( - sii_values=sii_values, - approximator=self, - ) - - -def transforms_sii_to_nsii( - sii_values: Union[np.ndarray[float], InteractionValues], - *, - approximator: Optional[Approximator] = None, - n: Optional[int] = None, - max_order: Optional[int] = None, - interaction_lookup: Optional[dict] = None, -) -> Union[np.ndarray[float], InteractionValues]: - """Transforms the SII values into nSII values. - - Args: - sii_values: The SII values to transform. Can be either a numpy array or an - InteractionValues object. The output will be of the same type. - approximator: The approximator used to estimate the SII values. If provided, meta - information for the transformation is taken from the approximator. Defaults to None. - n: The number of players. 
Required if `approximator` is not provided. Defaults to None. - max_order: The maximum order of the approximation. Required if `approximator` is not - provided. Defaults to None. - interaction_lookup: A dictionary that maps interactions to their index in the values - vector. If `interaction_lookup` is not provided, it is computed from the `n_players` - and the `max_order` parameters. Defaults to `None`. - - Returns: - The nSII values in the same format as the input. - """ - if isinstance(sii_values, InteractionValues): - n_sii_values = _calculate_nsii_from_sii( - sii_values.values, - sii_values.n_players, - sii_values.max_order, - sii_values.interaction_lookup, - ) - return InteractionValues( - values=n_sii_values, - index="nSII", - max_order=sii_values.max_order, - min_order=sii_values.min_order, - n_players=sii_values.n_players, - interaction_lookup=sii_values.interaction_lookup, - estimated=sii_values.estimated, - estimation_budget=sii_values.estimation_budget, - ) - elif approximator is not None: - return _calculate_nsii_from_sii( - sii_values, approximator.n, approximator.max_order, approximator.interaction_lookup - ) - elif n is not None and max_order is not None: - if interaction_lookup is None: - interaction_lookup = _generate_interaction_lookup(n, 1, max_order) - return _calculate_nsii_from_sii(sii_values, n, max_order, interaction_lookup) - else: - raise ValueError( - "If the SII values are not provided as InteractionValues, the approximator " - "or the number of players and the maximum order of the approximation must be " - "provided." - ) - - -def _calculate_nsii_from_sii( - sii_values: np.ndarray[float], - n: int, - max_order: int, - interaction_lookup: Optional[dict] = None, -) -> np.ndarray[float]: - """Calculates the nSII values from the SII values. - - Args: - sii_values: The SII values to transform. - n: The number of players. - max_order: The maximum order of the approximation. 
- interaction_lookup: A dictionary that maps interactions to their index in the values - vector. If `interaction_lookup` is not provided, it is computed from the `n_players`, - `min_order`, and `max_order` parameters. Defaults to `None`. - - Returns: - The nSII values. - """ - # compute nSII values from SII values - bernoulli_numbers = bernoulli(max_order) - nsii_values = np.zeros_like(sii_values) - # all subsets S with 1 <= |S| <= max_order - for subset in powerset(set(range(n)), min_size=1, max_size=max_order): - interaction_index = interaction_lookup[subset] - interaction_size = len(subset) - n_sii_value = sii_values[interaction_index] - # go over all subsets T of length |S| + 1, ..., n that contain S - for T in powerset(set(range(n)), min_size=interaction_size + 1, max_size=max_order): - if set(subset).issubset(T): - effect_index = interaction_lookup[T] # get the index of T - effect_value = sii_values[effect_index] # get the effect of T - bernoulli_factor = bernoulli_numbers[len(T) - interaction_size] - n_sii_value += bernoulli_factor * effect_value - nsii_values[interaction_index] = n_sii_value - return nsii_values - - -def convert_nsii_into_one_dimension( - n_sii_values: InteractionValues, -) -> tuple[np.ndarray[float], np.ndarray[float]]: - """Converts the nSII values into one-dimensional values. - - Args: - n_sii_values: The nSII values to convert. - - Returns: - The positive and negative one-dimensional values. - """ - if n_sii_values.index != "nSII": - raise ValueError( - "Only nSII values can be converted into one-dimensional nSII values. Please use the " - "transforms_sii_to_nsii method to convert SII values into nSII values." 
- ) - max_order = n_sii_values.max_order - min_order = n_sii_values.min_order - n = n_sii_values.n_players - - pos_nsii_values = np.zeros(shape=(n,), dtype=float) - neg_nsii_values = np.zeros(shape=(n,), dtype=float) - - for subset in powerset(set(range(n)), min_size=min_order, max_size=max_order): - n_sii_value = n_sii_values[subset] / len(subset) # distribute uniformly - for player in subset: - if n_sii_value >= 0: - pos_nsii_values[player] += n_sii_value - else: - neg_nsii_values[player] += n_sii_value - return pos_nsii_values, neg_nsii_values - - -def _generate_interaction_lookup(n: int, min_order: int, max_order: int) -> dict[tuple[int], int]: - """Generates a lookup dictionary for interactions. - - Args: - n: The number of players. - min_order: The minimum order of the approximation. - max_order: The maximum order of the approximation. - - Returns: - A dictionary that maps interactions to their index in the values vector. - """ - interaction_lookup = { - interaction: i - for i, interaction in enumerate( - powerset(set(range(n)), min_size=min_order, max_size=max_order) - ) - } - return interaction_lookup diff --git a/shapiq/approximator/_config.py b/shapiq/approximator/_config.py new file mode 100644 index 00000000..a9ac57c2 --- /dev/null +++ b/shapiq/approximator/_config.py @@ -0,0 +1 @@ +AVAILABLE_INDICES = {"k-SII", "SII", "STI", "FSI", "SV"} diff --git a/shapiq/approximator/_interaction_values.py b/shapiq/approximator/_interaction_values.py new file mode 100644 index 00000000..da34b1bd --- /dev/null +++ b/shapiq/approximator/_interaction_values.py @@ -0,0 +1,145 @@ +import copy +from dataclasses import dataclass +from typing import Optional + +import numpy as np + +from approximator._utils import _generate_interaction_lookup +from approximator._config import AVAILABLE_INDICES +from shapiq.utils import powerset + + +@dataclass +class InteractionValues: + """This class contains the interaction values as estimated by an approximator. 
+ + Attributes: + values: The interaction values of the model in vectorized form. + index: The interaction index estimated. Available indices are 'SII', 'k-SII', 'STI', and + 'FSI'. + max_order: The order of the approximation. + min_order: The minimum order of the approximation. + n_players: The number of players. + interaction_lookup: A dictionary that maps interactions to their index in the values + vector. If `interaction_lookup` is not provided, it is computed from the `n_players`, + `min_order`, and `max_order` parameters. Defaults to `None`. + estimated: Whether the interaction values are estimated or not. Defaults to `True`. + estimation_budget: The budget used for the estimation. Defaults to `None`. + """ + + values: np.ndarray[float] + index: str + max_order: int + min_order: int + n_players: int + interaction_lookup: dict[tuple[int], int] = None + estimated: bool = True + estimation_budget: Optional[int] = None + + def __post_init__(self) -> None: + """Checks if the index is valid.""" + if self.index not in AVAILABLE_INDICES: + raise ValueError( + f"Index {self.index} is not valid. " f"Available indices are {AVAILABLE_INDICES}." 
+ ) + if self.interaction_lookup is None: + self.interaction_lookup = _generate_interaction_lookup( + self.n_players, self.min_order, self.max_order + ) + + def __repr__(self) -> str: + """Returns the representation of the InteractionValues object.""" + representation = "InteractionValues(\n" + representation += ( + f" index={self.index}, max_order={self.max_order}, min_order={self.min_order}" + f", estimated={self.estimated}, estimation_budget={self.estimation_budget},\n" + ) + " values={\n" + for interaction in powerset( + set(range(self.n_players)), min_size=1, max_size=self.max_order + ): + representation += f" {interaction}: " + interaction_value = str(round(self[interaction], 4)) + interaction_value = interaction_value.replace("-0.0", "0.0").replace(" 0.0", " 0") + interaction_value = interaction_value.replace("0.0 ", "0 ") + representation += f"{interaction_value},\n" + representation = representation[:-2] # remove last "," and add closing bracket + representation += "\n }\n)" + return representation + + def __str__(self) -> str: + """Returns the string representation of the InteractionValues object.""" + return self.__repr__() + + def __getitem__(self, item: tuple[int, ...]) -> float: + """Returns the score for the given interaction. + + Args: + item: The interaction for which to return the score. + + Returns: + The interaction value. + """ + item = tuple(sorted(item)) + return float(self.values[self.interaction_lookup[item]]) + + def __eq__(self, other: object) -> bool: + """Checks if two InteractionValues objects are equal. + + Args: + other: The other InteractionValues object. + + Returns: + True if the two objects are equal, False otherwise. 
+ """ + if not isinstance(other, InteractionValues): + raise TypeError("Cannot compare InteractionValues with other types.") + if ( + self.index != other.index + or self.max_order != other.max_order + or self.n_players != other.n_players + ): + return False + if not np.allclose(self.values, other.values): + return False + return True + + def __ne__(self, other: object) -> bool: + """Checks if two InteractionValues objects are not equal. + + Args: + other: The other InteractionValues object. + + Returns: + True if the two objects are not equal, False otherwise. + """ + return not self.__eq__(other) + + def __hash__(self) -> int: + """Returns the hash of the InteractionValues object.""" + return hash((self.index, self.max_order, tuple(self.values.flatten()))) + + def __copy__(self) -> "InteractionValues": + """Returns a copy of the InteractionValues object.""" + return InteractionValues( + values=copy.deepcopy(self.values), + index=self.index, + max_order=self.max_order, + estimated=self.estimated, + estimation_budget=self.estimation_budget, + n_players=self.n_players, + interaction_lookup=copy.deepcopy(self.interaction_lookup), + min_order=self.min_order, + ) + + def __deepcopy__(self, memo) -> "InteractionValues": + """Returns a deep copy of the InteractionValues object.""" + return InteractionValues( + values=copy.deepcopy(self.values), + index=self.index, + max_order=self.max_order, + estimated=self.estimated, + estimation_budget=self.estimation_budget, + n_players=self.n_players, + interaction_lookup=copy.deepcopy(self.interaction_lookup), + min_order=self.min_order, + ) diff --git a/shapiq/approximator/_utils.py b/shapiq/approximator/_utils.py new file mode 100644 index 00000000..bf29f552 --- /dev/null +++ b/shapiq/approximator/_utils.py @@ -0,0 +1,21 @@ +from shapiq.utils import powerset + + +def _generate_interaction_lookup(n: int, min_order: int, max_order: int) -> dict[tuple[int], int]: + """Generates a lookup dictionary for interactions. 
+ + Args: + n: The number of players. + min_order: The minimum order of the approximation. + max_order: The maximum order of the approximation. + + Returns: + A dictionary that maps interactions to their index in the values vector. + """ + interaction_lookup = { + interaction: i + for i, interaction in enumerate( + powerset(set(range(n)), min_size=min_order, max_size=max_order) + ) + } + return interaction_lookup diff --git a/shapiq/approximator/k_sii.py b/shapiq/approximator/k_sii.py new file mode 100644 index 00000000..c6ca65f8 --- /dev/null +++ b/shapiq/approximator/k_sii.py @@ -0,0 +1,167 @@ +"""This module provides the functionality to transform SII values into k-SII values.""" +from typing import Union, Optional + +import numpy as np +from scipy.special import bernoulli + +from approximator._base import Approximator +from approximator._utils import _generate_interaction_lookup + +from approximator._interaction_values import InteractionValues +from shapiq.utils import powerset + + +class KShapleyMixin: + """Mixin class for the computation of k-Shapley values from SII estimators. + + Provides the common functionality for SII-based approximators like `PermutationSamplingSII` or + `ShapIQ` for SII to transform their interaction scores into nSII values. The k-SII values are + proposed in this `paper`_. + """ + + def transforms_sii_to_ksii( + self: Approximator, + sii_values: Union[np.ndarray[float], InteractionValues], + ) -> Union[np.ndarray[float], InteractionValues]: + """Transforms the SII values into k-SII values. + + Args: + sii_values: The SII values to transform. Can be either a numpy array or an + InteractionValues object. The output will be of the same type. + + Returns: + The k-SII values in the same format as the input. 
+ """ + return transforms_sii_to_ksii( + sii_values=sii_values, + approximator=self, + ) + + +def transforms_sii_to_ksii( + sii_values: Union[np.ndarray[float], InteractionValues], + *, + approximator: Optional[Approximator] = None, + n: Optional[int] = None, + max_order: Optional[int] = None, + interaction_lookup: Optional[dict] = None, +) -> Union[np.ndarray[float], InteractionValues]: + """Transforms the SII values into k-SII values. + + Args: + sii_values: The SII values to transform. Can be either a numpy array or an + InteractionValues object. The output will be of the same type. + approximator: The approximator used to estimate the SII values. If provided, meta + information for the transformation is taken from the approximator. Defaults to None. + n: The number of players. Required if `approximator` is not provided. Defaults to None. + max_order: The maximum order of the approximation. Required if `approximator` is not + provided. Defaults to None. + interaction_lookup: A dictionary that maps interactions to their index in the values + vector. If `interaction_lookup` is not provided, it is computed from the `n_players` + and the `max_order` parameters. Defaults to `None`. + + Returns: + The k-SII values in the same format as the input. 
+ """ + if isinstance(sii_values, InteractionValues): + ksii_values = _calculate_ksii_from_sii( + sii_values.values, + sii_values.n_players, + sii_values.max_order, + sii_values.interaction_lookup, + ) + return InteractionValues( + values=ksii_values, + index="k-SII", + max_order=sii_values.max_order, + min_order=sii_values.min_order, + n_players=sii_values.n_players, + interaction_lookup=sii_values.interaction_lookup, + estimated=sii_values.estimated, + estimation_budget=sii_values.estimation_budget, + ) + elif approximator is not None: + return _calculate_ksii_from_sii( + sii_values, approximator.n, approximator.max_order, approximator.interaction_lookup + ) + elif n is not None and max_order is not None: + if interaction_lookup is None: + interaction_lookup = _generate_interaction_lookup(n, 1, max_order) + return _calculate_ksii_from_sii(sii_values, n, max_order, interaction_lookup) + else: + raise ValueError( + "If the SII values are not provided as InteractionValues, the approximator " + "or the number of players and the maximum order of the approximation must be " + "provided." + ) + + +def _calculate_ksii_from_sii( + sii_values: np.ndarray[float], + n: int, + max_order: int, + interaction_lookup: Optional[dict] = None, +) -> np.ndarray[float]: + """Calculates the k-SII values from the SII values. + + Args: + sii_values: The SII values to transform. + n: The number of players. + max_order: The maximum order of the approximation. + interaction_lookup: A dictionary that maps interactions to their index in the values + vector. If `interaction_lookup` is not provided, it is computed from the `n_players`, + `min_order`, and `max_order` parameters. Defaults to `None`. + + Returns: + The nSII values. 
+ """ + # compute nSII values from SII values + bernoulli_numbers = bernoulli(max_order) + nsii_values = np.zeros_like(sii_values) + # all subsets S with 1 <= |S| <= max_order + for subset in powerset(set(range(n)), min_size=1, max_size=max_order): + interaction_index = interaction_lookup[subset] + interaction_size = len(subset) + ksii_value = sii_values[interaction_index] + # go over all subsets T of length |S| + 1, ..., n that contain S + for T in powerset(set(range(n)), min_size=interaction_size + 1, max_size=max_order): + if set(subset).issubset(T): + effect_index = interaction_lookup[T] # get the index of T + effect_value = sii_values[effect_index] # get the effect of T + bernoulli_factor = bernoulli_numbers[len(T) - interaction_size] + ksii_value += bernoulli_factor * effect_value + nsii_values[interaction_index] = ksii_value + return nsii_values + + +def convert_ksii_into_one_dimension( + ksii_values: InteractionValues, +) -> tuple[np.ndarray[float], np.ndarray[float]]: + """Converts the k-SII values into one-dimensional values. + + Args: + ksii_values: The k-SII values to convert. + + Returns: + The positive and negative one-dimensional values. + """ + if ksii_values.index != "k-SII": + raise ValueError( + "Only nSII values can be converted into one-dimensional k-SII values. Please use the " + "transforms_sii_to_ksii method to convert SII values into k-SII values." 
+ ) + max_order = ksii_values.max_order + min_order = ksii_values.min_order + n = ksii_values.n_players + + pos_ksii_values = np.zeros(shape=(n,), dtype=float) + neg_ksii_values = np.zeros(shape=(n,), dtype=float) + + for subset in powerset(set(range(n)), min_size=min_order, max_size=max_order): + ksii_value = ksii_values[subset] / len(subset) # distribute uniformly + for player in subset: + if ksii_value >= 0: + pos_ksii_values[player] += ksii_value + else: + neg_ksii_values[player] += ksii_value + return pos_ksii_values, neg_ksii_values diff --git a/shapiq/approximator/permutation/sii.py b/shapiq/approximator/permutation/sii.py index 09b527b7..2e7933dd 100644 --- a/shapiq/approximator/permutation/sii.py +++ b/shapiq/approximator/permutation/sii.py @@ -1,13 +1,15 @@ -"""This module implements the Permutation Sampling approximator for the SII (and nSII) index.""" +"""This module implements the Permutation Sampling approximator for the SII (and k-SII) index.""" from typing import Callable, Optional import numpy as np -from approximator._base import Approximator, InteractionValues, NShapleyMixin +from approximator._base import Approximator +from approximator.k_sii import KShapleyMixin +from approximator._interaction_values import InteractionValues from utils import powerset -class PermutationSamplingSII(Approximator, NShapleyMixin): - """Permutation Sampling approximator for the SII (and nSII) index. +class PermutationSamplingSII(Approximator, KShapleyMixin): + """Permutation Sampling approximator for the SII (and k-SII) index. Args: n: The number of players. @@ -60,8 +62,8 @@ def __init__( top_order: bool = False, random_state: Optional[int] = None, ) -> None: - if index not in ["SII", "nSII"]: - raise ValueError(f"Invalid index {index}. Must be either 'SII' or 'nSII'.") + if index not in ["SII", "k-SII"]: + raise ValueError(f"Invalid index {index}. 
Must be either 'SII' or 'k-SII'.") super().__init__(n, max_order, index, top_order, random_state) self.iteration_cost: int = self._compute_iteration_cost() @@ -153,7 +155,7 @@ def approximate( # compute mean of interactions result = np.divide(result, counts, out=result, where=counts != 0) - if self.index == "nSII": - result: np.ndarray[float] = self.transforms_sii_to_nsii(result) + if self.index == "k-SII": + result: np.ndarray[float] = self.transforms_sii_to_ksii(result) return self._finalize_result(result, budget=used_budget, estimated=True) diff --git a/shapiq/approximator/permutation/sti.py b/shapiq/approximator/permutation/sti.py index 2a75b85d..fafcc362 100644 --- a/shapiq/approximator/permutation/sti.py +++ b/shapiq/approximator/permutation/sti.py @@ -3,7 +3,8 @@ from typing import Callable, Optional import numpy as np -from approximator._base import Approximator, InteractionValues +from approximator._base import Approximator +from approximator._interaction_values import InteractionValues from scipy.special import binom from utils import get_explicit_subsets, powerset diff --git a/shapiq/approximator/regression/_base.py b/shapiq/approximator/regression/_base.py index 9623145a..33e9d849 100644 --- a/shapiq/approximator/regression/_base.py +++ b/shapiq/approximator/regression/_base.py @@ -2,7 +2,9 @@ from typing import Callable, Optional import numpy as np -from approximator._base import Approximator, InteractionValues, ShapleySamplingMixin +from approximator._base import Approximator +from approximator.sampling import ShapleySamplingMixin +from approximator._interaction_values import InteractionValues from scipy.special import binom, bernoulli from utils import powerset, get_explicit_subsets diff --git a/shapiq/approximator/regression/fsi.py b/shapiq/approximator/regression/fsi.py index a0d888ad..01035838 100644 --- a/shapiq/approximator/regression/fsi.py +++ b/shapiq/approximator/regression/fsi.py @@ -2,10 +2,10 @@ from typing import Optional from ._base 
import Regression -from .._base import NShapleyMixin +from ..k_sii import KShapleyMixin -class RegressionFSI(Regression, NShapleyMixin): +class RegressionFSI(Regression, KShapleyMixin): """Estimates the FSI values [1] using the weighted least square approach. Args: diff --git a/shapiq/approximator/regression/sii.py b/shapiq/approximator/regression/sii.py index 7a7a8535..39b248e3 100644 --- a/shapiq/approximator/regression/sii.py +++ b/shapiq/approximator/regression/sii.py @@ -2,10 +2,10 @@ from typing import Optional from ._base import Regression -from .._base import NShapleyMixin +from ..k_sii import KShapleyMixin -class RegressionSII(Regression, NShapleyMixin): +class RegressionSII(Regression, KShapleyMixin): """Estimates the SII values using the weighted least square approach. Args: diff --git a/shapiq/approximator/sampling.py b/shapiq/approximator/sampling.py new file mode 100644 index 00000000..34320e82 --- /dev/null +++ b/shapiq/approximator/sampling.py @@ -0,0 +1,181 @@ +import copy +from abc import ABC +from typing import Union + +import numpy as np +from scipy.special import binom + +from approximator._base import Approximator +from shapiq.utils import split_subsets_budget, get_explicit_subsets + + +class ShapleySamplingMixin(ABC): + """Mixin class for the computation of Shapley weights. + + Provides the common functionality for regression-based approximators like + :class:`~shapiq.approximators.RegressionFSI`. The class offers computation of Shapley weights + and the corresponding sampling weights for the KernelSHAP-like estimation approaches. + """ + + def _init_ksh_sampling_weights( + self: Union[Approximator, "ShapleySamplingMixin"] + ) -> np.ndarray[float]: + """Initializes the weights for sampling subsets. + + The sampling weights are of size n + 1 and indexed by the size of the subset. The edges + (the first, empty coalition, and the last element, full coalition) are set to 0. 
+ + Returns: + The weights for sampling subsets of size s in shape (n + 1,). + """ + + weight_vector = np.zeros(shape=self.n - 1, dtype=float) + for subset_size in range(1, self.n): + weight_vector[subset_size - 1] = (self.n - 1) / (subset_size * (self.n - subset_size)) + sampling_weight = (np.asarray([0] + [*weight_vector] + [0])) / sum(weight_vector) + return sampling_weight + + def _get_ksh_subset_weights( + self: Union[Approximator, "ShapleySamplingMixin"], subsets: np.ndarray[bool] + ) -> np.ndarray[float]: + """Computes the KernelSHAP regression weights for the given subsets. + + The weights for the subsets of size s are set to ksh_weights[s] / binom(n, s). The weights + for the empty and full sets are set to a big number. + + Args: + subsets: one-hot matrix of subsets for which to compute the weights in shape + (n_subsets, n). + + Returns: + The KernelSHAP regression weights in shape (n_subsets,). + """ + # set the weights for each subset to ksh_weights[|S|] / binom(n, |S|) + ksh_weights = self._init_ksh_sampling_weights() # indexed by subset size + subset_sizes = np.sum(subsets, axis=1) + weights = ksh_weights[subset_sizes] # set the weights for each subset size + weights /= binom(self.n, subset_sizes) # divide by the number of subsets of the same size + + # set the weights for the empty and full sets to big M + weights[np.logical_not(subsets).all(axis=1)] = float(1_000_000) + weights[subsets.all(axis=1)] = float(1_000_000) + return weights + + def _sample_subsets( + self: Union[Approximator, "ShapleySamplingMixin"], + budget: int, + sampling_weights: np.ndarray[float], + replacement: bool = False, + pairing: bool = True, + ) -> np.ndarray[bool]: + """Samples subsets with the given budget. + + Args: + budget: budget for the sampling. + sampling_weights: weights for sampling subsets of certain sizes and indexed by the size. + The shape is expected to be (n + 1,). A size that is not to be sampled has weight 0. 
+ pairing: whether to use pairing (`True`) sampling or not (`False`). Defaults to `False`. + + Returns: + sampled subsets. + """ + # sanitize input parameters + sampling_weights = copy.copy(sampling_weights) + sampling_weights /= np.sum(sampling_weights) + + # adjust budget for paired sampling + if pairing: + budget = budget - budget % 2 # must be even for pairing + budget = int(budget / 2) + + # create storage array for given budget + subset_matrix = np.zeros(shape=(budget, self.n), dtype=bool) + + # sample subsets + sampled_sizes = self._rng.choice(self.N_arr, size=budget, p=sampling_weights).astype(int) + if replacement: # sample subsets with replacement + permutations = np.tile(np.arange(self.n), (budget, 1)) + self._rng.permuted(permutations, axis=1, out=permutations) + for i, subset_size in enumerate(sampled_sizes): + subset = permutations[i, :subset_size] + subset_matrix[i, subset] = True + else: # sample subsets without replacement + sampled_subsets, n_sampled = set(), 0 # init sampling variables + while n_sampled < budget: + subset_size = sampled_sizes[n_sampled] + subset = tuple(sorted(self._rng.choice(np.arange(0, self.n), size=subset_size))) + sampled_subsets.add(subset) + if len(sampled_subsets) != n_sampled: # subset was not already sampled + subset_matrix[n_sampled, subset] = True + n_sampled += 1 # continue sampling + + if pairing: + subset_matrix = np.repeat(subset_matrix, repeats=2, axis=0) # extend the subset matrix + subset_matrix[1::2] = np.logical_not(subset_matrix[1::2]) # flip sign of paired subsets + + return subset_matrix + + def _generate_shapley_dataset( + self: Union[Approximator, "ShapleySamplingMixin"], + budget: int, + pairing: bool = True, + replacement: bool = False, + ) -> tuple[np.ndarray[bool], bool, int]: + """Generates the two-part dataset containing explicit and sampled subsets. + + The first part of the dataset contains all explicit subsets. The second half contains the + sampled subsets. 
The parts can be determined by the `n_explicit_subsets` parameter. + + Args: + budget: The budget for the approximation (i.e., the number of allowed game evaluations). + pairing: Whether to use pairwise sampling (`True`) or not (`False`). Defaults to `True`. + Paired sampling can increase the approximation quality. + replacement: Whether to sample with replacement (`True`) or without replacement + (`False`). Defaults to `False`. + + Returns: + - The dataset containing explicit and sampled subsets. The dataset is a 2D array of + shape (n_subsets, n_players) where each row is a subset. + - A flag indicating whether the approximation is estimated (`True`) or exact (`False`). + - The number of explicit subsets. + """ + estimation_flag = True + # create storage array for given budget + all_subsets: np.ndarray[bool] = np.zeros(shape=(budget, self.n), dtype=bool) + n_subsets = 0 + # split the subset sizes into explicit and sampling parts + sampling_weights: np.ndarray[float] = self._init_ksh_sampling_weights() + explicit_sizes, sampling_sizes, remaining_budget = split_subsets_budget( + order=1, n=self.n, budget=budget, sampling_weights=sampling_weights + ) + # enumerate all explicit subsets + explicit_subsets: np.ndarray[bool] = get_explicit_subsets(self.n, explicit_sizes) + n_explicit_subsets = explicit_subsets.shape[0] + all_subsets[:n_explicit_subsets] = explicit_subsets + n_subsets += n_explicit_subsets + sampling_weights[explicit_sizes] = 0.0 # zero out sampling weights for explicit sizes + # sample the remaining subsets with the remaining budget + if len(sampling_sizes) > 0: + if remaining_budget > 0: + sampling_subsets: np.ndarray[bool] = self._sample_subsets( + budget=remaining_budget, + sampling_weights=sampling_weights, + replacement=replacement, + pairing=pairing, + ) + n_subsets += sampling_subsets.shape[0] + all_subsets[n_explicit_subsets:n_subsets] = sampling_subsets + all_subsets = all_subsets[:n_subsets] # remove unnecessary rows + else: + estimation_flag 
= False # no sampling needed computation is exact + all_subsets = all_subsets[:n_explicit_subsets] # remove unnecessary rows + # add empty and full set to all_subsets in the beginning + all_subsets = np.concatenate( + ( + np.zeros(shape=(1, self.n), dtype=bool), # empty set + np.ones(shape=(1, self.n), dtype=bool), # full set + all_subsets, # explicit and sampled subsets + ) + ) + n_explicit_subsets += 2 # add empty and full set + return all_subsets, estimation_flag, n_explicit_subsets diff --git a/shapiq/approximator/shapiq/shapiq.py b/shapiq/approximator/shapiq/shapiq.py index c118de9a..ff868a7b 100644 --- a/shapiq/approximator/shapiq/shapiq.py +++ b/shapiq/approximator/shapiq/shapiq.py @@ -3,13 +3,16 @@ from typing import Callable, Optional import numpy as np -from approximator._base import Approximator, InteractionValues, ShapleySamplingMixin, NShapleyMixin +from approximator._base import Approximator +from approximator.sampling import ShapleySamplingMixin +from approximator.k_sii import KShapleyMixin +from approximator._interaction_values import InteractionValues from utils import powerset -AVAILABLE_INDICES_SHAPIQ = {"SII, STI, FSI, nSII"} +AVAILABLE_INDICES_SHAPIQ = {"SII", "STI", "FSI", "k-SII"} -class ShapIQ(Approximator, ShapleySamplingMixin, NShapleyMixin): +class ShapIQ(Approximator, ShapleySamplingMixin, KShapleyMixin): """The ShapIQ estimator. Args: @@ -146,8 +149,8 @@ def approximate( result_sampled = np.divide(result_sampled, counts, out=result_sampled, where=counts != 0) result = result_explicit + result_sampled - if self.index == "nSII": - result: np.ndarray[float] = self.transforms_sii_to_nsii(result) + if self.index == "k-SII": + result: np.ndarray[float] = self.transforms_sii_to_ksii(result) return self._finalize_result(result, budget=used_budget, estimated=estimation_flag) @@ -224,7 +227,7 @@ def _weight_kernel(self, subset_size: int, interaction_size: int) -> float: Returns: float: The weight for the interaction type. 
""" - if self.index == "SII" or self.index == "nSII": # in both cases return SII kernel + if self.index == "SII" or self.index == "k-SII": # in both cases return SII kernel return self._sii_weight_kernel(subset_size, interaction_size) elif self.index == "STI": return self._sti_weight_kernel(subset_size, interaction_size) diff --git a/shapiq/explainer/_base.py b/shapiq/explainer/_base.py index 8716fa78..29afdf5c 100644 --- a/shapiq/explainer/_base.py +++ b/shapiq/explainer/_base.py @@ -4,7 +4,7 @@ import numpy as np -from approximator._base import InteractionValues +from approximator._interaction_values import InteractionValues from explainer.imputer.marginal_imputer import MarginalImputer diff --git a/shapiq/explainer/interaction.py b/shapiq/explainer/interaction.py index 623d3f57..205e80af 100644 --- a/shapiq/explainer/interaction.py +++ b/shapiq/explainer/interaction.py @@ -4,7 +4,8 @@ import numpy as np -from approximator._base import InteractionValues, Approximator +from approximator._base import Approximator +from approximator._interaction_values import InteractionValues from ._base import Explainer from approximator import ( RegressionSII, @@ -19,13 +20,13 @@ APPROXIMATOR_CONFIGURATIONS = { - "Regression": {"SII": RegressionSII, "FSI": RegressionFSI, "nSII": RegressionSII}, + "Regression": {"SII": RegressionSII, "FSI": RegressionFSI, "k-SII": RegressionSII}, "Permutation": { "SII": PermutationSamplingSII, "STI": PermutationSamplingSTI, - "nSII": PermutationSamplingSII, + "kSII": PermutationSamplingSII, }, - "ShapIQ": {"SII": ShapIQ, "STI": ShapIQ, "FSI": ShapIQ, "nSII": ShapIQ}, + "ShapIQ": {"SII": ShapIQ, "STI": ShapIQ, "FSI": ShapIQ, "k-SII": ShapIQ}, } AVAILABLE_INDICES = { @@ -49,8 +50,8 @@ class InteractionExplainer(Explainer): automatically choose the approximator based on the number of features and the number of samples in the background data. index: The Shapley interaction index to use. 
Must be one of `"SII"` (Shapley Interaction Index), - `"nSII"` (n-Shapley Interaction Index), `"STI"` (Shapley-Taylor Interaction Index), or - `"FSI"` (Faithful Shapley Interaction Index). Defaults to `"nSII"`. + `"kSII"` (n-Shapley Interaction Index), `"STI"` (Shapley-Taylor Interaction Index), or + `"FSI"` (Faithful Shapley Interaction Index). Defaults to `"kSII"`. """ def __init__( @@ -58,7 +59,7 @@ def __init__( model: Callable[[np.ndarray], np.ndarray], background_data: np.ndarray, approximator: Union[str, Approximator] = "auto", - index: str = "nSII", + index: str = "k-SII", max_order: int = 2, random_state: Optional[int] = None, ) -> None: diff --git a/shapiq/explainer/tree.py b/shapiq/explainer/tree.py index f45b6b5d..977e732d 100644 --- a/shapiq/explainer/tree.py +++ b/shapiq/explainer/tree.py @@ -2,7 +2,7 @@ for trees and tree ensembles.""" import numpy as np -from approximator._base import InteractionValues +from approximator._interaction_values import InteractionValues from explainer._base import Explainer diff --git a/shapiq/plot/network.py b/shapiq/plot/network.py index 5004559f..5a8d9a7c 100644 --- a/shapiq/plot/network.py +++ b/shapiq/plot/network.py @@ -8,7 +8,7 @@ from matplotlib import pyplot as plt from PIL import Image -from approximator._base import InteractionValues +from approximator._interaction_values import InteractionValues from utils import powerset from ._config import BLUE, RED, NEUTRAL, LINES diff --git a/tests/tests_approximators/test_approximator_base_interaction_values.py b/tests/tests_approximators/test_approximator_base_interaction_values.py index ff47a507..7352b129 100644 --- a/tests/tests_approximators/test_approximator_base_interaction_values.py +++ b/tests/tests_approximators/test_approximator_base_interaction_values.py @@ -4,7 +4,7 @@ import numpy as np import pytest -from approximator._base import InteractionValues +from approximator._interaction_values import InteractionValues from utils import powerset @@ -14,7 +14,7 @@ 
("SII", 5, 1, 2, 100, True), ("STI", 5, 1, 2, 100, True), ("FSI", 5, 1, 2, 100, True), - ("nSII", 5, 1, 2, 100, True), + ("k-SII", 5, 1, 2, 100, True), ("SII", 5, 1, 2, 100, False), ("something", 5, 1, 2, 100, False), # expected to fail with ValueError ], diff --git a/tests/tests_approximators/test_approximator_ksii_estimation.py b/tests/tests_approximators/test_approximator_ksii_estimation.py new file mode 100644 index 00000000..61ee4c8a --- /dev/null +++ b/tests/tests_approximators/test_approximator_ksii_estimation.py @@ -0,0 +1,72 @@ +"""Tests the approximiation of nSII values with PermutationSamplingSII and ShapIQ.""" +import numpy as np +import pytest + +from approximator import ( + convert_ksii_into_one_dimension, + transforms_sii_to_ksii, + PermutationSamplingSII, + ShapIQ, +) +from games import DummyGame + + +@pytest.mark.parametrize( + "sii_approximator, ksii_approximator", + [ + ( + PermutationSamplingSII(7, 2, "SII", False, random_state=42), + PermutationSamplingSII(7, 2, "k-SII", False, random_state=42), + ), + ( + ShapIQ(7, 2, "SII", False, random_state=42), + ShapIQ(7, 2, "k-SII", False, random_state=42), + ), + ], +) +def test_nsii_estimation(sii_approximator, ksii_approximator): + """Tests the approximation of k-SII values with PermutationSamplingSII and ShapIQ.""" + n = 7 + max_order = 2 + interaction = (1, 2) + game = DummyGame(n, interaction) + # sii_approximator = PermutationSamplingSII(n, max_order, "SII", False, random_state=42) + sii_estimates = sii_approximator.approximate(1_000, game, batch_size=None) + # nsii_approximator = PermutationSamplingSII(n, max_order, "kSII", False, random_state=42) + ksii_estimates = ksii_approximator.approximate(1_000, game, batch_size=None) + assert sii_estimates != ksii_estimates + assert ksii_estimates.index == "k-SII" + + k_sii_transformed = ksii_approximator.transforms_sii_to_ksii(sii_estimates) + assert k_sii_transformed.index == "k-SII" + assert k_sii_transformed == ksii_estimates # check weather 
transform and estimation are equal + + # nSII values for player 1 and 2 should be approximately 0.1429 and the interaction 1.0 + assert ksii_estimates[(1,)] == pytest.approx(0.1429, 0.4) + assert ksii_estimates[(2,)] == pytest.approx(0.1429, 0.4) + assert ksii_estimates[(1, 2)] == pytest.approx(1.0, 0.2) + + # check efficiency + efficiency = np.sum(ksii_estimates.values) + assert efficiency == pytest.approx(2.0, 0.01) + + # check one dim transform + pos_ksii_values, neg_ksii_values = convert_ksii_into_one_dimension(ksii_estimates) + assert pos_ksii_values.shape == (n,) and neg_ksii_values.shape == (n,) + assert np.all(pos_ksii_values >= 0) and np.all(neg_ksii_values <= 0) + sum_of_both = np.sum(pos_ksii_values) + np.sum(neg_ksii_values) + assert sum_of_both == pytest.approx(efficiency, 0.01) + assert sum_of_both != pytest.approx(0.0, 0.01) + + with pytest.raises(ValueError): + _ = convert_ksii_into_one_dimension(sii_estimates) + + # check transforms_sii_to_nsii function + transformed = transforms_sii_to_ksii(sii_estimates) + assert transformed.index == "k-SII" + transformed = transforms_sii_to_ksii(sii_estimates.values, approximator=sii_approximator) + assert isinstance(transformed, np.ndarray) + transformed = transforms_sii_to_ksii(sii_estimates.values, n=n, max_order=max_order) + assert isinstance(transformed, np.ndarray) + with pytest.raises(ValueError): + _ = transforms_sii_to_ksii(sii_estimates.values) diff --git a/tests/tests_approximators/test_approximator_nsii_estimation.py b/tests/tests_approximators/test_approximator_nsii_estimation.py deleted file mode 100644 index 84a76333..00000000 --- a/tests/tests_approximators/test_approximator_nsii_estimation.py +++ /dev/null @@ -1,69 +0,0 @@ -"""Tests the approximiation of nSII values with PermutationSamplingSII and ShapIQ.""" -import numpy as np -import pytest - -from approximator import ( - convert_nsii_into_one_dimension, - transforms_sii_to_nsii, - PermutationSamplingSII, - ShapIQ, -) -from games import 
DummyGame - - -@pytest.mark.parametrize( - "sii_approximator, nsii_approximator", - [ - ( - PermutationSamplingSII(7, 2, "SII", False, random_state=42), - PermutationSamplingSII(7, 2, "nSII", False, random_state=42), - ), - (ShapIQ(7, 2, "SII", False, random_state=42), ShapIQ(7, 2, "nSII", False, random_state=42)), - ], -) -def test_nsii_estimation(sii_approximator, nsii_approximator): - """Tests the approximation of nSII values with PermutationSamplingSII and ShapIQ.""" - n = 7 - max_order = 2 - interaction = (1, 2) - game = DummyGame(n, interaction) - # sii_approximator = PermutationSamplingSII(n, max_order, "SII", False, random_state=42) - sii_estimates = sii_approximator.approximate(1_000, game, batch_size=None) - # nsii_approximator = PermutationSamplingSII(n, max_order, "nSII", False, random_state=42) - nsii_estimates = nsii_approximator.approximate(1_000, game, batch_size=None) - assert sii_estimates != nsii_estimates - assert nsii_estimates.index == "nSII" - - n_sii_transformed = nsii_approximator.transforms_sii_to_nsii(sii_estimates) - assert n_sii_transformed.index == "nSII" - assert n_sii_transformed == nsii_estimates # check weather transform and estimation are equal - - # nSII values for player 1 and 2 should be approximately 0.1429 and the interaction 1.0 - assert nsii_estimates[(1,)] == pytest.approx(0.1429, 0.4) - assert nsii_estimates[(2,)] == pytest.approx(0.1429, 0.4) - assert nsii_estimates[(1, 2)] == pytest.approx(1.0, 0.2) - - # check efficiency - efficiency = np.sum(nsii_estimates.values) - assert efficiency == pytest.approx(2.0, 0.01) - - # check one dim transform - pos_nsii_values, neg_nsii_values = convert_nsii_into_one_dimension(nsii_estimates) - assert pos_nsii_values.shape == (n,) and neg_nsii_values.shape == (n,) - assert np.all(pos_nsii_values >= 0) and np.all(neg_nsii_values <= 0) - sum_of_both = np.sum(pos_nsii_values) + np.sum(neg_nsii_values) - assert sum_of_both == pytest.approx(efficiency, 0.01) - assert sum_of_both != 
pytest.approx(0.0, 0.01) - - with pytest.raises(ValueError): - _ = convert_nsii_into_one_dimension(sii_estimates) - - # check transforms_sii_to_nsii function - transformed = transforms_sii_to_nsii(sii_estimates) - assert transformed.index == "nSII" - transformed = transforms_sii_to_nsii(sii_estimates.values, approximator=sii_approximator) - assert isinstance(transformed, np.ndarray) - transformed = transforms_sii_to_nsii(sii_estimates.values, n=n, max_order=max_order) - assert isinstance(transformed, np.ndarray) - with pytest.raises(ValueError): - _ = transforms_sii_to_nsii(sii_estimates.values) diff --git a/tests/tests_approximators/test_approximator_permutation_sii.py b/tests/tests_approximators/test_approximator_permutation_sii.py index 9d2824cb..587cb5fe 100644 --- a/tests/tests_approximators/test_approximator_permutation_sii.py +++ b/tests/tests_approximators/test_approximator_permutation_sii.py @@ -4,7 +4,7 @@ import numpy as np import pytest -from approximator._base import InteractionValues +from approximator._interaction_values import InteractionValues from approximator.permutation import PermutationSamplingSII from games import DummyGame @@ -17,7 +17,7 @@ (3, 2, True, "SII", 8), (3, 2, False, "SII", 14), (10, 3, False, "SII", 120), - (10, 3, False, "nSII", 120), + (10, 3, False, "k-SII", 120), (10, 3, False, "something", 120), # expected to fail with ValueError ], ) diff --git a/tests/tests_approximators/test_approximator_permutation_sti.py b/tests/tests_approximators/test_approximator_permutation_sti.py index fe0f93d8..c3b4ac44 100644 --- a/tests/tests_approximators/test_approximator_permutation_sti.py +++ b/tests/tests_approximators/test_approximator_permutation_sti.py @@ -4,7 +4,7 @@ import numpy as np import pytest -from approximator._base import InteractionValues +from approximator._interaction_values import InteractionValues from approximator.permutation import PermutationSamplingSTI from games import DummyGame diff --git 
a/tests/tests_approximators/test_approximator_regression_fsi.py b/tests/tests_approximators/test_approximator_regression_fsi.py index 3c3767aa..b5eeb02a 100644 --- a/tests/tests_approximators/test_approximator_regression_fsi.py +++ b/tests/tests_approximators/test_approximator_regression_fsi.py @@ -4,7 +4,7 @@ import numpy as np import pytest -from approximator._base import InteractionValues +from approximator._interaction_values import InteractionValues from approximator.regression import RegressionFSI from games import DummyGame diff --git a/tests/tests_approximators/test_approximator_regression_sii.py b/tests/tests_approximators/test_approximator_regression_sii.py index e6b223b2..53fad9ca 100644 --- a/tests/tests_approximators/test_approximator_regression_sii.py +++ b/tests/tests_approximators/test_approximator_regression_sii.py @@ -4,7 +4,7 @@ import numpy as np import pytest -from approximator._base import InteractionValues +from approximator._interaction_values import InteractionValues from approximator.regression._base import Regression from approximator.regression import RegressionSII from games import DummyGame @@ -75,5 +75,5 @@ def test_approximate(n, max_order, budget, batch_size): assert efficiency == pytest.approx(2.0, 0.01) # try covert to nSII - nsii_estimates = approximator.transforms_sii_to_nsii(sii_estimates) - assert nsii_estimates.index == "nSII" + nsii_estimates = approximator.transforms_sii_to_ksii(sii_estimates) + assert nsii_estimates.index == "k-SII" diff --git a/tests/tests_approximators/test_approximator_regression_sv.py b/tests/tests_approximators/test_approximator_regression_sv.py index 8801601f..c7e786b0 100644 --- a/tests/tests_approximators/test_approximator_regression_sv.py +++ b/tests/tests_approximators/test_approximator_regression_sv.py @@ -4,7 +4,7 @@ import numpy as np import pytest -from approximator._base import InteractionValues +from approximator._interaction_values import InteractionValues from approximator.regression 
import KernelSHAP from games import DummyGame diff --git a/tests/tests_approximators/test_approximator_shapiq.py b/tests/tests_approximators/test_approximator_shapiq.py index 02321089..d8bed400 100644 --- a/tests/tests_approximators/test_approximator_shapiq.py +++ b/tests/tests_approximators/test_approximator_shapiq.py @@ -3,7 +3,7 @@ import numpy as np import pytest -from approximator._base import InteractionValues +from approximator._interaction_values import InteractionValues from approximator.shapiq import ShapIQ from games import DummyGame diff --git a/tests/tests_explainer/test_explainer_interaction.py b/tests/tests_explainer/test_explainer_interaction.py index 3bc5bc4b..70c95d83 100644 --- a/tests/tests_explainer/test_explainer_interaction.py +++ b/tests/tests_explainer/test_explainer_interaction.py @@ -35,7 +35,7 @@ def background_data(): return X -INDICES = ["SII", "nSII", "STI", "FSI"] +INDICES = ["SII", "k-SII", "STI", "FSI"] MAX_ORDERS = [2, 3] @@ -70,8 +70,8 @@ def test_auto_params(dt_model, background_data): model=model_function, background_data=background_data, ) - assert explainer.index == "nSII" - assert explainer.approximator.index == "nSII" + assert explainer.index == "k-SII" + assert explainer.approximator.index == "k-SII" assert explainer._max_order == 2 assert explainer._random_state is None assert explainer.approximator.__class__.__name__ == "ShapIQ" diff --git a/tests/tests_plots/test_network_plot.py b/tests/tests_plots/test_network_plot.py index 48db6c8e..ee45d662 100644 --- a/tests/tests_plots/test_network_plot.py +++ b/tests/tests_plots/test_network_plot.py @@ -6,7 +6,7 @@ from scipy.special import binom from shapiq.plot import network_plot -from shapiq.approximator._base import InteractionValues +from approximator._interaction_values import InteractionValues def test_network_plot(): @@ -37,7 +37,7 @@ def test_network_plot(): n_values = n_players + int(binom(n_players, 2)) iv = InteractionValues( values=np.random.rand(n_values), - 
index="nSII", + index="k-SII", n_players=n_players, min_order=1, max_order=2,