From 901cc81c348a34ac491ddbcc90472ec28de456ba Mon Sep 17 00:00:00 2001
From: Jan Blunk <jan.blunk@uni-jena.de>
Date: Mon, 28 Aug 2023 10:58:38 +0200
Subject: [PATCH] contextual decomposition following Singh et al.

---
 README.md                   |   6 +-
 contextual_decomposition.py | 133 ++++++++++++++++++++++++++++++++++++
 mixed_cmiI_estimator.py     |  36 +++++-----
 3 files changed, 156 insertions(+), 19 deletions(-)
 create mode 100644 contextual_decomposition.py

diff --git a/README.md b/README.md
index f5be8ba..8ec73e9 100644
--- a/README.md
+++ b/README.md
@@ -9,4 +9,8 @@ This repository provides code to use the method presented in our GCPR 2023 paper
     year = {2023},
     }
 
-This repository includes a Python implementation of the hybrid CMI estimator CMIh presented by [Zan et al.](https://doi.org/10.3390/e24091234) The authors' original R implementation can be found [here](https://github.com/leizan/CMIh2022). CMIh was published under the MIT license. 
\ No newline at end of file
+This repository includes a Python implementation of the hybrid CMI estimator CMIh presented by [Zan et al.](https://doi.org/10.3390/e24091234) The authors' original R implementation can be found [here](https://github.com/leizan/CMIh2022). CMIh was published under the MIT license.
+
+## Installation
+First, install ACD as described in the [repository for "Hierarchical interpretations for neural network predictions" by Singh et al.](https://github.com/csinva/hierarchical-dnn-interpretations).
+
diff --git a/contextual_decomposition.py b/contextual_decomposition.py
new file mode 100644
index 0000000..8c93d6a
--- /dev/null
+++ b/contextual_decomposition.py
@@ -0,0 +1,133 @@
+"""
+This file provides a way to use contextual decomposition in the loss function.
+
+The implementation requires and is partly based on the implementation provided
+by Singh et al. for their paper "Hierarchical interpretations for neural network
+predictions":
+
+    @inproceedings{
+       singh2019hierarchical,
+       title={Hierarchical interpretations for neural network predictions},
+       author={Chandan Singh and W. James Murdoch and Bin Yu},
+       booktitle={International Conference on Learning Representations},
+       year={2019},
+       url={https://openreview.net/forum?id=SkEqro0ctQ},
+    }
+
+The original implementation can be found here:
+    https://github.com/csinva/hierarchical-dnn-interpretations
+
+It was published under the following license:
+
+    MIT License
+
+    Copyright (c) 2019 Chandan Singh
+
+    Permission is hereby granted, free of charge, to any person obtaining a copy
+    of this software and associated documentation files (the "Software"), to deal
+    in the Software without restriction, including without limitation the rights
+    to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+    copies of the Software, and to permit persons to whom the Software is
+    furnished to do so, subject to the following conditions:
+
+    The above copyright notice and this permission notice shall be included in all
+    copies or substantial portions of the Software.
+
+    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+    IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+    FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+    AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+    LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+    OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+    SOFTWARE.
+"""
+
+import acd
+import numpy as np
+import torch
+
+
+def get_cd_1d_by_modules(model, modules, inputs, feat_of_interest, device="cpu"):
+    """Run ``acd.cd_generic`` on a relevant/irrelevant split of the inputs.
+
+    The inputs are moved to ``device`` and split by a binary feature mask:
+    the dimensions in ``feat_of_interest`` form the relevant part, all other
+    dimensions the irrelevant part. Both parts are passed, together with
+    ``modules``, to ``acd.cd_generic``; its two results are returned as the
+    tuple (relevant, irrelevant). The ``model`` argument is not used here.
+
+    The split follows the CD implementation from
+    https://github.com/csinva/hierarchical-dnn-interpretations/blob/master/acd/scores/cd.py.
+    """
+    batch = inputs.to(device)
+    # Binary selector over dimension 1: a 1 marks a feature of interest.
+    selector = np.zeros(batch.shape[1], dtype=np.int32)
+    selector[feat_of_interest] = 1
+    selector = torch.FloatTensor(selector).to(device)
+
+    # Both factors already live on ``device``, so the products do as well.
+    in_focus = selector * batch
+    out_of_focus = (1 - selector) * batch
+
+    relevant, irrelevant = acd.cd_generic(modules, in_focus, out_of_focus)
+    return relevant, irrelevant
+
+
+def get_cd_1d(model, inputs, feat_of_interest, device="cpu"):
+    """Calculate contextual decomposition (CD) scores for the given model.
+
+    The contextual decomposition performs feature attribution by decomposing
+    the output of the model into two parts: the contribution of the feature(s)
+    of interest and the contribution of all other features. In a 1d scenario
+    you are typically interested in a single feature, but a list of features
+    that, together, form the features of interest is supported as well.
+
+    Interpretation of the generated scores:
+    The output is (scores_feat, scores_other), both flattened to
+    one-dimensional tensors. Since this function works with batched data, two
+    scores are generated for each input sample:
+        Prediction of the Network = score of the features of interest
+                                    + score of the other features
+
+    The model is put into evaluation mode for the computation. If it was in
+    training mode before, that mode is restored afterwards, even when the
+    decomposition raises an exception.
+
+    :param model: PyTorch model to generate the CD scores for.
+    :param inputs: Batched inputs to the model. Typically a 2-dimensional
+    tensor containing the inputs for a single batch. The size of dimension 1
+    determines the length of the feature mask.
+    :param feat_of_interest: Integer or list of integers. Defines which
+    dimensions of the input are part of the feature(s) of interest.
+    :param device: Device used by PyTorch (cuda / cpu).
+    :return: Tuple (scores_feat, scores_other). These are the scores for each
+    of the batched inputs. Here, scores_feat[i] + scores_other[i]=prediction[i].
+    Note that the feature scores are determined in a per-batch manner. Therefore,
+    the resulting feature scores are vectors.
+    :rtype: Tuple of one-dimensional tensors.
+
+    """
+
+    # Prepare mask before touching the model, so an invalid feature index
+    # cannot leave the model in evaluation mode.
+    # The mask answers the following question: For which dimensions do you want
+    # to get their feature attribution compared to all other features?
+    n_dim = list(inputs.size())[1]
+    mask = np.zeros(n_dim, dtype=np.int32)
+    mask[feat_of_interest] = 1
+
+    # Set model in evaluation mode.
+    prev_training_status = model.training
+    model.eval()
+
+    try:
+        # Contextual decomposition.
+        # For each element of the batch we receive the contribution of the
+        # feature(s) of interest and the contribution of all other features.
+        scores_feat, scores_other = acd.cd(inputs, model=model, mask=mask, device=device)
+    finally:
+        # Reset evaluation mode if necessary, also when acd.cd raises.
+        if prev_training_status:
+            model.train()
+
+    return (torch.flatten(scores_feat), torch.flatten(scores_other))
diff --git a/mixed_cmiI_estimator.py b/mixed_cmiI_estimator.py
index 85bb5e4..067bb58 100644
--- a/mixed_cmiI_estimator.py
+++ b/mixed_cmiI_estimator.py
@@ -9,30 +9,30 @@
 
 The original R implementation can be found here:
     https://github.com/leizan/CMIh2022
-It was published under the following license:
 
+It was published under the following license:
 
-MIT License
+    MIT License
 
-Copyright (c) 2022 leizan
+    Copyright (c) 2022 leizan
 
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
+    Permission is hereby granted, free of charge, to any person obtaining a copy
+    of this software and associated documentation files (the "Software"), to deal
+    in the Software without restriction, including without limitation the rights
+    to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+    copies of the Software, and to permit persons to whom the Software is
+    furnished to do so, subject to the following conditions:
 
-The above copyright notice and this permission notice shall be included in all
-copies or substantial portions of the Software.
+    The above copyright notice and this permission notice shall be included in all
+    copies or substantial portions of the Software.
 
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-SOFTWARE.
+    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+    IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+    FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+    AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+    LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+    OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+    SOFTWARE.
 """
 
 import torch