fill in recalibration step

mitre · Oct 19, 2023 · 29303ed · 29303ed
1 parent ced0bf6
commit 29303ed
Show file tree

Hide file tree

Showing 5 changed files with 62 additions and 32 deletions.
diff --git a/docs/source/examples/nlp/wilds_datasets.ipynb b/docs/source/examples/nlp/wilds_datasets.ipynb
@@ -1,5 +1,14 @@
 {
  "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "__This code is experimental. Some notable issues:__\n",
+    "- transforms are very slow, even on moderate batch sizes\n",
+    "- the detection scheme design is not finalized"
+   ]
+  },
   {
    "cell_type": "markdown",
    "metadata": {},
@@ -57,7 +66,10 @@
     "# civil comments\n",
     "# dataset_civil = get_dataset(dataset=\"civilcomments\", download=True, root_dir=\"./wilds_datasets\")\n",
     "dataset_civil = pd.read_csv('wilds_datasets/civilcomments_v1.0/all_data_with_identities.csv')\n",
-    "dataset_civil = dataset_civil['comment_text'][:5].tolist()"
+    "dataset_civil = dataset_civil['comment_text'][:300].tolist()\n",
+    "batch1 = dataset_civil[:100]\n",
+    "batch2 = dataset_civil[100:200]\n",
+    "batch3 = dataset_civil[200:300]"
    ]
   },
   {
@@ -71,14 +83,45 @@
      "text": [
       "c:\\Users\\ASRIVASTAVA\\Documents\\repos\\menelaus\\venv\\lib\\site-packages\\transformers\\tokenization_utils_base.py:2418: FutureWarning: The `pad_to_max_length` argument is deprecated and will be removed in a future version, use `padding=True` or `padding='longest'` to pad to the longest sequence in the batch, or use `padding='max_length'` to pad to a max length. In this case, you can give a specific length with `max_length` (e.g. `max_length=45`) or leave max_length to None to pad to the maximal input size of the model (e.g. 512 for Bert).\n",
       "  warnings.warn(\n",
-      "Some weights of the PyTorch model were not used when initializing the TF 2.0 model TFBertModel: ['cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.bias']\n",
+      "Some weights of the PyTorch model were not used when initializing the TF 2.0 model TFBertModel: ['cls.seq_relationship.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.weight']\n",
       "- This IS expected if you are initializing TFBertModel from a PyTorch model trained on another task or with another architecture (e.g. initializing a TFBertForSequenceClassification model from a BertForPreTraining model).\n",
       "- This IS NOT expected if you are initializing TFBertModel from a PyTorch model that you expect to be exactly identical (e.g. initializing a TFBertForSequenceClassification model from a BertForSequenceClassification model).\n",
       "All the weights of TFBertModel were initialized from the PyTorch model.\n",
-      "If your task is similar to the task the model of the checkpoint was trained on, you can already use TFBertModel for predictions without further training.\n",
+      "If your task is similar to the task the model of the checkpoint was trained on, you can already use TFBertModel for predictions without further training.\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "State after initial batch: baseline\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
       "c:\\Users\\ASRIVASTAVA\\Documents\\repos\\menelaus\\venv\\lib\\site-packages\\transformers\\tokenization_utils_base.py:2418: FutureWarning: The `pad_to_max_length` argument is deprecated and will be removed in a future version, use `padding=True` or `padding='longest'` to pad to the longest sequence in the batch, or use `padding='max_length'` to pad to a max length. In this case, you can give a specific length with `max_length` (e.g. `max_length=45`) or leave max_length to None to pad to the maximal input size of the model (e.g. 512 for Bert).\n",
       "  warnings.warn(\n",
-      "Some weights of the PyTorch model were not used when initializing the TF 2.0 model TFBertModel: ['cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.bias']\n",
+      "Some weights of the PyTorch model were not used when initializing the TF 2.0 model TFBertModel: ['cls.seq_relationship.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.weight']\n",
+      "- This IS expected if you are initializing TFBertModel from a PyTorch model trained on another task or with another architecture (e.g. initializing a TFBertForSequenceClassification model from a BertForPreTraining model).\n",
+      "- This IS NOT expected if you are initializing TFBertModel from a PyTorch model that you expect to be exactly identical (e.g. initializing a TFBertForSequenceClassification model from a BertForSequenceClassification model).\n",
+      "All the weights of TFBertModel were initialized from the PyTorch model.\n",
+      "If your task is similar to the task the model of the checkpoint was trained on, you can already use TFBertModel for predictions without further training.\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "State after test batch alarm\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "Some weights of the PyTorch model were not used when initializing the TF 2.0 model TFBertModel: ['cls.seq_relationship.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.weight']\n",
       "- This IS expected if you are initializing TFBertModel from a PyTorch model trained on another task or with another architecture (e.g. initializing a TFBertForSequenceClassification model from a BertForPreTraining model).\n",
       "- This IS NOT expected if you are initializing TFBertModel from a PyTorch model that you expect to be exactly identical (e.g. initializing a TFBertForSequenceClassification model from a BertForSequenceClassification model).\n",
       "All the weights of TFBertModel were initialized from the PyTorch model.\n",
@@ -89,17 +132,14 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "[0.       0.01     0.0025   0.       0.       0.01     0.       0.\n",
-      " 0.025625 0.       0.       0.       0.       0.0025   0.       0.\n",
-      " 0.0025   0.01     0.025625 0.03125  0.025625 0.03125  0.       0.\n",
-      " 0.       0.       0.0025   0.       0.       0.       0.       0.      ]\n"
+      "State after new batch, recalibration alarm\n"
      ]
     }
    ],
    "source": [
     "# tokens \n",
     "tokenizer = auto_tokenize(model_name='bert-base-cased', pad_to_max_length=True, return_tensors='tf')\n",
-    "tokens = tokenizer(data=dataset_civil)\n",
+    "tokens = tokenizer(data=batch1)\n",
     "\n",
     "# embedding (TODO abstract this layers line)\n",
     "layers = [-_ for _ in range(1, 8 + 1)]\n",
@@ -111,25 +151,16 @@
     "# detector + set reference\n",
     "ks_alarm = KolmogorovSmirnovAlarm()\n",
     "detector = Detector(alarm=ks_alarm, transforms=[tokenizer, embedder, uae_reduce])\n",
-    "detector.step(dataset_civil)\n",
-    "assert detector.rep_test is None and detector.rep_reference.shape == (5, 32)\n",
+    "detector.step(batch1)\n",
+    "print(f\"State after initial batch: {detector.state}\")\n",
     "\n",
     "# detector + add test (copy reference)  \n",
-    "detector.step(dataset_civil)\n",
-    "assert detector.rep_test.shape == (5, 32)\n",
+    "detector.step(batch2)\n",
+    "print(f\"State after test batch {detector.state}\")\n",
     "\n",
-    "# TODO - recalibrate and re-evaluate ..."
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# todo check things work on new branch installs etc\n",
-    "# todo remove this exp code from test requirement\n",
-    "# todo ?"
+    "# recalibrate and re-evaluate (XXX - all batches must be same length)\n",
+    "detector.step(batch3)\n",
+    "print(f\"State after new batch, recalibration {detector.state}\")"
    ]
   }
  ],

diff --git a/menelaus/experimental/__init__.py b/menelaus/experimental/__init__.py
@@ -1,3 +0,0 @@
-from menelaus.nlp_experimental.alarm import Alarm
-from menelaus.nlp_experimental.detector import Detector
-from menelaus.nlp_experimental.representation import Representation

diff --git a/menelaus/experimental/alarm.py b/menelaus/experimental/alarm.py
@@ -16,8 +16,6 @@ def __init__(self, alpha=0.05, critical_feature_proportion=0.25):
     def evaluate(self, rep_reference, rep_test):
         rep_reference = rep_reference.reshape(rep_reference.shape[0], -1)
         rep_test = rep_test.reshape(rep_test.shape[0], -1)
-        if rep_reference.shape[0] != rep_test.shape[0]:
-            raise ValueError(f"...")
         n_features = rep_reference.shape[1]
         p_values = np.zeros(n_features, dtype=np.float32)
         # distances = np.zeros_like(p_values)
@@ -38,4 +36,3 @@ def evaluate(self, rep_reference, rep_test):
             self._state = STATE_DRIFT
         else:
             self._state = STATE_INIT
-        print(p_values)
diff --git a/menelaus/experimental/detector.py b/menelaus/experimental/detector.py
@@ -1,3 +1,4 @@
+import numpy as np
 from typing import List
 from toolz import pipe
 
@@ -14,7 +15,11 @@ def transform(self, raw_values):
         return ret
 
     def recalibrate(self, raw_values):
-        return 0
+        if self.state == "alarm":
+            rep_new = self.transform(raw_values)
+            self.rep_test = np.vstack((self.rep_test, rep_new))
+        else:
+            self.rep_test = self.transform(raw_values)
 
     @property
     def state(self):

diff --git a/menelaus/experimental/transforms.py → menelaus/experimental/transform.py b/menelaus/experimental/transforms.py → menelaus/experimental/transform.py