From b8bd2e4dc633ee8eb41270c8be14ed963063381d Mon Sep 17 00:00:00 2001
From: Janis Pagel <janis.pagel@uni-koeln.de>
Date: Thu, 25 Jan 2024 13:38:43 +0100
Subject: [PATCH] Switch eval metrics from macro to weighted averaging

---
 platypus_entail.ipynb | 90 ++++++++++++++++++++++++++++++++++++++-----
 1 file changed, 80 insertions(+), 10 deletions(-)
diff --git a/platypus_entail.ipynb b/platypus_entail.ipynb
index 866b21a..07304c9 100644
--- a/platypus_entail.ipynb
+++ b/platypus_entail.ipynb
@@ -67,7 +67,7 @@
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "2023-12-12 16:30:36.733817: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.\n",
+      "2024-01-25 12:24:38.657140: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.\n",
       "To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.\n"
      ]
     }
@@ -282,7 +282,77 @@
     {
      "data": {
       "application/vnd.jupyter.widget-view+json": {
-       "model_id": "89530cf06d934e9c932a4853fd12f3d8",
+       "model_id": "dbd0bcc4d9aa44fda86e1a70ec61dfca",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "Downloading (…)fetensors.index.json:   0%|          | 0.00/35.1k [00:00<?, ?B/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "adf5cfa907d64305bb31862fc9f962eb",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "Downloading shards:   0%|          | 0/3 [00:00<?, ?it/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "6f2d5f6e8b3d4f66962bc33feeef3246",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "Downloading (…)of-00003.safetensors:   0%|          | 0.00/9.95G [00:00<?, ?B/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "76fb47e950884a1090185fe994196ba0",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "Downloading (…)of-00003.safetensors:   0%|          | 0.00/9.90G [00:00<?, ?B/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "f9ea8cdf567d4fa99c211d4e05390550",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "Downloading (…)of-00003.safetensors:   0%|          | 0.00/6.18G [00:00<?, ?B/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "6ec068c8bcf14cb293846bf54e535de5",
        "version_major": 2,
        "version_minor": 0
       },
@@ -679,9 +749,9 @@
        "  <tbody>\n",
        "    <tr>\n",
        "      <th>0</th>\n",
-       "      <td>0.304688</td>\n",
-       "      <td>0.5</td>\n",
-       "      <td>0.219101</td>\n",
+       "      <td>0.267029</td>\n",
+       "      <td>0.438202</td>\n",
+       "      <td>0.192021</td>\n",
        "      <td>0.438202</td>\n",
        "    </tr>\n",
        "  </tbody>\n",
@@ -689,8 +759,8 @@
        "</div>"
       ],
       "text/plain": [
-       "         F1  Recall  Precision  Accuracy\n",
-       "0  0.304688     0.5   0.219101  0.438202"
+       "         F1    Recall  Precision  Accuracy\n",
+       "0  0.267029  0.438202   0.192021  0.438202"
       ]
      },
      "execution_count": 25,
@@ -699,9 +769,9 @@
     }
    ],
    "source": [
-    "pd.DataFrame(list(zip([f1_score(ground_truth, predictions, average=\"macro\")],\n",
-    "                      [recall_score(ground_truth, predictions,  average=\"macro\")],\n",
-    "                      [precision_score(ground_truth, predictions, average=\"macro\")],\n",
+    "pd.DataFrame(list(zip([f1_score(ground_truth, predictions, average=\"weighted\")],\n",
+    "                      [recall_score(ground_truth, predictions,  average=\"weighted\")],\n",
+    "                      [precision_score(ground_truth, predictions, average=\"weighted\")],\n",
     "                      [accuracy_score(ground_truth, predictions,)])),\n",
     "                      columns = [\"F1\", \"Recall\", \"Precision\", \"Accuracy\"])"
    ]