fastai · artste · Oct 11, 2023
diff --git a/lm-hackers.ipynb b/lm-hackers.ipynb
@@ -1256,6 +1256,33 @@
     "tokr.batch_decode(res)"
    ]
   },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "6e769bff",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Function to clean memory before using new model\n",
+    "import gc\n",
+    "\n",
+    "# Convenience function\n",
+    "def get_cuda_memory_reserved_gb():\n",
+    "    return torch.cuda.memory_reserved()/1_000_000_000\n",
+    "    \n",
+    "def free_memory(verbose=False):\n",
+    "    gc.collect() \n",
+    "    torch.cuda.empty_cache()\n",
+    "    if verbose:\n",
+    "        print(f'ℹ️ CUDA MEMORY RESERVED AFTER free_memory: {get_cuda_memory_reserved_gb()} GB')    \n",
+    "\n",
+    "# Let's see how it works:\n",
+    "print(f'BEFORE free_memory: {get_cuda_memory_reserved_gb()} GB')\n",
+    "del model # Delete model to be able to free memory\n",
+    "print(f'BEFORE free_memory after deleting `model`: {get_cuda_memory_reserved_gb()} GB')\n",
+    "free_memory(verbose=True)"
+   ]
+  },
   {
    "cell_type": "code",
    "execution_count": 28,
@@ -1321,7 +1348,9 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "model = AutoModelForCausalLM.from_pretrained('TheBloke/Llama-2-7b-Chat-GPTQ', device_map=0, torch_dtype=torch.float16)"
+    "del model; free_memory(True); # Cleanup before loading new one\n",
+    "model = AutoModelForCausalLM.from_pretrained('TheBloke/Llama-2-7b-Chat-GPTQ', device_map=0, torch_dtype=torch.float16)\n",
+    "print(f'CUDA MEMORY RESERVED AFTER LOADING model: {get_cuda_memory_reserved_gb()} GB')"
    ]
   },
   {
@@ -1364,6 +1393,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
+    "del model; free_memory();\n",
     "mn = 'TheBloke/Llama-2-13B-GPTQ'\n",
     "model = AutoModelForCausalLM.from_pretrained(mn, device_map=0, torch_dtype=torch.float16)"
    ]
@@ -1465,6 +1495,7 @@
     }
    ],
    "source": [
+    "del model; free_memory();\n",
     "mn = \"stabilityai/StableBeluga-7B\"\n",
     "model = AutoModelForCausalLM.from_pretrained(mn, device_map=0, torch_dtype=torch.bfloat16)"
    ]
@@ -1535,6 +1566,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
+    "del model; free_memory();\n",
     "mn = 'TheBloke/OpenOrca-Platypus2-13B-GPTQ'\n",
     "model = AutoModelForCausalLM.from_pretrained(mn, device_map=0, torch_dtype=torch.float16)"
    ]
@@ -2069,6 +2101,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
+    "del model; free_memory();\n",
     "model = AutoModelForCausalLM.from_pretrained('meta-llama/Llama-2-7b-hf',\n",
     "                                             torch_dtype=torch.bfloat16, device_map=0)\n",
     "model = PeftModel.from_pretrained(model, ax_model)\n",
@@ -2119,6 +2152,17 @@
     "print(tokr.batch_decode(res)[0])"
    ]
   },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "b17c43c0",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Finally release model and memory\n",
+    "del model; free_memory(True);"
+   ]
+  },
   {
    "cell_type": "markdown",
    "id": "991d4a93-dab8-4777-82d2-301c494deba0",