diff --git a/notebooks/memory_profiling_bark.ipynb b/notebooks/memory_profiling_bark.ipynb index 4c500da..19c3527 100644 --- a/notebooks/memory_profiling_bark.ipynb +++ b/notebooks/memory_profiling_bark.ipynb @@ -10,8 +10,6 @@ " - Small models: a smaller version of the model. This can be set by using the environment variable `SUNO_USE_SMALL_MODELS`\n", " - offloading models to CPU: Holding only one model at a time on the GPU, and shuttling the models to the CPU in between generations. \n", "\n", - "## NOTE: this requires a GPU to run\n", - "\n", "# $ \\\\ $\n", "## First, we'll use the most memory efficient configuration" ] @@ -29,7 +27,10 @@ "os.environ[\"SUNO_USE_SMALL_MODELS\"] = \"1\"\n", "os.environ[\"SUNO_OFFLOAD_CPU\"] = \"1\"\n", "\n", - "from bark.generation import preload_models\n", + "from bark.generation import (\n", + " generate_text_semantic,\n", + " preload_models,\n", + ")\n", "from bark import generate_audio, SAMPLE_RATE\n", "\n", "import torch" @@ -102,8 +103,17 @@ "outputs": [], "source": [ "import os\n", - "from bark.generation import preload_models\n", + "\n", + "from bark.generation import (\n", + " generate_text_semantic,\n", + " preload_models,\n", + " models,\n", + ")\n", + "import bark.generation\n", + "\n", + "from bark.api import semantic_to_waveform\n", "from bark import generate_audio, SAMPLE_RATE\n", + "\n", "import torch\n", "import time" ] @@ -119,26 +129,30 @@ "output_type": "stream", "text": [ "Small models True, offloading to CPU: True\n", - "\tmax memory usage = 2949MB, time 3s\n", + "\tmax memory usage = 967MB, time 4s\n", "\n", "Small models False, offloading to CPU: True\n", - "\tmax memory usage = 7826MB, time 4s\n", + "\tmax memory usage = 2407MB, time 8s\n", "\n", "Small models True, offloading to CPU: False\n", - "\tmax memory usage = 5504MB, time 2s\n", + "\tmax memory usage = 2970MB, time 3s\n", "\n", "Small models False, offloading to CPU: False\n", - "\tmax memory usage = 7825MB, time 5s\n", + "\tmax memory usage = 
7824MB, time 6s\n", "\n" ] } ], "source": [ - "offload_models = True\n", - "use_small_models = True\n", "\n", "for offload_models in (True, False):\n", + " # this setattr is needed to toggle CPU offloading on the fly\n", + " # an easier way to do this is with `os.environ[\"SUNO_OFFLOAD_CPU\"] = \"1\"`\n", + " setattr(bark.generation, \"OFFLOAD_CPU\", offload_models)\n", " for use_small_models in (True, False):\n", + " models = {}\n", + " torch.cuda.empty_cache()\n", " torch.cuda.reset_peak_memory_stats()\n", " preload_models(\n", " text_use_small=use_small_models,\n",