diff --git a/notebooks/load_and_visualize_opencorporates_corporate_entity_data_llc_corp.ipynb b/notebooks/load_and_visualize_opencorporates_corporate_entity_data_llc_corp.ipynb
new file mode 100644
index 0000000..e00eca9
--- /dev/null
+++ b/notebooks/load_and_visualize_opencorporates_corporate_entity_data_llc_corp.ipynb
@@ -0,0 +1,1674 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Introduction & Dependencies\n",
+    "\n",
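+    "Data source: [OpenCorporates](https://opencorporates.com/). This notebook works with its per-jurisdiction files (`companies`, `officers`, `additional_identifiers`, `non_reg_addresses`, `alternative_names`)."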
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "%load_ext jupyter_ai"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "%load_ext dotenv"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 41,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "%dotenv "
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 44,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Deploy Dash apps for free on Ploomber Cloud! Learn more: https://ploomber.io/s/signup\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "/Users/me/projects/new-york-real-estate/.venv/lib/python3.10/site-packages/sql/traits.py:20: FutureWarning: named_parameters: boolean values are now deprecated. Value True will be treated as \"enabled\". \n",
+      "Please use a valid option: \"warn\", \"enabled\", or \"disabled\". \n",
+      "For more information, see the docs: https://jupysql.ploomber.io/en/latest/api/configuration.html#named-parameters\n",
+      "  warnings.warn(\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Load duckdb, which lets us efficiently load large files\n",
+    "import duckdb\n",
+    "\n",
+    "# Load pandas, which lets us manipulate dataframes\n",
+    "import pandas as pd\n",
+    "\n",
+    "# Import jupysql Jupyter extension to create SQL cells\n",
+    "%load_ext sql\n",
+    "\n",
+    "# Set configurations on jupysql to directly output data to Pandas and to simplify the output that is printed to the notebook.\n",
+    "%config SqlMagic.autopandas = True\n",
+    "\n",
+    "%config SqlMagic.feedback = False\n",
+    "%config SqlMagic.displaycon = False\n",
+    "\n",
+    "# Allow named parameters (python variables) in SQL cells.\n",
+    "# jupysql deprecates boolean values for this option (it raises a FutureWarning),\n",
+    "# so pass the explicit string option instead of True.\n",
+    "%config SqlMagic.named_parameters = \"enabled\"\n",
+    "\n",
+    "# Connect jupysql to DuckDB using a SQLAlchemy-style connection string. Either connect to an in memory DuckDB, or a file backed db.\n",
+    "%sql duckdb:///:memory:"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Test on one state: gather context for language model"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 12,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "%%capture path_structure\n",
+    "import seedir as sd\n",
+    "path = '~/data/opencorporates'\n",
+    "sd.seedir(path, style='lines', depthlimit=2, exclude_folders=['.git', '.ipynb_checkpoints'])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 13,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "opencorporates/\n",
+      "├─us_mi/\n",
+      "│ ├─companies.csv.gz\n",
+      "│ ├─officers.csv.gz\n",
+      "│ ├─additional_identifiers.csv.gz\n",
+      "│ ├─non_reg_addresses.csv.gz\n",
+      "│ └─alternative_names.csv.gz\n",
+      "├─us_mn/\n",
+      "│ ├─companies.csv.gz\n",
+      "│ ├─officers.csv.gz\n",
+      "│ ├─additional_identifiers.csv.gz\n",
+      "│ ├─non_reg_addresses.csv.gz\n",
+      "│ └─alternative_names.csv.gz\n",
+      "├─us_nh/\n",
+      "│ ├─companies.csv.gz\n",
+      "│ ├─officers.csv.gz\n",
+      "│ ├─additional_identifiers.csv.gz\n",
+      "│ ├─non_reg_addresses.csv.gz\n",
+      "│ └─alternative_names.csv.gz\n",
+      "├─us_al/\n",
+      "│ ├─companies.csv.gz\n",
+      "│ ├─officers.csv.gz\n",
+      "│ ├─additional_identifiers.csv.gz\n",
+      "│ ├─non_reg_addresses.csv.gz\n",
+      "│ └─alternative_names.csv.gz\n",
+      "├─us_ky/\n",
+      "│ ├─companies.csv.gz\n",
+      "│ ├─officers.csv.gz\n",
+      "│ ├─additional_identifiers.csv.gz\n",
+      "│ ├─non_reg_addresses.csv.gz\n",
+      "│ └─alternative_names.csv.gz\n",
+      "├─us_ak/\n",
+      "│ ├─companies.csv.gz\n",
+      "│ ├─officers.csv.gz\n",
+      "│ ├─additional_identifiers.csv.gz\n",
+      "│ ├─non_reg_addresses.csv.gz\n",
+      "│ └─alternative_names.csv.gz\n",
+      "├─us_mt/\n",
+      "│ ├─companies.csv.gz\n",
+      "│ ├─officers.csv.gz\n",
+      "│ ├─additional_identifiers.csv.gz\n",
+      "│ ├─non_reg_addresses.csv.gz\n",
+      "│ └─alternative_names.csv.gz\n",
+      "├─us_ms/\n",
+      "│ ├─companies.csv.gz\n",
+      "│ ├─officers.csv.gz\n",
+      "│ ├─additional_identifiers.csv.gz\n",
+      "│ ├─non_reg_addresses.csv.gz\n",
+      "│ └─alternative_names.csv.gz\n",
+      "├─us_ga/\n",
+      "│ ├─companies.csv.gz\n",
+      "│ ├─officers.csv.gz\n",
+      "│ ├─additional_identifiers.csv.gz\n",
+      "│ ├─non_reg_addresses.csv.gz\n",
+      "│ └─alternative_names.csv.gz\n",
+      "├─us_ma/\n",
+      "│ ├─companies.csv.gz\n",
+      "│ ├─officers.csv.gz\n",
+      "│ ├─additional_identifiers.csv.gz\n",
+      "│ ├─non_reg_addresses.csv.gz\n",
+      "│ └─alternative_names.csv.gz\n",
+      "├─us_mo/\n",
+      "│ ├─companies.csv.gz\n",
+      "│ ├─officers.csv.gz\n",
+      "│ ├─additional_identifiers.csv.gz\n",
+      "│ ├─non_reg_addresses.csv.gz\n",
+      "│ └─alternative_names.csv.gz\n",
+      "├─us_ut/\n",
+      "│ ├─companies.csv.gz\n",
+      "│ ├─officers.csv.gz\n",
+      "│ ├─additional_identifiers.csv.gz\n",
+      "│ ├─non_reg_addresses.csv.gz\n",
+      "│ └─alternative_names.csv.gz\n",
+      "├─us_sc/\n",
+      "│ ├─companies.csv.gz\n",
+      "│ ├─officers.csv.gz\n",
+      "│ ├─additional_identifiers.csv.gz\n",
+      "│ ├─non_reg_addresses.csv.gz\n",
+      "│ └─alternative_names.csv.gz\n",
+      "├─.DS_Store\n",
+      "├─us_sd/\n",
+      "│ ├─companies.csv.gz\n",
+      "│ ├─officers.csv.gz\n",
+      "│ ├─additional_identifiers.csv.gz\n",
+      "│ ├─non_reg_addresses.csv.gz\n",
+      "│ └─alternative_names.csv.gz\n",
+      "├─us_vt/\n",
+      "│ ├─companies.csv.gz\n",
+      "│ ├─officers.csv.gz\n",
+      "│ ├─additional_identifiers.csv.gz\n",
+      "│ ├─non_reg_addresses.csv.gz\n",
+      "│ └─alternative_names.csv.gz\n",
+      "├─us_va/\n",
+      "│ ├─companies.csv.gz\n",
+      "│ ├─officers.csv.gz\n",
+      "│ ├─additional_identifiers.csv.gz\n",
+      "│ ├─non_reg_addresses.csv.gz\n",
+      "│ └─alternative_names.csv.gz\n",
+      "├─ca_ns/\n",
+      "│ ├─companies.csv.gz\n",
+      "│ ├─officers.csv.gz\n",
+      "│ ├─additional_identifiers.csv.gz\n",
+      "│ ├─non_reg_addresses.csv.gz\n",
+      "│ └─alternative_names.csv.gz\n",
+      "├─us_tx/\n",
+      "│ ├─companies.csv.gz\n",
+      "│ ├─officers.csv.gz\n",
+      "│ ├─additional_identifiers.csv.gz\n",
+      "│ ├─non_reg_addresses.csv.gz\n",
+      "│ └─alternative_names.csv.gz\n",
+      "├─ca_bc/\n",
+      "│ ├─companies.csv.gz\n",
+      "│ ├─officers.csv.gz\n",
+      "│ ├─additional_identifiers.csv.gz\n",
+      "│ ├─non_reg_addresses.csv.gz\n",
+      "│ └─alternative_names.csv.gz\n",
+      "├─us_wy/\n",
+      "│ ├─companies.csv.gz\n",
+      "│ ├─officers.csv.gz\n",
+      "│ ├─additional_identifiers.csv.gz\n",
+      "│ ├─non_reg_addresses.csv.gz\n",
+      "│ └─alternative_names.csv.gz\n",
+      "├─ca/\n",
+      "│ ├─companies.csv.gz\n",
+      "│ ├─officers.csv.gz\n",
+      "│ ├─additional_identifiers.csv.gz\n",
+      "│ ├─non_reg_addresses.csv.gz\n",
+      "│ └─alternative_names.csv.gz\n",
+      "├─ca_nu/\n",
+      "│ ├─companies.csv.gz\n",
+      "│ ├─officers.csv.gz\n",
+      "│ ├─additional_identifiers.csv.gz\n",
+      "│ ├─non_reg_addresses.csv.gz\n",
+      "│ └─alternative_names.csv.gz\n",
+      "├─us_ri/\n",
+      "│ ├─companies.csv.gz\n",
+      "│ ├─officers.csv.gz\n",
+      "│ ├─additional_identifiers.csv.gz\n",
+      "│ ├─non_reg_addresses.csv.gz\n",
+      "│ └─alternative_names.csv.gz\n",
+      "├─us_wv/\n",
+      "│ ├─companies.csv.gz\n",
+      "│ ├─officers.csv.gz\n",
+      "│ ├─additional_identifiers.csv.gz\n",
+      "│ ├─non_reg_addresses.csv.gz\n",
+      "│ └─alternative_names.csv.gz\n",
+      "├─us_ca/\n",
+      "│ ├─companies.csv.gz\n",
+      "│ ├─officers.csv.gz\n",
+      "│ ├─additional_identifiers.csv.gz\n",
+      "│ ├─non_reg_addresses.csv.gz\n",
+      "│ └─alternative_names.csv.gz\n",
+      "├─us_co/\n",
+      "│ ├─companies.csv.gz\n",
+      "│ ├─officers.csv.gz\n",
+      "│ ├─additional_identifiers.csv.gz\n",
+      "│ ├─non_reg_addresses.csv.gz\n",
+      "│ └─alternative_names.csv.gz\n",
+      "├─ca_pe/\n",
+      "│ ├─companies.csv.gz\n",
+      "│ ├─officers.csv.gz\n",
+      "│ ├─additional_identifiers.csv.gz\n",
+      "│ ├─non_reg_addresses.csv.gz\n",
+      "│ └─alternative_names.csv.gz\n",
+      "├─us_fl/\n",
+      "│ ├─companies.csv.gz\n",
+      "│ ├─officers.csv.gz\n",
+      "│ ├─additional_identifiers.csv.gz\n",
+      "│ ├─non_reg_addresses.csv.gz\n",
+      "│ └─alternative_names.csv.gz\n",
+      "├─us_ct/\n",
+      "│ ├─companies.csv.gz\n",
+      "│ ├─officers.csv.gz\n",
+      "│ ├─additional_identifiers.csv.gz\n",
+      "│ ├─non_reg_addresses.csv.gz\n",
+      "│ └─alternative_names.csv.gz\n",
+      "├─us_ia/\n",
+      "│ ├─companies.csv.gz\n",
+      "│ ├─officers.csv.gz\n",
+      "│ ├─additional_identifiers.csv.gz\n",
+      "│ ├─non_reg_addresses.csv.gz\n",
+      "│ └─alternative_names.csv.gz\n",
+      "├─us_ok/\n",
+      "│ ├─companies.csv.gz\n",
+      "│ ├─officers.csv.gz\n",
+      "│ ├─additional_identifiers.csv.gz\n",
+      "│ ├─non_reg_addresses.csv.gz\n",
+      "│ └─alternative_names.csv.gz\n",
+      "├─us_in/\n",
+      "│ ├─companies.csv.gz\n",
+      "│ ├─officers.csv.gz\n",
+      "│ ├─additional_identifiers.csv.gz\n",
+      "│ ├─non_reg_addresses.csv.gz\n",
+      "│ └─alternative_names.csv.gz\n",
+      "├─ca_qc/\n",
+      "│ ├─companies.csv.gz\n",
+      "│ ├─officers.csv.gz\n",
+      "│ ├─additional_identifiers.csv.gz\n",
+      "│ ├─non_reg_addresses.csv.gz\n",
+      "│ └─alternative_names.csv.gz\n",
+      "├─us_ne/\n",
+      "│ ├─companies.csv.gz\n",
+      "│ ├─officers.csv.gz\n",
+      "│ ├─additional_identifiers.csv.gz\n",
+      "│ ├─non_reg_addresses.csv.gz\n",
+      "│ └─alternative_names.csv.gz\n",
+      "├─us_ks/\n",
+      "│ ├─companies.csv.gz\n",
+      "│ ├─officers.csv.gz\n",
+      "│ ├─additional_identifiers.csv.gz\n",
+      "│ ├─non_reg_addresses.csv.gz\n",
+      "│ └─alternative_names.csv.gz\n",
+      "├─pr/\n",
+      "│ ├─companies.csv.gz\n",
+      "│ ├─officers.csv.gz\n",
+      "│ ├─additional_identifiers.csv.gz\n",
+      "│ ├─non_reg_addresses.csv.gz\n",
+      "│ └─alternative_names.csv.gz\n",
+      "├─us_md/\n",
+      "│ ├─companies.csv.gz\n",
+      "│ ├─officers.csv.gz\n",
+      "│ ├─additional_identifiers.csv.gz\n",
+      "│ ├─non_reg_addresses.csv.gz\n",
+      "│ └─alternative_names.csv.gz\n",
+      "├─us_ny/\n",
+      "│ ├─.DS_Store\n",
+      "│ ├─companies.csv.gz\n",
+      "│ ├─officers.csv.gz\n",
+      "│ ├─companies.csv\n",
+      "│ ├─additional_identifiers.csv.gz\n",
+      "│ ├─non_reg_addresses.csv.gz\n",
+      "│ └─alternative_names.csv.gz\n",
+      "├─us_az/\n",
+      "│ ├─companies.csv.gz\n",
+      "│ ├─officers.csv.gz\n",
+      "│ ├─additional_identifiers.csv.gz\n",
+      "│ ├─non_reg_addresses.csv.gz\n",
+      "│ └─alternative_names.csv.gz\n",
+      "├─us_de/\n",
+      "│ ├─companies.csv.gz\n",
+      "│ ├─officers.csv.gz\n",
+      "│ ├─additional_identifiers.csv.gz\n",
+      "│ ├─non_reg_addresses.csv.gz\n",
+      "│ └─alternative_names.csv.gz\n",
+      "├─us_hi/\n",
+      "│ ├─companies.csv.gz\n",
+      "│ ├─officers.csv.gz\n",
+      "│ ├─additional_identifiers.csv.gz\n",
+      "│ ├─non_reg_addresses.csv.gz\n",
+      "│ └─alternative_names.csv.gz\n",
+      "├─us_me/\n",
+      "│ ├─companies.csv.gz\n",
+      "│ ├─officers.csv.gz\n",
+      "│ ├─additional_identifiers.csv.gz\n",
+      "│ ├─non_reg_addresses.csv.gz\n",
+      "│ └─alternative_names.csv.gz\n",
+      "├─us_nj/\n",
+      "│ ├─companies.csv.gz\n",
+      "│ ├─officers.csv.gz\n",
+      "│ ├─additional_identifiers.csv.gz\n",
+      "│ ├─non_reg_addresses.csv.gz\n",
+      "│ └─alternative_names.csv.gz\n",
+      "├─us_nm/\n",
+      "│ ├─companies.csv.gz\n",
+      "│ ├─officers.csv.gz\n",
+      "│ ├─additional_identifiers.csv.gz\n",
+      "│ ├─non_reg_addresses.csv.gz\n",
+      "│ └─alternative_names.csv.gz\n",
+      "├─us_nd/\n",
+      "│ ├─companies.csv.gz\n",
+      "│ ├─officers.csv.gz\n",
+      "│ ├─additional_identifiers.csv.gz\n",
+      "│ ├─non_reg_addresses.csv.gz\n",
+      "│ └─alternative_names.csv.gz\n",
+      "├─us_nc/\n",
+      "│ ├─companies.csv.gz\n",
+      "│ ├─officers.csv.gz\n",
+      "│ ├─additional_identifiers.csv.gz\n",
+      "│ ├─non_reg_addresses.csv.gz\n",
+      "│ └─alternative_names.csv.gz\n",
+      "├─us_dc/\n",
+      "│ ├─companies.csv.gz\n",
+      "│ ├─officers.csv.gz\n",
+      "│ ├─additional_identifiers.csv.gz\n",
+      "│ ├─non_reg_addresses.csv.gz\n",
+      "│ └─alternative_names.csv.gz\n",
+      "├─us_nv/\n",
+      "│ ├─companies.csv.gz\n",
+      "│ ├─officers.csv.gz\n",
+      "│ ├─additional_identifiers.csv.gz\n",
+      "│ ├─non_reg_addresses.csv.gz\n",
+      "│ └─alternative_names.csv.gz\n",
+      "├─us_ar/\n",
+      "│ ├─companies.csv.gz\n",
+      "│ ├─officers.csv.gz\n",
+      "│ ├─additional_identifiers.csv.gz\n",
+      "│ ├─non_reg_addresses.csv.gz\n",
+      "│ └─alternative_names.csv.gz\n",
+      "├─us_pa/\n",
+      "│ ├─companies.csv.gz\n",
+      "│ ├─officers.csv.gz\n",
+      "│ ├─additional_identifiers.csv.gz\n",
+      "│ ├─non_reg_addresses.csv.gz\n",
+      "│ └─alternative_names.csv.gz\n",
+      "├─ca_on/\n",
+      "│ ├─companies.csv.gz\n",
+      "│ ├─officers.csv.gz\n",
+      "│ ├─additional_identifiers.csv.gz\n",
+      "│ ├─non_reg_addresses.csv.gz\n",
+      "│ └─alternative_names.csv.gz\n",
+      "├─ca_nl/\n",
+      "│ ├─companies.csv.gz\n",
+      "│ ├─officers.csv.gz\n",
+      "│ ├─additional_identifiers.csv.gz\n",
+      "│ ├─non_reg_addresses.csv.gz\n",
+      "│ └─alternative_names.csv.gz\n",
+      "├─ca_nb/\n",
+      "│ ├─companies.csv.gz\n",
+      "│ ├─officers.csv.gz\n",
+      "│ ├─additional_identifiers.csv.gz\n",
+      "│ ├─non_reg_addresses.csv.gz\n",
+      "│ └─alternative_names.csv.gz\n",
+      "├─us_tn/\n",
+      "│ ├─companies.csv.gz\n",
+      "│ ├─officers.csv.gz\n",
+      "│ ├─additional_identifiers.csv.gz\n",
+      "│ ├─non_reg_addresses.csv.gz\n",
+      "│ └─alternative_names.csv.gz\n",
+      "├─us_wa/\n",
+      "│ ├─companies.csv.gz\n",
+      "│ ├─officers.csv.gz\n",
+      "│ ├─additional_identifiers.csv.gz\n",
+      "│ ├─non_reg_addresses.csv.gz\n",
+      "│ └─alternative_names.csv.gz\n",
+      "├─date.txt\n",
+      "├─us_wi/\n",
+      "│ ├─companies.csv.gz\n",
+      "│ ├─officers.csv.gz\n",
+      "│ ├─additional_identifiers.csv.gz\n",
+      "│ ├─non_reg_addresses.csv.gz\n",
+      "│ └─alternative_names.csv.gz\n",
+      "├─us_il/\n",
+      "│ ├─companies.csv.gz\n",
+      "│ ├─officers.csv.gz\n",
+      "│ ├─additional_identifiers.csv.gz\n",
+      "│ ├─non_reg_addresses.csv.gz\n",
+      "│ └─alternative_names.csv.gz\n",
+      "├─us_or/\n",
+      "│ ├─companies.csv.gz\n",
+      "│ ├─officers.csv.gz\n",
+      "│ ├─additional_identifiers.csv.gz\n",
+      "│ ├─non_reg_addresses.csv.gz\n",
+      "│ └─alternative_names.csv.gz\n",
+      "├─md5sum.txt\n",
+      "├─us_la/\n",
+      "│ ├─companies.csv.gz\n",
+      "│ ├─officers.csv.gz\n",
+      "│ ├─additional_identifiers.csv.gz\n",
+      "│ ├─non_reg_addresses.csv.gz\n",
+      "│ └─alternative_names.csv.gz\n",
+      "├─us_id/\n",
+      "│ ├─companies.csv.gz\n",
+      "│ ├─officers.csv.gz\n",
+      "│ ├─additional_identifiers.csv.gz\n",
+      "│ ├─non_reg_addresses.csv.gz\n",
+      "│ └─alternative_names.csv.gz\n",
+      "└─us_oh/\n",
+      "  ├─companies.csv.gz\n",
+      "  ├─officers.csv.gz\n",
+      "  ├─additional_identifiers.csv.gz\n",
+      "  ├─non_reg_addresses.csv.gz\n",
+      "  └─alternative_names.csv.gz\n",
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "print(path_structure.stdout)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 29,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "%%capture ny_companies\n",
+    "!gzcat ~/data/opencorporates/us_ny/companies.csv.gz | head -n 5"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 30,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "company_number,jurisdiction_code,name,normalised_name,company_type,nonprofit,current_status,incorporation_date,dissolution_date,branch,business_number,current_alternative_legal_name,current_alternative_legal_name_language,home_jurisdiction_text,native_company_number,previous_names,retrieved_at,registry_url,restricted_for_marketing,inactive,accounts_next_due,accounts_reference_date,accounts_last_made_up_date,annual_return_next_due,annual_return_last_made_up_date,has_been_liquidated,has_insolvency_history,has_charges,number_of_employees,registered_address.street_address,registered_address.locality,registered_address.region,registered_address.postal_code,registered_address.country,registered_address.in_full,home_jurisdiction_code,home_jurisdiction_company_number,industry_code_uids,latest_accounts_date,latest_accounts_cash,latest_accounts_assets,latest_accounts_liabilities\n",
+      "1000000,us_ny,TAYLOR RAND LTD.,taylor rand limited,DOMESTIC BUSINESS CORPORATION,false,Inactive   Dissolution By Proclamation / Annulment Of Authority,1985-05-28,1994-03-23,,,,,,,TAYLOR RAND LTD.,2016-07-05 07:01:09 UTC,https://appext20.dos.ny.gov/corp_public/CORPSEARCH.ENTITY_INFORMATION?p_nameid=0&p_corpid=1000000&p_entity_name=%25&p_name_type=%25&p_search_type=BEGINS&p_srch_results_page=0,,true,,,,,,,,,,\"CAMPOS & PAVLIDES PC, 186-09 UNION TPKE, FLUSHING, NEW YORK, 11366\",,,,United States,\"CAMPOS & PAVLIDES PC, 186-09 UNION TPKE, FLUSHING, NEW YORK, 11366, United States\",,,,,,,\n",
+      "1000001,us_ny,\"CRYSTAL BEACH ENTERPRISES, INC.\",crystal beach enterprises incorporated,DOMESTIC BUSINESS CORPORATION,false,Inactive   Dissolution By Proclamation / Annulment Of Authority,1985-05-28,1992-06-24,,,,,NEW YORK,,\"CRYSTAL BEACH ENTERPRISES, INC.\",2016-07-05 07:01:09 UTC,https://appext20.dos.ny.gov/corp_public/CORPSEARCH.ENTITY_INFORMATION?p_nameid=0&p_corpid=1000001&p_entity_name=%25&p_name_type=%25&p_search_type=BEGINS&p_srch_results_page=0,,true,,,,,,,,,,\"%JOHN GIARDINO, 181 FRANKLIN ST, BUFFALO, NEW YORK, 14202\",,,,United States,\"%JOHN GIARDINO, 181 FRANKLIN ST, BUFFALO, NEW YORK, 14202, United States\",,,,,,,\n",
+      "1000002,us_ny,\"PAUL'S GIRL, INC.\",pauls girl incorporated,DOMESTIC BUSINESS CORPORATION,false,Inactive   Dissolution By Proclamation / Annulment Of Authority,1985-05-28,1993-03-24,,,,,NEW YORK,,\"PAUL'S GIRL, INC.\",2016-07-05 07:01:11 UTC,https://appext20.dos.ny.gov/corp_public/CORPSEARCH.ENTITY_INFORMATION?p_nameid=0&p_corpid=1000002&p_entity_name=%25&p_name_type=%25&p_search_type=BEGINS&p_srch_results_page=0,,true,,,,,,,,,,\"ARTHUR GINS, 501 SEVENTH AVE, NEW YORK, NEW YORK, 10018\",,,,United States,\"ARTHUR GINS, 501 SEVENTH AVE, NEW YORK, NEW YORK, 10018, United States\",,,,,,,\n",
+      "1000003,us_ny,\"NORMAN'S TEEN TOURS, INC.\",normans teen tours incorporated,DOMESTIC BUSINESS CORPORATION,false,Inactive   Dissolution By Proclamation / Annulment Of Authority,1985-05-28,1991-09-25,,,,,NEW YORK,,\"NORMAN'S TEEN TOURS, INC.|REIN TEEN TOURS, INC.\",2016-07-05 07:01:08 UTC,https://appext20.dos.ny.gov/corp_public/CORPSEARCH.ENTITY_INFORMATION?p_nameid=0&p_corpid=1000003&p_entity_name=%25&p_name_type=%25&p_search_type=BEGINS&p_srch_results_page=0,,true,,,,,,,,,,\"REIN TEEN TOURS, INC., 206-11 LORI DRIVE, BAYSIDE, NEW YORK, 11361\",,,,United States,\"REIN TEEN TOURS, INC., 206-11 LORI DRIVE, BAYSIDE, NEW YORK, 11361, United States\",,,,,,,\n",
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "print(ny_companies.stdout)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 31,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "%%capture ny_officers\n",
+    "!gzcat ~/data/opencorporates/us_ny/officers.csv.gz | head -n 5"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 78,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "'id,company_number,jurisdiction_code,name,title,first_name,last_name,position,start_date,person_number,person_uid,end_date,current_status,occupation,nationality,country_of_residence,partial_date_of_birth,type,address.in_full,address.street_address,address.locality,address.region,address.postal_code,address.country,retrieved_at,source_url\\r\\n72342016,3332175,us_ny,WAJAD AZIMI,,,,agent,,,,,,,,,,,\"363 S BROADWAY, YONKERS, NEW YORK, 10705\",,,,,,2016-04-22 16:58:01 UTC,\\r\\n224809571,3785280,us_ny,BENEDETTA AMADI,,,,chief executive officer,,,,,,,,,,,\"BENEDETTA AMADI, BROOKLYN, NY, 11211\",BENEDETTA AMADI,BROOKLYN,NY,11211,,2024-05-13 11:37:21 UTC,\\r\\n224940888,2067108,us_ny,RICHARD P. SZMYR,,,,chief executive officer,,,,,,,,,,,\"RICHARD P. SZMYR, SCOTIA, NY, 12302\",RICHARD P. SZMYR,SCOTIA,NY,12302,,2024-05-06 11:39:24 UTC,\\r\\n225116861,2116899,us_ny,RON BUKSHPAN,,,,chief executive officer,,,,,,,,,,,\"RON BUKSHPAN, CUMMING, GA, 30041\",RON BUKSHPAN,CUMMING,GA,30041,,2024-05-06 11:39:24 UTC,\\r\\n'"
+      ]
+     },
+     "execution_count": 78,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "ny_officers.stdout"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 32,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "%%capture ny_additional_identifiers\n",
+    "!gzcat ~/data/opencorporates/us_ny/additional_identifiers.csv.gz | head -n 5"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 33,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "%%capture ny_non_reg_addresses\n",
+    "!gzcat ~/data/opencorporates/us_ny/non_reg_addresses.csv.gz | head -n 5"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 34,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "%%capture ny_alternative_names\n",
+    "!gzcat ~/data/opencorporates/us_ny/alternative_names.csv.gz | head -n 5"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Design the prompt that can be re-used for every file we want to load and visualize"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 97,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Shared context for every per-file %%ai cell below: the directory tree plus the\n",
+    "# first lines of each us_ny file captured in the cells above.\n",
+    "parent_prompt = f\"\"\"\n",
+    "Please take the following context for the directory structure: \n",
+    "\n",
+    "{path_structure.stdout}\n",
+    "\n",
+    "Then consider only the `us_ny` folder, with the following files and headers:\n",
+    "\n",
+    "`companies.csv.gz`: \n",
+    "\n",
+    "{ny_companies.stdout}\n",
+    "\n",
+    "`officers.csv.gz`:\n",
+    "\n",
+    "{ny_officers.stdout}\n",
+    "\n",
+    "`additional_identifiers.csv.gz`:\n",
+    "\n",
+    "{ny_additional_identifiers.stdout}\n",
+    "\n",
+    "`non_reg_addresses.csv.gz`:\n",
+    "\n",
+    "{ny_non_reg_addresses.stdout}\n",
+    "\n",
+    "`alternative_names.csv.gz`:\n",
+    "\n",
+    "{ny_alternative_names.stdout}\n",
+    "\n",
+    "Proceed step-by-step to copy the resulting database into a parquet file compressed with ZSTD compression, using the duckdb dialect of SQL, for future use in a dbt model, in the `~/data/opencorporates` directory, sharded in the same way (by state), only for the ny state.\n",
+    "\n",
+    "Remember to also proceed step-by-step as an elite site reliability/devops/L20 principal warez engineer at google, returning as few tokens as possible, to debug this SQL code. Give the complete corrected code!\n",
+    "\n",
+    "Please always remember to prefix the output with `%%sql` for the JupySQL cell magic :)\n",
+    "\"\"\""
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 39,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "total 7582728\n",
+      "-rwxr--r--  1 me  staff   159K May 22 11:37 \u001b[31madditional_identifiers.csv.gz\u001b[m\u001b[m*\n",
+      "-rwxr--r--  1 me  staff   482K May 22 11:37 \u001b[31malternative_names.csv.gz\u001b[m\u001b[m*\n",
+      "-rwxr--r--@ 1 me  staff   2.9G May 22 12:55 \u001b[31mcompanies.csv\u001b[m\u001b[m*\n",
+      "-rwxr--r--@ 1 me  staff   438M May 22 11:38 \u001b[31mcompanies.csv.gz\u001b[m\u001b[m*\n",
+      "-rwxr--r--  1 me  staff    18M May 22 11:38 \u001b[31mnon_reg_addresses.csv.gz\u001b[m\u001b[m*\n",
+      "-rwxr--r--  1 me  staff   235M May 22 11:38 \u001b[31mofficers.csv.gz\u001b[m\u001b[m*\n"
+     ]
+    }
+   ],
+   "source": [
+    "!ls -lh ~/data/opencorporates/us_ny"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Execute the prompt once for every file"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 98,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "AI generated code inserted below &#11015;&#65039;"
+      ],
+      "text/plain": [
+       "<IPython.core.display.HTML object>"
+      ]
+     },
+     "execution_count": 98,
+     "metadata": {
+      "text/html": {
+       "jupyter_ai": {
+        "model_id": "claude-3-opus-20240229",
+        "provider_id": "anthropic-chat"
+       }
+      }
+     },
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "%%ai anthropic-chat:claude-3-opus-20240229 --format code\n",
+    "\n",
+    "{parent_prompt}\n",
+    "\n",
+    "Only do this for this file in `~/data/opencorporates/us_ny`:\n",
+    "\n",
+    "```\n",
+    "-rwxr--r--  1 me  staff   159K May 22 11:37 additional_identifiers.csv.gz*\n",
+    "```"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 99,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>Success</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "Empty DataFrame\n",
+       "Columns: [Success]\n",
+       "Index: []"
+      ]
+     },
+     "execution_count": 99,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "%%sql\n",
+    "COPY\n",
+    "(\n",
+    "  SELECT\n",
+    "    company_number,\n",
+    "    jurisdiction_code,\n",
+    "    uid,\n",
+    "    identifier_system_code\n",
+    "  FROM read_csv_auto('~/data/opencorporates/us_ny/additional_identifiers.csv.gz', header=True, sep=',')\n",
+    ")\n",
+    "-- Write inside the us_ny shard so exports for other states cannot overwrite this file\n",
+    "TO '~/data/opencorporates/us_ny/additional_identifiers.parquet'\n",
+    "(FORMAT 'PARQUET', CODEC 'ZSTD');"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 100,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "AI generated code inserted below &#11015;&#65039;"
+      ],
+      "text/plain": [
+       "<IPython.core.display.HTML object>"
+      ]
+     },
+     "execution_count": 100,
+     "metadata": {
+      "text/html": {
+       "jupyter_ai": {
+        "model_id": "claude-3-opus-20240229",
+        "provider_id": "anthropic-chat"
+       }
+      }
+     },
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "%%ai anthropic-chat:claude-3-opus-20240229 --format code\n",
+    "\n",
+    "{parent_prompt}\n",
+    "\n",
+    "Only do this for this file in `~/data/opencorporates/us_ny`:\n",
+    "\n",
+    "```\n",
+    "-rwxr--r--  1 me  staff   482K May 22 11:37 alternative_names.csv.gz*\n",
+    "```"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 101,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>Success</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "Empty DataFrame\n",
+       "Columns: [Success]\n",
+       "Index: []"
+      ]
+     },
+     "execution_count": 101,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "%%sql\n",
+    "CREATE OR REPLACE TABLE ny_alternative_names AS\n",
+    "-- OR REPLACE keeps this cell re-runnable in the same kernel session\n",
+    "SELECT * \n",
+    "FROM read_csv_auto('~/data/opencorporates/us_ny/alternative_names.csv.gz', header=True, sep=',');\n",
+    "\n",
+    "COPY (\n",
+    "    SELECT *\n",
+    "    FROM ny_alternative_names\n",
+    ") TO '~/data/opencorporates/us_ny/alternative_names.parquet' (FORMAT 'parquet', CODEC 'ZSTD');"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 115,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "AI generated code inserted below &#11015;&#65039;"
+      ],
+      "text/plain": [
+       "<IPython.core.display.HTML object>"
+      ]
+     },
+     "execution_count": 115,
+     "metadata": {
+      "text/html": {
+       "jupyter_ai": {
+        "model_id": "claude-3-opus-20240229",
+        "provider_id": "anthropic-chat"
+       }
+      }
+     },
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "%%ai anthropic-chat:claude-3-opus-20240229 --format code\n",
+    "\n",
+    "{parent_prompt}\n",
+    "\n",
+    "Only do this for this file in `~/data/opencorporates/us_ny`:\n",
+    "\n",
+    "```\n",
+    "-rwxr--r--@ 1 me  staff   438M May 22 11:38 companies.csv.gz*\n",
+    "```\n",
+    "\n",
+    "Remember that if a column name contains a period, then you need to use double quotes around the entire column name. Do NOT use the `read_csv_auto` function and instead specify the type of every column in the `read_csv` options and parameters.\n",
+    "\n",
+    "Do NOT use the read_csv_auto function, and specify all the columns in the read_csv parameter. Only give the final code and do not load any duckdb extensions nor create tables; one copy command suffices :)\n",
+    "\n",
+    "Instead of the `read_csv_auto` function, remember to use the `read_csv` function and specify all types correctly with the `columns` parameter :) \n",
+    "Instead of the `read_csv_auto` function, remember to use the `read_csv` function and specify all types correctly with the `columns` parameter :) \n",
+    "Instead of the `read_csv_auto` function, remember to use the `read_csv` function and specify all types correctly with the `columns` parameter :) \n",
+    "Instead of the `read_csv_auto` function, remember to use the `read_csv` function and specify all types correctly with the `columns` parameter :) "
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 119,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "8824e2f59d6847258404942a657e4720",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>Success</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "Empty DataFrame\n",
+       "Columns: [Success]\n",
+       "Index: []"
+      ]
+     },
+     "execution_count": 119,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "%%sql\n",
+    "COPY (\n",
+    "  -- Convert the gzipped companies CSV to Parquet, selecting every column declared in the\n",
+    "  -- `columns` struct passed to read_csv below (same names, same order).\n",
+    "  SELECT\n",
+    "    company_number,\n",
+    "    jurisdiction_code,\n",
+    "    name,\n",
+    "    normalised_name,\n",
+    "    company_type,\n",
+    "    nonprofit,\n",
+    "    current_status,\n",
+    "    incorporation_date,\n",
+    "    dissolution_date,\n",
+    "    branch,\n",
+    "    business_number,\n",
+    "    current_alternative_legal_name,\n",
+    "    current_alternative_legal_name_language,\n",
+    "    home_jurisdiction_text,\n",
+    "    native_company_number,\n",
+    "    previous_names,\n",
+    "    retrieved_at,\n",
+    "    registry_url,\n",
+    "    restricted_for_marketing,\n",
+    "    inactive,\n",
+    "    accounts_next_due,\n",
+    "    accounts_reference_date,\n",
+    "    accounts_last_made_up_date,\n",
+    "    annual_return_next_due,\n",
+    "    annual_return_last_made_up_date,\n",
+    "    has_been_liquidated,\n",
+    "    has_insolvency_history,\n",
+    "    has_charges,\n",
+    "    number_of_employees,\n",
+    "    -- Column names containing a period are double-quoted so they are read as one identifier.\n",
+    "    \"registered_address.street_address\",\n",
+    "    \"registered_address.locality\",\n",
+    "    \"registered_address.region\",\n",
+    "    \"registered_address.postal_code\",\n",
+    "    \"registered_address.country\",\n",
+    "    \"registered_address.in_full\",\n",
+    "    home_jurisdiction_code,\n",
+    "    home_jurisdiction_company_number,\n",
+    "    industry_code_uids,\n",
+    "    latest_accounts_date,\n",
+    "    latest_accounts_cash,\n",
+    "    latest_accounts_assets,\n",
+    "    latest_accounts_liabilities\n",
+    "  -- Every column type is declared explicitly through `columns`.\n",
+    "  -- NOTE(review): no `header` option is passed here, unlike the other read_csv cells — confirm\n",
+    "  -- the header row is being skipped as intended.\n",
+    "  FROM read_csv(\n",
+    "    '~/data/opencorporates/us_ny/companies.csv.gz',\n",
+    "    columns={\n",
+    "      'company_number': 'VARCHAR',\n",
+    "      'jurisdiction_code': 'VARCHAR',\n",
+    "      'name': 'VARCHAR',\n",
+    "      'normalised_name': 'VARCHAR',\n",
+    "      'company_type': 'VARCHAR',\n",
+    "      'nonprofit': 'BOOLEAN',\n",
+    "      'current_status': 'VARCHAR',\n",
+    "      'incorporation_date': 'DATE',\n",
+    "      'dissolution_date': 'DATE',\n",
+    "      'branch': 'VARCHAR',\n",
+    "      'business_number': 'VARCHAR',\n",
+    "      'current_alternative_legal_name': 'VARCHAR',\n",
+    "      'current_alternative_legal_name_language': 'VARCHAR',\n",
+    "      'home_jurisdiction_text': 'VARCHAR',\n",
+    "      'native_company_number': 'VARCHAR',\n",
+    "      'previous_names': 'VARCHAR',\n",
+    "      'retrieved_at': 'TIMESTAMP',\n",
+    "      'registry_url': 'VARCHAR',\n",
+    "      'restricted_for_marketing': 'BOOLEAN',\n",
+    "      'inactive': 'BOOLEAN',\n",
+    "      'accounts_next_due': 'DATE',\n",
+    "      'accounts_reference_date': 'DATE',\n",
+    "      'accounts_last_made_up_date': 'DATE',\n",
+    "      'annual_return_next_due': 'DATE', \n",
+    "      'annual_return_last_made_up_date': 'DATE',\n",
+    "      'has_been_liquidated': 'BOOLEAN',\n",
+    "      'has_insolvency_history': 'BOOLEAN',\n",
+    "      'has_charges': 'BOOLEAN',\n",
+    "      'number_of_employees': 'INTEGER',\n",
+    "      'registered_address.street_address': 'VARCHAR',\n",
+    "      'registered_address.locality': 'VARCHAR',\n",
+    "      'registered_address.region': 'VARCHAR',\n",
+    "      'registered_address.postal_code': 'VARCHAR',\n",
+    "      'registered_address.country': 'VARCHAR',\n",
+    "      'registered_address.in_full': 'VARCHAR',\n",
+    "      'home_jurisdiction_code': 'VARCHAR',\n",
+    "      'home_jurisdiction_company_number': 'VARCHAR',\n",
+    "      'industry_code_uids': 'VARCHAR',\n",
+    "      'latest_accounts_date': 'DATE',\n",
+    "      'latest_accounts_cash': 'DOUBLE',\n",
+    "      'latest_accounts_assets': 'DOUBLE',\n",
+    "      'latest_accounts_liabilities': 'DOUBLE'\n",
+    "    }\n",
+    "  )\n",
+    ")\n",
+    "-- Write ZSTD-compressed Parquet using row groups of 100000 rows.\n",
+    "TO '~/data/opencorporates/us_ny/companies.parquet'\n",
+    "(FORMAT PARQUET, CODEC ZSTD, ROW_GROUP_SIZE 100000);"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 120,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "AI generated code inserted below &#11015;&#65039;"
+      ],
+      "text/plain": [
+       "<IPython.core.display.HTML object>"
+      ]
+     },
+     "execution_count": 120,
+     "metadata": {
+      "text/html": {
+       "jupyter_ai": {
+        "model_id": "claude-3-opus-20240229",
+        "provider_id": "anthropic-chat"
+       }
+      }
+     },
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "%%ai anthropic-chat:claude-3-opus-20240229 --format code\n",
+    "\n",
+    "{parent_prompt}\n",
+    "\n",
+    "Only do this for this file in `~/data/opencorporates/us_ny`:\n",
+    "\n",
+    "```\n",
+    "-rwxr--r--  1 me  staff    18M May 22 11:38 non_reg_addresses.csv.gz*\n",
+    "```\n",
+    "\n",
+    "Make sure to only use the correct columns that actually appear in the header you have been provided. And make sure to specify the read_csv options manually rather than use the auto one. (do NOT use the `read_csv_auto` function!)."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 71,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>Success</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "Empty DataFrame\n",
+       "Columns: [Success]\n",
+       "Index: []"
+      ]
+     },
+     "execution_count": 71,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "%%sql\n",
+    "COPY (\n",
+    "  -- Read the gzipped non-registered-addresses CSV with a fully declared schema\n",
+    "  -- (auto-detection off) and export every column to ZSTD-compressed Parquet.\n",
+    "  SELECT *\n",
+    "  FROM read_csv(\n",
+    "    '~/data/opencorporates/us_ny/non_reg_addresses.csv.gz',\n",
+    "    auto_detect=false,\n",
+    "    header=true,\n",
+    "    delim=',',\n",
+    "    null_padding=true,\n",
+    "    columns={\n",
+    "      'company_number': 'VARCHAR',\n",
+    "      'jurisdiction_code': 'VARCHAR',\n",
+    "      'value': 'VARCHAR',\n",
+    "      'service_address': 'VARCHAR',\n",
+    "      'address.in_full': 'VARCHAR',\n",
+    "      'address.street_address': 'VARCHAR',\n",
+    "      'address.locality': 'VARCHAR',\n",
+    "      'address.region': 'VARCHAR',\n",
+    "      'address.postal_code': 'VARCHAR',\n",
+    "      'address.country': 'VARCHAR',\n",
+    "      'type': 'VARCHAR',\n",
+    "      'start_date': 'DATE',\n",
+    "      'end_date': 'DATE'\n",
+    "    }\n",
+    "  )\n",
+    ")\n",
+    "TO '~/data/opencorporates/us_ny/non_reg_addresses.parquet'\n",
+    "(FORMAT 'parquet', CODEC 'ZSTD');"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 82,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "AI generated code inserted below &#11015;&#65039;"
+      ],
+      "text/plain": [
+       "<IPython.core.display.HTML object>"
+      ]
+     },
+     "execution_count": 82,
+     "metadata": {
+      "text/html": {
+       "jupyter_ai": {
+        "model_id": "claude-3-opus-20240229",
+        "provider_id": "anthropic-chat"
+       }
+      }
+     },
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "%%ai anthropic-chat:claude-3-opus-20240229 --format code\n",
+    "\n",
+    "{parent_prompt}\n",
+    "\n",
+    "Only do this for this file in `~/data/opencorporates/us_ny`:\n",
+    "\n",
+    "```\n",
+    "-rwxr--r--  1 me  staff   235M May 22 11:38 officers.csv.gz*\n",
+    "```\n",
+    "\n",
+    "Remember that if a column name contains a period, then you need to use double quotes around the entire column name. Do NOT use the `read_csv_auto` function and instead specify the type of every column in the `read_csv` options and parameters."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 86,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "5f51f9f0480144d88039c5245927dea6",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "a099a147c4964d089ca803e354037739",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>Success</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "Empty DataFrame\n",
+       "Columns: [Success]\n",
+       "Index: []"
+      ]
+     },
+     "execution_count": 86,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "%%sql\n",
+    "CREATE OR REPLACE TABLE officers AS\n",
+    "-- Load the gzipped officers CSV into the `officers` table with explicitly declared column\n",
+    "-- types, then export that table to ZSTD-compressed Parquet (second statement below).\n",
+    "-- OR REPLACE keeps the cell re-runnable: a plain CREATE TABLE fails once `officers` exists.\n",
+    "SELECT\n",
+    "  id::INTEGER,\n",
+    "  company_number,\n",
+    "  jurisdiction_code,\n",
+    "  name,\n",
+    "  title,\n",
+    "  first_name,\n",
+    "  last_name,\n",
+    "  position,\n",
+    "  start_date::DATE,\n",
+    "  person_number,\n",
+    "  person_uid,\n",
+    "  end_date::DATE,\n",
+    "  current_status,\n",
+    "  occupation,\n",
+    "  nationality,\n",
+    "  country_of_residence,\n",
+    "  partial_date_of_birth::DATE,\n",
+    "  type,\n",
+    "  -- Column names containing a period are double-quoted so they are read as one identifier.\n",
+    "  \"address.in_full\",\n",
+    "  \"address.street_address\",\n",
+    "  \"address.locality\",\n",
+    "  \"address.region\",\n",
+    "  \"address.postal_code\",\n",
+    "  \"address.country\",\n",
+    "  retrieved_at::TIMESTAMP,\n",
+    "  source_url\n",
+    "FROM read_csv_auto('~/data/opencorporates/us_ny/officers.csv.gz', delim=',', header=True, columns={\n",
+    "  'id': 'INT',\n",
+    "  'company_number': 'VARCHAR',\n",
+    "  'jurisdiction_code': 'VARCHAR',\n",
+    "  'name': 'VARCHAR',\n",
+    "  'title': 'VARCHAR',\n",
+    "  'first_name': 'VARCHAR',\n",
+    "  'last_name': 'VARCHAR',\n",
+    "  'position': 'VARCHAR',\n",
+    "  'start_date': 'DATE',\n",
+    "  'person_number': 'VARCHAR',\n",
+    "  'person_uid': 'VARCHAR',\n",
+    "  'end_date': 'DATE',\n",
+    "  'current_status': 'VARCHAR',\n",
+    "  'occupation': 'VARCHAR',\n",
+    "  'nationality': 'VARCHAR',\n",
+    "  'country_of_residence': 'VARCHAR',\n",
+    "  'partial_date_of_birth': 'DATE',\n",
+    "  'type': 'VARCHAR',\n",
+    "  'address.in_full': 'VARCHAR',\n",
+    "  'address.street_address': 'VARCHAR',\n",
+    "  'address.locality': 'VARCHAR',\n",
+    "  'address.region': 'VARCHAR',\n",
+    "  'address.postal_code': 'VARCHAR',\n",
+    "  'address.country': 'VARCHAR',\n",
+    "  'retrieved_at': 'TIMESTAMP',\n",
+    "  'source_url': 'VARCHAR'\n",
+    "});\n",
+    "\n",
+    "COPY officers TO '~/data/opencorporates/us_ny/officers.parquet' (FORMAT 'parquet', CODEC 'ZSTD');"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Prototype visualizations and network analyses"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 89,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "%%capture file_list\n",
+    "!ls ~/data/opencorporates/us_ny/*.parquet"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 121,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "%%capture officers_parquet\n",
+    "!duckdb -markdown -c \"SELECT * FROM '~/data/opencorporates/us_ny/officers.parquet' LIMIT 10;\""
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 122,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "%%capture non_reg_addresses_parquet\n",
+    "!duckdb -markdown -c \"SELECT * FROM '~/data/opencorporates/us_ny/non_reg_addresses.parquet' LIMIT 10;\""
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 123,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "%%capture companies_parquet\n",
+    "!duckdb -markdown -c \"SELECT * FROM '~/data/opencorporates/us_ny/companies.parquet' LIMIT 10;\""
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 124,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "%%capture alternative_names_parquet\n",
+    "!duckdb -markdown -c \"SELECT * FROM '~/data/opencorporates/us_ny/alternative_names.parquet' LIMIT 10;\""
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 126,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "%%capture docs\n",
+    "\n",
+    "# Download the Mosaic Jupyter widget documentation page; the response body lands in `docs.stdout`.\n",
+    "# The URL is passed literally: wrapping it in {...} would invoke IPython's expression\n",
+    "# interpolation, and a URL is not a valid Python expression.\n",
+    "!curl https://uwdata.github.io/mosaic/jupyter/"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 145,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "%%capture example\n",
+    "\n",
+    "# Download the raw example paste; the response body lands in `example.stdout`.\n",
+    "# The URL is passed literally: wrapping it in {...} would invoke IPython's expression\n",
+    "# interpolation, and a URL is not a valid Python expression.\n",
+    "!curl https://pastebin.com/raw/XijHp75S"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 159,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "prompt = f\"\"\"\n",
+    "Take the following as context: \n",
+    "\n",
+    "```\n",
+    "!duckdb -markdown -c \"SELECT * FROM '~/data/opencorporates/us_ny/officers.parquet' LIMIT 10;\"\n",
+    "\n",
+    "{officers_parquet.stdout}\n",
+    "```\n",
+    "\n",
+    "```\n",
+    "!duckdb -markdown -c \"SELECT * FROM '~/data/opencorporates/us_ny/non_reg_addresses.parquet' LIMIT 10;\"\n",
+    "\n",
+    "{non_reg_addresses_parquet.stdout}\n",
+    "```\n",
+    "\n",
+    "```\n",
+    "!duckdb -markdown -c \"SELECT * FROM '~/data/opencorporates/us_ny/companies.parquet' LIMIT 10;\"\n",
+    "\n",
+    "{companies_parquet.stdout}\n",
+    "```\n",
+    "\n",
+    "```\n",
+    "!duckdb -markdown -c \"SELECT * FROM '~/data/opencorporates/us_ny/alternative_names.parquet' LIMIT 10;\"\n",
+    "\n",
+    "{alternative_names_parquet.stdout}\n",
+    "```\n",
+    "\"\"\""
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 160,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "AI generated code inserted below &#11015;&#65039;"
+      ],
+      "text/plain": [
+       "<IPython.core.display.HTML object>"
+      ]
+     },
+     "execution_count": 160,
+     "metadata": {
+      "text/html": {
+       "jupyter_ai": {
+        "model_id": "claude-3-opus-20240229",
+        "provider_id": "anthropic-chat"
+       }
+      }
+     },
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "%%ai anthropic-chat:claude-3-opus-20240229 --format code\n",
+    "\n",
+    "{prompt}\n",
+    "\n",
+    "Use the Jupyter widget and the above example to visualize what is reasonable in the companies.parquet file and header. debug it. proceed step-by-step as an elite site reliability/devops/L20 principal warez engineer at google, returning as few tokens as possible, to debug this visualization code using the reference. give the complete corrected code!"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 162,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "7f3ee488ff3c4122afa1cf67b9f09e61",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "21c72dc46a364d189ce1e71821cb4e55",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "Dropdown(description='Jurisdiction:', options=('us_ny',), value='us_ny')"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "3f2a9d4ec4d44e70af0cc7f33ffab838",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "IntText(value=10, description='Limit:')"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "b84c2b407f53414bb8070bfc12c2a86f",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "Button(description='Query', style=ButtonStyle())"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "3654da5fd1b44ca59c2d23b301e1e0a1",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "Output()"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    }
+   ],
+   "source": [
+    "import ipywidgets as widgets\n",
+    "import duckdb\n",
+    "\n",
+    "parquet_file = \"~/data/opencorporates/us_ny/companies.parquet\"\n",
+    "conn = duckdb.connect()\n",
+    "\n",
+    "# Read the parquet file into a DuckDB table\n",
+    "conn.execute(f\"CREATE TABLE companies AS SELECT * FROM read_parquet('{parquet_file}')\")\n",
+    "\n",
+    "# Get the table header (column names only; LIMIT 0 fetches no rows)\n",
+    "header = conn.execute(\"SELECT * FROM companies LIMIT 0\").description\n",
+    "header = [col[0] for col in header]\n",
+    "\n",
+    "# Get the list of company jurisdictions\n",
+    "jurisdictions = conn.execute(\"SELECT DISTINCT jurisdiction_code FROM companies ORDER BY jurisdiction_code\").fetchall()\n",
+    "jurisdictions = [j[0] for j in jurisdictions]\n",
+    "\n",
+    "# Create widgets\n",
+    "jurisdiction_dropdown = widgets.Dropdown(\n",
+    "    options=jurisdictions,\n",
+    "    description='Jurisdiction:',\n",
+    "    # Guard against an empty table, where jurisdictions[0] would raise IndexError.\n",
+    "    value=jurisdictions[0] if jurisdictions else None,\n",
+    ")\n",
+    "# IntText has no min/max traits, so the bounds were silently dropped; BoundedIntText enforces them.\n",
+    "limit_text = widgets.BoundedIntText(value=10, description='Limit:', min=1, max=1000)\n",
+    "\n",
+    "# Define the query function\n",
+    "def query_companies(jurisdiction, limit):\n",
+    "    \"\"\"Return up to `limit` rows of `companies` for `jurisdiction` as a pandas DataFrame.\n",
+    "\n",
+    "    The jurisdiction is bound as a query parameter rather than spliced into the SQL text,\n",
+    "    and the limit is coerced with int() before being inlined.\n",
+    "    \"\"\"\n",
+    "    query = f\"SELECT * FROM companies WHERE jurisdiction_code = ? LIMIT {int(limit)}\"\n",
+    "    return conn.execute(query, [jurisdiction]).fetchdf()\n",
+    "\n",
+    "# Create the output widget\n",
+    "output = widgets.Output()\n",
+    "\n",
+    "# Define the on_click function\n",
+    "def on_click(b):\n",
+    "    \"\"\"Button callback: rerun the query with the current widget values and show the result.\"\"\"\n",
+    "    with output:\n",
+    "        output.clear_output()\n",
+    "        df = query_companies(jurisdiction_dropdown.value, limit_text.value)\n",
+    "        print(df.to_markdown(index=False))\n",
+    "\n",
+    "# Create the button widget\n",
+    "button = widgets.Button(description=\"Query\")\n",
+    "button.on_click(on_click)\n",
+    "\n",
+    "# Display the widgets\n",
+    "display(jurisdiction_dropdown, limit_text, button, output)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 163,
+   "metadata": {},
+   "outputs": [
+    {
+     "ename": "ValueError",
+     "evalue": "Invalid format specifier",
+     "output_type": "error",
+     "traceback": [
+      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
+      "\u001b[0;31mValueError\u001b[0m                                Traceback (most recent call last)",
+      "Cell \u001b[0;32mIn[163], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[43mget_ipython\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mrun_cell_magic\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mai\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43manthropic-chat:claude-3-opus-20240229 --format code\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;130;43;01m\\n\u001b[39;49;00m\u001b[38;5;132;43;01m{prompt}\u001b[39;49;00m\u001b[38;5;130;43;01m\\n\u001b[39;49;00m\u001b[38;5;130;43;01m\\n\u001b[39;49;00m\u001b[38;5;124;43mUse the Jupyter widget and the above example to visualize what is reasonable in the companies.parquet file and header. debug it. proceed step-by-step as an elite site reliability/devops/L20 principal warez engineer at google, returning as few tokens as possible, to debug this visualization code using the reference. give the complete corrected code!\u001b[39;49m\u001b[38;5;130;43;01m\\n\u001b[39;49;00m\u001b[38;5;130;43;01m\\n\u001b[39;49;00m\u001b[38;5;124;43mHere is the documentation for the Jupyter widget we will be using for visualization: from https://uwdata.github.io/mosaic/jupyter/\u001b[39;49m\u001b[38;5;130;43;01m\\n\u001b[39;49;00m\u001b[38;5;130;43;01m\\n\u001b[39;49;00m\u001b[38;5;132;43;01m{example_raw}\u001b[39;49;00m\u001b[38;5;130;43;01m\\n\u001b[39;49;00m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m)\u001b[49m\n",
+      "File \u001b[0;32m~/projects/new-york-real-estate/.venv/lib/python3.10/site-packages/IPython/core/interactiveshell.py:2517\u001b[0m, in \u001b[0;36mInteractiveShell.run_cell_magic\u001b[0;34m(self, magic_name, line, cell)\u001b[0m\n\u001b[1;32m   2515\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mbuiltin_trap:\n\u001b[1;32m   2516\u001b[0m     args \u001b[38;5;241m=\u001b[39m (magic_arg_s, cell)\n\u001b[0;32m-> 2517\u001b[0m     result \u001b[38;5;241m=\u001b[39m \u001b[43mfn\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m   2519\u001b[0m \u001b[38;5;66;03m# The code below prevents the output from being displayed\u001b[39;00m\n\u001b[1;32m   2520\u001b[0m \u001b[38;5;66;03m# when using magics with decorator @output_can_be_silenced\u001b[39;00m\n\u001b[1;32m   2521\u001b[0m \u001b[38;5;66;03m# when the last Python token in the expression is a ';'.\u001b[39;00m\n\u001b[1;32m   2522\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mgetattr\u001b[39m(fn, magic\u001b[38;5;241m.\u001b[39mMAGIC_OUTPUT_CAN_BE_SILENCED, \u001b[38;5;28;01mFalse\u001b[39;00m):\n",
+      "File \u001b[0;32m~/projects/new-york-real-estate/.venv/lib/python3.10/site-packages/jupyter_ai_magics/magics.py:618\u001b[0m, in \u001b[0;36mAiMagics.ai\u001b[0;34m(self, line, cell)\u001b[0m\n\u001b[1;32m    615\u001b[0m ip \u001b[38;5;241m=\u001b[39m get_ipython()\n\u001b[1;32m    616\u001b[0m prompt \u001b[38;5;241m=\u001b[39m prompt\u001b[38;5;241m.\u001b[39mformat_map(FormatDict(ip\u001b[38;5;241m.\u001b[39muser_ns))\n\u001b[0;32m--> 618\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mrun_ai_cell\u001b[49m\u001b[43m(\u001b[49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mprompt\u001b[49m\u001b[43m)\u001b[49m\n",
+      "File \u001b[0;32m~/projects/new-york-real-estate/.venv/lib/python3.10/site-packages/jupyter_ai_magics/magics.py:556\u001b[0m, in \u001b[0;36mAiMagics.run_ai_cell\u001b[0;34m(self, args, prompt)\u001b[0m\n\u001b[1;32m    554\u001b[0m \u001b[38;5;66;03m# interpolate user namespace into prompt\u001b[39;00m\n\u001b[1;32m    555\u001b[0m ip \u001b[38;5;241m=\u001b[39m get_ipython()\n\u001b[0;32m--> 556\u001b[0m prompt \u001b[38;5;241m=\u001b[39m \u001b[43mprompt\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mformat_map\u001b[49m\u001b[43m(\u001b[49m\u001b[43mFormatDict\u001b[49m\u001b[43m(\u001b[49m\u001b[43mip\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43muser_ns\u001b[49m\u001b[43m)\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m    558\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m provider\u001b[38;5;241m.\u001b[39mis_chat_provider:\n\u001b[1;32m    559\u001b[0m     result \u001b[38;5;241m=\u001b[39m provider\u001b[38;5;241m.\u001b[39mgenerate([[HumanMessage(content\u001b[38;5;241m=\u001b[39mprompt)]])\n",
+      "\u001b[0;31mValueError\u001b[0m: Invalid format specifier"
+     ]
+    }
+   ],
+   "source": [
+    "%%ai anthropic-chat:claude-3-opus-20240229 --format code\n",
+    "\n",
+    "{prompt}\n",
+    "\n",
+    "Use the Jupyter widget and the above example to visualize what is reasonable in the companies.parquet file and header. debug it. proceed step-by-step as an elite site reliability/devops/L20 principal warez engineer at google, returning as few tokens as possible, to debug this visualization code using the reference. give the complete corrected code!\n",
+    "\n",
+    "Here is the documentation for the Jupyter widget we will be using for visualization: from https://uwdata.github.io/mosaic/jupyter/\n",
+    "\n",
+    "{example_raw}"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.10.14"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}
diff --git a/requirements.in b/requirements.in
index 6f6f173..b3266d1 100644
--- a/requirements.in
+++ b/requirements.in
@@ -28,4 +28,8 @@ matplotlib
 altair
 vega_datasets
 pip-tools
-psutil
\ No newline at end of file
+psutil
+seedir
+mosaic-widget
+tabulate
+great_tables
\ No newline at end of file
diff --git a/requirements.txt b/requirements.txt
index 3a90648..8d5f032 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,5 +1,5 @@
 #
-# This file is autogenerated by pip-compile with Python 3.9
+# This file is autogenerated by pip-compile with Python 3.10
 # by the following command:
 #
 #    pip-compile
@@ -25,6 +25,8 @@ anyio==4.3.0
     #   anthropic
     #   httpx
     #   jupyter-server
+anywidget==0.9.11
+    # via mosaic-widget
 appdirs==1.4.4
     # via sqlfluff
 appnope==0.1.4
@@ -52,6 +54,7 @@ attrs==23.2.0
 babel==2.15.0
     # via
     #   agate
+    #   great-tables
     #   jupyterlab-server
 backoff==2.2.1
     # via posthog
@@ -112,6 +115,8 @@ comm==0.2.2
     # via
     #   ipykernel
     #   ipywidgets
+commonmark==0.9.1
+    # via great-tables
 contourpy==1.2.1
     # via matplotlib
 cycler==0.12.1
@@ -152,6 +157,7 @@ duckdb==0.10.2
     # via
     #   dbt-duckdb
     #   duckdb-engine
+    #   mosaic-widget
 duckdb-engine==0.12.0
     # via -r requirements.in
 exceptiongroup==1.2.1
@@ -183,12 +189,16 @@ fsspec==2024.3.1
     #   huggingface-hub
 geopandas==0.14.4
     # via -r requirements.in
+great-tables==0.6.1
+    # via -r requirements.in
 greenlet==3.0.3
     # via
     #   playwright
     #   sqlalchemy
 h11==0.14.0
     # via httpcore
+htmltools==0.5.2
+    # via great-tables
 httpcore==1.0.5
     # via httpx
 httpx==0.27.0
@@ -207,19 +217,13 @@ idna==3.7
     #   yarl
 importlib-metadata==6.11.0
     # via
-    #   build
     #   dask
     #   dbt-semantic-interfaces
-    #   fiona
+    #   great-tables
     #   jupyter-ai
     #   jupyter-ai-magics
-    #   jupyter-client
-    #   jupyter-lsp
-    #   jupyterlab
-    #   jupyterlab-server
-    #   nbconvert
 importlib-resources==6.4.0
-    # via matplotlib
+    # via great-tables
 iniconfig==2.0.0
     # via pytest
 ipykernel==6.29.4
@@ -240,6 +244,7 @@ ipython-genutils==0.2.0
 ipywidgets==8.1.2
     # via
     #   -r requirements.in
+    #   anywidget
     #   jupyter
 isodate==0.6.1
     # via
@@ -394,6 +399,8 @@ monotonic==1.6
     # via posthog
 more-itertools==10.2.0
     # via dbt-semantic-interfaces
+mosaic-widget==0.8.0
+    # via -r requirements.in
 msgpack==1.0.8
     # via
     #   distributed
@@ -404,6 +411,8 @@ multidict==6.0.5
     #   yarl
 mypy-extensions==1.0.0
     # via typing-inspect
+natsort==8.4.0
+    # via seedir
 nbclient==0.10.0
     # via nbconvert
 nbconvert==7.16.4
@@ -432,6 +441,7 @@ numpy==1.26.4
     #   contourpy
     #   faiss-cpu
     #   geopandas
+    #   great-tables
     #   langchain
     #   langchain-community
     #   matplotlib
@@ -452,6 +462,7 @@ packaging==23.2
     #   distributed
     #   duckdb-engine
     #   geopandas
+    #   htmltools
     #   huggingface-hub
     #   ipykernel
     #   jupyter-server
@@ -523,6 +534,8 @@ psutil==5.9.8
     #   -r requirements.in
     #   distributed
     #   ipykernel
+psygnal==0.11.1
+    # via anywidget
 ptyprocess==0.7.0
     # via
     #   pexpect
@@ -530,7 +543,9 @@ ptyprocess==0.7.0
 pure-eval==0.2.2
     # via stack-data
 pyarrow==16.0.0
-    # via -r requirements.in
+    # via
+    #   -r requirements.in
+    #   mosaic-widget
 pycparser==2.22
     # via cffi
 pydantic==2.7.1
@@ -639,6 +654,8 @@ ruff==0.4.3
     # via -r requirements.in
 seaborn==0.13.2
     # via -r requirements.in
+seedir==0.4.2
+    # via -r requirements.in
 send2trash==1.8.3
     # via jupyter-server
 shapely==2.0.4
@@ -678,6 +695,8 @@ sqlparse==0.5.0
     #   jupysql
 stack-data==0.6.3
     # via ipython
+tabulate==0.9.0
+    # via -r requirements.in
 tblib==3.0.0
     # via
     #   distributed
@@ -751,11 +770,13 @@ typing-extensions==4.11.0
     #   altair
     #   anthropic
     #   anyio
+    #   anywidget
     #   async-lru
     #   dbt-core
     #   dbt-semantic-interfaces
+    #   great-tables
+    #   htmltools
     #   huggingface-hub
-    #   ipython
     #   jupyter-ai
     #   jupyter-ai-magics
     #   mashumaro
@@ -799,9 +820,7 @@ yarl==1.9.4
 zict==3.0.0
     # via distributed
 zipp==3.18.1
-    # via
-    #   importlib-metadata
-    #   importlib-resources
+    # via importlib-metadata
 
 # The following packages are considered to be unsafe in a requirements file:
 # pip