diff --git a/.nojekyll b/.nojekyll
index 4ff88a3..3889da9 100644
--- a/.nojekyll
+++ b/.nojekyll
@@ -1 +1 @@
-80e1cb7c
\ No newline at end of file
+3fa9d1f5
\ No newline at end of file
diff --git a/aemo_data.html b/aemo_data.html
index 129c3d6..70fcd81 100644
--- a/aemo_data.html
+++ b/aemo_data.html
@@ -20,6 +20,40 @@
 margin: 0 0.8em 0.2em -1em; /* quarto-specific, see https://github.com/quarto-dev/quarto-cli/issues/4556 */
 vertical-align: middle;
 }
+/* CSS for syntax highlighting */
+pre > code.sourceCode { white-space: pre; position: relative; }
+pre > code.sourceCode > span { display: inline-block; line-height: 1.25; }
+pre > code.sourceCode > span:empty { height: 1.2em; }
+.sourceCode { overflow: visible; }
+code.sourceCode > span { color: inherit; text-decoration: inherit; }
+div.sourceCode { margin: 1em 0; }
+pre.sourceCode { margin: 0; }
+@media screen {
+div.sourceCode { overflow: auto; }
+}
+@media print {
+pre > code.sourceCode { white-space: pre-wrap; }
+pre > code.sourceCode > span { text-indent: -5em; padding-left: 5em; }
+}
+pre.numberSource code
+  { counter-reset: source-line 0; }
+pre.numberSource code > span
+  { position: relative; left: -4em; counter-increment: source-line; }
+pre.numberSource code > span > a:first-child::before
+  { content: counter(source-line);
+    position: relative; left: -1em; text-align: right; vertical-align: baseline;
+    border: none; display: inline-block;
+    -webkit-touch-callout: none; -webkit-user-select: none;
+    -khtml-user-select: none; -moz-user-select: none;
+    -ms-user-select: none; user-select: none;
+    padding: 0 4px; width: 4em;
+  }
+pre.numberSource { margin-left: 3em; padding-left: 4px; }
+div.sourceCode
+  { }
+@media screen {
+pre > code.sourceCode > span > a:first-child::before { text-decoration: underline; }
+}
@@ -150,7 +184,16 @@
@@ -172,8 +215,125 @@

AEMO Data Snippets

+
+

Dividing large AEMO Data CSVs into parquet partitions

+

This script can be run from the command line to divide a large AEMO data CSV (e.g. a table from the Monthly Data Archive, such as rebids in BIDPEROFFER) into Parquet partitions. This makes the data far easier to analyse with packages such as Dask.

+
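Once partitioned, the chunks can be read back lazily as a single DataFrame. As a minimal sketch (assuming Dask is installed and that the script below has written its partitions to a hypothetical parquet_chunks directory):

import dask.dataframe as dd

# Lazily reads every Parquet partition in the directory as one logical DataFrame;
# individual partitions are only loaded when a computation needs them
df = dd.read_parquet("parquet_chunks/*.parquet")
print(df.head())  # materialises only the first partition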

It assumes that the second row of the file is the header (i.e. the column names) for a single data table, with the data itself starting on the third row.

+
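As an illustration of this layout (the file name here is hypothetical), the header row can be pulled out on its own with pandas, mirroring what the script's get_columns function does below:

import pandas as pd

# header=1 treats the file's second row as the column header;
# nrows=0 reads no data rows, so only the column index is returned
cols = pd.read_csv("PUBLIC_DVD_BIDPEROFFER_202301.csv", header=1, nrows=0).columns
print(list(cols))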
+

Requirements

+

Written using Python 3.11. Uses pathlib, type annotations and f-strings, so at least Python 3.6 is probably required, along with pandas (1.2+ for the read_csv context manager), a Parquet engine such as pyarrow or fastparquet, and tqdm.

+
+# Python script (executable via CLI) to create parquet partitions
+# for large AEMO data CSVs. Assumes the table header is on the second line and
+# that only one table type is in the file
+#
+# Copyright (C) 2023 Abhijith Prakash
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <https://www.gnu.org/licenses/>.
+
+import argparse
+import logging
+import sys
+from pathlib import Path
+
+import pandas as pd
+from tqdm import tqdm
+
+
+def arg_parser():
+    description = (
+        "Chunk large monthly AEMO data table CSVs into parquet partitions. "
+        + "Assumes that the table header is in the 2nd row"
+    )
+    parser = argparse.ArgumentParser(description=description)
+    parser.add_argument(
+        "-file", type=str, required=True, help=("File to process. Must be CSV")
+    )
+    parser.add_argument(
+        "-output_dir",
+        type=str,
+        required=True,
+        help=(
+            "Directory to write parquet chunks to. Will be created if it does not exist"
+        ),
+    )
+    parser.add_argument(
+        "-chunksize",
+        type=int,
+        default=10**6,
+        help=("Size of each DataFrame chunk (# of lines). Default 10^6"),
+    )
+    args = parser.parse_args()
+    return args
+
+
+def get_columns(file_path: Path) -> pd.Index:
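+    # header=1 with nrows=0 reads only the column header (the file's second
+    # row) and no data rows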
+    col_df = pd.read_csv(file_path, header=1, nrows=0)
+    return col_df.columns
+
+
+def estimate_size_of_lines(file_path: Path, columns: pd.Index) -> float:
+    # Sample the first 1000 data rows to estimate the in-memory size of one line
+    sample_size = 1000
+    sample = pd.read_csv(file_path, skiprows=2, nrows=sample_size, header=None)
+    sample.columns = columns
+    total_size = sample.memory_usage().sum()
+    size_per_line = total_size / len(sample)
+    return size_per_line
+
+
+def chunk_file(file_path: Path, output_dir: Path, chunksize: int) -> None:
+    if file_path.suffix.lower() != ".csv":
+        logging.error("File is not a CSV")
+        sys.exit(1)
+    cols = get_columns(file_path)
+    size_per_line = estimate_size_of_lines(file_path, cols)
+    file_size = file_path.stat().st_size
+    file_stem = file_path.stem
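+    # chunksize makes read_csv return an iterator of DataFrame chunks,
+    # so the full CSV is never held in memory at once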
+    with pd.read_csv(file_path, chunksize=chunksize, skiprows=2, header=None) as reader:
+        with tqdm(total=file_size, desc="Progress estimate based on file size") as pbar:
+            for i, chunk in enumerate(reader):
+                chunk.columns = cols
+                out_file = Path(file_stem + f"_chunk{i}.parquet")
+                chunk.to_parquet(output_dir / out_file)
+                # See here for comparison of pandas DataFrame size vs CSV size:
+                # https://stackoverflow.com/questions/18089667/how-to-estimate-how-much-memory-a-pandas-dataframe-will-need#32970117
+                pbar.update((size_per_line * chunksize) / 2)
+
+
+def main():
+    logging.basicConfig(format="\n%(levelname)s:%(message)s", level=logging.INFO)
+    args = arg_parser()
+    f = Path(args.file)
+    output_dir = Path(args.output_dir)
+    if not output_dir.exists():
+        output_dir.mkdir(parents=True)
+    elif any(output_dir.glob(f.stem + "*.parquet")):
+        logging.error("Pre-existing chunks of this file in output directory. Exiting.")
+        sys.exit(1)
+    if not f.exists():
+        logging.error("Path does not exist")
+        sys.exit(1)
+    if not f.is_file():
+        logging.error("Path provided does not point to a file")
+        sys.exit(1)
+    chunk_file(f, output_dir, args.chunksize)
+
+
+if __name__ == "__main__":
+    main()
+
+
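Assuming the script is saved as chunk_aemo_csv.py (name hypothetical), an invocation might look like:

python chunk_aemo_csv.py -file PUBLIC_DVD_BIDPEROFFER_202301.csv -output_dir parquet_chunks -chunksize 500000

The flags map to the argparse arguments defined above; -chunksize is optional and defaults to 10^6 lines per chunk.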