diff --git a/_config.yml b/_config.yml
index b194a40..e4e3a28 100644
--- a/_config.yml
+++ b/_config.yml
@@ -60,6 +60,11 @@ ghub:
edit: true
repository: dsa
+
+analytics:
+ google:
+ tracking_id: UA-184894912-1
+
future: true
# Build settings
diff --git a/_dsa/_config.yml b/_dsa/_config.yml
deleted file mode 100755
index 529d800..0000000
--- a/_dsa/_config.yml
+++ /dev/null
@@ -1,27 +0,0 @@
-author:
-- given: Neil D.
- family: Lawrence
- institution: University of Cambridge
- gscholar: r3SJcvoAAAAJ
- twitter: lawrennd
- orcid: 0000-0001-9258-1030
- url: http://inverseprobability.com
-layout: lecture
-venue: Virtual (Zoom)
-ipynb: True
-talkcss: https://inverseprobability.com/assets/css/talks.css
-postsdir: ../../../mlatcl/dsa/_lectures/
-slidesdir: ../../../mlatcl/dsa/slides/
-notesdir: ../../../mlatcl/dsa/_notes/
-notebooksdir: ../../../mlatcl/dsa/_notebooks/
-writediagramsdir: .
-diagramsdir: ./slides/diagrams/
-baseurl: "dsa/" # the subpath of your site, e.g. /blog/
-url: "https://mlatcl.github.io/" # the base hostname & protocol for your site
-transition: None
-ghub:
-- organization: lawrennd
- repository: talks
- branch: gh-pages
- directory: _dsa
-
diff --git a/_lamd/_lamd.yml b/_lamd/_lamd.yml
new file mode 100755
index 0000000..bda6e6f
--- /dev/null
+++ b/_lamd/_lamd.yml
@@ -0,0 +1,42 @@
+author:
+- family: Lawrence
+ given: Neil D.
+ gscholar: r3SJcvoAAAAJ
+ institute: University of Cambridge
+ twitter: lawrennd
+ orcid: 0000-0001-9258-1030
+ url: http://inverseprobability.com
+layout: lecture
+room:
+venue: Virtual (Zoom)
+talkcss: https://inverseprobability.com/assets/css/talks.css
+bibdir: $HOME/lawrennd/bibliography
+postsdir: ../_lectures/
+slidesdir: ../slides/
+notesdir: ../_notes/
+notebooksdir: ../_notebooks/
+practicalsdir: ../_practicals/
+snippetsdir: $HOME/lawrennd/snippets
+writediagramsdir: .
+scriptsdir: ./scripts/
+diagramsdir: ./slides/diagrams/
+posts: True
+docx: False
+notespdf: False
+slidesipynb: False
+pptx: True
+pdf: False
+assignment: False
+ipynb: True
+reveal: True
+transition: None
+potx: custom-reference.potx
+dotx: custom-reference.dotx
+baseurl: "dsa/" # the subpath of your site, e.g. /blog/
+url: "https://mlatcl.github.io/" # the base hostname & protocol for your site
+ghub:
+- organization: mlatcl
+ repository: dsa
+ branch: gh-pages
+ directory: _lamd
+
diff --git a/_dsa/bayesian-methods-abuja.md b/_lamd/bayesian-methods-abuja.md
similarity index 95%
rename from _dsa/bayesian-methods-abuja.md
rename to _lamd/bayesian-methods-abuja.md
index 1e4cbe3..6ccba1f 100755
--- a/_dsa/bayesian-methods-abuja.md
+++ b/_lamd/bayesian-methods-abuja.md
@@ -28,10 +28,8 @@ venue: DSA, Abuja
transition: None
---
-\include{talk-macros.tex}
-
\include{_ml/includes/what-is-ml.md}
-\include{_ml/includes/nigeria-nmis-data.md}
+\include{_datasets/includes/nigeria-nmis-data.md}
\include{_ml/includes/probability-intro.md}
\include{_ml/includes/probabilistic-modelling.md}
diff --git a/_lamd/compile.sh b/_lamd/compile.sh
new file mode 100755
index 0000000..bc67bab
--- /dev/null
+++ b/_lamd/compile.sh
@@ -0,0 +1,11 @@
+#!/bin/bash
+
+# Build each lecture listed in lectures.csv, skipping the header row.
+SKIP=true
+while read -r stub; do
+  if $SKIP; then
+    SKIP=false
+  else
+    maketalk "$stub"
+  fi
+done < lectures.csv
diff --git a/_dsa/gaussian-processes.md b/_lamd/gaussian-processes.md
similarity index 98%
rename from _dsa/gaussian-processes.md
rename to _lamd/gaussian-processes.md
index 76c7338..ac9a8f7 100755
--- a/_dsa/gaussian-processes.md
+++ b/_lamd/gaussian-processes.md
@@ -11,7 +11,6 @@ time: "15:00 (West Africa Standard Time)"
transition: None
---
-\include{talk-macros.tex}
\include{_mlai/includes/mlai-notebook-setup.md}
\include{_gp/includes/gp-book.md}
@@ -78,7 +77,7 @@ $$
\include{_gp/includes/gp-optimize.md}
\include{_kern/includes/eq-covariance.md}
\include{_gp/includes/gp-summer-school.md}
-\include{_gp/includes/gpy-software.md}
+\include{_software/includes/gpy-software.md}
\include{_gp/includes/gpy-tutorial.md}
\subsection{Review}
diff --git a/_lamd/lectures.csv b/_lamd/lectures.csv
new file mode 100644
index 0000000..6c6fa3c
--- /dev/null
+++ b/_lamd/lectures.csv
@@ -0,0 +1,6 @@
+lectureId
+what-is-machine-learning
+probabilistic-machine-learning
+ml-systems
+bayesian-methods-abuja
+gaussian-processes
diff --git a/_dsa/ml-systems-kimberley.md b/_lamd/ml-systems-kimberley.md
similarity index 100%
rename from _dsa/ml-systems-kimberley.md
rename to _lamd/ml-systems-kimberley.md
diff --git a/_dsa/ml-systems.md b/_lamd/ml-systems.md
similarity index 98%
rename from _dsa/ml-systems.md
rename to _lamd/ml-systems.md
index 83d09cd..3c443f8 100755
--- a/_dsa/ml-systems.md
+++ b/_lamd/ml-systems.md
@@ -20,8 +20,6 @@ venue: Virtual DSA
transition: None
---
-\include{talk-macros.tex}
-
\slides{\section{AI via ML Systems}
\include{_ai/includes/supply-chain-system.md}
diff --git a/_dsa/probabilistic-machine-learning.md b/_lamd/probabilistic-machine-learning.md
similarity index 98%
rename from _dsa/probabilistic-machine-learning.md
rename to _lamd/probabilistic-machine-learning.md
index 8c80d10..50b51d7 100755
--- a/_dsa/probabilistic-machine-learning.md
+++ b/_lamd/probabilistic-machine-learning.md
@@ -30,8 +30,6 @@ https://www.kaggle.com/alaowerre/nigeria-nmis-health-facility-data
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-\include{talk-macros.tex}
-
\include{_ml/includes/what-is-ml.md}
\include{_ml/includes/probability-intro.md}
\include{_ml/includes/probabilistic-modelling.md}
diff --git a/_dsa/what-is-machine-learning-ashesi.md b/_lamd/what-is-machine-learning-ashesi.md
similarity index 100%
rename from _dsa/what-is-machine-learning-ashesi.md
rename to _lamd/what-is-machine-learning-ashesi.md
diff --git a/_dsa/what-is-machine-learning.md b/_lamd/what-is-machine-learning.md
similarity index 97%
rename from _dsa/what-is-machine-learning.md
rename to _lamd/what-is-machine-learning.md
index 9d5eb49..e3a5175 100755
--- a/_dsa/what-is-machine-learning.md
+++ b/_lamd/what-is-machine-learning.md
@@ -20,8 +20,6 @@ papersize: a4paper
transition: None
---
-\include{../talk-macros.gpp}
-
\section{Introduction}
\include{_data-science/includes/data-science-africa.md}
@@ -54,7 +52,7 @@ $$
\notes{Stephen Kiprotich, the 2012 gold medal winner from the London Olympics, comes from Kapchorwa district, in eastern Uganda, near the border with Kenya.}
\include{_ml/includes/olympic-marathon-polynomial.md}
-\include{../_ml/includes/what-does-machine-learning-do.md}
+\include{_ml/includes/what-does-machine-learning-do.md}
\include{_ml/includes/what-is-ml-2.md}
\include{_ai/includes/ai-vs-data-science-2.md}
diff --git a/_lectures/01-what-is-machine-learning.html b/_lectures/01-what-is-machine-learning.html
index 912fd2d..dee2826 100644
--- a/_lectures/01-what-is-machine-learning.html
+++ b/_lectures/01-what-is-machine-learning.html
@@ -1,7 +1,12 @@
---
title: "What is Machine Learning?"
venue: "Data Science Africa Summer School, Addis Ababa, Ethiopia"
-abstract: "
In this talk we will introduce the fundamental ideas in machine learning. We’ll develop our exposition around the ideas of prediction function and the objective function. We don’t so much focus on the derivation of particular algorithms, but more the general principles involved to give an idea of the machine learning landscape.
"
+abstract: "
In this talk we will introduce the fundamental ideas in
+machine learning. We’ll develop our exposition around the ideas of
+prediction function and the objective function. We don’t so much focus
+on the derivation of particular algorithms, but more the general
+principles involved to give an idea of the machine learning
+landscape.
"
author:
- given: Neil D.
family: Lawrence
@@ -9,13 +14,15 @@
institute: Amazon Cambridge and University of Sheffield
twitter: lawrennd
gscholar: r3SJcvoAAAAJ
- orchid: null
+ orcid:
+edit_url: https://github.com/mlatcl/dsa/edit/gh-pages/_lamd/what-is-machine-learning.md
date: 2019-06-03
published: 2019-06-03
-week: 0
session: 1
reveal: 01-what-is-machine-learning.slides.html
+transition: None
ipynb: 01-what-is-machine-learning.ipynb
+pptx: 01-what-is-machine-learning.pptx
layout: lecture
categories:
- notes
@@ -33,41 +40,93 @@
-->
Figure: Data Science Africa http://datascienceafrica.org is a ground up initiative for capacity building around data science, machine learning and artificial intelligence on the African continent.
+
Figure: Data Science Africa http://datascienceafrica.org is a ground up initiative
+for capacity building around data science, machine learning and
+artificial intelligence on the African continent.
-
Data Science Africa is a bottom up initiative for capacity building in data science, machine learning and artificial intelligence on the African continent.
-
As of 2019 there have been five workshops and five schools, located in Nyeri, Kenya (twice); Kampala, Uganda; Arusha, Tanzania; Abuja, Nigeria; Addis Ababa, Ethiopia and Accra, Ghana. The next event is scheduled for June 2020 in Kampala, Uganda.
-
The main notion is end-to-end data science. For example, going from data collection in the farmer’s field to decision making in the Ministry of Agriculture. Or going from malaria disease counts in health centers to medicine distribution.
-
The philosophy is laid out in (Lawrence 2015). The key idea is that the modern information infrastructure presents new solutions to old problems. Modes of development change because less capital investment is required to take advantage of this infrastructure. The philosophy is that local capacity building is the right way to leverage these challenges in addressing data science problems in the African context.
-
Data Science Africa is now a non-govermental organization registered in Kenya. The organising board of the meeting is entirely made up of scientists and academics based on the African continent.
+
+
+
+
+
+
+
+
+
Figure: Data Science Africa meetings held up to October 2021.
+
+
+
Data Science Africa is a bottom up initiative for capacity building
+in data science, machine learning and artificial intelligence on the
+African continent.
+
As of May 2023 there have been eleven workshops and schools, located
+in seven different countries: Nyeri, Kenya (twice); Kampala, Uganda;
+Arusha, Tanzania; Abuja, Nigeria; Addis Ababa, Ethiopia; Accra, Ghana;
+Kampala, Uganda and Kimberley, South Africa (virtual), and in Kigali,
+Rwanda.
+
The main notion is end-to-end data science. For example,
+going from data collection in the farmer’s field to decision making in
+the Ministry of Agriculture. Or going from malaria disease counts in
+health centers to medicine distribution.
+
The philosophy is laid out in (Lawrence, 2015). The key idea is
+that the modern information infrastructure presents new
+solutions to old problems. Modes of development change because less
+capital investment is required to take advantage of this infrastructure.
+The philosophy is that local capacity building is the right way to
+leverage these challenges in addressing data science problems in the
+African context.
+
Data Science Africa is now a non-governmental organization registered
+in Kenya. The organising board of the meeting is entirely made up of
+scientists and academics based on the African continent.
-
+
-
+
-
Figure: The lack of existing physical infrastructure on the African continent makes it a particularly interesting environment for deploying solutions based on the information infrastructure. The idea is explored more in this Guardian op-ed on Guardian article on How African can benefit from the data revolution.
-
+
Figure: The lack of existing physical infrastructure on the African
+continent makes it a particularly interesting environment for deploying
+solutions based on the information infrastructure. The idea is
+explored more in this Guardian op-ed on Guardian article on How
+African can benefit from the data revolution.
As an example of using Gaussian process models within the full pipeline from data to decsion, we’ll consider the prediction of Malaria incidence in Uganda. For the purposes of this study malaria reports come in two forms, HMIS reports from health centres and Sentinel data, which is curated by the WHO. There are limited sentinel sites and many HMIS sites.
-
The work is from Ricardo Andrade Pacheco’s PhD thesis, completed in collaboration with John Quinn and Martin Mubangizi (Andrade-Pacheco et al. 2014; Mubangizi et al. 2014). John and Martin were initally from the AI-DEV group from the University of Makerere in Kampala and more latterly they were based at UN Global Pulse in Kampala.
-
Malaria data is spatial data. Uganda is split into districts, and health reports can be found for each district. This suggests that models such as conditional random fields could be used for spatial modelling, but there are two complexities with this. First of all, occasionally districts split into two. Secondly, sentinel sites are a specific location within a district, such as Nagongera which is a sentinel site based in the Tororo district.
+
As an example of using Gaussian process models within the full
+pipeline from data to decision, we’ll consider the prediction of Malaria
+incidence in Uganda. For the purposes of this study malaria reports come
+in two forms, HMIS reports from health centres and Sentinel data, which
+is curated by the WHO. There are limited sentinel sites and many HMIS
+sites.
+
The work is from Ricardo Andrade Pacheco’s PhD thesis, completed in
+collaboration with John Quinn and Martin Mubangizi (Andrade-Pacheco
+et al., 2014; Mubangizi et al., 2014). John and Martin were
+initially from the AI-DEV group at the University of Makerere in
+Kampala and more latterly they were based at UN Global Pulse in Kampala.
+You can see the work summarized on the UN Global Pulse disease
+outbreaks project site here.
Malaria data is spatial data. Uganda is split into districts, and
+health reports can be found for each district. This suggests that models
+such as conditional random fields could be used for spatial modelling,
+but there are two complexities with this. First of all, occasionally
+districts split into two. Secondly, sentinel sites are a specific
+location within a district, such as Nagongera which is a sentinel site
+based in the Tororo district.
(Andrade-Pacheco et al. 2014; Mubangizi et al. 2014)
-
The common standard for collecting health data on the African continent is from the Health management information systems (HMIS). However, this data suffers from missing values (Gething et al. 2006) and diagnosis of diseases like typhoid and malaria may be confounded.
+
+(Andrade-Pacheco
+et al., 2014; Mubangizi et al., 2014)
+
+
The common standard for collecting health data on the African
+continent is from the Health management information systems (HMIS).
+However, this data suffers from missing values (Gething et al., 2006) and diagnosis
+of diseases like typhoid and malaria may be confounded.
-
-
+
-
Figure: The Tororo district, where the sentinel site, Nagongera, is located.
+
Figure: The Tororo district, where the sentinel site, Nagongera, is
+located.
-
World Health Organization Sentinel Surveillance systems are set up “when high-quality data are needed about a particular disease that cannot be obtained through a passive system”. Several sentinel sites give accurate assessment of malaria disease levels in Uganda, including a site in Nagongera.
+
World
+Health Organization Sentinel Surveillance systems are set up “when
+high-quality data are needed about a particular disease that cannot be
+obtained through a passive system”. Several sentinel sites give accurate
+assessment of malaria disease levels in Uganda, including a site in
+Nagongera.
-
+
-
+
-
Figure: Sentinel and HMIS data along with rainfall and temperature for the Nagongera sentinel station in the Tororo district.
-
-
-
In collaboration with the AI Research Group at Makerere we chose to investigate whether Gaussian process models could be used to assimilate information from these two different sources of disease informaton. Further, we were interested in whether local information on rainfall and temperature could be used to improve malaria estimates.
-
The aim of the project was to use WHO Sentinel sites, alongside rainfall and temperature, to improve predictions from HMIS data of levels of malaria.
+
Figure: Sentinel and HMIS data along with rainfall and temperature
+for the Nagongera sentinel station in the Tororo district.
+
+
+
In collaboration with the AI Research Group at Makerere we chose to
+investigate whether Gaussian process models could be used to assimilate
+information from these two different sources of disease information.
+Further, we were interested in whether local information on rainfall and
+temperature could be used to improve malaria estimates.
+
The aim of the project was to use WHO Sentinel sites, alongside
+rainfall and temperature, to improve predictions from HMIS data of
+levels of malaria.
-
+
-
+
@@ -170,10 +281,11 @@
Example: Prediction o
-
+
-
+
@@ -182,24 +294,27 @@
Example: Prediction o
-
-
+
+
-
Figure: The project arose out of the Gaussian process summer school held at Makerere in Kampala in 2013. The school led, in turn, to the Data Science Africa initiative.
+
Figure: The project arose out of the Gaussian process summer school
+held at Makerere in Kampala in 2013. The school led, in turn, to the
+Data Science Africa initiative.
Early Warning Systems
-
+
-
+
@@ -209,347 +324,940 @@
Early Warning Systems
-
+
-
+
-
Figure: Estimate of the current disease situation in the Kabarole district over time. Estimate is constructed with a Gaussian process with an additive covariance funciton.
-
-
-
Health monitoring system for the Kabarole district. Here we have fitted the reports with a Gaussian process with an additive covariance function. It has two components, one is a long time scale component (in red above) the other is a short time scale component (in blue).
-
Monitoring proceeds by considering two aspects of the curve. Is the blue line (the short term report signal) above the red (which represents the long term trend? If so we have higher than expected reports. If this is the case and the gradient is still positive (i.e. reports are going up) we encode this with a red color. If it is the case and the gradient of the blue line is negative (i.e. reports are going down) we encode this with an amber color. Conversely, if the blue line is below the red and decreasing, we color green. On the other hand if it is below red but increasing, we color yellow.
-
This gives us an early warning system for disease. Red is a bad situation getting worse, amber is bad, but improving. Green is good and getting better and yellow good but degrading.
-
Finally, there is a gray region which represents when the scale of the effect is small.
+
Figure: Estimate of the current disease situation in the Kabarole
+district over time. Estimate is constructed with a Gaussian process with
+an additive covariance function.
+
+
+
Health monitoring system for the Kabarole district. Here we have
+fitted the reports with a Gaussian process with an additive covariance
+function. It has two components, one is a long time scale component (in
+red above) the other is a short time scale component (in blue).
+
Monitoring proceeds by considering two aspects of the curve. Is the
+blue line (the short term report signal) above the red (which represents
+the long term trend)? If so we have higher than expected reports. If this
+is the case and the gradient is still positive (i.e. reports
+are going up) we encode this with a red color. If it is the
+case and the gradient of the blue line is negative (i.e. reports are
+going down) we encode this with an amber color. Conversely, if
+the blue line is below the red and decreasing, we color
+green. On the other hand if it is below red but increasing, we
+color yellow.
+
This gives us an early warning system for disease. Red is a bad
+situation getting worse, amber is bad, but improving. Green is good and
+getting better and yellow good but degrading.
+
Finally, there is a gray region which represents when the scale of
+the effect is small.
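To make this concrete, here is a minimal sketch of such a model using the GPy library that appears later in these notes; the data below are synthetic stand-ins for the weekly reports, and the variable names are purely illustrative.

import numpy as np
import GPy

# Synthetic stand-in for weekly report counts over time (not the real data).
X = np.arange(200, dtype=float)[:, None]
y = np.sin(X/30.) + 0.3*np.sin(X/3.) + 0.1*np.random.randn(200, 1)

# Additive covariance: a long time-scale trend plus a short time-scale component.
kernel = GPy.kern.RBF(1, lengthscale=50.) + GPy.kern.RBF(1, lengthscale=5.)
model = GPy.models.GPRegression(X, y, kernel)
model.optimize()

# Predicting with each part of the covariance separately recovers the
# long time-scale (red) and short time-scale (blue) curves described above.
long_term, _ = model.predict(X, kern=model.kern.parts[0])
short_term, _ = model.predict(X, kern=model.kern.parts[1])

Comparing the short time-scale component with the long time-scale trend, and checking whether it is rising or falling, gives the red, amber, green and yellow codes described above.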
-
+
-
+
-
Figure: The map of Ugandan districts with an overview of the Malaria situation in each district.
+
Figure: The map of Ugandan districts with an overview of the Malaria
+situation in each district.
-
These colors can now be observed directly on a spatial map of the districts to give an immediate impression of the current status of the disease across the country.
+
These colors can now be observed directly on a spatial map of the
+districts to give an immediate impression of the current status of the
+disease across the country.
Machine Learning
-
This talk is a general introduction to machine learning, we will highlight the technical challenges and the current solutions. We will give an overview of what is machine learning and why it is important.
+
This talk is a general introduction to machine learning; we will
+highlight the technical challenges and the current solutions. We will
+give an overview of what is machine learning and why it is
+important.
Rise of Machine Learning
-
Machine learning is the combination of data and models, through computation, to make predictions. $$
-\text{data} + \text{model} \stackrel{\text{compute}}{\rightarrow} \text{prediction}
-$$
+
Machine learning is the combination of data and models, through
+computation, to make predictions. \[
+\text{data} + \text{model} \stackrel{\text{compute}}{\rightarrow}
+\text{prediction}
+\]
Data Revolution
-Machine learning has risen in prominence due to the rise in data availability, and its interconnection with computers. The high bandwidth connection between data and computer leads to a new interaction between us and data via the computer. It is that channel that is being mediated by machine learning techniques.
+Machine learning has risen in prominence due to the rise in data
+availability, and its interconnection with computers. The high bandwidth
+connection between data and computer leads to a new interaction between
+us and data via the computer. It is that channel that is being mediated
+by machine learning techniques.
-
+
-
+
-
Figure: Large amounts of data and high interconnection bandwidth mean that we receive much of our information about the world around us through computers.
+
Figure: Large amounts of data and high interconnection bandwidth mean
+that we receive much of our information about the world around us
+through computers.
Figure: Packhorse Bridge under Burbage Edge. This packhorse route climbs steeply out of Hathersage and heads towards Sheffield. Packhorses were the main route for transporting goods across the Peak District. The high cost of transport is one driver of the ‘smith’ model, where there is a local skilled person responsible for assembling or creating goods (e.g. a blacksmith).
-
-
-
On Sunday mornings in Sheffield, I often used to run across Packhorse Bridge in Burbage valley. The bridge is part of an ancient network of trails crossing the Pennines that, before Turnpike roads arrived in the 18th century, was the main way in which goods were moved. Given that the moors around Sheffield were home to sand quarries, tin mines, lead mines and the villages in the Derwent valley were known for nail and pin manufacture, this wasn’t simply movement of agricultural goods, but it was the infrastructure for industrial transport.
-
The profession of leading the horses was known as a Jagger and leading out of the village of Hathersage is Jagger’s Lane, a trail that headed underneath Stanage Edge and into Sheffield.
-
The movement of goods from regions of supply to areas of demand is fundamental to our society. The physical infrastructure of supply chain has evolved a great deal over the last 300 years.
+
Figure: Packhorse Bridge under Burbage Edge. This packhorse route
+climbs steeply out of Hathersage and heads towards Sheffield. Packhorses
+were the main route for transporting goods across the Peak District. The
+high cost of transport is one driver of the ‘smith’ model, where there
+is a local skilled person responsible for assembling or creating goods
+(e.g. a blacksmith).
+
+
+
On Sunday mornings in Sheffield, I often used to run across Packhorse
+Bridge in Burbage valley. The bridge is part of an ancient network of
+trails crossing the Pennines that, before Turnpike roads arrived in the
+18th century, was the main way in which goods were moved. Given that the
+moors around Sheffield were home to sand quarries, tin mines, lead mines
+and the villages in the Derwent valley were known for nail and pin
+manufacture, this wasn’t simply movement of agricultural goods, but it
+was the infrastructure for industrial transport.
+
The profession of leading the horses was known as a Jagger and
+leading out of the village of Hathersage is Jagger’s Lane, a trail that
+headed underneath Stanage Edge and into Sheffield.
+
The movement of goods from regions of supply to areas of demand is
+fundamental to our society. The physical infrastructure of supply chain
+has evolved a great deal over the last 300 years.
Figure: Richard Arkwright is regarded of the founder of the modern factory system. Factories exploit distribution networks to centralize production of goods. Arkwright located his factory in Cromford due to proximity to Nottingham Weavers (his market) and availability of water power from the tributaries of the Derwent river. When he first arrived there was almost no transportation network. Over the following 200 years The Cromford Canal (1790s), a Turnpike (now the A6, 1816-18) and the High Peak Railway (now closed, 1820s) were all constructed to improve transportation access as the factory blossomed.
-
-
-
Richard Arkwright is known as the father of the modern factory system. In 1771 he set up a Mill for spinning cotton yarn in the village of Cromford, in the Derwent Valley. The Derwent valley is relatively inaccessible. Raw cotton arrived in Liverpool from the US and India. It needed to be transported on packhorse across the bridleways of the Pennines. But Cromford was a good location due to proximity to Nottingham, where weavers where consuming the finished thread, and the availability of water power from small tributaries of the Derwent river for Arkwright’s water frames which automated the production of yarn from raw cotton.
-
By 1794 the Cromford Canal was opened to bring coal in to Cromford and give better transport to Nottingham. The construction of the canals was driven by the need to improve the transport infrastructure, facilitating the movement of goods across the UK. Canals, roads and railways were initially constructed by the economic need for moving goods. To improve supply chain.
-
The A6 now does pass through Cromford, but at the time he moved there there was merely a track. The High Peak Railway was opened in 1832, it is now converted to the High Peak Trail, but it remains the highest railway built in Britain.
-
Cooper (1991)
+
Figure: Richard Arkwright is regarded as the founder of the modern
+factory system. Factories exploit distribution networks to centralize
+production of goods. Arkwright located his factory in Cromford due to
+proximity to Nottingham Weavers (his market) and availability of water
+power from the tributaries of the Derwent river. When he first arrived
+there was almost no transportation network. Over the following 200 years
+The Cromford Canal (1790s), a Turnpike (now the A6, 1816-18) and the
+High Peak Railway (now closed, 1820s) were all constructed to improve
+transportation access as the factory blossomed.
+
+
+
Richard Arkwright is known as the father of the modern factory
+system. In 1771 he set up a Mill for spinning
+cotton yarn in the village of Cromford, in the Derwent Valley. The
+Derwent valley is relatively inaccessible. Raw cotton arrived in
+Liverpool from the US and India. It needed to be transported on
+packhorse across the bridleways of the Pennines. But Cromford was a good
+location due to proximity to Nottingham, where weavers were consuming
+the finished thread, and the availability of water power from small
+tributaries of the Derwent river for Arkwright’s water frames
+which automated the production of yarn from raw cotton.
+
By 1794 the Cromford Canal
+was opened to bring coal in to Cromford and give better transport to
+Nottingham. The construction of the canals was driven by the need to
+improve the transport infrastructure, facilitating the movement of goods
+across the UK. Canals, roads and railways were initially constructed by
+the economic need for moving goods. To improve supply chain.
+
The A6 now does pass through Cromford, but at the time he moved there,
+there was merely a track. The High Peak Railway was opened in 1832; it
+is now converted to the High Peak Trail, but it remains the highest
+railway built in Britain.
Figure: The container is one of the major drivers of globalization, and arguably the largest agent of social change in the last 100 years. It reduces the cost of transportation, significantly changing the appropriate topology of distribution networks. The container makes it possible to ship goods halfway around the world for cheaper than it costs to process those goods, leading to an extended distribution topology.
-
-
-
Containerization has had a dramatic effect on global economics, placing many people in the developing world at the end of the supply chain.
+
Figure: The container is one of the major drivers of globalization,
+and arguably the largest agent of social change in the last 100 years.
+It reduces the cost of transportation, significantly changing the
+appropriate topology of distribution networks. The container makes it
+possible to ship goods halfway around the world for cheaper than it
+costs to process those goods, leading to an extended distribution
+topology.
+
+
+
Containerization has had a dramatic effect on global economics,
+placing many people in the developing world at the end of the supply
+chain.
-
-
+
+
-
-
+
+
-
+
-
Figure: Wild Alaskan Cod, being solid in the Pacific Northwest, that is a product of China. It is cheaper to ship the deep frozen fish thousands of kilometers for processing than to process locally.
+
Figure: Wild Alaskan Cod, being sold in the Pacific Northwest, that
+is a product of China. It is cheaper to ship the deep frozen fish
+thousands of kilometers for processing than to process locally.
+
+
+
For example, you can buy Wild Alaskan Cod fished from Alaska,
+processed in China, sold in North America. This is driven by the low
+cost of transport for frozen cod vs the higher relative cost of cod
+processing in the US versus China. Similarly,
+Scottish
+prawns are also processed in China for sale in the UK.
+
+
+
+
+
+
+
-
For example, you can buy Wild Alaskan Cod fished from Alaska, processed in China, sold in North America. This is driven by the low cost of transport for frozen cod vs the higher relative cost of cod processing in the US versus China. Similarly, Scottish prawns are also processed in China for sale in the UK.
-
This effect on cost of transport vs cost of processing is the main driver of the topology of the modern supply chain and the associated effect of globalization. If transport is much cheaper than processing, then processing will tend to agglomerate in places where processing costs can be minimized.
-
Large scale global economic change has principally been driven by changes in the technology that drives supply chain.
-
Supply chain is a large-scale automated decision making network. Our aim is to make decisions not only based on our models of customer behavior (as observed through data), but also by accounting for the structure of our fulfilment center, and delivery network.
-
Many of the most important questions in supply chain take the form of counterfactuals. E.g. “What would happen if we opened a manufacturing facility in Cambridge?” A counter factual is a question that implies a mechanistic understanding of a system. It goes beyond simple smoothness assumptions or translation invariants. It requires a physical, or mechanistic understanding of the supply chain network. For this reason, the type of models we deploy in supply chain often involve simulations or more mechanistic understanding of the network.
-
In supply chain Machine Learning alone is not enough, we need to bridge between models that contain real mechanisms and models that are entirely data driven.
-
This is challenging, because as we introduce more mechanism to the models we use, it becomes harder to develop efficient algorithms to match those models to data.
+
+
Figure: The transport cost of most foods is a very small portion of
+the total cost. The exception is if foods are air freighted. Source: https://ourworldindata.org/food-choice-vs-eating-local
+by Hannah Ritchie CC-BY
+
+
+
This effect on cost of transport vs cost of processing is the main
+driver of the topology of the modern supply chain and the associated
+effect of globalization. If transport is much cheaper than processing,
+then processing will tend to agglomerate in places where processing
+costs can be minimized.
+
Large scale global economic change has principally been driven by
+changes in the technology that drives supply chain.
+
Supply chain is a large-scale automated decision making network. Our
+aim is to make decisions not only based on our models of customer
+behavior (as observed through data), but also by accounting for the
+structure of our fulfilment center, and delivery network.
+
Many of the most important questions in supply chain take the form of
+counterfactuals. E.g. “What would happen if we opened a manufacturing
+facility in Cambridge?” A counterfactual is a question that implies a
+mechanistic understanding of a system. It goes beyond simple smoothness
+assumptions or translation invariants. It requires a physical, or
+mechanistic understanding of the supply chain network. For this
+reason, the type of models we deploy in supply chain often involve
+simulations or more mechanistic understanding of the network.
+
In supply chain Machine Learning alone is not enough, we need to
+bridge between models that contain real mechanisms and models that are
+entirely data driven.
+
This is challenging, because as we introduce more mechanism to the
+models we use, it becomes harder to develop efficient algorithms to
+match those models to data.
There is a large opportunity because infrastructures around automation are moving from physical infrastructure towards information infrastructures. How can African countries benefit from a modern information infrastructure? The aim of Data Science Africa is to answer this question, with the answers coming from the attendees.
-
Machine learning aims to replicate processes through the direct use of data. When deployed in the domain of ‘artificial intelligence’, the processes that it is replicating, or emulating, are cognitive processes.
-
The first trick in machine learning is to convert the process itself into a mathematical function. That function has a set of parameters which control its behaviour. What we call learning is the adaption of these parameters to change the behavior of the function. The choice of mathematical function we use is a vital component of the model.
There is a large opportunity because infrastructures around
+automation are moving from physical infrastructure towards information
+infrastructures. How can African countries benefit from a modern
+information infrastructure? The aim of Data Science Africa is to answer
+this question, with the answers coming from the attendees.
+
Machine learning aims to replicate processes through the direct use
+of data. When deployed in the domain of ‘artificial intelligence’, the
+processes that it is replicating, or emulating, are cognitive
+processes.
+
The first trick in machine learning is to convert the process itself
+into a mathematical function. That function has a set of
+parameters which control its behaviour. What we call learning is the
+adaption of these parameters to change the behavior of the function. The
+choice of mathematical function we use is a vital component of the
+model.
-
+
-
+
-
Figure: The Kapchorwa District, home district of Stephen Kiprotich.
+
Figure: The Kapchorwa District, home district of Stephen
+Kiprotich.
-
Stephen Kiprotich, the 2012 gold medal winner from the London Olympics, comes from Kapchorwa district, in eastern Uganda, near the border with Kenya.
+
Stephen Kiprotich, the 2012 gold medal winner from the London
+Olympics, comes from Kapchorwa district, in eastern Uganda, near the
+border with Kenya.
The first thing we will do is load a standard data set for regression modelling. The data consists of the pace of Olympic Gold Medal Marathon winners for the Olympics from 1896 to present. First we load in the data and plot.
The first thing we will do is load a standard data set for regression
+modelling. The data consists of the pace of Olympic Gold Medal Marathon
+winners for the Olympics from 1896 to present. Let’s load in the data
+and plot.
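The data-loading cell itself is not reproduced in this diff; a typical version, assuming the pods library used throughout these notes, would look like the following.

import matplotlib.pyplot as plt
import pods

data = pods.datasets.olympic_marathon_men()
x, y = data['X'], data['Y']   # year of the games, winning pace in min/km
xlim = (1875, 2030)           # plotting and basis limits used below

plt.plot(x, y, 'r.', markersize=10)
plt.xlabel('year')
plt.ylabel('pace, min/km')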
Things to notice about the data include the outlier in 1904, in this year, the olympics was in St Louis, USA. Organizational problems and challenges with dust kicked up by the cars following the race meant that participants got lost, and only very few participants completed.
-
More recent years see more consistently quick marathons.
Things to notice about the data include the outlier in 1904; in that
+year the Olympics was in St Louis, USA. Organizational problems and
+challenges with dust kicked up by the cars following the race meant that
+participants got lost, and only very few participants completed. More
+recent years see more consistently quick marathons.
def polynomial(x, num_basis=4, data_limits=[-1., 1.]):
+    "Polynomial basis"
+    centre = data_limits[0]/2. + data_limits[1]/2.
+    span = data_limits[1] - data_limits[0]
+    z = np.asarray(x, dtype=float) - centre
+    z = 2*z/span  # scale the inputs to be within -1, 1 where polynomials are well behaved
+    Phi = np.zeros((x.shape[0], num_basis))
+    for i in range(num_basis):
+        Phi[:, i:i+1] = z**i
+    return Phi
+
Now we include the solution for the linear regression through
+QR-decomposition.
+
def basis_fit(Phi, y):
+    """Use QR decomposition to fit the basis."""
+    Q, R = np.linalg.qr(Phi)
+    return sp.linalg.solve_triangular(R, Q.T@y)
+
Linear Fit
+
poly_args = {'num_basis': 2,          # two basis functions (1 and x)
+             'data_limits': xlim}
+Phi = polynomial(x, **poly_args)
+w = basis_fit(Phi, y)
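To make the resulting prediction function explicit, a short continuation (again assuming the x, y and xlim variables from the data loaded above) evaluates the fit on a grid and computes its least squares error.

import numpy as np

# Evaluate the fitted function f(x) = Phi(x) w on a grid for plotting.
x_pred = np.linspace(xlim[0], xlim[1], 200)[:, None]
Phi_pred = polynomial(x_pred, **poly_args)
f_pred = Phi_pred@w

# Sum of squared errors at the training points.
sum_squares = ((y - Phi@w)**2).sum()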
Any process of automation allows us to scale what we do by codifying
+a process in some way that makes it efficient and repeatable. Machine
+learning automates by emulating human (or other actions) found in data.
+Machine learning codifies in the form of a mathematical function that is
+learnt by a computer. If we can create these mathematical functions in
+ways in which they can interconnect, then we can also build systems.
+
Machine learning works through codifying a prediction of interest
+into a mathematical function. For example, we can try and predict the
+probability that a customer wants to buy a jersey given knowledge of
+their age and the latitude where they live. The technique known as
+logistic regression estimates the odds that someone will buy a jumper as
+a linear weighted sum of the features of interest.
\[ \log \text{odds} = w_0 + w_1
+\text{age} + w_2 \text{latitude}.\] Here \(w_0\), \(w_1\) and \(w_2\) are the parameters of the model. If
+\(w_1\) and \(w_2\) are both positive, then the log-odds
+that someone will buy a jumper increase with increasing latitude and
+age, so the further north you are and the older you are the more likely
+you are to buy a jumper. The parameter \(w_0\) is an offset parameter and gives the
+log-odds of buying a jumper at zero age and on the equator. It is likely
+to be negative1 indicating that the purchase is
+odds-against. This is also a classical statistical model, and models
+like logistic regression are widely used to estimate probabilities from
+ad-click prediction to disease risk.
+
This is called a generalized linear model; we can also think of it as
+estimating the probability of a purchase as a nonlinear
+function of the features (age, latitude) and the parameters (the \(w\) values). The function is known as the
+sigmoid or logistic
+function, thus the name logistic regression.
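As a concrete illustration of the jumper example, the prediction can be written in a few lines of numpy; the weight values below are invented purely to show the mechanics and are not fitted to any data.

import numpy as np

def sigmoid(z):
    "Squash the log odds onto the interval (0, 1)."
    return 1./(1. + np.exp(-z))

w_0, w_1, w_2 = -4.0, 0.05, 0.03   # illustrative values only

def p_bought(age, latitude):
    log_odds = w_0 + w_1*age + w_2*latitude
    return sigmoid(log_odds)

p_bought(age=40, latitude=52.2)    # e.g. a forty year old in Cambridge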
Any process of automation allows us to scale what we do by codifying a process in some way that makes it efficient and repeatable. Machine learning automates by emulating human (or other actions) found in data. Machine learning codifies in the form of a mathematical function that is learnt by a computer. If we can create these mathematical functions in ways in which they can interconnect, then we can also build systems.
-
Machine learning works through codifing a prediction of interest into a mathematical function. For example, we can try and predict the probability that a customer wants to by a jersey given knowledge of their age, and the latitude where they live. The technique known as logistic regression estimates the odds that someone will by a jumper as a linear weighted sum of the features of interest.
log odds = β0 + β1age + β2latitude. Here β0, β1 and β2 are the parameters of the model. If β1 and β2 are both positive, then the log-odds that someone will buy a jumper increase with increasing latitude and age, so the further north you are and the older you are the more likely you are to buy a jumper. The parameter β0 is an offset parameter, and gives the log-odds of buying a jumper at zero age and on the equator. It is likely to be negative1 indicating that the purchase is odds-against. This is actually a classical statistical model, and models like logistic regression are widely used to estimate probabilities from ad-click prediction to risk of disease.
-
This is called a generalized linear model, we can also think of it as estimating the probability of a purchase as a nonlinear function of the features (age, lattitude) and the parameters (the β values). The function is known as the sigmoid or logistic function, thus the name logistic regression.
-
$$ p(\text{bought}) = \sigmoid{\beta_0 + \beta_1 \text{age} + \beta_2 \text{latitude}}.$$ In the case where we have features to help us predict, we sometimes denote such features as a vector, $\inputVector$, and we then use an inner product between the features and the parameters, $\boldsymbol{\beta}^\top \inputVector = \beta_1 \inputScalar_1 + \beta_2 \inputScalar_2 + \beta_3 \inputScalar_3 ...$, to represent the argument of the sigmoid.
-
$$ p(\text{bought}) = \sigmoid{\boldsymbol{\beta}^\top \inputVector}.$$ More generally, we aim to predict some aspect of our data, $\dataScalar$, by relating it through a mathematical function, $\mappingFunction(\cdot)$, to the parameters, β and the data, $\inputVector$.
-
$$ \dataScalar = \mappingFunction\left(\inputVector, \boldsymbol{\beta}\right).$$ We call $\mappingFunction(\cdot)$ the prediction function.
-
To obtain the fit to data, we use a separate function called the objective function that gives us a mathematical representation of the difference between our predictions and the real data.
-
$$\errorFunction(\boldsymbol{\beta}, \dataMatrix, \inputMatrix)$$ A commonly used examples (for example in a regression problem) is least squares, $$\errorFunction(\boldsymbol{\beta}, \dataMatrix, \inputMatrix) = \sum_{i=1}^\numData \left(\dataScalar_i - \mappingFunction(\inputVector_i, \boldsymbol{\beta})\right)^2.$$
-
If a linear prediction function is combined with the least squares objective function then that gives us a classical linear regression, another classical statistical model. Statistics often focusses on linear models because it makes interpretation of the model easier. Interpretation is key in statistics because the aim is normally to validate questions by analysis of data. Machine learning has typically focussed more on the prediction function itself and worried less about the interpretation of parameters, which are normally denoted by w instead of β. As a result non-linear functions are explored more often as they tend to improve quality of predictions but at the expense of interpretability.
+
+
Figure: The logistic function.
+
+
+
The function has this characteristic ‘s’-shape (which is where the term
+sigmoid, as in sigma, comes from). It also takes the input from the
+entire real line and ‘squashes’ it into an output that is between zero
+and one. For this reason it is sometimes also called a ‘squashing
+function’.
+
The sigmoid comes from inverting the odds ratio, \[
+\frac{\pi}{(1-\pi)}
+\] where \(\pi\) is the
+probability of a positive outcome and \(1-\pi\) is the probability of a negative
+outcome.
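A two line check, with an arbitrary value for \(\pi\), shows the inversion.

import numpy as np

pi = 0.2                                  # probability of a positive outcome
log_odds = np.log(pi/(1 - pi))            # log of the odds ratio
recovered = 1./(1. + np.exp(-log_odds))   # the sigmoid recovers pi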
In the case where we have features to help us predict, we
+sometimes denote such features as a vector, \(\mathbf{ x}\), and we then use an inner
+product between the features and the parameters, \(\mathbf{ w}^\top \mathbf{ x}= w_1 x_1 + w_2 x_2 +
+w_3 x_3 ...\), to represent the argument of the sigmoid.
+
\[ p(\text{bought})
+= \sigma\left(\mathbf{ w}^\top \mathbf{ x}\right).\] More
+generally, we aim to predict some aspect of our data, \(y\), by relating it through a mathematical
+function, \(f(\cdot)\), to the
+parameters, \(\mathbf{ w}\) and the
+data, \(\mathbf{ x}\).
+
\[ y= f\left(\mathbf{ x}, \mathbf{
+w}\right).\] We call \(f(\cdot)\) the prediction
+function.
+
To obtain the fit to data, we use a separate function called the
+objective function that gives us a mathematical representation
+of the difference between our predictions and the real data.
+
\[E(\mathbf{ w}, \mathbf{Y},
+\mathbf{X})\] A commonly used example (for example, in a
+regression problem) is least squares, \[E(\mathbf{ w}, \mathbf{Y}, \mathbf{X}) =
+\sum_{i=1}^n\left(y_i - f(\mathbf{ x}_i, \mathbf{
+w})\right)^2.\]
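Written as code, the separation between prediction function and objective function is just two definitions; the sketch below uses a linear prediction function, with X, y and w standing for the design matrix, the targets and the parameters.

import numpy as np

def f(X, w):
    "Prediction function: here a linear model."
    return X@w

def E(w, y, X):
    "Least squares objective: squared difference between predictions and data."
    return ((y - f(X, w))**2).sum()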
+
If a linear prediction function is combined with the least squares
+objective function, then that gives us a classical linear
+regression, another classical statistical model. Statistics often
+focusses on linear models because it makes interpretation of the model
+easier. Interpretation is key in statistics because the aim is normally
+to validate questions by analysis of data. Machine learning has
+typically focused more on the prediction function itself and worried
+less about the interpretation of parameters. In statistics, where
+interpretation is typically more important than prediction, parameters
+are normally denoted by \(\boldsymbol{\beta}\) instead of \(\mathbf{ w}\).
+
A key difference between statistics and machine learning is that
+(traditionally) machine learning has focussed on predictive capability
+and statistics has focussed on interpretability. That means that in a
+statistics class far more emphasis will be placed on interpretation of
+the parameters. In machine learning, the parameters, \(\mathbf{ w}\), are just a means
+to an end. But in statistics, when we denote the parameters by \(\boldsymbol{\beta}\), we often use the
+parameters to tell us something about the disease.
+
So we move between \[ p(\text{bought})
+= \sigma\left(w_0 + w_1 \text{age} + w_2
+\text{latitude}\right)\] and \[ p(\text{bought})
+= \sigma\left(\beta_0 + \beta_1 \text{age} + \beta_2
+\text{latitude}\right)\]
to denote that the emphasis is on interpretation of the parameters.
+
Another effect of the focus on prediction in machine learning is that
+non-linear approaches, which can be harder to interpret, are
+more widely deployed – they tend to improve quality
+of predictions at the expense of interpretability.
A machine learning prediction is made by combining a model with data to form the prediction. The manner in which this is done gives us the machine learning algorithm.
-
Machine learning models are mathematical models which make weak assumptions about data, e.g. smoothness assumptions. By combining these assumptions with the data, we observe we can interpolate between data points or, occasionally, extrapolate into the future.
-
Machine learning is a technology which strongly overlaps with the methodology of statistics. From a historical/philosophical view point, machine learning differs from statistics in that the focus in the machine learning community has been primarily on accuracy of prediction, whereas the focus in statistics is typically on the interpretability of a model and/or validating a hypothesis through data collection.
-
The rapid increase in the availability of compute and data has led to the increased prominence of machine learning. This prominence is surfacing in two different but overlapping domains: data science and artificial intelligence.
A machine learning prediction is made by combining a model with data
+to form the prediction. The manner in which this is done gives us the
+machine learning algorithm.
+
Machine learning models are mathematical models which make
+weak assumptions about data, e.g. smoothness assumptions. By combining
+these assumptions with the data we observe, we can interpolate between
+data points or, occasionally, extrapolate into the future.
+
Machine learning is a technology which strongly overlaps with the
+methodology of statistics. From a historical/philosophical view point,
+machine learning differs from statistics in that the focus in the
+machine learning community has been primarily on accuracy of prediction,
+whereas the focus in statistics is typically on the interpretability of
+a model and/or validating a hypothesis through data collection.
+
The rapid increase in the availability of compute and data has led to
+the increased prominence of machine learning. This prominence is
+surfacing in two different but overlapping domains: data science and
+artificial intelligence.
The real challenge, however, is end-to-end decision making. Taking information from the environment and using it to drive decision making to achieve goals.
Artificial intelligence has the objective of endowing computers with human-like intelligent capabilities. For example, understanding an image (computer vision) or the contents of some speech (speech recognition), the meaning of a sentence (natural language processing) or the translation of a sentence (machine translation).
The real challenge, however, is end-to-end decision making. Taking
+information from the environment and using it to drive decision making
+to achieve goals.
Artificial intelligence has the objective of endowing computers with
+human-like intelligent capabilities. For example, understanding an image
+(computer vision) or the contents of some speech (speech recognition),
+the meaning of a sentence (natural language processing) or the
+translation of a sentence (machine translation).
Supervised Learning for AI
-
The machine learning approach to artificial intelligence is to collect and annotate a large data set from humans. The problem is characterized by input data (e.g. a particular image) and a label (e.g. is there a car in the image yes/no). The machine learning algorithm fits a mathematical function (I call this the prediction function) to map from the input image to the label. The parameters of the prediction function are set by minimizing an error between the function’s predictions and the true data. This mathematical function that encapsulates this error is known as the objective function.
-
This approach to machine learning is known as supervised learning. Various approaches to supervised learning use different prediction functions, objective functions or different optimization algorithms to fit them.
-
For example, deep learning makes use of neural networks to form the predictions. A neural network is a particular type of mathematical function that allows the algorithm designer to introduce invariances into the function.
-
An invariance is an important way of including prior understanding in a machine learning model. For example, in an image, a car is still a car regardless of whether it’s in the upper left or lower right corner of the image. This is known as translation invariance. A neural network encodes translation invariance in convolutional layers. Convolutional neural networks are widely used in image recognition tasks.
-
An alternative structure is known as a recurrent neural network (RNN). RNNs neural networks encode temporal structure. They use auto regressive connections in their hidden layers, they can be seen as time series models which have non-linear auto-regressive basis functions. They are widely used in speech recognition and machine translation.
-
Machine learning has been deployed in Speech Recognition (e.g. Alexa, deep neural networks, convolutional neural networks for speech recognition), in computer vision (e.g. Amazon Go, convolutional neural networks for person recognition and pose detection).
-
The field of data science is related to AI, but philosophically different. It arises because we are increasingly creating large amounts of data through happenstance rather than active collection. In the modern era data is laid down by almost all our activities. The objective of data science is to extract insights from this data.
-
Classically, in the field of statistics, data analysis proceeds by assuming that the question (or scientific hypothesis) comes before the data is created. E.g., if I want to determine the effectiveness of a particular drug, I perform a design for my data collection. I use foundational approaches such as randomization to account for confounders. This made a lot of sense in an era where data had to be actively collected. The reduction in cost of data collection and storage now means that many data sets are available which weren’t collected with a particular question in mind. This is a challenge because bias in the way data was acquired can corrupt the insights we derive. We can perform randomized control trials (or A/B tests) to verify our conclusions, but the opportunity is to use data science techniques to better guide our question selection or even answer a question without the expense of a full randomized control trial (referred to as A/B testing in modern internet parlance).
Neural networks are adaptive non-linear function models. Originally, they were studied (by McCulloch and Pitts (McCulloch and Pitts 1943)) as simple models for neurons, but over the last decade they have become popular because they are a flexible approach to modelling complex data. A particular characteristic of neural network models is that they can be composed to form highly complex functions which encode many of our expectations of the real world. They allow us to encode our assumptions about how the world works.
-
We will return to composition later, but for the moment, let’s focus on a one hidden layer neural network. We are interested in the prediction function, so we’ll ignore the objective function (which is often called an error function) for the moment, and just describe the mathematical object of interest
Where in this case $\mappingFunction(\cdot)$ is a scalar function with vector inputs, and $\activationVector(\cdot)$ is a vector function with vector inputs. The dimensionality of the vector function is known as the number of hidden units, or the number of neurons. The elements of this vector function are known as the activation function of the neural network and $\mappingMatrixTwo$ are the parameters of the activation functions.
-
Relations with Classical Statistics
-
In statistics activation functions are traditionally known as basis functions. And we would think of this as a linear model. It’s doesn’t make linear predictions, but it’s linear because in statistics estimation focuses on the parameters, $\mappingMatrix$, not the parameters, $\mappingMatrixTwo$. The linear model terminology refers to the fact that the model is linear in the parameters, but it is not linear in the data unless the activation functions are chosen to be linear.
+
The machine learning approach to artificial intelligence is to
+collect and annotate a large data set from humans. The problem is
+characterized by input data (e.g. a particular image) and a label
+(e.g. is there a car in the image yes/no). The machine learning
+algorithm fits a mathematical function (I call this the prediction
+function) to map from the input image to the label. The parameters
+of the prediction function are set by minimizing an error between the
+function’s predictions and the true data. This mathematical function
+that encapsulates this error is known as the objective
+function.
+
This approach to machine learning is known as supervised
+learning. Various approaches to supervised learning use different
+prediction functions, objective functions or different optimization
+algorithms to fit them.
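As a toy illustration of that recipe (a prediction function, an objective function and an optimization algorithm), the sketch below fits the logistic regression model from earlier by gradient descent; the names and settings are made up for the example.

import numpy as np

def fit_logistic(X, y, steps=2000, learn_rate=0.1):
    "Minimise the logistic regression objective by gradient descent."
    w = np.zeros(X.shape[1])
    for _ in range(steps):
        p = 1./(1. + np.exp(-X@w))      # prediction function
        gradient = X.T@(p - y)/len(y)   # gradient of the objective
        w -= learn_rate*gradient        # adjust parameters to reduce the error
    return w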
+
For example, deep learning makes use of neural
+networks to form the predictions. A neural network is a particular
+type of mathematical function that allows the algorithm designer to
+introduce invariances into the function.
+
An invariance is an important way of including prior understanding in
+a machine learning model. For example, in an image, a car is still a car
+regardless of whether it’s in the upper left or lower right corner of
+the image. This is known as translation invariance. A neural network
+encodes translation invariance in convolutional layers.
+Convolutional neural networks are widely used in image recognition
+tasks.
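To make the idea of a convolutional layer concrete, here is a minimal numpy sketch (an illustration only, not the notes’ own code): the same kernel weights are reused at every image location, which is what builds translation invariance into the function.

import numpy as np

def convolve2d(image, kernel):
    # Slide a shared kernel over the image (cross-correlation, as used in
    # convolutional layers). Reusing the same weights at every position is
    # what encodes translation invariance.
    H, W = image.shape
    kh, kw = kernel.shape
    out = np.zeros((H - kh + 1, W - kw + 1))
    for i in range(out.shape[0]):
        for j in range(out.shape[1]):
            out[i, j] = np.sum(image[i:i+kh, j:j+kw] * kernel)
    return out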
+
An alternative structure is known as a recurrent neural network
+(RNN). RNNs encode temporal structure. They use auto-regressive
+connections in their hidden layers, and they can be seen as time
+series models which have non-linear auto-regressive basis functions.
+They are widely used in speech recognition and machine translation.
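As a rough sketch (illustrative only; the tanh nonlinearity and weight names here are assumptions), a recurrent layer updates a hidden state from the previous hidden state and the current input, which is how the auto-regressive temporal structure enters the model.

import numpy as np

def rnn_step(h_prev, x_t, W_h, W_x):
    # One recurrent update: the hidden state feeds back on itself
    # (auto-regressive connection) while absorbing the new input.
    return np.tanh(W_h @ h_prev + W_x @ x_t)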
+
Machine learning has been deployed in Speech Recognition (e.g. Alexa,
+deep neural networks, convolutional neural networks for speech
+recognition), in computer vision (e.g. Amazon Go, convolutional neural
+networks for person recognition and pose detection).
+
The field of data science is related to AI, but philosophically
+different. It arises because we are increasingly creating large amounts
+of data through happenstance rather than active collection. In
+the modern era data is laid down by almost all our activities. The
+objective of data science is to extract insights from this data.
+
Classically, in the field of statistics, data analysis proceeds by
+assuming that the question (or scientific hypothesis) comes before the
+data is created. E.g., if I want to determine the effectiveness of a
+particular drug, I perform a design for my data collection. I
+use foundational approaches such as randomization to account for
+confounders. This made a lot of sense in an era where data had to be
+actively collected. The reduction in cost of data collection and storage
+now means that many data sets are available which weren’t collected with
+a particular question in mind. This is a challenge because bias in the
+way data was acquired can corrupt the insights we derive. We can perform
+randomized control trials (or A/B tests) to verify our conclusions, but
+the opportunity is to use data science techniques to better guide our
+question selection or even answer a question without the expense of a
+full randomized control trial (referred to as A/B testing in modern
+internet parlance).
Neural networks are adaptive non-linear function models. Originally,
+they were studied (by McCulloch and Pitts (McCulloch and Pitts, 1943)) as
+simple models for neurons, but over the last decade they have become
+popular because they are a flexible approach to modelling complex data.
+A particular characteristic of neural network models is that they can be
+composed to form highly complex functions which encode many of our
+expectations of the real world. They allow us to encode our assumptions
+about how the world works.
+
We will return to composition later, but for the moment, let’s focus
+on a one hidden layer neural network. We are interested in the
+prediction function, so we’ll ignore the objective function (which is
+often called an error function) for the moment, and just describe the
+mathematical object of interest
Where in this case \(f(\cdot)\) is a
+scalar function with vector inputs, and \(\boldsymbol{ \phi}(\cdot)\) is a vector
+function with vector inputs. The dimensionality of the vector function
+is known as the number of hidden units, or the number of neurons. The
+elements of this vector function are known as the activation
+function of the neural network and \(\mathbf{V}\) are the parameters of the
+activation functions.
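As a minimal numpy sketch of this object (illustrative only; the choice of tanh activations and the variable names are assumptions, not the notes’ own code), the prediction combines the vector of activations, parameterized by V, with a weight vector:

import numpy as np

def phi(x, V):
    # vector of hidden unit activations; V holds the activation parameters
    return np.tanh(V @ x)

def f(x, w, V):
    # one hidden layer network: a weighted combination of the activations
    return w @ phi(x, V)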
+
Relations with Classical
+Statistics
+
In statistics activation functions are traditionally known as
+basis functions. And we would think of this as a linear
+model. It’s doesn’t make linear predictions, but it’s linear
+because in statistics estimation focuses on the parameters, \(\mathbf{W}\), not the parameters, \(\mathbf{V}\). The linear model terminology
+refers to the fact that the model is linear in the parameters,
+but it is not linear in the data unless the activation
+functions are chosen to be linear.
Adaptive Basis Functions
-
The first difference in the (early) neural network literature to the classical statistical literature is the decision to optimize these parameters, $\mappingMatrixTwo$, as well as the parameters, $\mappingMatrix$ (which would normally be denoted in statistics by β)2.
+
The first difference in the (early) neural network literature to the
+classical statistical literature is the decision to optimize these
+parameters, \(\mathbf{V}\), as well as
+the parameters, \(\mathbf{W}\) (which
+would normally be denoted in statistics by \(\boldsymbol{\beta}\))2.
Machine Learning
-
The key idea in machine learning is to observe the system in practice, and then emulate its behavior with mathematics. That leads to a design challenge as to where to place the mathematical function. The placement of the mathematical function leads to the different domains of machine learning.
+
The key idea in machine learning is to observe the system in
+practice, and then emulate its behavior with mathematics. That leads to
+a design challenge as to where to place the mathematical function. The
+placement of the mathematical function leads to the different domains of
+machine learning.
Supervised learning is one of the most widely deployed machine learning technologies, and a particular domain of success has been classification. Classification is the process of taking an input (which might be an image) and categorizing it into one of a number of different classes (e.g. dog or cat). This simple idea underpins a lot of machine learning. By scanning across the image we can also determine where the animal is in the image.
Classification is perhaps the technique most closely associated with machine learning. In the speech based agents, on-device classifiers are used to determine when the wake word is used. A wake word is a word that wakes up the device. For the Amazon Echo it is “Alexa”, for Siri it is “Hey Siri”. Once the wake word is detected with a classifier, the speech can be uploaded to the cloud for full processing, the speech recognition stages.
A major breakthrough in image classification came in 2012 with the ImageNet result of Alex Krizhevsky, Ilya Sutskever and Geoff Hinton from the University of Toronto. ImageNet is a large data base of 14 million images with many thousands of classes. The data is used in a community-wide challenge for object categorization. Krizhevsky et al used convolutional neural networks to outperform all previous approaches on the challenge. They formed a company which was purchased shortly after by Google. This challenge, known as object categorisation, was a major obstacle for practical computer vision systems. Modern object categorization systems are close to human performance.
-
Machine learning problems normally involve a prediction function and an objective function. Regression is the case where the prediction function iss over the real numbers, so the codomain of the functions, $\mappingFunction(\inputMatrix)$ was the real numbers or sometimes real vectors. The classification problem consists of predicting whether or not a particular example is a member of a particular class. So we may want to know if a particular image represents a digit 6 or if a particular user will click on a given advert. These are classification problems, and they require us to map to yes or no answers. That makes them naturally discrete mappings.
-
In classification we are given an input vector, $\inputVector$, and an associated label, $\dataScalar$ which either takes the value − 1 to represent no or 1 to represent yes.
-
In supervised learning the inputs, $\inputVector$, are mapped to a label, $\dataScalar$, through a function $\mappingFunction(\cdot)$ that is dependent on a set of parameters, $\weightVector$, $$
-\dataScalar = \mappingFunction(\inputVector; \weightVector).
-$$ The function $\mappingFunction(\cdot)$ is known as the prediction function. The key challenges are (1) choosing which features, $\inputVector$, are relevant in the prediction, (2) defining the appropriate class of function, $\mappingFunction(\cdot)$, to use and (3) selecting the right parameters, $\weightVector$.
Supervised learning is one of the most widely deployed machine
+learning technologies, and a particular domain of success has been
+classification. Classification is the process of taking an
+input (which might be an image) and categorizing it into one of a number
+of different classes (e.g. dog or cat). This simple idea underpins a lot
+of machine learning. By scanning across the image we can also determine
+where the animal is in the image.
Classification is perhaps the technique most closely associated with
+machine learning. In the speech based agents, on-device classifiers are
+used to determine when the wake word is used. A wake word is a word that
+wakes up the device. For the Amazon Echo it is “Alexa”, for Siri it is
+“Hey Siri”. Once the wake word is detected with a classifier, the speech
+can be uploaded to the cloud for full processing, the speech recognition
+stages.
A major breakthrough in image classification came in 2012 with the
+ImageNet result of Alex
+Krizhevsky, Ilya Sutskever and Geoff Hinton from the University of
+Toronto. ImageNet is a large database of 14 million images with many
+thousands of classes. The data is used in a community-wide challenge for
+object categorization. Krizhevsky et al. used convolutional neural
+networks to outperform all previous approaches on the challenge. They
+formed a company which was purchased shortly after by Google. This
+challenge, known as object categorisation, was a major obstacle for
+practical computer vision systems. Modern object categorization systems
+are close to human performance.
+
Machine learning problems normally involve a prediction function and
+an objective function. Regression is the case where the prediction
+function is over the real numbers, so the codomain of the functions,
+\(f(\mathbf{X})\) is the real numbers
+or sometimes real vectors. The classification problem consists of
+predicting whether or not a particular example is a member of a
+particular class. So we may want to know if a particular image
+represents a digit 6 or if a particular user will click on a given
+advert. These are classification problems, and they require us to map to
+yes or no answers. That makes them naturally discrete
+mappings.
+
In classification we are given an input vector, \(\mathbf{ x}\), and an associated label,
+\(y\) which either takes the value
+\(-1\) to represent no or
+\(1\) to represent yes.
+
In supervised learning the inputs, \(\mathbf{ x}\), are mapped to a label, \(y\), through a function \(f(\cdot)\) that is dependent on a set of
+parameters, \(\mathbf{ w}\), \[
+y= f(\mathbf{ x}; \mathbf{ w}).
+\] The function \(f(\cdot)\) is
+known as the prediction function. The key challenges are (1)
+choosing which features, \(\mathbf{
+x}\), are relevant in the prediction, (2) defining the
+appropriate class of function, \(f(\cdot)\), to use and (3) selecting the
+right parameters, \(\mathbf{ w}\).
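A minimal sketch of this setup (hypothetical names and values, not from the original notes): the three design choices show up directly as the features we pass in, the form of the function, and the parameter values.

import numpy as np

def f(x, w):
    # (2) the class of function: here assumed linear, with w[0] as a bias term
    return w[0] + np.dot(x, w[1:])

x = np.array([2.0, 0.5])        # (1) hypothetical features for one example
w = np.array([0.1, -1.0, 3.0])  # (3) hypothetical parameter values
y = f(x, w)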
A logistic regression is an approach to classification which extends the linear basis function models we’ve already explored. Rather than modeling the output of the function directly the assumption is that we model the log-odds with the basis functions.
-
The odds are defined as the ratio of the probability of a positive outcome, to the probability of a negative outcome. If the probability of a positive outcome is denoted by π, then the odds are computed as $\frac{\pi}{1-\pi}$. Odds are widely used by bookmakers in gambling, although a bookmakers odds won’t normalise: i.e. if you look at the equivalent probabilities, and sum over the probability of all outcomes the bookmakers are considering, then you won’t get one. This is how the bookmaker makes a profit. Because a probability is always between zero and one, the odds are always between 0 and ∞. If the positive outcome is unlikely the odds are close to zero, if it is very likely then the odds become close to infinite. Taking the logarithm of the odds maps the odds from the positive half space to being across the entire real line. Odds that were between 0 and 1 (where the negative outcome was more likely) are mapped to the range between − ∞ and 0. Odds that are greater than 1 are mapped to the range between 0 and ∞. Considering the log odds therefore takes a number between 0 and 1 (the probability of positive outcome) and maps it to the entire real line. The function that does this is known as the logit function, $g^{-1}(p_i) = \log\frac{p_i}{1-p_i}$. This function is known as a link function.
-
For a standard regression we take, $$
-\mappingFunction(\inputVector) = \mappingVector^\top
-\basisVector(\inputVector),
-$$ if we want to perform classification we perform a logistic regression. $$
-\log \frac{\pi}{(1-\pi)} = \mappingVector^\top
-\basisVector(\inputVector)
-$$ where the odds ratio between the positive class and the negative class is given by $$
+
A logistic regression is an approach to classification which extends
+the linear basis function models we’ve already explored. Rather than
+modeling the output of the function directly the assumption is that we
+model the log-odds with the basis functions.
+
The odds are defined
+as the ratio of the probability of a positive outcome, to the
+probability of a negative outcome. If the probability of a positive
+outcome is denoted by \(\pi\), then the
+odds are computed as \(\frac{\pi}{1-\pi}\). Odds are widely used
+by bookmakers in
+gambling, although a bookmaker’s odds won’t normalise: i.e. if you look
+at the equivalent probabilities, and sum over the probability of all
+outcomes the bookmakers are considering, then you won’t get one. This is
+how the bookmaker makes a profit. Because a probability is always
+between zero and one, the odds are always between \(0\) and \(\infty\). If the positive outcome is
+unlikely the odds are close to zero, if it is very likely then the odds
+become close to infinite. Taking the logarithm of the odds maps the odds
+from the positive half space to being across the entire real line. Odds
+that were between 0 and 1 (where the negative outcome was more likely)
+are mapped to the range between \(-\infty\) and \(0\). Odds that are greater than 1 are
+mapped to the range between \(0\) and
+\(\infty\). Considering the log odds
+therefore takes a number between 0 and 1 (the probability of positive
+outcome) and maps it to the entire real line. The function that does
+this is known as the logit
+function, \(g^{-1}(p_i) =
+\log\frac{p_i}{1-p_i}\). This function is known as a link
+function.
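A small numpy sketch (illustrative, not the notes’ own code) of the two maps described above: the logit takes a probability to the log-odds on the real line, and its inverse squashes the real line back into \((0, 1)\).

import numpy as np

def logit(p):
    # link function g^{-1}: probability -> log-odds
    return np.log(p/(1 - p))

def inverse_logit(f):
    # inverse link g: log-odds -> probability (the logistic/sigmoid function)
    return 1./(1 + np.exp(-f))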
+
For a standard regression we take, \[
+f(\mathbf{ x}) = \mathbf{ w}^\top
+\boldsymbol{ \phi}(\mathbf{ x}),
+\] if we want to perform classification we perform a logistic
+regression. \[
+\log \frac{\pi}{(1-\pi)} = \mathbf{ w}^\top
+\boldsymbol{ \phi}(\mathbf{ x})
+\] where the odds ratio between the positive class and the
+negative class is given by \[
\frac{\pi}{(1-\pi)}
-$$ The odds can never be negative, but can take any value from 0 to ∞. We have defined the link function as taking the form g − 1( ⋅ ) implying that the inverse link function is given by g( ⋅ ). Since we have defined, $$
+\] The odds can never be negative, but can take any value from 0
+to \(\infty\). We have defined the link
+function as taking the form \(g^{-1}(\cdot)\) implying that the inverse
+link function is given by \(g(\cdot)\).
+Since we have defined, \[
g^{-1}(\pi) =
-\mappingVector^\top \basisVector(\inputVector)
-$$ we can write π in terms of the inverse link function, g( ⋅ ) as $$
-\pi = g(\mappingVector^\top
-\basisVector(\inputVector)).
-$$
+\mathbf{ w}^\top \boldsymbol{ \phi}(\mathbf{ x})
+\] we can write \(\pi\) in terms
+of the inverse link function, \(g(\cdot)\) as \[
+\pi = g(\mathbf{ w}^\top
+\boldsymbol{ \phi}(\mathbf{ x})).
+\]
Basis Function
-
We’ll define our prediction, objective and gradient functions below. But before we start, we need to define a basis function for our model. Let’s start with the linear basis.
-
import numpy as np
-
-from mlai import linear
+
We’ll define our prediction, objective and gradient functions below.
+But before we start, we need to define a basis function for our model.
+Let’s start with the linear basis.
+
import numpy as np
+
import mlai
+
+from mlai import linear
Prediction Function
-
Now we have the basis function let’s define the prediction function.
-
import numpy as np
-
def predict(w, x, basis=linear, **kwargs):
-"Generates the prediction function and the basis matrix."
- Phi = basis(x, **kwargs)
- f = np.dot(Phi, w)
-return1./(1+np.exp(-f)), Phi
-
This inverse of the link function is known as the logistic (thus the name logistic regression) or sometimes it is called the sigmoid function. For a particular value of the input to the link function, $\mappingFunction_i = \mappingVector^\top \basisVector(\inputVector_i)$ we can plot the value of the inverse link function as below.
Now we have the basis function let’s define the prediction
+function.
+
import numpy as np
+
def predict(w, x, basis=linear, **kwargs):
+    "Generates the prediction function and the basis matrix."
+    Phi = basis(x, **kwargs)
+    f = np.dot(Phi, w)
+    return 1./(1+np.exp(-f)), Phi
+
This inverse of the link function is known as the logistic (thus
+the name logistic regression) or sometimes it is called the sigmoid
+function. For a particular value of the input to the link function,
+\(f_i = \mathbf{ w}^\top \boldsymbol{
+\phi}(\mathbf{ x}_i)\) we can plot the value of the inverse link
+function as below.
+
By replacing the inverse link with the sigmoid we can write \(\pi\) as a function of the input and the
+parameter vector as, \[
+\pi(\mathbf{ x},\mathbf{ w}) = \frac{1}{1+\exp\left(-\mathbf{ w}^\top
+\boldsymbol{ \phi}(\mathbf{ x})\right)}.
+\] The process for logistic regression is as follows. Compute the
+output of a standard linear basis function composition (\(\mathbf{ w}^\top \boldsymbol{ \phi}(\mathbf{
+x})\), as we did for linear regression) and then apply the
+inverse link function, \(g(\mathbf{ w}^\top
+\boldsymbol{ \phi}(\mathbf{ x}))\). In logistic regression this
+involves squashing it with the logistic (or sigmoid) function.
+Use this value, which now has an interpretation as a
+probability in a Bernoulli distribution, to form the likelihood.
+Then we can assume conditional independence of each data point given the
+parameters and develop a likelihood for the entire data set.
+
As we discussed last time, the Bernoulli likelihood is of the form,
+\[
+P(y_i|\mathbf{ w}, \mathbf{ x}) =
+\pi_i^{y_i} (1-\pi_i)^{1-y_i}
+\] which we can think of as a clever trick for mathematically
+switching between two probabilities. If we were to write it as code it
+would be better described as
+
def bernoulli(x, y, pi):
+    if y == 1:
+        return pi(x)
+    else:
+        return 1 - pi(x)
+
but writing it mathematically makes it easier to write our objective
+function within a single mathematical equation.
+
Maximum Likelihood
+
To obtain the parameters of the model, we need to maximize the
+likelihood, or minimize the objective function, normally taken to be the
+negative log likelihood. With a data conditional independence assumption
+the likelihood has the form, \[
+P(\mathbf{ y}|\mathbf{ w},
+\mathbf{X}) = \prod_{i=1}^nP(y_i|\mathbf{ w}, \mathbf{ x}_i).
+\] which can be written as a log likelihood in the form \[
+\log P(\mathbf{ y}|\mathbf{ w},
+\mathbf{X}) = \sum_{i=1}^n\log P(y_i|\mathbf{ w}, \mathbf{ x}_i) =
+\sum_{i=1}^n
+y_i \log \pi_i + \sum_{i=1}^n(1-y_i)\log (1-\pi_i)
+\] and if we take the probability of positive outcome for the
+\(i\)th data point to be given by \[
+\pi_i = g\left(\mathbf{ w}^\top \boldsymbol{ \phi}(\mathbf{
+x}_i)\right),
+\] where \(g(\cdot)\) is the
+inverse link function, then this leads to an objective function
+of the form, \[
+E(\mathbf{ w}) = - \sum_{i=1}^ny_i \log
+g\left(\mathbf{ w}^\top \boldsymbol{ \phi}(\mathbf{ x}_i)\right) -
+\sum_{i=1}^n(1-y_i)\log \left(1-g\left(\mathbf{ w}^\top
+\boldsymbol{ \phi}(\mathbf{ x}_i)\right)\right).
+\]
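The gradient descent loop further down calls an objective function; as a minimal sketch (an assumption about its interface, consistent with the negative log likelihood above, with g holding the predicted probabilities \(\pi_i\) and y the binary labels), it could be written as:

import numpy as np

def objective(g, y):
    # negative log likelihood E(w), guarding against log(0)
    y = np.asarray(y, dtype=float).reshape(g.shape)
    eps = 1e-15
    return -np.sum(y*np.log(g + eps) + (1 - y)*np.log(1 - g + eps))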
As normal, we would like to minimize this objective. This can be done
+by differentiating with respect to the parameters of our prediction
+function, \(\pi(\mathbf{ x};\mathbf{
+w})\), for optimisation. The gradient of the likelihood with
+respect to \(\pi(\mathbf{ x};\mathbf{
+w})\) is of the form, \[
+\frac{\text{d}E(\mathbf{ w})}{\text{d}\mathbf{ w}} = -\sum_{i=1}^n
+\frac{y_i}{g\left(\mathbf{ w}^\top
+\boldsymbol{ \phi}(\mathbf{
+x})\right)}\frac{\text{d}g(f_i)}{\text{d}f_i}
+\boldsymbol{ \phi}(\mathbf{ x}_i) + \sum_{i=1}^n
+\frac{1-y_i}{1-g\left(\mathbf{ w}^\top
+\boldsymbol{ \phi}(\mathbf{
+x})\right)}\frac{\text{d}g(f_i)}{\text{d}f_i}
+\boldsymbol{ \phi}(\mathbf{ x}_i)
+\] where we used the chain rule to develop the derivative in
+terms of \(\frac{\text{d}g(f_i)}{\text{d}f_i}\), which
+is the gradient of the inverse link function (in our case the gradient
+of the sigmoid function).
+
So the gradient of the objective function depends on the gradient of the
+inverse link function, on the gradient of the log likelihood, and
+naturally on the gradient of the argument of the inverse link function
+with respect to the parameters, which is simply \(\boldsymbol{ \phi}(\mathbf{ x}_i)\).
+
The only missing term is the gradient of the inverse link function.
+For the sigmoid squashing function we have, \[\begin{align*}
+g(f_i) &= \frac{1}{1+\exp(-f_i)}\\
+&=(1+\exp(-f_i))^{-1}
+\end{align*}\] and the gradient can be computed as \[\begin{align*}
+\frac{\text{d}g(f_i)}{\text{d} f_i} & =
+\exp(-f_i)(1+\exp(-f_i))^{-2}\\
+& = \frac{1}{1+\exp(-f_i)}
+\frac{\exp(-f_i)}{1+\exp(-f_i)} \\
+& = g(f_i) (1-g(f_i))
+\end{align*}\] so the full gradient can be written down as \[
+\frac{\text{d}E(\mathbf{ w})}{\text{d}\mathbf{ w}} = -\sum_{i=1}^n
+y_i\left(1-g\left(\mathbf{ w}^\top \boldsymbol{ \phi}(\mathbf{
+x})\right)\right)
+\boldsymbol{ \phi}(\mathbf{ x}_i) + \sum_{i=1}^n
+(1-y_i)\left(g\left(\mathbf{ w}^\top \boldsymbol{ \phi}(\mathbf{
+x})\right)\right)
+\boldsymbol{ \phi}(\mathbf{ x}_i).
+\]
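Likewise, a minimal sketch of a gradient function with the interface used by the descent loop below (an assumption, not the notes’ own code; it simply evaluates the expression above, with g holding \(\pi_i\) and Phi the basis matrix):

import numpy as np

def gradient(g, Phi, y):
    # d E(w) / d w for the logistic regression objective above
    y = np.asarray(y, dtype=float).reshape(g.shape)
    dEdw = -(Phi*(y*(1 - g))).sum(axis=0)[:, None]
    dEdw += (Phi*((1 - y)*g)).sum(axis=0)[:, None]
    return dEdw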
Reorganizing the gradient to find a stationary point of the function
+with respect to the parameters \(\mathbf{
+w}\) turns out to be impossible. Optimization has to proceed by
+numerical methods. Options include the multidimensional variant
+of Newton’s
+method or gradient based
+optimization methods like we used for optimizing matrix
+factorization for the movie recommender system. We recall from matrix
+factorization that, for large data, stochastic gradient descent
+or the Robbins Munro (Robbins and Monro, 1951)
+optimization procedure worked best for function minimization.
As an example data set we will use the Nigerian Millennium Development
+Goals Information System Health Facility data (The Office of the Senior Special Assistant
+to the President on the Millennium Development Goals (OSSAP-MDGs) and
+Columbia University, 2014). It can be found here https://energydata.info/dataset/nigeria-nmis-education-facility-data-2014.
+
Taking from the information on the site,
+
+
The Nigeria MDG (Millennium Development Goals) Information System –
+NMIS health facility data is collected by the Office of the Senior
+Special Assistant to the President on the Millennium Development Goals
+(OSSAP-MDGs) in partner with the Sustainable Engineering Lab at Columbia
+University. A rigorous, geo-referenced baseline facility inventory
+across Nigeria is created spanning from 2009 to 2011 with an additional
+survey effort to increase coverage in 2014, to build Nigeria’s first
+nation-wide inventory of health facility. The database includes 34,139
+health facilities info in Nigeria.
+
The goal of this database is to make the data collected available to
+planners, government officials, and the public, to be used to make
+strategic decisions for planning relevant interventions.
+
For data inquiry, please contact Ms. Funlola Osinupebi, Performance
+Monitoring & Communications, Advisory Power Team, Office of the Vice
+President at funlola.osinupebi@aptovp.org
Suggested citation: Nigeria NMIS facility database (2014), the Office
+of the Senior Special Assistant to the President on the Millennium
+Development Goals (OSSAP-MDGs) & Columbia University
+
+
For ease of use we’ve packaged this data set in the pods
+library
In Sheffield we created a suite of software tools for ‘Open Data
+Science’. Open data science is an approach to sharing code, models and
+data that should make it easier for companies, health professionals and
+scientists to gain access to data science techniques.
Once it is loaded in the data can be summarized using the
+describe method in pandas.
+
data.describe()
+
We can also find out the dimensions of the dataset using the
+shape property.
+
data.shape
+
Dataframes have different functions that you can use to explore and
+understand your data. In python and the Jupyter notebook it is possible
+to see a list of all possible functions and attributes by typing the
+name of the object followed by .<Tab> for example in
+the above case if we type data.<Tab> it shows the
+columns available (these are attributes in pandas dataframes) such as
+num_nurses_fulltime, and also functions, such as
+.describe().
+
For functions we can also see the documentation about the function by
+following the name with a question mark. This will open a box with
+documentation at the bottom which can be closed with the x button.
+
data.describe?
Figure: The logistic function.
+
+
-
The function has this characteristic ‘s’-shape (which is where the term sigmoid, as in sigma, comes from). It also takes the input from the entire real line and ‘squashes’ it into an output that is between zero and one. For this reason it is sometimes also called a ‘squashing function’.
-
By replacing the inverse link with the sigmoid we can write π as a function of the input and the parameter vector as, $$
-\pi(\inputVector,\mappingVector) = \frac{1}{1+\exp\left(-\mappingVector^\top \basisVector(\inputVector)\right)}.
-$$ The process for logistic regression is as follows. Compute the output of a standard linear basis function composition ($\mappingVector^\top \basisVector(\inputVector)$, as we did for linear regression) and then apply the inverse link function, $g(\mappingVector^\top \basisVector(\inputVector))$. In logistic regression this involves squashing it with the logistic (or sigmoid) function. Use this value, which now has an interpretation as a probability in a Bernoulli distribution to form the likelihood. Then we can assume conditional independence of each data point given the parameters and develop a likelihod for the entire data set.
-
As we discussed last time, the Bernoulli likelihood is of the form, $$
-P(\dataScalar_i|\mappingVector, \inputVector) =
-\pi_i^{\dataScalar_i} (1-\pi_i)^{1-\dataScalar_i}
-$$ which we can think of as clever trick for mathematically switching between two probabilities if we were to write it as code it would be better described as
-
def bernoulli(x, y, pi):
-if y ==1:
-return pi(x)
-else:
-return1-pi(x)
-
but writing it mathematically makes it easier to write our objective function within a single mathematical equation.
-
Maximum Likelihood
-
To obtain the parameters of the model, we need to maximize the likelihood, or minimize the objective function, normally taken to be the negative log likelihood. With a data conditional independence assumption the likelihood has the form, $$
-P(\dataVector|\mappingVector,
-\inputMatrix) = \prod_{i=1}^\numData P(\dataScalar_i|\mappingVector, \inputVector_i).
-$$ which can be written as a log likelihood in the form $$
-\log P(\dataVector|\mappingVector,
-\inputMatrix) = \sum_{i=1}^\numData \log P(\dataScalar_i|\mappingVector, \inputVector_i) = \sum_{i=1}^\numData
-\dataScalar_i \log \pi_i + \sum_{i=1}^\numData (1-\dataScalar_i)\log (1-\pi_i)
-$$ and if we take the probability of positive outcome for the ith data point to be given by $$
-\pi_i = g\left(\mappingVector^\top \basisVector(\inputVector_i)\right),
-$$ where g( ⋅ ) is the inverse link function, then this leads to an objective function of the form, $$
-E(\mappingVector) = - \sum_{i=1}^\numData \dataScalar_i \log
-g\left(\mappingVector^\top \basisVector(\inputVector_i)\right) -
-\sum_{i=1}^\numData(1-\dataScalar_i)\log \left(1-g\left(\mappingVector^\top
-\basisVector(\inputVector_i)\right)\right).
-$$
As normal, we would like to minimize this objective. This can be done by differentiating with respect to the parameters of our prediction function, $\pi(\inputVector;\mappingVector)$, for optimisation. The gradient of the likelihood with respect to $\pi(\inputVector;\mappingVector)$ is of the form, $$
-\frac{\text{d}E(\mappingVector)}{\text{d}\mappingVector} = -\sum_{i=1}^\numData
-\frac{\dataScalar_i}{g\left(\mappingVector^\top
-\basisVector(\inputVector)\right)}\frac{\text{d}g(\mappingFunction_i)}{\text{d}\mappingFunction_i}
-\basisVector(\inputVector_i) + \sum_{i=1}^\numData
-\frac{1-\dataScalar_i}{1-g\left(\mappingVector^\top
-\basisVector(\inputVector)\right)}\frac{\text{d}g(\mappingFunction_i)}{\text{d}\mappingFunction_i}
-\basisVector(\inputVector_i)
-$$ where we used the chain rule to develop the derivative in terms of $\frac{\text{d}g(\mappingFunction_i)}{\text{d}\mappingFunction_i}$, which is the gradient of the inverse link function (in our case the gradient of the sigmoid function).
-
So the objective function now depends on the gradient of the inverse link function, as well as the likelihood depends on the gradient of the inverse link function, as well as the gradient of the log likelihood, and naturally the gradient of the argument of the inverse link function with respect to the parameters, which is simply $\basisVector(\inputVector_i)$.
-
The only missing term is the gradient of the inverse link function. For the sigmoid squashing function we have, $$\begin{align*}
-g(\mappingFunction_i) &= \frac{1}{1+\exp(-\mappingFunction_i)}\\
-&=(1+\exp(-\mappingFunction_i))^{-1}
-\end{align*}$$ and the gradient can be computed as $$\begin{align*}
-\frac{\text{d}g(\mappingFunction_i)}{\text{d} \mappingFunction_i} & =
-\exp(-\mappingFunction_i)(1+\exp(-\mappingFunction_i))^{-2}\\
-& = \frac{1}{1+\exp(-\mappingFunction_i)}
-\frac{\exp(-\mappingFunction_i)}{1+\exp(-\mappingFunction_i)} \\
-& = g(\mappingFunction_i) (1-g(\mappingFunction_i))
-\end{align*}$$ so the full gradient can be written down as $$
-\frac{\text{d}E(\mappingVector)}{\text{d}\mappingVector} = -\sum_{i=1}^\numData
-\dataScalar_i\left(1-g\left(\mappingVector^\top \basisVector(\inputVector)\right)\right)
-\basisVector(\inputVector_i) + \sum_{i=1}^\numData
-(1-\dataScalar_i)\left(g\left(\mappingVector^\top \basisVector(\inputVector)\right)\right)
-\basisVector(\inputVector_i).
-$$
Reorganizing the gradient to find a stationary point of the function with respect to the parameters $\mappingVector$ turns out to be impossible. Optimization has to proceed by numerical methods. Options include the multidimensional variant of Newton’s method or gradient based optimization methods like we used for optimizing matrix factorization for the movie recommender system. We recall from matrix factorization that, for large data, stochastic gradient descent or the Robbins Munro (Robbins and Monro 1951) optimization procedure worked best for function minimization.
First we will load in the Nigerian NMIS health data. Our aim will be to predict whether a center has maternal health delivery services given the attributes in the data. We will predict from the number of nurses, the number of doctors, location etc.
data = pd.read_csv('healthmopupandbaselinenmisfacility.csv')
-
data.head()}
-
Now we will convert this data into a form which we can use as inputs X, and labels y.
-
import pandas as pd
-import numpy as np
-
data = data[~pd.isnull(data['maternal_health_delivery_services'])]
-data = data.dropna() # Remove entries with missing values
-X = data[['emergency_transport',
-'num_chews_fulltime',
-'phcn_electricity',
-'child_health_measles_immun_calc',
-'num_nurses_fulltime',
-'num_doctors_fulltime',
-'improved_water_supply',
-'improved_sanitation',
-'antenatal_care_yn',
-'family_planning_yn',
-'malaria_treatment_artemisinin',
-'latitude',
-'longitude']].copy()
-y = data['maternal_health_delivery_services']==True# set label to be whether there's a maternal health delivery service
-
-# Create series of health center types with the relevant index
-s = data['facility_type_display'].apply(pd.Series, 1).stack()
-s.index = s.index.droplevel(-1) # to line up with df's index
-
-# Extract from the series the unique list of types.
-types = s.unique()
-
-# For each type extract the indices where it is present and add a column to X
-type_names = []
-for htype in types:
- index = s[s==htype].index.tolist()
- type_col=htype.replace(' ', '_').replace('/','-').lower()
- type_names.append(type_col)
- X.loc[:, type_col] =0.0
- X.loc[index, type_col] =1.0
-
This has given us a new data frame X which contains the different facility types in different columns.
-
X.describe()
+
+
Figure: Location of the over thirty-four thousand health facilities
+registered in the NMIS data across Nigeria. Each facility plotted
+according to its latitude and longitude.
Our aim will be to predict whether a center has maternal health
+delivery services given the attributes in the data. We will predict from
+the number of nurses, the number of doctors, location etc.
+
Now we will convert this data into a form which we can use as inputs
+X, and labels y.
+
import pandas as pd
+import numpy as np
+
data = data[~pd.isnull(data['maternal_health_delivery_services'])]
+data = data.dropna() # Remove entries with missing values
+X = data[['emergency_transport',
+'num_chews_fulltime',
+'phcn_electricity',
+'child_health_measles_immun_calc',
+'num_nurses_fulltime',
+'num_doctors_fulltime',
+'improved_water_supply',
+'improved_sanitation',
+'antenatal_care_yn',
+'family_planning_yn',
+'malaria_treatment_artemisinin',
+'latitude',
+'longitude']].copy()
+y = data['maternal_health_delivery_services']==True# set label to be whether there's a maternal health delivery service
+
+# Create series of health center types with the relevant index
+s = data['facility_type_display'].apply(pd.Series, 1).stack()
+s.index = s.index.droplevel(-1) # to line up with df's index
+
+# Extract from the series the unique list of types.
+types = s.unique()
+
+# For each type extract the indices where it is present and add a column to X
+type_names = []
+for htype in types:
+ index = s[s==htype].index.tolist()
+ type_col=htype.replace(' ', '_').replace('/','-').lower()
+ type_names.append(type_col)
+ X.loc[:, type_col] =0.0
+ X.loc[index, type_col] =1.0
+
This has given us a new data frame X which contains the
+different facility types in different columns.
We will need to define some initial random values for our vector and
+then minimize the objective by descending the gradient.
+
# Separate train and test
+indices = np.random.permutation(X.shape[0])
+num_train = int(np.ceil(X.shape[0]/2))
+train_indices = indices[:num_train]
+test_indices = indices[num_train:]
+X_train = X.iloc[train_indices]
+y_train = y.iloc[train_indices]==True
+X_test = X.iloc[test_indices]
+y_test = y.iloc[test_indices]==True
+
import numpy as np
+
# gradient descent algorithm
+w = np.random.normal(size=(X.shape[1]+1, 1), scale=0.001)
+eta = 1e-9
+iters = 10000
+for i in range(iters):
+    g, Phi = predict(w, X_train, linear)
+    w -= eta*gradient(g, Phi, y_train) + 0.001*w
+    if not i % 100:
+        print("Iter", i, "Objective", objective(g, y_train))
Let’s look at the weights and how they relate to the inputs.
-
import matplotlib.pyplot as plt
-
print(w)
-
What does the magnitude of the weight vectors tell you about the different parameters and their influence on outcome? Are the weights of roughly the same size, if not, how might you fix this?
What does the magnitude of the weight vectors tell you about the
+different parameters and their influence on outcome? Are the weights of
+roughly the same size, if not, how might you fix this?
Now construct a stochastic gradient descent algorithm and run it on the data. Is it faster or slower than batch gradient descent? What can you do to improve convergence speed?
+
Exercise 1
+
Now construct a stochastic gradient descent algorithm and run it on
+the data. Is it faster or slower than batch gradient descent? What can
+you do to improve convergence speed?
Classification is the case where our prediction function gives a discrete valued output, normally associated with a ‘class’. Regression is an alternative approach where the aim is to predict a continuous output.
-
The name is a historical accident, it would be better to call regression ‘curve fitting’, or even split it into two parts ‘interpolation’, which is the practice of predicting a function value between existing data, and ‘extrapolation’, which is the practice of predicting a function value beyond the regime where we have data.
Classification is the case where our prediction function gives a
+discrete valued output, normally associated with a ‘class’. Regression
+is an alternative approach where the aim is to predict a continuous
+output.
+
+The name is a historical accident; it would be better to call
+regression ‘curve fitting’, or even split it into two parts
+‘interpolation’, which is the practice of predicting a function value
+between existing data, and ‘extrapolation’, which is the practice of
+predicting a function value beyond the regime where we have data.
Regression involves predicting a real value, $\dataScalar_i$, given an input vector, $\inputVector_i$. For example, the Tecator data involves predicting the quality of meat given spectral measurements. Or in radiocarbon dating, the C14 calibration curve maps from radiocarbon age to age measured through a back-trace of tree rings. Regression has also been used to predict the quality of board game moves given expert rated training data.
Regression involves predicting a real value, \(y_i\), given an input vector, \(\mathbf{ x}_i\). For example, the Tecator
+data involves predicting the quality of meat given spectral
+measurements. Or in radiocarbon dating, the C14 calibration curve maps
+from radiocarbon age to age measured through a back-trace of tree rings.
+Regression has also been used to predict the quality of board game moves
+given expert rated training data.
Feature selection is a critical stage in the algorithm design process. In the Olympic prediction example above we’re only using time to predict the the pace of the runners. In practice we might also want to use characteristics of the course: how hilly it is, what the temperature was when the race was run. In 1904 the runners actually got lost during the race. Should we include ‘lost’ as a feature? It would certainly help explain the particularly slow time in 1904. The features we select should be ones we expect to correlate with the prediction. In statistics, these features are even called predictors which highlights their role in developing the prediction function. For Facebook newsfeed, we might use features that include how close your friendship is with the poster, or how often you react to that poster, or whether a photo is included in the post.
-
Sometimes we use feature selection algorithms, algorithms that automate the process of finding the features that we need. Classification is often used to rank search results, to decide which adverts to serve or, at Facebook, to determine what appears at the top of your newsfeed. In the Facebook example features might include how many likes a post has had, whether it has an image in it, whether you regularly interact with the friend who has posted. A good newsfeed ranking algorithm is critical to Facebook’s success, just as good ad serving choice is critical to Google’s success. These algorithms are in turn highly dependent on the feature sets used. Facebook in particular has made heavy investments in machine learning pipelines for evaluation of the feature utility.
By class of function we mean, what are the characteristics of the mapping between x and y. Often, we might choose it to be a smooth function. Sometimes we will choose it to be a linear function. If the prediction is a forecast, for example the demand of a particular product, then the function would need some periodic components to reflect seasonal or weekly effects.
Feature selection is a critical stage in the algorithm design
+process. In the Olympic prediction example above we’re only using time
+to predict the pace of the runners. In practice we might also want
+to use characteristics of the course: how hilly it is, what the
+temperature was when the race was run. In 1904 the runners actually got
+lost during the race. Should we include ‘lost’ as a feature? It would
+certainly help explain the particularly slow time in 1904. The features
+we select should be ones we expect to correlate with the prediction. In
+statistics, these features are even called predictors which
+highlights their role in developing the prediction function. For
+Facebook newsfeed, we might use features that include how close your
+friendship is with the poster, or how often you react to that poster, or
+whether a photo is included in the post.
+
Sometimes we use feature selection algorithms, algorithms that
+automate the process of finding the features that we need.
+Classification is often used to rank search results, to decide which
+adverts to serve or, at Facebook, to determine what appears at the top
+of your newsfeed. In the Facebook example features might include how
+many likes a post has had, whether it has an image in it, whether you
+regularly interact with the friend who has posted. A good newsfeed
+ranking algorithm is critical to Facebook’s success, just as good ad
+serving choice is critical to Google’s success. These algorithms are in
+turn highly dependent on the feature sets used. Facebook in particular
+has made heavy investments in machine learning pipelines for evaluation
+of the feature utility.
By class of function we mean, what are the characteristics of the
+mapping between \(\mathbf{x}\) and
+\(y\). Often, we might choose it to be
+a smooth function. Sometimes we will choose it to be a linear function.
+If the prediction is a forecast, for example the demand of a particular
+product, then the function would need some periodic components to
+reflect seasonal or weekly effects.
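As an illustrative sketch (not from the original notes), periodic components can be introduced through sine and cosine features, so that a linear-in-the-parameters model can still capture seasonal or weekly effects.

import numpy as np

def seasonal_basis(t, period=7.0, n_harmonics=2):
    # sine/cosine features at multiples of the base frequency; with daily
    # data period=7.0 gives weekly effects, period=365.25 yearly ones
    columns = [np.ones_like(t)]
    for k in range(1, n_harmonics + 1):
        columns.append(np.sin(2*np.pi*k*t/period))
        columns.append(np.cos(2*np.pi*k*t/period))
    return np.column_stack(columns)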
Figure: This is a retrospective analysis of US births by Aki Vehtari. The challenges of forecasting. Even with seasonal and weekly effects removed there are significant effects on holidays, weekends, etc.
+
Figure: This is a retrospective analysis of US births by Aki Vehtari.
+The challenges of forecasting. Even with seasonal and weekly effects
+removed there are significant effects on holidays, weekends, etc.
-
There’s a nice analysis of US birth rates by Gaussian processes with additive covariances in Gelman et al. (2013). A combination of covariance functions are used to take account of weekly and yearly trends. The analysis is summarized on the cover of the book.
+
There’s a nice analysis of US birth rates by Gaussian processes with
+additive covariances in Gelman et al. (2013). A
+combination of covariance functions are used to take account of weekly
+and yearly trends. The analysis is summarized on the cover of the
+book.
Figure: Two different editions of Bayesian Data Analysis (Gelman et al. 2013).
-
-
-
In the ImageNet challenge the input, $\inputVector$, was in the form of an image. And the form of the prediction function was a convolutional neural network (more on this later). A convolutional neural network introduces invariances into the function that are particular to image classification. An invariance is a transformation of the input that we don’t want to affect the output. For example, a cat in an image is still a cat no matter where it’s located in the image (translation). The cat is also a cat regardless of how large it is (scale), or whether it’s upside-down (rotation). Convolutional neural networks encode these invariances: scale invariance, rotation invariance and translation invariance; in the mathematical function.
-
Encoding invariance in the prediction function is like encoding knowledge in the model. If we don’t specify these invariances, then the model must learn them. This will require a lot more data to achieve the same performance, making the model less data efficient. Note that one invariance that is not encoded in a convolutional network is invariance to camera type. As a result, practitioners need to be careful to ensure that their training data is representative of the type of cameras that will be used when the model is deployed.
-
In general the prediction function could be any set of parameterized functions. In the Olympic marathon data example above we used a polynomial fit, $$
-\mappingFunction(\inputScalar) = \weightScalar_0 + \weightScalar_1 \inputScalar+ \weightScalar_2 \inputScalar^2 + \weightScalar_3 \inputScalar^3 + \weightScalar_4 \inputScalar^4.
-$$ The Olympic example is also a supervised learning challenge. But it is a regression problem. A regression problem is one where the output is a continuous value (such as the pace in the marathon). In classification the output is constrained to be discrete. For example, classifying whether or not an image contains a dog implies the output is binary. An early example of a regression problem used in machine learning was the Tecator data, where the fat, water and protein content of meat samples was predicted as a function of the absorption of infrared light.
One class of function that has become popular recently is neural network functions, in particular deep neural networks. The ImageNet challenge uses convolutional neural networks which introduce a translation invariance to the prediction function.
-
It’s impressive that only this additional invariance is enough to improve performance so much, particularly when we know that rotational invariances and scale invariances are also applicable for object detection in images.
+
Figure: Two different editions of Bayesian Data Analysis (Gelman et al.,
+2013).
+
+
+
In the ImageNet challenge the input, \(\mathbf{ x}\), was in the form of an image.
+And the form of the prediction function was a convolutional neural
+network (more on this later). A convolutional neural network
+introduces invariances into the function that are particular to
+image classification. An invariance is a transformation of the input
+that we don’t want to affect the output. For example, a cat in an image
+is still a cat no matter where it’s located in the image (translation).
+The cat is also a cat regardless of how large it is (scale), or whether
+it’s upside-down (rotation). Convolutional neural networks encode these
+invariances: scale invariance, rotation invariance and translation
+invariance; in the mathematical function.
+
Encoding invariance in the prediction function is like encoding
+knowledge in the model. If we don’t specify these invariances, then the
+model must learn them. This will require a lot more data to achieve the
+same performance, making the model less data efficient. Note that one
+invariance that is not encoded in a convolutional network is
+invariance to camera type. As a result, practitioners need to be careful
+to ensure that their training data is representative of the type of
+cameras that will be used when the model is deployed.
+
In general the prediction function could be any set of parameterized
+functions. In the Olympic marathon data example above we used a
+polynomial fit, \[
+f(x) = w_0 + w_1 x+ w_2 x^2 + w_3 x^3 + w_4 x^4.
+\] The Olympic example is also a supervised learning challenge.
+But it is a regression problem. A regression problem is one
+where the output is a continuous value (such as the pace in the
+marathon). In classification the output is constrained to be discrete.
+For example, classifying whether or not an image contains a dog implies
+the output is binary. An early example of a regression problem used in
+machine learning was the Tecator data,
+where the fat, water and protein content of meat samples was predicted
+as a function of the absorption of infrared light.
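As a short sketch (hypothetical values, not the notes’ own code), the quartic polynomial above is linear in the parameters once the input is expanded into powers of \(x\):

import numpy as np

def polynomial(x, degree=4):
    # basis matrix with columns 1, x, x^2, ..., x^degree
    return np.column_stack([x**d for d in range(degree + 1)])

x = np.array([1896., 1920., 1960., 2000., 2020.])  # hypothetical marathon years
w = np.zeros(5)                                    # hypothetical weights w_0..w_4
f_pred = polynomial(x) @ w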
One class of function that has become popular recently is neural
+network functions, in particular deep neural networks. The ImageNet
+challenge uses convolutional neural networks which introduce a
+translation invariance to the prediction function.
+
It’s impressive that only this additional invariance is enough to
+improve performance so much, particularly when we know that rotational
+invariances and scale invariances are also applicable for object
+detection in images.
Classical statistical models and simple machine learning models have a great deal in common. The main difference between the fields is philosophical. Machine learning practitioners are typically more concerned with the quality of prediciton (e.g. measured by ROC curve) while statisticians tend to focus more on the interpretability of the model and the validity of any decisions drawn from that interpretation. For example, a statistical model may be used to validate whether a large scale intervention (such as the mass provision of mosquito nets) has had a long term effect on disease (such as malaria). In this case one of the covariates is likely to be the provision level of nets in a particular region. The response variable would be the rate of malaria disease in the region. The parmaeter, β1 associated with that covariate will demonstrate a positive or negative effect which would be validated in answering the question. The focus in statistics would be less on the accuracy of the response variable and more on the validity of the interpretation of the effect variable, β1.
-
A machine learning practitioner on the other hand would typically denote the parameter w1, instead of β1 and would only be interested in the output of the prediction function, $\mappingFunction(\cdot)$ rather than the parameter itself. The general formalism of the prediction function allows for non-linear models. In machine learning, the emphasis on prediction over interpretability means that non-linear models are often used. The parameters, w, are a means to an end (good prediction) rather than an end in themselves (interpretable).
Classical statistical models and simple machine learning models have
+a great deal in common. The main difference between the fields is
+philosophical. Machine learning practitioners are typically more
+concerned with the quality of prediction (e.g. measured by ROC curve)
+while statisticians tend to focus more on the interpretability of the
+model and the validity of any decisions drawn from that interpretation.
+For example, a statistical model may be used to validate whether a large
+scale intervention (such as the mass provision of mosquito nets) has had
+a long term effect on disease (such as malaria). In this case one of the
+covariates is likely to be the provision level of nets in a particular
+region. The response variable would be the rate of malaria disease in
+the region. The parameter, \(\beta_1\)
+associated with that covariate will demonstrate a positive or negative
+effect which would be validated in answering the question. The focus in
+statistics would be less on the accuracy of the response variable and
+more on the validity of the interpretation of the effect variable, \(\beta_1\).
+
A machine learning practitioner on the other hand would typically
+denote the parameter \(w_1\), instead
+of \(\beta_1\) and would only be
+interested in the output of the prediction function, \(f(\cdot)\) rather than the parameter
+itself. The general formalism of the prediction function allows for
+non-linear models. In machine learning, the emphasis on
+prediction over interpretability means that non-linear models are often
+used. The parameters, \(\mathbf{w}\),
+are a means to an end (good prediction) rather than an end in themselves
+(interpretable).
Figure: The DeepFace architecture (Taigman et al. 2014), visualized through colors to represent the functional mappings at each layer. There are 120 million parameters in the model.
-
-
-
The DeepFace architecture (Taigman et al. 2014) consists of layers that deal with translation and rotational invariances. These layers are followed by three locally-connected layers and two fully-connected layers. Color illustrates feature maps produced at each layer. The neural network includes more than 120 million parameters, where more than 95% come from the local and fully connected layers.
+
Figure: The DeepFace architecture (Taigman et al., 2014),
+visualized through colors to represent the functional mappings at each
+layer. There are 120 million parameters in the model.
+
+
+
The DeepFace architecture (Taigman et al., 2014) consists
+of layers that deal with translation invariances, known as
+convolutional layers. These layers are followed by three
+locally-connected layers and two fully-connected layers. Color
+illustrates feature maps produced at each layer. The neural network
+includes more than 120 million parameters, where more than 95% come from
+the local and fully connected layers.
Figure: Deep learning models are composition of simple functions. We can think of a pinball machine as an analogy. Each layer of pins corresponds to one of the layers of functions in the model. Input data is represented by the location of the ball from left to right when it is dropped in from the top. Output class comes from the position of the ball as it leaves the pins at the bottom.
-
-
-
Sometimes deep learning models are described as being like the brain, or too complex to understand, but one analogy I find useful to help the gist of these models is to think of them as being similar to early pin ball machines.
-
In a deep neural network, we input a number (or numbers), whereas in pinball, we input a ball.
-
Think of the location of the ball on the left-right axis as a single number. Our simple pinball machine can only take one number at a time. As the ball falls through the machine, each layer of pins can be thought of as a different layer of ‘neurons’. Each layer acts to move the ball from left to right.
-
In a pinball machine, when the ball gets to the bottom it might fall into a hole defining a score, in a neural network, that is equivalent to the decision: a classification of the input object.
-
An image has more than one number associated with it, so it is like playing pinball in a hyper-space.
+
+Figure: Deep learning models are a composition of simple functions. We
+can think of a pinball machine as an analogy. Each layer of pins
+corresponds to one of the layers of functions in the model. Input data
+is represented by the location of the ball from left to right when it is
+dropped in from the top. Output class comes from the position of the
+ball as it leaves the pins at the bottom.
+
+
+
Sometimes deep learning models are described as being like the brain,
+or too complex to understand, but one analogy I find useful for conveying the
+gist of these models is to think of them as being similar to early pin
+ball machines.
+
In a deep neural network, we input a number (or numbers), whereas in
+pinball, we input a ball.
+
Think of the location of the ball on the left-right axis as a single
+number. Our simple pinball machine can only take one number at a time.
+As the ball falls through the machine, each layer of pins can be thought
+of as a different layer of ‘neurons’. Each layer acts to move the ball
+from left to right.
+
In a pinball machine, when the ball gets to the bottom it might fall
+into a hole defining a score, in a neural network, that is equivalent to
+the decision: a classification of the input object.
+
An image has more than one number associated with it, so it is like
+playing pinball in a hyper-space.
-
+
-
+
-
Figure: At initialization, the pins, which represent the parameters of the function, aren’t in the right place to bring the balls to the correct decisions.
+
Figure: At initialization, the pins, which represent the parameters
+of the function, aren’t in the right place to bring the balls to the
+correct decisions.
-
+
-
+
-
Figure: After learning the pins are now in the right place to bring the balls to the correct decisions.
-
-
-
Learning involves moving all the pins to be in the correct position, so that the ball ends up in the right place when it’s fallen through the machine. But moving all these pins in hyperspace can be difficult.
-
In a hyper-space you have to put a lot of data through the machine for to explore the positions of all the pins. Even when you feed many millions of data points through the machine, there are likely to be regions in the hyper-space where no ball has passed. When future test data passes through the machine in a new route unusual things can happen.
-
Adversarial examples exploit this high dimensional space. If you have access to the pinball machine, you can use gradient methods to find a position for the ball in the hyper space where the image looks like one thing, but will be classified as another.
-
Probabilistic methods explore more of the space by considering a range of possible paths for the ball through the machine. This helps to make them more data efficient and gives some robustness to adversarial examples.
+
Figure: After learning the pins are now in the right place to bring
+the balls to the correct decisions.
+
+
+
Learning involves moving all the pins to be in the correct position,
+so that the ball ends up in the right place when it’s fallen through the
+machine. But moving all these pins in hyperspace can be difficult.
+
In a hyper-space you have to put a lot of data through the machine
+to explore the positions of all the pins. Even when you feed many
+millions of data points through the machine, there are likely to be
+regions in the hyper-space where no ball has passed. When future test
+data passes through the machine by a new route, unusual things can
+happen.
+
Adversarial examples exploit this high dimensional space. If
+you have access to the pinball machine, you can use gradient methods to
+find a position for the ball in the hyper space where the image looks
+like one thing, but will be classified as another.
+
Probabilistic methods explore more of the space by considering a
+range of possible paths for the ball through the machine. This helps to
+make them more data efficient and gives some robustness to adversarial
+examples.
Encoding Knowledge
-
Knowledge that is not encoded in the prediction function must be learned through data. So any unspecified invariance (such as rotational or scale invariances) must be learned through the data. This means that learning would require a lot more data than otherwise would be necessary and results in less data efficient algorithms.
-
The choice of predication funciton and invariances is therefore a critical stage in designing your machine learning algorithm. Unfortunately many invariances are non-trivial to incorporate and many machine learning algorithms focus on simpler concepts such as linearity or smoothness.
Once we have a set of features, and the class of functions we use is determined, we need to find the parameters of the model.
-
The parameters of the model, $\weightVector$, are estimated by specifying an objective function. The objective function specifies the quality of the match between the prediction function and the training data. In supervised learning the objective function incorporates both the input data (in the ImageNet data the image, in the Olympic marathon data the year of the marathon) and a label.
-
The label is where the term supervised learning comes from. The idea being that a supervisor, or annotator, has already looked at the data and given it labels. For regression problem, a typical objective function is the squared error, $$
-\errorFunction(\weightVector) = \sum_{i=1}^\numData (\dataScalar_i - \mappingFunction(\inputVector_i))^2
-$$ where the data is provided to us as a set of n inputs, $\inputVector_1$, $\inputVector_2$, $\inputVector_3$, …, $\inputVector_n$ each one with an associated label, $\dataScalar_1$, $\dataScalar_2$, $\dataScalar_3$, …, $\dataScalar_\numData$. Sometimes the label is cheap to acquire. For example, in Newsfeed ranking Facebook are acquiring a label each time a user clicks on a post in their Newsfeed. Similarly, in ad-click prediction labels are obtained whenever an advert is clicked. More generally though, we have to employ human annotators to label the data. For example, ImageNet, the breakthrough deep learning result was annotated using Amazon’s Mechanical Turk. Without such large scale human input, we would not have the breakthrough results on image categorization we have today.
-
Some tasks are easier to annotate than others. For example, in the Tecator data, to acquire the actual values of water, protein and fat content in the meat samples further experiments may be required. It is not simply a matter of human labelling. Even if the task is easy for humans to solve there can be problems. For example, humans will extrapolate the context of an image. A colleague mentioned once to me a challenge where humans were labelling images as containing swimming pools, even though none was visible, because they could infer there must be a pool nearby, perhaps because there are kids wearing bathing suits. But there is no swimming pool in the image for the computer to find. The quality of any machine learning solution is very sensitive to the quality of annotated data we have. Investing in processes and tools to improve annotation of data is therefore priority for improving the quality of machine learning solutions.
-
There can also be significant problems with misrepresentation in the data set. If data isn’t collected carefully, then it can reflect biases about the population that we don’t want our models to have. For example, if we design a face detector using Californians may not perform well when deployed in Kampala, Uganda.
Once a supervised learning system is trained it can be placed in a sequential pipeline to automate a process that used to be done manually.
-
Supervised learning is one of the dominant approaches to learning. But the cost and time associated with labeling data is a major bottleneck for deploying machine learning systems. The process for creating training data requires significant human intervention. For example, internationalization of a speech recognition system would require large speech corpora in new languages.
-
An important distinction in machine learning is the separation between training data and test data (or production data). Training data is the data that was used to find the model parameters. Test data (or production data) is the data that is used with the live system. The ability of a machine learning system to predict well on production systems given only its training data is known as its generalization ability. This is the system’s ability to predict in areas where it hasn’t previously seen data.
Knowledge that is not encoded in the prediction function must be
+learned through data. So any unspecified invariance (such as rotational
+or scale invariances) must be learned through the data. This means that
+learning would require a lot more data than otherwise would be necessary
+and results in less data efficient algorithms.
+
The choice of prediction function and invariances is therefore a
+critical stage in designing your machine learning algorithm.
+Unfortunately, many invariances are non-trivial to incorporate, and many
+machine learning algorithms focus on simpler concepts such as linearity
+or smoothness.
Once we have a set of features, and the class of functions we use is
+determined, we need to find the parameters of the model.
+
The parameters of the model, \(\mathbf{
+w}\), are estimated by specifying an objective function.
+The objective function specifies the quality of the match between the
+prediction function and the training data. In supervised
+learning the objective function incorporates both the input data (in the
+ImageNet data the image, in the Olympic marathon data the year of the
+marathon) and a label.
+
The label is where the term supervised learning comes from. The idea
+being that a supervisor, or annotator, has already looked at the data
+and given it labels. For regression problem, a typical objective
+function is the squared error, \[
+E(\mathbf{ w}) = \sum_{i=1}^n(y_i - f(\mathbf{ x}_i))^2
+\] where the data is provided to us as a set of \(n\) inputs, \(\mathbf{ x}_1\), \(\mathbf{ x}_2\), \(\mathbf{ x}_3\), \(\dots\), \(\mathbf{ x}_n\) each one with an associated
+label, \(y_1\), \(y_2\), \(y_3\), \(\dots\), \(y_n\). Sometimes the label is cheap to
+acquire. For example, in Newsfeed ranking Facebook are acquiring a label
+each time a user clicks on a post in their Newsfeed. Similarly, in
+ad-click prediction labels are obtained whenever an advert is clicked.
+More generally though, we have to employ human annotators to label the
+data. For example, ImageNet, the breakthrough deep learning result was
+annotated using Amazon’s Mechanical Turk. Without such large scale human
+input, we would not have the breakthrough results on image
+categorization we have today.
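
To make the squared-error objective concrete, here is a minimal sketch in numpy. The function names and the placeholder numbers are purely illustrative (they are not the real Olympic marathon values); in practice x and y would be the inputs and labels of whatever data set is being fitted.

import numpy as np

def squared_error(y, f_x):
    # Sum of squared residuals between the labels y and the predictions f_x.
    return np.sum((y - f_x)**2)

def linear_predict(x, w):
    # A simple linear prediction function with parameters w = (intercept, slope).
    return w[0] + w[1]*x

# Placeholder inputs, labels and parameters, just to show the call.
x = np.array([0.0, 1.0, 2.0, 3.0])
y = np.array([0.1, 0.9, 2.2, 2.8])
w = np.array([0.0, 1.0])
E = squared_error(y, linear_predict(x, w))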
+
Some tasks are easier to annotate than others. For example, in the
+Tecator data, to acquire the actual values of water, protein and fat
+content in the meat samples further experiments may be required. It is
+not simply a matter of human labelling. Even if the task is easy for
+humans to solve there can be problems. For example, humans will
+extrapolate the context of an image. A colleague once mentioned to me a
+challenge where humans were labelling images as containing swimming
+pools, even though none was visible, because they could infer there must
+be a pool nearby, perhaps because there are kids wearing bathing suits.
+But there is no swimming pool in the image for the computer to find. The
+quality of any machine learning solution is very sensitive to the
+quality of annotated data we have. Investing in processes and tools to
+improve annotation of data is therefore a priority for improving the
+quality of machine learning solutions.
+
There can also be significant problems with misrepresentation in the
+data set. If data isn’t collected carefully, then it can reflect biases
+about the population that we don’t want our models to have. For example,
+if we design a face detector using only data from Californians, it may
+not perform well when deployed in Kampala, Uganda.
Once a supervised learning system is trained it can be placed in a
+sequential pipeline to automate a process that used to be done
+manually.
+
Supervised learning is one of the dominant approaches to learning.
+But the cost and time associated with labeling data is a major
+bottleneck for deploying machine learning systems. The process for
+creating training data requires significant human intervention. For
+example, internationalization of a speech recognition system would
+require large speech corpora in new languages.
+
An important distinction in machine learning is the separation
+between training data and test data (or production data). Training data
+is the data that was used to find the model parameters. Test data (or
+production data) is the data that is used with the live system. The
+ability of a machine learning system to predict well on production
+systems given only its training data is known as its
+generalization ability. This is the system’s ability to predict
+in areas where it hasn’t previously seen data.
Figure: Olympic marathon data with validation error for extrapolation.
+
+
Figure: Olympic marathon data with validation error for
+extrapolation.
Extrapolation
Interpolation
-
-
+
+
-
+
-
-
Figure: Olympic marathon data with validation error for interpolation.
+
+
Figure: Olympic marathon data with validation error for
+interpolation.
Choice of Validation Set
Hold Out Data
-
You have a conclusion as to which model fits best under the training error, but how do the two models perform in terms of validation? In this section we consider hold out validation. In hold out validation we remove a portion of the training data for validating the model on. The remaining data is used for fitting the model (training). Because this is a time series prediction, it makes sense for us to hold out data at the end of the time series. This means that we are validating on future predictions. We will hold out data from after 1980 and fit the model to the data before 1980.
-
# select indices of data to 'hold out'
-indices_hold_out = np.flatnonzero(x>1980)
-
-# Create a training set
-x_train = np.delete(x, indices_hold_out, axis=0)
-y_train = np.delete(y, indices_hold_out, axis=0)
-
-# Create a hold out set
-x_valid = np.take(x, indices_hold_out, axis=0)
-y_valid = np.take(y, indices_hold_out, axis=0)
-
Exercise 3
-
For both the linear and quadratic models, fit the model to the data up until 1980 and then compute the error on the held out data (from 1980 onwards). Which model performs better on the validation data?
+
You have a conclusion as to which model fits best under the training
+error, but how do the two models perform in terms of validation? In this
+section we consider hold out validation. In hold out validation
+we remove a portion of the training data for validating the
+model on. The remaining data is used for fitting the model (training).
+Because this is a time series prediction, it makes sense for us to hold
+out data at the end of the time series. This means that we are
+validating on future predictions. We will hold out data from after 1980
+and fit the model to the data before 1980.
+
# select indices of data to 'hold out'
+indices_hold_out = np.flatnonzero(x>1980)
+
+# Create a training set
+x_train = np.delete(x, indices_hold_out, axis=0)
+y_train = np.delete(y, indices_hold_out, axis=0)
+
+# Create a hold out set
+x_valid = np.take(x, indices_hold_out, axis=0)
+y_valid = np.take(y, indices_hold_out, axis=0)
+
Exercise 2
+
For both the linear and quadratic models, fit the model to the data
+up until 1980 and then compute the error on the held out data (from 1980
+onwards). Which model performs better on the validation data?
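
One possible way to approach this exercise, sketched here with numpy's generic polynomial fitting rather than whatever basis functions the lab itself provides, is to fit degree-1 and degree-2 polynomials by least squares on the pre-1980 data and compare their squared errors on the held-out points. It assumes x_train, y_train, x_valid and y_valid from the cell above, and rescales the years (using the offset and scale suggested later in these notes) to keep the fit well conditioned.

import numpy as np

def hold_out_error(degree, offset=1956., scale=120.):
    # Rescale the years so the polynomial fit is well conditioned.
    xt = (x_train.flatten() - offset)/scale
    xv = (x_valid.flatten() - offset)/scale
    # Least-squares polynomial fit on the training portion only.
    w = np.polyfit(xt, y_train.flatten(), deg=degree)
    # Squared error on the held-out (post-1980) portion.
    resid = y_valid.flatten() - np.polyval(w, xv)
    return np.sum(resid**2)

for degree, name in [(1, 'linear'), (2, 'quadratic')]:
    print(name, 'hold out error:', hold_out_error(degree))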
Richer Basis Set
-
Now we have an approach for deciding which model to retain, we can consider the entire family of polynomial bases, with arbitrary degrees.
-
Exercise 4
-
Now we are going to build a more sophisticated form of basis function, one that can accept arguments to its inputs (similar to those we used in this lab). Here we will start with a polynomial basis.
+
Now we have an approach for deciding which model to retain, we can
+consider the entire family of polynomial bases, with arbitrary
+degrees.
+
Exercise 3
+
Now we are going to build a more sophisticated form of basis
+function, one that can accept arguments to its inputs (similar to those
+we used in this lab). Here we will start
+with a polynomial basis.
The basis as we’ve defined it has three arguments as well as the input. The degree of the polynomial, the scale of the polynomial and the offset. These arguments need to be passed to the basis functions whenever they are called. Modify your code to pass these additional arguments to the python function for creating the basis. Do this for each of your functions predict, fit and objective. You will find *args (or **kwargs) useful.
-
Write code that tries to fit different models to the data with polynomial basis. Use a maximum degree for your basis from 0 to 17. For each polynomial store the hold out validation error and the training error. When you have finished the computation plot the hold out error for your models and the training error for your p. When computing your polynomial basis use offset=1956. and scale=120. to ensure that the data is mapped (roughly) to the -1, 1 range.
-
Which polynomial has the minimum training error? Which polynomial has the minimum validation error?
+
The basis as we’ve defined it has three arguments as well as the
+input. The degree of the polynomial, the scale of the polynomial and the
+offset. These arguments need to be passed to the basis functions
+whenever they are called. Modify your code to pass these additional
+arguments to the python function for creating the basis. Do this for
+each of your functions predict, fit and
+objective. You will find *args (or
+**kwargs) useful.
+
Write code that tries to fit different models to the data with
+polynomial basis. Use a maximum degree for your basis from 0 to 17. For
+each polynomial store the hold out validation error and the
+training error. When you have finished the computation plot the
+hold out error for your models and the training error for your polynomials. When
+computing your polynomial basis use offset=1956. and
+scale=120. to ensure that the data is mapped (roughly) to
+the -1, 1 range.
+
Which polynomial has the minimum training error? Which polynomial has
+the minimum validation error?
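
As a rough sketch of the kind of basis the exercise asks for (the function names and defaults here are illustrative and may differ from the lab's own code), a polynomial basis taking degree, offset and scale arguments could look like the following, with the extra arguments passed through to the basis via **kwargs.

import numpy as np

def polynomial(x, degree=2, offset=1956., scale=120.):
    # Basis matrix with columns 1, z, z**2, ..., z**degree,
    # where z is the input rescaled using the offset and scale.
    z = (np.asarray(x, dtype=float).flatten() - offset)/scale
    return np.vstack([z**d for d in range(degree + 1)]).T

def prediction(w, x, basis=polynomial, **kwargs):
    # Any extra keyword arguments (degree, offset, scale) are handed to the basis.
    Phi = basis(x, **kwargs)
    return Phi @ w

# Example: design matrix for a degree-3 polynomial basis.
Phi = polynomial([1896., 1956., 2016.], degree=3)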
The bias-variance decomposition considers the expected test error for different variations of the training data sampled from, $\Pr(\dataVector, \dataScalar)$ $$
-\mathbb{E}\left[ \left(\dataScalar - \mappingFunction^*(\dataVector)\right)^2 \right].
-$$ This can be decomposed into two parts, $$
-\mathbb{E}\left[ \left(\dataScalar - \mappingFunction(\dataVector)\right)^2 \right] = \text{bias}\left[\mappingFunction^*(\dataVector)\right]^2 + \text{variance}\left[\mappingFunction^*(\dataVector)\right] +\sigma^2,
-$$ where the bias is given by $$
- \text{bias}\left[\mappingFunction^*(\dataVector)\right] =
-\mathbb{E}\left[\mappingFunction^*(\dataVector)\right] * \mappingFunction(\dataVector)
-$$ and it summarizes error that arises from the model’s inability to represent the underlying complexity of the data. For example, if we were to model the marathon pace of the winning runner from the Olympics by computing the average pace across time, then that model would exhibit bias error because the reality of Olympic marathon pace is it is changing (typically getting faster).
-
The variance term is given by $$
- \text{variance}\left[\mappingFunction^*(\dataVector)\right] = \mathbb{E}\left[\left(\mappingFunction^*(\dataVector) - \mathbb{E}\left[\mappingFunction^*(\dataVector)\right]\right)^2\right].
- $$ The variance term is often described as arising from a model that is too complex, but we have to be careful with this idea. Is the model really too complex relative to the real world that generates the data? The real world is a complex place, and it is rare that we are constructing mathematical models that are more complex than the world around us. Rather, the ‘too complex’ refers to ability to estimate the parameters of the model given the data we have. Slight variations in the training set cause changes in prediction.
-
Models that exhibit high variance are sometimes said to ‘overfit’ the data whereas models that exhibit high bias are sometimes described as ‘underfitting’ the data.
One of Breiman’s ideas for improving predictive performance is known
+as bagging (Breiman:bagging96?).
+The idea is to train a number of models on the data such that they
+overfit (high variance). Then average the predictions of these models.
+The models are trained on different bootstrap samples (Efron, 1979) and
+their predictions are aggregated giving us the acronym, Bagging. By
+combining decision trees with bagging, we recover random forests (Breiman,
+2001).
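
As a minimal sketch of the bagging idea, assuming scikit-learn style models: train each copy on a bootstrap resample of the data and average their predictions. (scikit-learn also provides this directly through sklearn.ensemble.BaggingRegressor, and combining it with trees gives RandomForestRegressor.)

import numpy as np
from sklearn.tree import DecisionTreeRegressor

def bagged_predict(X, y, X_test, n_models=100, random_state=0):
    # Average the predictions of trees trained on bootstrap resamples of (X, y).
    rng = np.random.default_rng(random_state)
    n = X.shape[0]
    predictions = []
    for _ in range(n_models):
        idx = rng.integers(0, n, size=n)  # bootstrap sample, drawn with replacement
        model = DecisionTreeRegressor().fit(X[idx], y[idx])
        predictions.append(model.predict(X_test))
    return np.mean(predictions, axis=0)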
+
Bias and variance can also be estimated through Efron’s bootstrap
+(Efron,
+1979), and the traditional view has been that there’s a form of
+Goldilocks effect, where the best predictions are given by the model
+that is ‘just right’ for the amount of data available. Not too simple,
+not too complex. The idea is that bias decreases with increasing model
+complexity and variance increases with increasing model complexity.
+Typically plots begin with the Mummy bear on the left (too much bias),
+end with the Daddy bear on the right (too much variance), and show a dip
+in the middle where the Baby bear (just right) finds themselves.
+
The Daddy bear is typically positioned at the point where the model
+can exactly interpolate the data. For a generalized linear model (McCullagh and
+Nelder, 1989), this is the point at which the number of
+parameters is equal to the number of data3.
+
The bias-variance decomposition (Geman:biasvariance92?)
+considers the expected test error for different variations of the
+training data sampled from, \(\mathbb{P}(\mathbf{ x}, y)\)\[\begin{align*}
+R(\mathbf{ w}) = & \int \left(y- f^*(\mathbf{ x})\right)^2
+\mathbb{P}(y, \mathbf{ x}) \text{d}y\text{d}\mathbf{ x}\\
+& \triangleq \mathbb{E}\left[ \left(y- f^*(\mathbf{ x})\right)^2
+\right].
+\end{align*}\]
+
This can be decomposed into two parts, \[
+\begin{align*}
+\mathbb{E}\left[ \left(y- f(\mathbf{ x})\right)^2 \right] = &
+\text{bias}\left[f^*(\mathbf{ x})\right]^2 +
+\text{variance}\left[f^*(\mathbf{ x})\right] +\sigma^2,
+\end{align*}
+\] where the bias is given by \[
+ \text{bias}\left[f^*(\mathbf{ x})\right] =
+\mathbb{E}\left[f^*(\mathbf{ x})\right] - f(\mathbf{ x})
+\] and it summarizes error that arises from the model’s inability
+to represent the underlying complexity of the data. For example, if we
+were to model the marathon pace of the winning runner from the Olympics
+by computing the average pace across time, then that model would exhibit
+bias error because the reality of Olympic marathon pace is it
+is changing (typically getting faster).
+
The variance term is given by \[
+ \text{variance}\left[f^*(\mathbf{ x})\right] =
+\mathbb{E}\left[\left(f^*(\mathbf{ x}) - \mathbb{E}\left[f^*(\mathbf{
+x})\right]\right)^2\right].
+ \] The variance term is often described as arising from a model
+that is too complex, but we must be careful with this idea. Is the model
+really too complex relative to the real world that generates the data?
+The real world is a complex place, and it is rare that we are
+constructing mathematical models that are more complex than the world
+around us. Rather, the ‘too complex’ refers to the ability to estimate the
+parameters of the model given the data we have. Slight variations in the
+training set cause changes in prediction.
+
Models that exhibit high variance are sometimes said to ‘overfit’ the
+data whereas models that exhibit high bias are sometimes described as
+‘underfitting’ the data.
# Create an instance of SVM and fit the data.
import matplotlib.pyplot as plt
from sklearn import svm

C = 100.0  # SVM regularization parameter
gammas = [0.001, 0.01, 0.1, 1]

per_class = 30
num_samps = 20
# Set up a 1x4 grid for plotting (one panel per value of gamma).
fig, ax = plt.subplots(1, 4, figsize=(10, 3))
xlim = None
ylim = None
for samp in range(num_samps):
    # create_data and decision_boundary_plot are assumed to be defined earlier in the notebook.
    X, y = create_data(per_class)
    models = []
    titles = []
    for gamma in gammas:
        models.append(svm.SVC(kernel='rbf', gamma=gamma, C=C))
        titles.append('$\gamma={}$'.format(gamma))
    models = [cl.fit(X, y) for cl in models]
    xlim, ylim = decision_boundary_plot(models, X, y,
                                        axs=ax,
                                        filename='bias-variance{samp:0>3}.svg'.format(samp=samp),
                                        directory='./ml',
                                        titles=titles,
                                        xlim=xlim,
                                        ylim=ylim)
-
+
-
+
-
+
-
Figure: In each figure the simpler model is on the left, and the more complex model is on the right. Each fit is done to a different version of the data set. The simpler model is more consistent in its errors (bias error), whereas the more complex model is varying in its errors (variance error).
+
Figure: In each figure the simpler model is on the left, and the more
+complex model is on the right. Each fit is done to a different version
+of the data set. The simpler model is more consistent in its errors
+(bias error), whereas the more complex model is varying in its errors
+(variance error).
Overfitting
-
-
+
Figure: Alex Ihler discusses polynomials and overfitting.
-
We can easily develop a simple prediction function that reconstructs the training data exactly, you can just use a look up table. But how would the lookup table predict between the training data, where examples haven’t been seen before? The choice of the class of prediction functions is critical in ensuring that the model generalizes well.
-
The generalization error is normally estimated by applying the objective function to a set of data that the model wasn’t trained on, the test data. To ensure good performance we normally want a model that gives us a low generalization error. If we weren’t sure of the right prediction function to use, then we could try 1,000 different prediction functions. Then we could use the one that gives us the lowest error on the test data. But you have to be careful. Selecting a model in this way is like a further stage of training where you are using the test data in the training.3 So when this is done, the data used for this is not known as test data, it is known as validation data. And the associated error is the validation error. Using the validation error for model selection is a standard machine learning technique, but it can be misleading about the final generalization error. Almost all machine learning practitioners know not to use the test data in your training procedure, but sometimes people forget that when validation data is used for model selection that validation error cannot be used as an unbiased estimate of the generalization performance.
Five fold cross validation tests the ability of the model to interpolate.
-
import mlai
-import pods
+
We can easily develop a simple prediction function that reconstructs
+the training data exactly: you can just use a look-up table. But how
+would the look-up table predict between the training data, where examples
+haven’t been seen before? The choice of the class of prediction
+functions is critical in ensuring that the model generalizes well.
+
The generalization error is normally estimated by applying the
+objective function to a set of data that the model wasn’t
+trained on, the test data. To ensure good performance we normally want a
+model that gives us a low generalization error. If we weren’t sure of
+the right prediction function to use, then we could try 1,000 different
+prediction functions. Then we could use the one that gives us the lowest
+error on the test data. But you have to be careful. Selecting a model in
+this way is like a further stage of training where you are using the
+test data in the training.4 So when this is done,
+the data used for this is not known as test data, it is known as
+validation data. And the associated error is the validation
+error. Using the validation error for model selection is a standard
+machine learning technique, but it can be misleading about the final
+generalization error. Almost all machine learning practitioners know not
+to use the test data in your training procedure, but sometimes people
+forget that when validation data is used for model selection that
+validation error cannot be used as an unbiased estimate of the
+generalization performance.
Five fold cross validation tests the ability of the model to
+interpolate.
+
import mlai
+import pods
-
+
-
+
-
-
Figure: Bayesian fit with 26th degree polynomial and negative marginal log likelihood.
+
+
Figure: Bayesian fit with 26th degree polynomial and negative
+marginal log likelihood.
Hold Out Validation
-
For the polynomial fit, we will now look at hold out validation, where we are holding out some of the most recent points. This tests the abilit of our model to extrapolate.
+
For the polynomial fit, we will now look at hold out
+validation, where we are holding out some of the most recent points.
+This tests the ability of our model to extrapolate.
-
-
+
+
-
+
-
-
Figure: Bayesian fit with 26th degree polynomial and hold out validation scores.
+
+
Figure: Bayesian fit with 26th degree polynomial and hold out
+validation scores.
5-fold Cross Validation
-
Five fold cross validation tests the ability of the model to interpolate.
+
Five fold cross validation tests the ability of the model to
+interpolate.
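
As a sketch of how the five folds might be constructed, using scikit-learn's KFold purely for the index splitting; the fit and predict arguments stand in for whatever model is being validated.

import numpy as np
from sklearn.model_selection import KFold

def five_fold_errors(x, y, fit, predict):
    # fit(x_train, y_train) should return a model; predict(model, x) its predictions.
    errors = []
    for train_idx, valid_idx in KFold(n_splits=5, shuffle=True, random_state=0).split(x):
        model = fit(x[train_idx], y[train_idx])
        resid = y[valid_idx] - predict(model, x[valid_idx])
        errors.append(np.sum(resid**2))
    return errors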
-
-
+
+
-
+
-
-
Figure: Bayesian fit with 26th degree polynomial and five fold cross validation scores.
+
+
Figure: Bayesian fit with 26th degree polynomial and five fold cross
+validation scores.
In unsupervised learning you have data, $\inputVector$, but no labels $\dataScalar$. The aim in unsupervised learning is to extract structure from data. The type of structure you are interested in is dependent on the broader context of the task. In supervised learning that context is very much driven by the labels. Supervised learning algorithms try and focus on the aspects of the data which are relevant to predicting the labels. But in unsupervised learning there are no labels.
Supervised learning is when your data is provided with labels. Now we
+are going to turn to a different form of learning, commonly known as
+unsupervised learning. In unsupervised learning our data isn’t
+necessarily labelled in any form, but we want models that give us a
+better understanding of the data. We’ve actually seen an example of this
+already, which we introduced in the context of objective
+functions. Now we will introduce a more probabilistic approach to
+such models, specifically we are interested in latent variable
+modelling.
+
In unsupervised learning you have data, \(\mathbf{ x}\), but no labels \(y\). The aim in unsupervised learning is to
+extract structure from data. The type of structure you are interested in
+is dependent on the broader context of the task. In supervised learning
+that context is very much driven by the labels. Supervised learning
+algorithms try and focus on the aspects of the data which are relevant
+to predicting the labels. But in unsupervised learning there are no
+labels.
Context
-
Humans can easily sort a number of objects into objects that share similar characteristics. We easily categorize animals or vehicles. But if the data is very large this is too slow. Even for smaller data, it may be that it is presented in a form that is unintelligible for humans. We are good at dealing with high dimensional data when it’s presented in images, but if it’s presented as a series of numbers, we find it hard to interpret. In unsupervised learning we want the computer to do the sorting for us. For example, an e-commerce company might need an algorithm that can go through its entire list of products and automatically sort them into groups such that similar products are located together.
+
Humans can easily sort a number of objects into groups that share
+similar characteristics. We easily categorize animals or vehicles. But
+if the data is very large this is too slow. Even for smaller data, it
+may be that it is presented in a form that is unintelligible for humans.
+We are good at dealing with high dimensional data when it’s presented in
+images, but if it’s presented as a series of numbers, we find it hard to
+interpret. In unsupervised learning we want the computer to do the
+sorting for us. For example, an e-commerce company might need an
+algorithm that can go through its entire list of products and
+automatically sort them into groups such that similar products are
+located together.
Discrete vs Continuous
-
Supervised learning is broadly divided into classification: i.e. wake word classification in the Amazon Echo, and regression, e.g. shelf life prediction for perishable goods. Similarly, unsupervised learning can be broadly split into methods that cluster the data (i.e. provide a discrete label) and methods that represent the data as a continuous value.
+
Supervised learning is broadly divided into classification: i.e. wake
+word classification in the Amazon Echo, and regression, e.g. shelf life
+prediction for perishable goods. Similarly, unsupervised learning can be
+broadly split into methods that cluster the data (i.e. provide a
+discrete label) and methods that represent the data as a continuous
+value.
Clustering methods associate each data point with a different label. Unlike in classification the label is not provided by a human annotator. It is allocated by the computer. Clustering is quite intuitive for humans, we do it naturally with our observations of the real world. For example, we cluster animals into different groups. If we encounter a new animal, we can immediately assign it to a group: bird, mammal, insect. These are certainly labels that can be provided by humans, but they were also originally invented by humans. With clustering we want the computer to recreate that process of inventing the label.
-
Unsupervised learning enables computers to form similar categorizations on data that is too large scale for us to process. When the Greek philosopher, Plato, was thinking about ideas, he considered the concept of the Platonic ideal. The Platonic ideal bird is the bird that is most bird-like or the chair that is most chair-like. In some sense, the task in clustering is to define different clusters, by finding their Platonic ideal (known as the cluster center) and allocate each data point to the relevant cluster center. So, allocate each animal to the class defined by its nearest cluster center.
-
To perform clustering on a computer we need to define a notion of either similarity or distance between the objects and their Platonic ideal, the cluster center. We normally assume that our objects are represented by vectors of data, $\inputVector_i$. Similarly, we represent our cluster center for category j by a vector $\meanVector_j$. This vector contains the ideal features of a bird, a chair, or whatever category j is. In clustering we can either think in terms of similarity of the objects, or distances. We want objects that are similar to each other to cluster together. We want objects that are distant from each other to cluster apart.
-
This requires us to formalize our notion of similarity or distance. Let’s focus on distances. A definition of distance between an object, i, and the cluster center of class j is a function of two vectors, the data point, $\inputVector_i$ and the cluster center, $\meanVector_j$, $$
-d_{ij} = f(\inputVector_i, \meanVector_j).
-$$ Our objective is then to find cluster centers that are close to as many data points as possible. For example, we might want to cluster customers into their different tastes. We could represent each customer by the products they’ve purchased in the past. This could be a binary vector $\inputVector_i$. We can then define a distance between the cluster center and the customer.
One common approach, not deeply covered in this course.

Associate each data point, \(\mathbf{ y}_{i, :}\), with one of \(k\)
different discrete groups. For example:

- Clustering animals into discrete groups. Are animals discrete or
  continuous?
- Clustering people into different political affiliations.

Humans do seem to like clusters:

- Very useful when interacting with biologists.

There is a subtle difference between clustering and vector quantisation:

- A little anecdote: to my mind the difference is that in clustering
  there should be a reduction in data density between samples.
- This definition is not universally applied, so for today’s purposes we
  merge them: determine how to allocate each point to a group and,
  harder, the total number of groups.

A simple algorithm for allocating points to groups requires a set of
\(k\) cluster centres and an assignment of each point to a cluster:

1. Initialize cluster centres as randomly selected data points.
2. Assign each data point to its nearest cluster centre.
3. Update each cluster centre by setting it to the mean of its assigned
   data points.
4. Repeat 2 and 3 until the cluster allocations do not change.

This minimizes the objective \[
E=\sum_{j=1}^K \sum_{i\ \text{allocated to}\ j} \left(\mathbf{ y}_{i,
:} - \boldsymbol{ \mu}_{j, :}\right)^\top\left(\mathbf{ y}_{i, :} -
\boldsymbol{ \mu}_{j, :}\right)
\] i.e. it minimizes the sum of Euclidean squared distances between
points and their associated centres; a minimal code sketch of the
procedure is given after this list.

The minimum is not guaranteed to be global or unique; this objective is
a non-convex optimization problem.
+
+
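The procedure above can be written in a few lines of numpy. This is a minimal sketch of the algorithm as listed (random initialization from the data, then alternating assignment and mean updates), not a production implementation.

import numpy as np

def k_means(Y, k, max_iters=100, random_state=0):
    # Cluster the rows of Y into k groups by alternating assignments and mean updates.
    rng = np.random.default_rng(random_state)
    # Step 1: initialize the centres as randomly selected data points.
    centres = Y[rng.choice(Y.shape[0], size=k, replace=False)]
    for _ in range(max_iters):
        # Step 2: assign each point to its nearest centre (squared Euclidean distance).
        dists = ((Y[:, None, :] - centres[None, :, :])**2).sum(axis=2)
        allocation = dists.argmin(axis=1)
        # Step 3: move each centre to the mean of its allocated points
        # (empty clusters keep their previous centre).
        new_centres = np.array([Y[allocation == j].mean(axis=0)
                                if np.any(allocation == j) else centres[j]
                                for j in range(k)])
        # Step 4: stop once the centres, and hence the allocations, no longer change.
        if np.allclose(new_centres, centres):
            break
        centres = new_centres
    objective = dists.min(axis=1).sum()  # the sum-of-squared-distances objective E
    return centres, allocation, objective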
Clustering methods associate each data point with a different label.
+Unlike in classification the label is not provided by a human annotator.
+It is allocated by the computer. Clustering is quite intuitive for
+humans, we do it naturally with our observations of the real world. For
+example, we cluster animals into different groups. If we encounter a new
+animal, we can immediately assign it to a group: bird, mammal, insect.
+These are certainly labels that can be provided by humans, but they were
+also originally invented by humans. With clustering we want the computer
+to recreate that process of inventing the label.
+
Unsupervised learning enables computers to form similar
+categorizations on data that is too large scale for us to process. When
+the Greek philosopher, Plato, was thinking about ideas, he considered
+the concept of the Platonic ideal. The Platonic ideal bird is the bird
+that is most bird-like or the chair that is most chair-like. In some
+sense, the task in clustering is to define different clusters, by
+finding their Platonic ideal (known as the cluster center) and allocate
+each data point to the relevant cluster center. So, allocate each animal
+to the class defined by its nearest cluster center.
+
To perform clustering on a computer we need to define a notion of
+either similarity or distance between the objects and their Platonic
+ideal, the cluster center. We normally assume that our objects are
+represented by vectors of data, \(\mathbf{
+x}_i\). Similarly, we represent our cluster center for category
+\(j\) by a vector \(\boldsymbol{ \mu}_j\). This vector contains
+the ideal features of a bird, a chair, or whatever category \(j\) is. In clustering we can either think
+in terms of similarity of the objects, or distances. We want objects
+that are similar to each other to cluster together. We want objects that
+are distant from each other to cluster apart.
+
This requires us to formalize our notion of similarity or distance.
+Let’s focus on distances. A definition of distance between an object,
+\(i\), and the cluster center of class
+\(j\) is a function of two vectors, the
+data point, \(\mathbf{ x}_i\) and the
+cluster center, \(\boldsymbol{
+\mu}_j\), \[
+d_{ij} = f(\mathbf{ x}_i, \boldsymbol{ \mu}_j).
+\] Our objective is then to find cluster centers that are close
+to as many data points as possible. For example, we might want to
+cluster customers into their different tastes. We could represent each
+customer by the products they’ve purchased in the past. This could be a
+binary vector \(\mathbf{ x}_i\). We can
+then define a distance between the cluster center and the customer.
Squared Distance
-
A commonly used distance is the squared distance, $$
-\distanceScalar_{ij} = (\inputVector_i - \meanVector_j)^2.
-$$ The squared distance comes up a lot in machine learning. In unsupervised learning it was used to measure dissimilarity between predictions and observed data. Here its being used to measure the dissimilarity between a cluster center and the data.
-
Once we have decided on the distance or similarity function, we can decide a number of cluster centers, K. We find their location by allocating each center to a sub-set of the points and minimizing the sum of the squared errors, $$
-\errorFunction(\meanMatrix) = \sum_{i \in \mathbf{i}_j} (\inputVector_i - \meanVector_j)^2
-$$ where the notation ij represents all the indices of each data point which has been allocated to the jth cluster represented by the center $\meanVector_j$.
-
k-Means Clustering
-
One approach to minimizing this objective function is known as k-means clustering. It is simple and relatively quick to implement, but it is an initialization sensitive algorithm. Initialization is the process of choosing an initial set of parameters before optimization. For k-means clustering you need to choose an initial set of centers. In k-means clustering your final set of clusters is very sensitive to the initial choice of centers. For more technical details on k-means clustering you can watch a video of Alex Ihler introducing the algorithm here.
-
k-Means Clustering
+
A commonly used distance is the squared distance, \[
+d_{ij} = (\mathbf{ x}_i - \boldsymbol{ \mu}_j)^2.
+\] The squared distance comes up a lot in machine learning. In
+unsupervised learning it was used to measure dissimilarity between
+predictions and observed data. Here its being used to measure the
+dissimilarity between a cluster center and the data.
+
Once we have decided on the distance or similarity function, we can
+decide on a number of cluster centers, \(K\). We find their location by allocating
+each center to a sub-set of the points and minimizing the sum of the
+squared errors, \[
+E(\mathbf{M}) = \sum_{i \in \mathbf{i}_j} (\mathbf{ x}_i - \boldsymbol{
+\mu}_j)^2
+\] where the notation \(\mathbf{i}_j\) represents all the indices
+of each data point which has been allocated to the \(j\)th cluster represented by the center
+\(\boldsymbol{ \mu}_j\).
+
\(k\)-Means
+Clustering
+
One approach to minimizing this objective function is known as
+\(k\)-means clustering. It is
+simple and relatively quick to implement, but it is an initialization
+sensitive algorithm. Initialization is the process of choosing an
+initial set of parameters before optimization. For \(k\)-means clustering you need to choose an
+initial set of centers. In \(k\)-means
+clustering your final set of clusters is very sensitive to the initial
+choice of centers. For more technical details on \(k\)-means clustering you can watch a video
+of Alex Ihler introducing the algorithm here.
+
\(k\)-Means Clustering
-
+
-
+
-
Figure: Clustering with the k-means clustering algorithm.
+
Figure: Clustering with the \(k\)-means clustering algorithm.
-
+
-
+
-
Figure: k-means clustering by Alex Ihler.
+
Figure: \(k\)-means clustering by
+Alex Ihler.
Hierarchical Clustering
-
Other approaches to clustering involve forming taxonomies of the cluster centers, like humans apply to animals, to form trees. You can learn more about agglomerative clustering in this video from Alex Ihler.
+
Other approaches to clustering involve forming taxonomies of the
+cluster centers, like humans apply to animals, to form trees. You can
+learn more about agglomerative clustering in this video from Alex
+Ihler.
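
For example, agglomerative (hierarchical) clustering is available in scipy; assuming Y is a data matrix with one row per object, the resulting linkage matrix can be drawn as a dendrogram, the tree of clusters. The data here is a random placeholder.

import numpy as np
import matplotlib.pyplot as plt
from scipy.cluster.hierarchy import linkage, dendrogram

Y = np.random.default_rng(0).normal(size=(30, 2))  # placeholder data
Z = linkage(Y, method='ward')  # agglomerative clustering with Ward's criterion
dendrogram(Z)
plt.show()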
-
-
+
+
-
+
-
+
Figure: Hierarchical Clustering by Alex Ihler.
Phylogenetic Trees
-
Indeed, one application of machine learning techniques is performing a hierarchical clustering based on genetic data, i.e. the actual contents of the genome. If we do this across a number of species then we can produce a phylogeny. The phylogeny aims to represent the actual evolution of the species and some phylogenies even estimate the timing of the common ancestor between two species4. Similar methods are used to estimate the origin of viruses like AIDS or Bird flu which mutate very quickly. Determining the origin of viruses can be important in containing or treating outbreaks.
+
Indeed, one application of machine learning techniques is performing
+a hierarchical clustering based on genetic data, i.e. the actual
+contents of the genome. If we do this across a number of species then we
+can produce a phylogeny. The phylogeny aims to represent the
+actual evolution of the species and some phylogenies even estimate the
+timing of the common ancestor between two species5.
+Similar methods are used to estimate the origin of viruses like HIV or
+bird flu, which mutate very quickly. Determining the origin of viruses
+can be important in containing or treating outbreaks.
Product Clustering
-
An e-commerce company could apply hierarchical clustering to all its products. That would give a phylogeny of products. Each cluster of products would be split into sub-clusters of products until we got down to individual products. For example, we might expect a high level split to be Electronics/Clothing. Of course, a challenge with these tree-like structures is that many products belong in more than one parent cluster: for example running shoes should be in more than one group, they are ‘sporting goods’ and they are ‘apparel’. A tree structure doesn’t allow this allocation.
-
Hierarchical Clustering Challenge
-
Our own psychological grouping capabilities are studied as a domain of cognitive science. Researchers like Josh Tenenbaum have developed algorithms that decompose data in more complex ways, but they can normally only be applied to smaller data sets.
+
An e-commerce company could apply hierarchical clustering to all its
+products. That would give a phylogeny of products. Each cluster of
+products would be split into sub-clusters of products until we got down
+to individual products. For example, we might expect a high level split
+to be Electronics/Clothing. Of course, a challenge with these tree-like
+structures is that many products belong in more than one parent cluster:
+for example running shoes should be in more than one group, they are
+‘sporting goods’ and they are ‘apparel’. A tree structure doesn’t allow
+this allocation.
+
Hierarchical Clustering
+Challenge
+
Our own psychological grouping capabilities are studied as a domain
+of cognitive science. Researchers like Josh Tenenbaum have developed
+algorithms that decompose data in more complex ways, but they can
+normally only be applied to smaller data sets.
+
Other Clustering Approaches
+
+
+Spectral clustering (Shi and Malik (2000), Ng et al. (n.d.)); a code sketch follows after this list.
+
+
Allows clusters which aren’t convex hulls.
+
+
Dirichlet process
+
+
A probabilistic formulation for a clustering algorithm that is
+non-parametric.
+
Loosely speaking it allows infinite clusters
+
In practice useful for dealing with previously unknown species
+(e.g. a “Black Swan Event”).
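
As an illustration of the first of these, scikit-learn provides spectral clustering directly; a sketch of a call, on placeholder data, might be:

import numpy as np
from sklearn.cluster import SpectralClustering

X = np.random.default_rng(0).normal(size=(100, 2))  # placeholder data
labels = SpectralClustering(n_clusters=2, affinity='rbf').fit_predict(X)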
Dimensionality reduction methods compress the data by replacing the original data with a reduced number of continuous variables. One way of thinking of these methods is to imagine a marionette.
Dimensionality reduction methods compress the data by replacing the
+original data with a reduced number of continuous variables. One way of
+thinking of these methods is to imagine a marionette.
-
+
-
+
-
Figure: Thinking of dimensionality reduction as a marionette. We observe the high dimensional pose of the puppet, $\inputVector$, but the movement of the puppeteer’s hand, $\latentVector$ remains hidden to us. Dimensionality reduction aims to recover those hidden movements which generated the observations.
-
-
-
The position of each body part of a marionette could be thought of as our data, $\inputVector_i$. So, each data point consists of the 3-D co-ordinates of all the different body parts of the marionette. Let’s say there are 13 different body parts (2 each of feet, knees, hips, hands, elbows, shoulders, one head). Each body part has an x, y, z position in Cartesian coordinates. So that’s 39 numbers associated with each observation.
-
The movement of these 39 parts is determined by the puppeteer via strings. Let’s assume it’s a very simple puppet, with just one stick to control it. The puppeteer can move the stick up and down, left and right. And they can twist it. This gives three parameters in the puppeteers control. This implies that the 39 variables we see moving are controlled by only 3 variables. These 3 variables are often called the hidden or latent variables.
-
Dimensionality reduction assumes something similar for real world data. It assumes that the data we observe is generated from some lower dimensional underlying process. It then seeks to recover the values associated with this low dimensional process.
+
Figure: Thinking of dimensionality reduction as a marionette. We
+observe the high dimensional pose of the puppet, \(\mathbf{ x}\), but the movement of the
+puppeteer’s hand, \(\mathbf{ z}\)
+remains hidden to us. Dimensionality reduction aims to recover those
+hidden movements which generated the observations.
+
+
+
The position of each body part of a marionette could be thought of as
+our data, \(\mathbf{ x}_i\). So, each
+data point consists of the 3-D co-ordinates of all the different body
+parts of the marionette. Let’s say there are 13 different body parts (2
+each of feet, knees, hips, hands, elbows, shoulders, one head). Each
+body part has an x, y, z position in Cartesian coordinates. So that’s 39
+numbers associated with each observation.
+
The movement of these 39 parts is determined by the puppeteer via
+strings. Let’s assume it’s a very simple puppet, with just one stick to
+control it. The puppeteer can move the stick up and down, left and
+right. And they can twist it. This gives three parameters in the
+puppeteers control. This implies that the 39 variables we see moving are
+controlled by only 3 variables. These 3 variables are often called the
+hidden or latent variables.
+
Dimensionality reduction assumes something similar for real world
+data. It assumes that the data we observe is generated from some lower
+dimensional underlying process. It then seeks to recover the values
+associated with this low dimensional process.
Examples in Social Sciences
-
Dimensionality reduction techniques underpin a lot of psychological scoring tests such as IQ tests or personality tests. An IQ test can involve several hundred questions, potentially giving a rich, high dimensional, characterization of some aspects of your intelligence. It is then summarized by a single number. Similarly, the Myers-Briggs personality test involves answering questions about preferences which are reduced to a set of numbers reflecting personality.
-
These tests are assuming that our intelligence is implicitly one-dimensional and that our personality is implicitly four dimensional. Other examples include political belief which is typically represented on a left to right scale. A one-dimensional distillation of an entire philosophy about how a country should be run. Our own leadership principles imply that our decisions have a fourteen-dimensional space underlying them. Each decision could be characterized by judging to what extent it embodies each of the principles.
-
Political belief, personality, intelligence, leadership. None of these exist as a directly measurable quantity in the real world, rather they are inferred based on measurables. Dimensionality reduction is the process of allowing the computer to automatically find such underlying dimensions. This automatically allowing us to characterize each data point according to those explanatory variables. Each of these characteristics can be scored, and individuals can then be turned into vectors.
-
This doesn’t only apply to individuals, in recent years work on language modeling has taken a similar approach to words. The word2vec algorithm performed a dimensionality reduction on words, now you can take any word and map it to a latent space where similar words exhibit similar characteristics. A personality space for words.
+
Dimensionality reduction techniques underpin a lot of psychological
+scoring tests such as IQ tests or personality tests. An IQ test can
+involve several hundred questions, potentially giving a rich, high
+dimensional, characterization of some aspects of your intelligence. It
+is then summarized by a single number. Similarly, the Myers-Briggs
+personality test involves answering questions about preferences which
+are reduced to a set of numbers reflecting personality.
+
These tests are assuming that our intelligence is implicitly
+one-dimensional and that our personality is implicitly four dimensional.
+Other examples include political belief which is typically represented
+on a left to right scale. A one-dimensional distillation of an entire
+philosophy about how a country should be run. Our own leadership
+principles imply that our decisions have a fourteen-dimensional space
+underlying them. Each decision could be characterized by judging to what
+extent it embodies each of the principles.
+
Political belief, personality, intelligence, leadership. None of
+these exist as a directly measurable quantity in the real world, rather
+they are inferred based on measurables. Dimensionality reduction is the
+process of allowing the computer to automatically find such underlying
+dimensions. This automatically allowing us to characterize each data
+point according to those explanatory variables. Each of these
+characteristics can be scored, and individuals can then be turned into
+vectors.
+
This doesn’t only apply to individuals, in recent years work on
+language modeling has taken a similar approach to words. The word2vec algorithm performed
+a dimensionality reduction on words, now you can take any word and map
+it to a latent space where similar words exhibit similar
+characteristics. A ‘personality space’ for words.
Principal Component Analysis
-
Principal component analysis (PCA) is arguably the queen of dimensionality reduction techniques. PCA was developed as an approach to dimensionality reduction in 1930s by Hotelling as a method for the social sciences. In Hotelling’s formulation of PCA it was assumed that any data point, x could be represented as a weighted sum of the latent factors of interest, so that Hotelling described prediction functions (like in regression and classification above), only the regression is now multiple output. And instead of predicting a label, yi, we now try and force the regression to predict the observed feature vector, $\dataVector_i$. So, for example, on an IQ test we would try and predict subject i’s answer to the jth question with the following function $$
-\dataScalar_{ij} = \mappingFunction_j(\latentScalar_i; \weightVector).
-$$ Here zi would be the IQ of subject i and $\mappingFunction_j(\cdot)$ would be a function representing the relationship between the subject’s IQ and their score on the answer to question j. This function is the same for all subjects, but the subject’s IQ is assumed to differ leading to different scores for each subject.
+
Principal component analysis (PCA) is arguably the queen of
+dimensionality reduction techniques. PCA was developed as an approach to
+dimensionality reduction in the 1930s by Hotelling as a method for the
+social sciences. In Hotelling’s formulation of PCA it was assumed that
+any data point, \(\mathbf{x}\) could be
+represented as a weighted sum of the latent factors of interest, so that
+Hotelling described prediction functions (like in regression and
+classification above), only the regression is now multiple
+output. And instead of predicting a label, \(y_i\), we now try and force the regression
+to predict the observed feature vector, \(\mathbf{ y}_i\). So, for example, on an IQ
+test we would try and predict subject \(i\)’s answer to the \(j\)th question with the following function
+\[
+y_{ij} = f_j(z_i; \mathbf{ w}).
+\] Here \(z_i\) would be the IQ
+of subject \(i\) and \(f_j(\cdot)\) would be a function
+representing the relationship between the subject’s IQ and their score
+on the answer to question \(j\). This
+function is the same for all subjects, but the subject’s IQ is assumed
+to differ leading to different scores for each subject.
-
+
-
+
-
Figure: Visualization of the first two principal components of an artificial data set. The data was generated by taking an image of a handwritten digit, 6, and rotating it 360 times, one degree each time. The first two principal components have been extracted in the diagram. The underlying circular shape is derived from the rotation of the data. Each image in the data set is projected on to the location its projected to in the latent space.
+
Figure: Visualization of the first two principal components of an
+artificial data set. The data was generated by taking an image of a
+handwritten digit, 6, and rotating it 360 times, one degree each time.
+The first two principal components have been extracted in the diagram.
+The underlying circular shape is derived from the rotation of the data.
+Each image in the data set is shown at the location it is projected
+to in the latent space.
Hotelling’s PCA
-
In Hotelling’s formulation he assumed that the function was a linear function. This idea is taken from a wider field known as factor analysis, so Hotelling described the challenge as $$
-\mappingFunction_j(\latentScalar_i; \weightVector) = \weightScalar_j \latentScalar_i
-$$ so the answer to the jth question is predicted to be a scaling of the subject’s IQ. The scale factor is given by $\weightScalar_j$. If there are more latent dimensions then a matrix of parameters, $\weightMatrix$ is used, for example if there were two latent dimensions, we’d have $$
-\mappingFunction_j(\mathbf{\latentScalar}_i; \weightMatrix) = \weightScalar_{1j} \latentScalar_{1i} + \weightScalar_{2j} \latentScalar_{2i}
-$$ where, if this were a personality test, then $\latentScalar_{1i}$ might represent the spectrum over a subject’s extrovert/introvert and $\latentScalar_{2i}$ might represent where the subject was on the rational/perceptual scale. The function would make a prediction about the subjects answer to a particular question on the test (e.g. preference for office job vs preference for outdoor job). In factor analysis the parameters $\weightMatrix$ are known as the factor loadings and in PCA they are known as the principal components.
+
In Hotelling’s formulation he assumed that the function was a linear
+function. This idea is taken from a wider field known as factor
+analysis, so Hotelling described the challenge as \[
+f_j(z_i; \mathbf{ w}) = w_j z_i
+\] so the answer to the \(j\)th
+question is predicted to be a scaling of the subject’s IQ. The scale
+factor is given by \(w_j\). If there
+are more latent dimensions then a matrix of parameters, \(\mathbf{W}\) is used, for example if there
+were two latent dimensions, we’d have \[
+f_j(\mathbf{z}_i; \mathbf{W}) = w_{1j} z_{1i} + w_{2j} z_{2i}
+\] where, if this were a personality test, then \(z_{1i}\) might represent where the
+subject sits on the extrovert/introvert spectrum and \(z_{2i}\) might represent where
+they sit on the rational/perceptual scale. The function would make a
+prediction about the subject’s answer to a particular question on the
+test (e.g. preference for an office job vs preference for an outdoor job). In
+factor analysis the parameters \(\mathbf{W}\) are known as the factor
+loadings and in PCA they are known as the principal
+components.
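A minimal numpy sketch of this prediction function (the latent traits and loadings below are made up purely for illustration): the multi-output prediction is just a matrix product.

import numpy as np

# Two made-up latent traits for three subjects, e.g. extrovert/introvert
# and rational/perceptual scores.
Z = np.array([[ 1.2, -0.3],
              [-0.5,  0.8],
              [ 0.1,  1.5]])
# Loadings: how strongly each of four questions reflects each trait.
W = np.array([[ 0.9,  0.1],
              [ 0.2, -0.7],
              [-0.4,  0.5],
              [ 0.6,  0.6]])
Y = Z @ W.T   # Y[i, j] = w_{1j} z_{1i} + w_{2j} z_{2i}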
Parameters
-
Fitting the model involves finding estimates for the loadings, $\weightMatrix$, and latent variables, $\latentMatrix$. There are different approaches including least squares. The least squares approach is used, for example, in recommender systems. In recommender systems this method is called matrix factorization. The customer characteristics, $\dataVector_i$ is the customer rating for each different product (or item) and the latent variables can be seen as a space of customer preferences. In the recommender system case, the loadings matrix also has an interpretation as product similarities.5 Recommender systems have a particular characteristic in that most of the entries of the vector $\dataVector_i$ are missing most of the time.
-
In PCA and factor analysis the unknown latent factors are dealt with through a probability distribution. They are each assumed to be drawn from a zero mean, unit variance normal distribution. This leaves the factor loadings to be estimated. For PCA the maximum likelihood solution for the factor loadings can be shown to be given by the eigenvalue decomposition of the data covariance matrix. This is algorithmically simple and convenient, although slow to compute for very large data sets with many features and many subjects. The eigenvalue problem can also be derived from many other starting points: e.g. the directions of maximum variance in the data or finding a latent space that best preserves inter-point distances between the data, or the optimal linear compression of the data given a linear reconstruction. These many and varied justifications for the eigenvalue decomposition may account for the popularity of PCA. Indeed, there is even an interpretation for Google’s original PageRank algorithm (which computed the smallest eigenvector of the internet’s linkage matrix) as seeking the dominant principal component of the web.6
-
Characterizing users according to past buying behavior and combining this with characteristics about products, is key to making good recommendations and returning useful search results. Further advances can be made if we understand the context of a particular session. For example, if a user is buying Christmas presents and searches for a dress, then it could be the case that the user is willing to spend a little more on the dress than in normal circumstances. Characterizing these effects requires more data and more complex algorithms. However, in domains such a search we are normally constrained by the speed with which we need to return results. Accounting for each of these factors while returning results with acceptable latency is a particular challenge.
+
Fitting the model involves finding estimates for the loadings, \(\mathbf{W}\), and latent variables, \(\mathbf{Z}\). There are different
+approaches including least squares. The least squares approach is used,
+for example, in recommender systems. In recommender systems this method
+is called matrix factorization. The customer characteristics,
+\(\mathbf{ y}_i\), are the customer’s
+ratings for each different product (or item) and the latent variables can
+be seen as a space of customer preferences. In the recommender system
+case, the loadings matrix also has an interpretation as product
+similarities.6 Recommender systems have a
+particular characteristic in that most of the entries of the vector
+\(\mathbf{ y}_i\) are missing most of
+the time.
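As an illustrative sketch of the least squares idea behind matrix factorization (toy ratings, numpy, and plain gradient descent rather than any production recommender algorithm), we can fit the loadings and latent preferences using only the observed entries:

import numpy as np

# Toy ratings matrix: rows are customers, columns are products,
# np.nan marks the (typically many) missing entries.
Y = np.array([[5.0, np.nan, 1.0],
              [4.0, 1.0, np.nan],
              [np.nan, 5.0, 4.0]])
mask = ~np.isnan(Y)

q = 2  # number of latent dimensions
rng = np.random.default_rng(0)
Z = rng.normal(size=(Y.shape[0], q))   # customer preferences
W = rng.normal(size=(Y.shape[1], q))   # product characteristics (loadings)

lr = 0.05
for it in range(2000):
    R = np.where(mask, Z @ W.T - Y, 0.0)   # residuals on observed entries only
    Z -= lr * R @ W
    W -= lr * R.T @ Z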
+
In PCA and factor analysis the unknown latent factors are dealt with
+through a probability distribution. They are each assumed to be drawn
+from a zero mean, unit variance normal distribution. This leaves the
+factor loadings to be estimated. For PCA the maximum likelihood solution
+for the factor loadings can be shown to be given by the eigenvalue
+decomposition of the data covariance matrix. This is
+algorithmically simple and convenient, although slow to compute for very
+large data sets with many features and many subjects. The eigenvalue
+problem can also be derived from many other starting points: e.g. the
+directions of maximum variance in the data or finding a latent space
+that best preserves inter-point distances between the data, or the
+optimal linear compression of the data given a linear reconstruction.
+These many and varied justifications for the eigenvalue decomposition
+may account for the popularity of PCA. Indeed, there is even an
+interpretation for Google’s original PageRank algorithm (which computed
+the smallest eigenvector of the internet’s linkage matrix) as
+seeking the dominant principal component of the web.7
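A minimal numpy sketch of that maximum likelihood solution (on synthetic data, so the numbers are illustrative only): centre the data, form the covariance matrix, and keep the leading eigenvectors.

import numpy as np

rng = np.random.default_rng(0)
Y = rng.normal(size=(200, 5)) @ rng.normal(size=(5, 5))  # toy data matrix

Ycent = Y - Y.mean(axis=0)            # centre each feature
C = np.cov(Ycent, rowvar=False)       # data covariance matrix
eigvals, eigvecs = np.linalg.eigh(C)  # eigendecomposition (ascending order)
order = np.argsort(eigvals)[::-1]
W = eigvecs[:, order[:2]]             # first two principal components
Z = Ycent @ W                         # projection into the latent space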
+
Characterizing users according to past buying behavior and combining
+this with characteristics about products, is key to making good
+recommendations and returning useful search results. Further advances
+can be made if we understand the context of a particular session. For
+example, if a user is buying Christmas presents and searches for a
+dress, then it could be the case that the user is willing to spend a
+little more on the dress than in normal circumstances. Characterizing
+these effects requires more data and more complex algorithms. However,
+in domains such as search we are normally constrained by the speed with
+which we need to return results. Accounting for each of these factors
+while returning results with acceptable latency is a particular
+challenge.
The final domain of learning we will review is known as reinforcement learning. The domain of reinforcement learning is one that many researchers seem to believe is offering a route to general intelligence. The idea of general intelligence is to develop algorithms that are adaptable to many different circumstances. Supervised learning algorithms are designed to resolve particular challenges. Data is annotated with those challenges in mind. Unsupervised attempts to build representations without any context. But normally the algorithm designer has an understanding of what the broader objective is and designs the algorithms accordingly (for example, characterizing users). In reinforcement learning some context is given, in the form of a reward, but the reward is normally delayed. There may have been many actions that affected the outcome, but which actions had a positive effect and which a negative effect?
The final domain of learning we will review is known as reinforcement
+learning. The domain of reinforcement learning is one that many
+researchers seem to believe is offering a route to general
+intelligence. The idea of general intelligence is to develop
+algorithms that are adaptable to many different circumstances.
+Supervised learning algorithms are designed to resolve particular
+challenges. Data is annotated with those challenges in mind.
+Unsupervised learning attempts to build representations without any context. But
+normally the algorithm designer has an understanding of what the broader
+objective is and designs the algorithms accordingly (for example,
+characterizing users). In reinforcement learning some context is given,
+in the form of a reward, but the reward is normally delayed. There may
+have been many actions that affected the outcome, but which actions had
+a positive effect and which a negative effect?
“Reward”
-
In reinforcement learning some context is given, in the form of a reward. But it is often delayed
-
Credit allocation problem: many actions that affected the outcome, but which actions had a positive effect and which a negative effect?
+
In reinforcement learning some context is given, in the form of a
+reward. But it is often delayed
+
Credit allocation problem: many actions that affected the
+outcome, but which actions had a positive effect and which a negative
+effect?
-
One issue for many companies is that the best way of testing the customer experience, A/B testing, prioritizes short term reward. The internet is currently being driven by short term rewards which make it distracting in the short term, but perhaps less useful in the long term. Click-bait is an example, but there are more subtle effects. The success of Facebook is driven by its ability to draw us in when likely we should be doing something else. This is driven by large scale A/B testing.
-
One open question is how to drive non-visual interfaces through equivalents to A/B testing. Speech interfaces, such as those used in intelligent agents, are less amenable to A/B testing when determining the quality of the interface. Improving interaction with them is therefore less exact science than the visual interface. Data efficient reinforcement learning methods are likely to be key to improving these agent’s ability to interact with the user and understand intent. However, they are not yet mature enough to be deployed in this application.
+
One issue for many companies is that the best way of testing the
+customer experience, A/B testing, prioritizes short term reward. The
+internet is currently being driven by short term rewards which make it
+distracting in the short term, but perhaps less useful in the long term.
+Click-bait is an example, but there are more subtle effects. The success
+of Facebook is driven by its ability to draw us in when likely we should
+be doing something else. This is driven by large scale A/B testing.
+
One open question is how to drive non-visual interfaces through
+equivalents to A/B testing. Speech interfaces, such as those used in
+intelligent agents, are less amenable to A/B testing when determining
+the quality of the interface. Improving interaction with them is
+therefore a less exact science than for visual interfaces. Data-efficient
+reinforcement learning methods are likely to be key to improving these
+agents’ ability to interact with the user and understand intent.
+However, they are not yet mature enough to be deployed in this
+application.
Game Play
-
An area where reinforcement learning methods have been deployed with high profile success is game play. In game play the reward is delayed to the end of the game, and it comes in the form of victory or defeat. A significant advantage of game play as an application area is that, through simulation of the game, it is possible to generate as much data as is required to solve the problem. For this reason, many of the recent advances in reinforcement learning have occurred with methods that are not data efficient.
-
The company DeepMind is set up around reinforcement learning as an approach to general intelligence. All their most well-known achievements are centered around artificial intelligence in game play. In reinforcement learning a decision made at any given time have a downstream effect on the result. Whether the effect if beneficial or not is unknown until a future moment.
-
We can think of reinforcement learning as providing a label, but the label is associated with a series of data involving a number of decisions taken. Each decision was taken given the understanding of game play at any given moment. Understanding which of these decisions was important in victory or defeat is a hard problem.
-
In machine learning the process of understanding which decisions were beneficial and which were detrimental is known as the credit allocation problem. You wish to reward decisions that led to success to encourage them, but punish decisions that lead to failure.
-
Broadly speaking, DeepMind uses an approach to Machine Learning where there are two mathematical functions at work. One determines the action to be taken at any given moment, the other estimates the quality of the board position at any given time. These are respectively known as the policy network and the value network.7 DeepMind made use of convolutional neural networks for both these models.
+
An area where reinforcement learning methods have been deployed with
+high profile success is game play. In game play the reward is delayed to
+the end of the game, and it comes in the form of victory or defeat. A
+significant advantage of game play as an application area is that,
+through simulation of the game, it is possible to generate as much data
+as is required to solve the problem. For this reason, many of the recent
+advances in reinforcement learning have occurred with methods that are
+not data efficient.
+
The company DeepMind is set up around reinforcement learning as an
+approach to general intelligence. All their most well-known achievements
+are centered around artificial intelligence in game play. In
+reinforcement learning a decision made at any given time has a
+downstream effect on the result. Whether the effect is beneficial or not
+is unknown until a future moment.
+
We can think of reinforcement learning as providing a label, but the
+label is associated with a series of data involving a number of
+decisions taken. Each decision was taken given the understanding of game
+play at any given moment. Understanding which of these decisions was
+important in victory or defeat is a hard problem.
+
In machine learning the process of understanding which decisions were
+beneficial and which were detrimental is known as the credit allocation
+problem. You wish to reward decisions that led to success to encourage
+them, but punish decisions that led to failure.
+
Broadly speaking, DeepMind uses an approach to Machine Learning where
+there are two mathematical functions at work. One determines the action
+to be taken at any given moment, the other estimates the quality of the
+board position at any given time. These are respectively known as the
+policy network and the value network.8
+DeepMind made use of convolutional neural networks for both these
+models.
AlphaGo
-
The ancient Chinese game of Go was considered a challenge for artificial intelligence for two reasons. Firstly, the game tree has a very high branching factor. The game tree is a discrete representation of the game. Every node in the game tree is associated with a board position. You can move through the game tree by making legal a move on the board to change the position. In Go, there are so many legal moves that the game tree increases exponentially. This challenge in Go was addressed by using stochastic game tree search. Rather than exploring the game tree exhaustively they explored it randomly.
-
Secondly, evaluating the quality of any given board position was deemed to be very hard.8 The value function determines for each player whether they are winning or losing. Skilled Go players can assess a board position, but they do it by instinct, by intuition. Just as early AI researchers struggled to give rules for detecting cancer, it is challenging to give rules to assess a Go board. The machine learning approach that AlphaGo took is to train a value function network to make this assessment.
-
The approach that DeepMind took to conquering Go is a model-free approach known as Q-learning.9 The model-free approach refers to the fact that they don’t directly include a model of how the world evolves in the reinforcement learning algorithm. They make extensive use of the game tree, but they don’t model how it evolves. They do model the expected reward of each position in the game tree (the value function) but that is not the same as modeling how the game will proceed.
-
Reinforcement Learning and Classical Control
-
An alternative approach to reinforcement learning is to use a prediction function to suggest how the world will evolve in response to your actions. To predict how the game tree will evolve. You can then use this prediction to indirectly infer the expected reward associated with any action. This is known as model-based reinforcement learning.
-
This model-based approach is also closer to a control system. A classical control system is one where you give the system a set point. For example, a thermostat in the house. You set the temperature and the boiler switches off when it reaches it. Optimal control is about getting the house to the right temperature as quickly as possible. Classical control is widely used in robotic control and flight control.
-
One interesting crossover between classical control and machine learning arises because classical optimal control can be seen as a form of model-based reinforcement learning. One where the reward is recovered when the set point is reached. In control engineering the prediction function is known as the transfer function. The process of fitting the transfer function in control is known as system identification.
-
There is some exciting work emerging at the interface between the areas of control and reinforcement learning. Results at this interface could be very important for improving the quality of robotic and drone control.
+
The ancient Chinese game of Go was considered a challenge for
+artificial intelligence for two reasons. Firstly, the game tree has a
+very high branching factor. The game tree is a discrete representation
+of the game. Every node in the game tree is associated with a board
+position. You can move through the game tree by making a legal move on
+the board to change the position. In Go, there are so many legal moves
+that the game tree grows exponentially. This challenge in Go was
+addressed by using stochastic game tree search. Rather than exploring
+the game tree exhaustively they explored it randomly.
+
Secondly, evaluating the quality of any given board position was
+deemed to be very hard.9 The value function determines for
+each player whether they are winning or losing. Skilled Go players can
+assess a board position, but they do it by instinct, by intuition. Just
+as early AI researchers struggled to give rules for detecting cancer, it
+is challenging to give rules to assess a Go board. The machine learning
+approach that AlphaGo took is to train a value function network to make
+this assessment.
+
The approach that DeepMind took to conquering Go is a
+model-free approach known as Q-learning.10 The model-free approach refers to
+the fact that they don’t directly include a model of how the world
+evolves in the reinforcement learning algorithm. They make extensive use
+of the game tree, but they don’t model how it evolves. They do model the
+expected reward of each position in the game tree (the value function)
+but that is not the same as modeling how the game will proceed.
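For a flavour of what model free means here, below is a sketch of the textbook tabular Q-learning update on a toy chain problem. This is only the update rule itself; DeepMind’s systems combine learned value and policy networks with tree search rather than a simple table like this.

import numpy as np

# Five states in a chain; action 0 moves left, action 1 moves right,
# and a reward of 1 is received on reaching the right-hand end.
n_states, n_actions = 5, 2
Q = np.zeros((n_states, n_actions))
gamma, alpha = 0.9, 0.1
rng = np.random.default_rng(0)

for episode in range(200):
    s = 0
    while s != n_states - 1:
        a = rng.integers(n_actions)              # behave randomly (Q-learning is off-policy)
        s_next = max(s - 1, 0) if a == 0 else s + 1
        r = 1.0 if s_next == n_states - 1 else 0.0
        # Credit is allocated through the bootstrapped update:
        Q[s, a] += alpha * (r + gamma * Q[s_next].max() - Q[s, a])
        s = s_next

print(Q.argmax(axis=1))   # learned greedy policy (should favour moving right)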
+
Reinforcement
+Learning and Classical Control
+
An alternative approach to reinforcement learning is to use a
+prediction function to suggest how the world will evolve in response to
+your actions. To predict how the game tree will evolve. You can then use
+this prediction to indirectly infer the expected reward associated with
+any action. This is known as model-based reinforcement
+learning.
+
This model-based approach is also closer to a control system. A
+classical control system is one where you give the system a set point.
+For example, a thermostat in the house. You set the temperature and the
+boiler switches off when it reaches it. Optimal control is about getting
+the house to the right temperature as quickly as possible. Classical
+control is widely used in robotic control and flight control.
+
One interesting crossover between classical control and machine
+learning arises because classical optimal control can be seen as a form
+of model-based reinforcement learning. One where the reward is recovered
+when the set point is reached. In control engineering the prediction
+function is known as the transfer function. The process of
+fitting the transfer function in control is known as system
+identification.
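As a tiny illustration of system identification (a first-order, discrete-time toy system fitted by least squares; real control problems are of course much richer than this):

import numpy as np

# Simulate y[t+1] = a*y[t] + b*u[t] + noise, then recover a and b.
rng = np.random.default_rng(0)
a_true, b_true = 0.9, 0.5
u = rng.normal(size=200)                  # input signal
y = np.zeros(201)
for t in range(200):
    y[t + 1] = a_true * y[t] + b_true * u[t] + 0.01 * rng.normal()

X = np.column_stack([y[:-1], u])          # regressors: previous output and input
a_hat, b_hat = np.linalg.lstsq(X, y[1:], rcond=None)[0]
print(a_hat, b_hat)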
+
There is some exciting work emerging at the interface between the
+areas of control and reinforcement learning. Results at this interface
+could be very important for improving the quality of robotic and drone
+control.
Optimization Methods
-
As we implied above, reinforcement learning can also used to improve user experience. In that case the reward is gained when the user buys a product from us. This makes it closely allied to the area of optimization. Optimization of our user interfaces can be seen as a reinforcement learning task, but more commonly it is thought about separately in the domains of Bayesian optimization or bandit learning.
-
We use optimization in machine learning to find the parameters of our models. We can do that because we have a mathematical representation of our objective function as a direct function of the parameters.
-
Examples in this form of optimization include, what is the best user interface for presenting adverts? What is the best design for a front wing for an F1 racing car? Which product should I return top of the list in response to this user’s search?
-
Bayesian optimization arises when we can’t directly relate the parameters in the system of interest to our objective through a mathematical function. For example, what is the mathematical function that relates a user’s experience to the probability that they will buy a product?
+
As we implied above, reinforcement learning can also be used to improve
+user experience. In that case the reward is gained when the user buys a
+product from us. This makes it closely allied to the area of
+optimization. Optimization of our user interfaces can be seen as a
+reinforcement learning task, but more commonly it is thought about
+separately in the domains of Bayesian optimization or
+bandit learning.
+
We use optimization in machine learning to find the parameters of our
+models. We can do that because we have a mathematical representation of
+our objective function as a direct function of the parameters.
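As a sketch of that kind of direct optimization (gradient descent on a squared-error objective for a linear model, using synthetic data):

import numpy as np

rng = np.random.default_rng(0)
X = rng.normal(size=(100, 3))
w_true = np.array([1.0, -2.0, 0.5])
y = X @ w_true + 0.1 * rng.normal(size=100)

w = np.zeros(3)
lr = 0.1
for it in range(200):
    grad = X.T @ (X @ w - y) / len(y)   # gradient of 0.5 * mean squared error
    w -= lr * grad
print(w)   # close to w_true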
+
Examples in this form of optimization include, what is the best user
+interface for presenting adverts? What is the best design for a front
+wing for an F1 racing car? Which product should I return top of the list
+in response to this user’s search?
+
Bayesian optimization arises when we can’t directly relate the
+parameters in the system of interest to our objective through a
+mathematical function. For example, what is the mathematical function
+that relates a user’s experience to the probability that they will buy a
+product?
Bayesian Optimization
-
One approach to these problems is to use machine learning methods to develop a surrogate model for the optimization task. The surrogate model is a prediction function that attempts to recreate the process we are finding hard to model. We try to simultaneously fit the surrogate model and optimize the process.
+
One approach to these problems is to use machine learning methods to
+develop a surrogate model for the optimization task. The
+surrogate model is a prediction function that attempts to recreate the
+process we are finding hard to model. We try to simultaneously fit the
+surrogate model and optimize the process.
Surrogate Models
-
Bayesian optimization methods use a surrogate model (normally a specific form of regression model). They use this to predict how the real system will perform. The surrogate model makes a prediction (with an estimate of the uncertainty) of what the response will be to any given input. Parameters to test are chosen by considering this prediction. Similar to reinforcement learning, this can be viewed as a model-based approach because the surrogate model can be seen as a model of the real world. In bandit methods strategies are determined without turning to a model to motivate them. They are model free methods.
-
Model-Based and Model Free: Performance
-
Because of their different philosophies, if a class of prediction functions is chosen, then a model-based approach might have better average case performance. At least in terms of data efficiency. A model free approach may well have better worst-case performance though, because it makes less assumptions about the nature of the data. To put it another way, making assumptions about the data is helpful if they are right: and if the model is sensible they’ll be right on average. However, it is unhelpful if the model is wrong. Indeed, it could be actively damaging. Since we can’t usually guarantee the model is absolutely right, the worst-case performance of a model-based approach would be poor.
-
We have introduced a range of machine learning approaches by focusing on their use of mathematical functions to replace manually coded systems of rules. The important characteristic of machine learning is that the form of these functions, as dictated by their parameters, is determined by acquiring data from the real world.
+
Bayesian optimization methods use a surrogate model
+(normally a specific form of regression model). They use this to predict
+how the real system will perform. The surrogate model makes a prediction
+(with an estimate of the uncertainty) of what the response will be to
+any given input. Parameters to test are chosen by considering this
+prediction. Similar to reinforcement learning, this can be viewed as a
+model-based approach because the surrogate model can be seen as
+a model of the real world. In bandit methods strategies are determined
+without turning to a model to motivate them. They are model
+free methods.
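A minimal sketch of such a loop, assuming scikit-learn’s Gaussian process regressor as the surrogate and a simple upper-confidence-bound rule for picking the next experiment (the system function below is a made-up stand-in for the real process we cannot write down):

import numpy as np
from sklearn.gaussian_process import GaussianProcessRegressor

def system(x):
    # Stand-in for the real process, e.g. user response to a design choice.
    return -(x - 0.3) ** 2 + 0.05 * np.random.randn()

X = np.array([[0.0], [1.0]])                    # initial experiments
y = np.array([system(x[0]) for x in X])
grid = np.linspace(0, 1, 101).reshape(-1, 1)    # candidate settings

for it in range(10):
    surrogate = GaussianProcessRegressor().fit(X, y)
    mean, std = surrogate.predict(grid, return_std=True)
    x_next = grid[np.argmax(mean + 2 * std)]    # upper confidence bound
    X = np.vstack([X, [x_next]])
    y = np.append(y, system(x_next[0]))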
+
Model-Based and Model
+Free: Performance
+
Because of their different philosophies, if a class of prediction
+functions is chosen, then a model-based approach might have better
+average case performance. At least in terms of data efficiency.
+A model free approach may well have better worst-case performance
+though, because it makes fewer assumptions about the nature of the data.
+To put it another way, making assumptions about the data is helpful if
+they are right: and if the model is sensible they’ll be right on
+average. However, it is unhelpful if the model is wrong. Indeed, it
+could be actively damaging. Since we can’t usually guarantee the model
+is absolutely right, the worst-case performance of a model-based
+approach would be poor.
+
We have introduced a range of machine learning approaches by focusing
+on their use of mathematical functions to replace manually coded systems
+of rules. The important characteristic of machine learning is that the
+form of these functions, as dictated by their parameters, is determined
+by acquiring data from the real world.
The methods we have introduced are roughly speaking introduced in order of difficulty of deployment. While supervised learning is more involved in terms of collection of data, it is the most straightforward method to deploy once that data is recovered. For this reason, a major focus with supervised learning should always be on maintaining data quality, increasing the efficiency and accountability10 of the data collection pipeline and the quality of features used.
The methods we have introduced are roughly speaking introduced in
+order of difficulty of deployment. While supervised learning is more
+involved in terms of collection of data, it is the most straightforward
+method to deploy once that data is recovered. For this reason, a major
+focus with supervised learning should always be on maintaining data
+quality, increasing the efficiency and accountability11
+of the data collection pipeline and the quality of features used.
In relation to what AI can and can’t do today Andrew Ng is quoted as saying:
+
In relation to what AI can and can’t do today Andrew Ng is quoted as
+saying:
-
If a typical person can do a mental task with less than one second of thought, we can probably automate it using AI either now or in the near future.11 Andrew Ng
+
If a typical person can do a mental task with less than one second of
+thought, we can probably automate it using AI either now or in the near
+future.12 Andrew Ng
Is this Right?
-
I would broadly agree with this quote but only in the context of supervised learning. If a human expert takes around that amount of time, then it’s also likely we can acquire the data necessary to build a supervised learning algorithm that can emulate that human’s response.
-
The picture with regard to unsupervised learning and reinforcement learning is more clouded.
-
One observation is that for supervised learning we seem to be moving beyond the era where very deep machine learning expertise is required to deploy methods. A solid understanding of machine learning (say to Masters level) is certainly required, but the quality of the final result is likely more dependent on domain expertise and the quality of the data and the information processing pipeline. This seems part of a wider trend where some of the big successes in machine learning are moving rapidly from the domain of science to that of engineering.12
So if we can only emulate tasks that humans take around a second to do, how are we managing to deliver on self driving cars? The answer is that we are constructing engineered systems from sub-components, each of which is a machine learning subsystem. But they are tied together as a component based system in line with our traditional engineering approach. This has an advantage that each component in the system can be verified before its inclusion. This is important for debugging and safety. But in practice we can expect these systems to be very brittle. A human adapts the way in which they drive the car across their lifetime. A human can react to other road users. In extreme situations, such as a car jacking, a human can set to one side normal patterns of behavior, and purposely crash their car to draw attention to the situation.
-
Supervised machine learning solutions are normally trained offline. They do not adapt when deployed because this makes them less verifiable. But this compounds the brittleness of our solutions. By deploying our solutions we actually change the environment in which they operate. Therefore, it’s important that they can be quickly updated to reflect changing circumstances. This updating happens offline. For a complex mechanical system, such as a delivery drone, extensive testing of the system may be required when any component is updated. It is therefore imperative that these data processing pipelines are well documented so that they can be redeployed on demand.
-
In practice there can be challenges with the false dichotomy between reproducibility and performance. It is likely that most of our data scientists are caring less about their ability to redeploy their pipelines and only about their ability to produce an algorithm that achieves a particular performance. A key question is how reproducible is that process? There is a false dichotomy because ensuring reproducibility will typically improve performance as it will make it easier to run a rigorous set of explorative experiments. A worry is that, currently, we do not have a way to quantify the scale of this potential problem within companies.
+
I would broadly agree with this quote but only in the context of
+supervised learning. If a human expert takes around that amount of time,
+then it’s also likely we can acquire the data necessary to build a
+supervised learning algorithm that can emulate that human’s
+response.
+
The picture with regard to unsupervised learning and reinforcement
+learning is more clouded.
+
One observation is that for supervised learning we seem to
+be moving beyond the era where very deep machine learning expertise is
+required to deploy methods. A solid understanding of machine learning
+(say to Masters level) is certainly required, but the quality of the
+final result is likely more dependent on domain expertise and the
+quality of the data and the information processing pipeline. This seems
+part of a wider trend where some of the big successes in machine
+learning are moving rapidly from the domain of science to that of
+engineering.13
So if we can only emulate tasks that humans take around a second to
+do, how are we managing to deliver on self-driving cars? The answer is
+that we are constructing engineered systems from sub-components, each of
+which is a machine learning subsystem. But they are tied together as a
+component-based system in line with our traditional engineering
+approach. This has an advantage that each component in the system can be
+verified before its inclusion. This is important for debugging and
+safety. But in practice we can expect these systems to be very brittle.
+A human adapts the way in which they drive the car across their
+lifetime. A human can react to other road users. In extreme situations,
+such as a car jacking, a human can set to one side normal patterns of
+behavior, and purposely crash their car to draw attention to the
+situation.
+
Supervised machine learning solutions are normally trained offline.
+They do not adapt when deployed because this makes them less verifiable.
+But this compounds the brittleness of our solutions. By deploying our
+solutions we actually change the environment in which they operate.
+Therefore, it’s important that they can be quickly updated to reflect
+changing circumstances. This updating happens offline. For a complex
+mechanical system, such as a delivery drone, extensive testing of the
+system may be required when any component is updated. It is therefore
+imperative that these data processing pipelines are well documented so
+that they can be redeployed on demand.
+
In practice there can be challenges with the false dichotomy between
+reproducibility and performance. It is likely that most of our data
+scientists care less about their ability to redeploy their
+pipelines than about their ability to produce an algorithm that
+achieves a particular performance. A key question is how reproducible is
+that process? There is a false dichotomy because ensuring
+reproducibility will typically improve performance as it will make it
+easier to run a rigorous set of explorative experiments. A worry is
+that, currently, we do not have a way to quantify the scale of this
+potential problem within companies.
Model Choice
-
Common to all machine learning methods is the initial choice of useful classes of functions. The deep learning revolution is associated with a particular class of mathematical functions that is proving very successful in what were seen to be challenging domains: speech, vision, language. This has meant that significant advances in problems that have been seen as hard have occurred in artificial intelligence.
+
Common to all machine learning methods is the initial choice of
+useful classes of functions. The deep learning revolution is associated
+with a particular class of mathematical functions that is proving very
+successful in what were seen to be challenging domains: speech, vision,
+language. This has meant that significant advances in problems that have
+been seen as hard have occurred in artificial intelligence.
Thanks!
-
For more information on these subjects and more you might want to check the following resources.
+
For more information on these subjects and more you might want to
+check the following resources.
Andrade-Pacheco, Ricardo, Martin Mubangizi, John Quinn, and Neil D. Lawrence. 2014. “Consistent Mapping of Government Malaria Records Across a Changing Territory Delimitation.” Malaria Journal 13 (Suppl 1). https://doi.org/10.1186/1475-2875-13-S1-P5.
-
-
-
Cooper, Brian. 1991. Transformation of a Valley: Derbyshire Derwent. Scarthin Books.
-
-
-
Gelman, Andrew, John B. Carlin, Hal S. Stern, and Donald B. Rubin. 2013. Bayesian Data Analysis. 3rd ed. Chapman; Hall.
-
-
-
Gething, Peter W., Abdisalan M. Noor, Priscilla W. Gikandi, Esther A. A. Ogara, Simon I. Hay, Mark S. Nixon, Robert W. Snow, and Peter M. Atkinson. 2006. “Improving Imperfect Data from Health Management Information Systems in Africa Using Space–Time Geostatistics.” PLoS Medicine 3 (6). https://doi.org/10.1371/journal.pmed.0030271.
McCulloch, Warren S., and Walter Pitts. 1943. “A Logical Calculus of the Ideas Immanent in Nervous Activity.” Bulletin of Mathematical Biophysics 5: 115–33.
-
-
-
Mubangizi, Martin, Ricardo Andrade-Pacheco, Michael Thomas Smith, John Quinn, and Neil D. Lawrence. 2014. “Malaria Surveillance with Multiple Data Sources Using Gaussian Process Models.” In 1st International Conference on the Use of Mobile ICT in Africa.
-
-
-
Robbins, H., and S. Monro. 1951. “A Stochastic Approximation Method.” Annals of Mathematical Statistics 22: 400–407.
-
-
-
Taigman, Yaniv, Ming Yang, Marc’Aurelio Ranzato, and Lior Wolf. 2014. “DeepFace: Closing the Gap to Human-Level Performance in Face Verification.” In Proceedings of the IEEE Computer Society Conference on Computer Vision and Pattern Recognition. https://doi.org/10.1109/CVPR.2014.220.
-
-
-
+
+
+Andrade-Pacheco, R., Mubangizi, M., Quinn, J., Lawrence, N.D., 2014.
+Consistent mapping of government malaria records across a changing
+territory delimitation. Malaria Journal 13. https://doi.org/10.1186/1475-2875-13-S1-P5
+
+Cooper, B., 1991. Transformation of a valley: Derbyshire derwent.
+Scarthin Books.
+
+
+Efron, B., 1979. Bootstrap methods: Another look at the jackknife. Annals
+of Statistics 7, 1–26.
+
+
+Gelman, A., Carlin, J.B., Stern, H.S., Dunson, D.B., Vehtari, A., Rubin,
+D.B., 2013. Bayesian data analysis, 3rd ed. Chapman; Hall.
+
+
+Gething, P.W., Noor, A.M., Gikandi, P.W., Ogara, E.A.A., Hay, S.I.,
+Nixon, M.S., Snow, R.W., Atkinson, P.M., 2006. Improving imperfect data
+from health management information systems in Africa using
+space–time geostatistics. PLoS Medicine 3. https://doi.org/10.1371/journal.pmed.0030271
+
+McCulloch, W.S., Pitts, W., 1943. A logical calculus of the ideas
+immanent in nervous activity. Bulletin of Mathematical Biophysics 5,
+115–133. https://doi.org/10.1007/BF02478259
+
+
+Mubangizi, M., Andrade-Pacheco, R., Smith, M.T., Quinn, J., Lawrence,
+N.D., 2014. Malaria surveillance with multiple data sources using
+Gaussian process models, in: 1st International Conference
+on the Use of Mobile ICT in Africa.
+
+
+Ng, A.Y., Jordan, M.I., Weiss, Y., n.d. On spectral clustering: Analysis
+and an algorithm.
+
+Shi, J., Malik, J., 2000. Normalized cuts and image segmentation. IEEE
+Transactions on Pattern Analysis and Machine Intelligence 22, 888–905.
+
+
+Taigman, Y., Yang, M., Ranzato, M., Wolf, L., 2014.
+DeepFace: Closing the gap to human-level performance in
+face verification, in: Proceedings of the IEEE Computer
+Society Conference on Computer Vision and Pattern Recognition. https://doi.org/10.1109/CVPR.2014.220
+
+
+The Office of the Senior Special Assistant to the President on the
+Millennium Development Goals (OSSAP-MDGs), Columbia University, 2014.
+Nigeria NMIS facility database.
+
+
+
+
diff --git a/_lectures/02-ml-systems.html b/_lectures/02-ml-systems.html
index 4b15b07..6aa0e85 100644
--- a/_lectures/02-ml-systems.html
+++ b/_lectures/02-ml-systems.html
@@ -1,7 +1,14 @@
---
title: "Introduction to Machine Learning Systems"
venue: "Virtual DSA"
-abstract: "This notebook introduces some of the challenges of building machine learning data systems. It will introduce you to concepts around joining of databases together. The storage and manipulation of data is at the core of machine learning systems and data science. The goal of this notebook is to introduce the reader to these concepts, not to authoritatively answer any questions about the state of Nigerian health facilities or Covid19, but it may give you ideas about how to try and do that in your own country."
+abstract: "This notebook introduces some of the challenges of building
+machine learning data systems. It will introduce you to concepts around
+joining of databases together. The storage and manipulation of data is
+at the core of machine learning systems and data science. The goal of
+this notebook is to introduce the reader to these concepts, not to
+authoritatively answer any questions about the state of Nigerian health
+facilities or Covid19, but it may give you ideas about how to try and do
+that in your own country."
author:
- given: Eric
family: Meissner
@@ -9,27 +16,29 @@
institute:
twitter: meissner_eric_7
gscholar:
- orchid:
+ orcid:
- given: Andrei
family: Paleyes
url: https://www.linkedin.com/in/andreipaleyes/
institute:
twitter:
gscholar:
- orchid:
+ orcid:
- given: Neil D.
family: Lawrence
url: http://inverseprobability.com
institute:
twitter: lawrennd
gscholar:
- orchid:
+ orcid:
+edit_url: https://github.com/mlatcl/dsa/edit/gh-pages/_lamd/ml-systems.md
date: 2020-07-24
published: 2020-07-24
-week: 0
session: 2
reveal: 02-ml-systems.slides.html
+transition: None
ipynb: 02-ml-systems.ipynb
+pptx: 02-ml-systems.pptx
layout: lecture
categories:
- notes
@@ -45,253 +54,953 @@
-
Question
-
In this notebook, we explore the question of health facility distribution in Nigeria, spatially, and in relation to population density.
-
We answer and visualize the question “How does the number of health facilities per capita vary across Nigeria?”
-
Rather than focussing purely on using tools like pandas to manipulate the data, our focus will be on introducing some concepts from databases.
-
Machine learning can be summarized as $$
-\text{model} + \text{data} \xrightarrow{\text{compute}} \text{prediction}
-$$ and many machine learning courses focus a lot on the model part. But to build a machine learning system in practice, a lot of work has to be put into the data part. This notebook gives some pointers on that work and how to think about your machine learning systems design.
+
Nigerian Health Facility
+Distribution
+
In this notebook, we explore the question of health facility
+distribution in Nigeria, spatially, and in relation to population
+density.
+
We explore and visualize the question “How does the number of health
+facilities per capita vary across Nigeria?”
+
Rather than focussing purely on using tools like pandas
+to manipulate the data, our focus will be on introducing some concepts
+from databases.
+
Machine learning can be summarized as \[
+\text{model} + \text{data} \xrightarrow{\text{compute}}
+\text{prediction}
+\] and many machine learning courses focus a lot on the model
+part. But to build a machine learning system in practice, a lot of work
+must be put into the data part. This notebook gives some pointers on
+that work and how to think about your machine learning systems
+design.
Datasets
-
In this notebook , we download 4 datasets:
+
In this notebook, we download 4 datasets:
Nigeria NMIS health facility data
-
Population data for Administrative Zone 1 (states) areas in Nigeria
+
Population data for Administrative Zone 1 (states) areas in
+Nigeria
Map boundaries for Nigerian states (for plotting and binning)
Covid cases across Nigeria (as of May 20, 2020)
-
But joining these data sets together is just an example. As another example, you could think of SafeBoda, a ride-hailing app that’s available in Lagos and Kampala. As well as looking at the health examples, try to imagine how SafeBoda may have had to design their systems to be scalable and reliable for storing and sharing data.
First, we’re going to download some particular python libraries for dealing with geospatial data. We’re dowloading geopandas which will help us deal with ‘shape files’ that give the geographical lay out of Nigeria. And to get a small database set up running quickly, we’re installing csv-to-sqlite which allows us to convert CSV data to a simple database.
-
%pip install geopandas
+
But joining these data sets together is just an example. As another
+example, you could think of SafeBoda, a ride-hailing app that’s
+available in Lagos and Kampala. As well as looking at the health
+examples, try to imagine how SafeBoda may have had to design their
+systems to be scalable and reliable for storing and sharing data.
First, we’re going to download some particular python libraries for
+dealing with geospatial data. We’re downloading geopandas, which will help
+us deal with ‘shape files’ that give the geographical layout of
+Nigeria. We also need pygeos for indexing.
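Assuming a Jupyter environment, both libraries can be installed directly from the notebook:

%pip install geopandas pygeos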
In Sheffield we created a suite of software tools for ‘Open Data
+Science’. Open data science is an approach to sharing code, models and
+data that should make it easier for companies, health professionals and
+scientists to gain access to data science techniques.
The mlai software is a suite of helper functions for
+teaching and demonstrating machine learning algorithms. It was first
+used in the Machine Learning and Adaptive Intelligence course in
+Sheffield in 2013.
+
The software can be installed using
+
%pip install mlai
+
from the command prompt where you can access your python
+installation.
The main idea we will be working with today is called the ‘join’. A join does exactly what it sounds like, it combines two database tables.
-
You have already started to look at data structures, in particular you have been learning about pandas which is a great way of storing and structuring your data set to make it easier to plot and manipulate your data.
-
Pandas is great for the data scientist to analyze data because it makes many operations easier. But it is not so good for building the machine learning system. In a machine learning system, you may have to handle a lot of data. Even if you start with building a system where you only have a few customers, perhaps you build an online taxi system (like SafeBoda) for Kampala. Maybe you will have 50 customers. Then maybe your system can be handled with some python scripts and pandas.
The main idea we will be working with in this practical is the
+‘join’. A join does exactly what it sounds like, it combines two
+database tables.
+
You may have already started to look at data structures and learning
+about pandas which is a great way of storing and
+structuring your data set to make it easier to plot and manipulate your
+data.
+
Pandas is great for the data scientist to analyze data because it
+makes many operations easier. But it is not so good for building the
+machine learning system. In a machine learning system, you may have to
+handle a lot of data. Even if you start with building a system where you
+only have a few customers, perhaps you build an online taxi system (like
+SafeBoda) for Kampala. Maybe you
+will have 50 customers. Then maybe your system can be handled with some
+python scripts and pandas.
Scaling ML Systems
-
But what if you are succesful? What if everyone in Kampala wants to use your system? There are 1.5 million people in Kampala and maybe 100,000 Boda Boda drivers.
-
What if you are even more succesful? What if everyone in Lagos wants to use your system? There are around 20 million people in Lagos … and maybe as many Okada drivers as people in Kampala!
-
We want to build safe and reliable machine learning systems. Building them from pandas and python is about as safe and reliable as taking six children to school on a boda boda.
-
To build a reliable system, we need to turn to databases. In this notebook we’ll be focussing on SQL databases and how you bring together different streams of data in a Machine Learning System.
-
In a machine learning system, you will need to bring different data sets together. In database terminology this is known as a ‘join’. You have two different data sets, and you want to join them together. Just like you can join two pieces of metal using a welder, or two pieces of wood with screws.
-
But instead of using a welder or screws to join data, we join it using particular columns of the data. We can join data together using people’s names. One database may contain where people live, another database may contain where they go to school. If we join these two databases we can have a database which shows where people live and where they got to school.
-
In the notebook, we will join together some data about where the health centres are in Nigeria and where the have been cases of Covid19. There are other challenges in the ML System Design that are not going to be covered here. They include: how to update the data bases, and how to control access to the data bases from different users (boda boda drivers, riders, administrators etc).
The first and primary dataset we use is the NMIS health facility dataset, which contains data on the location, type, and staffing of health facilities across Nigeria.
It’s always a good idea to inspect your data once it’s downloaded to check it contains what you expect. In pandas you can do this with the .head() method. That allows us to see the first few entries of the pandas data structure.
-
hospital_data.head()
-
We can also check in pandas what the different columns of the data frame are to see what it contains.
-
hospital_data.columns
-
We can immiediately see that there are facility names, dates, and some characteristics of each health center such as number of doctors etc. As well as all that, we have two fields, latitude and longitude that likely give us the hospital locaiton. Let’s plot them to have a look.
There we have the location of these different hospitals. We set alpha in the plot to 0.01 to make the dots transparent, so we can see the locations of each health center.
+
But what if you are successful? What if everyone in Kampala wants to
+use your system? There are 1.5 million people in Kampala and maybe
+100,000 Boda Boda drivers.1
+
What if you are even more successful? What if everyone in Lagos wants
+to use your system? There are around 20 million people in Lagos … and
+maybe as many Okada[^okada] drivers as people in Kampala!
+
[^okada]: In Lagos the Boda Boda is called an Okada.
To build a reliable system, we need to turn to databases. In
+this notebook we’ll
+be focusing on SQL databases and how you bring together different
+streams of data in a Machine Learning System.
+
In a machine learning system, you will need to bring different data
+sets together. In database terminology this is known as a ‘join’. You
+have two different data sets, and you want to join them together. Just
+like you can join two pieces of metal using a welder, or two pieces of
+wood with screws.
+
But instead of using a welder or screws to join data, we join it
+using columns of the data. We can join data together using people’s
+names. One database may contain where people live, another database may
+contain where they go to school. If we join these two databases, we can
+have a database which shows where people live and where they go to
+school.
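As a small pandas sketch of that idea (the names, places, and schools below are entirely made up):

import pandas as pd

homes = pd.DataFrame({'name': ['Wangari', 'Amina', 'Kofi'],
                      'lives_in': ['Kampala', 'Lagos', 'Accra']})
schools = pd.DataFrame({'name': ['Amina', 'Kofi', 'Wangari'],
                        'school': ['Community High', 'Hilltop Academy', 'Lakeside School']})
joined = homes.merge(schools, on='name')   # an inner join on the shared name column
print(joined)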
+
In the notebook, we will join some data about where the health
+centers are in Nigeria with data about where there have been cases of
+Covid19. There are other challenges in the ML System Design that are not
+going to be covered here. They include how to update the databases and
+how to control access to the databases from different users (boda boda
+drivers, riders, administrators etc).
As an example data set we will use Nigerian Millennium Development
+Goals Information System Health Facility (The Office of the Senior Special Assistant
+to the President on the Millennium Development Goals (OSSAP-MDGs) and
+Columbia University, 2014). It can be found here https://energydata.info/dataset/nigeria-nmis-education-facility-data-2014.
+
Taking from the information on the site,
+
+
The Nigeria MDG (Millennium Development Goals) Information System –
+NMIS health facility data is collected by the Office of the Senior
+Special Assistant to the President on the Millennium Development Goals
+(OSSAP-MDGs) in partner with the Sustainable Engineering Lab at Columbia
+University. A rigorous, geo-referenced baseline facility inventory
+across Nigeria is created spanning from 2009 to 2011 with an additional
+survey effort to increase coverage in 2014, to build Nigeria’s first
+nation-wide inventory of health facility. The database includes 34,139
+health facilities info in Nigeria.
+
The goal of this database is to make the data collected available to
+planners, government officials, and the public, to be used to make
+strategic decisions for planning relevant interventions.
+
For data inquiry, please contact Ms. Funlola Osinupebi, Performance
+Monitoring & Communications, Advisory Power Team, Office of the Vice
+President at funlola.osinupebi@aptovp.org
Suggested citation: Nigeria NMIS facility database (2014), the Office
+of the Senior Special Assistant to the President on the Millennium
+Development Goals (OSSAP-MDGs) & Columbia University
+
+
For ease of use we’ve packaged this data set in the pods
+library
+
data = pods.datasets.nigeria_nmis()['Y']
+data.head()
+
Alternatively, you can access the data directly with the following
+commands.
Once it is loaded in the data can be summarized using the
+describe method in pandas.
+
data.describe()
+
We can also find out the dimensions of the dataset using the
+shape property.
+
data.shape
+
Dataframes have different functions that you can use to explore and
+understand your data. In python and the Jupyter notebook it is possible
+to see a list of all possible functions and attributes by typing the
+name of the object followed by .<Tab>. For example, in
+the above case if we type data.<Tab> it shows the
+columns available (these are attributes in pandas dataframes) such as
+num_nurses_fulltime, and also functions, such as
+.describe().
+
For functions we can also see the documentation about the function by
+following the name with a question mark. This will open a box with
+documentation at the bottom which can be closed with the x button.
+
data.describe?
+
Figure: Location of the over thirty-four thousand health facilities
+registered in the NMIS data across Nigeria. Each facility plotted
+according to its latitude and longitude.
A very common operation is the need to map from locations in a country to the administrative regions. If we were building a ride sharing app, we might also want to map riders to locations in the city, so that we could know how many riders we had in different city areas.
-
Administrative regions have various names like cities, counties, districts or states. These conversions for the administrative regions are important for getting the right information to the right people.
-
Of course, if we had a knowlegdeable Nigerian, we could ask her about what the right location for each of these health facilities is, which state is it in? But given that we have the latitude and longitude, we should be able to find out automatically what the different states are.
-
This is where “geo” data becomes important. We need to download a dataset that stores the location of the different states in Nigeria. These files are known as ‘outline’ files. Because they draw the different states of different countries in outline.
-
There are special databases for storing this type of information, the database we are using is in the gdb or GeoDataBase format. It comes in a zip file. Let’s download the outline files for the Nigerian states. They have been made available by the Humanitarian Data Exchange, you can also find other states data from the same site.
Now we have this data of the outlines of the different states in Nigeria.
-
The next thing we need to know is how these health facilities map onto different states in Nigeria. Without “binning” facilities somehow, it’s difficult to effectively see how they are distributed across the country.
-
We do this by finding a “geo” dataset that contains the spatial outlay of Nigerian states by latitude/longitude coordinates. The dataset we use is of the “gdb” (GeoDataBase) type and comes as a zip file. We don’t need to worry much about this datatype for this notebook, only noting that geopandas knows how to load in the dataset, and that it contains different “layers” for the same map. In this case, each layer is a different degree of granularity of the political boundaries, with layer 0 being the whole country, 1 is by state, or 2 is by local government. We’ll go with a state level view for simplicity, but as an excercise you can change it to layer 2 to view the distribution by local government.
-
Once we have these MultiPolygon objects that define the boundaries of different states, we can perform a spatial join (sjoin) from the coordinates of individual health facilities (which we already converted to the appropriate Point type when moving the health data to a GeoDataFrame.)
A very common operation is the need to map from locations in a
+country to the administrative regions. If we were building a ride
+sharing app, we might also want to map riders to locations in the city,
+so that we could know how many riders we had in different city
+areas.
+
Administrative regions have various names like cities, counties,
+districts, or states. These conversions for the administrative regions
+are important for getting the right information to the right people.
+
Of course, if we had a knowledgeable Nigerian, we could ask her about
+what the right location for each of these health facilities is, which
+state is it in? But given that we have the latitude and longitude, we
+should be able to find out automatically what the different states
+are.
+
This is where “geo” data becomes important. We need to download a dataset that stores the location of the different states in Nigeria. These files are known as ‘outline’ files because they draw the different states of different countries in outline.
+
There are special databases for storing this type of information; the database we are using is in the gdb or GeoDataBase format. It comes in a zip file. Let’s download the outline files for the Nigerian states. They have been made available by the Humanitarian Data Exchange; you can also find other states’ data from the same site.
Figure: Border locations for the thirty-six different states of
+Nigeria.
+
+
+
zones_gdf = data
+zones_gdf['admin1Name_en'] = zones_gdf.index
+
Now we have this data of the outlines of the different states in
+Nigeria.
+
The next thing we need to know is how these health facilities map
+onto different states in Nigeria. Without “binning” facilities somehow,
+it’s difficult to effectively see how they are distributed across the
+country.
+
We do this by finding a “geo” dataset that contains the spatial
+outlay of Nigerian states by latitude/longitude coordinates. The dataset
+we use is of the “gdb” (GeoDataBase) type and comes as a zip file. We
+don’t need to worry much about this datatype for this notebook, only
+noting that geopandas knows how to load in the dataset, and that it
+contains different “layers” for the same map. In this case, each layer
+is a different degree of granularity of the political boundaries, with
+layer 0 being the whole country, 1 is by state, or 2 is by local
+government. We’ll go with a state level view for simplicity, but as an
exercise you can change it to layer 2 to view the distribution by local
+government.
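As a rough sketch of how this loading looks in code (the path here is an assumption: point it at wherever you unzipped the outline files), fiona can list the layers and geopandas can read the state-level one:

import fiona
import geopandas as gpd

# Assumed location of the extracted GeoDataBase directory.
states_file = "nga_admbnda_osgof_eha_itos.gdb/"

layers = fiona.listlayers(states_file)           # inspect the available layers
zones_gdf = gpd.read_file(states_file, layer=1)  # layer 1: state boundaries
zones_gdf.crs = "EPSG:4326"
zones_gdf = zones_gdf.set_index('admin1Name_en')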
+
Once we have these MultiPolygon objects that define the
+boundaries of different states, we can perform a spatial join (sjoin)
+from the coordinates of individual health facilities (which we already
+converted to the appropriate Point type when moving the
+health data to a GeoDataFrame.)
Joining a GeoDataFrame
-
The first database join we’re going to do is a special one, it’s a ‘spatial join’. We’re going to join together the locations of the hospitals with their states.
-
This join is unusual because it requires some mathematics to get right. The outline files give us the borders of the different states in latitude and longitude, the health facilities have given locations in the country.
-
A spatial join involves finding out which state each health facility belongs to. Fortunately, the mathematics you need is already programmed for you in GeoPandas. That means all we need to do is convert our pandas dataframe of health facilities into a GeoDataFrame which allows us to do the spatial join.
There are some technial details here: the crs refers to the coordinate system in use by a particular GeoDataFrame. EPSG:4326 is the standard coordinate system of latitude/longitude.
-
Your First Join: Converting GPS Coordinates to States
-
Now we have the data in the GeoPandas format, we can start converting into states. We will use the fiona library for reading the right layers from the files. Before we do the join, lets plot the location of health centers and states on the same map.
-
import fiona
-
states_file ="/content/nga_admbnda_osgof_eha_itos.gdb/nga_admbnda_osgof_eha_itos.gdb/nga_admbnda_osgof_eha_itos.gdb/nga_admbnda_osgof_eha_itos.gdb/"
-
-# geopandas included map, filtered to just Nigeria
-world = gpd.read_file(gpd.datasets.get_path('naturalearth_lowres'))
-world.crs ="EPSG:4326"
-nigeria = world[(world['name'] =='Nigeria')]
-base = nigeria.plot(color='white', edgecolor='black', alpha=0, figsize=(11, 11))
-
-layers = fiona.listlayers(states_file)
-zones_gdf = gpd.read_file(states_file, layer=1)
-zones_gdf.crs ="EPSG:4326"
-zones_gdf = zones_gdf.set_index('admin1Name_en')
-zones_gdf.plot(ax=base, color='white', edgecolor='black')
-
-# We can now plot our ``GeoDataFrame``.
-hosp_gdf.plot(ax=base, color='b', alpha=0.02, )
-
-plt.show()
+
The first database join we’re going to do is a special one, it’s a
+‘spatial join’. We’re going to join the locations of the hospitals with
+their states.
+
This join is unusual because it requires some mathematics to get right. The outline files give us the borders of the different states in latitude and longitude; the health facilities have point locations within the country.
+
A spatial join involves finding out which state each health facility
+belongs to. Fortunately, the mathematics you need is already programmed
+for you in GeoPandas. That means all we need to do is convert our
+pandas dataframe of health facilities into a
+GeoDataFrame which allows us to do the spatial join.
+
First, we convert the hospital data to a geopandas data
+frame.
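A sketch of that conversion, assuming the facility coordinates are stored in longitude and latitude columns (check data.columns for the exact names):

import geopandas as gpd

# Turn each longitude/latitude pair into a Point geometry.
hosp_gdf = gpd.GeoDataFrame(
    data, geometry=gpd.points_from_xy(data.longitude, data.latitude))
hosp_gdf.crs = "EPSG:4326"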
There are some technical details here: the crs refers to the coordinate system in use by a particular GeoDataFrame. EPSG:4326 is the standard coordinate system of latitude/longitude.
+
Your First
+Join: Converting GPS Coordinates to States
+
Now we have the data in the GeoPandas format, we can start converting locations into states. We will use the fiona library for reading the right layers from the files. Before we do the join, let’s plot the location of health centers and states on the same map.
Figure: The outline of the thirty-six different states of Nigeria with the locations of the health centers plotted on the map.
+
+
Performing the Spatial Join
-
We’ve now plotted the different health center locations across the states. You can clearly see that each of the dots falls within a different state. For helping the visualisation, we’ve made the dots somewhat transparent (we set the alpha in the plot). This means that we can see the regions where there are more health centers, you should be able to spot where the major cities in Nigeria are given the increased number of health centers in those regions.
-
Of course, we can now see by eye, which of the states each of the health centers belongs to. But we want the computer to do our join for us. GeoPandas provides us with the spatial join. Here we’re going to do a left or outer join.
-
from geopandas.tools import sjoin
-
We have two GeoPandas data frames, hosp_gdf and zones_gdf. Let’s have a look at the columns the contain.
-
hosp_gdf.columns
-
We can see that this is the GeoDataFrame containing the information about the hospital. Now let’s have a look at the zones_gdf data frame.
-
zones_gdf.columns
-
You can see that this data frame has a different set of columns. It has all the different administrative regions. But there is one column name that overlaps. We can find it by looking for the intersection between the two sets.
Here we’ve converted the lists of columns into python ‘sets’, and then looked for the intersection. The join will occur on the intersection between these columns. It will try and match the geometry of the hospitals (their location) to the geometry of the states (their outlines). This match is done in one line in GeoPandas.
-
We’re having to use GeoPandas because this join is a special one based on geographical locations, if the join was on customer name or some other discrete variable, we could do the join in pandas or directly in SQL.
The intersection of the two data frames indicates how the two data frames will be joined (if there’s no intersection, they can’t be joined). It’s like indicating the two holes that would need to be bolted together on two pieces of metal. If the holes don’t match, the join can’t be done. There has to be an intersection.
-
But what will the result look like? Well the join should be the ‘union’ of the two data frames. We can have a look at what the union should be by (again) converting the columns to sets.
That gives a list of all the columns (notice that ‘geometry’ only appears once).
-
Let’s check that’s what the join command did, by looking at the columns of our new data frame, hosp_state_joined. Notice also that there’s a new column: index_right. The two original data bases had separate indices. The index_right column represents the index from the zones_gdf, which is the Nigerian state.
-
set(hosp_state_joined.columns)
-
Great! They are all there! We have completed our join. We had two separate data frames with information about states and information about hospitals. But by performing an ‘outer’ or a ‘left’ join, we now have a single data frame with all the information in the same place! Let’s have a look at the first frew entries in the new data frame.
-
hosp_state_joined.head()
+
We’ve now plotted the different health center locations across the
+states. You can clearly see that each of the dots falls within a
different state. To help the visualization, we’ve made the dots
+somewhat transparent (we set the alpha in the plot). This
+means that we can see the regions where there are more health centers,
+you should be able to spot where the major cities in Nigeria are given
+the increased number of health centers in those regions.
+
Of course, we can now see by eye, which of the states each of the
+health centers belongs to. But we want the computer to do our join for
+us. GeoPandas provides us with the spatial join. Here we’re
+going to do a left
+or outer join.
+
from geopandas.tools import sjoin
+
We have two GeoPandas data frames, hosp_gdf and
zones_gdf. Let’s have a look at the columns they contain.
+
hosp_gdf.columns
+
We can see that this is the GeoDataFrame containing the information
+about the hospital. Now let’s have a look at the zones_gdf
+data frame.
+
zones_gdf.columns
+
You can see that this data frame has a different set of columns. It
+has all the different administrative regions. But there is one column
+name that overlaps. We can find it by looking for the intersection
+between the two sets.
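In code, that check might look like this:

# The only column name shared by the two GeoDataFrames should be the geometry.
set(hosp_gdf.columns).intersection(set(zones_gdf.columns))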
Here we’ve converted the lists of columns into python ‘sets’, and
+then looked for the intersection. The join will occur on the
+intersection between these columns. It will try and match the geometry
+of the hospitals (their location) to the geometry of the states (their
+outlines). This match is done in one line in GeoPandas.
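That one line is the sjoin call itself; a sketch using the frames defined above:

from geopandas.tools import sjoin

# Left join: keep every hospital and attach the state whose outline contains it.
hosp_state_joined = sjoin(hosp_gdf, zones_gdf, how='left')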
+
We’re having to use GeoPandas because this join is a special one based on geographical locations. If the join were on customer name or some other discrete variable, we could do the join in pandas or directly in SQL.
The intersection of the two data frames indicates how the two data
+frames will be joined (if there’s no intersection, they can’t be
+joined). It’s like indicating the two holes that would need to be bolted
+together on two pieces of metal. If the holes don’t match, the join
+can’t be done. There has to be an intersection.
+
But what will the result look like? Well, the join should be the
+‘union’ of the two data frames. We can have a look at what the union
+should be by (again) converting the columns to sets.
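The union can be computed in the same way:

set(hosp_gdf.columns).union(set(zones_gdf.columns))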
That gives a list of all the columns (notice that ‘geometry’ only
+appears once).
+
Let’s check that’s what the join command did, by looking at the
+columns of our new data frame, hosp_state_joined. Notice
+also that there’s a new column: index_right. The two
+original data bases had separate indices. The index_right
+column represents the index from the zones_gdf, which is
+the Nigerian state.
+
set(hosp_state_joined.columns)
+
Great! They are all there! We have completed our join. We had two
+separate data frames with information about states and information about
+hospitals. But by performing an ‘outer’ or a ‘left’ join, we now have a
+single data frame with all the information in the same place! Let’s have
a look at the first few entries in the new data frame.
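We can inspect the joined frame with the usual pandas head method:

hosp_state_joined.head()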
Our first join was a special one, because it involved spatial data. That meant using the special gdb format and the GeoPandas tool for manipulating that data. But we’ve now saved our updated data in a new file.
-
To do this, we use the command line utility that comes standard for SQLite database creation. SQLite is a simple database that’s useful for playing with database commands on your local machine. For a real system, you would need to set up a server to run the database. The server is a separate machine with the job of answering database queries. SQLite pretends to be a proper database, but doesn’t require us to go to the extra work of setting up a server. Popular SQL server software includes MySQL which is free or Microsoft’s SQL Server.
-
A typical machine learning installation might have you running a database from a cloud service (such as AWS, Azure or Google Cloud Platform). That cloud service would host the database for you and you would pay according to the number of queries made.
-
Many start-up companies were formed on the back of a MySQL server hosted on top of AWS. You can read how to do that here.
-
If you were designing your own ride hailing app, or any other major commercial software you would want to investigate whether you would need to set up a central SQL server in one of these frameworks.
-
Today though, we’ll just stick to SQLite which gives you a sense of the database without the time and expense of setting it up on the cloud. As well as showing you the SQL commands (which is often what’s used in a production ML system) we’ll also give the equivalent pandas commands, which would often be what you would use when you’re doing data analysis in python and Jupyter.
Our first join was a special one, because it involved spatial data.
+That meant using the special gdb format and the
+GeoPandas tool for manipulating that data. But we’ve now
+saved our updated data in a new file.
+
To do this, we use the command line utility that comes standard for
+SQLite database creation. SQLite is a simple database that’s useful for
+playing with database commands on your local machine. For a real system,
+you would need to set up a server to run the database. The server is a
+separate machine with the job of answering database queries. SQLite
+pretends to be a proper database but doesn’t require us to go to the
+extra work of setting up a server. Popular SQL server software includes
+MariaDB which is open
+source, or Microsoft’s
+SQL Server.
+
A typical machine learning installation might have you running a
+database from a cloud service (such as AWS, Azure or Google Cloud
+Platform). That cloud service would host the database for you, and you
+would pay according to the number of queries made.
+
Many start-up companies were formed on the back of a MySQL server hosted on top of AWS. Since MySQL was sold to Sun, and then passed on to Oracle, the open source community has turned its attention to MariaDB; here are the AWS instructions on how to set up MariaDB.
+
If you were designing your own ride hailing app, or any other major
+commercial software you would want to investigate whether you would need
+to set up a central SQL server in one of these frameworks.
+
Today though, we’ll just stick to SQLite which gives you a sense of
+the database without the time and expense of setting it up on the cloud.
+As well as showing you the SQL commands (which is often what’s used in a
+production ML system) we’ll also give the equivalent pandas
+commands, which would often be what you would use when you’re doing data
+analysis in python and Jupyter.
Create the SQLite Database
-
The beautiful thing about SQLite is that it allows us to play with SQL without going to the work of setting up a proper SQL server. Creating a data base in SQLite is as simple as writing a new file. To create the database, we’ll first write our joined data to a CSV file, then we’ll use a little utility to convert our hospital database into a SQLite database.
-
hosp_state_joined.to_csv('facilities.csv')
-
%pip install csv-to-sqlite
-
!csv-to-sqlite -f facilities.csv -t full -o db.sqlite
-
Rather than being installed on a separate server, SQLite simply stores the database locally in a file called db.sqlite.
-
In the database there can be several ‘tables’. Each table can be thought of as like a separate dataframe. The table name we’ve just saved is ‘hospitals_zones_joined’.
The beautiful thing about SQLite is that it allows us to play with
+SQL without going to the work of setting up a proper SQL server.
+Creating a data base in SQLite is as simple as writing a new file. To
+create the database, we’ll first write our joined data to a CSV file,
+then we’ll use a little utility to convert our hospital database into a
+SQLite database.
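The CSV write itself is a single line; the filename is chosen to match the table name used by the command below:

hosp_state_joined.to_csv('hospitals_zones_joined.csv')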
!csv-to-sqlite -f hospitals_zones_joined.csv -t full -o db.sqlite
+
Rather than being installed on a separate server, SQLite simply
+stores the database locally in a file called db.sqlite.
+
In the database there can be several ‘tables’. Each table can be
+thought of as like a separate dataframe. The table name we’ve just saved
+is ‘hospitals_zones_joined’.
Accessing the SQL Database
-
Now that we have a SQL database, we can create a connection to it and query it using SQL commands. Let’s try to simply select the data we wrote to it, to make sure its the same.
-
Start by making a connection to the database. This will often be done via remote connections, but for this example we’ll connect locally to the database using the filepath directly.
-
conn = create_connection("db.sqlite")
-
Now that we have a connection, we can write a command and pass it to the database.
-
To access a data base, the first thing that is made is a connection. Then SQL is used to extract the information required. A typical SQL command is SELECT. It allows us to extract rows from a given table. It operates a bit like the .head() method in pandas, it will return the first N rows (by default the .head() command returns the first 5 rows, but you can set n to whatever you like. Here we’ve included a default value of 5 to make it match the pandas command.
-
The python library, sqlite3, allows us to access the SQL database directly from python. We do this using an execute command on the connection.
-
Typically, its good software engineering practice to ‘wrap’ the database command in some python code. This allows the commands to be maintained. Below we wrap the SQL command
-
SELECT * FROM [table_name] LIMIT : N
-
in python code. This SQL command selects the first N entries from a given database called table_name.
-
We can pass the table_name and number of rows, N to the python command.
-
Let’s have a go at calling the command to extract the first three facilities from our health center database. Let’s try creating a function that does the same thing the pandas .head() method does so we can inspect our database.
-
def head(conn, table, n=5):
- rows = select_top(conn, table, n)
-for r in rows:
-print(r)
-
head(conn, 'facilities')
-
Great! We now have the data base in SQLite, and some python functions that operate on the data base by wrapping SQL commands.
-
We will return to the SQL command style after download and add the other datasets to the database using a combination of pandas and the csv-to-sqlite utility.
-
Our next task will be to introduce data on COVID19 so that we can join that to our other data sets.
+
Now that we have a SQL database, we can create a connection to it and
+query it using SQL commands. Let’s try to simply select the data we
+wrote to it, to make sure it’s the same.
+
Start by making a connection to the database. This will often be done
+via remote connections, but for this example we’ll connect locally to
+the database using the filepath directly.
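The create_connection helper used below isn’t shown in these notes; a minimal sketch built on the standard sqlite3 library might look like this:

import sqlite3
from sqlite3 import Error

def create_connection(db_file):
    # Return a connection to the SQLite database stored in db_file,
    # or None if the connection could not be made.
    conn = None
    try:
        conn = sqlite3.connect(db_file)
    except Error as e:
        print(e)
    return conn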
+
To access a database, the first thing that is made is a connection. Then SQL is used to extract the information required. A typical SQL command is SELECT. It allows us to extract rows from a given table. It operates a bit like the .head() method in pandas: it will return the first N rows (by default the .head() command returns the first 5 rows, but you can set N to whatever you like). Here we’ve included a default value of 5 to make it match the pandas command.
+
We pass these SQL commands to the database using an execute command on the connection.
+
Typically, it’s good software engineering practice to ‘wrap’ the database command in some python code. This allows the commands to be maintained. You will also be asked to do this in your final assessment, including re-writing some of the code - pay attention to the slight syntax differences and multi-statement queries. Below we wrap the SQL command
+
SELECT * FROM table_name LIMIT N
+
in python code. This SQL command selects the first N
+entries from a given database called table_name.
+
We can pass the table_name and number of rows,
+n, to the python command.
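A sketch of such a wrapper (select_top is the name the head function below expects; table names cannot be passed as SQL parameters, so the table is interpolated directly):

def select_top(conn, table, n=5):
    # Query the first n rows of the given table.
    cur = conn.cursor()
    cur.execute(f"SELECT * FROM [{table}] LIMIT :n", {"n": n})
    return cur.fetchall()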
+
conn = create_connection("db.sqlite")
+
Now that we have a connection, we can write a command and pass it to
+the database.
+
+
The python library, sqlite3, allows us to access the SQL
+database directly from python.
+
Let’s have a go at calling the command to extract the first three
+facilities from our health center database. Let’s try creating a
+function that does the same thing the pandas .head() method
+does so we can inspect our database.
+
def head(conn, table, n=5):
    rows = select_top(conn, table, n)
    for r in rows:
        print(r)
+
head(conn, "hospitals_zones_joined")
+
Great! We now have the database in SQLite, and some python functions that operate on the database by wrapping SQL commands.
+
We will return to the SQL command style after we download and add the other datasets to the database using a combination of pandas and the database utilities.
+
Our next task will be to introduce data on COVID19 so that we can
+join that to our other data sets.
Now we have the health data, we’re going to combine it with data about COVID-19 cases in Nigeria over time. This data is kindly provided by Africa open COVID-19 data working group, which Elaine Nsoesie has been working with. The data is taken from Twitter, and only goes up until May 2020.
-
They provide their data in github. We can access the cases we’re interested in from the following URL.
-
For convenience, we’ll load the data into pandas first, but our next step will be to create a new SQLite table containing the data. Then we’ll join that table to our existing tables.
Now we have the health data, we’re going to combine it with data about COVID-19 cases
+in Nigeria over time. This data is kindly provided by Africa open
+COVID-19 data working group, which Elaine Nsoesie
+has been working with. The data is taken from Twitter, and only goes up
+until May 2020.
+
They provide their data on GitHub. We can access the cases we’re interested in from the following URL.
For convenience, we’ll load the data into pandas first, but our next
+step will be to create a new SQLite table containing the data. Then
+we’ll join that table to our existing tables.
At the beginning of the COVID-19 outbreak, the Consortium for African
+COVID-19 Data formed to bring together data from across the African
+continent on COVID-19 cases (Marivate et al., 2020). These
+cases are recorded in the following GitHub repository: https://github.com/dsfsi/covid19africa.
+
For ease of use we’ve packaged this data set in the pods library.
+
import pods
+
data = pods.datasets.nigerian_covid()['Y']
+data.head()
+
Alternatively, you can access the data directly with the following
+commands.
Once it is loaded in the data can be summarized using the
+describe method in pandas.
+
data.describe()
+
Figure: Evolution of COVID-19 cases in Nigeria.
+
+
+
covid_data=data
+covid_data.to_csv('cases.csv')
+
Now we convert this CSV file we’ve downloaded into a new table in the
+database file.
+
We can do this, again, with the csv-to-sqlite script.
+
!csv-to-sqlite -f cases.csv -t full -o db.sqlite
Population Data
-
Now we have information about COVID cases, and we have information about how many health centers and how many doctors and nurses there are in each health center. But unless we understand how many people there are in each state, then we cannot make decisions about where they may be problems with the disease.
-
If we were running our ride hailing service, we would also need information about how many people there were in different areas, so we could understand what the demand for the boda boda rides might be.
We also want to have population data for each state in Nigeria, so that we can see attributes like whether there are zones of high health facility density but low population density.
To do joins with this data, we must first make sure that the columns have the right names. The name should match the same name of the column in our existing data. So we reset the column names, and the name of the index, as follows.
When doing this for real world data, you should also make sure that the names used in the rows are the same across the different data bases. For example, has someone decided to use an abbreviation for ‘Federal Capital Territory’ and set it as ‘FCT’. The computer won’t understand these are the same states, and if you do a join with such data you can get duplicate entries or missing entries. This sort of thing happens a lot in real world data and takes a lot of time to sort out. Fortunately, in this case, the data is well curated and we don’t have these problems.
Now we have information about COVID cases, and we have information
+about how many health centers and how many doctors and nurses there are
+in each health center. But unless we understand how many people there
are in each state, then we cannot make decisions about where there may be
+problems with the disease.
+
If we were running our ride hailing service, we would also need
+information about how many people there were in different areas, so we
+could understand what the demand for the boda boda rides might be.
We also want to have population data for each state in Nigeria, so
+that we can see attributes like whether there are zones of high health
+facility density but low population density.
To do joins with this data, we must first make sure that the columns have the right names. The name should match the name of the corresponding column in our existing data. So we reset the column names, and the name of the index, as follows.
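A sketch of that renaming; the assumptions here are that the population file has a single column of counts and is indexed by state name, while the names population and admin1Name_en match those used elsewhere in these notes:

pop_data.columns = ['population']        # assumed: one column of state populations
pop_data.index.name = 'admin1Name_en'    # match the state index of the outlines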
When doing this for real world data, you should also make sure that
+the names used in the rows are the same across the different data bases.
For example, someone may have decided to use an abbreviation for ‘Federal Capital Territory’ and set it as ‘FCT’. The computer won’t understand
+these are the same states, and if you do a join with such data, you can
+get duplicate entries or missing entries. This sort of thing happens a
+lot in real world data and takes a lot of time to sort out. Fortunately,
+in this case, the data is well curated, and we don’t have these
+problems.
Save to database file
-
The next step is to add this new CSV file as an additional table in our SQLite database. This is done using the script as before.
-
pop_data.to_csv('pop_data.csv')
-
!csv-to-sqlite -f pop_data.csv -t full -o db.sqlite
-
Computing per capita hospitals and COVID
-
The Minister of Health in Abuja may be interested in which states are most vulnerable to COVID19. We now have all the information in our SQL data bases to compute what our health center provision is per capita, and what the COVID19 situation is.
-
To do this, we will use the JOIN operation from SQL and introduce a new operation called GROUPBY.
-
Joining in Pandas
-
As before, these operations can be done in pandas or GeoPandas. Before we create the SQL commands, we’ll show how you can do that in pandas.
-
In pandas, the equivalent of a database table is a dataframe. So the JOIN operation takes two dataframes and joins them based on the key. The key is that special shared column between the two tables. The place where the ‘holes align’ so the two databases can be joined together.
-
In GeoPandas we used an outer join. In an outer join you keep all rows from both tables, even if there is no match on the key. In an inner join, you only keep the rows if the two tables have a matching key.
-
This is sometimes where problems can creep in. If in one table Abuja’s state is encoded as ‘FCT’ or ‘FCT-Abuja’, and in another table it’s encoded as ‘Federal Capital Territory’, they won’t match and that data wouldn’t appear in the joined table.
-
In simple terms, a JOIN operation takes two tables (or dataframes) and combines them based on some key, in this case the index of the Pandas data frame which is the state name.
We can load the data into the SQLite database using the script as
+before.
+
pop_data.to_csv('pop_data.csv')
+
!csv-to-sqlite -f pop_data.csv -t full -o db.sqlite
+
Computing per capita
+hospitals and COVID
+
The Minister of Health in Abuja may be interested in which states are
+most vulnerable to COVID19. We now have all the information in our SQL
+data bases to compute what our health center provision is per capita,
+and what the COVID19 situation is.
+
To do this, we will use the JOIN operation from SQL and
+introduce a new operation called GROUPBY.
+
Joining in Pandas
+
As before, these operations can be done in pandas or GeoPandas.
+Before we create the SQL commands, we’ll show how you can do that in
+pandas.
+
In pandas, the equivalent of a database table is a
+dataframe. So, the JOIN operation takes two dataframes and joins them
+based on the key. The key is that special shared column between the two
+tables. The place where the ‘holes align’ so the two databases can be
+joined together.
+
In GeoPandas we used an outer join. In an outer join you keep all
+rows from both tables, even if there is no match on the key. In an inner
+join, you only keep the rows if the two tables have a matching key.
+
This is sometimes where problems can creep in. If in one table
+Abuja’s state is encoded as ‘FCT’ or ‘FCT-Abuja’, and in another table
+it’s encoded as ‘Federal Capital Territory’, they won’t match, and that
+data wouldn’t appear in the joined table.
+
In simple terms, a JOIN operation takes two tables (or dataframes)
+and combines them based on some key, in this case the index of the
+Pandas data frame which is the state name.
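As a sketch (assuming zones_gdf and pop_data are both indexed by the state name, as set up above), the pandas version of the join is a single call:

# Join population figures onto the state outlines; how='inner' keeps only
# the states present in both frames (how='left' would keep all states).
pop_joined = zones_gdf.join(pop_data, how='inner')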
Our COVID19 data is in the form of individual cases. But we are interested in total case counts for each state. There is a special data base operation known as GROUP BY for collecting information about the individual states. The type of information you might want could be a sum, the maximum value, an average, the minimum value. We can use a GroupBy operation in pandas and SQL to summarize the counts of covid cases in each state.
-
A GROUPBY operation groups rows with the same key (in this case ‘province/state’) into separate objects, that we can operate on further such as to count the rows in each group, or to sum or take the mean over the values in some column (imagine each case row had the age of the patient, and you were interested in the mean age of patients.)
The .groupby() method on the dataframe has now given us a new data series that contains the total number of covid cases in each state. We can examine it to check we have something sensible.
-
covid_cases_by_state
-
Now we have this new data series, it can be added to the pandas data frame as a new column.
The spatial join we did on the original data frame to obtain hosp_state_joined introduced a new column, index_right which contains the state of each of the hospitals. Let’s have a quick look at it below.
-
hosp_state_joined['index_right']
-
To count the hospitals in each of the states, we first create a grouped series where we’ve grouped on these states.
This python operation now goes through each of the groups and counts how many hospitals there are in each state. It stores the result in a dictionary. If you’re new to Python, then to understand this code you need to understand what a ‘dictionary comprehension’ is. In this case the dictionary comprehension is being used to create a python dictionary of states and total hospital counts. That’s then being converted into a pandas Data Series and added to the pop_joined dataframe.
-
counted_groups = {k: len(v) for k, v in grouped.groups.items()}
-pop_joined['hosp_state'] = pd.Series(counted_groups)
-
For convenience, we can now add a new data series to the data frame that contains the per capita information about hospitals. that makes it easy to retrieve later.
Our COVID19 data is in the form of individual cases. But we are
+interested in total case counts for each state. There is a special data
+base operation known as GROUP BY for collecting information
+about the individual states. The type of information you might want
+could be a sum, the maximum value, an average, the minimum value. We can
+use a GroupBy operation in pandas and SQL to summarize the
+counts of covid cases in each state.
+
A GROUPBY operation groups rows with the same key (in
+this case ‘province/state’) into separate objects, that we can operate
+on further such as to count the rows in each group, or to sum or take
+the mean over the values in some column (imagine each case row had the
+age of the patient, and you were interested in the mean age of
+patients.)
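A sketch of that GroupBy in pandas, assuming the case data uses the ‘province/state’ column named above:

# One row per case, so the group sizes are the case counts per state.
covid_cases_by_state = covid_data.groupby('province/state').size()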
The .groupby() method on the dataframe has now given us
+a new data series that contains the total number of covid cases in each
+state. We can examine it to check we have something sensible.
+
covid_cases_by_state
+
Now we have this new data series, it can be added to the pandas
+dataframe as a new column.
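A sketch of that step, assuming the series’ index (the state names) lines up with the index of pop_joined:

pop_joined['covid_cases_by_state'] = covid_cases_by_state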
The spatial join we did on the original data frame to obtain
hosp_state_joined introduced a new column, index_right, which
+contains the state of each of the hospitals. Let’s have a quick look at
+it below.
+
hosp_state_joined['index_right']
+
To count the hospitals in each of the states, we first create a
+grouped series where we’ve grouped on these states.
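The grouping itself is one line; grouped is the name the counting code below uses:

grouped = hosp_state_joined.groupby('index_right')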
This python operation now goes through each of the groups and counts
+how many hospitals there are in each state. It stores the result in a
+dictionary. If you’re new to python, then to understand this code you
+need to understand what a ‘dictionary comprehension’ is. In this case
+the dictionary comprehension is being used to create a python dictionary
+of states and total hospital counts. That’s then being converted into a
+pandas Data Series and added to the pop_joined
+dataframe.
+
import pandas as pd
+
counted_groups = {k: len(v) for k, v in grouped.groups.items()}
+pop_joined['hosp_state'] = pd.Series(counted_groups)
+
For convenience, we can now add a new data series to the data frame
that contains the per capita information about hospitals. That makes it
+easy to retrieve later.
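A sketch, using the column names that appear in the plotting code below (hosp_state holds the facility counts, population the state population):

# Hospitals per 10,000 people in each state.
pop_joined['hosp_per_capita_10k'] = pop_joined['hosp_state']*10000/pop_joined['population']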
That’s the pandas approach to doing it. But pandas itself is inspired by database language, in particular relational databases such as SQL. To do these types of joins at scale, e.g. for our ride hailing app, we need to see how to do these joins in a database.
-
As before, we’ll wrap the underlying SQL commands with a convenient python command.
-
What you see below gives the full SQL command. There is a SELECT command, which extracts FROM a particular table. It then completes an INNER JOIN using particular columns (provice/state and index_right)
-
Now we’ve created our python wrapper, we can connect to the data base and run our SQL command on the database using the wrapper.
-
conn = create_connection("db.sqlite")
-
state_cases_hosps = join_counts(conn)
-
for row in state_cases_hosps:
-print("State {}\t\t Covid Cases {}\t\t Health Facilities {}".format(row[0], row[1], row[2]))
-
base = nigeria.plot(color='white', edgecolor='black', alpha=0, figsize=(11, 11))
-pop_joined.plot(ax=base, column='population', edgecolor='black', legend=True)
-base.set_title("Population of Nigerian States")
-
base = nigeria.plot(color='white', edgecolor='black', alpha=0, figsize=(11, 11))
-pop_joined.plot(ax=base, column='hosp_per_capita_10k', edgecolor='black', legend=True)
-base.set_title("Hospitals Per Capita (10k) of Nigerian States")
-
Exercise
-
-
Add a new column the dataframe for covid cases per 10,000 population, in the same way we computed health facilities per 10k capita.
-
Add a new column for covid cases per health facility.
-
-
Do this in both the SQL and the Pandas styles to get a feel for how they differ.
base = nigeria.plot(color='white', edgecolor='black', alpha=0, figsize=(11, 11))
-pop_joined.plot(ax=base, column='cases_per_capita_10k', edgecolor='black', legend=True)
-base.set_title("Covid Cases Per Capita (10k) of Nigerian States")
-
base = nigeria.plot(color='white', edgecolor='black', alpha=0, figsize=(11, 11))
-pop_joined.plot(ax=base, column='covid_cases_by_state', edgecolor='black', legend=True)
-base.set_title("Covid Cases by State")
-
base = nigeria.plot(color='white', edgecolor='black', alpha=0, figsize=(11, 11))
-pop_joined.plot(ax=base, column='cases_per_facility', edgecolor='black', legend=True)
-base.set_title("Covid Cases per Health Facility")
+
That’s the pandas approach to doing it. But
+pandas itself is inspired by database languages, in
+particular relational databases such as SQL. To do these types of joins
+at scale, e.g., for a ride hailing app, we need to do these joins in a
+database.
+
As before, we’ll wrap the underlying SQL commands with a convenient
+python command.
+
What you see below gives the full SQL command. There is a SELECT
+command, which extracts FROM a particular table. It
+then completes an INNER JOIN
using particular columns (province/state and admin1Name_en).
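The join_counts wrapper isn’t shown in these notes; the sketch below is one way it could be written, and the table names (cases and hospitals_zones_joined) are assumptions based on the CSV files created above:

def join_counts(conn):
    # Count covid cases and health facilities per state, then join the
    # two sets of counts on the state name.
    cur = conn.cursor()
    cur.execute("""
        SELECT c.state, c.n_cases, h.n_facilities
        FROM (SELECT [province/state] AS state, COUNT(*) AS n_cases
              FROM cases GROUP BY [province/state]) AS c
        INNER JOIN (SELECT admin1Name_en AS state, COUNT(*) AS n_facilities
                    FROM hospitals_zones_joined GROUP BY admin1Name_en) AS h
        ON c.state = h.state""")
    return cur.fetchall()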
+
Now we’ve created our python wrapper, we can connect to the data base
+and run our SQL command on the database using the wrapper.
+
conn = create_connection("db.sqlite")
+
state_cases_hosps = join_counts(conn)
+
for row in state_cases_hosps:
    print("State {}\t\t Covid Cases {}\t\t Health Facilities {}".format(row[0], row[1], row[2]))
+
base = nigeria_gdf.plot(color='white', edgecolor='black', alpha=0, figsize=(11, 11))
+pop_joined.plot(ax=base, column='population', edgecolor='black', legend=True)
+base.set_title("Population of Nigerian States")
+
base = nigeria_gdf.plot(color='white', edgecolor='black', alpha=0, figsize=(11, 11))
+pop_joined.plot(ax=base, column='hosp_per_capita_10k', edgecolor='black', legend=True)
+base.set_title("Hospitals Per Capita (10k) of Nigerian States")
+
Exercise 1
+
Add a new column to the dataframe for covid cases per 10,000 population, in the same way we computed health facilities per 10k capita.
+
Exercise 2
+
Add a new column for covid cases per health facility.
+
Exercise 3
+
Do this in both the SQL and the Pandas styles to get a feel for how
+they differ.
+
Exercise 4
+
Perform an inner join using SQL on your databases and convert the
result into a pandas DataFrame.

Marivate, V., Nsoesie, E., Bekele, E., Africa open COVID-19 data working
+group, 2020. Coronavirus COVID-19 (2019-nCoV) Data
+Repository for Africa. https://doi.org/10.5281/zenodo.3757554
+
+
+The Office of the Senior Special Assistant to the President on the
+Millennium Development Goals (OSSAP-MDGs), Columbia University, 2014.
+Nigeria NMIS facility database.
+
In this session we review the probabilistic approach to machine learning. We start with a review of probability, and introduce the concepts of probabilistic modelling. We then apply the approach in practice to Naive Bayesian classification. In this session we review the probabilistic formulation of a classification model, reviewing initially maximum likelihood and the naive Bayes model.
"
+abstract: "
In this session we review the probabilistic
+approach to machine learning. We start with a review of probability, and
+introduce the concepts of probabilistic modelling. We then apply the
+approach in practice to Naive Bayesian classification. In this session
+we review the probabilistic formulation of a classification model,
+reviewing initially maximum likelihood and the naive Bayes model.
"
author:
- given: Neil D.
family: Lawrence
@@ -9,20 +14,22 @@
institute: Amazon Cambridge and University of Sheffield
twitter: lawrennd
gscholar: r3SJcvoAAAAJ
- orchid:
+ orcid:
- given: Oluwasanmi
family: Koyejo
url: https://sanmi.cs.illinois.edu/
institute: Google and University of Illinois
twitter:
gscholar: EaaOeJwAAAAJ
- orchid:
+ orcid:
+edit_url: https://github.com/mlatcl/dsa/edit/gh-pages/_lamd/bayesian-methods-abuja.md
date: 2018-11-14
published: 2018-11-14
-week: 0
session: 3
reveal: 03-bayesian-methods-abuja.slides.html
+transition: None
ipynb: 03-bayesian-methods-abuja.ipynb
+pptx: 03-bayesian-methods-abuja.pptx
layout: lecture
categories:
- notes
@@ -39,98 +46,334 @@
-->
where data is our observations. They can be actively or passively acquired (meta-data). The model contains our assumptions, based on previous experience. That experience can be other data, it can come from transfer learning, or it can merely be our beliefs about the regularities of the universe. In humans our models include our inductive biases. The prediction is an action to be taken or a categorization or a quality score. The reason that machine learning has become a mainstay of artificial intelligence is the importance of predictions in artificial intelligence. The data and the model are combined through computation.
-
In practice we normally perform machine learning using two functions. To combine data with a model we typically make use of:
-
a prediction function a function which is used to make the predictions. It includes our beliefs about the regularities of the universe, our assumptions about how the world works, e.g. smoothness, spatial similarities, temporal similarities.
-
an objective function a function which defines the cost of misprediction. Typically it includes knowledge about the world’s generating processes (probabilistic objectives) or the costs we pay for mispredictions (empiricial risk minimization).
-
The combination of data and model through the prediction function and the objective function leads to a learning algorithm. The class of prediction functions and objective functions we can make use of is restricted by the algorithms they lead to. If the prediction function or the objective function are too complex, then it can be difficult to find an appropriate learning algorithm. Much of the acdemic field of machine learning is the quest for new learning algorithms that allow us to bring different types of models and data together.
where data is our observations. They can be actively or
+passively acquired (meta-data). The model contains our
+assumptions, based on previous experience. That experience can be other
+data, it can come from transfer learning, or it can merely be our
+beliefs about the regularities of the universe. In humans our models
+include our inductive biases. The prediction is an action to be
+taken or a categorization or a quality score. The reason that machine
+learning has become a mainstay of artificial intelligence is the
+importance of predictions in artificial intelligence. The data and the
+model are combined through computation.
+
In practice we normally perform machine learning using two functions.
+To combine data with a model we typically make use of:
+
a prediction function: it is used to make the predictions. It includes our beliefs about the regularities of the universe, our assumptions about how the world works, e.g., smoothness, spatial similarities, temporal similarities.
+
an objective function: it defines the ‘cost’ of misprediction. Typically, it includes knowledge about the world’s generating processes (probabilistic objectives) or the costs we pay for mispredictions (empirical risk minimization).
+
The combination of data and model through the prediction function and
+the objective function leads to a learning algorithm. The class
+of prediction functions and objective functions we can make use of is
+restricted by the algorithms they lead to. If the prediction function or
+the objective function are too complex, then it can be difficult to find
+an appropriate learning algorithm. Much of the academic field of machine
+learning is the quest for new learning algorithms that allow us to bring
+different types of models and data together.
As an example data set we will use the Nigerian Millennium Development Goals Information System Health Facility data (The Office of the Senior Special Assistant to the President on the Millennium Development Goals (OSSAP-MDGs) and Columbia University, 2014). It can be found at https://energydata.info/dataset/nigeria-nmis-education-facility-data-2014.
Taking from the information on the site,
-
The Nigeria MDG (Millennium Development Goals) Information System – NMIS health facility data is collected by the Office of the Senior Special Assistant to the President on the Millennium Development Goals (OSSAP-MDGs) in partner with the Sustainable Engineering Lab at Columbia University. A rigorous, geo-referenced baseline facility inventory across Nigeria is created spanning from 2009 to 2011 with an additional survey effort to increase coverage in 2014, to build Nigeria’s first nation-wide inventory of health facility. The database includes 34,139 health facilities info in Nigeria.
-
The goal of this database is to make the data collected available to planners, government officials, and the public, to be used to make strategic decisions for planning relevant interventions.
-
For data inquiry, please contact Ms. Funlola Osinupebi, Performance Monitoring & Communications, Advisory Power Team, Office of the Vice President at funlola.osinupebi@aptovp.org
Suggested citation: Nigeria NMIS facility database (2014), the Office of the Senior Special Assistant to the President on the Millennium Development Goals (OSSAP-MDGs) & Columbia University
+
The Nigeria MDG (Millennium Development Goals) Information System –
+NMIS health facility data is collected by the Office of the Senior
+Special Assistant to the President on the Millennium Development Goals
+(OSSAP-MDGs) in partner with the Sustainable Engineering Lab at Columbia
+University. A rigorous, geo-referenced baseline facility inventory
+across Nigeria is created spanning from 2009 to 2011 with an additional
+survey effort to increase coverage in 2014, to build Nigeria’s first
+nation-wide inventory of health facility. The database includes 34,139
+health facilities info in Nigeria.
+
The goal of this database is to make the data collected available to
+planners, government officials, and the public, to be used to make
+strategic decisions for planning relevant interventions.
+
For data inquiry, please contact Ms. Funlola Osinupebi, Performance
+Monitoring & Communications, Advisory Power Team, Office of the Vice
+President at funlola.osinupebi@aptovp.org
Suggested citation: Nigeria NMIS facility database (2014), the Office
+of the Senior Special Assistant to the President on the Millennium
+Development Goals (OSSAP-MDGs) & Columbia University
data = pd.read_csv('healthmopupandbaselinenmisfacility.csv')
-
Once it is loaded in the data can be summarized using the describe method in pandas.
-
data.describe()
-
In python and jupyter notebook it is possible to see a list of all possible functions and attributes by typing the name of the object followed by .<Tab> for example in the above case if we type data.<Tab> it show the columns available (these are attributes in pandas dataframes) such as num_nurses_fulltime, and also functions, such as .describe().
-
For functions we can also see the documentation about the function by following the name with a question mark. This will open a box with documentation at the bottom which can be closed with the x button.
-
data.describe?
-
The NMIS facility data is stored in an object known as a ‘data frame’. Data frames come from the statistical family of programming languages based on S, the most widely used of which is R. The data frame gives us a convenient object for manipulating data. The describe method summarizes which columns there are in the data frame and gives us counts, means, standard deviations and percentiles for the values in those columns. To access a column directly we can write
This shows the number of doctors per facility, number of nurses and number of community health workers (CHEWS). We can plot the number of doctors against the number of nurses as follows.
-
# this ensures the plot appears in the web browser
-%matplotlib inline
-import matplotlib.pyplot as plt # this imports the plotting library in python
You may be curious what the arguments we give to plt.plot are for, now is the perfect time to look at the documentation
-
plt.plot?
-
We immediately note that some facilities have a lot of nurses, which prevent’s us seeing the detail of the main number of facilities. First lets identify the facilities with the most nurses.
-
data[data['num_nurses_fulltime']>100]
-
Here we are using the command data['num_nurses_fulltime']>100 to index the facilities in the pandas data frame which have over 100 nurses. To sort them in order we can also use the sort command. The result of this command on its own is a data Series of True and False values. However, when it is passed to the data data frame it returns a new data frame which contains only those values for which the data series is True. We can also sort the result. To sort the result by the values in the num_nurses_fulltime column in descending order we use the following command.
We now see that the ‘University of Calabar Teaching Hospital’ is a large outlier with 513 nurses. We can try and determine how much of an outlier by histograming the data.
-
Plotting the Data
-
data['num_nurses_fulltime'].hist(bins=20) # histogram the data with 20 bins.
-plt.title('Histogram of Number of Nurses')
-
We can’t see very much here. Two things are happening. There are so many facilities with zero or one nurse that we don’t see the histogram for hospitals with many nurses. We can try more bins and using a log scale on the y-axis.
-
data['num_nurses_fulltime'].hist(bins=100) # histogram the data with 20 bins.
-plt.title('Histogram of Number of Nurses')
-ax = plt.gca()
-ax.set_yscale('log')
-
Exercise 1
-
Read on the internet about the following python libraries: numpy, matplotlib, scipy and pandas. What functionality does each provide python?
-
Let’s try and see how the number of nurses relates to the number of doctors.
-
fig, ax = plt.subplots(figsize=(10, 7))
-ax.plot(data['num_doctors_fulltime'], data['num_nurses_fulltime'], 'rx')
-ax.set_xscale('log') # use a logarithmic x scale
-ax.set_yscale('log') # use a logarithmic Y scale
-# give the plot some titles and labels
-plt.title('Number of Nurses against Number of Doctors')
-plt.ylabel('number of nurses')
-plt.xlabel('number of doctors')
-
Note a few things. We are interacting with our data. In particular, we are replotting the data according to what we have learned so far. We are using the progamming language as a scripting language to give the computer one command or another, and then the next command we enter is dependent on the result of the previous. This is a very different paradigm to classical software engineering. In classical software engineering we normally write many lines of code (entire object classes or functions) before compiling the code and running it. Our approach is more similar to the approach we take whilst debugging. Historically, researchers interacted with data using a console. A command line window which allowed command entry. The notebook format we are using is slightly different. Each of the code entry boxes acts like a separate console window. We can move up and down the notebook and run each part in a different order. The state of the program is always as we left it after running the previous part.
+
For ease of use we’ve packaged this data set in the pods library.
In Sheffield we created a suite of software tools for ‘Open Data
+Science’. Open data science is an approach to sharing code, models and
+data that should make it easier for companies, health professionals and
+scientists to gain access to data science techniques.
Once it is loaded in the data can be summarized using the
+describe method in pandas.
+
data.describe()
+
We can also find out the dimensions of the dataset using the
+shape property.
+
data.shape
+
Dataframes have different functions that you can use to explore and
+understand your data. In python and the Jupyter notebook it is possible
+to see a list of all possible functions and attributes by typing the
+name of the object followed by .<Tab> for example in
the above case if we type data.<Tab> it shows the
+columns available (these are attributes in pandas dataframes) such as
+num_nurses_fulltime, and also functions, such as
+.describe().
+
For functions we can also see the documentation about the function by
+following the name with a question mark. This will open a box with
+documentation at the bottom which can be closed with the x button.
+
data.describe?
+
Figure: Location of the over thirty-four thousand health facilities
+registered in the NMIS data across Nigeria. Each facility plotted
+according to its latitude and longitude.
We are now going to do some simple review of probabilities and use this review to explore some aspects of our data.
-
A probability distribution expresses uncertainty about the outcome of an event. We often encode this uncertainty in a variable. So if we are considering the outcome of an event, Y, to be a coin toss, then we might consider Y = 1 to be heads and Y = 0 to be tails. We represent the probability of a given outcome with the notation: P(Y = 1) = 0.5 The first rule of probability is that the probability must normalize. The sum of the probability of all events must equal 1. So if the probability of heads (Y = 1) is 0.5, then the probability of tails (the only other possible outcome) is given by P(Y = 0) = 1 − P(Y = 1) = 0.5
-
Probabilities are often defined as the limit of the ratio between the number of positive outcomes (e.g. heads) given the number of trials. If the number of positive outcomes for event y is denoted by n and the number of trials is denoted by N then this gives the ratio $$
+
We are now going to do some simple review of probabilities and use
+this review to explore some aspects of our data.
+
A probability distribution expresses uncertainty about the outcome of
+an event. We often encode this uncertainty in a variable. So if we are
+considering the outcome of an event, \(Y\), to be a coin toss, then we might
+consider \(Y=1\) to be heads and \(Y=0\) to be tails. We represent the
+probability of a given outcome with the notation: \[
+P(Y=1) = 0.5
+\] The first rule of probability is that the probability must
+normalize. The sum of the probability of all events must equal 1. So if
+the probability of heads (\(Y=1\)) is
+0.5, then the probability of tails (the only other possible outcome) is
+given by \[
+P(Y=0) = 1-P(Y=1) = 0.5
+\]
+
Probabilities are often defined as the limit of the ratio between the
+number of positive outcomes (e.g. heads) given the number of
+trials. If the number of positive outcomes for event \(y\) is denoted by \(n\) and the number of trials is denoted by
+\(N\) then this gives the ratio \[
P(Y=y) = \lim_{N\rightarrow
\infty}\frac{n_y}{N}.
-$$ In practice we never get to observe an event infinite times, so rather than considering this we often use the following estimate $$
+\] In practice we never get to observe an event infinite times,
+so rather than considering this we often use the following estimate
+\[
P(Y=y) \approx \frac{n_y}{N}.
-$$
Let’s use the sum rule to estimate the probability that a facility has more than two nurses.
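A sketch of that estimate, mirroring the computation used elsewhere in these notes:

large = (data.num_nurses_fulltime>2).sum()  # number of positive outcomes (True counts as 1, False as 0)
total_facilities = data.num_nurses_fulltime.count()

prob_large = float(large)/float(total_facilities)
print("Probability of number of nurses being greater than 2 is:", prob_large)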
-
large = (data.num_nurses_fulltime>2).sum() # number of positive outcomes (in sum True counts as 1, False counts as 0)
-total_facilities = data.num_nurses_fulltime.count()
-
-prob_large =float(large)/float(total_facilities)
-print("Probability of number of nurses being greather than 2 is:", prob_large)
The NMIS facility data is stored in an object known as a ‘data
+frame’. Data frames come from the statistical family of programming
+languages based on S, the most widely used of which is R.
+The data frame gives us a convenient object for manipulating data. The
+describe method summarizes which columns there are in the data frame and
+gives us counts, means, standard deviations and percentiles for the
+values in those columns. To access a column directly we can write
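+
+For example, to pull out the num_nurses_fulltime column listed by
+describe (a minimal illustration; any other column name works the same
+way):
+
+data['num_nurses_fulltime']
+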
This shows the number of doctors per facility, number of nurses and
+number of community health workers (CHEWS). We can plot the number of
+doctors against the number of nurses as follows.
+
import matplotlib.pyplot as plt # this imports the plotting library in python
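+
+A minimal version of that plot (a sketch in the same style as the
+labelled scatter plot further below) is:
+
+plt.plot(data['num_doctors_fulltime'], data['num_nurses_fulltime'], 'rx')
+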
You may be curious what the arguments we give to
+plt.plot are for. Now is the perfect time to look at the
+documentation.
+
plt.plot?
+
We immediately note that some facilities have a lot of nurses, which
+prevents us from seeing the detail for the bulk of the facilities. First
+let’s identify the facilities with the most nurses.
+
data[data['num_nurses_fulltime']>100]
+
Here we are using the command
+data['num_nurses_fulltime']>100 to index the facilities
+in the pandas data frame which have over 100 nurses. The result of this
+command on its own is a pandas Series of True
+and False values. However, when it is passed to the
+data data frame it returns a new data frame which contains
+only those values for which the series is True. We can
+also sort the result. To sort the result by the values in the
+num_nurses_fulltime column in descending order we
+use the following command.
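+
+(a sketch using pandas’ sort_values method; ascending=False gives
+descending order)
+
+data[data['num_nurses_fulltime']>100].sort_values(by='num_nurses_fulltime', ascending=False)
+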
We now see that the ‘University of Calabar Teaching Hospital’ is a
+large outlier with 513 nurses. We can try and determine how much of an
+outlier it is by histogramming the data.
+
Plotting the Data
+
data['num_nurses_fulltime'].hist(bins=20) # histogram the data with 20 bins.
+plt.title('Histogram of Number of Nurses')
+
We can’t see very much here. Two things are happening. There are so
+many facilities with zero or one nurse that we don’t see the histogram
+for hospitals with many nurses. We can try more bins and a
+log scale on the \(y\)-axis.
+
data['num_nurses_fulltime'].hist(bins=100) # histogram the data with 100 bins.
+plt.title('Histogram of Number of Nurses')
+ax = plt.gca()
+ax.set_yscale('log')
+
Let’s try and see how the number of nurses relates to the number of
+doctors.
+
fig, ax = plt.subplots(figsize=(10, 7))
+ax.plot(data['num_doctors_fulltime'], data['num_nurses_fulltime'], 'rx')
+ax.set_xscale('log') # use a logarithmic x scale
+ax.set_yscale('log') # use a logarithmic Y scale
+# give the plot some titles and labels
+plt.title('Number of Nurses against Number of Doctors')
+plt.ylabel('number of nurses')
+plt.xlabel('number of doctors')
+
Note a few things. We are interacting with our data. In particular,
+we are replotting the data according to what we have learned so far. We
+are using the programming language as a scripting language to
+give the computer one command or another, and then the next command we
+enter is dependent on the result of the previous. This is a very
+different paradigm to classical software engineering. In classical
+software engineering we normally write many lines of code (entire object
+classes or functions) before compiling the code and running it. Our
+approach is more similar to the approach we take whilst debugging.
+Historically, researchers interacted with data using a console,
+a command-line window which allowed command entry. The notebook format
+we are using is slightly different. Each of the code entry boxes acts
+like a separate console window. We can move up and down the notebook and
+run each part in a different order. The state of the program is
+always as we left it after running the previous part.
Let’s use the sum rule to compute an estimate of the probability that a
+facility has more than two nurses.
+
large = (data.num_nurses_fulltime>2).sum() # number of positive outcomes (in sum True counts as 1, False counts as 0)
+total_facilities = data.num_nurses_fulltime.count()
+
+prob_large = float(large)/float(total_facilities)
+print("Probability of number of nurses being greater than 2 is:", prob_large)
Conditioning
-
When predicting whether a coin turns up head or tails, we might think that this event is independent of the year or time of day. If we include an observation such as time, then in a probability this is known as condtioning. We use this notation, P(Y = y|X = x), to condition the outcome on a second variable (in this case the number of doctors). Or, often, for a shorthand we use P(y|x) to represent this distribution (the Y= and X= being implicit). If two variables are independent then we find that P(y|x) = p(y). However, we might believe that the number of nurses is dependent on the number of doctors. For this we can try estimating P(Y > 2|X > 1) and compare the result, for example to P(Y > 2|X ≤ 1) using our empirical estimate of the probability.
-
large = ((data.num_nurses_fulltime>2) & (data.num_doctors_fulltime>1)).sum()
-total_large_doctors = (data.num_doctors_fulltime>1).sum()
-prob_both_large = large/total_large_doctors
-print("Probability of number of nurses being greater than 2 given number of doctors is greater than 1 is:", prob_both_large)
-
Exercise 2
-
Write code that prints out the probability of nurses being greater than 2 for different numbers of doctors.
-
Make sure the plot is included in this notebook file (the Jupyter magic command %matplotlib inline we ran above will do that for you, it only needs to be run once per file).
+
When predicting whether a coin turns up heads or tails, we might think
+that this event is independent of the year or time of day. If
+we include an observation such as time, then in a probability this is
+known as conditioning. We use this notation, \(P(Y=y|X=x)\), to condition the outcome on a
+second variable (in this case the number of doctors). Or, often, for a
+shorthand we use \(P(y|x)\) to
+represent this distribution (the \(Y=\)
+and \(X=\) being implicit). If two
+variables are independent then we find that \[
+P(y|x) = p(y).
+\] However, we might believe that the number of nurses is
+dependent on the number of doctors. For this we can try estimating \(P(Y>2 | X>1)\) and compare the
+result, for example to \(P(Y>2|X\leq
+1)\) using our empirical estimate of the probability.
+
large = ((data.num_nurses_fulltime>2) & (data.num_doctors_fulltime>1)).sum()
+total_large_doctors = (data.num_doctors_fulltime>1).sum()
+prob_both_large = large/total_large_doctors
+print("Probability of number of nurses being greater than 2 given number of doctors is greater than 1 is:", prob_both_large)
+
Exercise 1
+
Write code that prints out the probability of nurses being greater
+than 2 for different numbers of doctors.
+
Make sure the plot is included in this notebook file (the
+Jupyter magic command %matplotlib inline we ran above will
+do that for you, it only needs to be run once per file).
Figure: Diagram representing the different probabilities, joint, marginal and conditional. This diagram was inspired by lectures given by Christopher Bishop.
+
Figure: Diagram representing the different probabilities, joint,
+marginal and conditional. This diagram was inspired by lectures given by
+Christopher Bishop.
Typically we should write out P(X=x,Y=y), but in practice we often shorten this to P(x,y). This looks very much like we might write a multivariate function, e.g. $$
+
Typically we should write out \(P\left(X=x,Y=y\right)\), but in practice we
+often shorten this to \(P\left(x,y\right)\). This looks very much
+like we might write a multivariate function, e.g.\[
f\left(x,y\right)=\frac{x}{y},
-$$ but for a multivariate function f(x,y) ≠ f(y,x). However, P(x,y) = P(y,x) because P(X=x,Y=y) = P(Y=y,X=x). Sometimes I think of this as akin to the way in Python we can write ‘keyword arguments’ in functions. If we use keyword arguments, the ordering of arguments doesn’t matter.
-
We’ve now introduced conditioning and independence to the notion of probability and computed some conditional probabilities on a practical example The scatter plot of deaths vs year that we created above can be seen as a joint probability distribution. We represent a joint probability using the notation P(Y = y, X = x) or P(y, x) for short. Computing a joint probability is equivalent to answering the simultaneous questions, what’s the probability that the number of nurses was over 2 and the number of doctors was 1? Or any other question that may occur to us. Again we can easily use pandas to ask such questions.
-
num_doctors =1
-large = (data.num_nurses_fulltime[data.num_doctors_fulltime==num_doctors]>2).sum()
-total_facilities = data.num_nurses_fulltime.count() # this is total number of films
-prob_large =float(large)/float(total_facilities)
-print("Probability of nurses being greater than 2 and number of doctors being", num_doctors, "is:", prob_large)
+\] but for a multivariate function \[
+f\left(x,y\right)\neq f\left(y,x\right).
+\] However, \[
+P\left(x,y\right)=P\left(y,x\right)
+\] because \[
+P\left(X=x,Y=y\right)=P\left(Y=y,X=x\right).
+\] Sometimes I think of this as akin to the way in Python we can
+write ‘keyword arguments’ in functions. If we use keyword arguments, the
+ordering of arguments doesn’t matter.
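+
+A small illustration of that analogy (a sketch, not part of the
+original notes):
+
+def f(x, y):
+    return x/y
+
+print(f(x=2, y=4) == f(y=4, x=2))  # True: with keyword arguments the ordering doesn't matter
+print(f(2, 4) == f(4, 2))          # False: with positional arguments it does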
+
We’ve now introduced conditioning and independence to the notion of
+probability and computed some conditional probabilities on a practical
+example. The scatter plot of nurses against doctors that we created above can be
+seen as a joint probability distribution. We represent a joint
+probability using the notation \(P(Y=y,
+X=x)\) or \(P(y, x)\) for short.
+Computing a joint probability is equivalent to answering the
+simultaneous questions, what’s the probability that the number of nurses
+was over 2 and the number of doctors was 1? Or any other question that
+may occur to us. Again we can easily use pandas to ask such
+questions.
+
num_doctors = 1
+large = (data.num_nurses_fulltime[data.num_doctors_fulltime==num_doctors]>2).sum()
+total_facilities = data.num_nurses_fulltime.count() # this is the total number of facilities
+prob_large = float(large)/float(total_facilities)
+print("Probability of nurses being greater than 2 and number of doctors being", num_doctors, "is:", prob_large)
The Product Rule
-
This number is the joint probability, P(Y, X) which is much smaller than the conditional probability. The number can never be bigger than the conditional probabililty because it is computed using the product rule. p(Y = y, X = x) = p(Y = y|X = x)p(X = x) and p(X = x) is a probability distribution, which is equal or less than 1, ensuring the joint distribution is typically smaller than the conditional distribution.
-
The product rule is a fundamental rule of probability, and you must remember it! It gives the relationship between the two questions: 1) What’s the probability that a facility has over two nurses and one doctor? and 2) What’s the probability that a facility has over two nurses given that it has one doctor?
-
In our shorter notation we can write the product rule as p(y, x) = p(y|x)p(x) We can see the relation working in practice for our data above by computing the different values for x = 1.
This number is the joint probability, \(P(Y, X)\) which is much smaller
+than the conditional probability. The number can never be bigger than
+the conditional probability because it is computed using the
+product rule. \[
+p(Y=y, X=x) = p(Y=y|X=x)p(X=x)
+\] and \(p(X=x)\) is a
+probability distribution, which is less than or equal to 1, ensuring the
+joint distribution is typically smaller than the conditional
+distribution.
+
The product rule is a fundamental rule of probability, and
+you must remember it! It gives the relationship between the two
+questions: 1) What’s the probability that a facility has over two nurses
+and one doctor? and 2) What’s the probability that a facility
+has over two nurses given that it has one doctor?
+
In our shorter notation we can write the product rule as \[
+p(y, x) = p(y|x)p(x)
+\] We can see the relation working in practice for our data above
+by computing the different values for \(x=1\).
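+
+A sketch of that check (not the notes’ own code, but following the same
+pandas style, with \(y\) the event that a facility has more than two
+nurses and \(x\) the event that it has exactly one doctor):
+
+num_doctors = 1
+joint = (data.num_nurses_fulltime[data.num_doctors_fulltime==num_doctors]>2).sum()
+n_doctors = (data.num_doctors_fulltime==num_doctors).sum()
+total_facilities = data.num_nurses_fulltime.count()
+p_joint = float(joint)/float(total_facilities)        # p(y, x)
+p_conditional = float(joint)/float(n_doctors)         # p(y|x)
+p_x = float(n_doctors)/float(total_facilities)        # p(x)
+print("p(y, x):", p_joint, " p(y|x)p(x):", p_conditional*p_x)
+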
The other fundamental rule of probability is the sum rule this tells us how to get a marginal distribution from the joint distribution. Simply put it says that we need to sum across the value we’d like to remove. P(Y = y) = ∑xP(Y = y, X = x) Or in our shortened notation P(y) = ∑xP(y, x)
-
Exercise 3
-
Write code that computes P(y) by adding P(y, x) for all values of x.
+
+The other fundamental rule of probability is the sum
+rule. This tells us how to get a marginal distribution from
+the joint distribution. Simply put, it says that we need to sum across
+the value we’d like to remove. \[
+P(Y=y) = \sum_{x} P(Y=y, X=x)
+\] Or in our shortened notation \[
+P(y) = \sum_{x} P(y, x)
+\]
+
Exercise 2
+
Write code that computes \(P(y)\) by
+adding \(P(y, x)\) for all values of
+\(x\).
Bayes’ Rule
-
Bayes’ rule is a very simple rule, it’s hardly worth the name of a rule at all. It follows directly from the product rule of probability. Because P(y, x) = P(y|x)P(x) and by symmetry P(y, x) = P(x, y) = P(x|y)P(y) then by equating these two equations and dividing through by P(y) we have $$
+
+Bayes’ rule is a very simple rule; it’s hardly worth the name of a
+rule at all. It follows directly from the product rule of probability.
+Because \(P(y, x) = P(y|x)P(x)\) and by
+symmetry \(P(y,x)=P(x,y)=P(x|y)P(y)\)
+then by equating these two equations and dividing through by \(P(y)\) we have \[
P(x|y) =
\frac{P(y|x)P(x)}{P(y)}
-$$ which is known as Bayes’ rule (or Bayes’s rule, it depends how you choose to pronounce it). It’s not difficult to derive, and its importance is more to do with the semantic operation that it enables. Each of these probability distributions represents the answer to a question we have about the world. Bayes rule (via the product rule) tells us how to invert the probability.
+\] which is known as Bayes’ rule (or Bayes’s rule, it depends how
+you choose to pronounce it). It’s not difficult to derive, and its
+importance is more to do with the semantic operation that it enables.
+Each of these probability distributions represents the answer to a
+question we have about the world. Bayes’ rule (via the product rule)
+tells us how to invert the probability.
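+
+As a quick sanity check (a sketch, not part of the original notes), we
+can invert the conditional probability computed earlier, estimating
+\(P(X>1|Y>2)\) both via Bayes’ rule and directly:
+
+nurses_large = data.num_nurses_fulltime>2
+doctors_large = data.num_doctors_fulltime>1
+total = data.num_nurses_fulltime.count()
+p_x = float(doctors_large.sum())/float(total)     # P(X>1)
+p_y = float(nurses_large.sum())/float(total)      # P(Y>2)
+p_y_given_x = float((nurses_large & doctors_large).sum())/float(doctors_large.sum())
+print("Bayes' rule estimate:", p_y_given_x*p_x/p_y)
+print("Direct estimate:", float((nurses_large & doctors_large).sum())/float(nurses_large.sum()))
+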
Further Reading
-
Probability distributions: page 12–17 (Section 1.2) of Bishop (2006)
+
+Probability distributions: pages 12–17 (Section 1.2) of Bishop (2006)
Exercises
-
Exercise 1.3 of Bishop (2006)
+
Exercise 1.3 of Bishop (2006)
-
Probabilities for Extracting Information from Data
What use is all this probability in data science? Let’s think about how we might use the probabilities to do some decision making. Let’s look at the information data.
-
data.columns
-
Exercise 1
-
Now we see we have several additional features. Let’s assume we want to predict maternal_health_delivery_services. How would we go about doing it?
-
Using what you’ve learnt about joint, conditional and marginal probabilities, as well as the sum and product rule, how would you formulate the question you want to answer in terms of probabilities? Should you be using a joint or a conditional distribution? If it’s conditional, what should the distribution be over, and what should it be conditioned on?
+
Probabilities for Extracting Information from Data
What use is all this probability in data science? Let’s think about
+how we might use the probabilities to do some decision making. Let’s
+look at the information in the data.
+
data.columns
+
Exercise 3
+
Now we see we have several additional features. Let’s assume we want
+to predict maternal_health_delivery_services. How would we
+go about doing it?
+
Using what you’ve learnt about joint, conditional and marginal
+probabilities, as well as the sum and product rule, how would you
+formulate the question you want to answer in terms of probabilities?
+Should you be using a joint or a conditional distribution? If it’s
+conditional, what should the distribution be over, and what should it be
+conditioned on?
This Bayesian approach is designed to deal with uncertainty arising from fitting our prediction function to the data we have, a reduced data set.
-
The Bayesian approach can be derived from a broader understanding of what our objective is. If we accept that we can jointly represent all things that happen in the world with a probability distribution, then we can interogate that probability to make predictions. So, if we are interested in predictions, $\dataScalar_*$ at future points input locations of interest, $\inputVector_*$ given previously training data, $\dataVector$ and corresponding inputs, $\inputMatrix$, then we are really interogating the following probability density, $$
-p(\dataScalar_*|\dataVector, \inputMatrix, \inputVector_*),
-$$ there is nothing controversial here, as long as you accept that you have a good joint model of the world around you that relates test data to training data, $p(\dataScalar_*, \dataVector, \inputMatrix, \inputVector_*)$ then this conditional distribution can be recovered through standard rules of probability (data + model → prediction).
-
We can construct this joint density through the use of the following decomposition: $$
-p(\dataScalar_*|\dataVector, \inputMatrix, \inputVector_*) = \int p(\dataScalar_*|\inputVector_*, \mappingMatrix) p(\mappingMatrix | \dataVector, \inputMatrix) \text{d} \mappingMatrix
-$$
-
where, for convenience, we are assuming all the parameters of the model are now represented by $\parameterVector$ (which contains $\mappingMatrix$ and $\mappingMatrixTwo$) and $p(\parameterVector | \dataVector, \inputMatrix)$ is recognised as the posterior density of the parameters given data and $p(\dataScalar_*|\inputVector_*, \parameterVector)$ is the likelihood of an individual test data point given the parameters.
-
The likelihood of the data is normally assumed to be independent across the parameters, $$
-p(\dataVector|\inputMatrix, \mappingMatrix) = \prod_{i=1}^\numData p(\dataScalar_i|\inputVector_i, \mappingMatrix),$$
-
and if that is so, it is easy to extend our predictions across all future, potential, locations, $$
-p(\dataVector_*|\dataVector, \inputMatrix, \inputMatrix_*) = \int p(\dataVector_*|\inputMatrix_*, \parameterVector) p(\parameterVector | \dataVector, \inputMatrix) \text{d} \parameterVector.
-$$
-
The likelihood is also where the prediction function is incorporated. For example in the regression case, we consider an objective based around the Gaussian density, $$
-p(\dataScalar_i | \mappingFunction(\inputVector_i)) = \frac{1}{\sqrt{2\pi \dataStd^2}} \exp\left(-\frac{\left(\dataScalar_i - \mappingFunction(\inputVector_i)\right)^2}{2\dataStd^2}\right)
-$$
-
In short, that is the classical approach to probabilistic inference, and all approaches to Bayesian neural networks fall within this path. For a deep probabilistic model, we can simply take this one stage further and place a probability distribution over the input locations, $$
-p(\dataVector_*|\dataVector) = \int p(\dataVector_*|\inputMatrix_*, \parameterVector) p(\parameterVector | \dataVector, \inputMatrix) p(\inputMatrix) p(\inputMatrix_*) \text{d} \parameterVector \text{d} \inputMatrix \text{d}\inputMatrix_*
-$$ and we have unsupervised learning (from where we can get deep generative models).
This Bayesian approach is designed to deal with uncertainty arising
+from fitting our prediction function to the data we have, a reduced data
+set.
+
The Bayesian approach can be derived from a broader understanding of
+what our objective is. If we accept that we can jointly represent all
+things that happen in the world with a probability distribution, then we
+can interrogate that probability to make predictions. So, if we are
+interested in predictions, \(y_*\) at
+future input locations of interest, \(\mathbf{ x}_*\), given previous training
+data, \(\mathbf{ y}\) and corresponding
+inputs, \(\mathbf{X}\), then we are
+really interrogating the following probability density, \[
+p(y_*|\mathbf{ y}, \mathbf{X}, \mathbf{ x}_*),
+\] there is nothing controversial here, as long as you accept
+that you have a good joint model of the world around you that relates
+test data to training data, \(p(y_*, \mathbf{
+y}, \mathbf{X}, \mathbf{ x}_*)\) then this conditional
+distribution can be recovered through standard rules of probability
+(\(\text{data} + \text{model} \rightarrow
+\text{prediction}\)).
+
We can construct this joint density through the use of the following
+decomposition: \[
+p(y_*|\mathbf{ y}, \mathbf{X}, \mathbf{ x}_*) = \int p(y_*|\mathbf{
+x}_*, \mathbf{W}) p(\mathbf{W}| \mathbf{ y}, \mathbf{X}) \text{d}
+\mathbf{W}
+\]
+
where, for convenience, we are assuming all the parameters
+of the model are now represented by \(\boldsymbol{ \theta}\) (which contains
+\(\mathbf{W}\) and \(\mathbf{V}\)) and \(p(\boldsymbol{ \theta}| \mathbf{ y},
+\mathbf{X})\) is recognised as the posterior density of the
+parameters given data and \(p(y_*|\mathbf{
+x}_*, \boldsymbol{ \theta})\) is the likelihood of an
+individual test data point given the parameters.
+
The likelihood of the data is normally assumed to be independent
+across the data points given the parameters, \[
+p(\mathbf{ y}|\mathbf{X}, \mathbf{W}) = \prod_{i=1}^np(y_i|\mathbf{
+x}_i, \mathbf{W}),\]
+
and if that is so, it is easy to extend our predictions across all
+future, potential, locations, \[
+p(\mathbf{ y}_*|\mathbf{ y}, \mathbf{X}, \mathbf{X}_*) = \int p(\mathbf{
+y}_*|\mathbf{X}_*, \boldsymbol{ \theta}) p(\boldsymbol{ \theta}|
+\mathbf{ y}, \mathbf{X}) \text{d} \boldsymbol{ \theta}.
+\]
+
The likelihood is also where the prediction function is
+incorporated. For example in the regression case, we consider an
+objective based around the Gaussian density, \[
+p(y_i | f(\mathbf{ x}_i)) = \frac{1}{\sqrt{2\pi \sigma^2}}
+\exp\left(-\frac{\left(y_i - f(\mathbf{
+x}_i)\right)^2}{2\sigma^2}\right)
+\]
+
In short, that is the classical approach to probabilistic inference,
+and all approaches to Bayesian neural networks fall within this path.
+For a deep probabilistic model, we can simply take this one stage
+further and place a probability distribution over the input locations,
+\[
+p(\mathbf{ y}_*|\mathbf{ y}) = \int p(\mathbf{ y}_*|\mathbf{X}_*,
+\boldsymbol{ \theta}) p(\boldsymbol{ \theta}| \mathbf{ y}, \mathbf{X})
+p(\mathbf{X}) p(\mathbf{X}_*) \text{d} \boldsymbol{ \theta}\text{d}
+\mathbf{X}\text{d}\mathbf{X}_*
+\] and we have unsupervised learning (from where we can
+get deep generative models).
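+
+As a concrete illustration of the predictive integral above, here is a
+minimal sketch (not part of the original notes) for Bayesian linear
+regression with a Gaussian prior over the weights and a known noise
+variance, where the integral over \(\mathbf{W}\) is available in closed
+form:
+
+import numpy as np
+
+np.random.seed(0)
+n = 30
+X = np.hstack([np.ones((n, 1)), np.random.rand(n, 1)])  # design matrix with a bias column
+w_true = np.array([[0.5], [2.0]])                       # hypothetical true weights
+sigma2 = 0.01                                           # noise variance (assumed known)
+alpha = 1.0                                             # prior variance of the weights
+y = X@w_true + np.sqrt(sigma2)*np.random.randn(n, 1)
+
+# posterior p(W | y, X) is Gaussian with this covariance and mean
+C_w = np.linalg.inv(X.T@X/sigma2 + np.eye(2)/alpha)
+mu_w = C_w@X.T@y/sigma2
+
+# predictive density p(y_* | y, X, x_*) integrates the likelihood over the posterior
+x_star = np.array([[1.0, 0.5]])
+mean_star = x_star@mu_w
+var_star = x_star@C_w@x_star.T + sigma2
+print("predictive mean:", mean_star, "predictive variance:", var_star)
+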
One way of representing a joint distribution is to consider conditional dependencies between data. Conditional dependencies allow us to factorize the distribution. For example, a Markov chain is a factorization of a distribution into components that represent the conditional relationships between points that are neighboring, often in time or space. It can be decomposed in the following form. $$p(\dataVector) = p(\dataScalar_\numData | \dataScalar_{\numData-1}) p(\dataScalar_{\numData-1}|\dataScalar_{\numData-2}) \dots p(\dataScalar_{2} | \dataScalar_{1})$$
One way of representing a joint distribution is to consider
+conditional dependencies between data. Conditional dependencies allow us
+to factorize the distribution. For example, a Markov chain is a
+factorization of a distribution into components that represent the
+conditional relationships between points that are neighboring, often in
+time or space. It can be decomposed in the following form. \[p(\mathbf{ y}) = p(y_n| y_{n-1})
+p(y_{n-1}|y_{n-2}) \dots p(y_{2} | y_{1})\]
-
+
-
+
-
Figure: A Markov chain is a simple form of probabilistic graphical model providing a particular decomposition of the joint density.
+
Figure: A Markov chain is a simple form of probabilistic graphical
+model providing a particular decomposition of the joint density.
-
By specifying conditional independencies we can reduce the parameterization required for our data, instead of directly specifying the parameters of the joint distribution, we can specify each set of parameters of the conditonal independently. This can also give an advantage in terms of interpretability. Understanding a conditional independence structure gives a structured understanding of data. If developed correctly, according to causal methodology, it can even inform how we should intervene in the system to drive a desired result (Pearl 1995).
-
However, a challenge arises when the data becomes more complex. Consider the graphical model shown below, used to predict the perioperative risk of C Difficile infection following colon surgery (Steele et al. 2012).
+
By specifying conditional independencies we can reduce the
+parameterization required for our data: instead of directly specifying
+the parameters of the joint distribution, we can specify the
+parameters of each conditional independently. This can also give an
+advantage in terms of interpretability. Understanding a conditional
+independence structure gives a structured understanding of data. If
+developed correctly, according to causal methodology, it can even inform
+how we should intervene in the system to drive a desired result (Pearl,
+1995).
+
However, a challenge arises when the data becomes more complex.
+Consider the graphical model shown below, used to predict the
+perioperative risk of C Difficile infection following colon
+surgery (Steele
+et al., 2012).
-
+
-
+
-
Figure: A probabilistic directed graph used to predict the perioperative risk of C Difficile infection following colon surgery. When these models have good predictive performance they are often difficult to interpret. This may be due to the limited representation capability of the conditional densities in the model.
-
-
-
To capture the complexity in the interelationship between the data, the graph itself becomes more complex, and less interpretable.
Classification is perhaps the technique most closely assocated with machine learning. In the speech based agents, on-device classifiers are used to determine when the wake word is used. A wake word is a word that wakes up the device. For the Amazon Echo it is “Alexa”, for Siri it is “Hey Siri”. Once the wake word detected with a classifier, the speech can be uploaded to the cloud for full processing, the speech recognition stages.
A major breakthrough in image classification came in 2012 with the ImageNet result of Alex Krizhevsky, Ilya Sutskever and Geoff Hinton from the University of Toronto. ImageNet is a large data base of 14 million images with many thousands of classes. The data is used in a community-wide challenge for object categorization. Krizhevsky et al used convolutional neural networks to outperform all previous approaches on the challenge. They formed a company which was purchased shortly after by Google. This challenge, known as object categorisation, was a major obstacle for practical computer vision systems. Modern object categorization systems are close to human performance.
-
Machine learning problems normally involve a prediction function and an objective function. Regression is the case where the prediction function iss over the real numbers, so the codomain of the functions, $\mappingFunction(\inputMatrix)$ was the real numbers or sometimes real vectors. The classification problem consists of predicting whether or not a particular example is a member of a particular class. So we may want to know if a particular image represents a digit 6 or if a particular user will click on a given advert. These are classification problems, and they require us to map to yes or no answers. That makes them naturally discrete mappings.
-
In classification we are given an input vector, $\inputVector$, and an associated label, $\dataScalar$ which either takes the value − 1 to represent no or 1 to represent yes.
-
In supervised learning the inputs, $\inputVector$, are mapped to a label, $\dataScalar$, through a function $\mappingFunction(\cdot)$ that is dependent on a set of parameters, $\weightVector$, $$
-\dataScalar = \mappingFunction(\inputVector; \weightVector).
-$$ The function $\mappingFunction(\cdot)$ is known as the prediction function. The key challenges are (1) choosing which features, $\inputVector$, are relevant in the prediction, (2) defining the appropriate class of function, $\mappingFunction(\cdot)$, to use and (3) selecting the right parameters, $\weightVector$.
+
Figure: A probabilistic directed graph used to predict the
+perioperative risk of C Difficile infection following colon
+surgery. When these models have good predictive performance they are
+often difficult to interpret. This may be due to the limited
+representation capability of the conditional densities in the model.
+
+
+
To capture the complexity in the interrelationships between the data,
+the graph itself becomes more complex, and less interpretable.
Classification is perhaps the technique most closely associated with
+machine learning. In speech-based agents, on-device classifiers are
+used to determine when the wake word is used. A wake word is a word that
+wakes up the device. For the Amazon Echo it is “Alexa”, for Siri it is
+“Hey Siri”. Once the wake word is detected with a classifier, the speech
+can be uploaded to the cloud for full processing, the speech recognition
+stages.
A major breakthrough in image classification came in 2012 with the
+ImageNet result of Alex
+Krizhevsky, Ilya Sutskever and Geoff Hinton from the University of
+Toronto. ImageNet is a large database of 14 million images with many
+thousands of classes. The data is used in a community-wide challenge for
+object categorization. Krizhevsky et al. used convolutional neural
+networks to outperform all previous approaches on the challenge. They
+formed a company which was purchased shortly after by Google. This
+challenge, known as object categorisation, was a major obstacle for
+practical computer vision systems. Modern object categorization systems
+are close to human performance.
+
Machine learning problems normally involve a prediction function and
+an objective function. Regression is the case where the prediction
+function is over the real numbers, so the codomain of the functions,
+\(f(\mathbf{X})\), is the real numbers
+or sometimes real vectors. The classification problem consists of
+predicting whether or not a particular example is a member of a
+particular class. So we may want to know if a particular image
+represents a digit 6 or if a particular user will click on a given
+advert. These are classification problems, and they require us to map to
+yes or no answers. That makes them naturally discrete
+mappings.
+
In classification we are given an input vector, \(\mathbf{ x}\), and an associated label,
+\(y\) which either takes the value
+\(-1\) to represent no or
+\(1\) to represent yes.
+
In supervised learning the inputs, \(\mathbf{ x}\), are mapped to a label, \(y\), through a function \(f(\cdot)\) that is dependent on a set of
+parameters, \(\mathbf{ w}\), \[
+y= f(\mathbf{ x}; \mathbf{ w}).
+\] The function \(f(\cdot)\) is
+known as the prediction function. The key challenges are (1)
+choosing which features, \(\mathbf{
+x}\), are relevant in the prediction, (2) defining the
+appropriate class of function, \(f(\cdot)\), to use and (3) selecting the
+right parameters, \(\mathbf{ w}\).
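+
+A tiny illustration of such a prediction function (a sketch, not the
+model used in these notes) is a linear classifier that maps an input to
+\(-1\) or \(1\) via the sign of a weighted sum:
+
+import numpy as np
+
+def f(x, w):
+    # prediction function f(x; w): 1 if the linear score is positive, otherwise -1
+    return 1 if np.dot(w, x) > 0 else -1
+
+w = np.array([2.0, -1.0])            # hypothetical parameters
+print(f(np.array([1.0, 0.5]), w))    # prints 1 for this input
+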
Our focus has been on models where the objective function is inspired by a probabilistic analysis of the problem. In particular we’ve argued that we answer questions about the data set by placing probability distributions over the various quantities of interest. For the case of binary classification this will normally involve introducing probability distributions for discrete variables. Such probability distributions, are in some senses easier than those for continuous variables, in particular we can represent a probability distribution over $\dataScalar$, where $\dataScalar$ is binary, with one value. If we specify the probability that $\dataScalar=1$ with a number that is between 0 and 1, i.e. let’s say that $P(\dataScalar=1) = \pi$ (here we don’t mean π the number, we are setting π to be a variable) then we can specify the probability distribution through a table.
Our focus has been on models where the objective function is inspired
+by a probabilistic analysis of the problem. In particular we’ve argued
+that we answer questions about the data set by placing probability
+distributions over the various quantities of interest. For the case of
+binary classification this will normally involve introducing probability
+distributions for discrete variables. Such probability distributions
+are in some senses easier than those for continuous variables, in
+particular we can represent a probability distribution over \(y\), where \(y\) is binary, with one value. If we
+specify the probability that \(y=1\)
+with a number that is between 0 and 1, i.e. let’s say that \(P(y=1) = \pi\) (here we don’t mean \(\pi\) the number, we are setting \(\pi\) to be a variable) then we can specify
+the probability distribution through a table.
-$\dataScalar$      0                1
-$P(\dataScalar)$   (1 − π)          π
+\(y\)              0                1
+\(P(y)\)           \((1-\pi)\)      \(\pi\)
Mathematically we can use a trick to implement this same table. We can use the value $\dataScalar$ as a mathematical switch and write that $$
- P(\dataScalar) = \pi^\dataScalar (1-\pi)^{(1-\dataScalar)}
- $$ where our probability distribution is now written as a function of $\dataScalar$. This probability distribution is known as the Bernoulli distribution. The Bernoulli distribution is a clever trick for mathematically switching between two probabilities if we were to write it as code it would be better described as
-
def bernoulli(y_i, pi):
-if y_i ==1:
-return pi
-else:
-return1-pi
-
If we insert $\dataScalar=1$ then the function is equal to π, and if we insert $\dataScalar=0$ then the function is equal to 1 − π. So the function recreates the table for the distribution given above.
-
The probability distribution is named for Jacob Bernoulli, the swiss mathematician. In his book Ars Conjectandi he considered the distribution and the result of a number of ‘trials’ under the Bernoulli distribution to form the binomial distribution. Below is the page where he considers Pascal’s triangle in forming combinations of the Bernoulli distribution to realise the binomial distribution for the outcome of positive trials.
-
+
Mathematically we can use a trick to implement this same table. We
+can use the value \(y\) as a
+mathematical switch and write that \[
+ P(y) = \pi^y(1-\pi)^{(1-y)}
+ \] where our probability distribution is now written as a
+function of \(y\). This probability
+distribution is known as the Bernoulli
+distribution. The Bernoulli distribution is a clever trick for
+mathematically switching between two probabilities. If we were to write
+it as code, it would be better described as
+
def bernoulli(y_i, pi):
+    if y_i == 1:
+        return pi
+    else:
+        return 1 - pi
+
If we insert \(y=1\) then the
+function is equal to \(\pi\), and if we
+insert \(y=0\) then the function is
+equal to \(1-\pi\). So the function
+recreates the table for the distribution given above.
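+
+A quick check of the switch (using the function defined above):
+
+print(bernoulli(1, 0.3), bernoulli(0, 0.3)) # prints 0.3 0.7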
+
The probability distribution is named for Jacob Bernoulli,
+the Swiss mathematician. In his book Ars Conjectandi he considered the
+distribution and the result of a number of ‘trials’ under the Bernoulli
+distribution to form the binomial distribution. Below is the
+page where he considers Pascal’s triangle in forming combinations of the
+Bernoulli distribution to realise the binomial distribution for the
+outcome of positive trials.
+
-
+
-
+
-
Figure: Jacob Bernoulli described the Bernoulli distribution through an urn in which there are black and red balls.
+
Figure: Jacob Bernoulli described the Bernoulli distribution through
+an urn in which there are black and red balls.
-
Thomas Bayes also described the Bernoulli distribution, only he didn’t refer to Jacob Bernoulli’s work, so he didn’t call it by that name. He described the distribution in terms of a table (think of a billiard table) and two balls. Bayes suggests that each ball can be rolled across the table such that it comes to rest at a position that is uniformly distributed between the sides of the table.
-
Let’s assume that the first ball is rolled, and that it comes to reset at a position that is π times the width of the table from the left hand side.
-
Now, we roll the second ball. We are interested if the second ball ends up on the left side (+ve result) or the right side (-ve result) of the first ball. We use the Bernoulli distribution to determine this.
-
For this reason in Bayes’s distribution there is considered to be aleatoric uncertainty about the distribution parameter.
+
Thomas Bayes also described the Bernoulli distribution, only he
+didn’t refer to Jacob Bernoulli’s work, so he didn’t call it by that
+name. He described the distribution in terms of a table (think of a
+billiard table) and two balls. Bayes suggests that each ball
+can be rolled across the table such that it comes to rest at a position
+that is uniformly distributed between the sides of the
+table.
+
Let’s assume that the first ball is rolled, and that it comes to
+rest at a position that is \(\pi\)
+times the width of the table from the left hand side.
+
Now, we roll the second ball. We are interested if the second ball
+ends up on the left side (+ve result) or the right side (-ve result) of
+the first ball. We use the Bernoulli distribution to determine this.
+
For this reason in Bayes’s distribution there is considered to be
+aleatoric uncertainty about the distribution parameter.
-
+
-
+
-
Figure: Thomas Bayes described the Bernoulli distribution independently of Jacob Bernoulli. He used the analogy of a billiard table. Any ball on the table is given a uniformly random position between the left and right side of the table. The first ball (in the figure) gives the parameter of the Bernoulli distribution. The second ball (in the figure) gives the outcome as either left or right (relative to the first ball). This is the origin of the term Bayesian because the parameter of the distribution is drawn from a probsbility.
Maximum likelihood in the Bernoulli distribution is straightforward. Let’s assume we have data, $\dataVector$ which consists of a vector of binary values of length n. If we assume each value was sampled independently from the Bernoulli distribution, conditioned on the parameter π then our joint probability density has the form $$
-p(\dataVector|\pi) = \prod_{i=1}^{\numData} \pi^{\dataScalar_i} (1-\pi)^{1-\dataScalar_i}.
-$$ As normal in maximum likelihood we consider the negative log likelihood as our objective, $$\begin{align*}
- \errorFunction(\pi)& = -\log p(\dataVector|\pi)\\
- & = -\sum_{i=1}^{\numData} \dataScalar_i \log \pi - \sum_{i=1}^{\numData} (1-\dataScalar_i) \log(1-\pi),
- \end{align*}$$
-
and we can derive the gradient with respect to the parameter π. $$\frac{\text{d}\errorFunction(\pi)}{\text{d}\pi} = -\frac{\sum_{i=1}^{\numData} \dataScalar_i}{\pi} + \frac{\sum_{i=1}^{\numData} (1-\dataScalar_i)}{1-\pi},$$
-
and as normal we look for a stationary point for the log likelihood by setting this derivative to zero, $$0 = -\frac{\sum_{i=1}^{\numData} \dataScalar_i}{\pi} + \frac{\sum_{i=1}^{\numData} (1-\dataScalar_i)}{1-\pi},$$ rearranging we form $$(1-\pi)\sum_{i=1}^{\numData} \dataScalar_i = \pi\sum_{i=1}^{\numData} (1-\dataScalar_i),$$ which implies $$\sum_{i=1}^{\numData} \dataScalar_i = \pi\left(\sum_{i=1}^{\numData} (1-\dataScalar_i) + \sum_{i=1}^{\numData} \dataScalar_i\right),$$
-
and now we recognise that $\sum_{i=1}^{\numData} (1-\dataScalar_i) + \sum_{i=1}^{\numData} \dataScalar_i = \numData$ so we have $$\pi = \frac{\sum_{i=1}^{\numData} \dataScalar_i}{\numData}$$
-
so in other words we estimate the probability associated with the Bernoulli by setting it to the number of observed positives, divided by the total length of $\dataScalar$. This makes intiutive sense. If I asked you to estimate the probability of a coin being heads, and you tossed the coin 100 times, and recovered 47 heads, then the estimate of the probability of heads should be $\frac{47}{100}$.
+
Figure: Thomas Bayes described the Bernoulli distribution
+independently of Jacob Bernoulli. He used the analogy of a billiard
+table. Any ball on the table is given a uniformly random position
+between the left and right side of the table. The first ball (in the
+figure) gives the parameter of the Bernoulli distribution. The second
+ball (in the figure) gives the outcome as either left or right (relative
+to the first ball). This is the origin of the term Bayesian because the
+parameter of the distribution is drawn from a probability.
Maximum likelihood in the Bernoulli distribution is straightforward.
+Let’s assume we have data, \(\mathbf{
+y}\) which consists of a vector of binary values of length \(n\). If we assume each value was sampled
+independently from the Bernoulli distribution, conditioned on the
+parameter \(\pi\) then our joint
+probability density has the form \[
+p(\mathbf{ y}|\pi) = \prod_{i=1}^{n} \pi^{y_i} (1-\pi)^{1-y_i}.
+\] As normal in maximum likelihood we consider the negative log
+likelihood as our objective, \[\begin{align*}
+ E(\pi)& = -\log p(\mathbf{ y}|\pi)\\
+ & = -\sum_{i=1}^{n} y_i \log \pi -
+\sum_{i=1}^{n} (1-y_i) \log(1-\pi),
+ \end{align*}\]
+
and we can derive the gradient with respect to the parameter \(\pi\). \[\frac{\text{d}E(\pi)}{\text{d}\pi} =
+-\frac{\sum_{i=1}^{n} y_i}{\pi} + \frac{\sum_{i=1}^{n}
+(1-y_i)}{1-\pi},\]
+
and as normal we look for a stationary point for the log likelihood
+by setting this derivative to zero, \[0 =
+-\frac{\sum_{i=1}^{n} y_i}{\pi} + \frac{\sum_{i=1}^{n}
+(1-y_i)}{1-\pi},\] rearranging we form \[(1-\pi)\sum_{i=1}^{n} y_i = \pi\sum_{i=1}^{n}
+(1-y_i),\] which implies \[\sum_{i=1}^{n} y_i = \pi\left(\sum_{i=1}^{n}
+(1-y_i) + \sum_{i=1}^{n} y_i\right),\]
+
and now we recognise that \(\sum_{i=1}^{n}
+(1-y_i) + \sum_{i=1}^{n} y_i = n\) so we have \[\pi = \frac{\sum_{i=1}^{n} y_i}{n}\]
+
so in other words we estimate the probability associated with the
+Bernoulli by setting it to the number of observed positives, divided by
+the total length of \(\mathbf{ y}\). This makes
+intuitive sense. If I asked you to estimate the probability of a coin
+being heads, and you tossed the coin 100 times, and recovered 47 heads,
+then the estimate of the probability of heads should be \(\frac{47}{100}\).
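+
+A quick numerical check of this estimator (a sketch, not part of the
+original notes): simulate some coin tosses and confirm that the maximum
+likelihood estimate is just the proportion of heads.
+
+import numpy as np
+
+y = np.random.rand(100) < 0.47   # 100 simulated tosses with P(heads) = 0.47
+print(y.sum()/len(y))            # maximum likelihood estimate: heads / total tosses
+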
Exercise 4
-
Show that the maximum likelihood solution we have found is a minimum for our objective.
-
$$
+
Show that the maximum likelihood solution we have found is a
+minimum for our objective.
Note: Everything we do below is possible using standard packages like scikit-learn, our purpose in this session is to help you understand how those engines are constructed. In practice for an application you should use a library like scikit-learn.
-
In probabilistic machine learning we place probability distributions (or densities) over all the variables of interest, our first classification algorithm will do just that. We will consider how to form a classification by making assumptions about the joint density of our observations. We need to make assumptions to reduce the number of parameters we need to optimise.
-
In the ideal world, given label data $\dataVector$ and the inputs $\inputMatrix$ we should be able to specify the joint density of all potential values of $\dataVector$ and $\inputMatrix$, $p(\dataVector, \inputMatrix)$. If $\inputMatrix$ and $\dataVector$ are our training data, and we can somehow extend our density to incorporate future test data (by augmenting $\dataVector$ with a new observation $\dataScalar^*$ and $\inputMatrix$ with the corresponding inputs, $\inputVector^*$), then we can answer any given question about a future test point $\dataScalar^*$ given its covariates $\inputVector^*$ by conditioning on the training variables to recover, $$
-p(\dataScalar^*|\inputMatrix, \dataVector, \inputVector^*),
-$$
-
We can compute this distribution using the product and sum rules. However, to specify this density we must give the probability associated with all possible combinations of $\dataVector$ and $\inputMatrix$. There are $2^{\numData}$ possible combinations for the vector $\dataVector$ and the probability for each of these combinations must be jointly specified along with the joint density of the matrix $\inputMatrix$, as well as being able to extend the density for any chosen test location $\inputVector^*$.
-
In naive Bayes we make certain simplifying assumptions that allow us to perform all of the above in practice.
-
Data Conditional Independence
-
If we are given model parameters $\paramVector$ we assume that conditioned on all these parameters that all data points in the model are independent. In other words we have, $$
- p(\dataScalar^*, \inputVector^*, \dataVector, \inputMatrix|\paramVector) = p(\dataScalar^*, \inputVector^*|\paramVector)\prod_{i=1}^{\numData} p(\dataScalar_i, \inputVector_i | \paramVector).
- $$ This is a conditional independence assumption because we are not assuming our data are purely independent. If we were to assume that, then there would be nothing to learn about our test data given our training data. We are assuming that they are independent given our parameters, $\paramVector$. We made similar assumptions for regression, where our parameter set included $\mappingVector$ and $\dataStd^2$. Given those parameters we assumed that the density over $\dataVector, \dataScalar^*$ was independent. Here we are going a little further with that assumption because we are assuming the joint density of $\dataVector$ and $\inputMatrix$ is independent across the data given the parameters.
-
Computing posterior distribution in this case becomes easier, this is known as the ‘Bayes classifier’.
-
Feature Conditional Independence
-
$$
-p(\inputVector_i | \dataScalar_i, \paramVector) = \prod_{j=1}^{\dataDim} p(\inputScalar_{i,j}|\dataScalar_i, \paramVector)
-$$ where $\dataDim$ is the dimensionality of our inputs.
-
The assumption that is particular to naive Bayes is to now consider that the features are also conditionally independent, but not only given the parameters. We assume that the features are independent given the parameters and the label. So for each data point we have $$p(\inputVector_i | \dataScalar_i, \paramVector) = \prod_{j=1}^{\dataDim} p(\inputScalar_{i,j}|\dataScalar_i,\paramVector)$$ where $\dataDim$ is the dimensionality of our inputs.
We now have nearly all of the components we need to specify the full joint density. However, the feature conditional independence doesn’t yet give us the joint density over $p(\dataScalar_i, \inputVector_i)$ which is required to subsitute in to our data conditional independence to give us the full density. To recover the joint density given the conditional distribution of each feature, $p(\inputScalar_{i,j}|\dataScalar_i, \paramVector)$, we need to make use of the product rule and combine it with a marginal density for $\dataScalar_i$,
-
$$p(\inputScalar_{i,j},\dataScalar_i| \paramVector) = p(\inputScalar_{i,j}|\dataScalar_i, \paramVector)p(\dataScalar_i).$$ Because $\dataScalar_i$ is binary the Bernoulli density makes a suitable choice for our prior over $\dataScalar_i$, $$p(\dataScalar_i|\pi) = \pi^{\dataScalar_i} (1-\pi)^{1-\dataScalar_i}$$ where π now has the interpretation as being the prior probability that the classification should be positive.
-
Joint Density for Naive Bayes
-
This allows us to write down the full joint density of the training data, $$
- p(\dataVector, \inputMatrix|\paramVector, \pi) = \prod_{i=1}^{\numData} \prod_{j=1}^{\dataDim} p(\inputScalar_{i,j}|\dataScalar_i, \paramVector)p(\dataScalar_i|\pi)
- $$
-
which can now be fit by maximum likelihood. As normal we form our objective as the negative log likelihood,
-
$$\begin{align*}
-\errorFunction(\paramVector, \pi)& = -\log p(\dataVector, \inputMatrix|\paramVector, \pi) \\ &= -\sum_{i=1}^{\numData} \sum_{j=1}^{\dataDim} \log p(\inputScalar_{i, j}|\dataScalar_i, \paramVector) - \sum_{i=1}^{\numData} \log p(\dataScalar_i|\pi),
-\end{align*}$$ which we note decomposes into two objective functions, one which is dependent on π alone and one which is dependent on $\paramVector$ alone so we have, $$
-\errorFunction(\pi, \paramVector) = \errorFunction(\paramVector) + \errorFunction(\pi).
-$$ Since the two objective functions are separately dependent on the parameters π and $\paramVector$ we can minimize them independently. Firstly, minimizing the Bernoulli likelihood over the labels we have, $$
-\errorFunction(\pi) = -\sum_{i=1}^{\numData}\log p(\dataScalar_i|\pi) = -\sum_{i=1}^{\numData} \dataScalar_i \log \pi - \sum_{i=1}^{\numData} (1-\dataScalar_i) \log (1-\pi)
-$$ which we already minimized above recovering $$
-\pi = \frac{\sum_{i=1}^{\numData} \dataScalar_i}{\numData}.
-$$
-
We now need to minimize the objective associated with the conditional distributions for the features, $$
-\errorFunction(\paramVector) = -\sum_{i=1}^{\numData} \sum_{j=1}^{\dataDim} \log p(\inputScalar_{i, j} |\dataScalar_i, \paramVector),
-$$ which necessarily implies making some assumptions about the form of the conditional distributions. The right assumption will depend on the nature of our input data. For example, if we have an input which is real valued, we could use a Gaussian density and we could allow the mean and variance of the Gaussian to be different according to whether the class was positive or negative and according to which feature we were measuring. That would give us the form, $$
-p(\inputScalar_{i, j} | \dataScalar_i,\paramVector) = \frac{1}{\sqrt{2\pi \dataStd_{\dataScalar_i,j}^2}} \exp \left(-\frac{(\inputScalar_{i,j} - \mu_{\dataScalar_i, j})^2}{\dataStd_{\dataScalar_i,j}^2}\right),
-$$ where $\dataStd_{1, j}^2$ is the variance of the density for the jth output and the class $\dataScalar_i=1$ and $\dataStd_{0, j}^2$ is the variance if the class is 0. The means can vary similarly. Our parameters, $\paramVector$ would consist of all the means and all the variances for the different dimensions.
-
As normal we form our objective as the negative log likelihood, $$
-\errorFunction(\paramVector, \pi) = -\log p(\dataVector, \inputMatrix|\paramVector, \pi) = -\sum_{i=1}^{\numData} \sum_{j=1}^{\dataDim} \log p(\inputScalar_{i, j}|\dataScalar_i, \paramVector) - \sum_{i=1}^{\numData} \log p(\dataScalar_i|\pi),
-$$ which we note decomposes into two objective functions, one which is dependent on π alone and one which is dependent on $\paramVector$ alone so we have, $$
-\errorFunction(\pi, \paramVector) = \errorFunction(\paramVector) + \errorFunction(\pi).
-$$
First we will load in the Nigerian NMIS health data. Our aim will be to predict whether a center has maternal health delivery services given the attributes in the data. We will predict of the number of nurses, the number of doctors, location etc.
-
Let’s first remind ourselves of the data.
-
data.head()
-
Now we will convert this data into a form which we can use as inputs X, and labels y.
-
import pandas as pd
-import numpy as np
-
data = data[~pd.isnull(data['maternal_health_delivery_services'])]
-data = data.dropna() # Remove entries with missing values
-X = data[['emergency_transport',
-'num_chews_fulltime',
-'phcn_electricity',
-'child_health_measles_immun_calc',
-'num_nurses_fulltime',
-'num_doctors_fulltime',
-'improved_water_supply',
-'improved_sanitation',
-'antenatal_care_yn',
-'family_planning_yn',
-'malaria_treatment_artemisinin',
-'latitude',
-'longitude']].copy()
-y = data['maternal_health_delivery_services']==True# set label to be whether there's a maternal health delivery service
-
-# Create series of health center types with the relevant index
-s = data['facility_type_display'].apply(pd.Series, 1).stack()
-s.index = s.index.droplevel(-1) # to line up with df's index
-
-# Extract from the series the unique list of types.
-types = s.unique()
-
-# For each type extract the indices where it is present and add a column to X
-type_names = []
-for htype in types:
- index = s[s==htype].index.tolist()
- type_col=htype.replace(' ', '_').replace('/','-').lower()
- type_names.append(type_col)
- X.loc[:, type_col] =0.0
- X.loc[index, type_col] =1.0
-
This has given us a new data frame X which contains the different facility types in different columns.
Note: Everything we do below is possible using standard
+packages like scikit-learn; our purpose in this session is
+to help you understand how those engines are constructed. In practice
+for an application you should use a library like
+scikit-learn.
+
In probabilistic machine learning we place probability distributions
+(or densities) over all the variables of interest; our first
+classification algorithm will do just that. We will consider how to form
+a classification by making assumptions about the joint density
+of our observations. We need to make assumptions to reduce the number of
+parameters we need to optimise.
+
In the ideal world, given label data \(\mathbf{ y}\) and the inputs \(\mathbf{X}\) we should be able to specify
+the joint density of all potential values of \(\mathbf{ y}\) and \(\mathbf{X}\), \(p(\mathbf{ y}, \mathbf{X})\). If \(\mathbf{X}\) and \(\mathbf{ y}\) are our training data, and we
+can somehow extend our density to incorporate future test data (by
+augmenting \(\mathbf{ y}\) with a new
+observation \(y^*\) and \(\mathbf{X}\) with the corresponding inputs,
+\(\mathbf{ x}^*\)), then we can answer
+any given question about a future test point \(y^*\) given its covariates \(\mathbf{ x}^*\) by conditioning on the
+training variables to recover, \[
+p(y^*|\mathbf{X}, \mathbf{ y}, \mathbf{ x}^*),
+\]
+
We can compute this distribution using the product and sum rules.
+However, to specify this density we must give the probability associated
+with all possible combinations of \(\mathbf{
+y}\) and \(\mathbf{X}\). There
+are \(2^{n}\) possible combinations for
+the vector \(\mathbf{ y}\) and the
+probability for each of these combinations must be jointly specified
+along with the joint density of the matrix \(\mathbf{X}\), as well as being able to
+extend the density for any chosen test location \(\mathbf{ x}^*\).
+
In naive Bayes we make certain simplifying assumptions that allow us
+to perform all of the above in practice.
+
Data Conditional Independence
+
+If we are given model parameters \(\boldsymbol{ \theta}\) we assume that,
+conditioned on all these parameters, all data points in the model
+are independent. In other words we have, \[
+ p(y^*, \mathbf{ x}^*, \mathbf{ y}, \mathbf{X}|\boldsymbol{ \theta}) =
+p(y^*, \mathbf{ x}^*|\boldsymbol{ \theta})\prod_{i=1}^{n} p(y_i,
+\mathbf{ x}_i | \boldsymbol{ \theta}).
+ \] This is a conditional independence assumption because we are
+not assuming our data are purely independent. If we were to assume that,
+then there would be nothing to learn about our test data given our
+training data. We are assuming that they are independent given
+our parameters, \(\boldsymbol{
+\theta}\). We made similar assumptions for regression, where our
+parameter set included \(\mathbf{ w}\)
+and \(\sigma^2\). Given those
+parameters we assumed that the density over \(\mathbf{ y}, y^*\) was
+independent. Here we are going a little further with that
+assumption because we are assuming the joint density of \(\mathbf{ y}\) and \(\mathbf{X}\) is independent across the data
+given the parameters.
+
+Computing the posterior distribution in this case becomes easier; this is
+known as the ‘Bayes classifier’.
+
Feature Conditional Independence
+
\[
+p(\mathbf{ x}_i | y_i, \boldsymbol{ \theta}) = \prod_{j=1}^{p}
+p(x_{i,j}|y_i, \boldsymbol{ \theta})
+\] where \(p\) is the
+dimensionality of our inputs.
+
The assumption that is particular to naive Bayes is to now consider
+that the features are also conditionally independent, but not
+only given the parameters. We assume that the features are independent
+given the parameters and the label. So for each data point we
+have \[p(\mathbf{ x}_i | y_i, \boldsymbol{
+\theta}) = \prod_{j=1}^{p} p(x_{i,j}|y_i,\boldsymbol{ \theta})\]
+where \(p\) is the dimensionality of
+our inputs.
We now have nearly all of the components we need to specify the full
+joint density. However, the feature conditional independence doesn’t yet
+give us the joint density over \(p(y_i,
+\mathbf{ x}_i)\) which is required to substitute into our data
+conditional independence to give us the full density. To recover the
+joint density given the conditional distribution of each feature, \(p(x_{i,j}|y_i, \boldsymbol{ \theta})\), we
+need to make use of the product rule and combine it with a marginal
+density for \(y_i\),
+
\[p(x_{i,j},y_i| \boldsymbol{ \theta}) =
+p(x_{i,j}|y_i, \boldsymbol{ \theta})p(y_i).\] Because \(y_i\) is binary the Bernoulli
+density makes a suitable choice for our prior over \(y_i\), \[p(y_i|\pi) = \pi^{y_i} (1-\pi)^{1-y_i}\]
+where \(\pi\) now has the
+interpretation as being the prior probability that the
+classification should be positive.
+
Joint Density for Naive
+Bayes
+
This allows us to write down the full joint density of the training
+data, \[
+ p(\mathbf{ y}, \mathbf{X}|\boldsymbol{ \theta}, \pi) = \prod_{i=1}^{n}
+\prod_{j=1}^{p} p(x_{i,j}|y_i, \boldsymbol{ \theta})p(y_i|\pi)
+ \]
+
which can now be fit by maximum likelihood. As normal we form our
+objective as the negative log likelihood,
+
\[\begin{align*}
+E(\boldsymbol{ \theta}, \pi)& = -\log p(\mathbf{ y},
+\mathbf{X}|\boldsymbol{ \theta}, \pi) \\ &= -\sum_{i=1}^{n}
+\sum_{j=1}^{p} \log p(x_{i, j}|y_i, \boldsymbol{ \theta})
+- \sum_{i=1}^{n} \log p(y_i|\pi),
+\end{align*}\] which we note decomposes into two
+objective functions, one which is dependent on \(\pi\) alone and one which is dependent on
+\(\boldsymbol{ \theta}\) alone so we
+have, \[
+E(\pi, \boldsymbol{ \theta}) = E(\boldsymbol{ \theta}) + E(\pi).
+\] Since the two objective functions are separately dependent on
+the parameters \(\pi\) and \(\boldsymbol{ \theta}\) we can minimize them
+independently. Firstly, minimizing the Bernoulli likelihood over the
+labels we have, \[
+E(\pi) = -\sum_{i=1}^{n}\log p(y_i|\pi) = -\sum_{i=1}^{n} y_i \log \pi -
+\sum_{i=1}^{n} (1-y_i) \log (1-\pi)
+\] which we already minimized above recovering \[
+\pi = \frac{\sum_{i=1}^{n} y_i}{n}.
+\]
+
We now need to minimize the objective associated with the conditional
+distributions for the features, \[
+E(\boldsymbol{ \theta}) = -\sum_{i=1}^{n} \sum_{j=1}^{p} \log p(x_{i, j}
+|y_i, \boldsymbol{ \theta}),
+\] which necessarily implies making some assumptions about the
+form of the conditional distributions. The right assumption will depend
+on the nature of our input data. For example, if we have an input which
+is real valued, we could use a Gaussian density and we could allow the
+mean and variance of the Gaussian to be different according to whether
+the class was positive or negative and according to which feature we
+were measuring. That would give us the form, \[
+p(x_{i, j} | y_i,\boldsymbol{ \theta}) = \frac{1}{\sqrt{2\pi
+\sigma_{y_i,j}^2}} \exp \left(-\frac{(x_{i,j} - \mu_{y_i,
+j})^2}{2\sigma_{y_i,j}^2}\right),
+\] where \(\sigma_{1, j}^2\) is
+the variance of the density for the \(j\)th output and the class \(y_i=1\) and \(\sigma_{0, j}^2\) is the variance if the
+class is 0. The means can vary similarly. Our parameters, \(\boldsymbol{ \theta}\) would consist of all
+the means and all the variances for the different dimensions.
+
As normal we form our objective as the negative log likelihood, \[
+E(\boldsymbol{ \theta}, \pi) = -\log p(\mathbf{ y},
+\mathbf{X}|\boldsymbol{ \theta}, \pi) = -\sum_{i=1}^{n} \sum_{j=1}^{p}
+\log p(x_{i, j}|y_i, \boldsymbol{ \theta}) - \sum_{i=1}^{n} \log
+p(y_i|\pi),
+\] which we note decomposes into two objective
+functions, one which is dependent on \(\pi\) alone and one which is dependent on
+\(\boldsymbol{ \theta}\) alone so we
+have, \[
+E(\pi, \boldsymbol{ \theta}) = E(\boldsymbol{ \theta}) + E(\pi).
+\]
Our aim will be to predict whether a center has maternal health
+delivery services given the attributes in the data. We will make this
+prediction from attributes such as the number of nurses, the number of
+doctors, location etc.
+
Now we will convert this data into a form which we can use as inputs
+X, and labels y.
+
import pandas as pd
+import numpy as np
+
data = data[~pd.isnull(data['maternal_health_delivery_services'])]
+data = data.dropna() # Remove entries with missing values
+X = data[['emergency_transport',
+'num_chews_fulltime',
+'phcn_electricity',
+'child_health_measles_immun_calc',
+'num_nurses_fulltime',
+'num_doctors_fulltime',
+'improved_water_supply',
+'improved_sanitation',
+'antenatal_care_yn',
+'family_planning_yn',
+'malaria_treatment_artemisinin',
+'latitude',
+'longitude']].copy()
+y = data['maternal_health_delivery_services']==True  # set label to be whether there's a maternal health delivery service
+
+# Create series of health center types with the relevant index
+s = data['facility_type_display'].apply(pd.Series, 1).stack()
+s.index = s.index.droplevel(-1) # to line up with df's index
+
+# Extract from the series the unique list of types.
+types = s.unique()
+
+# For each type extract the indices where it is present and add a column to X
+type_names = []
+for htype in types:
+    index = s[s==htype].index.tolist()
+    type_col = htype.replace(' ', '_').replace('/','-').lower()
+    type_names.append(type_col)
+    X.loc[:, type_col] = 0.0
+    X.loc[index, type_col] = 1.0
+
This has given us a new data frame X which contains the
+different facility types in different columns.
We can now specify the naive Bayes model. For the genres we want to model the data as Bernoulli distributed, and for the year and body count we want to model the data as Gaussian distributed. We set up two data frames to contain the parameters for the rows and the columns below.
-
# assume data is binary or real.
-# this list encodes whether it is binary or real (1 for binary, 0 for real)
-binary_columns = ['emergency_transport',
-'phcn_electricity',
-'child_health_measles_immun_calc',
-'improved_water_supply',
-'improved_sanitation',
-'antenatal_care_yn',
-'family_planning_yn',
-'malaria_treatment_artemisinin'] + type_names
-real_columns = ['num_chews_fulltime',
-'num_nurses_fulltime',
-'num_doctors_fulltime',
-'latitude',
-'longitude']
-Bernoulli = pd.DataFrame(data=np.zeros((2,len(binary_columns))), columns=binary_columns, index=['theta_0', 'theta_1'])
-Gaussian = pd.DataFrame(data=np.zeros((4,len(real_columns))), columns=real_columns, index=['mu_0', 'sigma2_0', 'mu_1', 'sigma2_1'])
-
Now we have the data in a form ready for analysis, let’s construct our data matrix.
And we can now train the model. For each feature we can make the fit independently. The fit is given by either counting the number of positives (for binary data) which gives us the maximum likelihood solution for the Bernoulli. Or by computing the empirical mean and variance of the data for the Gaussian, which also gives us the maximum likelihood solution.
-
for column in X_train:
-if column in Gaussian:
- Gaussian[column]['mu_0'] = X_train[column][~y_train].mean()
- Gaussian[column]['mu_1'] = X_train[column][y_train].mean()
- Gaussian[column]['sigma2_0'] = X_train[column][~y_train].var(ddof=0)
- Gaussian[column]['sigma2_1'] = X_train[column][y_train].var(ddof=0)
-if column in Bernoulli:
- Bernoulli[column]['theta_0'] = X_train[column][~y_train].sum()/(~y_train).sum()
- Bernoulli[column]['theta_1'] = X_train[column][y_train].sum()/(y_train).sum()
-
We can examine the nature of the distributions we’ve fitted to the model by looking at the entries in these data frames.
-
Bernoulli
-
The distributions show the parameters of the independent class conditional probabilities for no maternity services. It is a Bernoulli distribution with the parameter, π, given by (theta_0) for the facilities without maternity services and theta_1 for the facilities with maternity services. The parameters whow that, facilities with maternity services also are more likely to have other services such as grid electricity, emergency transport, immunization programs etc.
-
The naive Bayes assumption says that the joint probability for these services is given by the product of each of these Bernoulli distributions.
-
Gaussian
-
We have modelled the numbers in our table with a Gaussian density. Since several of these numbers are counts, a more appropriate distribution might be the Poisson distribution. But here we can see that the average number of nurses, healthworkers and doctors is higher in the facilities with maternal services (mu_1) than those without maternal services (mu_0). There is also a small difference between the mean latitude and longitudes. However, the standard deviation which would be given by the square root of the variance parameters (sigma_0 and sigma_1) is large, implying that a difference in latitude and longitude may be due to sampling error. To be sure more analysis would be required.
-
The final model parameter is the prior probability of the positive class, π, which is computed by maximum likelihood.
-
prior =float(y_train.sum())/len(y_train)
-
The prior probability tells us that slightly more facilities have maternity services than those that don’t.
We can now specify the naive Bayes model. For the binary features we
+want to model the data as Bernoulli distributed, and for the real-valued
+features (the counts, latitude and longitude) we want to model the data
+as Gaussian distributed. We set up two data frames to contain the
+parameters for the rows and the columns below.
+
# assume data is binary or real.
+# this list encodes whether it is binary or real (1 for binary, 0 for real)
+binary_columns = ['emergency_transport',
+'phcn_electricity',
+'child_health_measles_immun_calc',
+'improved_water_supply',
+'improved_sanitation',
+'antenatal_care_yn',
+'family_planning_yn',
+'malaria_treatment_artemisinin'] + type_names
+real_columns = ['num_chews_fulltime',
+'num_nurses_fulltime',
+'num_doctors_fulltime',
+'latitude',
+'longitude']
+Bernoulli = pd.DataFrame(data=np.zeros((2,len(binary_columns))), columns=binary_columns, index=['theta_0', 'theta_1'])
+Gaussian = pd.DataFrame(data=np.zeros((4,len(real_columns))), columns=real_columns, index=['mu_0', 'sigma2_0', 'mu_1', 'sigma2_1'])
+
Now we have the data in a form ready for analysis, let’s construct
+our data matrix.
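The code that constructs the training and test sets (the X_train, y_train, X_test and y_test used below) is not included in this extract. A minimal sketch, assuming a random 80/20 split and reusing the X and y constructed above, might look as follows.

# Hypothetical train/test split: the notebook's own split code is not shown in this extract.
np.random.seed(42)                        # fix the seed so the split is reproducible
indices = np.random.permutation(len(X))   # shuffle the row positions
n_train = int(0.8*len(X))                 # keep 80% of the rows for training
X_train, X_test = X.iloc[indices[:n_train]], X.iloc[indices[n_train:]]
y_train, y_test = y.iloc[indices[:n_train]], y.iloc[indices[n_train:]]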
And we can now train the model. For each feature we can make the fit
+independently. The fit is given either by counting the number of
+positives (for binary data), which gives us the maximum likelihood
+solution for the Bernoulli, or by computing the empirical mean and
+variance of the data for the Gaussian, which also gives us the maximum
+likelihood solution.
+
for column in X_train:
+    if column in Gaussian:
+        Gaussian[column]['mu_0'] = X_train[column][~y_train].mean()
+        Gaussian[column]['mu_1'] = X_train[column][y_train].mean()
+        Gaussian[column]['sigma2_0'] = X_train[column][~y_train].var(ddof=0)
+        Gaussian[column]['sigma2_1'] = X_train[column][y_train].var(ddof=0)
+    if column in Bernoulli:
+        Bernoulli[column]['theta_0'] = X_train[column][~y_train].sum()/(~y_train).sum()
+        Bernoulli[column]['theta_1'] = X_train[column][y_train].sum()/(y_train).sum()
+
We can examine the nature of the distributions we’ve fitted to the
+model by looking at the entries in these data frames.
+
Bernoulli
+
The distributions show the parameters of the independent
+class conditional probabilities for each feature. Each is a
+Bernoulli distribution whose parameter is given by theta_0 for
+the facilities without maternity services and theta_1 for
+the facilities with maternity services. The parameters show that
+facilities with maternity services are also more likely to have other
+services such as grid electricity, emergency transport, immunization
+programs etc.
+
The naive Bayes assumption says that the joint probability for these
+services is given by the product of each of these Bernoulli
+distributions.
+
Gaussian
+
We have modelled the numbers in our table with a Gaussian density.
+Since several of these numbers are counts, a more appropriate
+distribution might be the Poisson distribution. But here we can see that
+the average number of nurses, health workers and doctors is
+higher in the facilities with maternal services
+(mu_1) than those without maternal services
+(mu_0). There is also a small difference between the mean
+latitudes and longitudes. However, the standard deviation, which
+would be given by the square root of the variance parameters
+(sigma2_0 and sigma2_1), is large, implying that
+a difference in latitude and longitude may be due to sampling error. To
+be sure more analysis would be required.
+
The final model parameter is the prior probability of the positive
+class, \(\pi\), which is computed by
+maximum likelihood.
+
+prior = float(y_train.sum())/len(y_train)
+
The prior probability tells us that slightly more facilities have
+maternity services than those that don’t.
Making Predictions
-
Naive Bayes has given us the class conditional densities: $p(\inputVector_i | \dataScalar_i, \paramVector)$. To make predictions with these densities we need to form the distribution given by $$
-P(\dataScalar^*| \dataVector, \inputMatrix, \inputVector^*, \paramVector)
-$$ This can be computed by using the product rule. We know that $$
-P(\dataScalar^*| \dataVector, \inputMatrix, \inputVector^*, \paramVector)p(\dataVector, \inputMatrix, \inputVector^*|\paramVector) = p(\dataScalar*, \dataVector, \inputMatrix, \inputVector^*| \paramVector)
-$$ implying that $$
-P(\dataScalar^*| \dataVector, \inputMatrix, \inputVector^*, \paramVector) = \frac{p(\dataScalar*, \dataVector, \inputMatrix, \inputVector^*| \paramVector)}{p(\dataVector, \inputMatrix, \inputVector^*|\paramVector)}
-$$ and we’ve already defined $p(\dataScalar^*, \dataVector, \inputMatrix, \inputVector^*| \paramVector)$ using our conditional independence assumptions above $$
-p(\dataScalar^*, \dataVector, \inputMatrix, \inputVector^*| \paramVector) = \prod_{j=1}^{\dataDim} p(\inputScalar^*_{j}|\dataScalar^*, \paramVector)p(\dataScalar^*|\pi)\prod_{i=1}^{\numData} \prod_{j=1}^{\dataDim} p(\inputScalar_{i,j}|\dataScalar_i, \paramVector)p(\dataScalar_i|\pi)
-$$ The other required density is $$
-p(\dataVector, \inputMatrix, \inputVector^*|\paramVector)
-$$ which can be found from $$p(\dataScalar^*, \dataVector, \inputMatrix, \inputVector^*| \paramVector)$$ using the sum rule of probability, $$
-p(\dataVector, \inputMatrix, \inputVector^*|\paramVector) = \sum_{\dataScalar^*=0}^1 p(\dataScalar^*, \dataVector, \inputMatrix, \inputVector^*| \paramVector).
-$$ Because of our independence assumptions that is simply equal to $$
-p(\dataVector, \inputMatrix, \inputVector^*| \paramVector) = \sum_{\dataScalar^*=0}^1 \prod_{j=1}^{\dataDim} p(\inputScalar^*_{j}|\dataScalar^*_i, \paramVector)p(\dataScalar^*|\pi)\prod_{i=1}^{\numData} \prod_{j=1}^{\dataDim} p(\inputScalar_{i,j}|\dataScalar_i, \paramVector)p(\dataScalar_i|\pi).
-$$ Substituting both forms in to recover our distribution over the test label conditioned on the training data we have, $$
-P(\dataScalar^*| \dataVector, \inputMatrix, \inputVector^*, \paramVector) = \frac{\prod_{j=1}^{\dataDim} p(\inputScalar^*_{j}|\dataScalar^*_i, \paramVector)p(\dataScalar^*|\pi)\prod_{i=1}^{\numData} \prod_{j=1}^{\dataDim} p(\inputScalar_{i,j}|\dataScalar_i, \paramVector)p(\dataScalar_i|\pi)}{\sum_{\dataScalar^*=0}^1 \prod_{j=1}^{\dataDim} p(\inputScalar^*_{j}|\dataScalar^*_i, \paramVector)p(\dataScalar^*|\pi)\prod_{i=1}^{\numData} \prod_{j=1}^{\dataDim} p(\inputScalar_{i,j}|\dataScalar_i, \paramVector)p(\dataScalar_i|\pi)}
-$$ and we notice that all the terms associated with the training data actually cancel, the test prediction is conditionally independent of the training data given the parameters. This is a result of our conditional independence assumptions over the data points. $$
-p(\dataScalar^*| \inputVector^*, \paramVector) = \frac{\prod_{j=1}^{\dataDim} p(\inputScalar^*_{j}|\dataScalar^*_i,
-\paramVector)p(\dataScalar^*|\pi)}{\sum_{\dataScalar^*=0}^1 \prod_{j=1}^{\dataDim} p(\inputScalar^*_{j}|\dataScalar^*_i, \paramVector)p(\dataScalar^*|\pi)}
-$$ This formula is also fairly straightforward to implement. First we implement the log probabilities for the Gaussian density.
Now for any test point we compute the joint distribution of the Gaussian features by summing their log probabilities. Working in log space can be a considerable advantage over computing the probabilities directly: as the number of features we include goes up, because all the probabilities are less than 1, the joint probability will become smaller and smaller, and may be difficult to represent accurately (or even underflow). Working in log space can ameliorate this problem. We can also compute the log probability for the Bernoulli distribution.
Naive Bayes has given us the class conditional densities: \(p(\mathbf{ x}_i | y_i, \boldsymbol{
+\theta})\). To make predictions with these densities we need to
+form the distribution given by \[
+P(y^*| \mathbf{ y}, \mathbf{X}, \mathbf{ x}^*, \boldsymbol{ \theta})
+\] This can be computed by using the product rule. We know that
+\[
+P(y^*| \mathbf{ y}, \mathbf{X}, \mathbf{ x}^*, \boldsymbol{
+\theta})p(\mathbf{ y}, \mathbf{X}, \mathbf{ x}^*|\boldsymbol{ \theta}) =
+p(y^*, \mathbf{ y}, \mathbf{X}, \mathbf{ x}^*| \boldsymbol{ \theta})
+\] implying that \[
+P(y^*| \mathbf{ y}, \mathbf{X}, \mathbf{ x}^*, \boldsymbol{ \theta}) =
+\frac{p(y^*, \mathbf{ y}, \mathbf{X}, \mathbf{ x}^*| \boldsymbol{
+\theta})}{p(\mathbf{ y}, \mathbf{X}, \mathbf{ x}^*|\boldsymbol{
+\theta})}
+\] and we’ve already defined \(p(y^*,
+\mathbf{ y}, \mathbf{X}, \mathbf{ x}^*| \boldsymbol{ \theta})\)
+using our conditional independence assumptions above \[
+p(y^*, \mathbf{ y}, \mathbf{X}, \mathbf{ x}^*| \boldsymbol{ \theta}) =
+\prod_{j=1}^{p} p(x^*_{j}|y^*, \boldsymbol{
+\theta})p(y^*|\pi)\prod_{i=1}^{n} \prod_{j=1}^{p} p(x_{i,j}|y_i,
+\boldsymbol{ \theta})p(y_i|\pi)
+\] The other required density is \[
+p(\mathbf{ y}, \mathbf{X}, \mathbf{ x}^*|\boldsymbol{ \theta})
+\] which can be found from \[p(y^*,
+\mathbf{ y}, \mathbf{X}, \mathbf{ x}^*| \boldsymbol{ \theta})\]
+using the sum rule of probability, \[
+p(\mathbf{ y}, \mathbf{X}, \mathbf{ x}^*|\boldsymbol{ \theta}) =
+\sum_{y^*=0}^1 p(y^*, \mathbf{ y}, \mathbf{X}, \mathbf{ x}^*|
+\boldsymbol{ \theta}).
+\] Because of our independence assumptions that is simply equal
+to \[
+p(\mathbf{ y}, \mathbf{X}, \mathbf{ x}^*| \boldsymbol{ \theta}) =
+\sum_{y^*=0}^1 \prod_{j=1}^{p} p(x^*_{j}|y^*, \boldsymbol{
+\theta})p(y^*|\pi)\prod_{i=1}^{n} \prod_{j=1}^{p} p(x_{i,j}|y_i,
+\boldsymbol{ \theta})p(y_i|\pi).
+\] Substituting both forms in to recover our distribution over
+the test label conditioned on the training data we have, \[
+P(y^*| \mathbf{ y}, \mathbf{X}, \mathbf{ x}^*, \boldsymbol{ \theta}) =
+\frac{\prod_{j=1}^{p} p(x^*_{j}|y^*, \boldsymbol{
+\theta})p(y^*|\pi)\prod_{i=1}^{n} \prod_{j=1}^{p} p(x_{i,j}|y_i,
+\boldsymbol{ \theta})p(y_i|\pi)}{\sum_{y^*=0}^1 \prod_{j=1}^{p}
+p(x^*_{j}|y^*, \boldsymbol{ \theta})p(y^*|\pi)\prod_{i=1}^{n}
+\prod_{j=1}^{p} p(x_{i,j}|y_i, \boldsymbol{ \theta})p(y_i|\pi)}
+\] and we notice that all the terms associated with the training
+data actually cancel: the test prediction is conditionally
+independent of the training data given the parameters.
+This is a result of our conditional independence assumptions over the
+data points. \[
+p(y^*| \mathbf{ x}^*, \boldsymbol{ \theta}) = \frac{\prod_{j=1}^{p}
+p(x^*_{j}|y^*,
+\boldsymbol{ \theta})p(y^*|\pi)}{\sum_{y^*=0}^1 \prod_{j=1}^{p}
+p(x^*_{j}|y^*, \boldsymbol{ \theta})p(y^*|\pi)}
+\] This formula is also fairly straightforward to implement.
+First we implement the log probabilities for the Gaussian density.
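The helper that computes those log probabilities is not reproduced in this extract. A minimal sketch of the log of the Gaussian density, taking the mean and variance as arguments (the function name is illustrative, not taken from the lecture code), might be:

def log_gaussian(x, mu, sigma2):
    # log of the Gaussian density with mean mu and variance sigma2
    return -0.5*np.log(2*np.pi*sigma2) - 0.5*(x - mu)**2/sigma2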
Now for any test point we compute the joint distribution of the
+Gaussian features by summing their log probabilities. Working
+in log space can be a considerable advantage over computing the
+probabilities directly: as the number of features we include goes up,
+because all the probabilities are less than 1, the joint probability
+will become smaller and smaller, and may be difficult to represent
+accurately (or even underflow). Working in log space can ameliorate this
+problem. We can also compute the log probability for the Bernoulli
+distribution.
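Again the lecture's implementation is not shown here; a minimal sketch of the Bernoulli log probability (illustrative name) might be:

def log_bernoulli(x, theta):
    # log of the Bernoulli probability: x log(theta) + (1-x) log(1-theta)
    return x*np.log(theta) + (1 - x)*np.log(1 - theta)

Writing it this way makes the issue discussed next explicit: if theta is exactly zero or one the logarithm diverges.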
Before we proceed, let’s just pause and think for a moment what will happen if theta here is either zero or one. This will result in log 0 = − ∞ and cause numerical problems. This definitely can happen in practice. If some of the features are rare or very common across the data set then the maximum likelihood solution could find values of zero or one respectively. Such values are problematic because they cause posterior probabilities of class membership of either one or zero. In practice we deal with this using Laplace smoothing (which actually has an interpretation as a Bayesian fit of the Bernoulli distribution. Laplace used an example of the sun rising each day, and a wish to predict the sun rise the following day to describe his idea of smoothing, which can be found at the bottom of following page from Laplace’s ‘Essai Philosophique …’
-
+
Before we proceed, let’s just pause and think for a moment what will
+happen if theta here is either zero or one. This will
+result in \(\log 0 = -\infty\) and
+cause numerical problems. This definitely can happen in practice. If
+some of the features are rare or very common across the data set then
+the maximum likelihood solution could find values of zero or one
+respectively. Such values are problematic because they cause posterior
+probabilities of class membership of either one or zero. In practice we
+deal with this using Laplace smoothing (which actually has an
+interpretation as a Bayesian fit of the Bernoulli distribution). Laplace
+used an example of the sun rising each day, and a wish to predict the
+sun rise the following day, to describe his idea of smoothing, which can
+be found at the bottom of the following page from Laplace’s ‘Essai
+Philosophique …’
+
-
Laplace suggests that when computing the probability of an event where a success or failure is rare (he uses an example of the sun rising across the last 5,000 years or 1,826,213 days) that even though only successes have been observed (in the sun rising case) that the odds for tomorrow shouldn’t be given as $$
+
Laplace suggests that when computing the probability of an event
+where a success or failure is rare (he uses an example of the sun rising
+across the last 5,000 years or 1,826,213 days), even though only
+successes have been observed (in the sun rising case), the odds for
+tomorrow shouldn’t be given as \[
\frac{1,826,213}{1,826,213} = 1
-$$ but rather by adding one to the numerator and two to the denominator, $$
+\] but rather by adding one to the numerator and two to the
+denominator, \[
\frac{1,826,213 + 1}{1,826,213 + 2} = 0.99999945.
-$$ This technique is sometimes called a ‘pseudocount technique’ because it has an intepretation of assuming some observations before you start, it’s as if instead of observing $\sum_{i}\dataScalar_i$ successes you have an additional success, $\sum_{i}\dataScalar_i + 1$ and instead of having observed n events you’ve observed $\numData + 2$. So we can think of Laplace’s idea saying (before we start) that we have ‘two observations worth of belief, that the odds are 50/50’, because before we start (i.e. when $\numData=0$) our estimate is 0.5, yet because the effective n is only 2, this estimate is quickly overwhelmed by data. Laplace used ideas like this a lot, and it is known as his ‘principle of insufficient reason’. His idea was that in the absence of knowledge (i.e. before we start) we should assume that all possible outcomes are equally likely. This idea has a modern counterpart, known as the principle of maximum entropy. A lot of the theory of this approach was developed by Ed Jaynes, who according to his erstwhile collaborator and friend, John Skilling, learnt French as an undergraduate by reading the works of Laplace. Although John also related that Jaynes’s spoken French was not up to the standard of his scientific French. For me Ed Jaynes’s work very much carries on the tradition of Laplace into the modern era, in particular his focus on Bayesian approaches. I’m very proud to have met those that knew and worked with him. It turns out that Laplace’s idea also has a Bayesian interpretation (as Laplace understood), it comes from assuming a particular prior density for the parameter π, but we won’t explore that interpretation for the moment, and merely choose to estimate the probability as, $$
-\pi = \frac{\sum_{i=1}^{\numData} \dataScalar_i + 1}{\numData + 2}
-$$ to prevent problems with certainty causing numerical issues and misclassifications. Let’s refit the Bernoulli features now.
-
# fit the Bernoulli with Laplace smoothing.
-for column in X_train:
-if column in Bernoulli:
- Bernoulli[column]['theta_0'] = (X_train[column][~y_train].sum() +1)/((~y_train).sum() +2)
- Bernoulli[column]['theta_1'] = (X_train[column][y_train].sum() +1)/((y_train).sum() +2)
+\] This technique is sometimes called a ‘pseudocount technique’
+because it has an interpretation of assuming some observations before you
+start, it’s as if instead of observing \(\sum_{i}y_i\) successes you have an
+additional success, \(\sum_{i}y_i + 1\)
+and instead of having observed \(n\)
+events you’ve observed \(n+ 2\). So we
+can think of Laplace’s idea saying (before we start) that we have ‘two
+observations worth of belief, that the odds are 50/50’, because before
+we start (i.e. when \(n=0\)) our
+estimate is 0.5, yet because the effective \(n\) is only 2, this estimate is quickly
+overwhelmed by data. Laplace used ideas like this a lot, and it is known
+as his ‘principle of insufficient reason’. His idea was that in the
+absence of knowledge (i.e. before we start) we should assume that all
+possible outcomes are equally likely. This idea has a modern
+counterpart, known as the principle
+of maximum entropy. A lot of the theory of this approach was
+developed by Ed Jaynes,
+who according to his erstwhile collaborator and friend, John Skilling,
+learnt French as an undergraduate by reading the works of Laplace.
+Although John also related that Jaynes’s spoken French was not up to the
+standard of his scientific French. For me Ed Jaynes’s work very much
+carries on the tradition of Laplace into the modern era, in particular
+his focus on Bayesian approaches. I’m very proud to have met those that
+knew and worked with him. It turns out that Laplace’s idea also has a
+Bayesian interpretation (as Laplace understood), it comes from assuming
+a particular prior density for the parameter \(\pi\), but we won’t explore that
+interpretation for the moment, and merely choose to estimate the
+probability as, \[
+\pi = \frac{\sum_{i=1}^{n} y_i + 1}{n+ 2}
+\] to prevent problems with certainty causing numerical issues
+and misclassifications. Let’s refit the Bernoulli features now.
+
# fit the Bernoulli with Laplace smoothing.
+for column in X_train:
+    if column in Bernoulli:
+        Bernoulli[column]['theta_0'] = (X_train[column][~y_train].sum() + 1)/((~y_train).sum() + 2)
+        Bernoulli[column]['theta_1'] = (X_train[column][y_train].sum() + 1)/((y_train).sum() + 2)
That places us in a position to write the prediction function.
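The predict function itself is not reproduced in this extract. A minimal sketch of what it might look like, assuming the Gaussian and Bernoulli parameter frames and the prior computed above, together with hypothetical log_gaussian and log_bernoulli helpers like those sketched earlier, is given below.

def predict(X_test, Gaussian, Bernoulli, prior):
    # Accumulate log p(x*, y*) for y* = 0 and y* = 1, then normalise
    # to give the posterior probability of the positive class.
    log_p = {}
    for label, pi in zip([0, 1], [1 - prior, prior]):
        log_prob = np.log(pi)*np.ones(len(X_test))
        for column in X_test:
            if column in Gaussian:
                log_prob += log_gaussian(X_test[column].values,
                                         Gaussian[column]['mu_' + str(label)],
                                         Gaussian[column]['sigma2_' + str(label)])
            elif column in Bernoulli:
                log_prob += log_bernoulli(X_test[column].values,
                                          Bernoulli[column]['theta_' + str(label)])
        log_p[label] = log_prob
    # p(y*=1 | x*) = 1 / (1 + exp(log p(0) - log p(1)))
    return pd.Series(1.0/(1.0 + np.exp(log_p[0] - log_p[1])), index=X_test.index)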
Now we are in a position to make the predictions for the test data.
-
p_y = predict(X_test, Gaussian, Bernoulli, prior)
-
We can test the quality of the predictions in the following way. Firstly, we can threshold our probabilities at 0.5, allocating points with greater than 50% probability of membership of the positive class to the positive class. We can then compare to the true values, and see how many of these values we got correct. This is our total number correct.
-
correct = y_test.eq(p_y>0.5)
-total_correct =sum(correct)
-print("Total correct", total_correct, " out of ", len(y_test), "which is", float(total_correct)/len(y_test), "%")
-
We can also now plot the confusion matrix. A confusion matrix tells us where we are making mistakes. Along the diagonal it stores the true positives, the points that were positive class that we classified correctly, and the true negatives, the points that were negative class and that we classified correctly. The off diagonal terms contain the false positives and the false negatives. Along the rows of the matrix we place the actual class, and along the columns we place our predicted class.
-
confusion_matrix = pd.DataFrame(data=np.zeros((2,2)),
- columns=['predicted no maternity', 'predicted maternity'],
- index =['actual no maternity','actual maternity'])
-confusion_matrix['predicted maternity']['actual maternity'] = (y_test & (p_y>0.5)).sum()
-confusion_matrix['predicted maternity']['actual no maternity'] = (~y_test & (p_y>0.5)).sum()
-confusion_matrix['predicted no maternity']['actual maternity'] = (y_test &~(p_y>0.5)).sum()
-confusion_matrix['predicted no maternity']['actual no maternity'] = (~y_test &~(p_y>0.5)).sum()
-confusion_matrix
Now we are in a position to make the predictions for the test
+data.
+
p_y = predict(X_test, Gaussian, Bernoulli, prior)
+
We can test the quality of the predictions in the following way.
+Firstly, we can threshold our probabilities at 0.5, allocating points
+with greater than 50% probability of membership of the positive class to
+the positive class. We can then compare to the true values, and see how
+many of these values we got correct. This is our total number
+correct.
+
correct = y_test.eq(p_y>0.5)
+total_correct = sum(correct)
+print("Total correct", total_correct, "out of", len(y_test), "which is", 100*float(total_correct)/len(y_test), "%")
+
We can also now plot the confusion
+matrix. A confusion matrix tells us where we are making mistakes.
+Along the diagonal it stores the true positives, the points
+that were positive class that we classified correctly, and the true
+negatives, the points that were negative class and that we
+classified correctly. The off diagonal terms contain the false positives
+and the false negatives. Along the rows of the matrix we place the
+actual class, and along the columns we place our predicted class.
+
confusion_matrix = pd.DataFrame(data=np.zeros((2,2)),
+ columns=['predicted no maternity', 'predicted maternity'],
+ index =['actual no maternity','actual maternity'])
+confusion_matrix['predicted maternity']['actual maternity'] = (y_test & (p_y>0.5)).sum()
+confusion_matrix['predicted maternity']['actual no maternity'] = (~y_test & (p_y>0.5)).sum()
+confusion_matrix['predicted no maternity']['actual maternity'] = (y_test &~(p_y>0.5)).sum()
+confusion_matrix['predicted no maternity']['actual no maternity'] = (~y_test &~(p_y>0.5)).sum()
+confusion_matrix
Exercise 5
-
How can you improve your classification, are all the features equally valid? Are some features more helpful than others? What happens if you remove features that appear to be less helpful. How might you select such features?
+
How can you improve your classification? Are all the features equally
+valid? Are some features more helpful than others? What happens if you
+remove features that appear to be less helpful? How might you select
+such features?
Exercise 6
-
We have decided to classify positive if probability of maternity is greater than 0.5. This has led us to accidentally classify some facilities as havien’t facilities for maternity when in fact they don’t. Imagine you wish to ensure that a facility handles maternity. With your test set how low do you have to set the threshold to avoid all the false negatives (i.e. facilities where you predicted there was no maternity, but in actuality there were?
+
We have decided to classify positive if the probability of maternity is
+greater than 0.5. This has led us to accidentally classify some
+facilities as not having facilities for maternity when in fact they do.
+Imagine you wish to ensure that a facility handles maternity. With your
+test set how low do you have to set the threshold to avoid all the false
+negatives (i.e. facilities where you predicted there was no maternity,
+but in actuality there was)?
Making Predictions
-
Naive Bayes has given us the class conditional densities: $p(\inputVector_i | \dataScalar_i, \paramVector)$. To make predictions with these densities we need to form the distribution given by $$
-P(\dataScalar^*| \dataVector, \inputMatrix, \inputVector^*, \paramVector)
-$$
+
Naive Bayes has given us the class conditional densities: \(p(\mathbf{ x}_i | y_i, \boldsymbol{
+\theta})\). To make predictions with these densities we need to
+form the distribution given by \[
+P(y^*| \mathbf{ y}, \mathbf{X}, \mathbf{ x}^*, \boldsymbol{ \theta})
+\]
Exercise 7
-
Write down the negative log likelihood of the Gaussian density over a vector of variables $\inputVector$. Assume independence between each variable. Minimize this objective to obtain the maximum likelihood solution of the form. $$
-\mu = \frac{\sum_{i=1}^{\numData} \inputScalar_i}{\numData}
-$$
If the input data was binary then we could also make use of the Bernoulli distribution for the features. For that case we would have the form, $$
-p(\inputScalar_{i, j} | \dataScalar_i,\paramVector) = \theta_{\dataScalar_i, j}^{\inputScalar_{i, j}}(1-\theta_{\dataScalar_i, j})^{(1-\inputScalar_{i,j})},
-$$ where θ1, j is the probability that the jth feature is on if $\dataScalar_i$ is 1.
-
In either case, maximum likelihood fitting would proceed in the same way. The objective has the form, $$
-\errorFunction(\paramVector) = -\sum_{j=1}^{\dataDim} \sum_{i=1}^{\numData} \log p(\inputScalar_{i,j} |\dataScalar_i, \paramVector),
-$$ and if, as above, the parameters of the distributions are specific to each feature vector (we had means and variances for each continuous feature, and a probability for each binary feature) then we can use the fact that these parameters separate into disjoint subsets across the features to write, $$
+
Write down the negative log likelihood of the Gaussian density over a
+vector of variables \(\mathbf{ x}\).
+Assume independence between each variable. Minimize this objective to
+obtain the maximum likelihood solutions of the form \[
+\mu = \frac{\sum_{i=1}^{n} x_i}{n}
+\]\[
+\sigma^2 = \frac{\sum_{i=1}^{n} (x_i - \mu)^2}{n}
+\]
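As a quick numerical sanity check of these formulas (not a derivation), we can compare them with numpy's built-in estimators on some synthetic data; this sketch is purely illustrative.

import numpy as np
x = np.random.normal(loc=2.0, scale=3.0, size=1000)   # synthetic data for the check
mu_hat = x.sum()/len(x)                               # maximum likelihood mean
sigma2_hat = ((x - mu_hat)**2).sum()/len(x)           # maximum likelihood (biased) variance
print(mu_hat, x.mean())                               # these two should agree
print(sigma2_hat, x.var(ddof=0))                      # ddof=0 gives the maximum likelihood estimate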
+
If the input data was binary then we could also make use of
+the Bernoulli distribution for the features. For that case we would have
+the form, \[
+p(x_{i, j} | y_i,\boldsymbol{ \theta}) = \theta_{y_i, j}^{x_{i,
+j}}(1-\theta_{y_i, j})^{(1-x_{i,j})},
+\] where \(\theta_{1, j}\) is
+the probability that the \(j\)th
+feature is on if \(y_i\) is 1.
+
In either case, maximum likelihood fitting would proceed in the same
+way. The objective has the form, \[
+E(\boldsymbol{ \theta}) = -\sum_{j=1}^{p} \sum_{i=1}^{n} \log p(x_{i,j}
+|y_i, \boldsymbol{ \theta}),
+\] and if, as above, the parameters of the distributions are
+specific to each feature vector (we had means and variances for each
+continuous feature, and a probability for each binary feature) then we
+can use the fact that these parameters separate into disjoint subsets
+across the features to write, \[
\begin{align*}
-\errorFunction(\paramVector) &= -\sum_{j=1}^{\dataDim} \sum_{i=1}^{\numData} \log
-p(\inputScalar_{i,j} |\dataScalar_i, \paramVector_j)\\
-& \sum_{j=1}^{\dataDim}
-\errorFunction(\paramVector_j),
+E(\boldsymbol{ \theta}) &= -\sum_{j=1}^{p} \sum_{i=1}^{n} \log
+p(x_{i,j} |y_i, \boldsymbol{ \theta}_j)\\
+&= \sum_{j=1}^{p}
+E(\boldsymbol{ \theta}_j),
\end{align*}
-$$ which means we can minimize our objective on each feature independently.
-
These characteristics mean that naive Bayes scales very well with big data. To fit the model we consider each feature in turn, we select the positive class and fit parameters for that class, then we select each negative class and fit features for that class. We have code below.
+\] which means we can minimize our objective on each feature
+independently.
+
These characteristics mean that naive Bayes scales very well with big
+data. To fit the model we consider each feature in turn: we select the
+positive class and fit parameters for that class, then we select the
+negative class and fit parameters for that class. We have code below.
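The code referred to is not reproduced at this point in the extract. A minimal sketch of the per-feature fit for a real-valued feature, in the same spirit as the pandas code above (the function name is illustrative), might be:

def fit_feature(x, y):
    # x: one real-valued feature column, y: boolean class labels.
    # Each feature/class pair is fitted independently, which is what
    # makes naive Bayes easy to scale and to update as data streams in.
    params = {}
    for label, index in [(1, y), (0, ~y)]:
        params['mu_' + str(label)] = x[index].mean()
        params['sigma2_' + str(label)] = x[index].var(ddof=0)
    return params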
Naive Bayes Summary
-
Naive Bayes is making very simple assumptions about the data, in particular it is modeling the full joint probability of the data set, $p(\dataVector, \inputMatrix | \paramVector, \pi)$ by very strong assumptions about factorizations that are unlikely to be true in practice. The data conditional independence assumption is common, and relies on a rich parameter vector to absorb all the information in the training data. The additional assumption of naive Bayes is that features are conditional independent given the class label $\dataScalar_i$ (and the parameter vector, $\paramVector$. This is quite a strong assumption. However, it causes the objective function to decompose into parts which can be independently fitted to the different feature vectors, meaning it is very easy to fit the model to large data. It is also clear how we should handle streaming data and missing data. This means that the model can be run ‘live’, adapting parameters and information as it arrives. Indeed, the model is even capable of dealing with new features that might arrive at run time. Such is the strength of the modeling the joint probability density. However, the factorization assumption that allows us to do this efficiently is very strong and may lead to poor decision boundaries in practice.
+
Naive Bayes is making very simple assumptions about the data, in
+particular it is modeling the full joint probability of the
+data set, \(p(\mathbf{ y}, \mathbf{X}|
+\boldsymbol{ \theta}, \pi)\) by very strong assumptions about
+factorizations that are unlikely to be true in practice. The data
+conditional independence assumption is common, and relies on a rich
+parameter vector to absorb all the information in the training data. The
+additional assumption of naive Bayes is that features are conditionally
+independent given the class label \(y_i\) (and the parameter vector, \(\boldsymbol{ \theta}\)). This is quite a
+strong assumption.
+decompose into parts which can be independently fitted to the different
+feature vectors, meaning it is very easy to fit the model to large data.
+It is also clear how we should handle streaming data and
+missing data. This means that the model can be run ‘live’,
+adapting parameters and information as it arrives. Indeed, the model is
+even capable of dealing with new features that might arrive at
+run time. Such is the strength of modeling the joint probability
+density. However, the factorization assumption that allows us to do this
+efficiently is very strong and may lead to poor decision boundaries in
+practice.
Other Reading
-
Chapter 5 of Rogers and Girolami (2011) up to pg 179 (Section 5.1, and 5.2 up to 5.2.2).
+
Chapter 5 of Rogers and Girolami (2011) up to pg
+179 (Section 5.1, and 5.2 up to 5.2.2).
References
Thanks!
-
For more information on these subjects and more you might want to check the following resources.
+
For more information on these subjects and more you might want to
+check the following resources.
Bishop, Christopher M. 2006. Pattern Recognition and Machine Learning. springer.
+
+
+Bishop, C.M., 2006. Pattern recognition and machine learning. springer.
+
+
+Pearl, J., 1995. From Bayesian networks to causal networks,
+in: Gammerman, A. (Ed.), Probabilistic Reasoning and
+Bayesian Belief Networks. Alfred Waller, pp. 1–31.
-
-
Pearl, Judea. 1995. “From Bayesian Networks to Causal Networks.” In Probabilistic Reasoning and Bayesian Belief Networks, edited by A. Gammerman, 1–31. Alfred Waller.
+
+Rogers, S., Girolami, M., 2011. A first course in machine learning. CRC
+Press.
-
-
Rogers, Simon, and Mark Girolami. 2011. A First Course in Machine Learning. CRC Press.
+
+Steele, S., Bilchik, A., Eberhardt, J., Kalina, P., Nissan, A., Johnson,
+E., Avital, I., Stojadinovic, A., 2012. Using machine-learned
+Bayesian belief networks to predict perioperative risk of
+clostridium difficile infection following colon surgery. Interact J Med
+Res 1, e6. https://doi.org/10.2196/ijmr.2131
-
-
Steele, S, A Bilchik, J Eberhardt, P Kalina, A Nissan, E Johnson, I Avital, and A Stojadinovic. 2012. “Using Machine-Learned Bayesian Belief Networks to Predict Perioperative Risk of Clostridium Difficile Infection Following Colon Surgery.” Interact J Med Res 1 (2): e6. https://doi.org/10.2196/ijmr.2131.
+
+The Office of the Senior Special Assistant to the President on the
+Millennium Development Goals (OSSAP-MDGs), Columbia University, 2014.
+Nigeria NMIS facility database.
Classical machine learning and statistical approaches to learning, such as neural networks and linear regression, assume a parametric form for functions. Gaussian process models are an alternative approach that assumes a probabilistic prior over functions. This brings benefits, in that uncertainty of function estimation is sustained throughout inference, and some challenges: algorithms for fitting Gaussian processes tend to be more complex than parametric models. In this sessions I will introduce Gaussian processes and explain why sustaining uncertainty is important.
"
-author:
-- given: Neil D.
- family: Lawrence
- url: http://inverseprobability.com
- institute:
- twitter: lawrennd
- gscholar: r3SJcvoAAAAJ
- orcid: 0000-0001-9258-1030
+abstract: "
Classical machine learning and statistical approaches to
+learning, such as neural networks and linear regression, assume a
+parametric form for functions. Gaussian process models are an
+alternative approach that assumes a probabilistic prior over functions.
+This brings benefits, in that uncertainty of function estimation is
+sustained throughout inference, and some challenges: algorithms for
+fitting Gaussian processes tend to be more complex than parametric
+models. In this session I will introduce Gaussian processes and explain
+why sustaining uncertainty is important.
In Sheffield we created a suite of software tools for ‘Open Data Science’. Open data science is an approach to sharing code, models and data that should make it easier for companies, health professionals and scientists to gain access to data science techniques.
In Sheffield we created a suite of software tools for ‘Open Data
+Science’. Open data science is an approach to sharing code, models and
+data that should make it easier for companies, health professionals and
+scientists to gain access to data science techniques.
The mlai software is a suite of helper functions for
+teaching and demonstrating machine learning algorithms. It was first
+used in the Machine Learning and Adaptive Intelligence course in
+Sheffield in 2013.
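If mlai is not already installed, it can usually be obtained from PyPI (assuming the package is published there under the same name), for example from within a notebook:

%pip install mlai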
Once mlai is installed, it can be imported in the usual
+manner.
+
import mlai
Figure: A key reference for Gaussian process models remains the excellent book “Gaussian Processes for Machine Learning” (Rasmussen and Williams (2006)). The book is also freely available online.
-
+
+
Figure: A key reference for Gaussian process models remains the
+excellent book “Gaussian Processes for Machine Learning” (Rasmussen and Williams
+(2006)). The book is also
+freely
+available online.
+
+
+
Rasmussen and
+Williams (2006) is still one of the most important references on
+Gaussian process models. It is available freely
+online.
As an example of using Gaussian process models within the full pipeline from data to decsion, we’ll consider the prediction of Malaria incidence in Uganda. For the purposes of this study malaria reports come in two forms, HMIS reports from health centres and Sentinel data, which is curated by the WHO. There are limited sentinel sites and many HMIS sites.
-
The work is from Ricardo Andrade Pacheco’s PhD thesis, completed in collaboration with John Quinn and Martin Mubangizi (Andrade-Pacheco et al. 2014; Mubangizi et al. 2014). John and Martin were initally from the AI-DEV group from the University of Makerere in Kampala and more latterly they were based at UN Global Pulse in Kampala.
-
Malaria data is spatial data. Uganda is split into districts, and health reports can be found for each district. This suggests that models such as conditional random fields could be used for spatial modelling, but there are two complexities with this. First of all, occasionally districts split into two. Secondly, sentinel sites are a specific location within a district, such as Nagongera which is a sentinel site based in the Tororo district.
+
As an example of using Gaussian process models within the full
+pipeline from data to decision, we’ll consider the prediction of Malaria
+incidence in Uganda. For the purposes of this study malaria reports come
+in two forms, HMIS reports from health centres and Sentinel data, which
+is curated by the WHO. There are limited sentinel sites and many HMIS
+sites.
+
The work is from Ricardo Andrade Pacheco’s PhD thesis, completed in
+collaboration with John Quinn and Martin Mubangizi (Andrade-Pacheco
+et al., 2014; Mubangizi et al., 2014). John and Martin were
+initially from the AI-DEV group at the University of Makerere in
+Kampala and more latterly they were based at UN Global Pulse in Kampala.
+You can see the work summarized on the UN Global Pulse disease
+outbreaks project site here.
Malaria data is spatial data. Uganda is split into districts, and
+health reports can be found for each district. This suggests that models
+such as conditional random fields could be used for spatial modelling,
+but there are two complexities with this. First of all, occasionally
+districts split into two. Secondly, sentinel sites are a specific
+location within a district, such as Nagongera which is a sentinel site
+based in the Tororo district.
+(Andrade-Pacheco
+et al., 2014; Mubangizi et al., 2014)
-
(Andrade-Pacheco et al. 2014; Mubangizi et al. 2014)
-
The common standard for collecting health data on the African continent is from the Health management information systems (HMIS). However, this data suffers from missing values (Gething et al. 2006) and diagnosis of diseases like typhoid and malaria may be confounded.
+
The common standard for collecting health data on the African
+continent is from the Health management information systems (HMIS).
+However, this data suffers from missing values (Gething et al., 2006) and diagnosis
+of diseases like typhoid and malaria may be confounded.
-
+
-
+
-
Figure: The Tororo district, where the sentinel site, Nagongera, is located.
+
Figure: The Tororo district, where the sentinel site, Nagongera, is
+located.
-
World Health Organization Sentinel Surveillance systems are set up “when high-quality data are needed about a particular disease that cannot be obtained through a passive system”. Several sentinel sites give accurate assessment of malaria disease levels in Uganda, including a site in Nagongera.
+
World
+Health Organization Sentinel Surveillance systems are set up “when
+high-quality data are needed about a particular disease that cannot be
+obtained through a passive system”. Several sentinel sites give accurate
+assessment of malaria disease levels in Uganda, including a site in
+Nagongera.
-
+
-
+
-
Figure: Sentinel and HMIS data along with rainfall and temperature for the Nagongera sentinel station in the Tororo district.
-
-
-
In collaboration with the AI Research Group at Makerere we chose to investigate whether Gaussian process models could be used to assimilate information from these two different sources of disease informaton. Further, we were interested in whether local information on rainfall and temperature could be used to improve malaria estimates.
-
The aim of the project was to use WHO Sentinel sites, alongside rainfall and temperature, to improve predictions from HMIS data of levels of malaria.
+
Figure: Sentinel and HMIS data along with rainfall and temperature
+for the Nagongera sentinel station in the Tororo district.
+
+
+
In collaboration with the AI Research Group at Makerere we chose to
+investigate whether Gaussian process models could be used to assimilate
+information from these two different sources of disease information.
+Further, we were interested in whether local information on rainfall and
+temperature could be used to improve malaria estimates.
+
The aim of the project was to use WHO Sentinel sites, alongside
+rainfall and temperature, to improve predictions from HMIS data of
+levels of malaria.
-
+
-
+
@@ -183,10 +325,11 @@ Example: Prediction o
-
+
-
+
@@ -195,24 +338,27 @@ Example: Prediction o
-
-
+
+
-
Figure: The project arose out of the Gaussian process summer school held at Makerere in Kampala in 2013. The school led, in turn, to the Data Science Africa initiative.
+
Figure: The project arose out of the Gaussian process summer school
+held at Makerere in Kampala in 2013. The school led, in turn, to the
+Data Science Africa initiative.
Early Warning Systems
-
+
-
+
@@ -222,451 +368,1203 @@ Early Warning Systems
-
+
-
+
-
Figure: Estimate of the current disease situation in the Kabarole district over time. Estimate is constructed with a Gaussian process with an additive covariance funciton.
-
-
-
Health monitoring system for the Kabarole district. Here we have fitted the reports with a Gaussian process with an additive covariance function. It has two components, one is a long time scale component (in red above) the other is a short time scale component (in blue).
-
Monitoring proceeds by considering two aspects of the curve. Is the blue line (the short term report signal) above the red (which represents the long term trend? If so we have higher than expected reports. If this is the case and the gradient is still positive (i.e. reports are going up) we encode this with a red color. If it is the case and the gradient of the blue line is negative (i.e. reports are going down) we encode this with an amber color. Conversely, if the blue line is below the red and decreasing, we color green. On the other hand if it is below red but increasing, we color yellow.
-
This gives us an early warning system for disease. Red is a bad situation getting worse, amber is bad, but improving. Green is good and getting better and yellow good but degrading.
-
Finally, there is a gray region which represents when the scale of the effect is small.
+
Figure: Estimate of the current disease situation in the Kabarole
+district over time. Estimate is constructed with a Gaussian process with
+an additive covariance function.
+
+
+
Health monitoring system for the Kabarole district. Here we have
+fitted the reports with a Gaussian process with an additive covariance
+function. It has two components: one is a long time scale component (in
+red above), the other is a short time scale component (in blue).
+
Monitoring proceeds by considering two aspects of the curve. Is the
+blue line (the short term report signal) above the red (which represents
+the long term trend)? If so we have higher than expected reports. If this
+is the case and the gradient is still positive (i.e. reports
+are going up) we encode this with a red color. If it is the
+case and the gradient of the blue line is negative (i.e. reports are
+going down) we encode this with an amber color. Conversely, if
+the blue line is below the red and decreasing, we color
+green. On the other hand if it is below red but increasing, we
+color yellow.
+
This gives us an early warning system for disease. Red is a bad
+situation getting worse, amber is bad, but improving. Green is good and
+getting better and yellow good but degrading.
+
Finally, there is a gray region which represents when the scale of
+the effect is small.
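A minimal sketch of this colour-coding rule, assuming we already have the short-term component, the long-term component, the short-term gradient and some measure of the scale of the effect (all of these names are hypothetical), might be:

def warning_colour(short_term, long_term, gradient, scale, min_scale=0.1):
    # Hypothetical encoding of the traffic-light rule described above.
    if scale < min_scale:
        return 'gray'                                   # effect too small to call
    if short_term > long_term:                          # above the long term trend
        return 'red' if gradient > 0 else 'amber'       # getting worse / improving
    else:                                               # below the long term trend
        return 'green' if gradient < 0 else 'yellow'    # getting better / degrading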
-
+
-
+
-
Figure: The map of Ugandan districts with an overview of the Malaria situation in each district.
+
Figure: The map of Ugandan districts with an overview of the Malaria
+situation in each district.
-
These colors can now be observed directly on a spatial map of the districts to give an immediate impression of the current status of the disease across the country.
+
These colors can now be observed directly on a spatial map of the
+districts to give an immediate impression of the current status of the
+disease across the country.
where data is our observations. They can be actively or passively acquired (meta-data). The model contains our assumptions, based on previous experience. That experience can be other data, it can come from transfer learning, or it can merely be our beliefs about the regularities of the universe. In humans our models include our inductive biases. The prediction is an action to be taken or a categorization or a quality score. The reason that machine learning has become a mainstay of artificial intelligence is the importance of predictions in artificial intelligence. The data and the model are combined through computation.
-
In practice we normally perform machine learning using two functions. To combine data with a model we typically make use of:
-
a prediction function a function which is used to make the predictions. It includes our beliefs about the regularities of the universe, our assumptions about how the world works, e.g. smoothness, spatial similarities, temporal similarities.
-
an objective function a function which defines the cost of misprediction. Typically it includes knowledge about the world’s generating processes (probabilistic objectives) or the costs we pay for mispredictions (empiricial risk minimization).
-
The combination of data and model through the prediction function and the objective function leads to a learning algorithm. The class of prediction functions and objective functions we can make use of is restricted by the algorithms they lead to. If the prediction function or the objective function are too complex, then it can be difficult to find an appropriate learning algorithm. Much of the acdemic field of machine learning is the quest for new learning algorithms that allow us to bring different types of models and data together.
where data is our observations. They can be actively or
+passively acquired (meta-data). The model contains our
+assumptions, based on previous experience. That experience can be other
+data, it can come from transfer learning, or it can merely be our
+beliefs about the regularities of the universe. In humans our models
+include our inductive biases. The prediction is an action to be
+taken or a categorization or a quality score. The reason that machine
+learning has become a mainstay of artificial intelligence is the
+importance of predictions in artificial intelligence. The data and the
+model are combined through computation.
+
In practice we normally perform machine learning using two functions.
+To combine data with a model we typically make use of:
+
a prediction function: it is used to make the
+predictions. It includes our beliefs about the regularities of the
+universe, our assumptions about how the world works, e.g., smoothness,
+spatial similarities, temporal similarities.
+
an objective function: it defines the ‘cost’ of
+misprediction. Typically, it includes knowledge about the world’s
+generating processes (probabilistic objectives) or the costs we pay for
+mispredictions (empirical risk minimization).
+
The combination of data and model through the prediction function and
+the objective function leads to a learning algorithm. The class
+of prediction functions and objective functions we can make use of is
+restricted by the algorithms they lead to. If the prediction function or
+the objective function are too complex, then it can be difficult to find
+an appropriate learning algorithm. Much of the academic field of machine
+learning is the quest for new learning algorithms that allow us to bring
+different types of models and data together.
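As a concrete (and deliberately simple) illustration of these two ingredients, here is a minimal sketch for the straight-line model introduced next; the names and the squared-error choice are illustrative rather than taken from the lecture code.

import numpy as np

def prediction_function(x, m, c):
    # our modelling assumption: outputs lie on a straight line
    return m*x + c

def objective_function(m, c, x, y):
    # the cost of misprediction: here the sum of squared errors
    return np.sum((y - prediction_function(x, m, c))**2)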
The challenge with a linear model is that it has two unknowns, m, and c. Observing data allows us to write down a system of simultaneous linear equations. So, for example if we observe two data points, the first with the input value, $\inputScalar_1 = 1$ and the output value, $\dataScalar_1 =3$ and a second data point, $\inputScalar = 3$, $\dataScalar=1$, then we can write two simultaneous linear equations of the form.
-
point 1: $\inputScalar = 1$, $\dataScalar=3$ 3 = m + c point 2: $\inputScalar = 3$, $\dataScalar=1$ 1 = 3m + c
-
The solution to these two simultaneous equations can be represented graphically as
The challenge with a linear model is that it has two unknowns, \(m\), and \(c\). Observing data allows us to write down
+a system of simultaneous linear equations. So, for example if we observe
+two data points, the first with the input value, \(x_1 = 1\) and the output value, \(y_1 =3\) and a second data point, \(x= 3\), \(y=1\), then we can write two simultaneous
+linear equations of the form.
+
point 1: \(x= 1\), \(y=3\)\[
+3 = m + c
+\] point 2: \(x= 3\), \(y=1\)\[
+1 = 3m + c
+\]
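We can also check this algebra numerically; a minimal sketch with numpy (assuming numpy is available) is:

import numpy as np
A = np.array([[1., 1.],    # 3 = 1*m + c
              [3., 1.]])   # 1 = 3*m + c
b = np.array([3., 1.])
m, c = np.linalg.solve(A, b)
print(m, c)                # m = -1.0, c = 4.0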
+
The solution to these two simultaneous equations can be represented
+graphically as
-
+
-
+
-
Figure: The solution of two linear equations represented as the fit of a straight line through two data
+
Figure: The solution of two linear equations represented as the fit
+of a straight line through two data
-
The challenge comes when a third data point is observed and it doesn’t naturally fit on the straight line.
-
point 3: $\inputScalar = 2$, $\dataScalar=2.5$ 2.5 = 2m + c
+
The challenge comes when a third data point is observed, and it
+doesn’t fit on the straight line.
+
point 3: \(x= 2\), \(y=2.5\)\[
+2.5 = 2m + c
+\]
-
+
-
+
-
Figure: A third observation of data is inconsistent with the solution dictated by the first two observations
+
Figure: A third observation of data is inconsistent with the solution
+dictated by the first two observations
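We can check this numerically. The following is a minimal numpy sketch (the variable names are ours): it solves the first two equations exactly and shows that the resulting line does not pass through the third point.

import numpy as np

# point 1 and point 2 give the system: m + c = 3 and 3m + c = 1
A = np.array([[1., 1.],
              [3., 1.]])
b = np.array([3., 1.])
m, c = np.linalg.solve(A, b)
print(m, c)      # m = -1.0, c = 4.0

# point 3 is x = 2, y = 2.5, but the line through points 1 and 2 predicts
print(m*2 + c)   # 2.0, not 2.5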
-
Now there are three candidate lines, each consistent with our data.
+
Now there are three candidate lines, each consistent with our
+data.
-
+
-
+
-
Figure: Three solutions to the problem, each consistent with two points of the three observations
-
+
Figure: Three solutions to the problem, each consistent with two
+points of the three observations
+
+
+
This is known as an overdetermined system because there are
+more data than we need to determine our parameters. The problem arises
+because the model is a simplification of the real world, and the data we
+observe is therefore inconsistent with our model.
The solution was proposed by Pierre-Simon Laplace. His idea was to
+accept that the model was an incomplete representation of the real
+world, and that the way in which it was incomplete is unknown. Such
+unknowns could be dealt with through probability.
This is known as an overdetermined system because there are more data than we need to determine our parameters. The problem arises because the model is a simplification of the real world, and the data we observe is therefore inconsistent with our model.
-
The solution was proposed by Pierre-Simon Laplace. His idea was to accept that the model was an incomplete representation of the real world, and the manner in which it was incomplete is unknown. His idea was that such unknowns could be dealt with through probability.
Famously, Laplace considered the idea of a deterministic Universe, one in which the model is known, or as the below translation refers to it, “an intelligence which could comprehend all the forces by which nature is animated”. He speculates on an “intelligence” that can submit this vast data to analysis and proposes that such an entity would be able to predict the future.
+
Famously, Laplace considered the idea of a deterministic Universe,
+one in which the model is known, or as the below translation
+refers to it, “an intelligence which could comprehend all the forces by
+which nature is animated”. He speculates on an “intelligence” that can
+submit this vast data to analysis and proposes that such an entity
+would be able to predict the future.
-
Given for one instant an intelligence which could comprehend all the forces by which nature is animated and the respective situation of the beings who compose it—an intelligence sufficiently vast to submit these data to analysis—it would embrace in the same formula the movements of the greatest bodies of the universe and those of the lightest atom; for it, nothing would be uncertain and the future, as the past, would be present in its eyes.
+
Given for one instant an intelligence which could comprehend all the
+forces by which nature is animated and the respective situation of the
+beings who compose it—an intelligence sufficiently vast to submit these
+data to analysis—it would embrace in the same formula the movements of
+the greatest bodies of the universe and those of the lightest atom; for
+it, nothing would be uncertain and the future, as the past, would be
+present in its eyes.
-
This notion is known as Laplace’s demon or Laplace’s superman.
+
This notion is known as Laplace’s demon or Laplace’s
+superman.
-
+
-
+
Figure: Laplace’s determinism in English translation.
-
Unfortunately, most analyses of his ideas stop at that point, whereas his real point is that such a notion is unreachable. Not so much superman as strawman. Just three pages later in the “Philosophical Essay on Probabilities” (Laplace 1814), Laplace goes on to observe:
Unfortunately, most analyses of his ideas stop at that point, whereas
+his real point is that such a notion is unreachable. Not so much
+superman as strawman. Just three pages later in the
+“Philosophical Essay on Probabilities” (Laplace, 1814), Laplace goes on to
+observe:
-
The curve described by a simple molecule of air or vapor is regulated in a manner just as certain as the planetary orbits; the only difference between them is that which comes from our ignorance.
-
Probability is relative, in part to this ignorance, in part to our knowledge.
+
The curve described by a simple molecule of air or vapor is regulated
+in a manner just as certain as the planetary orbits; the only difference
+between them is that which comes from our ignorance.
+
Probability is relative, in part to this ignorance, in part to our
+knowledge.
-
+
-
+
-
+
-
Figure: To Laplace, determinism is a strawman. Ignorance of mechanism and data leads to uncertainty which should be dealt with through probability.
-
-
-
In other words, we can never make use of the idealistic deterministic Universe due to our ignorance about the world, Laplace’s suggestion, and focus in this essay is that we turn to probability to deal with this uncertainty. This is also our inspiration for using probability in machine learning.
-
The “forces by which nature is animated” is our model, the “situation of beings that compose it” is our data and the “intelligence sufficiently vast enough to submit these data to analysis” is our compute. The fly in the ointment is our ignorance about these aspects. And probability is the tool we use to incorporate this ignorance leading to uncertainty or doubt in our predictions.
-
Laplace’s concept was that the reason that the data doesn’t match up to the model is because of unconsidered factors, and that these might be well represented through probability densities. He tackles the challenge of the unknown factors by adding a variable, $\noiseScalar$, that represents the unknown. In modern parlance we would call this a latent variable. But in the context Laplace uses it, the variable is so common that it has other names such as a “slack” variable or the noise in the system.
-
point 1: $\inputScalar = 1$, $\dataScalar=3$ $$
-3 = m + c + \noiseScalar_1
-$$ point 2: $\inputScalar = 3$, $\dataScalar=1$ $$
-1 = 3m + c + \noiseScalar_2
-$$ point 3: $\inputScalar = 2$, $\dataScalar=2.5$ $$
-2.5 = 2m + c + \noiseScalar_3
-$$
-
Laplace’s trick has converted the overdetermined system into an underdetermined system. He has now added three variables, $\{\noiseScalar_i\}_{i=1}^3$, which represent the unknown corruptions of the real world. Laplace’s idea is that we should represent that unknown corruption with a probability distribution.
+
Figure: To Laplace, determinism is a strawman. Ignorance of mechanism
+and data leads to uncertainty which should be dealt with through
+probability.
+
+
+
In other words, we can never make use of the idealistic deterministic
+Universe due to our ignorance about the world. Laplace’s suggestion, and
+focus in this essay, is that we turn to probability to deal with this
+uncertainty. This is also our inspiration for using probability in
+machine learning. This is the true message of Laplace’s essay, not
+determinism, but the gremlin of uncertainty that emerges from our
+ignorance.
+
The “forces by which nature is animated” is our model, the
+“situation of beings that compose it” is our data and the
+“intelligence sufficiently vast to submit these data to analysis”
+is our compute. The fly in the ointment is our ignorance about
+these aspects. And probability is the tool we use to
+incorporate this ignorance leading to uncertainty or doubt in
+our predictions.
Laplace’s concept was that the reason that the data doesn’t match up
+to the model is because of unconsidered factors, and that these might be
+well represented through probability densities. He tackles the challenge
+of the unknown factors by adding a variable, \(\epsilon\), that represents the unknown. In
+modern parlance we would call this a latent variable. But in
+the context Laplace uses it, the variable is so common that it has other
+names such as a “slack” variable or the noise in the
+system.
+
+point 1: \(x= 1\), \(y=3\) \[
+3 = m + c + \epsilon_1
+\] point 2: \(x= 3\), \(y=1\) \[
+1 = 3m + c + \epsilon_2
+\] point 3: \(x= 2\), \(y=2.5\) \[
+2.5 = 2m + c + \epsilon_3
+\]
+
Laplace’s trick has converted the overdetermined system into
+an underdetermined system. He has now added three variables,
+\(\{\epsilon_i\}_{i=1}^3\), which
+represent the unknown corruptions of the real world. Laplace’s idea is
+that we should represent that unknown corruption with a probability
+distribution.
However, it was left to an admirer of Laplace to develop a practical probability density for that purpose. It was Carl Friedrich Gauss who suggested that the Gaussian density (which at the time was unnamed!) should be used to represent this error.
-
The result is a noisy function, a function which has a deterministic part, and a stochastic part. This type of function is sometimes known as a probabilistic or stochastic process, to distinguish it from a deterministic process.
The Gaussian density has many important properties, but for the moment we’ll review two of them.
+
However, it was left to an admirer of Laplace to develop a practical
+probability density for that purpose. It was Carl Friedrich Gauss who
+suggested that the Gaussian density (which at the time was
+unnamed!) should be used to represent this error.
+
The result is a noisy function, a function which has a
+deterministic part, and a stochastic part. This type of function is
+sometimes known as a probabilistic or stochastic process, to distinguish
+it from a deterministic process.
Then we can show that the sum of a set of variables, each drawn independently from such a density is also distributed as Gaussian. The mean of the resulting density is the sum of the means, and the variance is the sum of the variances,
Since we are very familiar with the Gaussian density and its properties, it is not immediately apparent how unusual this is. Most random variables, when you add them together, change the family of density they are drawn from. For example, the Gaussian is exceptional in this regard. Indeed, other random variables, if they are independently drawn and summed together tend to a Gaussian density. That is the central limit theorem which is a major justification for the use of a Gaussian density.
+
If we assume that a variable, \(y_i\), is sampled from a Gaussian
+density,
Then we can show that the sum of a set of variables, each drawn
+independently from such a density, is also distributed as Gaussian. The
+mean of the resulting density is the sum of the means, and the variance
+is the sum of the variances,
Since we are very familiar with the Gaussian density and its
+properties, it is not immediately apparent how unusual this is. Most
+random variables, when you add them together, change the family of
+density they are drawn from. The Gaussian is exceptional in
+this regard. Indeed, other random variables, if they are independently
+drawn and summed together tend to a Gaussian density. That is the central
+limit theorem which is a major justification for the use of a
+Gaussian density.
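A quick sampling check illustrates the summing property (a sketch; the means and variances below are arbitrary choices):

import numpy as np

rng = np.random.default_rng(0)
n = 100000
a = rng.normal(1.0, np.sqrt(2.0), size=n)    # mean 1.0, variance 2.0
b = rng.normal(-0.5, np.sqrt(0.5), size=n)   # mean -0.5, variance 0.5
s = a + b
print(s.mean())  # close to 1.0 + (-0.5) = 0.5
print(s.var())   # close to 2.0 + 0.5 = 2.5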
Scaling a Gaussian
-
Less unusual is the scaling property of a Gaussian density. If a variable, $\dataScalar$, is sampled from a Gaussian density,
-
$$\dataScalar \sim \gaussianSamp{\meanScalar}{\sigma^2}$$ and we choose to scale that variable by a deterministic value, $\mappingScalar$, then the scaled variable is distributed as
-
$$\mappingScalar \dataScalar \sim \gaussianSamp{\mappingScalar\meanScalar}{\mappingScalar^2 \sigma^2}.$$ Unlike the summing properties, where adding two or more random variables independently sampled from a family of densitites typically brings the summed variable outside that family, scaling many densities leaves the distribution of that variable in the same family of densities. Indeed, many densities include a scale parameter (e.g. the Gamma density) which is purely for this purpose. In the Gaussian the standard deviation, $\dataStd$, is the scale parameter. To see why this makes sense, let’s consider, $$z \sim \gaussianSamp{0}{1},$$ then if we scale by $\dataStd$ so we have, $\dataScalar=\dataStd z$, we can write, $$\dataScalar =\dataStd z \sim \gaussianSamp{0}{\dataStd^2}$$
-
Let’s first of all review the properties of the multivariate Gaussian distribution that make linear Gaussian models easier to deal with. We’ll return to the, perhaps surprising, result on the parameters within the nonlinearity, $\parameterVector$, shortly.
-
To work with linear Gaussian models, to find the marginal likelihood all you need to know is the following rules. If $$
-\dataVector = \mappingMatrix \inputVector + \noiseVector,
-$$ where $\dataVector$, $\inputVector$ and $\noiseVector$ are vectors and we assume that $\inputVector$ and $\noiseVector$ are drawn from multivariate Gaussians, $$
+
Less unusual is the scaling property of a Gaussian density.
+If a variable, \(y\), is sampled from a
+Gaussian density,
+
\[y\sim
+\mathcal{N}\left(\mu,\sigma^2\right)\] and we choose to scale
+that variable by a deterministic value, \(w\), then the scaled variable is
+distributed as
+
\[wy\sim \mathcal{N}\left(w\mu,w^2
+\sigma^2\right).\] Unlike the summing properties, where adding
+two or more random variables independently sampled from a family of
+densities typically brings the summed variable outside that
+family, scaling many densities leaves the distribution of that variable
+in the same family of densities. Indeed, many densities include
+a scale parameter (e.g. the Gamma
+density) which is purely for this purpose. In the Gaussian the
+standard deviation, \(\sigma\), is the
+scale parameter. To see why this makes sense, let’s consider, \[z \sim \mathcal{N}\left(0,1\right),\] then
+if we scale by \(\sigma\) so we have,
+\(y=\sigma z\), we can write, \[y=\sigma z \sim
+\mathcal{N}\left(0,\sigma^2\right)\]
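Again, this is easy to verify by sampling (a minimal sketch; the value of \(\sigma\) is an arbitrary choice):

import numpy as np

rng = np.random.default_rng(0)
sigma = 3.0
z = rng.normal(0.0, 1.0, size=100000)  # z ~ N(0, 1)
y = sigma*z                            # scale by sigma
print(y.mean(), y.var())               # close to 0 and sigma**2 = 9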
+
Let’s first of all review the properties of the multivariate Gaussian
+distribution that make linear Gaussian models easier to deal with. We’ll
+return to the, perhaps surprising, result on the parameters within the
+nonlinearity, \(\boldsymbol{ \theta}\),
+shortly.
+
To work with linear Gaussian models, to find the marginal likelihood
+all you need to know is the following rules. If \[
+\mathbf{ y}= \mathbf{W}\mathbf{ x}+ \boldsymbol{ \epsilon},
+\] where \(\mathbf{ y}\), \(\mathbf{ x}\) and \(\boldsymbol{ \epsilon}\) are vectors and we
+assume that \(\mathbf{ x}\) and \(\boldsymbol{ \epsilon}\) are drawn from
+multivariate Gaussians, \[
\begin{align}
-\inputVector & \sim \gaussianSamp{\meanVector}{\covarianceMatrix}\\
-\noiseVector & \sim \gaussianSamp{\zerosVector}{\covarianceMatrixTwo}
+\mathbf{ x}& \sim \mathcal{N}\left(\boldsymbol{
+\mu},\mathbf{C}\right)\\
+\boldsymbol{ \epsilon}& \sim
+\mathcal{N}\left(\mathbf{0},\boldsymbol{ \Sigma}\right)
\end{align}
-$$ then we know that $\dataVector$ is also drawn from a multivariate Gaussian with, $$
-\dataVector \sim \gaussianSamp{\mappingMatrix\meanVector}{\mappingMatrix\covarianceMatrix\mappingMatrix^\top + \covarianceMatrixTwo}.
-$$
-
With appropriately defined covariance, $\covarianceMatrixTwo$, this is actually the marginal likelihood for Factor Analysis, or Probabilistic Principal Component Analysis (Tipping and Bishop 1999), because we integrated out the inputs (or latent variables they would be called in that case).
+\] then we know that \(\mathbf{
+y}\) is also drawn from a multivariate Gaussian with, \[
+\mathbf{ y}\sim \mathcal{N}\left(\mathbf{W}\boldsymbol{
+\mu},\mathbf{W}\mathbf{C}\mathbf{W}^\top + \boldsymbol{ \Sigma}\right).
+\]
+
With appropriately defined covariance, \(\boldsymbol{ \Sigma}\), this is actually
+the marginal likelihood for Factor Analysis, or Probabilistic Principal
+Component Analysis (Tipping and Bishop, 1999),
+because we integrated out the inputs (or latent variables they
+would be called in that case).
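The rule can also be checked by sampling (a sketch; the particular \(\mathbf{W}\), \(\boldsymbol{ \mu}\), \(\mathbf{C}\) and \(\boldsymbol{ \Sigma}\) below are arbitrary choices):

import numpy as np

rng = np.random.default_rng(0)
n = 200000
W = np.array([[1.0, 0.5], [0.0, 2.0]])
mu = np.array([1.0, -1.0])
C = np.array([[1.0, 0.3], [0.3, 0.5]])
Sigma = 0.1*np.eye(2)

x = rng.multivariate_normal(mu, C, size=n)
eps = rng.multivariate_normal(np.zeros(2), Sigma, size=n)
y = x @ W.T + eps             # each row is W x + epsilon for one sample

print(y.mean(axis=0))         # close to W @ mu
print(np.cov(y.T))            # close to W @ C @ W.T + Sigma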
Laplace had the idea to augment the observations by noise, that is equivalent to considering a probability density whose mean is given by the prediction function $$p\left(\dataScalar_i|\inputScalar_i\right)=\frac{1}{\sqrt{2\pi\dataStd^2}}\exp\left(-\frac{\left(\dataScalar_i-f\left(\inputScalar_i\right)\right)^{2}}{2\dataStd^2}\right).$$
-
This is known as a stochastic process. It is a function that is corrupted by noise. Laplace didn’t suggest the Gaussian density for that purpose; that was an innovation from Carl Friedrich Gauss, which is what gives the Gaussian density its name.
-
Height as a Function of Weight
-
In the standard Gaussian, parametized by mean and variance.
Laplace had the idea to augment the observations with noise, which is
+equivalent to considering a probability density whose mean is given by
+the prediction function \[p\left(y_i|x_i\right)=\frac{1}{\sqrt{2\pi\sigma^2}}\exp\left(-\frac{\left(y_i-f\left(x_i\right)\right)^{2}}{2\sigma^2}\right).\]
+
This is known as a stochastic process. It is a function that
+is corrupted by noise. Laplace didn’t suggest the Gaussian density for
+that purpose; that was an innovation from Carl Friedrich Gauss, which
+is what gives the Gaussian density its name.
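As a small illustration of how this density appears in code (a sketch; the function name and example numbers are ours):

import numpy as np

def gaussian_density(y_i, f_i, sigma2):
    """Density of observation y_i when the mean is the prediction f_i."""
    return np.exp(-(y_i - f_i)**2/(2*sigma2))/np.sqrt(2*np.pi*sigma2)

print(gaussian_density(2.5, 2.0, sigma2=0.25))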
+
Height as a Function of
+Weight
+
In the standard Gaussian, parameterized by mean and variance, make
+the mean a linear function of an input.
+
This leads to a regression model. \[
\begin{align*}
- \dataScalar_i=&\mappingFunction\left(\inputScalar_i\right)+\noiseScalar_i,\\
- \noiseScalar_i \sim & \gaussianSamp{0}{\dataStd^2}.
+ y_i=&f\left(x_i\right)+\epsilon_i,\\
+ \epsilon_i \sim & \mathcal{N}\left(0,\sigma^2\right).
\end{align*}
-$$
-
Assume $\dataScalar_i$ is height and $\inputScalar_i$ is weight.
Linear algebra plays a very similar role: when we introduce linear algebra, it is because we are faced with a large number of addition and multiplication operations. These operations need to be done together and would be very tedious to write down as a group. So the first reason we reach for linear algebra is for a more compact representation of our mathematical formulae.
-
Running Example: Olympic Marathons
-
-Now we will load in the Olympic marathon data. This is data of the Olympic marathon times for the men’s marathon from the first Olympics in 1896 up until the London 2012 Olympics.
-
import pods
-
data = pods.datasets.olympic_marathon_men()
-x = data['X']
-y = data['Y']
-
You can see what these values are by typing:
-
print(x)
-print(y)
-
Note that they are not pandas data frames for this example, they are just arrays of dimensionality $\numData\times 1$, where $\numData$ is the number of data.
-
The aim of this lab is to have you coding linear regression in python. We will do it in two ways, once using iterative updates (coordinate ascent) and then using linear algebra. The linear algebra approach will not only work much better, it is easy to extend to multiple input linear regression and non-linear regression using basis functions.
-
Plotting the Data
-
You can make a plot of $\dataScalar$ vs $\inputScalar$ with the following command:
-
%matplotlib inline
-import matplotlib.pyplot as plt
-
plt.plot(x, y, 'rx')
-plt.xlabel('year')
-plt.ylabel('pace in min/km')
-
Maximum Likelihood: Iterative Solution
-
Now we will take the maximum likelihood approach we derived in the lecture to fit a line, $\dataScalar_i=m\inputScalar_i + c$, to the data you’ve plotted. We are trying to minimize the error function: $$
-\errorFunction(m, c) = \sum_{i=1}^\numData(\dataScalar_i-m\inputScalar_i-c)^2
-$$ with respect to m, c and σ2. We can start with an initial guess for m,
-
m =-0.4
-c =80
-
Then we use the maximum likelihood update to find an estimate for the offset, c.
You’ve now seen how slow it can be to perform a coordinate ascent on a system. Another approach to solving the system (which is not always possible, particularly in non-linear systems) is to go direct to the minimum. To do this we need to introduce linear algebra. We will represent all our errors and functions in the form of linear algebra. As we mentioned above, linear algebra is just a shorthand for performing lots of multiplications and additions simultaneously. What does it have to do with our system then? Well the first thing to note is that the linear function we were trying to fit has the following form: $$
-\mappingFunction(x) = mx + c
-$$ the classical form for a straight line. From a linear algebraic perspective we are looking for multiplications and additions. We are also looking to separate our parameters from our data. The data is the givens remember, in French the word is données literally translated means givens that’s great, because we don’t need to change the data, what we need to change are the parameters (or variables) of the model. In this function the data comes in through x, and the parameters are m and c.
-
What we’d like to create is a vector of parameters and a vector of data. Then we could represent the system with vectors that represent the data, and vectors that represent the parameters.
-
We look to turn the multiplications and additions into a linear algebraic form, we have one multiplication (m × c) and one addition (mx + c). But we can turn this into a inner product by writing it in the following way, $$
-\mappingFunction(x) = m \times x +
+\]
The first thing we will do is load a standard data set for regression
+modelling. The data consists of the pace of Olympic Gold Medal Marathon
+winners for the Olympics from 1896 to present. Let’s load in the data
+and plot.
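A minimal sketch of the loading and plotting, using the pods library as in earlier versions of these notes:

import pods
import matplotlib.pyplot as plt

data = pods.datasets.olympic_marathon_men()
x = data['X']   # year of the Olympics
y = data['Y']   # winning pace in min/km

plt.plot(x, y, 'rx')
plt.xlabel('year')
plt.ylabel('pace in min/km')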
Things to notice about the data include the outlier in 1904, in that
+year the Olympics was in St Louis, USA. Organizational problems and
+challenges with dust kicked up by the cars following the race meant that
+participants got lost, and only very few participants completed. More
+recent years see more consistently quick marathons.
Note that x and y are not
+pandas data frames for this example, they are just arrays
+of dimensionality \(n\times 1\), where
+\(n\) is the number of data.
+
The aim of this lab is to have you coding linear regression in
+python. We will do it in two ways, once using iterative updates
+(coordinate ascent) and then using linear algebra. The linear algebra
+approach will not only work much better, it is also easy to extend to
+multiple input linear regression and non-linear regression
+using basis functions.
+
Maximum Likelihood:
+Iterative Solution
+
Now we will take the maximum likelihood approach we derived in the
+lecture to fit a line, \(y_i=mx_i +
+c\), to the data you’ve plotted. We are trying to minimize the
+error function: \[
+E(m, c) = \sum_{i=1}^n(y_i-mx_i-c)^2
+\] with respect to \(m\), \(c\) and \(\sigma^2\). We can start with an initial
+guess for \(m\),
+
m =-0.4
+c =80
+
Then we use the maximum likelihood update to find an estimate for the
+offset, \(c\).
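A sketch of the two coordinate updates follows. Each update is obtained by setting the corresponding partial derivative of the error to zero and solving; we assume x and y are the numpy arrays loaded above.

m = -0.4
c = 80.
for _ in range(10):
    # update c with m held fixed: c is the mean of (y - m*x)
    c = (y - m*x).mean()
    # update m with c held fixed: m = sum((y - c)*x) / sum(x**2)
    m = ((y - c)*x).sum()/(x**2).sum()
print(m, c)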
Now that we’ve identified the empirical risk with the loss, we’ll use
+\(E(\mathbf{ w})\) to represent our
+objective function. \[
+E(\mathbf{ w}) = \sum_{i=1}^n\left(y_i - f(\mathbf{ x}_i, \mathbf{
+w})\right)^2
+\] gives us our objective.
+
In the case of the linear prediction function, we can substitute
+\(f(\mathbf{ x}_i, \mathbf{ w}) = \mathbf{
+w}^\top \mathbf{ x}_i\). \[
+E(\mathbf{ w}) = \sum_{i=1}^n\left(y_i - \mathbf{ w}^\top \mathbf{
+x}_i\right)^2
+\] To compute the gradient of the objective, we first expand the
+brackets.
In this section we’re going to compute the minimum of the quadratic loss
+with respect to the parameters. When we do this, we’ll also review
+linear algebra. We will represent all our errors and functions
+in the form of matrices and vectors.
+
Linear algebra is just a shorthand for performing lots of
+multiplications and additions simultaneously. What does it have to do
+with our system then? Well, the first thing to note is that the classic
+linear function we fit for a one-dimensional regression has the form:
+\[
+f(x) = mx + c
+\] the classical form for a straight line. From a linear
+algebraic perspective, we are looking for multiplications and additions.
+We are also looking to separate our parameters from our data. The data
+is the givens. In French the word is données, which literally
+translated means givens. That’s great, because we don’t need to
+change the data; what we need to change are the parameters (or
+variables) of the model. In this function the data comes in through
+\(x\), and the parameters are \(m\) and \(c\).
+
What we’d like to create is a vector of parameters and a vector of
+data. Then we could represent the system with vectors that represent the
+data, and vectors that represent the parameters.
+
We look to turn the multiplications and additions into a linear
+algebraic form, we have one multiplication (\(m\times x\)) and one addition (\(mx + c\)). But we can turn this into an
+inner product by writing it in the following way, \[
+f(x) = m \times x +
c \times 1,
-$$ in other words we’ve extracted the unit value, from the offset, c. We can think of this unit value like an extra item of data, because it is always given to us, and it is always set to 1 (unlike regular data, which is likely to vary!). We can therefore write each input data location, $\inputVector$, as a vector $$
-\inputVector = \begin{bmatrix} 1\\ x\end{bmatrix}.
-$$
-
Now we choose to also turn our parameters into a vector. The parameter vector will be defined to contain $$
-\mappingVector = \begin{bmatrix} c \\ m\end{bmatrix}
-$$ because if we now take the inner product between these to vectors we recover $$
-\inputVector\cdot\mappingVector = 1 \times c + x \times m = mx + c
-$$ In numpy we can define this vector as follows
-
import numpy as np
-
# define the vector w
-w = np.zeros(shape=(2, 1))
-w[0] = m
-w[1] = c
-
This gives us the equivalence between original operation and an operation in vector space. Whilst the notation here isn’t a lot shorter, the beauty is that we will be able to add as many features as we like and still keep the seame representation. In general, we are now moving to a system where each of our predictions is given by an inner product. When we want to represent a linear product in linear algebra, we tend to do it with the transpose operation, so since we have a ⋅ b = a⊤b we can write $$
-\mappingFunction(\inputVector_i) = \inputVector_i^\top\mappingVector.
-$$ Where we’ve assumed that each data point, $\inputVector_i$, is now written by appending a 1 onto the original vector $$
-\inputVector_i = \begin{bmatrix}
+\] in other words, we’ve extracted the unit value from the
+offset, \(c\). We can think of this
+unit value like an extra item of data, because it is always given to us,
+and it is always set to 1 (unlike regular data, which is likely to
+vary!). We can therefore write each input data location, \(\mathbf{ x}\), as a vector \[
+\mathbf{ x}= \begin{bmatrix} 1\\ x\end{bmatrix}.
+\]
+
Now we choose to also turn our parameters into a vector. The
+parameter vector will be defined to contain \[
+\mathbf{ w}= \begin{bmatrix} c \\ m\end{bmatrix}
+\] because if we now take the inner product between these two
+vectors we recover \[
+\mathbf{ x}\cdot\mathbf{ w}= 1 \times c + x \times m = mx + c
+\] In numpy we can define this vector as follows
+
import numpy as np
+
# define the vector w
+w = np.zeros(shape=(2, 1))
+w[0] = m
+w[1] = c
+
This gives us the equivalence between the original operation and an
+operation in vector space. Whilst the notation here isn’t a lot shorter,
+the beauty is that we will be able to add as many features as we like
+and keep the same representation. In general, we are now moving to a
+system where each of our predictions is given by an inner product. When
+we want to represent a linear product in linear algebra, we tend to do
+it with the transpose operation, so since we have \(\mathbf{a}\cdot\mathbf{b} =
+\mathbf{a}^\top\mathbf{b}\) we can write \[
+f(\mathbf{ x}_i) = \mathbf{ x}_i^\top\mathbf{ w}.
+\] Where we’ve assumed that each data point, \(\mathbf{ x}_i\), is now written by
+appending a 1 onto the original vector \[
+\mathbf{ x}_i = \begin{bmatrix}
1 \\
-\inputScalar_i
+x_i
\end{bmatrix}
-$$
+\]
Design Matrix
-
We can do this for the entire data set to form a design matrix$\inputMatrix$,
We can do this for the entire data set to form a design
+matrix, \(\boldsymbol{ \Phi}\),
+\[
+\boldsymbol{ \Phi}
+= \begin{bmatrix}
+\mathbf{ x}_1^\top \\\
+\mathbf{ x}_2^\top \\\
\vdots \\\
-\inputVector_\numData^\top
+\mathbf{ x}_n^\top
\end{bmatrix} = \begin{bmatrix}
-1 & \inputScalar_1 \\\
-1 & \inputScalar_2 \\\
+1 & x_1 \\\
+1 & x_2 \\\
\vdots
& \vdots \\\
-1 & \inputScalar_\numData
-\end{bmatrix},$$
-
which in numpy can be done with the following commands:
-
import numpy as np
-
X = np.hstack((np.ones_like(x), x))
-print(X)
-
Writing the Objective with Linear Algebra
-
When we think of the objective function, we can think of it as the errors where the error is defined in a similar way to what it was in Legendre’s day $\dataScalar_i - \mappingFunction(\inputVector_i)$, in statistics these errors are also sometimes called residuals. So we can think as the objective and the prediction function as two separate parts, first we have, $$
-\errorFunction(\mappingVector) = \sum_{i=1}^\numData (\dataScalar_i - \mappingFunction(\inputVector_i; \mappingVector))^2,
-$$ where we’ve made the function $\mappingFunction(\cdot)$’s dependence on the parameters $\mappingVector$ explicit in this equation. Then we have the definition of the function itself, $$
-\mappingFunction(\inputVector_i; \mappingVector) = \inputVector_i^\top \mappingVector.
-$$ Let’s look again at these two equations and see if we can identify any inner products. The first equation is a sum of squares, which is promising. Any sum of squares can be represented by an inner product, $$
-a = \sum_{i=1}^{k} b^2_i = \mathbf{b}^\top\mathbf{b},
-$$ so if we wish to represent $\errorFunction(\mappingVector)$ in this way, all we need to do is convert the sum operator to an inner product. We can get a vector from that sum operator by placing both $\dataScalar_i$ and $\mappingFunction(\inputVector_i; \mappingVector)$ into vectors, which we do by defining $$
-\dataVector = \begin{bmatrix}\dataScalar_1\\ \dataScalar_2\\ \vdots \\ \dataScalar_\numData\end{bmatrix}
-$$ and defining $$
-\mappingFunctionVector(\inputVector_1; \mappingVector) = \begin{bmatrix}\mappingFunction(\inputVector_1; \mappingVector)\\ \mappingFunction(\inputVector_2; \mappingVector)\\ \vdots \\ \mappingFunction(\inputVector_\numData; \mappingVector)\end{bmatrix}.
-$$ The second of these is actually a vector-valued function. This term may appear intimidating, but the idea is straightforward. A vector valued function is simply a vector whose elements are themselves defined as functions, i.e. it is a vector of functions, rather than a vector of scalars. The idea is so straightforward, that we are going to ignore it for the moment, and barely use it in the derivation. But it will reappear later when we introduce basis functions. So we will, for the moment, ignore the dependence of $\mappingFunctionVector$ on $\mappingVector$ and $\inputMatrix$ and simply summarise it by a vector of numbers $$
-\mappingFunctionVector = \begin{bmatrix}\mappingFunction_1\\\mappingFunction_2\\
-\vdots \\ \mappingFunction_\numData\end{bmatrix}.
-$$ This allows us to write our objective in the folowing, linear algebraic form, $$
-\errorFunction(\mappingVector) = (\dataVector - \mappingFunctionVector)^\top(\dataVector - \mappingFunctionVector)
-$$ from the rules of inner products. But what of our matrix $\inputMatrix$ of input data? At this point, we need to dust off matrix-vector multiplication. Matrix multiplication is simply a convenient way of performing many inner products together, and it’s exactly what we need to summarise the operation $$
-f_i = \inputVector_i^\top\mappingVector.
-$$ This operation tells us that each element of the vector $\mappingFunctionVector$ (our vector valued function) is given by an inner product between $\inputVector_i$ and $\mappingVector$. In other words it is a series of inner products. Let’s look at the definition of matrix multiplication, it takes the form c = Ba where c might be a k dimensional vector (which we can intepret as a k × 1 dimensional matrix), and B is a k × k dimensional matrix and a is a k dimensional vector (k × 1 dimensional matrix).
-
The result of this multiplication is of the form $$
+1 & x_n
+\end{bmatrix},
+\] which in numpy can be done with the following
+commands:
+
import numpy as np
+
Phi = np.hstack((np.ones_like(x), x))
+print(Phi)
+
Writing the Objective
+with Linear Algebra
+
When we think of the objective function, we can think of it as the
+errors where the error is defined in a similar way to what it was in
+Legendre’s day, \(y_i - f(\mathbf{
+x}_i)\); in statistics these errors are also sometimes called residuals.
+So, we can think of the objective and the prediction function as two
+separate parts, first we have, \[
+E(\mathbf{ w}) = \sum_{i=1}^n(y_i - f(\mathbf{ x}_i; \mathbf{ w}))^2,
+\] where we’ve made the function \(f(\cdot)\)’s dependence on the parameters
+\(\mathbf{ w}\) explicit in this
+equation. Then we have the definition of the function itself, \[
+f(\mathbf{ x}_i; \mathbf{ w}) = \mathbf{ x}_i^\top \mathbf{ w}.
+\] Let’s look again at these two equations and see if we can
+identify any inner products. The first equation is a sum of squares,
+which is promising. Any sum of squares can be represented by an inner
+product, \[
+a = \sum_{i=1}^{k} b^2_i = \mathbf{b}^\top\mathbf{b}.
+\] If we wish to represent \(E(\mathbf{
+w})\) in this way, all we need to do is convert the sum operator
+to an inner product. We can get a vector from that sum operator by
+placing both \(y_i\) and \(f(\mathbf{ x}_i; \mathbf{ w})\) into
+vectors, which we do by defining \[
+\mathbf{ y}= \begin{bmatrix}y_1\\ y_2\\ \vdots \\ y_n\end{bmatrix}
+\] and defining \[
+\mathbf{ f}(\mathbf{ x}_1; \mathbf{ w}) = \begin{bmatrix}f(\mathbf{
+x}_1; \mathbf{ w})\\ f(\mathbf{ x}_2; \mathbf{ w})\\ \vdots \\
+f(\mathbf{ x}_n; \mathbf{ w})\end{bmatrix}.
+\] The second of these is a vector-valued function. This term may
+appear intimidating, but the idea is straightforward. A vector valued
+function is simply a vector whose elements are themselves defined as
+functions, i.e., it is a vector of functions, rather than a
+vector of scalars. The idea is so straightforward, that we are going to
+ignore it for the moment, and barely use it in the derivation. But it
+will reappear later when we introduce basis functions. So, we
+will for the moment ignore the dependence of \(\mathbf{ f}\) on \(\mathbf{ w}\) and \(\boldsymbol{ \Phi}\) and simply summarise
+it by a vector of numbers \[
+\mathbf{ f}= \begin{bmatrix}f_1\\f_2\\
+\vdots \\ f_n\end{bmatrix}.
+\] This allows us to write our objective in the following linear
+algebraic form, \[
+E(\mathbf{ w}) = (\mathbf{ y}- \mathbf{ f})^\top(\mathbf{ y}- \mathbf{
+f})
+\] from the rules of inner products. But what of our matrix \(\boldsymbol{ \Phi}\) of input data? At this
+point, we need to dust off matrix-vector
+multiplication. Matrix multiplication is simply a convenient
+way of performing many inner products together, and it’s exactly what we
+need to summarize the operation \[
+f_i = \mathbf{ x}_i^\top\mathbf{ w}.
+\] This operation tells us that each element of the vector \(\mathbf{ f}\) (our vector valued function)
+is given by an inner product between \(\mathbf{ x}_i\) and \(\mathbf{ w}\). In other words, it is a
+series of inner products. Let’s look at the definition of matrix
+multiplication, it takes the form \[
+\mathbf{c} = \mathbf{B}\mathbf{a},
+\] where \(\mathbf{c}\) might be
+a \(k\) dimensional vector (which we
+can interpret as a \(k\times 1\)
+dimensional matrix), and \(\mathbf{B}\)
+is a \(k\times k\) dimensional matrix
+and \(\mathbf{a}\) is a \(k\) dimensional vector (\(k\times 1\) dimensional matrix).
+
The result of this multiplication is of the form \[
\begin{bmatrix}c_1\\c_2 \\ \vdots \\
-a_k\end{bmatrix} =
+c_k\end{bmatrix} =
\begin{bmatrix} b_{1,1} & b_{1, 2} & \dots & b_{1, k} \\
b_{2, 1} & b_{2, 2} & \dots & b_{2, k} \\
\vdots & \vdots & \ddots & \vdots \\
-b_{k, 1} & b_{k, 2} & \dots & b_{k, k} \end{bmatrix} \begin{bmatrix}a_1\\a_2 \\
-\vdots\\ c_k\end{bmatrix} = \begin{bmatrix} b_{1, 1}a_1 + b_{1, 2}a_2 + \dots +
+b_{k, 1} & b_{k, 2} & \dots & b_{k, k} \end{bmatrix}
+\begin{bmatrix}a_1\\a_2 \\
+\vdots\\ a_k\end{bmatrix} = \begin{bmatrix} b_{1, 1}a_1 + b_{1, 2}a_2 +
+\dots +
b_{1, k}a_k\\
-b_{2, 1}a_1 + b_{2, 2}a_2 + \dots + b_{2, k}a_k \\
+b_{2, 1}a_1 + b_{2, 2}a_2 + \dots + b_{2, k}a_k \\
\vdots\\
-b_{k, 1}a_1 + b_{k, 2}a_2 + \dots + b_{k, k}a_k\end{bmatrix}
-$$ so we see that each element of the result, a is simply the inner product between each row of B and the vector c. Because we have defined each element of $\mappingFunctionVector$ to be given by the inner product between each row of the design matrix and the vector $\mappingVector$ we now can write the full operation in one matrix multiplication, $$
-\mappingFunctionVector = \inputMatrix\mappingVector.
-$$
-
import numpy as np
-
f = X@w # The @ sign performs matrix multiplication
-
Combining this result with our objective function, $$
-\errorFunction(\mappingVector) = (\dataVector - \mappingFunctionVector)^\top(\dataVector - \mappingFunctionVector)
-$$ we find we have defined the model with two equations. One equation tells us the form of our predictive function and how it depends on its parameters, the other tells us the form of our objective function.
-
resid = (y-f)
-E = np.dot(resid.T, resid) # matrix multiplication on a single vector is equivalent to a dot product.
-print("Error function is:", E)
-
Exercise 0
-
The prediction for our movie recommender system had the form $f_{i, j} = \mathbf{u}_i^\top\mathbf{v}_j$ and the objective function was then $$
-E = \sum_{i,j} s_{i,j}(\dataScalar_{i,j} - f_{i, j})^2
-$$ Try writing this down in matrix and vector form. How many of the terms can you do? For each variable and parameter carefully think about whether it should be represented as a matrix or vector. Do as many of the terms as you can. Use $\LaTeX$ to give your answers and give the dimensions of any matrices you create.
-
Objective Optimisation
-
Our model has now been defined with two equations, the prediction function and the objective function. Next we will use multivariate calculus to define an algorithm to fit the model. The separation between model and algorithm is important and is often overlooked. Our model contains a function that shows how it will be used for prediction, and a function that describes the objective function we need to optimise to obtain a good set of parameters.
-
The model linear regression model we have described is still the same as the one we fitted above with a coordinate ascent algorithm. We have only played with the notation to obtain the same model in a matrix and vector notation. However, we will now fit this model with a different algorithm, one that is much faster. It is such a widely used algorithm that from the end user’s perspective it doesn’t even look like an algorithm, it just appears to be a single operation (or function). However, underneath the computer calls an algorithm to find the solution. Further, the algorithm we obtain is very widely used, and because of this it turns out to be highly optimised.
-
Once again we are going to try and find the stationary points of our objective by finding the stationary points. However, the stationary points of a multivariate function, are a little bit more complext to find. Once again we need to find the point at which the derivative is zero, but now we need to use multivariate calculus to find it. This involves learning a few additional rules of differentiation (that allow you to do the derivatives of a function with respect to vector), but in the end it makes things quite a bit easier. We define vectorial derivatives as follows, $$
-\frac{\text{d}\errorFunction(\mappingVector)}{\text{d}\mappingVector} =
-\begin{bmatrix}\frac{\text{d}\errorFunction(\mappingVector)}{\text{d}\mappingScalar_1}\\\frac{\text{d}\errorFunction(\mappingVector)}{\text{d}\mappingScalar_2}\end{bmatrix}.
-$$ where $\frac{\text{d}\errorFunction(\mappingVector)}{\text{d}\mappingScalar_1}$ is the partial derivative of the error function with respect to $\mappingScalar_1$.
-
Differentiation through multiplications and additions is relatively straightforward, and since linear algebra is just multiplication and addition, then its rules of diffentiation are quite straightforward too, but slightly more complex than regular derivatives.
+b_{k, 1}a_1 + b_{k, 2}a_2 + \dots + b_{k, k}a_k\end{bmatrix}.
+\] We see that each element of the result, \(\mathbf{c}\), is simply the inner product
+between each row of \(\mathbf{B}\) and the vector \(\mathbf{a}\). Because we have defined each
+element of \(\mathbf{ f}\) to be given
+by the inner product between each row of the design matrix and
+the vector \(\mathbf{ w}\) we now can
+write the full operation in one matrix multiplication,
f = Phi@w # The @ sign performs matrix multiplication
+
Combining this result with our objective function, \[
+E(\mathbf{ w}) = (\mathbf{ y}- \mathbf{ f})^\top(\mathbf{ y}- \mathbf{
+f})
+\] we find we have defined the model with two equations.
+One equation tells us the form of our predictive function and how it
+depends on its parameters, the other tells us the form of our objective
+function.
+
resid = (y-f)
+E = np.dot(resid.T, resid) # matrix multiplication on a single vector is equivalent to a dot product.
+print("Error function is:", E)
Our model has now been defined with two equations: the
+prediction function and the objective function. Now we will use
+multivariate calculus to define an algorithm to fit the model.
+The separation between model and algorithm is important and is often
+overlooked. Our model contains a function that shows how it will be used
+for prediction, and a function that describes the objective function we
+need to optimize to obtain a good set of parameters.
+
+The linear regression model we have described is still the same
+as the one we fitted above with a coordinate ascent algorithm. We have
+only played with the notation to obtain the same model in a matrix and
+vector notation. However, we will now fit this model with a different
+algorithm, one that is much faster. It is such a widely used algorithm
+that from the end user’s perspective it doesn’t even look like an
+algorithm, it just appears to be a single operation (or function).
+However, underneath the computer calls an algorithm to find the
+solution. Further, the algorithm we obtain is very widely used, and
+because of this it turns out to be highly optimized.
+
Once again, we are going to try and find the minimum of our
+objective by finding its stationary points. However, the
+stationary points of a multivariate function are a little bit more
+complex to find. As before, we need to find the point at which the
+gradient is zero, but now we need to use multivariate calculus
+to find it. This involves learning a few additional rules of
+differentiation (that allow you to do the derivatives of a function with
+respect to a vector), but in the end it makes things quite a bit easier.
+We define vectorial derivatives as follows, \[
+\frac{\text{d}E(\mathbf{ w})}{\text{d}\mathbf{ w}} =
+\begin{bmatrix}\frac{\text{d}E(\mathbf{
+w})}{\text{d}w_1}\\\frac{\text{d}E(\mathbf{
+w})}{\text{d}w_2}\end{bmatrix}.
+\] where \(\frac{\text{d}E(\mathbf{
+w})}{\text{d}w_1}\) is the partial
+derivative of the error function with respect to \(w_1\).
+
Differentiation through multiplications and additions is relatively
+straightforward, and since linear algebra is just multiplication and
+addition, then its rules of differentiation are quite straightforward
+too, but slightly more complex than regular derivatives.
Multivariate Derivatives
-
We will need two rules of multivariate or matrix differentiation. The first is diffentiation of an inner product. By remembering that the inner product is made up of multiplication and addition, we can hope that its derivative is quite straightforward, and so it proves to be. We can start by thinking about the definition of the inner product, a⊤z = ∑iaizi, which if we were to take the derivative with respect to zk would simply return the gradient of the one term in the sum for which the derivative was non zero, that of ak, so we know that $$
+
We will need two rules of multivariate or matrix
+differentiation. The first is differentiation of an inner product. By
+remembering that the inner product is made up of multiplication and
+addition, we can hope that its derivative is quite straightforward, and
+so it proves to be. We can start by thinking about the definition of the
+inner product, \[
+\mathbf{a}^\top\mathbf{z} = \sum_{i} a_i
+z_i,
+\] which if we were to take the derivative with respect to \(z_k\) would simply return the gradient of
+the one term in the sum for which the derivative was non-zero, that of
+\(a_k\), so we know that \[
\frac{\text{d}}{\text{d}z_k} \mathbf{a}^\top \mathbf{z} = a_k
-$$ and by our definition of multivariate derivatives we can simply stack all the partial derivatives of this form in a vector to obtain the result that $$
+\] and by our definition for multivariate derivatives, we can
+simply stack all the partial derivatives of this form in a vector to
+obtain the result that \[
\frac{\text{d}}{\text{d}\mathbf{z}}
\mathbf{a}^\top \mathbf{z} = \mathbf{a}.
-$$ The second rule that’s required is differentiation of a ‘matrix quadratic’. A scalar quadratic in z with coefficient c has the form cz2. If z is a k × 1 vector and C is a k × kmatrix of coefficients then the matrix quadratic form is written as z⊤Cz, which is itself a scalar quantity, but it is a function of a vector.
-
Matching Dimensions in Matrix Multiplications
-
There’s a trick for telling that it’s a scalar result. When you are doing maths with matrices, it’s always worth pausing to perform a quick sanity check on the dimensions. Matrix multplication only works when the dimensions match. To be precise, the ‘inner’ dimension of the matrix must match. What is the inner dimension. If we multiply two matrices A and B, the first of which has k rows and ℓ columns and the second of which has p rows and q columns, then we can check whether the multiplication works by writing the dimensionalities next to each other, $$
+\] The second rule that’s required is differentiation of a
+‘matrix quadratic’. A scalar quadratic in \(z\) with coefficient \(c\) has the form \(cz^2\). If \(\mathbf{z}\) is a \(k\times 1\) vector and \(\mathbf{C}\) is a \(k \times k\) matrix of
+coefficients then the matrix quadratic form is written as \(\mathbf{z}^\top \mathbf{C}\mathbf{z}\),
+which is itself a scalar quantity, but it is a function of a
+vector.
+
Matching
+Dimensions in Matrix Multiplications
+
There’s a trick for telling whether a multiplication leads to a scalar
+result. When you are doing mathematics with matrices, it’s always worth
+pausing to perform a quick sanity check on the dimensions. Matrix
+multiplication only works when the dimensions match. To be precise, the
+‘inner’ dimension of the matrix must match. What is the inner dimension?
+If we multiply two matrices \(\mathbf{A}\) and \(\mathbf{B}\), the first of which has \(k\) rows and \(\ell\) columns and the second of which has
+\(p\) rows and \(q\) columns, then we can check whether the
+multiplication works by writing the dimensionalities next to each other,
+\[
\mathbf{A} \mathbf{B} \rightarrow (k \times
-\underbrace{\ell)(p}_\text{inner dimensions} \times q) \rightarrow (k\times q).
-$$ The inner dimensions are the two inside dimensions, ℓ and p. The multiplication will only work if ℓ = p. The result of the multiplication will then be a k × q matrix: this dimensionality comes from the ‘outer dimensions’. Note that matrix multiplication is not commutative. And if you change the order of the multiplication, $$
-\mathbf{B} \mathbf{A} \rightarrow (\ell \times \underbrace{k)(q}_\text{inner dimensions} \times p) \rightarrow (\ell \times p).
-$$ firstly it may no longer even work, because now the condition is that k = q, and secondly the result could be of a different dimensionality. An exception is if the matrices are square matrices (e.g. same number of rows as columns) and they are both symmetric. A symmetric matrix is one for which A = A⊤, or equivalently, ai, j = aj, i for all i and j.
-
You will need to get used to working with matrices and vectors applying and developing new machine learning techniques. You should have come across them before, but you may not have used them as extensively as we will now do in this course. You should get used to using this trick to check your work and ensure you know what the dimension of an output matrix should be. For our matrix quadratic form, it turns out that we can see it as a special type of inner product. $$
+\underbrace{\ell)(p}_\text{inner dimensions} \times q) \rightarrow
+(k\times q).
+\] The inner dimensions are the two inside dimensions, \(\ell\) and \(p\). The multiplication will only work if
+\(\ell=p\). The result of the
+multiplication will then be a \(k\times
+q\) matrix: this dimensionality comes from the ‘outer
+dimensions’. Note that matrix multiplication is not commutative.
+And if you change the order of the multiplication, \[
+\mathbf{B} \mathbf{A} \rightarrow (\ell \times
+\underbrace{k)(q}_\text{inner dimensions} \times p) \rightarrow (\ell
+\times p).
+\] Firstly, it may no longer even work, because now the condition
+is that \(k=q\), and secondly the
+result could be of a different dimensionality. An exception is if the
+matrices are square matrices (i.e., same number of rows as columns) and
+they are both symmetric. A symmetric matrix is one for which
+\(\mathbf{A}=\mathbf{A}^\top\), or
+equivalently, \(a_{i,j} = a_{j,i}\) for
+all \(i\) and \(j\).
+
For applying and developing machine learning algorithms you should
+get familiar with working with matrices and vectors. You should have
+come across them before, but you may not have used them as extensively
+as we are doing now. It’s worth getting used to using this trick to
+check your work and ensure you know what the dimension of an output
+matrix should be. For our matrix quadratic form, it turns out that we
+can see it as a special type of inner product. \[
\mathbf{z}^\top\mathbf{C}\mathbf{z} \rightarrow (1\times
-\underbrace{k) (k}_\text{inner dimensions}\times k) (k\times 1) \rightarrow
+\underbrace{k) (k}_\text{inner dimensions}\times k) (k\times 1)
+\rightarrow
\mathbf{b}^\top\mathbf{z}
-$$ where b = Cz so therefore the result is a scalar, $$
+\] where \(\mathbf{b} =
+\mathbf{C}\mathbf{z}\), so the result is a scalar, \[
\mathbf{b}^\top\mathbf{z} \rightarrow
(1\times \underbrace{k) (k}_\text{inner dimensions}\times 1) \rightarrow
(1\times 1)
-$$ where a (1 × 1) matrix is recognised as a scalar.
-
This implies that we should be able to differentiate this form, and indeed the rule for its differentiation is slightly more complex than the inner product, but still quite simple, $$
+\] where a \((1\times 1)\)
+matrix is recognised as a scalar.
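The same bookkeeping can be seen in numpy (a sketch; the shapes below are arbitrary): the product works when the inner dimensions match, and the quadratic form collapses to a scalar.

import numpy as np

A = np.ones((3, 4))    # k = 3 rows, ell = 4 columns
B = np.ones((4, 2))    # p = 4 rows, q = 2 columns
print((A @ B).shape)   # (3, 2): the inner dimensions, 4 and 4, match

C = np.eye(5)
z = np.arange(5.)
print(z @ C @ z)       # a scalar: (1 x 5)(5 x 5)(5 x 1) -> (1 x 1)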
+
This implies that we should be able to differentiate this form, and
+indeed the rule for its differentiation is slightly more complex than
+the inner product, but still quite simple, \[
\frac{\text{d}}{\text{d}\mathbf{z}}
-\mathbf{z}^\top\mathbf{C}\mathbf{z}= \mathbf{C}\mathbf{z} + \mathbf{C}^\top
+\mathbf{z}^\top\mathbf{C}\mathbf{z}= \mathbf{C}\mathbf{z} +
+\mathbf{C}^\top
\mathbf{z}.
-$$ Note that in the special case where C is symmetric then we have C = C⊤ and the derivative simplifies to $$
+\] Note that in the special case where \(\mathbf{C}\) is symmetric then we have
+\(\mathbf{C} = \mathbf{C}^\top\) and
+the derivative simplifies to \[
\frac{\text{d}}{\text{d}\mathbf{z}} \mathbf{z}^\top\mathbf{C}\mathbf{z}=
2\mathbf{C}\mathbf{z}.
-$$
+\]
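Both rules are easy to sanity check with a finite-difference approximation (a sketch; \(\mathbf{C}\) and \(\mathbf{z}\) are random choices):

import numpy as np

rng = np.random.default_rng(0)
C = rng.normal(size=(3, 3))
z = rng.normal(size=3)

analytic = C @ z + C.T @ z            # derivative of z^T C z
numeric = np.zeros(3)
eps = 1e-6
for i in range(3):
    dz = np.zeros(3)
    dz[i] = eps
    numeric[i] = ((z + dz) @ C @ (z + dz) - (z - dz) @ C @ (z - dz))/(2*eps)
print(np.allclose(analytic, numeric))  # True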
Differentiate the Objective
-
First, we need to compute the full objective by substituting our prediction function into the objective function to obtain the objective in terms of $\mappingVector$. Doing this we obtain $$
-\errorFunction(\mappingVector)= (\dataVector - \inputMatrix\mappingVector)^\top (\dataVector - \inputMatrix\mappingVector).
-$$ We now need to differentiate this quadratic form to find the minimum. We differentiate with respect to the vector$\mappingVector$. But before we do that, we’ll expand the brackets in the quadratic form to obtain a series of scalar terms. The rules for bracket expansion across the vectors are similar to those for the scalar system giving, (a − b)⊤(c − d) = a⊤c − a⊤d − b⊤c + b⊤d which substituting for $\mathbf{a} = \mathbf{c} = \dataVector$ and $\mathbf{b}=\mathbf{d} = \inputMatrix\mappingVector$ gives $$
-\errorFunction(\mappingVector)=
-\dataVector^\top\dataVector - 2\dataVector^\top\inputMatrix\mappingVector +
-\mappingVector^\top\inputMatrix^\top\inputMatrix\mappingVector
-$$ where we used the fact that $\dataVector^\top\inputMatrix\mappingVector=\mappingVector^\top\inputMatrix^\top\dataVector$. Now we can use our rules of differentiation to compute the derivative of this form, which is, $$
-\frac{\text{d}}{\text{d}\mappingVector}\errorFunction(\mappingVector)=- 2\inputMatrix^\top \dataVector +
-2\inputMatrix^\top\inputMatrix\mappingVector,
-$$ where we have exploited the fact that $\inputMatrix^\top\inputMatrix$ is symmetric to obtain this result.
-
Exercise 0
-
Use the equivalence between our vector and our matrix formulations of linear regression, alongside our definition of vector derivates, to match the gradients we’ve computed directly for $\frac{\text{d}\errorFunction(c, m)}{\text{d}c}$ and $\frac{\text{d}\errorFunction(c, m)}{\text{d}m}$ to those for $\frac{\text{d}\errorFunction(\mappingVector)}{\text{d}\mappingVector}$.
-
Update Equation for Global Optimum
-
Once again, we need to find the minimum of our objective function. Using our likelihood for multiple input regression we can now minimize for our parameter vector $\mappingVector$. Firstly, just as in the single input case, we seek stationary points by find parameter vectors that solve for when the gradients are zero, $$
-\mathbf{0}=- 2\inputMatrix^\top
-\dataVector + 2\inputMatrix^\top\inputMatrix\mappingVector,
-$$ where 0 is a vector of zeros. Rearranging this equation we find the solution to be $$
-\mappingVector = \left[\inputMatrix^\top \inputMatrix\right]^{-1} \inputMatrix^\top
-\dataVector
-$$ where A − 1 denotes matrix inverse.
-
Solving the Multivariate System
-
The solution for $\mappingVector$ is given in terms of a matrix inverse, but computation of a matrix inverse requires, in itself, an algorithm to resolve it. You’ll know this if you had to invert, by hand, a 3 × 3 matrix in high school. From a numerical stability perspective, it is also best not to compute the matrix inverse directly, but rather to ask the computer to solve the system of linear equations given by $$\inputMatrix^\top\inputMatrix \mappingVector = \inputMatrix^\top\dataVector$$ for $\mappingVector$. This can be done in numpy using the command
-
import numpy as np
-
np.linalg.solve?
-
so we can obtain the solution using
-
w = np.linalg.solve(X.T@X, X.T@y)
-print(w)
-
-We can map it back to the linear regression and plot the fit as follows
-
Multivariate Linear Regression
-
A major advantage of the new system is that we can build a linear regression on a multivariate system. The matrix calculus didn’t specify what the length of the vector $\inputVector$ should be, or equivalently the size of the design matrix.
+
First, we need to compute the full objective by substituting our
+prediction function into the objective function to obtain the objective
+in terms of \(\mathbf{ w}\). Doing this
+we obtain \[
+E(\mathbf{ w})= (\mathbf{ y}- \boldsymbol{ \Phi}\mathbf{ w})^\top
+(\mathbf{ y}- \boldsymbol{ \Phi}\mathbf{ w}).
+\] We now need to differentiate this quadratic form to
+find the minimum. We differentiate with respect to the vector
+\(\mathbf{ w}\). But before we do that,
+we’ll expand the brackets in the quadratic form to obtain a series of
+scalar terms. The rules for bracket expansion across the vectors are
+similar to those for the scalar system giving, \[
+(\mathbf{a} - \mathbf{b})^\top
+(\mathbf{c} - \mathbf{d}) = \mathbf{a}^\top \mathbf{c} - \mathbf{a}^\top
+\mathbf{d} - \mathbf{b}^\top \mathbf{c} + \mathbf{b}^\top \mathbf{d}
+\] which substituting for \(\mathbf{a}
+= \mathbf{c} = \mathbf{ y}\) and \(\mathbf{b}=\mathbf{d} = \boldsymbol{ \Phi}\mathbf{
+w}\) gives \[
+E(\mathbf{ w})=
+\mathbf{ y}^\top\mathbf{ y}- 2\mathbf{ y}^\top\boldsymbol{ \Phi}\mathbf{
+w}+
+\mathbf{ w}^\top\boldsymbol{ \Phi}^\top\boldsymbol{ \Phi}\mathbf{ w}
+\] where we used the fact that \(\mathbf{ y}^\top\boldsymbol{ \Phi}\mathbf{
+w}=\mathbf{ w}^\top\boldsymbol{ \Phi}^\top\mathbf{ y}\).
+
Now we can use our rules of differentiation to compute the derivative
+of this form, which is, \[
+\frac{\text{d}}{\text{d}\mathbf{ w}}E(\mathbf{ w})=- 2\boldsymbol{
+\Phi}^\top \mathbf{ y}+
+2\boldsymbol{ \Phi}^\top\boldsymbol{ \Phi}\mathbf{ w},
+\] where we have exploited the fact that \(\boldsymbol{ \Phi}^\top\boldsymbol{ \Phi}\)
+is symmetric to obtain this result.
+
Exercise 1
+
Use the equivalence between our vector and our matrix formulations of
+linear regression, alongside our definition of vector derivatives, to
+match the gradients we’ve computed directly for \(\frac{\text{d}E(c, m)}{\text{d}c}\) and
+\(\frac{\text{d}E(c, m)}{\text{d}m}\)
+to those for \(\frac{\text{d}E(\mathbf{
+w})}{\text{d}\mathbf{ w}}\).
+
Update Equation for Global
+Optimum
+
We need to find the minimum of our objective function
+with respect to our parameter vector \(\mathbf{ w}\). Firstly, we seek stationary
+points by finding parameter vectors that solve for when the gradients are
+zero, \[
+\mathbf{0}=- 2\boldsymbol{ \Phi}^\top
+\mathbf{ y}+ 2\boldsymbol{ \Phi}^\top\boldsymbol{ \Phi}\mathbf{ w},
+\] where \(\mathbf{0}\) is a
+vector of zeros. Rearranging this equation, we find the
+solution to be \[
+\boldsymbol{ \Phi}^\top \boldsymbol{ \Phi}\mathbf{ w}= \boldsymbol{
+\Phi}^\top
+\mathbf{ y}
+\] which is a matrix equation of the familiar form \(\mathbf{A}\mathbf{x} = \mathbf{b}\).
+
Solving the Multivariate
+System
+
The solution for \(\mathbf{ w}\) can
+be written mathematically in terms of a matrix inverse of \(\boldsymbol{ \Phi}^\top\boldsymbol{
+\Phi}\), but computation of a matrix inverse requires an
+algorithm to resolve it. You’ll know this if you had to invert, by hand,
+a \(3\times 3\) matrix in high school.
+From a numerical stability perspective, it is also best not to compute
+the matrix inverse directly, but rather to ask the computer to
+solve the system of linear equations given by \[
+\boldsymbol{ \Phi}^\top\boldsymbol{ \Phi}\mathbf{ w}= \boldsymbol{
+\Phi}^\top\mathbf{ y}
+\] for \(\mathbf{ w}\).
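In practice this can be done with numpy. The following is a minimal sketch, assuming the design matrix is held in an array or data frame called Phi and the targets in y (names that only appear later in this note):

import numpy as np

# solve Phi^T Phi w = Phi^T y directly rather than forming the matrix inverse
w = np.linalg.solve(Phi.T@Phi, Phi.T@y)
print(w)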
+
Multivariate Linear
+Regression
+
A major advantage of the new system is that we can build a linear
+regression on a multivariate system. The matrix calculus didn’t specify
+what the length of the vector \(\mathbf{
+x}\) should be, or equivalently the size of the design
+matrix.
Movie Body Count Data
-
Let’s consider the movie body count data.
-
import pods
-
data = pods.datasets.movie_body_count()
-movies = data['Y']
-
Let’s remind ourselves of the features we’ve been provided with.
-
print(', '.join(movies.columns))
-
Now we will build a design matrix based on the numeric features: year, Body_Count, Length_Minutes in an effort to predict the rating. We build the design matrix as follows:
This is a data set created by Simon Garnier and Randy Olson for
+exploring the differences between R and Python for data science. The
+data contains information about different movies augmented by estimates
+about how many on-screen deaths are contained in the movie. The data is
+scraped from http://www.moviebodycounts.com. The data contains the
+following features for each movie: Year,
+Body_Count, MPAA_Rating, Genre,
+Director, Actors, Length_Minutes,
+IMDB_Rating.
+
import pods
+
data = pods.datasets.movie_body_count()
+movies = data['Y']
+
The data is provided to us in the form of a pandas data frame. We can
+see the features we’re provided with by inspecting the columns of the
+data frame.
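For example, listing the column names (the exact output depends on the data set version):

print(', '.join(movies.columns))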
Now we will build a design matrix based on the numeric features:
+year, Body_Count, Length_Minutes in an effort to predict the rating. We
+build the design matrix as follows:
Bias as an additional feature.
-
select_features = ['Year', 'Body_Count', 'Length_Minutes']
-X = movies[select_features]
-X['Eins'] =1# add a column for the offset
-y = movies[['IMDB_Rating']]
-
Now let’s perform a linear regression. But this time, we will create a pandas data frame for the result so we can store it in a form that we can visualise easily.
-
import pandas as pd
-
w = pd.DataFrame(data=np.linalg.solve(X.T@X, X.T@y), # solve linear regression here
- index = X.columns, # columns of X become rows of w
- columns=['regression_coefficient']) # the column of X is the value of regression coefficient
-
We can check the residuals to see how good our estimates are
-
(y - X@w).hist()
-
Which shows our model hasn’t yet done a great job of representation, because the spread of values is large. We can check what the rating is dominated by in terms of regression coefficients.
-
w
-
Although we have to be a little careful about interpretation because our input values live on different scales, however it looks like we are dominated by the bias, with a small negative effect for later films (but bear in mind the years are large, so this effect is probably larger than it looks) and a positive effect for length. So it looks like long earlier films generally do better, but the residuals are so high that we probably haven’t modelled the system very well.
+
select_features = ['Year', 'Body_Count', 'Length_Minutes']
+Phi = movies[select_features]
+Phi['Eins'] = 1  # add a column for the offset
+y = movies[['IMDB_Rating']]
+
Now let’s perform a linear regression. But this time, we will create
+a pandas data frame for the result so we can store it in a form that we
+can visualise easily.
+
import pandas as pd
+
w = pd.DataFrame(data=np.linalg.solve(Phi.T@Phi, Phi.T@y), # solve linear regression here
+ index = Phi.columns, # columns of Phi become rows of w
+ columns=['regression_coefficient']) # the single column holds the regression coefficient values
+
We can check the residuals to see how good our estimates are. First
+we create a pandas data frame containing the predictions and use it to
+compute the residuals.
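A minimal sketch of that step follows; the prediction column name here is illustrative only, and a histogram of the residuals gives the visualisation shown in the figure below.

import matplotlib.pyplot as plt

# predictions from the fitted linear model and the resulting residuals
y_pred = pd.DataFrame(data=Phi.values@w.values, index=y.index,
                      columns=['IMDB_Rating_prediction'])
residuals = y['IMDB_Rating'] - y_pred['IMDB_Rating_prediction']
residuals.hist()
plt.show()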
Figure: Residual values for the ratings from the prediction of the
+movie rating given the data from the film.
+
+
+
This shows our model hasn’t yet done a great job of
+representing the data, because the spread of residual values is large. We can
+check what dominates the rating in terms of regression coefficients.
+
w
+
We have to be a little careful about interpretation because
+our input values live on different scales. However, it looks like we are
+dominated by the bias, with a small negative effect for later films (but
+bear in mind the years are large, so this effect is probably larger than
+it looks) and a positive effect for length. So it looks like long
+earlier films generally do better, but the residuals are so high that we
+probably haven’t modelled the system very well.
What about the situation where you have more parameters than data in your simultaneous equation? This is known as an underdetermined system. In fact this set up is in some sense easier to solve, because we don’t need to think about introducing a slack variable (although it might make a lot of sense from a modelling perspective to do so).
-
The way Laplace proposed resolving an overdetermined system, was to introduce slack variables, $\noiseScalar_i$, which needed to be estimated for each point. The slack variable represented the difference between our actual prediction and the true observation. This is known as the residual. By introducing the slack variable we now have an additional n variables to estimate, one for each data point, $\{\noiseScalar_i\}$. This actually turns the overdetermined system into an underdetermined system. Introduction of n variables, plus the original m and c gives us $\numData+2$ parameters to be estimated from n observations, which actually makes the system underdetermined. However, we then made a probabilistic assumption about the slack variables, we assumed that the slack variables were distributed according to a probability density. And for the moment we have been assuming that density was the Gaussian, $$\noiseScalar_i \sim \gaussianSamp{0}{\dataStd^2},$$ with zero mean and variance $\dataStd^2$.
-
The follow up question is whether we can do the same thing with the parameters. If we have two parameters and only one unknown can we place a probability distribution over the parameters, as we did with the slack variables? The answer is yes.
What about the situation where you have more parameters than data in
+your simultaneous equation? This is known as an underdetermined
+system. In fact, this set up is in some sense easier to solve,
+because we don’t need to think about introducing a slack variable
+(although it might make a lot of sense from a modelling
+perspective to do so).
+
The way Laplace proposed resolving an overdetermined system was to
+introduce slack variables, \(\epsilon_i\), which needed to be estimated
+for each point. The slack variable represented the difference between
+our actual prediction and the true observation. This is known as the
+residual. By introducing the slack variable, we now have an
+additional \(n\) variables to estimate,
+one for each data point, \(\{\epsilon_i\}\). This turns the
+overdetermined system into an underdetermined system. Introduction of
+\(n\) variables, plus the original
+\(m\) and \(c\) gives us \(n+2\) parameters to be estimated from \(n\) observations, which makes the system
+underdetermined. However, we then made a probabilistic
+assumption about the slack variables: we assumed that the slack
+variables were distributed according to a probability density. And for
+the moment we have been assuming that density was the Gaussian, \[\epsilon_i \sim
+\mathcal{N}\left(0,\sigma^2\right),\] with zero mean and variance
+\(\sigma^2\).
+
The follow up question is whether we can do the same thing with the
+parameters. If we have two parameters and only one unknown, can we place
+a probability distribution over the parameters as we did with the slack
+variables? The answer is yes.
Underdetermined System
-
+
-
+
-
Figure: An underdetermined system can be fit by considering uncertainty. Multiple solutions are consistent with one specified point.
+
Figure: An underdetermined system can be fit by considering
+uncertainty. Multiple solutions are consistent with one specified
+point.
Consider the distribution of height (in meters) of an adult male human population. We will approximate the marginal density of heights as a Gaussian density with mean given by 1.7m and a standard deviation of 0.15m, implying a variance of $\dataStd^2=0.0225$, $$
- p(h) \sim \gaussianSamp{1.7}{0.0225}.
- $$ Similarly, we assume that weights of the population are distributed a Gaussian density with a mean of 75kg and a standard deviation of 6kg (implying a variance of 36), $$
- p(w) \sim \gaussianSamp{75}{36}.
- $$
Consider the distribution of height (in meters) of an adult male
+human population. We will approximate the marginal density of heights as
+a Gaussian density with mean given by \(1.7\text{m}\) and a standard deviation of
+\(0.15\text{m}\), implying a variance
+of \(\sigma^2=0.0225\), \[
+ p(h) \sim \mathcal{N}\left(1.7,0.0225\right).
+ \] Similarly, we assume that weights of the population are
+distributed according to a Gaussian density with a mean of \(75 \text{kg}\) and a
+standard deviation of \(6 \text{kg}\) (implying a variance of \(36\)),
+\[
+ p(w) \sim \mathcal{N}\left(75,36\right).
+ \]
-
+
-
+
@@ -674,374 +1572,688 @@
Two Dimensional Gaussian
Independence Assumption
-
First of all, we make an independence assumption, we assume that height and weight are independent. The definition of probabilistic independence is that the joint density, p(w, h), factorizes into its marginal densities, p(w, h) = p(w)p(h). Given this assumption we can sample from the joint distribution by independently sampling weights and heights.
+
First of all, we make an independence assumption, we assume that
+height and weight are independent. The definition of probabilistic
+independence is that the joint density, \(p(w,
+h)\), factorizes into its marginal densities, \[
+ p(w, h) = p(w)p(h).
+ \] Given this assumption we can sample from the joint
+distribution by independently sampling weights and heights.
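Under the independence assumption, a joint sample simply pairs an independent height sample with an independent weight sample. A quick sketch, using the means and standard deviations stated above:

import numpy as np

# independent samples from the marginal Gaussians for height and weight
num_samples = 1000
h_samples = np.random.normal(loc=1.7, scale=0.15, size=num_samples)
w_samples = np.random.normal(loc=75., scale=6., size=num_samples)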
-
+
-
+
-
Figure: Samples from independent Gaussian variables that might represent heights and weights.
+
Figure: Samples from independent Gaussian variables that might
+represent heights and weights.
-
In reality height and weight are not independent. Taller people tend on average to be heavier, and heavier people are likely to be taller. This is reflected by the body mass index. A ratio suggested by one of the fathers of statistics, Adolphe Quetelet. Quetelet was interested in the notion of the average man and collected various statistics about people. He defined the BMI to be, $$
+
In reality height and weight are not independent. Taller
+people tend on average to be heavier, and heavier people are likely to
+be taller. This is reflected by the body mass index, a ratio
+suggested by one of the fathers of statistics, Adolphe Quetelet.
+Quetelet was interested in the notion of the average man and
+collected various statistics about people. He defined the BMI to be,
+\[
\text{BMI} = \frac{w}{h^2}
-$$ To deal with this dependence we now introduce the notion of correlation to the multivariate Gaussian density.
+\] To deal with this dependence we now introduce the notion of
+correlation to the multivariate Gaussian density.
Here’s the idea, instead of working directly on the original input space, $\inputVector$, we build models in a new space, $\basisVector(\inputVector)$ where $\basisVector(\cdot)$ is a vector-valued function that is defined on the space $\inputVector$.
Here’s the idea: instead of working directly on the original input
+space, \(\mathbf{ x}\), we build models
+in a new space, \(\boldsymbol{ \phi}(\mathbf{
+x})\) where \(\boldsymbol{
+\phi}(\cdot)\) is a vector-valued function that is
+defined on the space \(\mathbf{
+x}\).
Quadratic Basis
-
Remember, that a vector-valued function is just a vector that contains functions instead of values. Here’s an example for a one dimensional input space, x, being projected to a quadratic basis. First we consider each basis function in turn, we can think of the elements of our vector as being indexed so that we have $$
+
Remember that a vector-valued function is just a vector
+that contains functions instead of values. Here’s an example for a one
+dimensional input space, \(x\), being
+projected to a quadratic basis. First we consider each basis
+function in turn, we can think of the elements of our vector as being
+indexed so that we have \[
\begin{align*}
-\basisFunc_1(\inputScalar) & = 1, \\
-\basisFunc_2(\inputScalar) & = x, \\
-\basisFunc_3(\inputScalar) & = \inputScalar^2.
+\phi_1(x) & = 1, \\
+\phi_2(x) & = x, \\
+\phi_3(x) & = x^2.
\end{align*}
-$$ Now we can consider them together by placing them in a vector, $$
-\basisVector(\inputScalar) = \begin{bmatrix} 1\\ x \\ \inputScalar^2\end{bmatrix}.
-$$ For the vector-valued function, we have simply collected the different functions together in the same vector making them notationally easier to deal with in our mathematics.
-
When we consider the vector-valued function for each data point, then we place all the data into a matrix. The result is a matrix valued function, $$
-\basisMatrix(\inputVector) =
-\begin{bmatrix} 1 & \inputScalar_1 &
-\inputScalar_1^2 \\
-1 & \inputScalar_2 & \inputScalar_2^2\\
+\] Now we can consider them together by placing them in a vector,
+\[
+\boldsymbol{ \phi}(x) = \begin{bmatrix} 1\\ x \\ x^2\end{bmatrix}.
+\] For the vector-valued function, we have simply collected the
+different functions together in the same vector making them notationally
+easier to deal with in our mathematics.
+
When we consider the vector-valued function for each data point, we
+place all the data into a matrix. The result is a matrix-valued
+function, \[
+\boldsymbol{ \Phi}(\mathbf{ x}) =
+\begin{bmatrix} 1 & x_1 &
+x_1^2 \\
+1 & x_2 & x_2^2\\
\vdots & \vdots & \vdots \\
-1 & \inputScalar_n & \inputScalar_n^2
+1 & x_n & x_n^2
\end{bmatrix}
-$$ where we are still in the one dimensional input setting so $\inputVector$ here represents a vector of our inputs with $\numData$ elements.
-
Let’s try constructing such a matrix for a set of inputs. First of all, we create a function that returns the matrix valued function.
-
import numpy as np
-
def quadratic(x, **kwargs):
-"""Take in a vector of input values and return the design matrix associated
- with the basis functions."""
-return np.hstack([np.ones((x.shape[0], 1)), x, x**2])
+\] where we are still in the one dimensional input setting so
+\(\mathbf{ x}\) here represents a
+vector of our inputs with \(n\)
+elements.
+
Let’s try constructing such a matrix for a set of inputs. First of
+all, we create a function that returns the matrix valued function.
+
import numpy as np
+
def quadratic(x, **kwargs):
+    """Take in a vector of input values and return the design matrix
+    associated with the basis functions."""
+    return np.hstack([np.ones((x.shape[0], 1)), x, x**2])
Figure: The set of functions which are combined to form a quadratic basis.
-
-
-
This function takes in an $\numData \times 1$ dimensional vector and returns an $\numData \times 3$ dimensional design matrix containing the basis functions. We can plot those basis functions against there input as follows.
-
The actual function we observe is then made up of a sum of these functions. This is the reason for the name basis. The term basis means ‘the underlying support or foundation for an idea, argument, or process’, and in this context they form the underlying support for our prediction function. Our prediction function can only be composed of a weighted linear sum of our basis functions.
+
Figure: The set of functions which are combined to form a
+quadratic basis.
+
+
+
This function takes in an \(n\times
+1\) dimensional vector and returns an \(n\times 3\) dimensional design
+matrix containing the basis functions. We can plot those basis
+functions against their inputs as follows.
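For example (a quick sketch; the input range is arbitrary and chosen only for illustration):

import matplotlib.pyplot as plt

# evaluate the quadratic basis on a grid and plot each basis function (column)
x_demo = np.linspace(-1, 1, 100)[:, np.newaxis]
Phi_demo = quadratic(x_demo)
plt.plot(x_demo, Phi_demo)
plt.xlabel('$x$')
plt.ylabel('$\\phi(x)$')
plt.show()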
+
The actual function we observe is then made up of a sum of these
+functions. This is the reason for the name basis. The term
+basis means ‘the underlying support or foundation for an idea,
+argument, or process’, and in this context they form the underlying
+support for our prediction function. Our prediction function can only be
+composed of a weighted linear sum of our basis functions.
Quadratic Functions
-
+
-
+
-
Figure: Functions constructed by weighted sum of the components of a quadratic basis.
+
Figure: Functions constructed by weighted sum of the components of a
+quadratic basis.
The rectified linear unit is a basis function that emerged out of the deep learning community. Rectified linear units are popular in the current generation of multilayer perceptron models, or deep networks. These basis functions start flat, and then become linear functions at a certain threshold. $$
-\basisFunc_j(\inputScalar) = \inputScalar\heaviside(\mappingScalarTwo_j \inputScalar + \mappingScalarTwo_0)
-$$
The rectified linear unit is a basis function that emerged out of the
+deep learning community. Rectified linear units are popular in the
+current generation of multilayer perceptron models, or deep networks.
+These basis functions start flat, and then become linear functions at a
+certain threshold. \[
+\phi_j(x) = x H(v_j x + v_0)
+\]
+
import numpy as np
+
import mlai
+
+from mlai import relu
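The relu import above is mlai's helper. Purely to illustrate the formula, and not as mlai's implementation, a rectified linear basis could be sketched as below, where the slopes v and offset v0 are hypothetical parameters:

def relu_basis_sketch(x, v, v0):
    """Sketch of phi_j(x) = x * H(v_j * x + v0), with H the Heaviside step."""
    return np.hstack([x*np.where(v_j*x + v0 > 0., 1., 0.) for v_j in v])

Phi_relu = relu_basis_sketch(np.linspace(-2, 2, 100)[:, np.newaxis],
                             v=[1., 2., -1.], v0=0.5)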
-
+
-
+
-
Figure: The set of functions which are combined to form a rectified linear unit basis.
+
Figure: The set of functions which are combined to form a rectified
+linear unit basis.
Figure: A rectified linear unit basis is made up of different rectified linear unit functions centered at different points.
+
Figure: A rectified linear unit basis is made up of different
+rectified linear unit functions centered at different points.
Gaussian Processes
-
Models where we model the entire joint distribution of our training data, $p(\dataVector, \inputMatrix)$ are sometimes described as generative models. Because we can use sampling to generate data sets that represent all our assumptions. However, as we discussed in the sessions on and , this can be a bad idea, because if our assumptions are wrong then we can make poor predictions. We can try to make more complex assumptions about data to alleviate the problem, but then this typically leads to challenges for tractable application of the sum and rules of probability that are needed to compute the relevant marginal and conditional densities. If we know the form of the question we wish to answer then we typically try and represent that directly, through $p(\dataVector|\inputMatrix)$. In practice, we also have been making assumptions of conditional independence given the model parameters, $$
-p(\dataVector|\inputMatrix, \mappingVector) =
-\prod_{i=1}^{\numData} p(\dataScalar_i | \inputVector_i, \mappingVector)
-$$ Gaussian processes are not normally considered to be generative models, but we will be much more interested in the principles of conditioning in Gaussian processes because we will use conditioning to make predictions between our test and training data. We will avoid the data conditional indpendence assumption in favour of a richer assumption about the data, in a Gaussian process we assume data is jointly Gaussian with a particular mean and covariance, $$
-\dataVector|\inputMatrix \sim \gaussianSamp{\mathbf{m}(\inputMatrix)}{\kernelMatrix(\inputMatrix)},
-$$ where the conditioning is on the inputs $\inputMatrix$ which are used for computing the mean and covariance. For this reason they are known as mean and covariance functions.
+
Models where we model the entire joint distribution of our training
+data, \(p(\mathbf{ y}, \mathbf{X})\)
+are sometimes described as generative models, because we can
+use sampling to generate data sets that represent all our assumptions.
+However, as we discussed in earlier sessions, this can be a bad
+idea, because if our assumptions are wrong then we can make poor
+predictions. We can try to make more complex assumptions about data to
+alleviate the problem, but then this typically leads to challenges for
+tractable application of the sum and product rules of probability that are
+needed to compute the relevant marginal and conditional densities. If we
+know the form of the question we wish to answer then we typically try
+and represent that directly, through \(p(\mathbf{ y}|\mathbf{X})\). In practice,
+we also have been making assumptions of conditional independence given
+the model parameters, \[
+p(\mathbf{ y}|\mathbf{X}, \mathbf{ w}) =
+\prod_{i=1}^{n} p(y_i | \mathbf{ x}_i, \mathbf{ w})
+\] Gaussian processes are not normally considered to be
+generative models, but we will be much more interested in the
+principles of conditioning in Gaussian processes because we will use
+conditioning to make predictions between our test and training data. We
+will avoid the data conditional independence assumption in favour of a
+richer assumption about the data, in a Gaussian process we assume data
+is jointly Gaussian with a particular mean and covariance,
+\[
+\mathbf{ y}|\mathbf{X}\sim
+\mathcal{N}\left(\mathbf{m}(\mathbf{X}),\mathbf{K}(\mathbf{X})\right),
+\] where the conditioning is on the inputs \(\mathbf{X}\) which are used for computing
+the mean and covariance. For this reason they are known as mean and
+covariance functions.
However, we are focussing on what happens in models which are non-linear in the inputs, whereas the above would be linear in the inputs. To consider these, we introduce a matrix, called the design matrix. We set each activation function computed at each data point to be $$
-\activationScalar_{i,j} = \activationScalar(\mappingVector^{(1)}_{j}, \inputVector_{i})
-$$ and define the matrix of activations (known as the design matrix in statistics) to be, $$
-\activationMatrix =
+
However, we are focussing on what happens in models which are
+non-linear in the inputs, whereas the above would be linear in
+the inputs. To consider these, we introduce a matrix, called the design
+matrix. We set each activation function computed at each data point to
+be \[
+\phi_{i,j} = \phi(\mathbf{ w}^{(1)}_{j}, \mathbf{ x}_{i})
+\] and define the matrix of activations (known as the design
+matrix in statistics) to be, \[
+\boldsymbol{ \Phi}=
\begin{bmatrix}
-\activationScalar_{1, 1} & \activationScalar_{1, 2} & \dots & \activationScalar_{1, \numHidden} \\
-\activationScalar_{1, 2} & \activationScalar_{1, 2} & \dots & \activationScalar_{1, \numData} \\
+\phi_{1, 1} & \phi_{1, 2} & \dots & \phi_{1, h} \\
+\phi_{2, 1} & \phi_{2, 2} & \dots & \phi_{2, h} \\
\vdots & \vdots & \ddots & \vdots \\
-\activationScalar_{\numData, 1} & \activationScalar_{\numData, 2} & \dots & \activationScalar_{\numData, \numHidden}
+\phi_{n, 1} & \phi_{n, 2} & \dots & \phi_{n, h}
\end{bmatrix}.
-$$ By convention this matrix always has $\numData$ rows and $\numHidden$ columns, now if we define the vector of all noise corruptions, $\noiseVector = \left[\noiseScalar_1, \dots \noiseScalar_\numData\right]^\top$.
-
If we define the prior distribution over the vector $\mappingVector$ to be Gaussian, $$
-\mappingVector \sim \gaussianSamp{\zerosVector}{\alpha\eye},
-$$ then we can use rules of multivariate Gaussians to see that, $$
-\dataVector \sim \gaussianSamp{\zerosVector}{\alpha \activationMatrix \activationMatrix^\top + \dataStd^2 \eye}.
-$$
-
In other words, our training data is distributed as a multivariate Gaussian, with zero mean and a covariance given by $$
-\kernelMatrix = \alpha \activationMatrix \activationMatrix^\top + \dataStd^2 \eye.
-$$
-
This is an $\numData \times \numData$ size matrix. Its elements are in the form of a function. The maths shows that any element, index by i and j, is a function only of inputs associated with data points i and j, $\dataVector_i$, $\dataVector_j$. $\kernel_{i,j} = \kernel\left(\inputVector_i, \inputVector_j\right)$
-
If we look at the portion of this function associated only with $\mappingFunction(\cdot)$, i.e. we remove the noise, then we can write down the covariance associated with our neural network, $$
-\kernel_\mappingFunction\left(\inputVector_i, \inputVector_j\right) = \alpha \activationVector\left(\mappingMatrix_1, \inputVector_i\right)^\top \activationVector\left(\mappingMatrix_1, \inputVector_j\right)
-$$ so the elements of the covariance or kernel matrix are formed by inner products of the rows of the design matrix.
+\] By convention this matrix always has \(n\) rows and \(h\) columns. We also define the vector of
+all noise corruptions, \(\boldsymbol{
+\epsilon}= \left[\epsilon_1, \dots, \epsilon_n\right]^\top\).
+
If we define the prior distribution over the vector \(\mathbf{ w}\) to be Gaussian, \[
+\mathbf{ w}\sim \mathcal{N}\left(\mathbf{0},\alpha\mathbf{I}\right),
+\] then we can use rules of multivariate Gaussians to see that,
+\[
+\mathbf{ y}\sim \mathcal{N}\left(\mathbf{0},\alpha \boldsymbol{
+\Phi}\boldsymbol{ \Phi}^\top + \sigma^2 \mathbf{I}\right).
+\]
+
In other words, our training data is distributed as a multivariate
+Gaussian, with zero mean and a covariance given by \[
+\mathbf{K}= \alpha \boldsymbol{ \Phi}\boldsymbol{ \Phi}^\top + \sigma^2
+\mathbf{I}.
+\]
+
This is an \(n\times n\) size
+matrix. Its elements are in the form of a function. The maths shows that
+any element, indexed by \(i\) and \(j\), is a function only of the inputs
+associated with data points \(i\) and
+\(j\), \(\mathbf{ x}_i\) and \(\mathbf{ x}_j\): \(k_{i,j} = k\left(\mathbf{ x}_i, \mathbf{
+x}_j\right)\).
+
If we look at the portion of this function associated only with \(f(\cdot)\), i.e. we remove the noise, then
+we can write down the covariance associated with our neural network,
+\[
+k_f\left(\mathbf{ x}_i, \mathbf{ x}_j\right) = \alpha \boldsymbol{
+\phi}\left(\mathbf{W}_1, \mathbf{ x}_i\right)^\top \boldsymbol{
+\phi}\left(\mathbf{W}_1, \mathbf{ x}_j\right)
+\] so the elements of the covariance or kernel matrix
+are formed by inner products of the rows of the design
+matrix.
Gaussian Process
-
This is the essence of a Gaussian process. Instead of making assumptions about our density over each data point, $\dataScalar_i$ as i.i.d. we make a joint Gaussian assumption over our data. The covariance matrix is now a function of both the parameters of the activation function, $\mappingMatrixTwo$, and the input variables, $\inputMatrix$. This comes about through integrating out the parameters of the model, $\mappingVector$.
+
This is the essence of a Gaussian process. Instead of making
+i.i.d. assumptions about our density over each data point, \(y_i\), we make a joint Gaussian
+assumption over our data. The covariance matrix is now a function of
+both the parameters of the activation function, \(\mathbf{V}\), and the input variables,
+\(\mathbf{X}\). This comes about
+through integrating out the parameters of the model, \(\mathbf{ w}\).
Basis Functions
-
We can basically put anything inside the basis functions, and many people do. These can be deep kernels (Cho and Saul 2009) or we can learn the parameters of a convolutional neural network inside there.
-
Viewing a neural network in this way is also what allows us to beform sensible batch normalizations (Ioffe and Szegedy 2015).
+
We can basically put anything inside the basis functions, and many
+people do. These can be deep kernels (Cho and Saul, 2009) or we can learn the
+parameters of a convolutional neural network inside there.
+
Viewing a neural network in this way is also what allows us to perform
+sensible batch normalizations (Ioffe and Szegedy, 2015).
Another type of basis is sometimes known as a ‘radial basis’ because the effect basis functions are constructed on ‘centres’ and the effect of each basis function decreases as the radial distance from each centre increases.
Another type of basis is sometimes known as a ‘radial basis’ because
+the basis functions are constructed on ‘centres’ and the effect
+of each basis function decreases as the radial distance from each centre
+increases.
To understand the Gaussian process we’re going to build on our understanding of the marginal likelihood for Bayesian regression. In the session on we sampled directly from the weight vector, $\mappingVector$ and applied it to the basis matrix $\basisMatrix$ to obtain a sample from the prior and a sample from the posterior. It is often helpful to think of modeling techniques as generative models. To give some thought as to what the process for obtaining data from the model is. From the perspective of Gaussian processes, we want to start by thinking of basis function models, where the parameters are sampled from a prior, but move to thinking about sampling from the marginal likelihood directly.
To understand the Gaussian process we’re going to build on our
+understanding of the marginal likelihood for Bayesian regression. In an
+earlier session we sampled directly from the weight vector, \(\mathbf{ w}\), and applied it to the basis
+matrix \(\boldsymbol{ \Phi}\) to obtain
+a sample from the prior and a sample from the posterior. It is often
+helpful to think of modeling techniques as generative models,
+and to give some thought to what the process for obtaining data from the
+model is. From the perspective of Gaussian processes, we want to start
+by thinking of basis function models, where the parameters are sampled
+from a prior, but move to thinking about sampling from the marginal
+likelihood directly.
Sampling from the Prior
-
The first thing we’ll do is to set up the parameters of the model, these include the parameters of the prior, the parameters of the basis functions and the noise level.
-
# set prior variance on w
-alpha =4.
-# set the order of the polynomial basis set
-degree =5
-# set the noise variance
-sigma2 =0.01
-
Now we have the variance, we can sample from the prior distribution to see what form we are imposing on the functions a priori.
-
Let’s now compute a range of values to make predictions at, spanning the new space of inputs,
The first thing we’ll do is to set up the parameters of the model,
+these include the parameters of the prior, the parameters of the basis
+functions and the noise level.
+
# set prior variance on w
+alpha = 4.
+# set the order of the polynomial basis set
+degree = 5
+# set the noise variance
+sigma2 = 0.01
+
Now that we have the variance, we can sample from the prior distribution
+to see what form we are imposing on the functions a priori.
+
Let’s now compute a range of values to make predictions at, spanning
+the new space of inputs,
Now let’s build the basis matrices. First we load in the data.
-
import pods
-
data = pods.datasets.olympic_marathon_men()
-x = data['X']
-y = data['Y']
-
loc =1950.
-scale =100.
-num_data = x.shape[0]
-num_pred_data =100# how many points to use for plotting predictions
-x_pred = np.linspace(1880, 2030, num_pred_data)[:, np.newaxis] # input locations for predictions
-Phi_pred = polynomial(x_pred, degree=degree, loc=loc, scale=scale)
-Phi = polynomial(x, degree=degree, loc=loc, scale=scale)
+
import pods
+
data = pods.datasets.olympic_marathon_men()
+x = data['X']
+y = data['Y']
+
loc = 1950.
+scale = 100.
+num_data = x.shape[0]
+num_pred_data = 100  # how many points to use for plotting predictions
+x_pred = np.linspace(1880, 2030, num_pred_data)[:, np.newaxis]  # input locations for predictions
+Phi_pred = polynomial(x_pred, degree=degree, loc=loc, scale=scale)
+Phi = polynomial(x, degree=degree, loc=loc, scale=scale)
Weight Space View
-
To generate typical functional predictions from the model, we need a set of model parameters. We assume that the parameters are drawn independently from a Gaussian density, $$
-\weightVector \sim \gaussianSamp{\zerosVector}{\alpha\eye},
-$$ then we can combine this with the definition of our prediction function $\mappingFunction(\inputVector)$, $$
-\mappingFunction(\inputVector) = \weightVector^\top \basisVector(\inputVector).
-$$ We can now sample from the prior density to obtain a vector $\weightVector$ using the function np.random.normal and combine these parameters with our basis to create some samples of what $\mappingFunction(\inputVector)$ looks like,
+
To generate typical functional predictions from the model, we need a
+set of model parameters. We assume that the parameters are drawn
+independently from a Gaussian density, \[
+\mathbf{ w}\sim \mathcal{N}\left(\mathbf{0},\alpha\mathbf{I}\right),
+\] then we can combine this with the definition of our prediction
+function \(f(\mathbf{ x})\), \[
+f(\mathbf{ x}) = \mathbf{ w}^\top \boldsymbol{ \phi}(\mathbf{ x}).
+\] We can now sample from the prior density to obtain a vector
+\(\mathbf{ w}\) using the function
+np.random.normal and combine these parameters with our
+basis to create some samples of what \(f(\mathbf{ x})\) looks like,
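for example (a sketch reusing alpha, x_pred and Phi_pred from above):

import matplotlib.pyplot as plt

# draw weight vectors from the prior and plot the functions they imply
for i in range(10):
    w_sample = np.random.normal(size=(Phi_pred.shape[1], 1))*np.sqrt(alpha)
    f_sample = Phi_pred@w_sample
    plt.plot(x_pred.flatten(), f_sample.flatten())
plt.show()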
Function Space View
-
The process we have used to generate the samples is a two stage process. To obtain each function, we first generated a sample from the prior, $$
-\weightVector \sim \gaussianSamp{\zerosVector}{\alpha \eye}
-$$ then if we compose our basis matrix, $\basisMatrix$ from the basis functions associated with each row then we get, $$
-\basisMatrix = \begin{bmatrix}\basisVector(\inputVector_1) \\ \vdots \\
-\basisVector(\inputVector_\numData)\end{bmatrix}
-$$ then we can write down the vector of function values, as evaluated at $$
-\mappingFunctionVector = \begin{bmatrix} \mappingFunction_1
-\\ \vdots \mappingFunction_\numData\end{bmatrix}
-$$ in the form $$
-\mappingFunctionVector = \basisMatrix\weightVector.
-$$
-
Now we can use standard properties of multivariate Gaussians to write down the probability density that is implied over $\mappingFunctionVector$. In particular we know that if $\weightVector$ is sampled from a multivariate normal (or multivariate Gaussian) with covariance $\alpha \eye$ and zero mean, then assuming that $\basisMatrix$ is a deterministic matrix (i.e. it is not sampled from a probability density) then the vector $\mappingFunctionVector$ will also be distributed according to a zero mean multivariate normal as follows, $$
-\mappingFunctionVector \sim \gaussianSamp{\zerosVector}{\alpha \basisMatrix\basisMatrix^\top}.
-$$
-
The question now is, what happens if we sample $\mappingFunctionVector$ directly from this density, rather than first sampling $\weightVector$ and then multiplying by $\basisMatrix$. Let’s try this. First of all we define the covariance as $$
-\kernelMatrix = \alpha
-\basisMatrix\basisMatrix^\top.
-$$
-
K = alpha*Phi_pred@Phi_pred.T
-
Now we can use the np.random.multivariate_normal command for sampling from a multivariate normal with covariance given by $\kernelMatrix$ and zero mean,
The process we have used to generate the samples is a two stage
+process. To obtain each function, we first generated a sample from the
+prior, \[
+\mathbf{ w}\sim \mathcal{N}\left(\mathbf{0},\alpha \mathbf{I}\right)
+\] then if we compose our basis matrix, \(\boldsymbol{ \Phi}\) from the basis
+functions associated with each row then we get, \[
+\boldsymbol{ \Phi}= \begin{bmatrix}\boldsymbol{ \phi}(\mathbf{ x}_1) \\
+\vdots \\
+\boldsymbol{ \phi}(\mathbf{ x}_n)\end{bmatrix}
+\] then we can write down the vector of function values, as
+evaluated at \[
+\mathbf{ f}= \begin{bmatrix} f_1
+\\ \vdots \\ f_n\end{bmatrix}
+\] in the form \[
+\mathbf{ f}= \boldsymbol{ \Phi}\mathbf{ w}.
+\]
+
Now we can use standard properties of multivariate Gaussians to write
+down the probability density that is implied over \(\mathbf{ f}\). In particular we know that
+if \(\mathbf{ w}\) is sampled from a
+multivariate normal (or multivariate Gaussian) with covariance \(\alpha \mathbf{I}\) and zero mean, then
+assuming that \(\boldsymbol{ \Phi}\) is
+a deterministic matrix (i.e. it is not sampled from a probability
+density) then the vector \(\mathbf{
+f}\) will also be distributed according to a zero mean
+multivariate normal as follows, \[
+\mathbf{ f}\sim \mathcal{N}\left(\mathbf{0},\alpha \boldsymbol{
+\Phi}\boldsymbol{ \Phi}^\top\right).
+\]
+
The question now is, what happens if we sample \(\mathbf{ f}\) directly from this density,
+rather than first sampling \(\mathbf{
+w}\) and then multiplying by \(\boldsymbol{ \Phi}\). Let’s try this. First
+of all we define the covariance as \[
+\mathbf{K}= \alpha
+\boldsymbol{ \Phi}\boldsymbol{ \Phi}^\top.
+\]
+
K = alpha*Phi_pred@Phi_pred.T
+
Now we can use the np.random.multivariate_normal command
+for sampling from a multivariate normal with covariance given by \(\mathbf{K}\) and zero mean,
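for example (a sketch reusing K and x_pred from above):

import matplotlib.pyplot as plt

# sample function values directly from the joint Gaussian over f
for i in range(10):
    f_sample = np.random.multivariate_normal(mean=np.zeros(x_pred.size), cov=K)
    plt.plot(x_pred.flatten(), f_sample.flatten())
plt.show()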
Figure: Samples directly from the covariance function implied by the basis function based covariance, $\alpha \basisMatrix\basisMatrix^\top$.
-
-
-
The samples appear very similar to those which we obtained indirectly. That is no surprise because they are effectively drawn from the same mutivariate normal density. However, when sampling $\mappingFunctionVector$ directly we created the covariance for $\mappingFunctionVector$. We can visualise the form of this covaraince in an image in python with a colorbar to show scale.
+
Figure: Samples directly from the covariance function implied by the
+basis function based covariance, \(\alpha
+\boldsymbol{ \Phi}\boldsymbol{ \Phi}^\top\).
+
+
+
The samples appear very similar to those which we obtained
+indirectly. That is no surprise because they are effectively drawn from
+the same multivariate normal density. However, when sampling \(\mathbf{ f}\) directly we created the
+covariance for \(\mathbf{ f}\). We can
+visualise the form of this covariance in an image in python with a
+colorbar to show scale.
-
+
-
+
-
Figure: Covariance of the function implied by the basis set $\alpha\basisMatrix\basisMatrix^\top$.
-
-
-
This image is the covariance expressed between different points on the function. In regression we normally also add independent Gaussian noise to obtain our observations $\dataVector$, $$
-\dataVector = \mappingFunctionVector + \boldsymbol{\epsilon}
-$$ where the noise is sampled from an independent Gaussian distribution with variance $\dataStd^2$, $$
-\epsilon \sim \gaussianSamp{\zerosVector}{\dataStd^2\eye}.
-$$ we can use properties of Gaussian variables, i.e. the fact that sum of two Gaussian variables is also Gaussian, and that it’s covariance is given by the sum of the two covariances, whilst the mean is given by the sum of the means, to write down the marginal likelihood, $$
-\dataVector \sim \gaussianSamp{\zerosVector}{\basisMatrix\basisMatrix^\top +\dataStd^2\eye}.
-$$ Sampling directly from this density gives us the noise corrupted functions,
-
K = alpha*Phi_pred@Phi_pred.T + sigma2*np.eye(x_pred.size)
-for i inrange(10):
- y_sample = np.random.multivariate_normal(mean=np.zeros(x_pred.size), cov=K)
- ax.plot(x_pred.flatten(), y_sample.flatten())
-
-mlai.write_figure('gp-sample-basis-function-plus-noise.svg',
-'../slides/diagrams/kern')
+
Figure: Covariance of the function implied by the basis set \(\alpha\boldsymbol{ \Phi}\boldsymbol{
+\Phi}^\top\).
+
+
+
This image is the covariance expressed between different points on
+the function. In regression we normally also add independent Gaussian
+noise to obtain our observations \(\mathbf{
+y}\), \[
+\mathbf{ y}= \mathbf{ f}+ \boldsymbol{\epsilon}
+\] where the noise is sampled from an independent Gaussian
+distribution with variance \(\sigma^2\), \[
+\boldsymbol{\epsilon} \sim \mathcal{N}\left(\mathbf{0},\sigma^2\mathbf{I}\right).
+\] We can use properties of Gaussian variables, i.e. the fact
+that the sum of two Gaussian variables is also Gaussian, and that its
+covariance is given by the sum of the two covariances, whilst the mean
+is given by the sum of the means, to write down the marginal likelihood,
+\[
+\mathbf{ y}\sim \mathcal{N}\left(\mathbf{0},\boldsymbol{
+\Phi}\boldsymbol{ \Phi}^\top +\sigma^2\mathbf{I}\right).
+\] Sampling directly from this density gives us the noise
+corrupted functions,
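for example (a sketch following the earlier sampling code and reusing sigma2):

# add the noise variance to the covariance before sampling
K_noise = alpha*Phi_pred@Phi_pred.T + sigma2*np.eye(x_pred.size)
for i in range(10):
    y_sample = np.random.multivariate_normal(mean=np.zeros(x_pred.size), cov=K_noise)
    plt.plot(x_pred.flatten(), y_sample.flatten())
plt.show()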
-
-
+
+
-
+
-
-
Figure: Samples directly from the covariance function implied by the noise corrupted basis function based covariance, $\alpha \basisMatrix\basisMatrix^\top + \dataStd^2 \eye$.
-
-
-
where the effect of our noise term is to roughen the sampled functions, we can also increase the variance of the noise to see a different effect,
Figure: Samples directly from the covariance function implied by the
+noise corrupted basis function based covariance, \(\alpha \boldsymbol{ \Phi}\boldsymbol{ \Phi}^\top +
+\sigma^2 \mathbf{I}\).
+
+
+
where the effect of our noise term is to roughen the sampled
+functions, we can also increase the variance of the noise to see a
+different effect,
Figure: Samples directly from the covariance function implied by the noise corrupted basis function based covariance, $\alpha \basisMatrix\basisMatrix^\top + \eye$.
The process described above is degenerate. The covariance function is of rank at most $\numHidden$ and since the theoretical amount of data could always increase $\numData \rightarrow \infty$, the covariance function is not full rank. This means as we increase the amount of data to infinity, there will come a point where we can’t normalize the process because the multivariate Gaussian has the form, $$
-\gaussianDist{\mappingFunctionVector}{\zerosVector}{\kernelMatrix} = \frac{1}{\left(2\pi\right)^{\frac{\numData}{2}}\det{\kernelMatrix}^\frac{1}{2}} \exp\left(-\frac{\mappingFunctionVector^\top\kernelMatrix \mappingFunctionVector}{2}\right)
-$$ and a non-degenerate kernel matrix leads to $\det{\kernelMatrix} = 0$ defeating the normalization (it’s equivalent to finding a projection in the high dimensional Gaussian where the variance of the the resulting univariate Gaussian is zero, i.e. there is a null space on the covariance, or alternatively you can imagine there are one or more directions where the Gaussian has become the delta function).
+
+
Figure: Samples directly from the covariance function implied by the
+noise corrupted basis function based covariance, \(\alpha \boldsymbol{ \Phi}\boldsymbol{ \Phi}^\top +
+\mathbf{I}\).
The process described above is degenerate. The covariance function is
+of rank at most \(h\) and since the
+theoretical amount of data could always increase \(n\rightarrow \infty\), the covariance
+function is not full rank. This means as we increase the amount of data
+to infinity, there will come a point where we can’t normalize the
+process because the multivariate Gaussian has the form, \[
+\mathcal{N}\left(\mathbf{ f}|\mathbf{0},\mathbf{K}\right) =
+\frac{1}{\left(2\pi\right)^{\frac{n}{2}}\det{\mathbf{K}}^\frac{1}{2}}
+\exp\left(-\frac{\mathbf{ f}^\top\mathbf{K}^{-1}\mathbf{ f}}{2}\right)
+\] and a degenerate kernel matrix leads to \(\det{\mathbf{K}} = 0\), defeating the
+normalization (it’s equivalent to finding a projection in the high
+dimensional Gaussian where the variance of the resulting univariate
+Gaussian is zero, i.e. there is a null space on the covariance, or
+alternatively you can imagine there are one or more directions where the
+Gaussian has become the delta function).
-
In the machine learning field, it was Radford Neal (Neal 1994) that realized the potential of the next step. In his 1994 thesis, he was considering Bayesian neural networks, of the type we described above, and in considered what would happen if you took the number of hidden nodes, or neurons, to infinity, i.e. $\numHidden \rightarrow \infty$.
+
In the machine learning field, it was Radford Neal (Neal, 1994) that
+realized the potential of the next step. In his 1994 thesis, he was
+considering Bayesian neural networks, of the type we described above,
+and considered what would happen if you took the number of hidden
+nodes, or neurons, to infinity, i.e. \(h\rightarrow \infty\).
In loose terms, what Radford considers is what happens to the elements of the covariance function, $$
+
In loose terms, what Radford considers is what happens to the
+elements of the covariance function, \[
\begin{align*}
- \kernel_\mappingFunction\left(\inputVector_i, \inputVector_j\right) & = \alpha \activationVector\left(\mappingMatrix_1, \inputVector_i\right)^\top \activationVector\left(\mappingMatrix_1, \inputVector_j\right)\\
- & = \alpha \sum_k \activationScalar\left(\mappingVector^{(1)}_k, \inputVector_i\right) \activationScalar\left(\mappingVector^{(1)}_k, \inputVector_j\right)
+ k_f\left(\mathbf{ x}_i, \mathbf{ x}_j\right) & = \alpha
+\boldsymbol{ \phi}\left(\mathbf{W}_1, \mathbf{ x}_i\right)^\top
+\boldsymbol{ \phi}\left(\mathbf{W}_1, \mathbf{ x}_j\right)\\
+ & = \alpha \sum_k \phi\left(\mathbf{ w}^{(1)}_k, \mathbf{
+x}_i\right) \phi\left(\mathbf{ w}^{(1)}_k, \mathbf{ x}_j\right)
\end{align*}
- $$ if instead of considering a finite number you sample infinitely many of these activation functions, sampling parameters from a prior density, $p(\mappingVectorTwo)$, for each one, $$
-\kernel_\mappingFunction\left(\inputVector_i, \inputVector_j\right) = \alpha \int \activationScalar\left(\mappingVector^{(1)}, \inputVector_i\right) \activationScalar\left(\mappingVector^{(1)}, \inputVector_j\right) p(\mappingVector^{(1)}) \text{d}\mappingVector^{(1)}
-$$ And that’s not only for Gaussian $p(\mappingVectorTwo)$. In fact this result holds for a range of activations, and a range of prior densities because of the central limit theorem.
-
To write it in the form of a probabilistic program, as long as the distribution for ϕi implied by this short probabilistic program, $$
+ \] if instead of considering a finite number you sample
+infinitely many of these activation functions, sampling parameters from
+a prior density, \(p(\mathbf{ v})\),
+for each one, \[
+k_f\left(\mathbf{ x}_i, \mathbf{ x}_j\right) = \alpha \int
+\phi\left(\mathbf{ w}^{(1)}, \mathbf{ x}_i\right) \phi\left(\mathbf{
+w}^{(1)}, \mathbf{ x}_j\right) p(\mathbf{ w}^{(1)}) \text{d}\mathbf{
+w}^{(1)}
+\] And that’s not only for Gaussian \(p(\mathbf{ v})\). In fact this result holds
+for a range of activations, and a range of prior densities because of
+the central limit theorem.
+
To write it in the form of a probabilistic program, as long as the
+distribution for \(\phi_i\) implied by
+this short probabilistic program, \[
\begin{align*}
- \mappingVectorTwo & \sim p(\cdot)\\
- \phi_i & = \activationScalar\left(\mappingVectorTwo, \inputVector_i\right),
+ \mathbf{ v}& \sim p(\cdot)\\
+ \phi_i & = \phi\left(\mathbf{ v}, \mathbf{ x}_i\right),
\end{align*}
- $$ has finite variance, then the result of taking the number of hidden units to infinity, with appropriate scaling, is also a Gaussian process.
+ \] has finite variance, the result of taking the number of
+hidden units to infinity, with appropriate scaling, is also a Gaussian
+process.
Further Reading
-
To understand this argument in more detail, I highly recommend reading chapter 2 of Neal’s thesis (Neal 1994), which remains easy to read and clear today. Indeed, for readers interested in Bayesian neural networks, both Raford Neal’s and David MacKay’s PhD thesis (MacKay 1992) remain essential reading. Both theses embody a clarity of thought, and an ability to weave together threads from different fields that was the business of machine learning in the 1990s. Radford and David were also pioneers in making their software widely available and publishing material on the web.
+
To understand this argument in more detail, I highly recommend
+reading chapter 2 of Neal’s thesis (Neal, 1994), which remains easy to
+read and clear today. Indeed, for readers interested in Bayesian neural
+networks, both Radford Neal’s and David MacKay’s PhD theses (MacKay, 1992)
+remain essential reading. Both theses embody a clarity of thought, and
+an ability to weave together threads from different fields that was the
+business of machine learning in the 1990s. Radford and David were also
+pioneers in making their software widely available and publishing
+material on the web.
In our we sampled from the prior over paraemters. Through the properties of multivariate Gaussian densities this prior over parameters implies a particular density for our data observations, $\dataVector$. In this session we sampled directly from this distribution for our data, avoiding the intermediate weight-space representation. This is the approach taken by Gaussian processes. In a Gaussian process you specify the covariance function directly, rather than implicitly through a basis matrix and a prior over parameters. Gaussian processes have the advantage that they can be nonparametric, which in simple terms means that they can have infinite basis functions. In the lectures we introduced the exponentiated quadratic covariance, also known as the RBF or the Gaussian or the squared exponential covariance function. This covariance function is specified by $$
-\kernelScalar(\inputVector, \inputVector^\prime) = \alpha \exp\left( -\frac{\left\Vert \inputVector-\inputVector^\prime\right\Vert^2}{2\ell^2}\right),
-$$ where $\left\Vert\inputVector - \inputVector^\prime\right\Vert^2$ is the squared distance between the two input vectors $$
-\left\Vert\inputVector - \inputVector^\prime\right\Vert^2 = (\inputVector - \inputVector^\prime)^\top (\inputVector - \inputVector^\prime)
-$$ Let’s build a covariance matrix based on this function. First we define the form of the covariance function,
-
-from mlai import eq_cov
-
We can use this to compute directly the covariance for $\mappingFunctionVector$ at the points given by x_pred. Let’s define a new function K() which does this,
Previously, we sampled from the prior over parameters. Through the
+properties of multivariate Gaussian densities this prior over parameters
+implies a particular density for our data observations, \(\mathbf{ y}\). In this session we sampled
+directly from this distribution for our data, avoiding the intermediate
+weight-space representation. This is the approach taken by Gaussian
+processes. In a Gaussian process you specify the covariance
+function directly, rather than implicitly through a basis
+matrix and a prior over parameters. Gaussian processes have the
+advantage that they can be nonparametric, which in simple terms
+means that they can have infinitely many basis functions. In the
+lectures we introduced the exponentiated quadratic covariance,
+also known as the RBF or the Gaussian or the squared exponential
+covariance function. This covariance function is specified by \[
+k(\mathbf{ x}, \mathbf{ x}^\prime) = \alpha \exp\left( -\frac{\left\Vert
+\mathbf{ x}-\mathbf{ x}^\prime\right\Vert^2}{2\ell^2}\right),
+\] where \(\left\Vert\mathbf{ x}-
+\mathbf{ x}^\prime\right\Vert^2\) is the squared distance between
+the two input vectors \[
+\left\Vert\mathbf{ x}- \mathbf{ x}^\prime\right\Vert^2 = (\mathbf{ x}-
+\mathbf{ x}^\prime)^\top (\mathbf{ x}- \mathbf{ x}^\prime)
+\] Let’s build a covariance matrix based on this function. First
+we define the form of the covariance function,
+
import mlai
+
+from mlai import eq_cov
+
We can use this to compute directly the covariance for \(\mathbf{ f}\) at the points given by
+x_pred. Let’s define a new function K() which
+does this,
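a hedged sketch follows, where exp_quad is written out from the formula above rather than being mlai's eq_cov (whose exact signature should be checked against the mlai source):

# exponentiated quadratic covariance, written out from the formula above
def exp_quad(x, x_prime, variance=1., lengthscale=1.):
    squared_distance = np.sum((x - x_prime)**2)
    return variance*np.exp(-0.5*squared_distance/lengthscale**2)

# evaluate the covariance function at every pair of inputs to form the matrix
def K(X, X2, kernel=exp_quad, **kwargs):
    return np.array([[kernel(x_i, x_j, **kwargs) for x_j in X2] for x_i in X])

K_pred = K(x_pred, x_pred, variance=alpha, lengthscale=10.)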
To visualise the covariance between the points we can use the
+imshow function in matplotlib.
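For instance, using the K_pred matrix from the sketch above:

import matplotlib.pyplot as plt

# display the covariance between all pairs of prediction inputs, with a colorbar for scale
fig, ax = plt.subplots(figsize=(8, 8))
im = ax.imshow(K_pred, interpolation='none')
fig.colorbar(im)
plt.show()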
Finally, we can sample functions from the marginal likelihood.
-
Exercise 1
-
Moving Parameters Have a play with the parameters for this covariance function (the lengthscale and the variance) and see what effects the parameters have on the types of functions you observe.
One view of Bayesian inference is to assume we are given a mechanism for generating samples, where we assume that mechanism is representing on accurate view on the way we believe the world works.
+
Exercise 2
+
Moving Parameters Have a play with the parameters
+for this covariance function (the lengthscale and the variance) and see
+what effects the parameters have on the types of functions you
+observe.
One view of Bayesian inference is to assume we are given a mechanism
+for generating samples, where we assume that mechanism represents
+an accurate view of the way we believe the world works.
This mechanism is known as our prior belief.
-
We combine our prior belief with our observations of the real world by discarding all those samples that are inconsistent with our prior. The likelihood defines mathematically what we mean by inconsistent with the prior. The higher the noise level in the likelihood, the looser the notion of consistent.
-
The samples that remain are considered to be samples from the posterior.
-
This approach to Bayesian inference is closely related to two sampling techniques known as rejection sampling and importance sampling. It is realized in practice in an approach known as approximate Bayesian computation (ABC) or likelihood-free inference.
-
In practice, the algorithm is often too slow to be practical, because most samples will be inconsistent with the data and as a result the mechanism has to be operated many times to obtain a few posterior samples.
-
However, in the Gaussian process case, when the likelihood also assumes Gaussian noise, we can operate this mechanism mathematically, and obtain the posterior density analytically. This is the benefit of Gaussian processes.
-
First we will load in two python functions for computing the covariance function.
-
Next we sample from a multivariate normal density (a multivariate Gaussian), using the covariance function as the covariance matrix.
We combine our prior belief with our observations of the real world
+by discarding all those prior samples that are inconsistent with our
+observations. The likelihood defines mathematically what we
+mean by inconsistent with the observations. The higher the noise level
+in the likelihood, the looser the notion of consistent.
+
The samples that remain are samples from the posterior.
+
This approach to Bayesian inference is closely related to two
+sampling techniques known as rejection sampling and
+importance sampling. It is realized in practice in an approach
+known as approximate Bayesian computation (ABC) or
+likelihood-free inference.
+
In practice, the algorithm is often too slow to be practical, because
+most samples will be inconsistent with the observations and as a result
+the mechanism must be operated many times to obtain a few posterior
+samples.
+
However, in the Gaussian process case, when the likelihood also
+assumes Gaussian noise, we can operate this mechanism mathematically,
+and obtain the posterior density analytically. This is the
+benefit of Gaussian processes.
+
First, we will load in two python functions for computing the
+covariance function.
+
Next, we sample from a multivariate normal density (a multivariate
+Gaussian), using the covariance function as the covariance matrix.
-
+
-
+
-
+
-
+
-
Figure: One view of Bayesian inference is we have a machine for generating samples (the prior), and we discard all samples inconsistent with our data, leaving the samples of interest (the posterior). This is a rejection sampling view of Bayesian inference. The Gaussian process allows us to do this analytically by multiplying the prior by the likelihood.
+
Figure: One view of Bayesian inference is we have a machine for
+generating samples (the prior), and we discard all samples
+inconsistent with our data, leaving the samples of interest (the
+posterior). This is a rejection sampling view of Bayesian
+inference. The Gaussian process allows us to do this analytically by
+multiplying the prior by the likelihood.
Gaussian Process
-
The Gaussian process perspective takes the marginal likelihood of the data to be a joint Gaussian density with a covariance given by $\kernelMatrix$. So the model likelihood is of the form, $$
-p(\dataVector|\inputMatrix) =
-\frac{1}{(2\pi)^{\frac{\numData}{2}}|\kernelMatrix|^{\frac{1}{2}}}
-\exp\left(-\frac{1}{2}\dataVector^\top \left(\kernelMatrix+\dataStd^2
-\eye\right)^{-1}\dataVector\right)
-$$ where the input data, $\inputMatrix$, influences the density through the covariance matrix, $\kernelMatrix$ whose elements are computed through the covariance function, $\kernelScalar(\inputVector, \inputVector^\prime)$.
-
This means that the negative log likelihood (the objective function) is given by, $$
-\errorFunction(\boldsymbol{\theta}) = \frac{1}{2} \log |\kernelMatrix|
-+ \frac{1}{2} \dataVector^\top \left(\kernelMatrix +
-\dataStd^2\eye\right)^{-1}\dataVector
-$$ where the parameters of the model are also embedded in the covariance function, they include the parameters of the kernel (such as lengthscale and variance), and the noise variance, $\dataStd^2$. Let’s create a class in python for storing these variables.
-
-from mlai import GP
+
The Gaussian process perspective takes the marginal likelihood of the
+data to be a joint Gaussian density with a covariance given by \(\mathbf{K}\). So the model likelihood is of
+the form, \[
+p(\mathbf{ y}|\mathbf{X}) =
+\frac{1}{(2\pi)^{\frac{n}{2}}|\mathbf{K}|^{\frac{1}{2}}}
+\exp\left(-\frac{1}{2}\mathbf{ y}^\top \left(\mathbf{K}+\sigma^2
+\mathbf{I}\right)^{-1}\mathbf{ y}\right)
+\] where the input data, \(\mathbf{X}\), influences the density
+through the covariance matrix, \(\mathbf{K}\) whose elements are computed
+through the covariance function, \(k(\mathbf{
+x}, \mathbf{ x}^\prime)\).
+
This means that the negative log likelihood (the objective function)
+is given by, \[
+E(\boldsymbol{\theta}) = \frac{1}{2} \log |\mathbf{K}|
++ \frac{1}{2} \mathbf{ y}^\top \left(\mathbf{K}+
+\sigma^2\mathbf{I}\right)^{-1}\mathbf{ y}
+\] where the parameters of the model are also embedded
+in the covariance function; they include the parameters of the kernel
+(such as lengthscale and variance), and the noise variance, \(\sigma^2\). Let’s create a set of classes
+in python for storing these variables.
+
import mlai
+
+from mlai import Model
+
import mlai
+
+from mlai import MapModel
+
import mlai
+
+from mlai import ProbModel
+
import mlai
+
+from mlai import ProbMapModel
+
import mlai
+
+from mlai import GP
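Before moving on, here is a sketch of the objective above written directly in numpy, independent of the mlai classes (as an assumption, the noise variance is folded into the covariance for both terms, and the additive constant of n/2 times log 2*pi is dropped):

import numpy as np

def negative_log_likelihood(K, y, sigma2):
    # E(theta) = 0.5*log det(K + sigma^2 I) + 0.5*y^T (K + sigma^2 I)^{-1} y
    # (up to an additive constant). K is the kernel matrix, y is a 1-D array
    # of targets and sigma2 is the noise variance.
    K_noise = K + sigma2*np.eye(K.shape[0])
    _, logdet = np.linalg.slogdet(K_noise)
    capacity_term = 0.5*logdet
    data_fit_term = 0.5*(y @ np.linalg.solve(K_noise, y))
    return capacity_term + data_fit_term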
Making Predictions
-
We now have a probability density that represents functions. How do we make predictions with this density? The density is known as a process because it is consistent. By consistency, here, we mean that the model makes predictions for $\mappingFunctionVector$ that are unaffected by future values of $\mappingFunctionVector^*$ that are currently unobserved (such as test points). If we think of $\mappingFunctionVector^*$ as test points, we can still write down a joint probability density over the training observations, $\mappingFunctionVector$ and the test observations, $\mappingFunctionVector^*$. This joint probability density will be Gaussian, with a covariance matrix given by our covariance function, $\kernelScalar(\inputVector_i, \inputVector_j)$. $$
-\begin{bmatrix}\mappingFunctionVector \\ \mappingFunctionVector^*\end{bmatrix} \sim \gaussianSamp{\zerosVector}{\begin{bmatrix} \kernelMatrix & \kernelMatrix_\ast \\
-\kernelMatrix_\ast^\top & \kernelMatrix_{\ast,\ast}\end{bmatrix}}
-$$ where here $\kernelMatrix$ is the covariance computed between all the training points, $\kernelMatrix_\ast$ is the covariance matrix computed between the training points and the test points and $\kernelMatrix_{\ast,\ast}$ is the covariance matrix computed betwen all the tests points and themselves. To be clear, let’s compute these now for our example, using x and y for the training data (although y doesn’t enter the covariance) and x_pred as the test locations.
Now we use this structure to visualise the covariance between test data and training data. This structure is how information is passed between test and training data. Unlike the maximum likelihood formalisms we’ve been considering so far, the structure expresses correlation between our different data points. However, just like the we now have a joint density between some variables of interest. In particular we have the joint density over $p(\mappingFunctionVector, \mappingFunctionVector^*)$. The joint density is Gaussian and zero mean. It is specified entirely by the covariance matrix, $\kernelMatrix$. That covariance matrix is, in turn, defined by a covariance function. Now we will visualise the form of that covariance in the form of the matrix, $$
-\begin{bmatrix} \kernelMatrix & \kernelMatrix_\ast \\ \kernelMatrix_\ast^\top
-& \kernelMatrix_{\ast,\ast}\end{bmatrix}
-$$
-
There are four blocks to this color plot. The upper left block is the covariance of the training data with itself, $\kernelMatrix$. We see some structure here due to the missing data from the first and second world wars. Alongside this covariance (to the right and below) we see the cross covariance between the training and the test data ($\kernelMatrix_*$ and $\kernelMatrix_*^\top$). This is giving us the covariation between our training and our test data. Finally the lower right block The banded structure we now observe is because some of the training points are near to some of the test points. This is how we obtain ‘communication’ between our training data and our test data. If there is no structure in $\kernelMatrix_*$ then our belief about the test data simply matches our prior.
We now have a probability density that represents functions. How do
+we make predictions with this density? The density is known as a process
+because it is consistent. By consistency, here, we mean that
+the model makes predictions for \(\mathbf{
+f}\) that are unaffected by future values of \(\mathbf{ f}^*\) that are currently
+unobserved (such as test points). If we think of \(\mathbf{ f}^*\) as test points, we can
+still write down a joint probability density over the training
+observations, \(\mathbf{ f}\) and the
+test observations, \(\mathbf{ f}^*\).
+This joint probability density will be Gaussian, with a covariance
+matrix given by our covariance function, \(k(\mathbf{ x}_i, \mathbf{ x}_j)\). \[
+\begin{bmatrix}\mathbf{ f}\\ \mathbf{ f}^*\end{bmatrix} \sim
+\mathcal{N}\left(\mathbf{0},\begin{bmatrix} \mathbf{K}&
+\mathbf{K}_\ast \\
+\mathbf{K}_\ast^\top & \mathbf{K}_{\ast,\ast}\end{bmatrix}\right)
+\] where here \(\mathbf{K}\) is
+the covariance computed between all the training points, \(\mathbf{K}_\ast\) is the covariance matrix
+computed between the training points and the test points and \(\mathbf{K}_{\ast,\ast}\) is the covariance
+matrix computed between all the test points and themselves. To be clear,
+let’s compute these now for our example, using x and
+y for the training data (although y doesn’t
+enter the covariance) and x_pred as the test locations.
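As a sketch of that computation (numpy/scipy; eq_cov here is an illustrative vectorised exponentiated quadratic, and x and x_pred are assumed to be arrays of shape (n, 1)):

import numpy as np
from scipy.spatial.distance import cdist

def eq_cov(X1, X2, variance=1.0, lengthscale=1.0):
    # Exponentiated quadratic covariance between two sets of inputs.
    return variance*np.exp(-0.5*cdist(X1, X2, 'sqeuclidean')/lengthscale**2)

K = eq_cov(x, x)                       # training inputs with themselves
K_star = eq_cov(x, x_pred)             # training inputs (rows) against test inputs (columns)
K_starstar = eq_cov(x_pred, x_pred)    # test inputs with themselves
# Stack the blocks to form the joint covariance used in the visualisation.
full_K = np.vstack([np.hstack([K, K_star]),
                    np.hstack([K_star.T, K_starstar])])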
Now we use this structure to visualise the covariance between test
+data and training data. This structure is how information is passed
+between test and training data. Unlike the maximum likelihood formalisms
+we’ve been considering so far, the structure expresses
+correlation between our different data points. However, just
+as before, we now have a joint density between some variables of
+interest. In particular we have the joint density over \(p(\mathbf{ f}, \mathbf{ f}^*)\). The joint
+density is Gaussian and zero mean. It is specified
+entirely by the covariance matrix, \(\mathbf{K}\). That covariance matrix is, in
+turn, defined by a covariance function. Now we will visualise the form
+of that covariance in the form of the matrix, \[
+\begin{bmatrix} \mathbf{K}& \mathbf{K}_\ast \\ \mathbf{K}_\ast^\top
+& \mathbf{K}_{\ast,\ast}\end{bmatrix}
+\]
+
+
+
+
+
+
+
+
+
+
Figure: Different blocks of the covariance function. The upper left
+block is the covariance of the training data with itself, \(\mathbf{K}\). The top right is the cross
+covariance between training data (rows) and prediction locations
+(columns). The lower left is the same matrix transposed. The bottom
+right is the covariance matrix of the test data with itself.
+
+
+
There are four blocks to this plot. The upper left block is the
+covariance of the training data with itself, \(\mathbf{K}\). We see some structure here
+due to the missing data from the first and second world wars. Alongside
+this covariance (to the right and below) we see the cross covariance
+between the training and the test data (\(\mathbf{K}_*\) and \(\mathbf{K}_*^\top\)). This is giving us the
+covariation between our training and our test data. Finally, the lower
+right block is the covariance of the test data with itself. The banded
+structure we now observe is because some of the
+training points are near to some of the test points. This is how we
+obtain ‘communication’ between our training data and our test data. If
+there is no structure in \(\mathbf{K}_*\) then our belief about the
+test data simply matches our prior.
We will consider a Gaussian distribution with a particular structure
+of covariance matrix. We will generate one sample from a
+25-dimensional Gaussian density. \[
+\mathbf{ f}=\left[f_{1},f_{2}\dots f_{25}\right].
+\] In the figure below we plot these data on the \(y\)-axis against their indices on
+the \(x\)-axis.
+
import mlai
+
+from mlai import Kernel
+
import mlai
+
+from mlai import polynomial_cov
+
import mlai
+
+from mlai import exponentiated_quadratic
+
+
+
+
+
+
+
+
+
+
Figure: A 25-dimensional correlated random variable (values plotted
+against index)
Figure: The joint Gaussian over $\mappingFunction_1$ and $\mappingFunction_2$ along with the conditional distribution of $\mappingFunction_2$ given $\mappingFunction_1$
+
Figure: The joint Gaussian over \(f_1\) and \(f_2\) along with the conditional
+distribution of \(f_2\) given \(f_1\)
-
Joint Density of f1 and f2
+
Joint Density of \(f_1\) and \(f_2\)
-
+
-
+
-
Figure: The joint Gaussian over $\mappingFunction_1$ and $\mappingFunction_2$ along with the conditional distribution of $\mappingFunction_2$ given $\mappingFunction_1$
+
Figure: The joint Gaussian over \(f_1\) and \(f_2\) along with the conditional
+distribution of \(f_2\) given \(f_1\)
-
-
The single contour of the Gaussian density represents the joint distribution, $p(\mappingFunction_1, \mappingFunction_2)$
Prediction of $\mappingFunction_2$ from $\mappingFunction_1$ requires conditional density.
-
Conditional density is also Gaussian. $$
-p(\mappingFunction_2|\mappingFunction_1) = {\mathcal{N}\left(\mappingFunction_2|\frac{\kernelScalar_{1, 2}}{\kernelScalar_{1, 1}}\mappingFunction_1,\kernelScalar_{2, 2} - \frac{\kernelScalar_{1,2}^2}{\kernelScalar_{1,1}}\right)}
-$$ where covariance of joint density is given by $$
-\kernelMatrix= \begin{bmatrix} \kernelScalar_{1, 1} & \kernelScalar_{1, 2}\\ \kernelScalar_{2, 1} & \kernelScalar_{2, 2}\end{bmatrix}
-$$
Figure: Uluru, the sacred rock in Australia. If we think of it as a
+probability density, viewing it from this side gives us one
+marginal from the density. Figuratively speaking, slicing
+through the rock would give a conditional density.
+
+
+
When viewing these contour plots, I sometimes find it helpful to
+think of Uluru, the prominent rock formation in Australia. The rock
+rises above the surface of the plane, just like a probability density
+rising above the zero line. The rock is three dimensional, but when we
+view Uluru from the classical position, we are looking at one side of
+it. This is equivalent to viewing the marginal density.
+
The joint density can be viewed from above, using contours. The
+conditional density is equivalent to slicing the rock. Uluru is
+a holy rock, so this has to be an imaginary slice. Imagine we cut down a
+vertical plane orthogonal to our view point (e.g. coming across our view
+point). This would give a profile of the rock, which when renormalized,
+would give us the conditional distribution, the value of conditioning
+would be the location of the slice in the direction we are facing.
+
Prediction with Correlated
+Gaussians
+
Of course in practice, rather than manipulating mountains physically,
+the advantage of the Gaussian density is that we can perform these
+manipulations mathematically.
+
Prediction of \(f_2\) given \(f_1\) requires the conditional
+density, \(p(f_2|f_1)\). Another
+remarkable property of the Gaussian density is that this conditional
+distribution is also guaranteed to be a Gaussian density. It
+has the form, \[
+p(f_2|f_1) = \mathcal{N}\left(f_2|\frac{k_{1, 2}}{k_{1, 1}}f_1, k_{2, 2}
+- \frac{k_{1,2}^2}{k_{1,1}}\right)
+\] where we have assumed that the covariance of the original joint
+density was given by \[
+\mathbf{K}= \begin{bmatrix} k_{1, 1} & k_{1, 2}\\ k_{2, 1} &
+k_{2, 2}\end{bmatrix}
+\]
+
Using these formulae we can determine the conditional density for any
+of the elements of our vector \(\mathbf{
+f}\). For example, the variable \(f_8\) is less correlated with \(f_1\) than \(f_2\). If we consider this variable we see
+the conditional density is more diffuse.
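A small numerical check of those formulae (the 2x2 covariance values here are illustrative):

import numpy as np

def conditional_gaussian(f1, K):
    # Mean and variance of p(f_2 | f_1) for a zero-mean joint Gaussian with
    # 2x2 covariance K, following the expressions above.
    mean = K[0, 1]/K[0, 0]*f1
    variance = K[1, 1] - K[0, 1]**2/K[0, 0]
    return mean, variance

K_strong = np.array([[1.0, 0.9], [0.9, 1.0]])   # nearby points, e.g. f_1 and f_2
K_weak = np.array([[1.0, 0.3], [0.3, 1.0]])     # distant points, e.g. f_1 and f_8
print(conditional_gaussian(1.0, K_strong))       # narrow conditional: (0.9, 0.19)
print(conditional_gaussian(1.0, K_weak))         # diffuse conditional: (0.3, 0.91)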
Figure: Sample from the joint Gaussian model, points indexed by 1 and 8 highlighted.
+
Figure: Sample from the joint Gaussian model, points indexed by 1 and
+8 highlighted.
-
Prediction of $\mappingFunction_{8}$ from $\mappingFunction_{1}$
+
Prediction of \(f_{8}\) from \(f_{1}\)
-
+
-
+
-
Figure: The joint Gaussian over $\mappingFunction_1$ and $\mappingFunction_8$ along with the conditional distribution of $\mappingFunction_8$ given $\mappingFunction_1$
+
Figure: The joint Gaussian over \(f_1\) and \(f_8\) along with the conditional
+distribution of \(f_8\) given \(f_1\)
-
The single contour of the Gaussian density represents the joint distribution, $p(\mappingFunction_1, \mappingFunction_8)$
+
The single contour of the Gaussian density represents the
+joint distribution, \(p(f_1, f_8)\)
Prediction of $\mappingFunctionVector_*$ from $\mappingFunctionVector$ requires multivariate conditional density.
-
Multivariate conditional density is also Gaussian. $$
-p(\mappingFunctionVector_*|\mappingFunctionVector) = {\mathcal{N}\left(\mappingFunctionVector_*|\kernelMatrix_{*,\mappingFunctionVector}\kernelMatrix_{\mappingFunctionVector,\mappingFunctionVector}^{-1}\mappingFunctionVector,\kernelMatrix_{*,*}-\kernelMatrix_{*,\mappingFunctionVector} \kernelMatrix_{\mappingFunctionVector,\mappingFunctionVector}^{-1}\kernelMatrix_{\mappingFunctionVector,*}\right)}
-$$
-
Here covariance of joint density is given by $$
-\kernelMatrix= \begin{bmatrix} \kernelMatrix_{\mappingFunctionVector, \mappingFunctionVector} & \kernelMatrix_{*, \mappingFunctionVector}\\ \kernelMatrix_{\mappingFunctionVector, *} & \kernelMatrix_{*, *}\end{bmatrix}
-$$
-
Prediction of $\mappingFunctionVector_*$ from $\mappingFunctionVector$ requires multivariate conditional density.
-
Multivariate conditional density is also Gaussian. $$
-p(\mappingFunctionVector_*|\mappingFunctionVector) = {\mathcal{N}\left(\mappingFunctionVector_*|\meanVector,\conditionalCovariance\right)}
-$$
Here covariance of joint density is given by $$
-\kernelMatrix= \begin{bmatrix} \kernelMatrix_{\mappingFunctionVector, \mappingFunctionVector} & \kernelMatrix_{*, \mappingFunctionVector}\\ \kernelMatrix_{\mappingFunctionVector, *} & \kernelMatrix_{*, *}\end{bmatrix}
-$$
Multivariate conditional density is also Gaussian.
+\[
+p(\mathbf{ f}_*|\mathbf{ f}) = {\mathcal{N}\left(\mathbf{
+f}_*|\mathbf{K}_{*,\mathbf{ f}}\mathbf{K}_{\mathbf{ f},\mathbf{
+f}}^{-1}\mathbf{ f},\mathbf{K}_{*,*}-\mathbf{K}_{*,\mathbf{ f}}
+\mathbf{K}_{\mathbf{ f},\mathbf{ f}}^{-1}\mathbf{K}_{\mathbf{
+f},*}\right)}
+\]
+
Here covariance of joint density is given by \[
+\mathbf{K}= \begin{bmatrix} \mathbf{K}_{\mathbf{ f}, \mathbf{ f}} &
+\mathbf{K}_{*, \mathbf{ f}}\\ \mathbf{K}_{\mathbf{ f}, *} &
+\mathbf{K}_{*, *}\end{bmatrix}
+\]
Here covariance of joint density is given by \[
+\mathbf{K}= \begin{bmatrix} \mathbf{K}_{\mathbf{ f}, \mathbf{ f}} &
+\mathbf{K}_{*, \mathbf{ f}}\\ \mathbf{K}_{\mathbf{ f}, *} &
+\mathbf{K}_{*, *}\end{bmatrix}
+\]
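A sketch of those two expressions in numpy (the argument names are illustrative):

import numpy as np

def gp_conditional(f, K_ff, K_star_f, K_star_star):
    # Mean and covariance of p(f_* | f) for the zero-mean joint Gaussian above.
    # K_ff: training covariance, K_star_f: test-by-training cross covariance,
    # K_star_star: test covariance.
    mean = K_star_f @ np.linalg.solve(K_ff, f)
    cov = K_star_star - K_star_f @ np.linalg.solve(K_ff, K_star_f.T)
    return mean, cov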
The covariance function encapsulates our assumptions about the data. The equations for the distribution of the prediction function, given the training observations, are highly sensitive to the covariation between the test locations and the training locations as expressed by the matrix $\kernelMatrix_*$. We defined a matrix A which allowed us to express our conditional mean in the form, $$
-\meanVector_\mappingFunction = \mathbf{A}^\top \dataVector,
-$$ where $\dataVector$ were our training observations. In other words our mean predictions are always a linear weighted combination of our training data. The weights are given by computing the covariation between the training and the test data ($\kernelMatrix_*$) and scaling it by the inverse covariance of the training data observations, $\left[\kernelMatrix + \dataStd^2 \eye\right]^{-1}$. This inverse is the main computational object that needs to be resolved for a Gaussian process. It has a computational burden which is $O(\numData^3)$ and a storage burden which is $O(\numData^2)$. This makes working with Gaussian processes computationally intensive for the situation where $\numData>10,000$.
The covariance function encapsulates our assumptions about the data.
+The equations for the distribution of the prediction function, given the
+training observations, are highly sensitive to the covariation between
+the test locations and the training locations as expressed by the matrix
+\(\mathbf{K}_*\). We defined a matrix
+\(\mathbf{A}\) which allowed us to
+express our conditional mean in the form, \[
+\boldsymbol{ \mu}_f= \mathbf{A}^\top \mathbf{ y},
+\] where \(\mathbf{ y}\) were
+our training observations. In other words our mean predictions
+are always a linear weighted combination of our training data.
+The weights are given by computing the covariation between the training
+and the test data (\(\mathbf{K}_*\))
+and scaling it by the inverse covariance of the training data
+observations, \(\left[\mathbf{K}+ \sigma^2
+\mathbf{I}\right]^{-1}\). This inverse is the main computational
+object that needs to be resolved for a Gaussian process. It has a
+computational burden which is \(O(n^3)\) and a storage burden which is
+\(O(n^2)\). This makes working with
+Gaussian processes computationally intensive for the situation where
+\(n>10,000\).
-
+
-
Figure: Introduction to Gaussian processes given by Neil Lawrence at the 2014 Gaussian process Winter School at the University of Sheffield.
+
Figure: Introduction to Gaussian processes given by Neil Lawrence at
+the 2014 Gaussian process Winter School at the University of
+Sheffield.
In practice we shouldn’t use the matrix inverse directly to solve the GP system. A more stable approach is to compute the Cholesky decomposition of the kernel matrix. The log determinant of the covariance can also be derived from the Cholesky decomposition.
In practice we shouldn’t use the matrix inverse directly to solve
+the GP system. A more stable approach is to compute the Cholesky
+decomposition of the kernel matrix. The log determinant of the
+covariance can also be derived from the Cholesky decomposition.
+
import mlai
+
+from mlai import update_inverse
+
GP.update_inverse = update_inverse
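A sketch of that idea in numpy/scipy (the actual update_inverse in mlai may be organised differently):

import numpy as np
from scipy.linalg import solve_triangular

def cholesky_solve_and_logdet(K, y, sigma2):
    # Factorise the noise-inclusive covariance once, then reuse the factor
    # for the linear solve and for the log determinant.
    L = np.linalg.cholesky(K + sigma2*np.eye(K.shape[0]))
    # Solve (K + sigma^2 I) alpha = y with two triangular solves.
    alpha = solve_triangular(L.T, solve_triangular(L, y, lower=True), lower=False)
    # log det(K + sigma^2 I) = 2 * sum(log(diag(L)))
    logdet = 2.0*np.sum(np.log(np.diag(L)))
    return alpha, logdet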
Capacity Control
-
Gaussian processes are sometimes seen as part of a wider family of methods known as kernel methods. Kernel methods are also based around covariance functions, but in the field they are known as Mercer kernels. Mercer kernels have interpretations as inner products in potentially infinite dimensional Hilbert spaces. This interpretation arises because, if we take α = 1, then the kernel can be expressed as $$
-\kernelMatrix = \basisMatrix\basisMatrix^\top
-$$ which imples the elements of the kernel are given by, $$
-\kernelScalar(\inputVector, \inputVector^\prime) = \basisVector(\inputVector)^\top \basisVector(\inputVector^\prime).
-$$ So we see that the kernel function is developed from an inner product between the basis functions. Mercer’s theorem tells us that any valid positive definite function can be expressed as this inner product but with the caveat that the inner product could be infinite length. This idea has been used quite widely to kernelize algorithms that depend on inner products. The kernel functions are equivalent to covariance functions and they are parameterized accordingly. In the kernel modeling community it is generally accepted that kernel parameter estimation is a difficult problem and the normal solution is to cross validate to obtain parameters. This can cause difficulties when a large number of kernel parameters need to be estimated. In Gaussian process modelling kernel parameter estimation (in the simplest case proceeds) by maximum likelihood. This involves taking gradients of the likelihood with respect to the parameters of the covariance function.
+
Gaussian processes are sometimes seen as part of a wider family of
+methods known as kernel methods. Kernel methods are also based around
+covariance functions, but in the field they are known as Mercer kernels.
+Mercer kernels have interpretations as inner products in potentially
+infinite dimensional Hilbert spaces. This interpretation arises because,
+if we take \(\alpha=1\), then the
+kernel can be expressed as \[
+\mathbf{K}= \boldsymbol{ \Phi}\boldsymbol{ \Phi}^\top
+\] which implies the elements of the kernel are given by, \[
+k(\mathbf{ x}, \mathbf{ x}^\prime) = \boldsymbol{ \phi}(\mathbf{
+x})^\top \boldsymbol{ \phi}(\mathbf{ x}^\prime).
+\] So we see that the kernel function is developed from an inner
+product between the basis functions. Mercer’s theorem tells us that any
+valid positive definite function can be expressed as this inner
+product but with the caveat that the inner product could be infinite
+length. This idea has been used quite widely to kernelize
+algorithms that depend on inner products. The kernel functions are
+equivalent to covariance functions and they are parameterized
+accordingly. In the kernel modeling community it is generally accepted
+that kernel parameter estimation is a difficult problem and the normal
+solution is to cross validate to obtain parameters. This can cause
+difficulties when a large number of kernel parameters need to be
+estimated. In Gaussian process modelling, kernel parameter estimation (in
+the simplest case) proceeds by maximum likelihood. This involves taking
+gradients of the likelihood with respect to the parameters of the
+covariance function.
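A quick numerical illustration with a finite, three-term polynomial basis, where the inner-product form is easy to verify:

import numpy as np

def polynomial_basis(X):
    # phi(x) = [1, x, x^2] for one-dimensional inputs of shape (n, 1).
    return np.hstack([np.ones_like(X), X, X**2])

X = np.linspace(-1, 1, 5)[:, np.newaxis]
Phi = polynomial_basis(X)
K = Phi @ Phi.T                                                        # K = Phi Phi^T
k_03 = (polynomial_basis(X[0:1]) @ polynomial_basis(X[3:4]).T).item()  # k(x_0, x_3)
print(np.allclose(K[0, 3], k_03))                                      # True: entries are inner products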
Gradients of the Likelihood
-
The easiest conceptual way to obtain the gradients is a two step process. The first step involves taking the gradient of the likelihood with respect to the covariance function, the second step involves considering the gradient of the covariance function with respect to its parameters.
+
The easiest conceptual way to obtain the gradients is a two step
+process. The first step involves taking the gradient of the likelihood
+with respect to the covariance function, the second step involves
+considering the gradient of the covariance function with respect to its
+parameters.
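As a sketch of the first of those two steps, the gradient of the objective above with respect to the covariance matrix, with the second (kernel-specific) step indicated in a comment:

import numpy as np

def dE_dK(K_noise, y):
    # For E = 0.5*log det(K) + 0.5*y^T K^{-1} y (with K the noise-inclusive
    # covariance), dE/dK = 0.5*(K^{-1} - K^{-1} y y^T K^{-1}).
    K_inv = np.linalg.inv(K_noise)
    alpha = K_inv @ y
    return 0.5*(K_inv - np.outer(alpha, alpha))

# Second step (kernel specific): for a parameter theta of the covariance
# function, the chain rule gives dE/dtheta = sum_ij dE/dK_ij * dK_ij/dtheta,
# i.e. np.sum(dE_dK(K_noise, y) * dK_dtheta) for a matrix of derivatives dK_dtheta.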
Overall Process Scale
-
In general we won’t be able to find parameters of the covariance function through fixed point equations, we will need to do gradient based optimization.
-
Capacity Control and Data Fit
-
The objective function can be decomposed into two terms, a capacity control term, and a data fit term. The capacity control term is the log determinant of the covariance. The data fit term is the matrix inner product between the data and the inverse covariance.
In general we won’t be able to find parameters of the covariance
+function through fixed point equations, we will need to do gradient
+based optimization.
+
Capacity Control and Data
+Fit
+
The objective function can be decomposed into two terms, a capacity
+control term, and a data fit term. The capacity control term is the log
+determinant of the covariance. The data fit term is the matrix inner
+product between the data and the inverse covariance.
Figure: The data fit term of the Gaussian process is a quadratic loss centered around zero. This has eliptical contours, the principal axes of which are given by the covariance matrix.
+
Figure: The data fit term of the Gaussian process is a quadratic loss
+centered around zero. This has elliptical contours, the principal axes of
+which are given by the covariance matrix.
The exponentiated quadratic covariance, also known as the Gaussian covariance or the RBF covariance and the squared exponential. Covariance between two points is related to the negative exponential of the squared distnace between those points. This covariance function can be derived in a few different ways: as the infinite limit of a radial basis function neural network, as diffusion in the heat equation, as a Gaussian filter in Fourier space or as the composition as a series of linear filters applied to a base function.
-
The covariance takes the following form, $$
-\kernelScalar(\inputVector, \inputVector^\prime) = \alpha \exp\left(-\frac{\ltwoNorm{\inputVector-\inputVector^\prime}^2}{2\lengthScale^2}\right)
-$$ where ℓ is the length scale or time scale of the process and α represents the overall process variance.
+
Figure: Variation in the data fit term, the capacity term and the
+negative log likelihood for different lengthscales.
The exponentiated quadratic covariance is also known as the Gaussian
+covariance, the RBF covariance, or the squared exponential. The covariance
+between two points is related to the negative exponential of the squared
+distance between those points. This covariance function can be derived
+in a few different ways: as the infinite limit of a radial basis
+function neural network, as diffusion in the heat equation, as a
+Gaussian filter in Fourier space, or as the composition of a
+series of linear filters applied to a base function.
+
The covariance takes the following form, \[
+k(\mathbf{ x}, \mathbf{ x}^\prime) = \alpha \exp\left(-\frac{\left\Vert
+\mathbf{ x}-\mathbf{ x}^\prime \right\Vert_2^2}{2\ell^2}\right)
+\] where \(\ell\) is the
+length scale or time scale of the process and \(\alpha\) represents the overall process
+variance.
If you’re interested in finding out more about Gaussian processes, you can attend the Gaussian process summer school, or view the lectures and material on line. Details of the school, future events and past events can be found at the website http://gpss.cc.
Gaussian processes are a flexible tool for non-parametric analysis with uncertainty. The GPy software was started in Sheffield to provide a easy to use interface to GPs. One which allowed the user to focus on the modelling rather than the mathematics.
+
If you’re interested in finding out more about Gaussian processes,
+you can attend the Gaussian process summer school, or view the lectures
+and material on line. Details of the school, future events and past
+events can be found at the website http://gpss.cc.
Gaussian processes are a flexible tool for non-parametric analysis
+with uncertainty. The GPy software was started in Sheffield to provide an
+easy-to-use interface to GPs, one which allowed the user to focus on the
+modelling rather than the mathematics.
-
+
-
+
-
Figure: GPy is a BSD licensed software code base for implementing Gaussian process models in Python. It is designed for teaching and modelling. We welcome contributions which can be made through the Github repository https://github.com/SheffieldML/GPy
-
-
-
GPy is a BSD licensed software code base for implementing Gaussian process models in python. This allows GPs to be combined with a wide variety of software libraries.
-
The software itself is available on GitHub and the team welcomes contributions.
-
The aim for GPy is to be a probabilistic-style programming language, i.e. you specify the model rather than the algorithm. As well as a large range of covariance functions the software allows for non-Gaussian likelihoods, multivariate outputs, dimensionality reduction and approximations for larger data sets.
Figure: GPy is a BSD licensed software code base for implementing
+Gaussian process models in Python. It is designed for teaching and
+modelling. We welcome contributions which can be made through the GitHub
+repository https://github.com/SheffieldML/GPy
+
+
+
GPy is a BSD licensed software code base for implementing Gaussian
+process models in python. This allows GPs to be combined with a wide
+variety of software libraries.
+
The software itself is available on GitHub and the team
+welcomes contributions.
+
The aim for GPy is to be a probabilistic-style programming language,
+i.e., you specify the model rather than the algorithm. As well as a
+large range of covariance functions the software allows for non-Gaussian
+likelihoods, multivariate outputs, dimensionality reduction and
+approximations for larger data sets.
To give a feel for the software we’ll start by creating an exponentiated quadratic covariance function, $$
-\kernelScalar(\inputVector, \inputVector^\prime) = \alpha \exp\left(-\frac{\ltwoNorm{\inputVector - \inputVector^\prime}^2}{2\ell^2}\right),
-$$ where the length scale is ℓ and the variance is α.
To give a feel for the software we’ll start by creating an
+exponentiated quadratic covariance function, \[
+k(\mathbf{ x}, \mathbf{ x}^\prime) = \alpha \exp\left(-\frac{\left\Vert
+\mathbf{ x}- \mathbf{ x}^\prime \right\Vert_2^2}{2\ell^2}\right),
+\] where the length scale is \(\ell\) and the variance is \(\alpha\).
To set this up in GPy we create a kernel in the following manner.
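A minimal sketch of that call (GPy.kern.RBF is GPy’s name for the exponentiated quadratic; the parameter values here are arbitrary):

import GPy

input_dim = 1   # one-dimensional inputs
kern = GPy.kern.RBF(input_dim=input_dim, variance=1.0, lengthscale=2.0)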
That builds a kernel object for us. The kernel can be displayed.
-
display(kern)
-
Or because it’s one dimensional, you can also plot the kernel as a function of its inputs (while the other is fixed).
+
display(kern)
+
Or because it’s one dimensional, you can also plot the kernel as a
+function of its inputs (while the other is fixed).
-
+
-
+
-
Figure: The exponentiated quadratic covariance function as plotted by the GPy.kern.plot command.
+
Figure: The exponentiated quadratic covariance function as plotted by
+the GPy.kern.plot command.
-
You can set the lengthscale of the covariance to different values and plot the result.
-
kern = GPy.kern.RBF(input_dim=input_dim) # By default, the parameters are set to 1.
-lengthscales = np.asarray([0.2,0.5,1.,2.,4.])
+
You can set the length scale of the covariance to different values
+and plot the result.
+
kern = GPy.kern.RBF(input_dim=input_dim) # By default, the parameters are set to 1.
+lengthscales = np.asarray([0.2,0.5,1.,2.,4.])
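One way the loop over those length scales might look (kern.plot is GPy’s kernel plotting helper; the exact plotting arguments can vary between GPy versions):

import matplotlib.pyplot as plt

fig, ax = plt.subplots(figsize=(8, 5))
for lengthscale in lengthscales:
    kern.lengthscale = lengthscale   # update the parameter in place
    kern.plot(ax=ax)                 # covariance as a function of one input, the other fixed
ax.legend(lengthscales)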
-
+
-
+
-
Figure: The exponentiated quadratic covariance function plotted for different lengthscales by GPy.kern.plot command.
+
Figure: The exponentiated quadratic covariance function plotted for
+different length scales by GPy.kern.plot command.
Covariance Functions in GPy
-
Many covariance functions are already implemented in GPy. Instead of rbf, try constructing and plotting the following covariance functions: exponential, Matern32, Matern52, Brownian, linear, bias, rbfcos, periodic_Matern32, etc. Some of these covariance functions, such as rbfcos, are not parametrized by a variance and a lengthscale. Furthermore, not all kernels are stationary (i.e., they can’t all be written as $\kernelScalar(\inputVector, \inputVector^\prime) = f(\inputVector-\inputVector^\prime)$, see for example the Brownian covariance function). For plotting so it may be interesting to change the value of the fixed input.
-
Combining Covariance Functions in GPy
-
In GPy you can easily combine covariance functions you have created using the sum and product operators, + and *. So, for example, if we wish to combine an exponentiated quadratic covariance with a Matern 5/2 then we can write
Many covariance functions are already implemented in GPy. Instead of
+rbf, try constructing and plotting the following covariance functions:
+exponential, Matern32, Matern52,
+Brownian, linear, bias,
+rbfcos, periodic_Matern32, etc. Some of these
+covariance functions, such as rbfcos, are not parametrized
+by a variance and a length scale. Further, not all kernels are
+stationary (i.e., they can’t all be written as \(k(\mathbf{ x}, \mathbf{ x}^\prime) = f(\mathbf{
+x}-\mathbf{ x}^\prime)\), see for example the Brownian covariance
+function). So for plotting it may be interesting to change the value of
+the fixed input.
+
Combining Covariance
+Functions in GPy
+
In GPy you can easily combine covariance functions you have created
+using the sum and product operators, + and *.
+So, for example, if we wish to combine an exponentiated quadratic
+covariance with a Matern 5/2 then we can write
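For example (the parameter choices here are arbitrary):

import GPy

kern_eq = GPy.kern.RBF(input_dim=1, lengthscale=2.0)
kern_matern = GPy.kern.Matern52(input_dim=1, lengthscale=0.5)
kern_combined = kern_eq + kern_matern    # sum of the two covariance functions
print(kern_combined)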
Figure: Designing the covariance function for your Gaussian process is a key place in which you introduce your understanding of the data problem. To learn more about the design of covariance functions, see this talk from Nicolas Durrande at GPSS in 2016.
-
-
-
A Gaussian Process Regression Model
-
We will now combine the Gaussian process prior with some data to form a GP regression model with GPy. We will generate data from the function $$
-\mappingFunction( \inputScalar ) = − \cos(\pi \inputScalar ) + \sin(4\pi \inputScalar )
-$$ over the domain [0, 1], adding some noise to gives $$
-\dataScalar(\inputScalar) = \mappingFunction(\inputScalar) + \noiseScalar,
-$$ with the noise being Gaussian distributed, $\noiseScalar \sim \gaussianSamp{0}{0.01}$.
Figure: Designing the covariance function for your Gaussian process
+is a key place in which you introduce your understanding of the data
+problem. To learn more about the design of covariance functions, see
+this talk from Nicolas Durrande at GPSS in 2016.
+
+
+
A Gaussian Process
+Regression Model
+
We will now combine the Gaussian process prior with some data to form
+a GP regression model with GPy. We will generate data from the function
+\[
+f(x) = -\cos(\pi x) + \sin(4\pi x)
+\] over the domain \([0, 1]\),
+adding some noise gives \[
+y(x) = f(x) + \epsilon,
+\] with the noise being Gaussian distributed, \(\epsilon\sim
+\mathcal{N}\left(0,0.01\right)\).
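A sketch of that data generation with numpy (ten training points is an arbitrary choice here):

import numpy as np

X = np.linspace(0.05, 0.95, 10)[:, np.newaxis]                          # inputs on [0, 1]
noise = np.random.normal(loc=0.0, scale=np.sqrt(0.01), size=(10, 1))    # noise variance 0.01
Y = -np.cos(np.pi*X) + np.sin(4*np.pi*X) + noise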
And then combining it with the data to form a Gaussian process model.
-
model = GPy.models.GPRegression(X,Y,kern)
-
Just as for the covariance function object, we can find out about the model using the command display(model).
-
display(model)
-
Note that by default the model includes some observation noise with variance 1. We can see the posterior mean prediction and visualize the marginal posterior variances using model.plot().
+
Figure: Data from the noisy sine wave for fitting with a GPy
+model.
+
+
+
A GP regression model based on an exponentiated quadratic covariance
+function can be defined by first defining a covariance function.
And then combining it with the data to form a Gaussian process
+model.
+
model = GPy.models.GPRegression(X,Y,kern)
+
Just as for the covariance function object, we can find out about the
+model using the command display(model).
+
display(model)
+
Note that by default the model includes some observation noise with
+variance 1. We can see the posterior mean prediction and visualize the
+marginal posterior variances using model.plot().
-
+
-
+
-
Figure: A Gaussian process fit to the noisy sine data. Here the parameters of the process and the covariance function haven’t yet been optimized.
-
-
-
You can also look directly at the predictions for the model using.
Which gives you the mean (Ystar), the variance (Vstar) at the locations given by Xstar.
-
Covariance Function Parameter Estimation
-
As we have seen during the lectures, the parameters values can be estimated by maximizing the likelihood of the observations. Since we don’t want one of the variance to become negative during the optimization, we can constrain all parameters to be positive before running the optimisation.
-
model.constrain_positive()
-
The warnings are because the parameters are already constrained by default, the software is warning us that they are being reconstrained.
-
Now we can optimize the model using the model.optimize() method. Here we switch messages on, which allows us to see the progession of the optimization.
-
model.optimize(messages=True)
-
By default the optimization is using a limited memory BFGS optimizer (Byrd, Lu, and Nocedal 1995).
-
Once again we can display the model, now to see how the parameters have changed.
-
display(model)
-
The lengthscale is much smaller, as well as the noise level. The variance of the exponentiated quadratic has also reduced.
+
Figure: A Gaussian process fit to the noisy sine data. Here the
+parameters of the process and the covariance function haven’t yet been
+optimized.
+
+
+
You can also look directly at the predictions for the model
+using.
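A sketch of that call, assuming Xstar holds a grid of test inputs (GPy’s GPRegression.predict returns the predictive mean and variance):

import numpy as np

Xstar = np.linspace(0, 1, 100)[:, np.newaxis]    # test locations (assumed grid)
Ystar, Vstar = model.predict(Xstar)              # posterior mean and variance at Xstar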
Which gives you the mean (Ystar) and the variance
+(Vstar) at the locations given by Xstar.
+
Covariance Function
+Parameter Estimation
+
+As we have seen during the lectures, the parameter values can be
+estimated by maximizing the likelihood of the observations. Since we
+don’t want any of the variances to become negative during the
+optimization, we can constrain all parameters to be positive before
+running the optimization.
+
model.constrain_positive()
+
The warnings are because the parameters are already constrained by
+default, the software is warning us that they are being
+reconstrained.
+
Now we can optimize the model using the model.optimize()
+method. Here we switch messages on, which allows us to see the
+progression of the optimization.
+
model.optimize(messages=True)
+
By default, the optimization is using a limited memory BFGS optimizer
+(Byrd et al.,
+1995).
+
Once again, we can display the model, now to see how the parameters
+have changed.
+
display(model)
+
+The length scale is much smaller, as is the noise level. The
+variance of the exponentiated quadratic has also reduced.
-
+
-
+
-
Figure: A Gaussian process fit to the noisy sine data with parameters optimized.
+
Figure: A Gaussian process fit to the noisy sine data with parameters
+optimized.
GPy has inspired other software solutions, first of all GPflow, which uses Tensor Flow’s automatic differentiation engine to allow rapid prototyping of new covariance functions and algorithms. More recently, GPyTorch uses PyTorch for the same purpose.
-
The Probabilistic programming language pyro also has GP support.
GPy has inspired other software solutions, first of all GPflow, which uses
+TensorFlow’s automatic differentiation engine to allow rapid prototyping of
+new covariance functions and algorithms. More recently, GPyTorch uses
+PyTorch for the same purpose.
+
The probabilistic programming language pyro also has GP support.
Further Reading
-
Chapter 2 of Neal (1994)
-
Rest of Neal (1994)
-
All of MacKay (1992)
+
Chapter 2 of Neal (1994)
+
Rest of Neal
+(1994)
+
All of MacKay (1992)
Thanks!
-
For more information on these subjects and more you might want to check the following resources.
+
For more information on these subjects and more you might want to
+check the following resources.
Andrade-Pacheco, Ricardo, Martin Mubangizi, John Quinn, and Neil D. Lawrence. 2014. “Consistent Mapping of Government Malaria Records Across a Changing Territory Delimitation.” Malaria Journal 13 (Suppl 1). https://doi.org/10.1186/1475-2875-13-S1-P5.
-
-
-
Byrd, Richard H., Peihuang Lu, and Jorge Nocedal. 1995. “A Limited Memory Algorithm for Bound Constrained Optimization.” SIAM Journal on Scientific and Statistical Computing 16 (5): 1190–1208.
-
-
-
Cho, Youngmin, and Lawrence K. Saul. 2009. “Kernel Methods for Deep Learning.” In Advances in Neural Information Processing Systems 22, edited by Y. Bengio, D. Schuurmans, J. D. Lafferty, C. K. I. Williams, and A. Culotta, 342–50. Curran Associates, Inc. http://papers.nips.cc/paper/3628-kernel-methods-for-deep-learning.pdf.
-
-
-
Gething, Peter W., Abdisalan M. Noor, Priscilla W. Gikandi, Esther A. A. Ogara, Simon I. Hay, Mark S. Nixon, Robert W. Snow, and Peter M. Atkinson. 2006. “Improving Imperfect Data from Health Management Information Systems in Africa Using Space–Time Geostatistics.” PLoS Medicine 3 (6). https://doi.org/10.1371/journal.pmed.0030271.
-
-
-
Ioffe, Sergey, and Christian Szegedy. 2015. “Batch Normalization: Accelerating Deep Network Training by Reducing Internal Covariate Shift.” In Proceedings of the 32nd International Conference on Machine Learning, edited by Francis Bach and David Blei, 37:448–56. Proceedings of Machine Learning Research. Lille, France: PMLR. http://proceedings.mlr.press/v37/ioffe15.html.
-
-
-
Laplace, Pierre Simon. 1814. Essai Philosophique Sur Les Probabilités. 2nd ed. Paris: Courcier.
-
-
-
MacKay, David J. C. 1992. “Bayesian Methods for Adaptive Models.” PhD thesis, California Institute of Technology.
-
-
-
Mubangizi, Martin, Ricardo Andrade-Pacheco, Michael Thomas Smith, John Quinn, and Neil D. Lawrence. 2014. “Malaria Surveillance with Multiple Data Sources Using Gaussian Process Models.” In 1st International Conference on the Use of Mobile ICT in Africa.
-
-
-
Neal, Radford M. 1994. “Bayesian Learning for Neural Networks.” PhD thesis, Dept. of Computer Science, University of Toronto.
-
-
-
Rasmussen, Carl Edward, and Christopher K. I. Williams. 2006. Gaussian Processes for Machine Learning. Cambridge, MA: mit.
-
-
-
Rogers, Simon, and Mark Girolami. 2011. A First Course in Machine Learning. CRC Press.
-
-
-
Tipping, Michael E., and Christopher M. Bishop. 1999. “Probabilistic Principal Component Analysis.” Journal of the Royal Statistical Society, B 6 (3): 611–22. https://doi.org/doi:10.1111/1467-9868.00196.
+
+
+Andrade-Pacheco, R., Mubangizi, M., Quinn, J., Lawrence, N.D., 2014.
+Consistent mapping of government malaria records across a changing
+territory delimitation. Malaria Journal 13. https://doi.org/10.1186/1475-2875-13-S1-P5
+
+
+Byrd, R.H., Lu, P., Nocedal, J., 1995. A limited memory algorithm for
+bound constrained optimization. SIAM Journal on Scientific and
+Statistical Computing 16, 1190–1208.
+
+
+Cho, Y., Saul, L.K., 2009. Kernel
+methods for deep learning, in: Bengio, Y., Schuurmans, D., Lafferty,
+J.D., Williams, C.K.I., Culotta, A. (Eds.), Advances in Neural
+Information Processing Systems 22. Curran Associates, Inc., pp. 342–350.
+
+
+Gething, P.W., Noor, A.M., Gikandi, P.W., Ogara, E.A.A., Hay, S.I.,
+Nixon, M.S., Snow, R.W., Atkinson, P.M., 2006. Improving imperfect data
+from health management information systems in Africa using
+space–time geostatistics. PLoS Medicine 3. https://doi.org/10.1371/journal.pmed.0030271
+
+Laplace, P.S., 1814. Essai philosophique sur les probabilités, 2nd ed.
+Courcier, Paris.
+
+
+MacKay, D.J.C., 1992. Bayesian methods for adaptive models (PhD thesis).
+California Institute of Technology.
+
+
+Mubangizi, M., Andrade-Pacheco, R., Smith, M.T., Quinn, J., Lawrence,
+N.D., 2014. Malaria surveillance with multiple data sources using
+Gaussian process models, in: 1st International Conference
+on the Use of Mobile ICT in Africa.
+
+
+Neal, R.M., 1994. Bayesian learning for neural networks (PhD thesis).
+Dept. of Computer Science, University of Toronto.
+
+Rogers, S., Girolami, M., 2011. A first course in machine learning. CRC
+Press.
+
+
+Tipping, M.E., Bishop, C.M., 1999. Probabilistic principal component
+analysis. Journal of the Royal Statistical Society, B 6, 611–622. https://doi.org/doi:10.1111/1467-9868.00196
diff --git a/_notebooks/01-what-is-machine-learning.ipynb b/_notebooks/01-what-is-machine-learning.ipynb
index b17ff89..99396c6 100644
--- a/_notebooks/01-what-is-machine-learning.ipynb
+++ b/_notebooks/01-what-is-machine-learning.ipynb
@@ -4,13 +4,15 @@
"cell_type": "markdown",
"metadata": {},
"source": [
- "What is Machine Learning?\n",
- "=========================\n",
+ "# What is Machine Learning?\n",
"\n",
"### [Neil D. Lawrence](http://inverseprobability.com), Amazon Cambridge\n",
"\n",
- "and University of Sheffield \\#\\#\\# 2019-06-03"
- ]
+ "and University of Sheffield\n",
+ "\n",
+ "### 2019-06-03"
+ ],
+ "id": "f21d883d-ceda-417f-8556-413a1032834d"
},
{
"cell_type": "markdown",
@@ -21,309 +23,24 @@
"prediction function and the objective function. We don’t so much focus\n",
"on the derivation of particular algorithms, but more the general\n",
"principles involved to give an idea of the machine learning *landscape*."
- ]
+ ],
+ "id": "689b2b4c-695b-4145-b2e5-d0ca1953fb7c"
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"$$\n",
- "\\newcommand{\\tk}[1]{}\n",
- "\\newcommand{\\Amatrix}{\\mathbf{A}}\n",
- "\\newcommand{\\KL}[2]{\\text{KL}\\left( #1\\,\\|\\,#2 \\right)}\n",
- "\\newcommand{\\Kaast}{\\kernelMatrix_{\\mathbf{ \\ast}\\mathbf{ \\ast}}}\n",
- "\\newcommand{\\Kastu}{\\kernelMatrix_{\\mathbf{ \\ast} \\inducingVector}}\n",
- "\\newcommand{\\Kff}{\\kernelMatrix_{\\mappingFunctionVector \\mappingFunctionVector}}\n",
- "\\newcommand{\\Kfu}{\\kernelMatrix_{\\mappingFunctionVector \\inducingVector}}\n",
- "\\newcommand{\\Kuast}{\\kernelMatrix_{\\inducingVector \\bf\\ast}}\n",
- "\\newcommand{\\Kuf}{\\kernelMatrix_{\\inducingVector \\mappingFunctionVector}}\n",
- "\\newcommand{\\Kuu}{\\kernelMatrix_{\\inducingVector \\inducingVector}}\n",
- "\\newcommand{\\Kuui}{\\Kuu^{-1}}\n",
- "\\newcommand{\\Qaast}{\\mathbf{Q}_{\\bf \\ast \\ast}}\n",
- "\\newcommand{\\Qastf}{\\mathbf{Q}_{\\ast \\mappingFunction}}\n",
- "\\newcommand{\\Qfast}{\\mathbf{Q}_{\\mappingFunctionVector \\bf \\ast}}\n",
- "\\newcommand{\\Qff}{\\mathbf{Q}_{\\mappingFunctionVector \\mappingFunctionVector}}\n",
- "\\newcommand{\\aMatrix}{\\mathbf{A}}\n",
- "\\newcommand{\\aScalar}{a}\n",
- "\\newcommand{\\aVector}{\\mathbf{a}}\n",
- "\\newcommand{\\acceleration}{a}\n",
- "\\newcommand{\\bMatrix}{\\mathbf{B}}\n",
- "\\newcommand{\\bScalar}{b}\n",
- "\\newcommand{\\bVector}{\\mathbf{b}}\n",
- "\\newcommand{\\basisFunc}{\\phi}\n",
- "\\newcommand{\\basisFuncVector}{\\boldsymbol{ \\basisFunc}}\n",
- "\\newcommand{\\basisFunction}{\\phi}\n",
- "\\newcommand{\\basisLocation}{\\mu}\n",
- "\\newcommand{\\basisMatrix}{\\boldsymbol{ \\Phi}}\n",
- "\\newcommand{\\basisScalar}{\\basisFunction}\n",
- "\\newcommand{\\basisVector}{\\boldsymbol{ \\basisFunction}}\n",
- "\\newcommand{\\activationFunction}{\\phi}\n",
- "\\newcommand{\\activationMatrix}{\\boldsymbol{ \\Phi}}\n",
- "\\newcommand{\\activationScalar}{\\basisFunction}\n",
- "\\newcommand{\\activationVector}{\\boldsymbol{ \\basisFunction}}\n",
- "\\newcommand{\\bigO}{\\mathcal{O}}\n",
- "\\newcommand{\\binomProb}{\\pi}\n",
- "\\newcommand{\\cMatrix}{\\mathbf{C}}\n",
- "\\newcommand{\\cbasisMatrix}{\\hat{\\boldsymbol{ \\Phi}}}\n",
- "\\newcommand{\\cdataMatrix}{\\hat{\\dataMatrix}}\n",
- "\\newcommand{\\cdataScalar}{\\hat{\\dataScalar}}\n",
- "\\newcommand{\\cdataVector}{\\hat{\\dataVector}}\n",
- "\\newcommand{\\centeredKernelMatrix}{\\mathbf{ \\MakeUppercase{\\centeredKernelScalar}}}\n",
- "\\newcommand{\\centeredKernelScalar}{b}\n",
- "\\newcommand{\\centeredKernelVector}{\\centeredKernelScalar}\n",
- "\\newcommand{\\centeringMatrix}{\\mathbf{H}}\n",
- "\\newcommand{\\chiSquaredDist}[2]{\\chi_{#1}^{2}\\left(#2\\right)}\n",
- "\\newcommand{\\chiSquaredSamp}[1]{\\chi_{#1}^{2}}\n",
- "\\newcommand{\\conditionalCovariance}{\\boldsymbol{ \\Sigma}}\n",
- "\\newcommand{\\coregionalizationMatrix}{\\mathbf{B}}\n",
- "\\newcommand{\\coregionalizationScalar}{b}\n",
- "\\newcommand{\\coregionalizationVector}{\\mathbf{ \\coregionalizationScalar}}\n",
- "\\newcommand{\\covDist}[2]{\\text{cov}_{#2}\\left(#1\\right)}\n",
- "\\newcommand{\\covSamp}[1]{\\text{cov}\\left(#1\\right)}\n",
- "\\newcommand{\\covarianceScalar}{c}\n",
- "\\newcommand{\\covarianceVector}{\\mathbf{ \\covarianceScalar}}\n",
- "\\newcommand{\\covarianceMatrix}{\\mathbf{C}}\n",
- "\\newcommand{\\covarianceMatrixTwo}{\\boldsymbol{ \\Sigma}}\n",
- "\\newcommand{\\croupierScalar}{s}\n",
- "\\newcommand{\\croupierVector}{\\mathbf{ \\croupierScalar}}\n",
- "\\newcommand{\\croupierMatrix}{\\mathbf{ \\MakeUppercase{\\croupierScalar}}}\n",
- "\\newcommand{\\dataDim}{p}\n",
- "\\newcommand{\\dataIndex}{i}\n",
- "\\newcommand{\\dataIndexTwo}{j}\n",
- "\\newcommand{\\dataMatrix}{\\mathbf{Y}}\n",
- "\\newcommand{\\dataScalar}{y}\n",
- "\\newcommand{\\dataSet}{\\mathcal{D}}\n",
- "\\newcommand{\\dataStd}{\\sigma}\n",
- "\\newcommand{\\dataVector}{\\mathbf{ \\dataScalar}}\n",
- "\\newcommand{\\decayRate}{d}\n",
- "\\newcommand{\\degreeMatrix}{\\mathbf{ \\MakeUppercase{\\degreeScalar}}}\n",
- "\\newcommand{\\degreeScalar}{d}\n",
- "\\newcommand{\\degreeVector}{\\mathbf{ \\degreeScalar}}\n",
- "\\newcommand{\\diag}[1]{\\text{diag}\\left(#1\\right)}\n",
- "\\newcommand{\\diagonalMatrix}{\\mathbf{D}}\n",
- "\\newcommand{\\diff}[2]{\\frac{\\text{d}#1}{\\text{d}#2}}\n",
- "\\newcommand{\\diffTwo}[2]{\\frac{\\text{d}^2#1}{\\text{d}#2^2}}\n",
- "\\newcommand{\\displacement}{x}\n",
- "\\newcommand{\\displacementVector}{\\textbf{\\displacement}}\n",
- "\\newcommand{\\distanceMatrix}{\\mathbf{ \\MakeUppercase{\\distanceScalar}}}\n",
- "\\newcommand{\\distanceScalar}{d}\n",
- "\\newcommand{\\distanceVector}{\\mathbf{ \\distanceScalar}}\n",
- "\\newcommand{\\eigenvaltwo}{\\ell}\n",
- "\\newcommand{\\eigenvaltwoMatrix}{\\mathbf{L}}\n",
- "\\newcommand{\\eigenvaltwoVector}{\\mathbf{l}}\n",
- "\\newcommand{\\eigenvalue}{\\lambda}\n",
- "\\newcommand{\\eigenvalueMatrix}{\\boldsymbol{ \\Lambda}}\n",
- "\\newcommand{\\eigenvalueVector}{\\boldsymbol{ \\lambda}}\n",
- "\\newcommand{\\eigenvector}{\\mathbf{ \\eigenvectorScalar}}\n",
- "\\newcommand{\\eigenvectorMatrix}{\\mathbf{U}}\n",
- "\\newcommand{\\eigenvectorScalar}{u}\n",
- "\\newcommand{\\eigenvectwo}{\\mathbf{v}}\n",
- "\\newcommand{\\eigenvectwoMatrix}{\\mathbf{V}}\n",
- "\\newcommand{\\eigenvectwoScalar}{v}\n",
- "\\newcommand{\\entropy}[1]{\\mathcal{H}\\left(#1\\right)}\n",
- "\\newcommand{\\errorFunction}{E}\n",
- "\\newcommand{\\expDist}[2]{\\left<#1\\right>_{#2}}\n",
- "\\newcommand{\\expSamp}[1]{\\left<#1\\right>}\n",
- "\\newcommand{\\expectation}[1]{\\left\\langle #1 \\right\\rangle }\n",
- "\\newcommand{\\expectationDist}[2]{\\left\\langle #1 \\right\\rangle _{#2}}\n",
- "\\newcommand{\\expectedDistanceMatrix}{\\mathcal{D}}\n",
- "\\newcommand{\\eye}{\\mathbf{I}}\n",
- "\\newcommand{\\fantasyDim}{r}\n",
- "\\newcommand{\\fantasyMatrix}{\\mathbf{ \\MakeUppercase{\\fantasyScalar}}}\n",
- "\\newcommand{\\fantasyScalar}{z}\n",
- "\\newcommand{\\fantasyVector}{\\mathbf{ \\fantasyScalar}}\n",
- "\\newcommand{\\featureStd}{\\varsigma}\n",
- "\\newcommand{\\gammaCdf}[3]{\\mathcal{GAMMA CDF}\\left(#1|#2,#3\\right)}\n",
- "\\newcommand{\\gammaDist}[3]{\\mathcal{G}\\left(#1|#2,#3\\right)}\n",
- "\\newcommand{\\gammaSamp}[2]{\\mathcal{G}\\left(#1,#2\\right)}\n",
- "\\newcommand{\\gaussianDist}[3]{\\mathcal{N}\\left(#1|#2,#3\\right)}\n",
- "\\newcommand{\\gaussianSamp}[2]{\\mathcal{N}\\left(#1,#2\\right)}\n",
- "\\newcommand{\\given}{|}\n",
- "\\newcommand{\\half}{\\frac{1}{2}}\n",
- "\\newcommand{\\heaviside}{H}\n",
- "\\newcommand{\\hiddenMatrix}{\\mathbf{ \\MakeUppercase{\\hiddenScalar}}}\n",
- "\\newcommand{\\hiddenScalar}{h}\n",
- "\\newcommand{\\hiddenVector}{\\mathbf{ \\hiddenScalar}}\n",
- "\\newcommand{\\identityMatrix}{\\eye}\n",
- "\\newcommand{\\inducingInputScalar}{z}\n",
- "\\newcommand{\\inducingInputVector}{\\mathbf{ \\inducingInputScalar}}\n",
- "\\newcommand{\\inducingInputMatrix}{\\mathbf{Z}}\n",
- "\\newcommand{\\inducingScalar}{u}\n",
- "\\newcommand{\\inducingVector}{\\mathbf{ \\inducingScalar}}\n",
- "\\newcommand{\\inducingMatrix}{\\mathbf{U}}\n",
- "\\newcommand{\\inlineDiff}[2]{\\text{d}#1/\\text{d}#2}\n",
- "\\newcommand{\\inputDim}{q}\n",
- "\\newcommand{\\inputMatrix}{\\mathbf{X}}\n",
- "\\newcommand{\\inputScalar}{x}\n",
- "\\newcommand{\\inputSpace}{\\mathcal{X}}\n",
- "\\newcommand{\\inputVals}{\\inputVector}\n",
- "\\newcommand{\\inputVector}{\\mathbf{ \\inputScalar}}\n",
- "\\newcommand{\\iterNum}{k}\n",
- "\\newcommand{\\kernel}{\\kernelScalar}\n",
- "\\newcommand{\\kernelMatrix}{\\mathbf{K}}\n",
- "\\newcommand{\\kernelScalar}{k}\n",
- "\\newcommand{\\kernelVector}{\\mathbf{ \\kernelScalar}}\n",
- "\\newcommand{\\kff}{\\kernelScalar_{\\mappingFunction \\mappingFunction}}\n",
- "\\newcommand{\\kfu}{\\kernelVector_{\\mappingFunction \\inducingScalar}}\n",
- "\\newcommand{\\kuf}{\\kernelVector_{\\inducingScalar \\mappingFunction}}\n",
- "\\newcommand{\\kuu}{\\kernelVector_{\\inducingScalar \\inducingScalar}}\n",
- "\\newcommand{\\lagrangeMultiplier}{\\lambda}\n",
- "\\newcommand{\\lagrangeMultiplierMatrix}{\\boldsymbol{ \\Lambda}}\n",
- "\\newcommand{\\lagrangian}{L}\n",
- "\\newcommand{\\laplacianFactor}{\\mathbf{ \\MakeUppercase{\\laplacianFactorScalar}}}\n",
- "\\newcommand{\\laplacianFactorScalar}{m}\n",
- "\\newcommand{\\laplacianFactorVector}{\\mathbf{ \\laplacianFactorScalar}}\n",
- "\\newcommand{\\laplacianMatrix}{\\mathbf{L}}\n",
- "\\newcommand{\\laplacianScalar}{\\ell}\n",
- "\\newcommand{\\laplacianVector}{\\mathbf{ \\ell}}\n",
- "\\newcommand{\\latentDim}{q}\n",
- "\\newcommand{\\latentDistanceMatrix}{\\boldsymbol{ \\Delta}}\n",
- "\\newcommand{\\latentDistanceScalar}{\\delta}\n",
- "\\newcommand{\\latentDistanceVector}{\\boldsymbol{ \\delta}}\n",
- "\\newcommand{\\latentForce}{f}\n",
- "\\newcommand{\\latentFunction}{u}\n",
- "\\newcommand{\\latentFunctionVector}{\\mathbf{ \\latentFunction}}\n",
- "\\newcommand{\\latentFunctionMatrix}{\\mathbf{ \\MakeUppercase{\\latentFunction}}}\n",
- "\\newcommand{\\latentIndex}{j}\n",
- "\\newcommand{\\latentScalar}{z}\n",
- "\\newcommand{\\latentVector}{\\mathbf{ \\latentScalar}}\n",
- "\\newcommand{\\latentMatrix}{\\mathbf{Z}}\n",
- "\\newcommand{\\learnRate}{\\eta}\n",
- "\\newcommand{\\lengthScale}{\\ell}\n",
- "\\newcommand{\\rbfWidth}{\\ell}\n",
- "\\newcommand{\\likelihoodBound}{\\mathcal{L}}\n",
- "\\newcommand{\\likelihoodFunction}{L}\n",
- "\\newcommand{\\locationScalar}{\\mu}\n",
- "\\newcommand{\\locationVector}{\\boldsymbol{ \\locationScalar}}\n",
- "\\newcommand{\\locationMatrix}{\\mathbf{M}}\n",
- "\\newcommand{\\variance}[1]{\\text{var}\\left( #1 \\right)}\n",
- "\\newcommand{\\mappingFunction}{f}\n",
- "\\newcommand{\\mappingFunctionMatrix}{\\mathbf{F}}\n",
- "\\newcommand{\\mappingFunctionTwo}{g}\n",
- "\\newcommand{\\mappingFunctionTwoMatrix}{\\mathbf{G}}\n",
- "\\newcommand{\\mappingFunctionTwoVector}{\\mathbf{ \\mappingFunctionTwo}}\n",
- "\\newcommand{\\mappingFunctionVector}{\\mathbf{ \\mappingFunction}}\n",
- "\\newcommand{\\scaleScalar}{s}\n",
- "\\newcommand{\\mappingScalar}{w}\n",
- "\\newcommand{\\mappingVector}{\\mathbf{ \\mappingScalar}}\n",
- "\\newcommand{\\mappingMatrix}{\\mathbf{W}}\n",
- "\\newcommand{\\mappingScalarTwo}{v}\n",
- "\\newcommand{\\mappingVectorTwo}{\\mathbf{ \\mappingScalarTwo}}\n",
- "\\newcommand{\\mappingMatrixTwo}{\\mathbf{V}}\n",
- "\\newcommand{\\maxIters}{K}\n",
- "\\newcommand{\\meanMatrix}{\\mathbf{M}}\n",
- "\\newcommand{\\meanScalar}{\\mu}\n",
- "\\newcommand{\\meanTwoMatrix}{\\mathbf{M}}\n",
- "\\newcommand{\\meanTwoScalar}{m}\n",
- "\\newcommand{\\meanTwoVector}{\\mathbf{ \\meanTwoScalar}}\n",
- "\\newcommand{\\meanVector}{\\boldsymbol{ \\meanScalar}}\n",
- "\\newcommand{\\mrnaConcentration}{m}\n",
- "\\newcommand{\\naturalFrequency}{\\omega}\n",
- "\\newcommand{\\neighborhood}[1]{\\mathcal{N}\\left( #1 \\right)}\n",
- "\\newcommand{\\neilurl}{http://inverseprobability.com/}\n",
- "\\newcommand{\\noiseMatrix}{\\boldsymbol{ E}}\n",
- "\\newcommand{\\noiseScalar}{\\epsilon}\n",
- "\\newcommand{\\noiseVector}{\\boldsymbol{ \\epsilon}}\n",
- "\\newcommand{\\norm}[1]{\\left\\Vert #1 \\right\\Vert}\n",
- "\\newcommand{\\normalizedLaplacianMatrix}{\\hat{\\mathbf{L}}}\n",
- "\\newcommand{\\normalizedLaplacianScalar}{\\hat{\\ell}}\n",
- "\\newcommand{\\normalizedLaplacianVector}{\\hat{\\mathbf{ \\ell}}}\n",
- "\\newcommand{\\numActive}{m}\n",
- "\\newcommand{\\numBasisFunc}{m}\n",
- "\\newcommand{\\numComponents}{m}\n",
- "\\newcommand{\\numComps}{K}\n",
- "\\newcommand{\\numData}{n}\n",
- "\\newcommand{\\numFeatures}{K}\n",
- "\\newcommand{\\numHidden}{h}\n",
- "\\newcommand{\\numInducing}{m}\n",
- "\\newcommand{\\numLayers}{\\ell}\n",
- "\\newcommand{\\numNeighbors}{K}\n",
- "\\newcommand{\\numSequences}{s}\n",
- "\\newcommand{\\numSuccess}{s}\n",
- "\\newcommand{\\numTasks}{m}\n",
- "\\newcommand{\\numTime}{T}\n",
- "\\newcommand{\\numTrials}{S}\n",
- "\\newcommand{\\outputIndex}{j}\n",
- "\\newcommand{\\paramVector}{\\boldsymbol{ \\theta}}\n",
- "\\newcommand{\\parameterMatrix}{\\boldsymbol{ \\Theta}}\n",
- "\\newcommand{\\parameterScalar}{\\theta}\n",
- "\\newcommand{\\parameterVector}{\\boldsymbol{ \\parameterScalar}}\n",
- "\\newcommand{\\partDiff}[2]{\\frac{\\partial#1}{\\partial#2}}\n",
- "\\newcommand{\\precisionScalar}{j}\n",
- "\\newcommand{\\precisionVector}{\\mathbf{ \\precisionScalar}}\n",
- "\\newcommand{\\precisionMatrix}{\\mathbf{J}}\n",
- "\\newcommand{\\pseudotargetScalar}{\\widetilde{y}}\n",
- "\\newcommand{\\pseudotargetVector}{\\mathbf{ \\pseudotargetScalar}}\n",
- "\\newcommand{\\pseudotargetMatrix}{\\mathbf{ \\widetilde{Y}}}\n",
- "\\newcommand{\\rank}[1]{\\text{rank}\\left(#1\\right)}\n",
- "\\newcommand{\\rayleighDist}[2]{\\mathcal{R}\\left(#1|#2\\right)}\n",
- "\\newcommand{\\rayleighSamp}[1]{\\mathcal{R}\\left(#1\\right)}\n",
- "\\newcommand{\\responsibility}{r}\n",
- "\\newcommand{\\rotationScalar}{r}\n",
- "\\newcommand{\\rotationVector}{\\mathbf{ \\rotationScalar}}\n",
- "\\newcommand{\\rotationMatrix}{\\mathbf{R}}\n",
- "\\newcommand{\\sampleCovScalar}{s}\n",
- "\\newcommand{\\sampleCovVector}{\\mathbf{ \\sampleCovScalar}}\n",
- "\\newcommand{\\sampleCovMatrix}{\\mathbf{s}}\n",
- "\\newcommand{\\scalarProduct}[2]{\\left\\langle{#1},{#2}\\right\\rangle}\n",
- "\\newcommand{\\sign}[1]{\\text{sign}\\left(#1\\right)}\n",
- "\\newcommand{\\sigmoid}[1]{\\sigma\\left(#1\\right)}\n",
- "\\newcommand{\\singularvalue}{\\ell}\n",
- "\\newcommand{\\singularvalueMatrix}{\\mathbf{L}}\n",
- "\\newcommand{\\singularvalueVector}{\\mathbf{l}}\n",
- "\\newcommand{\\sorth}{\\mathbf{u}}\n",
- "\\newcommand{\\spar}{\\lambda}\n",
- "\\newcommand{\\trace}[1]{\\text{tr}\\left(#1\\right)}\n",
- "\\newcommand{\\BasalRate}{B}\n",
- "\\newcommand{\\DampingCoefficient}{C}\n",
- "\\newcommand{\\DecayRate}{D}\n",
- "\\newcommand{\\Displacement}{X}\n",
- "\\newcommand{\\LatentForce}{F}\n",
- "\\newcommand{\\Mass}{M}\n",
- "\\newcommand{\\Sensitivity}{S}\n",
- "\\newcommand{\\basalRate}{b}\n",
- "\\newcommand{\\dampingCoefficient}{c}\n",
- "\\newcommand{\\mass}{m}\n",
- "\\newcommand{\\sensitivity}{s}\n",
- "\\newcommand{\\springScalar}{\\kappa}\n",
- "\\newcommand{\\springVector}{\\boldsymbol{ \\kappa}}\n",
- "\\newcommand{\\springMatrix}{\\boldsymbol{ \\mathcal{K}}}\n",
- "\\newcommand{\\tfConcentration}{p}\n",
- "\\newcommand{\\tfDecayRate}{\\delta}\n",
- "\\newcommand{\\tfMrnaConcentration}{f}\n",
- "\\newcommand{\\tfVector}{\\mathbf{ \\tfConcentration}}\n",
- "\\newcommand{\\velocity}{v}\n",
- "\\newcommand{\\sufficientStatsScalar}{g}\n",
- "\\newcommand{\\sufficientStatsVector}{\\mathbf{ \\sufficientStatsScalar}}\n",
- "\\newcommand{\\sufficientStatsMatrix}{\\mathbf{G}}\n",
- "\\newcommand{\\switchScalar}{s}\n",
- "\\newcommand{\\switchVector}{\\mathbf{ \\switchScalar}}\n",
- "\\newcommand{\\switchMatrix}{\\mathbf{S}}\n",
- "\\newcommand{\\tr}[1]{\\text{tr}\\left(#1\\right)}\n",
- "\\newcommand{\\loneNorm}[1]{\\left\\Vert #1 \\right\\Vert_1}\n",
- "\\newcommand{\\ltwoNorm}[1]{\\left\\Vert #1 \\right\\Vert_2}\n",
- "\\newcommand{\\onenorm}[1]{\\left\\vert#1\\right\\vert_1}\n",
- "\\newcommand{\\twonorm}[1]{\\left\\Vert #1 \\right\\Vert}\n",
- "\\newcommand{\\vScalar}{v}\n",
- "\\newcommand{\\vVector}{\\mathbf{v}}\n",
- "\\newcommand{\\vMatrix}{\\mathbf{V}}\n",
- "\\newcommand{\\varianceDist}[2]{\\text{var}_{#2}\\left( #1 \\right)}\n",
- "\\newcommand{\\vecb}[1]{\\left(#1\\right):}\n",
- "\\newcommand{\\weightScalar}{w}\n",
- "\\newcommand{\\weightVector}{\\mathbf{ \\weightScalar}}\n",
- "\\newcommand{\\weightMatrix}{\\mathbf{W}}\n",
- "\\newcommand{\\weightedAdjacencyMatrix}{\\mathbf{A}}\n",
- "\\newcommand{\\weightedAdjacencyScalar}{a}\n",
- "\\newcommand{\\weightedAdjacencyVector}{\\mathbf{ \\weightedAdjacencyScalar}}\n",
- "\\newcommand{\\onesVector}{\\mathbf{1}}\n",
- "\\newcommand{\\zerosVector}{\\mathbf{0}}\n",
"$$"
- ]
+ ],
+ "id": "7c9ef05b-6c24-4761-a09d-77d234bbb531"
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
+ "::: {.cell .markdown}\n",
+ "\n",
"\n",
"\n",
"\n",
@@ -333,39 +50,45 @@
""
- ]
+ ],
+ "id": "ebdd54d4-aa90-4cd5-bdea-568cdbe8da61"
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
- "Introduction\n",
- "============"
- ]
+ "# Introduction"
+ ],
+ "id": "31e9141c-346b-496e-ab0d-0d18dc494fe7"
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
- "Data Science Africa\n",
- "-------------------\n",
+ "## Data Science Africa\n",
+ "\n",
+ "\\[edit\\]\n",
+ "\n",
+ "\n",
"\n",
- "\n",
+ "Figure: Data Science Africa is a\n",
+ "ground up initiative for capacity building around data science, machine\n",
+ "learning and artificial intelligence on the African continent.\n",
"\n",
- "Figure: Data Science Africa\n",
- "http://datascienceafrica.org\n",
- "is a ground up initiative for capacity building around data science,\n",
- "machine learning and artificial intelligence on the African\n",
- "continent.\n",
+ "\n",
"\n",
+ "Figure: Data Science Africa meetings held up to October 2021.\n",
"Data Science Africa is a bottom up initiative for capacity building in\n",
"data science, machine learning and artificial intelligence on the\n",
"African continent.\n",
"\n",
- "As of 2019 there have been five workshops and five schools, located in\n",
- "Nyeri, Kenya (twice); Kampala, Uganda; Arusha, Tanzania; Abuja, Nigeria;\n",
- "Addis Ababa, Ethiopia and Accra, Ghana. The next event is scheduled for\n",
- "June 2020 in Kampala, Uganda.\n",
+ "As of May 2023 there have been eleven workshops and schools, located in\n",
+ "seven different countries: Nyeri, Kenya (twice); Kampala, Uganda;\n",
+ "Arusha, Tanzania; Abuja, Nigeria; Addis Ababa, Ethiopia; Accra, Ghana;\n",
+ "Kampala, Uganda and Kimberley, South Africa (virtual), and in Kigali,\n",
+ "Rwanda.\n",
"\n",
"The main notion is *end-to-end* data science. For example, going from\n",
"data collection in the farmer’s field to decision making in the Ministry\n",
@@ -383,7 +106,7 @@
"Kenya. The organising board of the meeting is entirely made up of\n",
"scientists and academics based on the African continent.\n",
"\n",
- "\n",
+ "\n",
"\n",
"Figure: The lack of existing physical infrastructure on the African\n",
"continent makes it a particularly interesting environment for deploying\n",
@@ -394,14 +117,18 @@
"\n",
"Guardian article on [Data Science\n",
"Africa](https://www.theguardian.com/media-network/2015/aug/25/africa-benefit-data-science-information)"
- ]
+ ],
+ "id": "8c2a9b93-8793-4af7-b531-863aa6dc95d7"
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
- "Example: Prediction of Malaria Incidence in Uganda\n",
- "--------------------------------------------------\n",
+ "## Example: Prediction of Malaria Incidence in Uganda\n",
+ "\n",
+ "\\[edit\\]\n",
"\n",
"\n",
"\n",
"\n",
"\n",
@@ -478,7 +205,12 @@
"collaboration with John Quinn and Martin Mubangizi (Andrade-Pacheco et\n",
"al., 2014; Mubangizi et al., 2014). John and Martin were initally from\n",
"the AI-DEV group from the University of Makerere in Kampala and more\n",
- "latterly they were based at UN Global Pulse in Kampala.\n",
+ "latterly they were based at UN Global Pulse in Kampala. You can see the\n",
+ "work summarized on the UN Global Pulse [disease outbreaks project site\n",
+ "here](https://diseaseoutbreaks.unglobalpulse.net/uganda/).\n",
+ "\n",
+ "- See [UN Global Pulse Disease Outbreaks\n",
+ " Site](https://diseaseoutbreaks.unglobalpulse.net/uganda/)\n",
"\n",
"Malaria data is spatial data. Uganda is split into districts, and health\n",
"reports can be found for each district. This suggests that models such\n",
@@ -488,20 +220,19 @@
"location within a district, such as Nagongera which is a sentinel site\n",
"based in the Tororo district.\n",
"\n",
- "\n",
+ "\n",
"\n",
- "Figure: Ugandan districs. Data SRTM/NASA from\n",
- "https://dds.cr.usgs.gov/srtm/version2_1.\n",
+ "Figure: Ugandan districts. Data SRTM/NASA from\n",
+ ".\n",
"\n",
- "(Andrade-Pacheco et al., 2014; Mubangizi\n",
- "et al., 2014)\n",
+ "(Andrade-Pacheco et al., 2014; Mubangizi et al., 2014)\n",
"\n",
"The common standard for collecting health data on the African continent\n",
"is from the Health management information systems (HMIS). However, this\n",
"data suffers from missing values (Gething et al., 2006) and diagnosis of\n",
"diseases like typhoid and malaria may be confounded.\n",
"\n",
- "\n",
+ "\n",
"\n",
"Figure: The Tororo district, where the sentinel site, Nagongera, is\n",
"located.\n",
@@ -513,7 +244,7 @@
"sites give accurate assessment of malaria disease levels in Uganda,\n",
"including a site in Nagongera.\n",
"\n",
- "\n",
+ "\n",
"\n",
"Figure: Sentinel and HMIS data along with rainfall and temperature\n",
"for the Nagongera sentinel station in the Tororo district.\n",
@@ -528,33 +259,33 @@
"and temperature, to improve predictions from HMIS data of levels of\n",
"malaria.\n",
"\n",
- "\n",
+ "\n",
"\n",
"Figure: The Mubende District.\n",
"\n",
- "\n",
+ "\n",
"\n",
"Figure: Prediction of malaria incidence in Mubende.\n",
"\n",
- "\n",
+ "\n",
"\n",
"Figure: The project arose out of the Gaussian process summer school\n",
"held at Makerere in Kampala in 2013. The school led, in turn, to the\n",
"Data Science Africa initiative."
- ]
+ ],
+ "id": "ffda8944-a56f-4553-aabd-3d3940c5886a"
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
- "Early Warning Systems\n",
- "---------------------\n",
+ "## Early Warning Systems\n",
"\n",
- "\n",
+ "\n",
"\n",
"Figure: The Kabarole district in Uganda.\n",
"\n",
- "\n",
+ "\n",
"\n",
"Figure: Estimate of the current disease situation in the Kabarole\n",
"district over time. Estimate is constructed with a Gaussian process with\n",
@@ -582,7 +313,7 @@
"Finally, there is a gray region which represents when the scale of the\n",
"effect is small.\n",
"\n",
- "\n",
+ "\n",
"\n",
"Figure: The map of Ugandan districts with an overview of the Malaria\n",
"situation in each district.\n",
@@ -590,39 +321,39 @@
"These colors can now be observed directly on a spatial map of the\n",
"districts to give an immediate impression of the current status of the\n",
"disease across the country."
- ]
+ ],
+ "id": "698fb2a0-0ac9-416c-b5cc-1d75d5db5be5"
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
- "Machine Learning\n",
- "----------------\n",
+ "## Machine Learning\n",
"\n",
"This talk is a general introduction to machine learning, we will\n",
"highlight the technical challenges and the current solutions. We will\n",
"give an overview of what is machine learning and why it is important."
- ]
+ ],
+ "id": "f8166f49-e4e9-4a6b-937e-2775d36dda59"
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
- "Rise of Machine Learning\n",
- "------------------------\n",
+ "## Rise of Machine Learning\n",
"\n",
"Machine learning is the combination of data and models, through\n",
"computation, to make predictions. $$\n",
"\\text{data} + \\text{model} \\stackrel{\\text{compute}}{\\rightarrow} \\text{prediction}\n",
"$$"
- ]
+ ],
+ "id": "5e370d0b-eaa5-41e2-beb1-4a31b4a0ba95"
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
- "Data Revolution\n",
- "---------------\n",
+ "## Data Revolution\n",
"\n",
"Machine learning has risen in prominence due to the rise in data\n",
"availability, and its interconnection with computers. The high bandwidth\n",
@@ -630,21 +361,25 @@
"us and data via the computer. It is that channel that is being mediated\n",
"by machine learning techniques.\n",
"\n",
- "\n",
+ "\n",
"\n",
"Figure: Large amounts of data and high interconnection bandwidth mean\n",
"that we receive much of our information about the world around us\n",
"through computers."
- ]
+ ],
+ "id": "919f2f72-7120-42e7-bc55-0ce767c6c85f"
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
- "Supply Chain\n",
- "------------\n",
+ "## Supply Chain\n",
+ "\n",
+ "\\[edit\\]\n",
"\n",
- "\n",
+ "\n",
"\n",
"Figure: Packhorse Bridge under Burbage Edge. This packhorse route\n",
"climbs steeply out of Hathersage and heads towards Sheffield. Packhorses\n",
@@ -669,16 +404,20 @@
"The movement of goods from regions of supply to areas of demand is\n",
"fundamental to our society. The physical infrastructure of supply chain\n",
"has evolved a great deal over the last 300 years."
- ]
+ ],
+ "id": "f99c5901-b479-4b1c-9988-0811aab910b8"
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
- "Cromford\n",
- "--------\n",
+ "## Cromford\n",
"\n",
- "\n",
+ "\\[edit\\]\n",
+ "\n",
+ "\n",
"\n",
"Figure: Richard Arkwright is regarded of the founder of the modern\n",
"factory system. Factories exploit distribution networks to centralize\n",
@@ -716,16 +455,20 @@
"railway built in Britain.\n",
"\n",
"Cooper (1991)"
- ]
+ ],
+ "id": "d8bcba69-064a-4b16-960b-3a70d30ea04c"
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
- "Containerization\n",
- "----------------\n",
+ "## Containerization\n",
+ "\n",
+ "\\[edit\\]\n",
"\n",
- "\n",
+ "\n",
"\n",
"Figure: The container is one of the major drivers of globalization,\n",
"and arguably the largest agent of social change in the last 100 years.\n",
@@ -742,12 +485,12 @@
"
\n",
"
\n",
"\n",
- "\n",
+ "\n",
"\n",
"
\n",
"
\n",
"\n",
- "\n",
+ "\n",
"\n",
"
\n",
"
\n",
@@ -761,9 +504,16 @@
"in China, sold in North America. This is driven by the low cost of\n",
"transport for frozen cod vs the higher relative cost of cod processing\n",
"in the US versus China. Similarly,\n",
- "Scottish\n",
+ "Scottish\n",
"prawns are also processed in China for sale in the UK.\n",
"\n",
+ "\n",
+ "\n",
+ "Figure: The transport cost of most foods is a very small portion of\n",
+ "the total cost. The exception is if foods are air freighted. Source:\n",
+ " by Hannah\n",
+ "Ritche CC-BY\n",
+ "\n",
"This effect on cost of transport vs cost of processing is the main\n",
"driver of the topology of the modern supply chain and the associated\n",
"effect of globalization. If transport is much cheaper than processing,\n",
@@ -794,14 +544,18 @@
"This is challenging, because as we introduce more mechanism to the\n",
"models we use, it becomes harder to develop efficient algorithms to\n",
"match those models to data."
- ]
+ ],
+ "id": "ffeb8995-28ba-4b96-8488-c61a86901dee"
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
- "For Africa\n",
- "----------\n",
+ "## For Africa\n",
+ "\n",
+ "\\[edit\\]\n",
"\n",
"There is a large opportunity because infrastructures around automation\n",
"are moving from physical infrastructure towards information\n",
@@ -820,7 +574,7 @@
"these parameters to change the behavior of the function. The choice of\n",
"mathematical function we use is a vital component of the model.\n",
"\n",
- "\n",
+ "\n",
"\n",
"Figure: The Kapchorwa District, home district of Stephen\n",
"Kiprotich.\n",
@@ -828,30 +582,33 @@
"Stephen Kiprotich, the 2012 gold medal winner from the London Olympics,\n",
"comes from Kapchorwa district, in eastern Uganda, near the border with\n",
"Kenya."
- ]
+ ],
+ "id": "bcf493d8-5ccf-4d23-8399-f6cda5b01780"
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
- "Olympic Marathon Data\n",
- "---------------------\n",
+ "## Olympic Marathon Data\n",
+ "\n",
+ "\\[edit\\]\n",
"\n",
"
\n",
"
\n",
"
\n",
"\n",
"- Gold medal times for Olympic Marathon since 1896.\n",
- "- Marathons before 1924 didn’t have a standardised distance.\n",
+ "- Marathons before 1924 didn’t have a standardized distance.\n",
"- Present results using pace per km.\n",
- "- In 1904 Marathon was badly organised leading to very slow times.\n",
+ "- In 1904 Marathon was badly organized leading to very slow times.\n",
"\n",
"
\n",
@@ -859,9 +616,10 @@
"\n",
"The first thing we will do is load a standard data set for regression\n",
"modelling. The data consists of the pace of Olympic Gold Medal Marathon\n",
- "winners for the Olympics from 1896 to present. First we load in the data\n",
+ "winners for the Olympics from 1896 to present. Let’s load in the data\n",
"and plot."
- ]
+ ],
+ "id": "5100f09e-517b-4034-8cd8-469ea13199fa"
},
{
"cell_type": "code",
@@ -869,8 +627,9 @@
"metadata": {},
"outputs": [],
"source": [
- "%pip install --upgrade git+https://github.com/sods/ods"
- ]
+ "%pip install pods"
+ ],
+ "id": "2cd033d0-3dc0-4956-a3fe-f13437754d1c"
},
{
"cell_type": "code",
@@ -880,7 +639,8 @@
"source": [
"import numpy as np\n",
"import pods"
- ]
+ ],
+ "id": "c0a49b1b-51a8-42bf-b165-3cbff250a263"
},
{
"cell_type": "code",
@@ -893,8 +653,10 @@
"y = data['Y']\n",
"\n",
"offset = y.mean()\n",
- "scale = np.sqrt(y.var())"
- ]
+ "scale = np.sqrt(y.var())\n",
+ "yhat = (y - offset)/scale"
+ ],
+ "id": "8daafd3f-c44c-4e1b-91e5-be3d3c67c92f"
},
{
"cell_type": "code",
@@ -903,9 +665,10 @@
"outputs": [],
"source": [
"import matplotlib.pyplot as plt\n",
- "import teaching_plots as plot\n",
+ "import mlai.plot as plot\n",
"import mlai"
- ]
+ ],
+ "id": "48b9d97f-3d5c-49f8-bfe6-34c48f89d009"
},
{
"cell_type": "code",
@@ -913,9 +676,9 @@
"metadata": {},
"outputs": [],
"source": [
+ "\n",
"xlim = (1875,2030)\n",
"ylim = (2.5, 6.5)\n",
- "yhat = (y-offset)/scale\n",
"\n",
"fig, ax = plt.subplots(figsize=plot.big_wide_figsize)\n",
"_ = ax.plot(x, y, 'r.',markersize=10)\n",
@@ -924,36 +687,56 @@
"ax.set_xlim(xlim)\n",
"ax.set_ylim(ylim)\n",
"\n",
- "mlai.write_figure(figure=fig, \n",
- " filename='olympic-marathon.svg', \n",
- " diagrams='./datasets',\n",
- " transparent=True, \n",
- " facecolor=(1, 1, 1, 1))"
- ]
+ "mlai.write_figure(filename='olympic-marathon.svg', \n",
+ " directory='./datasets')"
+ ],
+ "id": "754b760a-9b3c-4ee0-ae8d-a3933b1ba2d8"
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
- "\n",
+ "\n",
"\n",
- "Figure: Olympic marathon pace times since 1892.\n",
+ "Figure: Olympic marathon pace times since 1896.\n",
"\n",
- "Things to notice about the data include the outlier in 1904, in this\n",
- "year, the olympics was in St Louis, USA. Organizational problems and\n",
+ "Things to notice about the data include the outlier in 1904, in that\n",
+ "year the Olympics was in St Louis, USA. Organizational problems and\n",
"challenges with dust kicked up by the cars following the race meant that\n",
- "participants got lost, and only very few participants completed.\n",
+ "participants got lost, and only very few participants completed. More\n",
+ "recent years see more consistently quick marathons."
+ ],
+ "id": "d7d30bc1-97e7-4f48-98a6-75e5d2f408d6"
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Polynomial Fits to Olympic Marthon Data\n",
"\n",
- "More recent years see more consistently quick marathons."
- ]
+ "\\[edit\\]"
+ ],
+ "id": "bbc8c9f8-2b80-4e29-92b9-2aac46905cd6"
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import numpy as np"
+ ],
+ "id": "78cf434d-5226-41df-986b-151dbe07d269"
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
- "Polynomial Fits to Olympic Data\n",
- "-------------------------------"
- ]
+ "Define the polynomial basis function."
+ ],
+ "id": "d16019fa-6a99-4ed9-9d45-99a8ea64dd49"
},
{
"cell_type": "code",
@@ -961,11 +744,9 @@
"metadata": {},
"outputs": [],
"source": [
- "import numpy as np\n",
- "from matplotlib import pyplot as plt\n",
- "import mlai\n",
- "import pods"
- ]
+ "import mlai"
+ ],
+ "id": "09b0d1c7-09ca-4f5d-839b-b7c632e97fa9"
},
{
"cell_type": "code",
@@ -973,17 +754,142 @@
"metadata": {},
"outputs": [],
"source": [
- "basis = mlai.polynomial\n",
- "\n",
- "data = pods.datasets.olympic_marathon_men()\n",
+ "%load -n mlai.polynomial"
+ ],
+ "id": "73c020ff-0c67-467a-9cc6-d718d32c3744"
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def polynomial(x, num_basis=4, data_limits=[-1., 1.]):\n",
+ " \"Polynomial basis\"\n",
+ " centre = data_limits[0]/2. + data_limits[1]/2.\n",
+ " span = data_limits[1] - data_limits[0]\n",
+ " z = np.asarray(x, dtype=float) - centre\n",
+ " z = 2*z/span # scale the inputs to be within -1, 1 where polynomials are well behaved\n",
+ " Phi = np.zeros((x.shape[0], num_basis))\n",
+ " for i in range(num_basis):\n",
+ " Phi[:, i:i+1] = z**i\n",
+ " return Phi"
+ ],
+ "id": "4f5d31d1-c630-460f-95d6-d91d68dae39b"
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Now we include the solution for the linear regression through\n",
+ "QR-decomposition."
+ ],
+ "id": "658e00f6-6ba9-413c-9d6d-2cd6aa559a35"
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def basis_fit(Phi, y):\n",
+ " \"Use QR decomposition to fit the basis.\"\"\"\n",
+ " Q, R = np.linalg.qr(Phi)\n",
+ " return sp.linalg.solve_triangular(R, Q.T@y) "
+ ],
+ "id": "ff5aab90-c585-4556-bf50-1084bf9a72f2"
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Linear Fit"
+ ],
+ "id": "2a00e7da-9ed5-4265-ad67-3fc0e431e4a8"
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "poly_args = {'num_basis':2, # two basis functions (1 and x)\n",
+ " 'data_limits':xlim}\n",
+ "Phi = polynomial(x, **poly_args)\n",
+ "w = basis_fit(Phi, y)"
+ ],
+ "id": "2c8034cb-6834-4a1c-b1df-bf738c7c029e"
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Now we make some predictions for the fit."
+ ],
+ "id": "b1ab132c-3631-426b-8d47-46fa5af856bb"
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "x_pred = np.linspace(xlim[0], xlim[1], 400)[:, np.newaxis]\n",
+ "Phi_pred = polynomial(x_pred, **poly_args)\n",
+ "f_pred = Phi_pred@w"
+ ],
+ "id": "722af050-3c74-4a43-9261-469fa37d868f"
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import matplotlib.pyplot as plt\n",
+ "import mlai.plot as plot\n",
+ "import mlai"
+ ],
+ "id": "ebdaa78f-1b3c-4b4f-abf2-8f0f45f2edc9"
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "fig, ax = plt.subplots(figsize=plot.big_wide_figsize)\n",
+ "_ = ax.plot(x, y, 'r.',markersize=10)\n",
+ "ax.set_xlabel('year', fontsize=20)\n",
+ "ax.set_ylabel('pace min/km', fontsize=20)\n",
+ "ax.set_xlim(xlim)\n",
+ "ax.set_ylim(ylim)\n",
"\n",
- "x = data['X']\n",
- "y = data['Y']\n",
+ "_ = ax.plot(x_pred, f_pred, 'b-', linewidth=2)\n",
"\n",
- "xlim = [1892, 2020]\n",
+ "mlai.write_figure(filename='olympic-marathon-polynomial-2.svg', \n",
+ " directory='./ml')"
+ ],
+ "id": "0725803b-8ad1-4f65-8df6-a1feeb502273"
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "\n",
"\n",
- "basis=mlai.Basis(mlai.polynomial, number=1, data_limits=xlim)"
- ]
+ "Figure: Fit of a 1-degree polynomial (a linear model) to the Olympic\n",
+ "marathon data."
+ ],
+ "id": "f8804b10-1093-4c40-8ddd-4137ba9a0827"
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Cubic Fit"
+ ],
+ "id": "807a4981-169c-4f20-9289-dec00ba9a994"
},
{
"cell_type": "code",
@@ -991,8 +897,12 @@
"metadata": {},
"outputs": [],
"source": [
- "import teaching_plots as plot"
- ]
+ "poly_args = {'num_basis':4, # four basis: 1, x, x^2, x^3\n",
+ " 'data_limits':xlim}\n",
+ "Phi = polynomial(x, **poly_args)\n",
+ "w = basis_fit(Phi, y)"
+ ],
+ "id": "f25ad870-5881-4d7a-b14a-2777fd03a778"
},
{
"cell_type": "code",
@@ -1000,12 +910,10 @@
"metadata": {},
"outputs": [],
"source": [
- "plot.rmse_fit(x, y, param_name='number', param_range=(1, 27), \n",
- " model=mlai.LM, \n",
- " basis=basis,\n",
- " xlim=xlim, objective_ylim=[0, 0.8],\n",
- " diagrams='./ml')"
- ]
+ "Phi_pred = polynomial(x_pred, **poly_args)\n",
+ "f_pred = Phi_pred@w"
+ ],
+ "id": "7e785cac-6442-4d87-abc2-809be58a2c11"
},
{
"cell_type": "code",
@@ -1013,8 +921,11 @@
"metadata": {},
"outputs": [],
"source": [
- "from ipywidgets import IntSlider"
- ]
+ "import matplotlib.pyplot as plt\n",
+ "import mlai.plot as plot\n",
+ "import mlai"
+ ],
+ "id": "0ba7e21b-22af-4d5d-9b19-056ad2295837"
},
{
"cell_type": "code",
@@ -1022,10 +933,40 @@
"metadata": {},
"outputs": [],
"source": [
- "pods.notebook.display_plots('olympic_LM_polynomial_number{num_basis:0>3}.svg',\n",
- " directory='./ml', \n",
- " num_basis=IntSlider(1,1,27,1))"
- ]
+ "fig, ax = plt.subplots(figsize=plot.big_wide_figsize)\n",
+ "_ = ax.plot(x, y, 'r.',markersize=10)\n",
+ "ax.set_xlabel('year', fontsize=20)\n",
+ "ax.set_ylabel('pace min/km', fontsize=20)\n",
+ "ax.set_xlim(xlim)\n",
+ "ax.set_ylim(ylim)\n",
+ "\n",
+ "_ = ax.plot(x_pred, f_pred, 'b-', linewidth=2)\n",
+ "\n",
+ "mlai.write_figure(filename='olympic-marathon-polynomial-4.svg', \n",
+ " directory='./ml')"
+ ],
+ "id": "ba2b9fcb-da58-452e-b3e2-0aab81212a69"
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "\n",
+ "\n",
+ "Figure: Fit of a 3-degree polynomial (a cubic model) to the Olympic\n",
+ "marathon data."
+ ],
+ "id": "6144b4a4-e311-418f-8e31-6158d29cffc4"
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## 9th Degree Polynomial Fit\n",
+ "\n",
+ "Now we’ll try a 9th degree polynomial fit to the data."
+ ],
+ "id": "d92c7749-4aee-406a-bbdd-d35e10c1b1d4"
},
{
"cell_type": "code",
@@ -1033,12 +974,23 @@
"metadata": {},
"outputs": [],
"source": [
- "import numpy as np\n",
- "from matplotlib import pyplot as plt\n",
- "import teaching_plots as plot\n",
- "import mlai\n",
- "import pods"
- ]
+ "poly_args = {'num_basis':10, # basis up to x^9\n",
+ " 'data_limits':xlim}\n",
+ "Phi = polynomial(x, **poly_args)\n",
+ "w = basis_fit(Phi, y)"
+ ],
+ "id": "4a680789-2628-406e-aeee-020b75c24662"
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "Phi_pred = polynomial(x_pred, **poly_args)\n",
+ "f_pred = Phi_pred@w"
+ ],
+ "id": "708c2e82-6fb1-4cbf-9a36-65e38806be60"
},
{
"cell_type": "code",
@@ -1046,20 +998,52 @@
"metadata": {},
"outputs": [],
"source": [
- "basis = mlai.polynomial\n",
+ "import matplotlib.pyplot as plt\n",
+ "import mlai.plot as plot\n",
+ "import mlai"
+ ],
+ "id": "e1f379ec-b22a-4d28-a9da-da93266f4574"
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "fig, ax = plt.subplots(figsize=plot.big_wide_figsize)\n",
+ "_ = ax.plot(x, y, 'r.',markersize=10)\n",
+ "ax.set_xlabel('year', fontsize=20)\n",
+ "ax.set_ylabel('pace min/km', fontsize=20)\n",
+ "ax.set_xlim(xlim)\n",
+ "ax.set_ylim(ylim)\n",
"\n",
- "data = pods.datasets.olympic_marathon_men()\n",
+ "_ = ax.plot(x_pred, f_pred, 'b-', linewidth=2)\n",
"\n",
- "x = data['X']\n",
- "y = data['Y']\n",
+ "mlai.write_figure(filename='olympic-marathon-polynomial-10.svg', \n",
+ " directory='./ml')"
+ ],
+ "id": "8eff6d59-2593-4afb-a219-40c1d2411495"
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "\n",
"\n",
- "xlim = [1892, 2020]\n",
- "max_basis = 27\n",
+ "Figure: Fit of a 9-degree polynomial to the Olympic marathon\n",
+ "data."
+ ],
+ "id": "879f1c3a-f536-4313-b237-11eedc467f00"
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## 16th Degree Polynomial Fit\n",
"\n",
- "ll = np.array([np.nan]*(max_basis))\n",
- "sum_squares = np.array([np.nan]*(max_basis))\n",
- "basis=mlai.Basis(mlai.polynomial, number=1, data_limits=xlim)"
- ]
+ "Now we’ll try a 16th degree polynomial fit to the data."
+ ],
+ "id": "41040aac-acbe-48ce-a04e-0db0c7b8cffb"
},
{
"cell_type": "code",
@@ -1067,16 +1051,23 @@
"metadata": {},
"outputs": [],
"source": [
- "plot.rmse_fit(x, y, param_name='number', param_range=(1, 28), \n",
- " model=mlai.LM, basis=basis, \n",
- " xlim=xlim, objective_ylim=[0, 0.8],\n",
- " diagrams='./ml')"
- ]
+ "poly_args = {'num_basis':17, # basis up to x^16\n",
+ " 'data_limits':xlim}\n",
+ "Phi = polynomial(x, **poly_args)\n",
+ "w = basis_fit(Phi, y)"
+ ],
+ "id": "b679b14e-b0d4-4952-b968-e8e081c57313"
},
{
- "cell_type": "markdown",
+ "cell_type": "code",
+ "execution_count": null,
"metadata": {},
- "source": []
+ "outputs": [],
+ "source": [
+ "Phi_pred = polynomial(x_pred, **poly_args)\n",
+ "f_pred = Phi_pred@w"
+ ],
+ "id": "f3db05ae-03d4-4ca9-b3f3-c748f9eac3c9"
},
{
"cell_type": "code",
@@ -1084,32 +1075,129 @@
"metadata": {},
"outputs": [],
"source": [
- "pods.notebook.display_plots('olympic_LM_polynomial_number{num_basis:0>3}.svg',\n",
- " directory='./ml', \n",
- " num_basis=IntSlider(1,1,28,1))"
- ]
+ "import matplotlib.pyplot as plt\n",
+ "import mlai.plot as plot\n",
+ "import mlai"
+ ],
+ "id": "7176f512-26df-4ad1-b870-59ff149156c8"
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "fig, ax = plt.subplots(figsize=plot.big_wide_figsize)\n",
+ "_ = ax.plot(x, y, 'r.',markersize=10)\n",
+ "ax.set_xlabel('year', fontsize=20)\n",
+ "ax.set_ylabel('pace min/km', fontsize=20)\n",
+ "ax.set_xlim(xlim)\n",
+ "ax.set_ylim(ylim)\n",
+ "\n",
+ "_ = ax.plot(x_pred, f_pred, 'b-', linewidth=2)\n",
+ "\n",
+ "mlai.write_figure(filename='olympic-marathon-polynomial-17.svg', \n",
+ " directory='./ml')"
+ ],
+ "id": "bb2f04a2-a04d-43c5-a05c-ff24ce3774a0"
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
- "\n",
+ "\n",
"\n",
- "Figure: Fit of a 1 degree polynomial to the olympic marathon\n",
- "data.\n",
+ "Figure: Fit of a 16-degree polynomial to the Olympic marathon\n",
+ "data."
+ ],
+ "id": "0bd19f38-943f-44e5-b7d2-9d079cfb3f69"
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## 26th Degree Polynomial Fit\n",
"\n",
- "\n",
+ "Now we’ll try a 26th degree polynomial fit to the data."
+ ],
+ "id": "8657df6f-fe1f-42d0-9ef3-2b684e8899fb"
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "poly_args = {'num_basis':27, # basis up to x^26\n",
+ " 'data_limits':xlim}\n",
+ "Phi = polynomial(x, **poly_args)\n",
+ "w = basis_fit(Phi, y)"
+ ],
+ "id": "dd7eea65-45c0-4d70-920a-63677b804c94"
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "Phi_pred = polynomial(x_pred, **poly_args)\n",
+ "f_pred = Phi_pred@w"
+ ],
+ "id": "9420968b-e3c7-4ccc-99de-b68e09bd3738"
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import matplotlib.pyplot as plt\n",
+ "import mlai.plot as plot\n",
+ "import mlai"
+ ],
+ "id": "29e5101e-9ea9-4f46-8122-b2cb26ceabf7"
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "fig, ax = plt.subplots(figsize=plot.big_wide_figsize)\n",
+ "_ = ax.plot(x, y, 'r.',markersize=10)\n",
+ "ax.set_xlabel('year', fontsize=20)\n",
+ "ax.set_ylabel('pace min/km', fontsize=20)\n",
+ "ax.set_xlim(xlim)\n",
+ "ax.set_ylim(ylim)\n",
+ "\n",
+ "_ = ax.plot(x_pred, f_pred, 'b-', linewidth=2)\n",
"\n",
- "Figure: Fit of a 2 degree polynomial to the olympic marathon\n",
+ "mlai.write_figure(filename='olympic-marathon-polynomial-27.svg', \n",
+ " directory='./ml')"
+ ],
+ "id": "586592df-8d02-4aa4-aef1-fda279a1c020"
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "\n",
+ "\n",
+ "Figure: Fit of a 26-degree polynomial to the Olympic marathon\n",
"data."
- ]
+ ],
+ "id": "1287f735-5c77-4772-8b10-f34ac80b16ce"
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
- "What does Machine Learning do?\n",
- "------------------------------\n",
+ "## What does Machine Learning do?\n",
+ "\n",
+ "\\[edit\\]\n",
"\n",
"Any process of automation allows us to scale what we do by codifying a\n",
"process in some way that makes it efficient and repeatable. Machine\n",
@@ -1118,7 +1206,7 @@
"learnt by a computer. If we can create these mathematical functions in\n",
"ways in which they can interconnect, then we can also build systems.\n",
"\n",
- "Machine learning works through codifing a prediction of interest into a\n",
+ "Machine learning works through codifying a prediction of interest into a\n",
"mathematical function. For example, we can try and predict the\n",
"probability that a customer wants to by a jersey given knowledge of\n",
"their age, and the latitude where they live. The technique known as\n",
@@ -1127,72 +1215,152 @@
"\n",
"$$ \\text{odds} = \\frac{p(\\text{bought})}{p(\\text{not bought})} $$\n",
"\n",
- "$$ \\log \\text{odds} = \\beta_0 + \\beta_1 \\text{age} + \\beta_2 \\text{latitude}.$$\n",
- "Here $\\beta_0$, $\\beta_1$ and $\\beta_2$ are the parameters of the model.\n",
- "If $\\beta_1$ and $\\beta_2$ are both positive, then the log-odds that\n",
- "someone will buy a jumper increase with increasing latitude and age, so\n",
- "the further north you are and the older you are the more likely you are\n",
- "to buy a jumper. The parameter $\\beta_0$ is an offset parameter, and\n",
- "gives the log-odds of buying a jumper at zero age and on the equator. It\n",
- "is likely to be negative[1] indicating that the purchase is\n",
- "odds-against. This is actually a classical statistical model, and models\n",
- "like logistic regression are widely used to estimate probabilities from\n",
- "ad-click prediction to risk of disease.\n",
+ "$$ \\log \\text{odds} = w_0 + w_1 \\text{age} + w_2 \\text{latitude}.$$\n",
+ "Here $w_0$, $w_1$ and $w_2$ are the parameters of the model. If $w_1$\n",
+ "and $w_2$ are both positive, then the log-odds that someone will buy a\n",
+ "jumper increase with increasing latitude and age, so the further north\n",
+ "you are and the older you are the more likely you are to buy a jumper.\n",
+ "The parameter $w_0$ is an offset parameter and gives the log-odds of\n",
+ "buying a jumper at zero age and on the equator. It is likely to be\n",
+ "negative[1] indicating that the purchase is odds-against. This is also a\n",
+ "classical statistical model, and models like logistic regression are\n",
+ "widely used to estimate probabilities from ad-click prediction to\n",
+ "disease risk.\n",
"\n",
"This is called a generalized linear model, we can also think of it as\n",
"estimating the *probability* of a purchase as a nonlinear function of\n",
- "the features (age, lattitude) and the parameters (the $\\beta$ values).\n",
- "The function is known as the *sigmoid* or [logistic\n",
+ "the features (age, latitude) and the parameters (the $w$ values). The\n",
+ "function is known as the *sigmoid* or [logistic\n",
"function](https://en.wikipedia.org/wiki/Logistic_regression), thus the\n",
"name *logistic* regression.\n",
"\n",
- "$$ p(\\text{bought}) = \\sigma\\left(\\beta_0 + \\beta_1 \\text{age} + \\beta_2 \\text{latitude}\\right).$$\n",
+ "[1] The logarithm of a number less than one is negative, for a number\n",
+ "greater than one the logarithm is positive. So if odds are greater than\n",
+ "evens (odds-on) the log-odds are positive, if the odds are less than\n",
+ "evens (odds-against) the log-odds will be negative."
+ ],
+ "id": "08729fab-2909-4467-9807-c9b86407668c"
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Sigmoid Function\n",
+ "\n",
+ "\\[edit\\]"
+ ],
+ "id": "3d2b3529-f06e-4251-945b-4ae7ef0094b4"
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import mlai.plot as plot"
+ ],
+ "id": "7a3c6921-013c-4615-ae56-16aa6129c397"
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "plot.logistic('./ml/logistic.svg')"
+ ],
+ "id": "2f906093-50de-472e-ac9b-f02cbd047218"
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "\n",
+ "\n",
+ "Figure: The logistic function.\n",
+ "\n",
+ "The function has this characeristic ‘s’-shape (from where the term\n",
+ "sigmoid, as in sigma, comes from). It also takes the input from the\n",
+ "entire real line and ‘squashes’ it into an output that is between zero\n",
+ "and one. For this reason it is sometimes also called a ‘squashing\n",
+ "function’.\n",
+ "\n",
+ "The sigmoid comes from the inverting the odds ratio, $$\n",
+ "\\frac{\\pi}{(1-\\pi)}\n",
+ "$$ where $\\pi$ is the probability of a positive outcome and $1-\\pi$ is\n",
+ "the probability of a negative outcome\n",
+ "\n",
+ "$$ p(\\text{bought}) = \\sigma\\left(w_0 + w_1 \\text{age} + w_2 \\text{latitude}\\right).$$\n",
+ "\n",
"In the case where we have *features* to help us predict, we sometimes\n",
"denote such features as a vector, $\\mathbf{ x}$, and we then use an\n",
"inner product between the features and the parameters,\n",
- "$\\boldsymbol{\\beta}^\\top \\mathbf{ x}= \\beta_1 x_1 + \\beta_2 x_2 + \\beta_3 x_3 ...$,\n",
- "to represent the argument of the sigmoid.\n",
+ "$\\mathbf{ w}^\\top \\mathbf{ x}= w_1 x_1 + w_2 x_2 + w_3 x_3 ...$, to\n",
+ "represent the argument of the sigmoid.\n",
"\n",
- "$$ p(\\text{bought}) = \\sigma\\left(\\boldsymbol{\\beta}^\\top \\mathbf{ x}\\right).$$\n",
+ "$$ p(\\text{bought}) = \\sigma\\left(\\mathbf{ w}^\\top \\mathbf{ x}\\right).$$\n",
"More generally, we aim to predict some aspect of our data, $y$, by\n",
"relating it through a mathematical function, $f(\\cdot)$, to the\n",
- "parameters, $\\boldsymbol{\\beta}$ and the data, $\\mathbf{ x}$.\n",
+ "parameters, $\\mathbf{ w}$ and the data, $\\mathbf{ x}$.\n",
"\n",
- "$$ y= f\\left(\\mathbf{ x}, \\boldsymbol{\\beta}\\right).$$ We call\n",
- "$f(\\cdot)$ the *prediction function*.\n",
+ "$$ y= f\\left(\\mathbf{ x}, \\mathbf{ w}\\right).$$ We call $f(\\cdot)$ the\n",
+ "*prediction function*.\n",
"\n",
"To obtain the fit to data, we use a separate function called the\n",
"*objective function* that gives us a mathematical representation of the\n",
"difference between our predictions and the real data.\n",
"\n",
- "$$E(\\boldsymbol{\\beta}, \\mathbf{Y}, \\mathbf{X})$$ A commonly used\n",
- "examples (for example in a regression problem) is least squares,\n",
- "$$E(\\boldsymbol{\\beta}, \\mathbf{Y}, \\mathbf{X}) = \\sum_{i=1}^n\\left(y_i - f(\\mathbf{ x}_i, \\boldsymbol{\\beta})\\right)^2.$$\n",
+ "$$E(\\mathbf{ w}, \\mathbf{Y}, \\mathbf{X})$$ A commonly used examples (for\n",
+ "example in a regression problem) is least squares,\n",
+ "$$E(\\mathbf{ w}, \\mathbf{Y}, \\mathbf{X}) = \\sum_{i=1}^n\\left(y_i - f(\\mathbf{ x}_i, \\mathbf{ w})\\right)^2.$$\n",
"\n",
"If a linear prediction function is combined with the least squares\n",
- "objective function then that gives us a classical *linear regression*,\n",
+ "objective function, then that gives us a classical *linear regression*,\n",
"another classical statistical model. Statistics often focusses on linear\n",
"models because it makes interpretation of the model easier.\n",
"Interpretation is key in statistics because the aim is normally to\n",
"validate questions by analysis of data. Machine learning has typically\n",
- "focussed more on the prediction function itself and worried less about\n",
- "the interpretation of parameters, which are normally denoted by\n",
- "$\\mathbf{w}$ instead of $\\boldsymbol{\\beta}$. As a result *non-linear*\n",
- "functions are explored more often as they tend to improve quality of\n",
- "predictions but at the expense of interpretability.\n",
+ "focused more on the prediction function itself and worried less about\n",
+ "the interpretation of parameters. In statistics, where interpretation is\n",
+ "typically more important than prediction, parameters are normally\n",
+ "denoted by $\\boldsymbol{\\beta}$ instead of $\\mathbf{ w}$.\n",
"\n",
- "[1] The logarithm of a number less than one is negative, for a number\n",
- "greater than one the logarithm is positive. So if odds are greater than\n",
- "evens (odds-on) the log-odds are positive, if the odds are less than\n",
- "evens (odds-against) the log-odds will be negative."
- ]
+ "A key difference between statistics and machine learning, is that\n",
+ "(traditionally) machine learning has focussed on predictive capability\n",
+ "and statistics has focussed on interpretability. That means that in a\n",
+ "statistics class far more emphasis will be placed on interpretation of\n",
+ "the parameters. In machine learning, the parameters, \\$, are just a\n",
+ "means to an end. But in statistics, when we denote the parameters by\n",
+ "$\\boldsymbol{\\beta}$, we often use the parameters to tell us something\n",
+ "about the disease.\n",
+ "\n",
+ "So we move between\n",
+ "$$ p(\\text{bought}) = \\sigma\\left(w_0 + w_1 \\text{age} + w_2 \\text{latitude}\\right).$$\n",
+ "\n",
+ "to denote the emphasis is on predictive power to\n",
+ "\n",
+ "$$ p(\\text{bought}) = \\sigma\\left(\\beta_0 + \\beta_1 \\text{age} + \\beta_2 \\text{latitude}\\right).$$\n",
+ "\n",
+ "to denote the emphasis is on interpretation of the parameters.\n",
+ "\n",
+ "Another effect of the focus on prediction in machine learning is that\n",
+ "*non-linear* approaches, which can be harder to interpret, are more\n",
+ "widely deployedin machine learning – they tend to improve quality of\n",
+ "predictions at the expense of interpretability."
+ ],
+ "id": "909bd233-e185-4b06-a464-4ec1ee55e3bf"
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
- "What is Machine Learning?\n",
- "-------------------------\n",
+ "## What is Machine Learning?\n",
+ "\n",
+ "\\[edit\\]\n",
"\n",
"Machine learning allows us to extract knowledge from data to form a\n",
"prediction.\n",
@@ -1219,33 +1387,42 @@
"the increased prominence of machine learning. This prominence is\n",
"surfacing in two different but overlapping domains: data science and\n",
"artificial intelligence."
- ]
+ ],
+ "id": "acc73d97-3931-45fd-88ef-0c03aecb53a6"
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
- "From Model to Decision\n",
- "----------------------\n",
+ "## From Model to Decision\n",
+ "\n",
+ "\\[edit\\]\n",
"\n",
"The real challenge, however, is end-to-end decision making. Taking\n",
"information from the environment and using it to drive decision making\n",
"to achieve goals."
- ]
+ ],
+ "id": "12ddb0dd-d9a6-4145-bbc9-212c1e5b4613"
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
- "Artificial Intelligence and Data Science\n",
- "----------------------------------------\n",
+ "## Artificial Intelligence and Data Science\n",
+ "\n",
+ "\\[edit\\]\n",
"\n",
"Artificial intelligence has the objective of endowing computers with\n",
"human-like intelligent capabilities. For example, understanding an image\n",
"(computer vision) or the contents of some speech (speech recognition),\n",
"the meaning of a sentence (natural language processing) or the\n",
"translation of a sentence (machine translation)."
- ]
+ ],
+ "id": "525ec1c2-7076-4507-ac0e-c65e216f8da8"
},
{
"cell_type": "markdown",
@@ -1312,14 +1489,18 @@
"question selection or even answer a question without the expense of a\n",
"full randomized control trial (referred to as A/B testing in modern\n",
"internet parlance)."
- ]
+ ],
+ "id": "962e7b57-cbf1-47c1-abef-22dd419de2c6"
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
- "Neural Networks and Prediction Functions\n",
- "----------------------------------------\n",
+ "## Neural Networks and Prediction Functions\n",
+ "\n",
+ "\\[edit\\]\n",
"\n",
"Neural networks are adaptive non-linear function models. Originally,\n",
"they were studied (by McCulloch and Pitts (McCulloch and Pitts, 1943))\n",
@@ -1346,14 +1527,14 @@
"hidden units, or the number of neurons. The elements of this vector\n",
"function are known as the *activation* function of the neural network\n",
"and $\\mathbf{V}$ are the parameters of the activation functions."
- ]
+ ],
+ "id": "ff3f147f-897e-44c0-82e6-50c149f0f25d"
},
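+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "As an illustrative sketch (not from the original notebook), a neural\n",
+ "network with a single hidden layer and tanh activations can be written\n",
+ "as an adaptive basis function model.\n",
+ "\n",
+ "``` python\n",
+ "import numpy as np\n",
+ "\n",
+ "def neural_network(x, V, w):\n",
+ "    # x: inputs (n, p); V: activation parameters (p, h); w: output weights (h, 1)\n",
+ "    phi = np.tanh(x@V)  # the activations play the role of basis functions\n",
+ "    return phi@w        # linear in w, non-linear in V\n",
+ "```\n",
+ "\n",
+ "Here the hidden dimension and the tanh non-linearity are assumptions\n",
+ "made for the sketch; the prediction is linear in the output weights `w`\n",
+ "but non-linear in the activation parameters `V`."
+ ],
+ "id": "3a6f2d1c-4b5e-4c6d-8e9f-0a1b2c3d4e5f"
+ },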
{
"cell_type": "markdown",
"metadata": {},
"source": [
- "Relations with Classical Statistics\n",
- "-----------------------------------\n",
+ "## Relations with Classical Statistics\n",
"\n",
"In statistics activation functions are traditionally known as *basis\n",
"functions*. And we would think of this as a *linear model*. It’s doesn’t\n",
@@ -1362,14 +1543,14 @@
"$\\mathbf{V}$. The linear model terminology refers to the fact that the\n",
"model is *linear in the parameters*, but it is *not* linear in the data\n",
"unless the activation functions are chosen to be linear."
- ]
+ ],
+ "id": "3143e07f-3ed2-4828-b126-69a7de0102d1"
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
- "Adaptive Basis Functions\n",
- "------------------------\n",
+ "## Adaptive Basis Functions\n",
"\n",
"The first difference in the (early) neural network literature to the\n",
"classical statistical literature is the decision to optimize these\n",
@@ -1383,14 +1564,14 @@
"normally use $\\boldsymbol{\\beta}$ when I care about the value of these\n",
"parameters, and $\\mathbf{ w}$ when I care more about the quality of the\n",
"prediction."
- ]
+ ],
+ "id": "e7d3b706-8fdf-4eb6-b3d9-84d1ef060bad"
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
- "Machine Learning\n",
- "----------------\n",
+ "## Machine Learning\n",
"\n",
"The key idea in machine learning is to observe the system in practice,\n",
"and then emulate its behavior with mathematics. That leads to a design\n",
@@ -1401,14 +1582,18 @@
"1. Supervised learning\n",
"2. Unsupervised learning\n",
"3. Reinforcement learning"
- ]
+ ],
+ "id": "4947410c-37af-482a-ae85-26b6a81e30cd"
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
- "Supervised Learning\n",
- "===================\n",
+ "# Supervised Learning\n",
+ "\n",
+ "\\[edit\\]\n",
"\n",
"Supervised learning is one of the most widely deployed machine learning\n",
"technologies, and a particular domain of success has been\n",
@@ -1417,14 +1602,18 @@
"different classes (e.g. dog or cat). This simple idea underpins a lot of\n",
"machine learning. By scanning across the image we can also determine\n",
"where the animal is in the image."
- ]
+ ],
+ "id": "73ceb8d7-b706-4edf-8742-239b50edbf7f"
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
- "Introduction to Classification\n",
- "------------------------------\n",
+ "## Introduction to Classification\n",
+ "\n",
+ "\\[edit\\]\n",
"\n",
"Classification is perhaps the technique most closely assocated with\n",
"machine learning. In the speech based agents, on-device classifiers are\n",
@@ -1474,14 +1663,18 @@
"relevant in the prediction, (2) defining the appropriate *class of\n",
"function*, $f(\\cdot)$, to use and (3) selecting the right parameters,\n",
"$\\mathbf{ w}$."
- ]
+ ],
+ "id": "f17c33e2-5fc5-4787-83ec-cdbd24af8556"
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
- "Classification Examples\n",
- "-----------------------\n",
+ "## Classification Examples\n",
+ "\n",
+ "\\[edit\\]\n",
"\n",
"- Classifiying hand written digits from binary images (automatic zip\n",
" code reading)\n",
@@ -1491,17 +1684,21 @@
"- Categorization of document types (different types of news article on\n",
" the internet)\n",
"\n",
- "\n",
+ "\n",
"\n",
"Figure: The perceptron algorithm."
- ]
+ ],
+ "id": "47aac3f5-391c-4636-abb7-2af766c1615e"
},
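+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "As an illustrative sketch (not from the original notebook), the\n",
+ "perceptron adjusts its weight vector only when a point is\n",
+ "misclassified.\n",
+ "\n",
+ "``` python\n",
+ "import numpy as np\n",
+ "\n",
+ "def perceptron_update(w, x_i, y_i):\n",
+ "    # y_i is +1 or -1; update only if x_i is on the wrong side of the boundary\n",
+ "    if y_i*np.dot(w, x_i) <= 0:\n",
+ "        w = w + y_i*x_i\n",
+ "    return w\n",
+ "```"
+ ],
+ "id": "7b8c9d0e-1f2a-4b3c-8d4e-5f6a7b8c9d0e"
+ },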
{
"cell_type": "markdown",
"metadata": {},
"source": [
- "Logistic Regression\n",
- "-------------------\n",
+ "## Logistic Regression\n",
+ "\n",
+ "\\[edit\\]\n",
"\n",
"A logistic regression is an approach to classification which extends the\n",
"linear basis function models we’ve already explored. Rather than\n",
@@ -1552,7 +1749,8 @@
"\\pi = g(\\mathbf{ w}^\\top\n",
"\\boldsymbol{ \\phi}(\\mathbf{ x})).\n",
"$$"
- ]
+ ],
+ "id": "8beed29c-80ab-4b3c-b9fe-63ce8557a207"
},
{
"cell_type": "code",
@@ -1560,8 +1758,9 @@
"metadata": {},
"outputs": [],
"source": [
- "import teaching_plots as plot"
- ]
+ "import mlai.plot as plot"
+ ],
+ "id": "61ec61b4-a416-4f52-8f17-6a7a53231156"
},
{
"cell_type": "code",
@@ -1570,19 +1769,20 @@
"outputs": [],
"source": [
"plot.logistic('./ml/logistic.svg')"
- ]
+ ],
+ "id": "f6979019-7fb6-422f-9e65-61f30c07da6b"
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
- "Basis Function\n",
- "--------------\n",
+ "## Basis Function\n",
"\n",
"We’ll define our prediction, objective and gradient functions below. But\n",
"before we start, we need to define a basis function for our model. Let’s\n",
"start with the linear basis."
- ]
+ ],
+ "id": "c5ab1d85-18b8-42bd-a265-a5042bed2b1b"
},
{
"cell_type": "code",
@@ -1591,7 +1791,18 @@
"outputs": [],
"source": [
"import numpy as np"
- ]
+ ],
+ "id": "29506b89-4333-4e8e-b3f3-ca3958a7a205"
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import mlai"
+ ],
+ "id": "3005f52c-9095-4ddb-b012-5a3c385c9a86"
},
{
"cell_type": "code",
@@ -1599,18 +1810,19 @@
"metadata": {},
"outputs": [],
"source": [
- "%load -s linear mlai.py"
- ]
+ "%load -n mlai.linear"
+ ],
+ "id": "17ace8a5-9a5a-4090-bd7f-d92bcba1a930"
},
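+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "For readers without `mlai` to hand, a sketch of what a linear basis\n",
+ "typically looks like is given below (the exact `mlai.linear`\n",
+ "implementation may differ).\n",
+ "\n",
+ "``` python\n",
+ "import numpy as np\n",
+ "\n",
+ "def linear(x, **kwargs):\n",
+ "    # prepend a column of ones (the bias) to the raw inputs\n",
+ "    x = np.asarray(x, dtype=float)\n",
+ "    return np.hstack([np.ones((x.shape[0], 1)), x])\n",
+ "```"
+ ],
+ "id": "9e1d7f2a-6c3b-4d8e-a5f0-1b2c3d4e5f60"
+ },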
{
"cell_type": "markdown",
"metadata": {},
"source": [
- "Prediction Function\n",
- "-------------------\n",
+ "## Prediction Function\n",
"\n",
"Now we have the basis function let’s define the prediction function."
- ]
+ ],
+ "id": "676939c1-d499-4037-9399-be032960037e"
},
{
"cell_type": "code",
@@ -1619,7 +1831,8 @@
"outputs": [],
"source": [
"import numpy as np"
- ]
+ ],
+ "id": "8c26c562-b74e-4075-90e9-bbae39ed4328"
},
{
"cell_type": "code",
@@ -1632,7 +1845,8 @@
" Phi = basis(x, **kwargs)\n",
" f = np.dot(Phi, w)\n",
" return 1./(1+np.exp(-f)), Phi"
- ]
+ ],
+ "id": "80955320-82af-4373-b032-fd58039f9925"
},
{
"cell_type": "markdown",
@@ -1643,47 +1857,7 @@
"name logistic regression) or sometimes it is called the sigmoid\n",
"function. For a particular value of the input to the link function,\n",
"$f_i = \\mathbf{ w}^\\top \\boldsymbol{ \\phi}(\\mathbf{ x}_i)$ we can plot\n",
- "the value of the inverse link function as below."
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "### Sigmoid Function"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "import teaching_plots as plot"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "plot.logistic('./ml/logistic.svg')"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "\n",
- "\n",
- "Figure: The logistic function.\n",
- "\n",
- "The function has this characeristic ‘s’-shape (from where the term\n",
- "sigmoid, as in sigma, comes from). It also takes the input from the\n",
- "entire real line and ‘squashes’ it into an output that is between zero\n",
- "and one. For this reason it is sometimes also called a ‘squashing\n",
- "function’.\n",
+ "the value of the inverse link function as below.\n",
"\n",
"By replacing the inverse link with the sigmoid we can write $\\pi$ as a\n",
"function of the input and the parameter vector as, $$\n",
@@ -1716,14 +1890,14 @@
"\n",
"but writing it mathematically makes it easier to write our objective\n",
"function within a single mathematical equation."
- ]
+ ],
+ "id": "d2f8cea8-f2b0-457e-a907-757c087c6a86"
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
- "Maximum Likelihood\n",
- "------------------\n",
+ "## Maximum Likelihood\n",
"\n",
"To obtain the parameters of the model, we need to maximize the\n",
"likelihood, or minimize the objective function, normally taken to be the\n",
@@ -1745,7 +1919,8 @@
"\\sum_{i=1}^n(1-y_i)\\log \\left(1-g\\left(\\mathbf{ w}^\\top\n",
"\\boldsymbol{ \\phi}(\\mathbf{ x}_i)\\right)\\right).\n",
"$$"
- ]
+ ],
+ "id": "402b0ea0-249e-4bae-a7be-2f9bb70247f0"
},
{
"cell_type": "code",
@@ -1754,7 +1929,8 @@
"outputs": [],
"source": [
"import numpy as np"
- ]
+ ],
+ "id": "e57822af-784b-4f2d-a4d2-d5d3ce84ffa6"
},
{
"cell_type": "code",
@@ -1768,7 +1944,8 @@
" posind = np.where(labs==1)\n",
" negind = np.where(labs==0)\n",
" return -np.log(g[posind, :]).sum() - np.log(1-g[negind, :]).sum()"
- ]
+ ],
+ "id": "52130290-c9c2-44da-9f66-83a69295b978"
},
{
"cell_type": "markdown",
@@ -1815,7 +1992,8 @@
"(1-y_i)\\left(g\\left(\\mathbf{ w}^\\top \\boldsymbol{ \\phi}(\\mathbf{ x})\\right)\\right)\n",
"\\boldsymbol{ \\phi}(\\mathbf{ x}_i).\n",
"$$"
- ]
+ ],
+ "id": "2e662427-e4aa-4e26-8f70-714d3be8ed6d"
},
{
"cell_type": "code",
@@ -1824,7 +2002,8 @@
"outputs": [],
"source": [
"import numpy as np"
- ]
+ ],
+ "id": "b4d4f581-5c3c-45b9-8ef4-92bc98b9abc5"
},
{
"cell_type": "code",
@@ -1840,42 +2019,191 @@
" negind = np.where(labs==0 )\n",
" dw += (Phi[negind]*g[negind]).sum(0)\n",
" return dw[:, None]"
- ]
+ ],
+ "id": "a09360a2-5ad3-4fb4-9668-52a4d13f7531"
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Optimization of the Function\n",
+ "\n",
+ "Reorganizing the gradient to find a stationary point of the function\n",
+ "with respect to the parameters $\\mathbf{ w}$ turns out to be impossible.\n",
+ "Optimization has to proceed by *numerical methods*. Options include the\n",
+ "multidimensional variant of [Newton’s\n",
+ "method](http://en.wikipedia.org/wiki/Newton%27s_method) or [gradient\n",
+ "based optimization\n",
+ "methods](http://en.wikipedia.org/wiki/Gradient_method) like we used for\n",
+ "optimizing matrix factorization for the movie recommender system. We\n",
+ "recall from matrix factorization that, for large data, *stochastic\n",
+ "gradient descent* or the Robbins Munro (Robbins and Monro, 1951)\n",
+ "optimization procedure worked best for function minimization."
+ ],
+ "id": "7cd71b8e-b53a-484e-95fe-35832a334008"
+ },
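+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "A minimal self-contained sketch of such a gradient based optimization\n",
+ "(illustrative only, using made-up data rather than the `predict`,\n",
+ "`objective` and `gradient` functions defined above):\n",
+ "\n",
+ "``` python\n",
+ "import numpy as np\n",
+ "\n",
+ "rng = np.random.default_rng(0)\n",
+ "Phi = rng.standard_normal((100, 3))          # made-up design matrix\n",
+ "y = (rng.uniform(size=(100, 1)) > 0.5)*1.0   # made-up binary labels\n",
+ "\n",
+ "w = np.zeros((3, 1))\n",
+ "learn_rate = 0.1\n",
+ "for iteration in range(1000):\n",
+ "    g = 1.0/(1.0 + np.exp(-Phi@w))   # predicted probabilities\n",
+ "    dw = Phi.T@(g - y)               # gradient of the negative log likelihood\n",
+ "    w -= learn_rate*dw/Phi.shape[0]  # batch gradient descent step\n",
+ "```\n",
+ "\n",
+ "For large data sets the gradient would be computed on random subsets of\n",
+ "the data (stochastic gradient descent) rather than the full batch."
+ ],
+ "id": "5c2e8a7b-9d4f-4e61-b0a3-7f6e5d4c3b2a"
+ },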
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# Nigeria NMIS Data\n",
+ "\n",
+ "\\[edit\\]\n",
+ "\n",
+ "As an example data set we will use Nigerian Millennium Development Goals\n",
+ "Information System Health Facility (The Office of the Senior Special\n",
+ "Assistant to the President on the Millennium Development Goals\n",
+ "(OSSAP-MDGs) and Columbia University, 2014). It can be found here\n",
+ ".\n",
+ "\n",
+ "Taking from the information on the site,\n",
+ "\n",
+ "> The Nigeria MDG (Millennium Development Goals) Information System –\n",
+ "> NMIS health facility data is collected by the Office of the Senior\n",
+ "> Special Assistant to the President on the Millennium Development Goals\n",
+ "> (OSSAP-MDGs) in partner with the Sustainable Engineering Lab at\n",
+ "> Columbia University. A rigorous, geo-referenced baseline facility\n",
+ "> inventory across Nigeria is created spanning from 2009 to 2011 with an\n",
+ "> additional survey effort to increase coverage in 2014, to build\n",
+ "> Nigeria’s first nation-wide inventory of health facility. The database\n",
+ "> includes 34,139 health facilities info in Nigeria.\n",
+ ">\n",
+ "> The goal of this database is to make the data collected available to\n",
+ "> planners, government officials, and the public, to be used to make\n",
+ "> strategic decisions for planning relevant interventions.\n",
+ ">\n",
+ "> For data inquiry, please contact Ms. Funlola Osinupebi, Performance\n",
+ "> Monitoring & Communications, Advisory Power Team, Office of the Vice\n",
+ "> President at funlola.osinupebi@aptovp.org\n",
+ ">\n",
+ "> To learn more, please visit\n",
+ "> \n",
+ ">\n",
+ "> Suggested citation: Nigeria NMIS facility database (2014), the Office\n",
+ "> of the Senior Special Assistant to the President on the Millennium\n",
+ "> Development Goals (OSSAP-MDGs) & Columbia University\n",
+ "\n",
+ "For ease of use we’ve packaged this data set in the `pods` library"
+ ],
+ "id": "7d50df52-0d80-418a-a0c7-ff74a2e68c6b"
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## pods\n",
+ "\n",
+ "\\[edit\\]\n",
+ "\n",
+ "In Sheffield we created a suite of software tools for ‘Open Data\n",
+ "Science’. Open data science is an approach to sharing code, models and\n",
+ "data that should make it easier for companies, health professionals and\n",
+ "scientists to gain access to data science techniques.\n",
+ "\n",
+ "You can also check this blog post on [Open Data\n",
+ "Science](http://inverseprobability.com/2014/07/01/open-data-science).\n",
+ "\n",
+ "The software can be installed using\n",
+ "\n",
+ "from the command prompt where you can access your python installation.\n",
+ "\n",
+ "The code is also available on GitHub: \n",
+ "\n",
+ "Once `pods` is installed, it can be imported in the usual manner."
+ ],
+ "id": "18aeae4b-8405-4f33-8585-3d3dd62fb9b7"
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import pods"
+ ],
+ "id": "954ea58f-f28c-40c2-9272-650e30d8d201"
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "data = pods.datasets.nigeria_nmis()['Y']\n",
+ "data.head()"
+ ],
+ "id": "bbc4a83d-a908-496b-80ff-1299cbefbff8"
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Alternatively, you can access the data directly with the following\n",
+ "commands.\n",
+ "\n",
+ "``` python\n",
+ "import urllib.request\n",
+ "urllib.request.urlretrieve('https://energydata.info/dataset/f85d1796-e7f2-4630-be84-79420174e3bd/resource/6e640a13-cab4-457b-b9e6-0336051bac27/download/healthmopupandbaselinenmisfacility.csv', 'healthmopupandbaselinenmisfacility.csv')\n",
+ "\n",
+ "import pandas as pd\n",
+ "data = pd.read_csv('healthmopupandbaselinenmisfacility.csv')\n",
+ "```\n",
+ "\n",
+ "Once it is loaded in the data can be summarized using the `describe`\n",
+ "method in pandas."
+ ],
+ "id": "f0db0880-346b-43b5-be2c-375d35f909c4"
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "data.describe()"
+ ],
+ "id": "509eb807-4ea8-4fb9-bcf5-e39be7c804d2"
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
- "Optimization of the Function\n",
- "----------------------------\n",
- "\n",
- "Reorganizing the gradient to find a stationary point of the function\n",
- "with respect to the parameters $\\mathbf{ w}$ turns out to be impossible.\n",
- "Optimization has to proceed by *numerical methods*. Options include the\n",
- "multidimensional variant of [Newton’s\n",
- "method](http://en.wikipedia.org/wiki/Newton%27s_method) or [gradient\n",
- "based optimization\n",
- "methods](http://en.wikipedia.org/wiki/Gradient_method) like we used for\n",
- "optimizing matrix factorization for the movie recommender system. We\n",
- "recall from matrix factorization that, for large data, *stochastic\n",
- "gradient descent* or the Robbins Munro (Robbins and Monro, 1951)\n",
- "optimization procedure worked best for function minimization."
- ]
+ "We can also find out the dimensions of the dataset using the `shape`\n",
+ "property."
+ ],
+ "id": "c710c82b-899b-45e3-9d8e-52d70c451c5e"
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "data.shape"
+ ],
+ "id": "0389c8eb-fcde-49ec-b048-5b98703baa50"
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
- "Nigerian NMIS Data\n",
- "------------------\n",
- "\n",
- "First we will load in the Nigerian NMIS health data. Our aim will be to\n",
- "predict whether a center has maternal health delivery services given the\n",
- "attributes in the data. We will predict of the number of nurses, the\n",
- "number of doctors, location etc.\n",
+ "Dataframes have different functions that you can use to explore and\n",
+ "understand your data. In python and the Jupyter notebook it is possible\n",
+ "to see a list of all possible functions and attributes by typing the\n",
+ "name of the object followed by `.` for example in the above case if\n",
+ "we type `data.` it show the columns available (these are attributes\n",
+ "in pandas dataframes) such as `num_nurses_fulltime`, and also functions,\n",
+ "such as `.describe()`.\n",
"\n",
- "Let’s first remind ourselves of the data."
- ]
+ "For functions we can also see the documentation about the function by\n",
+ "following the name with a question mark. This will open a box with\n",
+ "documentation at the bottom which can be closed with the x button."
+ ],
+ "id": "704fa8c0-3c76-4664-8898-a8df2a16b8f2"
},
{
"cell_type": "code",
@@ -1883,8 +2211,9 @@
"metadata": {},
"outputs": [],
"source": [
- "import urllib.request"
- ]
+ "data.describe?"
+ ],
+ "id": "6c3839e6-c51e-4df9-94e4-3561edd8b34e"
},
{
"cell_type": "code",
@@ -1892,8 +2221,11 @@
"metadata": {},
"outputs": [],
"source": [
- "urllib.request.urlretrieve('https://energydata.info/dataset/f85d1796-e7f2-4630-be84-79420174e3bd/resource/6e640a13-cab4-457b-b9e6-0336051bac27/download/healthmopupandbaselinenmisfacility.csv', 'healthmopupandbaselinenmisfacility.csv')"
- ]
+ "import matplotlib.pyplot as plt\n",
+ "import mlai\n",
+ "import mlai.plot as plot"
+ ],
+ "id": "0cfceef7-07d5-4ea3-ac4d-2f21e2d1bf90"
},
{
"cell_type": "code",
@@ -1901,27 +2233,45 @@
"metadata": {},
"outputs": [],
"source": [
- "import pandas as pd"
- ]
+ "fig, ax = plt.subplots(figsize=plot.big_figsize)\n",
+ "ax.plot(data.longitude, data.latitude, 'ro', alpha=0.01)\n",
+ "ax.set_xlabel('longitude')\n",
+ "ax.set_ylabel('latitude')\n",
+ "\n",
+ "mlai.write_figure('nigerian-health-facilities.png', directory='./ml')"
+ ],
+ "id": "014ae4a0-a536-45f6-bcb9-a2539443da7a"
},
{
- "cell_type": "code",
- "execution_count": null,
+ "cell_type": "markdown",
"metadata": {},
- "outputs": [],
"source": [
- "data = pd.read_csv('healthmopupandbaselinenmisfacility.csv')"
- ]
+ "\n",
+ "\n",
+ "Figure: Location of the over thirty-four thousand health facilities\n",
+ "registered in the NMIS data across Nigeria. Each facility plotted\n",
+ "according to its latitude and longitude."
+ ],
+ "id": "9cd1a369-dae5-47b0-8569-3e64fe75d478"
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
- "data.head()}\n",
+ "## Nigeria NMIS Data Classification\n",
+ "\n",
+ "\\[edit\\]\n",
+ "\n",
+ "Our aim will be to predict whether a center has maternal health delivery\n",
+ "services given the attributes in the data. We will predict of the number\n",
+ "of nurses, the number of doctors, location etc.\n",
"\n",
"Now we will convert this data into a form which we can use as inputs\n",
"`X`, and labels `y`."
- ]
+ ],
+ "id": "937de532-2807-4fb2-a6b1-206e40000952"
},
{
"cell_type": "code",
@@ -1931,7 +2281,8 @@
"source": [
"import pandas as pd\n",
"import numpy as np"
- ]
+ ],
+ "id": "0430daad-15a2-4ec7-9449-7d236196d0c3"
},
{
"cell_type": "code",
@@ -1971,7 +2322,8 @@
" type_names.append(type_col)\n",
" X.loc[:, type_col] = 0.0 \n",
" X.loc[index, type_col] = 1.0"
- ]
+ ],
+ "id": "f98f40df-bedc-412e-b41b-f28f61a931c5"
},
{
"cell_type": "markdown",
@@ -1979,7 +2331,8 @@
"source": [
"This has given us a new data frame `X` which contains the different\n",
"facility types in different columns."
- ]
+ ],
+ "id": "3e988efb-ad6e-495d-aabb-5f7156a63d34"
},
{
"cell_type": "code",
@@ -1988,18 +2341,23 @@
"outputs": [],
"source": [
"X.describe()"
- ]
+ ],
+ "id": "b8abbaa8-895e-46e8-a317-d6172a47c7bc"
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
- "Batch Gradient Descent\n",
- "----------------------\n",
+ "## Batch Gradient Descent\n",
+ "\n",
+ "\\[edit\\]\n",
"\n",
"We will need to define some initial random values for our vector and\n",
"then minimize the objective by descending the gradient."
- ]
+ ],
+ "id": "44b9c1d3-96d5-43d9-926e-91675c287bfb"
},
{
"cell_type": "code",
@@ -2016,7 +2374,8 @@
"y_train = y.iloc[train_indices]==True\n",
"X_test = X.iloc[test_indices]\n",
"y_test = y.iloc[test_indices]==True"
- ]
+ ],
+ "id": "aa26d80d-870a-45d8-8684-eb461e174816"
},
{
"cell_type": "code",
@@ -2025,7 +2384,8 @@
"outputs": [],
"source": [
"import numpy as np"
- ]
+ ],
+ "id": "2575fcd7-a481-4e48-86c9-3f792a9cb22d"
},
{
"cell_type": "code",
@@ -2042,14 +2402,16 @@
" w -= eta*gradient(g, Phi, y_train) + 0.001*w\n",
" if not i % 100:\n",
" print(\"Iter\", i, \"Objective\", objective(g, y_train))"
- ]
+ ],
+ "id": "1d1e7f7d-5770-4945-97e7-d21a13a43c26"
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Let’s look at the weights and how they relate to the inputs."
- ]
+ ],
+ "id": "6ead36cd-b058-4074-9497-68466b7a670b"
},
{
"cell_type": "code",
@@ -2058,7 +2420,8 @@
"outputs": [],
"source": [
"import matplotlib.pyplot as plt"
- ]
+ ],
+ "id": "870c9b08-9824-4454-90f1-0105c16fcc98"
},
{
"cell_type": "code",
@@ -2067,7 +2430,8 @@
"outputs": [],
"source": [
"print(w)"
- ]
+ ],
+ "id": "6510bfe7-92a4-40a3-8947-4c729b875c41"
},
{
"cell_type": "markdown",
@@ -2076,7 +2440,8 @@
"What does the magnitude of the weight vectors tell you about the\n",
"different parameters and their influence on outcome? Are the weights of\n",
"roughly the same size, if not, how might you fix this?"
- ]
+ ],
+ "id": "01db6c1f-5a2c-4898-85b7-ff8621aa157d"
},
{
"cell_type": "code",
@@ -2086,35 +2451,38 @@
"source": [
"g_test, Phi_test = predict(w, X_test, linear)\n",
"np.sum(g_test[y_test]>0.5)"
- ]
+ ],
+ "id": "44d5f14c-13ec-4f62-a3d2-f74715489d98"
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
- "Stochastic Gradient Descent\n",
- "---------------------------"
- ]
+ "## Stochastic Gradient Descent"
+ ],
+ "id": "06a9630a-a16c-4055-a54d-bfcbb6e8f6fa"
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
- "### Exercise 2\n",
+ "### Exercise 1\n",
"\n",
"Now construct a stochastic gradient descent algorithm and run it on the\n",
"data. Is it faster or slower than batch gradient descent? What can you\n",
"do to improve convergence speed?"
- ]
+ ],
+ "id": "28632e1a-c44d-4a1c-867b-538223ccd25f"
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
- "### Exercise 2 Answer\n",
+ "### Exercise 1 Answer\n",
"\n",
- "Write your answer to Exercise 2 here"
- ]
+ "Write your answer to Exercise 1 here"
+ ],
+ "id": "b7e7675c-3322-4ece-8558-42b9922f7752"
},
{
"cell_type": "code",
@@ -2124,14 +2492,18 @@
"source": [
"# Use this box for any code you need\n",
"\n"
- ]
+ ],
+ "id": "a01df952-0b9d-4ee2-afd6-fa5f8f817f9a"
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
- "Regression\n",
- "----------\n",
+ "## Regression\n",
+ "\n",
+ "\\[edit\\]\n",
"\n",
"Classification is the case where our prediction function gives a\n",
"discrete valued output, normally associated with a ‘class’. Regression\n",
@@ -2143,14 +2515,18 @@
"is the practice of predicting a function value between existing data,\n",
"and ‘extrapolation’, which is the practice of predicting a function\n",
"value beyond the regime where we have data."
- ]
+ ],
+ "id": "0ebb2fa8-37fb-4662-9878-b1a45c7a6829"
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
- "Regression Examples\n",
- "-------------------\n",
+ "## Regression Examples\n",
+ "\n",
+ "\\[edit\\]\n",
"\n",
"Regression involves predicting a real value, $y_i$, given an input\n",
"vector, $\\mathbf{ x}_i$. For example, the Tecator data involves\n",
@@ -2159,14 +2535,18 @@
"to age measured through a back-trace of tree rings. Regression has also\n",
"been used to predict the quality of board game moves given expert rated\n",
"training data."
- ]
+ ],
+ "id": "8f5d6d3d-6c88-46f2-8a97-38c2d62873d8"
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
- "Supervised Learning Challenges\n",
- "------------------------------\n",
+ "## Supervised Learning Challenges\n",
+ "\n",
+ "\\[edit\\]\n",
"\n",
"There are three principal challenges in constructing a problem for\n",
"supervised learning.\n",
@@ -2175,14 +2555,18 @@
" prediction\n",
"2. defining the appropriate *class of function*, $f(\\cdot)$.\n",
"3. selecting the right parameters, $\\mathbf{ w}$."
- ]
+ ],
+ "id": "bd3ff95d-3286-4125-b694-39d9d027ca0d"
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
- "Feature Selection\n",
- "-----------------\n",
+ "## Feature Selection\n",
+ "\n",
+ "\\[edit\\]\n",
"\n",
"Feature selection is a critical stage in the algorithm design process.\n",
"In the Olympic prediction example above we’re only using time to predict\n",
@@ -2209,14 +2593,18 @@
"Google’s success. These algorithms are in turn highly dependent on the\n",
"feature sets used. Facebook in particular has made heavy investments in\n",
"machine learning pipelines for evaluation of the feature utility."
- ]
+ ],
+ "id": "03b44f56-f39a-44ed-be14-5e3416d6f484"
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
- "Class of Function, $f(\\cdot)$\n",
- "-----------------------------\n",
+ "## Class of Function, $f(\\cdot)$\n",
+ "\n",
+ "\\[edit\\]\n",
"\n",
"By class of function we mean, what are the characteristics of the\n",
"mapping between $\\mathbf{x}$ and $y$. Often, we might choose it to be a\n",
@@ -2224,16 +2612,42 @@
"the prediction is a forecast, for example the demand of a particular\n",
"product, then the function would need some periodic components to\n",
"reflect seasonal or weekly effects."
- ]
+ ],
+ "id": "01f25a9e-f921-4558-be81-f2d33b69f34d"
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
- "Analysis of US Birth Rates\n",
- "--------------------------\n",
+ "## Analysis of US Birth Rates\n",
+ "\n",
+ "\\[edit\\]\n",
+ "\n",
+ "\n",
+ "\n",
+ "\n",
"\n",
"Figure: This is a retrospective analysis of US births by Aki Vehtari.\n",
"The challenges of forecasting. Even with seasonal and weekly effects\n",
@@ -2248,12 +2662,12 @@
"
\n",
"
\n",
"\n",
- "\n",
+ "\n",
"\n",
"
\n",
"
\n",
"\n",
- "\n",
+ "\n",
"\n",
"
\n",
"
\n",
@@ -2297,14 +2711,18 @@
"data](http://lib.stat.cmu.edu/datasets/tecator), where the fat, water\n",
"and protein content of meat samples was predicted as a function of the\n",
"absorption of infrared light."
- ]
+ ],
+ "id": "b8308303-dc26-46ec-b6ed-6899d7844d16"
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
- "Class of Function: Neural Networks\n",
- "----------------------------------\n",
+ "## Class of Function: Neural Networks\n",
+ "\n",
+ "\\[edit\\]\n",
"\n",
"One class of function that has become popular recently is neural network\n",
"functions, in particular deep neural networks. The ImageNet challenge\n",
@@ -2315,14 +2733,18 @@
"improve performance so much, particularly when we know that rotational\n",
"invariances and scale invariances are also applicable for object\n",
"detection in images."
- ]
+ ],
+ "id": "c0e7ac43-9db2-480b-8050-dad26543a06a"
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
- "Deep Learning\n",
- "=============\n",
+ "# Deep Learning\n",
+ "\n",
+ "\\[edit\\]\n",
"\n",
"Classical statistical models and simple machine learning models have a\n",
"great deal in common. The main difference between the fields is\n",
@@ -2351,28 +2773,33 @@
"prediction) rather than an end in themselves (interpretable).\n",
"\n",
""
- ]
+ ],
+ "id": "5ff7b9b4-23cb-4918-b392-d46374331236"
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
- "DeepFace\n",
- "--------\n",
+ "## DeepFace\n",
"\n",
- "\n",
+ "\\[edit\\]\n",
+ "\n",
+ "\n",
"\n",
"Figure: The DeepFace architecture (Taigman et al., 2014), visualized\n",
"through colors to represent the functional mappings at each layer. There\n",
"are 120 million parameters in the model.\n",
"\n",
"The DeepFace architecture (Taigman et al., 2014) consists of layers that\n",
- "deal with *translation* and *rotational* invariances. These layers are\n",
- "followed by three locally-connected layers and two fully-connected\n",
- "layers. Color illustrates feature maps produced at each layer. The\n",
- "neural network includes more than 120 million parameters, where more\n",
- "than 95% come from the local and fully connected layers."
- ]
+ "deal with *translation* invariances, known as convolutional layers.\n",
+ "These layers are followed by three locally-connected layers and two\n",
+ "fully-connected layers. Color illustrates feature maps produced at each\n",
+ "layer. The neural network includes more than 120 million parameters,\n",
+ "where more than 95% come from the local and fully connected layers."
+ ],
+ "id": "0c0296a0-481d-4260-91bc-4e3db4738ba1"
},
{
"cell_type": "markdown",
@@ -2380,7 +2807,11 @@
"source": [
"### Deep Learning as Pinball\n",
"\n",
- "\n",
+ "\\[edit\\]\n",
+ "\n",
+ "\n",
"\n",
"Figure: Deep learning models are composition of simple functions. We\n",
"can think of a pinball machine as an analogy. Each layer of pins\n",
@@ -2408,41 +2839,15 @@
"the decision: a classification of the input object.\n",
"\n",
"An image has more than one number associated with it, so it is like\n",
- "playing pinball in a *hyper-space*."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "import pods\n",
- "from ipywidgets import IntSlider"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "pods.notebook.display_plots('pinball{sample:0>3}.svg', \n",
- " directory='.',\n",
- " sample=IntSlider(1, 1, 2, 1))"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "\n",
+ "playing pinball in a *hyper-space*.\n",
+ "\n",
+ "\n",
"\n",
"Figure: At initialization, the pins, which represent the parameters\n",
"of the function, aren’t in the right place to bring the balls to the\n",
"correct decisions.\n",
"\n",
- "\n",
+ "\n",
"\n",
"Figure: After learning the pins are now in the right place to bring\n",
"the balls to the correct decisions.\n",
@@ -2467,14 +2872,14 @@
"of possible paths for the ball through the machine. This helps to make\n",
"them more data efficient and gives some robustness to adversarial\n",
"examples."
- ]
+ ],
+ "id": "1ace69b4-8e46-47ba-810b-de1f3ba5a386"
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
- "Encoding Knowledge\n",
- "------------------\n",
+ "## Encoding Knowledge\n",
"\n",
"Knowledge that is not encoded in the prediction function must be learned\n",
"through data. So any unspecified invariance (such as rotational or scale\n",
@@ -2487,14 +2892,18 @@
"Unfortunately many invariances are non-trivial to incorporate and many\n",
"machine learning algorithms focus on simpler concepts such as linearity\n",
"or smoothness."
- ]
+ ],
+ "id": "6a718029-1218-40eb-b07c-c6758d5a2ab7"
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
- "Parameter Estimation: Objective Functions\n",
- "-----------------------------------------\n",
+ "## Parameter Estimation: Objective Functions\n",
+ "\n",
+ "\\[edit\\]\n",
"\n",
"Once we have a set of features, and the class of functions we use is\n",
"determined, we need to find the parameters of the model.\n",
@@ -2543,14 +2952,18 @@
"about the population that we don’t want our models to have. For example,\n",
"if we design a face detector using Californians may not perform well\n",
"when deployed in Kampala, Uganda."
- ]
+ ],
+ "id": "e13f7a32-f947-4c3d-9ed6-0f6af02e9bf3"
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
- "Generalization and Overfitting\n",
- "------------------------------\n",
+ "## Generalization and Overfitting\n",
+ "\n",
+ "\\[edit\\]\n",
"\n",
"Once a supervised learning system is trained it can be placed in a\n",
"sequential pipeline to automate a process that used to be done manually.\n",
@@ -2570,15 +2983,20 @@
"systems given only its training data is known as its *generalization*\n",
"ability. This is the system’s ability to predict in areas where it\n",
"hasn’t previously seen data."
- ]
+ ],
+ "id": "53de6c61-c8f8-446e-98bc-3e40ed1a7702"
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
- "Hold Out Validation on Olympic Marathon Data\n",
- "--------------------------------------------"
- ]
+ "## Hold Out Validation on Olympic Marathon Data\n",
+ "\n",
+ "\\[edit\\]"
+ ],
+ "id": "425f4006-dd14-464d-afd1-9b8efeac2341"
},
{
"cell_type": "code",
@@ -2586,9 +3004,10 @@
"metadata": {},
"outputs": [],
"source": [
- "import teaching_plots as plot\n",
+ "import mlai.plot as plot\n",
"import mlai"
- ]
+ ],
+ "id": "5adf571a-abbf-4354-a3ac-9c3510d94adb"
},
{
"cell_type": "code",
@@ -2599,7 +3018,8 @@
"data_limits=xlim\n",
"basis = mlai.Basis(mlai.polynomial, number=1, data_limits=data_limits)\n",
"max_basis = 11"
- ]
+ ],
+ "id": "bde20820-f545-4e18-807c-079c80f6e008"
},
{
"cell_type": "code",
@@ -2613,7 +3033,8 @@
" permute=False, objective_ylim=[0, 0.8], \n",
" xlim=data_limits, prefix='olympic_val_extra', \n",
" diagrams='./ml')"
- ]
+ ],
+ "id": "b2748703-fee0-4ea9-a97b-495ba1d87758"
},
{
"cell_type": "code",
@@ -2621,9 +3042,20 @@
"metadata": {},
"outputs": [],
"source": [
- "import pods\n",
+ "import notutils as nu\n",
"from ipywidgets import IntSlider"
- ]
+ ],
+ "id": "d32986a9-7b20-43ba-b741-0072a37d1fbb"
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import notutils as nu"
+ ],
+ "id": "7340c357-53b0-41ac-89c9-ea4c8404b923"
},
{
"cell_type": "code",
@@ -2631,36 +3063,38 @@
"metadata": {},
"outputs": [],
"source": [
- "pods.notebook.display_plots('olympic_val_extra_LM_polynomial_number{num_basis:0>3}.svg', \n",
+ "nu.display_plots('olympic_val_extra_LM_polynomial_number{num_basis:0>3}.svg', \n",
" directory='./ml', \n",
" num_basis=IntSlider(1, 1, max_basis, 1))"
- ]
+ ],
+ "id": "9069af87-0001-493a-bdbd-1ac96b9f2256"
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
- "\n",
+ "\n",
"\n",
"Figure: Olympic marathon data with validation error for\n",
"extrapolation."
- ]
+ ],
+ "id": "cbfa2012-ff62-4d36-8aa7-2297054fbb99"
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
- "Extrapolation\n",
- "-------------"
- ]
+ "## Extrapolation"
+ ],
+ "id": "0812c04b-6400-40be-8c8b-1b620e154988"
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
- "Interpolation\n",
- "-------------"
- ]
+ "## Interpolation"
+ ],
+ "id": "dfe1915e-64ba-45dd-84c7-04abc69d810d"
},
{
"cell_type": "code",
@@ -2668,8 +3102,9 @@
"metadata": {},
"outputs": [],
"source": [
- "import teaching_plots as plot"
- ]
+ "import mlai.plot as plot"
+ ],
+ "id": "2d83c4fe-9592-45c8-abdd-2f3cb388e864"
},
{
"cell_type": "code",
@@ -2682,7 +3117,8 @@
" xlim=data_limits, prefix='olympic_val_inter', \n",
" objective_ylim=[0.1, 0.6], permute=True,\n",
" diagrams='./ml')"
- ]
+ ],
+ "id": "d76c707f-77b0-4685-ae7f-50348350bb1f"
},
{
"cell_type": "code",
@@ -2690,9 +3126,20 @@
"metadata": {},
"outputs": [],
"source": [
- "import pods\n",
+ "import notutils as nu\n",
"from ipywidgets import IntSlider"
- ]
+ ],
+ "id": "7addac2a-cfc6-4e3d-ba6a-dddbd47e302a"
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import notutils as nu"
+ ],
+ "id": "d989ef7b-a43d-42c8-9286-6db56d08c0af"
},
{
"cell_type": "code",
@@ -2700,35 +3147,36 @@
"metadata": {},
"outputs": [],
"source": [
- "pods.notebook.display_plots('olympic_val_inter_LM_polynomial_number{num_basis:0>3}.svg', \n",
+ "nu.display_plots('olympic_val_inter_LM_polynomial_number{num_basis:0>3}.svg', \n",
" directory='./ml', \n",
" num_basis=IntSlider(1, 1, max_basis, 1))"
- ]
+ ],
+ "id": "b59b2697-5e33-4e89-a0d1-3fb11a8ac883"
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
- "\n",
+ "\n",
"\n",
"Figure: Olympic marathon data with validation error for\n",
"interpolation."
- ]
+ ],
+ "id": "c0e581bf-2c62-49cd-ab1a-89ebcf70436e"
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
- "Choice of Validation Set\n",
- "------------------------"
- ]
+ "## Choice of Validation Set"
+ ],
+ "id": "da946f13-a766-46c9-948b-2062fd5aa181"
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
- "Hold Out Data\n",
- "-------------\n",
+ "## Hold Out Data\n",
"\n",
"You have a conclusion as to which model fits best under the training\n",
"error, but how do the two models perform in terms of validation? In this\n",
@@ -2739,7 +3187,8 @@
"end of the time series. This means that we are validating on future\n",
"predictions. We will hold out data from after 1980 and fit the model to\n",
"the data before 1980."
- ]
+ ],
+ "id": "b8f80e63-32c2-4212-bd5c-d73ba0162271"
},
{
"cell_type": "code",
@@ -2757,18 +3206,20 @@
"# Create a hold out set\n",
"x_valid = np.take(x, indices_hold_out, axis=0)\n",
"y_valid = np.take(y, indices_hold_out, axis=0)"
- ]
+ ],
+ "id": "034011c6-70d8-4108-91d6-faff141a5fc8"
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
- "### Exercise 3\n",
+ "### Exercise 2\n",
"\n",
"For both the linear and quadratic models, fit the model to the data up\n",
"until 1980 and then compute the error on the held out data (from 1980\n",
"onwards). Which model performs better on the validation data?"
- ]
+ ],
+ "id": "a852ebf7-066a-45b8-bc0c-f8b7714e53e8"
},
{
"cell_type": "code",
@@ -2776,27 +3227,30 @@
"metadata": {},
"outputs": [],
"source": [
- "# Write your answer to Exercise 3 here\n",
+ "# Write your answer to Exercise 2 here\n",
+ "\n",
+ "\n",
"\n",
"\n"
- ]
+ ],
+ "id": "66865dc3-1b9c-4a21-a201-fe708e79b48b"
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
- "Richer Basis Set\n",
- "----------------\n",
+ "## Richer Basis Set\n",
"\n",
"Now we have an approach for deciding which model to retain, we can\n",
"consider the entire family of polynomial bases, with arbitrary degrees."
- ]
+ ],
+ "id": "584fb21e-4c64-4b3a-b26a-1fdff78874a8"
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
- "### Exercise 4\n",
+ "### Exercise 3\n",
"\n",
"Now we are going to build a more sophisticated form of basis function,\n",
"one that can accept arguments to its inputs (similar to those we used in\n",
@@ -2824,7 +3278,8 @@
"\n",
"Which polynomial has the minimum training error? Which polynomial has\n",
"the minimum validation error?"
- ]
+ ],
+ "id": "a6c156e5-fee4-4ed7-bbe3-081b73e0c27c"
},
{
"cell_type": "code",
@@ -2832,27 +3287,61 @@
"metadata": {},
"outputs": [],
"source": [
- "# Write your answer to Exercise 4 here\n",
+ "# Write your answer to Exercise 3 here\n",
+ "\n",
+ "\n",
"\n",
"\n"
- ]
+ ],
+ "id": "eba7b4b5-1e20-477d-bdd2-9237efacb665"
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
- "Bias Variance Decomposition\n",
- "---------------------------\n",
- "\n",
- "The bias-variance decomposition considers the expected test error for\n",
- "different variations of the *training data* sampled from,\n",
- "$\\Pr(\\mathbf{ y}, y)$ $$\n",
- "\\mathbb{E}\\left[ \\left(y- f^*(\\mathbf{ y})\\right)^2 \\right].\n",
- "$$ This can be decomposed into two parts, $$\n",
- "\\mathbb{E}\\left[ \\left(y- f(\\mathbf{ y})\\right)^2 \\right] = \\text{bias}\\left[f^*(\\mathbf{ y})\\right]^2 + \\text{variance}\\left[f^*(\\mathbf{ y})\\right] +\\sigma^2,\n",
+ "## Bias Variance Decomposition\n",
+ "\n",
+ "\\[edit\\]\n",
+ "\n",
+ "One of Breiman’s ideas for improving predictive performance is known as\n",
+ "bagging (**Breiman:bagging96?**). The idea is to train a number of\n",
+ "models on the data such that they overfit (high variance). Then average\n",
+ "the predictions of these models. The models are trained on different\n",
+ "bootstrap samples (Efron, 1979) and their predictions are aggregated\n",
+ "giving us the acronym, Bagging. By combining decision trees with\n",
+ "bagging, we recover random forests (Breiman, 2001).\n",
+ "\n",
+ "Bias and variance can also be estimated through Efron’s bootstrap\n",
+ "(Efron, 1979), and the traditional view has been that there’s a form of\n",
+ "Goldilocks effect, where the best predictions are given by the model\n",
+ "that is ‘just right’ for the amount of data available. Not to simple,\n",
+ "not too complex. The idea is that bias decreases with increasing model\n",
+ "complexity and variance increases with increasing model complexity.\n",
+ "Typically plots begin with the Mummy bear on the left (too much bias)\n",
+ "end with the Daddy bear on the right (too much variance) and show a dip\n",
+ "in the middle where the Baby bear (just) right finds themselves.\n",
+ "\n",
+ "The Daddy bear is typically positioned at the point where the model can\n",
+ "exactly interpolate the data. For a generalized linear model (McCullagh\n",
+ "and Nelder, 1989), this is the point at which the number of parameters\n",
+ "is equal to the number of data[1].\n",
+ "\n",
+ "The bias-variance decomposition (**Geman:biasvariance92?**) considers\n",
+ "the expected test error for different variations of the *training data*\n",
+ "sampled from, $\\mathbb{P}(\\mathbf{ x}, y)$ $$\\begin{align*}\n",
+ "R(\\mathbf{ w}) = & \\int \\left(y- f^*(\\mathbf{ x})\\right)^2 \\mathbb{P}(y, \\mathbf{ x}) \\text{d}y\\text{d}\\mathbf{ x}\\\\\n",
+ "& \\triangleq \\mathbb{E}\\left[ \\left(y- f^*(\\mathbf{ x})\\right)^2 \\right].\n",
+ "\\end{align*}$$\n",
+ "\n",
+ "This can be decomposed into two parts, $$\n",
+ "\\begin{align*}\n",
+ "\\mathbb{E}\\left[ \\left(y- f(\\mathbf{ x})\\right)^2 \\right] = & \\text{bias}\\left[f^*(\\mathbf{ x})\\right]^2 + \\text{variance}\\left[f^*(\\mathbf{ x})\\right] +\\sigma^2,\n",
+ "\\end{align*}\n",
"$$ where the bias is given by $$\n",
- " \\text{bias}\\left[f^*(\\mathbf{ y})\\right] =\n",
- "\\mathbb{E}\\left[f^*(\\mathbf{ y})\\right] * f(\\mathbf{ y})\n",
+ " \\text{bias}\\left[f^*(\\mathbf{ x})\\right] =\n",
+ "\\mathbb{E}\\left[f^*(\\mathbf{ x})\\right] - f(\\mathbf{ x})\n",
"$$ and it summarizes error that arises from the model’s inability to\n",
"represent the underlying complexity of the data. For example, if we were\n",
"to model the marathon pace of the winning runner from the Olympics by\n",
@@ -2861,9 +3350,9 @@
"changing (typically getting faster).\n",
"\n",
"The variance term is given by $$\n",
- " \\text{variance}\\left[f^*(\\mathbf{ y})\\right] = \\mathbb{E}\\left[\\left(f^*(\\mathbf{ y}) - \\mathbb{E}\\left[f^*(\\mathbf{ y})\\right]\\right)^2\\right].\n",
+ " \\text{variance}\\left[f^*(\\mathbf{ x})\\right] = \\mathbb{E}\\left[\\left(f^*(\\mathbf{ x}) - \\mathbb{E}\\left[f^*(\\mathbf{ x})\\right]\\right)^2\\right].\n",
" $$ The variance term is often described as arising from a model that\n",
- "is too complex, but we have to be careful with this idea. Is the model\n",
+ "is too complex, but we must be careful with this idea. Is the model\n",
"really too complex relative to the real world that generates the data?\n",
"The real world is a complex place, and it is rare that we are\n",
"constructing mathematical models that are more complex than the world\n",
@@ -2873,18 +3362,26 @@
"\n",
"Models that exhibit high variance are sometimes said to ‘overfit’ the\n",
"data whereas models that exhibit high bias are sometimes described as\n",
- "‘underfitting’ the data."
- ]
+ "‘underfitting’ the data.\n",
+ "\n",
+ "[1] Assuming we are ignoring parameters in the link function and the\n",
+ "distribution function."
+ ],
+ "id": "63624ab4-041a-45f7-b946-1e495c8370f0"
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
- "Bias vs Variance Error Plots\n",
- "----------------------------\n",
+ "## Bias vs Variance Error Plots\n",
+ "\n",
+ "\\[edit\\]\n",
"\n",
"Helper function for sampling data from two different classes."
- ]
+ ],
+ "id": "e45d82ef-8887-4e6e-8e9f-f4a71785ca9b"
},
{
"cell_type": "code",
@@ -2893,7 +3390,8 @@
"outputs": [],
"source": [
"import numpy as np"
- ]
+ ],
+ "id": "aab7dd96-6e9b-4ec8-a645-ef3c2a58cb79"
},
{
"cell_type": "code",
@@ -2921,14 +3419,16 @@
" X.append(np.random.multivariate_normal(mean=mean, cov=neg_cov, size=per_class))\n",
" y.append(np.zeros((per_class, 1)))\n",
" return np.vstack(X), np.vstack(y).flatten()"
- ]
+ ],
+ "id": "20c656f1-eb1e-4f7a-9e43-3cdd586ca96b"
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Helper function for plotting the decision boundary of the SVM."
- ]
+ ],
+ "id": "f2fb1716-bc03-4b2c-93a6-f53a484ea553"
},
{
"cell_type": "code",
@@ -2956,7 +3456,8 @@
" levels=[Z.min(), 0, Z.max()], \n",
" colors=[[0.5, 1.0, 0.5], [1.0, 0.5, 0.5]])\n",
" return out"
- ]
+ ],
+ "id": "2630022b-fc32-4e03-a44b-2d192cb4c5de"
},
{
"cell_type": "code",
@@ -2965,7 +3466,8 @@
"outputs": [],
"source": [
"import urllib.request"
- ]
+ ],
+ "id": "9b9ff14a-80ac-44fa-838c-460f461496f6"
},
{
"cell_type": "code",
@@ -2974,7 +3476,8 @@
"outputs": [],
"source": [
"urllib.request.urlretrieve('https://raw.githubusercontent.com/lawrennd/talks/gh-pages/mlai.py','mlai.py')"
- ]
+ ],
+ "id": "f5b556c7-f847-44b5-a153-c2fbba6f9056"
},
{
"cell_type": "code",
@@ -2984,7 +3487,8 @@
"source": [
"import mlai\n",
"import os"
- ]
+ ],
+ "id": "7a17f3c0-2d9a-49dd-95f6-4982a185f251"
},
{
"cell_type": "code",
@@ -3024,7 +3528,8 @@
" figure=fig,\n",
" transparent=True)\n",
" return xlim, ylim"
- ]
+ ],
+ "id": "c2c16735-97ba-4b84-a657-44b94f6eb362"
},
{
"cell_type": "code",
@@ -3039,7 +3544,8 @@
"\n",
"matplotlib.rc('font', **font)\n",
"import matplotlib.pyplot as plt"
- ]
+ ],
+ "id": "0a824e91-4588-481f-ba67-91bf714af76c"
},
{
"cell_type": "code",
@@ -3048,7 +3554,8 @@
"outputs": [],
"source": [
"from sklearn import svm"
- ]
+ ],
+ "id": "f52940f2-43d0-48bd-b550-ee32b00c7283"
},
{
"cell_type": "code",
@@ -3082,7 +3589,8 @@
" titles=titles,\n",
" xlim=xlim,\n",
" ylim=ylim)"
- ]
+ ],
+ "id": "8f4c89ab-7d19-4aa8-8c7a-b8bd3d7b24f6"
},
{
"cell_type": "code",
@@ -3090,9 +3598,10 @@
"metadata": {},
"outputs": [],
"source": [
- "import pods\n",
+ "import notutils as nu\n",
"from ipywidgets import IntSlider"
- ]
+ ],
+ "id": "02dce558-5a9f-429d-a251-214b9052391e"
},
{
"cell_type": "code",
@@ -3100,10 +3609,21 @@
"metadata": {},
"outputs": [],
"source": [
- "pods.notebook.display_plots('bias-variance{samp:0>3}.svg', \n",
+ "import notutils as nu"
+ ],
+ "id": "7e7c8d35-89df-44ba-9f7e-768b1c272838"
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "nu.display_plots('bias-variance{samp:0>3}.svg', \n",
" directory='./ml', \n",
" samp=IntSlider(0,0,10,1))"
- ]
+ ],
+ "id": "a7592e70-22f2-4d98-a8ac-274382834dbb"
},
{
"cell_type": "markdown",
@@ -3111,22 +3631,23 @@
"source": [
"\n",
"\n",
- "\n",
+ "\n",
"\n",
"Figure: In each figure the simpler model is on the left, and the more\n",
"complex model is on the right. Each fit is done to a different version\n",
"of the data set. The simpler model is more consistent in its errors\n",
"(bias error), whereas the more complex model is varying in its errors\n",
"(variance error)."
- ]
+ ],
+ "id": "d7fca466-c7a1-42fc-8791-190d4036feae"
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
- "Overfitting\n",
- "-----------"
- ]
+ "## Overfitting"
+ ],
+ "id": "fc2f32a3-5bd4-4ef3-a93a-af313c2f17d4"
},
{
"cell_type": "code",
@@ -3136,7 +3657,8 @@
"source": [
"from IPython.lib.display import YouTubeVideo\n",
"YouTubeVideo('py8QrZPT48s')"
- ]
+ ],
+ "id": "ac21743e-61f0-49ee-807c-a3982561952f"
},
{
"cell_type": "markdown",
@@ -3180,18 +3702,23 @@
"checking their test performance more times than was permitted by the\n",
"challenge rules. This was then reported as “AI’s first doping scandal”.\n",
"The team lead was fired by Baidu."
- ]
+ ],
+ "id": "fdebad1f-4149-4ead-a294-665625e7d83f"
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
- "Olympic Data with Bayesian Polynomials\n",
- "--------------------------------------\n",
+ "## Olympic Data with Bayesian Polynomials\n",
+ "\n",
+ "\\[edit\\]\n",
"\n",
"Five fold cross validation tests the ability of the model to\n",
"*interpolate*."
- ]
+ ],
+ "id": "a3b5846d-ab1a-48f9-8970-c8fce9edbea6"
},
{
"cell_type": "code",
@@ -3201,7 +3728,8 @@
"source": [
"import mlai\n",
"import pods"
- ]
+ ],
+ "id": "fbc45598-7eb0-49cd-9989-149f81a5c8c3"
},
{
"cell_type": "code",
@@ -3209,11 +3737,12 @@
"metadata": {},
"outputs": [],
"source": [
- "data_limits = [1892, 2020]\n",
+ "data_limits = (1888, 2020)\n",
"basis = mlai.Basis(mlai.polynomial, number=1, data_limits=data_limits)\n",
"\n",
"max_basis = y.shape[0]"
- ]
+ ],
+ "id": "85f83c4c-78e8-4db5-9edd-a6e0aa61a732"
},
{
"cell_type": "code",
@@ -3221,8 +3750,9 @@
"metadata": {},
"outputs": [],
"source": [
- "import teaching_plots as plot"
- ]
+ "import mlai.plot as plot"
+ ],
+ "id": "1d1dc50f-5ede-4a6a-8d77-e3315bd9cb9b"
},
{
"cell_type": "code",
@@ -3239,7 +3769,8 @@
" xlim=data_limits, \n",
" objective_ylim=[0.5,1.6]\n",
" diagrams='./ml')"
- ]
+ ],
+ "id": "a02f847c-8ea4-4b3e-bb90-972e50c8052c"
},
{
"cell_type": "code",
@@ -3247,9 +3778,10 @@
"metadata": {},
"outputs": [],
"source": [
- "import pods\n",
+ "import notutils as nu\n",
"from ipywidgets import IntSlider"
- ]
+ ],
+ "id": "4539adcf-d410-44f8-b60f-8975c3cfa6c3"
},
{
"cell_type": "code",
@@ -3257,32 +3789,44 @@
"metadata": {},
"outputs": [],
"source": [
- "pods.notebook.display_plots('olympic_BLM_polynomial_number{num_basis:0>3}.svg', \n",
+ "import notutils as nu"
+ ],
+ "id": "a37e9c61-a993-4023-8b94-2287bd4c109a"
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "nu.display_plots('olympic_BLM_polynomial_number{num_basis:0>3}.svg', \n",
" directory='./ml/', \n",
" num_basis=IntSlider(1, 1, 27, 1))"
- ]
+ ],
+ "id": "938f00a0-c3fd-46d7-bdd9-d57a2b666e98"
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
- "\n",
+ "\n",
"\n",
"Figure: Bayesian fit with 26th degree polynomial and negative\n",
"marginal log likelihood."
- ]
+ ],
+ "id": "3c6cc03a-18a1-471c-8bad-7509d3d70ee5"
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
- "Hold Out Validation\n",
- "-------------------\n",
+ "## Hold Out Validation\n",
"\n",
"For the polynomial fit, we will now look at *hold out* validation, where\n",
"we are holding out some of the most recent points. This tests the abilit\n",
"of our model to *extrapolate*."
- ]
+ ],
+ "id": "8e785875-b1af-434a-9a50-36f4d6fcffea"
},
{
"cell_type": "code",
@@ -3290,8 +3834,9 @@
"metadata": {},
"outputs": [],
"source": [
- "import teaching_plots as plot"
- ]
+ "import mlai.plot as plot"
+ ],
+ "id": "c176a7a2-8359-453c-911e-ad2c26d15cb9"
},
{
"cell_type": "code",
@@ -3308,7 +3853,8 @@
" xlim=data_limits, \n",
" objective_ylim=[0.1,0.6], \n",
" permute=False)"
- ]
+ ],
+ "id": "97db8789-4bc4-4dc3-a143-3e732632cce6"
},
{
"cell_type": "code",
@@ -3316,9 +3862,20 @@
"metadata": {},
"outputs": [],
"source": [
- "import pods\n",
+ "import notutils as nu\n",
"from ipywidgets import IntSlider"
- ]
+ ],
+ "id": "840390ff-1445-4529-99e5-8afd80452fa7"
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import notutils as nu"
+ ],
+ "id": "7002fad1-540f-4477-92fa-64f6508ba7ba"
},
{
"cell_type": "code",
@@ -3326,31 +3883,33 @@
"metadata": {},
"outputs": [],
"source": [
- "pods.notebook.display_plots('olympic_val_BLM_polynomial_number{num_basis:0>3}.svg', \n",
+ "nu.display_plots('olympic_val_BLM_polynomial_number{num_basis:0>3}.svg', \n",
" directory='./ml', \n",
" num_basis=IntSlider(1, 1, 27, 1))"
- ]
+ ],
+ "id": "914352a9-7a16-4c9d-ab43-f52d7a83c1f4"
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
- "\n",
+ "\n",
"\n",
"Figure: Bayesian fit with 26th degree polynomial and hold out\n",
"validation scores."
- ]
+ ],
+ "id": "b74070af-8494-4b11-a75a-b49e65f89d00"
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
- "5-fold Cross Validation\n",
- "-----------------------\n",
+ "## 5-fold Cross Validation\n",
"\n",
"Five fold cross validation tests the ability of the model to\n",
"*interpolate*."
- ]
+ ],
+ "id": "466bd4dd-92b0-4a08-9f19-895ee1f47de1"
},
{
"cell_type": "code",
@@ -3368,7 +3927,8 @@
" xlim=data_limits, \n",
" objective_ylim=[0.2,0.6], \n",
" num_parts=num_parts)"
- ]
+ ],
+ "id": "ebcdaa96-a91d-410f-a668-a916ae79e2b2"
},
{
"cell_type": "code",
@@ -3376,9 +3936,20 @@
"metadata": {},
"outputs": [],
"source": [
- "import pods\n",
+ "import notutils as nu\n",
"from ipywidgets import IntSlider"
- ]
+ ],
+ "id": "e6dc3ae3-b5ce-47b8-93df-d5ec525cfd57"
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import notutils as nu"
+ ],
+ "id": "ee916e8b-2556-4a6d-b0eb-498ec7141185"
},
{
"cell_type": "code",
@@ -3386,30 +3957,52 @@
"metadata": {},
"outputs": [],
"source": [
- "pods.notebook.display_plots('olympic_5cv{part:0>2}_BLM_polynomial_number{num_basis:0>3}.svg', \n",
+ "nu.display_plots('olympic_5cv{part:0>2}_BLM_polynomial_number{num_basis:0>3}.svg', \n",
" directory='./ml', \n",
" part=(0, 5), \n",
" num_basis=IntSlider(1, 1, 27, 1))"
- ]
+ ],
+ "id": "337b3cb4-6ed4-4ae3-a506-f6ef3f40ec00"
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
- "\n",
+ "\n",
"\n",
"Figure: Bayesian fit with 26th degree polynomial and five fold cross\n",
"validation scores.\n",
"\n",
""
- ]
+ ],
+ "id": "9eef1049-191c-427b-868c-3fc49763780b"
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# Unsupervised Learning\n",
+ "\n",
+ "\\[edit\\]"
+ ],
+ "id": "db8deb78-cb13-415b-b8d0-0cfaaf1acc6c"
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
- "Unsupervised Learning\n",
- "=====================\n",
+ "## Unsupervised Learning\n",
+ "\n",
+ "Supervised learning is when your data is provided with labels. Now we\n",
+ "are going to turn to a different form of learning, commonly known as\n",
+ "*unsupervised* learning. In unsupervised learning our data isn’t\n",
+ "necessarily labelled in any form, but we want models that give us a\n",
+ "better understanding of the data. We’ve actually seen an example of this\n",
+ "already with , which we introduces in the context of *objective\n",
+ "functions*. Now we will introduce a more probabilistic approach to such\n",
+ "models, specifically we are interested in *latent variable* modelling.\n",
"\n",
"In unsupervised learning you have data, $\\mathbf{ x}$, but no labels\n",
"$y$. The aim in unsupervised learning is to extract structure from data.\n",
@@ -3418,14 +4011,14 @@
"driven by the labels. Supervised learning algorithms try and focus on\n",
"the aspects of the data which are relevant to predicting the labels. But\n",
"in unsupervised learning there are no labels."
- ]
+ ],
+ "id": "96f942db-bfc0-4f79-8346-910cf2fcbc7c"
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
- "Context\n",
- "-------\n",
+ "## Context\n",
"\n",
"Humans can easily sort a number of objects into objects that share\n",
"similar characteristics. We easily categorize animals or vehicles. But\n",
@@ -3438,14 +4031,14 @@
"algorithm that can go through its entire list of products and\n",
"automatically sort them into groups such that similar products are\n",
"located together."
- ]
+ ],
+ "id": "018faf47-8e39-415b-b0de-b283afb1a018"
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
- "Discrete vs Continuous\n",
- "----------------------\n",
+ "## Discrete vs Continuous\n",
"\n",
"Supervised learning is broadly divided into classification: i.e. wake\n",
"word classification in the Amazon Echo, and regression, e.g. shelf life\n",
@@ -3453,15 +4046,239 @@
"broadly split into methods that cluster the data (i.e. provide a\n",
"discrete label) and methods that represent the data as a continuous\n",
"value."
- ]
+ ],
+ "id": "2bc98508-b769-42c3-a84c-deab4daf067c"
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
- "Clustering\n",
- "----------\n",
+ "## Clustering\n",
+ "\n",
+ "\\[edit\\]\n",
+ "\n",
+ "- One common approach, not deeply covered in this course.\n",
+ "\n",
+ "- Associate each data point, $\\mathbf{ y}_{i, :}$ with one of $k$\n",
+ " different discrete groups.\n",
+ "\n",
+ "- For example:\n",
+ "\n",
+ " - Clustering animals into discrete groups. Are animals discrete or\n",
+ " continuous?\n",
+ " - Clustering into different different *political* affiliations.\n",
+ "\n",
+ "- Humans do seem to like clusters:\n",
+ "\n",
+ " - Very useful when interacting with biologists.\n",
+ "\n",
+ "- Subtle difference between clustering and *vector quantisation*\n",
+ "\n",
+ "- Little anecdote.\n",
+ "\n",
+ "- To my mind difference is in clustering there should be a reduction\n",
+ " in data density between samples.\n",
+ "\n",
+ "- This definition is not universally applied.\n",
+ "\n",
+ "- For today’s purposes we merge them:\n",
+ "\n",
+ " - Determine how to allocate each point to a group and *harder*\n",
+ " total number of groups.\n",
+ "\n",
+ "- Simple algorithm for allocating points to groups.\n",
+ "\n",
+ "- *Require*: Set of $k$ cluster centres & assignment of each points to\n",
+ " a cluster.\n",
+ "\n",
+ "1. Initialize cluster centres as randomly selected data points.\n",
+ " 1. Assign each data point to *nearest* cluster centre.\n",
+ " 2. Update each cluster centre by setting it to the mean of assigned\n",
+ " data points.\n",
+ " 3. Repeat 2 and 3 until cluster allocations do not change.\n",
+ "\n",
+ "- This minimizes the objective $$\n",
+ " E=\\sum_{j=1}^K \\sum_{i\\ \\text{allocated to}\\ j} \\left(\\mathbf{ y}_{i, :} - \\boldsymbol{ \\mu}_{j, :}\\right)^\\top\\left(\\mathbf{ y}_{i, :} - \\boldsymbol{ \\mu}_{j, :}\\right)\n",
+ " $$ *i.e.* it minimizes thesum of Euclidean squared distances betwen\n",
+ " points and their associated centres.\n",
+ "- The minimum is *not* guaranteed to be *global* or *unique*.\n",
+ "- This objective is a non-convex optimization problem."
+ ],
+ "id": "bdc56e87-d8f2-484e-8907-ee491e88fed1"
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import mlai\n",
+ "import numpy as np"
+ ],
+ "id": "0aac7a9d-fac4-47af-b653-e44903a865ad"
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def write_plot(counter, caption):\n",
+ " directory = \"./ml\"\n",
+ " filestub = f\"kmeans_clustering_{counter:0>3}\"\n",
+ " mlai.write_figure(filestub+\".svg\", directory=directory)\n",
+ " f = open(os.path.join(directory,filestub) + '.md', 'w')\n",
+ " f.write(caption)\n",
+ " f.close()"
+ ],
+ "id": "2822a82e-2a6f-4f11-b9ce-5239214694a1"
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "fig, ax = plt.subplots(figsize=(5,5))\n",
+ "fontsize = 20\n",
+ "\n",
+ "num_clust_points = 30\n",
+ "\n",
+ "Y = np.vstack([np.random.normal(size=(num_clust_points, 2)) + 2.5,\n",
+ " np.random.normal(size=(num_clust_points, 2)) - 2.5,\n",
+ " np.random.normal(size=(num_clust_points, 2)) + np.array([2.5, -2.5])])\n",
+ "\n",
+ "centre_inds = np.random.permutation(Y.shape[0])[:3]\n",
+ "centres = Y[centre_inds, :]\n",
+ "\n",
+ "ax.cla()\n",
+ "\n",
+ "ax.plot(Y[:, 0], Y[:, 1], '.', color=[0, 0, 0], markersize=10)\n",
+ "ax.set_xlabel('$y_1$')\n",
+ "ax.set_ylabel('$y_2$')\n",
+ "ax.set_title('Data')\n",
+ "counter=0\n",
+ "write_plot(counter, 'Data set to be analyzed. Initialize cluster centres.')\n",
+ "ax.plot(centres[:, 0], centres[:, 1], 'o', color=[0,0,0], linewidth=3, markersize=12) \n",
+ "counter+=1\n",
+ "write_plot(counter, 'Allocate each point to the cluster with the nearest centre')\n",
+ "i = 0\n",
+ "\n",
+ "for i in range(6):\n",
+ " dist_mat = ((Y[:, :, None] - centres.T[None, :, :])**2).sum(1)\n",
+ " ind = dist_mat.argmin(1)\n",
+ " ax.cla()\n",
+ " ax.plot(Y[ind==0, 0], Y[ind==0, 1], 'x', color= [1, 0, 0], markersize=10)\n",
+ " ax.plot(Y[ind==1, 0], Y[ind==1, 1], 'o', color=[0, 1, 0], markersize=10)\n",
+ " ax.plot(Y[ind==2, 0], Y[ind==2, 1], '+', color=[0, 0, 1], markersize=10)\n",
+ " c = ax.plot(centres[:, 0], centres[:, 1], 'o', color=[0,0, 0], markersize=12, linewidth=3)\n",
+ " ax.set_xlabel('$y_1$',fontsize=fontsize)\n",
+ " ax.set_ylabel('$y_2$',fontsize=fontsize)\n",
+ " ax.set_title('Iteration ' + str(i))\n",
+ " counter+=1\n",
+ " write_plot(counter, 'Update each centre by setting to the mean of the allocated points.')\n",
+ " for j in range(centres.shape[0]):\n",
+ " centres[j, :] = np.mean(Y[ind==j, :], 0)\n",
+ " c[0].set_data(centres[:, 0], centres[:, 1])\n",
+ " counter+=1\n",
+ " mlai.write_figure(f\"kmeans_clustering_{counter:0>3}.svg\", directory=\"./ml\")\n",
+ " write_plot(counter, 'Allocate each data point to the nearest cluster centre.')"
+ ],
+ "id": "4132c077-4cab-4f9e-9a30-0749f6a55d20"
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import notutils as nu"
+ ],
+ "id": "c079f4a5-2742-4dfd-9baf-1f6f377ba5bb"
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "nu.display_plots(\"kmeans_clustering_{counter:0>3}.svg\", directory=\"./ml\", \n",
+ " text_top='kmeans_clustering_{counter:0>3}.tex', counter=(0, 13))"
+ ],
+ "id": "23ad9cab-4417-47f1-a930-7331f526cf3c"
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import matplotlib.pyplot as plt\n",
+ "import numpy as np\n",
+ "import mlai"
+ ],
+ "id": "7c927c7e-af89-4cd0-b3b5-cfa64dd24401"
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "fig, ax = plt.subplots(figsize=(5,5))\n",
+ "\n",
+ "num_centres = 20\n",
+ "num_data = 200\n",
+ "centres = np.random.normal(size=(num_centres, 2))\n",
+ "w = np.random.normal(size=(num_centres, 2))*0.1\n",
+ "alloc = np.random.randint(0, num_centres, size=(num_data))\n",
+ "sigma = np.random.normal(size=(num_centres, 1))*0.05\n",
+ "epsilon = np.random.normal(size=(num_data,2))*sigma[alloc, :]\n",
"\n",
+ "Y = w[alloc, :]*np.random.normal(size=(num_data, 1)) + centres[alloc, :] + epsilon\n",
+ "\n",
+ "ax.plot(Y[:, 0], Y[:, 1], 'rx')\n",
+ "ax.set_xlabel('$y_1$', fontsize=20)\n",
+ "ax.set_ylabel('$y_2$', fontsize=20)\n",
+ "\n",
+ "mlai.write_figure(\"cluster_data00.svg\", directory=\"./ml/\")\n",
+ "pi_vals = np.linspace(-np.pi, np.pi, 200)[:, None]\n",
+ "for i in range(num_centres):\n",
+ " ax.plot(centres[i, 0], centres[i, 1], 'o', markersize=5, color=[0, 0, 0], linewidth=2)\n",
+ " x = np.hstack([np.sin(pi_vals), np.cos(pi_vals)])\n",
+ " L = np.linalg.cholesky(np.outer(w[i, :],w[i, :]) + sigma[i]**2*np.eye(2))\n",
+ " el = np.dot(x, L.T)\n",
+ " ax.plot(centres[i, 0] + el[:, 0], centres[i, 1] + el[:, 1], linewidth=2, color=[0,0,0])\n",
+ "mlai.write_figure(\"cluster_data01.svg\", directory=\"./ml/\")"
+ ],
+ "id": "8c36cea7-29fc-4ff4-b251-5834110152de"
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import notutils as nu"
+ ],
+ "id": "37ecf7ee-72b1-4c87-92b5-1c90ea68d7ab"
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "nu.display_plots('cluster_data{counter:0>2}.svg', directory='./ml', counter=(0, 1))"
+ ],
+ "id": "a0ebdb56-207f-47ca-9a3f-99be61be6852"
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
"Clustering methods associate each data point with a different label.\n",
"Unlike in classification the label is not provided by a human annotator.\n",
"It is allocated by the computer. Clustering is quite intuitive for\n",
@@ -3505,7 +4322,8 @@
"by the products they’ve purchased in the past. This could be a binary\n",
"vector $\\mathbf{ x}_i$. We can then define a distance between the\n",
"cluster center and the customer."
- ]
+ ],
+ "id": "2d7fa02d-8ddd-4059-b67c-af33e8ad6ae7"
},
{
"cell_type": "markdown",
@@ -3528,7 +4346,8 @@
"$$ where the notation $\\mathbf{i}_j$ represents all the indices of each\n",
"data point which has been allocated to the $j$th cluster represented by\n",
"the center $\\boldsymbol{ \\mu}_j$."
- ]
+ ],
+ "id": "42091476-bd3c-48a2-8efa-2e5e22379680"
},
{
"cell_type": "markdown",
@@ -3545,7 +4364,8 @@
"the initial choice of centers. For more technical details on $k$-means\n",
"clustering you can watch a video of Alex Ihler introducing the algorithm\n",
"here."
- ]
+ ],
+ "id": "8b9ee964-6c94-4f80-866f-cbf277d97586"
},
{
"cell_type": "markdown",
@@ -3553,10 +4373,11 @@
"source": [
"### $k$-Means Clustering\n",
"\n",
- "\n",
+ "\n",
"\n",
"Figure: Clustering with the $k$-means clustering algorithm."
- ]
+ ],
+ "id": "6abfb53a-eb34-4a82-ae3c-1d1d9a3e20d1"
},
{
"cell_type": "code",
@@ -3566,14 +4387,16 @@
"source": [
"from IPython.lib.display import YouTubeVideo\n",
"YouTubeVideo('mfqmoUN-Cuw')"
- ]
+ ],
+ "id": "1578ca65-db0e-4c3c-a862-e08c27c93d5a"
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Figure: $k$-means clustering by Alex Ihler."
- ]
+ ],
+ "id": "d4e1962a-a756-4c57-a949-948f3dfddb90"
},
{
"cell_type": "markdown",
@@ -3584,7 +4407,8 @@
"Other approaches to clustering involve forming taxonomies of the cluster\n",
"centers, like humans apply to animals, to form trees. You can learn more\n",
"about agglomerative clustering in this video from Alex Ihler."
- ]
+ ],
+ "id": "df900dac-3b70-42db-baa6-b0bff1228aa9"
},
{
"cell_type": "code",
@@ -3594,14 +4418,16 @@
"source": [
"from IPython.lib.display import YouTubeVideo\n",
"YouTubeVideo('OcoE7JlbXvY')"
- ]
+ ],
+ "id": "17793767-587b-4a98-bb19-5e28a4d6451d"
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Figure: Hierarchical Clustering by Alex Ihler."
- ]
+ ],
+ "id": "e24a72d7-f362-4fa1-b8a6-f5a7de1d5a50"
},
{
"cell_type": "markdown",
@@ -3624,7 +4450,8 @@
"center that is then allowed to evolve over time through a mutation rate.\n",
"The time of separation between different species is estimated via these\n",
"mutation rates."
- ]
+ ],
+ "id": "d0d96411-64bf-4f86-b4a1-a623bf0a29b2"
},
{
"cell_type": "markdown",
@@ -3641,7 +4468,8 @@
"for example running shoes should be in more than one group, they are\n",
"‘sporting goods’ and they are ‘apparel’. A tree structure doesn’t allow\n",
"this allocation."
- ]
+ ],
+ "id": "a71b99e6-73b6-408d-85dc-c3fcd17e8d88"
},
{
"cell_type": "markdown",
@@ -3653,20 +4481,41 @@
"cognitive science. Researchers like Josh Tenenbaum have developed\n",
"algorithms that decompose data in more complex ways, but they can\n",
"normally only be applied to smaller data sets."
- ]
+ ],
+ "id": "7b2f2e9f-ac0b-407b-8414-2f0f29d528ad"
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Other Clustering Approaches\n",
+ "\n",
+ "- Spectral clustering (Shi and Malik (2000),Ng et al. (n.d.))\n",
+ " - Allows clusters which aren’t convex hulls.\n",
+ "- Dirichlet process\n",
+ " - A probabilistic formulation for a clustering algorithm that is\n",
+ " *non-parametric*.\n",
+ " - Loosely speaking it allows infinite clusters\n",
+ " - In practice useful for dealing with previously unknown species\n",
+ " (e.g. a “Black Swan Event”)."
+ ],
+ "id": "504c9e5f-a33b-427b-9415-a5ba8316b2fc"
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
- "Dimensionality Reduction\n",
- "------------------------\n",
+ "## Dimensionality Reduction\n",
+ "\n",
+ "\\[edit\\]\n",
"\n",
"Dimensionality reduction methods compress the data by replacing the\n",
"original data with a reduced number of continuous variables. One way of\n",
"thinking of these methods is to imagine a marionette.\n",
"\n",
- "\n",
+ "\n",
"\n",
"Figure: Thinking of dimensionality reduction as a marionette. We\n",
"observe the high dimensional pose of the puppet, $\\mathbf{ x}$, but the\n",
@@ -3694,7 +4543,8 @@
"It assumes that the data we observe is generated from some lower\n",
"dimensional underlying process. It then seeks to recover the values\n",
"associated with this low dimensional process."
- ]
+ ],
+ "id": "77d9b15b-809a-45bd-ab55-b28d09832c5b"
},
{
"cell_type": "markdown",
@@ -3733,8 +4583,9 @@
"[word2vec](https://arxiv.org/abs/1301.3781) algorithm performed a\n",
"dimensionality reduction on words, now you can take any word and map it\n",
"to a latent space where similar words exhibit similar characteristics. A\n",
- "personality space for words."
- ]
+ "‘personality space’ for words."
+ ],
+ "id": "798ac42c-aa78-46d2-9a79-c4536edf48ed"
},
{
"cell_type": "markdown",
@@ -3761,7 +4612,7 @@
"all subjects, but the subject’s IQ is assumed to differ leading to\n",
"different scores for each subject.\n",
"\n",
- "\n",
+ "\n",
"\n",
"Figure: Visualization of the first two principal components of an\n",
"artificial data set. The data was generated by taking an image of a\n",
@@ -3770,7 +4621,8 @@
"The underlying circular shape is derived from the rotation of the data.\n",
"Each image in the data set is projected on to the location its projected\n",
"to in the latent space."
- ]
+ ],
+ "id": "f0d7bbf8-9680-4dc0-a20f-4166f54ffa89"
},
{
"cell_type": "markdown",
@@ -3795,7 +4647,8 @@
"preference for outdoor job). In factor analysis the parameters\n",
"$\\mathbf{W}$ are known as the factor *loadings* and in PCA they are\n",
"known as the principal components."
- ]
+ ],
+ "id": "9b97ba7e-4911-4b06-acfe-9ebb6aeb12a1"
},
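+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Below is a small sketch (added for illustration, not part of the original\n",
+ "notes) of recovering principal components with a singular value\n",
+ "decomposition. The synthetic data matrix `Y`, the noise level and the\n",
+ "choice of two latent dimensions are assumptions made only for the\n",
+ "example.\n",
+ "\n",
+ "``` python\n",
+ "import numpy as np\n",
+ "\n",
+ "# principal components from the SVD of the centred data matrix\n",
+ "rng = np.random.default_rng(0)\n",
+ "n, d, q = 200, 5, 2   # number of points, data dimension, latent dimension\n",
+ "W_true = rng.normal(size=(d, q))\n",
+ "X_latent = rng.normal(size=(n, q))\n",
+ "Y = X_latent @ W_true.T + 0.1*rng.normal(size=(n, d))\n",
+ "\n",
+ "Y_centred = Y - Y.mean(axis=0)\n",
+ "U, s, Vt = np.linalg.svd(Y_centred, full_matrices=False)\n",
+ "W = Vt[:q].T          # estimated principal components (directions)\n",
+ "X = Y_centred @ W     # projection of the data into the latent space\n",
+ "print(W.shape, X.shape)\n",
+ "```"
+ ],
+ "id": "pca-svd-sketch"
+ },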
{
"cell_type": "markdown",
@@ -3871,14 +4724,18 @@
"matrix (which would be impossible to compute) embeds similarities\n",
"between pages according to how far apart they are via a random walk\n",
"along the linkage matrix."
- ]
+ ],
+ "id": "68c2731d-87e8-400d-9749-fbe856707163"
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
- "Reinforcement Learning\n",
- "======================\n",
+ "# Reinforcement Learning\n",
+ "\n",
+ "\\[edit\\]\n",
"\n",
"The final domain of learning we will review is known as reinforcement\n",
"learning. The domain of reinforcement learning is one that many\n",
@@ -3894,14 +4751,14 @@
"but the reward is normally delayed. There may have been many actions\n",
"that affected the outcome, but which actions had a positive effect and\n",
"which a negative effect?"
- ]
+ ],
+ "id": "4c9e9b24-b47d-4d81-acd7-76364eca301a"
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
- "“Reward”\n",
- "--------\n",
+ "## “Reward”\n",
"\n",
"- In reinforcement learning some context is given, in the form of a\n",
" reward. But it is often *delayed*\n",
@@ -3926,14 +4783,14 @@
"agent’s ability to interact with the user and understand intent.\n",
"However, they are not yet mature enough to be deployed in this\n",
"application."
- ]
+ ],
+ "id": "220dc80e-1910-4286-94a2-15d8d045110e"
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
- "Game Play\n",
- "---------\n",
+ "## Game Play\n",
"\n",
"An area where reinforcement learning methods have been deployed with\n",
"high profile success is game play. In game play the reward is delayed to\n",
@@ -3976,14 +4833,14 @@
"as the use of fast compute to generate and process very large quantities\n",
"of data. In its standard form it is not seen as a very data-efficient\n",
"approach."
- ]
+ ],
+ "id": "9f5a6d8d-d4a7-47bb-b64a-d3decc2d69a5"
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
- "AlphaGo\n",
- "-------\n",
+ "## AlphaGo\n",
"\n",
"The ancient Chinese game of Go was considered a challenge for artificial\n",
"intelligence for two reasons. Firstly, the game tree has a very high\n",
@@ -4025,14 +4882,14 @@
"as the use of fast compute to generate and process very large quantities\n",
"of data. In its standard form it is not seen as a very data-efficient\n",
"approach."
- ]
+ ],
+ "id": "590435e3-3e62-49b5-b89e-42d7e1353cc0"
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
- "Reinforcement Learning and Classical Control\n",
- "--------------------------------------------\n",
+ "## Reinforcement Learning and Classical Control\n",
"\n",
"An alternative approach to reinforcement learning is to use a prediction\n",
"function to suggest how the world will evolve in response to your\n",
@@ -4058,14 +4915,14 @@
"of control and reinforcement learning. Results at this interface could\n",
"be very important for improving the quality of robotic and drone\n",
"control."
- ]
+ ],
+ "id": "dcc993f9-d31a-4ff2-83be-09eab6b78903"
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
- "Optimization Methods\n",
- "--------------------\n",
+ "## Optimization Methods\n",
"\n",
"As we implied above, reinforcement learning can also used to improve\n",
"user experience. In that case the reward is gained when the user buys a\n",
@@ -4089,28 +4946,28 @@
"mathematical function. For example, what is the mathematical function\n",
"that relates a user’s experience to the probability that they will buy a\n",
"product?"
- ]
+ ],
+ "id": "e7bc7c5e-49ab-4661-94a3-ed848876da83"
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
- "Bayesian Optimization\n",
- "---------------------\n",
+ "## Bayesian Optimization\n",
"\n",
"One approach to these problems is to use machine learning methods to\n",
"develop a *surrogate model* for the optimization task. The surrogate\n",
"model is a prediction function that attempts to recreate the process we\n",
"are finding hard to model. We try to simultaneously fit the surrogate\n",
"model and optimize the process."
- ]
+ ],
+ "id": "0a1d305c-0d47-4681-a7c3-191f49a475dd"
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
- "Surrogate Models\n",
- "----------------\n",
+ "## Surrogate Models\n",
"\n",
"Bayesian optimization methods use a *surrogate model* (normally a\n",
"specific form of regression model). They use this to predict how the\n",
@@ -4122,14 +4979,14 @@
"model of the real world. In bandit methods strategies are determined\n",
"without turning to a model to motivate them. They are *model free*\n",
"methods."
- ]
+ ],
+ "id": "b862bcf6-dff0-4bec-b76b-f7e6ebab5b8f"
},
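To make the surrogate-model idea concrete, here is a minimal sketch (illustrative only, not the system described in the talk): a Gaussian process regression surrogate is fitted to a handful of evaluations of a toy objective, and the next evaluation is chosen by minimising a simple lower confidence bound on the surrogate. The objective function, bounds, and acquisition rule are all assumptions made for this example.

``` python
# Minimal surrogate-model sketch (assumed toy objective, not the talk's implementation).
import numpy as np
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import RBF

def objective(x):                       # toy stand-in for the expensive real process
    return np.sin(3 * x) + 0.1 * x ** 2

rng = np.random.default_rng(0)
X = rng.uniform(-3, 3, size=(5, 1))     # a handful of initial evaluations
y = objective(X).ravel()

grid = np.linspace(-3, 3, 200).reshape(-1, 1)
for _ in range(10):                     # the optimisation loop
    gp = GaussianProcessRegressor(kernel=RBF(length_scale=1.0),
                                  alpha=1e-6, normalize_y=True)
    gp.fit(X, y)                        # surrogate model of the process
    mean, std = gp.predict(grid, return_std=True)
    x_next = grid[np.argmin(mean - 2 * std)]   # evaluate where the bound is lowest
    X = np.vstack([X, x_next])
    y = np.append(y, objective(x_next))

print("best x found:", X[np.argmin(y)].item(), "value:", y.min())
```

Because the surrogate's uncertainty is used to pick the next evaluation, the loop balances exploring unvisited regions against refining around the current best point.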
{
"cell_type": "markdown",
"metadata": {},
"source": [
- "Model-Based and Model Free: Performance\n",
- "---------------------------------------\n",
+ "## Model-Based and Model Free: Performance\n",
"\n",
"Because of their different philosophies, if a class of prediction\n",
"functions is chosen, then a model-based approach might have better\n",
@@ -4148,14 +5005,18 @@
"rules. The important characteristic of machine learning is that the form\n",
"of these functions, as dictated by their parameters, is determined by\n",
"acquiring data from the real world."
- ]
+ ],
+ "id": "13469678-c455-4668-a1c7-73a43fe3295f"
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
- "Deployment\n",
- "----------\n",
+ "## Deployment\n",
+ "\n",
+ "\\[edit\\]\n",
"\n",
"The methods we have introduced are roughly speaking introduced in order\n",
"of difficulty of deployment. While supervised learning is more involved\n",
@@ -4174,14 +5035,14 @@
"organizations I’ve been proposing “Data Readiness Levels”. More needs to\n",
"be done in this area to improve the efficiency of the data science\n",
"pipeline."
- ]
+ ],
+ "id": "72d3285a-3263-47e3-b5db-3ab7083b66a1"
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
- "Where to Deploy?\n",
- "----------------\n",
+ "## Where to Deploy?\n",
"\n",
"In relation to what AI can and can’t do today Andrew Ng is quoted as\n",
"saying:\n",
@@ -4193,14 +5054,14 @@
"[1] The quote can be found in the Harvard Business Review Article [“What\n",
"Artificial Intelligence Can and Can’t Do Right\n",
"Now”](https://hbr.org/2016/11/what-artificial-intelligence-can-and-cant-do-right-now)."
- ]
+ ],
+ "id": "2b4e6e9d-2732-41f6-9cf4-1e08dde93143"
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
- "Is this Right?\n",
- "--------------\n",
+ "## Is this Right?\n",
"\n",
"I would broadly agree with this quote but only in the context of\n",
"supervised learning. If a human expert takes around that amount of time,\n",
@@ -4261,14 +5122,14 @@
"[1] This trend was very clear at the moment, [I spoke about\n",
"it](%7B%7Bsite.baseurl%20%7D%7D/) at a recent Dagstuhl workshop on new\n",
"directions for kernel methods and Gaussian processes."
- ]
+ ],
+ "id": "49e5091c-c50d-495b-a49c-3e4466abf473"
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
- "Model Choice\n",
- "------------\n",
+ "## Model Choice\n",
"\n",
"Common to all machine learning methods is the initial choice of useful\n",
"classes of functions. The deep learning revolution is associated with a\n",
@@ -4282,14 +5143,14 @@
"\n",
"\n",
""
- ]
+ ],
+ "id": "bc6d953c-dcb4-42c2-8f9a-948cfab2834e"
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
- "Thanks!\n",
- "-------\n",
+ "## Thanks!\n",
"\n",
"For more information on these subjects and more you might want to check\n",
"the following resources.\n",
@@ -4300,15 +5161,16 @@
" Page](http://www.theguardian.com/profile/neil-lawrence)\n",
"- blog:\n",
" [http://inverseprobability.com](http://inverseprobability.com/blog.html)"
- ]
+ ],
+ "id": "41041846-5d71-473e-9623-3b38e14b49a7"
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
- "References\n",
- "----------"
- ]
+ "## References"
+ ],
+ "id": "7d55b180-d558-4509-99a9-0253d9c02257"
},
{
"cell_type": "markdown",
@@ -4319,11 +5181,17 @@
"territory delimitation. Malaria Journal 13.\n",
"\n",
"\n",
+ "Breiman, L., 2001. Random forests. Mach. Learn. 45, 5–32.\n",
+ "\n",
+ "\n",
"Cooper, B., 1991. Transformation of a valley: Derbyshire derwent.\n",
"Scarthin Books.\n",
"\n",
- "Gelman, A., Carlin, J.B., Stern, H.S., Rubin, D.B., 2013. Bayesian data\n",
- "analysis, 3rd ed. Chapman; Hall.\n",
+ "Efron, B., 1979. Bootstrap methods: Another look at the jackkife. Annals\n",
+ "of Statistics 7, 1–26.\n",
+ "\n",
+ "Gelman, A., Carlin, J.B., Stern, H.S., Dunson, D.B., Vehtari, A., Rubin,\n",
+ "D.B., 2013. Bayesian data analysis, 3rd ed. Chapman; Hall.\n",
"\n",
"Gething, P.W., Noor, A.M., Gikandi, P.W., Ogara, E.A.A., Hay, S.I.,\n",
"Nixon, M.S., Snow, R.W., Atkinson, P.M., 2006. Improving imperfect data\n",
@@ -4331,25 +5199,40 @@
"geostatistics. PLoS Medicine 3.\n",
"\n",
"\n",
- "Lawrence, N.D., 2015. How Africa can benefit from the data revolution.\n",
+ "Lawrence, N.D., 2015. [How Africa can benefit from the data\n",
+ "revolution](https://www.theguardian.com/media-network/2015/aug/25/africa-benefit-data-science-information).\n",
+ "\n",
+ "McCullagh, P., Nelder, J.A., 1989. Generalized linear models, 2nd ed.\n",
+ "Chapman; Hall.\n",
"\n",
"McCulloch, W.S., Pitts, W., 1943. A logical calculus of the ideas\n",
"immanent in nervous activity. Bulletin of Mathematical Biophysics 5,\n",
- "115–133.\n",
+ "115–133. \n",
"\n",
"Mubangizi, M., Andrade-Pacheco, R., Smith, M.T., Quinn, J., Lawrence,\n",
"N.D., 2014. Malaria surveillance with multiple data sources using\n",
"Gaussian process models, in: 1st International Conference on the Use of\n",
"Mobile ICT in Africa.\n",
"\n",
+ "Ng, A.Y., Jordan, M.I., Weiss, Y., n.d. On spectral clustering: Analysis\n",
+ "and an algorithm.\n",
+ "\n",
"Robbins, H., Monro, S., 1951. A stochastic approximation method. Annals\n",
"of Mathematical Statistics 22, 400–407.\n",
"\n",
+ "Shi, J., Malik, J., 2000. Normalized cuts and image segmentation. IEEE\n",
+ "Transactions on Pattern Analysis and Machine Intelligence 22, 888–905.\n",
+ "\n",
"Taigman, Y., Yang, M., Ranzato, M., Wolf, L., 2014. DeepFace: Closing\n",
"the gap to human-level performance in face verification, in: Proceedings\n",
"of the IEEE Computer Society Conference on Computer Vision and Pattern\n",
- "Recognition. "
- ]
+ "Recognition. \n",
+ "\n",
+ "The Office of the Senior Special Assistant to the President on the\n",
+ "Millennium Development Goals (OSSAP-MDGs), Columbia University, 2014.\n",
+ "Nigeria NMIS facility database."
+ ],
+ "id": "86df9150-8e0e-45ea-bc39-d874932f1b10"
}
],
"nbformat": 4,
diff --git a/_notebooks/02-ml-systems.ipynb b/_notebooks/02-ml-systems.ipynb
index 852a618..3ef769e 100644
--- a/_notebooks/02-ml-systems.ipynb
+++ b/_notebooks/02-ml-systems.ipynb
@@ -2,11 +2,9 @@
"cells": [
{
"cell_type": "markdown",
- "id": "38ac04d8",
"metadata": {},
"source": [
- "Introduction to Machine Learning Systems\n",
- "========================================\n",
+ "# Introduction to Machine Learning Systems\n",
"\n",
"### [Eric Meissner](https://www.linkedin.com/in/meissnereric/)\n",
"\n",
@@ -15,11 +13,11 @@
"### [Neil D. Lawrence](http://inverseprobability.com)\n",
"\n",
"### 2020-07-24"
- ]
+ ],
+ "id": "a54aaab6-a9a8-4bd0-b81e-4ba898a5d5ec"
},
{
"cell_type": "markdown",
- "id": "a8abc322",
"metadata": {},
"source": [
"**Abstract**: This notebook introduces some of the challenges of\n",
@@ -30,311 +28,24 @@
"concepts, not to authoritatively answer any questions about the state of\n",
"Nigerian health facilities or Covid19, but it may give you ideas about\n",
"how to try and do that in your own country."
- ]
+ ],
+ "id": "48accf03-6f37-4508-a971-c9f0f8fbd53d"
},
{
"cell_type": "markdown",
- "id": "53590e4b",
"metadata": {},
"source": [
"$$\n",
- "\\newcommand{\\tk}[1]{}\n",
- "\\newcommand{\\Amatrix}{\\mathbf{A}}\n",
- "\\newcommand{\\KL}[2]{\\text{KL}\\left( #1\\,\\|\\,#2 \\right)}\n",
- "\\newcommand{\\Kaast}{\\kernelMatrix_{\\mathbf{ \\ast}\\mathbf{ \\ast}}}\n",
- "\\newcommand{\\Kastu}{\\kernelMatrix_{\\mathbf{ \\ast} \\inducingVector}}\n",
- "\\newcommand{\\Kff}{\\kernelMatrix_{\\mappingFunctionVector \\mappingFunctionVector}}\n",
- "\\newcommand{\\Kfu}{\\kernelMatrix_{\\mappingFunctionVector \\inducingVector}}\n",
- "\\newcommand{\\Kuast}{\\kernelMatrix_{\\inducingVector \\bf\\ast}}\n",
- "\\newcommand{\\Kuf}{\\kernelMatrix_{\\inducingVector \\mappingFunctionVector}}\n",
- "\\newcommand{\\Kuu}{\\kernelMatrix_{\\inducingVector \\inducingVector}}\n",
- "\\newcommand{\\Kuui}{\\Kuu^{-1}}\n",
- "\\newcommand{\\Qaast}{\\mathbf{Q}_{\\bf \\ast \\ast}}\n",
- "\\newcommand{\\Qastf}{\\mathbf{Q}_{\\ast \\mappingFunction}}\n",
- "\\newcommand{\\Qfast}{\\mathbf{Q}_{\\mappingFunctionVector \\bf \\ast}}\n",
- "\\newcommand{\\Qff}{\\mathbf{Q}_{\\mappingFunctionVector \\mappingFunctionVector}}\n",
- "\\newcommand{\\aMatrix}{\\mathbf{A}}\n",
- "\\newcommand{\\aScalar}{a}\n",
- "\\newcommand{\\aVector}{\\mathbf{a}}\n",
- "\\newcommand{\\acceleration}{a}\n",
- "\\newcommand{\\bMatrix}{\\mathbf{B}}\n",
- "\\newcommand{\\bScalar}{b}\n",
- "\\newcommand{\\bVector}{\\mathbf{b}}\n",
- "\\newcommand{\\basisFunc}{\\phi}\n",
- "\\newcommand{\\basisFuncVector}{\\boldsymbol{ \\basisFunc}}\n",
- "\\newcommand{\\basisFunction}{\\phi}\n",
- "\\newcommand{\\basisLocation}{\\mu}\n",
- "\\newcommand{\\basisMatrix}{\\boldsymbol{ \\Phi}}\n",
- "\\newcommand{\\basisScalar}{\\basisFunction}\n",
- "\\newcommand{\\basisVector}{\\boldsymbol{ \\basisFunction}}\n",
- "\\newcommand{\\activationFunction}{\\phi}\n",
- "\\newcommand{\\activationMatrix}{\\boldsymbol{ \\Phi}}\n",
- "\\newcommand{\\activationScalar}{\\basisFunction}\n",
- "\\newcommand{\\activationVector}{\\boldsymbol{ \\basisFunction}}\n",
- "\\newcommand{\\bigO}{\\mathcal{O}}\n",
- "\\newcommand{\\binomProb}{\\pi}\n",
- "\\newcommand{\\cMatrix}{\\mathbf{C}}\n",
- "\\newcommand{\\cbasisMatrix}{\\hat{\\boldsymbol{ \\Phi}}}\n",
- "\\newcommand{\\cdataMatrix}{\\hat{\\dataMatrix}}\n",
- "\\newcommand{\\cdataScalar}{\\hat{\\dataScalar}}\n",
- "\\newcommand{\\cdataVector}{\\hat{\\dataVector}}\n",
- "\\newcommand{\\centeredKernelMatrix}{\\mathbf{ \\MakeUppercase{\\centeredKernelScalar}}}\n",
- "\\newcommand{\\centeredKernelScalar}{b}\n",
- "\\newcommand{\\centeredKernelVector}{\\centeredKernelScalar}\n",
- "\\newcommand{\\centeringMatrix}{\\mathbf{H}}\n",
- "\\newcommand{\\chiSquaredDist}[2]{\\chi_{#1}^{2}\\left(#2\\right)}\n",
- "\\newcommand{\\chiSquaredSamp}[1]{\\chi_{#1}^{2}}\n",
- "\\newcommand{\\conditionalCovariance}{\\boldsymbol{ \\Sigma}}\n",
- "\\newcommand{\\coregionalizationMatrix}{\\mathbf{B}}\n",
- "\\newcommand{\\coregionalizationScalar}{b}\n",
- "\\newcommand{\\coregionalizationVector}{\\mathbf{ \\coregionalizationScalar}}\n",
- "\\newcommand{\\covDist}[2]{\\text{cov}_{#2}\\left(#1\\right)}\n",
- "\\newcommand{\\covSamp}[1]{\\text{cov}\\left(#1\\right)}\n",
- "\\newcommand{\\covarianceScalar}{c}\n",
- "\\newcommand{\\covarianceVector}{\\mathbf{ \\covarianceScalar}}\n",
- "\\newcommand{\\covarianceMatrix}{\\mathbf{C}}\n",
- "\\newcommand{\\covarianceMatrixTwo}{\\boldsymbol{ \\Sigma}}\n",
- "\\newcommand{\\croupierScalar}{s}\n",
- "\\newcommand{\\croupierVector}{\\mathbf{ \\croupierScalar}}\n",
- "\\newcommand{\\croupierMatrix}{\\mathbf{ \\MakeUppercase{\\croupierScalar}}}\n",
- "\\newcommand{\\dataDim}{p}\n",
- "\\newcommand{\\dataIndex}{i}\n",
- "\\newcommand{\\dataIndexTwo}{j}\n",
- "\\newcommand{\\dataMatrix}{\\mathbf{Y}}\n",
- "\\newcommand{\\dataScalar}{y}\n",
- "\\newcommand{\\dataSet}{\\mathcal{D}}\n",
- "\\newcommand{\\dataStd}{\\sigma}\n",
- "\\newcommand{\\dataVector}{\\mathbf{ \\dataScalar}}\n",
- "\\newcommand{\\decayRate}{d}\n",
- "\\newcommand{\\degreeMatrix}{\\mathbf{ \\MakeUppercase{\\degreeScalar}}}\n",
- "\\newcommand{\\degreeScalar}{d}\n",
- "\\newcommand{\\degreeVector}{\\mathbf{ \\degreeScalar}}\n",
- "\\newcommand{\\diag}[1]{\\text{diag}\\left(#1\\right)}\n",
- "\\newcommand{\\diagonalMatrix}{\\mathbf{D}}\n",
- "\\newcommand{\\diff}[2]{\\frac{\\text{d}#1}{\\text{d}#2}}\n",
- "\\newcommand{\\diffTwo}[2]{\\frac{\\text{d}^2#1}{\\text{d}#2^2}}\n",
- "\\newcommand{\\displacement}{x}\n",
- "\\newcommand{\\displacementVector}{\\textbf{\\displacement}}\n",
- "\\newcommand{\\distanceMatrix}{\\mathbf{ \\MakeUppercase{\\distanceScalar}}}\n",
- "\\newcommand{\\distanceScalar}{d}\n",
- "\\newcommand{\\distanceVector}{\\mathbf{ \\distanceScalar}}\n",
- "\\newcommand{\\eigenvaltwo}{\\ell}\n",
- "\\newcommand{\\eigenvaltwoMatrix}{\\mathbf{L}}\n",
- "\\newcommand{\\eigenvaltwoVector}{\\mathbf{l}}\n",
- "\\newcommand{\\eigenvalue}{\\lambda}\n",
- "\\newcommand{\\eigenvalueMatrix}{\\boldsymbol{ \\Lambda}}\n",
- "\\newcommand{\\eigenvalueVector}{\\boldsymbol{ \\lambda}}\n",
- "\\newcommand{\\eigenvector}{\\mathbf{ \\eigenvectorScalar}}\n",
- "\\newcommand{\\eigenvectorMatrix}{\\mathbf{U}}\n",
- "\\newcommand{\\eigenvectorScalar}{u}\n",
- "\\newcommand{\\eigenvectwo}{\\mathbf{v}}\n",
- "\\newcommand{\\eigenvectwoMatrix}{\\mathbf{V}}\n",
- "\\newcommand{\\eigenvectwoScalar}{v}\n",
- "\\newcommand{\\entropy}[1]{\\mathcal{H}\\left(#1\\right)}\n",
- "\\newcommand{\\errorFunction}{E}\n",
- "\\newcommand{\\expDist}[2]{\\left<#1\\right>_{#2}}\n",
- "\\newcommand{\\expSamp}[1]{\\left<#1\\right>}\n",
- "\\newcommand{\\expectation}[1]{\\left\\langle #1 \\right\\rangle }\n",
- "\\newcommand{\\expectationDist}[2]{\\left\\langle #1 \\right\\rangle _{#2}}\n",
- "\\newcommand{\\expectedDistanceMatrix}{\\mathcal{D}}\n",
- "\\newcommand{\\eye}{\\mathbf{I}}\n",
- "\\newcommand{\\fantasyDim}{r}\n",
- "\\newcommand{\\fantasyMatrix}{\\mathbf{ \\MakeUppercase{\\fantasyScalar}}}\n",
- "\\newcommand{\\fantasyScalar}{z}\n",
- "\\newcommand{\\fantasyVector}{\\mathbf{ \\fantasyScalar}}\n",
- "\\newcommand{\\featureStd}{\\varsigma}\n",
- "\\newcommand{\\gammaCdf}[3]{\\mathcal{GAMMA CDF}\\left(#1|#2,#3\\right)}\n",
- "\\newcommand{\\gammaDist}[3]{\\mathcal{G}\\left(#1|#2,#3\\right)}\n",
- "\\newcommand{\\gammaSamp}[2]{\\mathcal{G}\\left(#1,#2\\right)}\n",
- "\\newcommand{\\gaussianDist}[3]{\\mathcal{N}\\left(#1|#2,#3\\right)}\n",
- "\\newcommand{\\gaussianSamp}[2]{\\mathcal{N}\\left(#1,#2\\right)}\n",
- "\\newcommand{\\given}{|}\n",
- "\\newcommand{\\half}{\\frac{1}{2}}\n",
- "\\newcommand{\\heaviside}{H}\n",
- "\\newcommand{\\hiddenMatrix}{\\mathbf{ \\MakeUppercase{\\hiddenScalar}}}\n",
- "\\newcommand{\\hiddenScalar}{h}\n",
- "\\newcommand{\\hiddenVector}{\\mathbf{ \\hiddenScalar}}\n",
- "\\newcommand{\\identityMatrix}{\\eye}\n",
- "\\newcommand{\\inducingInputScalar}{z}\n",
- "\\newcommand{\\inducingInputVector}{\\mathbf{ \\inducingInputScalar}}\n",
- "\\newcommand{\\inducingInputMatrix}{\\mathbf{Z}}\n",
- "\\newcommand{\\inducingScalar}{u}\n",
- "\\newcommand{\\inducingVector}{\\mathbf{ \\inducingScalar}}\n",
- "\\newcommand{\\inducingMatrix}{\\mathbf{U}}\n",
- "\\newcommand{\\inlineDiff}[2]{\\text{d}#1/\\text{d}#2}\n",
- "\\newcommand{\\inputDim}{q}\n",
- "\\newcommand{\\inputMatrix}{\\mathbf{X}}\n",
- "\\newcommand{\\inputScalar}{x}\n",
- "\\newcommand{\\inputSpace}{\\mathcal{X}}\n",
- "\\newcommand{\\inputVals}{\\inputVector}\n",
- "\\newcommand{\\inputVector}{\\mathbf{ \\inputScalar}}\n",
- "\\newcommand{\\iterNum}{k}\n",
- "\\newcommand{\\kernel}{\\kernelScalar}\n",
- "\\newcommand{\\kernelMatrix}{\\mathbf{K}}\n",
- "\\newcommand{\\kernelScalar}{k}\n",
- "\\newcommand{\\kernelVector}{\\mathbf{ \\kernelScalar}}\n",
- "\\newcommand{\\kff}{\\kernelScalar_{\\mappingFunction \\mappingFunction}}\n",
- "\\newcommand{\\kfu}{\\kernelVector_{\\mappingFunction \\inducingScalar}}\n",
- "\\newcommand{\\kuf}{\\kernelVector_{\\inducingScalar \\mappingFunction}}\n",
- "\\newcommand{\\kuu}{\\kernelVector_{\\inducingScalar \\inducingScalar}}\n",
- "\\newcommand{\\lagrangeMultiplier}{\\lambda}\n",
- "\\newcommand{\\lagrangeMultiplierMatrix}{\\boldsymbol{ \\Lambda}}\n",
- "\\newcommand{\\lagrangian}{L}\n",
- "\\newcommand{\\laplacianFactor}{\\mathbf{ \\MakeUppercase{\\laplacianFactorScalar}}}\n",
- "\\newcommand{\\laplacianFactorScalar}{m}\n",
- "\\newcommand{\\laplacianFactorVector}{\\mathbf{ \\laplacianFactorScalar}}\n",
- "\\newcommand{\\laplacianMatrix}{\\mathbf{L}}\n",
- "\\newcommand{\\laplacianScalar}{\\ell}\n",
- "\\newcommand{\\laplacianVector}{\\mathbf{ \\ell}}\n",
- "\\newcommand{\\latentDim}{q}\n",
- "\\newcommand{\\latentDistanceMatrix}{\\boldsymbol{ \\Delta}}\n",
- "\\newcommand{\\latentDistanceScalar}{\\delta}\n",
- "\\newcommand{\\latentDistanceVector}{\\boldsymbol{ \\delta}}\n",
- "\\newcommand{\\latentForce}{f}\n",
- "\\newcommand{\\latentFunction}{u}\n",
- "\\newcommand{\\latentFunctionVector}{\\mathbf{ \\latentFunction}}\n",
- "\\newcommand{\\latentFunctionMatrix}{\\mathbf{ \\MakeUppercase{\\latentFunction}}}\n",
- "\\newcommand{\\latentIndex}{j}\n",
- "\\newcommand{\\latentScalar}{z}\n",
- "\\newcommand{\\latentVector}{\\mathbf{ \\latentScalar}}\n",
- "\\newcommand{\\latentMatrix}{\\mathbf{Z}}\n",
- "\\newcommand{\\learnRate}{\\eta}\n",
- "\\newcommand{\\lengthScale}{\\ell}\n",
- "\\newcommand{\\rbfWidth}{\\ell}\n",
- "\\newcommand{\\likelihoodBound}{\\mathcal{L}}\n",
- "\\newcommand{\\likelihoodFunction}{L}\n",
- "\\newcommand{\\locationScalar}{\\mu}\n",
- "\\newcommand{\\locationVector}{\\boldsymbol{ \\locationScalar}}\n",
- "\\newcommand{\\locationMatrix}{\\mathbf{M}}\n",
- "\\newcommand{\\variance}[1]{\\text{var}\\left( #1 \\right)}\n",
- "\\newcommand{\\mappingFunction}{f}\n",
- "\\newcommand{\\mappingFunctionMatrix}{\\mathbf{F}}\n",
- "\\newcommand{\\mappingFunctionTwo}{g}\n",
- "\\newcommand{\\mappingFunctionTwoMatrix}{\\mathbf{G}}\n",
- "\\newcommand{\\mappingFunctionTwoVector}{\\mathbf{ \\mappingFunctionTwo}}\n",
- "\\newcommand{\\mappingFunctionVector}{\\mathbf{ \\mappingFunction}}\n",
- "\\newcommand{\\scaleScalar}{s}\n",
- "\\newcommand{\\mappingScalar}{w}\n",
- "\\newcommand{\\mappingVector}{\\mathbf{ \\mappingScalar}}\n",
- "\\newcommand{\\mappingMatrix}{\\mathbf{W}}\n",
- "\\newcommand{\\mappingScalarTwo}{v}\n",
- "\\newcommand{\\mappingVectorTwo}{\\mathbf{ \\mappingScalarTwo}}\n",
- "\\newcommand{\\mappingMatrixTwo}{\\mathbf{V}}\n",
- "\\newcommand{\\maxIters}{K}\n",
- "\\newcommand{\\meanMatrix}{\\mathbf{M}}\n",
- "\\newcommand{\\meanScalar}{\\mu}\n",
- "\\newcommand{\\meanTwoMatrix}{\\mathbf{M}}\n",
- "\\newcommand{\\meanTwoScalar}{m}\n",
- "\\newcommand{\\meanTwoVector}{\\mathbf{ \\meanTwoScalar}}\n",
- "\\newcommand{\\meanVector}{\\boldsymbol{ \\meanScalar}}\n",
- "\\newcommand{\\mrnaConcentration}{m}\n",
- "\\newcommand{\\naturalFrequency}{\\omega}\n",
- "\\newcommand{\\neighborhood}[1]{\\mathcal{N}\\left( #1 \\right)}\n",
- "\\newcommand{\\neilurl}{http://inverseprobability.com/}\n",
- "\\newcommand{\\noiseMatrix}{\\boldsymbol{ E}}\n",
- "\\newcommand{\\noiseScalar}{\\epsilon}\n",
- "\\newcommand{\\noiseVector}{\\boldsymbol{ \\epsilon}}\n",
- "\\newcommand{\\norm}[1]{\\left\\Vert #1 \\right\\Vert}\n",
- "\\newcommand{\\normalizedLaplacianMatrix}{\\hat{\\mathbf{L}}}\n",
- "\\newcommand{\\normalizedLaplacianScalar}{\\hat{\\ell}}\n",
- "\\newcommand{\\normalizedLaplacianVector}{\\hat{\\mathbf{ \\ell}}}\n",
- "\\newcommand{\\numActive}{m}\n",
- "\\newcommand{\\numBasisFunc}{m}\n",
- "\\newcommand{\\numComponents}{m}\n",
- "\\newcommand{\\numComps}{K}\n",
- "\\newcommand{\\numData}{n}\n",
- "\\newcommand{\\numFeatures}{K}\n",
- "\\newcommand{\\numHidden}{h}\n",
- "\\newcommand{\\numInducing}{m}\n",
- "\\newcommand{\\numLayers}{\\ell}\n",
- "\\newcommand{\\numNeighbors}{K}\n",
- "\\newcommand{\\numSequences}{s}\n",
- "\\newcommand{\\numSuccess}{s}\n",
- "\\newcommand{\\numTasks}{m}\n",
- "\\newcommand{\\numTime}{T}\n",
- "\\newcommand{\\numTrials}{S}\n",
- "\\newcommand{\\outputIndex}{j}\n",
- "\\newcommand{\\paramVector}{\\boldsymbol{ \\theta}}\n",
- "\\newcommand{\\parameterMatrix}{\\boldsymbol{ \\Theta}}\n",
- "\\newcommand{\\parameterScalar}{\\theta}\n",
- "\\newcommand{\\parameterVector}{\\boldsymbol{ \\parameterScalar}}\n",
- "\\newcommand{\\partDiff}[2]{\\frac{\\partial#1}{\\partial#2}}\n",
- "\\newcommand{\\precisionScalar}{j}\n",
- "\\newcommand{\\precisionVector}{\\mathbf{ \\precisionScalar}}\n",
- "\\newcommand{\\precisionMatrix}{\\mathbf{J}}\n",
- "\\newcommand{\\pseudotargetScalar}{\\widetilde{y}}\n",
- "\\newcommand{\\pseudotargetVector}{\\mathbf{ \\pseudotargetScalar}}\n",
- "\\newcommand{\\pseudotargetMatrix}{\\mathbf{ \\widetilde{Y}}}\n",
- "\\newcommand{\\rank}[1]{\\text{rank}\\left(#1\\right)}\n",
- "\\newcommand{\\rayleighDist}[2]{\\mathcal{R}\\left(#1|#2\\right)}\n",
- "\\newcommand{\\rayleighSamp}[1]{\\mathcal{R}\\left(#1\\right)}\n",
- "\\newcommand{\\responsibility}{r}\n",
- "\\newcommand{\\rotationScalar}{r}\n",
- "\\newcommand{\\rotationVector}{\\mathbf{ \\rotationScalar}}\n",
- "\\newcommand{\\rotationMatrix}{\\mathbf{R}}\n",
- "\\newcommand{\\sampleCovScalar}{s}\n",
- "\\newcommand{\\sampleCovVector}{\\mathbf{ \\sampleCovScalar}}\n",
- "\\newcommand{\\sampleCovMatrix}{\\mathbf{s}}\n",
- "\\newcommand{\\scalarProduct}[2]{\\left\\langle{#1},{#2}\\right\\rangle}\n",
- "\\newcommand{\\sign}[1]{\\text{sign}\\left(#1\\right)}\n",
- "\\newcommand{\\sigmoid}[1]{\\sigma\\left(#1\\right)}\n",
- "\\newcommand{\\singularvalue}{\\ell}\n",
- "\\newcommand{\\singularvalueMatrix}{\\mathbf{L}}\n",
- "\\newcommand{\\singularvalueVector}{\\mathbf{l}}\n",
- "\\newcommand{\\sorth}{\\mathbf{u}}\n",
- "\\newcommand{\\spar}{\\lambda}\n",
- "\\newcommand{\\trace}[1]{\\text{tr}\\left(#1\\right)}\n",
- "\\newcommand{\\BasalRate}{B}\n",
- "\\newcommand{\\DampingCoefficient}{C}\n",
- "\\newcommand{\\DecayRate}{D}\n",
- "\\newcommand{\\Displacement}{X}\n",
- "\\newcommand{\\LatentForce}{F}\n",
- "\\newcommand{\\Mass}{M}\n",
- "\\newcommand{\\Sensitivity}{S}\n",
- "\\newcommand{\\basalRate}{b}\n",
- "\\newcommand{\\dampingCoefficient}{c}\n",
- "\\newcommand{\\mass}{m}\n",
- "\\newcommand{\\sensitivity}{s}\n",
- "\\newcommand{\\springScalar}{\\kappa}\n",
- "\\newcommand{\\springVector}{\\boldsymbol{ \\kappa}}\n",
- "\\newcommand{\\springMatrix}{\\boldsymbol{ \\mathcal{K}}}\n",
- "\\newcommand{\\tfConcentration}{p}\n",
- "\\newcommand{\\tfDecayRate}{\\delta}\n",
- "\\newcommand{\\tfMrnaConcentration}{f}\n",
- "\\newcommand{\\tfVector}{\\mathbf{ \\tfConcentration}}\n",
- "\\newcommand{\\velocity}{v}\n",
- "\\newcommand{\\sufficientStatsScalar}{g}\n",
- "\\newcommand{\\sufficientStatsVector}{\\mathbf{ \\sufficientStatsScalar}}\n",
- "\\newcommand{\\sufficientStatsMatrix}{\\mathbf{G}}\n",
- "\\newcommand{\\switchScalar}{s}\n",
- "\\newcommand{\\switchVector}{\\mathbf{ \\switchScalar}}\n",
- "\\newcommand{\\switchMatrix}{\\mathbf{S}}\n",
- "\\newcommand{\\tr}[1]{\\text{tr}\\left(#1\\right)}\n",
- "\\newcommand{\\loneNorm}[1]{\\left\\Vert #1 \\right\\Vert_1}\n",
- "\\newcommand{\\ltwoNorm}[1]{\\left\\Vert #1 \\right\\Vert_2}\n",
- "\\newcommand{\\onenorm}[1]{\\left\\vert#1\\right\\vert_1}\n",
- "\\newcommand{\\twonorm}[1]{\\left\\Vert #1 \\right\\Vert}\n",
- "\\newcommand{\\vScalar}{v}\n",
- "\\newcommand{\\vVector}{\\mathbf{v}}\n",
- "\\newcommand{\\vMatrix}{\\mathbf{V}}\n",
- "\\newcommand{\\varianceDist}[2]{\\text{var}_{#2}\\left( #1 \\right)}\n",
- "\\newcommand{\\vecb}[1]{\\left(#1\\right):}\n",
- "\\newcommand{\\weightScalar}{w}\n",
- "\\newcommand{\\weightVector}{\\mathbf{ \\weightScalar}}\n",
- "\\newcommand{\\weightMatrix}{\\mathbf{W}}\n",
- "\\newcommand{\\weightedAdjacencyMatrix}{\\mathbf{A}}\n",
- "\\newcommand{\\weightedAdjacencyScalar}{a}\n",
- "\\newcommand{\\weightedAdjacencyVector}{\\mathbf{ \\weightedAdjacencyScalar}}\n",
- "\\newcommand{\\onesVector}{\\mathbf{1}}\n",
- "\\newcommand{\\zerosVector}{\\mathbf{0}}\n",
"$$"
- ]
+ ],
+ "id": "3a82b391-6034-464f-b372-255e5e377487"
},
{
"cell_type": "markdown",
- "id": "49df968d",
"metadata": {},
"source": [
+ "::: {.cell .markdown}\n",
+ "\n",
"\n",
"\n",
"\n",
@@ -344,21 +55,20 @@
""
- ]
+ ],
+ "id": "4972f8f1-cd6f-491a-a978-9accd63f2474"
},
{
"cell_type": "markdown",
- "id": "04c8e7d8",
"metadata": {},
"source": [
- "Question\n",
- "--------\n",
+ "## Nigerian Health Facility Distribution\n",
"\n",
"In this notebook, we explore the question of health facility\n",
"distribution in Nigeria, spatially, and in relation to population\n",
"density.\n",
"\n",
- "We answer and visualize the question “How does the number of health\n",
+ "We explore and visualize the question “How does the number of health\n",
"facilities per capita vary across Nigeria?”\n",
"\n",
"Rather than focussing purely on using tools like `pandas` to manipulate\n",
@@ -367,20 +77,19 @@
"Machine learning can be summarized as $$\n",
"\\text{model} + \\text{data} \\xrightarrow{\\text{compute}} \\text{prediction}\n",
"$$ and many machine learning courses focus a lot on the model part. But\n",
- "to build a machine learning system in practice, a lot of work has to be\n",
+ "to build a machine learning system in practice, a lot of work must be\n",
"put into the data part. This notebook gives some pointers on that work\n",
"and how to think about your machine learning systems design."
- ]
+ ],
+ "id": "41ff4764-7886-4289-a2c7-51c2b1df88e0"
},
{
"cell_type": "markdown",
- "id": "bcd0cf4c",
"metadata": {},
"source": [
- "Datasets\n",
- "--------\n",
+ "## Datasets\n",
"\n",
- "In this notebook , we download 4 datasets:\n",
+ "In this notebook, we download 4 datasets:\n",
"\n",
"- Nigeria NMIS health facility data\n",
"- Population data for Administrative Zone 1 (states) areas in Nigeria\n",
@@ -393,110 +102,286 @@
"looking at the health examples, try to imagine how SafeBoda may have had\n",
"to design their systems to be scalable and reliable for storing and\n",
"sharing data."
- ]
+ ],
+ "id": "1f6dbeca-f6a2-4c39-84ce-4b51a6213b1a"
},
{
"cell_type": "markdown",
- "id": "49372c0f",
"metadata": {},
"source": [
- "Imports, Installs, and Downloads\n",
- "--------------------------------\n",
+ "## Imports, Installs, and Downloads\n",
+ "\n",
+ "\\[edit\\]\n",
"\n",
"First, we’re going to download some particular python libraries for\n",
"dealing with geospatial data. We’re dowloading\n",
"[`geopandas`](https://geopandas.org) which will help us deal with ‘shape\n",
- "files’ that give the geographical lay out of Nigeria. And to get a small\n",
- "database set up running quickly, we’re installing\n",
- "[`csv-to-sqlite`](https://pypi.org/project/csv-to-sqlite/) which allows\n",
- "us to convert CSV data to a simple database."
- ]
+ "files’ that give the geographical lay out of Nigeria. We also need\n",
+ "`pygeos` for indexing."
+ ],
+ "id": "d1bd7bdf-f5f3-4f44-be78-a4f30ebd4fae"
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "%pip install geopandas"
+ ],
+ "id": "5a2d6bb2-5754-4a6e-8688-c361f8f2a257"
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "%pip install pygeos"
+ ],
+ "id": "d0bf7f58-fc1c-4bb6-a51c-df9303cf3bd1"
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Setup\n",
+ "\n",
+ "\\[edit\\]"
+ ],
+ "id": "d547f658-2bc4-43cf-9597-3a3149040109"
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import matplotlib.pyplot as plt\n",
+ "plt.rcParams.update({'font.size': 22})"
+ ],
+ "id": "0a222833-387c-48ed-bdd3-d924b0f04638"
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ ""
+ ],
+ "id": "86eaff6b-efe9-43a9-91d3-1b26024d242e"
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## notutils\n",
+ "\n",
+ "\\[edit\\]\n",
+ "\n",
+ "This small package is a helper package for various notebook utilities\n",
+ "used below.\n",
+ "\n",
+ "The software can be installed using"
+ ],
+ "id": "34fc07c2-0f78-4597-acf9-4d6e554cd5b1"
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "%pip install notutils"
+ ],
+ "id": "edeb448b-ce55-45a5-b3a4-6578757a961f"
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "from the command prompt where you can access your python installation.\n",
+ "\n",
+ "The code is also available on GitHub:\n",
+ "\n",
+ "\n",
+ "Once `notutils` is installed, it can be imported in the usual manner."
+ ],
+ "id": "38c9e248-4857-4151-b6b9-59f2b9268362"
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import notutils"
+ ],
+ "id": "74306c1b-74a7-4066-8eaa-8df2c1c3a908"
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## pods\n",
+ "\n",
+ "\\[edit\\]\n",
+ "\n",
+ "In Sheffield we created a suite of software tools for ‘Open Data\n",
+ "Science’. Open data science is an approach to sharing code, models and\n",
+ "data that should make it easier for companies, health professionals and\n",
+ "scientists to gain access to data science techniques.\n",
+ "\n",
+ "You can also check this blog post on [Open Data\n",
+ "Science](http://inverseprobability.com/2014/07/01/open-data-science).\n",
+ "\n",
+ "The software can be installed using"
+ ],
+ "id": "3e4ac54a-ae32-439f-b730-02d2d05d2373"
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "%pip install pods"
+ ],
+ "id": "ad601aa2-774b-40f6-93ba-5269b613af4f"
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "from the command prompt where you can access your python installation.\n",
+ "\n",
+ "The code is also available on GitHub: \n",
+ "\n",
+ "Once `pods` is installed, it can be imported in the usual manner."
+ ],
+ "id": "7360e3bf-1705-4569-a9fe-afed9a2b66ea"
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import pods"
+ ],
+ "id": "b0cd2fc9-439e-438f-98f1-a0da84bc8c06"
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## mlai\n",
+ "\n",
+ "\\[edit\\]\n",
+ "\n",
+ "The `mlai` software is a suite of helper functions for teaching and\n",
+ "demonstrating machine learning algorithms. It was first used in the\n",
+ "Machine Learning and Adaptive Intelligence course in Sheffield in 2013.\n",
+ "\n",
+ "The software can be installed using"
+ ],
+ "id": "8c14f655-0c71-4ef2-893d-5a8d1f5c6cc1"
},
{
"cell_type": "code",
- "execution_count": 1,
- "id": "70a7d7d2",
+ "execution_count": null,
"metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Requirement already satisfied: geopandas in /Users/neil/anaconda3/lib/python3.6/site-packages (0.8.1)\n",
- "Requirement already satisfied: pandas>=0.23.0 in /Users/neil/anaconda3/lib/python3.6/site-packages (from geopandas) (1.0.2)\n",
- "Requirement already satisfied: pyproj>=2.2.0 in /Users/neil/anaconda3/lib/python3.6/site-packages (from geopandas) (3.0.0.post1)\n",
- "Requirement already satisfied: shapely in /Users/neil/anaconda3/lib/python3.6/site-packages (from geopandas) (1.7.1)\n",
- "Requirement already satisfied: fiona in /Users/neil/anaconda3/lib/python3.6/site-packages (from geopandas) (1.8.17)\n",
- "Requirement already satisfied: numpy>=1.13.3 in /Users/neil/anaconda3/lib/python3.6/site-packages (from pandas>=0.23.0->geopandas) (1.18.2)\n",
- "Requirement already satisfied: pytz>=2017.2 in /Users/neil/anaconda3/lib/python3.6/site-packages (from pandas>=0.23.0->geopandas) (2019.3)\n",
- "Requirement already satisfied: python-dateutil>=2.6.1 in /Users/neil/anaconda3/lib/python3.6/site-packages (from pandas>=0.23.0->geopandas) (2.8.1)\n",
- "Requirement already satisfied: certifi in /Users/neil/anaconda3/lib/python3.6/site-packages (from pyproj>=2.2.0->geopandas) (2020.6.20)\n",
- "Requirement already satisfied: munch in /Users/neil/anaconda3/lib/python3.6/site-packages (from fiona->geopandas) (2.5.0)\n",
- "Requirement already satisfied: attrs>=17 in /Users/neil/anaconda3/lib/python3.6/site-packages (from fiona->geopandas) (19.3.0)\n",
- "Requirement already satisfied: six>=1.7 in /Users/neil/anaconda3/lib/python3.6/site-packages (from fiona->geopandas) (1.14.0)\n",
- "Requirement already satisfied: click<8,>=4.0 in /Users/neil/anaconda3/lib/python3.6/site-packages (from fiona->geopandas) (7.1.1)\n",
- "Requirement already satisfied: click-plugins>=1.0 in /Users/neil/anaconda3/lib/python3.6/site-packages (from fiona->geopandas) (1.1.1)\n",
- "Requirement already satisfied: cligj>=0.5 in /Users/neil/anaconda3/lib/python3.6/site-packages (from fiona->geopandas) (0.7.0)\n",
- "Note: you may need to restart the kernel to use updated packages.\n",
- "\u001b[31mERROR: Could not find a version that satisfies the requirement decarteslabs[complete] (from versions: none)\u001b[0m\n",
- "\u001b[31mERROR: No matching distribution found for decarteslabs[complete]\u001b[0m\n",
- "Note: you may need to restart the kernel to use updated packages.\n"
- ]
- }
+ "outputs": [],
+ "source": [
+ "%pip install mlai"
],
+ "id": "ae2c92da-63cb-4ae2-a807-1be2dd094193"
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
"source": [
- "%pip install geopandas\n",
- "%pip install decarteslabs[complete]"
- ]
+ "from the command prompt where you can access your python installation.\n",
+ "\n",
+ "The code is also available on GitHub: \n",
+ "\n",
+ "Once `mlai` is installed, it can be imported in the usual manner."
+ ],
+ "id": "0680bf0b-9ead-42c6-baa0-b145d3010987"
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import mlai"
+ ],
+ "id": "78e87953-0cfa-452d-a3ab-c67541091ecb"
},
{
"cell_type": "markdown",
- "id": "969895dc",
"metadata": {},
"source": [
- "Databases and Joins\n",
- "-------------------\n",
+ "## Databases and Joins\n",
+ "\n",
+ "\\[edit\\]\n",
"\n",
- "The main idea we will be working with today is called the ‘join’. A join\n",
- "does exactly what it sounds like, it combines two database tables.\n",
+ "The main idea we will be working with in this practical is the ‘join’. A\n",
+ "join does exactly what it sounds like, it combines two database tables.\n",
"\n",
- "You have already started to look at data structures, in particular you\n",
- "have been learning about `pandas` which is a great way of storing and\n",
- "structuring your data set to make it easier to plot and manipulate your\n",
- "data.\n",
+ "You may have already started to look at data structures and learning\n",
+ "about `pandas` which is a great way of storing and structuring your data\n",
+ "set to make it easier to plot and manipulate your data.\n",
"\n",
"Pandas is great for the data scientist to analyze data because it makes\n",
"many operations easier. But it is not so good for building the machine\n",
"learning system. In a machine learning system, you may have to handle a\n",
"lot of data. Even if you start with building a system where you only\n",
"have a few customers, perhaps you build an online taxi system (like\n",
- "SafeBoda) for Kampala. Maybe you will have 50 customers. Then maybe your\n",
- "system can be handled with some python scripts and pandas."
- ]
+ "[SafeBoda](https://safeboda.com/ug/)) for Kampala. Maybe you will have\n",
+ "50 customers. Then maybe your system can be handled with some python\n",
+ "scripts and `pandas`."
+ ],
+ "id": "cf445d57-9757-4c76-8a00-ba4cb02b0d48"
},
{
"cell_type": "markdown",
- "id": "3440c8cc",
"metadata": {},
"source": [
- "Scaling ML Systems\n",
- "------------------\n",
+ "## Scaling ML Systems\n",
"\n",
- "But what if you are succesful? What if everyone in Kampala wants to use\n",
+ "But what if you are successful? What if everyone in Kampala wants to use\n",
"your system? There are 1.5 million people in Kampala and maybe 100,000\n",
- "Boda Boda drivers.\n",
+ "Boda Boda drivers.[1]\n",
"\n",
"What if you are even more succesful? What if everyone in Lagos wants to\n",
"use your system? There are around 20 million people in Lagos … and maybe\n",
- "as many Okada drivers as people in Kampala!\n",
+ "as many Okada\\[^okada\\] drivers as people in Kampala!\n",
+ "\n",
+ "\\[^okada\\] In Lagos the Boda Boda is called an Okada.\n",
"\n",
"We want to build safe and reliable machine learning systems. Building\n",
- "them from pandas and python is about as safe and reliable as [taking six\n",
- "children to school on a boda\n",
+ "them from `pandas` and python is about as safe and reliable as [taking\n",
+ "six children to school on a boda\n",
"boda](https://www.monitor.co.ug/News/National/Boda-accidents-kill-10-city-UN-report-Kampala/688334-4324032-15oru2dz/index.html).\n",
"\n",
"To build a reliable system, we need to turn to *databases*. In this\n",
- "notebook [we’ll be focussing on SQL\n",
+ "notebook [we’ll be focusing on SQL\n",
"databases](https://en.wikipedia.org/wiki/Join_(SQL)) and how you bring\n",
"together different streams of data in a Machine Learning System.\n",
"\n",
@@ -507,422 +392,216 @@
"with screws.\n",
"\n",
"But instead of using a welder or screws to join data, we join it using\n",
- "particular columns of the data. We can join data together using people’s\n",
- "names. One database may contain where people live, another database may\n",
- "contain where they go to school. If we join these two databases we can\n",
- "have a database which shows where people live and where they got to\n",
- "school.\n",
- "\n",
- "In the notebook, we will join together some data about where the health\n",
- "centres are in Nigeria and where the have been cases of Covid19. There\n",
- "are other challenges in the ML System Design that are not going to be\n",
- "covered here. They include: how to update the data bases, and how to\n",
- "control access to the data bases from different users (boda boda\n",
- "drivers, riders, administrators etc)."
- ]
+ "columns of the data. We can join data together using people’s names. One\n",
+ "database may contain where people live, another database may contain\n",
+ "where they go to school. If we join these two databases, we can have a\n",
+ "database which shows where people live and where they got to school.\n",
+ "\n",
+ "In the notebook, we will join some data about where the health centers\n",
+ "are in Nigeria with data about where there have been cases of Covid19.\n",
+ "There are other challenges in the ML System Design that are not going to\n",
+ "be covered here. They include how to update the databases and how to\n",
+ "control access to the databases from different users (boda boda drivers,\n",
+ "riders, administrators etc).\n",
+ "\n",
+ "[1] Boda Boda is the name for the motorbike taxis found commonly in\n",
+ "Kampala."
+ ],
+ "id": "e913aa42-201c-4d9b-93e1-f22e966f0083"
},
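As a tiny concrete sketch of the 'join' idea (made-up rows, not the NMIS or Covid tables used later in the notebook), the snippet below combines a table of where people live with a table of where they go to school on a shared `name` column, which is what an SQL `JOIN ... ON` does. All names and values here are invented for illustration.

``` python
# Tiny illustration of a join (hypothetical data, not the notebook's tables):
# combine two tables on a shared 'name' column.
import pandas as pd

homes = pd.DataFrame({'name': ['Amina', 'Bola', 'Chidi'],
                      'lives_in': ['Kampala', 'Lagos', 'Abuja']})
schools = pd.DataFrame({'name': ['Amina', 'Chidi', 'Dayo'],
                        'school': ['School A', 'School B', 'School C']})

joined = homes.merge(schools, on='name', how='inner')  # keep names found in both tables
print(joined)
```

An inner join keeps only the names present in both tables; passing `how='left'` or `how='outer'` to `merge` changes which rows are retained.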
{
"cell_type": "markdown",
- "id": "b916285a",
"metadata": {},
"source": [
- "Hospital Data\n",
- "-------------\n",
- "\n",
- "The first and primary dataset we use is the NMIS health facility\n",
- "dataset, which contains data on the location, type, and staffing of\n",
- "health facilities across Nigeria."
- ]
+ "# Nigeria NMIS Data\n",
+ "\n",
+ "\\[edit\\]\n",
+ "\n",
+ "As an example data set we will use Nigerian Millennium Development Goals\n",
+ "Information System Health Facility (The Office of the Senior Special\n",
+ "Assistant to the President on the Millennium Development Goals\n",
+ "(OSSAP-MDGs) and Columbia University, 2014). It can be found here\n",
+ ".\n",
+ "\n",
+ "Taking from the information on the site,\n",
+ "\n",
+ "> The Nigeria MDG (Millennium Development Goals) Information System –\n",
+ "> NMIS health facility data is collected by the Office of the Senior\n",
+ "> Special Assistant to the President on the Millennium Development Goals\n",
+ "> (OSSAP-MDGs) in partner with the Sustainable Engineering Lab at\n",
+ "> Columbia University. A rigorous, geo-referenced baseline facility\n",
+ "> inventory across Nigeria is created spanning from 2009 to 2011 with an\n",
+ "> additional survey effort to increase coverage in 2014, to build\n",
+ "> Nigeria’s first nation-wide inventory of health facility. The database\n",
+ "> includes 34,139 health facilities info in Nigeria.\n",
+ ">\n",
+ "> The goal of this database is to make the data collected available to\n",
+ "> planners, government officials, and the public, to be used to make\n",
+ "> strategic decisions for planning relevant interventions.\n",
+ ">\n",
+ "> For data inquiry, please contact Ms. Funlola Osinupebi, Performance\n",
+ "> Monitoring & Communications, Advisory Power Team, Office of the Vice\n",
+ "> President at funlola.osinupebi@aptovp.org\n",
+ ">\n",
+ "> To learn more, please visit\n",
+ "> \n",
+ ">\n",
+ "> Suggested citation: Nigeria NMIS facility database (2014), the Office\n",
+ "> of the Senior Special Assistant to the President on the Millennium\n",
+ "> Development Goals (OSSAP-MDGs) & Columbia University\n",
+ "\n",
+ "For ease of use we’ve packaged this data set in the `pods` library"
+ ],
+ "id": "307d3133-e190-4fcc-b068-a33c5f02b5b6"
},
{
"cell_type": "code",
- "execution_count": 2,
- "id": "e0cabd7f",
+ "execution_count": null,
"metadata": {},
"outputs": [],
"source": [
+ "data = pods.datasets.nigeria_nmis()['Y']\n",
+ "data.head()"
+ ],
+ "id": "9bac110a-267f-4f2c-84d3-31f10fd7b13d"
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Alternatively, you can access the data directly with the following\n",
+ "commands.\n",
+ "\n",
+ "``` python\n",
"import urllib.request\n",
- "import pandas as pd"
- ]
+ "urllib.request.urlretrieve('https://energydata.info/dataset/f85d1796-e7f2-4630-be84-79420174e3bd/resource/6e640a13-cab4-457b-b9e6-0336051bac27/download/healthmopupandbaselinenmisfacility.csv', 'healthmopupandbaselinenmisfacility.csv')\n",
+ "\n",
+ "import pandas as pd\n",
+ "data = pd.read_csv('healthmopupandbaselinenmisfacility.csv')\n",
+ "```\n",
+ "\n",
+ "Once it is loaded in the data can be summarized using the `describe`\n",
+ "method in pandas."
+ ],
+ "id": "79125ce7-edb4-4d27-a7b0-cc0ae6bfbc0b"
},
{
"cell_type": "code",
- "execution_count": 3,
- "id": "493b7e67",
+ "execution_count": null,
"metadata": {},
"outputs": [],
"source": [
- "urllib.request.urlretrieve('https://energydata.info/dataset/f85d1796-e7f2-4630-be84-79420174e3bd/resource/6e640a13-cab4-457b-b9e6-0336051bac27/download/healthmopupandbaselinenmisfacility.csv', 'healthmopupandbaselinenmisfacility.csv')\n",
- "hospital_data = pd.read_csv('healthmopupandbaselinenmisfacility.csv')"
- ]
- },
- {
- "cell_type": "markdown",
- "id": "020b5f63",
- "metadata": {},
- "source": [
- "It’s always a good idea to inspect your data once it’s downloaded to\n",
- "check it contains what you expect. In `pandas` you can do this with the\n",
- "`.head()` method. That allows us to see the first few entries of the\n",
- "`pandas` data structure."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 4,
- "id": "b79731dc",
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/html": [
- "
\n",
- "\n",
- "
\n",
- " \n",
- "
\n",
- "
\n",
- "
facility_name
\n",
- "
facility_type_display
\n",
- "
maternal_health_delivery_services
\n",
- "
emergency_transport
\n",
- "
skilled_birth_attendant
\n",
- "
num_chews_fulltime
\n",
- "
phcn_electricity
\n",
- "
c_section_yn
\n",
- "
child_health_measles_immun_calc
\n",
- "
num_nurses_fulltime
\n",
- "
...
\n",
- "
antenatal_care_yn
\n",
- "
family_planning_yn
\n",
- "
malaria_treatment_artemisinin
\n",
- "
sector
\n",
- "
formhub_photo_id
\n",
- "
gps
\n",
- "
survey_id
\n",
- "
unique_lga
\n",
- "
latitude
\n",
- "
longitude
\n",
- "
\n",
- " \n",
- " \n",
- "
\n",
- "
0
\n",
- "
HEALTH POST KAGBANG
\n",
- "
Health Post
\n",
- "
True
\n",
- "
True
\n",
- "
False
\n",
- "
1.0
\n",
- "
True
\n",
- "
False
\n",
- "
True
\n",
- "
0.0
\n",
- "
...
\n",
- "
True
\n",
- "
False
\n",
- "
True
\n",
- "
health
\n",
- "
1393335750723.jpg
\n",
- "
6.54340807 9.08470312 218.8000030517578 5.0
\n",
- "
451a0efb-5fa6-4bad-93cd-7cf19eb50833
\n",
- "
cross_river_obudu
\n",
- "
6.543408
\n",
- "
9.084703
\n",
- "
\n",
- "
\n",
- "
1
\n",
- "
Alhari Clinic
\n",
- "
Primary Health Centre (PHC)
\n",
- "
True
\n",
- "
True
\n",
- "
True
\n",
- "
4.0
\n",
- "
True
\n",
- "
False
\n",
- "
True
\n",
- "
1.0
\n",
- "
...
\n",
- "
True
\n",
- "
False
\n",
- "
True
\n",
- "
health
\n",
- "
1393316873034.jpg
\n",
- "
9.00720861 7.67780798 432.8999938964844 5.0
\n",
- "
5ddb68d6-02d2-44de-9df3-ebc840a1da42
\n",
- "
nasarawa_karu
\n",
- "
9.007209
\n",
- "
7.677808
\n",
- "
\n",
- "
\n",
- "
2
\n",
- "
Primary health centre umukuru okehi
\n",
- "
Basic Health Centre / Primary Health Clinic
\n",
- "
True
\n",
- "
True
\n",
- "
True
\n",
- "
2.0
\n",
- "
True
\n",
- "
True
\n",
- "
True
\n",
- "
4.0
\n",
- "
...
\n",
- "
True
\n",
- "
True
\n",
- "
True
\n",
- "
health
\n",
- "
1393594715772.jpg
\n",
- "
5.1297 7.1592 73.3 5
\n",
- "
2173b656-14eb-400d-9eef-76830379b065
\n",
- "
rivers_etche
\n",
- "
5.129700
\n",
- "
7.159200
\n",
- "
\n",
- "
\n",
- "
3
\n",
- "
PHC EHOM CENTRAL
\n",
- "
Primary Health Centre (PHC)
\n",
- "
True
\n",
- "
False
\n",
- "
False
\n",
- "
2.0
\n",
- "
False
\n",
- "
False
\n",
- "
True
\n",
- "
0.0
\n",
- "
...
\n",
- "
True
\n",
- "
True
\n",
- "
True
\n",
- "
health
\n",
- "
1393330657159.jpg
\n",
- "
5.4633 8.1464 117.8 5
\n",
- "
963abf9d-5a72-4b35-811e-9c1830adc88b
\n",
- "
cross_river_biase
\n",
- "
5.463300
\n",
- "
8.146400
\n",
- "
\n",
- "
\n",
- "
4
\n",
- "
Health post
\n",
- "
Health Post
\n",
- "
True
\n",
- "
False
\n",
- "
False
\n",
- "
0.0
\n",
- "
False
\n",
- "
False
\n",
- "
True
\n",
- "
0.0
\n",
- "
...
\n",
- "
True
\n",
- "
True
\n",
- "
True
\n",
- "
health
\n",
- "
1393342042946.jpg
\n",
- "
5.504 8.0251 52.4 5
\n",
- "
9d09aaac-578c-4a48-a054-dee678a05422
\n",
- "
cross_river_biase
\n",
- "
5.504000
\n",
- "
8.025100
\n",
- "
\n",
- " \n",
- "
\n",
- "
5 rows × 30 columns
\n",
- "
"
- ],
- "text/plain": [
- " facility_name \\\n",
- "0 HEALTH POST KAGBANG \n",
- "1 Alhari Clinic \n",
- "2 Primary health centre umukuru okehi \n",
- "3 PHC EHOM CENTRAL \n",
- "4 Health post \n",
- "\n",
- " facility_type_display \\\n",
- "0 Health Post \n",
- "1 Primary Health Centre (PHC) \n",
- "2 Basic Health Centre / Primary Health Clinic \n",
- "3 Primary Health Centre (PHC) \n",
- "4 Health Post \n",
- "\n",
- " maternal_health_delivery_services emergency_transport \\\n",
- "0 True True \n",
- "1 True True \n",
- "2 True True \n",
- "3 True False \n",
- "4 True False \n",
- "\n",
- " skilled_birth_attendant num_chews_fulltime phcn_electricity c_section_yn \\\n",
- "0 False 1.0 True False \n",
- "1 True 4.0 True False \n",
- "2 True 2.0 True True \n",
- "3 False 2.0 False False \n",
- "4 False 0.0 False False \n",
- "\n",
- " child_health_measles_immun_calc num_nurses_fulltime ... \\\n",
- "0 True 0.0 ... \n",
- "1 True 1.0 ... \n",
- "2 True 4.0 ... \n",
- "3 True 0.0 ... \n",
- "4 True 0.0 ... \n",
- "\n",
- " antenatal_care_yn family_planning_yn malaria_treatment_artemisinin \\\n",
- "0 True False True \n",
- "1 True False True \n",
- "2 True True True \n",
- "3 True True True \n",
- "4 True True True \n",
- "\n",
- " sector formhub_photo_id gps \\\n",
- "0 health 1393335750723.jpg 6.54340807 9.08470312 218.8000030517578 5.0 \n",
- "1 health 1393316873034.jpg 9.00720861 7.67780798 432.8999938964844 5.0 \n",
- "2 health 1393594715772.jpg 5.1297 7.1592 73.3 5 \n",
- "3 health 1393330657159.jpg 5.4633 8.1464 117.8 5 \n",
- "4 health 1393342042946.jpg 5.504 8.0251 52.4 5 \n",
- "\n",
- " survey_id unique_lga latitude longitude \n",
- "0 451a0efb-5fa6-4bad-93cd-7cf19eb50833 cross_river_obudu 6.543408 9.084703 \n",
- "1 5ddb68d6-02d2-44de-9df3-ebc840a1da42 nasarawa_karu 9.007209 7.677808 \n",
- "2 2173b656-14eb-400d-9eef-76830379b065 rivers_etche 5.129700 7.159200 \n",
- "3 963abf9d-5a72-4b35-811e-9c1830adc88b cross_river_biase 5.463300 8.146400 \n",
- "4 9d09aaac-578c-4a48-a054-dee678a05422 cross_river_biase 5.504000 8.025100 \n",
- "\n",
- "[5 rows x 30 columns]"
- ]
- },
- "execution_count": 4,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "hospital_data.head()"
- ]
- },
- {
- "cell_type": "markdown",
- "id": "dbddc1d4",
- "metadata": {},
- "source": [
- "We can also check in `pandas` what the different columns of the data\n",
- "frame are to see what it contains."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 5,
- "id": "62e6f569",
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "Index(['facility_name', 'facility_type_display',\n",
- " 'maternal_health_delivery_services', 'emergency_transport',\n",
- " 'skilled_birth_attendant', 'num_chews_fulltime', 'phcn_electricity',\n",
- " 'c_section_yn', 'child_health_measles_immun_calc',\n",
- " 'num_nurses_fulltime', 'num_nursemidwives_fulltime',\n",
- " 'num_doctors_fulltime', 'date_of_survey', 'facility_id', 'community',\n",
- " 'ward', 'management', 'improved_water_supply', 'improved_sanitation',\n",
- " 'vaccines_fridge_freezer', 'antenatal_care_yn', 'family_planning_yn',\n",
- " 'malaria_treatment_artemisinin', 'sector', 'formhub_photo_id', 'gps',\n",
- " 'survey_id', 'unique_lga', 'latitude', 'longitude'],\n",
- " dtype='object')"
- ]
- },
- "execution_count": 5,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "hospital_data.columns"
- ]
- },
- {
- "cell_type": "markdown",
- "id": "570aafdf",
- "metadata": {},
- "source": [
- "We can immiediately see that there are facility names, dates, and some\n",
- "characteristics of each health center such as number of doctors etc. As\n",
- "well as all that, we have two fields, `latitude` and `longitude` that\n",
- "likely give us the hospital locaiton. Let’s plot them to have a look."
- ]
+ "data.describe()"
+ ],
+ "id": "fd13612b-bd3f-4454-8122-8498d59f3cb8"
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "We can also find out the dimensions of the dataset using the `shape`\n",
+ "property."
+ ],
+ "id": "8be7737e-5a9f-422c-97a9-09ade6b85317"
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "data.shape"
+ ],
+ "id": "719dc90c-cf18-4a6a-a898-c6dc1795f352"
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Dataframes have different functions that you can use to explore and\n",
+ "understand your data. In python and the Jupyter notebook it is possible\n",
+ "to see a list of all possible functions and attributes by typing the\n",
+ "name of the object followed by `.` for example in the above case if\n",
+ "we type `data.` it show the columns available (these are attributes\n",
+ "in pandas dataframes) such as `num_nurses_fulltime`, and also functions,\n",
+ "such as `.describe()`.\n",
+ "\n",
+ "For functions we can also see the documentation about the function by\n",
+ "following the name with a question mark. This will open a box with\n",
+ "documentation at the bottom which can be closed with the x button."
+ ],
+ "id": "7c867c2b-a006-48b0-9001-4148a11c6b6c"
},
{
"cell_type": "code",
- "execution_count": 6,
- "id": "e4556763",
+ "execution_count": null,
"metadata": {},
"outputs": [],
"source": [
- "import matplotlib.pyplot as plt"
- ]
+ "data.describe?"
+ ],
+ "id": "90a047c0-3e0a-46cb-bb06-236a6b6ba481"
},
{
"cell_type": "code",
- "execution_count": 7,
- "id": "37b099c5",
+ "execution_count": null,
"metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "[]"
- ]
- },
- "execution_count": 7,
- "metadata": {},
- "output_type": "execute_result"
- },
- {
- "data": {
- "image/png": "iVBORw0KGgoAAAANSUhEUgAAAXAAAAD4CAYAAAD1jb0+AAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjMsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+AADFEAAAgAElEQVR4nOzdya+lZ5Ie9jh3zsw75c05k2SRVdVNVXVJLTXKkgzBdrckDwLc8sKD0Ct5ABr+AwxbghbaaCHDC2+0asCyLMCWbdiAZWvVtgyjBUgCulpS9aDqrpEsMpnTnef5eBH9Q8Q5lSyyWFkkk3VeIJH3nnvOd77hfSOeeOKJeAfD4TAmYzImYzIm4+UbU5/0CUzGZEzGZEzGRxsTAz4ZkzEZk/GSjokBn4zJmIzJeEnHxIBPxmRMxmS8pGNiwCdjMiZjMl7SMfNxftnNmzeHr7/++sf5lZMxGZMxGS/9+K3f+q314XB4a/z1j9WAv/766/G1r33t4/zKyZiMyZiMl34MBoO3n/f6hEKZjMmYjMl4ScfEgE/GZEzGZLyk4wMN+GAw+NuDweDpYDD43ef87b8YDAbDwWBw8ydzepMxGZMxGZPxfuPDIPC/ExH/zviLg8Hg1Yj4NyPi+y/4nCZjMiZjMibjQ4wPNODD4fA3ImLzOX/6byPiv4yISTOVyZiMyZiMT2B8JBXKYDD4ixHxcDgcfn0wGHzQe381In41IuK11177KF83Ge83hsOIy8v8NxxGDAYRU1P57wOey2RMxmS8/ONHNuCDweBqRPy1iPi3Psz7h8Phr0XEr0VEfPWrX52g9Rc1Li8jTk/z//Pzen1mJg343Fz+PxmTMRmf2fFREPgXIuKNiIC+X4mIfzYYDP7kcDh8/CJPbjLaGA7TUJ+eRpyd5f9TU/nzxUUa8tnZiOnpNOKHhxELC/kzoz4ZkzEZn6nxIxvw4XD4OxFx2++DweCtiPjqcDhcf4Hn9dkdaI8f1ocdFRKR7724iNjdjXj8OP+/uEhjPTMTMT8fce1aHu/wMA345WUeY2kp4sqV/Pnq1Xz/hFqZjMn4zIwPNOCDweDvRcQvRsTNwWDwbkT89eFw+N/9pE/sMzPGDfblZRnns7N8HZIeDNIg979dXkZsb0f8wR9EHB0lNbKzE3F8HLG6GnHrVqLx2dmIg4M6ztxcxOZm/p2Bn5vLv11e5ndMT+c/nHk/1wmfPhmT8akfH2jAh8Phr3zA319/YWfzWRjDYSLki4v8eTgsCuP8vAw42oOhnp7O187OkvqAvIfDNNh+3tpKg3p+HvHsWb7vypX8f3o6X7tyJRE3Y333bh53aiqPsbhYxnp2to7HeYyft/dMxmRMxqdqfKy9UD7Tg+E+OSkUDOnOzyf6jSjjzXAPBomcB4NCvT57dpZGdG8v0ffubiLv2dmkS3Z3iyrZ38/jHx4WT760lMd++jSN9vJyHu/4OP+/vMyffd/OTjqBhYUy2NPT6RDm5ydGfDIm41M2Jgb8RQyKkPPzNODQcEQaaQnHLvsbDPJ1Bndmpj5zdJQ/DwZpYHd3E3nv76dBjigEf3xcyc3hMA3t8XGex/l5InFGemYmjTEncnpa9MvlZTkSxn1+vqiZ5eX87IRHn4zJ+NSMiQH/ccdwWEb77KyoCpzy1FT+7eQkUThje3GRhhp1cXiY7716teiUk5M03CcnaUQPDvKze3v593v38pgMr++5uMhj7OzkOV65kp/b3S2KZno6j8Mgo32Ojioa2NvL9y0uFn/OqE+M+GRMxic+Jgb8xx3okG4EIXCom0G9uCjDd3FRycyDg/oZvRGRxn1vL1+bmkrjeXaWBnRpKQ3rYJC/cxwnJ4X0oezFxTzWYJDnMTeXaP7iIo350VEZ76OjurapqYiVlfysBKpjixYmYzIm4xMbEwP+UYcCGhQIg8ZwDgaJfCNG+W1GnSFkFB1jdjZ/HgyKMnGs4TBRt6RmRBr109M03AsL+Y9TmZsrueHsbPHYJyelD6eOmZ7O60G3LCzk346Pi17hlJz3RKEyGR/HeJ466icx7z6u73mBY2LAP8royb+zszTi+GaKDtrrqalC1GgO/PbsbBrTiELgh4dpuKemEvUOh0WfSHweHhaNgcc+Ps7/r19P2eDpaaL069fz+5aWynivruZ7jo/LMJ+cVEKU7tzr167lz6KJhYV8bXa2qJWPc7yEC20yPuKQX4oYTfC/6FyMQjlgy++f8pzPxIB/lOFBQ7qQ9fFxolwcM6M3MzNKr5gQXt/eriTk5WUaaBWW5+dpZLe3y+j7bgnN/f3i0s/O8v+1tfyfxFDxj+pM5wyFHx9XFSdunHMSGVxclKZ8fz9fv3Ytv+cnbUTlGDisHs1E1H1BE33Wq09/kk6sS2Ejai5HfLyO8/KyQIwCNefzUWm897tv1qXr8f+nnC6cGPCPMkyk4bAUHDMz1ZNkdjb/TU2NqlFMEmqRo6OU+G1s5GclLqem0kDirr0+N5fvn5+vxCfVyMxMou2Oik3Ma9dK5TI3V8Z5OEwDf3ychl0pPnqIwadquX9/9NoZ/n4PXqQROT/P86TuiagkMJrJeZyf57m6vunpVM58EhHCT3q4Xj97Ji+i/81wWOChF5yhBXuCHkKNeLGGnQM5Pi7g0XMvnvlHOe77oWzHU2/hOj7K93yMY2LAP8qYmqqwDmetKIYED3qYmyvuGHVyfl6JQYqUZ89K992TmwcHxbNvbpZz2N7O9y4v1/uXl8tJzM0lHQI9zc0VJ47fnp7ORRKRhvz8PKkTP29v59+vXs3jUqU4lu/tapsXgVZw7ru7pfJhsC4u8j5sb+fPipI4I+e1v5/34dVXs5BJS4FP+/gwyJrB7kYV1fDj6vV9d/9eslPzN2I0Ge9cXwT14Dp6tCpx36uFP8o86yj7eZ08z89HHZTv/RSPiQH/KEPCj1ER2lN3kOqhK0w4yHB6Ot9zdBSxvp7GyHEYHob5+DilhOvrVc25uZnveeWVNKrXr6eBunKlJjqlikRqR6cmqJ8XFvI9rocEcXs7/3ZwkEaURHF2NuLmH27CND9fC+DHRSuQ18FBRiXopI2NcjjvvltO5NGjPMelpVTLKFIaDPL3s7OI996LePAg4stfrnzAp3UwGv0+dtqsv6+3Zug/i8i6ARZRqQL+YfSSY/Xv6worCNbxzYcXQT1I5juO40fk/yjHj0qPmafjvLrv+JSj7eeNiQH/UQfPvbCQxhZ3PTWVhqeXzVOWzM0VD312lkbn8eOIt9/Oz2xujlZmdoTw+HEV+uzt5ftN4pmZRMdHRxF37uTva2tpqBYX85/wED3SF9vpaalbUCZLSxEPH6YhxHX3QqSVlaoKXVsbdQgfhLjw+Tj7ubmibiCg4TCPDf2fnKQR39ysRGtExDvv5P8nJ3k+5Jc7O3lM/y8sRHznO3kNb76Z90lV7KdtMJIdBfp9pi1Vhg0Sp/8X5Y1TA4eH9d6Iqjd4nhF0f/vzpLYCQshmr14toy4nMY7cnQvn4u/j0QXndX5ehp9x7ce
wnj4Kun+e8XZf3LuLi8pBzc7m+uiRx6dsTAz4jzr6RFxcHNVfQ9sRo2GoyXl8nGhyby8N85Mn+fPJSZXLX7mSnPXWVk6mra2I3/7tiK9/vfTgP/dzaYwU3dy8WQnHmzfzGEtLabxOTgr1W9Bzc4WcGYmzs6KB/K4k/+CgpIMReSwcvtcZ4/cberfs7o4qZ65dS1UM5Hh5GfH97+d7Dw7yc/v7+d7Hj5OHx4+6H1A7mmd7O4/r98Eg0frRUcow793Lc5VL+KSVBpx1d1A9xO8OdJyrldzt1b3uo/ss4dzBxczM859XR9YM3N7eaE7COajiRRGK4BSjnZxUriKiDP+1a9XArddK9GRlv9YX1ZfHd/qeLqG1RrWkUHS3vZ2f6+0lPkVjYsB/1NHpAohD+Xr35h0d4XR3dnJSP3yYBmVrq5DF0VFOFij0/DwN1je+EfE7v1Pff3mZv1+5EvGlL0Xcvp2Ie2UlOw+KDPDxEaMoicHigJTtR+TrW1ulLjk9LYpndjbff3CQkYPin7t3RznqhYXRVriub28vUTRuG6qbm0tHNjdXXPvv/E7EP/kn5eBmZpIGmZ3N85udTSTNiF9e5j3Y3Mz7qmXAwUFFHzs7+e/NN/N9HN21a+nsyCI/rkXaDXQ3lgzyeCQmQvKsODGItdcTcNIR+ezM1bOzfE3twPMMOMpmPIqcni6Hbg0wwM4XOj8+zrnSowI0BZrrypVR/jxitFNnj+oAhq6EYXR/lOSpY/lcz0u5n64joq6bgOBTSL9NDPiPOoRhFhXj1Qt4/F0C5PAwJ8fhYRqYR4/SmECHkpmLi5V4ZKy68e7j3Xcjfv7n82dIkoRxbq6SnvPzeVx8fEQhMMgiYrT3ypUrtSidt0Kh8/NyRGica9dG2wRAWBDlxUWe78OHpaHHc0OC5Ijvvhvxf/1fSZE8fZr3KCKd08//fH7Pykpd98ZG3rONjTzu2VneN0hU0nZ6OuK7381j/sIvFKpbWqpnubT08TTt6lw32gSK9n9ESTa9l8Hk5DsvfHZWzxB67X15GHHv1XnyeYMRcx+mp3OO9u9C6/TOmxRTXfLpuxyPQ+i0kHOLqGRpTyKOq2u68qg7OtSI4z2PK+/n3SMDx2SkGftO7X0Kx8SA/yijJ9kiasIooqGlZkS7BBD3/Z3vJEI+PEzjpOBmYSEN0+JifebwMI9/5cpoiXtEyfYiyohCIIyQToLCaXKsniDqcrHp6USykqoM/fx8vm9np/TmFxdpNKen87zn5pIKcZ+gGxQIPe+771aFqWu6erUW4//z/+Q9Wl8fXTTPnqVRv38/r/Xp04gbN0pd4pkoROIcLP7Nzfzc3l7EN7+Z333zZp77wUEeS0L46tVCoT+JISKKyOdydFRUF6eJUmFUIOPZ2ULeUDkU7O+dZhkMqvBLodnxcd4n8wj37Dt6XsNzhHTNTd91eFjvp2JyfeOOw1xD00VU1NNpEwAIAh5XjXSU3ikn95CslTy2P8fOq6tgnpqqJm8iE59xHZ9C+iRiYsA//PDAhVnQd080zc3VRBXCHh7mv6dPI37/90v+tr09itJXV9OYQFhPntRCHDfeEUW74HLxjhC1c+mT/+SkVCl9H83OUy4v58+7u5UI3d6upNfmZn7HrVt5jI2NomampvI6LFAInFPZ2ipuGx8uIXx6mt/tHj1vvPVWoaG7d2vh7+zkzxKa5+d53js7+Z1Pn5ZzXFnJSMC2c6ureS4rK3keN26kEV9YKESuHfCPu5A9BwaYMRWBcZact+gEqrWrkuuMGFWC3Lgx6pA5Bs3Q9vfr+W9slBG9dq2S2V36KhpAQ+3vjxpvkYIocnc3f796tQrPFhfr2vraGE/Qivi6gXXt5mdXuoxL/MYNbTfA40nrzrl39E4o4DPu+9rap7YobGLAP+zoPbxNdHzj+G46OG8oc2enkpYR1bxqbi6NxXCYiTWG4r338tirq/n6o0c/eD6/8AsRb7xR/O3iYv4PzUbkcfVNgWaU2o831zJB8cerq3mcra2SH+K9oSBJqouLPC5k3lvUundHR3ks/cm3t8sBSkB+kPTs5CT5d50boVcLThIKYtzZKaXN4WEZkJWVPAfPcGGh6Kz19bw/168nQvd8rl7N46BjOs8fUa/1eYLCgZCpb4AA53l0lH+Xd2BwUQ69w+Tpac4H1zGOPOURXK9Ec0R+v3yG6zo7y2u/e7fu6c5OGVbPhQEVVbm2iMpFOIeeVOUAFHw5pkihJy+trXH0y2m7l93Q90Iic7kbZ07KcC/MTwbcd83PV9uK2dlaK5/SMTHgH3Z0OoTMaH+/UM2VK9U5UIJuMEhD8c47NUE2N3PS3b1byODevURPvD4VxWuv5ST62tcSNRp/4S9E/Lk/lwnMxcVM8N28WQtqerp494ODUpzgiO/erXA6ohZaxGgy9uwsj/+zP1sVo9CaSW17t55AI2mkYDk5SZR7cpLvffYsjbkiIvTP0VGinY2NH/4cnj7Nc7h9uwwEA4C/vXo1owRqHkVPESVVHAzynu/uViUrhOw53L2b53R0VInfrj5yH3uvGs+hGwVqhsGgKDRRE2O0t5f3Ync3jycRSJ5qfu3uVhthqD4ir1VewvkwZgzjzk46Z4bRc97YSIdweJjneuXKaCUwtZGE/Xh3S69FVFTFQXQOW1QBQDCeAE13GAZn2A2pCKFTgl3OyHiPS1xFtf7uOjz3w8MCMBzMhwEXn9CYGPAPGh4invvwMBdKRHHbEblo9/by71DlO+/ka4p2cJbCWMiJdnthIRfnykoa9c3NnNS3b6fROjjI4p0vfKFCfghheTl/78mjk5NE6AynZB1jMj9fr7m+o6MKkVVqzs2lMaTLxhF26ZiQ/91383cOCqITwr/1Vr3HfdnfL03xh6l8U425u1vImC45Is/pypWswrx+vVCY9rs237BZxcVF3Xs02e5uGXyJOAbN+UKsFrZ7zwChs8jwNC+T7O2FU47lnjBwZHadZjEvOR7IlqFEQeF3XXN3zJA45Ly/X62FGTtOy+AcRJoQt7mARx4O8z3n5xUdMrbUJD2/0Xn73k+//+s69v45Bnp2tu5nR/6cApVP5+K7dLMnizkDTsNc+RSOiQF/3ujaaInKiEIneEihu4VgUhwcJGWCMoBQFNxAD8vL+XvnFhcX6xwGgzTUqJGzs0zi3bmTry0vj24AwZgsLdWih8JMRAZza2t0p3qTuSto1tYKKc/NZZTw9Gkhov39/Bl9E5Gf3d5OBzQ9nX9z7Y8flw4YB8tAQPF6mkDn7zcYIChqfj7vx+xsIVSGaG0tX5d0jUjDPjVV5foSvcvLtfAvLurva2ujxU4MKlQ+NVVJacZIMdHlZXHyIhWg4Nq1MrRaJFxcVNR0eFhIm1KH3DSiDNe4AqVTGY6ly6V5YJ6jpLqhhTgh0F5I5PVeX2DnJ0lta8M2fs7VHOlGelxR0hOcPXnq+6yPcf1+R8wSsu6zCASdRR/fZZfmfXca1vQkifmSDIYM/SDMg576hBNCo1P6Tjubm3U8PUwsdHwxIzw/n7z34WFOflwp6R/DcetWbV
jMYD55Us5mcbGkhBH5vvX1mowKYkz89fXaO3NurhC7RXB8nCh2a6sSYtAj6oFzunZtVIkABXMYtLRUD+4hZ9MNyauv5jH6FnLPGySDnAgEy3jv7VV7AdvCCY3JJSMKaR8e5r2zdZx7QM7n+l0XROnZP31a/DGnJvKRJ0FJMLQUIL7XPeBc9vYq6df1+2Sc+/ujGvbd3UqEzsxURSs0iXOHhDnAGzfyHmhu5vz8i6jn1+V2kpI3bpSjtl4WFytSEdF1A92VJ86Foe4cef/dtcw8x3T13ARpr1xAxKizo4xyLHRQxKgunVGfJDFfgmGC7O9Xksvi2d+vRQA5MtRCxXEqAlKSGERb6F0iCSXRGVH0xpUrhYjX1ipZqVBnby//v369+HYJu7m5/AwUScfbC0QePaoFsr6e38MwQT5nZ6UqUSmKR1flpzBDos//T57kuUXU3xkgTg/CdF2+Uxiu9P6DBqNxeJjGGE89N1fIV/EP3ff2duUDKCR8ZmWljDoqpCfIUCLONyLvoTyFqGNrK79PuwPRkopGORXKn729eq9nwsgzPru7eV+7xNB9Rr1cvVrb56EyzGXzg0G8erXmI1DSHcxgUE4wYrTMXWWvOYMO7OqUrijpBW6dquiUSk+W9veOo/SIWls98ug0iCiuV+v6HsVqEqL9+fYqZWt3gsBfgiH5aB/Ks7NCz5QVEHZfgNQj+/uJpBkU2XOFLhGJoikAvGdrqyoOLRaSvIWF/N9elORtvc+FLoaO53xRNIwvuZ4CGXSPhSXJevdunjM+f3Exr00lpb7jJJK3b+f3M+gzM/nenZ00EPPzo5z90VEtnunpfJ/mXXqfMzLP08A/b5yd5XlobDU3l9cBfdmUGc8+N5fHFQG5P/h+BVWSlLhmKH9pKQ3E3l5pn6Fg+n2G4Ogon6GipK7XFhlR/XBaqDsRoWvidNFft2+PJurkX5xzT+ZNTdV9xulL5HaaYHm5Er/khT3ngZ4AeCIKZXej2nXcvfKxo3cO0Hs6jeGceqFTByr+xim6ZvSWaAxF1QEHByEP5PcefZHofgqTl8bEgPfBe0v4mFyok8ePCw3J6FuEkoGSU1eupDIE+pmejvjc53IhkoXRQyuQgUYkWxYW0tg/eJDnIdGnvDcijYLSaH/nWIS2QtCNjXJOMzN5PYeHhRgvL9PgUwSsrZUBfPy4+rYwdu5V7x8BzaFWcOkPHhQd8a1vJUK3WNA+eHwqjI7YGcXnjevXy0BxUgwspCia6Vr98QKmq1fz38pKNe1aWMjnyDBwlp6DyMH5QrX01/hVaM51rq8XOkdj7O/nfdEDRmEUQwUdMqDUO+PVhZLtKLP5+fwuSLPnK1RJ+j7PlCSRke71BAw6ZO0cURicwjg/Pa7k6Iqd7iQ8k4hRGaVrc3xKJ9/vXCnFuuPgaER48hf+Z/xF2Rx35+s/hWNiwPuAYM/Py1gJt4+OSmKEPrDd2MlJoTBVfMLbk5N8TTHA+nr1sMazC1OFjFDM2lrx3tQmtkDrofV4P3Jo0AJZXi56hyFRbh5RBr9XcqIQ1tfTyeA6Sdl0p+td21AhjPLsbBkIxu/4OOLzn0+eezjMiGV+Pu+fkn1cKgM3XtjTZXoLC4lCfZfXoMGuYfeMtafVCyYiDfb0dDowydC1tULl7hv0rnioO4W5ubxfnDMqCn3g/jmO+0ea97u/m+cmwtnYSOeBnz45yfkA2ftelbjuLypEVa/39s0uuuGdmqqIyPXQ1ZsX3Xh3VYtz4UQiRgvDDAa2D5GfeQcEmZODQa3Bjv4Hg2r81hUj9OgRdW+9z7GHw2oVIdHLQZIDd6oH3dV7oXCmvucTrNScGHDDA1ZZOBhUkowWF1oiDZyezp95eKGXRUKOtrJSxpLCAg1iYtL1Mng3buRivXGjElwQLQStVLhHBQwZ9AZNzc7ma87t6dNaGGdnlWy6uEiDDS1KzF67lsaWQbCwUC0WRac7KBTm5tIQQata5F5e5v0TRaAfGDRGeHm55IsMpQQkx8GIOe/uaJeWarE+eTJaXYcfPzsblcGpbOT43FcIUcGUaxI5yI3Qfy8tVaKyzxFGWvL1m9/M+4vmQaNwhoNBRjF4dFFV7wHPGDMotO4im959sfO/njuj6fh+Z7gj6ngMnPnVE39dk29wAIrBoPVeENULoBhr88gx/K0/L0afIXd/HZdxFo0Mh7kWeqQr2mDkSXxRnyjTiLp+r+nE2YuMPiaDPjHgEaU2EP7SxEISJqMH3nfS6TrgnvCC6BhyhruH8DL1JhF0t7iYdMvqann+2dlKFpm0OHGJRlw8ftQ5olPW1vI8z85SS/7WW2nAoP2dnULyJydp5C0kVYjoiK2tfI2RGg7L2ZFGCvWnp9OAq9Q8OKhOjJAjxyJi6Jrkbpyh0V6A4T64h5LD7qdFDyleuVJqEQu3O10IjGNz3p2OgWJ1PZQEpLjBuc/MjLYMQC/hbeU/vvOdQuykbcNh1hLculUGdHs76R4OqHcCFI1R/vTrYtx6oQtDS5Xi811lYh65d4wYQ8mAu2edV7YmGMauhun5o26Eu8KEA4kopwQ4cEQ+05+vPEFXqnCCELt7bZ16XU7I9VMr9eRuX/fyNhRV5ua4xPEnNCYGPKKMgbC6y+ju38/ybeF5RPGLDDwE18tv9/fLuCgwoVFeXh7Vn+JOJaQkOrtefDAoiR+J1LVrVRlpApu0KAsLNKIWL6WFhCJjcOtWITwl5qenaTREJib58nIhWwvXd+CE9/fT4Lo+4evdu3kfnjwpFQrqpCs2emdF4W031BQxkqMkmtBvl5Lt7eU1zM2V8+ll5rdu1ffqh0HyB0F3GZrIglNdWqr5IUmtXQAawoDSO02AckG7cWqrq+W8IkqtIjJB76gepVunVT85qSQ4B+++Oo+IQs4Ms+syOtXQ0XNXZbk274kYBT+9mvJ5qNo5dcTveOZ3T676vHOIKL7e5xjRnrvwO8fTuzZy3OaTyIF9YBt6Ba1nIyqXC+sO8yeEyicGPGJ0sZoQOFCcJL50Y2OUGxRibWzk4oG+5ucTRQuVGVOVgyib5eU0cgcHuejI4IS8aIOdnXydKqCjJIoNId3+fhkevKDEKmQbkVWdS0vVP0QC0zVPT9duLqgOxgqN4R5Ab6in6emkYpaXS1p2dpbX8fBh0VBdKsbZuQaGZjAoRU6XYUI+jAOHQj3DiZEtSrj2MnBRFZ5d2Iwy0/AKXdKLfTwDzorhkUjrz0eCtRfRaHeASnn2LP/vSXE5kNXVkgJKvtlmz32Tn5A/6LmCmzcr+vDcOM1ewg7FGuat72TkGUSUBAM8jq4ZuvEEpnvN2HPyjG5XpTh+xA9GEaIQf+vySkZfJCb6JQDoCVNzoXcRNbe7pDJi9Fo6h68YzxowLzu1+oKN+MSAmxzQshCKd6WF3tkZ1YYvLZXBQjXwujLdb7+dyToLCV/aF9j166Nl5zhtEYAQfzDIc+ttY7v3dx2MwnBYfUq6lj2ijJ3zFMb2PS8VgaBCTL5r1/K43/teJYju3k1+dm2tELvKvK6UODrKz0GYR0dptDRgQpmsrBRVB
emiMq5ezaiIAXbNEBl6qkdJfXNoCEq/FahXZerWVmn6leqjGNwbzlBBiPsseuMMGHoJZRw5A2GuXVxUO1uGEIp+9dWS+zEqjDa6BB/ctcxdsQLByrsAIAyn4zJUDGA3WAxTr6p0L4AZxqzL/1zvODJ3nh2RQ89d8cLBu/8MPIRuLqH3PPvp6bz/Ip1xKWK/h6JgarPe3rhfk3MFlNwz66/bDU7B/Xw/J/ZjjokBF1Lx3MI5E2VlpRoiMfAeCLRn0klWQtr2cvzc52oB0NhKvnXeUaLTAjOJoQJIAxqietHDw6KbmirNNsrD9dDAMgrOaWampHDXr9d10oYz+G+/nRsjROSi2doq6dubb1Y1Y0Six+99r5CY7oWHh3WtwvCrV9NRMrJ09RJxndv0dyX5KB9KoM5h0gOLnIbD4t0h0s7rLy9XBINnRbkwtKitR49q8Y/zsxGjvbKdh1yGvIdnfCRma6sAACAASURBVO1axBe/WEnxpaVEzaSoZJT7+5Uj6MbT392b8V7YioTcg4ii//qmJOZER57yKxGjyUWKD89FVShD1qkH9wdX35OLEYXUHc/59dd61KTZlzUr6qOoMQfowCUwzYte6Yz/7vQNeoXM0LWIOkSx1manL8e17Gg38/0FovCfbgPOaA6HpXyQxMIZ4hU7n2wizc1Vl72Imvxka96Dw1xerp+97+QkVRnHx8mBHxwU5bG6Wi1pTSql49PThb6El6en1c/i2bOqnPR3iVTviSijj/bpEjU9RBzj2rXqB+7eMLp2xFH6Lbn57FkaIbvmQEg7O0VPkGmtrka88Ub18EZtdb0vvp9jW1wsjnt/v0J7XRAZVMhIgZEEcTcWPg9ZTk0VmraQRVNLS/UZ1bPe0ymJiEpuupfdcHd989pa0lgRlbBF5zH4zpWMEdXD8NjIguGXr3B+qDaJbQlvKF301dUc5rvvpu7pCJ2B6+fTKSo5HDmB+fnRrpacBkDVKRTUJsfAiYz/7ll6dq6jJ1o5gk6fAC6+3zPq+SnvB+S64e7Ozdzs8ks0q/e8QCrlp9uAW2AmWpfOdQProTFAfdNWyIzxF5JHVMvTlZUqDPBANa3qXNr2dk5yErLLy9FkZg/LoBnoWgi9s1OqB0YV4oaQcNGUEZ3zI01bXi6p4fJylfpTrEBn8/NFnWxu1qLSjmB1NZUUP/uzo1EGA++eQYIRVZiDkoKQOmcOTTEwCqgkBDkTMkFcet+X1PjSlyJef73ayaLJaP+npqqKMqLuGSN3clI9tTn8iEKInc/vCVHP1ByCznrPdkolSdbNzUq2orfcU++FDmdmcv75O/7dvDs7y3MlKextX7vR7Bw0x+NcPZPxtgpd1dONLcTdnYLR5YcduaMkOh2keG2csulOpVeNdhUJPpxzcJ+BhL5xhnXIwEPZXWLYvw/aprox78YR+guiUj7QgA8Gg78dEf9uRDwdDodf+cPX/puI+OWIOI2I70TEfzIcDt9nG5VP8egTtCMBD5Tn7tuM6WInZF1bywdz9WrSCwzb0lIu6F5Crll8T2hZMF01MBhUIvPgoJQX5GoMgXPEX8/MJOKdmqpIAlKwrRYlAidBdgfZ6qAn08/Y4BAXFtIx3L49mgibnq6WAZApPl/rV3kCKhghL4TMILn/09NVEWmXece8dSuvYXOzHPBgUAaKQeyJ3ffrcPiNb+Tz/eN/vJCf3tgWvlDczjO3blUUhBYgTfQ8oWI9VtwTHQfJTiUuySGhU9y+XMfsbOYaUEVUPgyTBKtEuXntnDxbzl600pN9koQMsucO0DA+4/ShOoCI/KxqT5EHVM4YQssGAwiBAyvmr2fa1SNdmdJpMwZektR3dhWW4b52VI9qlKfhHBh+TqjTId1AO1ZX3XTUPX7tP8b4MAj870TE34qIv9te+78j4q8Oh8PzwWDwX0fEX42I/+qFndXHNUw+BiBi9KaTvRkPHqTBYMjQLjdvFg2hsnJ1tfZcXF2tCbC2Vtw0NBxRhsxihb5NNnpUTa4iKjoQLnae/OIijayGWgcHGZ4zsI4VURWU09O1jdjFRelaKU/sH/mP/3EV31B3vP56IXW67eEw3wcVnp9XZaVdeFZWqjoR6qTiEGGIIJxLl9JJjvpHwsmYo6J6ld7zxqNHaQx1Eew0A2TqPkoG9w0ZOKEub8Q5oxZ6cu78PL/P9dgXFUqG2HtOAFjQzErvl4jR3t3yFV1VIipBPbkX5Jau0VyiGhpP3jGUXV7HwV9eVsfN4bAcREf1jGrXaEvYO7bvdk4S/OatZ9tRdKcqeg8aEWVXHjl36rBOiQ6H1UBNkzmqHZtkQOOdDnEerrHXe7hGhvsFJjI/0IAPh8PfGAwGr4+99uvt138aEf/BCzujj3N0NQcEhLtimLphVhHIuExP56J6/fWkC9bWRhMdZIA3b5bEajhMo6WvuLAzogwiZyLBxhioLozIRWqLtog897290XPlgKheGBBhJxrD3/rEZBgtJOjhj//xfP/Xv15FJTdvJiKNKGR5/346O0nD09O8zp2dQupPniSaf/XV4lWfPi2DQD/fZV6UE7ZAi6hnxeBHjFIz6JQPGvhjcwIStjg9967yILPs8j80HATcZWvn5yUHHacBJDfPzvK4aBwGWUhvnqytFZ/McPSkN8Ml2vIMuioFGOnIkyE0J6wPVAqjLIHOkJsnwIRnBUEzdJ2Djqj51tciIMKRRdTcde8N3yNRzAkpKOoJyD5nOB/31vqgOiIkkADtdSC9n4xz6hSOaxIZyV+MJ3Z/zPEiOPD/NCL+l/f742Aw+NWI+NWIiNdee+0FfN0LHB09ePh6U/SEJOQJNXjv9HSiKN32hFtkhsJV1YUm1Px8Gr2uNOH5oW5J047KGVgLuWf8FR/oxcIx9JJp5fQ9WeUzvYcEvlGmHmJcWcnzW1yM+CN/pCoayejoryNqkt+4UbIu9xzNhNZYX69y5q2t/KfrosXYuyZG1GLsSUj/8PvT0+lYcL0fNCB/1AbN/NlZ7ezDATOMDAQNMGPfZag2iO5KGlGYKG59vVAiGWPfgNj3k69qYUARwWGg7HwHtZKaAM9maqpyJOOtHhgrBqYbOqCmG0kJVuvIfPY8rCfPfbzRFaPWpXsMKWeBs+9JSO87O0vH/81vVhuI27dT1aOLIydgLUHbxAaDQT17gIM6iBHnKNgB6Nq66Q6qf9f4fXTNLwCJ/1gGfDAY/LWIOI+I//H93jMcDn8tIn4tIuKrX/3qiyN/XtSQEOw8OENERnZ6mhMBh0x7ig/DeUPosuvdQEaMIhsGAvIXWkKQfScTyI2RMKmpPXoYqfLOeyFZ0UBHfYyfvRhNQosFd4l71+XNxgiudXGxqIXFxVxMFxd5z6AqdA9aoEvQHj0qrT31iCgFesWxuw9d8hkxinJUvAp/8cSPH//wuYBCgUSdoyiIc+4RlmIn9x8/a950Q4yf7xQAqSntcUftu7vV151D9kzlYRTudFUNoySac6xeBOR9nPPZWc7h69cr4gMOIipR2fXMkHCnb/ocRzeMywJFol2f7Vw7fYKbtkZ6qwHndHyckdy/
/JcR3/52RRpf/3q2J/jX/rW8N6hBkaXn1Dn27hTM1S4v7POrOySRgfcz4ug1oGjc7ryA8ZEN+GAw+MuRyc0/Nxy+QFb+4x6SXOP8pElmUdy7l8jw8LAmvjBScqt3BYQwKU6Ew703hwSK79LOtG8B1VF/l1b5bM+id60suR7E3/snQ0Q9Qel7etYcfYQT7YhKObt7hqoYDstgare7spLHoNmWRCWv8wzce3y2a9zdLXWOxXN6msezsEnzLPrV1XxewuMPM958sxRFqAn3TMIOtdGLo/xMeRBRSeuImis9T+G4DH8vnIkoNEhX777v7lYiVJHXcJjI8wtfyO9wT8/OKsrpnK9o0vH1yPF8zQn9UVCLnU5zvhCrOUmBQW3DyUlQMvgoEsdbXy9aiZLn1q26dx2xcrAczN5e7rOqgliLiD/4g/z9X//XC7h0qaK8j3vTr926ljuwTl2jSNA98Ax7MjPi+VTJC6JPIj6iAR8MBv9OZNLy3xgOhz+kUfNLMPpi6nI6ScM+FhdLqtX5RCqUmZlEU0+f5oRBHzx7lmXrvZtdDwMjStEBYXeecPyBo1giKgLQKAg3N653haogQ/KuiFqMvSiFkaSYiRitEu3OzQTWdIkBgeYUXtjirSdqhfOaL52eVhHU5WVdHwMRUY4ioqIEKpDeeOn69Xzv97+f37m2VlvdjY9f+qVSUnT9L6TWjbHRpYvdkUWUYZdEY9i7hr1vw/f4cT0HSTiyRAaR8ZibK+NzcpLzTIQTkYgUnadlwfx81RB4r2QrikYSO6IKiswlxVYMJzqD9JYqi+MxT7sG2xy33vw+HCZQgK456d4ytoMs9wfa1kKg7wBlLTx9msbd7lW4dkVWnjMwwwh7H+QNrPT3dGVLV570nFqXL3bU/jHKCP9eRPxiRNwcDAbvRsRfj1SdzEfE/z3Ih/VPh8Phf/5CzujjHJCVSWUyQEqMBlSA55Xo7B3oGOfFxZKX6R1ycZE0we3bFWKafBZ8D7W61tXvHnqXdTHiQsuOVtAqEBTDhqfEiXeuklqCoYTgTFYOAf0CEaMdLDjH4CwoSex0s7mZhnxmJhOY77xThl5f8Ih6Js7jxo28hxAyTh+qo2mG/pwjjvbsrDavmJ5OA3n3bvVZce840V4x6dolEClFIhIN96R152o5Bc+2F7qcn9eelY7nfaureT03b1YF8Olp/g4hmp+Sq1tbo1W5e3vVf35joxLn+/v5+4MHVU3bZaZkpNvbdW8Mz348YjXHRK+dE0ZHmLed5+5gSeEVMMHxQN+dzmEc0SGii46ovRcw296uiINCqMtjUUydw0Y7jvPb/fwZbq8x7J377nmB8WjrxxgfRoXyK895+b97Id/+SQ6o0GCce6iJP1RduLlZSIDqg0FjRHtVF4NAX43HNYmFjz1866gH5dLDbryfydTRsBAxYtQZcCwMpAUp5JfAgb6EjouLxb0eHVVhCiRisaMbNjYq5IeIe/n4O+/k8RkdSTGNmYTOvRiKUYPW9veTkmEkVbBGVGGQxSnxKfG7slIFTYNBxM/8TH52erqcGlqDQXNPhc+QH6S1t1fzxusKoSJKt390VIVRNvk4Pk50aDNrmxVzQp4dI2euiFi6PO/4uJwVzb3n8/RpqaciEu2jX6amSo2ysVH0w9JSRaA4Z2i702mQZ++3DTFHlHHtLXl7UZD5BrS4x6g/lEcHGRyXtbW8nPfv29/O5yQiWFxMgMBIA1wkoyhL59A564iKunxvlwSOI2iG+Xnjh/3txxwvQoXycg4IDXcKtUEQ8/MV4qp6Y1AlrjqnvL1dShNacdyhhBDHABFY7H1RQza4264zhaAZPhPcpO+cOikUR9R3pGHEJCUhJCG6RSAxyHBQloyHnF5zfsNhlXpPTdXuP/7GWHz722UMLi6KrxU1cHYcRkT+rq2ua56fT3QvQcdxSoi6V8JxDpfxiijD0sN6i/rwsJJg/kbFsb1dkYJNAiAsVJv5xliQS0LunK6oQlL8zp3RKIKjuH8/jTQVlCTlwUHRKyLAzsv3RKOopSfbGHjzczisaKg3y/IMxzlhRtV3Qeueh3ns/eZyREW++PIOcMZzCRQv7teNGxF/+k/nex4/rshuZSWpy8XFWgvWHfqzAxjN4jwnyNp8FwUDaM7pJ2ScP8z46TXgJgMUMD1dKhJogFqE8VxezkXHwOA/e3m6hw1RM2zQy/R0GapuoKAZ0jQ8sr9DXThVDaH8TQjfue0e2glPJRAt0IWF6vUCtan+7JK9hYWKREzwHkYqtumJqpWVNHIM6fx8HmNrK6tW19dHnQOO0ne6/tu389hQKg50ZqYoBteDf2RQFA1BhnZ1p8XufcHxzAy10P7aterjYcHafPnGjeL97bREteNauuHXOEoEQ7LHgdy6NeqEUB5Q9WBQ9Qa6LDJqBwe11+b5ed5rEQVa7PKyIphuEOnTOTj3c7y/h4jieXQByszPEYW+3QcOQcTCOSouOjzM+dV7oHMwXdooAlPR+7nPpcPb3MycE0WNNc4AA2DoRdFYdzQEBcBdn+vyMt2ZfYLjp9eAm3QmQg9HhZEUHhAt7pRRjqjE5vJyGqajo1zUkIW/C9OFiWSEpGTdMPRwV2IUqo1I7tIChpbsot6NriZYPfRl1LpksBcl+Ky8gEKNiNpYwntU8HX+b3W1DMy3vlV8pYZdU1NpZHpE0FEg7pdhoQWnZFldrSIllZDLyxWxeE6Ska++mgtO4Y/NFiBkhkNTLc6r6+Wnp+vZU8fgttfXC92Pl6+733IFPQEIgXYZKEWFPUwfP87zkJvBbUvI+Yz7f3Awqnfn4HtbCNyyPV9JZRcW0gBCmD3BDf13Iyoiihh977gs7+rVUbQqeeu9XbGB5jMvOprvCcLe2XFqqjYEkbj80pcqUbmzU/Ri13JTpVAVAVsiFiDFvWUHurTyBapJPur46TXgEJ6JYrKgDCJGvTXDbcFJUCrqmJlJw81gSU7hL01CnB2drjAST6sacHe3emcI5YW03/xm0Rb0u1rWMiKqOLt8i3ZYhBGR56TrIa5xYSENU19gDFuPWiAUcrMeRfh+XP7hYVE/T5+Wgd7YKPnY+noVJx0eFp115Urej7t369kxRIp2ImqzBoVU+Orbt9MYUmzs7OT3oTw4W0Z3bq4aWzmGkJsBZXQV6dg5R22AnIjnz3i6f5eXpZ/vCXSOQFj/3nvpwPf3M9rgUIfD/Nm8e/KkooWtrXwdsiVx7Q5bt8Kpqfy8PVQZSgab8ccX450lqbthRXd07rgn8xjPvuuT/jj9vd2Id3AQ8YOKDo7JHET/DYd5ndev5/NWlyAaMsc4EpG3WgNOVuTWaSDn9gLVJB91/PQacCqSrtbAv6lOEz4yiBaVRWjhdy349ev50E0cHt5igFwgMpNDcurkJA2ZkPXp05zEmispkrl5s3TnDNzFRerVXZOkn9AWcpCA7Wob6AJffPt2Gf+I6mOOp+8KnZ6971l6iDWiEkt7e8XfRlSCdDxxhd7xHtzu3FzeA9Waw2FFKJeXif57cy5oanW1FDU3blQHxa2tNKS7u+l0e5k
7B0+NQE0zNZXn0MN0zqbLQOUajo/rfFFnXXa2uZmfe+212iZtairnjw6P5+c5L1ZX83pwvYNBqStQUQsLo+d1715p6y8vy9E8eJD3tytZRHXOgcoDyOlouif3AJf3U1h4Prhza5BTEQX3+wgRQ70AADRs/XUlFUPu2Iy7ZxxRuQlrj3PoORKUIkfVhQbe79rHHdXHOH56DXhEoUAIwsO0MITXu7s1Cci5GM2ODKamStnCgA4Go1twWQxQJCSBR6XOQIfgMk0oC2djo0LOg4OqetzaKjpgfb14PyXdd+8WqtnfL8QekYZB0yjIG3qG4iKKeuid7TgctIPEqWtZWqqSeyX4V64UTx5R3DSD2SsH5+dTPfD0aaGszc38TjSVUHhpqbhVyFKSmZG/dq2elairRzR2r9nYyPsCLW5vl7Hi2EVU+GZ9aw4P09C6D4zMlSu1BVqXW2qjAOFD4+4pBdHjx5XMXV8vZIujF/67Bvf02bOKEBhA9MPpaX2eEzJHoVYGXKQFjDC4EWX03M8uae3KKOurc9TmSi97F530AhwovNOewEQ33n2d2zylc/q3bpUkVR4HCndPx7+Pw+GAXWe/7o9x/HQb8IjiECVuLDAPdGGhFr4kF25NX+yIUndAC+vrhXp9zkJVlg/hWhBCR8eBPiB3BsKkZij1f7h/v0JBhkTod3ycn9ndTQQqoWria2gFfUFVFxdpxNwXKPq110bDWChacYl8gO9GtYw7s9XVur779ysx3Eu5qQbIJxWlUM8whKpeScZ6EQwDw1Axos4fcpe8jMjvWVsr5I5L57x8XsJraamKpeRXXBt6C30Aifvu3kxpaalAA1352Vmen801fM/cXNFIeuww0HNzRRMNh2mwRC0ARu+tg7Mm7ew1CZx016t3xU1E/e8eQu8MNUPbZYSMIsqtf581AxxA3BE/iPR7zYT7Pn5uXbFFyXJ4WF0GOU3igvHvknjtEUN3Fp+AImViwBm4jg767z2k6h63G1aSva6DnZmpzQTQML04hwFGM0AfDMXFRe1erxc5GoKx1/MiIl97+rQ2QxiXL0ZUQsxCcr1Qd8Qomiah6+XjjFMPrd1HDoImeXq6KBftZ20HRjnDoUhAiQqocdBbnBnD2FETfl5i9/vfr8rXXsGH4qJZnprKxJ3iF+hqe3uU9lKodeVKlqvr+724WMZ7eTn/vrZWSJGh5/wlJkV9cg/6tfQkNwNuX0eJVhFMd+pra6P8OZ5bG4Te62d3N19HiaFOIGrOjZE3P+UiRIW9Srkj0PPzBBSQsKhS5AcgdJksR9groaFrlEqvzeh1Ap1qY1w5CzkI87prvKFo0l8ApOvCzQfzJ6KcYad4esTxMY+JAe9JGDz0eDgYURMa6qMN5+3x0RcXpXs2EXd2qrjEgrSAyMMY1PGwGHKLqAVuMkO1ji08Nsl6ohKSM/EUnfQwViLSPaAlRpN0yaWWtY4fUSjZxgE9mYSntb0XvlIlJYUAyuD8vPbDpCARmXR5JLqhRxE7O2lE7t3L41tg0CXkST63tlZO6/Q0P6fa8vQ0nUFHqFr2SopFpGNYXc1/U1M5B6DGZ8/KGHjeojAGlu59c7MSy/IXd+6kgVeHEFEaZpSaRlxXrpSGvCf3KHtQCRw8xNsRMuNGodJ5aHOjK20ADzSd+WR+UlCpIu269K78MTdVqzL02gGjHhnliHqWgIRrQN+gRMeNbZ9DHKp1zkmY210R05OqHXlPOPBPcHigDFgfHlxE8Yl9Q9OLi+JScc8WK/TV6RcyKMaVhycnvHWrjND+fnUD3NqqsBSil/CDdq5fr+8YDqvZE0N7dlY7yUCy1Czj98Dx9SDvSSbX3ictdIRiishj28CCcbtzp+SQjq+bnfvyzjvFt29tRfz+79cz+MpXkr5B55DAMc6cACne/n61XWXkOOpe3BNRRoOBG+e0+64zaKjhML/r3r3KqYgYbCztHn/vexklafZ1fFzbpaGqbt6saOLJk3LU5JKrq+kQ9vbqOfYdkew/2htgieZ6r++pqTwO1Y9qYW2RGWdInrPpiqNuwDhdtQp9HUjYU12JqobDMuaO0SNezkO9AJ06A2+NmX+Mbk8s9jXue6Bw5wcsdcNsfYzXdPSkq2Nwop+AImViwI1xTWdPwIyjT5PYgkAlyJJbLPqA9Ey3EBJCgQKF5Io5FhZKwaCE32tHR2kYhd7Oj5qBsbUTkOKNtbVSV5jcJi5E2sNACPnRo4oG0DeQdEf7HcmrxLx2rYoyOBnJYGiRsSYDPD/P1//ZPxs13hERv/u7+bn790tN4llwOpA6vrbrwhVuKAqCvuwcpGUCg9EbXClEmpnJ68AF37xZ97LLUyNKg/zoUWmv33svDb7ioN3ddErn56MNlx48GK0OHgyqGReqgRMSvZEErq0VJSHxLsksNyGaPDioPMfWVs1vRUkLC+WEIooLZuj6VoAipohC/J0GMz86ldGTnJwwiovUr9N6ruHKlTLgFCHeC1H3Nf3DaI7nGeBxdZW5z1GwD/I0EwT+CQ4Pt2eVI0az5B4Q4ycR4nOKZxhDi8GEjKgFHlF8OdpDGIsDv3Yt4q230tjgXe1sr4c3fhSy8roJhROXLNS7o1e7uQ7nwLk4N5phemZI1aSOKMXO6WmVgENOuPylpVI94IwZPaiKHG5rK6/9eePb307jBtWr4lxcrJ4qogsca+fynV/EaNKqFzt55oyVzSxUN16/XnJF8k8oldGGZHHIjOzubvHSFA+9oMpcwkX39gu92ZSE5tpaFeJQ76DZ9FRnPN97r7hxLSA8IxQSUMLw6gfkWrucj0KpFwABCPIOETmHGFWgwXVaD53yA3Jw0xyG+0Au2ZUxvUlbxKixjfhBkPZhbEJH5uNJ0U7nfQLGO2JiwGt0eqDTJl2qJDTzu0naM/L4UdWRCwtVODI9XbuiC5txs4wGNH9xke/VQW5ch/vwYVYZmkiSgDhFyAwa7zva2FvRQusTtSdkJLZWVkYTXl02BqH2MmuSONfTd1BnsG2AIUkGQUkw4o9x6d0gkm/afebq1USmp6fVmxrfyTE5DnQsIvA8r1+vLcJWVvJ43kPnf3lZ6LZvoRZRtEJHiNeulZEl91QUI3EG5aN+HIvu/PIyjR999uJiPnvUmGt0zK5/PziohmJoANr1DkY001pfr2fdn2lPEjO8FxeF5IfDNPAaYNnsg/xTq1pzVZLX3OzSRRw4J8Q5RtTrEv1dmQWI9DVrHvTn/KPSHOPIvHPrPcn5CY2JATe6EYNE+4PvJcIMAsPZCz4kHU2szh0r3DERr17N7HzviRJRjbLOznLBbm+X7hnKp1q4e3dUPSMhKLw/Pa0ugmRS3fH43m6AjF5h15FN/xuj38vCGd47d6qAgnFyLK1Mh8PScjNyElP4dE5NPuDevaw8vXu31DyoEpQB1D81VXI6peP6hXi2EojT02lsNjdrk4Pz8+LQoXmbGvd+MeM9qjkYqhMVlOfned7mFxSunSqUChD0PiqLi6NdEs03YIJE8+QkDbccSOeCqYKWl9PoKslnLLe3y9neulXtkzXGwv/iu0
Vj6D7zSzLX/aUAEd2JLCKqORkKBPft983Nckzmoz1YyVbRZ+4fB94TkJC64Xl0GqTfq/ezE59w9WUfEwPeR384DHnXeY8/vPEwStjada/DYRpbaGhra7RYQ6FIz/7rJnftWnKjT58WuoQEHzwoDhRaw9tC/FQIfdJyUrh43KKwdfx+OC9heF+I/XPCW/I40kjOAd8LLaI3uvOAXNfW8rrv3o347nfTeNy9m/dhbi7iy1+OeOONyh/QPCtm+v73KyKR6OU019aqTcEXv1gywK5UsdsNFIorpg4SZeDq3avOfTMKt26VA37jjUKts7NZ7YqzF71NTRWHrEDJHDs5SS6dY2Kc7Ms6HOb82tys5y4ZyElEFAetwEdiXuLVub/7bn721q2KFhjdLl+VTOwae2hddNEVIdZGR+WizojKNagsPTxM6sd9pv4RSZrnXSnmHMcTrcAOyavrAbxEhS/JmBjw543O4+HWJBw98K4oYdwZbh5/OCy+mWRQefvZWYbGik5U/1EyQPFnZxGf/3wm86BG3ebwpBYrxYE+3iKFzudDx4wyxNMr5MZDwk7TdH0uXpWkzO8MBV0t+gXK7dEKjrRrgc/OIl5/PeKXfzniX/yL3NvQfptf+Uq2DkVBiUjQC+fnic71JunVqFeu5HuuXy96oisXoLfeHMm9w9FH/CB/614zTHIHNPvXr+f32KlJKfvNm3k+N29WrxrnYW5J1J2fpxHTq9uc6xtaKJ46ByvcxAAAIABJREFUOKgK2i6946DRWJQ/t25VO1pc+NlZ3vOlpXIoHJl7YHMMCV5zCAp3f9E35oTv93svuDL/BoM8J3UFp6fpXERq7tXCQqlTxkvrzUdRZt+qz2YXEWW09X/pTds+5WNiwJ83xoX6vTjA/xrqW8wRo4ayJzMhJmhCePzsWTWN8j6Ts0uVbt+uSWjiS8gJsyXUIM2IcijQtnaeDHKXInZu73lyyo5k6K87are4LT7IC3fb1QKcHuOk0pHRvHIl+f2bN9MYv/12GdBXXsmNGCzIy8syaFNTxcG6jpWVvM/04CiX69cLMXajwXFzwtQcm5u10Hv04bo7JQUNoswocdAdKyv5TJ3D9nbSQhGjSdDuoCXBV1aqyRllx8ZGRijasYr2RCCSwjduFHrtTc84C31WXC/9OBkiw7eyUsndjrBtrOzY4/SNtdV/Ji2MGJVvRuRzFbG6j853YyOvR4sEDsD6Oz+vqDSiZLiotmfPqoYC9UM1JiJ6CcbEgD9vMAZ+NgkgBKgRNytpaOJBdyohIYgnT3IBmNx6QT98WM2zuqEzcSFn+yxSTiipRkcwTAxV54CF0bTr5+f1uyRRTzB1PXinkyBvv+O+O5/MkDknuxG5X87Je/C8jCEHNjeXi/RP/IlCw+6zzSY40Pn5NDqHh1VtiMLY2alz4YQZma2tUoC4r6gl8sbZ2XQcjHI3fNRC4zQBrllTMOfI6DOWjs8BcKh2lMH9bm6W4sTOQ5yYc5+aSsMm4pAPuHOnNiogn1RAZJ5rosboogOpedAkXb7YOwCqSkX7oA8jir7oiX8US6+WBCh6Z0CRghyAtYZy2dysAiGj707kHvTiutPT6oIph2R+dBrsJRgTA/680SWFPbloYkcUEmb0+kYQ3kNbK5O+s1PGxALiEKhHLGLhoTBd5R4EiF+0IDt6PjkpRwDt98IMn+8a3K7cgD4HgzIUp6eJWuzKQ/pIg23ozc0BSCD2hdE5UlwyR+CektZF/KDq5eAgDbt7//BhJgtdx+FhoTsJMM/BTuc7O3le8g7Cc8adUfL8j49LFsmA0O73IpLO6/s8+eXFRZWZMyyDQW3+LAknKUuRMa7ZtzvQ4WF1znQvJCcV6DiXe/dGIysyQHMRjaJZl2RvV2eZVypqKXv6dTtXn7u8rDnUk5fUSz1nxBn7/eSkmqB5ZtQpmsndvj2672kHGWik7e3RZmTuoy3uHjwokLW6+tLQJxETA/78wVCbcBBU13NHlCGE/iBSk0VhSi+qMDHPz/N/O8p0J9H5WNSKzYCF/TrXKV9eWRlNtPa+EBGjtAlOsmuMqRoYcmoH7/nOd0YX9tlZJuVQIjhQ1+3c8e2MMg2vncM5Efpq97Ub4Rs3qppU86eIutcKi87OSi2hDF7B0/R0dSbc3a2NIU5Pq+2vc5CL0CPGuSo6Ymx1+0OzdX3z3FyF9jTb7kVvJzscjibOSEU5xqOjSngL//Unf++9Qt7Hx5Un0C51aSnnFwWVZ0SOiSrglO7eLac3N1dKD/JHzgXIwDOLrMw7kkP5Cc9VQZXPOXf3z1zA7W9tFfUkOe9909N5vQ8eFF1pDruP+/ulxtnerupmhXKiof396g8jF/GSjIkB/6DBIOJGJTKhKhQKw2jH74hCc5ubWR7ee4wzaqenuch4fUkwi6Hz6gwOw9YTU9qnQnf48R6iOo5FCx2SYaEOejvT6elUdVh4PfGnv4WFHZHXLnLQS0X5PsPTk007O+kcfvM38zxu304FCt7YdSjV7+epAIjKxa41uOfr1/P8dP2j0iFPPDhIWks3Ogv89dcrEUsuurOTx19YKP15zzX0e2ugQ5zr9nZVxHbKRYIclUVDrakWp21H9Z2douOWl2tTCRJVSfHl5VGn05OLz6tUnJvLeyVpStGix717RyWF8uhyW69D+4DQ9nbN7aOjPO7rrxcA6Gouz/zWrTLSjHkvPNJ2wnpShIRC2drKf9/4Rn4fiuf7308H8eqrVeAmL4H6eknGxIA/bzB640bVAu0tJSXpImoBMlAqJh2LIgMnvbSUEwoig6g7N8ho+ruCFioOnCXEKiTu8i7UjhAX8vPP9aiwc/6KUGiZqUYoKSQRIUsICRIVCgu5u+M6Ps4Q9n/6nyL+5/85Df+dO1ki/0f/aMSf+lNpyG/cGDV2ogWRCqQtwuk7qkgm4qwZLjK1nZ18fn1TiYgygufn+Z7d3Xx21CvPntUmyhA1GWLXFlP8dJ0y1KsNrnnESItmPItnz4pXvnq1+qFHFC/fJY366VDSbG3ltdy+XfMHADg6Gk3uUpasr+d1U/Zwmp3acQzXbP705DIDrh+N94nw3n67dOTyRQYnYIOLW7fyfjHgCn98DwBFZosu+e53K9kq/7S8XD1oVlcz8lDINfNymcSX62w/rmEhS8TQrHod8oZGTRgh2fjkttApKVTCQUUUIhbRykp9T0/8aFo0GJRhxpELCaH3iFFesRfqOM++bVlXlvicJl3z89WUSfJRkm1cf0vj3hOZ3SlxJHt7Ef/b/xbxN/9m3fe9vSyT39tLpMmhrawUMmTY8PTu//Z2nZ89PyFJSgfVipzMo0fFz7qn09NJTUj6PXlSCLknB/VOYUg8az08Ispo4cztddqpC82fIFnzRl6lG0fvv3q16Btz8Pw8jdHKSiU5/c7Jea6cmapgnPrlZRr8nZ1KgjNomo71ja1FOp2a0WSqg5Dd3aJxREmAiDYAZ2eVgLcGAQ6gRFUntU2PiK1NrSXefjvngf4zvTkWJK5dr4rpiIkBf+lH57GhVaiv98qIKIOo+15HBQpsBoMyL
hGlPIDahHASeByFxczA+rlzyuNJVcZeiMugXlwU5YM/hNZdr2tgfCBIEx+vLQl6cZGJMUoa+YD19UIyuOilpVElwuVlovp/+A+f/wz++T9PVPT5zxeC3d7O10j4qEhQB1evVjRy+3Y+L05GwlgyFNruzcFssAD5yWlIsklSoxAgUpWK5gRjzpBD8aI4CiVctgIt0lNzjkG0yQSUjAe3XRtHJeK5d6/44U6l2cFeqwJO37UdH1fflNnZpPwuLpKCmp7O5yWK0LKhq5G6sgiAoKwxfzjfXmhjnckTdMrQunAMRl0SfVznbT1sbGTkojJXJa01IJohAgBwnid7/JSPiQHvoyf1TE6TpxcadMWHxcFwMgwWjuTZcJhcngWvvanjQwCkUXZvUQDEKUiAbmykYXj0qDS4X/5yIXqcNClcRCUYGYOIUdTeES6UyxDcvp3XsL9f/KFIgXFDl0SUJIthgcbcw5OTiN/+7fd/Ft/8Zn3u4qKKLu7dK66TYkAYLmHctb4R+TvFCaqi7wHpWiUyHzyo/h+OPTWVr0G+GoNRZpgHeG+GSmJUGTwDj25TwCQagjbn5oqTh87NuWfP6pzNTdGHa++UmHNnUBW0rKwUpaJcHsp2TsCHKLS3PzAnB4OSbsqbiEQ5HdLbiNKxU1uZJ71fDopOj5Wus3dNol7PfmurZKSnp1WItLVV/XN06Lxxo/Ij9PIv4ZgY8D56Ge54PwYFDhACA4kTJgXE1/nZQhG2UpAw8l63c4meEkrPRQCSWGSC3/texO/9XvLGa2sZ6q+vR/zSL1Xhj4nd0fThYUm4OKOIasTPyH3/+4VYOw9KK31ykqiMIkEYe+dOZf9xsaIFYbr7y9g/bzD8jx6VTI86ou/AIhTG//ae3Tdv5r3vdIXFbMOFa9eqA6KyegbFPdzayuOhIFTK4tl7cleyFO/NeGkDq+hnaytL1W26fPduni8nNz2d9/Lb367IrrezpeIALHSphPj9XTtiBvDx48qnkOednyfinp3N158+LQdjMw1ti/WqIUlVBBNRxlUUqs2sQrP19VFarstZOVWSU79bg2glKi4RppzBwkLO2fX1uieKkHoOZWEhwcft21W5+5IU7TxvTAx4RE2kXpUH7eDWoNQeHjK80ITkjGTa/fuFBEmdbtwopMUYn5/nxBPC9r7ckBc0hft+663RnisM/re/XeXQJmdP8FABcAx7e5VAevIkr8cihMTn5gpVrq4WKpqezkWyuVkVfsNh0Rm9SRdE5t/9+xE/+7OpEHjekKBz/igcIT7+dWWl6KMbNyqZq8gkYjSiUMW3uTmKzBgZaI/SSEUj3j4iDS2DEFEctHsaUYgcDw4YbG3ltfzmb+a1T09ntelbb2WbgM9/vtD01FT+LvyXJIacp6erPYA6AREkqWanzKBwlNHFRfZc/43fSGdy505KQz/3uVLfkIK+9VZFk7OzeQ0HB/kcPW/fbX2gasxh80yVJPqLkZYvkftQNdyLyyJG0b0o57vfrc6LR0dJw33/+6Xyun8/r+3VVzPCImFFc76kY2LAu857XJ3RM9wWay+KoQ/ufLQFg6NlqPWMgPJNQgU+GutcXJQ6QnITOsTfWigrKzXZhd402rY308hJuA2p4k1xzBI90A/E12mkvrlB74WOirDY0EeSV50zheIGg4h//9+P+Bt/4wefyfx8xJe+VNetIEMf793d6mAH4dl4GlcKKTIOSugPD9OAizgg0K6R1+MaZTI9XbvS9F4gIjKfNZ86r767W9WRDPnXvhbx9a9XhLK5WVr+V1+tayGXG+fi8cMaaeFtqVE6qhQZiBYky9fXU7759/9+fs+dO2nwvvnNiH/lX0nHoce6OXrrVlUxdpWUKI/aw73m9PXF53DNL5Tb1au1ZdzGRkVNEaWgQqcBHe+9Vwqvs7PqMHl0lJt+fOc7+b3my3BYfe37vUNpvaRjYsAtlK73xsP1CkG83rjSw2YBpGEmPHSBJ++l4rTKkLsEk0WG/9vcrP0YJZ0gHHtr6poH2dy8WehM1VzXXdPT4nxRBhKF6+u1QQLFDCN182aV80tGCu/1yGAU8b29MRAHAR3/+T+f0ck/+AfVQfHGjYh/9V+N+At/Ib9ndzc/y1lBjrYXEz73SMP3RFRYT/3AKfSiH53oSATtpalBFJ59dTXf//RpPpfeegBtRPWhOEhEtr+fz3NpKT+/uFgNtzwvht75o4q2top66HkBc9E87oogEsFOoclZ2Hj5X/yLvPY7d/L4Dx5EfOtbmZv44hcL3HjW3RFKVOuZYp4DJwAOGSHnzrlwUpL6PtOT96ga1a448W9+s4QF29ulLHn6NKtyf+/3MppU2To3l9TRnTsRP/dzReP5+0uWuOxjYsAt+ogfLK6wIDzknpzxmZ68hITxxiY8Y8ZAQKjr62Xw0SPogKtXy7nculULKaJe+93fLarCjj2oB4goolC8BBaN7JMntdfjs2cZJpPP7e3l36emqm8HhNv1vYNBGhhhuwgEykHFWKBdmfAzPxPxH//Hifh+//fzPty5E/Fn/kyGvKenibRECJzH7dtVNSf5ZPFD2egvEUsv7Wbkrl3La1tfr2jKZg0kgtqaMqD4dEqQ4+M0enqI21zC84AeO+rj7CFR6g3gQaJ1e7t47oii3iQVzRkyOPOyR43mHWMlaasU/8GD4omnpxN5f+97NQ+/973aSWl3N/+3FR2jjMJDZ3CQVEiKpFzH/fvl1Cg/OGr5JLmV/f2IP/iDah53cjLq6CRkt7fz8++8k/ORDLZX3CrsAbRw9xMD/hKPXgQAMfdEZi/o8f5eEWjzgX6MrmWFGBnOy8uarKSFvc83PnVmpnhNx2eUFGt85StpeM/OMgn2xS8mgu00kOM5ztRULkqFHwcHyR/u71f423ty7O1lxdy9e0WnrK+X03n0KJGPzXhxsl0JgypCNUVUIdIbb6TR/qVfGi20gfgci5G5ebMSap6R7+CA0SiMusjFjjs0xJQh+qNEVHQgypLom53NxT8zkwZIAdBbb+Vxr16tqtJevn56Wtro5eW8d/PzeR4iAAVfP/uzJUvl9DkbDrjTFpqX9cIf1b+uQd9swEMU9fhx6dypoU5OUj9961ZJL6Ht4TCN5OJi5Q6WlnJe9E0d6L9tDCFJy2FeXOTf9NTRiZLskihAr5NvfSvPQTTx9a8XxQLBS7qLZtxXxVLmPf16T1r2/jAv4ZgY8N6sp6Pq9+PGOn0Cjcl6S/aZ/BIxih6E4/THvleBgkQOjeqNG4XCleGjBtbWcqExVDYs9jtnEVGGYG4u0SHjxdlsbCTSxfe6tuvX8xzu3KljQqqPH2cSbmMjF86zZxm+/vzPF1+MYujqnd5hkdKk6297Ob/E6MbGaO+No6PR3ieMPipFpDEupyQj07Xw8eN8z4MH+ZmnT8tYqpKcmsrX19fzvLQw7TsbMaD6cD98WHI6x9U868aN5LnxspKq9+4ldWRudBqOogN1hLJYWirDBjFTzmir0CkJz1vC/Rd/MeLv/t28titXMgE+GET8wi+UE8fBS7g/fZrfqYePyBDN43pU
uqKCSGnReow1x4He6qqrb32r0L0q1jt38vO2mTs4qMgMIqcnB8gePUqg8PnP5+dFg30tv6TjAw34YDD42xHx70bE0+Fw+JU/fG0tIv6XiHg9It6KiP9oOBxu/eRO8yc4TGqoulcrfpjPMthQckQtzKmp2lQAj6po4uKiFnlPVs3O1lZWXhdWk/NpZRpRiEyvFBK4Ho4zMBFl2LxHmH3jRjkgioFr13LSU9JA5Ts7afwgUoqQzc1KJvUm+0qpoWA7yTPqfUeciOKzLX65iaOj0sQzAO4LtMWQ9epIaB+6trGvku9nz5KWmZpK40PGKbnYC4CePMnzf/AgUd/aWhWIoA+ePKkqQAoSPO78fFJHq6vlmNbW0rBoNyBHIuISvZBxmnccXG+dwIh3ukrCGa3mPP7oH434lV+J+PVfz+f22mv5ml40HJh5grK6di3PlbMzn/f2Khmv5oDq6fKyDPH5eX5+c3NU2dX5bjLG3m3z/Dyjn9/7vXKgT57kd1+/no5zejrij/2xfDbkk6+8EvHmmzmXu1N8ydF3xIdD4H8nIv5WRPzd9tpfiYh/OBwO/+ZgMPgrf/j7f/XiT+9jGj+OJ2Y8I8qAj2tP/U53CtkwKJrlR4xO8E4PWFCUF73EOCL/JtnI4EiCSZBq2vP0aZWga2+L49bzxO4sQnrFGgzAw4e1sBcWSmmiDJvD2Nkpo07qaHOBXpov9HZPqWTQKNBd10r3vi7OpVf59fd4xpK4utxtbaURePy48hEUDt/8ZrU/uH8/UbLoym4xvYpQ8nZnp57vzk5+N1oDt/vKK4kKe2TlX1dd6IWCUuNozs9Hk3TuH1VOByaoEHNHREQW+pf+UskOoWPJXry3BDyD535JdDLkzkPSFtes6OnKlXyGwIrjqqp0XtaLyKnP81deKUXU6mopqeQBcPs3b+ZnGHBb2K2uFnh6yccHGvDhcPgbg8Hg9bGX/72I+MU//Pl/iIj/L15GA96pBgvxR01odBmiyUNG9vDh6Mays7O1PyaaBN/H+EPcNLGQUz/fs7MqAOpGFF0gJKUY8E/ZtnATir28rIY+W1v5na+8knrgiFpQ/u3sJMJU4be3Vwkp6B1FRA7pvC2yXnotySkCwuMKt/HavS8HCqI3gMKHOqaoZ2qqnokkFqOv0dbTp5WIffQotdGqB5Xk/6k/lfJGCUYtT8kqGa1bt8pgR1SBjeOdnVWvl7ffrt7a5gXkS/IH+fZjRpTxZtAvLkaTi+6l+yo5Sq737Fk6L0UvFEtaJNgHE2UmJyMh2ROxHObjx+mwSfzM3ZOTnPvWzGCQ92BzswAJRwuE3LhRSexr1yqR/Cf/ZH73s2cZuUgaDwapZ1fHcOdOUWSKdyh/XrK2se83PioHfmc4HD6KiBgOh48Gg8Ht93vjYDD41Yj41YiI11577SN+3Y858MKMZEQhE5OP7EkBw4cduLSIQl97e6mqEPZLYuIsV1YKBZiwjABVhaSV82aMO1plDFEG6AOSRYsK2pF1f/312s7NhrhLS2k8HjwoOSLNbU9AcirT08mZ6iuyupr/FhdTCfD664VMqW465+++4Y7Hqx/x11Aa+oYhVPbNcOlrEVEG8/S0FrYy+SdP6jmgGBRRKdb52teqJbCxvx/x//6/1Q5WQnBtrZ4HR9jL7K9fz3N7++28N2guxkckRK8vV4GrVgyFRnAvzJMeXQASIjDzkiM9OkpHpeXx7m4lXtE8s7N5j8gmqTmUyaMGVWzqS4I+URF8cpKfefYsv99mGT26Gg4rAhkMSmnSe9V88YvlWO7cyd+pSpaW8vuoSqanq7CO415cTOOt86Bk+0vWNvb9xk88iTkcDn8tIn4tIuKrX/3q8APe/pM4gUooQrNQC2PY1RGQxY9yfIkX4efeXjmDiCpf1hpTS0ytPCMqCdedTC8aMuEhd82QdFZjrNfXC4H0xB9Dil9cWSkj/e1vJ4rBJapGhFoVXDACKBKJLdw0eZlezNQFEGxH2ML7g4P8LK09gw5RC3N95vCwrlEPGgqQra08/94XnMzM905NJcI+OEhEtrFRRTVnZ2nM3nnn/Z+1nh96Tq+tpRNEWY1XnA6HaWREVLu7lXB2zyJKhYFimZmpzZB7O1qIVpGX86K26VLTiPwdR+18bBS8u5vXu71difZHj/K5P3qUfLi6AluPPX48mnzWo/vZs9EEuu9cWSmuHE9/cFDRLspHDx9Od2urIrE7dxJY3L9fiiLboQE+T5/muTHQ1s6dO/ls33gj33/lykuv/e7joxrwJ4PB4N4fou97EfH0RZ7UCx0MgjCTLluzKJOuG8sPGlAvhQWEDKnu7JSiQ7GMisLZ2VzAFrTEDY0wOVhPRPWdyRlMO9RAo1ev1tZROEFSL2F012pbyA8fVohOFvfmm2Uwybw6v3txkYticbFCXJEGKRkZXO/PfP16nYskFBmmZK7r9Hlo//BwtA+4BC9DcXaW148aonNHHXFKdMFnZ+m4bLLgmT558sOfPcSsLcGNG3ldJye1g40qTs9EFSmqSG7hyZP62XxBM/Xt3MwNXHDvPz4+L6F099lz6TmSi4uSMfak9ttv52vaPbz1Vj7Pz38+j88Ay5n4jr4/J1ChadvRUVV0AgLoxMGgCsW2t2veouNEJd/7Xn7+/v3S+Pfkv+udmspnqfWxObqyUj9/BnjvPj6qAf8/I+IvR8Tf/MP///4LO6MXPSw0C51RoA8lsYsoPryjoYhR5BhRaB7XKORTKNHpGrwz1AJBCystBM7DOfoeqNukN1lV/tHIoggsgi7hg3YtfvfFRrkWA0P75EmhnevXq89ERB734cNc5KurtelA3+2FzlwYvLxcuunr18toMUD42fHybI7RfSbVtOmA921t5TPQRqA3ZNJfRkUttK81AO2wdqd6rjxvrKwkitMFsSdU7WvJWb/zTioh5ufTeHhO8gTC+4jazZ1zZcCpOAwO7v1UUmRzzql3LAQ40HJyNb6XEb5xowqZUHxoKM6wS2YVOnHWIgAcekfM165VGb5iNw5eu9+33hotolMQ9+RJcvMUS+bi1FQCIgVolF706+SWL2Gr2A8zPoyM8O9FJixvDgaDdyPir0ca7v91MBj8ZxHx/Yj4D3+SJ/ljDRVruEGoN6KMO3RCs834mYAMiYnXDa5F1bnmW7dSw4orpEt99dWS9OGjqQJoxKlPFGFAVa6FsZPQifjBndINCHppaVQnK6FIGshxLS3lYtjdrfN7551Csw8fFtXS9eAd2UNHvde0qMP916JWUg6PSapHW87oaBkgaRtRGwKQ60GAdmFnwDjq3d3qaCgCiihEiHv9YR0Sv/jFMlKSyDdulFPGeUfk/1Q7zi2iruvwMOfD9HTRTPp1c8z4beXqIoUflqvxGfPRXCYdpG+37RoEzLGh/ewUNTU1mn+h/hChcWb/8l/WzkVTU9WL5/y8Go5RUTH4Su1FP2olVHV2GSlkjgrCo/cNOTRx8wy19iVf/Axw3uPjw6hQfuV9/vTnXvC5vPgh4dU3GYgYzdYbqrpmxm7JkydpUGiYSdBwuhGFiKC63iMZ16j
PBwN8/Xp+7tmzSiDa+Vz2HO2BA5dU7IkkqgSTvVe8Kee2L6J8wHvvlbFlkNfWKlKZmqok5+FhIup/8A9ykRr370f86T9dSGxjo0rnFxcrqdWpExwwLpsxhOg4svFCpH5e0OBwmDytDoGMxcVFUiMQ4NFRtS+IqEKOmZlKXJ6eVgm2ROv7zSc6dDLI995LA7WyksodSVHPAbLXevXZs9RZ375dRk4TKjTf+no5ZPTacFj7f3K647ka8wWN5e99c4+IUu5cXo4aafTY3bt5nyLyvrz2WlVTisoY74hUfkTUZtM7O6WKshZQNdaNfJTEo97gEpTWpPW4svKDBlsEJvLEh3fV12coYfm88dmsxFS5pnpO4iSiwncSLCFZ3wrLot7YSC0wxLa5mQb9jTcqweL7qCciCnE6D83yTUz9wYWwUI9JLImHivFvZqYKJbqcDP1B0y0cpSpQ8QaBu16JuIhyNgqCSPy++92I/+P/yIgioiKP997Llqi/+Iv5+5tvloyPs1NkI2kllN/cLO5fmIsDFfIq5OB4oFLRw+Zmbch8eZnGVx5jdzeP9fhxqXlEIlevVu/swSANau8Tfv16tUp1zq5DNKBHuJwKpOt85ubSubz9dhknJeJ4exSRPiyKckgbLy9TxulZovNI+8bzJOah43KYEo0S0Ds7NZdIQrUHWFkpbbVo5tq1LDxiiAECFZCPH+d9uXevGnhxaBGjZfbmJNWK+W9uKvJ6/Likq6Jg9RER5dxFLSJTPXPQVY49rkD7DI3PngFnNIXM9L4WwuVlPlxZe0PYGVGocHOzaAwIlxHtjZT0pZDUIXnr9EqvXDOh7CoeUVTJ7GyhDFpujkXFI4rFruW90rIn+fQmocLBd/eE0+JiGTFtPV2TZBTjzemRKr7zzuiuOK6JnE5BB+NsVxzyQPeYEdeOFNKk5tnZqbyBIiPUFFTWt0XjbPuGD8fHaRwk1BTCUNuIkKhh1tdH+3KYW8NhbRSMx97ZKV2y6EJksLBQyFYNAK2M3BLXAAAgAElEQVS/40rSduWQCMazZsC9j2PGTbsXcgioCbRexGhTK07d7kPQvmpUyccvfCG5fk7j8DCvk1MxH50XTj0iPydPsL5ekazaAxRf72bo/og4IjIaWFoa1bL3Jl/mo+el2tk5dqnvZ2x89gz48XEhKvyZZKUQ1aRj1LvkypAA6+0+Iwol40A7jeG4/ZiMOmciETkYVHITx2hyW2B94kHyMvk9S39ykguFsoXypNMTeEeGfne3dLeQW+cbVYl2SokD6Nzr06fZVEuFJTTWJYS6G0JHkmSSq5Qg5HLdQIsahMuQM0f9rW8VunYvX3mljOH16+k4PGt0AtUKowOlrqxUb26OBZePI+dYIyoxvLSUkjyo0Hf1LdW6EdnZqRYAfa50FI0PFo2gIPb2SnbX1SW97D2inLVNOvD9qhblFfx9MEj9/uc+VyjeM97bq3nufxEbrp/z0wvFPprHx4nwHzzI75KXsDaJAqiEOMb790dfsz6dtzXH+csLQOOiJrLhz+B4+Q14L9K5uEh0YIEKq2/dKkNKhRExWoADaUEyEYnYJBkhhsPDRFImCxT+fsME69IuhmttrULonswh48IL4wupCHSvM2ltBcbJ9DJrCAR3e3qamf5nzyoMnZrKxSUxiNvc2ir5Vd8Rpw+Im+YXdQLVCo85JWiJM5HItDgvLpIfxp8fHpaTlIS7vEwqC40yHJYSpu/Qo/KOvh1thELznYyixBwnbRd7zpOx6Vw5x/X22yXHk5gkIVxZGd1v032PKIMqZ8JQiWxQBPqkMPA9MosoY90lrtRR7vPVq4WC9RwRHULtnPfiYkU1enebpzdvpmPZ2KiE5tpaKVS2t5N6u3073/vsWWrth8N0EPrdM7IS4p4LsCWC7CopxVc+Iz+kHkAi1fq03iYG/FM4JG0kNEjVeF0TkvKAOuJ5w2eE2yYKtBNRRunu3fz9w2hKOz9pEokE5uYyPNzaKv2sBBEDIZxGJzCKHIdEjYVtAUA+ftbz2+bAaAbJNoVGEp74yRs3Iv7tfzvif//ff/Da3nyzON1XXqkkpWIXieHe99k9g0D7foqUEe6ThHBEyd4k/VT79c1xyRQNfLneG0dH1TTKVmlvvVXIf22tNPbQO6MlQadVqfLtO3fyM/v7owoiVY+Q7/x8GnJl+7dvV0WnOXFwUM63UyP+jqZirDmgjjrRTBo5yStsbBRFIU9BpeOe9/4zjs1IojgkM0n9zB3FO5zIvXtlqLV/3dpKTbkIq/f6wa9Ty9Dbo0Xkl7o6hVIL+udIRI4kuD3K+YyNl9uAoxg8HChJEkgyZm8vF8yNG0UhMKg8OHri/Lwm+82bOTEeP64mPHfuVFb7gwx4dzAoEqF8RFU5Qo3QG8cj4QRF6bXRZVnjFMuVK4WgISLG3P2y4HGEEnacg+tjdP7iX0zD89//93Vtf+bPRHz5y4V0VLjRbEuUQvj6vegeh1r43Oeqjzl9uwSuY6B7trerepOCheFB/ZDI6Ys+M1NO+5VXEgEyONeu5TN98qQWu06KGxuF/uyO1I1Fr2pkTLT33d/PYzJUenmQNopSvve9clIcUZd2SgpKiFOOQKY9+jS6ZLK3FKD6uXo1z/P3fi+fAaRPOiphL1LiTM1Z/b1FgJ4TyhJd8eBBbYg8HNZORdRRMzNp5DXB0kJ3bq7m1d5eqVZQimSM9imV8+DUKMkY9M+ofNB4uQ04z20IAyU4GJO1tSpbt6iEV4pdHKeHc4NBJb4gIJ9lBH/YGHcwuEN8oihBZejcXJURM1oKX3plpA0ULi+rlwTjf3pa/T7wtAzNo0e1qS7t9NxcGajl5VowvY3nyUnEL/9yLnyFG1BSREn15udHE5A29UVFbWxk4hNKOj3N3VZcl14ht27Vs6R2sI8m/nk4rArI09NKvs7NJXf66qvVRZEOWOLxG98oCsJmDqIE8r779/N89Qlxvl0XrV87pQR1DYcZkdeg943ncHxcrWZRFuYTA8QgahNg44m+p6TE+OVlPQ+FNySlJyeF7Dc2KiH55ps5jzY38/wfPBiliDgcEZ15TCXlHpjXWhGcnlYhlbyEhLuCM2tVPcNv/mbOg4WFdLB7e3l+Il3XgUpStXpxUaoleQA5GjLbz7DxjnjZDTjv7iFZRAo8JKl+5mcKbUs+QuDCQVVnqsY6Qo+o138ULm3cwXgNahVyzs9X35Fvf7v4yuPj/P2NN6qgiGFm4NAmtNIRo/27VUFqCHR4mItre7uQ5PFxoiFObWGhuuX5/fw8F/mzZ6O7jUtMktcxIF3F8t57eV77+4XuNG4iVZyZyWMzqCgvSTrGBdqF7ldWqoXt4mKibIZucTENFvnce+/ltb/9dhkkaF+vbQky6NZ7SAcl6W7dqmfkHN95pxAh9IqOMbc2NvJ6zbeZmXLCEGSnyhgmVa+2CeNoRRyiR/MJd+6Z26hZ97/p6WrFyhgeHlb/c2vFuSii6k7ZHABuzIEvfSm7OZ6fpxF++DB/vn+/ktgczDe+kQZcM60nT/L+zM1VXxsVzJL+rnthoVoZ92
ikR5mf8fFyG3CTNqJ02HNzEf/8n5csSrMm20TRk+ILI0YTfmR/jmkR/SihGD767KzoE4ZfIhIPSS54cJBojzGBuKeni8PncBS06Gd961bxwxzD48d5LRJuKBhKkoiiVySIIJrBYHTneslE26ppkSskX1sr/hcahCpfeSXP/623spufZNiDB3k8CcTl5eLLNzYqwcWIo2I4CTvRUN3Q3uPAUQ3r67Uz/LvvVrT15En1L1F+jT+m0VeYZS4w1jpKolEYUQidVJLzkxtAldBA46k5nLW10a3rVHjii7s6qCufOjVGHogyM8+gcEYXbdQrUsk9e58TBV69r45nYM4pY79+vTZH/rN/NhVC6+vpRL/85Yq83OPvfjfrLMgZteTd2EgH8Mor5eT05OEwrAMA7MNQmp/B8XIbcA8Xoo7IxaFknVwvIifSG29UOEbCRPxvoVpgJoq/4/B6CfPzvD5EywBbQBCXMLTvAI/LOz0d3YFHBPDkSRkQ75HQgpokQlWmQdjUBtqjQmWrq4Wo6H87raSKU0II0masIGGNu3C43ZAIvx8+jPgn/2S0TP3hw/z36qu5sH/rt9KQvflmfR8jbqDGnMsrrxTSZ+i6PI5R2txM493RKpno6WnSJqRsOzt5PhKPXYFx92718uiInbGkdpLXgMJdw9xc7QT09Gk5JclE99vuSOhAUsfej3tpqeaUQaYqIWwuSO4uLY1udYbawHuTTU5N5fs5AE5DF8iVlZpv8gQiwatXc07du5cVnBGVMHc/JSvlLfoenj7/7rt5j+7erU6avVGWuWi+jldQ/5SMl/eqxxM4jOjhYfVS6GFq39VahhoVQa6HPzs+zrLx997LifIn/kTxreN0C6MsYYl6YeT7ZgYWx5MnmUTC5e3u5ncpWsEBQ3EmuOIQiTmI+d13a1duVIwWm8+e5aKRUHr6NOJ3fqcM9L17ET/3c5WQGg6r3e1rrxW9w3Dik1X23biRC5rUjy6YYTs8jPjt365zHx/vvJPHOD5O47ixUc2cREz2gUTNdJkfpDo9XUh8fr72vCSnkxiGmDulwTAwCGR9Es8S4pLJKyuF/iBKTcuUsS8vV08WGw/jwBm/hYWq+EVD7e4m0GDQOGvo305CjPs4iIH+UW2cXe8BpKjJPJLUvXmz5jND+/jxaL3E7Gw6XkDANd24Udy5RlK7u7VvJeMuonNvKVXcS1XLX/hCcegUYgqY3Ifx3io/hePlvGrIIGJUSUKLvLNTVIk+HdeuFdpl/PFkFm1EGrxf//WcKNev52f+0T/K973xRr6na4Atmr4zyniZey8Bh06hf4a+97/Y2KhCn7OznMyUFI5/dFRVixr49EY+Ck8gd8nA3/qtXFiSoPpYfOUr+f36eCwtJe2Bt7xzJ0NeSMt5Q4K937cSbUZIE6L3G10j//hxlVJD11/4QsQf+SPVh+bwMBEdKiWiHKTCH+Xq+qGIQnCwu7ulAkIvXFykEVpZqS6KWsRKzi0u1gbUOh5C4CgJicCIpAZWV0fpLYh1d7ciFsl1z+sLX6hkJpqKofr/23v32Mju687zXLL4KpL14KP46m52q19Sq2VZliI5ztgT2wnGxjq2YXsCD5yZbLI7zvyRZDDAYncGAXYwg8FigB3M7gwSzMDIehzAQRYbj5M4cezYjvOwndiyZD1b3ZJaTTXfZLHJYrH4LJJ3//jqk/Mrit1NNdtilXQP0Ogmux63bt17fuf3Pd/v97D7ChvkwAcUNODW4fAEmrHDw95IpfcxM6PnhU1xrjH6Ft3dclgMG9Z8dnZqJOLFRffcoUm8tqaigHulp0fHMjfnJmsMibhwwQsDGDfg3exuKXBu5V/zFo/GS+BUzmZ+04ceCdmsEg2VCxajo6NOVerocMgkbASaqTpl7BgLQGenJuyMjNQmCSptKh3kviHn26y2OkJpiDQZOIUb7/hx3SBsc0+e9JsKtSQVKMdOUltackc3BBhsUzs71Rtg2ntzszeJgDIQbwwOugScBieUPsQWVEHguigk29uVbJHfx7Fe+/Jl/Tw05FNa+JkFFZXj7KzDCfDXl5a0yMDNxrg/PO+YlqHuBC6gAYdStKdHr4nXB/REhkhMTSnRZDI+yADxCwlxYkKLGq/PVp5qHnodnHColCQqdou45e3saIE5ftwX5rNn/bunkUfVyY6HgoT7gf4NCZyGPPcHx0qBwqJCj4VkDEaP2yKQHjscdnorK3qdmRl9Jvx3Fhb0eCb2cB/CeNncFH2UuHpV733+vM7BuXPeA4FBRGFE9U2D923QrLxZNF4CDyW0NDDAKEmc996rVX162qlJbKVRop086apMmiO7u849DXm33Hgo17hhqICovDc2nKdqVrs7CJtN4MIkYBpP/BkZceFIT4/PnwwH4La1+SQUM0/UVPXh8UOn29jwbT0VKpL11VVXM0aRbkjwX3BfkgHsFhbQcJvf0+PfC/My2bmYOZ5JAu/qcnXi0pJ2BPCXd3c9YV+65Jg+kAQLGo01RB8wXLhewqYgE31IANPT3hPY3VUyRQ4PL/zkSf+sGDNRxcNDZwLQzo6Ooa9Pr4NAir7B8LCa1XyfHBuDB8DVYR0hOuL9uba4dikiwsRs5hAiTdV02rFr+N9m+jmVcsx7dtan24Q7hs5Ob1rjrMh1ywJVLGpBNqvF7+PYvb+bm51WSnF0/LjOyeysj3LjfACl0V8IBUDsnsMd79ssGieBhxJiqg4qnN1dHzhA4uLmw1p1dlb/T/KYnHSaWYiJ9/Rom0iCMnPIgW12aH/KDQKzBFw7xMNJLrjVLS8rCeDfvbCgm/7UKb+hzfz/gV64aUnKeGywuFB1gQczdQUmRHOzFo583het2VmHeGCe4BkN6wGmAeeDpEk/gZ+hxM3Pe7N4bk7n9MMfNvva1xxaYLI4ykiqO9gOGHXhYmem13r6abN3vtPPLwmjtVXNSIQ+7BjMfOADuw6+A5gn6bSOmYSSz+v92ZUgaIKPjT4AewJsAsDXceNjUQKvHx1VVQ0DJHRqPHdOx9/ZWWtJAORj5gsy1TpNUa6/MGCiAFfQpKUZj3gmn3doCRXt5cta2OilXLjg5zxcXLq7XVMwOOiJPYp8wAUFB8wejpOGKtqKwcFaP3aa0QyCYAcCREfyThJ4AwQVNskSTwsqDDyXuSDM9MWGbBKwOCpPJMYId6ioqlU1MLnhuAkfesgnfkBzAxIBAwfDBPcOt9ZUxysrbts5O+t0NZgJYJW5nCADM70nVDDwZm4wbl4qQarf7W1XFSK8KBRUJU1OqhIsFnVMp087lAEzBZwX+AX/FexY29ocquF44SpTIQEF7e4KkvjkJ93oicXv9OnaG3BtTQmOBbu93RdQM0FZk5P67J2d2m3df7/jwsA+LLbz865o7epyfQAWrignkY+HjXEgFeAFkt/MjO9K8HVnxwFkUirpvaHkVSqCCfJ5XVcoGZGId3f76C+SNqpcRGhbW3rNkDGEOGe/JBbi5KGhGYkvZPRQfT/zjNnzzzssVqmYff/7rqcIaYb0gDDqCsVCUaT/x4iKSh9IinsxnHvKLiW85
jG94vvADZPdbEjRfRtGYyRwbna2ijRKSDAkDZI3VSjYMLg1sEs4ZcdMjy0WfYrO+98vLPyFF5QgPvYxVYoo50L1HX4pZr5tJoGBz1E5UYnv7jp3lmN86ik9FvtNGBzcxEjPd3Zcgp3L6cZYXXXxDZJ0OM/M4WxuVoWfTkvIsrCg17h4sXbSd7ms/2Mx2N11vHlrS+/JItbe7go+dj6wK7q67O8G5LIl3tnxm3tgwCmOVHULCzqW5WVvMrJotbf70IUTJ/S+CwviC7e0yGMDrJveBwtiONIuhD4wcjJzCiKNy9FRJdW+PveWBsMdGtLnh41BX4DHcN7TaW+eFgrOpz52rLbCRmyVTmthhXtNn4briWuOhEvBgKApjHAnx64E/B2BDg3x7m5dL7OzUkRyvVBJr63pu3z0UYcL+SyIaUjIZm7LG1bIPT06r8z35D5EnMV9EfaKwPjDHS/NS7NaHvvbNBrj04dNQb40GodsrWBuQA2EFggrAM4qUmU41FQA4+O+Nd7ZURIHihkcdKy9UlH1e/Wq38yhKg9Ih4ocuhmfA44ztLPdXb3eiRO6WaJIr7e1pQTR1+eQS09P7Viplha/calWYdQgXoEJAiY6MqLkhG8F7A2auyQ2/CbY4saxe3ybKQGHLouhTzMUvp6eWh430u+eHlVz9A2AuqpVN8UaG3OZ/bFjes7yss4R1RminatXhVPz+RlfFrI1UL1mMj78mO042HZrq0/L4bls34GZzPR8hhnPzXmVHNIaFxb8+wfPJWlzDZjpfJTLes/z5/3zocRkEeV7IXlzPwAV0UTm90BMuCKyG2WhBSajkkWIBUzCrhWYDpycqh8Tr5B9E9Jq8R0i+QKvhTTCbFbnKeSPMz0JtgyLFJoCFmlYL2/jBqZZoyTwMAkCRZAkaaKw2tPEYvuFv/OVK2qEra8L2zx/XjcNN1koGeaCwyPazBeNK1dUqSMcuXpVfx57zD0ZuKEQu6BYgymCgIeEhsoRkQdybPyggXjMXHlJczGX0+OXlnTTbG3pJqAqwwoWrxNocUAKLDo0mTBKAtfGCGl42D8zzBcWC/B38E6sagsFXxiBvsw8SZo5P7+7W9/JzIw+0wMPKDniyd7S4tPJeT2Od2FB79nb67syFksWKa4hmC7Q0sKG2D33aJECayWRYmJ144aP2MObg8WN3gdJjwHGAwPev8D/hF1NaLmbzdZaFOBBz3vvVaTSUAbnBhOmycm/0RhQUXO/hLtaeNj4jCws6PsJPUiOH/cFEkVlJqOFmsETmYxX1ZircVwI1drbnX1DQQVHH38edq7cN7wfcCWf5W2evM0aJYFz0VExgFGylSIBILel4cbK/cILZl/9qq/cV68KIvmVX1GCxksDz2v8uGlqskUsFl1uzFavo0MX8Kuv1mLVUaSLvrVVNwSLCub56+tKuj09LiE3U5UD7aqnxyXasCTA7NltwJGlImxqEtRw7JgvcEA9LS2qwEPpPH4Y2awS0+qqU8l4L76DkG4JrYxEWKnoPK6u1rrFMaR3d9cZBtDQ2N2wi+Azk3SzWS08eNqMjnrFxiT1tTXnWc/Pu+CGie5UuzTIaNSiGmULDzaNpziCJSiGS0tq7gH34C2C3ziuflgI9PX5wGdGiJk5hlup6BwhUqIynZ52DQDXO0IhaIosYnxX7ERIlmZ67OKiwzwsBPQreC5Cnrk5HedP/ITZX/yFfh4Z0XsVCmbvepfeZ3nZJ9fTZD9xwt0iwbXD7yn0q2fnE6p8MUBDAMWOiAWYezzE9Pmcb/NojARu5hcLlRVcZPi7VI6Li/qyqZ4WFsRc2N5WQkVJNjtr9qd/avaJT7jH9/PP6/XwhV5fFwZO8oIWhmWtmV9I8/NOp6OqgZZn5skGLJJqplr1sWbAKjBqCoVa+lh7uyo6MFwmz+zu6vixj6VZROXGYNytLd1sVMLMpAQaoDING3osmMvLfr6bmnSz0kAKzZcQDDU1ecO2r8/7DVAGm5r0HKqr3V09FgFPyBQZHNTx9/SYfetb/nwS02OPKakgjcfulH4EXjPAFyFzhsRC1Q2FdHzcMX5UhWYuUGpvdzdC4BUoj+C0x4/7lp8FnSYxxwFmbuaMIRrwwIBwr1lUOD+plKtPCXZFxaKzZOJY90Uup+8GeIfvD1ZTU5N2Ibmcip441vXywQ+6AIcdLgXJyIgXI9yH2DlTIff2+s6H3WQIJ3K9ca+wOCAY4zOEzduk+jazRkngfHlmvgXliwxNfcAr2UamUu5CB67MTZLJqBKH4tXaavaOd7gNaj4vahdNLgYfdHW51SdWtODPodqO4bgMUGDcFpUVyYUKksQGvfDeez0B9/c7mwK3vfl5l5rDGAEnzeVUTUPP4wY5edKnsnPcMEzAK6mMQ5kyghmON5/3hLm05EOQwVXhmYd4LKo7YJgo8qoUMyngAxJAJqPjuXLFj/0Tn9DuaWxMC9y5c4Kf2JJznPyMAIdzgJgLymfoyU6zcnnZGUflshZ2sOGpKcfLu7t1DaGUJbHCGsHgCYl86NjIDqdadUydxiU9DAyuqJzp39CsxxsHCqeZ89yhCqJ/QPnJPcB3EceqtrmWgHMoXE6dcjEPjUcgHBYXM30HUBOBEYFlwgWnqcm9XFioeF8WWnYlvD5QF8M9gFKSaJAEbuY3NzJyKm8uXppRfMlUfFQC8IphpayuupycqimbVbXX1eW4K/Lg1lanyo2Pu+IMeTIiEaqFmRlVi5//vH+GVErDEc6fd0ZEd7cnQBal4WGvxJiMA6aKZ3Zfn1d2NO5YxGBywJAAVjpzpnYOJ9U557erS4lya0uLHjcS1EGUd0zOAdYh0a2vKxmgtGxv1+MGBryJR4WYSjncUSq5//nsrC/QNG9RrUJFe+97zR5+2G9uMNzWViWbkLFDpYc3DO83O+vwADRBFhWOm2Y48B3Ta1j4ymV9V3w3W1vO2BkZcYk81yVNdXoSJETYSEBO+JZw7ULZY+oPiy/NbBrIsE3YSYWiN2CSnh6HrUjoDHJgN8BxdnU5hEg/JxR/tbR4o5vPxIg5XouBHMCOoSfQ6qpX1dAH2e3SU0CARMIOeeBJNFACJ0LlW9ioAtMN8TMEBCMjwvUyGd1sMAc++EG/0cOLhIo//JlK4/77dWE/84wSwbFjTpkDIy6Xzb7wBbO//MvaY9/eNvvyl/W+996r55886T4m3BxTU94YY8gCEFHY+EGFSYLa2HD+7mOPKWGRbFgUwiZtaFOKDJ5kT+Kh2lxedo8ZFj34xeWy3mt1VeeBxQEY6sQJ90fh/4AN4lhJ9MYNb/JiMUtDF2YQ3t5UbVTBra21nHq262avb9zNzDi1knMY2ugi2qFZFseOGbPrCRuoJ074NclwYyAy8G4zn+VJEx41KZVmd7fOAwmZqpPvxswTMxBYKOCh/8Px8RoUAGDnLDC7u241APTFdUZ/hWue65Oihd4KIjKoqcB3FEyVil5vcdGLlULBd0ShHQVFAckZIR6VfhL7RuMk8FAYA8fbzLeONBXDCgJ88id/UjfuxIQSRTot0/lz53QzFArePWfOX7jK
UwXxu5MnndJHhdHX502cycnXJ+8wnnlGW85s1itOM90kPT1eXdEAohHG5wSnxkODSo3mK7QwvKXBZLlZaKKG2Ck3NZRDkhYJBpoYeDjeFAw6SKW0dQ+l/lTSL72kqi9sblJFRpFTDrmRgbWwdoUJAvwA3RDVKHALTeUo0oLFZPSrV71Bu7CgY2AwBZAAuzE+y/CwL8ahWIkKEs+YdNq59zRNaSqyCGNQxnmmH9Hf75+PhAnGzW6K64/XpgHNtRjqIngeNsrgyOwqenqc7QJODnuEngO4Pvx3pkXR11ledpMpjgP2CupfrlV2BSwKZrXKZRI53y87qqEhndukyr5tNEYCByYIV+K9jQy2d2GXmubawICk3FNTupF6elRJY1APRQnWB5UXQSOFJIbyEUwPJkZvrxaKsbFbfx4SfybjGGjITQ+rRjB0MG6sTKl++cxmXtUiokANF/LGwVBpRtFMZJtPUujv9yQBfIVyEWolja/+fr3HlSs+YIHvimbc6Kh+RgCEkIQqLpvVa2BLGtLFNje9AY3tAF4e8M5JxhwvU4nwRe/sVCUIRoxKdXNT3xnju8CsNzf1/XR2akEeGnI5uJl2ddWqqy0RvFSrut64/mAp4WPDuDO+C+Azeg8kMxIf3ytNPnZOoQMikAvXwV6IgaHO7Ai2t2ubjjRUmSGbz7vwCP481xbnl+uV66dQ8B0U5wAlJ30WPG5Y2Hp6fHHJ5Xw+KHqIJG4bjZHAUX+Z1XogwIrgAkeaHHo/0MgaHnYclyYnVq3QnKje9lscwMJpyLCthG2CRBx2BgZY+wVbYBqeoUkPeDAd/2LRoYRCQe/PlpQKmgqexIWCE8wxbHBRKYUQAVxvPifCJ0yHwKH7+ryqjWMfWgCOms16c5akH/pXcH6Qn4cccrjmVNd4vaytKRmYKZGWy/rO6FvQJ4jj2mk8fX0SAnV1KTnQ6F1YcKjEzN8DLnM+77s3jgncm+sOBoeZV6Pg5DT7oAiSANkd0rClyUoCLRR0nPQdWHhDC1mqXr7rkAsNVAKcFjbr2V3Gce2EJnoYXNuck5Mn3RKW+4m+Cf0IICdsILheT5xQjwiojEUD+mgq5apmhkpzfXCsLFxJ3DYaI4FzoVBxhAq60CMFsQgXF5VNb6/bWyIZp8qEYQEzgKSwt9pH1MD2c21NkAwVOAIdKvF775U8fr946CEd2/R0rQIyvKhZLNbXRe1iPBwCoJDRsrKimwYeNdUgNEa24mYOATB9Bbiiqck9SBAYAaEMDvpnjyK/6YB2kJUzQNdM5wBMGk+tQuQAACAASURBVG4vf0olh1So/MNFmeR37pyr8159VVXvuXM6tslJd/kDs+b5VPhtbW7PSoJdWqodoLCz43RN4CIWJ6ARYKcQm4Z7D7YL5ATdby9zCtsAcGYzZ7q0tvruL+Q90+SkGQ2UF/YrWKTpv7Bwo/4k6XONscAjaWfxwUpgeLi2B0LiBobjvLJwoLTkmiqVaodjsCjgZEhlDkmA921p8cLnbext8kajMRK4mSftMKjy+MKBQkjQ0JJaWlxwAUeZ7rmZ+2uTHMEaSdwhNxk2BDxbpsRTEUHres979PgXXqg95kJBf0olfR6ohtDE4NHiwQ3mTIJYWHBWR1eXEud3v6tqqbNTyam5We8RTpsHriHxw8Vle0yDMjTkovEIPrmz4zcwdE3Gt5XL7qvBzU8FODRUCwOw0MEiAmIBEwUnRvjT2qpFiS0+bBCgCzNf4MMeyLFjfl6Z7nL9ujc4Q7zazBdIkgnug2C8NL/pJzB42czfM1RjgkuD6/f1eZEBtIBaeG+zjgQY7v7AtkMsHCiKRRo7YRZ3Ej0LLpL1lhZvgtM0xLgr5MrTrGVXxW4QKIUdSRRpUeZ6w2se2ikNVhZcxviZ+W6SRTSJA0fjJPD9IvSEMPOkA84KtjY/rwvLTMmkVPIKFW8OKhW2yjSDSNyIF8z0/Kkp/f3446oOt7bk73HmjCegj35U1TPb2UrF7IknzP7sz/Q6PT3i2YLFFovOyKDih47W3OzVy8SELx5/9Vfe0CyXfTjDXhEOo75CYy0z386zVQ9nIJIooMhBKwu9XhDPwJBhyx2OysLTmsYaMnKadsPD+txgvyyEQCxzc0oOJDEa2jhCsnsJG3wcz3PP6XOC9+LBwvGjjM3nfRoMnHYz9xTZ3XU5OI1jJODwqTkWkhXHSZW9vu7yec4x/HsESATVLz0OdgdmtdYSIdWVngwLP4sm3wnHhvwfPxMgkYEB7yOETBMw9uZm7TrYFfB98rp4e4+N6TGFgn9HGHXRvKfRaeaNZHYXSRw4GjuBg82FSZwbiqqA6np93SXIYWILfU/AisPKm7+pdMy0GMzMmP3e7ylBUK0/+aQsU//+3/fK9OxZ/f+rr5p973tuYpRK6XWWlyVThiKGl0kUuXFWLidaIIl2c1Pwz9WrtT4T3ACoBRFVAI+Y6RyEntZgl1RznK9QJQpLAn/wpaXaxlbImoE/TKJmV4ANbnjzU5GhMKQa5w/yd3YSobERSdjMkwtJk+QIjDA2ps8/PCz46vHHJayqVJS0Bgf13jAoQvYRVD7sXjmW7W0tpNi+8t4kQKpuzjmUORZJKnlwbpI4Qih6KqHkP6zwgYBCuI8FhEWHMXfAK7w+LBh2pjRGQ+YQSZmqmGsi9MMBj2exAapJp93ilh3BwIAbdXFuOU9J1X3H0dgJnEZYCK/A/Tbzht/Cgv7Gcxu6GomayhoxAwwUMEPeK4p8yszXvubV3cCAD4z4zndUVeN6R3Po+nUdT2+vHhdOiR8bc0rZ1JSO9fhxp9ytr8uTmUr34kXn19LZ5zxgJwAcMTurnxFrwLog0cAUQGRDMiL5gGfy+YE+8PqmKiZBATMNDjr+TCLi++D9aGjx+iws4O3wudNpt5ZFhLW5qYSMYIjFlgUI7nMuZ/bII/7/Zt5IDZu8VKIk6dAwigS+u+uCJbxPMGHKZNwWAIw7irxoMHNoCYgmbBCSyJmOxO4H7xa+ExIrr8+OJPRLIXhdrrNw0SBJs3CSvENaHz+z4wp55xQtLA4UUXw+dnowZ8DpQzouTc0k7jgOdfaiKPoXZvY/m1lsZs+Z2S/FcbxxNw6sJljhgTa4WFBjVqtKJisrnsCpMm7ccBl6qeTezPCIqQRQuZHEsKUlIcFeoQJ74gkdQ3+/08VYIEol3eDANaurSuChvDyMhQWnC4a/y2bd+D/cOZTLOgZEIwxrpqHIzoLpQtWqjpPKFVVia6uSLOZS+bzfqCHFzMx/B2WPm5aki/MdiwI2v+DX7AaoavEJp9FJVVetOo0QOTbYOosbeDIilL4+d21kUQ6FWCQxdij9/e6HDg1xd9enBFE98zoMIWDnEdrNsnuDAscxhBYPZr6gspujsg2rWNhDYbMSGIXvAWiN74MkGnpnh7J4CpJwkQippey6WDDCJj6fI2R+hXTdkK0T2ltAE2xvd6965lqiPKXxmUAmh4o7TuBRFI2Y2a+b2YU4jtejKPr/zOzTZvaFu3RsCi7QkN8dcp9RlM3N+YWytORjtF5+2Yfd0lyanPS
K5o/+SEl2YMDsM5+xvxtrZubqSBJApaLngl2aualVSBmkQYo5FcyV8PG3i9VVJcSdHedSX74sARK7iIsXvUkKZNPWJqUnUEl/vycXjhsrAryiMURiwAHsC5JhWMmFeCjNTyAAM6/gucHZ1vOH76RUEqMkn/d+BdODWDR5X8y+wmSAz01rq9Mrw6ZfmGS4jsKBAwMDOo+wU1jwSEB40lAMmOlY8Y0BwuFnrk0orEAqBLg8x8hiDGuKHQAVK8wTrll2FSRX7gfeH2EW+Djng4WAXRoJnX8zJxVfFZggLE5h4obhwr0HrBZCN1TZLCIMj+Acs7ByjSRxqDjs/iVlZh1RFFXNLG1m04c/pD3BBRPikmZ+gTKCKzR57+z0am131xtvqZRuxtlZORROTOim7O8XrPBbv2X2q7/qW3MafCQS4BOEHRMTDr+EMTenmwIWzNycN8UOGtjbIj+nk1+p6A9V5+nT+qxwaEP1Xrms54W8XTxGgF5omOZynjjhLpNECGAapPxgqvDv2QWk0zpnDGMulfS8uTmdZwZH41t+8qTTJFmsOAck3bk5T2jhlB4azGzXsfKlSRlqBGDRkHRI3jAueF2uo0rF51tGkcM4cVzrOolsnkWPytfMCw6KAhYfEjYSfwoM/L1JltwDJNRQ4k5QifP9sIgglDJzRSa9Es7N9rbj7VTwJP1wAeQe4JrgZ5I3fQ0z701wfQCjwXRhgQrv6yTuKO44gcdxPBVF0X8ws3EzWzezb8Rx/I29j4ui6LNm9lkzsxMnTtzJG73+Sw4ZImzNcXGjGuTCIYlwc/X36/E/+IGbV7HNn501++Y3zf7RP9J7jI/r5xde0HNOnXLf7Ecfdf+TMDIZNciWlhxeuHTpYJ+1r68WSpkO1kO2qFSpc3M63nxeLopUNBsbOj78mPN5LWQrKz7geX3dIRUznaP+fsfb2d6G3hbgmlDMQgHS5KTj41jhrq/rPMSxwx5s1/v7a8UbJFy8xJlhCSec5wIVbG8r+QMh8f3DyybBAAFQ6ZLsWYTBq2HMwFve2nI6KVRBbGeBkTY3HYvGfz60KABrLpf9+eDplYquraUl7R76+72K39527D5MwiwY+zXuSfR7dzphwkWrwBxUBDbsXkold1IMG6OhoVZIXYTRFEIsNI7j2E3P6EmkUrq+acoC6+ylBifxhuIwEErezD5mZqfMrGRmvx9F0S/EcfzF8HFxHH/OzD5nZvbII4/Ed/BGr0/i3DhcnMyqpGqcmfEE3tXltML1db+odndV+VHVcYHNzuriGxsz+4//UYwFWATf+55YJffdpxvqoYfMfvhDP67hYVXely97oy6T8Ybl7YKJ7XuD5uArrygBkXyZdhNyhDc2vFIEmjh+XM/FxhPWy4svuhFToeA9gL18YzPfwSBZX11VErx8WWwYBEx9fS45n5pyp77d3dqRaGa+ULAQT0zo554e907v7XV2CIZaHM/Cgk+lR4qN/w1YdYhxA8swYgzBTWisxaLB6Lq1NV8U4tgZNjgeorzk9WieYloFjITtwPXruj6R0F+/rs90773+WJIbcEaowAS64roIG40h5MGOiD/r62JCmel16Q2xEPf26vlzc7XNbCT9Ifeb6w38mx0L32lvr/7QwzBT4RMKgLivkzhUHAZC+RkzG4vjuGhmFkXRl83sPWb2xVs+643GrWT0JJ/29lo/EbbCIyM+0Ybp20xUHxrSzXPmjGOmk5NKjuWyXAP/4i+8qjDTxf3DH7rkOZv1ygxBxpUrjoczAeagETIIzJQQUD8iwsH4KZtVgnz6aZd7MyJtfl7JjcRBgsYVsFLRroIqbmdHO5Le3loPcfBXFgkzryJfekmNXOiZKytm1675VpnxWLu7DtPQgCUh8T67u3ouirzQOnh21huiYL3Q8KhYoS8ieaeBR9MO2IDrAGgGMQ0GTgwDgZ1C/4JqnwUOdWk4zmxtTZ+TsXbsdNravOlbKul7AapAhciuCU94sHcWorA5SHLmc4ULbMjG4jE8/tVX3Y+lXPbF6NVXtcCjIoUYMDWle6SpqXaXS4UP1g08BGwTsnDou4SsGWK/nXUSbzgOk8DHzezdURSlTRDKB83sibtyVGGE3e6wmRJidODDNC8HBhyTzOfV7Bsb079JumZmX/qSfj86qiqyWtXAgCiSYyDWouGNYqZEjqkUmCx/UPkdJmDQhDMt8SIBOgJfL5X8ZgdqeOopfRbUmBhAkSCpdAksBS5dct8Rqn44x/B8s1ln1QANIHxZW1NFjrsdAws6O51uR9MNxamZy60RjcCsYbuOdQBiJXZbcL7Bf4tFX6R7e50OSLNzZ0eP+frX9VnxezlzRkmMZIUDJPNHd3aU3DGbam11WmposctkmmzWpeKIxlpaHMLAERCuO5zyUJkYSuFJ4GETH2EPzCuSKdj/3l3U9rY+a0ghRTMQUj35vmhwwp5h0Ak9Fo4LXJ7vIxT2sLsNdzl7i7AkDhWHwcB/EEXRl8zsR2a2bWZP2WtQyV2PsLIguOBCzi7VEDjj+rpu+v5+swcfVAJcXdUF+dhjuiG/8Q1d1GfO6Hc9PUp4S0tuTg8/nFhZ0Q08N1fbxLzT5I03NO/JlpURbmzXNzaEw6dSfsOHPG626OVy7YiwpSUdb0+Pb93Btdka00SECREmbaot+MFU51TeSORJJiTK+Xkl0mzWEwkNuo4Ol7Ej8CmVapt+nAuGNExOOkaN2RWNslSq1gphe7uWtrm1pXPwx38sTj0NQ3YO732v2fvfr+diNRvy0/GHQU0J7z+V8hFlcP9DSIbvFIx/ctKbluxS8Fh5+WV9v+y4qIL38qzNdCylUu2UH84diZXdoZl2jbCxeMzGhnZvPB5qJMfDosDuiYYnNEvOO2K4sNcUJuhbFWFJHCoOxUKJ4/hfm9m/vkvH8sYCIQA3Ig00uMGZjA8sBhqg+ULld+aMqGw049bXtZUtlyWBHxtzKXcYHR1m3/723fss7BpGR5X0Vlb8hge6AI6ZmhL2Pjysc7C05DhsJuPNuSjyBm5ra61IJp8Xfs6N2Nqqn3M5h5WosEKDLxK6mc4xla2ZV9WplI4Rb5Tl5drpM9WqGyZ1d3ujlKQfWqViU4r9a2enL0Ykl/Fxp/fhrwHcgfkSMNr8vP3d1Hu8TBAWwZTB+5sEB02yqUnHQUUKnxqBEgsQtMDFRWfzUFBEkYqIGzf0HfN+nOvQt4QFE5dBzjPsKywAwLpZeEmO7LZ4Tl+fPntYiDQ3i/uO6RmWDnyXPHcv75zqnfPCQhfuFPYm6P2KsCQOHY0hg6KawqiIKo6Vneq7WPSBCGClTU26mbJZXcDgedCoRka8WbW87EyJhx5SpbaXJtjfr0rpbge4K8wZ6HP7xVNPKQl1d3uDlOYrCceslh0RRWLOmPnNx78vX9ZnvXpV5/Ohh8x+6ZfMPvQh336jdlxZcUOwmRndsNj0mnlC4fPAxqFvsL7u/Yr1dS2uqEWjSK8/Pa3v4OJFbwZS1YZ8aOwCFhb0Xixk16+7n3cU+XAGKmGwbCAhjrNY9Aq/UnFKYkuLFhKzWpYTDW
M+F8l4fd0XPaAIMy00mYzO2+ysc6WPHXPDtMVF9+JmVBkJnV0YjJCQarg3eYYiGai1Fy5ogWecHN4nQDrlsr6P7m6HxICHwLJJ4uwIqL4TUc6RRP0ncChOXKBMYKFrbuY4WxzrQoVlAE1paUkX2CuvuAIPNRjWpXioAL00NWmSz7PPqnIz0401MuIubnczaIQdNCoVUSC5gUPvl5UVJQn48WaCkMbGxEDhczzzjJgoP/yhzo2ZzsN3v6s//+7fmX3kI74T6OpyeCKbVUK4ft0rYjxZuJkRU9GjYJrL7KweNzSkpIHZFjYDONnNz7tQK6TFUfWy1QdmAeJgqw/8Y+YsG2wAwIF53aYmLRxDQzqWa9cclmpvd49rJvBQ7eMJH0I/5bL3YXCaBCNHpXrihN4XkzGweqyAGT4dKkzNnE0VcrFDmicsq/2qX5g6J0740ApgLRZ1oLOVFZ2nri4VLRxjKI+HNZOIco4s6j+BU3Fwc0SRbs61NR+DRTXW2uocaNgGKyt6/osv6ubJ5bzS6e72v2ngsEVNpZSAqOBmZnwc248jWGxaWtx2dC90EwaVLNv6kyf12bJZ0R0nJ2sff+WKRD8PPKAbEi8R6In7xb/5N0ooZ87oHM7N6fiY4H7mjOPglYqfH7b20O+Q31MxVyo6nyQGGmxtbQ7LtLVpRwD8w+6LhAULhSYuEu3FRR/HBeVuZUX/NzSkz0wyhs2Sz2vQtJlP1SkUHKbAiXBqyvUG2AB0d/vubXDQ//+VV/SYyUmHQzY2xPrIZBwug5pJ0uzo0O/7+vT7wUFfEMNd0142FtcsQpn9gmQNLzscYII4aX1dO7ypKb3v/LwS+QMPOHRCNb6XaprEmx71n8CBQ4pFV+5h8nTjhs8cZFs9NeXVztycU9hmZmRqhOINwQG8Z3w9MBN68kn9CWNnx7m0N4tQZv9GYn1dSSOcX3irBF4uexNsfl7HdfasXoeRbmDc4LB/9mdKutWqqsjmZm/G7Q0SRrGo1zXzhRSePT7UDLblMSQmhDaLi+5CCF4bx06JhE3ClJhiUc+ZnnbMGOgkldL3A10QhgeJKVRAksxYYE6eNPv4x/XYZ5/1IuD++5VQERLBS4eXj23u+rp6JuzSdnZ0bHClzbS4pdP6/diYN9rpQYRMjdCHPp3WAgt7p7lZi00ofEE5aebnEJESbCyq6ZsFDX/omyyKwJRPP60ETmOdPkOh4B7sYPZcJ0kcWdR/Aq9WxVlGPLK8rATBQF8k5VQig4OOZ87N6UIE837mGeHA8H7B+LDvhPb21FPu2f1GIp8XNJHJmP3N37yx57a0OFacy2kLPTFxc3FPSGlrb1clzgT7xUXfdsPOMHP6X3e3MzyY8bg3qLDwyMDfBWk/DdE41vEuL7vfy+CgD58A9kL2jrJweFhJrqnJEx6DiVdWdJzg23x/WJ8CXdCQNVOC7epy/jMLcmj0hbjnox9V0l5acjYMi0JTk9MWYdTA9FleFv99aMib5MAqzc1aVM08mS8t6VpDTMQ1CluJa5o5kOm0FgCsV0OXRXYq4Y4UjJ6hF2ZOMbxdEoflFOL00EA7Oz15I6yamFAVDqyU8LjrIuo/gZNkSRpggIwPGx93cyJgExRxo6O68GdnfcI2wwEwK+rtrWVoPP+82efukA2J8u+N0AmjSLg0eCZJamNDi1OYwIEFmpv12QsFx3qhAnZ3+3DlvTJ/M8cxSyUfv8YIrb3x4IPeeGMxMKulb4Y2roigzPQ7htPSxAPHBT8O+cowidradGzwkysV55DTQCOpYKyFYVJIi8TrhGYmXHWMvIaGaulxUOlgu7ADmZ8XHPL8885Fv+ceNR5RYra1uWUvSdfMF5Mo8oYqDWG+wxAeAlfGRzsU6bA7CK8FMxf0IPTCvvZ2HtsoTtfXfYcQWtPSUIfnbeZN8YTHXTdR/wm8WpXIAqENSr+5Ob8ZkY7T1Ud0gKwZWW84nKC9XZxbKnA4uzMzBz+28+eVcMtlbfep2PZikHthlZERJWcodCxKKBTxT0YBaqbtNZL+iYla29CtLVW0vb1KTO9+t9m3vqXz0NvrTJUTJ3x+pJlu2osXhdP++Z/XHvM998gygMTHSLbeXm/OofZkF3DjhvOCzWo/S7XqO6WQNkdjk6admc+ynJ11mtr2tuCx06eddx4aQNHIzOV8gejo0M80Rycm/JwxPR1+cibjDeF0WovjxISO7/HHfTxcpaKd3NKSCgEokDCAWFAoIObna/26qbahBnK8fNcsZrwW1TfJHxw6XJyZdE81vbnp3HsYK/sFbpL0iXZ2dK6uX9dxmXlj+Kd+qhb7TnjcdRH1n8DZfqJCZIgACkUz/e7SJV1g99/vysJSSc+j0gNrPH7c1Y5MI4f/u1fOfrNA2YjgAY/ugQFnvZC0w+SNDDtkbQCdIEcmkYSsFBqN6bQSA9xluNqIf3Z3zR5+WL/73vecMTM0pC0w3GsqxXTa7O/9PVWUk5OuTrxwQecSL5flZS084LJQOcFs9/KeEf2kUsLd2UngSRLHSmQwM/g+KxWXuUPro7mLmRcNbZIWWDhQzeCgPheVOZg2cnmq8/D5DDbY2HBaXXe3zjG8dTjZCwva+TGfE3Mw7IbPn9dzqbyXl31WaX+/7yTa2oTLA1NRkMBs4Zpk58lgBXYHUCGhIFIVh8ZSLKJ7k+32tkNcUGoZ4RcOG4Erfu5cUnHXYdR/AqeZh3oNKTV86WvXfNyVmaqjc+eU5JaX9fMrr+iGi2Ozn/s53TRc/KurtaOpDjreqa/Ppe5mtcfY3a1kOTurRiiJuKtLxxVFPsigvd2ZEkAGfLYXX3z9+2K/upcatrLimHQ6rceMjOgYlpaUnC5eVNKsVh0zZiE6flzNyp4e52zj5zww4CIeePLd3a4y3NrSDgTDJoQmVI/9/U4ZZJfBgtPWpt9jCzA5KcbM7Ky/J74hVPAoBqkcgY+KRWchMTy6r8+Vs2EzMGS3MCQBCAUq3vi4ft/Xp3OP8pOd3cqKztXUlP4UClr0cjm/tnjtwUFfYOfmtLthNmippD/ALXNzOiednfq+8nkdE/L8cFRauey7MHaomFXRHN7P9S8cKkFzFIrjo4+6FfPAgBZgPNGTqKuo/wTe0aGkxwBi+N9UHExyJxGGAoutLbP//J9rX+/551WZ/sqv6IajyYfoJxy2eqvIZp1OhvPf6dNaPJCUnzqlhEjCnp31sWBgjyQaGlSYNr30kr8X234zbzh94AOeAPAlZ7YjzcFcThLx0GgIpgYMiIUFnyiDydPmpleMoYdzOM4MWOTKFS005bLjr5hPhV7UuZzeBwoiMnZEIaureh28qYELaCxDkQO3Zssfx179zs3pWoGZg6UAyQ+YDesBMHIqVvoB4O7g+tPTbtEK/9zMaZAkad6jqcmpoLOzumbpB5h5xYsXSWiVUCx69b26qteBt4+sfm1Nr8nnIVjUuZYrFcfW9yZwFrHdXfdvZ8EdGRGERlM4tK5Noq6i/hO4mUuqaeqxvU6lHDstlRxKuHZNN/3v/M7+r/fXf62K85OfdDodUSjI0OrLX97/u
adPC3KAVsb29cwZWYJCycKYKZ9XMiQxwj9G7o8Cjm18R8frm49USKELXT7vWG8m42O+8LBGeHH8uLBv6H3FohtGUaXRXNzaksoU2t/oqBYhEgzbcSb2fP3rYuxMTDhccOqUN2LTaW2/UW4OD/tEcj5jpeLVtJleAxhncbF2qjmMC9S1JF48xDkn/AHSuPdep4uixgRSAc5aXq71OudYMhlR6/Doxs6WUXQ0nrERiCI3IkPUhOUt1XtHhxbOUNNQrfqQ6/l5Z/5gqgW1FE9t2Dv0HkKDN84ZcBEDu0MYpbm5VlVKfwU72CRxN0Q0RgKHNkUChKvb3u7eylxsYK1jY7eWvD/9tFSGy8tezbe0KOEgVnn8cVXC1aqq25/8SVW+0PZgbmxtaSFhLubSkleHjP6CqUHTla07/Fo8zVMp/e74cedzEyjtBgY8eZMkmLzDFpqmLZPhOa61tdoxZPPzjin/6Ee+q1lYUEWL5Wo266KVGzf052/+RkmCHcX0tDNJymVP5IODOj7mJO7sCHJIp/V6qEYXF/U8mmssTltb7nEzOaljJ8HQ30AMBIRDv2R62vsfXD84KIZYN4Mjrl7Vca2uilvf0aFF6cUXdXwEQ46x8TXTazCWje97cNCHIoNfs/tiYV5a0vmkOJme9lFn6bQvWuWyFkGaoEBa2A6zS6DJiXCK5B02NWm+hgMvwOhpQidR99EYCRzZMNDE0pInhmJRCQAub6lUy7y4WWBeVKnodWGu4BsxNGT2qU+5hSmY7sSEmoSnTnnDcXnZHeugPS4v+5YWv+X1dR1zHOsmDYdOIEQimY6Ovj6BmwljffRRJaGhIceF4T4DkVSrwsHb25UcgZUYWVap+HPSafcHxwSpo0Pb/0uXxGrBzxps/DvfcRgAIZSZEvPIiGOqbW1KalTk7CLa23UuX3lFoprr1x2rhuqITweYNAMbGJ7c2+ue3SxaCFPm5txT5bnnnPoImySX03sNDTkjBAsCehMkxJERfUcsluWyri98YKATQrOjaQ3EcvmyJ89KRc/nHIWYOgssFrUsXNmsriEzx8GhPSJ+gokVyuHZDdCb4F7ie2dxRLIPeyaJhonGSOBgnqjqMELq6DB75zuVAEolPfbUKfe1uFVkMk5Tu3ZNF/3Kiv6mMWimm//KFV3Yw8PevPr0p3UTgv1ieA+WzU0zMaHXu+eeWhoWCbS5WVUywwbAbLNZUbfgrnd1Cab56Z8WbxylHH4o16+7sGN93UeBXbvmiS2d1nmantZjsRJFUo3HOQmso0NVKCIVvKC3t7Vwbm87G4JJ5wwLGBz0Ri9VKcl1ZkZ/rl1TT+LSJSX+vQEDI5fTa9B0jWM/PqCq1lada95zZsZHwNG0xWoWXBhGCEMV5uedGURDEU9v6JFhktzcVMVeKOg76e5WwoXlRF/m5EmdF64VkiVNWuAn7Fi5/mDJIMzp73e6I7BfqLbd2vLzAaQDXIaLYjjNB5MszglNXo4rdB7k92auRUhohEce9Z/A4bY2NSlpzc/rJpqf9+oPYQGY8tqaLuzHHtOkmf3iU5/SjX7pkvNoQbfk9QAAIABJREFU4XIzECCV8gTY3q7tO4Nzv/1ts3/yT5yhAUZr5skYLndXlxIDiWJhwf1IenpcDIKxFjft6Kg3bOE9Dw8rSbFoZDKyCBgZUXN2fV2LWEdH7VR7ttGM0aLiO3VK75VOeyXLIsDPWK9SvY2P+zgwqrbNTZ33QsF9Uvg3wpgbN5ynPDsrWGJsrBaa2BvAHTBAgGBorrI7O35cibJYVMIFIllbc5aLmWPCNB1hNMFhT6e1mCwsODvkxg2HnBDY0EDmvE5N6T3Pn9d3S4M2k3EYK5/X+wNbsVhXq3otxFlx7P7tuZw+V1+fGuShFw8c+bY2V0gynYgEi/qTBj+7G64Bms4sAOyqgHdCimI67dcD3PYkiR9p1H8CpxlH9cOFlcmoMn3mGZ/FODWlKmx42Ce3P/CAqmiiq8vs135NlXu1qiTyx3/s/3/qlJIP+Od3vuP/l0rpeYWCEn+xqC041TBueUyFIbFT5aC8KxZ1gy8t6Qbt7/fq+eJF3SxXrzoPFwiBAbgkTSrmpiZV+IWCbvz5effoYJABuxYk9Mjemfxy4YJcCUlsqPPOn3d4gKkyzOUcH3faI3jwww8r0YyMeNVMdU6DbXpa53d6en+qZBjz816dwubgs2P4xGLb16cm89iYoBmYLuDfoash05mo1BlWPTXl/t2zsz6ZHuiOvgUNVphMSN2h+KEnoM9g5hh4X587LUaRm6xR6WazXv12d+t6vv9+320BWVG4hFRKsG/une1tt1WgmobJRAGEdzuLAdg59Fp2k6iceR0q8SSOLOo/gbP13tjQDR8aUC0s1F74uZwq5r/9W99aZrNmP//znpgXF51t8dJLnrxpqI2N+c1x7VrtsWxvaw7kiRNKtAhXwHW5sbioSSrQHBcWvEqlqUgFyLaaxiML0uqqe3/Dp0YgA7WQph++zVT/JP65OVXLMAyoqDhvsGVOntTj8AgZHdXnZIFhNNs99ziHemFBN3t7u57/0EOqhnHqA3vGEXB6Wq8/OXnz3VEYVHhIz/lDs7Zc9q09VSbNSrjYVOH0J9rafKjv6dM6HhIdFq9QH9kNcZ6Wlrx6HRhQ8oUB09SkhYlGIK9Bc5rkTuW6uemOjb29PgkqitzCoLtb7zMwUOs8SGETQh9w5fns0C+Bf0jCsI7wGudYOI/lcm3y5txQsTN7laSfxJFF/SdwM99ilsvOj4WzDF0M3LJSUfV1/ryS9aVLuhEeflgVFR347m6zz39erw+2Wyi498WtYnxcN1Sx6HAGlRDHQiXLNvnkSSUeJrFMTPhNB62NOZfAKti1cqPi6WHm9DnUo7wn0megpa4uLV7j434jhoMNXnzR35tdQibjzT64462t+gxzc3o+1DhggWzW6WdYyVarOpfZrF6HZM/idZDIZp3FwiINpk6jFNoc+DsKU8Z/seB3dTlchVS8qUkJ18yht60tb3LncjrW6Wn3j8cJk50BC2cUeUUP44j+AwkXDJ9j6e/3hFoouPoUe4JMRswovndw+VBWT1EDBMTjoCFynXEeosgpofyO88m1w64NyAvRGNcc+HwSRxqNkcCpqMBRacaggMtm3ePj5Zf1+Gef9effuKHZl2bCxamOwtc30/Pz+dsnl9ZWPa5YFMd4bs7xYS72UCQD/MDzwMYREJFAob5B2SP5A8NAEwwplci/GVcWTkJHep3LubEXYpvNTS1WS0v6HfMo2UbPzDh17V3vcjMmxm+RTHk/3gc6ZDhMgUqvWKyVbt8uosir3M5OJXKmyPD8zk5XmyJi2dlRkscfheZkHDsjhs+zvu5GaPPzDp+srmqRLZe1W+jtdVEWC/XLLzt0ZabPjD0Axlokx3DSfGg/iyVBKqXv/dQpv9Z5zl5vHYqQ/WZMgvHDZoIzTrKHmcLrAPUg3wfuCemIZo57w7rh+UkcaTRGAi+VhGMXi4I4qJJu3PCG1dqaIJHb0QfZtnPTmflFGs6gvFUMDirBYm9LIwhLWNgk
- [removed cell output: base64-encoded PNG of the health facility scatter plot]
+ "outputs": [],
+ "source": [
+ "import matplotlib.pyplot as plt\n",
+ "import mlai\n",
+ "import mlai.plot as plot"
],
+ "id": "f169de61-7368-4f03-b326-6ef75d0755cb"
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
"source": [
- "plt.plot(hospital_data.longitude, hospital_data.latitude,'ro', alpha=0.01)"
- ]
+ "fig, ax = plt.subplots(figsize=plot.big_figsize)\n",
+ "ax.plot(data.longitude, data.latitude, 'ro', alpha=0.01)\n",
+ "ax.set_xlabel('longitude')\n",
+ "ax.set_ylabel('latitude')\n",
+ "\n",
+ "mlai.write_figure('nigerian-health-facilities.png', directory='./ml')"
+ ],
+ "id": "6f621a86-5ada-4e65-92f9-48badfd400f6"
},
{
"cell_type": "markdown",
- "id": "1042a712",
"metadata": {},
"source": [
- "There we have the location of these different hospitals. We set alpha in\n",
- "the plot to 0.01 to make the dots transparent, so we can see the\n",
- "locations of each health center."
- ]
+ "\n",
+ "\n",
+ "Figure: Location of the over thirty-four thousand health facilities\n",
+ "registered in the NMIS data across Nigeria. Each facility plotted\n",
+ "according to its latitude and longitude."
+ ],
+ "id": "44788536-f449-491d-a2b0-871e15786e9f"
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "hospital_data = data"
+ ],
+ "id": "bc157f71-4d3e-41f2-8ebd-08a064d3a42e"
},
{
"cell_type": "markdown",
- "id": "036e8aba",
"metadata": {},
"source": [
- "Administrative Zone Geo Data\n",
- "----------------------------\n",
+ "## Administrative Zone Geo Data\n",
+ "\n",
+ "\\[edit\\]\n",
"\n",
"A very common operation is the need to map from locations in a country\n",
"to the administrative regions. If we were building a ride sharing app,\n",
@@ -930,10 +609,10 @@
"could know how many riders we had in different city areas.\n",
"\n",
"Administrative regions have various names like cities, counties,\n",
- "districts or states. These conversions for the administrative regions\n",
+ "districts, or states. These conversions for the administrative regions\n",
"are important for getting the right information to the right people.\n",
"\n",
- "Of course, if we had a knowlegdeable Nigerian, we could ask her about\n",
+ "Of course, if we had a knowledgeable Nigerian, we could ask her about\n",
"what the right location for each of these health facilities is, which\n",
"state is it in? But given that we have the latitude and longitude, we\n",
"should be able to find out automatically what the different states are.\n",
@@ -949,34 +628,115 @@
"They have been made available by the [Humanitarian Data\n",
"Exchange](https://data.humdata.org/), you can also find other states\n",
"data from the same site."
- ]
+ ],
+ "id": "b23c6eeb-223a-4a3c-9edb-8d9717300cc5"
},
{
- "cell_type": "code",
- "execution_count": 8,
- "id": "1cd93f7b",
+ "cell_type": "markdown",
"metadata": {},
- "outputs": [],
"source": [
- "import zipfile"
- ]
+ "## Nigerian Administrative Zones Data\n",
+ "\n",
+ "\\[edit\\]\n",
+ "\n",
+ "For ease of use we’ve packaged this data set in the `pods` library"
+ ],
+ "id": "c7084525-45f7-4550-91ab-30901e727a60"
},
{
"cell_type": "code",
- "execution_count": 9,
- "id": "e90d978c",
+ "execution_count": null,
"metadata": {},
"outputs": [],
"source": [
+ "data = pods.datasets.nigerian_administrative_zones()['Y']\n",
+ "data.set_index(\"admin1Name_en\", inplace=True)\n",
+ "data.head()"
+ ],
+ "id": "a45a41c2-7f71-4b1d-82bd-182533fe46a9"
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Alternatively you can access the data directly with the following\n",
+ "commands.\n",
+ "\n",
+ "``` python\n",
+ "import zipfile\n",
+ "\n",
"admin_zones_url = 'https://data.humdata.org/dataset/81ac1d38-f603-4a98-804d-325c658599a3/resource/0bc2f7bb-9ff6-40db-a569-1989b8ffd3bc/download/nga_admbnda_osgof_eha_itos.gdb.zip'\n",
"_, msg = urllib.request.urlretrieve(admin_zones_url, 'nga_admbnda_osgof_eha_itos.gdb.zip')\n",
- "with zipfile.ZipFile('nga_admbnda_osgof_eha_itos.gdb.zip', 'r') as zip_ref:\n",
- " zip_ref.extractall('nga_admbnda_osgof_eha_itos.gdb')"
- ]
+ "with zipfile.ZipFile('/content/nga_admbnda_osgof_eha_itos.gdb.zip', 'r') as zip_ref:\n",
+ " zip_ref.extractall('/content/nga_admbnda_osgof_eha_itos.gdb')\n",
+ "\n",
+ "import geopandas as gpd\n",
+ "import fiona\n",
+ "\n",
+ "states_file = \"./nga_admbnda_osgof_eha_itos.gdb/nga_admbnda_osgof_eha_itos.gdb/nga_admbnda_osgof_eha_itos.gdb/nga_admbnda_osgof_eha_itos.gdb/\"\n",
+ "\n",
+ "layers = fiona.listlayers(states_file)\n",
+ "data = gpd.read_file(states_file, layer=1)\n",
+ "data.crs = \"EPSG:4326\"\n",
+ "data = data.set_index('admin1Name_en')\n",
+ " \n",
+ "```"
+ ],
+ "id": "6c915ec0-5c49-4e74-9e01-f8f16343700f"
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import matplotlib.pyplot as plt\n",
+ "import mlai\n",
+ "import mlai.plot as plot"
+ ],
+ "id": "bea028f6-e900-4da3-8ac6-3ff16c462d5e"
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "fig, ax = plt.subplots(figsize=plot.big_figsize)\n",
+ "data.plot(ax=ax, color='white', edgecolor='black')\n",
+ "ax.set_xlabel('longitude')\n",
+ "ax.set_ylabel('latitude')\n",
+ "\n",
+ "mlai.write_figure('nigerian-state-borders.svg', directory='./ml')"
+ ],
+ "id": "eb6fe05d-970a-413f-a565-d6b321ca3305"
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "\n",
+ "\n",
+ "Figure: Border locations for the thirty-six different states of\n",
+ "Nigeria."
+ ],
+ "id": "8791c8a9-d107-4a1d-9338-5f24947e6651"
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "zones_gdf = data\n",
+ "zones_gdf['admin1Name_en'] = zones_gdf.index"
+ ],
+ "id": "70c7efa9-2e00-45b9-890e-fb483e2978bf"
},
{
"cell_type": "markdown",
- "id": "8584ac77",
"metadata": {},
"source": [
"Now we have this data of the outlines of the different states in\n",
@@ -1003,19 +763,18 @@
"coordinates of individual health facilities (which we already converted\n",
"to the appropriate `Point` type when moving the health data to a\n",
"GeoDataFrame.)"
- ]
+ ],
+ "id": "b5834f5e-ea5f-4f00-80ba-59483ebe6ce7"
},
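+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "To see the test that the spatial join will automate for every row, here\n",
+ "is a minimal point-in-polygon sketch. The coordinates and the state name\n",
+ "‘Federal Capital Territory’ are illustrative assumptions about what sits\n",
+ "in `zones_gdf`, not something the rest of the notebook relies on.\n",
+ "\n",
+ "``` python\n",
+ "from shapely.geometry import Point\n",
+ "\n",
+ "# An illustrative facility location given as (longitude, latitude).\n",
+ "facility = Point(7.49, 9.06)\n",
+ "\n",
+ "# Check whether the point falls inside one state's boundary polygon.\n",
+ "# 'Federal Capital Territory' is an assumed entry in the zones_gdf index.\n",
+ "boundary = zones_gdf.loc['Federal Capital Territory', 'geometry']\n",
+ "facility.within(boundary)\n",
+ "```"
+ ],
+ "id": "a3f1c2d4-5b6e-4f70-8a91-b2c3d4e5f601"
+ },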
{
"cell_type": "markdown",
- "id": "525e9b77",
"metadata": {},
"source": [
- "Joining a GeoDataFrame\n",
- "----------------------\n",
+ "## Joining a GeoDataFrame\n",
"\n",
"The first database join we’re going to do is a special one, it’s a\n",
- "‘spatial join’. We’re going to join together the locations of the\n",
- "hospitals with their states.\n",
+ "‘spatial join’. We’re going to join the locations of the hospitals with\n",
+ "their states.\n",
"\n",
"This join is unusual because it requires some mathematics to get right.\n",
"The outline files give us the borders of the different states in\n",
@@ -1026,137 +785,120 @@
"belongs to. Fortunately, the mathematics you need is already programmed\n",
"for you in GeoPandas. That means all we need to do is convert our\n",
"`pandas` dataframe of health facilities into a `GeoDataFrame` which\n",
- "allows us to do the spatial join."
- ]
+ "allows us to do the spatial join.\n",
+ "\n",
+ "First, we convert the hospital data to a `geopandas` data frame."
+ ],
+ "id": "b9a66674-b01c-419b-9358-de77fb7e024d"
},
{
"cell_type": "code",
- "execution_count": 10,
- "id": "db249d99",
+ "execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import geopandas as gpd"
- ]
+ ],
+ "id": "8d002657-a7c9-4743-abd9-788d159e6097"
},
{
"cell_type": "code",
- "execution_count": 11,
- "id": "b1c2304d",
+ "execution_count": null,
"metadata": {},
"outputs": [],
"source": [
- "hosp_gdf = gpd.GeoDataFrame(\n",
- " hospital_data, geometry=gpd.points_from_xy(hospital_data.longitude, hospital_data.latitude))\n",
+ "geometry = gpd.points_from_xy(hospital_data.longitude, hospital_data.latitude)\n",
+ "hosp_gdf = gpd.GeoDataFrame(hospital_data, \n",
+ " geometry=geometry)\n",
"hosp_gdf.crs = \"EPSG:4326\""
- ]
+ ],
+ "id": "bed94194-2d0f-42e6-9c1d-396e3ebf035a"
},
{
"cell_type": "markdown",
- "id": "59d9a338",
"metadata": {},
"source": [
"There are some technial details here: the `crs` refers to the coordinate\n",
"system in use by a particular GeoDataFrame. `EPSG:4326` is the standard\n",
"coordinate system of latitude/longitude."
- ]
+ ],
+ "id": "2cb1dee1-3bad-44ee-a854-9c0d17527f6b"
},
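+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "The `crs` matters whenever layers are combined or distances are computed.\n",
+ "As a small sketch (not needed for the join itself), we could check the\n",
+ "coordinate system and reproject; `EPSG:3857` below is just an illustrative\n",
+ "choice of metric projection.\n",
+ "\n",
+ "``` python\n",
+ "# hosp_gdf is in EPSG:4326, i.e. longitude/latitude in degrees.\n",
+ "print(hosp_gdf.crs)\n",
+ "\n",
+ "# Reprojecting (e.g. to Web Mercator) would give coordinates in metres.\n",
+ "hosp_metric = hosp_gdf.to_crs('EPSG:3857')\n",
+ "hosp_metric.geometry.head()\n",
+ "```"
+ ],
+ "id": "c5d6e7f8-9a0b-4c1d-8e2f-3a4b5c6d7e81"
+ },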
{
"cell_type": "markdown",
- "id": "e7712d27",
"metadata": {},
"source": [
- "Your First Join: Converting GPS Coordinates to States\n",
- "-----------------------------------------------------\n",
+ "## Your First Join: Converting GPS Coordinates to States\n",
"\n",
"Now we have the data in the `GeoPandas` format, we can start converting\n",
"into states. We will use the [`fiona`](https://pypi.org/project/Fiona/)\n",
"library for reading the right layers from the files. Before we do the\n",
"join, lets plot the location of health centers and states on the same\n",
"map."
- ]
+ ],
+ "id": "37517ab2-bd79-4384-bca0-07238dab3f17"
},
{
"cell_type": "code",
- "execution_count": 12,
- "id": "b75f9523",
+ "execution_count": null,
"metadata": {},
"outputs": [],
"source": [
- "\n",
- "import fiona"
- ]
+ "world_gdf = gpd.read_file(gpd.datasets.get_path('naturalearth_lowres'))\n",
+ "world_gdf.crs = \"EPSG:4326\"\n",
+ "nigeria_gdf = world_gdf[(world_gdf['name'] == 'Nigeria')]"
+ ],
+ "id": "35e27957-c44c-4cd5-8415-1f3158fdd2bf"
},
{
"cell_type": "code",
- "execution_count": 13,
- "id": "340ec265",
+ "execution_count": null,
"metadata": {},
- "outputs": [
- {
- "ename": "ImportError",
- "evalue": "The descartes package is required for plotting polygons in geopandas. You can install it using 'conda install -c conda-forge descartes' or 'pip install descartes'.",
- "output_type": "error",
- "traceback": [
- "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
- "\u001b[0;31mModuleNotFoundError\u001b[0m Traceback (most recent call last)",
- "\u001b[0;32m~/anaconda3/lib/python3.6/site-packages/geopandas/plotting.py\u001b[0m in \u001b[0;36m_plot_polygon_collection\u001b[0;34m(ax, geoms, values, color, cmap, vmin, vmax, **kwargs)\u001b[0m\n\u001b[1;32m 119\u001b[0m \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 120\u001b[0;31m \u001b[0;32mfrom\u001b[0m \u001b[0mdescartes\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mpatch\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mPolygonPatch\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 121\u001b[0m \u001b[0;32mexcept\u001b[0m \u001b[0mImportError\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
- "\u001b[0;31mModuleNotFoundError\u001b[0m: No module named 'descartes'",
- "\nDuring handling of the above exception, another exception occurred:\n",
- "\u001b[0;31mImportError\u001b[0m Traceback (most recent call last)",
- "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[1;32m 5\u001b[0m \u001b[0mworld\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcrs\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m\"EPSG:4326\"\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 6\u001b[0m \u001b[0mnigeria\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mworld\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mworld\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'name'\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0;34m'Nigeria'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 7\u001b[0;31m \u001b[0mbase\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mnigeria\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mplot\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mcolor\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m'white'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0medgecolor\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m'black'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0malpha\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mfigsize\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;36m11\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;36m11\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 8\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 9\u001b[0m \u001b[0mlayers\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mfiona\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mlistlayers\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mstates_file\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
- "\u001b[0;32m~/anaconda3/lib/python3.6/site-packages/geopandas/geodataframe.py\u001b[0m in \u001b[0;36mplot\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m 919\u001b[0m \u001b[0;32mfrom\u001b[0m \u001b[0mthere\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 920\u001b[0m \"\"\"\n\u001b[0;32m--> 921\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mplot_dataframe\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 922\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 923\u001b[0m \u001b[0mplot\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m__doc__\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mplot_dataframe\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m__doc__\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
- "\u001b[0;32m~/anaconda3/lib/python3.6/site-packages/geopandas/plotting.py\u001b[0m in \u001b[0;36mplot_dataframe\u001b[0;34m(df, column, cmap, color, ax, cax, categorical, legend, scheme, k, vmin, vmax, markersize, figsize, legend_kwds, categories, classification_kwds, missing_kwds, aspect, **style_kwds)\u001b[0m\n\u001b[1;32m 621\u001b[0m \u001b[0mmarkersize\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mmarkersize\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 622\u001b[0m \u001b[0maspect\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0maspect\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 623\u001b[0;31m \u001b[0;34m**\u001b[0m\u001b[0mstyle_kwds\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 624\u001b[0m )\n\u001b[1;32m 625\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
- "\u001b[0;32m~/anaconda3/lib/python3.6/site-packages/geopandas/plotting.py\u001b[0m in \u001b[0;36mplot_series\u001b[0;34m(s, cmap, color, ax, figsize, aspect, **style_kwds)\u001b[0m\n\u001b[1;32m 412\u001b[0m \u001b[0mvalues_\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mvalues\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mpoly_idx\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mcmap\u001b[0m \u001b[0;32melse\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 413\u001b[0m _plot_polygon_collection(\n\u001b[0;32m--> 414\u001b[0;31m \u001b[0max\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mpolys\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mvalues_\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mfacecolor\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mfacecolor\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcmap\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mcmap\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mstyle_kwds\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 415\u001b[0m )\n\u001b[1;32m 416\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
- "\u001b[0;32m~/anaconda3/lib/python3.6/site-packages/geopandas/plotting.py\u001b[0m in \u001b[0;36m_plot_polygon_collection\u001b[0;34m(ax, geoms, values, color, cmap, vmin, vmax, **kwargs)\u001b[0m\n\u001b[1;32m 121\u001b[0m \u001b[0;32mexcept\u001b[0m \u001b[0mImportError\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 122\u001b[0m raise ImportError(\n\u001b[0;32m--> 123\u001b[0;31m \u001b[0;34m\"The descartes package is required for plotting polygons in geopandas. \"\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 124\u001b[0m \u001b[0;34m\"You can install it using 'conda install -c conda-forge descartes' or \"\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 125\u001b[0m \u001b[0;34m\"'pip install descartes'.\"\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
- "\u001b[0;31mImportError\u001b[0m: The descartes package is required for plotting polygons in geopandas. You can install it using 'conda install -c conda-forge descartes' or 'pip install descartes'."
- ]
- },
- {
- "data": {
- "image/png": "iVBORw0KGgoAAAANSUhEUgAAAnsAAAJ5CAYAAADW/PacAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjMsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+AADFEAAAWB0lEQVR4nO3dX4jv913n8de7iVGotcLmLEj+mMCebo1BiDuESi+stLskuUhuupJA0UroudkouxYholSJV7aIIMQ/Z9nSVbDZ6IUeJJILN6KIKTmlu8GkBA7RbQ4Rcqw1N6WN2f3sxYwynk7O/Gbym5PkxeMBB+b7+33mN2/4MHOefL/zm++stQIAQKd3vdUDAABwcsQeAEAxsQcAUEzsAQAUE3sAAMXEHgBAsUNjb2Y+OzOvzMxfvcHzMzO/NjMXZubZmfnB7Y8JAMBxbHJm73NJ7rrC83cnOb3370yS33jzYwEAsA2Hxt5a68+S/P0VltyX5LfXrqeTfPfMfM+2BgQA4Pi28Tt7NyR5ad/xxb3HAAB4i127hdeYAx478B5sM3Mmu5d68+53v/vfvf/979/ClwcA6PfFL37x79Zap476eduIvYtJbtp3fGOSlw9auNY6m+Rskuzs7Kzz589v4csDAPSbmf9znM/bxmXcc0l+bO9duR9I8upa62+38LoAALxJh57Zm5nPJ/lQkutn5mKSX0jybUmy1vrNJE8kuSfJhSRfT/ITJzUsAABHc2jsrbUeOOT5leQ/bW0iAAC2xh00AACKiT0AgGJiDwCgmNgDACgm9gAAiok9AIBiYg8AoJjYAwAoJvYAAIqJPQCAYmIPAKCY2AMAKCb2AACKiT0AgGJiDwCgmNgDACgm9gAAiok9AIBiYg8AoJjYAwAoJvYAAIqJPQCAYmIPAKCY2AMAKCb2AACKiT0AgGJiDwCgmNgDACgm9gAAiok9AIBiYg8AoJjYAwAoJvYAAIqJPQCAYmIPAKCY2AMAKCb2AACKiT0AgGJiDwCgmNgDACgm9gAAiok9AIBiYg8AoJjYAwAoJvYAAIqJPQCAYmIPAKCY2AMAKCb2AACKiT0AgGJiDwCgmNgDACgm9gAAiok9AIBiYg8AoJjYAwAoJvYAAIqJPQCAYmIPAKCY2AMAKCb2AACKiT0AgGJiDwCgmNgDACgm9gAAiok9AIBiYg8AoJjYAwAoJvYAAIqJPQCAYmIPAKCY2AMAKCb2AACKiT0AgGJiDwCgmNgDACgm9gAAiok9AIBiYg8AoJjYAwAoJvYAAIqJPQCAYmIPAKCY2AMAKCb2AACKiT0AgGJiDwCgmNgDACgm9gAAiok9AIBiYg8AoJjYAwAoJvYAAIqJPQCAYmIPAKCY2AMAKCb2AACKiT0AgGJiDwCgmNgDACgm9gAAiok9AIBiYg8AoJjYAwAoJvYAAIqJPQCAYmIPAKCY2AMAKCb2AACKiT0AgGJiDwCg2EaxNzN3zcwLM3NhZh4+4PmbZ+apmfnSzDw7M/dsf1QAAI7q0NibmWuSPJrk7iS3JXlgZm67bNnPJ3l8rXVHkvuT/Pq2BwUA4Og2ObN3Z5ILa60X11qvJXksyX2XrVlJvmvv4/cmeXl7IwIAcFybxN4NSV7ad3xx77H9fjHJx2bmYpInkvzkQS80M2dm5vzMnL906dIxxgUA4Cg2ib054LF12fEDST631roxyT1JfmdmvuW111pn11o7a62dU6dOHX1aAACOZJPYu5jkpn3HN+ZbL9M+mOTxJFlr/WWS70hy/TYGBADg+DaJvWeSnJ6ZW2fmuuy+AePcZWu+kuTDSTIz35fd2HOdFgDgLXZo7K21Xk/yUJInk3w5u++6fW5mHpmZe/eWfTLJJ2bmfyf5fJKPr7Uuv9QLAMBVdu0mi9ZaT2T3jRf7H/vUvo+fT/LB7Y4GAMCb5Q4aAADFxB4AQDGxBwBQTOwBABQTewAAxcQeAEAxsQcAUEzsAQAUE3sAAMXEHgBAMbEHAFBM7AEAFBN7AADFxB4AQDGxBwBQTOwBABQTewAAxcQeAEAxsQcAUEzsAQAUE3sAAMXEHgBAMbEHAFBM7AEAFBN7AADFxB4AQDGxBwBQTOwBABQTewAAxcQeAEAxsQcAUEzsAQAUE3sAAMXEHgBAMbEHAFBM7AEAFBN7AADFxB4AQDGxBwBQTOwBABQTewAAxcQeAEAxsQcAUEzsAQAUE3sAAMXEHgBAMbEHAFBM7AEAFBN7AADFxB4AQDGxBwBQTOwBABQTewAAxcQeAEAxsQcAUEzsAQAUE3sAAMXEHgBAMbEHAFBM7AEAFBN7AADFxB4AQDGxBwBQTOwBABQTewAAxcQeAEAxsQcAUEzsAQAUE3sAAMXEHgBAMbEHAFBM7AEAFBN7AADFxB4AQDGxBwBQTOwBABQTewAAxcQeAEAxsQcAUEzsAQAUE3sAAMXEHgBAMbEHAFBM7AEAFBN7AADFxB4AQDGxBwBQTOwBABQTewAAxcQeAEAxsQcAUEzsAQAUE3sAAMXEHgBAMbEHAFBM7AEAFBN7AADFxB4AQDGxBwBQTOwBABQTewAAxcQeAEAxsQcAUEzsAQAUE3sAAMXEHgBAMbEHAFBM7AEAFBN7AADFxB4AQLGNYm9m7pqZF2bmwsw8/AZrfnRmnp+Z52bmd7c7JgAAx3HtYQtm5pokjyb590kuJnlmZs6ttZ7ft+Z0kp9N8sG11tdm5l+f1MAAAGxukzN7dya5sNZ6ca31WpLHktx32ZpPJHl0rfW1JFlrvbLdMQEAOI5NYu+GJC/tO76499h+70vyvpn5i5l5embu2taAAAAc36GXcZPMAY+tA17ndJIPJbkxyZ/PzO1rrX/4Fy80cybJmSS5+eabjzwsAABHs8mZvYtJbtp3fGOSlw9Y84drrX9ca/11kheyG3//wlrr7FprZ621c+rUqePODADAhjaJvWeSnJ6ZW2fmuiT3Jzl32Zo/SPIjSTIz12f3su6L2xwUAICjOzT21lqvJ3koyZNJvpzk8bXWczPzyMzcu7fsySRfnZnnkzyV5GfWWl89qaEBANjMrHX5r99dHTs7O+v8+fNvydcGAHinmZkvrrV2jvp57qABAFBM7AEAFBN7AADFxB4AQDGxBwBQTOwBABQTewAAxcQeAEAxsQcAUEzsAQAUE3sAAMXEHgBAMbEHAFBM7AEAFBN7AADFxB4AQDGxBwBQTOwBABQTewAAxcQeAEAxsQcAUEzsAQAUE3sAAMXEHgBAMbEHAFBM7AEAFBN7AADFxB4AQDGxBwBQTOwBABQTewAAxcQeAEAxsQcAUEzsAQAUE3sAAMXEHgBAMbEHAFBM7AEAFBN7AADFxB4AQDGxBwBQTOwBABQTewAAxcQeAEAxsQcAUEzsAQAUE3sAAMXEHgBAMbEHAFBM7AEAFBN7AADFxB4AQDGxBwBQTOwBABQTewAAxcQeAEAxsQcAUEzsAQAUE3sAAMXEHgBAMbEHAFBM7AEAFBN7AADFxB4AQDGxBwBQTOwBABQTewA
AxcQeAEAxsQcAUEzsAQAUE3sAAMXEHgBAMbEHAFBM7AEAFBN7AADFxB4AQDGxBwBQTOwBABQTewAAxcQeAEAxsQcAUEzsAQAUE3sAAMXEHgBAMbEHAFBM7AEAFBN7AADFxB4AQDGxBwBQTOwBABQTewAAxcQeAEAxsQcAUEzsAQAUE3sAAMXEHgBAMbEHAFBM7AEAFBN7AADFxB4AQDGxBwBQTOwBABQTewAAxcQeAEAxsQcAUEzsAQAUE3sAAMXEHgBAMbEHAFBM7AEAFBN7AADFNoq9mblrZl6YmQsz8/AV1n10ZtbM7GxvRAAAjuvQ2JuZa5I8muTuJLcleWBmbjtg3XuS/FSSL2x7SAAAjmeTM3t3Jrmw1npxrfVakseS3HfAul9K8ukk39jifAAAvAmbxN4NSV7ad3xx77F/NjN3JLlprfVHW5wNAIA3aZPYmwMeW//85My7kvxqkk8e+kIzZ2bm/Mycv3Tp0uZTAgBwLJvE3sUkN+07vjHJy/uO35Pk9iR/OjN/k+QDSc4d9CaNtdbZtdbOWmvn1KlTx58aAICNbBJ7zyQ5PTO3zsx1Se5Pcu6fnlxrvbrWun6tdcta65YkTye5d611/kQmBgBgY4fG3lrr9SQPJXkyyZeTPL7Wem5mHpmZe096QAAAju/aTRattZ5I8sRlj33qDdZ+6M2PBQDANriDBgBAMbEHAFBM7AEAFBN7AADFxB4AQDGxBwBQTOwBABQTewAAxcQeAEAxsQcAUEzsAQAUE3sAAMXEHgBAMbEHAFBM7AEAFBN7AADFxB4AQDGxBwBQTOwBABQTewAAxcQeAEAxsQcAUEzsAQAUE3sAAMXEHgBAMbEHAFBM7AEAFBN7AADFxB4AQDGxBwBQTOwBABQTewAAxcQeAEAxsQcAUEzsAQAUE3sAAMXEHgBAMbEHAFBM7AEAFBN7AADFxB4AQDGxBwBQTOwBABQTewAAxcQeAEAxsQcAUEzsAQAUE3sAAMXEHgBAMbEHAFBM7AEAFBN7AADFxB4AQDGxBwBQTOwBABQTewAAxcQeAEAxsQcAUEzsAQAUE3sAAMXEHgBAMbEHAFBM7AEAFBN7AADFxB4AQDGxBwBQTOwBABQTewAAxcQeAEAxsQcAUEzsAQAUE3sAAMXEHgBAMbEHAFBM7AEAFBN7AADFxB4AQDGxBwBQTOwBABQTewAAxcQeAEAxsQcAUEzsAQAUE3sAAMXEHgBAMbEHAFBM7AEAFBN7AADFxB4AQDGxBwBQTOwBABQTewAAxcQeAEAxsQcAUEzsAQAUE3sAAMXEHgBAMbEHAFBM7AEAFBN7AADFxB4AQDGxBwBQTOwBABQTewAAxcQeAEAxsQcAUEzsAQAUE3sAAMXEHgBAMbEHAFBM7AEAFNso9mbmrpl5YWYuzMzDBzz/0zPz/Mw8OzN/MjPfu/1RAQA4qkNjb2auSfJokruT3JbkgZm57bJlX0qys9b6gSS/n+TT2x4UAICj2+TM3p1JLqy1XlxrvZbksST37V+w1npqrfX1vcOnk9y43TEBADiOTWLvhiQv7Tu+uPfYG3kwyR8f9MTMnJmZ8zNz/tKlS5tPCQDAsWwSe3PAY+vAhTMfS7KT5DMHPb/WOrvW2llr7Zw6dWrzKQEAOJZrN1hzMclN+45vTPLy5Ytm5iNJfi7JD6+1vrmd8QAAeDM2ObP3TJLTM3PrzFyX5P4k5/YvmJk7kvxWknvXWq9sf0wAAI7j0Nhba72e5KEkTyb5cpLH11rPzcwjM3Pv3rLPJPnOJL83M/9rZs69wcsBAHAVbXIZN2utJ5I8cdljn9r38Ue2PBcAAFvgDhoAAMXEHgBAMbEHAFBM7AEAFBN7AADFxB4AQDGxBwBQTOwBABQTewAAxcQeAEAxsQcAUEzsAQAUE3sAAMXEHgBAMbEHAFBM7AEAFBN7AADFxB4AQDGxBwBQTOwBABQTewAAxcQeAEAxsQcAUEzsAQAUE3sAAMXEHgBAMbEHAFBM7AEAFBN7AADFxB4AQDGxBwBQTOwBABQTewAAxcQeAEAxsQcAUEzsAQAUE3sAAMXEHgBAMbEHAFBM7AEAFBN7AADFxB4AQDGxBwBQTOwBABQTewAAxcQeAEAxsQcAUEzsAQAUE3sAAMXEHgBAMbEHAFBM7AEAFBN7AADFxB4AQDGxBwBQTOwBABQTewAAxcQeAEAxsQcAUEzsAQAUE3sAAMXEHgBAMbEHAFBM7AEAFBN7AADFxB4AQDGxBwBQTOwBABQTewAAxcQeAEAxsQcAUEzsAQAUE3sAAMXEHgBAMbEHAFBM7AEAFBN7AADFxB4AQDGxBwBQTOwBABQTewAAxcQeAEAxsQcAUEzsAQAUE3sAAMXEHgBAMbEHAFBM7AEAFBN7AADFxB4AQDGxBwBQTOwBABQTewAAxcQeAEAxsQcAUEzsAQAUE3sAAMXEHgBAMbEHAFBM7AEAFBN7AADFxB4AQDGxBwBQTOwBABQTewAAxcQeAEAxsQcAUEzsAQAUE3sAAMXEHgBAsY1ib2bumpkXZubCzDx8wPPfPjP/Y+/5L8zMLdseFACAozs09mbmmiSPJrk7yW1JHpiZ2y5b9mCSr621/k2SX03yy9seFACAo9vkzN6dSS6stV5ca72W5LEk91225r4k/33v499P8uGZme2NCQDAcWwSezckeWnf8cW9xw5cs9Z6PcmrSf7VNgYEAOD4rt1gzUFn6NYx1mRmziQ5s3f4zZn5qw2+Pu8s1yf5u7d6CLbOvnayr53sa69/e5xP2iT2Lia5ad/xjUlefoM1F2fm2iTvTfL3l7/QWutskrNJMjPn11o7xxmaty/72sm+drKvnexrr5k5f5zP2+Qy7jNJTs/MrTNzXZL7k5y7bM25JD++9/FHk/zPtda3nNkDAODqOvTM3lrr9Zl5KMmTSa5J8tm11nMz80iS82utc0n+W5LfmZkL2T2jd/9JDg0AwGY2uYybtdYTSZ647LFP7fv4G0n+4xG/9tkjruedwb52sq+d7Gsn+9rrWHs7rrYCAPRyuzQAgGInHntutdZpg3396Zl5fmaenZk/mZnvfSvm5GgO29d96z46M2tmvOPvHWCTfZ2ZH937nn1uZn73as/I0W3wc/jmmXlqZr6097P4nrdiTo5mZj47M6+80Z+nm12/trfvz87MDx72micae2611mnDff1Skp211g9k964qn766U3JUG+5rZuY9SX4qyReu7oQcxyb7OjOnk/xskg+utb4/yX++6oNyJBt+v/58ksfXWndk942Tv351p+SYPpfkris8f3eS03v/ziT5jcNe8KTP7LnVWqdD93Wt9dRa6+t7h09n9+8z8va2yfdrkvxSduP9G1dzOI5tk339RJJH11pfS5K11itXeUaObpN9XUm+a+/j9+Zb/0Yub0NrrT/LAX+reJ/7kvz22vV0ku+eme+50muedOy51VqnTfZ1vweT/PGJTsQ2HLqvM3NHkpvWWn90NQfjTdnk+/V9Sd43M38xM0/PzJXOKvD2sMm+/mKSj83Mxez+RY2fvDqjccKO+n
/wZn965U3Y2q3WeFvZeM9m5mNJdpL88IlOxDZccV9n5l3Z/VWLj1+tgdiKTb5fr83uJaEPZfcs/J/PzO1rrX844dk4vk329YEkn1tr/crM/FB2/x7u7Wut/3fy43GCjtxNJ31m7yi3WsuVbrXG28om+5qZ+UiSn0ty71rrm1dpNo7vsH19T5Lbk/zpzPxNkg8kOedNGm97m/4c/sO11j+utf46yQvZjT/evjbZ1weTPJ4ka62/TPId2b1vLu9sG/0fvN9Jx55brXU6dF/3Lvf9VnZDz+//vDNccV/XWq+uta5fa92y1rolu7+Lee9a61j3auSq2eTn8B8k+ZEkmZnrs3tZ98WrOiVHtcm+fiXJh5NkZr4vu7F36apOyUk4l+TH9t6V+4Ekr661/vZKn3Cil3Hdaq3Thvv6mSTfmeT39t5v85W11r1v2dAcasN95R1mw319Msl/mJnnk/zfJD+z1vrqWzc1h9lwXz+Z5L/OzH/J7mW+jzuZ8vY3M5/P7q9UXL/3+5a/kOTbkmSt9ZvZ/f3Le5JcSPL1JD9x6GvadwCAXu6gAQBQTOwBABQTewAAxcQeAEAxsQcAUEzsAQAUE3sAAMXEHgBAsf8PcJbBV772OwgAAAAASUVORK5CYII=\n",
- "text/plain": [
- "
"
- ]
- },
- "metadata": {
- "needs_background": "light"
- },
- "output_type": "display_data"
- }
+ "outputs": [],
+ "source": [
+ "import matplotlib.pyplot as plt\n",
+ "import mlai.plot as plot\n",
+ "import mlai"
],
+ "id": "9fa2dd6a-b5b8-4974-b342-3dacf232f14e"
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
"source": [
- "states_file = \"nga_admbnda_osgof_eha_itos.gdb/nga_admbnda_osgof_eha_itos.gdb/nga_admbnda_osgof_eha_itos.gdb/nga_admbnda_osgof_eha_itos.gdb/\"\n",
+ "fig, ax = plt.subplots(figsize=plot.big_figsize)\n",
+ "nigeria_gdf.plot(ax=ax, color='white', edgecolor='black', alpha=0)\n",
+ "zones_gdf.plot(ax=ax, color='white', edgecolor='black')\n",
+ "hosp_gdf.plot(ax=ax, color='b', alpha=0.02)\n",
+ "ax.set_xlabel('longitude')\n",
+ "ax.set_ylabel('latitude')\n",
"\n",
- "# geopandas included map, filtered to just Nigeria\n",
- "world = gpd.read_file(gpd.datasets.get_path('naturalearth_lowres'))\n",
- "world.crs = \"EPSG:4326\"\n",
- "nigeria = world[(world['name'] == 'Nigeria')]\n",
- "base = nigeria.plot(color='white', edgecolor='black', alpha=0, figsize=(11, 11))\n",
- "\n",
- "layers = fiona.listlayers(states_file)\n",
- "zones_gdf = gpd.read_file(states_file, layer=1)\n",
- "zones_gdf.crs = \"EPSG:4326\"\n",
- "zones_gdf = zones_gdf.set_index('admin1Name_en')\n",
- "zones_gdf.plot(ax=base, color='white', edgecolor='black')\n",
- "\n",
- "# We can now plot our ``GeoDataFrame``.\n",
- "hosp_gdf.plot(ax=base, color='b', alpha=0.02, )\n",
+ "mlai.write_figure('nigeria-states-and-health-facilities.svg', directory='./ml')"
+ ],
+ "id": "dc4deada-1c37-4ab8-aba9-9e4377823db3"
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "\n",
"\n",
- "plt.show()"
- ]
+ "Figure: The outline of the thirty-six different states of nigeria\n",
+ "with the location sof the health centers plotted on the map."
+ ],
+ "id": "9682fa64-d4aa-4645-95cb-168d6ddb2a31"
},
{
"cell_type": "markdown",
- "id": "97ac1323",
"metadata": {},
"source": [
- "Performing the Spatial Join\n",
- "---------------------------\n",
+ "## Performing the Spatial Join\n",
"\n",
"We’ve now plotted the different health center locations across the\n",
"states. You can clearly see that each of the dots falls within a\n",
- "different state. For helping the visualisation, we’ve made the dots\n",
+ "different state. For helping the visualization, we’ve made the dots\n",
"somewhat transparent (we set the `alpha` in the plot). This means that\n",
"we can see the regions where there are more health centers, you should\n",
"be able to spot where the major cities in Nigeria are given the\n",
@@ -1167,80 +909,80 @@
"`GeoPandas` provides us with the spatial join. Here we’re going to do a\n",
"[`left` or `outer`\n",
"join](https://en.wikipedia.org/wiki/Join_(SQL)#Left_outer_join)."
- ]
+ ],
+ "id": "dc505ed5-c0d0-4141-be37-85db59a6f340"
},
{
"cell_type": "code",
"execution_count": null,
- "id": "4ad0c444",
"metadata": {},
"outputs": [],
"source": [
"from geopandas.tools import sjoin"
- ]
+ ],
+ "id": "4e2ab850-a067-4fb6-a8b2-c5777ca7982b"
},
{
"cell_type": "markdown",
- "id": "0c5895fa",
"metadata": {},
"source": [
"We have two GeoPandas data frames, `hosp_gdf` and `zones_gdf`. Let’s\n",
"have a look at the columns the contain."
- ]
+ ],
+ "id": "269a4b98-70f7-4732-a37f-f54889116e88"
},
{
"cell_type": "code",
"execution_count": null,
- "id": "bd3f6eae",
"metadata": {},
"outputs": [],
"source": [
"hosp_gdf.columns"
- ]
+ ],
+ "id": "eac229a0-98c4-4d6f-915e-7fe1e2c99fb5"
},
{
"cell_type": "markdown",
- "id": "841e856e",
"metadata": {},
"source": [
"We can see that this is the GeoDataFrame containing the information\n",
"about the hospital. Now let’s have a look at the `zones_gdf` data frame."
- ]
+ ],
+ "id": "920deb10-9802-4232-8944-a597a5563484"
},
{
"cell_type": "code",
"execution_count": null,
- "id": "5c4641b0",
"metadata": {},
"outputs": [],
"source": [
"zones_gdf.columns"
- ]
+ ],
+ "id": "af60a622-c0e7-4d2c-8bd5-e0f3ebaecaee"
},
{
"cell_type": "markdown",
- "id": "4efda939",
"metadata": {},
"source": [
"You can see that this data frame has a different set of columns. It has\n",
"all the different administrative regions. But there is one column name\n",
"that overlaps. We can find it by looking for the intersection between\n",
"the two sets."
- ]
+ ],
+ "id": "85ca9607-306a-4f8c-84b8-52cd6299341b"
},
{
"cell_type": "code",
"execution_count": null,
- "id": "72114d0b",
"metadata": {},
"outputs": [],
"source": [
"set(hosp_gdf.columns).intersection(set(zones_gdf.columns))"
- ]
+ ],
+ "id": "8dadbfeb-2b50-4169-b570-7b0595131722"
},
{
"cell_type": "markdown",
- "id": "3444a292",
"metadata": {},
"source": [
"Here we’ve converted the lists of columns into python ‘sets’, and then\n",
@@ -1253,21 +995,21 @@
"on geographical locations, if the join was on customer name or some\n",
"other discrete variable, we could do the join in pandas or directly in\n",
"SQL."
- ]
+ ],
+ "id": "b45b4314-4bca-4df9-83ed-37c63ac42a8e"
},
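+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "For comparison, an ordinary (non-spatial) join on a shared discrete column\n",
+ "is a single call in `pandas`. The two small frames and the\n",
+ "`customer_name` key below are purely hypothetical.\n",
+ "\n",
+ "``` python\n",
+ "import pandas as pd\n",
+ "\n",
+ "# Hypothetical ride-hailing example: join rides to customers on a name column.\n",
+ "rides = pd.DataFrame({'customer_name': ['Amina', 'Bola'], 'fare': [500, 750]})\n",
+ "customers = pd.DataFrame({'customer_name': ['Amina', 'Bola'], 'city': ['Abuja', 'Lagos']})\n",
+ "pd.merge(rides, customers, on='customer_name', how='left')\n",
+ "```"
+ ],
+ "id": "e1f2a3b4-c5d6-4e7f-8091-a2b3c4d5e6f7"
+ },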
{
"cell_type": "code",
"execution_count": null,
- "id": "5085bb4d",
"metadata": {},
"outputs": [],
"source": [
"hosp_state_joined = sjoin(hosp_gdf, zones_gdf, how='left')"
- ]
+ ],
+ "id": "5cad2909-decd-408c-997c-75a409681882"
},
{
"cell_type": "markdown",
- "id": "3cceea5e",
"metadata": {},
"source": [
"The intersection of the two data frames indicates how the two data\n",
@@ -1276,24 +1018,24 @@
"together on two pieces of metal. If the holes don’t match, the join\n",
"can’t be done. There has to be an intersection.\n",
"\n",
- "But what will the result look like? Well the join should be the ‘union’\n",
+ "But what will the result look like? Well, the join should be the ‘union’\n",
"of the two data frames. We can have a look at what the union should be\n",
"by (again) converting the columns to sets."
- ]
+ ],
+ "id": "dc3c96c8-b6f9-424a-9aa9-2acb51e0c003"
},
{
"cell_type": "code",
"execution_count": null,
- "id": "cde93dd2",
"metadata": {},
"outputs": [],
"source": [
"set(hosp_gdf.columns).union(set(zones_gdf.columns))"
- ]
+ ],
+ "id": "97f2ef6e-3de3-46aa-ac97-d2e25c0c594e"
},
{
"cell_type": "markdown",
- "id": "b34ca270",
"metadata": {},
"source": [
"That gives a list of all the columns (notice that ‘geometry’ only\n",
@@ -1304,21 +1046,21 @@
"new column: `index_right`. The two original data bases had separate\n",
"indices. The `index_right` column represents the index from the\n",
"`zones_gdf`, which is the Nigerian state."
- ]
+ ],
+ "id": "6f982542-f6c7-4719-8fb8-d332bb41213c"
},
{
"cell_type": "code",
"execution_count": null,
- "id": "ba54160e",
"metadata": {},
"outputs": [],
"source": [
"set(hosp_state_joined.columns)"
- ]
+ ],
+ "id": "de5c926a-a69d-4363-9fbc-ea7771017f59"
},
{
"cell_type": "markdown",
- "id": "f8019f71",
"metadata": {},
"source": [
"Great! They are all there! We have completed our join. We had two\n",
@@ -1326,25 +1068,28 @@
"hospitals. But by performing an ‘outer’ or a ‘left’ join, we now have a\n",
"single data frame with all the information in the same place! Let’s have\n",
"a look at the first frew entries in the new data frame."
- ]
+ ],
+ "id": "be2217a2-4a44-4a1a-afd6-b57fe4c1f011"
},
{
"cell_type": "code",
"execution_count": null,
- "id": "2aa496aa",
"metadata": {},
"outputs": [],
"source": [
"hosp_state_joined.head()"
- ]
+ ],
+ "id": "b9289463-9881-4499-947c-671e5bc8df3c"
},
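+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "As a quick illustration of what the join gives us, we can now count\n",
+ "facilities per state directly. This sketch assumes the state name column\n",
+ "`admin1Name_en` was carried across from `zones_gdf` by the join.\n",
+ "\n",
+ "``` python\n",
+ "# Number of health facilities recorded in each state (top ten).\n",
+ "hosp_state_joined['admin1Name_en'].value_counts().head(10)\n",
+ "```"
+ ],
+ "id": "f7e6d5c4-b3a2-4918-8776-655443322110"
+ },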
{
"cell_type": "markdown",
- "id": "d6ae432e",
"metadata": {},
"source": [
- "SQL Database\n",
- "------------\n",
+ "## SQL Database\n",
+ "\n",
+ "\\[edit\\]\n",
"\n",
"Our first join was a special one, because it involved spatial data. That\n",
"meant using the special `gdb` format and the `GeoPandas` tool for\n",
@@ -1356,19 +1101,21 @@
"playing with database commands on your local machine. For a real system,\n",
"you would need to set up a server to run the database. The server is a\n",
"separate machine with the job of answering database queries. SQLite\n",
- "pretends to be a proper database, but doesn’t require us to go to the\n",
+ "pretends to be a proper database but doesn’t require us to go to the\n",
"extra work of setting up a server. Popular SQL server software includes\n",
- "[`MySQL`](https://www.mysql.com/) which is free or [Microsoft’s SQL\n",
- "Server](https://www.microsoft.com/en-gb/sql-server/sql-server-2019).\n",
+ "[`MariaDB`](https://mariadb.org/) which is open source, or [Microsoft’s\n",
+ "SQL Server](https://www.microsoft.com/en-gb/sql-server/sql-server-2019).\n",
"\n",
"A typical machine learning installation might have you running a\n",
"database from a cloud service (such as AWS, Azure or Google Cloud\n",
- "Platform). That cloud service would host the database for you and you\n",
+ "Platform). That cloud service would host the database for you, and you\n",
"would pay according to the number of queries made.\n",
"\n",
"Many start-up companies were formed on the back of a `MySQL` server\n",
- "hosted on top of AWS. You can [read how to do that\n",
- "here](https://aws.amazon.com/getting-started/hands-on/create-mysql-db/).\n",
+ "hosted on top of AWS. Although since MySQL was sold to Sun, and then\n",
+ "passed on to Oracle, the open source community has turned its attention\n",
+ "to `MariaDB`, here’s the [AWS instructions on how to set up\n",
+ "`MariaDB`](https://aws.amazon.com/getting-started/hands-on/create-mariadb-db/).\n",
"\n",
"If you were designing your own ride hailing app, or any other major\n",
"commercial software you would want to investigate whether you would need\n",
@@ -1380,15 +1127,18 @@
"production ML system) we’ll also give the equivalent `pandas` commands,\n",
"which would often be what you would use when you’re doing data analysis\n",
"in `python` and `Jupyter`."
- ]
+ ],
+ "id": "156afa6d-b366-49e2-98ea-e58b76bc1e70"
},
{
"cell_type": "markdown",
- "id": "2bdc6802",
"metadata": {},
"source": [
- "Create the SQLite Database\n",
- "--------------------------\n",
+ "## Create the SQLite Database\n",
+ "\n",
+ "\\[edit\\]\n",
"\n",
"The beautiful thing about SQLite is that it allows us to play with SQL\n",
"without going to the work of setting up a proper SQL server. Creating a\n",
@@ -1396,41 +1146,41 @@
"database, we’ll first write our joined data to a CSV file, then we’ll\n",
"use a little utility to convert our hospital database into a SQLite\n",
"database."
- ]
+ ],
+ "id": "3324a769-c424-414f-8148-98c1fb5d4019"
},
{
"cell_type": "code",
"execution_count": null,
- "id": "a7e8889d",
"metadata": {},
"outputs": [],
"source": [
- "hosp_state_joined.to_csv('facilities.csv')"
- ]
+ "hosp_state_joined.to_csv(\"hospitals_zones_joined.csv\")"
+ ],
+ "id": "e0f61f56-94d1-4fdd-ad9d-815aa5b5ab3b"
},
{
"cell_type": "code",
"execution_count": null,
- "id": "bf345450",
"metadata": {},
"outputs": [],
"source": [
"%pip install csv-to-sqlite"
- ]
+ ],
+ "id": "dfc5f012-9c47-4589-bd9e-86891c09bab8"
},
{
"cell_type": "code",
"execution_count": null,
- "id": "5d819b08",
"metadata": {},
"outputs": [],
"source": [
- "!csv-to-sqlite -f facilities.csv -t full -o db.sqlite"
- ]
+ "!csv-to-sqlite -f hospitals_zones_joined.csv -t full -o db.sqlite"
+ ],
+ "id": "c0accaf5-3fb2-41dd-9441-5cc3e2907db2"
},
{
"cell_type": "markdown",
- "id": "5e4febc1",
"metadata": {},
"source": [
"Rather than being installed on a separate server, SQLite simply stores\n",
@@ -1438,40 +1188,63 @@
"\n",
"In the database there can be several ‘tables’. Each table can be thought\n",
"of as like a separate dataframe. The table name we’ve just saved is\n",
- "‘hospitals\\_zones\\_joined’."
- ]
+ "‘hospitals_zones_joined’."
+ ],
+ "id": "590ffdeb-6fb5-403d-8075-2c2c4ac522dd"
},
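+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "If you want to check which tables the utility actually created, SQLite\n",
+ "keeps its own catalogue in the `sqlite_master` table. This is a small\n",
+ "sketch; the notebook sets up its connection more carefully in the next\n",
+ "cells.\n",
+ "\n",
+ "``` python\n",
+ "import sqlite3\n",
+ "\n",
+ "# List the tables stored in the db.sqlite file.\n",
+ "conn_check = sqlite3.connect('db.sqlite')\n",
+ "print(conn_check.execute(\"SELECT name FROM sqlite_master WHERE type='table'\").fetchall())\n",
+ "conn_check.close()\n",
+ "```"
+ ],
+ "id": "0a1b2c3d-4e5f-4a6b-8c7d-9e0f1a2b3c4d"
+ },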
{
"cell_type": "markdown",
- "id": "27030945",
"metadata": {},
"source": [
- "Accessing the SQL Database\n",
- "--------------------------\n",
+ "## Accessing the SQL Database\n",
"\n",
"Now that we have a SQL database, we can create a connection to it and\n",
"query it using SQL commands. Let’s try to simply select the data we\n",
- "wrote to it, to make sure its the same.\n",
+ "wrote to it, to make sure it’s the same.\n",
"\n",
"Start by making a connection to the database. This will often be done\n",
"via remote connections, but for this example we’ll connect locally to\n",
- "the database using the filepath directly."
- ]
+ "the database using the filepath directly.\n",
+ "\n",
+ "To access a data base, the first thing that is made is a connection.\n",
+ "Then SQL is used to extract the information required. A typical SQL\n",
+ "command is `SELECT`. It allows us to extract rows from a given table. It\n",
+ "operates a bit like the `.head()` method in `pandas`, it will return the\n",
+ "first `N` rows (by default the `.head()` command returns the first 5\n",
+ "rows, but you can set `N` to whatever you like. Here we’ve included a\n",
+ "default value of 5 to make it match the `pandas` command.\n",
+ "\n",
+ "We do this using an `execute` command on the connection.\n",
+ "\n",
+ "Typically, its good software engineering practice to ‘wrap’ the database\n",
+ "command in some python code. This allows the commands to be maintained.\n",
+ "You will also be asked to do this in your final assessment, including\n",
+ "re-writing some of the code - pay attention to the slight syntax\n",
+ "differences and multi-statement queries.Below we wrap the SQL command\n",
+ "\n",
+ " SELECT * FROM table_name LIMIT N\n",
+ "\n",
+ "in python code. This SQL command selects the first `N` entries from a\n",
+ "given database called `table_name`.\n",
+ "\n",
+ "We can pass the `table_name` and number of rows, `n`, to the python\n",
+ "command."
+ ],
+ "id": "a8bfb8f0-ef6a-4560-8e85-4d4509a6dce3"
},
{
"cell_type": "code",
"execution_count": null,
- "id": "e6fd2166",
"metadata": {},
"outputs": [],
"source": [
"import sqlite3"
- ]
+ ],
+ "id": "048ceaaa-6342-4811-834a-ebb3a8e8d6fd"
},
{
"cell_type": "code",
"execution_count": null,
- "id": "4f23c8c6",
"metadata": {},
"outputs": [],
"source": [
@@ -1488,21 +1261,21 @@
" print(e)\n",
"\n",
" return conn"
- ]
+ ],
+ "id": "0012adc1-a954-4e2d-89c5-8ca082f6f7c3"
},
{
"cell_type": "code",
"execution_count": null,
- "id": "1150e6af",
"metadata": {},
"outputs": [],
"source": [
"conn = create_connection(\"db.sqlite\")"
- ]
+ ],
+ "id": "97234f30-a175-46e1-9af6-dbef5cf9d869"
},
{
"cell_type": "markdown",
- "id": "f0a7f5f4",
"metadata": {},
"source": [
"Now that we have a connection, we can write a command and pass it to the\n",
@@ -1513,30 +1286,17 @@
"command is `SELECT`. It allows us to extract rows from a given table. It\n",
"operates a bit like the `.head()` method in `pandas`, it will return the\n",
"first `N` rows (by default the `.head()` command returns the first 5\n",
- "rows, but you can set `n` to whatever you like. Here we’ve included a\n",
+ "rows, but you can set `N` to whatever you like. Here we’ve included a\n",
"default value of 5 to make it match the `pandas` command.\n",
"\n",
"The python library, `sqlite3`, allows us to access the SQL database\n",
- "directly from python. We do this using an `execute` command on the\n",
- "connection.\n",
- "\n",
- "Typically, its good software engineering practice to ‘wrap’ the database\n",
- "command in some python code. This allows the commands to be maintained.\n",
- "Below we wrap the SQL command\n",
- "\n",
- " SELECT * FROM [table_name] LIMIT : N\n",
- "\n",
- "in python code. This SQL command selects the first `N` entries from a\n",
- "given database called `table_name`.\n",
- "\n",
- "We can pass the `table_name` and number of rows, `N` to the python\n",
- "command."
- ]
+ "directly from python."
+ ],
+ "id": "eb595c74-f37f-4793-a1f3-9f02d13a2857"
},
{
"cell_type": "code",
"execution_count": null,
- "id": "5003ba78",
"metadata": {},
"outputs": [],
"source": [
@@ -1548,27 +1308,27 @@
" :param n: Number of rows to query\n",
" \"\"\"\n",
" cur = conn.cursor()\n",
- " cur.execute(f\"SELECT * FROM [{table}] LIMIT :limitNum\", {\"limitNum\": n})\n",
+ " cur.execute(f'SELECT * FROM {table} LIMIT {n}')\n",
"\n",
" rows = cur.fetchall()\n",
" return rows"
- ]
+ ],
+ "id": "b2d6aa3b-5666-4f9c-b899-85b941507f86"
},
{
"cell_type": "markdown",
- "id": "5970024f",
"metadata": {},
"source": [
"Let’s have a go at calling the command to extract the first three\n",
"facilities from our health center database. Let’s try creating a\n",
- "function that does the same thing the pandas .head() method does so we\n",
+ "function that does the same thing the pandas `.head()` method does so we\n",
"can inspect our database."
- ]
+ ],
+ "id": "56aecafc-cffe-4348-94f4-cc8a521d207b"
},
{
"cell_type": "code",
"execution_count": null,
- "id": "bd2a75b7",
"metadata": {},
"outputs": [],
"source": [
@@ -1576,134 +1336,221 @@
" rows = select_top(conn, table, n)\n",
" for r in rows:\n",
" print(r)"
- ]
+ ],
+ "id": "25cddf48-2cc6-4b51-ab50-08ea3ed23eeb"
},
{
"cell_type": "code",
"execution_count": null,
- "id": "a0e262b9",
"metadata": {},
"outputs": [],
"source": [
- "head(conn, 'facilities')"
- ]
+ "head(conn, \"hospitals_zones_joined\")"
+ ],
+ "id": "5b8824aa-2e9f-4c90-8622-13377606b4f1"
},
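+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Since we promised the equivalent `pandas` commands alongside the SQL ones,\n",
+ "here is a sketch of the same query run through `pandas`;\n",
+ "`pd.read_sql_query` accepts the `sqlite3` connection directly and returns\n",
+ "an ordinary data frame.\n",
+ "\n",
+ "``` python\n",
+ "import pandas as pd\n",
+ "\n",
+ "# pandas equivalent of head(conn, 'hospitals_zones_joined').\n",
+ "pd.read_sql_query('SELECT * FROM hospitals_zones_joined LIMIT 5', conn)\n",
+ "```"
+ ],
+ "id": "1b2c3d4e-5f6a-4b7c-8d9e-0f1a2b3c4d5e"
+ },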
{
"cell_type": "markdown",
- "id": "c81bef14",
"metadata": {},
"source": [
- "Great! We now have the data base in SQLite, and some python functions\n",
- "that operate on the data base by wrapping SQL commands.\n",
+ "Great! We now have the database in and some python functions that\n",
+ "operate on the data base by wrapping SQL commands.\n",
"\n",
"We will return to the SQL command style after download and add the other\n",
"datasets to the database using a combination of `pandas` and the\n",
- "`csv-to-sqlite` utility.\n",
+ "database utilities.\n",
"\n",
"Our next task will be to introduce data on COVID19 so that we can join\n",
"that to our other data sets."
- ]
+ ],
+ "id": "ad665ce5-b4ab-4087-b1f2-15c985c1dc08"
},
{
"cell_type": "markdown",
- "id": "948bca93",
"metadata": {},
"source": [
- "Covid Data\n",
- "----------\n",
+ "## Covid Data\n",
+ "\n",
+ "\\[edit\\]\n",
"\n",
"Now we have the health data, we’re going to combine it with [data about\n",
"COVID-19 cases in Nigeria over\n",
"time](https://github.com/dsfsi/covid19africa). This data is kindly\n",
- "provided by Africa open COVID-19 data working group, which Elaine\n",
- "Nsoesie has been working with. The data is taken from Twitter, and only\n",
- "goes up until May 2020.\n",
+ "provided by Africa open COVID-19 data working group, which [Elaine\n",
+ "Nsoesie](https://www.bu.edu/sph/profile/elaine-nsoesie/) has been\n",
+ "working with. The data is taken from Twitter, and only goes up until May\n",
+ "2020.\n",
"\n",
- "They provide their data in github. We can access the cases we’re\n",
+ "They provide their data in GitHub. We can access the cases we’re\n",
"interested in from the following URL.\n",
"\n",
+ "\n",
+ "\n",
"For convenience, we’ll load the data into pandas first, but our next\n",
"step will be to create a new SQLite table containing the data. Then\n",
"we’ll join that table to our existing tables."
- ]
+ ],
+ "id": "3dedc866-fa97-4421-986f-7647a9b7f925"
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# Nigerian COVID Data\n",
+ "\n",
+ "\\[edit\\]\n",
+ "\n",
+ "At the beginning of the COVID-19 outbreak, the Consortium for African\n",
+ "COVID-19 Data formed to bring together data from across the African\n",
+ "continent on COVID-19 cases (Marivate et al., 2020). These cases are\n",
+ "recorded in the following GitHub repository:\n",
+ ".\n",
+ "\n",
+ "For ease of use we’ve packaged this data set in the `pods` library"
+ ],
+ "id": "3d39c6f3-e03a-492d-978c-8940f2044aa1"
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import pods"
+ ],
+ "id": "cfa6c63c-8495-4cfb-b0d7-979aba06c6ac"
},
{
"cell_type": "code",
"execution_count": null,
- "id": "13ad6017",
"metadata": {},
"outputs": [],
"source": [
- "covid_data_url = 'https://raw.githubusercontent.com/dsfsi/covid19africa/master/data/line_lists/line-list-nigeria.csv'\n",
- "covid_data_csv = 'cases.csv'\n",
- "urllib.request.urlretrieve(covid_data_url, covid_data_csv)\n",
- "covid_data = pd.read_csv(covid_data_csv)"
- ]
+ "data = pods.datasets.nigerian_covid()['Y']\n",
+ "data.head()"
+ ],
+ "id": "87984557-8ccc-4ce5-a39c-ef3378a15dcf"
},
{
"cell_type": "markdown",
- "id": "2b7ac867",
"metadata": {},
"source": [
- "As normal, we should inspect our data to check that it contains what we\n",
- "expect."
- ]
+ "Alternatively, you can access the data directly with the following\n",
+ "commands.\n",
+ "\n",
+ "``` python\n",
+ "import urllib.request\n",
+ "import pandas as pd\n",
+ "\n",
+ "urllib.request.urlretrieve('https://raw.githubusercontent.com/dsfsi/covid19africa/master/data/line_lists/line-list-nigeria.csv', 'line-list-nigeria.csv')\n",
+ "data = pd.read_csv('line-list-nigeria.csv', parse_dates=['date', \n",
+ " 'date_confirmation', \n",
+ " 'date_admission_hospital', \n",
+ " 'date_onset_symptoms',\n",
+ " 'death_date'])\n",
+ "```\n",
+ "\n",
+ "Once it is loaded in the data can be summarized using the `describe`\n",
+ "method in pandas."
+ ],
+ "id": "8b73decb-8e66-4619-8730-6dde4e6bce88"
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "data.describe()"
+ ],
+ "id": "9b0cb616-de45-4e43-877e-a4347dd5a04e"
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import matplotlib.pyplot as plt\n",
+ "import mlai\n",
+ "import mlai.plot as plot"
+ ],
+ "id": "dd83bf84-7ffd-4671-992e-6eee49a1f098"
},
{
"cell_type": "code",
"execution_count": null,
- "id": "84982973",
"metadata": {},
"outputs": [],
"source": [
- "covid_data.head()"
- ]
+ "fig, ax = plt.subplots(figsize=plot.big_wide_figsize)\n",
+ "data['count_column'] = True\n",
+ "fig.autofmt_xdate(rotation=45)\n",
+ "ax.plot(data.date, data.count_column.cumsum())\n",
+ "\n",
+ "ax.plot()\n",
+ "ax.set_xlabel('date')\n",
+ "ax.set_ylabel('case counts')\n",
+ "\n",
+ "mlai.write_figure('nigerian-covid-data.svg', directory='./datasets')"
+ ],
+ "id": "0088efd9-02a8-4699-a6fd-05c6331eff68"
},
{
"cell_type": "markdown",
- "id": "fc698290",
"metadata": {},
"source": [
- "And we can get an idea of all the information in the data from looking\n",
- "at the columns."
- ]
+ "\n",
+ "\n",
+ "Figure: Evolution of COVID-19 cases in Nigeria."
+ ],
+ "id": "8ef62652-45b1-458e-9884-849b171a23d3"
},
{
"cell_type": "code",
"execution_count": null,
- "id": "0ce0c5e3",
"metadata": {},
"outputs": [],
"source": [
- "covid_data.columns"
- ]
+ "covid_data=data\n",
+ "covid_data.to_csv('cases.csv')"
+ ],
+ "id": "5bd2cb75-166e-42c6-97a2-4f43074ae2d7"
},
{
"cell_type": "markdown",
- "id": "034d4d36",
"metadata": {},
"source": [
"Now we convert this CSV file we’ve downloaded into a new table in the\n",
- "database file. We can do this, again, with the csv-to-sqlite script."
- ]
+ "database file.\n",
+ "\n",
+ "We can do this, again, with the csv-to-sqlite script."
+ ],
+ "id": "d53aa2dc-f4fe-4077-bc2d-82dc5f1a42ac"
},
{
"cell_type": "code",
"execution_count": null,
- "id": "bf5550df",
"metadata": {},
"outputs": [],
"source": [
"!csv-to-sqlite -f cases.csv -t full -o db.sqlite"
- ]
+ ],
+ "id": "1e0ccf80-d4cb-480d-aa63-fe1dbc6e693d"
},
{
"cell_type": "markdown",
- "id": "6ae426ec",
"metadata": {},
"source": [
- "Population Data\n",
- "---------------\n",
+ "## Population Data\n",
+ "\n",
+ "\\[edit\\]\n",
"\n",
"Now we have information about COVID cases, and we have information about\n",
"how many health centers and how many doctors and nurses there are in\n",
@@ -1713,114 +1560,141 @@
"\n",
"If we were running our ride hailing service, we would also need\n",
"information about how many people there were in different areas, so we\n",
- "could understand what the *demand* for the boda boda rides might be.\n",
+ "could understand what the demand for the boda boda rides might be.\n",
"\n",
"To access the number of people we can get population statistics from the\n",
"[Humanitarian Data Exchange](https://data.humdata.org/).\n",
"\n",
"We also want to have population data for each state in Nigeria, so that\n",
"we can see attributes like whether there are zones of high health\n",
- "facility density but low population density."
- ]
+ "facility density but low population density.\n",
+ "\n",
+ "``` python\n",
+ "import urllib\n",
+ "\n",
+ "pop_url = \"https://data.humdata.org/dataset/a7c3de5e-ff27-4746-99cd-05f2ad9b1066/resource/d9fc551a-b5e4-4bed-9d0d-b047b6961817/download/nga_admpop_adm1_2020.csv\"\n",
+ "_, msg = urllib.request.urlretrieve(pop_url,\"nga_admpop_adm1_2020.csv\")\n",
+ "data = pd.read_csv(\"nga_admpop_adm1_2020.csv\")\n",
+ "```\n",
+ "\n",
+ "To do joins with this data, we must first make sure that the columns\n",
+ "have the right names. The name should match the same name of the column\n",
+ "in our existing data. So we reset the column names, and the name of the\n",
+ "index, as follows.\n",
+ "\n",
+ "``` python\n",
+ "data.dropna(axis=0, how=\"all\", inplace=True)\n",
+ "data.dropna(axis=1, how=\"all\", inplace=True)\n",
+ "data.rename(columns = {\"ADM0_NAME\" : \"admin0Name_en\", \n",
+ " \"ADM0_PCODE\" : \"admin0Pcode\", \n",
+ " \"ADM1_NAME\" : \"admin1Name_en\", \n",
+ " \"ADM1_PCODE\" : \"admin1Pcode\", \n",
+ " \"T_TL\" : \"population\"},\n",
+ " inplace=True)\n",
+ "data[\"admin0Name_en\"] = data[\"admin0Name_en\"].str.title()\n",
+ "data[\"admin1Name_en\"] = data[\"admin1Name_en\"].str.title()\n",
+ " \n",
+ "data = data.set_index(\"admin1Name_en\")\n",
+ "```"
+ ],
+ "id": "bbfd2dec-ddd4-480e-b8ce-c719c27ea6bd"
},
{
"cell_type": "code",
"execution_count": null,
- "id": "a525bf66",
"metadata": {},
"outputs": [],
"source": [
- "pop_url = 'https://data.humdata.org/dataset/a7c3de5e-ff27-4746-99cd-05f2ad9b1066/resource/d9fc551a-b5e4-4bed-9d0d-b047b6961817/download/nga_pop_adm1_2016.csv'\n",
- "_, msg = urllib.request.urlretrieve(pop_url,'nga_pop_adm1_2016.csv')\n",
- "pop_data = pd.read_csv('nga_pop_adm1_2016.csv')"
- ]
+ "data = pods.datasets.nigerian_population()[\"Y\"]"
+ ],
+ "id": "242f0a36-3cf1-43bd-bfd0-466b21467721"
},
{
"cell_type": "code",
"execution_count": null,
- "id": "14ada4a5",
"metadata": {},
"outputs": [],
"source": [
- "pop_data.head()"
- ]
- },
- {
- "cell_type": "markdown",
- "id": "69d3ead5",
- "metadata": {},
- "source": [
- "To do joins with this data, we must first make sure that the columns\n",
- "have the right names. The name should match the same name of the column\n",
- "in our existing data. So we reset the column names, and the name of the\n",
- "index, as follows."
- ]
+ "data.head()"
+ ],
+ "id": "22e6228d-5b49-4629-8764-859a8dc96142"
},
{
"cell_type": "code",
"execution_count": null,
- "id": "7e21595f",
"metadata": {},
"outputs": [],
"source": [
- "pop_data.columns = ['admin1Name_en', 'admin1Pcode', 'admin0Name_en', 'admin0Pcode', 'population']\n",
- "pop_data = pop_data.set_index('admin1Name_en')"
- ]
+ "pop_data=data"
+ ],
+ "id": "bf49baf7-c0ca-443d-ba96-0f98a143d0e3"
},
{
"cell_type": "markdown",
- "id": "b6d16737",
"metadata": {},
"source": [
"When doing this for real world data, you should also make sure that the\n",
"names used in the rows are the same across the different data bases. For\n",
"example, has someone decided to use an abbreviation for ‘Federal Capital\n",
"Territory’ and set it as ‘FCT’. The computer won’t understand these are\n",
- "the same states, and if you do a join with such data you can get\n",
+ "the same states, and if you do a join with such data, you can get\n",
"duplicate entries or missing entries. This sort of thing happens a lot\n",
"in real world data and takes a lot of time to sort out. Fortunately, in\n",
- "this case, the data is well curated and we don’t have these problems."
- ]
+ "this case, the data is well curated, and we don’t have these problems."
+ ],
+ "id": "4d9cdeee-a84c-402e-91de-96fc17b1b1fd"
},
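+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "As a minimal sketch (with made-up numbers rather than the real data),\n",
+ "one way to fix such a mismatch is to map the abbreviated names onto the\n",
+ "full names before joining:\n",
+ "\n",
+ "``` python\n",
+ "import pandas as pd\n",
+ "\n",
+ "# toy tables that disagree on the name used for Abuja\n",
+ "cases = pd.DataFrame({'state': ['FCT', 'Lagos'], 'case_count': [10, 25]})\n",
+ "population = pd.DataFrame({'state': ['Federal Capital Territory', 'Lagos'],\n",
+ "                           'population': [1000, 2000]})\n",
+ "\n",
+ "# harmonise the names first, otherwise the Abuja row is lost in the join\n",
+ "cases['state'] = cases['state'].replace({'FCT': 'Federal Capital Territory'})\n",
+ "joined = cases.merge(population, on='state', how='inner')\n",
+ "```"
+ ],
+ "id": "7a1c2e9f-3b4d-4c5e-8f60-91a2b3c4d5e6"
+ },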
{
"cell_type": "markdown",
- "id": "e4c4c400",
"metadata": {},
"source": [
- "Save to database file\n",
- "---------------------\n",
+ "## Save to database file\n",
"\n",
"The next step is to add this new CSV file as an additional table in our\n",
- "SQLite database. This is done using the script as before."
- ]
+ "database."
+ ],
+ "id": "1f9fe873-2cb6-4394-b86c-4f42b6522b6f"
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Loading the Population Data into the SQLite Database\n",
+ "\n",
+ "\\[edit\\]\n",
+ "\n",
+ "We can load the data into the SQLite database using the script as\n",
+ "before."
+ ],
+ "id": "5d09def5-6eab-43a5-aa21-6e83ea395d04"
},
{
"cell_type": "code",
"execution_count": null,
- "id": "8ccd03e7",
"metadata": {},
"outputs": [],
"source": [
"pop_data.to_csv('pop_data.csv')"
- ]
+ ],
+ "id": "2ea483f0-4f2a-44a6-bed3-dcd701e07539"
},
{
"cell_type": "code",
"execution_count": null,
- "id": "79e6964d",
"metadata": {},
"outputs": [],
"source": [
"!csv-to-sqlite -f pop_data.csv -t full -o db.sqlite"
- ]
+ ],
+ "id": "7e54c4dc-4699-435a-a60b-55d1bd723907"
},
{
"cell_type": "markdown",
- "id": "62f8f385",
"metadata": {},
"source": [
- "Computing per capita hospitals and COVID\n",
- "----------------------------------------\n",
+ "## Computing per capita hospitals and COVID\n",
"\n",
"The Minister of Health in Abuja may be interested in which states are\n",
"most vulnerable to COVID19. We now have all the information in our SQL\n",
@@ -1828,14 +1702,20 @@
"and what the COVID19 situation is.\n",
"\n",
"To do this, we will use the `JOIN` operation from SQL and introduce a\n",
- "new operation called `GROUPBY`.\n",
- "\n",
- "#### Joining in Pandas\n",
+ "new operation called `GROUPBY`."
+ ],
+ "id": "43f37d96-aed7-4cac-9fd5-9cd3e04dc59b"
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Joining in Pandas\n",
"\n",
"As before, these operations can be done in pandas or GeoPandas. Before\n",
"we create the SQL commands, we’ll show how you can do that in pandas.\n",
"\n",
- "In pandas, the equivalent of a database table is a dataframe. So the\n",
+ "In `pandas`, the equivalent of a database table is a dataframe. So, the\n",
"JOIN operation takes two dataframes and joins them based on the key. The\n",
"key is that special shared column between the two tables. The place\n",
"where the ‘holes align’ so the two databases can be joined together.\n",
@@ -1846,31 +1726,31 @@
"\n",
"This is sometimes where problems can creep in. If in one table Abuja’s\n",
"state is encoded as ‘FCT’ or ‘FCT-Abuja’, and in another table it’s\n",
- "encoded as ‘Federal Capital Territory’, they won’t match and that data\n",
+ "encoded as ‘Federal Capital Territory’, they won’t match, and that data\n",
"wouldn’t appear in the joined table.\n",
"\n",
"In simple terms, a JOIN operation takes two tables (or dataframes) and\n",
"combines them based on some key, in this case the index of the Pandas\n",
"data frame which is the state name."
- ]
+ ],
+ "id": "a9dcd40c-85ac-4fb2-bffa-d2e3acd5e3e7"
},
{
"cell_type": "code",
"execution_count": null,
- "id": "19e56178",
"metadata": {},
"outputs": [],
"source": [
+ "zones_gdf.set_index(\"admin1Name_en\", inplace=True)\n",
"pop_joined = zones_gdf.join(pop_data['population'], how='inner')"
- ]
+ ],
+ "id": "f6895105-0655-4ceb-bb01-7f82ba44d96a"
},
{
"cell_type": "markdown",
- "id": "3137bff4",
"metadata": {},
"source": [
- "GroupBy in Pandas\n",
- "-----------------\n",
+ "## GroupBy in Pandas\n",
"\n",
"Our COVID19 data is in the form of individual cases. But we are\n",
"interested in total case counts for each state. There is a special data\n",
@@ -1885,153 +1765,162 @@
"such as to count the rows in each group, or to sum or take the mean over\n",
"the values in some column (imagine each case row had the age of the\n",
"patient, and you were interested in the mean age of patients.)"
- ]
+ ],
+ "id": "c5747c36-52a8-4c3c-b212-acf10aa62ea4"
},
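+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "As a small sketch of that aggregation idea, using a toy line list with a\n",
+ "hypothetical `age` column (not the real data):\n",
+ "\n",
+ "``` python\n",
+ "import pandas as pd\n",
+ "\n",
+ "# toy case line list with a hypothetical age column\n",
+ "toy_cases = pd.DataFrame({'province/state': ['Lagos', 'Lagos', 'Kano'],\n",
+ "                          'age': [34, 58, 41]})\n",
+ "\n",
+ "# count the rows in each group\n",
+ "counts = toy_cases.groupby('province/state').size()\n",
+ "# take the mean of the age column within each group\n",
+ "mean_age = toy_cases.groupby('province/state')['age'].mean()\n",
+ "```"
+ ],
+ "id": "4f8e2a1b-9c3d-4e5f-a6b7-c8d9e0f1a2b3"
+ },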
{
"cell_type": "code",
"execution_count": null,
- "id": "32008ffa",
"metadata": {},
"outputs": [],
"source": [
"covid_cases_by_state = covid_data.groupby(['province/state']).count()['case_id']"
- ]
+ ],
+ "id": "9615e6f0-033e-409f-9d56-fc9160f63b35"
},
{
"cell_type": "markdown",
- "id": "5138a50b",
"metadata": {},
"source": [
"The `.groupby()` method on the dataframe has now given us a new data\n",
"series that contains the total number of covid cases in each state. We\n",
"can examine it to check we have something sensible."
- ]
+ ],
+ "id": "c02f95b6-51e8-4693-8719-d25f04707b1e"
},
{
"cell_type": "code",
"execution_count": null,
- "id": "3d7f60d8",
"metadata": {},
"outputs": [],
"source": [
"covid_cases_by_state"
- ]
+ ],
+ "id": "81afb692-4691-4b1b-8f2f-d629042dab90"
},
{
"cell_type": "markdown",
- "id": "f240cb48",
"metadata": {},
"source": [
- "Now we have this new data series, it can be added to the pandas data\n",
- "frame as a new column."
- ]
+ "Now we have this new data series, it can be added to the pandas\n",
+ "dataframe as a new column."
+ ],
+ "id": "eb022ae7-4cda-4eae-ba5e-97103a978e01"
},
{
"cell_type": "code",
"execution_count": null,
- "id": "e703b61f",
"metadata": {},
"outputs": [],
"source": [
"pop_joined['covid_cases_by_state'] = covid_cases_by_state"
- ]
+ ],
+ "id": "7ab1d5be-2fd7-4691-86f7-907b6292324d"
},
{
"cell_type": "markdown",
- "id": "ba33abde",
"metadata": {},
"source": [
"The spatial join we did on the original data frame to obtain\n",
- "hosp\\_state\\_joined introduced a new column, index\\_right which contains\n",
+ "hosp_state_joined introduced a new column, `index_right` that contains\n",
"the state of each of the hospitals. Let’s have a quick look at it below."
- ]
+ ],
+ "id": "85287173-538e-496b-83c6-95384841b59b"
},
{
"cell_type": "code",
"execution_count": null,
- "id": "1ac1c160",
"metadata": {},
"outputs": [],
"source": [
"hosp_state_joined['index_right']"
- ]
+ ],
+ "id": "2d29904d-33f3-44f1-b57f-5786ce62a095"
},
{
"cell_type": "markdown",
- "id": "f071c427",
"metadata": {},
"source": [
"To count the hospitals in each of the states, we first create a grouped\n",
"series where we’ve grouped on these states."
- ]
+ ],
+ "id": "1a36f96b-9dbe-4b35-9e4a-0332b25671fa"
},
{
"cell_type": "code",
"execution_count": null,
- "id": "99a7f974",
"metadata": {},
"outputs": [],
"source": [
- "grouped = hosp_state_joined.groupby('index_right')"
- ]
+ "grouped = hosp_state_joined.groupby('admin1Name_en')"
+ ],
+ "id": "5e0e1293-c603-4f72-97a3-210f77aa2de9"
},
{
"cell_type": "markdown",
- "id": "04d4631d",
"metadata": {},
"source": [
"This python operation now goes through each of the groups and counts how\n",
"many hospitals there are in each state. It stores the result in a\n",
- "dictionary. If you’re new to Python, then to understand this code you\n",
+ "dictionary. If you’re new to python, then to understand this code you\n",
"need to understand what a ‘dictionary comprehension’ is. In this case\n",
"the dictionary comprehension is being used to create a python dictionary\n",
"of states and total hospital counts. That’s then being converted into a\n",
"`pandas` Data Series and added to the `pop_joined` dataframe."
- ]
+ ],
+ "id": "310ef6bd-08a2-4156-a7ba-5af71719ca3a"
+ },
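+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "If dictionary comprehensions are new to you, here is a toy example of\n",
+ "the same pattern that is used below:\n",
+ "\n",
+ "``` python\n",
+ "# a toy grouping: keys are group names, values are the members of each group\n",
+ "groups = {'a': [1, 2, 3], 'b': [4]}\n",
+ "\n",
+ "# dictionary comprehension: one key per group, value is the number of members\n",
+ "counts = {k: len(v) for k, v in groups.items()}\n",
+ "# counts == {'a': 3, 'b': 1}\n",
+ "```"
+ ],
+ "id": "9d2c4b6a-1e3f-4a5c-b7d8-e9f0a1b2c3d4"
+ },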
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import pandas as pd"
+ ],
+ "id": "0ba8c126-8305-47e5-9e1d-22bc2f50445d"
},
{
"cell_type": "code",
"execution_count": null,
- "id": "5c50a0e7",
"metadata": {},
"outputs": [],
"source": [
"counted_groups = {k: len(v) for k, v in grouped.groups.items()}\n",
"pop_joined['hosp_state'] = pd.Series(counted_groups)"
- ]
+ ],
+ "id": "a50604aa-56e6-4ecf-b246-51399d4de23d"
},
{
"cell_type": "markdown",
- "id": "42f29c61",
"metadata": {},
"source": [
"For convenience, we can now add a new data series to the data frame that\n",
"contains the per capita information about hospitals. that makes it easy\n",
"to retrieve later."
- ]
+ ],
+ "id": "bc054bcb-2584-4f18-9285-8499c9a04779"
},
{
"cell_type": "code",
"execution_count": null,
- "id": "baec3556",
"metadata": {},
"outputs": [],
"source": [
"pop_joined['hosp_per_capita_10k'] = (pop_joined['hosp_state'] * 10000 )/ pop_joined['population']"
- ]
+ ],
+ "id": "dc883ad4-bd2a-404e-b8d8-a0319639f3c2"
},
{
"cell_type": "markdown",
- "id": "1d1df617",
"metadata": {},
"source": [
- "SQL-style\n",
- "---------\n",
+ "## SQL-style\n",
"\n",
"That’s the `pandas` approach to doing it. But `pandas` itself is\n",
- "inspired by database language, in particular relational databases such\n",
- "as SQL. To do these types of joins at scale, e.g. for our ride hailing\n",
- "app, we need to see how to do these joins in a database.\n",
+ "inspired by database languages, in particular relational databases such\n",
+ "as SQL. To do these types of joins at scale, e.g., for a ride hailing\n",
+ "app, we need to do these joins in a database.\n",
"\n",
"As before, we’ll wrap the underlying SQL commands with a convenient\n",
"python command.\n",
@@ -2040,13 +1929,13 @@
"command](https://www.w3schools.com/sql/sql_select.asp), which extracts\n",
"`FROM` a particular table. It then completes an\n",
"[`INNER JOIN`](https://www.w3schools.com/sql/sql_join_inner.asp) using\n",
- "particular columns (`provice/state` and `index_right`)"
- ]
+ "particular columns (`province/state` and `admin1Name_en`)"
+ ],
+ "id": "b15ca88d-c5fe-482c-b89f-8595d6190638"
},
{
"cell_type": "code",
"execution_count": null,
- "id": "132f2e16",
"metadata": {},
"outputs": [],
"source": [
@@ -2056,159 +1945,236 @@
" \"\"\"\n",
" cur = conn.cursor()\n",
" cur.execute(\"\"\"\n",
- " SELECT ct.[province/state] as [state], ct.[case_count], ft.[facility_count]\n",
+ " SELECT ct.`province/state` as state, ct.case_count, ft.facility_count\n",
" FROM\n",
- " (SELECT [province/state], COUNT(*) as [case_count] FROM [cases] GROUP BY [province/state]) ct\n",
+ " (SELECT `province/state`, COUNT(*) as case_count FROM cases GROUP BY `province/state`) ct\n",
" INNER JOIN \n",
- " (SELECT [index_right], COUNT(*) as [facility_count] FROM [facilities] GROUP BY [index_right]) ft\n",
+ " (SELECT admin1Name_en, COUNT(*) as facility_count FROM hospitals_zones_joined GROUP BY admin1Name_en) ft\n",
" ON\n",
- " ct.[province/state] = ft.[index_right]\n",
+ " ct.`province/state` = ft.admin1Name_en\n",
" \"\"\")\n",
"\n",
" rows = cur.fetchall()\n",
" return rows"
- ]
+ ],
+ "id": "2d36f94b-9507-46c7-8b33-a8deb290fb6c"
},
{
"cell_type": "markdown",
- "id": "4b61896f",
"metadata": {},
"source": [
"Now we’ve created our python wrapper, we can connect to the data base\n",
"and run our SQL command on the database using the wrapper."
- ]
+ ],
+ "id": "7d4cf587-5206-4a61-98e7-70dabc18d14d"
},
{
"cell_type": "code",
"execution_count": null,
- "id": "fe5f21cd",
"metadata": {},
"outputs": [],
"source": [
"conn = create_connection(\"db.sqlite\")"
- ]
+ ],
+ "id": "8d4ede0f-3eb1-47bc-a1e8-70b2c0ee3b80"
},
{
"cell_type": "code",
"execution_count": null,
- "id": "5c4edd2a",
"metadata": {},
"outputs": [],
"source": [
"state_cases_hosps = join_counts(conn)"
- ]
+ ],
+ "id": "5d9815c5-b1e7-45a8-b9f2-ea4ffa5e36d4"
},
{
"cell_type": "code",
"execution_count": null,
- "id": "1a03858b",
"metadata": {},
"outputs": [],
"source": [
"for row in state_cases_hosps:\n",
" print(\"State {} \\t\\t Covid Cases {} \\t\\t Health Facilities {}\".format(row[0], row[1], row[2]))"
- ]
+ ],
+ "id": "4dd5953e-b6e2-4415-9029-72bf72e34fe7"
},
{
"cell_type": "code",
"execution_count": null,
- "id": "c421edda",
"metadata": {},
"outputs": [],
"source": [
- "base = nigeria.plot(color='white', edgecolor='black', alpha=0, figsize=(11, 11))\n",
+ "base = nigeria_gdf.plot(color='white', edgecolor='black', alpha=0, figsize=(11, 11))\n",
"pop_joined.plot(ax=base, column='population', edgecolor='black', legend=True)\n",
"base.set_title(\"Population of Nigerian States\")"
- ]
+ ],
+ "id": "9c7a58f6-0bcf-46b8-8ad0-0b33ab4136eb"
},
{
"cell_type": "code",
"execution_count": null,
- "id": "be53c352",
"metadata": {},
"outputs": [],
"source": [
- "base = nigeria.plot(color='white', edgecolor='black', alpha=0, figsize=(11, 11))\n",
+ "base = nigeria_gdf.plot(color='white', edgecolor='black', alpha=0, figsize=(11, 11))\n",
"pop_joined.plot(ax=base, column='hosp_per_capita_10k', edgecolor='black', legend=True)\n",
"base.set_title(\"Hospitals Per Capita (10k) of Nigerian States\")"
- ]
+ ],
+ "id": "738d90e0-82d8-4f59-b100-ca1155349cf3"
},
{
"cell_type": "markdown",
- "id": "0ff80566",
"metadata": {},
"source": [
- "::: {.cell .markdown}\n",
+ "### Exercise 1\n",
+ "\n",
+ "Add a new column the dataframe for covid cases per 10,000 population, in\n",
+ "the same way we computed health facilities per 10k capita."
+ ],
+ "id": "84052d05-e19c-4bcd-87d0-bbb64386b0f3"
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Write your answer to Exercise 1 here\n",
+ "\n",
+ "\n",
+ "\n",
+ "\n"
+ ],
+ "id": "700c002b-2838-4bb6-aa63-b0fb8233293a"
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Exercise 2\n",
+ "\n",
+ "Add a new column for covid cases per health facility."
+ ],
+ "id": "66d4670a-9842-4f74-9753-10d9aa1b72d3"
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Write your answer to Exercise 2 here\n",
"\n",
- "Exercise\n",
- "--------\n",
"\n",
- "1. Add a new column the dataframe for covid cases per 10,000\n",
- " population, in the same way we computed health facilities per 10k\n",
- " capita.\n",
"\n",
- "2. Add a new column for covid cases per health facility.\n",
+ "\n"
+ ],
+ "id": "fb3861ab-b49a-43a4-815f-9dbefcd10cf9"
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Exercise 3\n",
"\n",
"Do this in both the SQL and the Pandas styles to get a feel for how they\n",
- "differ.\n",
+ "differ."
+ ],
+ "id": "753812a9-c2f8-4b00-b722-1c80d3580260"
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Write your answer to Exercise 3 here\n",
+ "\n",
+ "\n",
+ "\n",
+ "\n"
+ ],
+ "id": "f2f258e2-c935-4e55-a122-d1e32d793f05"
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Exercise 4\n",
+ "\n",
+ "Perform an inner join using SQL on your databases and convert the result\n",
+ "into a `pandas` DataFrame."
+ ],
+ "id": "2e4f36b4-7e44-4c4c-979b-22cd7390a0e5"
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Write your answer to Exercise 4 here\n",
+ "\n",
+ "\n",
"\n",
- "{:::"
- ]
+ "\n"
+ ],
+ "id": "3e279494-79f6-4269-8168-ac0d65c2f19c"
},
{
"cell_type": "code",
"execution_count": null,
- "id": "7bf114dc",
"metadata": {},
"outputs": [],
"source": [
+ "\n",
"# pop_joined['cases_per_capita_10k'] = ???\n",
"# pop_joined['cases_per_facility'] = ???"
- ]
+ ],
+ "id": "aac06e79-50c0-4206-9815-4a8f8a96fbfb"
},
{
"cell_type": "code",
"execution_count": null,
- "id": "cce92a95",
"metadata": {},
"outputs": [],
"source": [
- "base = nigeria.plot(color='white', edgecolor='black', alpha=0, figsize=(11, 11))\n",
+ "base = nigeria_gdf.plot(color='white', edgecolor='black', alpha=0, figsize=(11, 11))\n",
"pop_joined.plot(ax=base, column='cases_per_capita_10k', edgecolor='black', legend=True)\n",
"base.set_title(\"Covid Cases Per Capita (10k) of Nigerian States\")"
- ]
+ ],
+ "id": "0f464b0f-dadd-4b2f-8dd4-7296ea437410"
},
{
"cell_type": "code",
"execution_count": null,
- "id": "702c54c1",
"metadata": {},
"outputs": [],
"source": [
- "base = nigeria.plot(color='white', edgecolor='black', alpha=0, figsize=(11, 11))\n",
+ "base = nigeria_gdf.plot(color='white', edgecolor='black', alpha=0, figsize=(11, 11))\n",
"pop_joined.plot(ax=base, column='covid_cases_by_state', edgecolor='black', legend=True)\n",
"base.set_title(\"Covid Cases by State\")"
- ]
+ ],
+ "id": "f31401f4-5bae-43a6-abff-01d98761dd4a"
},
{
"cell_type": "code",
"execution_count": null,
- "id": "68940fd8",
"metadata": {},
"outputs": [],
"source": [
- "base = nigeria.plot(color='white', edgecolor='black', alpha=0, figsize=(11, 11))\n",
+ "base = nigeria_gdf.plot(color='white', edgecolor='black', alpha=0, figsize=(11, 11))\n",
"pop_joined.plot(ax=base, column='cases_per_facility', edgecolor='black', legend=True)\n",
"base.set_title(\"Covid Cases per Health Facility\")"
- ]
+ ],
+ "id": "30b4a5cc-0454-457a-b3d2-46bc623bdb36"
},
{
"cell_type": "markdown",
- "id": "31d8c1c3",
"metadata": {},
"source": [
- "Thanks!\n",
- "-------\n",
+ "## Thanks!\n",
"\n",
"For more information on these subjects and more you might want to check\n",
"the following resources.\n",
@@ -2219,37 +2185,33 @@
" Page](http://www.theguardian.com/profile/neil-lawrence)\n",
"- blog:\n",
" [http://inverseprobability.com](http://inverseprobability.com/blog.html)"
- ]
+ ],
+ "id": "98c2e522-fc18-48d6-bf88-04a23963d302"
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## References"
+ ],
+ "id": "c75788f5-00bb-4e10-a4f4-da8d005243d4"
},
{
"cell_type": "markdown",
- "id": "aa93382e",
"metadata": {},
"source": [
- "References\n",
- "----------"
- ]
+ "Marivate, V., Nsoesie, E., Bekele, E., Africa open COVID-19 data working\n",
+ "group, 2020. Coronavirus COVID-19 (2019-nCoV) Data\n",
+ "Repository for Africa. \n",
+ "\n",
+ "The Office of the Senior Special Assistant to the President on the\n",
+ "Millennium Development Goals (OSSAP-MDGs), Columbia University, 2014.\n",
+ "Nigeria NMIS facility database."
+ ],
+ "id": "6ae372af-4e54-4156-86bd-ea85bc5731c2"
}
],
- "metadata": {
- "kernelspec": {
- "display_name": "Python 3",
- "language": "python",
- "name": "python3"
- },
- "language_info": {
- "codemirror_mode": {
- "name": "ipython",
- "version": 3
- },
- "file_extension": ".py",
- "mimetype": "text/x-python",
- "name": "python",
- "nbconvert_exporter": "python",
- "pygments_lexer": "ipython3",
- "version": "3.8.8"
- }
- },
"nbformat": 4,
- "nbformat_minor": 5
+ "nbformat_minor": 5,
+ "metadata": {}
}
diff --git a/_notebooks/03-bayesian-methods-abuja.ipynb b/_notebooks/03-bayesian-methods-abuja.ipynb
index f7a2167..564f3d4 100644
--- a/_notebooks/03-bayesian-methods-abuja.ipynb
+++ b/_notebooks/03-bayesian-methods-abuja.ipynb
@@ -4,15 +4,19 @@
"cell_type": "markdown",
"metadata": {},
"source": [
- "Bayesian Methods\n",
- "================\n",
+ "# Bayesian Methods\n",
"\n",
"### [Neil D. Lawrence](http://inverseprobability.com), Amazon Cambridge\n",
"\n",
- "and University of Sheffield \\#\\#\\# [Oluwasanmi\n",
- "Koyejo](https://sanmi.cs.illinois.edu/), Google and University of\n",
- "Illinois \\#\\#\\# 2018-11-14"
- ]
+ "and University of Sheffield\n",
+ "\n",
+ "### [Oluwasanmi Koyejo](https://sanmi.cs.illinois.edu/), Google and\n",
+ "\n",
+ "University of Illinois\n",
+ "\n",
+ "### 2018-11-14"
+ ],
+ "id": "635dac69-8cd8-4b4b-b9ac-2e9106cd1213"
},
{
"cell_type": "markdown",
@@ -24,309 +28,24 @@
"practice to Naive Bayesian classification. In this session we review the\n",
"probabilistic formulation of a classification model, reviewing initially\n",
"maximum likelihood and the naive Bayes model."
- ]
+ ],
+ "id": "efc361ac-eff9-4c78-9b90-32328f379948"
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"$$\n",
- "\\newcommand{\\tk}[1]{}\n",
- "\\newcommand{\\Amatrix}{\\mathbf{A}}\n",
- "\\newcommand{\\KL}[2]{\\text{KL}\\left( #1\\,\\|\\,#2 \\right)}\n",
- "\\newcommand{\\Kaast}{\\kernelMatrix_{\\mathbf{ \\ast}\\mathbf{ \\ast}}}\n",
- "\\newcommand{\\Kastu}{\\kernelMatrix_{\\mathbf{ \\ast} \\inducingVector}}\n",
- "\\newcommand{\\Kff}{\\kernelMatrix_{\\mappingFunctionVector \\mappingFunctionVector}}\n",
- "\\newcommand{\\Kfu}{\\kernelMatrix_{\\mappingFunctionVector \\inducingVector}}\n",
- "\\newcommand{\\Kuast}{\\kernelMatrix_{\\inducingVector \\bf\\ast}}\n",
- "\\newcommand{\\Kuf}{\\kernelMatrix_{\\inducingVector \\mappingFunctionVector}}\n",
- "\\newcommand{\\Kuu}{\\kernelMatrix_{\\inducingVector \\inducingVector}}\n",
- "\\newcommand{\\Kuui}{\\Kuu^{-1}}\n",
- "\\newcommand{\\Qaast}{\\mathbf{Q}_{\\bf \\ast \\ast}}\n",
- "\\newcommand{\\Qastf}{\\mathbf{Q}_{\\ast \\mappingFunction}}\n",
- "\\newcommand{\\Qfast}{\\mathbf{Q}_{\\mappingFunctionVector \\bf \\ast}}\n",
- "\\newcommand{\\Qff}{\\mathbf{Q}_{\\mappingFunctionVector \\mappingFunctionVector}}\n",
- "\\newcommand{\\aMatrix}{\\mathbf{A}}\n",
- "\\newcommand{\\aScalar}{a}\n",
- "\\newcommand{\\aVector}{\\mathbf{a}}\n",
- "\\newcommand{\\acceleration}{a}\n",
- "\\newcommand{\\bMatrix}{\\mathbf{B}}\n",
- "\\newcommand{\\bScalar}{b}\n",
- "\\newcommand{\\bVector}{\\mathbf{b}}\n",
- "\\newcommand{\\basisFunc}{\\phi}\n",
- "\\newcommand{\\basisFuncVector}{\\boldsymbol{ \\basisFunc}}\n",
- "\\newcommand{\\basisFunction}{\\phi}\n",
- "\\newcommand{\\basisLocation}{\\mu}\n",
- "\\newcommand{\\basisMatrix}{\\boldsymbol{ \\Phi}}\n",
- "\\newcommand{\\basisScalar}{\\basisFunction}\n",
- "\\newcommand{\\basisVector}{\\boldsymbol{ \\basisFunction}}\n",
- "\\newcommand{\\activationFunction}{\\phi}\n",
- "\\newcommand{\\activationMatrix}{\\boldsymbol{ \\Phi}}\n",
- "\\newcommand{\\activationScalar}{\\basisFunction}\n",
- "\\newcommand{\\activationVector}{\\boldsymbol{ \\basisFunction}}\n",
- "\\newcommand{\\bigO}{\\mathcal{O}}\n",
- "\\newcommand{\\binomProb}{\\pi}\n",
- "\\newcommand{\\cMatrix}{\\mathbf{C}}\n",
- "\\newcommand{\\cbasisMatrix}{\\hat{\\boldsymbol{ \\Phi}}}\n",
- "\\newcommand{\\cdataMatrix}{\\hat{\\dataMatrix}}\n",
- "\\newcommand{\\cdataScalar}{\\hat{\\dataScalar}}\n",
- "\\newcommand{\\cdataVector}{\\hat{\\dataVector}}\n",
- "\\newcommand{\\centeredKernelMatrix}{\\mathbf{ \\MakeUppercase{\\centeredKernelScalar}}}\n",
- "\\newcommand{\\centeredKernelScalar}{b}\n",
- "\\newcommand{\\centeredKernelVector}{\\centeredKernelScalar}\n",
- "\\newcommand{\\centeringMatrix}{\\mathbf{H}}\n",
- "\\newcommand{\\chiSquaredDist}[2]{\\chi_{#1}^{2}\\left(#2\\right)}\n",
- "\\newcommand{\\chiSquaredSamp}[1]{\\chi_{#1}^{2}}\n",
- "\\newcommand{\\conditionalCovariance}{\\boldsymbol{ \\Sigma}}\n",
- "\\newcommand{\\coregionalizationMatrix}{\\mathbf{B}}\n",
- "\\newcommand{\\coregionalizationScalar}{b}\n",
- "\\newcommand{\\coregionalizationVector}{\\mathbf{ \\coregionalizationScalar}}\n",
- "\\newcommand{\\covDist}[2]{\\text{cov}_{#2}\\left(#1\\right)}\n",
- "\\newcommand{\\covSamp}[1]{\\text{cov}\\left(#1\\right)}\n",
- "\\newcommand{\\covarianceScalar}{c}\n",
- "\\newcommand{\\covarianceVector}{\\mathbf{ \\covarianceScalar}}\n",
- "\\newcommand{\\covarianceMatrix}{\\mathbf{C}}\n",
- "\\newcommand{\\covarianceMatrixTwo}{\\boldsymbol{ \\Sigma}}\n",
- "\\newcommand{\\croupierScalar}{s}\n",
- "\\newcommand{\\croupierVector}{\\mathbf{ \\croupierScalar}}\n",
- "\\newcommand{\\croupierMatrix}{\\mathbf{ \\MakeUppercase{\\croupierScalar}}}\n",
- "\\newcommand{\\dataDim}{p}\n",
- "\\newcommand{\\dataIndex}{i}\n",
- "\\newcommand{\\dataIndexTwo}{j}\n",
- "\\newcommand{\\dataMatrix}{\\mathbf{Y}}\n",
- "\\newcommand{\\dataScalar}{y}\n",
- "\\newcommand{\\dataSet}{\\mathcal{D}}\n",
- "\\newcommand{\\dataStd}{\\sigma}\n",
- "\\newcommand{\\dataVector}{\\mathbf{ \\dataScalar}}\n",
- "\\newcommand{\\decayRate}{d}\n",
- "\\newcommand{\\degreeMatrix}{\\mathbf{ \\MakeUppercase{\\degreeScalar}}}\n",
- "\\newcommand{\\degreeScalar}{d}\n",
- "\\newcommand{\\degreeVector}{\\mathbf{ \\degreeScalar}}\n",
- "\\newcommand{\\diag}[1]{\\text{diag}\\left(#1\\right)}\n",
- "\\newcommand{\\diagonalMatrix}{\\mathbf{D}}\n",
- "\\newcommand{\\diff}[2]{\\frac{\\text{d}#1}{\\text{d}#2}}\n",
- "\\newcommand{\\diffTwo}[2]{\\frac{\\text{d}^2#1}{\\text{d}#2^2}}\n",
- "\\newcommand{\\displacement}{x}\n",
- "\\newcommand{\\displacementVector}{\\textbf{\\displacement}}\n",
- "\\newcommand{\\distanceMatrix}{\\mathbf{ \\MakeUppercase{\\distanceScalar}}}\n",
- "\\newcommand{\\distanceScalar}{d}\n",
- "\\newcommand{\\distanceVector}{\\mathbf{ \\distanceScalar}}\n",
- "\\newcommand{\\eigenvaltwo}{\\ell}\n",
- "\\newcommand{\\eigenvaltwoMatrix}{\\mathbf{L}}\n",
- "\\newcommand{\\eigenvaltwoVector}{\\mathbf{l}}\n",
- "\\newcommand{\\eigenvalue}{\\lambda}\n",
- "\\newcommand{\\eigenvalueMatrix}{\\boldsymbol{ \\Lambda}}\n",
- "\\newcommand{\\eigenvalueVector}{\\boldsymbol{ \\lambda}}\n",
- "\\newcommand{\\eigenvector}{\\mathbf{ \\eigenvectorScalar}}\n",
- "\\newcommand{\\eigenvectorMatrix}{\\mathbf{U}}\n",
- "\\newcommand{\\eigenvectorScalar}{u}\n",
- "\\newcommand{\\eigenvectwo}{\\mathbf{v}}\n",
- "\\newcommand{\\eigenvectwoMatrix}{\\mathbf{V}}\n",
- "\\newcommand{\\eigenvectwoScalar}{v}\n",
- "\\newcommand{\\entropy}[1]{\\mathcal{H}\\left(#1\\right)}\n",
- "\\newcommand{\\errorFunction}{E}\n",
- "\\newcommand{\\expDist}[2]{\\left<#1\\right>_{#2}}\n",
- "\\newcommand{\\expSamp}[1]{\\left<#1\\right>}\n",
- "\\newcommand{\\expectation}[1]{\\left\\langle #1 \\right\\rangle }\n",
- "\\newcommand{\\expectationDist}[2]{\\left\\langle #1 \\right\\rangle _{#2}}\n",
- "\\newcommand{\\expectedDistanceMatrix}{\\mathcal{D}}\n",
- "\\newcommand{\\eye}{\\mathbf{I}}\n",
- "\\newcommand{\\fantasyDim}{r}\n",
- "\\newcommand{\\fantasyMatrix}{\\mathbf{ \\MakeUppercase{\\fantasyScalar}}}\n",
- "\\newcommand{\\fantasyScalar}{z}\n",
- "\\newcommand{\\fantasyVector}{\\mathbf{ \\fantasyScalar}}\n",
- "\\newcommand{\\featureStd}{\\varsigma}\n",
- "\\newcommand{\\gammaCdf}[3]{\\mathcal{GAMMA CDF}\\left(#1|#2,#3\\right)}\n",
- "\\newcommand{\\gammaDist}[3]{\\mathcal{G}\\left(#1|#2,#3\\right)}\n",
- "\\newcommand{\\gammaSamp}[2]{\\mathcal{G}\\left(#1,#2\\right)}\n",
- "\\newcommand{\\gaussianDist}[3]{\\mathcal{N}\\left(#1|#2,#3\\right)}\n",
- "\\newcommand{\\gaussianSamp}[2]{\\mathcal{N}\\left(#1,#2\\right)}\n",
- "\\newcommand{\\given}{|}\n",
- "\\newcommand{\\half}{\\frac{1}{2}}\n",
- "\\newcommand{\\heaviside}{H}\n",
- "\\newcommand{\\hiddenMatrix}{\\mathbf{ \\MakeUppercase{\\hiddenScalar}}}\n",
- "\\newcommand{\\hiddenScalar}{h}\n",
- "\\newcommand{\\hiddenVector}{\\mathbf{ \\hiddenScalar}}\n",
- "\\newcommand{\\identityMatrix}{\\eye}\n",
- "\\newcommand{\\inducingInputScalar}{z}\n",
- "\\newcommand{\\inducingInputVector}{\\mathbf{ \\inducingInputScalar}}\n",
- "\\newcommand{\\inducingInputMatrix}{\\mathbf{Z}}\n",
- "\\newcommand{\\inducingScalar}{u}\n",
- "\\newcommand{\\inducingVector}{\\mathbf{ \\inducingScalar}}\n",
- "\\newcommand{\\inducingMatrix}{\\mathbf{U}}\n",
- "\\newcommand{\\inlineDiff}[2]{\\text{d}#1/\\text{d}#2}\n",
- "\\newcommand{\\inputDim}{q}\n",
- "\\newcommand{\\inputMatrix}{\\mathbf{X}}\n",
- "\\newcommand{\\inputScalar}{x}\n",
- "\\newcommand{\\inputSpace}{\\mathcal{X}}\n",
- "\\newcommand{\\inputVals}{\\inputVector}\n",
- "\\newcommand{\\inputVector}{\\mathbf{ \\inputScalar}}\n",
- "\\newcommand{\\iterNum}{k}\n",
- "\\newcommand{\\kernel}{\\kernelScalar}\n",
- "\\newcommand{\\kernelMatrix}{\\mathbf{K}}\n",
- "\\newcommand{\\kernelScalar}{k}\n",
- "\\newcommand{\\kernelVector}{\\mathbf{ \\kernelScalar}}\n",
- "\\newcommand{\\kff}{\\kernelScalar_{\\mappingFunction \\mappingFunction}}\n",
- "\\newcommand{\\kfu}{\\kernelVector_{\\mappingFunction \\inducingScalar}}\n",
- "\\newcommand{\\kuf}{\\kernelVector_{\\inducingScalar \\mappingFunction}}\n",
- "\\newcommand{\\kuu}{\\kernelVector_{\\inducingScalar \\inducingScalar}}\n",
- "\\newcommand{\\lagrangeMultiplier}{\\lambda}\n",
- "\\newcommand{\\lagrangeMultiplierMatrix}{\\boldsymbol{ \\Lambda}}\n",
- "\\newcommand{\\lagrangian}{L}\n",
- "\\newcommand{\\laplacianFactor}{\\mathbf{ \\MakeUppercase{\\laplacianFactorScalar}}}\n",
- "\\newcommand{\\laplacianFactorScalar}{m}\n",
- "\\newcommand{\\laplacianFactorVector}{\\mathbf{ \\laplacianFactorScalar}}\n",
- "\\newcommand{\\laplacianMatrix}{\\mathbf{L}}\n",
- "\\newcommand{\\laplacianScalar}{\\ell}\n",
- "\\newcommand{\\laplacianVector}{\\mathbf{ \\ell}}\n",
- "\\newcommand{\\latentDim}{q}\n",
- "\\newcommand{\\latentDistanceMatrix}{\\boldsymbol{ \\Delta}}\n",
- "\\newcommand{\\latentDistanceScalar}{\\delta}\n",
- "\\newcommand{\\latentDistanceVector}{\\boldsymbol{ \\delta}}\n",
- "\\newcommand{\\latentForce}{f}\n",
- "\\newcommand{\\latentFunction}{u}\n",
- "\\newcommand{\\latentFunctionVector}{\\mathbf{ \\latentFunction}}\n",
- "\\newcommand{\\latentFunctionMatrix}{\\mathbf{ \\MakeUppercase{\\latentFunction}}}\n",
- "\\newcommand{\\latentIndex}{j}\n",
- "\\newcommand{\\latentScalar}{z}\n",
- "\\newcommand{\\latentVector}{\\mathbf{ \\latentScalar}}\n",
- "\\newcommand{\\latentMatrix}{\\mathbf{Z}}\n",
- "\\newcommand{\\learnRate}{\\eta}\n",
- "\\newcommand{\\lengthScale}{\\ell}\n",
- "\\newcommand{\\rbfWidth}{\\ell}\n",
- "\\newcommand{\\likelihoodBound}{\\mathcal{L}}\n",
- "\\newcommand{\\likelihoodFunction}{L}\n",
- "\\newcommand{\\locationScalar}{\\mu}\n",
- "\\newcommand{\\locationVector}{\\boldsymbol{ \\locationScalar}}\n",
- "\\newcommand{\\locationMatrix}{\\mathbf{M}}\n",
- "\\newcommand{\\variance}[1]{\\text{var}\\left( #1 \\right)}\n",
- "\\newcommand{\\mappingFunction}{f}\n",
- "\\newcommand{\\mappingFunctionMatrix}{\\mathbf{F}}\n",
- "\\newcommand{\\mappingFunctionTwo}{g}\n",
- "\\newcommand{\\mappingFunctionTwoMatrix}{\\mathbf{G}}\n",
- "\\newcommand{\\mappingFunctionTwoVector}{\\mathbf{ \\mappingFunctionTwo}}\n",
- "\\newcommand{\\mappingFunctionVector}{\\mathbf{ \\mappingFunction}}\n",
- "\\newcommand{\\scaleScalar}{s}\n",
- "\\newcommand{\\mappingScalar}{w}\n",
- "\\newcommand{\\mappingVector}{\\mathbf{ \\mappingScalar}}\n",
- "\\newcommand{\\mappingMatrix}{\\mathbf{W}}\n",
- "\\newcommand{\\mappingScalarTwo}{v}\n",
- "\\newcommand{\\mappingVectorTwo}{\\mathbf{ \\mappingScalarTwo}}\n",
- "\\newcommand{\\mappingMatrixTwo}{\\mathbf{V}}\n",
- "\\newcommand{\\maxIters}{K}\n",
- "\\newcommand{\\meanMatrix}{\\mathbf{M}}\n",
- "\\newcommand{\\meanScalar}{\\mu}\n",
- "\\newcommand{\\meanTwoMatrix}{\\mathbf{M}}\n",
- "\\newcommand{\\meanTwoScalar}{m}\n",
- "\\newcommand{\\meanTwoVector}{\\mathbf{ \\meanTwoScalar}}\n",
- "\\newcommand{\\meanVector}{\\boldsymbol{ \\meanScalar}}\n",
- "\\newcommand{\\mrnaConcentration}{m}\n",
- "\\newcommand{\\naturalFrequency}{\\omega}\n",
- "\\newcommand{\\neighborhood}[1]{\\mathcal{N}\\left( #1 \\right)}\n",
- "\\newcommand{\\neilurl}{http://inverseprobability.com/}\n",
- "\\newcommand{\\noiseMatrix}{\\boldsymbol{ E}}\n",
- "\\newcommand{\\noiseScalar}{\\epsilon}\n",
- "\\newcommand{\\noiseVector}{\\boldsymbol{ \\epsilon}}\n",
- "\\newcommand{\\norm}[1]{\\left\\Vert #1 \\right\\Vert}\n",
- "\\newcommand{\\normalizedLaplacianMatrix}{\\hat{\\mathbf{L}}}\n",
- "\\newcommand{\\normalizedLaplacianScalar}{\\hat{\\ell}}\n",
- "\\newcommand{\\normalizedLaplacianVector}{\\hat{\\mathbf{ \\ell}}}\n",
- "\\newcommand{\\numActive}{m}\n",
- "\\newcommand{\\numBasisFunc}{m}\n",
- "\\newcommand{\\numComponents}{m}\n",
- "\\newcommand{\\numComps}{K}\n",
- "\\newcommand{\\numData}{n}\n",
- "\\newcommand{\\numFeatures}{K}\n",
- "\\newcommand{\\numHidden}{h}\n",
- "\\newcommand{\\numInducing}{m}\n",
- "\\newcommand{\\numLayers}{\\ell}\n",
- "\\newcommand{\\numNeighbors}{K}\n",
- "\\newcommand{\\numSequences}{s}\n",
- "\\newcommand{\\numSuccess}{s}\n",
- "\\newcommand{\\numTasks}{m}\n",
- "\\newcommand{\\numTime}{T}\n",
- "\\newcommand{\\numTrials}{S}\n",
- "\\newcommand{\\outputIndex}{j}\n",
- "\\newcommand{\\paramVector}{\\boldsymbol{ \\theta}}\n",
- "\\newcommand{\\parameterMatrix}{\\boldsymbol{ \\Theta}}\n",
- "\\newcommand{\\parameterScalar}{\\theta}\n",
- "\\newcommand{\\parameterVector}{\\boldsymbol{ \\parameterScalar}}\n",
- "\\newcommand{\\partDiff}[2]{\\frac{\\partial#1}{\\partial#2}}\n",
- "\\newcommand{\\precisionScalar}{j}\n",
- "\\newcommand{\\precisionVector}{\\mathbf{ \\precisionScalar}}\n",
- "\\newcommand{\\precisionMatrix}{\\mathbf{J}}\n",
- "\\newcommand{\\pseudotargetScalar}{\\widetilde{y}}\n",
- "\\newcommand{\\pseudotargetVector}{\\mathbf{ \\pseudotargetScalar}}\n",
- "\\newcommand{\\pseudotargetMatrix}{\\mathbf{ \\widetilde{Y}}}\n",
- "\\newcommand{\\rank}[1]{\\text{rank}\\left(#1\\right)}\n",
- "\\newcommand{\\rayleighDist}[2]{\\mathcal{R}\\left(#1|#2\\right)}\n",
- "\\newcommand{\\rayleighSamp}[1]{\\mathcal{R}\\left(#1\\right)}\n",
- "\\newcommand{\\responsibility}{r}\n",
- "\\newcommand{\\rotationScalar}{r}\n",
- "\\newcommand{\\rotationVector}{\\mathbf{ \\rotationScalar}}\n",
- "\\newcommand{\\rotationMatrix}{\\mathbf{R}}\n",
- "\\newcommand{\\sampleCovScalar}{s}\n",
- "\\newcommand{\\sampleCovVector}{\\mathbf{ \\sampleCovScalar}}\n",
- "\\newcommand{\\sampleCovMatrix}{\\mathbf{s}}\n",
- "\\newcommand{\\scalarProduct}[2]{\\left\\langle{#1},{#2}\\right\\rangle}\n",
- "\\newcommand{\\sign}[1]{\\text{sign}\\left(#1\\right)}\n",
- "\\newcommand{\\sigmoid}[1]{\\sigma\\left(#1\\right)}\n",
- "\\newcommand{\\singularvalue}{\\ell}\n",
- "\\newcommand{\\singularvalueMatrix}{\\mathbf{L}}\n",
- "\\newcommand{\\singularvalueVector}{\\mathbf{l}}\n",
- "\\newcommand{\\sorth}{\\mathbf{u}}\n",
- "\\newcommand{\\spar}{\\lambda}\n",
- "\\newcommand{\\trace}[1]{\\text{tr}\\left(#1\\right)}\n",
- "\\newcommand{\\BasalRate}{B}\n",
- "\\newcommand{\\DampingCoefficient}{C}\n",
- "\\newcommand{\\DecayRate}{D}\n",
- "\\newcommand{\\Displacement}{X}\n",
- "\\newcommand{\\LatentForce}{F}\n",
- "\\newcommand{\\Mass}{M}\n",
- "\\newcommand{\\Sensitivity}{S}\n",
- "\\newcommand{\\basalRate}{b}\n",
- "\\newcommand{\\dampingCoefficient}{c}\n",
- "\\newcommand{\\mass}{m}\n",
- "\\newcommand{\\sensitivity}{s}\n",
- "\\newcommand{\\springScalar}{\\kappa}\n",
- "\\newcommand{\\springVector}{\\boldsymbol{ \\kappa}}\n",
- "\\newcommand{\\springMatrix}{\\boldsymbol{ \\mathcal{K}}}\n",
- "\\newcommand{\\tfConcentration}{p}\n",
- "\\newcommand{\\tfDecayRate}{\\delta}\n",
- "\\newcommand{\\tfMrnaConcentration}{f}\n",
- "\\newcommand{\\tfVector}{\\mathbf{ \\tfConcentration}}\n",
- "\\newcommand{\\velocity}{v}\n",
- "\\newcommand{\\sufficientStatsScalar}{g}\n",
- "\\newcommand{\\sufficientStatsVector}{\\mathbf{ \\sufficientStatsScalar}}\n",
- "\\newcommand{\\sufficientStatsMatrix}{\\mathbf{G}}\n",
- "\\newcommand{\\switchScalar}{s}\n",
- "\\newcommand{\\switchVector}{\\mathbf{ \\switchScalar}}\n",
- "\\newcommand{\\switchMatrix}{\\mathbf{S}}\n",
- "\\newcommand{\\tr}[1]{\\text{tr}\\left(#1\\right)}\n",
- "\\newcommand{\\loneNorm}[1]{\\left\\Vert #1 \\right\\Vert_1}\n",
- "\\newcommand{\\ltwoNorm}[1]{\\left\\Vert #1 \\right\\Vert_2}\n",
- "\\newcommand{\\onenorm}[1]{\\left\\vert#1\\right\\vert_1}\n",
- "\\newcommand{\\twonorm}[1]{\\left\\Vert #1 \\right\\Vert}\n",
- "\\newcommand{\\vScalar}{v}\n",
- "\\newcommand{\\vVector}{\\mathbf{v}}\n",
- "\\newcommand{\\vMatrix}{\\mathbf{V}}\n",
- "\\newcommand{\\varianceDist}[2]{\\text{var}_{#2}\\left( #1 \\right)}\n",
- "\\newcommand{\\vecb}[1]{\\left(#1\\right):}\n",
- "\\newcommand{\\weightScalar}{w}\n",
- "\\newcommand{\\weightVector}{\\mathbf{ \\weightScalar}}\n",
- "\\newcommand{\\weightMatrix}{\\mathbf{W}}\n",
- "\\newcommand{\\weightedAdjacencyMatrix}{\\mathbf{A}}\n",
- "\\newcommand{\\weightedAdjacencyScalar}{a}\n",
- "\\newcommand{\\weightedAdjacencyVector}{\\mathbf{ \\weightedAdjacencyScalar}}\n",
- "\\newcommand{\\onesVector}{\\mathbf{1}}\n",
- "\\newcommand{\\zerosVector}{\\mathbf{0}}\n",
"$$"
- ]
+ ],
+ "id": "f84ad120-166e-4626-9c93-1065425036c8"
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
+ "::: {.cell .markdown}\n",
+ "\n",
"\n",
"\n",
"\n",
@@ -336,14 +55,18 @@
""
- ]
+ ],
+ "id": "b9e351e3-271e-4bcd-9320-afe094ff1d37"
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
- "What is Machine Learning?\n",
- "=========================\n",
+ "# What is Machine Learning?\n",
+ "\n",
+ "\\[edit\\]\n",
"\n",
"What is machine learning? At its most basic level machine learning is a\n",
"combination of\n",
@@ -364,22 +87,22 @@
"In practice we normally perform machine learning using two functions. To\n",
"combine data with a model we typically make use of:\n",
"\n",
- "**a prediction function** a function which is used to make the\n",
- "predictions. It includes our beliefs about the regularities of the\n",
- "universe, our assumptions about how the world works, e.g. smoothness,\n",
- "spatial similarities, temporal similarities.\n",
+ "**a prediction function** it is used to make the predictions. It\n",
+ "includes our beliefs about the regularities of the universe, our\n",
+ "assumptions about how the world works, e.g., smoothness, spatial\n",
+ "similarities, temporal similarities.\n",
"\n",
- "**an objective function** a function which defines the cost of\n",
- "misprediction. Typically it includes knowledge about the world’s\n",
- "generating processes (probabilistic objectives) or the costs we pay for\n",
- "mispredictions (empiricial risk minimization).\n",
+ "**an objective function** it defines the ‘cost’ of misprediction.\n",
+ "Typically, it includes knowledge about the world’s generating processes\n",
+ "(probabilistic objectives) or the costs we pay for mispredictions\n",
+ "(empirical risk minimization).\n",
"\n",
"The combination of data and model through the prediction function and\n",
"the objective function leads to a *learning algorithm*. The class of\n",
"prediction functions and objective functions we can make use of is\n",
"restricted by the algorithms they lead to. If the prediction function or\n",
"the objective function are too complex, then it can be difficult to find\n",
- "an appropriate learning algorithm. Much of the acdemic field of machine\n",
+ "an appropriate learning algorithm. Much of the academic field of machine\n",
"learning is the quest for new learning algorithms that allow us to bring\n",
"different types of models and data together.\n",
"\n",
@@ -389,19 +112,25 @@
"Example](https://royalsociety.org/~/media/policy/projects/machine-learning/publications/machine-learning-report.pdf).\n",
"\n",
"You can also check my post blog post on [What is Machine\n",
- "Learning?](http://inverseprobability.com/2017/07/17/what-is-machine-learning).."
- ]
+ "Learning?](http://inverseprobability.com/2017/07/17/what-is-machine-learning)."
+ ],
+ "id": "459396c4-9250-472e-8a24-acc7b3dddca1"
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
- "Nigerian NMIS Data\n",
- "==================\n",
+ "# Nigeria NMIS Data\n",
+ "\n",
+ "\\[edit\\]\n",
"\n",
- "As an example data set we will use Nigerian NMIS Health Facility data\n",
- "from openAFRICA. It can be found here\n",
- "https://africaopendata.org/dataset/nigeria-nmis-health-facility-data-2014\n",
+ "As an example data set we will use Nigerian Millennium Development Goals\n",
+ "Information System Health Facility (The Office of the Senior Special\n",
+ "Assistant to the President on the Millennium Development Goals\n",
+ "(OSSAP-MDGs) and Columbia University, 2014). It can be found here\n",
+ ".\n",
"\n",
"Taking from the information on the site,\n",
"\n",
@@ -424,21 +153,37 @@
"> President at funlola.osinupebi@aptovp.org\n",
">\n",
"> To learn more, please visit\n",
- "> http://csd.columbia.edu/2014/03/10/the-nigeria-mdg-information-system-nmis-takes-open-data-further/\n",
+ "> \n",
">\n",
"> Suggested citation: Nigeria NMIS facility database (2014), the Office\n",
"> of the Senior Special Assistant to the President on the Millennium\n",
- "> Development Goals (OSSAP-MDGs) & Columbia University"
- ]
+ "> Development Goals (OSSAP-MDGs) & Columbia University\n",
+ "\n",
+ "For ease of use we’ve packaged this data set in the `pods` library"
+ ],
+ "id": "ae5e993a-d5d2-4b95-9bf7-07f28ad73997"
},
{
- "cell_type": "code",
- "execution_count": null,
+ "cell_type": "markdown",
"metadata": {},
- "outputs": [],
"source": [
- "import urllib.request"
- ]
+ "## pods\n",
+ "\n",
+ "\\[edit\\]\n",
+ "\n",
+ "In Sheffield we created a suite of software tools for ‘Open Data\n",
+ "Science’. Open data science is an approach to sharing code, models and\n",
+ "data that should make it easier for companies, health professionals and\n",
+ "scientists to gain access to data science techniques.\n",
+ "\n",
+ "You can also check this blog post on [Open Data\n",
+ "Science](http://inverseprobability.com/2014/07/01/open-data-science).\n",
+ "\n",
+ "The software can be installed using"
+ ],
+ "id": "98d4ce23-e3ad-4bb4-9249-4c0f06ed0dc3"
},
{
"cell_type": "code",
@@ -446,8 +191,21 @@
"metadata": {},
"outputs": [],
"source": [
- "urllib.request.urlretrieve('https://energydata.info/dataset/f85d1796-e7f2-4630-be84-79420174e3bd/resource/6e640a13-cab4-457b-b9e6-0336051bac27/download/healthmopupandbaselinenmisfacility.csv', 'healthmopupandbaselinenmisfacility.csv')"
- ]
+ "%pip install pods"
+ ],
+ "id": "7ae7c2da-669c-4b99-b067-1168b3741792"
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "from the command prompt where you can access your python installation.\n",
+ "\n",
+ "The code is also available on GitHub: \n",
+ "\n",
+ "Once `pods` is installed, it can be imported in the usual manner."
+ ],
+ "id": "2f9c0b88-4a83-46f4-b838-d301067ddad9"
},
{
"cell_type": "code",
@@ -455,8 +213,9 @@
"metadata": {},
"outputs": [],
"source": [
- "import pandas as pd"
- ]
+ "import pods"
+ ],
+ "id": "29e12ec7-66d0-45cf-888a-acf6fff59958"
},
{
"cell_type": "code",
@@ -464,16 +223,30 @@
"metadata": {},
"outputs": [],
"source": [
- "data = pd.read_csv('healthmopupandbaselinenmisfacility.csv')"
- ]
+ "data = pods.datasets.nigeria_nmis()['Y']\n",
+ "data.head()"
+ ],
+ "id": "71d554c7-32a6-400d-a9b0-2edf5648233d"
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
+ "Alternatively, you can access the data directly with the following\n",
+ "commands.\n",
+ "\n",
+ "``` python\n",
+ "import urllib.request\n",
+ "urllib.request.urlretrieve('https://energydata.info/dataset/f85d1796-e7f2-4630-be84-79420174e3bd/resource/6e640a13-cab4-457b-b9e6-0336051bac27/download/healthmopupandbaselinenmisfacility.csv', 'healthmopupandbaselinenmisfacility.csv')\n",
+ "\n",
+ "import pandas as pd\n",
+ "data = pd.read_csv('healthmopupandbaselinenmisfacility.csv')\n",
+ "```\n",
+ "\n",
"Once it is loaded in the data can be summarized using the `describe`\n",
"method in pandas."
- ]
+ ],
+ "id": "1d3c1ac8-a582-40d6-ae92-d5fe54c1f84f"
},
{
"cell_type": "code",
@@ -482,23 +255,45 @@
"outputs": [],
"source": [
"data.describe()"
- ]
+ ],
+ "id": "0809ee8c-cadc-41b4-8edd-8e33af59e419"
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
- "In python and jupyter notebook it is possible to see a list of all\n",
- "possible functions and attributes by typing the name of the object\n",
- "followed by `.` for example in the above case if we type\n",
- "`data.` it show the columns available (these are attributes in\n",
- "pandas dataframes) such as `num_nurses_fulltime`, and also functions,\n",
+ "We can also find out the dimensions of the dataset using the `shape`\n",
+ "property."
+ ],
+ "id": "c30fc32a-ea71-4a5b-acc3-591fb66f2a93"
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "data.shape"
+ ],
+ "id": "53b20f89-a6cb-4927-b18f-cad4ad7fc62f"
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Dataframes have different functions that you can use to explore and\n",
+ "understand your data. In python and the Jupyter notebook it is possible\n",
+ "to see a list of all possible functions and attributes by typing the\n",
+ "name of the object followed by `.` for example in the above case if\n",
+ "we type `data.` it show the columns available (these are attributes\n",
+ "in pandas dataframes) such as `num_nurses_fulltime`, and also functions,\n",
"such as `.describe()`.\n",
"\n",
"For functions we can also see the documentation about the function by\n",
"following the name with a question mark. This will open a box with\n",
"documentation at the bottom which can be closed with the x button."
- ]
+ ],
+ "id": "947507dc-d0ec-449f-8dda-97a1e98aa645"
},
{
"cell_type": "code",
@@ -507,12 +302,97 @@
"outputs": [],
"source": [
"data.describe?"
- ]
+ ],
+ "id": "47e49c2e-8caf-46e0-b585-375ab38c7f06"
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import matplotlib.pyplot as plt\n",
+ "import mlai\n",
+ "import mlai.plot as plot"
+ ],
+ "id": "3e958953-7b25-41c6-b2d8-a816a5199f41"
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "fig, ax = plt.subplots(figsize=plot.big_figsize)\n",
+ "ax.plot(data.longitude, data.latitude, 'ro', alpha=0.01)\n",
+ "ax.set_xlabel('longitude')\n",
+ "ax.set_ylabel('latitude')\n",
+ "\n",
+ "mlai.write_figure('nigerian-health-facilities.png', directory='./ml')"
+ ],
+ "id": "d4c02825-863b-4fd3-91b0-a99787810204"
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "\n",
+ "\n",
+ "Figure: Location of the over thirty-four thousand health facilities\n",
+ "registered in the NMIS data across Nigeria. Each facility plotted\n",
+ "according to its latitude and longitude."
+ ],
+ "id": "18f2e7d0-dbde-496b-bf88-f8d25b0576be"
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
+ "# Probabilities\n",
+ "\n",
+ "\\[edit\\]\n",
+ "\n",
+ "We are now going to do some simple review of probabilities and use this\n",
+ "review to explore some aspects of our data.\n",
+ "\n",
+ "A probability distribution expresses uncertainty about the outcome of an\n",
+ "event. We often encode this uncertainty in a variable. So if we are\n",
+ "considering the outcome of an event, $Y$, to be a coin toss, then we\n",
+ "might consider $Y=1$ to be heads and $Y=0$ to be tails. We represent the\n",
+ "probability of a given outcome with the notation: $$\n",
+ "P(Y=1) = 0.5\n",
+ "$$ The first rule of probability is that the probability must normalize.\n",
+ "The sum of the probability of all events must equal 1. So if the\n",
+ "probability of heads ($Y=1$) is 0.5, then the probability of tails (the\n",
+ "only other possible outcome) is given by $$\n",
+ "P(Y=0) = 1-P(Y=1) = 0.5\n",
+ "$$\n",
+ "\n",
+ "Probabilities are often defined as the limit of the ratio between the\n",
+ "number of positive outcomes (e.g. *heads*) given the number of trials.\n",
+ "If the number of positive outcomes for event $y$ is denoted by $n$ and\n",
+ "the number of trials is denoted by $N$ then this gives the ratio $$\n",
+ "P(Y=y) = \\lim_{N\\rightarrow\n",
+ "\\infty}\\frac{n_y}{N}.\n",
+ "$$ In practice we never get to observe an event infinite times, so\n",
+ "rather than considering this we often use the following estimate $$\n",
+ "P(Y=y) \\approx \\frac{n_y}{N}.\n",
+ "$$"
+ ],
+ "id": "0aaa36b6-cd8d-4e7b-acf3-6e2452308cca"
+ },
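+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "As a quick illustration of the empirical estimate (a simulation, not\n",
+ "part of the data analysis), we can toss a fair coin many times and check\n",
+ "that $n_y/N$ approaches 0.5:\n",
+ "\n",
+ "``` python\n",
+ "import numpy as np\n",
+ "\n",
+ "rng = np.random.default_rng(0)\n",
+ "N = 10000\n",
+ "# simulate N fair coin tosses: 1 for heads, 0 for tails\n",
+ "tosses = rng.integers(0, 2, size=N)\n",
+ "# empirical estimate of P(Y=1) as n_1/N\n",
+ "print(tosses.sum() / N)\n",
+ "```"
+ ],
+ "id": "2b6d8f0a-4c5e-4f7a-9b1c-d3e5f7a9b0c2"
+ },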
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Exploring the NMIS Data\n",
+ "\n",
+ "\\[edit\\]\n",
+ "\n",
"The NMIS facility data is stored in an object known as a ‘data frame’.\n",
"Data frames come from the statistical family of programming languages\n",
"based on `S`, the most widely used of which is\n",
@@ -521,7 +401,8 @@
"method summarizes which columns there are in the data frame and gives us\n",
"counts, means, standard deviations and percentiles for the values in\n",
"those columns. To access a column directly we can write"
- ]
+ ],
+ "id": "aa518683-d702-4507-b9e1-039e7163c253"
},
{
"cell_type": "code",
@@ -531,7 +412,8 @@
"source": [
"print(data['num_doctors_fulltime'])\n",
"#print(data['num_nurses_fulltime'])"
- ]
+ ],
+ "id": "c1e360ea-6f5d-4408-8cf3-e0521b85f605"
},
{
"cell_type": "markdown",
@@ -540,7 +422,8 @@
"This shows the number of doctors per facility, number of nurses and\n",
"number of community health workers (CHEWS). We can plot the number of\n",
"doctors against the number of nurses as follows."
- ]
+ ],
+ "id": "40882eac-844d-4089-9c1d-573af8be83d6"
},
{
"cell_type": "code",
@@ -548,10 +431,9 @@
"metadata": {},
"outputs": [],
"source": [
- "# this ensures the plot appears in the web browser\n",
- "%matplotlib inline \n",
"import matplotlib.pyplot as plt # this imports the plotting library in python"
- ]
+ ],
+ "id": "a8ae57eb-776a-4c6c-8546-41a234a305e8"
},
{
"cell_type": "code",
@@ -560,7 +442,8 @@
"outputs": [],
"source": [
"_ = plt.plot(data['num_doctors_fulltime'], data['num_nurses_fulltime'], 'rx')"
- ]
+ ],
+ "id": "fcf23955-2c14-4f76-b17d-ac27901eef11"
},
{
"cell_type": "markdown",
@@ -568,7 +451,8 @@
"source": [
"You may be curious what the arguments we give to `plt.plot` are for, now\n",
"is the perfect time to look at the documentation"
- ]
+ ],
+ "id": "b221a310-5099-480e-9113-083f5fee877e"
},
{
"cell_type": "code",
@@ -577,7 +461,8 @@
"outputs": [],
"source": [
"plt.plot?"
- ]
+ ],
+ "id": "2ff0eda9-bc9f-405c-ba25-f5929df11ef3"
},
{
"cell_type": "markdown",
@@ -586,7 +471,8 @@
"We immediately note that some facilities have a lot of nurses, which\n",
"prevent’s us seeing the detail of the main number of facilities. First\n",
"lets identify the facilities with the most nurses."
- ]
+ ],
+ "id": "745b51a5-de56-4a0f-895f-e0ebb34631b4"
},
{
"cell_type": "code",
@@ -595,7 +481,8 @@
"outputs": [],
"source": [
"data[data['num_nurses_fulltime']>100]"
- ]
+ ],
+ "id": "de2047f4-c32a-40ce-9fbc-af8a60201ea8"
},
{
"cell_type": "markdown",
@@ -610,7 +497,8 @@
"`True`. We can also sort the result. To sort the result by the values in\n",
"the `num_nurses_fulltime` column in *descending* order we use the\n",
"following command."
- ]
+ ],
+ "id": "ff714701-cde0-44b6-a5fa-90a49ba974f8"
},
{
"cell_type": "code",
@@ -619,7 +507,8 @@
"outputs": [],
"source": [
"data[data['num_nurses_fulltime']>100].sort_values(by='num_nurses_fulltime', ascending=False)"
- ]
+ ],
+ "id": "075d06d4-2c24-4f5f-8c91-3c44b0a95094"
},
{
"cell_type": "markdown",
@@ -628,15 +517,16 @@
"We now see that the ‘University of Calabar Teaching Hospital’ is a large\n",
"outlier with 513 nurses. We can try and determine how much of an outlier\n",
"by histograming the data."
- ]
+ ],
+ "id": "504c8a04-282a-4455-b0f3-f254daf34f96"
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
- "Plotting the Data\n",
- "-----------------"
- ]
+ "## Plotting the Data"
+ ],
+ "id": "5ecf51b2-b1b2-44a9-ad2b-21158445cde3"
},
{
"cell_type": "code",
@@ -646,7 +536,8 @@
"source": [
"data['num_nurses_fulltime'].hist(bins=20) # histogram the data with 20 bins.\n",
"plt.title('Histogram of Number of Nurses')"
- ]
+ ],
+ "id": "cebea1df-6ee9-4514-9fdf-37e656ce5650"
},
{
"cell_type": "markdown",
@@ -656,7 +547,8 @@
"facilities with zero or one nurse that we don’t see the histogram for\n",
"hospitals with many nurses. We can try more bins and using a *log* scale\n",
"on the $y$-axis."
- ]
+ ],
+ "id": "caf6f225-949f-417e-ae79-7e74a17eee38"
},
{
"cell_type": "code",
@@ -668,32 +560,17 @@
"plt.title('Histogram of Number of Nurses')\n",
"ax = plt.gca()\n",
"ax.set_yscale('log')"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "### Exercise 1\n",
- "\n",
- "Read on the internet about the following python libraries: `numpy`,\n",
- "`matplotlib`, `scipy` and `pandas`. What functionality does each provide\n",
- "python?"
- ]
+ ],
+ "id": "6d2e6f5e-b5b3-4973-8286-5fc2a20d3ca7"
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
- "::: {.cell .markdown}\n",
- "\n",
- "### Exercise 1 Answer\n",
- "\n",
- "Write your answer to Exercise 1 here\n",
- "\n",
"Let’s try and see how the number of nurses relates to the number of\n",
"doctors."
- ]
+ ],
+ "id": "bab7ce3e-2b80-48cc-b85d-fd0491d1d429"
},
{
"cell_type": "code",
@@ -709,7 +586,8 @@
"plt.title('Number of Nurses against Number of Doctors')\n",
"plt.ylabel('number of nurses')\n",
"plt.xlabel('number of doctors')"
- ]
+ ],
+ "id": "3c974e61-adaf-4e12-9e47-384957a6c469"
},
{
"cell_type": "markdown",
@@ -730,53 +608,23 @@
"console window. We can move up and down the notebook and run each part\n",
"in a different order. The *state* of the program is always as we left it\n",
"after running the previous part."
- ]
+ ],
+ "id": "a6ae1605-035b-44fb-b439-bcf63d0275f3"
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
- "Probabilities\n",
- "=============\n",
- "\n",
- "We are now going to do some simple review of probabilities and use this\n",
- "review to explore some aspects of our data.\n",
+ "## Probability and the NMIS Data\n",
"\n",
- "A probability distribution expresses uncertainty about the outcome of an\n",
- "event. We often encode this uncertainty in a variable. So if we are\n",
- "considering the outcome of an event, $Y$, to be a coin toss, then we\n",
- "might consider $Y=1$ to be heads and $Y=0$ to be tails. We represent the\n",
- "probability of a given outcome with the notation: $$\n",
- "P(Y=1) = 0.5\n",
- "$$ The first rule of probability is that the probability must normalize.\n",
- "The sum of the probability of all events must equal 1. So if the\n",
- "probability of heads ($Y=1$) is 0.5, then the probability of tails (the\n",
- "only other possible outcome) is given by $$\n",
- "P(Y=0) = 1-P(Y=1) = 0.5\n",
- "$$\n",
- "\n",
- "Probabilities are often defined as the limit of the ratio between the\n",
- "number of positive outcomes (e.g. *heads*) given the number of trials.\n",
- "If the number of positive outcomes for event $y$ is denoted by $n$ and\n",
- "the number of trials is denoted by $N$ then this gives the ratio $$\n",
- "P(Y=y) = \\lim_{N\\rightarrow\n",
- "\\infty}\\frac{n_y}{N}.\n",
- "$$ In practice we never get to observe an event infinite times, so\n",
- "rather than considering this we often use the following estimate $$\n",
- "P(Y=y) \\approx \\frac{n_y}{N}.\n",
- "$$"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "Probability and the NMIS Data\n",
- "-----------------------------\n",
+ "\\[edit\\]\n",
"\n",
"Let’s use the sum rule to compute the estimate the probability that a\n",
"facility has more than two nurses."
- ]
+ ],
+ "id": "e4c72e3a-ea67-4372-a779-9ac538ac98c2"
},
{
"cell_type": "code",
@@ -789,14 +637,14 @@
"\n",
"prob_large = float(large)/float(total_facilities)\n",
"print(\"Probability of number of nurses being greather than 2 is:\", prob_large)"
- ]
+ ],
+ "id": "3c8f753c-e0a7-4e91-9e2a-e0e78b7da9c5"
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
- "Conditioning\n",
- "============\n",
+ "# Conditioning\n",
"\n",
"When predicting whether a coin turns up head or tails, we might think\n",
"that this event is *independent* of the year or time of day. If we\n",
@@ -811,7 +659,8 @@
"the number of doctors. For this we can try estimating $P(Y>2 | X>1)$ and\n",
"compare the result, for example to $P(Y>2|X\\leq 1)$ using our empirical\n",
"estimate of the probability."
- ]
+ ],
+ "id": "2d56ae4b-dc02-4ee3-ad4e-361777cacc0b"
},
{
"cell_type": "code",
@@ -823,17 +672,19 @@
"total_large_doctors = (data.num_doctors_fulltime>1).sum()\n",
"prob_both_large = large/total_large_doctors\n",
"print(\"Probability of number of nurses being greater than 2 given number of doctors is greater than 1 is:\", prob_both_large)"
- ]
+ ],
+ "id": "eaba9bfe-f9b7-4983-b77d-c111b0846dba"
},
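The comparison suggested above, $P(Y>2|X\leq 1)$, can be estimated in the same way. A minimal sketch, assuming the NMIS `data` frame loaded earlier in this notebook; if the two conditional probabilities differ markedly, the number of nurses is clearly not independent of the number of doctors.

```python
# A sketch (not part of the original notebook): estimate
# P(num_nurses > 2 | num_doctors <= 1) for comparison with the value above.
few_doctors = data.num_doctors_fulltime <= 1
large = ((data.num_nurses_fulltime > 2) & few_doctors).sum()
prob_few_doctors = float(large) / float(few_doctors.sum())
print("Probability of number of nurses being greater than 2 given number of doctors is at most 1 is:", prob_few_doctors)
```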
{
"cell_type": "markdown",
"metadata": {},
"source": [
- "### Exercise 2\n",
+ "### Exercise 1\n",
"\n",
"Write code that prints out the probability of nurses being greater than\n",
"2 for different numbers of doctors."
- ]
+ ],
+ "id": "9026afa3-370d-4a4e-9f0c-cc2aabf60bdb"
},
{
"cell_type": "code",
@@ -841,10 +692,13 @@
"metadata": {},
"outputs": [],
"source": [
- "# Write your answer to Exercise 2 here\n",
+ "# Write your answer to Exercise 1 here\n",
+ "\n",
+ "\n",
"\n",
"\n"
- ]
+ ],
+ "id": "0824097f-1494-49e0-92eb-f73a21f5d6da"
},
{
"cell_type": "markdown",
@@ -865,33 +719,20 @@
"The different basic probability distributions.\n",
"\n",
""
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "import teaching_plots as plot"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "plot.prob_diagram(diagrams='./mlai')"
- ]
+ ],
+ "id": "69d6f854-b47c-4f3a-8ad1-04d55d95de2f"
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
- "A Pictorial Definition of Probability\n",
- "-------------------------------------"
- ]
+ "## A Pictorial Definition of Probability\n",
+ "\n",
+ "\\[edit\\]"
+ ],
+ "id": "dbed4771-9566-45b8-b8a1-927315ccf714"
},
{
"cell_type": "code",
@@ -899,8 +740,9 @@
"metadata": {},
"outputs": [],
"source": [
- "import teaching_plots as plot"
- ]
+ "from mlai import plot"
+ ],
+ "id": "c9308c64-60c3-4e4f-acdb-e9a392eeef09"
},
{
"cell_type": "code",
@@ -908,43 +750,43 @@
"metadata": {},
"outputs": [],
"source": [
- "plot.prob_diagram(diagrams='../slides/diagrams')"
- ]
+ "plot.prob_diagram(diagrams='./mlai')"
+ ],
+ "id": "d318ed86-2bbb-4adb-9125-9ecda8dd0ba5"
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
- "\n",
+ "\n",
"\n",
"Figure: Diagram representing the different probabilities, joint,\n",
"marginal and conditional. This diagram was inspired by lectures given by\n",
"Christopher Bishop.\n",
"\n",
- "Inspired by lectures from Christopher\n",
- "Bishop"
- ]
+ "Inspired by lectures from Christopher Bishop"
+ ],
+ "id": "2750d35e-29a3-4a08-88ec-7e7da2b8150c"
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
- "Definition of probability distributions\n",
- "---------------------------------------\n",
+ "## Definition of probability distributions\n",
"\n",
"| Terminology | Definition | Probability Notation |\n",
- "|:------------------------|:-------------------------------------------------------|:-----------------------------|\n",
+ "|:----------|:-------------------------------------|:---------------------|\n",
"| Joint Probability | $\\lim_{N\\rightarrow\\infty}\\frac{n_{X=3,Y=4}}{N}$ | $P\\left(X=3,Y=4\\right)$ |\n",
"| Marginal Probability | $\\lim_{N\\rightarrow\\infty}\\frac{n_{X=5}}{N}$ | $P\\left(X=5\\right)$ |\n",
"| Conditional Probability | $\\lim_{N\\rightarrow\\infty}\\frac{n_{X=3,Y=4}}{n_{Y=4}}$ | $P\\left(X=3\\vert Y=4\\right)$ |"
- ]
+ ],
+ "id": "2ea4f118-43e7-4658-a5de-953e45d17956"
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
- "Notational Details\n",
- "------------------\n",
+ "## Notational Details\n",
"\n",
"Typically we should write out $P\\left(X=x,Y=y\\right)$, but in practice\n",
"we often shorten this to $P\\left(x,y\\right)$. This looks very much like\n",
@@ -969,7 +811,8 @@
"simultaneous questions, what’s the probability that the number of nurses\n",
"was over 2 and the number of doctors was 1? Or any other question that\n",
"may occur to us. Again we can easily use pandas to ask such questions."
- ]
+ ],
+ "id": "0f515cae-6851-466c-915a-09d273d36126"
},
{
"cell_type": "code",
@@ -982,14 +825,14 @@
"total_facilities = data.num_nurses_fulltime.count() # this is total number of films\n",
"prob_large = float(large)/float(total_facilities)\n",
"print(\"Probability of nurses being greater than 2 and number of doctors being\", num_doctors, \"is:\", prob_large)"
- ]
+ ],
+ "id": "88b63db9-445f-48cc-879d-58a54c08bbf6"
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
- "The Product Rule\n",
- "----------------\n",
+ "## The Product Rule\n",
"\n",
"This number is the joint probability, $P(Y, X)$ which is much *smaller*\n",
"than the conditional probability. The number can never be bigger than\n",
@@ -1010,7 +853,8 @@
"p(y, x) = p(y|x)p(x)\n",
"$$ We can see the relation working in practice for our data above by\n",
"computing the different values for $x=1$."
- ]
+ ],
+ "id": "22a399d6-b7b1-46eb-828e-b347139a1a20"
},
{
"cell_type": "code",
@@ -1020,21 +864,21 @@
"source": [
"num_doctors=1\n",
"num_nurses=2\n",
- "p_x = float((data.num_doctors_fulltime==num_doctors).sum())/float(data.num_nurses_fulltime.count())\n",
+ "p_x = float((data.num_doctors_fulltime==num_doctors).sum())/float(data.num_doctors_fulltime.count())\n",
"p_y_given_x = float((data.num_nurses_fulltime[data.num_doctors_fulltime==num_doctors]>num_nurses).sum())/float((data.num_doctors_fulltime==num_doctors).sum())\n",
"p_y_and_x = float((data.num_nurses_fulltime[data.num_doctors_fulltime==num_doctors]>num_nurses).sum())/float(data.num_nurses_fulltime.count())\n",
"\n",
"print(\"P(x) is\", p_x)\n",
"print(\"P(y|x) is\", p_y_given_x)\n",
"print(\"P(y,x) is\", p_y_and_x)"
- ]
+ ],
+ "id": "fe063a3b-a9f0-4f5b-bf66-1c7f53157519"
},
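As a quick check of the product rule, the product of the two quantities above should match the joint probability (exactly so when the nurse and doctor columns have the same number of missing entries). A minimal sketch using the values just computed:

```python
# Product rule check: P(y|x) * P(x) should (approximately) equal P(y, x).
print("P(y|x)P(x) is", p_y_given_x * p_x)
print("P(y,x)    is", p_y_and_x)
```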
{
"cell_type": "markdown",
"metadata": {},
"source": [
- "The Sum Rule\n",
- "------------\n",
+ "## The Sum Rule\n",
"\n",
"The other *fundamental rule* of probability is the *sum rule* this tells\n",
"us how to get a *marginal* distribution from the joint distribution.\n",
@@ -1044,17 +888,19 @@
"$$ Or in our shortened notation $$\n",
"P(y) = \\sum_{x} P(y, x)\n",
"$$"
- ]
+ ],
+ "id": "e726a451-d109-48cb-93df-908457d6d064"
},
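As a small illustration of the sum rule on a toy joint distribution (this is separate from the exercise below, which uses the NMIS data), here is a short sketch; the table of probabilities is made up purely for illustration.

```python
import numpy as np

# A toy joint distribution P(y, x) over y in {0, 1} and x in {0, 1, 2}.
P_joint = np.array([[0.10, 0.25, 0.15],   # P(y=0, x=0..2)
                    [0.20, 0.05, 0.25]])  # P(y=1, x=0..2)

# Sum rule: marginalise over x to obtain P(y).
P_y = P_joint.sum(axis=1)
print("P(y=0) =", P_y[0], "P(y=1) =", P_y[1])  # the two values sum to 1
```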
{
"cell_type": "markdown",
"metadata": {},
"source": [
- "### Exercise 3\n",
+ "### Exercise 2\n",
"\n",
"Write code that computes $P(y)$ by adding $P(y, x)$ for all values of\n",
"$x$."
- ]
+ ],
+ "id": "8e5d1449-7680-42b8-ab60-85e420595755"
},
{
"cell_type": "code",
@@ -1062,17 +908,19 @@
"metadata": {},
"outputs": [],
"source": [
- "# Write your answer to Exercise 3 here\n",
+ "# Write your answer to Exercise 2 here\n",
+ "\n",
+ "\n",
"\n",
"\n"
- ]
+ ],
+ "id": "0a08847f-3284-4f8f-8354-202b6b06094a"
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
- "Bayes’ Rule\n",
- "-----------\n",
+ "## Bayes’ Rule\n",
"\n",
"Bayes’ rule is a very simple rule, it’s hardly worth the name of a rule\n",
"at all. It follows directly from the product rule of probability.\n",
@@ -1087,39 +935,44 @@
"Each of these probability distributions represents the answer to a\n",
"question we have about the world. Bayes rule (via the product rule)\n",
"tells us how to *invert* the probability."
- ]
+ ],
+ "id": "07b6d44d-2351-4fc4-96d6-d264893d48f8"
},
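As a worked example of Bayes’ rule on the NMIS data, we can invert the conditional probability from the product rule section. This is a sketch that assumes `data`, `p_x` and `p_y_given_x` from the cells above; the two estimates should closely agree (exactly, if the nurse and doctor columns have the same number of missing values).

```python
# Bayes' rule: P(x|y) = P(y|x) P(x) / P(y), here with
# y: number of nurses > 2 and x: number of doctors == 1.
num_doctors = 1
num_nurses = 2

p_y = float((data.num_nurses_fulltime > num_nurses).sum()) / float(data.num_nurses_fulltime.count())

p_x_given_y = p_y_given_x * p_x / p_y

# Direct empirical estimate for comparison.
p_x_given_y_direct = float((data.num_doctors_fulltime[data.num_nurses_fulltime > num_nurses] == num_doctors).sum()) \
    / float((data.num_nurses_fulltime > num_nurses).sum())

print("P(x|y) from Bayes' rule is", p_x_given_y)
print("P(x|y) estimated directly is", p_x_given_y_direct)
```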
{
"cell_type": "markdown",
"metadata": {},
"source": [
- "Further Reading\n",
- "---------------\n",
+ "## Further Reading\n",
"\n",
"- Probability distributions: page 12–17 (Section 1.2) of Bishop (2006)"
- ]
+ ],
+ "id": "b92021d4-ac11-4c5a-9dee-44d42be89a08"
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
- "Exercises\n",
- "---------\n",
+ "## Exercises\n",
"\n",
"- Exercise 1.3 of Bishop (2006)"
- ]
+ ],
+ "id": "80aa9579-3db6-4931-9157-3bbcd4294c1e"
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
- "Probabilities for Extracting Information from Data\n",
- "--------------------------------------------------\n",
+ "## Probabilities for Extracting Information from Data\n",
+ "\n",
+ "\\[edit\\]\n",
"\n",
"What use is all this probability in data science? Let’s think about how\n",
"we might use the probabilities to do some decision making. Let’s look at\n",
"the information data."
- ]
+ ],
+ "id": "41cbc108-a169-4033-84bc-deebff83dd82"
},
{
"cell_type": "code",
@@ -1128,13 +981,14 @@
"outputs": [],
"source": [
"data.columns"
- ]
+ ],
+ "id": "39fd24a9-7611-41aa-b277-c5f4c73cab99"
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
- "### Exercise 1\n",
+ "### Exercise 3\n",
"\n",
"Now we see we have several additional features. Let’s assume we want to\n",
"predict `maternal_health_delivery_services`. How would we go about doing\n",
@@ -1146,25 +1000,28 @@
"Should you be using a joint or a conditional distribution? If it’s\n",
"conditional, what should the distribution be over, and what should it be\n",
"conditioned on?"
- ]
+ ],
+ "id": "dd6e3dab-4553-422e-8648-dc73a3b029ac"
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
- "::: {.cell .markdown}\n",
+ "### Exercise 3 Answer\n",
"\n",
- "### Exercise 1 Answer\n",
- "\n",
- "Write your answer to Exercise 1 here"
- ]
+ "Write your answer to Exercise 3 here"
+ ],
+ "id": "e2b2df91-e6ab-4797-8c9c-da769307e9ff"
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
- "Probabilistic Modelling\n",
- "-----------------------\n",
+ "## Probabilistic Modelling\n",
+ "\n",
+ "\\[edit\\]\n",
"\n",
"This Bayesian approach is designed to deal with uncertainty arising from\n",
"fitting our prediction function to the data we have, a reduced data set.\n",
@@ -1220,14 +1077,18 @@
"p(\\mathbf{ y}_*|\\mathbf{ y}) = \\int p(\\mathbf{ y}_*|\\mathbf{X}_*, \\boldsymbol{ \\theta}) p(\\boldsymbol{ \\theta}| \\mathbf{ y}, \\mathbf{X}) p(\\mathbf{X}) p(\\mathbf{X}_*) \\text{d} \\boldsymbol{ \\theta}\\text{d} \\mathbf{X}\\text{d}\\mathbf{X}_*\n",
"$$ and we have *unsupervised learning* (from where we can get deep\n",
"generative models)."
- ]
+ ],
+ "id": "d66df8c0-1679-44b6-9af4-152527c046b1"
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
- "Graphical Models\n",
- "----------------\n",
+ "## Graphical Models\n",
+ "\n",
+ "\\[edit\\]\n",
"\n",
"One way of representing a joint distribution is to consider conditional\n",
"dependencies between data. Conditional dependencies allow us to\n",
@@ -1236,7 +1097,8 @@
"conditional relationships between points that are neighboring, often in\n",
"time or space. It can be decomposed in the following form.\n",
"$$p(\\mathbf{ y}) = p(y_n| y_{n-1}) p(y_{n-1}|y_{n-2}) \\dots p(y_{2} | y_{1})$$"
- ]
+ ],
+ "id": "b9f736a6-ce5f-4b08-93f4-027c0e76fff4"
},
{
"cell_type": "code",
@@ -1249,7 +1111,8 @@
"\n",
"rc(\"font\", **{'family':'sans-serif','sans-serif':['Helvetica']}, size=30)\n",
"rc(\"text\", usetex=True)"
- ]
+ ],
+ "id": "786061bf-2ca6-4778-96fb-7bbebbbed1b9"
},
{
"cell_type": "code",
@@ -1272,13 +1135,14 @@
"pgm.add_edge(\"y_2\", \"y_3\")\n",
"\n",
"pgm.render().figure.savefig(\"./ml/markov.svg\", transparent=True)"
- ]
+ ],
+ "id": "bed4a782-61f5-4e31-a89c-cd1d0db61cd5"
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
- "\n",
+ "\n",
"\n",
"Figure: A Markov chain is a simple form of probabilistic graphical\n",
"model providing a particular decomposition of the joint density.\n",
@@ -1298,7 +1162,7 @@
"of *C Difficile* infection following colon surgery (Steele et al.,\n",
"2012).\n",
"\n",
- "\n",
+ "\n",
"\n",
"Figure: A probabilistic directed graph used to predict the\n",
"perioperative risk of *C Difficile* infection following colon surgery.\n",
@@ -1308,14 +1172,18 @@
"\n",
"To capture the complexity in the interelationship between the data, the\n",
"graph itself becomes more complex, and less interpretable."
- ]
+ ],
+ "id": "2ec95e98-fdb6-403e-b16c-dc42c965dea9"
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
- "Introduction to Classification\n",
- "------------------------------\n",
+ "## Introduction to Classification\n",
+ "\n",
+ "\\[edit\\]\n",
"\n",
"Classification is perhaps the technique most closely assocated with\n",
"machine learning. In the speech based agents, on-device classifiers are\n",
@@ -1365,14 +1233,18 @@
"relevant in the prediction, (2) defining the appropriate *class of\n",
"function*, $f(\\cdot)$, to use and (3) selecting the right parameters,\n",
"$\\mathbf{ w}$."
- ]
+ ],
+ "id": "bba2ea18-c655-4ac0-b9de-3c6bad0fd322"
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
- "Classification Examples\n",
- "-----------------------\n",
+ "## Classification Examples\n",
+ "\n",
+ "\\[edit\\]\n",
"\n",
"- Classifiying hand written digits from binary images (automatic zip\n",
" code reading)\n",
@@ -1381,14 +1253,18 @@
"- Classifying type of cancer given gene expression data.\n",
"- Categorization of document types (different types of news article on\n",
" the internet)"
- ]
+ ],
+ "id": "bda292e2-0ffc-4184-a7fe-9af7759b03f8"
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
- "Bernoulli Distribution\n",
- "----------------------\n",
+ "## Bernoulli Distribution\n",
+ "\n",
+ "\\[edit\\]\n",
"\n",
"Our focus has been on models where the objective function is inspired by\n",
"a probabilistic analysis of the problem. In particular we’ve argued that\n",
@@ -1404,7 +1280,7 @@
"$\\pi$ to be a variable) then we can specify the probability distribution\n",
"through a table.\n",
"\n",
- "| $y$ | 0 | 1 |\n",
+ "| $y$ | 0 | 1 |\n",
"|:------:|:---------:|:-----:|\n",
"| $P(y)$ | $(1-\\pi)$ | $\\pi$ |\n",
"\n",
@@ -1438,7 +1314,8 @@
"where he considers Pascal’s triangle in forming combinations of the\n",
"Bernoulli distribution to realise the binomial distribution for the\n",
"outcome of positive trials."
- ]
+ ],
+ "id": "bafe09f0-bc3e-42dc-a20b-993d005cc10f"
},
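Before looking at Bernoulli’s own presentation below, the table above can be written compactly as $P(y|\pi) = \pi^y(1-\pi)^{1-y}$. A minimal sketch of this functional form (the value $\pi=0.3$ is only for illustration):

```python
# Bernoulli probability mass function, P(y) = pi**y * (1 - pi)**(1 - y),
# matching the table: P(y=0) = 1 - pi, P(y=1) = pi.
def bernoulli(y, pi):
    return pi**y * (1 - pi)**(1 - y)

pi = 0.3  # illustrative value only
print("P(y=0) =", bernoulli(0, pi))  # 0.7
print("P(y=1) =", bernoulli(1, pi))  # 0.3
```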
{
"cell_type": "code",
@@ -1446,9 +1323,10 @@
"metadata": {},
"outputs": [],
"source": [
- "import pods\n",
- "pods.notebook.display_google_book(id='CF4UAAAAQAAJ', page='PA87')"
- ]
+ "import notutils as nu\n",
+ "nu.display_google_book(id='CF4UAAAAQAAJ', page='PA87')"
+ ],
+ "id": "2ebf86b9-3995-4f57-9cca-5fe0c2ba92b7"
},
{
"cell_type": "code",
@@ -1457,8 +1335,9 @@
"outputs": [],
"source": [
"import matplotlib.pyplot as plt\n",
- "import teaching_plots as plot"
- ]
+ "import mlai.plot as plot"
+ ],
+ "id": "af1b17b2-6be7-48bd-93b7-04681fe7638f"
},
{
"cell_type": "code",
@@ -1468,13 +1347,14 @@
"source": [
"fig, ax = plt.subplots(figsize=plot.one_figsize)\n",
"plot.bernoulli_urn(ax, diagrams='./ml/')"
- ]
+ ],
+ "id": "306a4bb4-7315-4067-9f1c-6f9d151608d1"
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
- "\n",
+ "\n",
"\n",
"Figure: Jacob Bernoulli described the Bernoulli distribution through\n",
"an urn in which there are black and red balls.\n",
@@ -1496,7 +1376,8 @@
"\n",
"For this reason in Bayes’s distribution there is considered to be\n",
"*aleatoric* uncertainty about the distribution parameter."
- ]
+ ],
+ "id": "9287a5c6-1979-4bd9-bba2-51f40c72e6b4"
},
{
"cell_type": "code",
@@ -1505,8 +1386,9 @@
"outputs": [],
"source": [
"import matplotlib.pyplot as plt\n",
- "import teaching_plots as plot"
- ]
+ "import mlai.plot as plot"
+ ],
+ "id": "a8a061dd-c360-4da0-bbca-2fbc8c33f617"
},
{
"cell_type": "code",
@@ -1516,13 +1398,14 @@
"source": [
"fig, ax = plt.subplots(figsize=plot.one_figsize)\n",
"plot.bayes_billiard(ax, diagrams='./ml/')"
- ]
+ ],
+ "id": "674ab06c-b1d2-4cfc-810a-ecd431644803"
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
- "\n",
+ "\n",
"\n",
"Figure: Thomas Bayes described the Bernoulli distribution\n",
"independently of Jacob Bernoulli. He used the analogy of a billiard\n",
@@ -1532,7 +1415,8 @@
"ball (in the figure) gives the outcome as either left or right (relative\n",
"to the first ball). This is the origin of the term Bayesian because the\n",
"parameter of the distribution is drawn from a probsbility."
- ]
+ ],
+ "id": "64663c60-a90c-483f-8421-5d526371b031"
},
{
"cell_type": "code",
@@ -1540,9 +1424,10 @@
"metadata": {},
"outputs": [],
"source": [
- "import pods\n",
+ "import notutils as nu\n",
"from ipywidgets import IntSlider"
- ]
+ ],
+ "id": "56c8516a-7317-4658-847f-17ab6f533390"
},
{
"cell_type": "code",
@@ -1550,17 +1435,31 @@
"metadata": {},
"outputs": [],
"source": [
- "pods.notebook.display_plots('bayes-billiard{counter:0>3}.svg', \n",
+ "import notutils as nu"
+ ],
+ "id": "f32fa897-fddc-43b4-b205-a70a76a0055b"
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "nu.display_plots('bayes-billiard{counter:0>3}.svg', \n",
" directory='./ml', \n",
" counter=IntSlider(0,0,9,1))"
- ]
+ ],
+ "id": "50d84159-84c0-4c23-a2d4-dde76da3f41d"
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
- "Maximum Likelihood in the Bernoulli\n",
- "-----------------------------------\n",
+ "## Maximum Likelihood in the Bernoulli\n",
+ "\n",
+ "\\[edit\\]\n",
"\n",
"Maximum likelihood in the Bernoulli distribution is straightforward.\n",
"Let’s assume we have data, $\\mathbf{ y}$ which consists of a vector of\n",
@@ -1595,7 +1494,8 @@
"estimate the probability of a coin being heads, and you tossed the coin\n",
"100 times, and recovered 47 heads, then the estimate of the probability\n",
"of heads should be $\\frac{47}{100}$."
- ]
+ ],
+ "id": "43f4a9d5-94b3-442a-8ba4-5c434cd659ef"
},
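The estimate $\pi = \frac{1}{n}\sum_{i=1}^{n} y_i$ is easy to check numerically. A minimal sketch with simulated coin tosses (the seed and sample size are arbitrary choices for illustration):

```python
import numpy as np

rng = np.random.default_rng(0)        # arbitrary seed, for reproducibility only
y = rng.binomial(1, 0.5, size=100)    # 100 tosses of a fair coin

pi_ml = y.sum() / len(y)              # maximum likelihood estimate: heads / tosses
print("Maximum likelihood estimate of the probability of heads:", pi_ml)
```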
{
"cell_type": "markdown",
@@ -1605,7 +1505,8 @@
"\n",
"Show that the maximum likelihood solution we have found is a *minimum*\n",
"for our objective."
- ]
+ ],
+ "id": "681a5f5e-ce48-49f0-9de7-e0fdd81faaaf"
},
{
"cell_type": "markdown",
@@ -1614,7 +1515,8 @@
"### Exercise 4 Answer\n",
"\n",
"Write your answer to Exercise 4 here"
- ]
+ ],
+ "id": "efa2418d-88bf-411a-a538-96b4c726aab4"
},
{
"cell_type": "code",
@@ -1624,7 +1526,8 @@
"source": [
"# Use this box for any code you need\n",
"\n"
- ]
+ ],
+ "id": "028ec197-091d-4376-8d58-f6b0bcfc5db0"
},
{
"cell_type": "markdown",
@@ -1641,14 +1544,18 @@
"2. Likelihood\n",
"3. Posterior distribution\n",
"4. Marginal likelihood"
- ]
+ ],
+ "id": "427ed3f3-6825-4f87-9a5a-db6b4538cdfa"
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
- "Naive Bayes Classifiers\n",
- "-----------------------\n",
+ "## Naive Bayes Classifiers\n",
+ "\n",
+ "\\[edit\\]\n",
"\n",
"*Note*: Everything we do below is possible using standard packages like\n",
"`scikit-learn`, our purpose in this session is to help you understand\n",
@@ -1686,14 +1593,14 @@
"\n",
"In naive Bayes we make certain simplifying assumptions that allow us to\n",
"perform all of the above in practice."
- ]
+ ],
+ "id": "5cac2b6b-a77e-4e0c-89b0-750ce01dbeaa"
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
- "Data Conditional Independence\n",
- "-----------------------------\n",
+ "## Data Conditional Independence\n",
"\n",
"If we are given model parameters $\\boldsymbol{ \\theta}$ we assume that\n",
"conditioned on all these parameters that all data points in the model\n",
@@ -1713,14 +1620,14 @@
"\n",
"Computing posterior distribution in this case becomes easier, this is\n",
"known as the ‘Bayes classifier’."
- ]
+ ],
+ "id": "66751653-054a-4863-a188-189ec908e36c"
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
- "Feature Conditional Independence\n",
- "--------------------------------\n",
+ "## Feature Conditional Independence\n",
"\n",
"$$\n",
"p(\\mathbf{ x}_i | y_i, \\boldsymbol{ \\theta}) = \\prod_{j=1}^{p} p(x_{i,j}|y_i, \\boldsymbol{ \\theta})\n",
@@ -1732,14 +1639,14 @@
"parameters *and* the label. So for each data point we have\n",
"$$p(\\mathbf{ x}_i | y_i, \\boldsymbol{ \\theta}) = \\prod_{j=1}^{p} p(x_{i,j}|y_i,\\boldsymbol{ \\theta})$$\n",
"where $p$ is the dimensionality of our inputs."
- ]
+ ],
+ "id": "c6bdbb40-f0cf-4bf6-9e9d-67a76a59769a"
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
- "Marginal Density for $y_i$\n",
- "--------------------------\n",
+ "## Marginal Density for $y_i$\n",
"\n",
"$$\n",
"p(x_{i,j},y_i| \\boldsymbol{ \\theta}) = p(x_{i,j}|y_i, \\boldsymbol{ \\theta})p(y_i).\n",
@@ -1759,14 +1666,14 @@
"for our prior over $y_i$, $$p(y_i|\\pi) = \\pi^{y_i} (1-\\pi)^{1-y_i}$$\n",
"where $\\pi$ now has the interpretation as being the *prior* probability\n",
"that the classification should be positive."
- ]
+ ],
+ "id": "86d41a8f-c6b2-4723-99a7-f4305ec3c230"
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
- "Joint Density for Naive Bayes\n",
- "-----------------------------\n",
+ "## Joint Density for Naive Bayes\n",
"\n",
"This allows us to write down the full joint density of the training\n",
"data, $$\n",
@@ -1815,39 +1722,27 @@
"$\\boldsymbol{ \\theta}$ alone so we have, $$\n",
"E(\\pi, \\boldsymbol{ \\theta}) = E(\\boldsymbol{ \\theta}) + E(\\pi).\n",
"$$"
- ]
+ ],
+ "id": "9701196b-8ae7-401b-a40a-2efaba270034"
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
- "Nigerian NMIS Data\n",
- "------------------\n",
+ "## Nigeria NMIS Data Classification\n",
"\n",
- "First we will load in the Nigerian NMIS health data. Our aim will be to\n",
- "predict whether a center has maternal health delivery services given the\n",
- "attributes in the data. We will predict of the number of nurses, the\n",
- "number of doctors, location etc.\n",
+ "\\[edit\\]\n",
+ "\n",
+ "Our aim will be to predict whether a center has maternal health delivery\n",
+ "services given the attributes in the data. We will predict of the number\n",
+ "of nurses, the number of doctors, location etc.\n",
"\n",
- "Let’s first remind ourselves of the data."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "data.head()"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
"Now we will convert this data into a form which we can use as inputs\n",
"`X`, and labels `y`."
- ]
+ ],
+ "id": "4cc866d0-d400-4c13-a87a-758e4a0e507d"
},
{
"cell_type": "code",
@@ -1857,7 +1752,8 @@
"source": [
"import pandas as pd\n",
"import numpy as np"
- ]
+ ],
+ "id": "54a93596-6354-4130-935a-3723d7c8d628"
},
{
"cell_type": "code",
@@ -1897,7 +1793,8 @@
" type_names.append(type_col)\n",
" X.loc[:, type_col] = 0.0 \n",
" X.loc[index, type_col] = 1.0"
- ]
+ ],
+ "id": "c6206f8d-d977-43b5-b7a8-ec7681d31923"
},
{
"cell_type": "markdown",
@@ -1905,7 +1802,8 @@
"source": [
"This has given us a new data frame `X` which contains the different\n",
"facility types in different columns."
- ]
+ ],
+ "id": "6872db9b-3dca-4ad0-aca7-8b6bddd5ad4f"
},
{
"cell_type": "code",
@@ -1914,20 +1812,25 @@
"outputs": [],
"source": [
"X.describe()"
- ]
+ ],
+ "id": "6bdc4fd3-b21b-4bb0-bb6f-97b1feb0aebf"
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
- "Naive Bayes NMIS\n",
- "----------------\n",
+ "## Naive Bayes NMIS\n",
+ "\n",
+ "\\[edit\\]\n",
"\n",
"We can now specify the naive Bayes model. For the genres we want to\n",
"model the data as Bernoulli distributed, and for the year and body count\n",
"we want to model the data as Gaussian distributed. We set up two data\n",
"frames to contain the parameters for the rows and the columns below."
- ]
+ ],
+ "id": "c04cbf9e-9d06-4711-ab97-2fb95c348b73"
},
{
"cell_type": "code",
@@ -1952,7 +1855,8 @@
" 'longitude']\n",
"Bernoulli = pd.DataFrame(data=np.zeros((2,len(binary_columns))), columns=binary_columns, index=['theta_0', 'theta_1'])\n",
"Gaussian = pd.DataFrame(data=np.zeros((4,len(real_columns))), columns=real_columns, index=['mu_0', 'sigma2_0', 'mu_1', 'sigma2_1'])"
- ]
+ ],
+ "id": "f4fdda5f-1a49-488e-83d0-463678626db2"
},
{
"cell_type": "markdown",
@@ -1960,7 +1864,8 @@
"source": [
"Now we have the data in a form ready for analysis, let’s construct our\n",
"data matrix."
- ]
+ ],
+ "id": "d001db1b-ea24-4f99-a757-832ccd0cb490"
},
{
"cell_type": "code",
@@ -1976,7 +1881,8 @@
"y_train = y.iloc[train_indices]==True\n",
"X_test = X.iloc[test_indices]\n",
"y_test = y.iloc[test_indices]==True"
- ]
+ ],
+ "id": "f7554a83-b758-41fb-808a-5815ceb89ece"
},
{
"cell_type": "markdown",
@@ -1988,7 +1894,8 @@
"solution for the Bernoulli. Or by computing the empirical mean and\n",
"variance of the data for the Gaussian, which also gives us the maximum\n",
"likelihood solution."
- ]
+ ],
+ "id": "4250fac6-bf65-4de8-8f2c-8d2b4e08667f"
},
{
"cell_type": "code",
@@ -2005,7 +1912,8 @@
" if column in Bernoulli:\n",
" Bernoulli[column]['theta_0'] = X_train[column][~y_train].sum()/(~y_train).sum()\n",
" Bernoulli[column]['theta_1'] = X_train[column][y_train].sum()/(y_train).sum()"
- ]
+ ],
+ "id": "8760b1aa-a00d-486f-b91c-9b9993a90376"
},
{
"cell_type": "markdown",
@@ -2013,7 +1921,8 @@
"source": [
"We can examine the nature of the distributions we’ve fitted to the model\n",
"by looking at the entries in these data frames."
- ]
+ ],
+ "id": "2f615c45-4ba0-4319-8aba-f233dede119a"
},
{
"cell_type": "code",
@@ -2022,7 +1931,8 @@
"outputs": [],
"source": [
"Bernoulli"
- ]
+ ],
+ "id": "7494c4ce-c912-42b4-bf16-4c7780491484"
},
{
"cell_type": "markdown",
@@ -2039,7 +1949,8 @@
"The naive Bayes assumption says that the joint probability for these\n",
"services is given by the product of each of these Bernoulli\n",
"distributions."
- ]
+ ],
+ "id": "fae89b19-2626-48d8-b4a3-0b4f3ae7527f"
},
{
"cell_type": "code",
@@ -2048,7 +1959,8 @@
"outputs": [],
"source": [
"Gaussian"
- ]
+ ],
+ "id": "0deeeab2-e5be-428d-a925-816aa8c07839"
},
{
"cell_type": "markdown",
@@ -2068,7 +1980,8 @@
"\n",
"The final model parameter is the prior probability of the positive\n",
"class, $\\pi$, which is computed by maximum likelihood."
- ]
+ ],
+ "id": "471e4729-c44a-48d9-bae4-6211119087ed"
},
{
"cell_type": "code",
@@ -2077,7 +1990,8 @@
"outputs": [],
"source": [
"prior = float(y_train.sum())/len(y_train)"
- ]
+ ],
+ "id": "103c933f-730a-424d-a4b9-25fa32fba500"
},
{
"cell_type": "markdown",
@@ -2085,14 +1999,14 @@
"source": [
"The prior probability tells us that slightly more facilities have\n",
"maternity services than those that don’t."
- ]
+ ],
+ "id": "a64dbed9-a2c1-4016-a581-2149ea3a5250"
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
- "Making Predictions\n",
- "------------------\n",
+ "## Making Predictions\n",
"\n",
"Naive Bayes has given us the class conditional densities:\n",
"$p(\\mathbf{ x}_i | y_i, \\boldsymbol{ \\theta})$. To make predictions with\n",
@@ -2125,7 +2039,8 @@
"\\boldsymbol{ \\theta})p(y^*|\\pi)}{\\sum_{y^*=0}^1 \\prod_{j=1}^{p} p(x^*_{j}|y^*_i, \\boldsymbol{ \\theta})p(y^*|\\pi)}\n",
"$$ This formula is also fairly straightforward to implement. First we\n",
"implement the log probabilities for the Gaussian density."
- ]
+ ],
+ "id": "d2799300-234e-40ff-abac-8087ce64e438"
},
{
"cell_type": "code",
@@ -2135,7 +2050,8 @@
"source": [
"def log_gaussian(x, mu, sigma2):\n",
" return -0.5* np.log(2*np.pi*sigma2)-((x-mu)**2)/(2*sigma2)"
- ]
+ ],
+ "id": "4be2e55c-2c48-4f43-9dcb-df3fb566b9fd"
},
{
"cell_type": "markdown",
@@ -2149,7 +2065,8 @@
"and smaller, and may be difficult to represent accurately (or even\n",
"underflow). Working in log space can ameliorate this problem. We can\n",
"also compute the log probability for the Bernoulli distribution."
- ]
+ ],
+ "id": "a8c20a0f-48f7-4884-ad99-9e2e02d5e701"
},
{
"cell_type": "code",
@@ -2159,14 +2076,14 @@
"source": [
"def log_bernoulli(x, theta):\n",
" return x*np.log(theta) + (1-x)*np.log(1-theta)"
- ]
+ ],
+ "id": "eb84ad72-ccb3-4327-a7f0-82db7e3b7ef3"
},
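The numerical point made above can be seen directly: the product of many small probabilities underflows to zero in floating point, while the sum of their logarithms remains perfectly representable. A minimal sketch:

```python
import numpy as np

probs = np.full(500, 0.01)                    # 500 probabilities of 0.01
print("Direct product:", np.prod(probs))      # underflows to 0.0
print("Sum of logs:", np.sum(np.log(probs)))  # about -2302.6, still usable
```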
{
"cell_type": "markdown",
"metadata": {},
"source": [
- "Laplace Smoothing\n",
- "-----------------\n",
+ "## Laplace Smoothing\n",
"\n",
"Before we proceed, let’s just pause and think for a moment what will\n",
"happen if `theta` here is either zero or one. This will result in\n",
@@ -2181,7 +2098,8 @@
"wish to predict the sun rise the following day to describe his idea of\n",
"smoothing, which can be found at the bottom of following page from\n",
"Laplace’s ‘Essai Philosophique …’"
- ]
+ ],
+ "id": "3718bd49-2da9-4825-b9e3-97d58b5d2ab5"
},
{
"cell_type": "code",
@@ -2189,9 +2107,10 @@
"metadata": {},
"outputs": [],
"source": [
- "import pods\n",
- "pods.notebook.display_google_book(id='1YQPAAAAQAAJ', page='PA16')"
- ]
+ "import notutils as nu\n",
+ "nu.display_google_book(id='1YQPAAAAQAAJ', page='PA16')"
+ ],
+ "id": "ce02c4df-d41e-45c9-badc-a3e723dfa02c"
},
{
"cell_type": "markdown",
@@ -2236,7 +2155,8 @@
"\\pi = \\frac{\\sum_{i=1}^{n} y_i + 1}{n+ 2}\n",
"$$ to prevent problems with certainty causing numerical issues and\n",
"misclassifications. Let’s refit the Bernoulli features now."
- ]
+ ],
+ "id": "92e50a8a-e3a2-404e-97bc-386c28c4026c"
},
{
"cell_type": "code",
@@ -2249,14 +2169,16 @@
" if column in Bernoulli:\n",
" Bernoulli[column]['theta_0'] = (X_train[column][~y_train].sum() + 1)/((~y_train).sum() + 2)\n",
" Bernoulli[column]['theta_1'] = (X_train[column][y_train].sum() + 1)/((y_train).sum() + 2)"
- ]
+ ],
+ "id": "557af691-9c3c-4b68-b25c-6254e590bca7"
},
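The same smoothing can be applied to the class prior, $\pi = \frac{\sum_{i=1}^{n} y_i + 1}{n+2}$, as stated above. The sketch below keeps the result in a separate variable, so the unsmoothed `prior` used later in the notebook is unchanged unless you choose to substitute it.

```python
# Laplace-smoothed estimate of the class prior (assumes y_train from above).
prior_smoothed = (float(y_train.sum()) + 1) / (len(y_train) + 2)
print("Smoothed prior:", prior_smoothed)
```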
{
"cell_type": "markdown",
"metadata": {},
"source": [
"That places us in a position to write the prediction function."
- ]
+ ],
+ "id": "928a92b1-6747-403e-acd0-248b89d14abf"
},
{
"cell_type": "code",
@@ -2266,7 +2188,8 @@
"source": [
"import numpy as np\n",
"import pandas as pd"
- ]
+ ],
+ "id": "dc4a68cd-2c27-4f60-9bb0-93ce42941841"
},
{
"cell_type": "code",
@@ -2291,14 +2214,16 @@
" + np.exp(log_negative.values[i] + np.log(1-prior)))\n",
" return v\n",
" #return np.exp(log_positive + np.log(prior))/(np.exp(log_positive + np.log(prior)) + np.exp(log_negative + np.log(1-prior)))"
- ]
+ ],
+ "id": "e6fb8db7-615c-446f-a50c-cb41cb855186"
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Now we are in a position to make the predictions for the test data."
- ]
+ ],
+ "id": "95ea08cb-d622-448a-94c0-9a5d4b99c1e5"
},
{
"cell_type": "code",
@@ -2307,7 +2232,8 @@
"outputs": [],
"source": [
"p_y = predict(X_test, Gaussian, Bernoulli, prior)"
- ]
+ ],
+ "id": "f1724a5d-5128-4dc0-a253-fedda26731bf"
},
{
"cell_type": "markdown",
@@ -2318,7 +2244,8 @@
"with greater than 50% probability of membership of the positive class to\n",
"the positive class. We can then compare to the true values, and see how\n",
"many of these values we got correct. This is our total number correct."
- ]
+ ],
+ "id": "a7536f34-f130-46c8-a904-37f0f9118121"
},
{
"cell_type": "code",
@@ -2329,7 +2256,8 @@
"correct = y_test.eq(p_y>0.5)\n",
"total_correct = sum(correct)\n",
"print(\"Total correct\", total_correct, \" out of \", len(y_test), \"which is\", float(total_correct)/len(y_test), \"%\")"
- ]
+ ],
+ "id": "8de1b8aa-d613-474f-a7c0-63c13a3ad5ec"
},
{
"cell_type": "markdown",
@@ -2344,7 +2272,8 @@
"contain the false positives and the false negatives. Along the rows of\n",
"the matrix we place the actual class, and along the columns we place our\n",
"predicted class."
- ]
+ ],
+ "id": "d641a709-1cab-475a-b63c-b0c4b723972e"
},
{
"cell_type": "code",
@@ -2360,7 +2289,8 @@
"confusion_matrix['predicted no maternity']['actual maternity'] = (y_test & ~(p_y>0.5)).sum()\n",
"confusion_matrix['predicted no maternity']['actual no maternity'] = (~y_test & ~(p_y>0.5)).sum()\n",
"confusion_matrix"
- ]
+ ],
+ "id": "25e9f749-905e-4c7a-bd5e-76ae2f8b4eb7"
},
{
"cell_type": "markdown",
@@ -2372,7 +2302,8 @@
"valid? Are some features more helpful than others? What happens if you\n",
"remove features that appear to be less helpful. How might you select\n",
"such features?"
- ]
+ ],
+ "id": "8159efb1-3fcf-454b-99fe-9f9d265a4558"
},
{
"cell_type": "markdown",
@@ -2381,7 +2312,8 @@
"### Exercise 5 Answer\n",
"\n",
"Write your answer to Exercise 5 here"
- ]
+ ],
+ "id": "a167afda-3863-4568-8f46-2e2578241a0c"
},
{
"cell_type": "code",
@@ -2391,7 +2323,8 @@
"source": [
"# Use this box for any code you need\n",
"\n"
- ]
+ ],
+ "id": "94819798-190c-48c9-a931-a5f04d733ecc"
},
{
"cell_type": "markdown",
@@ -2406,7 +2339,8 @@
"test set how low do you have to set the threshold to avoid all the false\n",
"negatives (i.e. facilities where you predicted there was no maternity,\n",
"but in actuality there were?"
- ]
+ ],
+ "id": "5c804c94-7d87-4e80-ba87-7bb5d57db940"
},
{
"cell_type": "markdown",
@@ -2415,7 +2349,8 @@
"### Exercise 6 Answer\n",
"\n",
"Write your answer to Exercise 6 here"
- ]
+ ],
+ "id": "bf62eb17-bfb7-4b44-814d-22fb58c6b64a"
},
{
"cell_type": "code",
@@ -2425,21 +2360,22 @@
"source": [
"# Use this box for any code you need\n",
"\n"
- ]
+ ],
+ "id": "63f9748d-e24b-4348-a6c2-b95c1e0d3791"
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
- "Making Predictions\n",
- "------------------\n",
+ "## Making Predictions\n",
"\n",
"Naive Bayes has given us the class conditional densities:\n",
"$p(\\mathbf{ x}_i | y_i, \\boldsymbol{ \\theta})$. To make predictions with\n",
"these densities we need to form the distribution given by $$\n",
"P(y^*| \\mathbf{ y}, \\mathbf{X}, \\mathbf{ x}^*, \\boldsymbol{ \\theta})\n",
"$$"
- ]
+ ],
+ "id": "7f1ff4a3-01d6-4bca-981a-f866bd15d7d6"
},
{
"cell_type": "markdown",
@@ -2455,7 +2391,8 @@
"$$ $$\n",
"\\sigma^2 = \\frac{\\sum_{i=1}^{n} (x_i - \\mu)^2}{n}\n",
"$$"
- ]
+ ],
+ "id": "27548be0-bd80-4584-ab07-fcf20bc69b07"
},
{
"cell_type": "markdown",
@@ -2464,7 +2401,8 @@
"### Exercise 7 Answer\n",
"\n",
"Write your answer to Exercise 7 here"
- ]
+ ],
+ "id": "8cd6d641-70fc-4d8c-8811-6ac3213654a3"
},
{
"cell_type": "code",
@@ -2474,7 +2412,8 @@
"source": [
"# Use this box for any code you need\n",
"\n"
- ]
+ ],
+ "id": "b469980e-0e9e-42f2-bcb4-d154b7a74907"
},
{
"cell_type": "markdown",
@@ -2508,14 +2447,14 @@
"data. To fit the model we consider each feature in turn, we select the\n",
"positive class and fit parameters for that class, then we select each\n",
"negative class and fit features for that class. We have code below."
- ]
+ ],
+ "id": "59370eb2-5fb6-4df4-a3a8-a81e247b385a"
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
- "Naive Bayes Summary\n",
- "-------------------\n",
+ "## Naive Bayes Summary\n",
"\n",
"Naive Bayes is making very simple assumptions about the data, in\n",
"particular it is modeling the full *joint* probability of the data set,\n",
@@ -2536,33 +2475,33 @@
"of the modeling the joint probability density. However, the\n",
"factorization assumption that allows us to do this efficiently is very\n",
"strong and may lead to poor decision boundaries in practice."
- ]
+ ],
+ "id": "8c77ada3-0dce-4422-9c4b-db046432761c"
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
- "Other Reading\n",
- "-------------\n",
+ "## Other Reading\n",
"\n",
"- Chapter 5 of Rogers and Girolami (2011) up to pg 179 (Section 5.1,\n",
" and 5.2 up to 5.2.2)."
- ]
+ ],
+ "id": "51ea3522-37cf-41f3-9a6e-b074d5fa8ea5"
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
- "References\n",
- "----------"
- ]
+ "## References"
+ ],
+ "id": "626ecc68-e486-432e-9ea5-5fef4235efc5"
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
- "Thanks!\n",
- "-------\n",
+ "## Thanks!\n",
"\n",
"For more information on these subjects and more you might want to check\n",
"the following resources.\n",
@@ -2573,7 +2512,8 @@
" Page](http://www.theguardian.com/profile/neil-lawrence)\n",
"- blog:\n",
" [http://inverseprobability.com](http://inverseprobability.com/blog.html)"
- ]
+ ],
+ "id": "fd2742da-f26b-4059-a183-97ddc76fac94"
},
{
"cell_type": "markdown",
@@ -2592,8 +2532,13 @@
"E., Avital, I., Stojadinovic, A., 2012. Using machine-learned Bayesian\n",
"belief networks to predict perioperative risk of clostridium difficile\n",
"infection following colon surgery. Interact J Med Res 1, e6.\n",
- ""
- ]
+ "\n",
+ "\n",
+ "The Office of the Senior Special Assistant to the President on the\n",
+ "Millennium Development Goals (OSSAP-MDGs), Columbia University, 2014.\n",
+ "Nigeria NMIS facility database."
+ ],
+ "id": "8b58cc63-ef57-42ad-b92e-962e2ca0bf03"
}
],
"nbformat": 4,
diff --git a/_notebooks/04-gaussian-processes.ipynb b/_notebooks/04-gaussian-processes.ipynb
index 15aceae..2eca555 100644
--- a/_notebooks/04-gaussian-processes.ipynb
+++ b/_notebooks/04-gaussian-processes.ipynb
@@ -4,13 +4,11 @@
"cell_type": "markdown",
"metadata": {},
"source": [
- "Gaussian Processes\n",
- "==================\n",
- "\n",
- "### [Neil D. Lawrence](http://inverseprobability.com)\n",
+ "# Gaussian Processes\n",
"\n",
"### 2020-11-13"
- ]
+ ],
+ "id": "c02c1cfa-fa00-4b15-a731-827989b57aae"
},
{
"cell_type": "markdown",
@@ -25,311 +23,24 @@
"fitting Gaussian processes tend to be more complex than parametric\n",
"models. In this sessions I will introduce Gaussian processes and explain\n",
"why sustaining uncertainty is important."
- ]
+ ],
+ "id": "e2d7b20e-37c0-4364-bf26-c0d9c6fdbdf9"
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"$$\n",
- "\\newcommand{\\tk}[1]{}\n",
- "\\newcommand{\\Amatrix}{\\mathbf{A}}\n",
- "\\newcommand{\\KL}[2]{\\text{KL}\\left( #1\\,\\|\\,#2 \\right)}\n",
- "\\newcommand{\\Kaast}{\\kernelMatrix_{\\mathbf{ \\ast}\\mathbf{ \\ast}}}\n",
- "\\newcommand{\\Kastu}{\\kernelMatrix_{\\mathbf{ \\ast} \\inducingVector}}\n",
- "\\newcommand{\\Kff}{\\kernelMatrix_{\\mappingFunctionVector \\mappingFunctionVector}}\n",
- "\\newcommand{\\Kfu}{\\kernelMatrix_{\\mappingFunctionVector \\inducingVector}}\n",
- "\\newcommand{\\Kuast}{\\kernelMatrix_{\\inducingVector \\bf\\ast}}\n",
- "\\newcommand{\\Kuf}{\\kernelMatrix_{\\inducingVector \\mappingFunctionVector}}\n",
- "\\newcommand{\\Kuu}{\\kernelMatrix_{\\inducingVector \\inducingVector}}\n",
- "\\newcommand{\\Kuui}{\\Kuu^{-1}}\n",
- "\\newcommand{\\Qaast}{\\mathbf{Q}_{\\bf \\ast \\ast}}\n",
- "\\newcommand{\\Qastf}{\\mathbf{Q}_{\\ast \\mappingFunction}}\n",
- "\\newcommand{\\Qfast}{\\mathbf{Q}_{\\mappingFunctionVector \\bf \\ast}}\n",
- "\\newcommand{\\Qff}{\\mathbf{Q}_{\\mappingFunctionVector \\mappingFunctionVector}}\n",
- "\\newcommand{\\aMatrix}{\\mathbf{A}}\n",
- "\\newcommand{\\aScalar}{a}\n",
- "\\newcommand{\\aVector}{\\mathbf{a}}\n",
- "\\newcommand{\\acceleration}{a}\n",
- "\\newcommand{\\bMatrix}{\\mathbf{B}}\n",
- "\\newcommand{\\bScalar}{b}\n",
- "\\newcommand{\\bVector}{\\mathbf{b}}\n",
- "\\newcommand{\\basisFunc}{\\phi}\n",
- "\\newcommand{\\basisFuncVector}{\\boldsymbol{ \\basisFunc}}\n",
- "\\newcommand{\\basisFunction}{\\phi}\n",
- "\\newcommand{\\basisLocation}{\\mu}\n",
- "\\newcommand{\\basisMatrix}{\\boldsymbol{ \\Phi}}\n",
- "\\newcommand{\\basisScalar}{\\basisFunction}\n",
- "\\newcommand{\\basisVector}{\\boldsymbol{ \\basisFunction}}\n",
- "\\newcommand{\\activationFunction}{\\phi}\n",
- "\\newcommand{\\activationMatrix}{\\boldsymbol{ \\Phi}}\n",
- "\\newcommand{\\activationScalar}{\\basisFunction}\n",
- "\\newcommand{\\activationVector}{\\boldsymbol{ \\basisFunction}}\n",
- "\\newcommand{\\bigO}{\\mathcal{O}}\n",
- "\\newcommand{\\binomProb}{\\pi}\n",
- "\\newcommand{\\cMatrix}{\\mathbf{C}}\n",
- "\\newcommand{\\cbasisMatrix}{\\hat{\\boldsymbol{ \\Phi}}}\n",
- "\\newcommand{\\cdataMatrix}{\\hat{\\dataMatrix}}\n",
- "\\newcommand{\\cdataScalar}{\\hat{\\dataScalar}}\n",
- "\\newcommand{\\cdataVector}{\\hat{\\dataVector}}\n",
- "\\newcommand{\\centeredKernelMatrix}{\\mathbf{ \\MakeUppercase{\\centeredKernelScalar}}}\n",
- "\\newcommand{\\centeredKernelScalar}{b}\n",
- "\\newcommand{\\centeredKernelVector}{\\centeredKernelScalar}\n",
- "\\newcommand{\\centeringMatrix}{\\mathbf{H}}\n",
- "\\newcommand{\\chiSquaredDist}[2]{\\chi_{#1}^{2}\\left(#2\\right)}\n",
- "\\newcommand{\\chiSquaredSamp}[1]{\\chi_{#1}^{2}}\n",
- "\\newcommand{\\conditionalCovariance}{\\boldsymbol{ \\Sigma}}\n",
- "\\newcommand{\\coregionalizationMatrix}{\\mathbf{B}}\n",
- "\\newcommand{\\coregionalizationScalar}{b}\n",
- "\\newcommand{\\coregionalizationVector}{\\mathbf{ \\coregionalizationScalar}}\n",
- "\\newcommand{\\covDist}[2]{\\text{cov}_{#2}\\left(#1\\right)}\n",
- "\\newcommand{\\covSamp}[1]{\\text{cov}\\left(#1\\right)}\n",
- "\\newcommand{\\covarianceScalar}{c}\n",
- "\\newcommand{\\covarianceVector}{\\mathbf{ \\covarianceScalar}}\n",
- "\\newcommand{\\covarianceMatrix}{\\mathbf{C}}\n",
- "\\newcommand{\\covarianceMatrixTwo}{\\boldsymbol{ \\Sigma}}\n",
- "\\newcommand{\\croupierScalar}{s}\n",
- "\\newcommand{\\croupierVector}{\\mathbf{ \\croupierScalar}}\n",
- "\\newcommand{\\croupierMatrix}{\\mathbf{ \\MakeUppercase{\\croupierScalar}}}\n",
- "\\newcommand{\\dataDim}{p}\n",
- "\\newcommand{\\dataIndex}{i}\n",
- "\\newcommand{\\dataIndexTwo}{j}\n",
- "\\newcommand{\\dataMatrix}{\\mathbf{Y}}\n",
- "\\newcommand{\\dataScalar}{y}\n",
- "\\newcommand{\\dataSet}{\\mathcal{D}}\n",
- "\\newcommand{\\dataStd}{\\sigma}\n",
- "\\newcommand{\\dataVector}{\\mathbf{ \\dataScalar}}\n",
- "\\newcommand{\\decayRate}{d}\n",
- "\\newcommand{\\degreeMatrix}{\\mathbf{ \\MakeUppercase{\\degreeScalar}}}\n",
- "\\newcommand{\\degreeScalar}{d}\n",
- "\\newcommand{\\degreeVector}{\\mathbf{ \\degreeScalar}}\n",
- "\\newcommand{\\diag}[1]{\\text{diag}\\left(#1\\right)}\n",
- "\\newcommand{\\diagonalMatrix}{\\mathbf{D}}\n",
- "\\newcommand{\\diff}[2]{\\frac{\\text{d}#1}{\\text{d}#2}}\n",
- "\\newcommand{\\diffTwo}[2]{\\frac{\\text{d}^2#1}{\\text{d}#2^2}}\n",
- "\\newcommand{\\displacement}{x}\n",
- "\\newcommand{\\displacementVector}{\\textbf{\\displacement}}\n",
- "\\newcommand{\\distanceMatrix}{\\mathbf{ \\MakeUppercase{\\distanceScalar}}}\n",
- "\\newcommand{\\distanceScalar}{d}\n",
- "\\newcommand{\\distanceVector}{\\mathbf{ \\distanceScalar}}\n",
- "\\newcommand{\\eigenvaltwo}{\\ell}\n",
- "\\newcommand{\\eigenvaltwoMatrix}{\\mathbf{L}}\n",
- "\\newcommand{\\eigenvaltwoVector}{\\mathbf{l}}\n",
- "\\newcommand{\\eigenvalue}{\\lambda}\n",
- "\\newcommand{\\eigenvalueMatrix}{\\boldsymbol{ \\Lambda}}\n",
- "\\newcommand{\\eigenvalueVector}{\\boldsymbol{ \\lambda}}\n",
- "\\newcommand{\\eigenvector}{\\mathbf{ \\eigenvectorScalar}}\n",
- "\\newcommand{\\eigenvectorMatrix}{\\mathbf{U}}\n",
- "\\newcommand{\\eigenvectorScalar}{u}\n",
- "\\newcommand{\\eigenvectwo}{\\mathbf{v}}\n",
- "\\newcommand{\\eigenvectwoMatrix}{\\mathbf{V}}\n",
- "\\newcommand{\\eigenvectwoScalar}{v}\n",
- "\\newcommand{\\entropy}[1]{\\mathcal{H}\\left(#1\\right)}\n",
- "\\newcommand{\\errorFunction}{E}\n",
- "\\newcommand{\\expDist}[2]{\\left<#1\\right>_{#2}}\n",
- "\\newcommand{\\expSamp}[1]{\\left<#1\\right>}\n",
- "\\newcommand{\\expectation}[1]{\\left\\langle #1 \\right\\rangle }\n",
- "\\newcommand{\\expectationDist}[2]{\\left\\langle #1 \\right\\rangle _{#2}}\n",
- "\\newcommand{\\expectedDistanceMatrix}{\\mathcal{D}}\n",
- "\\newcommand{\\eye}{\\mathbf{I}}\n",
- "\\newcommand{\\fantasyDim}{r}\n",
- "\\newcommand{\\fantasyMatrix}{\\mathbf{ \\MakeUppercase{\\fantasyScalar}}}\n",
- "\\newcommand{\\fantasyScalar}{z}\n",
- "\\newcommand{\\fantasyVector}{\\mathbf{ \\fantasyScalar}}\n",
- "\\newcommand{\\featureStd}{\\varsigma}\n",
- "\\newcommand{\\gammaCdf}[3]{\\mathcal{GAMMA CDF}\\left(#1|#2,#3\\right)}\n",
- "\\newcommand{\\gammaDist}[3]{\\mathcal{G}\\left(#1|#2,#3\\right)}\n",
- "\\newcommand{\\gammaSamp}[2]{\\mathcal{G}\\left(#1,#2\\right)}\n",
- "\\newcommand{\\gaussianDist}[3]{\\mathcal{N}\\left(#1|#2,#3\\right)}\n",
- "\\newcommand{\\gaussianSamp}[2]{\\mathcal{N}\\left(#1,#2\\right)}\n",
- "\\newcommand{\\uniformDist}[3]{\\mathcal{U}\\left(#1|#2,#3\\right)}\n",
- "\\newcommand{\\uniformSamp}[2]{\\mathcal{U}\\left(#1,#2\\right)}\n",
- "\\newcommand{\\given}{|}\n",
- "\\newcommand{\\half}{\\frac{1}{2}}\n",
- "\\newcommand{\\heaviside}{H}\n",
- "\\newcommand{\\hiddenMatrix}{\\mathbf{ \\MakeUppercase{\\hiddenScalar}}}\n",
- "\\newcommand{\\hiddenScalar}{h}\n",
- "\\newcommand{\\hiddenVector}{\\mathbf{ \\hiddenScalar}}\n",
- "\\newcommand{\\identityMatrix}{\\eye}\n",
- "\\newcommand{\\inducingInputScalar}{z}\n",
- "\\newcommand{\\inducingInputVector}{\\mathbf{ \\inducingInputScalar}}\n",
- "\\newcommand{\\inducingInputMatrix}{\\mathbf{Z}}\n",
- "\\newcommand{\\inducingScalar}{u}\n",
- "\\newcommand{\\inducingVector}{\\mathbf{ \\inducingScalar}}\n",
- "\\newcommand{\\inducingMatrix}{\\mathbf{U}}\n",
- "\\newcommand{\\inlineDiff}[2]{\\text{d}#1/\\text{d}#2}\n",
- "\\newcommand{\\inputDim}{q}\n",
- "\\newcommand{\\inputMatrix}{\\mathbf{X}}\n",
- "\\newcommand{\\inputScalar}{x}\n",
- "\\newcommand{\\inputSpace}{\\mathcal{X}}\n",
- "\\newcommand{\\inputVals}{\\inputVector}\n",
- "\\newcommand{\\inputVector}{\\mathbf{ \\inputScalar}}\n",
- "\\newcommand{\\iterNum}{k}\n",
- "\\newcommand{\\kernel}{\\kernelScalar}\n",
- "\\newcommand{\\kernelMatrix}{\\mathbf{K}}\n",
- "\\newcommand{\\kernelScalar}{k}\n",
- "\\newcommand{\\kernelVector}{\\mathbf{ \\kernelScalar}}\n",
- "\\newcommand{\\kff}{\\kernelScalar_{\\mappingFunction \\mappingFunction}}\n",
- "\\newcommand{\\kfu}{\\kernelVector_{\\mappingFunction \\inducingScalar}}\n",
- "\\newcommand{\\kuf}{\\kernelVector_{\\inducingScalar \\mappingFunction}}\n",
- "\\newcommand{\\kuu}{\\kernelVector_{\\inducingScalar \\inducingScalar}}\n",
- "\\newcommand{\\lagrangeMultiplier}{\\lambda}\n",
- "\\newcommand{\\lagrangeMultiplierMatrix}{\\boldsymbol{ \\Lambda}}\n",
- "\\newcommand{\\lagrangian}{L}\n",
- "\\newcommand{\\laplacianFactor}{\\mathbf{ \\MakeUppercase{\\laplacianFactorScalar}}}\n",
- "\\newcommand{\\laplacianFactorScalar}{m}\n",
- "\\newcommand{\\laplacianFactorVector}{\\mathbf{ \\laplacianFactorScalar}}\n",
- "\\newcommand{\\laplacianMatrix}{\\mathbf{L}}\n",
- "\\newcommand{\\laplacianScalar}{\\ell}\n",
- "\\newcommand{\\laplacianVector}{\\mathbf{ \\ell}}\n",
- "\\newcommand{\\latentDim}{q}\n",
- "\\newcommand{\\latentDistanceMatrix}{\\boldsymbol{ \\Delta}}\n",
- "\\newcommand{\\latentDistanceScalar}{\\delta}\n",
- "\\newcommand{\\latentDistanceVector}{\\boldsymbol{ \\delta}}\n",
- "\\newcommand{\\latentForce}{f}\n",
- "\\newcommand{\\latentFunction}{u}\n",
- "\\newcommand{\\latentFunctionVector}{\\mathbf{ \\latentFunction}}\n",
- "\\newcommand{\\latentFunctionMatrix}{\\mathbf{ \\MakeUppercase{\\latentFunction}}}\n",
- "\\newcommand{\\latentIndex}{j}\n",
- "\\newcommand{\\latentScalar}{z}\n",
- "\\newcommand{\\latentVector}{\\mathbf{ \\latentScalar}}\n",
- "\\newcommand{\\latentMatrix}{\\mathbf{Z}}\n",
- "\\newcommand{\\learnRate}{\\eta}\n",
- "\\newcommand{\\lengthScale}{\\ell}\n",
- "\\newcommand{\\rbfWidth}{\\ell}\n",
- "\\newcommand{\\likelihoodBound}{\\mathcal{L}}\n",
- "\\newcommand{\\likelihoodFunction}{L}\n",
- "\\newcommand{\\locationScalar}{\\mu}\n",
- "\\newcommand{\\locationVector}{\\boldsymbol{ \\locationScalar}}\n",
- "\\newcommand{\\locationMatrix}{\\mathbf{M}}\n",
- "\\newcommand{\\variance}[1]{\\text{var}\\left( #1 \\right)}\n",
- "\\newcommand{\\mappingFunction}{f}\n",
- "\\newcommand{\\mappingFunctionMatrix}{\\mathbf{F}}\n",
- "\\newcommand{\\mappingFunctionTwo}{g}\n",
- "\\newcommand{\\mappingFunctionTwoMatrix}{\\mathbf{G}}\n",
- "\\newcommand{\\mappingFunctionTwoVector}{\\mathbf{ \\mappingFunctionTwo}}\n",
- "\\newcommand{\\mappingFunctionVector}{\\mathbf{ \\mappingFunction}}\n",
- "\\newcommand{\\scaleScalar}{s}\n",
- "\\newcommand{\\mappingScalar}{w}\n",
- "\\newcommand{\\mappingVector}{\\mathbf{ \\mappingScalar}}\n",
- "\\newcommand{\\mappingMatrix}{\\mathbf{W}}\n",
- "\\newcommand{\\mappingScalarTwo}{v}\n",
- "\\newcommand{\\mappingVectorTwo}{\\mathbf{ \\mappingScalarTwo}}\n",
- "\\newcommand{\\mappingMatrixTwo}{\\mathbf{V}}\n",
- "\\newcommand{\\maxIters}{K}\n",
- "\\newcommand{\\meanMatrix}{\\mathbf{M}}\n",
- "\\newcommand{\\meanScalar}{\\mu}\n",
- "\\newcommand{\\meanTwoMatrix}{\\mathbf{M}}\n",
- "\\newcommand{\\meanTwoScalar}{m}\n",
- "\\newcommand{\\meanTwoVector}{\\mathbf{ \\meanTwoScalar}}\n",
- "\\newcommand{\\meanVector}{\\boldsymbol{ \\meanScalar}}\n",
- "\\newcommand{\\mrnaConcentration}{m}\n",
- "\\newcommand{\\naturalFrequency}{\\omega}\n",
- "\\newcommand{\\neighborhood}[1]{\\mathcal{N}\\left( #1 \\right)}\n",
- "\\newcommand{\\neilurl}{http://inverseprobability.com/}\n",
- "\\newcommand{\\noiseMatrix}{\\boldsymbol{ E}}\n",
- "\\newcommand{\\noiseScalar}{\\epsilon}\n",
- "\\newcommand{\\noiseVector}{\\boldsymbol{ \\epsilon}}\n",
- "\\newcommand{\\norm}[1]{\\left\\Vert #1 \\right\\Vert}\n",
- "\\newcommand{\\normalizedLaplacianMatrix}{\\hat{\\mathbf{L}}}\n",
- "\\newcommand{\\normalizedLaplacianScalar}{\\hat{\\ell}}\n",
- "\\newcommand{\\normalizedLaplacianVector}{\\hat{\\mathbf{ \\ell}}}\n",
- "\\newcommand{\\numActive}{m}\n",
- "\\newcommand{\\numBasisFunc}{m}\n",
- "\\newcommand{\\numComponents}{m}\n",
- "\\newcommand{\\numComps}{K}\n",
- "\\newcommand{\\numData}{n}\n",
- "\\newcommand{\\numFeatures}{K}\n",
- "\\newcommand{\\numHidden}{h}\n",
- "\\newcommand{\\numInducing}{m}\n",
- "\\newcommand{\\numLayers}{\\ell}\n",
- "\\newcommand{\\numNeighbors}{K}\n",
- "\\newcommand{\\numSequences}{s}\n",
- "\\newcommand{\\numSuccess}{s}\n",
- "\\newcommand{\\numTasks}{m}\n",
- "\\newcommand{\\numTime}{T}\n",
- "\\newcommand{\\numTrials}{S}\n",
- "\\newcommand{\\outputIndex}{j}\n",
- "\\newcommand{\\paramVector}{\\boldsymbol{ \\theta}}\n",
- "\\newcommand{\\parameterMatrix}{\\boldsymbol{ \\Theta}}\n",
- "\\newcommand{\\parameterScalar}{\\theta}\n",
- "\\newcommand{\\parameterVector}{\\boldsymbol{ \\parameterScalar}}\n",
- "\\newcommand{\\partDiff}[2]{\\frac{\\partial#1}{\\partial#2}}\n",
- "\\newcommand{\\precisionScalar}{j}\n",
- "\\newcommand{\\precisionVector}{\\mathbf{ \\precisionScalar}}\n",
- "\\newcommand{\\precisionMatrix}{\\mathbf{J}}\n",
- "\\newcommand{\\pseudotargetScalar}{\\widetilde{y}}\n",
- "\\newcommand{\\pseudotargetVector}{\\mathbf{ \\pseudotargetScalar}}\n",
- "\\newcommand{\\pseudotargetMatrix}{\\mathbf{ \\widetilde{Y}}}\n",
- "\\newcommand{\\rank}[1]{\\text{rank}\\left(#1\\right)}\n",
- "\\newcommand{\\rayleighDist}[2]{\\mathcal{R}\\left(#1|#2\\right)}\n",
- "\\newcommand{\\rayleighSamp}[1]{\\mathcal{R}\\left(#1\\right)}\n",
- "\\newcommand{\\responsibility}{r}\n",
- "\\newcommand{\\rotationScalar}{r}\n",
- "\\newcommand{\\rotationVector}{\\mathbf{ \\rotationScalar}}\n",
- "\\newcommand{\\rotationMatrix}{\\mathbf{R}}\n",
- "\\newcommand{\\sampleCovScalar}{s}\n",
- "\\newcommand{\\sampleCovVector}{\\mathbf{ \\sampleCovScalar}}\n",
- "\\newcommand{\\sampleCovMatrix}{\\mathbf{s}}\n",
- "\\newcommand{\\scalarProduct}[2]{\\left\\langle{#1},{#2}\\right\\rangle}\n",
- "\\newcommand{\\sign}[1]{\\text{sign}\\left(#1\\right)}\n",
- "\\newcommand{\\sigmoid}[1]{\\sigma\\left(#1\\right)}\n",
- "\\newcommand{\\singularvalue}{\\ell}\n",
- "\\newcommand{\\singularvalueMatrix}{\\mathbf{L}}\n",
- "\\newcommand{\\singularvalueVector}{\\mathbf{l}}\n",
- "\\newcommand{\\sorth}{\\mathbf{u}}\n",
- "\\newcommand{\\spar}{\\lambda}\n",
- "\\newcommand{\\trace}[1]{\\text{tr}\\left(#1\\right)}\n",
- "\\newcommand{\\BasalRate}{B}\n",
- "\\newcommand{\\DampingCoefficient}{C}\n",
- "\\newcommand{\\DecayRate}{D}\n",
- "\\newcommand{\\Displacement}{X}\n",
- "\\newcommand{\\LatentForce}{F}\n",
- "\\newcommand{\\Mass}{M}\n",
- "\\newcommand{\\Sensitivity}{S}\n",
- "\\newcommand{\\basalRate}{b}\n",
- "\\newcommand{\\dampingCoefficient}{c}\n",
- "\\newcommand{\\mass}{m}\n",
- "\\newcommand{\\sensitivity}{s}\n",
- "\\newcommand{\\springScalar}{\\kappa}\n",
- "\\newcommand{\\springVector}{\\boldsymbol{ \\kappa}}\n",
- "\\newcommand{\\springMatrix}{\\boldsymbol{ \\mathcal{K}}}\n",
- "\\newcommand{\\tfConcentration}{p}\n",
- "\\newcommand{\\tfDecayRate}{\\delta}\n",
- "\\newcommand{\\tfMrnaConcentration}{f}\n",
- "\\newcommand{\\tfVector}{\\mathbf{ \\tfConcentration}}\n",
- "\\newcommand{\\velocity}{v}\n",
- "\\newcommand{\\sufficientStatsScalar}{g}\n",
- "\\newcommand{\\sufficientStatsVector}{\\mathbf{ \\sufficientStatsScalar}}\n",
- "\\newcommand{\\sufficientStatsMatrix}{\\mathbf{G}}\n",
- "\\newcommand{\\switchScalar}{s}\n",
- "\\newcommand{\\switchVector}{\\mathbf{ \\switchScalar}}\n",
- "\\newcommand{\\switchMatrix}{\\mathbf{S}}\n",
- "\\newcommand{\\tr}[1]{\\text{tr}\\left(#1\\right)}\n",
- "\\newcommand{\\loneNorm}[1]{\\left\\Vert #1 \\right\\Vert_1}\n",
- "\\newcommand{\\ltwoNorm}[1]{\\left\\Vert #1 \\right\\Vert_2}\n",
- "\\newcommand{\\onenorm}[1]{\\left\\vert#1\\right\\vert_1}\n",
- "\\newcommand{\\twonorm}[1]{\\left\\Vert #1 \\right\\Vert}\n",
- "\\newcommand{\\vScalar}{v}\n",
- "\\newcommand{\\vVector}{\\mathbf{v}}\n",
- "\\newcommand{\\vMatrix}{\\mathbf{V}}\n",
- "\\newcommand{\\varianceDist}[2]{\\text{var}_{#2}\\left( #1 \\right)}\n",
- "\\newcommand{\\vecb}[1]{\\left(#1\\right):}\n",
- "\\newcommand{\\weightScalar}{w}\n",
- "\\newcommand{\\weightVector}{\\mathbf{ \\weightScalar}}\n",
- "\\newcommand{\\weightMatrix}{\\mathbf{W}}\n",
- "\\newcommand{\\weightedAdjacencyMatrix}{\\mathbf{A}}\n",
- "\\newcommand{\\weightedAdjacencyScalar}{a}\n",
- "\\newcommand{\\weightedAdjacencyVector}{\\mathbf{ \\weightedAdjacencyScalar}}\n",
- "\\newcommand{\\onesVector}{\\mathbf{1}}\n",
- "\\newcommand{\\zerosVector}{\\mathbf{0}}\n",
"$$"
- ]
+ ],
+ "id": "c4b84963-87a9-4fe4-b0c4-4135141c9e8b"
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
+ "::: {.cell .markdown}\n",
+ "\n",
"\n",
"\n",
"\n",
@@ -339,17 +50,20 @@
""
- ]
+ ],
+ "id": "66921a82-61bd-4235-92cf-0341f200bfab"
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
- "Setup\n",
- "-----\n",
+ "## Setup\n",
"\n",
- "First we download some libraries and files to support the notebook."
- ]
+ "\\[edit\\]"
+ ],
+ "id": "0d90c63c-a8bc-437f-81e9-d2f2fbc31cd6"
},
{
"cell_type": "code",
@@ -357,17 +71,38 @@
"metadata": {},
"outputs": [],
"source": [
- "import urllib.request"
- ]
+ "import matplotlib.pyplot as plt\n",
+ "plt.rcParams.update({'font.size': 22})"
+ ],
+ "id": "d941bf3c-5c1d-4631-a09e-89e1cffb7a0a"
},
{
- "cell_type": "code",
- "execution_count": null,
+ "cell_type": "markdown",
"metadata": {},
- "outputs": [],
"source": [
- "urllib.request.urlretrieve('https://raw.githubusercontent.com/lawrennd/talks/gh-pages/mlai.py','mlai.py')"
- ]
+ ""
+ ],
+ "id": "a32c1689-a047-4c86-8a57-b603ca208ad1"
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## notutils\n",
+ "\n",
+ "\\[edit\\]\n",
+ "\n",
+ "This small package is a helper package for various notebook utilities\n",
+ "used below.\n",
+ "\n",
+ "The software can be installed using"
+ ],
+ "id": "b627017c-f2fd-4899-8d74-2cd632681d65"
},
{
"cell_type": "code",
@@ -375,8 +110,22 @@
"metadata": {},
"outputs": [],
"source": [
- "urllib.request.urlretrieve('https://raw.githubusercontent.com/lawrennd/talks/gh-pages/teaching_plots.py','teaching_plots.py')"
- ]
+ "%pip install notutils"
+ ],
+ "id": "cfc320f7-d489-4a2e-ad38-056a0575c7b2"
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "from the command prompt where you can access your python installation.\n",
+ "\n",
+ "The code is also available on GitHub:\n",
+ "\n",
+ "\n",
+ "Once `notutils` is installed, it can be imported in the usual manner."
+ ],
+ "id": "cd94f4d4-9ecd-4e9a-91fb-9f45668a93a9"
},
{
"cell_type": "code",
@@ -384,15 +133,19 @@
"metadata": {},
"outputs": [],
"source": [
- "urllib.request.urlretrieve('https://raw.githubusercontent.com/lawrennd/talks/gh-pages/gp_tutorial.py','gp_tutorial.py')"
- ]
+ "import notutils"
+ ],
+ "id": "7ce0f3c6-58ad-4f44-ae04-f81025adeff8"
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
- "pods\n",
- "----\n",
+ "## pods\n",
+ "\n",
+ "\\[edit\\]\n",
"\n",
"In Sheffield we created a suite of software tools for ‘Open Data\n",
"Science’. Open data science is an approach to sharing code, models and\n",
@@ -403,7 +156,8 @@
"Science](http://inverseprobability.com/2014/07/01/open-data-science).\n",
"\n",
"The software can be installed using"
- ]
+ ],
+ "id": "8b5c9e7f-1f21-4de5-aff7-3bb57ddc5e8e"
},
{
"cell_type": "code",
@@ -411,8 +165,9 @@
"metadata": {},
"outputs": [],
"source": [
- "%pip install --upgrade git+https://github.com/sods/ods"
- ]
+ "%pip install pods"
+ ],
+ "id": "0bb5fed6-2b6a-42b3-9875-f53981a40039"
},
{
"cell_type": "markdown",
@@ -420,11 +175,11 @@
"source": [
"from the command prompt where you can access your python installation.\n",
"\n",
- "The code is also available on github:\n",
- "https://github.com/sods/ods\n",
+ "The code is also available on GitHub: \n",
"\n",
"Once `pods` is installed, it can be imported in the usual manner."
- ]
+ ],
+ "id": "8b090fc1-d1f5-4fb3-9934-2e009962a254"
},
{
"cell_type": "code",
@@ -433,46 +188,105 @@
"outputs": [],
"source": [
"import pods"
- ]
+ ],
+ "id": "f6df8cdb-b094-43f8-b4a2-de4ddf3f970d"
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## mlai\n",
+ "\n",
+ "\\[edit\\]\n",
+ "\n",
+ "The `mlai` software is a suite of helper functions for teaching and\n",
+ "demonstrating machine learning algorithms. It was first used in the\n",
+ "Machine Learning and Adaptive Intelligence course in Sheffield in 2013.\n",
+ "\n",
+ "The software can be installed using"
+ ],
+ "id": "9c065d71-d568-4053-bfeb-da0441e4ec02"
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "%pip install mlai"
+ ],
+ "id": "b5a1945c-9b25-489b-bae5-6f487eac5dbb"
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "from the command prompt where you can access your python installation.\n",
+ "\n",
+ "The code is also available on GitHub: \n",
+ "\n",
+ "Once `mlai` is installed, it can be imported in the usual manner."
+ ],
+ "id": "7e05a0e8-381a-4086-a8da-af9267c371dd"
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import mlai"
+ ],
+ "id": "8dad9c59-e3c8-461d-ae7f-44879fd5561e"
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
- "\n",
+ "\n",
"\n",
"Figure: A key reference for Gaussian process models remains the\n",
"excellent book “Gaussian Processes for Machine Learning” (Rasmussen and\n",
"Williams (2006)). The book is also\n",
- "freely\n",
+ "freely\n",
"available online.\n",
"\n",
"Rasmussen and Williams (2006) is still one of the most important\n",
"references on Gaussian process models. It is [available freely\n",
"online](http://www.gaussianprocess.org/gpml/)."
- ]
+ ],
+ "id": "90c84595-b4be-44eb-82aa-b3262b413049"
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
- "A First Course in Machine Learning\n",
- "----------------------------------\n",
+ "## A First Course in Machine Learning\n",
"\n",
- "\n",
+ "\\[edit\\]\n",
+ "\n",
+ "\n",
"\n",
"Figure: The main course text is “A First Course in Machine Learning”\n",
"by Rogers and Girolami (2011).\n",
"\n",
""
- ]
+ ],
+ "id": "afecd9d9-c61d-4395-a152-17dd5e0479a1"
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
- "Example: Prediction of Malaria Incidence in Uganda\n",
- "--------------------------------------------------\n",
+ "## Example: Prediction of Malaria Incidence in Uganda\n",
+ "\n",
+ "\\[edit\\]\n",
"\n",
"\n",
"\n",
"\n",
"\n",
@@ -549,7 +363,12 @@
"collaboration with John Quinn and Martin Mubangizi (Andrade-Pacheco et\n",
"al., 2014; Mubangizi et al., 2014). John and Martin were initally from\n",
"the AI-DEV group from the University of Makerere in Kampala and more\n",
- "latterly they were based at UN Global Pulse in Kampala.\n",
+ "latterly they were based at UN Global Pulse in Kampala. You can see the\n",
+ "work summarized on the UN Global Pulse [disease outbreaks project site\n",
+ "here](https://diseaseoutbreaks.unglobalpulse.net/uganda/).\n",
+ "\n",
+ "- See [UN Global Pulse Disease Outbreaks\n",
+ " Site](https://diseaseoutbreaks.unglobalpulse.net/uganda/)\n",
"\n",
"Malaria data is spatial data. Uganda is split into districts, and health\n",
"reports can be found for each district. This suggests that models such\n",
@@ -559,20 +378,19 @@
"location within a district, such as Nagongera which is a sentinel site\n",
"based in the Tororo district.\n",
"\n",
- "\n",
+ "\n",
"\n",
- "Figure: Ugandan districs. Data SRTM/NASA from\n",
- "https://dds.cr.usgs.gov/srtm/version2_1.\n",
+ "Figure: Ugandan districts. Data SRTM/NASA from\n",
+ ".\n",
"\n",
- "(Andrade-Pacheco et al., 2014; Mubangizi\n",
- "et al., 2014)\n",
+ "(Andrade-Pacheco et al., 2014; Mubangizi et al., 2014)\n",
"\n",
"The common standard for collecting health data on the African continent\n",
"is from the Health management information systems (HMIS). However, this\n",
"data suffers from missing values (Gething et al., 2006) and diagnosis of\n",
"diseases like typhoid and malaria may be confounded.\n",
"\n",
- "\n",
+ "\n",
"\n",
"Figure: The Tororo district, where the sentinel site, Nagongera, is\n",
"located.\n",
@@ -584,7 +402,7 @@
"sites give accurate assessment of malaria disease levels in Uganda,\n",
"including a site in Nagongera.\n",
"\n",
- "\n",
+ "\n",
"\n",
"Figure: Sentinel and HMIS data along with rainfall and temperature\n",
"for the Nagongera sentinel station in the Tororo district.\n",
@@ -599,33 +417,33 @@
"and temperature, to improve predictions from HMIS data of levels of\n",
"malaria.\n",
"\n",
- "\n",
+ "\n",
"\n",
"Figure: The Mubende District.\n",
"\n",
- "\n",
+ "\n",
"\n",
"Figure: Prediction of malaria incidence in Mubende.\n",
"\n",
- "\n",
+ "\n",
"\n",
"Figure: The project arose out of the Gaussian process summer school\n",
"held at Makerere in Kampala in 2013. The school led, in turn, to the\n",
"Data Science Africa initiative."
- ]
+ ],
+ "id": "e2a3abf4-e15c-49b6-8e75-ba59ee7b320e"
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
- "Early Warning Systems\n",
- "---------------------\n",
+ "## Early Warning Systems\n",
"\n",
- "\n",
+ "\n",
"\n",
"Figure: The Kabarole district in Uganda.\n",
"\n",
- "\n",
+ "\n",
"\n",
"Figure: Estimate of the current disease situation in the Kabarole\n",
"district over time. Estimate is constructed with a Gaussian process with\n",
@@ -653,7 +471,7 @@
"Finally, there is a gray region which represents when the scale of the\n",
"effect is small.\n",
"\n",
- "\n",
+ "\n",
"\n",
"Figure: The map of Ugandan districts with an overview of the Malaria\n",
"situation in each district.\n",
@@ -661,14 +479,18 @@
"These colors can now be observed directly on a spatial map of the\n",
"districts to give an immediate impression of the current status of the\n",
"disease across the country."
- ]
+ ],
+ "id": "1dac77bd-8a7e-4b01-ae39-c5d1ed0f9200"
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
- "What is Machine Learning?\n",
- "=========================\n",
+ "# What is Machine Learning?\n",
+ "\n",
+ "\\[edit\\]\n",
"\n",
"What is machine learning? At its most basic level machine learning is a\n",
"combination of\n",
@@ -689,22 +511,22 @@
"In practice we normally perform machine learning using two functions. To\n",
"combine data with a model we typically make use of:\n",
"\n",
- "**a prediction function** a function which is used to make the\n",
- "predictions. It includes our beliefs about the regularities of the\n",
- "universe, our assumptions about how the world works, e.g. smoothness,\n",
- "spatial similarities, temporal similarities.\n",
+ "**a prediction function** it is used to make the predictions. It\n",
+ "includes our beliefs about the regularities of the universe, our\n",
+ "assumptions about how the world works, e.g., smoothness, spatial\n",
+ "similarities, temporal similarities.\n",
"\n",
- "**an objective function** a function which defines the cost of\n",
- "misprediction. Typically it includes knowledge about the world’s\n",
- "generating processes (probabilistic objectives) or the costs we pay for\n",
- "mispredictions (empiricial risk minimization).\n",
+ "**an objective function** it defines the ‘cost’ of misprediction.\n",
+ "Typically, it includes knowledge about the world’s generating processes\n",
+ "(probabilistic objectives) or the costs we pay for mispredictions\n",
+ "(empirical risk minimization).\n",
"\n",
"The combination of data and model through the prediction function and\n",
"the objective function leads to a *learning algorithm*. The class of\n",
"prediction functions and objective functions we can make use of is\n",
"restricted by the algorithms they lead to. If the prediction function or\n",
"the objective function are too complex, then it can be difficult to find\n",
- "an appropriate learning algorithm. Much of the acdemic field of machine\n",
+ "an appropriate learning algorithm. Much of the academic field of machine\n",
"learning is the quest for new learning algorithms that allow us to bring\n",
"different types of models and data together.\n",
"\n",
@@ -714,15 +536,19 @@
"Example](https://royalsociety.org/~/media/policy/projects/machine-learning/publications/machine-learning-report.pdf).\n",
"\n",
"You can also check my post blog post on [What is Machine\n",
- "Learning?](http://inverseprobability.com/2017/07/17/what-is-machine-learning).."
- ]
+ "Learning?](http://inverseprobability.com/2017/07/17/what-is-machine-learning)."
+ ],
+ "id": "0d1bf8f3-8054-4bc3-906f-2d74e2cf95e5"
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
- "Overdetermined System\n",
- "---------------------\n",
+ "## Overdetermined System\n",
+ "\n",
+ "\\[edit\\]\n",
"\n",
"The challenge with a linear model is that it has two unknowns, $m$, and\n",
"$c$. Observing data allows us to write down a system of simultaneous\n",
@@ -731,30 +557,35 @@
"a second data point, $x= 3$, $y=1$, then we can write two simultaneous\n",
"linear equations of the form.\n",
"\n",
- "point 1: $x= 1$, $y=3$ $$3 = m + c$$ point 2: $x= 3$, $y=1$\n",
- "$$1 = 3m + c$$\n",
+ "point 1: $x= 1$, $y=3$ $$\n",
+ "3 = m + c\n",
+ "$$ point 2: $x= 3$, $y=1$ $$\n",
+ "1 = 3m + c\n",
+ "$$\n",
"\n",
"The solution to these two simultaneous equations can be represented\n",
"graphically as\n",
"\n",
- "\n",
+ "\n",
"\n",
"Figure: The solution of two linear equations represented as the fit\n",
"of a straight line through two data\n",
"\n",
- "The challenge comes when a third data point is observed and it doesn’t\n",
- "naturally fit on the straight line.\n",
+ "The challenge comes when a third data point is observed, and it doesn’t\n",
+ "fit on the straight line.\n",
"\n",
- "point 3: $x= 2$, $y=2.5$ $$2.5 = 2m + c$$\n",
+ "point 3: $x= 2$, $y=2.5$ $$\n",
+ "2.5 = 2m + c\n",
+ "$$\n",
"\n",
- "\n",
+ "\n",
"\n",
"Figure: A third observation of data is inconsistent with the solution\n",
"dictated by the first two observations\n",
"\n",
"Now there are three candidate lines, each consistent with our data.\n",
"\n",
- "\n",
+ "\n",
"\n",
"Figure: Three solutions to the problem, each consistent with two\n",
"points of the three observations\n",
@@ -763,7 +594,8 @@
"than we need to determine our parameters. The problem arises because the\n",
"model is a simplification of the real world, and the data we observe is\n",
"therefore inconsistent with our model."
- ]
+ ],
+ "id": "a5a068bd-65f7-4f04-9140-baae13da1ebc"
},
{
"cell_type": "code",
@@ -771,8 +603,9 @@
"metadata": {},
"outputs": [],
"source": [
- "import teaching_plots as plot"
- ]
+ "import mlai.plot as plot"
+ ],
+ "id": "5c6d2f26-2d50-4e5f-aee9-6cd216d75e6c"
},
{
"cell_type": "code",
@@ -781,7 +614,8 @@
"outputs": [],
"source": [
"plot.over_determined_system(diagrams='./ml')"
- ]
+ ],
+ "id": "b29e09c9-23a9-4da9-9e40-0756c0380e92"
},
{
"cell_type": "code",
@@ -790,8 +624,9 @@
"outputs": [],
"source": [
"from ipywidgets import IntSlider\n",
- "import pods"
- ]
+ "import notutils as nu"
+ ],
+ "id": "ad30c66e-a39b-41fc-9702-e1a628f4dad7"
},
{
"cell_type": "code",
@@ -799,20 +634,28 @@
"metadata": {},
"outputs": [],
"source": [
- "pods.notebook.display_plots('over_determined_system{samp:0>3}.svg',\n",
- " directory='./ml', \n",
- " samp=IntSlider(1,1,7,1))"
- ]
+ "nu.display_plots('over_determined_system{samp:0>3}.svg',\n",
+ " directory='./ml', \n",
+ " samp=IntSlider(1,1,7,1))"
+ ],
+ "id": "ced7e252-facd-4dac-bdfd-cad1eaadb15a"
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
+ "## Pierre-Simon Laplace\n",
+ "\n",
+ "\\[edit\\]\n",
+ "\n",
"The solution was proposed by Pierre-Simon Laplace. His idea was to\n",
"accept that the model was an incomplete representation of the real\n",
- "world, and the manner in which it was incomplete is *unknown*. His idea\n",
- "was that such unknowns could be dealt with through probability."
- ]
+ "world, and the way it was incomplete is *unknown*. His idea was that\n",
+ "such unknowns could be dealt with through probability."
+ ],
+ "id": "936ea41f-2e24-4609-bfe7-a957e647fecb"
},
{
"cell_type": "markdown",
@@ -820,10 +663,15 @@
"source": [
"### Pierre-Simon Laplace\n",
"\n",
- "\n",
+ "\\[edit\\]\n",
+ "\n",
+ "\n",
"\n",
"Figure: Pierre-Simon Laplace 1749-1827."
- ]
+ ],
+ "id": "f3fc089c-e321-4791-a5dd-b18f9ff40df1"
},
{
"cell_type": "code",
@@ -831,9 +679,10 @@
"metadata": {},
"outputs": [],
"source": [
- "import pods\n",
- "pods.notebook.display_google_book(id='1YQPAAAAQAAJ', page='PR17-IA2')"
- ]
+ "import notutils as nu\n",
+ "nu.display_google_book(id='1YQPAAAAQAAJ', page='PR17-IA2')"
+ ],
+ "id": "851cd53b-1272-4566-a4e0-f13c5ba2d6f4"
},
{
"cell_type": "markdown",
@@ -856,9 +705,21 @@
"\n",
"This notion is known as *Laplace’s demon* or *Laplace’s superman*.\n",
"\n",
- "\n",
+ "\n",
"\n",
- "Figure: Laplace’s determinsim in English translation.\n",
+ "Figure: Laplace’s determinsim in English translation."
+ ],
+ "id": "7dd072ba-fb77-428b-ab62-eac1cdcf954c"
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Laplace’s Gremlin\n",
+ "\n",
+ "\\[edit\\]\n",
"\n",
"Unfortunately, most analyses of his ideas stop at that point, whereas\n",
"his real point is that such a notion is unreachable. Not so much\n",
@@ -871,7 +732,8 @@
">\n",
"> Probability is relative, in part to this ignorance, in part to our\n",
"> knowledge."
- ]
+ ],
+ "id": "d5047db3-0cfe-445a-88e6-f3bcd44dc27c"
},
{
"cell_type": "code",
@@ -879,15 +741,16 @@
"metadata": {},
"outputs": [],
"source": [
- "import pods\n",
- "pods.notebook.display_google_book(id='1YQPAAAAQAAJ', page='PR17-IA4')"
- ]
+ "import notutils as nu\n",
+ "nu.display_google_book(id='1YQPAAAAQAAJ', page='PR17-IA4')"
+ ],
+ "id": "e686be3b-c6b3-4728-b0aa-d9a02bc201e7"
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
- "\n",
+ "\n",
"\n",
"Figure: To Laplace, determinism is a strawman. Ignorance of mechanism\n",
"and data leads to uncertainty which should be dealt with through\n",
@@ -897,14 +760,28 @@
"Universe due to our ignorance about the world, Laplace’s suggestion, and\n",
"focus in this essay is that we turn to probability to deal with this\n",
"uncertainty. This is also our inspiration for using probability in\n",
- "machine learning.\n",
+ "machine learning. This is the true message of Laplace’s essay, not\n",
+ "determinism, but the gremlin of uncertainty that emerges from our\n",
+ "ignorance.\n",
"\n",
"The “forces by which nature is animated” is our *model*, the “situation\n",
"of beings that compose it” is our *data* and the “intelligence\n",
"sufficiently vast enough to submit these data to analysis” is our\n",
"compute. The fly in the ointment is our *ignorance* about these aspects.\n",
"And *probability* is the tool we use to incorporate this ignorance\n",
- "leading to uncertainty or *doubt* in our predictions.\n",
+ "leading to uncertainty or *doubt* in our predictions."
+ ],
+ "id": "14739d2b-c8ad-48ed-8109-f15616ac72a1"
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Latent Variables\n",
+ "\n",
+ "\\[edit\\]\n",
"\n",
"Laplace’s concept was that the reason that the data doesn’t match up to\n",
"the model is because of unconsidered factors, and that these might be\n",
@@ -914,30 +791,25 @@
"But in the context Laplace uses it, the variable is so common that it\n",
"has other names such as a “slack” variable or the *noise* in the system.\n",
"\n",
- "point 1: $x= 1$, $y=3$ $$\n",
- "3 = m + c + \\epsilon_1\n",
- "$$ point 2: $x= 3$, $y=1$ $$\n",
- "1 = 3m + c + \\epsilon_2\n",
- "$$ point 3: $x= 2$, $y=2.5$ $$\n",
- "2.5 = 2m + c + \\epsilon_3\n",
- "$$\n",
+ "point 1: $x= 1$, $y=3$ \\[ 3 = m + c + \\_1 \\] point 2: $x= 3$, $y=1$ \\[ 1\n",
+ "= 3m + c + \\_2 \\] point 3: $x= 2$, $y=2.5$ \\[ 2.5 = 2m + c + \\_3 \\]\n",
"\n",
"Laplace’s trick has converted the *overdetermined* system into an\n",
"*underdetermined* system. He has now added three variables,\n",
"$\\{\\epsilon_i\\}_{i=1}^3$, which represent the unknown corruptions of the\n",
"real world. Laplace’s idea is that we should represent that unknown\n",
"corruption with a *probability distribution*."
- ]
+ ],
+ "id": "e67221b2-a772-4ad2-8767-4fa8182e3d3c"
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
- "A Probabilistic Process\n",
- "-----------------------\n",
+ "## A Probabilistic Process\n",
"\n",
- "However, it was left to an admirer of Gauss to develop a practical\n",
- "probability density for that purpose. It was Carl Friederich Gauss who\n",
+ "However, it was left to an admirer of Laplace to develop a practical\n",
+ "probability density for that purpose. It was Carl Friedrich Gauss who\n",
"suggested that the *Gaussian* density (which at the time was unnamed!)\n",
"should be used to represent this error.\n",
"\n",
@@ -945,25 +817,29 @@
"part, and a stochastic part. This type of function is sometimes known as\n",
"a probabilistic or stochastic process, to distinguish it from a\n",
"deterministic process."
- ]
+ ],
+ "id": "b22425a5-4c8f-4a78-a31e-633627a4f081"
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
- "Two Important Gaussian Properties\n",
- "---------------------------------\n",
+ "## Two Important Gaussian Properties\n",
+ "\n",
+ "\\[edit\\]\n",
"\n",
"The Gaussian density has many important properties, but for the moment\n",
"we’ll review two of them."
- ]
+ ],
+ "id": "28900d08-c4e0-455c-87be-5a5fad7ce59a"
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
- "Sum of Gaussians\n",
- "----------------\n",
+ "## Sum of Gaussians\n",
"\n",
"If we assume that a variable, $y_i$, is sampled from a Gaussian density,\n",
"\n",
@@ -986,14 +862,14 @@
"summed together tend to a Gaussian density. That is the [*central limit\n",
"theorem*](https://en.wikipedia.org/wiki/Central_limit_theorem) which is\n",
"a major justification for the use of a Gaussian density."
- ]
+ ],
+ "id": "fba05094-086c-4342-8c9b-1441779ad631"
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
- "Scaling a Gaussian\n",
- "------------------\n",
+ "## Scaling a Gaussian\n",
"\n",
"Less unusual is the *scaling* property of a Gaussian density. If a\n",
"variable, $y$, is sampled from a Gaussian density,\n",
@@ -1040,14 +916,18 @@
"Principal Component Analysis (Tipping and Bishop, 1999), because we\n",
"integrated out the inputs (or *latent* variables they would be called in\n",
"that case)."
- ]
+ ],
+ "id": "3e1649e1-f85d-4d2a-964d-f3851a9306e6"
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
- "Laplace’s Idea\n",
- "--------------\n",
+ "## Laplace’s Idea\n",
+ "\n",
+ "\\[edit\\]\n",
"\n",
"Laplace had the idea to augment the observations by noise, that is\n",
"equivalent to considering a probability density whose mean is given by\n",
@@ -1058,18 +938,17 @@
"corrupted by noise. Laplace didn’t suggest the Gaussian density for that\n",
"purpose, that was an innovation from Carl Friederich Gauss, which is\n",
"what gives the Gaussian density its name."
- ]
+ ],
+ "id": "6dfce428-858c-4b51-b81e-7a2721c05331"
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
- "Height as a Function of Weight\n",
- "------------------------------\n",
- "\n",
- "In the standard Gaussian, parametized by mean and variance.\n",
+ "## Height as a Function of Weight\n",
"\n",
- "Make the mean a linear function of an *input*.\n",
+ "In the standard Gaussian, parameterized by mean and variance, make the\n",
+ "mean a linear function of an *input*.\n",
"\n",
"This leads to a regression model. $$\n",
"\\begin{align*}\n",
@@ -1079,34 +958,44 @@
"$$\n",
"\n",
"Assume $y_i$ is height and $x_i$ is weight."
- ]
+ ],
+ "id": "4acbcf5f-e049-47ec-a937-ce2892a81eb7"
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
- "Linear Algebra\n",
- "==============\n",
+ "## Olympic Marathon Data\n",
"\n",
- "Linear algebra provides a very similar role, when we introduce [linear\n",
- "algebra](http://en.wikipedia.org/wiki/Linear_algebra), it is because we\n",
- "are faced with a large number of addition and multiplication operations.\n",
- "These operations need to be done together and would be very tedious to\n",
- "write down as a group. So the first reason we reach for linear algebra\n",
- "is for a more compact representation of our mathematical formulae."
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "Running Example: Olympic Marathons\n",
- "----------------------------------\n",
+ "\\[edit\\]\n",
+ "\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "\n",
+ "- Gold medal times for Olympic Marathon since 1896.\n",
+ "- Marathons before 1924 didn’t have a standardized distance.\n",
+ "- Present results using pace per km.\n",
+ "- In 1904 Marathon was badly organized leading to very slow times.\n",
+ "\n",
+ "
\n",
"\n",
- "Now we will load in the Olympic marathon data. This is data of the\n",
- "olympic marath times for the men’s marathon from the first olympics in\n",
- "1896 up until the London 2012 olympics."
- ]
+ "The first thing we will do is load a standard data set for regression\n",
+ "modelling. The data consists of the pace of Olympic Gold Medal Marathon\n",
+ "winners for the Olympics from 1896 to present. Let’s load in the data\n",
+ "and plot."
+ ],
+ "id": "ec70b849-801d-4a4c-a164-a19719947900"
},
{
"cell_type": "code",
@@ -1114,8 +1003,10 @@
"metadata": {},
"outputs": [],
"source": [
+ "import numpy as np\n",
"import pods"
- ]
+ ],
+ "id": "93012545-e599-4961-b557-10cf12802b7f"
},
{
"cell_type": "code",
@@ -1125,15 +1016,25 @@
"source": [
"data = pods.datasets.olympic_marathon_men()\n",
"x = data['X']\n",
- "y = data['Y']"
- ]
+ "y = data['Y']\n",
+ "\n",
+ "offset = y.mean()\n",
+ "scale = np.sqrt(y.var())\n",
+ "yhat = (y - offset)/scale"
+ ],
+ "id": "cdd8b1f7-9310-4115-8633-3ec002b290ff"
},
{
- "cell_type": "markdown",
+ "cell_type": "code",
+ "execution_count": null,
"metadata": {},
+ "outputs": [],
"source": [
- "You can see what these values are by typing:"
- ]
+ "import matplotlib.pyplot as plt\n",
+ "import mlai.plot as plot\n",
+ "import mlai"
+ ],
+ "id": "aee76fd2-17a4-40f0-a8f0-197cbe36f131"
},
{
"cell_type": "code",
@@ -1141,62 +1042,65 @@
"metadata": {},
"outputs": [],
"source": [
- "print(x)\n",
- "print(y)"
- ]
+ "\n",
+ "xlim = (1875,2030)\n",
+ "ylim = (2.5, 6.5)\n",
+ "\n",
+ "fig, ax = plt.subplots(figsize=plot.big_wide_figsize)\n",
+ "_ = ax.plot(x, y, 'r.',markersize=10)\n",
+ "ax.set_xlabel('year', fontsize=20)\n",
+ "ax.set_ylabel('pace min/km', fontsize=20)\n",
+ "ax.set_xlim(xlim)\n",
+ "ax.set_ylim(ylim)\n",
+ "\n",
+ "mlai.write_figure(filename='olympic-marathon.svg', \n",
+ " directory='./datasets')"
+ ],
+ "id": "afc902a7-19f5-4948-b7f2-9dd57b01886e"
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
- "Note that they are not `pandas` data frames for this example, they are\n",
- "just arrays of dimensionality $n\\times 1$, where $n$ is the number of\n",
- "data.\n",
+ "\n",
"\n",
- "The aim of this lab is to have you coding linear regression in python.\n",
- "We will do it in two ways, once using iterative updates (coordinate\n",
- "ascent) and then using linear algebra. The linear algebra approach will\n",
- "not only work much better, it is easy to extend to multiple input linear\n",
- "regression and *non-linear* regression using basis functions."
- ]
+ "Figure: Olympic marathon pace times since 1896.\n",
+ "\n",
+ "Things to notice about the data include the outlier in 1904, in that\n",
+ "year the Olympics was in St Louis, USA. Organizational problems and\n",
+ "challenges with dust kicked up by the cars following the race meant that\n",
+ "participants got lost, and only very few participants completed. More\n",
+ "recent years see more consistently quick marathons."
+ ],
+ "id": "5c5b5436-81ee-4dc0-924c-3eabb4b30547"
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
- "Plotting the Data\n",
- "-----------------\n",
+ "## Running Example: Olympic Marathons\n",
"\n",
- "You can make a plot of $y$ vs $x$ with the following command:"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "%matplotlib inline \n",
- "import matplotlib.pyplot as plt"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "plt.plot(x, y, 'rx')\n",
- "plt.xlabel('year')\n",
- "plt.ylabel('pace in min/km')"
- ]
+ "\\[edit\\]\n",
+ "\n",
+ "Note that `x` and `y` are not `pandas` data frames for this example,\n",
+ "they are just arrays of dimensionality $n\\times 1$, where $n$ is the\n",
+ "number of data.\n",
+ "\n",
+ "The aim of this lab is to have you coding linear regression in python.\n",
+ "We will do it in two ways, once using iterative updates (coordinate\n",
+ "ascent) and then using linear algebra. The linear algebra approach will\n",
+ "not only work much better, it is also easy to extend to multiple input\n",
+ "linear regression and *non-linear* regression using basis functions."
+ ],
+ "id": "b122ff52-870a-4cba-a7a4-00939b61f733"
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
- "Maximum Likelihood: Iterative Solution\n",
- "--------------------------------------\n",
+ "## Maximum Likelihood: Iterative Solution\n",
"\n",
"Now we will take the maximum likelihood approach we derived in the\n",
"lecture to fit a line, $y_i=mx_i + c$, to the data you’ve plotted. We\n",
@@ -1204,7 +1108,8 @@
"E(m, c) = \\sum_{i=1}^n(y_i-mx_i-c)^2\n",
"$$ with respect to $m$, $c$ and $\\sigma^2$. We can start with an initial\n",
"guess for $m$,"
- ]
+ ],
+ "id": "67cf9fe1-0716-46c1-b1b9-bfca3df3c24e"
},
{
"cell_type": "code",
@@ -1214,7 +1119,8 @@
"source": [
"m = -0.4\n",
"c = 80"
- ]
+ ],
+ "id": "879a23d6-6ba1-4277-8a30-6686cd313191"
},
{
"cell_type": "markdown",
@@ -1222,41 +1128,92 @@
"source": [
"Then we use the maximum likelihood update to find an estimate for the\n",
"offset, $c$."
- ]
+ ],
+ "id": "9b3a95b7-c8d4-42a6-bbc0-e2073288537e"
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Log Likelihood for Multivariate Regression\n",
+ "\n",
+ "\\[edit\\]"
+ ],
+ "id": "33a88913-9eab-4e21-9f14-618be040f396"
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Quadratic Loss\n",
+ "\n",
+ "\\[edit\\]\n",
+ "\n",
+ "Now we’ve identified the empirical risk with the loss, we’ll use\n",
+ "$E(\\mathbf{ w})$ to represent our objective function. $$\n",
+ "E(\\mathbf{ w}) = \\sum_{i=1}^n\\left(y_i - f(\\mathbf{ x}_i, \\mathbf{ w})\\right)^2\n",
+ "$$ gives us our objective.\n",
+ "\n",
+ "In the case of the linear prediction function, we can substitute\n",
+ "$f(\\mathbf{ x}_i, \\mathbf{ w}) = \\mathbf{ w}^\\top \\mathbf{ x}_i$. $$\n",
+ "E(\\mathbf{ w}) = \\sum_{i=1}^n\\left(y_i - \\mathbf{ w}^\\top \\mathbf{ x}_i\\right)^2\n",
+ "$$ To compute the gradient of the objective, we first expand the\n",
+ "brackets."
+ ],
+ "id": "83013905-8081-4a4b-9f61-e29d8683d532"
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
- "Log Likelihood for Multivariate Regression\n",
- "------------------------------------------"
- ]
+ "## Bracket Expansion\n",
+ "\n",
+ "$$\n",
+ "\\begin{align*}\n",
+ " E(\\mathbf{ w},\\sigma^2) = &\n",
+ "\\frac{n}{2}\\log \\sigma^2 + \\frac{1}{2\\sigma^2}\\sum\n",
+ "_{i=1}^{n}y_i^{2}-\\frac{1}{\\sigma^2}\\sum\n",
+ "_{i=1}^{n}y_i\\mathbf{ w}^{\\top}\\mathbf{ x}_i\\\\&+\\frac{1}{2\\sigma^2}\\sum\n",
+ "_{i=1}^{n}\\mathbf{ w}^{\\top}\\mathbf{ x}_i\\mathbf{ x}_i^{\\top}\\mathbf{ w}\n",
+ "+\\text{const}.\\\\\n",
+ " = & \\frac{n}{2}\\log \\sigma^2 + \\frac{1}{2\\sigma^2}\\sum\n",
+ "_{i=1}^{n}y_i^{2}-\\frac{1}{\\sigma^2}\n",
+ "\\mathbf{ w}^\\top\\sum_{i=1}^{n}\\mathbf{ x}_iy_i\\\\&+\\frac{1}{2\\sigma^2}\n",
+ "\\mathbf{ w}^{\\top}\\left[\\sum\n",
+ "_{i=1}^{n}\\mathbf{ x}_i\\mathbf{ x}_i^{\\top}\\right]\\mathbf{ w}+\\text{const}.\n",
+ "\\end{align*}\n",
+ "$$"
+ ],
+ "id": "e6a267d8-6821-45d1-93d1-abb08c52bb63"
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
- "Multiple Input Solution with Linear Algebra\n",
- "===========================================\n",
+ "# Solution with Linear Algebra\n",
"\n",
- "You’ve now seen how slow it can be to perform a coordinate ascent on a\n",
- "system. Another approach to solving the system (which is not always\n",
- "possible, particularly in *non-linear* systems) is to go direct to the\n",
- "minimum. To do this we need to introduce *linear algebra*. We will\n",
- "represent all our errors and functions in the form of linear algebra. As\n",
- "we mentioned above, linear algebra is just a shorthand for performing\n",
- "lots of multiplications and additions simultaneously. What does it have\n",
- "to do with our system then? Well the first thing to note is that the\n",
- "linear function we were trying to fit has the following form: $$\n",
+ "In this section we’re going compute the minimum of the quadratic loss\n",
+ "with respect to the parameters. When we do this, we’ll also review\n",
+ "*linear algebra*. We will represent all our errors and functions in the\n",
+ "form of matrices and vectors.\n",
+ "\n",
+ "Linear algebra is just a shorthand for performing lots of\n",
+ "multiplications and additions simultaneously. What does it have to do\n",
+ "with our system then? Well, the first thing to note is that the classic\n",
+ "linear function we fit for a one-dimensional regression has the form: $$\n",
"f(x) = mx + c\n",
"$$ the classical form for a straight line. From a linear algebraic\n",
- "perspective we are looking for multiplications and additions. We are\n",
+ "perspective, we are looking for multiplications and additions. We are\n",
"also looking to separate our parameters from our data. The data is the\n",
- "*givens* remember, in French the word is données literally translated\n",
- "means *givens* that’s great, because we don’t need to change the data,\n",
- "what we need to change are the parameters (or variables) of the model.\n",
- "In this function the data comes in through $x$, and the parameters are\n",
- "$m$ and $c$.\n",
+ "*givens*. In French the word is données literally translated means\n",
+ "*givens* that’s great, because we don’t need to change the data, what we\n",
+ "need to change are the parameters (or variables) of the model. In this\n",
+ "function the data comes in through $x$, and the parameters are $m$ and\n",
+ "$c$.\n",
"\n",
"What we’d like to create is a vector of parameters and a vector of data.\n",
"Then we could represent the system with vectors that represent the data,\n",
@@ -1264,11 +1221,11 @@
"\n",
"We look to turn the multiplications and additions into a linear\n",
"algebraic form, we have one multiplication ($m\\times c$) and one\n",
- "addition ($mx + c$). But we can turn this into a inner product by\n",
+ "addition ($mx + c$). But we can turn this into an inner product by\n",
"writing it in the following way, $$\n",
"f(x) = m \\times x +\n",
"c \\times 1,\n",
- "$$ in other words we’ve extracted the unit value, from the offset, $c$.\n",
+ "$$ in other words, we’ve extracted the unit value from the offset, $c$.\n",
"We can think of this unit value like an extra item of data, because it\n",
"is always given to us, and it is always set to 1 (unlike regular data,\n",
"which is likely to vary!). We can therefore write each input data\n",
@@ -1279,11 +1236,12 @@
"Now we choose to also turn our parameters into a vector. The parameter\n",
"vector will be defined to contain $$\n",
"\\mathbf{ w}= \\begin{bmatrix} c \\\\ m\\end{bmatrix}\n",
- "$$ because if we now take the inner product between these to vectors we\n",
+ "$$ because if we now take the inner product between these two vectors we\n",
"recover $$\n",
"\\mathbf{ x}\\cdot\\mathbf{ w}= 1 \\times c + x \\times m = mx + c\n",
"$$ In `numpy` we can define this vector as follows"
- ]
+ ],
+ "id": "0327c2d2-94f8-42a1-a199-2ee183131cc8"
},
{
"cell_type": "code",
@@ -1292,7 +1250,8 @@
"outputs": [],
"source": [
"import numpy as np"
- ]
+ ],
+ "id": "5e1673aa-692d-4e07-9c20-eae9c3b51889"
},
{
"cell_type": "code",
@@ -1304,7 +1263,8 @@
"w = np.zeros(shape=(2, 1))\n",
"w[0] = m\n",
"w[1] = c"
- ]
+ ],
+ "id": "f1daadd6-419f-45a4-8bc1-207f348e7799"
},
{
"cell_type": "markdown",
@@ -1313,10 +1273,10 @@
"This gives us the equivalence between original operation and an\n",
"operation in vector space. Whilst the notation here isn’t a lot shorter,\n",
"the beauty is that we will be able to add as many features as we like\n",
- "and still keep the seame representation. In general, we are now moving\n",
- "to a system where each of our predictions is given by an inner product.\n",
- "When we want to represent a linear product in linear algebra, we tend to\n",
- "do it with the transpose operation, so since we have\n",
+ "and keep the same representation. In general, we are now moving to a\n",
+ "system where each of our predictions is given by an inner product. When\n",
+ "we want to represent a linear product in linear algebra, we tend to do\n",
+ "it with the transpose operation, so since we have\n",
"$\\mathbf{a}\\cdot\\mathbf{b} = \\mathbf{a}^\\top\\mathbf{b}$ we can write $$\n",
"f(\\mathbf{ x}_i) = \\mathbf{ x}_i^\\top\\mathbf{ w}.\n",
"$$ Where we’ve assumed that each data point, $\\mathbf{ x}_i$, is now\n",
@@ -1326,19 +1286,19 @@
"x_i\n",
"\\end{bmatrix}\n",
"$$"
- ]
+ ],
+ "id": "61776281-a38f-4855-9597-028867ef57ae"
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
- "Design Matrix\n",
- "=============\n",
+ "# Design Matrix\n",
"\n",
"We can do this for the entire data set to form a [*design\n",
- "matrix*](http://en.wikipedia.org/wiki/Design_matrix) $\\mathbf{X}$,\n",
- "\n",
- "$$\\mathbf{X}\n",
+ "matrix*](http://en.wikipedia.org/wiki/Design_matrix)\n",
+ "$\\boldsymbol{ \\Phi}$, $$\n",
+ "\\boldsymbol{ \\Phi}\n",
"= \\begin{bmatrix} \n",
"\\mathbf{ x}_1^\\top \\\\\\ \n",
"\\mathbf{ x}_2^\\top \\\\\\ \n",
@@ -1350,10 +1310,10 @@
"\\vdots\n",
"& \\vdots \\\\\\\n",
"1 & x_n\n",
- "\\end{bmatrix},$$\n",
- "\n",
- "which in `numpy` can be done with the following commands:"
- ]
+ "\\end{bmatrix},\n",
+ "$$ which in `numpy` can be done with the following commands:"
+ ],
+ "id": "cbb0c592-7513-4e10-b42a-7106863f3d04"
},
{
"cell_type": "code",
@@ -1362,7 +1322,8 @@
"outputs": [],
"source": [
"import numpy as np"
- ]
+ ],
+ "id": "da72c5ac-a526-4be8-8077-ff95961615ec"
},
{
"cell_type": "code",
@@ -1370,23 +1331,23 @@
"metadata": {},
"outputs": [],
"source": [
- "X = np.hstack((np.ones_like(x), x))\n",
- "print(X)"
- ]
+ "Phi = np.hstack((np.ones_like(x), x))\n",
+ "print(Phi)"
+ ],
+ "id": "9dd074c1-7a75-4bae-a159-100bc821aa69"
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
- "Writing the Objective with Linear Algebra\n",
- "-----------------------------------------\n",
+ "## Writing the Objective with Linear Algebra\n",
"\n",
"When we think of the objective function, we can think of it as the\n",
"errors where the error is defined in a similar way to what it was in\n",
"Legendre’s day $y_i - f(\\mathbf{ x}_i)$, in statistics these errors are\n",
"also sometimes called\n",
"[*residuals*](http://en.wikipedia.org/wiki/Errors_and_residuals_in_statistics).\n",
- "So we can think as the objective and the prediction function as two\n",
+ "So, we can think as the objective and the prediction function as two\n",
"separate parts, first we have, $$\n",
"E(\\mathbf{ w}) = \\sum_{i=1}^n(y_i - f(\\mathbf{ x}_i; \\mathbf{ w}))^2,\n",
"$$ where we’ve made the function $f(\\cdot)$’s dependence on the\n",
@@ -1396,43 +1357,44 @@
"$$ Let’s look again at these two equations and see if we can identify\n",
"any inner products. The first equation is a sum of squares, which is\n",
"promising. Any sum of squares can be represented by an inner product, $$\n",
- "a = \\sum_{i=1}^{k} b^2_i = \\mathbf{b}^\\top\\mathbf{b},\n",
- "$$ so if we wish to represent $E(\\mathbf{ w})$ in this way, all we need\n",
- "to do is convert the sum operator to an inner product. We can get a\n",
- "vector from that sum operator by placing both $y_i$ and\n",
+ "a = \\sum_{i=1}^{k} b^2_i = \\mathbf{b}^\\top\\mathbf{b}.\n",
+ "$$ If we wish to represent $E(\\mathbf{ w})$ in this way, all we need to\n",
+ "do is convert the sum operator to an inner product. We can get a vector\n",
+ "from that sum operator by placing both $y_i$ and\n",
"$f(\\mathbf{ x}_i; \\mathbf{ w})$ into vectors, which we do by defining $$\n",
"\\mathbf{ y}= \\begin{bmatrix}y_1\\\\ y_2\\\\ \\vdots \\\\ y_n\\end{bmatrix}\n",
"$$ and defining $$\n",
"\\mathbf{ f}(\\mathbf{ x}_1; \\mathbf{ w}) = \\begin{bmatrix}f(\\mathbf{ x}_1; \\mathbf{ w})\\\\ f(\\mathbf{ x}_2; \\mathbf{ w})\\\\ \\vdots \\\\ f(\\mathbf{ x}_n; \\mathbf{ w})\\end{bmatrix}.\n",
- "$$ The second of these is actually a vector-valued function. This term\n",
- "may appear intimidating, but the idea is straightforward. A vector\n",
- "valued function is simply a vector whose elements are themselves defined\n",
- "as *functions*, i.e. it is a vector of functions, rather than a vector\n",
- "of scalars. The idea is so straightforward, that we are going to ignore\n",
- "it for the moment, and barely use it in the derivation. But it will\n",
- "reappear later when we introduce *basis functions*. So we will, for the\n",
- "moment, ignore the dependence of $\\mathbf{ f}$ on $\\mathbf{ w}$ and\n",
- "$\\mathbf{X}$ and simply summarise it by a vector of numbers $$\n",
+ "$$ The second of these is a vector-valued function. This term may appear\n",
+ "intimidating, but the idea is straightforward. A vector valued function\n",
+ "is simply a vector whose elements are themselves defined as *functions*,\n",
+ "i.e., it is a vector of functions, rather than a vector of scalars. The\n",
+ "idea is so straightforward, that we are going to ignore it for the\n",
+ "moment, and barely use it in the derivation. But it will reappear later\n",
+ "when we introduce *basis functions*. So, we will for the moment ignore\n",
+ "the dependence of $\\mathbf{ f}$ on $\\mathbf{ w}$ and\n",
+ "$\\boldsymbol{ \\Phi}$ and simply summarise it by a vector of numbers $$\n",
"\\mathbf{ f}= \\begin{bmatrix}f_1\\\\f_2\\\\\n",
"\\vdots \\\\ f_n\\end{bmatrix}.\n",
"$$ This allows us to write our objective in the folowing, linear\n",
"algebraic form, $$\n",
"E(\\mathbf{ w}) = (\\mathbf{ y}- \\mathbf{ f})^\\top(\\mathbf{ y}- \\mathbf{ f})\n",
- "$$ from the rules of inner products. But what of our matrix $\\mathbf{X}$\n",
- "of input data? At this point, we need to dust off [*matrix-vector\n",
+ "$$ from the rules of inner products. But what of our matrix\n",
+ "$\\boldsymbol{ \\Phi}$ of input data? At this point, we need to dust off\n",
+ "[*matrix-vector\n",
"multiplication*](http://en.wikipedia.org/wiki/Matrix_multiplication).\n",
"Matrix multiplication is simply a convenient way of performing many\n",
- "inner products together, and it’s exactly what we need to summarise the\n",
+ "inner products together, and it’s exactly what we need to summarize the\n",
"operation $$\n",
"f_i = \\mathbf{ x}_i^\\top\\mathbf{ w}.\n",
"$$ This operation tells us that each element of the vector $\\mathbf{ f}$\n",
"(our vector valued function) is given by an inner product between\n",
- "$\\mathbf{ x}_i$ and $\\mathbf{ w}$. In other words it is a series of\n",
+ "$\\mathbf{ x}_i$ and $\\mathbf{ w}$. In other words, it is a series of\n",
"inner products. Let’s look at the definition of matrix multiplication,\n",
"it takes the form $$\n",
- "\\mathbf{c} = \\mathbf{B}\\mathbf{a}\n",
+ "\\mathbf{c} = \\mathbf{B}\\mathbf{a},\n",
"$$ where $\\mathbf{c}$ might be a $k$ dimensional vector (which we can\n",
- "intepret as a $k\\times 1$ dimensional matrix), and $\\mathbf{B}$ is a\n",
+ "interpret as a $k\\times 1$ dimensional matrix), and $\\mathbf{B}$ is a\n",
"$k\\times k$ dimensional matrix and $\\mathbf{a}$ is a $k$ dimensional\n",
"vector ($k\\times 1$ dimensional matrix).\n",
"\n",
@@ -1447,16 +1409,19 @@
"b_{1, k}a_k\\\\\n",
"b_{2, 1}a_1 + b_{2, 2}a_2 + \\dots + b_{2, k}a_k \\\\ \n",
"\\vdots\\\\\n",
- "b_{k, 1}a_1 + b_{k, 2}a_2 + \\dots + b_{k, k}a_k\\end{bmatrix}\n",
- "$$ so we see that each element of the result, $\\mathbf{a}$ is simply the\n",
+ "b_{k, 1}a_1 + b_{k, 2}a_2 + \\dots + b_{k, k}a_k\\end{bmatrix}.\n",
+ "$$ We see that each element of the result, $\\mathbf{a}$ is simply the\n",
"inner product between each *row* of $\\mathbf{B}$ and the vector\n",
"$\\mathbf{c}$. Because we have defined each element of $\\mathbf{ f}$ to\n",
"be given by the inner product between each *row* of the design matrix\n",
"and the vector $\\mathbf{ w}$ we now can write the full operation in one\n",
- "matrix multiplication, $$\n",
- "\\mathbf{ f}= \\mathbf{X}\\mathbf{ w}.\n",
+ "matrix multiplication,\n",
+ "\n",
+ "$$\n",
+ "\\mathbf{ f}= \\boldsymbol{ \\Phi}\\mathbf{ w}.\n",
"$$"
- ]
+ ],
+ "id": "d9185cc2-6ffc-4125-a288-e8962f8871d1"
},
{
"cell_type": "code",
@@ -1465,7 +1430,8 @@
"outputs": [],
"source": [
"import numpy as np"
- ]
+ ],
+ "id": "9a587441-9725-48bf-a737-0fd3f797894a"
},
{
"cell_type": "code",
@@ -1473,8 +1439,9 @@
"metadata": {},
"outputs": [],
"source": [
- "f = X@w # The @ sign performs matrix multiplication"
- ]
+ "f = Phi@w # The @ sign performs matrix multiplication"
+ ],
+ "id": "ad80ebba-60da-4bed-b0b7-564ec30fc82e"
},
{
"cell_type": "markdown",
@@ -1485,7 +1452,8 @@
"$$ we find we have defined the *model* with two equations. One equation\n",
"tells us the form of our predictive function and how it depends on its\n",
"parameters, the other tells us the form of our objective function."
- ]
+ ],
+ "id": "413fc864-1f4f-4dda-9c79-271458e880c7"
},
{
"cell_type": "code",
@@ -1496,49 +1464,25 @@
"resid = (y-f)\n",
"E = np.dot(resid.T, resid) # matrix multiplication on a single vector is equivalent to a dot product.\n",
"print(\"Error function is:\", E)"
- ]
+ ],
+ "id": "b9d7f1da-7bf4-4a89-baab-3aa861d4b201"
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
- "### Exercise 0\n",
- "\n",
- "The prediction for our movie recommender system had the form $$\n",
- "f_{i,j} = \\mathbf{u}_i^\\top \\mathbf{v}_j\n",
- "$$ and the objective function was then $$\n",
- "E = \\sum_{i,j} s_{i,j}(y_{i,j} - f_{i, j})^2\n",
- "$$ Try writing this down in matrix and vector form. How many of the\n",
- "terms can you do? For each variable and parameter carefully think about\n",
- "whether it should be represented as a matrix or vector. Do as many of\n",
- "the terms as you can. Use $\\LaTeX$ to give your answers and give the\n",
- "*dimensions* of any matrices you create."
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "::: {.cell .markdown}\n",
- "\n",
- "### Exercise 0 Answer\n",
+ "# Objective Optimization\n",
"\n",
- "Write your answer to Exercise 0 here"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "Objective Optimisation\n",
- "======================\n",
+ "\\[edit\\]\n",
"\n",
- "Our *model* has now been defined with two equations, the prediction\n",
- "function and the objective function. Next we will use multivariate\n",
+ "Our *model* has now been defined with two equations: the prediction\n",
+ "function and the objective function. Now we will use multivariate\n",
"calculus to define an *algorithm* to fit the model. The separation\n",
"between model and algorithm is important and is often overlooked. Our\n",
"model contains a function that shows how it will be used for prediction,\n",
- "and a function that describes the objective function we need to optimise\n",
+ "and a function that describes the objective function we need to optimize\n",
"to obtain a good set of parameters.\n",
"\n",
"The model linear regression model we have described is still the same as\n",
@@ -1550,17 +1494,17 @@
"algorithm, it just appears to be a single operation (or function).\n",
"However, underneath the computer calls an algorithm to find the\n",
"solution. Further, the algorithm we obtain is very widely used, and\n",
- "because of this it turns out to be highly optimised.\n",
+ "because of this it turns out to be highly optimized.\n",
"\n",
- "Once again we are going to try and find the stationary points of our\n",
+ "Once again, we are going to try and find the stationary points of our\n",
"objective by finding the *stationary points*. However, the stationary\n",
- "points of a multivariate function, are a little bit more complext to\n",
- "find. Once again we need to find the point at which the derivative is\n",
- "zero, but now we need to use *multivariate calculus* to find it. This\n",
- "involves learning a few additional rules of differentiation (that allow\n",
- "you to do the derivatives of a function with respect to vector), but in\n",
- "the end it makes things quite a bit easier. We define vectorial\n",
- "derivatives as follows, $$\n",
+ "points of a multivariate function, are a little bit more complex to\n",
+ "find. As before we need to find the point at which the gradient is zero,\n",
+ "but now we need to use *multivariate calculus* to find it. This involves\n",
+ "learning a few additional rules of differentiation (that allow you to do\n",
+ "the derivatives of a function with respect to vector), but in the end it\n",
+ "makes things quite a bit easier. We define vectorial derivatives as\n",
+ "follows, $$\n",
"\\frac{\\text{d}E(\\mathbf{ w})}{\\text{d}\\mathbf{ w}} =\n",
"\\begin{bmatrix}\\frac{\\text{d}E(\\mathbf{ w})}{\\text{d}w_1}\\\\\\frac{\\text{d}E(\\mathbf{ w})}{\\text{d}w_2}\\end{bmatrix}.\n",
"$$ where $\\frac{\\text{d}E(\\mathbf{ w})}{\\text{d}w_1}$ is the [partial\n",
@@ -1569,19 +1513,19 @@
"\n",
"Differentiation through multiplications and additions is relatively\n",
"straightforward, and since linear algebra is just multiplication and\n",
- "addition, then its rules of diffentiation are quite straightforward too,\n",
- "but slightly more complex than regular derivatives."
- ]
+ "addition, then its rules of differentiation are quite straightforward\n",
+ "too, but slightly more complex than regular derivatives."
+ ],
+ "id": "6656e63d-e6bd-4d58-b150-d52bad3f84bc"
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
- "Multivariate Derivatives\n",
- "------------------------\n",
+ "## Multivariate Derivatives\n",
"\n",
"We will need two rules of multivariate or *matrix* differentiation. The\n",
- "first is diffentiation of an inner product. By remembering that the\n",
+ "first is differentiation of an inner product. By remembering that the\n",
"inner product is made up of multiplication and addition, we can hope\n",
"that its derivative is quite straightforward, and so it proves to be. We\n",
"can start by thinking about the definition of the inner product, $$\n",
@@ -1589,10 +1533,10 @@
"z_i,\n",
"$$ which if we were to take the derivative with respect to $z_k$ would\n",
"simply return the gradient of the one term in the sum for which the\n",
- "derivative was non zero, that of $a_k$, so we know that $$\n",
+ "derivative was non-zero, that of $a_k$, so we know that $$\n",
"\\frac{\\text{d}}{\\text{d}z_k} \\mathbf{a}^\\top \\mathbf{z} = a_k\n",
- "$$ and by our definition of multivariate derivatives we can simply stack\n",
- "all the partial derivatives of this form in a vector to obtain the\n",
+ "$$ and by our definition for multivariate derivatives, we can simply\n",
+ "stack all the partial derivatives of this form in a vector to obtain the\n",
"result that $$\n",
"\\frac{\\text{d}}{\\text{d}\\mathbf{z}}\n",
"\\mathbf{a}^\\top \\mathbf{z} = \\mathbf{a}.\n",
@@ -1602,7 +1546,8 @@
"$k \\times k$ *matrix* of coefficients then the matrix quadratic form is\n",
"written as $\\mathbf{z}^\\top \\mathbf{C}\\mathbf{z}$, which is itself a\n",
"*scalar* quantity, but it is a function of a *vector*."
- ]
+ ],
+ "id": "be145100-1931-4f54-a0b2-a0ed62028ecc"
},
{
"cell_type": "markdown",
@@ -1610,15 +1555,15 @@
"source": [
"### Matching Dimensions in Matrix Multiplications\n",
"\n",
- "There’s a trick for telling that it’s a scalar result. When you are\n",
- "doing maths with matrices, it’s always worth pausing to perform a quick\n",
- "sanity check on the dimensions. Matrix multplication only works when the\n",
- "dimensions match. To be precise, the ‘inner’ dimension of the matrix\n",
- "must match. What is the inner dimension. If we multiply two matrices\n",
- "$\\mathbf{A}$ and $\\mathbf{B}$, the first of which has $k$ rows and\n",
- "$\\ell$ columns and the second of which has $p$ rows and $q$ columns,\n",
- "then we can check whether the multiplication works by writing the\n",
- "dimensionalities next to each other, $$\n",
+ "There’s a trick for telling a multiplication leads to a scalar result.\n",
+ "When you are doing mathematics with matrices, it’s always worth pausing\n",
+ "to perform a quick sanity check on the dimensions. Matrix multplication\n",
+ "only works when the dimensions match. To be precise, the ‘inner’\n",
+ "dimension of the matrix must match. What is the inner dimension? If we\n",
+ "multiply two matrices $\\mathbf{A}$ and $\\mathbf{B}$, the first of which\n",
+ "has $k$ rows and $\\ell$ columns and the second of which has $p$ rows and\n",
+ "$q$ columns, then we can check whether the multiplication works by\n",
+ "writing the dimensionalities next to each other, $$\n",
"\\mathbf{A} \\mathbf{B} \\rightarrow (k \\times\n",
"\\underbrace{\\ell)(p}_\\text{inner dimensions} \\times q) \\rightarrow (k\\times q).\n",
"$$ The inner dimensions are the two inside dimensions, $\\ell$ and $p$.\n",
@@ -1628,20 +1573,20 @@
"not [*commutative*](http://en.wikipedia.org/wiki/Commutative_property).\n",
"And if you change the order of the multiplication, $$\n",
"\\mathbf{B} \\mathbf{A} \\rightarrow (\\ell \\times \\underbrace{k)(q}_\\text{inner dimensions} \\times p) \\rightarrow (\\ell \\times p).\n",
- "$$ firstly it may no longer even work, because now the condition is that\n",
- "$k=q$, and secondly the result could be of a different dimensionality.\n",
- "An exception is if the matrices are square matrices (e.g. same number of\n",
- "rows as columns) and they are both *symmetric*. A symmetric matrix is\n",
- "one for which $\\mathbf{A}=\\mathbf{A}^\\top$, or equivalently,\n",
- "$a_{i,j} = a_{j,i}$ for all $i$ and $j$.\n",
- "\n",
- "You will need to get used to working with matrices and vectors applying\n",
- "and developing new machine learning techniques. You should have come\n",
+ "$$ Firstly, it may no longer even work, because now the condition is\n",
+ "that $k=q$, and secondly the result could be of a different\n",
+ "dimensionality. An exception is if the matrices are square matrices\n",
+ "(e.g., same number of rows as columns) and they are both *symmetric*. A\n",
+ "symmetric matrix is one for which $\\mathbf{A}=\\mathbf{A}^\\top$, or\n",
+ "equivalently, $a_{i,j} = a_{j,i}$ for all $i$ and $j$.\n",
+ "\n",
+ "For applying and developing machine learning algorithms you should get\n",
+ "familiar with working with matrices and vectors. You should have come\n",
"across them before, but you may not have used them as extensively as we\n",
- "will now do in this course. You should get used to using this trick to\n",
- "check your work and ensure you know what the dimension of an output\n",
- "matrix should be. For our matrix quadratic form, it turns out that we\n",
- "can see it as a special type of inner product. $$\n",
+ "are doing now. It’s worth getting used to using this trick to check your\n",
+ "work and ensure you know what the dimension of an output matrix should\n",
+ "be. For our matrix quadratic form, it turns out that we can see it as a\n",
+ "special type of inner product. $$\n",
"\\mathbf{z}^\\top\\mathbf{C}\\mathbf{z} \\rightarrow (1\\times\n",
"\\underbrace{k) (k}_\\text{inner dimensions}\\times k) (k\\times 1) \\rightarrow\n",
"\\mathbf{b}^\\top\\mathbf{z}\n",
@@ -1663,21 +1608,19 @@
"\\frac{\\text{d}}{\\text{d}\\mathbf{z}} \\mathbf{z}^\\top\\mathbf{C}\\mathbf{z}=\n",
"2\\mathbf{C}\\mathbf{z}.\n",
"$$"
- ]
+ ],
+ "id": "441440cf-f84e-46d8-8e17-59c50024915e"
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
- "::: {.cell .markdown}\n",
- "\n",
- "Differentiate the Objective\n",
- "---------------------------\n",
+ "## Differentiate the Objective\n",
"\n",
"First, we need to compute the full objective by substituting our\n",
"prediction function into the objective function to obtain the objective\n",
"in terms of $\\mathbf{ w}$. Doing this we obtain $$\n",
- "E(\\mathbf{ w})= (\\mathbf{ y}- \\mathbf{X}\\mathbf{ w})^\\top (\\mathbf{ y}- \\mathbf{X}\\mathbf{ w}).\n",
+ "E(\\mathbf{ w})= (\\mathbf{ y}- \\boldsymbol{ \\Phi}\\mathbf{ w})^\\top (\\mathbf{ y}- \\boldsymbol{ \\Phi}\\mathbf{ w}).\n",
"$$ We now need to differentiate this *quadratic form* to find the\n",
"minimum. We differentiate with respect to the *vector* $\\mathbf{ w}$.\n",
"But before we do that, we’ll expand the brackets in the quadratic form\n",
@@ -1687,25 +1630,28 @@
"(\\mathbf{c} - \\mathbf{d}) = \\mathbf{a}^\\top \\mathbf{c} - \\mathbf{a}^\\top\n",
"\\mathbf{d} - \\mathbf{b}^\\top \\mathbf{c} + \\mathbf{b}^\\top \\mathbf{d}\n",
"$$ which substituting for $\\mathbf{a} = \\mathbf{c} = \\mathbf{ y}$ and\n",
- "$\\mathbf{b}=\\mathbf{d} = \\mathbf{X}\\mathbf{ w}$ gives $$\n",
+ "$\\mathbf{b}=\\mathbf{d} = \\boldsymbol{ \\Phi}\\mathbf{ w}$ gives $$\n",
"E(\\mathbf{ w})=\n",
- "\\mathbf{ y}^\\top\\mathbf{ y}- 2\\mathbf{ y}^\\top\\mathbf{X}\\mathbf{ w}+\n",
- "\\mathbf{ w}^\\top\\mathbf{X}^\\top\\mathbf{X}\\mathbf{ w}\n",
+ "\\mathbf{ y}^\\top\\mathbf{ y}- 2\\mathbf{ y}^\\top\\boldsymbol{ \\Phi}\\mathbf{ w}+\n",
+ "\\mathbf{ w}^\\top\\boldsymbol{ \\Phi}^\\top\\boldsymbol{ \\Phi}\\mathbf{ w}\n",
"$$ where we used the fact that\n",
- "$\\mathbf{ y}^\\top\\mathbf{X}\\mathbf{ w}=\\mathbf{ w}^\\top\\mathbf{X}^\\top\\mathbf{ y}$.\n",
+ "$\\mathbf{ y}^\\top\\boldsymbol{ \\Phi}\\mathbf{ w}=\\mathbf{ w}^\\top\\boldsymbol{ \\Phi}^\\top\\mathbf{ y}$.\n",
+ "\n",
"Now we can use our rules of differentiation to compute the derivative of\n",
"this form, which is, $$\n",
- "\\frac{\\text{d}}{\\text{d}\\mathbf{ w}}E(\\mathbf{ w})=- 2\\mathbf{X}^\\top \\mathbf{ y}+\n",
- "2\\mathbf{X}^\\top\\mathbf{X}\\mathbf{ w},\n",
- "$$ where we have exploited the fact that $\\mathbf{X}^\\top\\mathbf{X}$ is\n",
- "symmetric to obtain this result."
- ]
+ "\\frac{\\text{d}}{\\text{d}\\mathbf{ w}}E(\\mathbf{ w})=- 2\\boldsymbol{ \\Phi}^\\top \\mathbf{ y}+\n",
+ "2\\boldsymbol{ \\Phi}^\\top\\boldsymbol{ \\Phi}\\mathbf{ w},\n",
+ "$$ where we have exploited the fact that\n",
+ "$\\boldsymbol{ \\Phi}^\\top\\boldsymbol{ \\Phi}$ is symmetric to obtain this\n",
+ "result."
+ ],
+ "id": "81f80251-83a0-47da-ae07-fa50c74f55b4"
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
- "### Exercise 0\n",
+ "### Exercise 1\n",
"\n",
"Use the equivalence between our vector and our matrix formulations of\n",
"linear regression, alongside our definition of vector derivates, to\n",
@@ -1713,144 +1659,90 @@
"$\\frac{\\text{d}E(c, m)}{\\text{d}c}$ and\n",
"$\\frac{\\text{d}E(c, m)}{\\text{d}m}$ to those for\n",
"$\\frac{\\text{d}E(\\mathbf{ w})}{\\text{d}\\mathbf{ w}}$."
- ]
+ ],
+ "id": "ef9f728d-ff81-4006-9b13-85756c5e4bcd"
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
- "### Exercise 0 Answer\n",
+ "### Exercise 1 Answer\n",
"\n",
- "Write your answer to Exercise 0 here"
- ]
+ "Write your answer to Exercise 1 here"
+ ],
+ "id": "94b33929-d077-4b16-87ba-2793abce0179"
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
- "Update Equation for Global Optimum\n",
- "==================================\n",
+ "# Update Equation for Global Optimum\n",
"\n",
- "Once again, we need to find the minimum of our objective function. Using\n",
- "our likelihood for multiple input regression we can now minimize for our\n",
- "parameter vector $\\mathbf{ w}$. Firstly, just as in the single input\n",
- "case, we seek stationary points by find parameter vectors that solve for\n",
- "when the gradients are zero, $$\n",
- "\\mathbf{0}=- 2\\mathbf{X}^\\top\n",
- "\\mathbf{ y}+ 2\\mathbf{X}^\\top\\mathbf{X}\\mathbf{ w},\n",
- "$$ where $\\mathbf{0}$ is a *vector* of zeros. Rearranging this equation\n",
+ "We need to find the minimum of our objective function. Using our\n",
+ "objective function, we can minimize for our parameter vector\n",
+ "$\\mathbf{ w}$. Firstly, we seek stationary points by find parameter\n",
+ "vectors that solve for when the gradients are zero, $$\n",
+ "\\mathbf{0}=- 2\\boldsymbol{ \\Phi}^\\top\n",
+ "\\mathbf{ y}+ 2\\boldsymbol{ \\Phi}^\\top\\boldsymbol{ \\Phi}\\mathbf{ w},\n",
+ "$$ where $\\mathbf{0}$ is a *vector* of zeros. Rearranging this equation,\n",
"we find the solution to be $$\n",
- "\\mathbf{ w}= \\left[\\mathbf{X}^\\top \\mathbf{X}\\right]^{-1} \\mathbf{X}^\\top\n",
+ "\\boldsymbol{ \\Phi}^\\top \\boldsymbol{ \\Phi}\\mathbf{ w}= \\boldsymbol{ \\Phi}^\\top\n",
"\\mathbf{ y}\n",
- "$$ where $\\mathbf{A}^{-1}$ denotes [*matrix\n",
- "inverse*](http://en.wikipedia.org/wiki/Invertible_matrix)."
- ]
+ "$$ which is a matrix equation of the familiar form\n",
+ "$\\mathbf{A}\\mathbf{x} = \\mathbf{b}$."
+ ],
+ "id": "88260728-0e30-4cb5-b34c-9c4fb77d79df"
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
- "Solving the Multivariate System\n",
- "-------------------------------\n",
+ "## Solving the Multivariate System\n",
"\n",
- "The solution for $\\mathbf{ w}$ is given in terms of a matrix inverse,\n",
- "but computation of a matrix inverse requires, in itself, an algorithm to\n",
- "resolve it. You’ll know this if you had to invert, by hand, a\n",
- "$3\\times 3$ matrix in high school. From a numerical stability\n",
- "perspective, it is also best not to compute the matrix inverse directly,\n",
- "but rather to ask the computer to *solve* the system of linear equations\n",
- "given by\n",
- "$$\\mathbf{X}^\\top\\mathbf{X}\\mathbf{ w}= \\mathbf{X}^\\top\\mathbf{ y}$$ for\n",
- "$\\mathbf{ w}$. This can be done in `numpy` using the command"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "import numpy as np"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "np.linalg.solve?"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "so we can obtain the solution using"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "w = np.linalg.solve(X.T@X, X.T@y)\n",
- "print(w)"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "We can map it back to the liner regression and plot the fit as follows"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "import matplotlib.pyplot as plt"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "m = w[1]; c=w[0]\n",
- "f_test = m*x_test + c\n",
- "print(m)\n",
- "print(c)\n",
- "plt.plot(x_test, f_test, 'b-')\n",
- "plt.plot(x, y, 'rx')"
- ]
+ "The solution for $\\mathbf{ w}$ can be written mathematically in terms of\n",
+ "a matrix inverse of $\\boldsymbol{ \\Phi}^\\top\\boldsymbol{ \\Phi}$, but\n",
+ "computation of a matrix inverse requires an algorithm to resolve it.\n",
+ "You’ll know this if you had to invert, by hand, a $3\\times 3$ matrix in\n",
+ "high school. From a numerical stability perspective, it is also best not\n",
+ "to compute the matrix inverse directly, but rather to ask the computer\n",
+ "to *solve* the system of linear equations given by $$\n",
+ "\\boldsymbol{ \\Phi}^\\top\\boldsymbol{ \\Phi}\\mathbf{ w}= \\boldsymbol{ \\Phi}^\\top\\mathbf{ y}\n",
+ "$$ for $\\mathbf{ w}$."
+ ],
+ "id": "3b1b0e18-8684-437b-b676-0a4166b55050"
},
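+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "As a minimal sketch (on a small synthetic design matrix rather than the\n",
+ "notebook data), we can ask `numpy` to solve this system directly rather\n",
+ "than forming the matrix inverse."
+ ],
+ "id": "1f3a9b02-64d1-4c2f-9a7e-3b5c0de81a01"
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import numpy as np\n",
+ "\n",
+ "# Minimal sketch: solve Phi^T Phi w = Phi^T y on synthetic data.\n",
+ "np.random.seed(0)\n",
+ "Phi_demo = np.random.randn(20, 3)          # small synthetic design matrix\n",
+ "w_true = np.array([[0.5], [-1.0], [2.0]])  # made-up weights\n",
+ "y_demo = Phi_demo@w_true + 0.01*np.random.randn(20, 1)\n",
+ "\n",
+ "# solve the linear system rather than inverting Phi^T Phi\n",
+ "w_est = np.linalg.solve(Phi_demo.T@Phi_demo, Phi_demo.T@y_demo)\n",
+ "print(w_est)"
+ ],
+ "id": "2c7d5e11-0b8a-4f63-8d2c-6e91a4f7b302"
+ },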
{
"cell_type": "markdown",
"metadata": {},
"source": [
- "Multivariate Linear Regression\n",
- "------------------------------\n",
+ "## Multivariate Linear Regression\n",
"\n",
"A major advantage of the new system is that we can build a linear\n",
"regression on a multivariate system. The matrix calculus didn’t specify\n",
"what the length of the vector $\\mathbf{ x}$ should be, or equivalently\n",
"the size of the design matrix."
- ]
+ ],
+ "id": "58122b14-5ebb-4499-a20c-3dec5f086f93"
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
- "Movie Body Count Data\n",
- "---------------------\n",
+ "## Movie Body Count Data\n",
"\n",
- "Let’s consider the movie body count data."
- ]
+ "\\[edit\\]\n",
+ "\n",
+ "This is a data set created by Simon Garnier and Rany Olson for exploring\n",
+ "the differences between R and Python for data science. The data contains\n",
+ "information about different movies augmented by estimates about how many\n",
+ "on-screen deaths are contained in the movie. The data is craped from\n",
+ ". The data contains the following\n",
+ "featuers for each movie: `Year`, `Body_Count`, `MPAA_Rating`, `Genre`,\n",
+ "`Director`, `Actors`, `Length_Minutes`, `IMDB_Rating`."
+ ],
+ "id": "cec02248-83cd-4822-97d8-2946dea433e4"
},
{
"cell_type": "code",
@@ -1859,7 +1751,8 @@
"outputs": [],
"source": [
"import pods"
- ]
+ ],
+ "id": "3051e759-0159-4746-80e4-15dc30486708"
},
{
"cell_type": "code",
@@ -1869,14 +1762,18 @@
"source": [
"data = pods.datasets.movie_body_count()\n",
"movies = data['Y']"
- ]
+ ],
+ "id": "d3163d4d-130d-4acd-982e-efb3ab428803"
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
- "Let’s remind ourselves of the features we’ve been provided with."
- ]
+ "The data is provided to us in the form of a pandas data frame, we can\n",
+ "see the features we’re provided with by inspecting the columns of the\n",
+ "data frame."
+ ],
+ "id": "94e545bb-1507-4d12-9c0e-f2b468a6b02c"
},
{
"cell_type": "code",
@@ -1885,26 +1782,26 @@
"outputs": [],
"source": [
"print(', '.join(movies.columns))"
- ]
+ ],
+ "id": "4ff32bdd-ee4b-4112-8c8d-4432c9065fe8"
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
+ "## Multivariate Regression on Movie Body Count Data\n",
+ "\n",
+ "\\[edit\\]\n",
+ "\n",
"Now we will build a design matrix based on the numeric features: year,\n",
- "Body\\_Count, Length\\_Minutes in an effort to predict the rating. We\n",
- "build the design matrix as follows:"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "Relation to Single Input System\n",
- "-------------------------------\n",
+ "Body_Count, Length_Minutes in an effort to predict the rating. We build\n",
+ "the design matrix as follows:\n",
"\n",
"Bias as an additional feature."
- ]
+ ],
+ "id": "27631a77-e51a-4a06-b3d5-e6f1f774cf84"
},
{
"cell_type": "code",
@@ -1913,10 +1810,11 @@
"outputs": [],
"source": [
"select_features = ['Year', 'Body_Count', 'Length_Minutes']\n",
- "X = movies[select_features]\n",
- "X['Eins'] = 1 # add a column for the offset\n",
+ "Phi = movies[select_features]\n",
+ "Phi['Eins'] = 1 # add a column for the offset\n",
"y = movies[['IMDB_Rating']]"
- ]
+ ],
+ "id": "15cde525-cba1-46d8-ba24-68042d2a55ae"
},
{
"cell_type": "markdown",
@@ -1925,7 +1823,8 @@
"Now let’s perform a linear regression. But this time, we will create a\n",
"pandas data frame for the result so we can store it in a form that we\n",
"can visualise easily."
- ]
+ ],
+ "id": "ff420038-9345-4bcd-a306-12957f0afe67"
},
{
"cell_type": "code",
@@ -1934,7 +1833,8 @@
"outputs": [],
"source": [
"import pandas as pd"
- ]
+ ],
+ "id": "57cc55da-c5bd-41db-bec3-f76073e1d78b"
},
{
"cell_type": "code",
@@ -1942,17 +1842,21 @@
"metadata": {},
"outputs": [],
"source": [
- "w = pd.DataFrame(data=np.linalg.solve(X.T@X, X.T@y), # solve linear regression here\n",
- " index = X.columns, # columns of X become rows of w\n",
- " columns=['regression_coefficient']) # the column of X is the value of regression coefficient"
- ]
+ "w = pd.DataFrame(data=np.linalg.solve(Phi.T@Phi, Phi.T@y), # solve linear regression here\n",
+ " index = Phi.columns, # columns of Phi become rows of w\n",
+ " columns=['regression_coefficient']) # the column of Phi is the value of regression coefficient"
+ ],
+ "id": "490aa0ac-e0d0-4da7-9107-7e05cd72434b"
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
- "We can check the residuals to see how good our estimates are"
- ]
+ "We can check the residuals to see how good our estimates are. First we\n",
+ "create a pandas data frame containing the predictions and use it to\n",
+ "compute the residuals."
+ ],
+ "id": "d532154e-f264-4516-bf5a-9dd874bfa171"
},
{
"cell_type": "code",
@@ -1960,17 +1864,22 @@
"metadata": {},
"outputs": [],
"source": [
- "(y - X@w).hist()"
- ]
+ "ypred = pd.DataFrame(data=(Phi@w).values, columns=['IMDB_Rating'])\n",
+ "resid = y-ypred"
+ ],
+ "id": "2b4751c9-e57d-4686-897d-0e1acd9ec745"
},
{
- "cell_type": "markdown",
+ "cell_type": "code",
+ "execution_count": null,
"metadata": {},
+ "outputs": [],
"source": [
- "Which shows our model *hasn’t* yet done a great job of representation,\n",
- "because the spread of values is large. We can check what the rating is\n",
- "dominated by in terms of regression coefficients."
- ]
+ "import matplotlib.pyplot as plt\n",
+ "import mlai.plot as plot\n",
+ "import mlai"
+ ],
+ "id": "9c94779f-7aa3-45d7-9ea5-85209f47ccad"
},
{
"cell_type": "code",
@@ -1978,29 +1887,63 @@
"metadata": {},
"outputs": [],
"source": [
- "w"
- ]
+ "fig, ax = plt.subplots(figsize=plot.big_wide_figsize)\n",
+ "resid.hist(ax=ax)\n",
+ "mlai.write_figure(filename='movie-body-count-rating-residuals.svg', \n",
+ " directory='./ml')"
+ ],
+ "id": "33dcd9a6-7f86-4adf-bba0-5ac7c322390d"
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
- "Although we have to be a little careful about interpretation because our\n",
- "input values live on different scales, however it looks like we are\n",
- "dominated by the bias, with a small negative effect for later films (but\n",
+ "\n",
+ "\n",
+ "Figure: Residual values for the ratings from the prediction of the\n",
+ "movie rating given the data from the film.\n",
+ "\n",
+ "Which shows our model *hasn’t* yet done a great job of representation,\n",
+ "because the spread of values is large. We can check what the rating is\n",
+ "dominated by in terms of regression coefficients."
+ ],
+ "id": "c7d7847a-6269-40a2-8c3d-cbe74bcb1b39"
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "w"
+ ],
+ "id": "bdd15839-444f-4e94-9b4c-4cd5c7e15f09"
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Although we have to be a little careful about interpretation because our\n",
+ "input values live on different scales, however it looks like we are\n",
+ "dominated by the bias, with a small negative effect for later films (but\n",
"bear in mind the years are large, so this effect is probably larger than\n",
"it looks) and a positive effect for length. So it looks like long\n",
"earlier films generally do better, but the residuals are so high that we\n",
"probably haven’t modelled the system very well."
- ]
+ ],
+ "id": "ef8d73a8-e48e-4466-be10-7473cf7f9be2"
},
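+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "One way to make the coefficients easier to compare (a sketch, assuming\n",
+ "the `Phi` and `y` data frames from the cells above are still in scope)\n",
+ "is to standardise the numeric columns before solving, so each\n",
+ "coefficient reflects a one standard deviation change in its input."
+ ],
+ "id": "3e8b6c20-7d4f-4a19-b5e0-92c1f8a7d403"
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import numpy as np\n",
+ "import pandas as pd\n",
+ "\n",
+ "# Sketch: standardise the numeric columns of Phi so that the regression\n",
+ "# coefficients are on comparable scales (assumes Phi and y from above).\n",
+ "Phi_std = Phi.copy()\n",
+ "for col in ['Year', 'Body_Count', 'Length_Minutes']:\n",
+ "    Phi_std[col] = (Phi_std[col] - Phi_std[col].mean())/Phi_std[col].std()\n",
+ "\n",
+ "w_std = pd.DataFrame(data=np.linalg.solve(Phi_std.T@Phi_std, Phi_std.T@y),\n",
+ "                     index=Phi_std.columns,\n",
+ "                     columns=['regression_coefficient'])\n",
+ "w_std"
+ ],
+ "id": "4a9c7d31-8e50-4b2a-c6f1-a3d2098be504"
+ },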
{
"cell_type": "markdown",
"metadata": {},
"source": [
- "Underdetermined System\n",
- "======================"
- ]
+ "# Underdetermined System\n",
+ "\n",
+ "\\[edit\\]"
+ ],
+ "id": "9ed20728-2192-48e1-8407-73a4d915bc11"
},
{
"cell_type": "code",
@@ -2008,8 +1951,9 @@
"metadata": {},
"outputs": [],
"source": [
- "import teaching_plots as plot"
- ]
+ "import mlai.plot as plot"
+ ],
+ "id": "f745ba5e-5b58-4e54-9c6f-74646dc33616"
},
{
"cell_type": "code",
@@ -2018,7 +1962,8 @@
"outputs": [],
"source": [
"plot.under_determined_system(diagrams='./ml')"
- ]
+ ],
+ "id": "08518f5f-827c-4138-b324-eca9f7eca669"
},
{
"cell_type": "markdown",
@@ -2026,7 +1971,7 @@
"source": [
"What about the situation where you have more parameters than data in\n",
"your simultaneous equation? This is known as an *underdetermined*\n",
- "system. In fact this set up is in some sense *easier* to solve, because\n",
+ "system. In fact, this set up is in some sense *easier* to solve, because\n",
"we don’t need to think about introducing a slack variable (although it\n",
"might make a lot of sense from a *modelling* perspective to do so).\n",
"\n",
@@ -2034,31 +1979,32 @@
"introduce slack variables, $\\epsilon_i$, which needed to be estimated\n",
"for each point. The slack variable represented the difference between\n",
"our actual prediction and the true observation. This is known as the\n",
- "*residual*. By introducing the slack variable we now have an additional\n",
+ "*residual*. By introducing the slack variable, we now have an additional\n",
"$n$ variables to estimate, one for each data point, $\\{\\epsilon_i\\}$.\n",
- "This actually turns the overdetermined system into an underdetermined\n",
- "system. Introduction of $n$ variables, plus the original $m$ and $c$\n",
- "gives us $n+2$ parameters to be estimated from $n$ observations, which\n",
- "actually makes the system *underdetermined*. However, we then made a\n",
- "probabilistic assumption about the slack variables, we assumed that the\n",
- "slack variables were distributed according to a probability density. And\n",
- "for the moment we have been assuming that density was the Gaussian,\n",
+ "This turns the overdetermined system into an underdetermined system.\n",
+ "Introduction of $n$ variables, plus the original $m$ and $c$ gives us\n",
+ "$n+2$ parameters to be estimated from $n$ observations, which makes the\n",
+ "system *underdetermined*. However, we then made a probabilistic\n",
+ "assumption about the slack variables, we assumed that the slack\n",
+ "variables were distributed according to a probability density. And for\n",
+ "the moment we have been assuming that density was the Gaussian,\n",
"$$\\epsilon_i \\sim \\mathcal{N}\\left(0,\\sigma^2\\right),$$ with zero mean\n",
"and variance $\\sigma^2$.\n",
"\n",
"The follow up question is whether we can do the same thing with the\n",
- "parameters. If we have two parameters and only one unknown can we place\n",
- "a probability distribution over the parameters, as we did with the slack\n",
+ "parameters. If we have two parameters and only one unknown, can we place\n",
+ "a probability distribution over the parameters as we did with the slack\n",
"variables? The answer is yes."
- ]
+ ],
+ "id": "903c59ef-3d29-4158-9da8-edc050347c7b"
},
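+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "As a small illustration (a sketch with made-up numbers), a single\n",
+ "observation and the two parameters $m$ and $c$ give an underdetermined\n",
+ "system: every choice of gradient $m$ has an offset $c$ that fits the\n",
+ "point exactly, so we can sample $m$ from a prior and recover a whole\n",
+ "family of consistent lines."
+ ],
+ "id": "5bad8e42-9f61-4c3b-d702-b4e30a9cf605"
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import numpy as np\n",
+ "import matplotlib.pyplot as plt\n",
+ "\n",
+ "# Sketch: one (made-up) data point, two parameters => underdetermined.\n",
+ "x1, y1 = 2.0, 3.0\n",
+ "x_plot = np.linspace(-1, 4, 100)\n",
+ "\n",
+ "plt.plot(x1, y1, 'rx', markersize=10)\n",
+ "for i in range(10):\n",
+ "    m = np.random.normal(0, 1)   # sample the gradient from a prior\n",
+ "    c = y1 - m*x1                # offset chosen so the line fits the point\n",
+ "    plt.plot(x_plot, m*x_plot + c, 'b-', alpha=0.3)"
+ ],
+ "id": "6cbe9f53-a072-4d4c-e813-c5f41babd706"
+ },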
{
"cell_type": "markdown",
"metadata": {},
"source": [
- "Underdetermined System\n",
- "----------------------"
- ]
+ "## Underdetermined System"
+ ],
+ "id": "055ed30c-4047-48b2-b422-a088570e308d"
},
{
"cell_type": "code",
@@ -2066,9 +2012,10 @@
"metadata": {},
"outputs": [],
"source": [
- "import pods\n",
+ "import notutils as nu\n",
"from ipywidgets import IntSlider"
- ]
+ ],
+ "id": "6989236a-7e2d-4ce3-9b1b-54990c2a1dd0"
},
{
"cell_type": "code",
@@ -2076,27 +2023,32 @@
"metadata": {},
"outputs": [],
"source": [
- "pods.notebook.display_plots('under_determined_system{samp:0>3}.svg', \n",
- " directory='./ml', samp=IntSlider(0, 0, 10, 1))"
- ]
+ "nu.display_plots('under_determined_system{samp:0>3}.svg', \n",
+ " directory='./ml', samp=IntSlider(0, 0, 9, 1))"
+ ],
+ "id": "6fe8e747-555f-4fa1-9153-ce8e9faf1c16"
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
- "\n",
+ "\n",
"\n",
"Figure: An underdetermined system can be fit by considering\n",
"uncertainty. Multiple solutions are consistent with one specified\n",
"point."
- ]
+ ],
+ "id": "57da1e3b-71be-4b8e-919d-6d36821608f9"
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
- "Two Dimensional Gaussian\n",
- "------------------------\n",
+ "## Two Dimensional Gaussian\n",
+ "\n",
+ "\\[edit\\]\n",
"\n",
"Consider the distribution of height (in meters) of an adult male human\n",
"population. We will approximate the marginal density of heights as a\n",
@@ -2109,7 +2061,8 @@
"deviation of $6 kg$ (implying a variance of 36), $$\n",
" p(w) \\sim \\mathcal{N}\\left(75,36\\right).\n",
" $$"
- ]
+ ],
+ "id": "30739355-71b0-48e0-b24c-d60372b30fb8"
},
{
"cell_type": "code",
@@ -2117,8 +2070,9 @@
"metadata": {},
"outputs": [],
"source": [
- "import teaching_plots as plot"
- ]
+ "import mlai.plot as plot"
+ ],
+ "id": "483179fe-0b8e-4b79-aae1-2268e98172b2"
},
{
"cell_type": "code",
@@ -2127,23 +2081,24 @@
"outputs": [],
"source": [
"plot.height_weight(diagrams='./ml')"
- ]
+ ],
+ "id": "ab732e7c-75f7-47f9-9348-715ce42432ab"
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
- "\n",
+ "\n",
"\n",
"Figure: Gaussian distributions for height and weight."
- ]
+ ],
+ "id": "360df30c-cf42-4cf4-a549-fca59a1c5d89"
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
- "Independence Assumption\n",
- "-----------------------\n",
+ "## Independence Assumption\n",
"\n",
"First of all, we make an independence assumption, we assume that height\n",
"and weight are independent. The definition of probabilistic independence\n",
@@ -2152,7 +2107,8 @@
" p(w, h) = p(w)p(h).\n",
" $$ Given this assumption we can sample from the joint distribution by\n",
"independently sampling weights and heights."
- ]
+ ],
+ "id": "6cfefbde-6143-40fb-8bfa-24a202d59d67"
},
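+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "As a minimal sketch of such independent sampling with plain `numpy`\n",
+ "(the weight density is the one given above; the height mean and\n",
+ "variance are placeholder values chosen for illustration):"
+ ],
+ "id": "7dcf0a64-b183-4e5d-f924-d60521cbe807"
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import numpy as np\n",
+ "\n",
+ "# Sketch: independent samples of height (m) and weight (kg).\n",
+ "# Weight density from the text: N(75, 36). Height parameters below are\n",
+ "# placeholder values chosen for illustration.\n",
+ "num_samps = 8\n",
+ "h_samp = np.random.normal(1.7, np.sqrt(0.0225), size=num_samps)\n",
+ "w_samp = np.random.normal(75., np.sqrt(36.), size=num_samps)\n",
+ "for height, weight in zip(h_samp, w_samp):\n",
+ "    print('height {:.2f} m, weight {:.1f} kg'.format(height, weight))"
+ ],
+ "id": "8ed01b75-c294-4f6e-0a35-e71632dcf908"
+ },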
{
"cell_type": "code",
@@ -2160,8 +2116,9 @@
"metadata": {},
"outputs": [],
"source": [
- "import teaching_plots as plot"
- ]
+ "import mlai.plot as plot"
+ ],
+ "id": "0ef697bf-b1cf-43b7-ac51-47b451bf33ff"
},
{
"cell_type": "code",
@@ -2171,7 +2128,8 @@
"source": [
"plot.independent_height_weight(num_samps=8, \n",
" diagrams='./ml')"
- ]
+ ],
+ "id": "65e3a055-8cad-46db-89e3-51f546cd0969"
},
{
"cell_type": "code",
@@ -2179,9 +2137,10 @@
"metadata": {},
"outputs": [],
"source": [
- "import pods\n",
+ "import notutils as nu\n",
"from ipywidgets import IntSlider"
- ]
+ ],
+ "id": "d10f64a6-742b-414f-bd98-5ffa4fc3ea5e"
},
{
"cell_type": "code",
@@ -2189,16 +2148,27 @@
"metadata": {},
"outputs": [],
"source": [
- "pods.notebook.display_plots('independent_height_weight{fig:0>3}.svg', \n",
+ "import notutils as nu"
+ ],
+ "id": "4b12582a-235f-426b-bb70-7d3d55bd15fa"
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "nu.display_plots('independent_height_weight{fig:0>3}.svg', \n",
" directory='./ml', \n",
" fig=IntSlider(0, 0, 7, 1))"
- ]
+ ],
+ "id": "b29ec920-013c-4533-aa64-5443f9c83891"
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
- "\n",
+ "\n",
"\n",
"Figure: Samples from independent Gaussian variables that might\n",
"represent heights and weights.\n",
@@ -2212,15 +2182,20 @@
"\\text{BMI} = \\frac{w}{h^2}\n",
"$$To deal with this dependence we now introduce the notion of\n",
"*correlation* to the multivariate Gaussian density."
- ]
+ ],
+ "id": "000e0f54-9fea-4d98-8805-2d88fee8e3e4"
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
- "Sampling Two Dimensional Variables\n",
- "----------------------------------"
- ]
+ "## Sampling Two Dimensional Variables\n",
+ "\n",
+ "\\[edit\\]"
+ ],
+ "id": "dbce5533-e31a-4968-9c76-e652681ef434"
},
{
"cell_type": "code",
@@ -2228,8 +2203,9 @@
"metadata": {},
"outputs": [],
"source": [
- "import teaching_plots as plot"
- ]
+ "import mlai.plot as plot"
+ ],
+ "id": "30d9e57b-e601-41ed-9c05-14bc84557b2c"
},
{
"cell_type": "code",
@@ -2239,7 +2215,8 @@
"source": [
"plot.correlated_height_weight(num_samps=8, \n",
" diagrams='./ml')"
- ]
+ ],
+ "id": "b49c3250-3745-4283-bbf3-8b94604596c2"
},
{
"cell_type": "code",
@@ -2247,9 +2224,10 @@
"metadata": {},
"outputs": [],
"source": [
- "import pods\n",
+ "import notutils as nu\n",
"from ipywidgets import IntSlider"
- ]
+ ],
+ "id": "d9f49880-33e3-4641-b77f-8200e6803788"
},
{
"cell_type": "code",
@@ -2257,27 +2235,32 @@
"metadata": {},
"outputs": [],
"source": [
- "pods.notebook.display_plots('correlated_height_weight{fig:0>3}.svg', \n",
+ "nu.display_plots('correlated_height_weight{fig:0>3}.svg', \n",
" directory='./ml', \n",
" fig=IntSlider(0, 0, 7, 1))"
- ]
+ ],
+ "id": "f31aada5-a0b0-4fdd-9bb7-c847f91ae2c5"
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
- "\n",
+ "\n",
"\n",
"Figure: Samples from *correlated* Gaussian variables that might\n",
"represent heights and weights."
- ]
+ ],
+ "id": "3d7c463b-6498-4b4d-8b53-5c03d2227689"
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
- "Independent Gaussians\n",
- "---------------------\n",
+ "## Independent Gaussians\n",
+ "\n",
+ "\\[edit\\]\n",
"\n",
"$$\n",
"p(w, h) = p(w)p(h)\n",
@@ -2294,14 +2277,14 @@
"$$\n",
"p(\\mathbf{ y}) = \\frac{1}{\\det{2\\pi \\mathbf{D}}^{\\frac{1}{2}}} \\exp\\left(-\\frac{1}{2}(\\mathbf{ y}- \\boldsymbol{ \\mu})^\\top\\mathbf{D}^{-1}(\\mathbf{ y}- \\boldsymbol{ \\mu})\\right)\n",
"$$"
- ]
+ ],
+ "id": "e592a8de-d2d3-41da-af57-3747010debae"
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
- "Correlated Gaussian\n",
- "-------------------\n",
+ "## Correlated Gaussian\n",
"\n",
"Form correlated from original by rotating the data space using matrix\n",
"$\\mathbf{R}$.\n",
@@ -2325,27 +2308,31 @@
"$$ this gives a covariance matrix: $$\n",
"\\mathbf{C}= \\mathbf{R}\\mathbf{D} \\mathbf{R}^\\top\n",
"$$"
- ]
+ ],
+ "id": "a653e114-2fab-4e33-904a-d991d284d2d9"
},
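+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "A short sketch of this construction (with an arbitrary rotation angle\n",
+ "and diagonal variances chosen for illustration):"
+ ],
+ "id": "9fe12c86-d3a5-407f-1b46-f82743edfa09"
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import numpy as np\n",
+ "\n",
+ "# Sketch: form a correlated covariance by rotating a diagonal one,\n",
+ "# C = R D R^T. The angle and variances are arbitrary illustrative values.\n",
+ "theta = np.pi/4\n",
+ "R = np.array([[np.cos(theta), -np.sin(theta)],\n",
+ "              [np.sin(theta),  np.cos(theta)]])\n",
+ "D = np.diag([1., 36.])\n",
+ "C = R@D@R.T\n",
+ "print(C)\n",
+ "print(np.random.multivariate_normal(mean=np.zeros(2), cov=C, size=5))"
+ ],
+ "id": "a0f23d97-e4b6-4180-2c57-093854feab10"
+ },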
{
"cell_type": "markdown",
"metadata": {},
"source": [
- "Basis Functions\n",
- "---------------\n",
+ "## Basis Functions\n",
+ "\n",
+ "\\[edit\\]\n",
"\n",
"Here’s the idea, instead of working directly on the original input\n",
"space, $\\mathbf{ x}$, we build models in a new space,\n",
"$\\boldsymbol{ \\phi}(\\mathbf{ x})$ where $\\boldsymbol{ \\phi}(\\cdot)$ is a\n",
"*vector-valued* function that is defined on the space $\\mathbf{ x}$."
- ]
+ ],
+ "id": "bdebfc1a-2cdf-4c89-9748-5c8c28c8ee2b"
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
- "Quadratic Basis\n",
- "---------------\n",
+ "## Quadratic Basis\n",
"\n",
"Remember, that a *vector-valued function* is just a vector that contains\n",
"functions instead of values. Here’s an example for a one dimensional\n",
@@ -2378,7 +2365,8 @@
"\n",
"Let’s try constructing such a matrix for a set of inputs. First of all,\n",
"we create a function that returns the matrix valued function."
- ]
+ ],
+ "id": "249616e5-70f3-4632-a02c-784a84497bed"
},
{
"cell_type": "code",
@@ -2387,7 +2375,8 @@
"outputs": [],
"source": [
"import numpy as np"
- ]
+ ],
+ "id": "e1000dad-ce2e-41ec-bf4e-17a16494b5b7"
},
{
"cell_type": "code",
@@ -2399,19 +2388,20 @@
" \"\"\"Take in a vector of input values and return the design matrix associated \n",
" with the basis functions.\"\"\"\n",
" return np.hstack([np.ones((x.shape[0], 1)), x, x**2])"
- ]
+ ],
+ "id": "1a7242ac-5c69-4887-9554-1fd6de2b6da2"
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
- "Functions Derived from Quadratic Basis\n",
- "--------------------------------------\n",
+ "## Functions Derived from Quadratic Basis\n",
"\n",
"$$\n",
"f(x) = {\\color{red}{w_0}} + {\\color{magenta}{w_1 x}} + {\\color{blue}{w_2 x^2}}\n",
"$$"
- ]
+ ],
+ "id": "7b023f1e-4067-47bd-ba8f-814767907249"
},
{
"cell_type": "code",
@@ -2420,8 +2410,9 @@
"outputs": [],
"source": [
"import matplotlib.pyplot as plt\n",
- "import teaching_plots as plot"
- ]
+ "import mlai.plot as plot"
+ ],
+ "id": "8496c280-0bd8-4ee6-a8b6-590d4eab95e3"
},
{
"cell_type": "code",
@@ -2440,17 +2431,19 @@
"plot.basis(quadratic, x_min=-1.3, x_max=1.3, \n",
" fig=f, ax=ax, loc=loc, text=text,\n",
" diagrams='./ml')\n"
- ]
+ ],
+ "id": "17972117-2b60-4d1a-89c3-aa394f7307cc"
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
- "\n",
+ "\n",
"\n",
"Figure: The set of functions which are combined to form a *quadratic*\n",
"basis."
- ]
+ ],
+ "id": "592f7ad2-f8c4-4384-92a0-15489de85980"
},
{
"cell_type": "code",
@@ -2458,9 +2451,10 @@
"metadata": {},
"outputs": [],
"source": [
- "import pods\n",
+ "import notutils as nu\n",
"from ipywidgets import IntSlider"
- ]
+ ],
+ "id": "90262aad-dfab-4bef-8398-5c6a46701a29"
},
{
"cell_type": "code",
@@ -2468,10 +2462,21 @@
"metadata": {},
"outputs": [],
"source": [
- "pods.notebook.display_plots('quadratic_basis{num_basis:0>3}.svg', \n",
+ "import notutils as nu"
+ ],
+ "id": "8aa85ee8-8421-499e-869b-a8d812aad60f"
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "nu.display_plots('quadratic_basis{num_basis:0>3}.svg', \n",
" directory='./ml', \n",
" num_basis=IntSlider(0,0,2,1))"
- ]
+ ],
+ "id": "af15c5b4-c1c0-4b79-9897-b98c60dbc3bd"
},
{
"cell_type": "markdown",
@@ -2480,7 +2485,8 @@
"This function takes in an $n\\times 1$ dimensional vector and returns an\n",
"$n\\times 3$ dimensional *design matrix* containing the basis functions.\n",
"We can plot those basis functions against there input as follows."
- ]
+ ],
+ "id": "99092d91-311c-401b-9be1-995f8afe222c"
},
{
"cell_type": "code",
@@ -2502,7 +2508,8 @@
"ax.plot(x[:,0], Phi[:, 2], 'b-', label = '$\\phi=x^2$', linewidth=3)\n",
"ax.legend(loc='lower right')\n",
"_ = ax.set_title('Quadratic Basis Functions')"
- ]
+ ],
+ "id": "e9571da9-1ade-408b-8573-aa6eaf953af5"
},
{
"cell_type": "markdown",
@@ -2514,20 +2521,21 @@
"process’, and in this context they form the underlying support for our\n",
"prediction function. Our prediction function can only be composed of a\n",
"weighted linear sum of our basis functions."
- ]
+ ],
+ "id": "121679d0-af95-4b0f-8f9d-b0f3c29443bb"
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
- "Quadratic Functions\n",
- "-------------------\n",
+ "## Quadratic Functions\n",
"\n",
- "\n",
+ "\n",
"\n",
"Figure: Functions constructed by weighted sum of the components of a\n",
"quadratic basis."
- ]
+ ],
+ "id": "61285a7b-043c-4390-94f6-413d241806e3"
},
{
"cell_type": "code",
@@ -2535,9 +2543,20 @@
"metadata": {},
"outputs": [],
"source": [
- "import pods\n",
+ "import notutils as nu\n",
"from ipywidgets import IntSlider"
- ]
+ ],
+ "id": "2a6a8843-9be1-43d4-adc4-dcec93a4dbe8"
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import notutils as nu"
+ ],
+ "id": "34fd435d-7c08-43f6-8fa6-2744bca64985"
},
{
"cell_type": "code",
@@ -2545,17 +2564,21 @@
"metadata": {},
"outputs": [],
"source": [
- "pods.notebook.display_plots('quadratic_function{num_function:0>3}.svg', \n",
+ "nu.display_plots('quadratic_function{num_function:0>3}.svg', \n",
" directory='./ml', \n",
" num_function=IntSlider(0,0,2,1))"
- ]
+ ],
+ "id": "549a2c01-d7f8-4fb6-ad02-5eab60a28b13"
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
- "Rectified Linear Units\n",
- "----------------------\n",
+ "## Rectified Linear Units\n",
+ "\n",
+ "\\[edit\\]\n",
"\n",
"The rectified linear unit is a basis function that emerged out of the\n",
"deep learning community. Rectified linear units are popular in the\n",
@@ -2564,7 +2587,8 @@
"certain threshold. $$\n",
"\\phi_j(x) = xH(v_j x+ v_0)\n",
"$$"
- ]
+ ],
+ "id": "50256dd5-faf5-45d0-a0f0-af64149d32ae"
},
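+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "The notebook loads the `mlai.relu` implementation below; the following\n",
+ "is just a minimal `numpy` sketch of such a basis, using the threshold\n",
+ "locations that appear in the figure further down."
+ ],
+ "id": "b1034ea8-f5c7-4291-3d68-1a4965afbc11"
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import numpy as np\n",
+ "\n",
+ "# Sketch of a rectified linear unit basis, phi_j(x) = x H(x + t_j),\n",
+ "# plus a constant term. Thresholds follow the example figure below.\n",
+ "def relu_basis_sketch(x, thresholds=(1.0, 0.33, -0.33, -1.0)):\n",
+ "    columns = [np.ones((x.shape[0], 1))]\n",
+ "    for t in thresholds:\n",
+ "        columns.append(x*(x + t > 0))   # Heaviside step times x\n",
+ "    return np.hstack(columns)\n",
+ "\n",
+ "x_demo = np.linspace(-2, 2, 5)[:, np.newaxis]\n",
+ "print(relu_basis_sketch(x_demo))"
+ ],
+ "id": "c2145fb9-06d8-43a2-4e79-2b5a76badc12"
+ },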
{
"cell_type": "code",
@@ -2573,7 +2597,18 @@
"outputs": [],
"source": [
"import numpy as np"
- ]
+ ],
+ "id": "feab5bd5-89ec-40dc-b0fc-7587a86fcb45"
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import mlai"
+ ],
+ "id": "bc7c6870-efe8-4147-9c45-232ab8088e47"
},
{
"cell_type": "code",
@@ -2581,8 +2616,9 @@
"metadata": {},
"outputs": [],
"source": [
- "%load -s relu mlai.py"
- ]
+ "%load -n mlai.relu"
+ ],
+ "id": "6c36c241-e2c7-4724-8513-d0451c3e1d5e"
},
{
"cell_type": "code",
@@ -2591,9 +2627,10 @@
"outputs": [],
"source": [
"import matplotlib.pyplot as plt\n",
- "import teaching_plots as plot\n",
+ "import mlai.plot as plot\n",
"import mlai"
- ]
+ ],
+ "id": "5e708974-3822-4007-9c9b-82c275901521"
},
{
"cell_type": "code",
@@ -2616,17 +2653,19 @@
" fig=f, ax=ax, loc=loc, text=text,\n",
" diagrams='./ml',\n",
" num_basis=5)"
- ]
+ ],
+ "id": "ed2b3ce2-1f61-450f-b6f4-75a158c19442"
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
- "\n",
+ "\n",
"\n",
"Figure: The set of functions which are combined to form a rectified\n",
"linear unit basis."
- ]
+ ],
+ "id": "372a998b-9f3c-437e-a818-1350e6120fb9"
},
{
"cell_type": "code",
@@ -2634,9 +2673,10 @@
"metadata": {},
"outputs": [],
"source": [
- "import pods\n",
+ "import notutils as nu\n",
"from ipywidgets import IntSlider"
- ]
+ ],
+ "id": "974f0ed7-b5bd-4069-ac3d-0f69085fb086"
},
{
"cell_type": "code",
@@ -2644,10 +2684,31 @@
"metadata": {},
"outputs": [],
"source": [
- "pods.notebook.display_plots('fourier_basis{num_basis:0>3}.svg', \n",
+ "import notutils as nu"
+ ],
+ "id": "95a00cd1-b978-44a9-bf8d-b9aaf7d00ec8"
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "nu.display_plots('relu_basis{num_basis:0>3}.svg', \n",
" directory='./ml', \n",
" num_basis=IntSlider(0,0,4,1))"
- ]
+ ],
+ "id": "ce21ed6e-ccc5-4b4b-abba-fde4207e6459"
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import notutils as nu"
+ ],
+ "id": "de8fb220-2e39-4428-96aa-8c9977c81648"
},
{
"cell_type": "code",
@@ -2655,25 +2716,26 @@
"metadata": {},
"outputs": [],
"source": [
- "pods.notebook.display_prediction(basis=mlai.relu, num_basis=5)"
- ]
+ "nu.display_prediction(basis=mlai.relu, num_basis=5)"
+ ],
+ "id": "ea44023a-03a2-4434-a4aa-c3926e64c9cc"
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
- "Functions Derived from Relu Basis\n",
- "---------------------------------\n",
+ "## Functions Derived from Relu Basis\n",
"\n",
"$$\n",
"f(x) = \\color{red}{w_0} + \\color{magenta}{w_1 xH(x+1.0) } + \\color{blue}{w_2 xH(x+0.33) } + \\color{green}{w_3 xH(x-0.33)} + \\color{cyan}{w_4 xH(x-1.0)}\n",
"$$\n",
"\n",
- "\n",
+ "\n",
"\n",
"Figure: A rectified linear unit basis is made up of different\n",
"rectified linear unit functions centered at different points."
- ]
+ ],
+ "id": "c16a171d-5388-40ee-bc6f-52733f786b61"
},
{
"cell_type": "code",
@@ -2681,9 +2743,10 @@
"metadata": {},
"outputs": [],
"source": [
- "import pods\n",
+ "import notutils as nu\n",
"from ipywidgets import IntSlider"
- ]
+ ],
+ "id": "772fd1e0-7970-410d-a308-f3b1028199d6"
},
{
"cell_type": "code",
@@ -2691,17 +2754,27 @@
"metadata": {},
"outputs": [],
"source": [
- "pods.notebook.display_plots('relu_function{func_num:0>3}.svg', \n",
+ "import notutils as nu"
+ ],
+ "id": "7db4cc79-600f-485b-b459-58475f4d763b"
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "nu.display_plots('relu_function{func_num:0>3}.svg', \n",
" directory='./ml', \n",
" func_num=IntSlider(0,0,2,1))"
- ]
+ ],
+ "id": "6c7b1ade-bbc5-4cd6-8fd0-ae853df29cc5"
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
- "Gaussian Processes\n",
- "------------------\n",
+ "## Gaussian Processes\n",
"\n",
"Models where we model the entire joint distribution of our training\n",
"data, $p(\\mathbf{ y}, \\mathbf{X})$ are sometimes described as\n",
@@ -2729,14 +2802,18 @@
"$$ where the conditioning is on the inputs $\\mathbf{X}$ which are used\n",
"for computing the mean and covariance. For this reason they are known as\n",
"mean and covariance functions."
- ]
+ ],
+ "id": "e3d047e8-2976-4ae0-8a76-8ad19cf4703a"
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
- "Linear Model Overview\n",
- "---------------------\n",
+ "## Linear Model Overview\n",
+ "\n",
+ "\\[edit\\]\n",
"\n",
"However, we are focussing on what happens in models which are non-linear\n",
"in the inputs, whereas the above would be *linear* in the inputs. To\n",
@@ -2780,14 +2857,14 @@
"k_f\\left(\\mathbf{ x}_i, \\mathbf{ x}_j\\right) = \\alpha \\boldsymbol{ \\phi}\\left(\\mathbf{W}_1, \\mathbf{ x}_i\\right)^\\top \\boldsymbol{ \\phi}\\left(\\mathbf{W}_1, \\mathbf{ x}_j\\right)\n",
"$$ so the elements of the covariance or *kernel* matrix are formed by\n",
"inner products of the rows of the *design matrix*."
- ]
+ ],
+ "id": "6a7047fc-f2b6-4bcc-879c-afe26a0d21d8"
},
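+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "As a quick numerical check of this idea (a sketch with an arbitrary\n",
+ "small design matrix), each element of the kernel matrix is just a\n",
+ "scaled inner product of two rows of the design matrix:"
+ ],
+ "id": "d32560ca-17e9-44b3-5f8a-3c6b87cbed13"
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import numpy as np\n",
+ "\n",
+ "# Sketch: kernel matrix from inner products of design-matrix rows,\n",
+ "# k(x_i, x_j) = alpha * phi(x_i)^T phi(x_j), for an arbitrary small Phi.\n",
+ "alpha_demo = 2.0\n",
+ "Phi_demo = np.random.randn(4, 3)   # four points, three basis functions\n",
+ "K_demo = alpha_demo*Phi_demo@Phi_demo.T\n",
+ "print(np.allclose(K_demo[0, 1], alpha_demo*Phi_demo[0]@Phi_demo[1]))"
+ ],
+ "id": "e43671db-28fa-45c4-609b-4d7c98dcfe14"
+ },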
{
"cell_type": "markdown",
"metadata": {},
"source": [
- "Gaussian Process\n",
- "----------------\n",
+ "## Gaussian Process\n",
"\n",
"This is the essence of a Gaussian process. Instead of making assumptions\n",
"about our density over each data point, $y_i$ as i.i.d. we make a joint\n",
@@ -2795,14 +2872,14 @@
"function of both the parameters of the activation function,\n",
"$\\mathbf{V}$, and the input variables, $\\mathbf{X}$. This comes about\n",
"through integrating out the parameters of the model, $\\mathbf{ w}$."
- ]
+ ],
+ "id": "e099576c-9a9b-4094-8156-13a59199ad60"
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
- "Basis Functions\n",
- "---------------\n",
+ "## Basis Functions\n",
"\n",
"We can basically put anything inside the basis functions, and many\n",
"people do. These can be deep kernels (Cho and Saul, 2009) or we can\n",
@@ -2810,14 +2887,18 @@
"\n",
"Viewing a neural network in this way is also what allows us to beform\n",
"sensible *batch* normalizations (Ioffe and Szegedy, 2015)."
- ]
+ ],
+ "id": "e96df1ea-bd0c-424f-a0bb-84f1ea1eb200"
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
- "Radial Basis Functions\n",
- "----------------------\n",
+ "## Radial Basis Functions\n",
+ "\n",
+ "\\[edit\\]\n",
"\n",
"Another type of basis is sometimes known as a ‘radial basis’ because the\n",
"effect basis functions are constructed on ‘centres’ and the effect of\n",
@@ -2827,7 +2908,8 @@
"$$\n",
"\\phi_j(x) = \\exp\\left(-\\frac{(x-\\mu_j)^2}{\\ell^2}\\right)\n",
"$$"
- ]
+ ],
+ "id": "175ed08c-d82f-435d-b667-a935eece7011"
},
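+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Again, the notebook loads the `mlai.radial` implementation below; here\n",
+ "is a minimal `numpy` sketch of the same idea, with centres and a\n",
+ "lengthscale chosen for illustration."
+ ],
+ "id": "f54782ec-390b-46d5-71ac-5e8da9edff15"
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import numpy as np\n",
+ "\n",
+ "# Sketch of a radial basis, phi_j(x) = exp(-(x - mu_j)^2 / ell^2),\n",
+ "# with illustrative centres and lengthscale.\n",
+ "def radial_basis_sketch(x, centres=(-1., 0., 1.), lengthscale=0.5):\n",
+ "    return np.hstack([np.exp(-((x - mu)/lengthscale)**2) for mu in centres])\n",
+ "\n",
+ "x_demo = np.linspace(-2, 2, 5)[:, np.newaxis]\n",
+ "print(radial_basis_sketch(x_demo))"
+ ],
+ "id": "0658930d-4a1c-47e6-82bd-6f9eb0fe0016"
+ },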
{
"cell_type": "code",
@@ -2835,8 +2917,19 @@
"metadata": {},
"outputs": [],
"source": [
- "%load -s radial mlai.py"
- ]
+ "import mlai"
+ ],
+ "id": "41e0691a-99df-4e49-a28c-34d2ba84e9a7"
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "%load -n mlai.radial"
+ ],
+ "id": "7c8504e7-079f-49d1-bb40-b113971ad27f"
},
{
"cell_type": "code",
@@ -2845,9 +2938,10 @@
"outputs": [],
"source": [
"import matplotlib.pyplot as plt\n",
- "import mlai\n",
- "import teaching_plots as plot"
- ]
+ "import mlai.plot as plot\n",
+ "import mlai"
+ ],
+ "id": "a0650d05-b893-4a3a-bd68-a463b6daaa0e"
},
{
"cell_type": "code",
@@ -2866,17 +2960,19 @@
"plot.basis(mlai.radial, x_min=-2, x_max=2, \n",
" fig=f, ax=ax, loc=loc, text=text,\n",
" diagrams='./ml')"
- ]
+ ],
+ "id": "4db35504-5a63-4ea1-a7c5-7cb579dda244"
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
- "\n",
+ "\n",
"\n",
"Figure: The set of functions which are combined to form the radial\n",
"basis."
- ]
+ ],
+ "id": "0ff3a905-e830-4ccb-9d8c-9d5bd2409925"
},
{
"cell_type": "code",
@@ -2884,9 +2980,10 @@
"metadata": {},
"outputs": [],
"source": [
- "import pods\n",
+ "import notutils as nu\n",
"from ipywidgets import IntSlider"
- ]
+ ],
+ "id": "0b1f7e4a-2aeb-4b99-afe0-77c73788819c"
},
{
"cell_type": "code",
@@ -2894,10 +2991,11 @@
"metadata": {},
"outputs": [],
"source": [
- "pods.notebook.display_plots('radial_basis{num_basis:0>3}.svg', \n",
+ "nu.display_plots('radial_basis{num_basis:0>3}.svg', \n",
" directory='./ml', \n",
" num_basis=IntSlider(0,0,2,1))"
- ]
+ ],
+ "id": "f5023d54-1bb8-4267-911e-631d990c813b"
},
{
"cell_type": "code",
@@ -2905,25 +3003,26 @@
"metadata": {},
"outputs": [],
"source": [
- "pods.notebook.display_prediction(basis=mlai.radial, num_basis=3)"
- ]
+ "nu.display_prediction(basis=mlai.radial, num_basis=3)"
+ ],
+ "id": "82864bcb-4652-411f-b78e-8fa468f4e91f"
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
- "Functions Derived from Radial Basis\n",
- "-----------------------------------\n",
+ "## Functions Derived from Radial Basis\n",
"\n",
"$$\n",
"f(x) = \\color{red}{w_1 e^{-2(x+1)^2}} + \\color{magenta}{w_2e^{-2x^2}} + \\color{blue}{w_3 e^{-2(x-1)^2}}\n",
"$$\n",
"\n",
- "\n",
+ "\n",
"\n",
"Figure: A radial basis is made up of different locally effective\n",
"functions centered at different points."
- ]
+ ],
+ "id": "305d49ef-0245-43a3-adf7-1fe8c8844c6c"
},
{
"cell_type": "code",
@@ -2931,9 +3030,10 @@
"metadata": {},
"outputs": [],
"source": [
- "from ipywidgets import IntSlider\n",
- "import pods"
- ]
+ "import notutils as nu\n",
+ "from ipywidgets import IntSlider"
+ ],
+ "id": "c6da9895-2b5b-4ab1-9ece-abd19bc2b665"
},
{
"cell_type": "code",
@@ -2941,17 +3041,21 @@
"metadata": {},
"outputs": [],
"source": [
- "pods.notebook.display_plots('radial_function{func_num:0>3}.svg', \n",
+ "nu.display_plots('radial_function{func_num:0>3}.svg', \n",
" directory='./ml', \n",
" func_num=IntSlider(0,0,2,1))"
- ]
+ ],
+ "id": "793ec318-77af-4850-9625-ba04eab0e086"
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
- "Marginal Likelihood\n",
- "-------------------\n",
+ "## Marginal Likelihood\n",
+ "\n",
+ "\\[edit\\]\n",
"\n",
"To understand the Gaussian process we’re going to build on our\n",
"understanding of the marginal likelihood for Bayesian regression. In the\n",
@@ -2964,19 +3068,20 @@
"of basis function models, where the parameters are sampled from a prior,\n",
"but move to thinking about sampling from the marginal likelihood\n",
"directly."
- ]
+ ],
+ "id": "064c5735-ea6e-4da2-97c1-1b636d8a6887"
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
- "Sampling from the Prior\n",
- "-----------------------\n",
+ "## Sampling from the Prior\n",
"\n",
"The first thing we’ll do is to set up the parameters of the model, these\n",
"include the parameters of the prior, the parameters of the basis\n",
"functions and the noise level."
- ]
+ ],
+ "id": "91794807-3b19-4c4a-9a89-150b11494e2a"
},
{
"cell_type": "code",
@@ -2990,7 +3095,8 @@
"degree = 5\n",
"# set the noise variance\n",
"sigma2 = 0.01"
- ]
+ ],
+ "id": "21bdb0f8-2028-47a5-b7c7-ef5c5d999ee3"
},
{
"cell_type": "markdown",
@@ -3001,7 +3107,8 @@
"\n",
"Let’s now compute a range of values to make predictions at, spanning the\n",
"*new* space of inputs,"
- ]
+ ],
+ "id": "b9d97e0b-7a58-4cd6-8f6f-ce740084934a"
},
{
"cell_type": "code",
@@ -3010,7 +3117,8 @@
"outputs": [],
"source": [
"import numpy as np"
- ]
+ ],
+ "id": "da357676-c3a2-4f8c-bff9-488f34dafb8d"
},
{
"cell_type": "code",
@@ -3021,14 +3129,16 @@
"def polynomial(x, degree, loc, scale):\n",
" degrees = np.arange(degree+1)\n",
" return ((x-loc)/scale)**degrees"
- ]
+ ],
+ "id": "5b36fe1a-70e8-4906-8f5c-cc78484013a3"
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"now let’s build the basis matrices. First we load in the data"
- ]
+ ],
+ "id": "2b908960-96ae-4b2e-bfb9-d28e44710f63"
},
{
"cell_type": "code",
@@ -3037,7 +3147,8 @@
"outputs": [],
"source": [
"import pods"
- ]
+ ],
+ "id": "b9ea310f-d5fc-49d4-9365-e790675350c8"
},
{
"cell_type": "code",
@@ -3048,7 +3159,8 @@
"data = pods.datasets.olympic_marathon_men()\n",
"x = data['X']\n",
"y = data['Y']"
- ]
+ ],
+ "id": "fde53976-318f-4784-a97e-9a5f2e22f52f"
},
{
"cell_type": "code",
@@ -3063,14 +3175,14 @@
"x_pred = np.linspace(1880, 2030, num_pred_data)[:, np.newaxis] # input locations for predictions\n",
"Phi_pred = polynomial(x_pred, degree=degree, loc=loc, scale=scale)\n",
"Phi = polynomial(x, degree=degree, loc=loc, scale=scale)"
- ]
+ ],
+ "id": "4a7a2559-0c8d-4fcd-8573-bafce7f04a78"
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
- "Weight Space View\n",
- "-----------------\n",
+ "## Weight Space View\n",
"\n",
"To generate typical functional predictions from the model, we need a set\n",
"of model parameters. We assume that the parameters are drawn\n",
@@ -3083,7 +3195,8 @@
"$\\mathbf{ w}$ using the function `np.random.normal` and combine these\n",
"parameters with our basis to create some samples of what\n",
"$f(\\mathbf{ x})$ looks like,"
- ]
+ ],
+ "id": "6734e0de-ac1c-4752-ac2d-7ef5c66e4a85"
},
{
"cell_type": "code",
@@ -3092,7 +3205,8 @@
"outputs": [],
"source": [
"import matplotlib.pyplot as plt"
- ]
+ ],
+ "id": "1f6d777b-8d61-40ed-a050-f1a35bc5f671"
},
{
"cell_type": "code",
@@ -3107,14 +3221,14 @@
" w_sample = z_vec*np.sqrt(alpha)\n",
" f_sample = Phi_pred@w_sample\n",
" plt.plot(x_pred, f_sample)"
- ]
+ ],
+ "id": "e7309360-f018-4831-983b-94d4ac878dc3"
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
- "Function Space View\n",
- "-------------------\n",
+ "## Function Space View\n",
"\n",
"The process we have used to generate the samples is a two stage process.\n",
"To obtain each function, we first generated a sample from the prior, $$\n",
@@ -3149,7 +3263,8 @@
"\\mathbf{K}= \\alpha\n",
"\\boldsymbol{ \\Phi}\\boldsymbol{ \\Phi}^\\top.\n",
"$$"
- ]
+ ],
+ "id": "f574f765-73bd-4197-9299-21ef469de904"
},
{
"cell_type": "code",
@@ -3158,7 +3273,8 @@
"outputs": [],
"source": [
"K = alpha*Phi_pred@Phi_pred.T"
- ]
+ ],
+ "id": "1550b7e0-4076-4b40-a540-fd5027e58160"
},
{
"cell_type": "markdown",
@@ -3167,7 +3283,8 @@
"Now we can use the `np.random.multivariate_normal` command for sampling\n",
"from a multivariate normal with covariance given by $\\mathbf{K}$ and\n",
"zero mean,"
- ]
+ ],
+ "id": "56ae9b60-f9c9-40be-9844-430873ad2778"
},
{
"cell_type": "code",
@@ -3181,13 +3298,14 @@
" ax.plot(x_pred.flatten(), f_sample.flatten(), linewidth=2)\n",
" \n",
"mlai.write_figure('gp-sample-basis-function.svg', directory='./kern')"
- ]
+ ],
+ "id": "3545028e-5ecb-445f-a37f-33c651acf2e1"
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
- "\n",
+ "\n",
"\n",
"Figure: Samples directly from the covariance function implied by the\n",
"basis function based covariance,\n",
@@ -3199,7 +3317,8 @@
"directly we created the covariance for $\\mathbf{ f}$. We can visualise\n",
"the form of this covaraince in an image in python with a colorbar to\n",
"show scale."
- ]
+ ],
+ "id": "caad7d07-8cc9-4469-a181-ceb47b8844be"
},
{
"cell_type": "code",
@@ -3207,9 +3326,10 @@
"metadata": {},
"outputs": [],
"source": [
- "import teaching_plots as plot\n",
+ "import mlai.plot as plot\n",
"import mlai"
- ]
+ ],
+ "id": "6e4b8149-ead1-4e3f-8d40-e2b1007db157"
},
{
"cell_type": "code",
@@ -3222,13 +3342,14 @@
"fig.colorbar(im)\n",
"\n",
"mlai.write_figure('basis-covariance-function.svg', directory='./kern')"
- ]
+ ],
+ "id": "b57b6f77-9cc9-4f83-8a91-97961a5c5591"
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
- "\n",
+ "\n",
"\n",
"Figure: Covariance of the function implied by the basis set\n",
"$\\alpha\\boldsymbol{ \\Phi}\\boldsymbol{ \\Phi}^\\top$.\n",
@@ -3247,7 +3368,8 @@
"\\mathbf{ y}\\sim \\mathcal{N}\\left(\\mathbf{0},\\boldsymbol{ \\Phi}\\boldsymbol{ \\Phi}^\\top +\\sigma^2\\mathbf{I}\\right).\n",
"$$ Sampling directly from this density gives us the noise corrupted\n",
"functions,"
- ]
+ ],
+ "id": "4af4bab1-b66b-49dd-ae22-a605bfc5fec7"
},
{
"cell_type": "code",
@@ -3255,8 +3377,9 @@
"metadata": {},
"outputs": [],
"source": [
- "fig, ax = plt.subplots(figsize=plot.big_wide_figsize)"
- ]
+ "import mlai"
+ ],
+ "id": "f7b9b696-b216-4377-8937-ae59c9065af7"
},
{
"cell_type": "code",
@@ -3264,20 +3387,22 @@
"metadata": {},
"outputs": [],
"source": [
+ "fig, ax = plt.subplots(figsize=plot.big_wide_figsize)\n",
"K = alpha*Phi_pred@Phi_pred.T + sigma2*np.eye(x_pred.size)\n",
"for i in range(10):\n",
" y_sample = np.random.multivariate_normal(mean=np.zeros(x_pred.size), cov=K)\n",
" ax.plot(x_pred.flatten(), y_sample.flatten())\n",
" \n",
"mlai.write_figure('gp-sample-basis-function-plus-noise.svg', \n",
- " './kern')"
- ]
+ " directory='./kern')"
+ ],
+ "id": "16560c95-1970-4dd4-894f-3e121a734896"
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
- "\n",
+ "\n",
"\n",
"Figure: Samples directly from the covariance function implied by the\n",
"noise corrupted basis function based covariance,\n",
@@ -3286,7 +3411,8 @@
"where the effect of our noise term is to roughen the sampled functions,\n",
"we can also increase the variance of the noise to see a different\n",
"effect,"
- ]
+ ],
+ "id": "0c97406b-d72a-41a6-881d-28b72205c739"
},
{
"cell_type": "code",
@@ -3296,7 +3422,8 @@
"source": [
"sigma2 = 1.\n",
"K = alpha*Phi_pred@Phi_pred.T + sigma2*np.eye(x_pred.size)"
- ]
+ ],
+ "id": "6fdd94a4-6c78-4c2e-950c-caceda54ff92"
},
{
"cell_type": "code",
@@ -3310,26 +3437,31 @@
" plt.plot(x_pred.flatten(), y_sample.flatten())\n",
" \n",
"mlai.write_figure('gp-sample-basis-function-plus-large-noise.svg', \n",
- " './kern')"
- ]
+ " directory='./kern')"
+ ],
+ "id": "91426d23-74e1-4083-a441-c9f4c17b7c29"
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
- "\n",
+ "\n",
"\n",
"Figure: Samples directly from the covariance function implied by the\n",
"noise corrupted basis function based covariance,\n",
"$\\alpha \\boldsymbol{ \\Phi}\\boldsymbol{ \\Phi}^\\top + \\mathbf{I}$."
- ]
+ ],
+ "id": "a5d2958b-1174-44cf-8df4-4056fb16fe22"
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
- "Non-degenerate Gaussian Processes\n",
- "---------------------------------\n",
+ "## Non-degenerate Gaussian Processes\n",
+ "\n",
+ "\\[edit\\]\n",
"\n",
"The process described above is degenerate. The covariance function is of\n",
"rank at most $h$ and since the theoretical amount of data could always\n",
@@ -3363,7 +3495,7 @@
"\n",
"\n",
"\n",
- "\n",
+ "\n",
"\n",
"\n",
"\n",
@@ -3373,7 +3505,7 @@
"and in considered what would happen if you took the number of hidden\n",
"nodes, or neurons, to infinity, i.e. $h\\rightarrow \\infty$.\n",
"\n",
- "\n",
+ "\n",
"\n",
"Figure: Page 37 of [Radford Neal’s 1994\n",
"thesis](http://www.cs.toronto.edu/~radford/ftp/thesis.pdf)\n",
@@ -3401,14 +3533,14 @@
" \\end{align*}\n",
" $$ has finite variance, then the result of taking the number of hidden\n",
"units to infinity, with appropriate scaling, is also a Gaussian process."
- ]
+ ],
+ "id": "9bb7f4c3-99c8-436d-92c1-1634b5ff3f77"
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
- "Further Reading\n",
- "---------------\n",
+ "## Further Reading\n",
"\n",
"To understand this argument in more detail, I highly recommend reading\n",
"chapter 2 of Neal’s thesis (Neal, 1994), which remains easy to read and\n",
@@ -3419,14 +3551,18 @@
"business of machine learning in the 1990s. Radford and David were also\n",
"pioneers in making their software widely available and publishing\n",
"material on the web."
- ]
+ ],
+ "id": "04644992-0802-422d-8f4c-a438d2064cd4"
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
- "Gaussian Process\n",
- "----------------\n",
+ "## Gaussian Process\n",
+ "\n",
+ "\\[edit\\]\n",
"\n",
"In our we sampled from the prior over paraemters. Through the properties\n",
"of multivariate Gaussian densities this prior over parameters implies a\n",
@@ -3447,7 +3583,18 @@
"\\left\\Vert\\mathbf{ x}- \\mathbf{ x}^\\prime\\right\\Vert^2 = (\\mathbf{ x}- \\mathbf{ x}^\\prime)^\\top (\\mathbf{ x}- \\mathbf{ x}^\\prime) \n",
"$$ Let’s build a covariance matrix based on this function. First we\n",
"define the form of the covariance function,"
- ]
+ ],
+ "id": "c313df03-8bb8-49ae-85d1-979178deed1d"
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import mlai"
+ ],
+ "id": "522ab10a-940f-48cd-97a6-0108bdfd9bec"
},
{
"cell_type": "code",
@@ -3455,8 +3602,9 @@
"metadata": {},
"outputs": [],
"source": [
- "%load -s eq_cov mlai.py"
- ]
+ "%load -n mlai.eq_cov"
+ ],
+ "id": "fe079494-9218-423e-98b9-228a621e64a9"
},
{
"cell_type": "markdown",
@@ -3465,7 +3613,8 @@
"We can use this to compute *directly* the covariance for $\\mathbf{ f}$\n",
"at the points given by `x_pred`. Let’s define a new function `K()` which\n",
"does this,"
- ]
+ ],
+ "id": "92f826fc-0801-4876-a1a3-e96a330a78a3"
},
{
"cell_type": "code",
@@ -3473,15 +3622,27 @@
"metadata": {},
"outputs": [],
"source": [
- "%load -s Kernel mlai.py"
- ]
+ "import mlai"
+ ],
+ "id": "9c226529-1be7-417b-b0a9-d113eda16333"
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "%load -n mlai.Kernel"
+ ],
+ "id": "fbf206f2-915a-4f9c-864e-e3229b6767e1"
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Now we can image the resulting covariance,"
- ]
+ ],
+ "id": "21e59da4-8b2c-4b23-a861-ce063c081afc"
},
{
"cell_type": "code",
@@ -3491,7 +3652,8 @@
"source": [
"kernel = Kernel(function=eq_cov, variance=1., lengthscale=10.)\n",
"K = kernel.K(x_pred, x_pred)"
- ]
+ ],
+ "id": "36821f69-6b10-4699-be6b-a56e50170de4"
},
{
"cell_type": "markdown",
@@ -3499,7 +3661,8 @@
"source": [
"To visualise the covariance between the points we can use the `imshow`\n",
"function in matplotlib."
- ]
+ ],
+ "id": "f23a8b59-1e5a-43aa-af4d-e1ca0c8ad1a3"
},
{
"cell_type": "code",
@@ -3510,14 +3673,16 @@
"fig, ax = plt.subplots(figsize=(8,8))\n",
"im = ax.imshow(K, interpolation='none')\n",
"fig.colorbar(im)"
- ]
+ ],
+ "id": "784dc75f-30ce-47ce-855c-bc208b1110c3"
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Finally, we can sample functions from the marginal likelihood."
- ]
+ ],
+ "id": "cf2fbd3c-749c-47e3-bd69-3234fcd83928"
},
{
"cell_type": "code",
@@ -3525,31 +3690,34 @@
"metadata": {},
"outputs": [],
"source": [
- "fig, ax = plt.subplots(figsize(8, 5))\n",
+ "fig, ax = plt.subplots(figsize=(8, 5))\n",
"for i in range(10):\n",
" y_sample = np.random.multivariate_normal(mean=np.zeros(x_pred.size), cov=K)\n",
" ax.plot(x_pred.flatten(), y_sample.flatten())"
- ]
+ ],
+ "id": "237c14ac-8db6-43c1-aba9-f6d0bdb96cbd"
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
- "### Exercise 1\n",
+ "### Exercise 2\n",
"\n",
"**Moving Parameters** Have a play with the parameters for this\n",
"covariance function (the lengthscale and the variance) and see what\n",
"effects the parameters have on the types of functions you observe."
- ]
+ ],
+ "id": "4d027376-4790-4411-acff-b9a5376b7816"
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
- "### Exercise 1 Answer\n",
+ "### Exercise 2 Answer\n",
"\n",
- "Write your answer to Exercise 1 here"
- ]
+ "Write your answer to Exercise 2 here"
+ ],
+ "id": "ee0a1e09-c608-4803-ad59-74f947169058"
},
{
"cell_type": "code",
@@ -3559,29 +3727,32 @@
"source": [
"# Use this box for any code you need\n",
"\n"
- ]
+ ],
+ "id": "0ffde6e8-e4eb-47d0-9a5c-0860a9e69c6d"
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
- "Bayesian Inference by Rejection Sampling\n",
- "----------------------------------------\n",
+ "## Bayesian Inference by Rejection Sampling\n",
+ "\n",
+ "\\[edit\\]\n",
"\n",
"One view of Bayesian inference is to assume we are given a mechanism for\n",
- "generating samples, where we assume that mechanism is representing on\n",
+ "generating samples, where we assume that mechanism is representing an\n",
"accurate view on the way we believe the world works.\n",
"\n",
"This mechanism is known as our *prior* belief.\n",
"\n",
"We combine our prior belief with our observations of the real world by\n",
- "discarding all those samples that are inconsistent with our prior. The\n",
- "*likelihood* defines mathematically what we mean by inconsistent with\n",
- "the prior. The higher the noise level in the likelihood, the looser the\n",
- "notion of consistent.\n",
+ "discarding all those prior samples that are inconsistent with our\n",
+ "observations. The *likelihood* defines mathematically what we mean by\n",
+ "inconsistent with the observations. The higher the noise level in the\n",
+ "likelihood, the looser the notion of consistent.\n",
"\n",
- "The samples that remain are considered to be samples from the\n",
- "*posterior*.\n",
+ "The samples that remain are samples from the *posterior*.\n",
"\n",
"This approach to Bayesian inference is closely related to two sampling\n",
"techniques known as *rejection sampling* and *importance sampling*. It\n",
@@ -3589,8 +3760,8 @@
"computation* (ABC) or likelihood-free inference.\n",
"\n",
"In practice, the algorithm is often too slow to be practical, because\n",
- "most samples will be inconsistent with the data and as a result the\n",
- "mechanism has to be operated many times to obtain a few posterior\n",
+ "most samples will be inconsistent with the observations and as a result\n",
+ "the mechanism must be operated many times to obtain a few posterior\n",
"samples.\n",
"\n",
"However, in the Gaussian process case, when the likelihood also assumes\n",
@@ -3598,9 +3769,85 @@
"the posterior density *analytically*. This is the benefit of Gaussian\n",
"processes.\n",
"\n",
- "First we will load in two python functions for computing the covariance\n",
+ "First, we will load in two python functions for computing the covariance\n",
"function."
- ]
+ ],
+ "id": "d6ea6441-8154-4ee7-b020-47c4630dbe52"
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import mlai"
+ ],
+ "id": "60ebcb7c-5b74-42a3-baeb-891a8dd11f11"
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "%load -n mlai.Kernel"
+ ],
+ "id": "bd19b06f-8839-4fde-80e0-f29971d40bff"
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# %load -n mlai.Kernel\n",
+ "class Kernel():\n",
+ " \"\"\"Covariance function\n",
+ " :param function: covariance function\n",
+ " :type function: function\n",
+ " :param name: name of covariance function\n",
+ " :type name: string\n",
+ " :param shortname: abbreviated name of covariance function\n",
+ " :type shortname: string\n",
+ " :param formula: latex formula of covariance function\n",
+ " :type formula: string\n",
+ " :param function: covariance function\n",
+ " :type function: function\n",
+ " :param \\**kwargs:\n",
+ " See below\n",
+ "\n",
+ " :Keyword Arguments:\n",
+ " * \"\"\"\n",
+ "\n",
+ " def __init__(self, function, name=None, shortname=None, formula=None, **kwargs): \n",
+ " self.function=function\n",
+ " self.formula = formula\n",
+ " self.name = name\n",
+ " self.shortname = shortname\n",
+ " self.parameters=kwargs\n",
+ " \n",
+ " def K(self, X, X2=None):\n",
+ " \"\"\"Compute the full covariance function given a kernel function for two data points.\"\"\"\n",
+ " if X2 is None:\n",
+ " X2 = X\n",
+ " K = np.zeros((X.shape[0], X2.shape[0]))\n",
+ " for i in np.arange(X.shape[0]):\n",
+ " for j in np.arange(X2.shape[0]):\n",
+ " K[i, j] = self.function(X[i, :], X2[j, :], **self.parameters)\n",
+ "\n",
+ " return K\n",
+ "\n",
+ " def diag(self, X):\n",
+ " \"\"\"Compute the diagonal of the covariance function\"\"\"\n",
+ " diagK = np.zeros((X.shape[0], 1))\n",
+ " for i in range(X.shape[0]): \n",
+ " diagK[i] = self.function(X[i, :], X[i, :], **self.parameters)\n",
+ " return diagK\n",
+ "\n",
+ " def _repr_html_(self):\n",
+ " raise NotImplementedError"
+ ],
+ "id": "e1051008-92ac-4d12-99c9-f78d12ae0045"
},
{
"cell_type": "code",
@@ -3608,8 +3855,9 @@
"metadata": {},
"outputs": [],
"source": [
- "%load -s Kernel mlai.py"
- ]
+ "import mlai"
+ ],
+ "id": "cff9d5fd-888d-458f-8b36-8b9cc5ce5020"
},
{
"cell_type": "code",
@@ -3617,8 +3865,23 @@
"metadata": {},
"outputs": [],
"source": [
- "%load -s eq_cov mlai.py"
- ]
+ "%load -n mlai.eq_cov"
+ ],
+ "id": "6ca4aa43-ba18-46a3-aa24-7aeb889ace4e"
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# %load -n mlai.eq_cov\n",
+ "def eq_cov(x, x_prime, variance=1., lengthscale=1.):\n",
+ " \"\"\"Exponentiated quadratic covariance function.\"\"\"\n",
+ " diffx = x - x_prime\n",
+ " return variance*np.exp(-0.5*np.dot(diffx, diffx)/lengthscale**2)"
+ ],
+ "id": "66122e8c-2e72-4acf-b408-4894073e345e"
},
{
"cell_type": "code",
@@ -3630,15 +3893,17 @@
" name='Exponentiated Quadratic',\n",
" shortname='eq', \n",
" lengthscale=0.25)"
- ]
+ ],
+ "id": "86159041-4311-4626-884e-b2f4fc70c241"
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
- "Next we sample from a multivariate normal density (a multivariate\n",
+ "Next, we sample from a multivariate normal density (a multivariate\n",
"Gaussian), using the covariance function as the covariance matrix."
- ]
+ ],
+ "id": "d2626cb4-ca61-4c62-8430-06ce49536dcb"
},
{
"cell_type": "code",
@@ -3648,8 +3913,9 @@
"source": [
"import numpy as np\n",
"np.random.seed(10)\n",
- "import teaching_plots as plot"
- ]
+ "import mlai.plot as plot"
+ ],
+ "id": "c8593253-cbe1-4d3e-9a16-7a721b464e33"
},
{
"cell_type": "code",
@@ -3659,7 +3925,8 @@
"source": [
"plot.rejection_samples(kernel=kernel, \n",
" diagrams='./gp')"
- ]
+ ],
+ "id": "5ef941ef-3c1c-4ba8-88dd-2e8a7485aeb9"
},
{
"cell_type": "code",
@@ -3667,9 +3934,10 @@
"metadata": {},
"outputs": [],
"source": [
- "import pods\n",
+ "import notutils as nu\n",
"from ipywidgets import IntSlider"
- ]
+ ],
+ "id": "a6242abe-307d-42bc-886e-77fbd8dcd4c1"
},
{
"cell_type": "code",
@@ -3677,18 +3945,19 @@
"metadata": {},
"outputs": [],
"source": [
- "pods.notebook.display_plots('gp_rejection_sample{sample:0>3}.png', \n",
- " directory='./gp', \n",
- " sample=IntSlider(1,1,5,1))"
- ]
+ "nu.display_plots('gp_rejection_sample{sample:0>3}.png', \n",
+ " directory='./gp', \n",
+ " sample=IntSlider(1,1,5,1))"
+ ],
+ "id": "8d6ce516-d6ac-43b3-8478-f5427458267e"
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
- "\n",
- "\n",
- "\n",
+ "\n",
+ "\n",
+ "\n",
"\n",
"Figure: One view of Bayesian inference is we have a machine for\n",
"generating samples (the *prior*), and we discard all samples\n",
@@ -3696,14 +3965,14 @@
"*posterior*). This is a rejection sampling view of Bayesian inference.\n",
"The Gaussian process allows us to do this analytically by multiplying\n",
"the *prior* by the *likelihood*."
- ]
+ ],
+ "id": "5008b936-0c06-4a1d-8b3a-d8a286e18eae"
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
- "Gaussian Process\n",
- "----------------\n",
+ "## Gaussian Process\n",
"\n",
"The Gaussian process perspective takes the marginal likelihood of the\n",
"data to be a joint Gaussian density with a covariance given by\n",
@@ -3724,8 +3993,306 @@
"$$ where the *parameters* of the model are also embedded in the\n",
"covariance function, they include the parameters of the kernel (such as\n",
"lengthscale and variance), and the noise variance, $\\sigma^2$. Let’s\n",
- "create a class in python for storing these variables."
- ]
+ "create a set of classes in python for storing these variables."
+ ],
+ "id": "2e8caddd-f6ba-45cf-b66f-ede002918ebc"
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import mlai"
+ ],
+ "id": "166b2d4a-2a12-4c64-82f8-bf8af60fb036"
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "%load -n mlai.Model"
+ ],
+ "id": "7a8cff25-ffdf-4615-bd38-c22a5a1caf89"
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import mlai"
+ ],
+ "id": "c3dcbe99-5b9b-4426-ad51-38c3367d48f0"
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "%load -n mlai.MapModel"
+ ],
+ "id": "fa593473-6261-4b35-a7fb-f2ce796fda03"
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import mlai"
+ ],
+ "id": "c02b16e4-8b5a-4339-a22d-836ef1fff404"
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "%load -n mlai.ProbModel"
+ ],
+ "id": "76575cb6-80bc-4ccb-9ec5-79e8ca909125"
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import mlai"
+ ],
+ "id": "950fdcc4-18b5-4c2f-8375-56ee64b82f39"
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "%load -n mlai.ProbMapModel"
+ ],
+ "id": "6b60a783-bcae-436c-9e26-c9b3f1342884"
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import mlai"
+ ],
+ "id": "12f0a510-a3a3-4799-98b6-82b6654f1009"
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "%load -n mlai.GP"
+ ],
+ "id": "1b3490e4-ba5b-4a8a-81c9-d6d276db9699"
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Making Predictions\n",
+ "\n",
+ "We now have a probability density that represents functions. How do we\n",
+ "make predictions with this density? The density is known as a process\n",
+ "because it is *consistent*. By consistency, here, we mean that the model\n",
+ "makes predictions for $\\mathbf{ f}$ that are unaffected by future values\n",
+ "of $\\mathbf{ f}^*$ that are currently unobserved (such as test points).\n",
+ "If we think of $\\mathbf{ f}^*$ as test points, we can still write down a\n",
+ "joint probability density over the training observations, $\\mathbf{ f}$\n",
+ "and the test observations, $\\mathbf{ f}^*$. This joint probability\n",
+ "density will be Gaussian, with a covariance matrix given by our\n",
+ "covariance function, $k(\\mathbf{ x}_i, \\mathbf{ x}_j)$. $$\n",
+ "\\begin{bmatrix}\\mathbf{ f}\\\\ \\mathbf{ f}^*\\end{bmatrix} \\sim \\mathcal{N}\\left(\\mathbf{0},\\begin{bmatrix} \\mathbf{K}& \\mathbf{K}_\\ast \\\\\n",
+ "\\mathbf{K}_\\ast^\\top & \\mathbf{K}_{\\ast,\\ast}\\end{bmatrix}\\right)\n",
+ "$$ where here $\\mathbf{K}$ is the covariance computed between all the\n",
+ "training points, $\\mathbf{K}_\\ast$ is the covariance matrix computed\n",
+ "between the training points and the test points and\n",
+ "$\\mathbf{K}_{\\ast,\\ast}$ is the covariance matrix computed betwen all\n",
+ "the tests points and themselves. To be clear, let’s compute these now\n",
+ "for our example, using `x` and `y` for the training data (although `y`\n",
+ "doesn’t enter the covariance) and `x_pred` as the test locations."
+ ],
+ "id": "1f85d452-7ddf-44fa-8afd-a9b0b770dafd"
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# set covariance function parameters\n",
+ "variance = 16.0\n",
+ "lengthscale = 8\n",
+ "# set noise variance\n",
+ "sigma2 = 0.05\n",
+ "\n",
+ "kernel = Kernel(eq_cov, variance=variance, lengthscale=lengthscale)\n",
+ "K = kernel.K(x, x)\n",
+ "K_star = kernel.K(x, x_pred)\n",
+ "K_starstar = kernel.K(x_pred, x_pred)"
+ ],
+ "id": "5a2c1b18-d5b5-48b0-b7c6-a9a5f798c3dd"
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Now we use this structure to visualise the covariance between test data\n",
+ "and training data. This structure is how information is passed between\n",
+ "test and training data. Unlike the maximum likelihood formalisms we’ve\n",
+ "been considering so far, the structure expresses *correlation* between\n",
+ "our different data points. However, just like the we now have a *joint\n",
+ "density* between some variables of interest. In particular we have the\n",
+ "joint density over $p(\\mathbf{ f}, \\mathbf{ f}^*)$. The joint density is\n",
+ "*Gaussian* and *zero mean*. It is specified entirely by the *covariance\n",
+ "matrix*, $\\mathbf{K}$. That covariance matrix is, in turn, defined by a\n",
+ "covariance function. Now we will visualise the form of that covariance\n",
+ "in the form of the matrix, $$\n",
+ "\\begin{bmatrix} \\mathbf{K}& \\mathbf{K}_\\ast \\\\ \\mathbf{K}_\\ast^\\top\n",
+ "& \\mathbf{K}_{\\ast,\\ast}\\end{bmatrix}\n",
+ "$$"
+ ],
+ "id": "b6a4cd35-bac1-4488-8a10-eb9067e48774"
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import mlai"
+ ],
+ "id": "dd96c366-8765-4944-99a7-0f7e7148bf57"
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "fig, ax = plt.subplots(figsize=(8,8))\n",
+ "im = ax.imshow(np.vstack([np.hstack([K, K_star]), np.hstack([K_star.T, K_starstar])]), interpolation='none')\n",
+ "# Add lines for separating training and test data\n",
+ "ax.axvline(x.shape[0]-1, color='w')\n",
+ "ax.axhline(x.shape[0]-1, color='w')\n",
+ "fig.colorbar(im)\n",
+ "\n",
+ "mlai.write_figure('block-predictive-covariance.svg', diagrams='./gp')"
+ ],
+ "id": "bf767349-49ba-4fbb-9d15-03926329f2dc"
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "\n",
+ "\n",
+ "Figure: Different blocks of the covariance function. The upper left\n",
+ "block is the covariance of the training data with itself, $\\mathbf{K}$.\n",
+ "The top right is the cross covariance between training data (rows) and\n",
+ "prediction locations (columns). The lower left is the same matrix\n",
+ "transposed. The bottom right is the covariance matrix of the test data\n",
+ "with itself.\n",
+ "\n",
+ "There are four blocks to this plot. The upper left block is the\n",
+ "covariance of the training data with itself, $\\mathbf{K}$. We see some\n",
+ "structure here due to the missing data from the first and second world\n",
+ "wars. Alongside this covariance (to the right and below) we see the\n",
+ "cross covariance between the training and the test data ($\\mathbf{K}_*$\n",
+ "and $\\mathbf{K}_*^\\top$). This is giving us the covariation between our\n",
+ "training and our test data. Finally the lower right block The banded\n",
+ "structure we now observe is because some of the training points are near\n",
+ "to some of the test points. This is how we obtain ‘communication’\n",
+ "between our training data and our test data. If there is no structure in\n",
+ "$\\mathbf{K}_*$ then our belief about the test data simply matches our\n",
+ "prior."
+ ],
+ "id": "497b74dd-9fe5-4f85-9ffa-23d222f51b9b"
+ },
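+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "As a minimal sketch of how these blocks are used (this is not the `mlai`\n",
+ "implementation, and it assumes `x`, `y`, `x_pred`, `sigma2`, `K`, `K_star`\n",
+ "and `K_starstar` from the cells above), the posterior over the test\n",
+ "points follows directly from the joint Gaussian."
+ ],
+ "id": "0f1e2d3c-0000-4000-8000-000000000001"
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import numpy as np\n",
+ "\n",
+ "# Posterior over f* for a zero-mean GP with noise variance sigma2,\n",
+ "# built from the covariance blocks computed above.\n",
+ "K_noise = K + sigma2*np.eye(x.shape[0])\n",
+ "A = np.linalg.solve(K_noise, K_star)          # (K + sigma2 I)^{-1} K_*\n",
+ "mu_pred = A.T@y                               # posterior mean at x_pred\n",
+ "C_pred = K_starstar - K_star.T@A              # posterior covariance at x_pred\n",
+ "var_pred = np.diag(C_pred)[:, None] + sigma2  # predictive variance with noise"
+ ],
+ "id": "0f1e2d3c-0000-4000-8000-000000000002"
+ },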
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Prediction Across Two Points with GPs\n",
+ "\n",
+ "\\[edit\\]"
+ ],
+ "id": "d08a1884-8687-490c-b743-e78643295cfc"
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import numpy as np\n",
+ "np.random.seed(4949)"
+ ],
+ "id": "95d87cfd-1a20-4cbf-9a52-db92c364a6a5"
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import mlai.plot as plot\n",
+ "import pods"
+ ],
+ "id": "daec981c-de8a-42f3-ad68-d3f19f376958"
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import numpy as np\n",
+ "np.random.seed(4949)"
+ ],
+ "id": "5e3116cf-aa55-47bc-a545-e384553afac7"
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Sampling a Function\n",
+ "\n",
+ "\\[edit\\]\n",
+ "\n",
+ "We will consider a Gaussian distribution with a particular structure of\n",
+ "covariance matrix. We will generate *one* sample from a 25-dimensional\n",
+ "Gaussian density. $$\n",
+ "\\mathbf{ f}=\\left[f_{1},f_{2}\\dots f_{25}\\right].\n",
+ "$$ in the figure below we plot these data on the $y$-axis against their\n",
+ "*indices* on the $x$-axis."
+ ],
+ "id": "42398f0a-6904-4e4b-a333-7dca89305fb6"
+ },
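+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "A quick sketch of how such a sample can be drawn directly (the lengthscale\n",
+ "and index scaling here are illustrative choices, and `Kernel` and `eq_cov`\n",
+ "are assumed from the cells above)."
+ ],
+ "id": "0f1e2d3c-0000-4000-8000-000000000003"
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import numpy as np\n",
+ "\n",
+ "# one sample from a 25-dimensional Gaussian whose covariance comes from\n",
+ "# the exponentiated quadratic kernel evaluated at the (scaled) indices\n",
+ "index = np.arange(1, 26)[:, None]/25.\n",
+ "kern_idx = Kernel(eq_cov, variance=1., lengthscale=0.3)\n",
+ "K_idx = kern_idx.K(index, index)\n",
+ "f_sample = np.random.multivariate_normal(np.zeros(25), K_idx + 1e-8*np.eye(25))"
+ ],
+ "id": "0f1e2d3c-0000-4000-8000-000000000004"
+ },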
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import mlai"
+ ],
+ "id": "d2fc5f51-5998-4d3f-9aa1-7d4cc3481acf"
},
{
"cell_type": "code",
@@ -3733,36 +4300,19 @@
"metadata": {},
"outputs": [],
"source": [
- "%load -s GP mlai.py"
- ]
+ "%load -n mlai.Kernel"
+ ],
+ "id": "0b421395-4eb7-41ea-9f56-fa9e83c4d1da"
},
{
- "cell_type": "markdown",
+ "cell_type": "code",
+ "execution_count": null,
"metadata": {},
+ "outputs": [],
"source": [
- "Making Predictions\n",
- "------------------\n",
- "\n",
- "We now have a probability density that represents functions. How do we\n",
- "make predictions with this density? The density is known as a process\n",
- "because it is *consistent*. By consistency, here, we mean that the model\n",
- "makes predictions for $\\mathbf{ f}$ that are unaffected by future values\n",
- "of $\\mathbf{ f}^*$ that are currently unobserved (such as test points).\n",
- "If we think of $\\mathbf{ f}^*$ as test points, we can still write down a\n",
- "joint probability density over the training observations, $\\mathbf{ f}$\n",
- "and the test observations, $\\mathbf{ f}^*$. This joint probability\n",
- "density will be Gaussian, with a covariance matrix given by our\n",
- "covariance function, $k(\\mathbf{ x}_i, \\mathbf{ x}_j)$. $$\n",
- "\\begin{bmatrix}\\mathbf{ f}\\\\ \\mathbf{ f}^*\\end{bmatrix} \\sim \\mathcal{N}\\left(\\mathbf{0},\\begin{bmatrix} \\mathbf{K}& \\mathbf{K}_\\ast \\\\\n",
- "\\mathbf{K}_\\ast^\\top & \\mathbf{K}_{\\ast,\\ast}\\end{bmatrix}\\right)\n",
- "$$ where here $\\mathbf{K}$ is the covariance computed between all the\n",
- "training points, $\\mathbf{K}_\\ast$ is the covariance matrix computed\n",
- "between the training points and the test points and\n",
- "$\\mathbf{K}_{\\ast,\\ast}$ is the covariance matrix computed betwen all\n",
- "the tests points and themselves. To be clear, let’s compute these now\n",
- "for our example, using `x` and `y` for the training data (although `y`\n",
- "doesn’t enter the covariance) and `x_pred` as the test locations."
- ]
+ "import mlai"
+ ],
+ "id": "830642ba-a19b-4f4e-90b0-8525653b6460"
},
{
"cell_type": "code",
@@ -3770,37 +4320,19 @@
"metadata": {},
"outputs": [],
"source": [
- "# set covariance function parameters\n",
- "variance = 16.0\n",
- "lengthscale = 8\n",
- "# set noise variance\n",
- "sigma2 = 0.05\n",
- "\n",
- "kernel = Kernel(eq_cov, variance=variance, lengthscale=lengthscale)\n",
- "K = kernel.K(x, x)\n",
- "K_star = kernel.K(x, x_pred)\n",
- "K_starstar = kernel.K(x_pred, x_pred)"
- ]
+ "%load -n mlai.polynomial_cov"
+ ],
+ "id": "c5b4f961-0fcf-4308-8c68-e8f3c0d6a837"
},
{
- "cell_type": "markdown",
+ "cell_type": "code",
+ "execution_count": null,
"metadata": {},
+ "outputs": [],
"source": [
- "Now we use this structure to visualise the covariance between test data\n",
- "and training data. This structure is how information is passed between\n",
- "test and training data. Unlike the maximum likelihood formalisms we’ve\n",
- "been considering so far, the structure expresses *correlation* between\n",
- "our different data points. However, just like the we now have a *joint\n",
- "density* between some variables of interest. In particular we have the\n",
- "joint density over $p(\\mathbf{ f}, \\mathbf{ f}^*)$. The joint density is\n",
- "*Gaussian* and *zero mean*. It is specified entirely by the *covariance\n",
- "matrix*, $\\mathbf{K}$. That covariance matrix is, in turn, defined by a\n",
- "covariance function. Now we will visualise the form of that covariance\n",
- "in the form of the matrix, $$\n",
- "\\begin{bmatrix} \\mathbf{K}& \\mathbf{K}_\\ast \\\\ \\mathbf{K}_\\ast^\\top\n",
- "& \\mathbf{K}_{\\ast,\\ast}\\end{bmatrix}\n",
- "$$"
- ]
+ "import mlai"
+ ],
+ "id": "623aef44-3e02-44cc-9835-3f60f24983e9"
},
{
"cell_type": "code",
@@ -3808,39 +4340,31 @@
"metadata": {},
"outputs": [],
"source": [
- "fig, ax = plt.subplots(figsize=(8,8))\n",
- "im = ax.imshow(np.vstack([np.hstack([K, K_star]), np.hstack([K_star.T, K_starstar])]), interpolation='none')\n",
- "# Add lines for separating training and test data\n",
- "ax.axvline(x.shape[0]-1, color='w')\n",
- "ax.axhline(x.shape[0]-1, color='w')\n",
- "fig.colorbar(im)"
- ]
+ "%load -n mlai.exponentiated_quadratic"
+ ],
+ "id": "835a8656-454f-4f86-b2ba-35c594caf813"
},
{
- "cell_type": "markdown",
+ "cell_type": "code",
+ "execution_count": null,
"metadata": {},
+ "outputs": [],
"source": [
- "There are four blocks to this color plot. The upper left block is the\n",
- "covariance of the training data with itself, $\\mathbf{K}$. We see some\n",
- "structure here due to the missing data from the first and second world\n",
- "wars. Alongside this covariance (to the right and below) we see the\n",
- "cross covariance between the training and the test data ($\\mathbf{K}_*$\n",
- "and $\\mathbf{K}_*^\\top$). This is giving us the covariation between our\n",
- "training and our test data. Finally the lower right block The banded\n",
- "structure we now observe is because some of the training points are near\n",
- "to some of the test points. This is how we obtain ‘communication’\n",
- "between our training data and our test data. If there is no structure in\n",
- "$\\mathbf{K}_*$ then our belief about the test data simply matches our\n",
- "prior."
- ]
+ "import mlai.plot as plot\n",
+ "from mlai import Kernel, exponentiated_quadratic"
+ ],
+ "id": "c24a4c7f-b1ac-4448-b61b-1635a57b64a9"
},
{
- "cell_type": "markdown",
+ "cell_type": "code",
+ "execution_count": null,
"metadata": {},
+ "outputs": [],
"source": [
- "Prediction Across Two Points with GPs\n",
- "-------------------------------------"
- ]
+ "kernel=Kernel(function=exponentiated_quadratic, lengthscale=0.5)\n",
+ "plot.two_point_sample(kernel.K, diagrams='./gp')"
+ ],
+ "id": "f28381e5-fb7d-4d50-8a57-a5dd1d48cf22"
},
{
"cell_type": "code",
@@ -3848,9 +4372,10 @@
"metadata": {},
"outputs": [],
"source": [
- "import numpy as np\n",
- "np.random.seed(4949)"
- ]
+ "import notutils as nu\n",
+ "from ipywidgets import IntSlider"
+ ],
+ "id": "b62ab1e2-cadf-4dc8-88c3-1c86b935cff0"
},
{
"cell_type": "code",
@@ -3858,16 +4383,42 @@
"metadata": {},
"outputs": [],
"source": [
- "import teaching_plots as plot\n",
- "import pods"
- ]
+ "import notutils as nu"
+ ],
+ "id": "c8abd0c4-76f3-4976-9c34-34541251deba"
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "nu.display_plots('two_point_sample{sample:0>3}.svg', './gp', sample=IntSlider(0, 0, 8, 1))"
+ ],
+ "id": "804faf46-d7ba-4e5a-b1f7-e4f94fad3407"
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
- "### Sampling a Function from a Gaussian"
- ]
+ "\n",
+ "\n",
+ "Figure: A 25 dimensional correlated random variable (values ploted\n",
+ "against index)"
+ ],
+ "id": "5d09dbf2-a8aa-4055-b3d3-0b8230b605dc"
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Sampling a Function from a Gaussian\n",
+ "\n",
+ "\\[edit\\]"
+ ],
+ "id": "1d7d8c51-8dba-4ff4-af07-a31540562799"
},
{
"cell_type": "code",
@@ -3875,9 +4426,20 @@
"metadata": {},
"outputs": [],
"source": [
- "import pods\n",
+ "import notutils as nu\n",
"from ipywidgets import IntSlider"
- ]
+ ],
+ "id": "92a804ff-5f53-4896-9d47-24e8ba96233c"
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import notutils as nu"
+ ],
+ "id": "7f7644dd-afa8-4e44-bbae-e225b76b08fc"
},
{
"cell_type": "code",
@@ -3885,27 +4447,30 @@
"metadata": {},
"outputs": [],
"source": [
- "pods.notebook.display_plots('two_point_sample{sample:0>3}.svg', \n",
+ "nu.display_plots('two_point_sample{sample:0>3}.svg', \n",
" './gp', \n",
" sample=IntSlider(0, 0, 8, 1))"
- ]
+ ],
+ "id": "180438e6-86bd-4079-bbd2-e77701f5ed70"
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
- "\n",
+ "\n",
"\n",
"Figure: The joint Gaussian over $f_1$ and $f_2$ along with the\n",
"conditional distribution of $f_2$ given $f_1$"
- ]
+ ],
+ "id": "b444945e-5697-4c80-83c3-e63df5fc0b6f"
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Joint Density of $f_1$ and $f_2$"
- ]
+ ],
+ "id": "62e15bb9-3640-412e-9859-4934db1c77f9"
},
{
"cell_type": "code",
@@ -3913,9 +4478,20 @@
"metadata": {},
"outputs": [],
"source": [
- "import pods\n",
+ "import notutils as nu\n",
"from ipywidgets import IntSlider"
- ]
+ ],
+ "id": "07f0e633-fcad-45b8-bdda-c50da4bd580c"
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import notutils as nu"
+ ],
+ "id": "6aa1f9d0-82a3-4e2c-ace5-2832ec67a824"
},
{
"cell_type": "code",
@@ -3923,46 +4499,91 @@
"metadata": {},
"outputs": [],
"source": [
- "pods.notebook.display_plots('two_point_sample{sample:0>3}.svg', \n",
+ "nu.display_plots('two_point_sample{sample:0>3}.svg', \n",
" './gp', \n",
" sample=IntSlider(9, 9, 12, 1))"
- ]
+ ],
+ "id": "c4c2744c-fa03-4d35-bead-c98b36434f26"
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
- "\n",
+ "\n",
"\n",
"Figure: The joint Gaussian over $f_1$ and $f_2$ along with the\n",
- "conditional distribution of $f_2$ given $f_1$\n",
+ "conditional distribution of $f_2$ given $f_1$"
+ ],
+ "id": "b25633e9-68b3-4f55-be5d-b00907ab0a60"
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Uluru\n",
"\n",
- "- The single contour of the Gaussian density represents the\n",
- " joint distribution, $p(f_1, f_2)$\n",
+ "\n",
"\n",
- ". . .\n",
+ "Figure: Uluru, the sacred rock in Australia. If we think of it as a\n",
+ "probability density, viewing it from this side gives us one *marginal*\n",
+ "from the density. Figuratively speaking, slicing through the rock would\n",
+ "give a conditional density.\n",
"\n",
- "- We observe that $f_1=?$\n",
+ "When viewing these contour plots, I sometimes find it helpful to think\n",
+ "of Uluru, the prominent rock formation in Australia. The rock rises\n",
+ "above the surface of the plane, just like a probability density rising\n",
+ "above the zero line. The rock is three dimensional, but when we view\n",
+ "Uluru from the classical position, we are looking at one side of it.\n",
+ "This is equivalent to viewing the marginal density.\n",
"\n",
- ". . .\n",
+ "The joint density can be viewed from above, using contours. The\n",
+ "conditional density is equivalent to *slicing* the rock. Uluru is a holy\n",
+ "rock, so this has to be an imaginary slice. Imagine we cut down a\n",
+ "vertical plane orthogonal to our view point (e.g. coming across our view\n",
+ "point). This would give a profile of the rock, which when renormalized,\n",
+ "would give us the conditional distribution, the value of conditioning\n",
+ "would be the location of the slice in the direction we are facing."
+ ],
+ "id": "c4af1e14-07ba-4cd6-83b4-9cb20f2a516f"
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Prediction with Correlated Gaussians\n",
"\n",
- "- Conditional density: $p(f_2|f_1=?)$\n",
+ "Of course in practice, rather than manipulating mountains physically,\n",
+ "the advantage of the Gaussian density is that we can perform these\n",
+ "manipulations mathematically.\n",
"\n",
- "- Prediction of $f_2$ from $f_1$ requires *conditional density*.\n",
+ "Prediction of $f_2$ given $f_1$ requires the *conditional density*,\n",
+ "$p(f_2|f_1)$.Another remarkable property of the Gaussian density is that\n",
+ "this conditional distribution is *also* guaranteed to be a Gaussian\n",
+ "density. It has the form, $$\n",
+ "p(f_2|f_1) = \\mathcal{N}\\left(f_2|\\frac{k_{1, 2}}{k_{1, 1}}f_1, k_{2, 2} - \\frac{k_{1,2}^2}{k_{1,1}}\\right)\n",
+ "$$where we have assumed that the covariance of the original joint\n",
+ "density was given by $$\n",
+ "\\mathbf{K}= \\begin{bmatrix} k_{1, 1} & k_{1, 2}\\\\ k_{2, 1} & k_{2, 2}.\\end{bmatrix}\n",
+ "$$\n",
"\n",
- "- Conditional density is *also* Gaussian. $$\n",
- " p(f_2|f_1) = {\\mathcal{N}\\left(f_2|\\frac{k_{1, 2}}{k_{1, 1}}f_1,k_{2, 2} - \\frac{k_{1,2}^2}{k_{1,1}}\\right)}\n",
- " $$ where covariance of joint density is given by $$\n",
- " \\mathbf{K}= \\begin{bmatrix} k_{1, 1} & k_{1, 2}\\\\ k_{2, 1} & k_{2, 2}\\end{bmatrix}\n",
- " $$"
- ]
+ "Using these formulae we can determine the conditional density for any of\n",
+ "the elements of our vector $\\mathbf{ f}$. For example, the variable\n",
+ "$f_8$ is less correlated with $f_1$ than $f_2$. If we consider this\n",
+ "variable we see the conditional density is more diffuse."
+ ],
+ "id": "7c52b7a2-8f94-4b5b-81a2-486878edb080"
},
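+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "A minimal numerical sketch of this conditioning step (the covariance\n",
+ "entries and the observed value below are illustrative numbers, not taken\n",
+ "from the lecture)."
+ ],
+ "id": "0f1e2d3c-0000-4000-8000-000000000005"
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# conditional p(f_2 | f_1) for a zero-mean joint Gaussian\n",
+ "k11, k12, k22 = 1.0, 0.8, 1.0   # entries of the 2x2 covariance (illustrative)\n",
+ "f1 = 0.5                        # observed value of f_1 (illustrative)\n",
+ "cond_mean = k12/k11*f1\n",
+ "cond_var = k22 - k12**2/k11"
+ ],
+ "id": "0f1e2d3c-0000-4000-8000-000000000006"
+ },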
{
"cell_type": "markdown",
"metadata": {},
"source": [
- "### Joint Density of $f_1$ and $f_8$"
- ]
+ "### Joint Density of $f_1$ and $f_8$\n",
+ "\n",
+ "\\[edit\\]"
+ ],
+ "id": "1b28600d-182b-435e-8d7f-99e7d8390b34"
},
{
"cell_type": "code",
@@ -3970,9 +4591,20 @@
"metadata": {},
"outputs": [],
"source": [
- "import pods\n",
+ "import notutils as nu\n",
"from ipywidgets import IntSlider"
- ]
+ ],
+ "id": "5b507bf9-e0b9-427c-8fc2-3da44d9e306e"
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import notutils as nu"
+ ],
+ "id": "3c84e2c9-722e-411c-b0cb-aede98bd492b"
},
{
"cell_type": "code",
@@ -3980,20 +4612,22 @@
"metadata": {},
"outputs": [],
"source": [
- "pods.notebook.display_plots('two_point_sample{sample:0>3}.svg', \n",
+ "nu.display_plots('two_point_sample{sample:0>3}.svg', \n",
" './gp', \n",
" sample=IntSlider(13, 13, 17, 1))"
- ]
+ ],
+ "id": "61ac85c7-92b3-4515-a63d-5c850bf6422a"
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
- "\n",
+ "\n",
"\n",
"Figure: Sample from the joint Gaussian model, points indexed by 1 and\n",
"8 highlighted."
- ]
+ ],
+ "id": "43d7b1e6-a0bd-4e6a-abcd-e8397dad3341"
},
{
"cell_type": "markdown",
@@ -4001,7 +4635,7 @@
"source": [
"### Prediction of $f_{8}$ from $f_{1}$\n",
"\n",
- "\n",
+ "\n",
"\n",
"Figure: The joint Gaussian over $f_1$ and $f_8$ along with the\n",
"conditional distribution of $f_8$ given $f_1$\n",
@@ -4015,7 +4649,7 @@
"\n",
". . .\n",
"\n",
- "- Conditional density: $p(f_5|f_1=?)$.\n",
+ "- Conditional density: $p(f_8|f_1=?)$.\n",
"\n",
"- Prediction of $\\mathbf{ f}_*$ from $\\mathbf{ f}$ requires\n",
" multivariate *conditional density*.\n",
@@ -4042,14 +4676,18 @@
"- Here covariance of joint density is given by $$\n",
" \\mathbf{K}= \\begin{bmatrix} \\mathbf{K}_{\\mathbf{ f}, \\mathbf{ f}} & \\mathbf{K}_{*, \\mathbf{ f}}\\\\ \\mathbf{K}_{\\mathbf{ f}, *} & \\mathbf{K}_{*, *}\\end{bmatrix}\n",
" $$"
- ]
+ ],
+ "id": "416c5359-c2a5-439c-a34f-62d117cfa00c"
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
- "The Importance of the Covariance Function\n",
- "-----------------------------------------\n",
+ "## The Importance of the Covariance Function\n",
+ "\n",
+ "\\[edit\\]\n",
"\n",
"The covariance function encapsulates our assumptions about the data. The\n",
"equations for the distribution of the prediction function, given the\n",
@@ -4068,7 +4706,8 @@
"process. It has a computational burden which is $O(n^3)$ and a storage\n",
"burden which is $O(n^2)$. This makes working with Gaussian processes\n",
"computationally intensive for the situation where $n>10,000$."
- ]
+ ],
+ "id": "89df4d0c-0a38-4306-841e-b70996ba3440"
},
{
"cell_type": "code",
@@ -4078,7 +4717,8 @@
"source": [
"from IPython.lib.display import YouTubeVideo\n",
"YouTubeVideo('ewJ3AxKclOg')"
- ]
+ ],
+ "id": "8ef48b7a-089e-45a8-8178-c92a732b5e75"
},
{
"cell_type": "markdown",
@@ -4087,20 +4727,35 @@
"Figure: Introduction to Gaussian processes given by Neil Lawrence at\n",
"the 2014 Gaussian process Winter School at the University of\n",
"Sheffield."
- ]
+ ],
+ "id": "98f96a6f-0bf5-491d-bd64-5c6d30eb31aa"
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
- "Improving the Numerics\n",
- "----------------------\n",
+ "## Improving the Numerics\n",
+ "\n",
+ "\\[edit\\]\n",
"\n",
"In practice we shouldn’t be using matrix inverse directly to solve the\n",
"GP system. One more stable way is to compute the *Cholesky\n",
"decomposition* of the kernel matrix. The log determinant of the\n",
"covariance can also be derived from the Cholesky decomposition."
- ]
+ ],
+ "id": "02c9ed7d-61a5-46ed-bb76-d2e8bc2b5b2a"
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import mlai"
+ ],
+ "id": "ff45a144-502e-4f3f-9a5a-510908cd4c3b"
},
{
"cell_type": "code",
@@ -4108,8 +4763,9 @@
"metadata": {},
"outputs": [],
"source": [
- "%load -s update_inverse mlai.py"
- ]
+ "%load -n mlai.update_inverse"
+ ],
+ "id": "03368356-7098-4a79-a7ca-6ad545ebee81"
},
{
"cell_type": "code",
@@ -4118,14 +4774,14 @@
"outputs": [],
"source": [
"GP.update_inverse = update_inverse"
- ]
+ ],
+ "id": "db2cc71d-e41c-422d-99a0-aff51491bdff"
},
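+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "As a sketch of the underlying idea (separate from the `mlai`\n",
+ "implementation, and assuming `K`, `y` and `sigma2` from earlier cells),\n",
+ "both the log determinant and the data-fit solve can be obtained from the\n",
+ "Cholesky factor without forming an explicit inverse."
+ ],
+ "id": "0f1e2d3c-0000-4000-8000-000000000007"
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import numpy as np\n",
+ "\n",
+ "L = np.linalg.cholesky(K + sigma2*np.eye(K.shape[0]))\n",
+ "log_det = 2*np.sum(np.log(np.diag(L)))               # log det of (K + sigma2 I)\n",
+ "alpha = np.linalg.solve(L.T, np.linalg.solve(L, y))  # (K + sigma2 I)^{-1} y"
+ ],
+ "id": "0f1e2d3c-0000-4000-8000-000000000008"
+ },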
{
"cell_type": "markdown",
"metadata": {},
"source": [
- "Capacity Control\n",
- "----------------\n",
+ "## Capacity Control\n",
"\n",
"Gaussian processes are sometimes seen as part of a wider family of\n",
"methods known as kernel methods. Kernel methods are also based around\n",
@@ -4150,46 +4806,47 @@
"parameter estimation (in the simplest case proceeds) by maximum\n",
"likelihood. This involves taking gradients of the likelihood with\n",
"respect to the parameters of the covariance function."
- ]
+ ],
+ "id": "71022a58-f314-43b6-94ef-dba48b694fb3"
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
- "Gradients of the Likelihood\n",
- "---------------------------\n",
+ "## Gradients of the Likelihood\n",
"\n",
"The easiest conceptual way to obtain the gradients is a two step\n",
"process. The first step involves taking the gradient of the likelihood\n",
"with respect to the covariance function, the second step involves\n",
"considering the gradient of the covariance function with respect to its\n",
"parameters."
- ]
+ ],
+ "id": "face961f-ebdc-4ee1-89c8-aedce8856441"
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
- "Overall Process Scale\n",
- "---------------------\n",
+ "## Overall Process Scale\n",
"\n",
"In general we won’t be able to find parameters of the covariance\n",
"function through fixed point equations, we will need to do gradient\n",
"based optimization."
- ]
+ ],
+ "id": "d9a56244-5622-4ca5-a9d3-9445c3bea660"
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
- "Capacity Control and Data Fit\n",
- "-----------------------------\n",
+ "## Capacity Control and Data Fit\n",
"\n",
"The objective function can be decomposed into two terms, a capacity\n",
"control term, and a data fit term. The capacity control term is the log\n",
"determinant of the covariance. The data fit term is the matrix inner\n",
"product between the data and the inverse covariance."
- ]
+ ],
+ "id": "c983ce2e-0b75-48f7-9230-4786dc7d79bf"
},
{
"cell_type": "code",
@@ -4210,14 +4867,18 @@
" new = rotationMatrix*[xd(:)'; yd(:)'];\n",
" set(handle(i), 'xdata', new(1, :));\n",
" set(handle(i), 'ydata', new(2, :));"
- ]
+ ],
+ "id": "ca5b109c-8e6c-496f-b56c-b202a2b61f2e"
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
- "Learning Covariance Parameters\n",
- "------------------------------\n",
+ "## Learning Covariance Parameters\n",
+ "\n",
+ "\\[edit\\]\n",
"\n",
"Can we determine covariance parameters from the data?\n",
"\n",
@@ -4240,67 +4901,29 @@
"$$\n",
"E(\\boldsymbol{ \\theta}) = \\color{blue}{\\frac{1}{2}\\log\\det{\\mathbf{K}}} + \\color{red}{\\frac{\\mathbf{ y}^{\\top}\\mathbf{K}^{-1}\\mathbf{ y}}{2}}\n",
"$$"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- " clf\n",
- " lambda1 = 3;\n",
- " lambda2 = 1;\n",
- " t = linspace(-pi, pi, 200);\n",
- " R = [sqrt(2)/2 -sqrt(2)/2; sqrt(2)/2 sqrt(2)/2];\n",
- " xy = R*[lambda1*sin(t); lambda2*cos(t)];\n",
- " line(xy(1, :), xy(2, :), 'linewidth', 3, 'color', blackColor);\n",
- " axis off, axis equal\n",
- " a = arrow([0 lambda1*R(1, 1)], [0 lambda1*R(2, 1)]);\n",
- " set(a, 'linewidth', 3, 'color', blueColor);\n",
- " a = arrow([0 lambda2*R(1, 2)], [0 lambda2*R(2, 2)]);\n",
- " set(a, 'linewidth', 3, 'color', blueColor);\n",
- " xlim = get(gca, 'xlim');\n",
- " xspan = xlim(2) - xlim(1);\n",
- " ylim = get(gca, 'ylim');\n",
- " yspan = ylim(2) - ylim(1);\n",
- " text(lambda1*0.5*R(1, 1)-0.05*xspan, lambda1*0.5*R(2, 1)-yspan*0.05, '$\\eigenvalue_1$')\n",
- " text(lambda2*0.5*R(1, 2)-0.05*xspan, lambda2*0.5*R(2, 2)-yspan*0.05, '$\\eigenvalue_2$')\n",
- " fileName = 'gpOptimiseEigen';\n",
- " printLatexPlot(fileName, directory, 0.45*textWidth)"
- ]
+ ],
+ "id": "92fbe5fe-9776-4e12-af52-ae1181d5e4c1"
},
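+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "A minimal sketch of evaluating this objective and its gradient with\n",
+ "respect to the lengthscale of the exponentiated quadratic covariance\n",
+ "(assuming `Kernel`, `eq_cov`, `x`, `y` and `sigma2` from earlier; the\n",
+ "constant term and the `mlai` class machinery are omitted)."
+ ],
+ "id": "0f1e2d3c-0000-4000-8000-000000000009"
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import numpy as np\n",
+ "\n",
+ "def gp_objective_and_grad(lengthscale, x, y, sigma2, variance=1.):\n",
+ "    # E(theta) = 0.5 log det K + 0.5 y' K^{-1} y for the eq_cov kernel,\n",
+ "    # plus dE/d lengthscale via the two step chain rule described above.\n",
+ "    kernel = Kernel(eq_cov, variance=variance, lengthscale=lengthscale)\n",
+ "    K = kernel.K(x, x) + sigma2*np.eye(x.shape[0])\n",
+ "    Kinv = np.linalg.inv(K)      # for clarity; use the Cholesky in practice\n",
+ "    alpha = Kinv@y\n",
+ "    E = 0.5*np.linalg.slogdet(K)[1] + 0.5*(y*alpha).sum()\n",
+ "    dE_dK = 0.5*Kinv - 0.5*alpha@alpha.T\n",
+ "    r2 = ((x[:, None, :] - x[None, :, :])**2).sum(-1)   # squared distances\n",
+ "    dK_dl = kernel.K(x, x)*r2/lengthscale**3             # d eq_cov / d lengthscale\n",
+ "    return E, (dE_dK*dK_dl).sum()"
+ ],
+ "id": "0f1e2d3c-0000-4000-8000-000000000010"
+ },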
{
"cell_type": "markdown",
"metadata": {},
"source": [
- "Capacity Control through the Determinant\n",
- "----------------------------------------\n",
+ "## Capacity Control through the Determinant\n",
+ "\n",
+ "\\[edit\\]\n",
"\n",
"The parameters are *inside* the covariance function (matrix).\n",
"$$k_{i, j} = k(\\mathbf{ x}_i, \\mathbf{ x}_j; \\boldsymbol{ \\theta})$$\n",
"\n",
- "$$\\mathbf{K}= \\mathbf{R}\\boldsymbol{ \\Lambda}^2 \\mathbf{R}^\\top$$"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "gpoptimizePlot1"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
+ "$$\\mathbf{K}= \\mathbf{R}\\boldsymbol{ \\Lambda}^2 \\mathbf{R}^\\top$$\n",
+ "\n",
"
\(f(\cdot)\) is a scalar function with vector inputs,
-
\(\boldsymbol{ \phi}(\cdot)\) is a vector function with vector inputs.
+
\(f(\cdot)\) is a scalar function
+with vector inputs,
+
\(\boldsymbol{ \phi}(\cdot)\) is a
+vector function with vector inputs.
-
dimensionality of the vector function is known as the number of hidden units, or the number of neurons.
-
elements of \(\boldsymbol{ \phi}(\cdot)\) are the activation function of the neural network
-
elements of \(\mathbf{W}_{1}\) are the parameters of the activation functions.
+
dimensionality of the vector function is known as the number of
+hidden units, or the number of neurons.
+
elements of \(\boldsymbol{
+\phi}(\cdot)\) are the activation function of the neural
+network
+
elements of \(\mathbf{W}_{1}\)
+are the parameters of the activation functions.
Relations with Classical Statistics
-
In statistics activation functions are known as basis functions.
-
would think of this as a linear model: not linear predictions, linear in the parameters
-
\(\mathbf{ w}_{1}\) are static parameters.
+
In statistics activation functions are known as basis
+functions.
+
would think of this as a linear model: not linear
+predictions, linear in the parameters
+
\(\mathbf{ w}_{1}\) are
+static parameters.
Adaptive Basis Functions
-
In machine learning we optimize \(\mathbf{W}_{1}\) as well as \(\mathbf{W}_{2}\) (which would normally be denoted in statistics by \(\boldsymbol{\beta}\)).
+
In machine learning we optimize \(\mathbf{W}_{1}\) as well as \(\mathbf{W}_{2}\) (which would normally be
+denoted in statistics by \(\boldsymbol{\beta}\)).
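As a rough sketch of the shape of such a model (the tanh activation and the array names here are illustrative, not the lecture's code):

import numpy as np
def predict(X, W1, w2):
    # adaptive basis functions: phi(X W1) plays the role of the basis set,
    # and both W1 and w2 are optimized during learning
    Phi = np.tanh(X@W1)
    return Phi@w2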
We are given a data set containing ‘inputs’, \(\mathbf{X}\) and ‘targets’, \(\mathbf{ y}\).
+
Each data point consists of an input vector \(\mathbf{ x}_i\) and a class label, \(y_i\).
+
For binary classification assume \(y_i\) should be either \(1\) (yes) or \(-1\) (no).
Input vector can be thought of as features.
Discrete Probability
-
Algorithms based on prediction function and objective function.
-
For regression the codomain of the functions, \(f(\mathbf{X})\) was the real numbers or sometimes real vectors.
-
In classification we are given an input vector, \(\mathbf{ x}\), and an associated label, \(y\) which either takes the value \(-1\) or \(1\).
+
Algorithms based on prediction function and
+objective function.
+
For regression the codomain of the functions, \(f(\mathbf{X})\) was the real numbers or
+sometimes real vectors.
+
In classification we are given an input vector, \(\mathbf{ x}\), and an associated label,
+\(y\) which either takes the value
+\(-1\) or \(1\).
Classification
-
Inputs, \(\mathbf{ x}\), mapped to a label, \(y\), through a function \(f(\cdot)\) dependent on parameters, \(\mathbf{ w}\), \[
+
Inputs, \(\mathbf{ x}\), mapped to
+a label, \(y\), through a function
+\(f(\cdot)\) dependent on parameters,
+\(\mathbf{ w}\), \[
y= f(\mathbf{ x}; \mathbf{ w}).
\]
-
\(f(\cdot)\) is known as the prediction function.
+
\(f(\cdot)\) is known as the
+prediction function.
Classification Examples
-
Classifiying hand written digits from binary images (automatic zip code reading)
+
Classifying handwritten digits from binary images (automatic zip
+code reading)
Detecting faces in images (e.g. digital cameras).
Who a detected face belongs to (e.g. Facebook, DeepFace)
Classifying type of cancer given gene expression data.
-
Categorization of document types (different types of news article on the internet)
+
Categorization of document types (different types of news article on
+the internet)
@@ -1222,313 +992,184 @@
Perceptron
Simple classification with the perceptron algorithm.
@@ -1536,11 +1177,15 @@
Logistic Regression and GLMs
-
Modelling entire density allows any question to be answered (also missing data).
-
Comes at the possible expense of strong assumptions about data generation distribution.
-
In regression we model probability of \(y_i |\mathbf{ x}_i\) directly.
+
Modelling entire density allows any question to be answered (also
+missing data).
+
Comes at the possible expense of strong assumptions about
+data generation distribution.
+
In regression we model probability of \(y_i |\mathbf{ x}_i\) directly.
-
Allows less flexibility in the question, but more flexibility in the model assumptions.
+
Allows less flexibility in the question, but more
+flexibility in the model assumptions.
Can do this not just for regression, but classification.
Framework is known as generalized linear models.
@@ -1550,32 +1195,55 @@
Logistic Regression and GLMs
Log Odds
model the log-odds with the basis functions.
-
odds are defined as the ratio of the probability of a positive outcome, to the probability of a negative outcome.
-
Probability is between zero and one, odds are: \[ \frac{\pi}{1-\pi} \]
-
Odds are between \(0\) and \(\infty\).
-
Logarithm of odds maps them to \(-\infty\) to \(\infty\).
+
odds are defined as
+the ratio of the probability of a positive outcome, to the probability
+of a negative outcome.
+
Probability is between zero and one, odds are: \[ \frac{\pi}{1-\pi} \]
+
Odds are between \(0\) and \(\infty\).
+
Logarithm of odds maps them to \(-\infty\) to \(\infty\).
Logit Link Function
-
The Logit function, \[g^{-1}(\pi_i) = \log\frac{\pi_i}{1-\pi_i}.\] This function is known as a link function.
-
For a standard regression we take, \[f(\mathbf{ x}_i) = \mathbf{ w}^\top \boldsymbol{ \phi}(\mathbf{ x}_i),\]
-
For classification we perform a logistic regression. \[\log \frac{\pi_i}{1-\pi_i} = \mathbf{ w}^\top \boldsymbol{ \phi}(\mathbf{ x}_i)\]
+
The Logit function,
+\[g^{-1}(\pi_i) =
+\log\frac{\pi_i}{1-\pi_i}.\] This function is known as a link
+function.
+
For a standard regression we take, \[f(\mathbf{ x}_i) = \mathbf{ w}^\top \boldsymbol{
+\phi}(\mathbf{ x}_i),\]
+
For classification we perform a logistic regression. \[\log \frac{\pi_i}{1-\pi_i} = \mathbf{ w}^\top
+\boldsymbol{ \phi}(\mathbf{ x}_i)\]
Inverse Link Function
-
We have defined the link function as taking the form \(g^{-1}(\cdot)\) implying that the inverse link function is given by \(g(\cdot)\). Since we have defined, \[
-g^{-1}(\pi(\mathbf{ x})) = \mathbf{ w}^\top\boldsymbol{ \phi}(\mathbf{ x})
-\] we can write \(\pi\) in terms of the inverse link function, \(g(\cdot)\) as \[
+
We have defined the link function as taking the form \(g^{-1}(\cdot)\) implying that the inverse
+link function is given by \(g(\cdot)\).
+Since we have defined, \[
+g^{-1}(\pi(\mathbf{ x})) = \mathbf{ w}^\top\boldsymbol{ \phi}(\mathbf{
+x})
+\] we can write \(\pi\) in terms
+of the inverse link function, \(g(\cdot)\) as \[
\pi(\mathbf{ x}) = g(\mathbf{ w}^\top\boldsymbol{ \phi}(\mathbf{ x})).
\]
Logistic function
-
Logistic (or sigmoid) squashes real line to between 0 & 1. Sometimes also called a ‘squashing function’.
+
Logistic (or
+sigmoid) squashes real line to between 0 & 1. Sometimes also called
+a ‘squashing function’.
+
@@ -1584,29 +1252,41 @@
Basis Function
Prediction Function
-
Can now write \(\pi\) as a function of the input and the parameter vector as, \[\pi(\mathbf{ x},\mathbf{ w}) = \frac{1}{1+
-\exp\left(-\mathbf{ w}^\top \boldsymbol{ \phi}(\mathbf{ x})\right)}.\]
-
Compute the output of a standard linear basis function composition (\(\mathbf{ w}^\top \boldsymbol{ \phi}(\mathbf{ x})\), as we did for linear regression)
-
Apply the inverse link function, \(g(\mathbf{ w}^\top \boldsymbol{ \phi}(\mathbf{ x}))\).
-
Use this value in a Bernoulli distribution to form the likelihood.
+
Can now write \(\pi\) as a function
+of the input and the parameter vector as, \[\pi(\mathbf{ x},\mathbf{ w}) = \frac{1}{1+
+\exp\left(-\mathbf{ w}^\top \boldsymbol{ \phi}(\mathbf{
+x})\right)}.\]
+
Compute the output of a standard linear basis function composition
+(\(\mathbf{ w}^\top \boldsymbol{
+\phi}(\mathbf{ x})\), as we did for linear regression)
+
Apply the inverse link function, \(g(\mathbf{ w}^\top \boldsymbol{ \phi}(\mathbf{
+x}))\).
+
Use this value in a Bernoulli distribution to form the
+likelihood.
Bernoulli Reminder
-
From last time \[P(y_i|\mathbf{ w}, \mathbf{ x}) = \pi_i^{y_i} (1-\pi_i)^{1-y_i}\]
+
From last time \[P(y_i|\mathbf{ w},
+\mathbf{ x}) = \pi_i^{y_i} (1-\pi_i)^{1-y_i}\]
Trick for switching between probabilities
-
def bernoulli(y, pi):
-if y ==1:
-return pi
-else:
-return1-pi
+
def bernoulli(y, pi):
+    if y == 1:
+        return pi
+    else:
+        return 1 - pi
Probability of positive outcome for the \(i\)th data point \[\pi_i = g\left(\mathbf{ w}^\top \boldsymbol{ \phi}(\mathbf{ x}_i)\right),\] where \(g(\cdot)\) is the inverse link function
-
Objective function of the form \[\begin{align*}
+
Probability of positive outcome for the \(i\)th data point \[\pi_i = g\left(\mathbf{ w}^\top \boldsymbol{
+\phi}(\mathbf{ x}_i)\right),\] where \(g(\cdot)\) is the inverse link
+function
+
Objective function of the form \[\begin{align*}
E(\mathbf{ w}) = & - \sum_{i=1}^ny_i \log
- g\left(\mathbf{ w}^\top \boldsymbol{ \phi}(\mathbf{ x}_i)\right) \\& -
+ g\left(\mathbf{ w}^\top \boldsymbol{ \phi}(\mathbf{ x}_i)\right)
+\\& -
\sum_{i=1}^n(1-y_i)\log \left(1-g\left(\mathbf{ w}^\top
\boldsymbol{ \phi}(\mathbf{ x}_i)\right)\right).
- \end{align*}\]
Also need gradient of inverse link function wrt parameters. \[\begin{align*}
+
Also need gradient of inverse link function wrt parameters. \[\begin{align*}
g(f_i) &= \frac{1}{1+\exp(-f_i)}\\
&=(1+\exp(-f_i))^{-1}
-\end{align*}\] and the gradient can be computed as \[\begin{align*}
+\end{align*}\] and the gradient can be computed as \[\begin{align*}
\frac{\text{d}g(f_i)}{\text{d} f_i} & =
\exp(-f_i)(1+\exp(-f_i))^{-2}\\
& = \frac{1}{1+\exp(-f_i)}
@@ -1663,35 +1356,60 @@
Similarly to matrix factorization, for large data stochastic gradient descent (Robbins Munro (Robbins and Monro, 1951) optimization procedure) works well.
+
Similarly to matrix factorization, for large data stochastic
+gradient descent (Robbins Munro (Robbins and Monro, 1951)
+optimization procedure) works well.
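As a rough sketch of one such stochastic update for the objective above (assuming labels y_i in {0, 1}; the basis vector, learning rate and names are illustrative):

import numpy as np
def sgd_step(w, phi_i, y_i, lr=0.01):
    # pi_i = g(w' phi_i) with the logistic inverse link
    pi_i = 1./(1. + np.exp(-np.dot(w, phi_i)))
    # gradient of the negative log likelihood for a single data point
    return w - lr*(pi_i - y_i)*phi_i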
-
data.head()}
+
-
+
+
Nigeria NMIS Data
+
+
+
Nigeria NMIS Data: Notebook
+
+
Nigeria NMIS Data Classification
+
+
Batch Gradient Descent
Stochastic Gradient Descent
-
-
Exercise 2
-
Now construct a stochastic gradient descent algorithm and run it on the data. Is it faster or slower than batch gradient descent? What can you do to improve convergence speed?
-
Regression
@@ -1702,18 +1420,25 @@
Regression
Regression Examples
-
Predict a real value, \(y_i\) given some inputs \(\mathbf{ x}_i\).
-
Predict quality of meat given spectral measurements (Tecator data).
-
Radiocarbon dating, the C14 calibration curve: predict age given quantity of C14 isotope.
-
Predict quality of different Go or Backgammon moves given expert rated training data.
+
Predict a real value, \(y_i\) given
+some inputs \(\mathbf{ x}_i\).
+
Predict quality of meat given spectral measurements (Tecator
+data).
+
Radiocarbon dating, the C14 calibration curve: predict age given
+quantity of C14 isotope.
+
Predict quality of different Go or Backgammon moves given expert
+rated training data.
Supervised Learning Challenges
-
choosing which features, \(\mathbf{ x}\), are relevant in the prediction
-
defining the appropriate class of function, \(f(\cdot)\).
-
selecting the right parameters, \(\mathbf{ w}\).
+
choosing which features, \(\mathbf{
+x}\), are relevant in the prediction
+
defining the appropriate class of function, \(f(\cdot)\).
+
selecting the right parameters, \(\mathbf{
+w}\).
@@ -1728,13 +1453,16 @@
Feature Selection
Applications
rank search results, what adverts to show, newsfeed ranking
-
Features: number of likes, image present, friendship relationship
+
Features: number of likes, image present, friendship
+relationship
-
Class of Function, \(f(\cdot)\)
+
Class of Function, \(f(\cdot)\)
-
Mapping characteristic between \(\mathbf{ x}\) and \(y\)?
+
Mapping characteristic between \(\mathbf{
+x}\) and \(y\)?
smooth (similar inputs lead to similar outputs).
linear function.
@@ -1742,17 +1470,34 @@
Class of Function, \(f(\cdot)\)
-
+
+
+
+
-
+
@@ -1763,12 +1508,12 @@
Gelman Book
-
+
-
+
@@ -1776,9 +1521,13 @@
Gelman Book
-
Gelman et al. (2013)
+
+Gelman et al.
+(2013)
+
Class of Function: Neural Networks
@@ -1792,7 +1541,8 @@
Class of Function: Invariances
An invariance is a transformation of the input
-
e.g. a cat remains a cat regardless of location (translation), size (scale) or upside-down (rotation and reflection).
+
e.g. a cat remains a cat regardless of location (translation), size
+(scale) or upside-down (rotation and reflection).
@@ -1803,62 +1553,83 @@
Deep Learning
Deep Learning
-
These are interpretable models: vital for disease modeling etc.
+
These are interpretable models: vital for disease modeling
+etc.
Modern machine learning methods are less interpretable
Example: face recognition
-
-
DeepFace
-
Outline of the DeepFace architecture. A front-end of a single convolution-pooling-convolution filtering on the rectified input, followed by three locally-connected layers and two fully-connected layers. Color illustrates feature maps produced at each layer. The net includes more than 120 million parameters, where more than 95% come from the local and fully connected.
+
+
+
Outline of the DeepFace
+architecture. A front-end of a single convolution-pooling-convolution
+filtering on the rectified input, followed by three locally-connected
+layers and two fully-connected layers. Color illustrates feature maps
+produced at each layer. The net includes more than 120 million
+parameters, where more than 95% come from the local and fully
+connected.
-
+
-
Source: DeepFace (Taigman et al., 2014)
+
+Source: DeepFace (Taigman et al., 2014)
+
Deep Learning as Pinball
+
@@ -1873,16 +1644,20 @@
Encoding Knowledge
Choosing Prediction Function
-
Any function e.g. polynomials for olympic data \[
+
Any function e.g. polynomials for olympic data \[
f(x) = w_0 + w_1 x+ w_2 x^2 + w_3 x^3 + w_4 x^4.
\]
-
+
Parameter Estimation: Objective Functions
-
After choosing features and function class we need parameters.
-
Estimate \(\mathbf{ w}\) by specifying an objective function.
+
After choosing features and function class we need
+parameters.
+
Estimate \(\mathbf{ w}\) by
+specifying an objective function.
And labels \(y_1\), \(y_2\), \(y_3\), \(\dots\), \(y_n\).
Sometimes label is cheap e.g. Newsfeed ranking
Often it is very expensive.
@@ -1911,7 +1695,8 @@
Annotation
Human annotators
-
E.g. in ImageNet annotated using Amazon’s Mechanical Turk. (AI?)
+
E.g. in ImageNet annotated using Amazon’s Mechanical Turk.
+(AI?)
Without humans no AI.
Not real intelligence, emulated
@@ -1964,13 +1749,15 @@
Difficult Trap
Validation data is different from test data.
-
+
Hold Out Validation on Olympic Marathon Data
Overfitting
-
Increase number of basis functions we obtain a better ‘fit’ to the data.
+
Increase number of basis functions we obtain a better ‘fit’ to the
+data.
How will the model perform on previously unseen data?
Let’s consider predicting the future.
@@ -1980,82 +1767,63 @@
Future Prediction: Extrapolation
@@ -2067,17 +1835,21 @@
Extrapolation
Extrapolation is predicting into the future here, but could be:
Predicting back to the unseen past (pre 1892)
-
Spatial prediction (e.g. Cholera rates outside Manchester given rates inside Manchester).
+
Spatial prediction (e.g. Cholera rates outside Manchester given
+rates inside Manchester).
Interpolation
-
Predicting the wining time for 1946 Olympics is interpolation.
+
Predicting the wining time for 1946 Olympics is
+interpolation.
This is because we have times from 1936 and 1948.
-
If we want a model for interpolation how can we test it?
-
One trick is to sample the validation set from throughout the data set.
+
If we want a model for interpolation how can we test
+it?
+
One trick is to sample the validation set from throughout the data
+set.
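As a sketch (assuming arrays x and y holding the inputs and targets), a random subset of indices can be held out for interpolation validation:

import numpy as np
indices = np.random.permutation(x.shape[0])
n_valid = x.shape[0]//5   # hold out roughly 20%, an illustrative choice
valid_ind, train_ind = indices[:n_valid], indices[n_valid:]
x_train, y_train = x[train_ind], y[train_ind]
x_valid, y_valid = x[valid_ind], y[valid_ind]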
@@ -2085,122 +1857,103 @@
Future Prediction: Interpolation
-
-
+
Choice of Validation Set
-
The choice of validation set should reflect how you will use the model in practice.
-
For extrapolation into the future we tried validating with data from the future.
+
The choice of validation set should reflect how you will use the
+model in practice.
+
For extrapolation into the future we tried validating with data from
+the future.
For interpolation we chose validation set from data.
For different validation sets we could get different results.
-
-
Exercise 3
-
For both the linear and quadratic models, fit the model to the data up until 1980 and then compute the error on the held out data (from 1980 onwards). Which model performs better on the validation data?
-
-
-
Exercise 4
-
Now we are going to build a more sophisticated form of basis function, one that can accept arguments to its inputs (similar to those we used in this lab). Here we will start with a polynomial basis.
The basis as we’ve defined it has three arguments as well as the input. The degree of the polynomial, the scale of the polynomial and the offset. These arguments need to be passed to the basis functions whenever they are called. Modify your code to pass these additional arguments to the python function for creating the basis. Do this for each of your functions predict, fit and objective. You will find *args (or **kwargs) useful.
-
Write code that tries to fit different models to the data with polynomial basis. Use a maximum degree for your basis from 0 to 17. For each polynomial store the hold out validation error and the training error. When you have finished the computation plot the hold out error for your models and the training error for your polynomials. When computing your polynomial basis use offset=1956. and scale=120. to ensure that the data is mapped (roughly) to the -1, 1 range.
-
Which polynomial has the minimum training error? Which polynomial has the minimum validation error?
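A sketch of the kind of basis the exercise describes (the argument names mirror the offset and scale mentioned above; the exact lab function may differ):

import numpy as np
def polynomial(x, degree=3, offset=1956., scale=120.):
    # polynomial basis with offset and scale so inputs map roughly to [-1, 1]
    z = (x - offset)/scale
    return np.hstack([z**d for d in range(degree + 1)])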