diff --git a/.github/workflows/deploy-pages.yml b/.github/workflows/deploy-pages.yml new file mode 100644 index 00000000..52f825e9 --- /dev/null +++ b/.github/workflows/deploy-pages.yml @@ -0,0 +1,17 @@ +name: deploy-pages +on: + push: + branches: + - master + +jobs: + deploy: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v2 + - uses: actions/setup-python@v2 + with: + python-version: 3.x + - run: pip install mkdocs-material + - run: pip install mkdocstrings + - run: mkdocs gh-deploy --force \ No newline at end of file diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 2e81144a..b3c541d8 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -68,8 +68,8 @@ A pre-commit hook is available to auto-format code with 3. Install pre-commit: ``$ pip install pre-commit`` 4. Install git hooks in your ``.git`` directory: ``$ pre-commit install`` -Names for functions, arguments, classes, and methods should be as descriptive as possible, -even if it means making them a little longer. For example, `generate_surface_structures` is +Names for functions, arguments, classes, and methods should be as descriptive as possible, +even if it means making them a little longer. For example, `generate_surface_structures` is a preferred function name to `gen_surfs`. All class names should adhere to [upper CamelCase](https://en.wikipedia.org/wiki/Camel_case). @@ -86,16 +86,16 @@ A passing build requires the following: * Every line of code is executed by a test (100% coverage) * Documentation has been updated or extended (as needed) and builds -PR descriptions should describe the motivation and context of the code changes in the PR, -both for the reviewer and also for future developers. If there's a Github issue, the PR should +PR descriptions should describe the motivation and context of the code changes in the PR, -both for the reviewer and also for future developers. If there's a Github issue, the PR should be linked to the issue to provide that context. 
## Documentation -`AutoCat` documentation is built using `mkdocs` via -[`mkdocs-material`](https://squidfunk.github.io/mkdocs-material/) -and +`AutoCat` documentation is built using `mkdocs` via +[`mkdocs-material`](https://squidfunk.github.io/mkdocs-material/) +and [`mkdocstrings`](https://mkdocstrings.github.io/). -All custom documentation should be written as `.md` files, appropriately placed within +All custom documentation should be written as `.md` files, appropriately placed within `docs/`, and referenced within the `mkdocs.yml` file. With `mkdocs` the docs webpage can be hosted locally with the command: @@ -106,3 +106,4 @@ which will give an `html` link that can be pasted in a web-browser. API documentation is automatically generated with `mkdocstrings` which parses the docstrings. Please ensure that all docstrings follow the Numpy style. + diff --git a/MANIFEST.in b/MANIFEST.in index 9d3f36c3..eda156cb 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -1,3 +1,4 @@ include src/autocat/data/**/*.json include src/autocat/VERSION.txt -include bin/autocat \ No newline at end of file +include bin/autocat +include CONTRIBUTING.md diff --git a/README.md b/README.md index 53d863ed..4b766559 100644 --- a/README.md +++ b/README.md @@ -1,9 +1,9 @@ -# AutoCat +# AutoCat -AutoCat is a suite of python tools for **sequential learning for materials applications** +AutoCat is a suite of python tools for **sequential learning for materials applications** and **automating structure generation for DFT catalysis studies.** -Development of this package stems from [ACED](https://www.cmu.edu/aced/), as part of the +Development of this package stems from [ACED](https://www.cmu.edu/aced/), as part of the ARPA-E DIFFERENTIATE program. 
## Installation @@ -13,10 +13,9 @@ There are two options for installation, either via `pip` or from the repo direct ### `pip` (recommended) If you are planning on strictly using AutoCat rather than contributing to development, - we recommend using `pip` within a virtual environment (e.g. + we recommend using `pip` within a virtual environment (e.g. [`conda`](https://docs.conda.io/en/latest/) - ). This can be done -as follows: + ). This can be done as follows: ``` pip install autocat @@ -29,10 +28,10 @@ AutoCat can be installed via a clone from Github. First, you'll need to clone th github repo to your local machine (or wherever you'd like to use AutoCat) using `git clone`. Once the repo has been cloned, you can install AutoCat as an editable package by changing into the created directory (the one with `setup.py`) and installing -via: +via: ``` pip install -e . ``` ## Contributing -Contributions through issues, feature requests, and pull requests are welcome. -Guidelines are provided [here](CONTRIBUTING.md). \ No newline at end of file +Contributions through issues, feature requests, and pull requests are welcome. +Guidelines are provided [here](CONTRIBUTING.md). 
diff --git a/docs/API/Learning/featurizers.md b/docs/API/Learning/featurizers.md new file mode 100644 index 00000000..e0cf1037 --- /dev/null +++ b/docs/API/Learning/featurizers.md @@ -0,0 +1 @@ +::: autocat.learning.featurizers diff --git a/docs/API/Learning/predictors.md b/docs/API/Learning/predictors.md new file mode 100644 index 00000000..2ce437cc --- /dev/null +++ b/docs/API/Learning/predictors.md @@ -0,0 +1 @@ +::: autocat.learning.predictors diff --git a/docs/API/Learning/sequential.md b/docs/API/Learning/sequential.md new file mode 100644 index 00000000..ca81144c --- /dev/null +++ b/docs/API/Learning/sequential.md @@ -0,0 +1 @@ +::: autocat.learning.sequential diff --git a/docs/API/Structure_Generation/adsorption.md b/docs/API/Structure_Generation/adsorption.md new file mode 100644 index 00000000..daca8e34 --- /dev/null +++ b/docs/API/Structure_Generation/adsorption.md @@ -0,0 +1 @@ +::: autocat.adsorption diff --git a/docs/API/Structure_Generation/bulk.md b/docs/API/Structure_Generation/bulk.md new file mode 100644 index 00000000..80e0ed2b --- /dev/null +++ b/docs/API/Structure_Generation/bulk.md @@ -0,0 +1 @@ +::: autocat.bulk diff --git a/docs/API/Structure_Generation/saa.md b/docs/API/Structure_Generation/saa.md new file mode 100644 index 00000000..f67a49c6 --- /dev/null +++ b/docs/API/Structure_Generation/saa.md @@ -0,0 +1,3 @@ +# Single Atom Alloys + +::: autocat.saa diff --git a/docs/API/Structure_Generation/surface.md b/docs/API/Structure_Generation/surface.md new file mode 100644 index 00000000..d826fc1f --- /dev/null +++ b/docs/API/Structure_Generation/surface.md @@ -0,0 +1 @@ +::: autocat.surface diff --git a/docs/Makefile b/docs/Makefile deleted file mode 100644 index c7c06965..00000000 --- a/docs/Makefile +++ /dev/null @@ -1,20 +0,0 @@ -# Minimal makefile for Sphinx documentation -# - -# You can set these variables from the command line, and also -# from the environment for the first two. 
-SPHINXOPTS ?= -SPHINXBUILD ?= sphinx-build -SOURCEDIR = src -BUILDDIR = _build - -# Put it first so that "make" without argument is like "make help". -help: - @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) - -.PHONY: help Makefile - -# Catch-all target: route all unknown targets to Sphinx using the new -# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). -%: Makefile - @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) diff --git a/docs/README.md b/docs/README.md new file mode 100644 index 00000000..b8cd9c30 --- /dev/null +++ b/docs/README.md @@ -0,0 +1,91 @@ +# AutoCat Documentation + +![AutoCat Logo](img/autocat_logo.png){ align=right } + +AutoCat is a suite of python tools for **sequential learning for materials applications** +and **automating structure generation for DFT catalysis studies.** + +Development of this package stems from [ACED](https://www.cmu.edu/aced/), as part of the +ARPA-E DIFFERENTIATE program. + +Below we provide an overview of the key functionalities of AutoCat. +For additional details please see the User Guide, Tutorials, and API sections. + +## Sequential Learning + +One of the core philosophies of AutoCat is to provide modular and extensible tooling to +facilitate closed-loop computational materials discovery workflows. Within this submodule +are classes for defining a design space, featurization, +regression, and defining a closed-loop sequential learning iterator. 
The +key classes intended for each of these purposes are: + +- [**`DesignSpace`**](User_Guide/Learning/sequential#designspace): define a design space to explore + +- [**`Featurizer`**](User_Guide/Learning/featurizers): featurize the systems for regression + +- [**`Predictor`**](User_Guide/Learning/predictors): a regressor for predicting materials properties + +- [**`SequentialLearner`**](User_Guide/Learning/sequential#sequentiallearner): define a closed-loop iterator + + +## Structure Generation + +![Adsorption Figure](img/struct_gen_figs/adsorption.png){ align=right } + +This submodule contains functions for automating atomic structure generation +within the context of a catalysis study using density functional theory. +Specifically, this includes generating bulk structures, surfaces, and +placing adsorbates. In addition, functions for generating the single-atom alloys +material class are also included. These functions are organized within AutoCat as follows: + +- [**`autocat.bulk`**](User_Guide/Structure_Generation/bulk): generation of periodic +mono-elemental bulk structures + +- [**`autocat.surface`**](User_Guide/Structure_Generation/surface): mono-elemental surface slab generation + +- [**`autocat.adsorption`**](User_Guide/Structure_Generation/adsorption): placement of adsorbates onto surfaces + +- [**`autocat.saa`**](User_Guide/Structure_Generation/saa): generation of single-atom alloy surfaces + +Structures generated or read with this package are typically of the form of +[`ase.Atoms`](https://wiki.fysik.dtu.dk/ase/ase/atoms.html#module-ase.atoms) +objects. + +When opting to write structures to +disk using these functions, they are automatically organized into a clean, scalable directory organization. +All structures are written in the +[`ase.io.Trajectory`](https://wiki.fysik.dtu.dk/ase/ase/io/trajectory.html#trajectory) +file format. +For further details on the directory structure, see the User Guide. 
+ +## Installation + +There are two options for installation, either via `pip` or from the repo directly. + +### `pip` (recommended) + +If you are planning on strictly using AutoCat rather than contributing to development, + we recommend using `pip` within a virtual environment (e.g. + [`conda`](https://docs.conda.io/en/latest/) + ). This can be done +as follows: + +``` +pip install autocat +``` + +### Github (for developers) + +Alternatively, if you would like to contribute to the development of this software, +AutoCat can be installed via a clone from Github. First, you'll need to clone the +github repo to your local machine (or wherever you'd like to use AutoCat) using +`git clone`. Once the repo has been cloned, you can install AutoCat as an editable +package by changing into the created directory (the one with `setup.py`) and installing +via: +``` +pip install -e . +``` + +## Contributing +Contributions through issues, feature requests, and pull requests are welcome. +Guidelines are provided here. diff --git a/docs/Tutorials/pred_h.md b/docs/Tutorials/pred_h.md new file mode 100644 index 00000000..95850397 --- /dev/null +++ b/docs/Tutorials/pred_h.md @@ -0,0 +1,131 @@ +In this tutorial we are going to show how to use the learning tools within +AutoCat to train a regressor that can predict adsorption energies of hydrogen +on a set of single-atom alloys. + +## Creating a `DesignSpace` + +Let's start by creating a `DesignSpace`. Normally each of these +structures would be optimized via DFT, but for demo purposes +we'll use the generated structures directly. First we need to generate the single-atom +alloys. Here, we can use AutoCat's +[`generate_saa_structures`](../API/Structure_Generation/saa.md#autocat.saa.generate_saa_structures) +function. + +```py +>>> # Generate the clean single-atom alloy structures +>>> from autocat.saa import generate_saa_structures +>>> from autocat.utils import extract_structures +>>> saa_struct_dict = generate_saa_structures( +... 
["Fe", "Cu", "Au"], +... ["Pt", "Pd", "Ni"], +... facets={"Fe":["110"], "Cu":["111"], "Au":["111"]}, +... n_fixed_layers=2, +... ) +>>> saa_structs = extract_structures(saa_struct_dict) +``` + +Now that we have the clean structures, let's adsorb hydrogen on the surface. +For convenience let's place H at the origin instead of considering all symmetry sites. +To accomplish this we can make use of AutoCat's +[`place_adsorbate`](../API/Structure_Generation/adsorption.md#autocat.adsorption.place_adsorbate) +function. + +```py +>>> # Adsorb hydrogen onto each of the generated SAA surfaces +>>> from autocat.adsorption import place_adsorbate +>>> ads_structs = [] +>>> for clean_struct in saa_structs: +... ads_dict = place_adsorbate( +... clean_struct, +... "H", +... (0.,0.) +... ) +... ads_struct = extract_structures(ads_dict)[0] +... ads_structs.append(ads_struct) +``` + +This has collected all of the single-atom alloys with hydrogen adsorbed into +a single list of `ase.Atoms` objects, `ads_structs`. Ideally at this stage we'd have +adsorption energies for each of the generated structures after relaxation. As a proxy +in this demo we'll create random labels, but this should be adsorption energies if you +want to train a meaningful Predictor! + +```py +>>> # Generate the labels for each structure +>>> import numpy as np +>>> labels = np.random.uniform(-1.5,1.5,size=len(ads_structs)) +``` + +Finally, using both our structures and labels we can define a `DesignSpace`. In practice, +if any of the labels for a structure are unknown, it can be included as a `numpy.nan` + +```py +>>> from autocat.learning.sequential import DesignSpace +>>> design_space = DesignSpace(ads_structs, labels) +``` + +## Setting up a `Predictor` + +When setting up our `Predictor` we now have two choices to make: + +1. The technique to be used for featurizing the systems +2. 
The regression model to be used for training and predictions + +Internally, the `Predictor` will contain a `Featurizer` object which contains all of +our choices for how to featurize the systems. Our choice of featurizer class and +the associated kwargs are specified via the `featurizer_class` and +`featurization_kwargs` arguments, respectively. By providing the design space structures +some of the kwargs related to the featurization (e.g. maximum structure size) can be +automatically obtained. + +Similarly, we can specify the regressor to be used within the `model_class` and +`model_kwargs` arguments. The class should be "`sklearn`-like" with `fit` and +`predict` methods. + +Let's featurize the hydrogen environment via `dscribe`'s `SOAP` class with +`sklearn`'s `GaussianProcessRegressor` for regression. + +```py +>>> from sklearn.gaussian_process import GaussianProcessRegressor +>>> from sklearn.gaussian_process.kernels import RBF +>>> from dscribe.descriptors import SOAP +>>> from autocat.learning.predictors import Predictor +>>> kernel = RBF(1.5) +>>> model_kwargs={"kernel": kernel} +>>> featurization_kwargs={ +... "design_space_structures": design_space.design_space_structures, +... "kwargs": {"rcut": 7.0, "nmax": 8, "lmax": 8} +... } +>>> predictor = Predictor( +... model_class=GaussianProcessRegressor, +... model_kwargs=model_kwargs, +... featurizer_class=SOAP, +... featurization_kwargs=featurization_kwargs, +... ) +``` + +## Training and making predictions + +With our newly defined `Predictor` we can train it using data from our +`DesignSpace` and the `fit` method. + +```py +>>> train_structures = design_space.design_space_structures[:5] +>>> train_labels = design_space.design_space_labels[:5] +>>> predictor.fit(train_structures, train_labels) +``` + +Making predictions is a similar process except using the `predict` method. 
+ +```py +>>> test_structures = design_space.design_space_structures[5:] +>>> predicted_labels = predictor.predict(test_structures) +``` + +In this example, since we already have the labels for the test structures, we can +also use the `score` method to calculate a prediction score. + +```py +>>> test_labels = design_space.design_space_labels[5:] +>>> mae = predictor.score(test_structures, test_labels) +``` \ No newline at end of file diff --git a/docs/Tutorials/sl.md b/docs/Tutorials/sl.md new file mode 100644 index 00000000..d3169349 --- /dev/null +++ b/docs/Tutorials/sl.md @@ -0,0 +1,112 @@ +In this tutorial we will show how to conduct a simulated sequential learning +run over a fully explored design space. + +## Creating a fully explored `DesignSpace` +Following a similar procedure as in the previous tutorial, we will create +a fully explored `DesignSpace` (ie. no unknown labels). This time +the structures will be clean mono-elemental surfaces which we can generate via +`generate_surface_structures`. + +```py +>>> # Generate the clean surfaces +>>> from autocat.surface import generate_surface_structures +>>> from autocat.utils import extract_structures +>>> surfs_dict = generate_surface_structures( +... ["Pt", "Cu", "Li", "Ti"], +... n_fixed_layers=2, +... default_lat_param_lib="pbe_fd" +... ) +>>> surfs = extract_structures(surfs_dict) +``` + +In this case we specified that the default lattice parameters +should be taken from the library calculated with the PBE XC functional and +a finite difference basis set. + +As before, we will create random labels for all structures. But if you +want meaningful sequential learning runs these must be actual labels relevant +to your design space! + +```py +>>> # Generate the labels for each structure +>>> import numpy as np +>>> labels = np.random.uniform(-1.5,1.5,size=len(surfs)) +``` + +Taking the structures and labels we can define our `DesignSpace`. 
+ +```py +>>> from autocat.learning.sequential import DesignSpace +>>> design_space = DesignSpace(surfs, labels) +``` + +## Doing a single simulated sequential learning run + +Given our fully explored `DesignSpace`, we can simulate a sequential learning +search over it to gain insights into guided searches within this context. +To do this simulated run we can make use of the `simulated_sequential_learning` +function. This will internally drive a `SequentialLearner` object which will be +returned at the end of the run. + +As before, we will need to make choices with regard to the `Predictor` settings. +In this case we will use a `SineMatrix` featurizer alongside a `GaussianProcessRegressor`. + +We also need to select parameters with regard to candidate selection. +This includes the acquisition function to be used, +target window (if applicable), and number of candidates to pick at each iteration. +Let's use a maximum uncertainty acquisition function to pick candidates based on their +associated uncertainty values. We'll also restrict the run to conduct 5 iterations. + +```py +>>> from sklearn.gaussian_process import GaussianProcessRegressor +>>> from sklearn.gaussian_process.kernels import RBF +>>> from dscribe.descriptors import SineMatrix +>>> from autocat.learning.sequential import simulated_sequential_learning +>>> kernel = RBF(1.5) +>>> model_kwargs = {"kernel": kernel} +>>> featurization_kwargs = { +... "design_space_structures": design_space.design_space_structures, +... } +>>> predictor_kwargs = { +... "model_class": GaussianProcessRegressor, +... "model_kwargs": model_kwargs, +... "featurizer_class": SineMatrix, +... "featurization_kwargs": featurization_kwargs +... } +>>> candidate_selection_kwargs = {"aq": "MU"} +>>> sim_seq_learn = simulated_sequential_learning( +... full_design_space=design_space, +... init_training_size=1, +... number_of_sl_loops=5, +... candidate_selection_kwargs=candidate_selection_kwargs, +... predictor_kwargs=predictor_kwargs, +... 
) +``` + +Within the returned `SequentialLearner` object we now have information we can use +for further analysis including prediction and uncertainty histories as well as the candidate +selection history. + +## Doing multiple simulated sequential learning runs + +It is often useful to consider the statistics of multiple independent simulated +sequential learning runs. For this purpose we can make use of the +`multiple_simulated_sequential_learning_runs` function. This acts in the same manner +as for the single run version, but will return a `SequentialLearner` object for each of the +independent runs in a list. Moreover, the inputs remain the same except with the added option +of running in parallel (since this is an embarrassingly parallel operation). Here we will conduct +three independent runs in serial. + +```py +>>> from autocat.learning.sequential import multiple_simulated_sequential_learning_runs +>>> runs_history = multiple_simulated_sequential_learning_runs( +... full_design_space=design_space, +... init_training_size=1, +... number_of_sl_loops=5, +... candidate_selection_kwargs=candidate_selection_kwargs, +... predictor_kwargs=predictor_kwargs, +... number_of_runs=3, +... # number_of_parallel_jobs=N if you wanted to run in parallel +... ) +``` + +Taking the `SequentialLearner`s from within `runs_history`, their histories +may be used to calculate more robust statistics on the simulated searches. \ No newline at end of file diff --git a/docs/User_Guide/Data/hhi.md b/docs/User_Guide/Data/hhi.md new file mode 100644 index 00000000..6f08cc05 --- /dev/null +++ b/docs/User_Guide/Data/hhi.md @@ -0,0 +1,6 @@ +The Herfindahl-Hirschman Index (HHI) is an index that measures market concentration. +Thus, in the context of different elements, it can be used as a proxy for cost, +as proposed by [M. Gaultois, et. al.](https://pubs.acs.org/doi/10.1021/cm400893e). + +From the tabulated values in the reference above, we provide HHI values for both +reserves as well as production. 
diff --git a/docs/User_Guide/Data/intermediates.md b/docs/User_Guide/Data/intermediates.md new file mode 100644 index 00000000..49a3df06 --- /dev/null +++ b/docs/User_Guide/Data/intermediates.md @@ -0,0 +1,48 @@ +When characterizing a surface in the context of a +specific reaction, calculating adsorption energies +for all of the reaction intermediates is often important. + +Here, AutoCat has default structures for adsorbates +of both the oxygen reduction reaction (ORR) and +nitrogen reduction reaction (NRR) intermediates. + +The names of all of the reaction intermediates can +be imported and fed directly into +AutoCat functions: +```py +>>> from autocat.data.intermediates import ORR_INTERMEDIATE_NAMES +>>> from autocat.data.intermediates import NRR_INTERMEDIATE_NAMES +>>> from autocat.surface import generate_surface_structures +>>> from autocat.utils import extract_structures +>>> from autocat.adsorption import generate_adsorbed_structures +>>> pt_dict = generate_surface_structures(["Pt"]) +>>> pt_struct = extract_structures(pt_dict)[0] +>>> orr_structs = generate_adsorbed_structures( +... surface=pt_struct, +... adsorbates=ORR_INTERMEDIATE_NAMES, +... use_all_sites=True +... ) +>>> nrr_structs = generate_adsorbed_structures( +... surface=pt_struct, +... adsorbates=NRR_INTERMEDIATE_NAMES, +... use_all_sites=True +... ) +``` +In the above example, `orr_structs` and `nrr_structs` have all of the corresponding +intermediates at every identified unique surface site. 
+ +Alternatively, if you would like to access the +`ase.Atoms` objects for the intermediates directly, +they can be imported as a `dict`: +```py +>>> from autocat.data.intermediates import ORR_MOLS +>>> from autocat.data.intermediates import NRR_MOLS +``` + +**ORR Intermediates**: + +OOH\*, O\*, OH\* + +**NRR Intermediates**: + +NNH\*, NNH$_2$\*, N\*, NH\*, NH$_2$\*, NHNH\*, NHNH$_2$\*, NH$_2$NH$_2$\* diff --git a/docs/User_Guide/Data/lattice_parameters.md b/docs/User_Guide/Data/lattice_parameters.md new file mode 100644 index 00000000..136a6cce --- /dev/null +++ b/docs/User_Guide/Data/lattice_parameters.md @@ -0,0 +1,44 @@ +In some codes, optimizing cell parameters on the fly +during geometry relaxations is not available. +For this reason we have compiled +calculated lattice parameters +using multiple different +calculation schemes as a convenience for high-throughput +studies. Every calculation was conducted with +[`GPAW`](https://wiki.fysik.dtu.dk/gpaw/index.html). + +There are two axes to the settings applied here: + +- exchange-correlation functional +- basis set mode (finite difference or plane-wave). + +Available sets are as follows: + +- `BULK_PBE_FD`/`BULK_BEEFVDW_FD`: +``` +These are parameters using the finite difference scheme +and PBE / BEEF-vdW XC functionals. 
Obtained via fits to an +equation of state (https://wiki.fysik.dtu.dk/ase/ase/eos.html) + +FCC/BCC +h = 0.16, kpts = (12,12,12) +fit to an SJ EOS + +HCP +h=0.16, kpts = (12,12,6) +fit to a Birch-Murnaghan EO +``` +- `BULK_PBE_PW`/`BULK_BEEFVDW_PW`: +``` +These are parameters are obatined with a plane-wave basis set and +using the Exponential Cell Filter to minimize the stress tensor and atomic forces +(https://wiki.fysik.dtu.dk/ase/ase/constraints.html#the-expcellfilter-class) + +FCC/BCC +mode=PW(550), kpts = (12,12,12), fmax = 0.05 eV/A + +HCP +mode=PW(550), kpts = (12,12,6), fmax = 0.05 eV/A +``` + +All of these lattice parameters are available within `autocat.data.lattice_parameters` diff --git a/docs/User_Guide/Data/segregation_energies.md b/docs/User_Guide/Data/segregation_energies.md new file mode 100644 index 00000000..e9933fb1 --- /dev/null +++ b/docs/User_Guide/Data/segregation_energies.md @@ -0,0 +1,25 @@ +When determining the stability of dopants within a host, one +important factor to consider is the segregation energy. This +predicts the thermodynamic preference towards pinning the +dopant at the surface of the host versus burying itself in +the bulk. + +Segregation energy values are tabulated as reported by +[A.V. Ruban, et. al.](https://journals.aps.org/prb/abstract/10.1103/PhysRevB.59.15990) +for multiple combinations of transition metal hosts and dopants. +By definition more negative values indicate more stability towards +keeping the dopant at the surface. +Values where the host is the same as the dopant is the surface energy for +that species. + +In addition, for specifically SAAs, [K. K. Rao, et. al.](https://doi.org/10.1007/s11244-020-01267-2) +studied the stability of various different host and dopant combinations. The different configurations +included SAA, subsurface, dimers, adatoms, and adatom + SAA. 
Here, for the most preferential configuration +we attributed the following scores as per the results shown in figure 3 of the above reference: + +- SAA is the most stable: 1 +- SAA is not the most stable but is within: + - <0.1 eV: 0.9 + - <0.2 eV: 0.8 + - <0.5 eV: 0.5 +- SAA is not the most stable by >0.5 eV: 0 \ No newline at end of file diff --git a/docs/User_Guide/Learning/featurizers.md b/docs/User_Guide/Learning/featurizers.md new file mode 100644 index 00000000..de743a9f --- /dev/null +++ b/docs/User_Guide/Learning/featurizers.md @@ -0,0 +1,91 @@ +The `Featurizer` object allows for the featurization of +systems into a format that can be fed into machine learning +models. Specified within this object are all the desired +settings for when featurizing systems. More specifically this +includes: + +- `featurizer_class`: the desired class for featurization + +- `preset`: if the featurizer class can be instantiated by +a preset, that preset can be specified here. (e.g. the `magpie` feature +set for the `ElementProperty` featurizer class) + +- `design_space_structures`: if the design space is already known, +the structures can be specified here to extract the `max_size` and +`species_list` parameters. Supersedes `max_size` and `species_list` +upon instantiation + +- `max_size`: the largest structure size that the featurizer can +encounter + +- `species_list`: all possible species that the featurizer can +encounter + +Applying the `Featurizer` there are two main methods: +`featurize_single` and `featurize_multiple`. The former is intended +for featurizing a single structure. On the other hand, the latter +can take multiple structures and returns them in a single feature +matrix. 
+ +Below are three examples using structure, site, and compositional +featurization methods: + +```py +>>> from autocat.learning.featurizers import Featurizer +>>> from autocat.utils import extract_structures +>>> from autocat.surface import generate_surface_structures +>>> from dscribe.descriptors import SineMatrix +>>> surfs = extract_structures(generate_surface_structures(["Li", "Na"])) +>>> f = Featurizer(SineMatrix, design_space_structures=surfs) +>>> f.max_size +36 +>>> f.species_list +['Li', 'Na'] +>>> X = f.featurize_multiple(surfs) +``` + +```py +>>> from autocat.learning.featurizers import Featurizer +>>> from autocat.utils import extract_structures +>>> from autocat.surface import generate_surface_structures +>>> from autocat.adsorption import place_adsorbate +>>> from dscribe.descriptors import SOAP +>>> surf = extract_structures(generate_surface_structures(["Cu"]))[0] +>>> ads_struct = extract_structures(place_adsorbate(surf, "OH", position=(0.0, 0.0)))[0] +>>> f = Featurizer( +... SOAP, +... max_size=36, +... species_list=["Cu", "O", "H"], +... kwargs={"rcut": 6., "lmax": 8, "nmax": 8} +... ) +>>> X = f.featurize_single(ads_struct) +``` + +```py +>>> from autocat.learning.featurizers import Featurizer +>>> from autocat.utils import extract_structures +>>> from autocat.saa import generate_saa_structures +>>> from matminer.featurizers.composition import ElementProperty +>>> saas = extract_structures(generate_saa_structures(["Cu", "Au"],["Pt", "Pd"])) +>>> f = Featurizer(ElementProperty, preset="magpie", design_space_structures=saas) +>>> f.species_list +['Cu', 'Pt', 'Pd', 'Au'] +>>> X = f.featurize_multiple(saas) +``` + +The goal of this `Featurizer` object is to provide a unified class across different +featurization techniques. 
+ +At present the following featurizer classes are supported: + +- [`dscribe`](https://singroup.github.io/dscribe/latest/): + - `SineMatrix` + - `CoulombMatrix` + - `ACSF` + - `SOAP` + +- [`matminer`](https://hackingmaterials.lbl.gov/matminer/): + - `ElementProperty` + - `ChemicalSRO` + - `OPSiteFingerprint` + - `CrystalNNFingerprint` \ No newline at end of file diff --git a/docs/User_Guide/Learning/predictors.md b/docs/User_Guide/Learning/predictors.md new file mode 100644 index 00000000..65ad67e1 --- /dev/null +++ b/docs/User_Guide/Learning/predictors.md @@ -0,0 +1,79 @@ +In order to iterate a sequential learning pipeline, +a regressor is needed to select subsequent candidate systems. +For this purpose, there is the +[`Predictor`](../../API/Learning/predictors.md#autocat.learning.predictors.Predictor) +object class. This contains two key attributes: + +- a regressor that can be fit to data and used for predictions +(the class provided must have `fit` and `predict` methods) +- featurizer class and kwargs to instantiate a [`Featurizer`](featurizers.md). + In particular there are two currently implemented approaches, +structure methods that featurize the entire structure (e.g. `SineMatrix`, `ElementProperty`) + and adsorbate methods that featurize locally (e.g. `SOAP`). + +Generally, this predictor object behaves similarly to regressors found in +[`sklearn`](https://scikit-learn.org/stable/) +with its own +[`fit`](../../API/Learning/predictors.md#autocat.learning.predictors.Predictor.fit), +[`predict`](../../API/Learning/predictors.md#autocat.learning.predictors.Predictor.predict), +and +[`score`](../../API/Learning/predictors.md#autocat.learning.predictors.Predictor.score) +methods. + +As an example, let's train a random forest regressor on some +single atom alloys. 
+ +```py +>>> import numpy as np +>>> from autocat.learning.predictors import Predictor +>>> from autocat.saa import generate_saa_structures +>>> from autocat.utils import extract_structures +>>> from dscribe.descriptors import SineMatrix +>>> from sklearn.ensemble import RandomForestRegressor +>>> saa_dict = generate_saa_structures(["Cu", "Au", "Fe"], ["Pt", "Ru", "Ni"]) +>>> saa_structs = extract_structures(saa_dict) +>>> labels = np.random.randint(1, size=(len(saa_structs) - 1)) +>>> acp = Predictor( +... model_class=RandomForestRegressor, +... featurizer_class=SineMatrix, +... ) +>>> acp.fit(saa_structs[:-1], labels) +>>> pred, _ = acp.predict([saa_structs[-1]]) +>>> pred +array([0.]) +``` +Here we have chosen to featurize the structures as a `SineMatrix`. + +Note as well that the `predict` method will return uncertainty estimates +if available. To see this, let's train a gaussian process regressor with an RBF + kernel. Let's also featurize using `SOAP` to see how featurization kwargs are passed + +```py +>>> import numpy as np +>>> from autocat.learning.predictors import Predictor +>>> from autocat.surface import generate_surface_structures +>>> from autocat.utils import extract_structures +>>> from autocat.adsorption import place_adsorbate +>>> from dscribe.descriptors import SOAP +>>> from sklearn.gaussian_process import GaussianProcessRegressor +>>> from sklearn.gaussian_process.kernels import RBF +>>> subs = extract_structures(generate_surface_structures(["Pt", "Fe", "Ru"])) +>>> structs = [extract_structures(place_adsorbate(s, "OH"))[0] for s in subs] +>>> labels = np.random.randint(1, size=(len(structs) - 1)) +>>> kernel = RBF() +>>> acp = Predictor( +... model_class=GaussianProcessRegressor, +... model_kwargs={"kernel": kernel}, +... featurizer_class=SOAP, +... featurization_kwargs={ +... "design_space_structures": structs, +... "kwargs": {"rcut": 6.0, "nmax": 6, "lmax": 6}, +... } +... 
) +>>> acp.fit(structs[:-1], labels) +>>> pred, unc = acp.predict([structs[-1]]) +>>> pred +array([0.]) +>>> unc +array([1.]) +``` diff --git a/docs/User_Guide/Learning/sequential.md b/docs/User_Guide/Learning/sequential.md new file mode 100644 index 00000000..cbdd4569 --- /dev/null +++ b/docs/User_Guide/Learning/sequential.md @@ -0,0 +1,223 @@ +## DesignSpace + +The +[`DesignSpace`](../../API/Learning/sequential.md#autocat.learning.sequential.DesignSpace) +class object is intended to store the +*entire* design space. As the sequential learning +loop is iterated, this can be continuously updated +with the newly found labels. + +There are two key components required for this object: + +1. `design_space_structures`: *all* systems to be considered as +[`ase.Atoms`](https://wiki.fysik.dtu.dk/ase/ase/atoms.html#module-ase.atoms) +objects in a `list` +2. `design_space_labels`: `numpy array` of the same length as the above list +with the corresponding labels. If the label is not yet +known, set it to `numpy.nan` + +**NB:** The order of the list of design space structures must +be in the same order as the labels given in the +design space labels. + +```py +>>> import numpy as np +>>> from autocat.surface import generate_surface_structures +>>> from autocat.utils import extract_structures +>>> from autocat.learning.sequential import DesignSpace +>>> surf_dict = generate_surface_structures(["Pt", "Pd", "Cu", "Ni"]) +>>> surf_structs = extract_structures(surf_dict) +>>> labels = np.array([0.95395024, 0.63504885, np.nan, 0.08320879, np.nan, +... 0.32423194, 0.55570785, np.nan, np.nan, np.nan, +... 
0.18884186, np.nan]) +>>> acds = DesignSpace(surf_structs, labels) +>>> len(acds) +12 +>>> acds.design_space_structures +[Atoms(...), + Atoms(...), + Atoms(...), + Atoms(...), + Atoms(...), + Atoms(...), + Atoms(...), + Atoms(...), + Atoms(...), + Atoms(...), + Atoms(...), + Atoms(...)] +>>> acds.design_space_labels +array([0.95395024, 0.63504885, nan, 0.08320879, nan, + 0.32423194, 0.55570785, nan, nan, nan, + 0.18884186, nan]) +``` + + +## SequentialLearner + +The +[`SequentialLearner`](../../API/Learning/sequential.md#autocat.learning.sequential.SequentialLearner) +object stores information regarding the latest +iteration of the sequential learning loop including: + +1. A [`Predictor`](predictors.md) (and its kwargs for both the regressor and featurizer) +2. Candidate selection kwargs for score calculation (e.g. acquisition functions) +3. Iteration number +4. Latest `DesignSpace` +5. Candidate system that is identified for the next loop. +6. Histories for predictions, uncertainties, and training indices + +This object can be thought of as a central hub for the +sequential learning workflow, with an external driver +(either automated or manual) triggering iteration. The first +`iterate` trains the model and identifies candidate(s) to +start the loop. 
+
+```py
+>>> import numpy as np
+>>> from autocat.surface import generate_surface_structures
+>>> from autocat.utils import extract_structures
+>>> from autocat.adsorption import place_adsorbate
+>>> from autocat.learning.sequential import DesignSpace
+>>> from autocat.learning.sequential import SequentialLearner
+>>> from dscribe.descriptors import SOAP
+>>> from sklearn.gaussian_process import GaussianProcessRegressor
+>>> from sklearn.gaussian_process.kernels import RBF
+>>> subs_dict = generate_surface_structures(["Pt", "Pd", "Cu", "Ni"])
+>>> subs = extract_structures(subs_dict)
+>>> ads_structs = [extract_structures(place_adsorbate(s, "H"))[0] for s in subs]
+>>> labels = np.array([0.95395024, 0.63504885, np.nan, 0.08320879, np.nan,
+... 0.32423194, 0.55570785, np.nan, np.nan, np.nan,
+... 0.18884186, np.nan])
+>>> acds = DesignSpace(ads_structs, labels)
+>>> kernel = RBF()
+>>> acsl = SequentialLearner(
+... acds,
+... predictor_kwargs={
+... "model_class": GaussianProcessRegressor,
+... "model_kwargs": {"kernel": kernel},
+... "featurizer_class": SOAP,
+... "featurization_kwargs": {"kwargs": {"rcut": 5.0, "lmax": 6, "nmax": 6}}
+... },
+... candidate_selection_kwargs={
+... "aq": "MLI",
+... "target_min": -2.25,
+... "target_max": -1.5,
+... "include_hhi": True,
+... "hhi_type": "reserves",
+... "include_seg_ener": False,
+... },
+... )
+>>> acsl.iteration_count
+0
+>>> acsl.iterate()
+>>> acsl.iteration_count
+1
+```
+
+## Simulated Sequential Learning
+
+If you already have a fully explored design space and want
+to simulate exploration over it, the
+[`simulated_sequential_learning`](../../API/Learning/sequential.md#autocat.learning.sequential.simulated_sequential_learning)
+function may be used.
+
+Internally this function acts as a driver on a `SequentialLearner` object, and can be
+viewed as an example for how a driver can be set up for an exploratory simulated
+sequential learning loop. 
As inputs it requires all parameters needed to instantiate +a `SequentialLearner` and returns the object that has been iterated. For further analysis +of the search, histories of the predictions, uncertainties, and the training indices for +each iteration are kept. + +```py +>>> import numpy as np +>>> from autocat.surface import generate_surface_structures +>>> from autocat.utils import extract_structures +>>> from autocat.learning.sequential import DesignSpace +>>> from autocat.learning.sequential import simulated_sequential_learning +>>> from dscribe.descriptors import SineMatrix +>>> from sklearn.gaussian_process import GaussianProcessRegressor +>>> from sklearn.gaussian_process.kernels import RBF +>>> surf_dict = generate_surface_structures(["Pt", "Pd", "Cu", "Ni"]) +>>> surf_structs = extract_structures(surf_dict) +>>> labels = np.array([0.95395024, 0.63504885, 0.4567, 0.08320879, 0.87779, +... 0.32423194, 0.55570785, 0.325, 0.43616, 0.321632, +... 0.18884186, 0.1114]) +>>> acds = DesignSpace(surf_structs, labels) +>>> kernel = RBF() +>>> sim_sl = simulated_sequential_learning( +... full_design_space=acds, +... predictor_kwargs={ +... "model_class": GaussianProcessRegressor, +... "model_kwargs": {"kernel": kernel}, +... "featurizer_class": SineMatrix, +... }, +... candidate_selection_kwargs={ +... "aq": "MLI", +... "target_min": -2.25, +... "target_max": -1.5, +... "include_hhi": True, +... "hhi_type": "reserves", +... "include_seg_ener": False, +... }, +... init_training_size=5, +... number_of_sl_loops=3, +... ) +Sequential Learning Iteration #1 +Sequential Learning Iteration #2 +Sequential Learning Iteration #3 +``` + +Additionally, simulated searches are typically most useful when repeated to obtain +statistics that are less dependent on the initialization of the design space. 
For this +purpose there is the +[`multiple_simulated_sequential_learning_runs`](../../API/Learning/sequential.md#autocat.learning.sequential.multiple_simulated_sequential_learning_runs) +function. This returns a list of `SequentialLearner` corresponding to each individual run. Optionally, +this function can also initiate the multiple runs across parallel processes via the +`number_of_parallel_jobs` parameter. + +```py +>>> import numpy as np +>>> from autocat.surface import generate_surface_structures +>>> from autocat.utils import extract_structures +>>> from autocat.learning.sequential import DesignSpace +>>> from autocat.learning.sequential import multiple_simulated_sequential_learning_runs +>>> from matminer.featurizers.composition import ElementProperty +>>> from sklearn.gaussian_process import GaussianProcessRegressor +>>> from sklearn.gaussian_process.kernels import RBF +>>> surf_dict = generate_surface_structures(["Pt", "Pd", "Cu", "Ni"]) +>>> surf_structs = extract_structures(surf_dict) +>>> labels = np.array([0.95395024, 0.63504885, 0.4567, 0.08320879, 0.87779, +... 0.32423194, 0.55570785, 0.325, 0.43616, 0.321632, +... 0.18884186, 0.1114]) +>>> acds = DesignSpace(surf_structs, labels) +>>> kernel = RBF() +>>> multi_sim_sl = multiple_simulated_sequential_learning_runs( +... full_design_space=acds, +... predictor_kwargs={ +... "model_class": GaussianProcessRegressor, +... "model_kwargs": {"kernel": kernel}, +... "featurizer_class": ElementProperty, +... "featurization_kwargs": {"preset": "matminer"}, +... }, +... candidate_selection_kwargs={ +... "aq": "MLI", +... "target_min": -2.25, +... "target_max": -1.5, +... "include_hhi": True, +... "hhi_type": "reserves", +... "include_seg_ener": False, +... }, +... init_training_size=5, +... number_of_sl_loops=2, +... number_of_runs=3, +... 
)
+Sequential Learning Iteration #1
+Sequential Learning Iteration #2
+Sequential Learning Iteration #1
+Sequential Learning Iteration #2
+Sequential Learning Iteration #1
+Sequential Learning Iteration #2
+>>> len(multi_sim_sl)
+3
+```
\ No newline at end of file
diff --git a/docs/User_Guide/Structure_Generation/adsorption.md b/docs/User_Guide/Structure_Generation/adsorption.md
new file mode 100644
index 00000000..a20bfd57
--- /dev/null
+++ b/docs/User_Guide/Structure_Generation/adsorption.md
@@ -0,0 +1,180 @@
+![Adsorption Figure](../../img/struct_gen_figs/adsorption.png){ align=right }
+Tools within
+[`autocat.adsorption`](../../API/Structure_Generation/adsorption.md)
+are geared towards generating structures with adsorbates placed on
+a candidate catalyst surface.
+
+The core function of this module is
+[`generate_adsorbed_structures`](../../API/Structure_Generation/adsorption.md#autocat.adsorption.generate_adsorbed_structures)
+ for generating multiple adsorbed structures with a single function call.
+
+For the oxygen reduction (ORR) and nitrogen reduction (NRR) reactions,
+AutoCat has default starting geometries for all of these intermediates
+which can be found in [`autocat.data.intermediates`](../Data/intermediates.md).
+
+In addition, by default initial heights of the adsorbates are guessed based
+upon the vdW radii of the nearest neighbors to the anchoring atom.
+
+In the example below we are generating adsorption structures for all ORR intermediates
+on all of the identified unique symmetry sites on a Pt111 slab. The unique sites are
+identified using the Delaunay triangulation, as implemented in `pymatgen`.
+
+```py
+>>> from autocat.surface import generate_surface_structures
+>>> from autocat.data.intermediates import ORR_INTERMEDIATE_NAMES
+>>> from autocat.adsorption import generate_adsorbed_structures
+>>> surface_dict = generate_surface_structures(
+... species_list=["Pt"], facets={"Pt": ["111"]}, n_fixed_layers=2
+... )
+>>> surface = surface_dict["Pt"]["fcc111"]["structure"]
+>>> ads_dict = generate_adsorbed_structures(
+... surface=surface,
+... use_all_sites=True,
+... adsorbates=ORR_INTERMEDIATE_NAMES,
+... write_to_disk=True,
+... )
+Structure with OOH adsorbed at ontop/0.0_0.0 written to ./adsorbates/OOH/ontop/0.0_0.0/input.traj
+Structure with OOH adsorbed at bridge/7.623_6.001 written to ./adsorbates/OOH/bridge/7.623_6.001/input.traj
+Structure with OOH adsorbed at hollow/6.93_5.601 written to ./adsorbates/OOH/hollow/6.93_5.601/input.traj
+Structure with OOH adsorbed at hollow/9.702_4.001 written to ./adsorbates/OOH/hollow/9.702_4.001/input.traj
+Structure with O adsorbed at ontop/0.0_0.0 written to ./adsorbates/O/ontop/0.0_0.0/input.traj
+Structure with O adsorbed at bridge/7.623_6.001 written to ./adsorbates/O/bridge/7.623_6.001/input.traj
+Structure with O adsorbed at hollow/6.93_5.601 written to ./adsorbates/O/hollow/6.93_5.601/input.traj
+Structure with O adsorbed at hollow/9.702_4.001 written to ./adsorbates/O/hollow/9.702_4.001/input.traj
+Structure with OH adsorbed at ontop/0.0_0.0 written to ./adsorbates/OH/ontop/0.0_0.0/input.traj
+Structure with OH adsorbed at bridge/7.623_6.001 written to ./adsorbates/OH/bridge/7.623_6.001/input.traj
+Structure with OH adsorbed at hollow/6.93_5.601 written to ./adsorbates/OH/hollow/6.93_5.601/input.traj
+Structure with OH adsorbed at hollow/9.702_4.001 written to ./adsorbates/OH/hollow/9.702_4.001/input.traj
+>>> ads_dict
+{'OOH': {'ontop': {'0.0_0.0': {'structure': Atoms(...),
+ 'traj_file_path': './adsorbates/OOH/ontop/0.0_0.0/input.traj'}},
+ 'bridge': {'7.623_6.001': {'structure': Atoms(...),
+ 'traj_file_path': 
'./adsorbates/OOH/bridge/7.623_6.001/input.traj'}}, + 'hollow': {'6.93_5.601': {'structure': Atoms(...), + 'traj_file_path': './adsorbates/OOH/hollow/6.93_5.601/input.traj'}, + '9.702_4.001': {'structure': Atoms(...), + 'traj_file_path': './adsorbates/OOH/hollow/9.702_4.001/input.traj'}}}, + 'O': {'ontop': {'0.0_0.0': {'structure': Atoms(...), + 'traj_file_path': './adsorbates/O/ontop/0.0_0.0/input.traj'}}, + 'bridge': {'7.623_6.001': {'structure': Atoms(...), + 'traj_file_path': './adsorbates/O/bridge/7.623_6.001/input.traj'}}, + 'hollow': {'6.93_5.601': {'structure': Atoms(...), + 'traj_file_path': './adsorbates/O/hollow/6.93_5.601/input.traj'}, + '9.702_4.001': {'structure': Atoms(...), + 'traj_file_path': './adsorbates/O/hollow/9.702_4.001/input.traj'}}}, + 'OH': {'ontop': {'0.0_0.0': {'structure': Atoms(...), + 'traj_file_path': './adsorbates/OH/ontop/0.0_0.0/input.traj'}}, + 'bridge': {'7.623_6.001': {'structure': Atoms(...), + 'traj_file_path': './adsorbates/OH/bridge/7.623_6.001/input.traj'}}, + 'hollow': {'6.93_5.601': {'structure': Atoms(...), + 'traj_file_path': './adsorbates/OH/hollow/6.93_5.601/input.traj'}, + '9.702_4.001': {'structure': Atoms(...), + 'traj_file_path': './adsorbates/OH/hollow/9.702_4.001/input.traj'}}}, +``` + +In general the dictionary generated has the following organization: + +``` +{ADSORBATE_SPECIES: + {SITE_LABEL: + {XY: {"structure": Atoms, "traj_file_path": TRAJFILEPATH}}}, +``` +When writing these adsorbed structures to disk it is done with the following subdirectory +format (mimicing the organization of the dictionary). + +``` +. 
+├── adsorbates
+│   ├── O
+│   │   ├── bridge
+│   │   │   └── 7.623_6.001
+│   │   │   └── input.traj
+│   │   ├── hollow
+│   │   │   ├── 6.93_5.601
+│   │   │   │   └── input.traj
+│   │   │   └── 9.702_4.001
+│   │   │   └── input.traj
+│   │   └── ontop
+│   │   └── 0.0_0.0
+│   │   └── input.traj
+│   ├── OH
+│   │   ├── bridge
+│   │   │   └── 7.623_6.001
+│   │   │   └── input.traj
+│   │   ├── hollow
+│   │   │   ├── 6.93_5.601
+│   │   │   │   └── input.traj
+│   │   │   └── 9.702_4.001
+│   │   │   └── input.traj
+│   │   └── ontop
+│   │   └── 0.0_0.0
+│   │   └── input.traj
+│   └── OOH
+│   ├── bridge
+│   │   └── 7.623_6.001
+│   │   └── input.traj
+│   ├── hollow
+│   │   ├── 6.93_5.601
+│   │   │   └── input.traj
+│   │   └── 9.702_4.001
+│   │   └── input.traj
+│   └── ontop
+│   └── 0.0_0.0
+│   └── input.traj
+```
+
+Instead of generating the adsorption structures for all unique sites,
+the xy coordinates of individual sites may be specified using the `adsorption_sites`
+ parameter. Here we can give each of these sites custom labels to be used for referencing
+ and writing to disk.
+
+```py
+>>> from autocat.surface import generate_surface_structures
+>>> from autocat.adsorption import generate_adsorbed_structures
+>>> surface_dict = generate_surface_structures(
+... species_list=["Pt"], facets={"Pt": ["111"]}, n_fixed_layers=2
+... )
+>>> surface = surface_dict["Pt"]["fcc111"]["structure"]
+>>> x = surface[15].x
+>>> x
+4.1577878733769
+>>> y = surface[15].y
+>>> y
+5.6011665451642
+>>> sites = {"Li": {"custom": [(x,y)]}}
+>>> ads_dict = generate_adsorbed_structures(
+... surface=surface,
+... adsorbates=["Li"],
+... use_all_sites=False,
+... adsorption_sites=sites,
+... write_to_disk=True,
+... 
)
+Structure with Li adsorbed at custom/4.158_5.601 written to ./adsorbates/Li/custom/4.158_5.601/input.traj
+>>> ads_dict
+{'Li': {'custom': {'4.158_5.601': {'structure': Atoms(...),
+ 'traj_file_path': './adsorbates/Li/custom/4.158_5.601/input.traj'}}}}
+```
+
+If we are dealing with multiple adsorbates, adsorption sites, heights, etc. that we want to
+treat differently depending on the combination, we can leverage the `dict` option for each of these
+inputs. The example below illustrates this capability, where a `dict` can be used to specify settings per adsorbate.
+
+```py
+>>> from autocat.surface import generate_surface_structures
+>>> from autocat.adsorption import generate_adsorbed_structures
+>>> surface_dict = generate_surface_structures(
+... species_list=["Pt"], facets={"Pt": ["111"]}, n_fixed_layers=2
+... )
+>>> surface = surface_dict["Pt"]["fcc111"]["structure"]
+>>> sites = {"Li": {"origin": [(0.,0.)]}, "H": {"custom": [(0.5, 0.5)]}}
+>>> ads_dict = generate_adsorbed_structures(
+... surface=surface,
+... adsorbates=["Li", "H", "N"],
+... use_all_sites={"Li": False, "H": False, "N": True},
+... heights={"H": 1.2},
+... adsorption_sites=sites,
+... write_to_disk=True,
+... )
+```
\ No newline at end of file
diff --git a/docs/User_Guide/Structure_Generation/bulk.md b/docs/User_Guide/Structure_Generation/bulk.md
new file mode 100644
index 00000000..ad95cce1
--- /dev/null
+++ b/docs/User_Guide/Structure_Generation/bulk.md
@@ -0,0 +1,45 @@
+[`autocat.bulk`](../../API/Structure_Generation/bulk.md)
+provides tools to automatically generate mono-element
+bulk structures. These are structures containing only a single
+chemical species with no vacuum and 3D periodicity.
+
+Multiple of these systems can be generated and written to
+disk via a single call of
+[`generate_bulk_structures`](../../API/Structure_Generation/bulk.md#autocat.bulk.generate_bulk_structures).
+
+``` py
+>>> from autocat.bulk import generate_bulk_structures
+>>> bulk_dict = generate_bulk_structures(["Pt", "Fe", "Ru"], write_to_disk=True)
+Pt_bulk_fcc structure written to ./Pt_bulk_fcc/input.traj
+Fe_bulk_bcc structure written to ./Fe_bulk_bcc/input.traj
+Ru_bulk_hcp structure written to ./Ru_bulk_hcp/input.traj
+>>> bulk_dict
+{'Pt': {'crystal_structure': Atoms(...),
+ 'traj_file_path': './Pt_bulk_fcc/input.traj'},
+ 'Fe': {'crystal_structure': Atoms(...),
+ 'traj_file_path': './Fe_bulk_bcc/input.traj'},
+ 'Ru': {'crystal_structure': Atoms(...),
+ 'traj_file_path': './Ru_bulk_hcp/input.traj'}}
+```
+
+In general the following structure of the resulting dict is generated:
+
+`{SPECIES: {"crystal_structure": Atoms, "traj_file_path": TRAJFILEPATH}}`
+
+If writing structures to disk via
+`#!python write_to_disk=True`,
+then a similar organization is maintained in the resulting directory structure:
+
+```
+.
+├── Fe_bulk_bcc
+│   └── input.traj
+├── Pt_bulk_fcc
+│   └── input.traj
+├── Ru_bulk_hcp
+│   └── input.traj
+```
+where each `input.traj` contains the bulk structure.
+
+**N.B.** by default initial magnetic moments will be set for Fe, Co, and Ni, otherwise no spin
+will be given
diff --git a/docs/User_Guide/Structure_Generation/saa.md b/docs/User_Guide/Structure_Generation/saa.md
new file mode 100644
index 00000000..aec3e32b
--- /dev/null
+++ b/docs/User_Guide/Structure_Generation/saa.md
@@ -0,0 +1,65 @@
+![SAA Figure](../../img/struct_gen_figs/saa.png){ align=right }
+Single atom alloys (SAA) consist of a transition-metal host
+with lone dopant atoms embedded at the surface. This
+dispersion leads to unique electronic properties.
+
+With the [`autocat.saa`](../../API/Structure_Generation/saa.md)
+module, we can generate structures of these
+systems to study them further. 
The main function for this purpose +is [`generate_saa_structures`](../../API/Structure_Generation/saa.md#autocat.saa.generate_saa_structures) +where multiple SAA structures can +be generated simultaneously. + +```py +>>> from autocat.saa import generate_saa_structures +>>> saa_dict = generate_saa_structures( +... host_species=["Fe", "Cu"], +... dopant_species=["Pt", "Au"], +... facets={"Fe": ["110"], "Cu": ["111"]}, +... n_fixed_layers=2, +... write_to_disk=True, +... ) +Pt1/Fe(bcc110) structure written to ./Fe/Pt/bcc110/substrate/input.traj +Au1/Fe(bcc110) structure written to ./Fe/Au/bcc110/substrate/input.traj +Pt1/Cu(fcc111) structure written to ./Cu/Pt/fcc111/substrate/input.traj +Au1/Cu(fcc111) structure written to ./Cu/Au/fcc111/substrate/input.traj +>>> saa_dict +{'Fe': {'Pt': {'bcc110': {'structure': Atoms(...), + 'traj_file_path': './Fe/Pt/bcc110/substrate/input.traj'}}, + 'Au': {'bcc110': {'structure': Atoms(...), + 'traj_file_path': './Fe/Au/bcc110/substrate/input.traj'}}}, + 'Cu': {'Pt': {'fcc111': {'structure': Atoms(...), + 'traj_file_path': './Cu/Pt/fcc111/substrate/input.traj'}}, + 'Au': {'fcc111': {'structure': Atoms(...), + 'traj_file_path': './Cu/Au/fcc111/substrate/input.traj'}}}} +``` +Here we generated SAA slabs with Fe and Cu as hosts and Pt and Au dopants under the following conditions: + +- for Fe (Cu) we only need the 110 (111) facet +- the bottom 2 layers are held fixed + +When writing to disk the following directory structure is used: +``` +. 
+├── Cu +│   ├── Au +│   │   └── fcc111 +│   │   └── substrate +│   │   └── input.traj +│   └── Pt +│   └── fcc111 +│   └── substrate +│   └── input.traj +├── Fe +│   ├── Au +│   │   └── bcc110 +│   │   └── substrate +│   │   └── input.traj +│   └── Pt +│   └── bcc110 +│   └── substrate +│   └── input.traj +``` + +**N.B.** by default, initial magnetic moments are given to the dopant species based upon +the ground state magnetic moment of the species diff --git a/docs/User_Guide/Structure_Generation/surface.md b/docs/User_Guide/Structure_Generation/surface.md new file mode 100644 index 00000000..6788082c --- /dev/null +++ b/docs/User_Guide/Structure_Generation/surface.md @@ -0,0 +1,71 @@ +![Surface Figure](../../img/struct_gen_figs/surface.png){ align=right } +It is crucial for many heterogeneous catalysis studies to be +able to model a catalyst surface where the desired reaction +can take place. +[`autocat.surface`](../../API/Structure_Generation/surface.md) +provides tools for generating +low miller index surfaces for mono-element surfaces with a vacuum +in the $z$-direction. + +The core function of this module is +[`generate_surface_structures`](../../API/Structure_Generation/surface.md#autocat.surface.generate_surface_structures) +where multiple slabs can be generated at once. + +```py +>>> from autocat.surface import generate_surface_structures +>>> surf_dict = generate_surface_structures( +... ["Li", "Cu"], +... facets={"Li": ["110"]}, +... supercell_dim=[5, 5, 4], +... n_fixed_layers=2, +... default_lat_param_lib="beefvdw_fd", +... write_to_disk=True, +... 
) +Li_bcc110 structure written to ./Li/bcc110/substrate/input.traj +Cu_fcc100 structure written to ./Cu/fcc100/substrate/input.traj +Cu_fcc111 structure written to ./Cu/fcc111/substrate/input.traj +Cu_fcc110 structure written to ./Cu/fcc110/substrate/input.traj +>>> surf_dict +{'Li': {'bcc110': {'structure': Atoms(...), + 'traj_file_path': './Li/bcc110/substrate/input.traj'}}, + 'Cu': {'fcc100': {'structure': Atoms(...), + 'traj_file_path': './Cu/fcc100/substrate/input.traj'}, + 'fcc111': {'structure': Atoms(...), + 'traj_file_path': './Cu/fcc111/substrate/input.traj'}, + 'fcc110': {'structure': Atoms(...), + 'traj_file_path': './Cu/fcc110/substrate/input.traj'}}} +``` +Here we generated surface slabs for Cu and Li under the following conditions: + +- for Li we only need the 110 facet +- generate all default facets for Cu + * fcc/bcc: ["100", "110", "111"] + * hcp: ["0001"] +- the supercell dimensions of the slabs are 5 $\times$ 5 $\times$ 4 +- the bottom 2 layers are held fixed +- for structures where the lattice parameter is not explicitly specified, +their default values are pulled from the +[`autocat.data.lattice_parameters`](../Data/lattice_parameters.md) +library that used a BEEF-vdW XC and finite difference basis set + +When using the `write_to_disk` functionality the structures +will be written into the following directory structure: + +``` +. 
+├── Cu +│   ├── fcc100 +│   │   └── substrate +│   │   └── input.traj +│   ├── fcc110 +│   │   └── substrate +│   │   └── input.traj +│   └── fcc111 +│   └── substrate +│   └── input.traj +├── Li +│   └── bcc110 +│   └── substrate +│   └── input.traj +``` +**N.B.** by default, initial magnetic moments are given to Fe, Ni and Co diff --git a/docs/img/autocat_icon.png b/docs/img/autocat_icon.png new file mode 100644 index 00000000..df146db3 Binary files /dev/null and b/docs/img/autocat_icon.png differ diff --git a/docs/img/autocat_logo.png b/docs/img/autocat_logo.png new file mode 100644 index 00000000..49ec457c Binary files /dev/null and b/docs/img/autocat_logo.png differ diff --git a/docs/img/struct_gen_figs/adsorption.png b/docs/img/struct_gen_figs/adsorption.png new file mode 100644 index 00000000..5d2de928 Binary files /dev/null and b/docs/img/struct_gen_figs/adsorption.png differ diff --git a/docs/img/struct_gen_figs/saa.png b/docs/img/struct_gen_figs/saa.png new file mode 100644 index 00000000..2234d3e5 Binary files /dev/null and b/docs/img/struct_gen_figs/saa.png differ diff --git a/docs/img/struct_gen_figs/surface.png b/docs/img/struct_gen_figs/surface.png new file mode 100644 index 00000000..62e5157c Binary files /dev/null and b/docs/img/struct_gen_figs/surface.png differ diff --git a/docs/javascripts/mathjax.js b/docs/javascripts/mathjax.js new file mode 100644 index 00000000..5bf8e9aa --- /dev/null +++ b/docs/javascripts/mathjax.js @@ -0,0 +1,17 @@ +window.MathJax = { + tex: { + inlineMath: [["\\(", "\\)"]], + displayMath: [["\\[", "\\]"]], + processEscapes: true, + processEnvironments: true + }, + options: { + ignoreHtmlClass: ".*|", + processHtmlClass: "arithmatex" + } +}; + +document$.subscribe(() => { // + + MathJax.typesetPromise() +}) diff --git a/docs/make.bat b/docs/make.bat deleted file mode 100644 index c3592594..00000000 --- a/docs/make.bat +++ /dev/null @@ -1,35 +0,0 @@ -@ECHO OFF - -pushd %~dp0 - -REM Command file for Sphinx documentation - 
-if "%SPHINXBUILD%" == "" ( - set SPHINXBUILD=sphinx-build -) -set SOURCEDIR=src -set BUILDDIR=_build - -if "%1" == "" goto help - -%SPHINXBUILD% >NUL 2>NUL -if errorlevel 9009 ( - echo. - echo.The 'sphinx-build' command was not found. Make sure you have Sphinx - echo.installed, then set the SPHINXBUILD environment variable to point - echo.to the full path of the 'sphinx-build' executable. Alternatively you - echo.may add the Sphinx directory to PATH. - echo. - echo.If you don't have Sphinx installed, grab it from - echo.http://sphinx-doc.org/ - exit /b 1 -) - -%SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% -goto end - -:help -%SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% - -:end -popd diff --git a/docs/src/conf.py b/docs/src/conf.py deleted file mode 100644 index a182ecc6..00000000 --- a/docs/src/conf.py +++ /dev/null @@ -1,55 +0,0 @@ -# Configuration file for the Sphinx documentation builder. -# -# This file only contains a selection of the most common options. For a full -# list see the documentation: -# https://www.sphinx-doc.org/en/master/usage/configuration.html - -# -- Path setup -------------------------------------------------------------- - -# If extensions (or modules to document with autodoc) are in another directory, -# add these directories to sys.path here. If the directory is relative to the -# documentation root, use os.path.abspath to make it absolute, like shown here. -# -import os -import sys - -sys.path.insert(0, os.path.abspath("../src/autocat")) - - -# -- Project information ----------------------------------------------------- - -project = "autocat" -copyright = "2020, Lance Kavalsky" -author = "Lance Kavalsky" - - -# -- General configuration --------------------------------------------------- - -# Add any Sphinx extension module names here, as strings. They can be -# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom -# ones. 
-extensions = [ - "sphinx.ext.autodoc", - "sphinx.ext.napoleon", -] - -# Add any paths that contain templates here, relative to this directory. -templates_path = ["_templates"] - -# List of patterns, relative to source directory, that match files and -# directories to ignore when looking for source files. -# This pattern also affects html_static_path and html_extra_path. -exclude_patterns = ["_build", "Thumbs.db", ".DS_Store"] - - -# -- Options for HTML output ------------------------------------------------- - -# The theme to use for HTML and HTML Help pages. See the documentation for -# a list of builtin themes. -# -html_theme = "bizstyle" - -# Add any paths that contain custom static files (such as style sheets) here, -# relative to this directory. They are copied after the builtin static files, -# so a file named "default.css" will overwrite the builtin "default.css". -html_static_path = ["_static"] diff --git a/docs/src/index.rst b/docs/src/index.rst deleted file mode 100644 index b5c77c5f..00000000 --- a/docs/src/index.rst +++ /dev/null @@ -1,22 +0,0 @@ -.. autocat documentation master file, created by - sphinx-quickstart on Tue Nov 24 18:52:19 2020. - You can adapt this file completely to your liking, but it should at least - contain the root `toctree` directive. - -Welcome to autocat's documentation! -=================================== - -Tools for automated structure generation of catalyst systems. - -.. toctree:: - :maxdepth: 2 - :hidden: - - module_reference/index - -Indices and tables -================== - -* :ref:`genindex` -* :ref:`modindex` -* :ref:`search` diff --git a/docs/src/module_reference/adsorption.rst b/docs/src/module_reference/adsorption.rst deleted file mode 100644 index 09813f40..00000000 --- a/docs/src/module_reference/adsorption.rst +++ /dev/null @@ -1,10 +0,0 @@ -.. _sec-adsorption: - -autocat.adsorption -++++++++++++++++++ - -Tools for automating adsorption on a given surface. - -.. 
automodule:: autocat.adsorption - :members: - :undoc-members: diff --git a/docs/src/module_reference/bulk.rst b/docs/src/module_reference/bulk.rst deleted file mode 100644 index dcdf8ef3..00000000 --- a/docs/src/module_reference/bulk.rst +++ /dev/null @@ -1,10 +0,0 @@ -.. _sec-bulk: - -autocat.bulk -++++++++++++ - -Tools for automating the generation of bulk mono-elemental systems - -.. automodule:: autocat.bulk - :members: - :undoc-members: diff --git a/docs/src/module_reference/index.rst b/docs/src/module_reference/index.rst deleted file mode 100644 index 7a6ab439..00000000 --- a/docs/src/module_reference/index.rst +++ /dev/null @@ -1,14 +0,0 @@ -.. _sec-module-reference: - -Module reference -++++++++++++++++ - -.. toctree:: - :maxdepth: 3 - :caption: Contents - - bulk - surface - saa - mpea - adsorption diff --git a/docs/src/module_reference/mpea.rst b/docs/src/module_reference/mpea.rst deleted file mode 100644 index 47e571f4..00000000 --- a/docs/src/module_reference/mpea.rst +++ /dev/null @@ -1,10 +0,0 @@ -.. _sec-mpea: - -autocat.mpea -++++++++++++ - -Tools for automating the generation of Multi-Principal Element Alloys (a.k.a high-entropy alloys) - -.. automodule:: autocat.mpea - :members: - :undoc-members: diff --git a/docs/src/module_reference/saa.rst b/docs/src/module_reference/saa.rst deleted file mode 100644 index a1b0396f..00000000 --- a/docs/src/module_reference/saa.rst +++ /dev/null @@ -1,10 +0,0 @@ -.. _sec-saa: - -autocat.saa -+++++++++++ - -Tools for generating single-atom alloy structures - -.. automodule:: autocat.saa - :members: - :undoc-members: diff --git a/docs/src/module_reference/surface.rst b/docs/src/module_reference/surface.rst deleted file mode 100644 index c5963b52..00000000 --- a/docs/src/module_reference/surface.rst +++ /dev/null @@ -1,10 +0,0 @@ -.. _sec-surface: - -autocat.surface -+++++++++++++++ - -Tools for automatically generating mono-elemental slabs - -.. 
automodule:: autocat.surface - :members: - :undoc-members: diff --git a/examples/adsorbing_molecules_on_surfaces.ipynb b/examples/adsorbing_molecules_on_surfaces.ipynb new file mode 100644 index 00000000..9e295aab --- /dev/null +++ b/examples/adsorbing_molecules_on_surfaces.ipynb @@ -0,0 +1,328 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 36, + "id": "eaee58ba", + "metadata": {}, + "outputs": [], + "source": [ + "from autocat.surface import generate_surface_structures\n", + "\n", + "from autocat.adsorption import generate_adsorbed_structures\n", + "from autocat.adsorption import generate_molecule\n", + "\n", + "from autocat.data.intermediates import ORR_INTERMEDIATE_NAMES" + ] + }, + { + "cell_type": "markdown", + "id": "f89805bd", + "metadata": {}, + "source": [ + "In this example we show how to use `AutoCat` to generate adsorption structures given a surface structure" + ] + }, + { + "cell_type": "markdown", + "id": "69d3233d", + "metadata": {}, + "source": [ + "# Generating Reaction Structures" + ] + }, + { + "cell_type": "markdown", + "id": "ccd545c4", + "metadata": {}, + "source": [ + "Let's start by making a `Pt111` slab for demonstration purposes. But in general this can be any surface you'd like as long as you have it in the form of an `ase.Atoms` object or written to disk in an `ase` readable format." + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "6711809e", + "metadata": {}, + "outputs": [], + "source": [ + "slab_dictionary = generate_surface_structures(\n", + " species_list = [\"Pt\"],\n", + " facets = {\"Pt\": [\"111\"]},\n", + " n_fixed_layers = 2\n", + ")\n", + "\n", + "clean_slab = slab_dictionary[\"Pt\"][\"fcc111\"][\"structure\"]" + ] + }, + { + "cell_type": "markdown", + "id": "e5bc6f7e", + "metadata": {}, + "source": [ + "Now that we have our clean slab, we can start adsorbing molecules onto the surface." 
+ ] + }, + { + "cell_type": "markdown", + "id": "75ab5a3d", + "metadata": {}, + "source": [ + "To fully characterize this surface for its activity toward evolving hydrogen (HER), we'd need to adsorb `H` onto every symmetry site of the surface. As the choice of `Pt111` was arbitrary, the function demo'd here works for any surface (through `pymatgen`'s implementation of Delaunay Triangulation)." + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "4d48cb62", + "metadata": {}, + "outputs": [], + "source": [ + "h_adsorption_structure_dictionary = generate_adsorbed_structures(\n", + " surface=clean_slab,\n", + " use_all_sites=True, # to consider all identified sites\n", + " adsorbates=[\"H\"],\n", + " height={\"H\" : 1.5}, # manually specify height. default guess based on covalent radii of nearest neighbors\n", + " write_to_disk = False\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "e7e1e3ce", + "metadata": {}, + "source": [ + "This will generate a dictionary of all generated adsorption structures with the following structure:\n", + "\n", + "- Adsorbate Names\n", + " - Symmetry Site types (ie. 
hollow, ontop, bridge) or Custom Label\n", + " - `x-y` coordinate of each site\n", + " - `ase.Atoms` structure\n", + " - Path to structure file (in the `ase.traj` format)" + ] + }, + { + "cell_type": "markdown", + "id": "830e6d48", + "metadata": {}, + "source": [ + "Here we have all three types of symmetry sites present" + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "id": "6afb4a44", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "dict_keys(['ontop', 'bridge', 'hollow'])\n" + ] + } + ], + "source": [ + "print(h_adsorption_structure_dictionary[\"H\"].keys())" + ] + }, + { + "cell_type": "markdown", + "id": "70b5335a", + "metadata": {}, + "source": [ + "And we can confirm that it identified both hollow sites:" + ] + }, + { + "cell_type": "code", + "execution_count": 32, + "id": "e846d5a8", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "dict_keys(['6.93_5.601', '9.702_4.001'])\n" + ] + } + ], + "source": [ + "print(h_adsorption_structure_dictionary[\"H\"][\"hollow\"].keys())" + ] + }, + { + "cell_type": "markdown", + "id": "5c3bc7ac", + "metadata": {}, + "source": [ + "Instead of exhaustively considering all sites, it can be restricted to specific types via `site_types`. \n", + "\n", + "Or alternatively, if we want to consider only manually specified sites, that can be done via `adsorption_sites`. When specifying the sites manually in this way, we need to provide them as a dictionary with keys as to how we'd like the site labelled. 
This is solely used for organizing the output dictionary" + ] + }, + { + "cell_type": "code", + "execution_count": 43, + "id": "eae177e2", + "metadata": {}, + "outputs": [], + "source": [ + "h_manual_adsorption_structure_dictionary = generate_adsorbed_structures(\n", + " surface=clean_slab,\n", + " use_all_sites=False,\n", + " adsorbates=[\"H\"],\n", + " adsorption_sites={\"custom\": [(0.,0.)]},\n", + " write_to_disk = False\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 45, + "id": "7eeddd64", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "dict_keys(['custom'])\n", + "dict_keys(['0.0_0.0'])\n" + ] + } + ], + "source": [ + "print(h_manual_adsorption_structure_dictionary[\"H\"].keys())\n", + "print(h_manual_adsorption_structure_dictionary[\"H\"][\"custom\"].keys())" + ] + }, + { + "cell_type": "markdown", + "id": "13e24624", + "metadata": {}, + "source": [ + "`AutoCat` also has some defaults for generating structures for considering the Oxygen Reduction/Evolution Reaction as well as Nitrogen Reduction. These can be found within `autocat.data.intermediates`. Let's generate the ORR adsorption structures on this slab as an example." + ] + }, + { + "cell_type": "code", + "execution_count": 34, + "id": "dd3d8ea1", + "metadata": {}, + "outputs": [], + "source": [ + "orr_adsorption_structure_dictionary = generate_adsorbed_structures(\n", + " surface=clean_slab,\n", + " use_all_sites = True, # to consider all identified sites (can also manually specify via `sites`)\n", + " adsorbates=ORR_INTERMEDIATE_NAMES,\n", + " write_to_disk = False\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "da19d412", + "metadata": {}, + "source": [ + "This places all of the relevant adsorbate molecules at all of the identified sites." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 35, + "id": "78748e9b", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "dict_keys(['OOH', 'O', 'OH', 'references'])\n" + ] + } + ], + "source": [ + "print(orr_adsorption_structure_dictionary.keys())" + ] + }, + { + "cell_type": "markdown", + "id": "51561b67", + "metadata": {}, + "source": [ + "It's important to note that if you already have the adsorbate molecule you'd like to consider as an `ase.Atoms` object, that can be supplied as well via a `dict`. We are going to use `autocat.adsorption.generate_molecule_object` to generate an example, but this can be anything (e.g. an `*.sdf` read by `ase.io.read`)" + ] + }, + { + "cell_type": "code", + "execution_count": 39, + "id": "7be25d8e", + "metadata": {}, + "outputs": [], + "source": [ + "nh2_mol = generate_molecule(\"NH2\")[\"NH2\"].get(\"structure\")\n", + "\n", + "nh2_adsorption_structure_dictionary = generate_adsorbed_structures(\n", + " surface=clean_slab,\n", + " use_all_sites = True,\n", + " adsorbates = {\"NH2\": nh2_mol},\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 41, + "id": "99a12fe8", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "dict_keys(['H2N'])\n" + ] + } + ], + "source": [ + "print(nh2_adsorption_structure_dictionary.keys())" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.9" + }, + "toc": { + "base_numbering": 1, + "nav_menu": {}, + "number_sections": true, + "sideBar": true, + "skip_h1_title": false, + "title_cell": "Table of Contents", + "title_sidebar": "Contents", + "toc_cell": false, + 
"toc_position": {}, + "toc_section_display": true, + "toc_window_display": false + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/examples/generating_surface_slabs.ipynb b/examples/generating_surface_slabs.ipynb new file mode 100644 index 00000000..2cd86a3b --- /dev/null +++ b/examples/generating_surface_slabs.ipynb @@ -0,0 +1,183 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "id": "319554e9", + "metadata": {}, + "outputs": [], + "source": [ + "from autocat.surface import generate_surface_structures" + ] + }, + { + "cell_type": "markdown", + "id": "3bbcebd9", + "metadata": {}, + "source": [ + "In this tutorial we show how to generate slabs using `AutoCat`" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "6d42c7dc", + "metadata": {}, + "outputs": [], + "source": [ + "slab_dictionary = generate_surface_structures(\n", + " species_list = [\"Pt\", \"Fe\", \"Ru\"],\n", + " facets = {\"Pt\": [\"100\"], \"Fe\": [\"111\"]}, # If we want to specify only specific facets\n", + " supercell_dim = (2,2,5), # dimensions of the supercell\n", + " default_lat_param_lib = \"pbe_pw\", # where default lattice parameters are pulled from\n", + " vacuum = 10.,\n", + " n_fixed_layers = 3, # fixes bottom 3 layers\n", + " write_to_disk = False # if we want to write the slabs to disk in the AutoCat directory format\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "1818f3f0", + "metadata": {}, + "source": [ + "This generates a dictionary containing the structures of the slabs. 
The organization of this dictionary is as follows:\n", + "\n", + "- Species\n", + " - Crystal Structure/Facet\n", + " - `ase.Atoms` structure\n", + " - Path to structure file (in the `ase.traj` format)" + ] + }, + { + "cell_type": "markdown", + "id": "d49b437a", + "metadata": {}, + "source": [ + "Thus, going layer by layer for this example, the first keys correspond to:" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "91f530b4", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "dict_keys(['Pt', 'Fe', 'Ru'])\n" + ] + } + ], + "source": [ + "print(slab_dictionary.keys())" + ] + }, + { + "cell_type": "markdown", + "id": "7c4a89fd", + "metadata": {}, + "source": [ + "Continuing down `Pt` for example, the next level is then:" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "17fb6812", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "dict_keys(['fcc100'])\n" + ] + } + ], + "source": [ + "print(slab_dictionary[\"Pt\"].keys())" + ] + }, + { + "cell_type": "markdown", + "id": "8521948b", + "metadata": {}, + "source": [ + "Going down another level, we get both the `ase.Atoms` structure object as well as the file location. 
Since we didn't write to disk, the latter returns `None`" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "db7b9248", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Atoms(symbols='Pt20', pbc=[True, True, False], cell=[5.612606335552851, 5.612606335552851, 27.937424], tags=..., constraint=FixAtoms(indices=[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]))\n" + ] + } + ], + "source": [ + "print(slab_dictionary[\"Pt\"][\"fcc100\"][\"structure\"])" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "f9d94169", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "None\n" + ] + } + ], + "source": [ + "print(slab_dictionary[\"Pt\"][\"fcc100\"][\"traj_file_path\"])" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.9" + }, + "toc": { + "base_numbering": 1, + "nav_menu": {}, + "number_sections": true, + "sideBar": true, + "skip_h1_title": false, + "title_cell": "Table of Contents", + "title_sidebar": "Contents", + "toc_cell": false, + "toc_position": {}, + "toc_section_display": true, + "toc_window_display": false + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/examples/learning/conducting_simulated_sl_search.ipynb b/examples/learning/conducting_simulated_sl_search.ipynb new file mode 100644 index 00000000..c96507e4 --- /dev/null +++ b/examples/learning/conducting_simulated_sl_search.ipynb @@ -0,0 +1,185 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np\n", + "\n", + "from sklearn.gaussian_process import 
GaussianProcessRegressor\n", + "\n", + "from matminer.featurizers.composition import ElementProperty\n", + "\n", + "from autocat.saa import generate_saa_structures\n", + "\n", + "from autocat.utils import extract_structures\n", + "\n", + "from autocat.learning.sequential import DesignSpace\n", + "from autocat.learning.sequential import simulated_sequential_learning" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "In this example we show how to conduct a simulated sequential learning run." + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "saa_dictionary = generate_saa_structures(\n", + " host_species=[\"Cu\", \"Au\", \"Fe\", \"Ag\", \"Ti\"],\n", + " dopant_species=[\"Pt\", \"Pd\", \"Co\", \"Ni\"],\n", + " facets={\"Cu\": [\"111\"], \"Au\": [\"111\"], \"Fe\": [\"110\"], \"Ag\": [\"111\"], \"Ti\": [\"0001\"]}\n", + ")\n", + "\n", + "saa_structures = extract_structures(saa_dictionary)" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "labels = np.random.randint(-15,15,size=len(saa_structures))" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "saa_design_space = DesignSpace(\n", + " design_space_structures=saa_structures,\n", + " design_space_labels=labels\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Sequential Learning Iteration #1\n", + "Sequential Learning Iteration #2\n", + "Sequential Learning Iteration #3\n", + "Sequential Learning Iteration #4\n", + "Sequential Learning Iteration #5\n" + ] + } + ], + "source": [ + "sl_history = simulated_sequential_learning(\n", + " full_design_space=saa_design_space,\n", + " init_training_size=2,\n", + " predictor_kwargs={\n", + " \"featurizer_class\": ElementProperty, \n", + " 
\"featurization_kwargs\":{\"preset\": \"magpie\"}, \n", + " \"model_class\": GaussianProcessRegressor\n", + " },\n", + " candidate_selection_kwargs={\"aq\": \"MU\", \"include_hhi\": True},\n", + " number_of_sl_loops=5\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "+----------------------------------+--------------------+\n", + "| | Sequential Learner |\n", + "+----------------------------------+--------------------+\n", + "| iteration count | 6 |\n", + "| next candidate system structures | ['PdTi35'] |\n", + "| next candidate system indices | [17] |\n", + "| acquisition function | MU |\n", + "| # of candidates to pick | 1 |\n", + "| target maximum | None |\n", + "| target minimum | None |\n", + "| include hhi? | True |\n", + "| include segregation energies? | False |\n", + "+----------------------------------+--------------------+\n", + "+-------------------------+--------------------------------------------------------+\n", + "| | DesignSpace |\n", + "+-------------------------+--------------------------------------------------------+\n", + "| total # of systems | 20 |\n", + "| # of unlabelled systems | 13 |\n", + "| unique species present | ['Cu', 'Pt', 'Pd', 'Co', 'Ni', 'Au', 'Fe', 'Ag', 'Ti'] |\n", + "| maximum label | nan |\n", + "| minimum label | nan |\n", + "+-------------------------+--------------------------------------------------------+\n", + "+---------+--------------------------------------------------------+\n", + "| | Predictor |\n", + "+---------+--------------------------------------------------------+\n", + "| class | sklearn.gaussian_process._gpr.GaussianProcessRegressor |\n", + "| kwargs | None |\n", + "| is fit? 
| True |\n", + "+---------+--------------------------------------------------------+\n", + "+-----------------------------------+------------------------------------------------------------+\n", + "| | Featurizer |\n", + "+-----------------------------------+------------------------------------------------------------+\n", + "| class | matminer.featurizers.composition.composite.ElementProperty |\n", + "| kwargs | None |\n", + "| species list | ['Ti', 'Fe', 'Co', 'Ni', 'Pt', 'Pd', 'Au', 'Ag', 'Cu'] |\n", + "| maximum structure size | 36 |\n", + "| preset | magpie |\n", + "| design space structures provided? | True |\n", + "+-----------------------------------+------------------------------------------------------------+\n" + ] + } + ], + "source": [ + "print(sl_history)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "interpreter": { + "hash": "bbcedd833a666dedd7a02add1ace8ad982a80fa78e4f715016af545a1fb42dd5" + }, + "kernelspec": { + "display_name": "Python 3.9.10 ('autocat39')", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.10" + }, + "orig_nbformat": 4 + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/examples/learning/defining_a_design_space.ipynb b/examples/learning/defining_a_design_space.ipynb new file mode 100644 index 00000000..c96d9f2b --- /dev/null +++ b/examples/learning/defining_a_design_space.ipynb @@ -0,0 +1,138 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np\n", + "\n", + "from autocat.saa import generate_saa_structures\n", + "\n", + "from autocat.utils import extract_structures\n", + "\n", + "from 
autocat.learning.sequential import DesignSpace" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "In this example we show how to define a `DesignSpace` containing single-atom alloys and corresponding labels.\n", + "\n", + "The data is as follows:\n", + "\n", + "| SAA | Label |\n", + "| --- | --- |\n", + "| Ni1/Cu111 | -0.3 |\n", + "| Ni1/Au111 | Unknown |\n", + "| Pd1/Cu111 | 0.2 |\n", + "| Pd1/Au111 | -0.1 |" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "# Create single-atom alloy structures\n", + "saa_dictionary = generate_saa_structures(\n", + " host_species=[\"Cu\", \"Au\"],\n", + " dopant_species=[\"Ni\", \"Pd\"],\n", + " n_fixed_layers=2,\n", + " facets={\"Cu\":[\"111\"], \"Au\":[\"111\"]}\n", + ")\n", + "\n", + "saa_structures = extract_structures(saa_dictionary)" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "0 Cu35Ni\n", + "1 Cu35Pd\n", + "2 Au35Ni\n", + "3 Au35Pd\n" + ] + } + ], + "source": [ + "# Get indices of each structure\n", + "for idx, struct in enumerate(saa_structures):\n", + " print(idx, struct.get_chemical_formula())" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [], + "source": [ + "# Generate labels in the correct ordering as per above\n", + " # Ni1Cu Pd1Cu Ni1Au Pd1Au\n", + "labels = np.array([-0.3, 0.2, np.nan, -0.1])" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "+-------------------------+--------------------------+\n", + "| | DesignSpace |\n", + "+-------------------------+--------------------------+\n", + "| total # of systems | 4 |\n", + "| # of unlabelled systems | 1 |\n", + "| unique species present | ['Cu', 'Ni', 'Pd', 'Au'] |\n", + "| maximum label | 0.2 |\n", + "| minimum 
label | -0.3 |\n", + "+-------------------------+--------------------------+\n" + ] + } + ], + "source": [ + "# Define the design space\n", + "saa_design_space = DesignSpace(design_space_structures=saa_structures, design_space_labels=labels)\n", + "\n", + "print(saa_design_space)" + ] + } + ], + "metadata": { + "interpreter": { + "hash": "bbcedd833a666dedd7a02add1ace8ad982a80fa78e4f715016af545a1fb42dd5" + }, + "kernelspec": { + "display_name": "Python 3.9.10 ('autocat39')", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.10" + }, + "orig_nbformat": 4 + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/examples/learning/featurizing_structures.ipynb b/examples/learning/featurizing_structures.ipynb new file mode 100644 index 00000000..c0674baf --- /dev/null +++ b/examples/learning/featurizing_structures.ipynb @@ -0,0 +1,236 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "from autocat.surface import generate_surface_structures\n", + "from autocat.saa import generate_saa_structures\n", + "\n", + "from autocat.utils import extract_structures\n", + "\n", + "from autocat.learning.featurizers import Featurizer\n", + "\n", + "from dscribe.descriptors import CoulombMatrix\n", + "from matminer.featurizers.composition import ElementProperty" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "In this example we show how to use `AutoCat` to featurize structures with the `Featurizer` class.\n", + "\n", + "Here we will be featurizing mono-elemental surfaces." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "# Generate structures to be featurized\n", + "mono_surfaces_dictionary = generate_surface_structures(\n", + " species_list=[\"Fe\", \"Ru\", \"Cu\", \"Pd\"],\n", + " facets={\"Fe\": [\"110\"], \"Ru\":[\"0001\"], \"Cu\":[\"111\"], \"Pd\":[\"111\"]}\n", + ")\n", + "mono_surfaces_structures = extract_structures(mono_surfaces_dictionary)\n", + "\n", + "saa_surfaces_dictionary = generate_saa_structures(\n", + " host_species=[\"Cu\", \"Au\"],\n", + " dopant_species=[\"Pt\", \"Pd\"],\n", + " facets={\"Cu\":[\"111\"], \"Au\":[\"111\"]}\n", + ")\n", + "saa_surfaces_structures = extract_structures(saa_surfaces_dictionary)\n", + "\n", + "all_structures = mono_surfaces_structures.copy()\n", + "all_structures.extend(saa_surfaces_structures)" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Fe36\n" + ] + } + ], + "source": [ + "print(all_structures[0].get_chemical_formula())" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "+-----------------------------------+-------------------------------------------------+\n", + "| | Featurizer |\n", + "+-----------------------------------+-------------------------------------------------+\n", + "| class | dscribe.descriptors.coulombmatrix.CoulombMatrix |\n", + "| kwargs | None |\n", + "| species list | ['Fe', 'Ru', 'Pt', 'Pd', 'Au', 'Cu'] |\n", + "| maximum structure size | 36 |\n", + "| preset | None |\n", + "| design space structures provided? 
| True |\n", + "+-----------------------------------+-------------------------------------------------+\n" + ] + } + ], + "source": [ + "# Instantiate featurizer based on Coulomb Matrix\n", + "coulomb_featurizer = Featurizer(\n", + " featurizer_class=CoulombMatrix, \n", + " design_space_structures=all_structures\n", + ")\n", + "print(coulomb_featurizer)" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "(1296,)\n" + ] + } + ], + "source": [ + "# Featurize just Fe\n", + "fe_feature_vector = coulomb_featurizer.featurize_single(all_structures[0])\n", + "print(fe_feature_vector.shape)" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "(8, 1296)\n" + ] + } + ], + "source": [ + "# Featurize all structures into a single matrix\n", + "feature_matrix = coulomb_featurizer.featurize_multiple(all_structures)\n", + "print(feature_matrix.shape)" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "+-----------------------------------+------------------------------------------------------------+\n", + "| | Featurizer |\n", + "+-----------------------------------+------------------------------------------------------------+\n", + "| class | matminer.featurizers.composition.composite.ElementProperty |\n", + "| kwargs | None |\n", + "| species list | ['Fe', 'Ru', 'Pt', 'Pd', 'Au', 'Cu'] |\n", + "| maximum structure size | 36 |\n", + "| preset | matminer |\n", + "| design space structures provided? 
| True |\n", + "+-----------------------------------+------------------------------------------------------------+\n" + ] + } + ], + "source": [ + "# Instantiate element property featurizer\n", + "element_featurizer = Featurizer(\n", + " featurizer_class=ElementProperty,\n", + " design_space_structures=all_structures,\n", + " preset=\"matminer\"\n", + ")\n", + "\n", + "print(element_featurizer)" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "(65,)\n" + ] + } + ], + "source": [ + "# Featurize just Fe\n", + "fe_feature_vector = element_featurizer.featurize_single(all_structures[0])\n", + "print(fe_feature_vector.shape)" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "(8, 65)\n" + ] + } + ], + "source": [ + "# Featurize all structures at once\n", + "feature_matrix = element_featurizer.featurize_multiple(all_structures)\n", + "print(feature_matrix.shape)" + ] + } + ], + "metadata": { + "interpreter": { + "hash": "bbcedd833a666dedd7a02add1ace8ad982a80fa78e4f715016af545a1fb42dd5" + }, + "kernelspec": { + "display_name": "Python 3.9.10 ('autocat39')", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.10" + }, + "orig_nbformat": 4 + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/examples/learning/making_predictions.ipynb b/examples/learning/making_predictions.ipynb new file mode 100644 index 00000000..cc668ae3 --- /dev/null +++ b/examples/learning/making_predictions.ipynb @@ -0,0 +1,199 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": 
[ + "import numpy as np\n", + "\n", + "from sklearn.gaussian_process import GaussianProcessRegressor\n", + "from sklearn.gaussian_process.kernels import RBF\n", + "\n", + "from dscribe.descriptors import SineMatrix\n", + "\n", + "from autocat.surface import generate_surface_structures\n", + "from autocat.adsorption import generate_adsorbed_structures\n", + "\n", + "from autocat.utils import extract_structures\n", + "\n", + "from autocat.learning.predictors import Predictor" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "In this example we show how to train a `Predictor` and use it to make predictions for adsorbates on Pt." + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "# Generate adsorption structures\n", + "substrates_dictionary = generate_surface_structures(\n", + " species_list=[\"Pt\"],\n", + " facets={\"Pt\":[\"100\"]}\n", + ")\n", + "\n", + "substrate = extract_structures(substrates_dictionary)[0]\n", + "\n", + "adsorbed_dictionary = generate_adsorbed_structures(\n", + " surface=substrate,\n", + " adsorbates=[\"H\", \"O\", \"N\", \"C\", \"Na\"],\n", + " use_all_sites=False,\n", + ")\n", + "\n", + "adsorbed_structures = extract_structures(adsorbed_dictionary)" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "# Generate labels\n", + "# N.B. 
here they are random for convenience, but should be actual values to train a meaningful `Predictor`\n", + "\n", + "labels = np.random.randint(-10,10,size=len(adsorbed_structures))" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "+---------+--------------------------------------------------------+\n", + "| | Predictor |\n", + "+---------+--------------------------------------------------------+\n", + "| class | sklearn.gaussian_process._gpr.GaussianProcessRegressor |\n", + "| kwargs | {'kernel': RBF(length_scale=0.5)} |\n", + "| is fit? | False |\n", + "+---------+--------------------------------------------------------+\n", + "+-----------------------------------+-------------------------------------------+\n", + "| | Featurizer |\n", + "+-----------------------------------+-------------------------------------------+\n", + "| class | dscribe.descriptors.sinematrix.SineMatrix |\n", + "| kwargs | None |\n", + "| species list | ['Na', 'Pt', 'C', 'N', 'O', 'H'] |\n", + "| maximum structure size | 37 |\n", + "| preset | None |\n", + "| design space structures provided? 
| True |\n", + "+-----------------------------------+-------------------------------------------+\n" + ] + } + ], + "source": [ + "kernel = RBF(0.5)\n", + "\n", + "predictor = Predictor(\n", + " model_class=GaussianProcessRegressor,\n", + " model_kwargs={\"kernel\": kernel},\n", + " featurizer_class=SineMatrix,\n", + " featurization_kwargs={\"design_space_structures\": adsorbed_structures}\n", + ")\n", + "\n", + "print(predictor)" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "True\n" + ] + } + ], + "source": [ + "predictor.fit(\n", + " training_structures=adsorbed_structures,\n", + " y=labels\n", + ")\n", + "\n", + "print(predictor.is_fit)" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [], + "source": [ + "test_dictionary = generate_adsorbed_structures(\n", + " surface=substrate,\n", + " adsorbates=[\"S\", \"Li\", \"P\"],\n", + " use_all_sites=False\n", + ")\n", + "\n", + "test_structures = extract_structures(test_dictionary)" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "(3,)\n", + "(3,)\n" + ] + } + ], + "source": [ + "# Make predictions on unseen data\n", + "predictions, uncertainties = predictor.predict(testing_structures=test_structures)\n", + "print(predictions.shape)\n", + "print(uncertainties.shape)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "interpreter": { + "hash": "bbcedd833a666dedd7a02add1ace8ad982a80fa78e4f715016af545a1fb42dd5" + }, + "kernelspec": { + "display_name": "Python 3.9.10 ('autocat39')", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + 
"name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.10" + }, + "orig_nbformat": 4 + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/mkdocs.yml b/mkdocs.yml new file mode 100644 index 00000000..03259b57 --- /dev/null +++ b/mkdocs.yml @@ -0,0 +1,63 @@ +site_name: AutoCat Documentation +theme: + name: material + features: + - navigation.tabs + - navigation.tabs.sticky + - navigation.sections + - navigation.top + - toc.integrate + logo: img/autocat_icon.png +markdown_extensions: + - pymdownx.highlight + - pymdownx.superfences + - pymdownx.inlinehilite + - pymdownx.arithmatex: + generic: true + - attr_list +extra_javascript: + - javascripts/mathjax.js + - https://polyfill.io/v3/polyfill.min.js?features=es6 + - https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-mml-chtml.js + +plugins: + - search + - mkdocstrings: + default_handler: python + handlers: + python: + selection: + docstring_style: "numpy" + rendering: + show_source: true + +nav: + - Home: README.md + - User Guide: + - Sequential Learning: + - Featurizers: User_Guide/Learning/featurizers.md + - Predictors: User_Guide/Learning/predictors.md + - Sequential: User_Guide/Learning/sequential.md + - Structure Generation: + - Bulk: User_Guide/Structure_Generation/bulk.md + - Surfaces: User_Guide/Structure_Generation/surface.md + - Adsorption: User_Guide/Structure_Generation/adsorption.md + - Single Atom Alloys: User_Guide/Structure_Generation/saa.md + - Data: + - HHI: User_Guide/Data/hhi.md + - Segregation Energies: User_Guide/Data/segregation_energies.md + - Lattice Parameters: User_Guide/Data/lattice_parameters.md + - Reaction Intermediates: User_Guide/Data/intermediates.md + - Tutorials: + - Training a Predictor on hydrogen adsorption energies: Tutorials/pred_h.md + - Conducting a simulated sequential learning run: Tutorials/sl.md + - API: + - Sequential Learning: + - autocat.learning.featurizers: API/Learning/featurizers.md + - autocat.learning.predictors: 
API/Learning/predictors.md + - autocat.learning.sequential: API/Learning/sequential.md + - Structure Generation: + - autocat.bulk: API/Structure_Generation/bulk.md + - autocat.surface: API/Structure_Generation/surface.md + - autocat.adsorption: API/Structure_Generation/adsorption.md + - autocat.saa: API/Structure_Generation/saa.md diff --git a/requirements.txt b/requirements.txt index 6d717763..54f8cb3d 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,4 +1,7 @@ -numpy==1.22.0 -ase==3.21.1 -pymatgen==2022.0.17 +numpy==1.22.3 +ase==3.22.1 +pymatgen==2022.3.29 fire==0.4.0 +matminer==0.7.3 +dscribe==0.4.0 +prettytable==3.2.0 \ No newline at end of file diff --git a/setup.py b/setup.py index 4ccfeaa0..94e174f0 100644 --- a/setup.py +++ b/setup.py @@ -22,6 +22,6 @@ ], package_dir={"": "src"}, packages=find_packages(where="src"), - install_requires=["numpy<=1.22.0", "ase", "pymatgen<=2022.0.17", "fire",], + install_requires=["numpy", "ase", "pymatgen", "fire",], include_package_data=True, ) diff --git a/src/autocat/VERSION.txt b/src/autocat/VERSION.txt index b5d4d818..6988b62d 100644 --- a/src/autocat/VERSION.txt +++ b/src/autocat/VERSION.txt @@ -1 +1 @@ -2022.3.31 +2022.5.23 diff --git a/src/autocat/data/hhi/__init__.py b/src/autocat/data/hhi/__init__.py new file mode 100644 index 00000000..7504ee06 --- /dev/null +++ b/src/autocat/data/hhi/__init__.py @@ -0,0 +1,26 @@ +import json +import pkg_resources + +__all__ = ["HHI"] +""" +Values obtained from dx.doi.org/10.1021/cm400893e + +Keys: + production: + Calculated based on elemental production + + reserves: + Calculated based on known elemental reserves +""" + +raw_hhi_p = pkg_resources.resource_filename("autocat.data.hhi", "hhi_p.json") + +with open(raw_hhi_p) as fr: + HHI_PRODUCTION = json.load(fr) + +raw_hhi_r = pkg_resources.resource_filename("autocat.data.hhi", "hhi_r.json") + +with open(raw_hhi_r) as fr: + HHI_RESERVES = json.load(fr) + +HHI = {"production": HHI_PRODUCTION, "reserves": HHI_RESERVES} diff 
--git a/src/autocat/data/hhi/hhi_p.json b/src/autocat/data/hhi/hhi_p.json new file mode 100644 index 00000000..3681cacf --- /dev/null +++ b/src/autocat/data/hhi/hhi_p.json @@ -0,0 +1,79 @@ +{ + "He": 3200, + "Li": 2900, + "Be": 8000, + "B": 2900, + "C": 500, + "N": 1300, + "O": 500, + "F": 1500, + "Na": 1100, + "Mg": 5300, + "Al": 1600, + "Si": 4700, + "P": 2000, + "S": 700, + "Cl": 1500, + "K": 1700, + "Ca": 3900, + "Sc": 5500, + "Ti": 1100, + "V": 3300, + "Cr": 3100, + "Mn": 1600, + "Fe": 2400, + "Co": 3100, + "Ni": 1000, + "Cu": 1600, + "Zn": 1600, + "Ga": 5500, + "Ge": 5300, + "As": 3300, + "Se": 2200, + "Br": 3300, + "Rb": 6000, + "Sr": 4200, + "Y": 9800, + "Zr": 3400, + "Nb": 8500, + "Mo": 2400, + "Ru": 3200, + "Rh": 3200, + "Pd": 3200, + "Ag": 1200, + "Cd": 1700, + "In": 3300, + "Sn": 2600, + "Sb": 7900, + "Te": 2900, + "I": 4900, + "Cs": 6000, + "Ba": 3000, + "La": 9500, + "Ce": 9500, + "Pr": 9500, + "Nd": 9500, + "Pm": 9500, + "Sm": 9500, + "Eu": 9500, + "Gd": 9500, + "Tb": 9500, + "Dy": 9500, + "Ho": 9500, + "Er": 9500, + "Tm": 9500, + "Yb": 9500, + "Lu": 9500, + "Hf": 3400, + "Ta": 2300, + "W": 7000, + "Re": 3300, + "Os": 5500, + "Ir": 5500, + "Pt": 5500, + "Au": 1100, + "Hg": 5500, + "Tl": 6500, + "Pb": 2700, + "Bi": 5300 +} diff --git a/src/autocat/data/hhi/hhi_r.json b/src/autocat/data/hhi/hhi_r.json new file mode 100644 index 00000000..79079bd1 --- /dev/null +++ b/src/autocat/data/hhi/hhi_r.json @@ -0,0 +1,79 @@ +{ + "He": 3900, + "Li": 4200, + "Be": 4000, + "B": 2000, + "C": 500, + "N": 500, + "O": 500, + "F": 1500, + "Na": 500, + "Mg": 500, + "Al": 1000, + "Si": 1000, + "P": 5100, + "S": 1000, + "Cl": 1500, + "K": 7200, + "Ca": 1500, + "Sc": 4500, + "Ti": 1600, + "V": 3400, + "Cr": 4100, + "Mn": 1800, + "Fe": 1400, + "Co": 2700, + "Ni": 1500, + "Cu": 1500, + "Zn": 1900, + "Ga": 1900, + "Ge": 1900, + "As": 4000, + "Se": 1900, + "Br": 6900, + "Rb": 6000, + "Sr": 3000, + "Y": 2600, + "Zr": 2600, + "Nb": 8800, + "Mo": 5300, + "Ru": 8000, + "Rh": 8000, 
+ "Pd": 8000, + "Ag": 1400, + "Cd": 1300, + "In": 2000, + "Sn": 1600, + "Sb": 3400, + "Te": 4900, + "I": 4800, + "Cs": 6000, + "Ba": 2300, + "La": 3100, + "Ce": 3100, + "Pr": 3100, + "Nd": 3100, + "Pm": 3100, + "Sm": 3100, + "Eu": 3100, + "Gd": 3100, + "Tb": 3100, + "Dy": 3100, + "Ho": 3100, + "Er": 3100, + "Tm": 3100, + "Yb": 3100, + "Lu": 3100, + "Hf": 2600, + "Ta": 4800, + "W": 4300, + "Re": 3300, + "Os": 9100, + "Ir": 9100, + "Pt": 9100, + "Au": 1000, + "Hg": 3100, + "Tl": 6500, + "Pb": 1800, + "Bi": 6000 +} diff --git a/src/autocat/data/lattice_parameters/bulk_beefvdw_fd.json b/src/autocat/data/lattice_parameters/bulk_beefvdw_fd.json index 920ad171..5b9b211f 100644 --- a/src/autocat/data/lattice_parameters/bulk_beefvdw_fd.json +++ b/src/autocat/data/lattice_parameters/bulk_beefvdw_fd.json @@ -1,7 +1,75 @@ { - "Pt": {"a": 4.013732}, - "Pd": {"a": 3.998072}, - "Fe": {"a": 2.893316}, - "Ni": {"a": 3.577644}, - "Ru": {"a": 2.748451, "c": 4.314765} -} + "Pt": { + "a": 4.013732 + }, + "Pd": { + "a": 3.998072 + }, + "Fe": { + "a": 2.893316 + }, + "Ni": { + "a": 3.577644 + }, + "Ag": { + "a": 4.26015 + }, + "Au": { + "a": 4.235948 + }, + "Cu": { + "a": 3.70834 + }, + "Rh": { + "a": 3.882724 + }, + "V": { + "a": 3.01497 + }, + "W": { + "a": 3.192674 + }, + "Mo": { + "a": 3.17331 + }, + "Nb": { + "a": 3.316336 + }, + "Cr": { + "a": 2.863204 + }, + "Ir": { + "a": 3.893 + }, + "Ta": { + "a": 3.334218 + }, + "Hf": { + "a": 3.218418, + "c": 5.081832 + }, + "Zr": { + "a": 3.246790, + "c": 5.172850 + }, + "Co": { + "a": 2.500673, + "c": 4.097051 + }, + "Ti": { + "a": 2.942273, + "c": 4.667081 + }, + "Os": { + "a": 2.773357, + "c": 4.360051 + }, + "Re": { + "a": 2.798497, + "c": 4.497198 + }, + "Ru": { + "a": 2.748451, + "c": 4.314765 + } +} \ No newline at end of file diff --git a/src/autocat/data/segregation_energies/__init__.py b/src/autocat/data/segregation_energies/__init__.py new file mode 100644 index 00000000..f059fc0a --- /dev/null +++ 
b/src/autocat/data/segregation_energies/__init__.py @@ -0,0 +1,35 @@ +import json +import pkg_resources + +__all__ = ["SEGREGATION_ENERGIES"] +""" + +Keys: + raban1999: + Values obtained from https://doi.org/10.1103/PhysRevB.59.15990 + Segregation energies for different host/dopant combinations + For hosts used fcc: 111, bcc:110 (Fe100 also available), hcp:0001 + + rao2020: + Values obtained from https://doi.org/10.1007/s11244-020-01267-2 + Segregation energies for different host/dopant combinations +""" + +raw_raban_seg_ener = pkg_resources.resource_filename( + "autocat.data.segregation_energies", "raban1999.json" +) + +with open(raw_raban_seg_ener) as fr: + RABAN1999_SEGREGATION_ENERGIES = json.load(fr) + +raw_rao_seg_ener = pkg_resources.resource_filename( + "autocat.data.segregation_energies", "rao2020.json" +) + +with open(raw_rao_seg_ener) as fr: + RAO2020_SEGREGATION_ENERGIES = json.load(fr) + +SEGREGATION_ENERGIES = { + "raban1999": RABAN1999_SEGREGATION_ENERGIES, + "rao2020": RAO2020_SEGREGATION_ENERGIES, +} diff --git a/src/autocat/data/segregation_energies/raban1999.json b/src/autocat/data/segregation_energies/raban1999.json new file mode 100644 index 00000000..a8a4e6d1 --- /dev/null +++ b/src/autocat/data/segregation_energies/raban1999.json @@ -0,0 +1,652 @@ +{ + "Ti": { + "Ti": 1.2, + "V": 0.1, + "Cr": -0.24, + "Mn": -0.34, + "Fe": -0.41, + "Co": -0.56, + "Ni": -0.75, + "Cu": -0.94, + "Zr": -0.38, + "Nb": 0.03, + "Mo": 0.09, + "Tc": -0.06, + "Ru": -0.31, + "Rh": -0.62, + "Pd": -0.93, + "Ag": -1.18, + "Hf": -0.14, + "Ta": 0.25, + "W": 0.35, + "Re": 0.2, + "Os": -0.04, + "Ir": -0.37, + "Pt": -0.72, + "Au": -1.05 + }, + "V": { + "Ti": -0.49, + "V": 1.16, + "Cr": 0.3, + "Mn": 0.41, + "Fe": 0.36, + "Co": 0.15, + "Ni": -0.12, + "Cu": -0.54, + "Zr": -1.08, + "Nb": -0.41, + "Mo": 0.1, + "Tc": 0.36, + "Ru": 0.39, + "Rh": 0.13, + "Pd": -0.28, + "Ag": -0.75, + "Hf": -1.0, + "Ta": -0.23, + "W": 0.31, + "Re": 0.62, + "Os": 0.68, + "Ir": 0.51, + "Pt": 0.09, + "Au": 
-0.39 + }, + "Cr": { + "Ti": -0.72, + "V": -0.15, + "Cr": 1.46, + "Mn": -0.14, + "Fe": -0.44, + "Co": -0.67, + "Ni": -0.8, + "Cu": -1.02, + "Zr": -2.05, + "Nb": -1.15, + "Mo": -0.62, + "Tc": -0.45, + "Ru": -0.68, + "Rh": -1.25, + "Pd": -1.7, + "Ag": -1.9, + "Hf": -1.55, + "Ta": -0.98, + "W": -0.4, + "Re": -0.17, + "Os": -0.29, + "Ir": -0.81, + "Pt": -1.58, + "Au": -1.98 + }, + "Mn": { + "Ti": -0.83, + "V": -0.32, + "Cr": -0.1, + "Mn": 1.24, + "Fe": -0.12, + "Co": -0.26, + "Ni": -0.47, + "Cu": -0.77, + "Zr": -2.15, + "Nb": -1.28, + "Mo": -0.73, + "Tc": -0.48, + "Ru": -0.52, + "Rh": -0.69, + "Pd": -0.93, + "Ag": -1.31, + "Hf": -1.83, + "Ta": -1.03, + "W": -0.56, + "Re": -0.31, + "Os": -0.32, + "Ir": -0.53, + "Pt": -0.83, + "Au": -1.23 + }, + "Fe": { + "Ti": -0.39, + "V": 0.06, + "Cr": 0.1, + "Mn": -0.16, + "Fe": 1.2, + "Co": -0.14, + "Ni": -0.65, + "Cu": -0.83, + "Zr": -1.6, + "Nb": -0.65, + "Mo": -0.06, + "Tc": 0.1, + "Ru": -0.2, + "Rh": -0.52, + "Pd": -1.05, + "Ag": -1.55, + "Hf": -1.5, + "Ta": -0.35, + "W": 0.2, + "Re": 0.45, + "Os": 0.25, + "Ir": -0.15, + "Pt": -0.66, + "Au": -1.36 + }, + "Fe_100": { + "Ti": -0.69, + "V": 0.19, + "Cr": 0.16, + "Mn": -0.38, + "Fe": 1.73, + "Co": -0.03, + "Ni": -0.77, + "Cu": -1.37, + "Zr": -2.22, + "Nb": -0.83, + "Mo": 0.03, + "Tc": 0.24, + "Ru": 0.0, + "Rh": -0.53, + "Pd": -1.43, + "Ag": -2.37, + "Hf": -2.15, + "Ta": -0.5, + "W": 0.42, + "Re": 0.78, + "Os": 0.6, + "Ir": 0.08, + "Pt": -0.78, + "Au": -1.93 + }, + "Co": { + "Ti": -0.33, + "V": 0.13, + "Cr": 0.19, + "Mn": 0.1, + "Fe": -0.01, + "Co": 1.07, + "Ni": -0.13, + "Cu": -0.48, + "Zr": -1.4, + "Nb": -0.45, + "Mo": 0.0, + "Tc": 0.49, + "Ru": 0.12, + "Rh": -0.4, + "Pd": -0.6, + "Ag": -0.93, + "Hf": -0.56, + "Ta": -0.24, + "W": 0.34, + "Re": 0.72, + "Os": 0.56, + "Ir": -0.1, + "Pt": -0.38, + "Au": -0.76 + }, + "Ni": { + "Ti": -0.12, + "V": 0.2, + "Cr": 0.25, + "Mn": 0.0, + "Fe": 0.13, + "Co": 0.13, + "Ni": 0.95, + "Cu": -0.25, + "Zr": -1.16, + "Nb": -0.26, + "Mo": 0.18, + "Tc": 
0.31, + "Ru": 0.1, + "Rh": -0.1, + "Pd": -0.4, + "Ag": -0.8, + "Hf": -0.74, + "Ta": -0.01, + "W": 0.45, + "Re": 0.53, + "Os": 0.37, + "Ir": 0.16, + "Pt": -0.17, + "Au": -0.69 + }, + "Cu": { + "Ti": 0.01, + "V": 0.25, + "Cr": 0.1, + "Mn": 0.07, + "Fe": 0.28, + "Co": 0.33, + "Ni": 0.17, + "Cu": 0.77, + "Zr": -0.64, + "Nb": 0.0, + "Mo": 0.28, + "Tc": 0.3, + "Ru": 0.2, + "Rh": 0.05, + "Pd": -0.2, + "Ag": -0.42, + "Hf": -0.35, + "Ta": 0.22, + "W": 0.57, + "Re": 0.62, + "Os": 0.48, + "Ir": 0.23, + "Pt": -0.04, + "Au": -0.29 + }, + "Zr": { + "Ti": 0.06, + "V": 0.01, + "Cr": -0.47, + "Mn": -0.36, + "Fe": -0.4, + "Co": -0.45, + "Ni": -0.55, + "Cu": -0.72, + "Zr": 1.22, + "Nb": 0.19, + "Mo": 0.13, + "Tc": -0.01, + "Ru": -0.19, + "Rh": -0.41, + "Pd": -0.68, + "Ag": -0.88, + "Hf": 0.15, + "Ta": 0.33, + "W": 0.3, + "Re": 0.15, + "Os": -0.02, + "Ir": -0.25, + "Pt": -0.5, + "Au": -0.8 + }, + "Nb": { + "Ti": -0.24, + "V": 0.12, + "Cr": 0.32, + "Mn": 0.23, + "Fe": 0.29, + "Co": 0.31, + "Ni": 0.08, + "Cu": -0.2, + "Zr": -0.65, + "Nb": 1.21, + "Mo": 0.48, + "Tc": 0.7, + "Ru": 0.65, + "Rh": 0.42, + "Pd": 0.05, + "Ag": -0.32, + "Hf": -0.47, + "Ta": 0.17, + "W": 0.7, + "Re": 0.98, + "Os": 1.0, + "Ir": 0.77, + "Pt": 0.4, + "Au": -0.03 + }, + "Mo": { + "Ti": -0.14, + "V": 0.08, + "Cr": -0.01, + "Mn": -0.5, + "Fe": -0.52, + "Co": -0.72, + "Ni": -0.82, + "Cu": -1.16, + "Zr": -0.9, + "Nb": -0.22, + "Mo": 1.6, + "Tc": -0.21, + "Ru": -0.81, + "Rh": -1.28, + "Pd": -1.47, + "Ag": -1.75, + "Hf": -0.98, + "Ta": -0.02, + "W": 0.22, + "Re": 0.1, + "Os": -0.45, + "Ir": -1.15, + "Pt": -1.6, + "Au": -1.94 + }, + "Tc": { + "Ti": -0.82, + "V": -0.4, + "Cr": -0.11, + "Mn": -0.2, + "Fe": -0.11, + "Co": -0.01, + "Ni": -0.24, + "Cu": -0.7, + "Zr": -1.57, + "Nb": -0.77, + "Mo": -0.27, + "Tc": 1.47, + "Ru": 0.02, + "Rh": -0.11, + "Pd": -0.46, + "Ag": -0.97, + "Hf": -1.26, + "Ta": -0.65, + "W": -0.06, + "Re": 0.26, + "Os": 0.37, + "Ir": 0.21, + "Pt": -0.16, + "Au": -0.7 + }, + "Ru": { + "Ti": -0.3, + "V": 0.15, 
+ "Cr": 0.24, + "Mn": -0.4, + "Fe": -0.39, + "Co": -0.37, + "Ni": -0.71, + "Cu": -1.21, + "Zr": -1.12, + "Nb": -0.31, + "Mo": 0.1, + "Tc": 0.17, + "Ru": 1.48, + "Rh": -0.43, + "Pd": -1.03, + "Ag": -1.72, + "Hf": -0.83, + "Ta": -0.17, + "W": 0.24, + "Re": 0.37, + "Os": 0.23, + "Ir": -0.2, + "Pt": -0.82, + "Au": -1.62 + }, + "Rh": { + "Ti": 0.12, + "V": 0.35, + "Cr": 0.31, + "Mn": -0.08, + "Fe": -0.01, + "Co": 0.02, + "Ni": -0.08, + "Cu": -0.38, + "Zr": -0.46, + "Nb": 0.09, + "Mo": 0.44, + "Tc": 0.46, + "Ru": 0.31, + "Rh": 1.15, + "Pd": -0.45, + "Ag": -0.92, + "Hf": -0.15, + "Ta": 0.36, + "W": 0.66, + "Re": 0.71, + "Os": 0.56, + "Ir": 0.23, + "Pt": -0.27, + "Au": -0.87 + }, + "Pd": { + "Ti": 0.58, + "V": 0.78, + "Cr": 0.3, + "Mn": 0.3, + "Fe": 0.35, + "Co": 0.29, + "Ni": 0.21, + "Cu": 0.04, + "Zr": 0.32, + "Nb": 0.87, + "Mo": 1.08, + "Tc": 1.02, + "Ru": 0.74, + "Rh": 0.36, + "Pd": 0.84, + "Ag": -0.26, + "Hf": 0.44, + "Ta": 1.04, + "W": 1.37, + "Re": 1.34, + "Os": 1.11, + "Ir": 0.7, + "Pt": 0.19, + "Au": -0.22 + }, + "Ag": { + "Ti": 0.45, + "V": 0.63, + "Cr": 0.29, + "Mn": 0.23, + "Fe": 0.41, + "Co": 0.48, + "Ni": 0.49, + "Cu": 0.22, + "Zr": 0.33, + "Nb": 0.67, + "Mo": 0.74, + "Tc": 0.69, + "Ru": 0.6, + "Rh": 0.42, + "Pd": 0.28, + "Ag": 0.58, + "Hf": 0.4, + "Ta": 0.83, + "W": 0.93, + "Re": 0.88, + "Os": 0.72, + "Ir": 0.55, + "Pt": 0.34, + "Au": 0.03 + }, + "Hf": { + "Ti": -0.03, + "V": -0.04, + "Cr": -0.52, + "Mn": -0.49, + "Fe": -0.51, + "Co": -0.62, + "Ni": -0.75, + "Cu": -0.93, + "Zr": -0.14, + "Nb": 0.08, + "Mo": -0.12, + "Tc": -0.25, + "Ru": -0.44, + "Rh": -0.68, + "Pd": -0.92, + "Ag": -1.16, + "Hf": 1.35, + "Ta": 0.25, + "W": 0.17, + "Re": -0.01, + "Os": -0.26, + "Ir": -0.53, + "Pt": -0.8, + "Au": -1.11 + }, + "Ta": { + "Ti": -0.45, + "V": -0.03, + "Cr": 0.16, + "Mn": 0.1, + "Fe": 0.13, + "Co": 0.06, + "Ni": -0.18, + "Cu": -0.52, + "Zr": -0.85, + "Nb": -0.21, + "Mo": 0.25, + "Tc": 0.44, + "Ru": 0.4, + "Rh": 0.11, + "Pd": -0.26, + "Ag": -0.67, + "Hf": -0.6, + 
"Ta": 1.37, + "W": 0.47, + "Re": 0.75, + "Os": 0.73, + "Ir": 0.49, + "Pt": 0.11, + "Au": -0.37 + }, + "W": { + "Ti": 0.02, + "V": 0.04, + "Cr": -0.14, + "Mn": -0.25, + "Fe": -0.35, + "Co": -0.45, + "Ni": -0.42, + "Cu": -0.75, + "Zr": -0.81, + "Nb": -0.31, + "Mo": -0.24, + "Tc": -0.55, + "Ru": -1.07, + "Rh": -1.22, + "Pd": -1.27, + "Ag": -1.56, + "Hf": -0.78, + "Ta": -0.13, + "W": 1.87, + "Re": -0.27, + "Os": -0.85, + "Ir": -1.34, + "Pt": -1.66, + "Au": -1.85 + }, + "Re": { + "Ti": -0.89, + "V": -0.42, + "Cr": -0.13, + "Mn": -0.28, + "Fe": -0.18, + "Co": -0.14, + "Ni": -0.36, + "Cu": -0.83, + "Zr": -1.75, + "Nb": -0.94, + "Mo": -0.43, + "Tc": -0.19, + "Ru": -0.17, + "Rh": -0.32, + "Pd": -0.68, + "Ag": -1.24, + "Hf": -1.51, + "Ta": -0.77, + "W": -0.27, + "Re": 1.69, + "Os": 0.04, + "Ir": -0.11, + "Pt": -0.46, + "Au": -1.05 + }, + "Os": { + "Ti": -0.22, + "V": 0.18, + "Cr": 0.36, + "Mn": -0.21, + "Fe": -0.31, + "Co": -0.3, + "Ni": -0.62, + "Cu": -1.21, + "Zr": -1.07, + "Nb": -0.27, + "Mo": 0.07, + "Tc": 0.09, + "Ru": -0.2, + "Rh": -0.7, + "Pd": -1.31, + "Ag": -2.0, + "Hf": -1.04, + "Ta": -0.17, + "W": 0.13, + "Re": 0.23, + "Os": 1.81, + "Ir": -0.48, + "Pt": -1.25, + "Au": -2.14 + }, + "Ir": { + "Ti": 0.29, + "V": 0.51, + "Cr": 0.35, + "Mn": 0.09, + "Fe": 0.11, + "Co": 0.16, + "Ni": 0.12, + "Cu": -0.12, + "Zr": -0.43, + "Nb": 0.1, + "Mo": 0.35, + "Tc": 0.35, + "Ru": 0.23, + "Rh": -0.08, + "Pd": -0.55, + "Ag": -1.0, + "Hf": -0.17, + "Ta": 0.26, + "W": 0.47, + "Re": 0.48, + "Os": 0.32, + "Ir": 1.44, + "Pt": -0.58, + "Au": -1.2 + }, + "Pt": { + "Ti": 0.66, + "V": 0.98, + "Cr": 0.6, + "Mn": 0.38, + "Fe": 0.37, + "Co": 0.46, + "Ni": 0.43, + "Cu": 0.32, + "Zr": 0.3, + "Nb": 0.76, + "Mo": 0.93, + "Tc": 0.85, + "Ru": 0.6, + "Rh": 0.26, + "Pd": 0.0, + "Ag": -0.27, + "Hf": 0.47, + "Ta": 0.95, + "W": 1.16, + "Re": 1.11, + "Os": 0.86, + "Ir": 0.44, + "Pt": 1.03, + "Au": -0.36 + }, + "Au": { + "Ti": 0.46, + "V": 0.59, + "Cr": 0.33, + "Mn": 0.3, + "Fe": 0.45, + "Co": 0.54, + "Ni": 
0.56, + "Cu": 0.34, + "Zr": 0.3, + "Nb": 0.61, + "Mo": 0.67, + "Tc": 0.59, + "Ru": 0.52, + "Rh": 0.44, + "Pd": 0.28, + "Ag": 0.0, + "Hf": 0.47, + "Ta": 0.79, + "W": 0.92, + "Re": 0.81, + "Os": 0.65, + "Ir": 0.5, + "Pt": 0.34, + "Au": 0.72 + } +} \ No newline at end of file diff --git a/src/autocat/data/segregation_energies/rao2020.json b/src/autocat/data/segregation_energies/rao2020.json new file mode 100644 index 00000000..d2f0236f --- /dev/null +++ b/src/autocat/data/segregation_energies/rao2020.json @@ -0,0 +1,842 @@ +{ + "Al": { + "Al": 0, + "Sc": 1, + "Ti": 0.5, + "V": 0.5, + "Cr": 0, + "Fe": 0.5, + "Co": 0.5, + "Ni": 0.8, + "Cu": 0.9, + "Zn": 0.9, + "Y": 1, + "Zr": 0.8, + "Nb": 0.5, + "Mo": 0, + "Ru": 0.5, + "Rh": 0.8, + "Pd": 0.9, + "Ag": 0.9, + "Cd": 0.9, + "Hf": 0.5, + "Ta": 0, + "W": 0, + "Re": 0, + "Os": 0.5, + "Ir": 0.5, + "Pt": 0.9, + "Au": 0.9, + "Pb": 1 + }, + "Sc": { + "Al": 1, + "Sc": 0, + "Ti": 0.5, + "V": 0.5, + "Cr": 0.5, + "Fe": 0, + "Co": 0, + "Ni": 0.5, + "Cu": 0.9, + "Zn": 1, + "Y": 1, + "Zr": 0.5, + "Nb": 0.5, + "Mo": 0.5, + "Ru": 0, + "Rh": 0.5, + "Pd": 0.8, + "Ag": 0.9, + "Cd": 1, + "Hf": 0.5, + "Ta": 0, + "W": 0.5, + "Re": 0.5, + "Os": 0, + "Ir": 0, + "Pt": 0.5, + "Au": 0.9, + "Pb": 1 + }, + "Ti": { + "Al": 1, + "Sc": 1, + "Ti": 0, + "V": 0.9, + "Cr": 0.9, + "Fe": 0.8, + "Co": 0.5, + "Ni": 0.8, + "Cu": 1, + "Zn": 1, + "Y": 1, + "Zr": 1, + "Nb": 1, + "Mo": 1, + "Ru": 1, + "Rh": 0.8, + "Pd": 0.9, + "Ag": 0.9, + "Cd": 1, + "Hf": 1, + "Ta": 0.8, + "W": 1, + "Re": 1, + "Os": 0.9, + "Ir": 0.8, + "Pt": 0.9, + "Au": 1, + "Pb": 1 + }, + "V": { + "Al": 1, + "Sc": 1, + "Ti": 1, + "V": 0, + "Cr": 0.8, + "Fe": 1, + "Co": 1, + "Ni": 1, + "Cu": 1, + "Zn": 1, + "Y": 0.9, + "Zr": 1, + "Nb": 1, + "Mo": 1, + "Ru": 1, + "Rh": 1, + "Pd": 1, + "Ag": 0.8, + "Cd": 0.8, + "Hf": 1, + "Ta": 1, + "W": 0.9, + "Re": 0.5, + "Os": 0.8, + "Ir": 1, + "Pt": 1, + "Au": 1, + "Pb": 1 + }, + "Cr": { + "Al": 1, + "Sc": 1, + "Ti": 1, + "V": 1, + "Cr": 0, + "Fe": 0.8, + "Co": 0, 
+ "Ni": 0.8, + "Cu": 0.8, + "Zn": 0.9, + "Y": 0.8, + "Zr": 1, + "Nb": 1, + "Mo": 1, + "Ru": 1, + "Rh": 0.9, + "Pd": 0.5, + "Ag": 0, + "Cd": 0.5, + "Hf": 1, + "Ta": 1, + "W": 1, + "Re": 1, + "Os": 1, + "Ir": 1, + "Pt": 1, + "Au": 0.5, + "Pb": 1 + }, + "Fe": { + "Al": 1, + "Sc": 1, + "Ti": 1, + "V": 0.8, + "Cr": 1, + "Fe": 0, + "Co": 1, + "Ni": 1, + "Cu": 0.9, + "Zn": 0.9, + "Y": 0.9, + "Zr": 1, + "Nb": 1, + "Mo": 0.9, + "Ru": 1, + "Rh": 1, + "Pd": 1, + "Ag": 0.5, + "Cd": 0.5, + "Hf": 1, + "Ta": 1, + "W": 0.5, + "Re": 0.5, + "Os": 1, + "Ir": 1, + "Pt": 1, + "Au": 1, + "Pb": 1 + }, + "Co": { + "Al": 1, + "Sc": 1, + "Ti": 1, + "V": 0.5, + "Cr": 0.9, + "Fe": 0.9, + "Co": 0, + "Ni": 0.9, + "Cu": 0.9, + "Zn": 1, + "Y": 1, + "Zr": 1, + "Nb": 1, + "Mo": 0.5, + "Ru": 1, + "Rh": 1, + "Pd": 1, + "Ag": 0.5, + "Cd": 0.8, + "Hf": 1, + "Ta": 0.8, + "W": 0.5, + "Re": 0.5, + "Os": 0.9, + "Ir": 1, + "Pt": 1, + "Au": 1, + "Pb": 1 + }, + "Ni": { + "Al": 1, + "Sc": 1, + "Ti": 0.5, + "V": 0, + "Cr": 0, + "Fe": 0.5, + "Co": 0.8, + "Ni": 0, + "Cu": 0.9, + "Zn": 1, + "Y": 1, + "Zr": 1, + "Nb": 0.5, + "Mo": 0, + "Ru": 0.9, + "Rh": 1, + "Pd": 1, + "Ag": 1, + "Cd": 1, + "Hf": 1, + "Ta": 0, + "W": 0, + "Re": 0, + "Os": 0.5, + "Ir": 1, + "Pt": 1, + "Au": 1, + "Pb": 1 + }, + "Cu": { + "Al": 1, + "Sc": 1, + "Ti": 0.5, + "V": 0, + "Cr": 0, + "Fe": 0, + "Co": 0.5, + "Ni": 0.8, + "Cu": 0, + "Zn": 1, + "Y": 1, + "Zr": 1, + "Nb": 0.5, + "Mo": 0, + "Ru": 0.5, + "Rh": 0.9, + "Pd": 1, + "Ag": 1, + "Cd": 1, + "Hf": 0.8, + "Ta": 0.5, + "W": 0, + "Re": 0, + "Os": 0.5, + "Ir": 0.9, + "Pt": 1, + "Au": 1, + "Pb": 1 + }, + "Zn": { + "Al": 0.9, + "Sc": 0.8, + "Ti": 0.5, + "V": 0.5, + "Cr": 0, + "Fe": 0.5, + "Co": 0.5, + "Ni": 0.5, + "Cu": 0.5, + "Zn": 0, + "Y": 1, + "Zr": 0.5, + "Nb": 0, + "Mo": 0, + "Ru": 0, + "Rh": 0.5, + "Pd": 0.5, + "Ag": 1, + "Cd": 1, + "Hf": 0.5, + "Ta": 0, + "W": 0, + "Re": 0, + "Os": 0, + "Ir": 0.5, + "Pt": 0.5, + "Au": 1, + "Pb": 1 + }, + "Y": { + "Al": 0.9, + "Sc": 0.8, + "Ti": 0.5, + 
"V": 0.5, + "Cr": 0, + "Fe": 0.5, + "Co": 0, + "Ni": 0, + "Cu": 0.5, + "Zn": 0.9, + "Y": 0, + "Zr": 0.5, + "Nb": 0.5, + "Mo": 0, + "Ru": 0, + "Rh": 0, + "Pd": 0.5, + "Ag": 0.9, + "Cd": 1, + "Hf": 0.5, + "Ta": 0.5, + "W": 0, + "Re": 0, + "Os": 0, + "Ir": 0, + "Pt": 0, + "Au": 0.5, + "Pb": 1 + }, + "Zr": { + "Al": 1, + "Sc": 1, + "Ti": 0.8, + "V": 0.8, + "Cr": 0.5, + "Fe": 0, + "Co": 0, + "Ni": 0.5, + "Cu": 0.9, + "Zn": 1, + "Y": 1, + "Zr": 0, + "Nb": 0.9, + "Mo": 0.5, + "Ru": 0.5, + "Rh": 0.5, + "Pd": 0.9, + "Ag": 0.9, + "Cd": 1, + "Hf": 0.8, + "Ta": 0.5, + "W": 0.5, + "Re": 0.5, + "Os": 0, + "Ir": 0, + "Pt": 0.5, + "Au": 0.9, + "Pb": 1 + }, + "Nb": { + "Al": 1, + "Sc": 0.9, + "Ti": 0.9, + "V": 0.8, + "Cr": 0.8, + "Fe": 1, + "Co": 1, + "Ni": 1, + "Cu": 0.9, + "Zn": 0.9, + "Y": 1, + "Zr": 1, + "Nb": 0, + "Mo": 0.5, + "Ru": 1, + "Rh": 1, + "Pd": 1, + "Ag": 0.9, + "Cd": 0.9, + "Hf": 1, + "Ta": 0.8, + "W": 0.5, + "Re": 0.5, + "Os": 0.5, + "Ir": 1, + "Pt": 1, + "Au": 1, + "Pb": 1 + }, + "Mo": { + "Al": 1, + "Sc": 1, + "Ti": 1, + "V": 0.5, + "Cr": 0.9, + "Fe": 0.9, + "Co": 0.9, + "Ni": 0.8, + "Cu": 0.8, + "Zn": 0.9, + "Y": 1, + "Zr": 1, + "Nb": 1, + "Mo": 0, + "Ru": 1, + "Rh": 0.9, + "Pd": 0.5, + "Ag": 0.5, + "Cd": 0.8, + "Hf": 1, + "Ta": 1, + "W": 0.8, + "Re": 1, + "Os": 1, + "Ir": 1, + "Pt": 0.9, + "Au": 0.5, + "Pb": 1 + }, + "Ru": { + "Al": 1, + "Sc": 1, + "Ti": 1, + "V": 0.5, + "Cr": 0.5, + "Fe": 1, + "Co": 0.9, + "Ni": 0.9, + "Cu": 0.9, + "Zn": 0.9, + "Y": 1, + "Zr": 1, + "Nb": 1, + "Mo": 0.5, + "Ru": 0, + "Rh": 0.9, + "Pd": 0.9, + "Ag": 0.5, + "Cd": 0.9, + "Hf": 1, + "Ta": 0.9, + "W": 0.5, + "Re": 0.5, + "Os": 0.8, + "Ir": 1, + "Pt": 0.9, + "Au": 0.9, + "Pb": 1 + }, + "Rh": { + "Al": 1, + "Sc": 1, + "Ti": 0, + "V": 0, + "Cr": 0, + "Fe": 0, + "Co": 0.8, + "Ni": 1, + "Cu": 1, + "Zn": 1, + "Y": 1, + "Zr": 0.9, + "Nb": 0, + "Mo": 0, + "Ru": 0.5, + "Rh": 0, + "Pd": 0.9, + "Ag": 0.9, + "Cd": 1, + "Hf": 0.5, + "Ta": 0, + "W": 0, + "Re": 0, + "Os": 0, + "Ir": 0.8, + "Pt": 
1, + "Au": 0.9, + "Pb": 1 + }, + "Pd": { + "Al": 0.5, + "Sc": 0, + "Ti": 0, + "V": 0, + "Cr": 0, + "Fe": 1, + "Co": 0.5, + "Ni": 0.5, + "Cu": 0.8, + "Zn": 0.8, + "Y": 0.8, + "Zr": 0, + "Nb": 0, + "Mo": 0, + "Ru": 0, + "Rh": 0.5, + "Pd": 0, + "Ag": 0.9, + "Cd": 1, + "Hf": 0, + "Ta": 0, + "W": 0, + "Re": 0, + "Os": 0, + "Ir": 0.5, + "Pt": 0.9, + "Au": 1, + "Pb": 1 + }, + "Ag": { + "Al": 0.9, + "Sc": 0.5, + "Ti": 0, + "V": 0, + "Cr": 0, + "Fe": 0.5, + "Co": 0.9, + "Ni": 0.5, + "Cu": 0.8, + "Zn": 1, + "Y": 0.5, + "Zr": 0, + "Nb": 0, + "Mo": 0, + "Ru": 0, + "Rh": 0.5, + "Pd": 0.5, + "Ag": 0, + "Cd": 1, + "Hf": 0, + "Ta": 0, + "W": 0, + "Re": 0, + "Os": 0, + "Ir": 0.5, + "Pt": 0.8, + "Au": 1, + "Pb": 1 + }, + "Cd": { + "Al": 0.8, + "Sc": 0.5, + "Ti": 0.5, + "V": 0, + "Cr": 0, + "Fe": 0.8, + "Co": 0.8, + "Ni": 0.8, + "Cu": 0.8, + "Zn": 0.8, + "Y": 0.5, + "Zr": 0.5, + "Nb": 0, + "Mo": 0, + "Ru": 0.5, + "Rh": 0.5, + "Pd": 0.8, + "Ag": 0.9, + "Cd": 0, + "Hf": 0.5, + "Ta": 0.5, + "W": 0, + "Re": 0, + "Os": 0.5, + "Ir": 0.5, + "Pt": 0.5, + "Au": 0.8, + "Pb": 0.9 + }, + "Hf": { + "Al": 1, + "Sc": 0.9, + "Ti": 0.9, + "V": 0.9, + "Cr": 0.8, + "Fe": 0.5, + "Co": 0.5, + "Ni": 0.8, + "Cu": 0.9, + "Zn": 1, + "Y": 1, + "Zr": 1, + "Nb": 0.9, + "Mo": 0.9, + "Ru": 0.5, + "Rh": 0.5, + "Pd": 0.8, + "Ag": 0.9, + "Cd": 1, + "Hf": 0, + "Ta": 0.8, + "W": 0.9, + "Re": 0.5, + "Os": 0.5, + "Ir": 0.5, + "Pt": 0.5, + "Au": 0.9, + "Pb": 1 + }, + "Ta": { + "Al": 0.9, + "Sc": 0.9, + "Ti": 0.9, + "V": 0.9, + "Cr": 0.8, + "Fe": 1, + "Co": 1, + "Ni": 1, + "Cu": 0.8, + "Zn": 0.9, + "Y": 1, + "Zr": 1, + "Nb": 1, + "Mo": 0.9, + "Ru": 1, + "Rh": 1, + "Pd": 1, + "Ag": 0.5, + "Cd": 0.5, + "Hf": 1, + "Ta": 0, + "W": 0.5, + "Re": 0.5, + "Os": 0.9, + "Ir": 1, + "Pt": 1, + "Au": 0.9, + "Pb": 1 + }, + "W": { + "Al": 1, + "Sc": 1, + "Ti": 1, + "V": 0.8, + "Cr": 0.9, + "Fe": 0.8, + "Co": 0.8, + "Ni": 0, + "Cu": 0.5, + "Zn": 0.5, + "Y": 0.9, + "Zr": 1, + "Nb": 1, + "Mo": 1, + "Ru": 1, + "Rh": 0.5, + "Pd": 0, + "Ag": 
0, + "Cd": 0.5, + "Hf": 1, + "Ta": 1, + "W": 0, + "Re": 1, + "Os": 1, + "Ir": 1, + "Pt": 0.5, + "Au": 0, + "Pb": 0.5 + }, + "Re": { + "Al": 1, + "Sc": 0.5, + "Ti": 1, + "V": 1, + "Cr": 0.9, + "Fe": 0.9, + "Co": 1, + "Ni": 1, + "Cu": 0.8, + "Zn": 0.5, + "Y": 0.5, + "Zr": 1, + "Nb": 1, + "Mo": 1, + "Ru": 1, + "Rh": 1, + "Pd": 1, + "Ag": 0, + "Cd": 0, + "Hf": 1, + "Ta": 1, + "W": 1, + "Re": 0, + "Os": 1, + "Ir": 1, + "Pt": 1, + "Au": 0.9, + "Pb": 0 + }, + "Os": { + "Al": 0.9, + "Sc": 1, + "Ti": 1, + "V": 0.5, + "Cr": 0.5, + "Fe": 1, + "Co": 0.9, + "Ni": 0.9, + "Cu": 0.5, + "Zn": 0.8, + "Y": 1, + "Zr": 1, + "Nb": 1, + "Mo": 0.8, + "Ru": 0.9, + "Rh": 0.9, + "Pd": 0.8, + "Ag": 0, + "Cd": 0.5, + "Hf": 1, + "Ta": 1, + "W": 0.8, + "Re": 0.5, + "Os": 0, + "Ir": 1, + "Pt": 0.9, + "Au": 0.5, + "Pb": 0 + }, + "Ir": { + "Al": 1, + "Sc": 1, + "Ti": 0, + "V": 0, + "Cr": 0, + "Fe": 0, + "Co": 0.5, + "Ni": 0.9, + "Cu": 0.9, + "Zn": 1, + "Y": 1, + "Zr": 1, + "Nb": 0, + "Mo": 0, + "Ru": 0.5, + "Rh": 0.9, + "Pd": 0.9, + "Ag": 0.5, + "Cd": 1, + "Hf": 0.9, + "Ta": 0, + "W": 0, + "Re": 0, + "Os": 0.5, + "Ir": 0, + "Pt": 0.9, + "Au": 0.9, + "Pb": 0.5 + }, + "Pt": { + "Al": 0, + "Sc": 0, + "Ti": 0, + "V": 0, + "Cr": 0, + "Fe": 0, + "Co": 0, + "Ni": 0, + "Cu": 0.5, + "Zn": 0.5, + "Y": 0.9, + "Zr": 0, + "Nb": 0, + "Mo": 0, + "Ru": 0, + "Rh": 0.5, + "Pd": 0.9, + "Ag": 0.9, + "Cd": 1, + "Hf": 0, + "Ta": 0, + "W": 0, + "Re": 0, + "Os": 0, + "Ir": 0.5, + "Pt": 0, + "Au": 0.9, + "Pb": 0.5 + }, + "Au": { + "Al": 0.9, + "Sc": 0, + "Ti": 0, + "V": 0.5, + "Cr": 0, + "Fe": 0.5, + "Co": 0.9, + "Ni": 0.5, + "Cu": 0.5, + "Zn": 0.8, + "Y": 0.5, + "Zr": 0, + "Nb": 0, + "Mo": 0, + "Ru": 0.5, + "Rh": 0.5, + "Pd": 0.5, + "Ag": 0.8, + "Cd": 1, + "Hf": 0, + "Ta": 0, + "W": 0, + "Re": 0, + "Os": 0, + "Ir": 0.5, + "Pt": 0.5, + "Au": 0, + "Pb": 1 + }, + "Pb": { + "Al": 0.9, + "Sc": 0.5, + "Ti": 0.5, + "V": 0, + "Cr": 0, + "Fe": 0, + "Co": 0, + "Ni": 0.5, + "Cu": 0.5, + "Zn": 0.9, + "Y": 0.5, + "Zr": 0.5, + "Nb": 0, 
+ "Mo": 0, + "Ru": 0, + "Rh": 0, + "Pd": 0.5, + "Ag": 0.9, + "Cd": 0.9, + "Hf": 0.5, + "Ta": 0, + "W": 0, + "Re": 0, + "Os": 0, + "Ir": 0, + "Pt": 0.5, + "Au": 0.8, + "Pb": 0 + } +} \ No newline at end of file diff --git a/src/autocat/learning/__init__.py b/src/autocat/learning/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/src/autocat/learning/featurizers.py b/src/autocat/learning/featurizers.py new file mode 100644 index 00000000..72f14df3 --- /dev/null +++ b/src/autocat/learning/featurizers.py @@ -0,0 +1,295 @@ +import copy +from typing import List, Dict + +import numpy as np +from prettytable import PrettyTable + +from ase import Atoms +from dscribe.descriptors import SineMatrix +from dscribe.descriptors import CoulombMatrix +from dscribe.descriptors import ACSF +from dscribe.descriptors import SOAP +from matminer.featurizers.composition import ElementProperty +from matminer.featurizers.site import ChemicalSRO +from matminer.featurizers.site import OPSiteFingerprint +from matminer.featurizers.site import CrystalNNFingerprint +from pymatgen.io.ase import AseAtomsAdaptor +from pymatgen.core.periodic_table import Element + + +SUPPORTED_MATMINER_CLASSES = [ + ElementProperty, + ChemicalSRO, + OPSiteFingerprint, + CrystalNNFingerprint, +] + +SUPPORTED_DSCRIBE_CLASSES = [SineMatrix, CoulombMatrix, ACSF, SOAP] + + +class FeaturizerError(Exception): + pass + + +class Featurizer: + def __init__( + self, + featurizer_class=None, # black + design_space_structures: List[Atoms] = None, + species_list: List[str] = None, + max_size: int = None, + preset: str = None, + kwargs: Dict = None, + ): + + self._featurizer_class = SineMatrix + self.featurizer_class = featurizer_class + + self._preset = None + self.preset = preset + + self._kwargs = None + self.kwargs = kwargs + + self._max_size = 100 + self.max_size = max_size + + self._species_list = ["Fe", "Ni", "Pt", "Pd", "Cu", "C", "N", "O", "H"] + self.species_list = species_list + + # overrides max_size 
and species_list if given + self._design_space_structures = None + self.design_space_structures = design_space_structures + + def __eq__(self, other: object) -> bool: + if isinstance(other, Featurizer): + for attr in [ + "featurizer_class", + "species_list", + "max_size", + "preset", + "kwargs", + ]: + if getattr(self, attr) != getattr(other, attr): + return False + return True + return False + + def __repr__(self) -> str: + pt = PrettyTable() + pt.field_names = ["", "Featurizer"] + class_name = ( + self.featurizer_class.__module__ + "." + self.featurizer_class.__name__ + ) + pt.add_row(["class", class_name]) + pt.add_row(["kwargs", self.kwargs]) + pt.add_row(["species list", self.species_list]) + pt.add_row(["maximum structure size", self.max_size]) + pt.add_row(["preset", self.preset]) + pt.add_row( + [ + "design space structures provided?", + self.design_space_structures is not None, + ] + ) + pt.max_width = 70 + return str(pt) + + def copy(self): + """ + Returns a copy of the featurizer + """ + ds_structs_copy = ( + [struct.copy() for struct in self.design_space_structures] + if self.design_space_structures + else None + ) + feat = self.__class__( + featurizer_class=self.featurizer_class, + design_space_structures=ds_structs_copy, + species_list=self.species_list.copy(), + max_size=self.max_size, + kwargs=copy.deepcopy(self.kwargs) if self.kwargs else None, + ) + return feat + + @property + def featurizer_class(self): + return self._featurizer_class + + @featurizer_class.setter + def featurizer_class(self, featurizer_class): + if ( + featurizer_class in SUPPORTED_MATMINER_CLASSES + or featurizer_class in SUPPORTED_DSCRIBE_CLASSES + ): + self._featurizer_class = featurizer_class + self._preset = None + self._kwargs = None + else: + msg = f"Featurization class {featurizer_class} is not currently supported." 
+ raise FeaturizerError(msg) + + @property + def preset(self): + return self._preset + + @preset.setter + def preset(self, preset): + if self.featurizer_class in [CrystalNNFingerprint, ElementProperty]: + self._preset = preset + elif preset is None: + self._preset = preset + else: + msg = f"Presets are not supported for {self.featurizer_class.__module__}" + raise FeaturizerError(msg) + + @property + def kwargs(self): + return self._kwargs + + @kwargs.setter + def kwargs(self, kwargs): + if kwargs is not None: + self._kwargs = kwargs.copy() + + @property + def design_space_structures(self): + return self._design_space_structures + + @design_space_structures.setter + def design_space_structures(self, design_space_structures: List[Atoms]): + if design_space_structures is not None: + self._design_space_structures = [ + struct.copy() for struct in design_space_structures + ] + # analyze new design space + ds_structs = design_space_structures + _species_list = [] + for s in ds_structs: + # get all unique species + found_species = np.unique(s.get_chemical_symbols()).tolist() + new_species = [ + spec for spec in found_species if spec not in _species_list + ] + _species_list.extend(new_species) + # sort species list + sorted_species_list = sorted( + _species_list, key=lambda el: Element(el).mendeleev_no + ) + + self._max_size = max([len(s) for s in ds_structs]) + self._species_list = sorted_species_list + + @property + def max_size(self): + return self._max_size + + @max_size.setter + def max_size(self, max_size): + if max_size is not None: + self._max_size = max_size + + @property + def species_list(self): + return self._species_list + + @species_list.setter + def species_list(self, species_list: List[str]): + if species_list is not None: + _species_list = species_list.copy() + # sort species list by mendeleev number + sorted_species_list = sorted( + _species_list, key=lambda el: Element(el).mendeleev_no + ) + self._species_list = sorted_species_list + + # TODO: 
"get_featurization_object" -> "get_featurizer" + @property + def featurization_object(self): + return self._get_featurization_object() + + def _get_featurization_object(self): + # instantiate featurizer object + if hasattr(self.featurizer_class, "from_preset") and self.preset is not None: + return self.featurizer_class.from_preset(self.preset) + if self.featurizer_class in [SineMatrix, CoulombMatrix]: + return self.featurizer_class( + n_atoms_max=self.max_size, permutation="none", **self.kwargs or {}, + ) + if self.featurizer_class in [SOAP, ACSF]: + return self.featurizer_class(species=self.species_list, **self.kwargs or {}) + return self.featurizer_class(**self.kwargs or {}) + + def featurize_single(self, structure: Atoms): + """ + Featurize a single structure. Returns a single vector + + Parameters + ---------- + + structure: + ase.Atoms object of structure to be featurized + + Returns + ------- + + representation: + Numpy array of feature vector (not flattened) + """ + feat_class = self.featurizer_class + featurization_object = self.featurization_object + # dscribe classes + if feat_class in [SOAP, ACSF]: + adsorbate_indices = np.where(structure.get_tags() <= 0)[0].tolist() + return featurization_object.create(structure, positions=adsorbate_indices,) + if feat_class in [SineMatrix, CoulombMatrix]: + return featurization_object.create(structure).reshape(-1,) + + # matminer classes + pym_struct = AseAtomsAdaptor().get_structure(structure) + if feat_class == ElementProperty: + return np.array(featurization_object.featurize(pym_struct.composition)) + representation = np.array([]) + if feat_class in [CrystalNNFingerprint, OPSiteFingerprint]: + adsorbate_indices = np.where(structure.get_tags() <= 0)[0].tolist() + for idx in adsorbate_indices: + feat = featurization_object.featurize(pym_struct, idx) + representation = np.concatenate((representation, feat)) + return representation + if feat_class == ChemicalSRO: + adsorbate_indices = np.where(structure.get_tags() <= 
0)[0].tolist() + formatted_list = [[pym_struct, idx] for idx in adsorbate_indices] + featurization_object.fit(formatted_list) + for idx in adsorbate_indices: + feat = featurization_object.featurize(pym_struct, idx) + representation = np.concatenate((representation, feat)) + return representation + return None + + def featurize_multiple(self, structures: List[Atoms]): + """ + Featurize multiple structures. Returns a matrix where each + row is the flattened feature vector of each system + + Parameters + ---------- + + structures: + List of ase.Atoms structures to be featurized + + Returns + ------- + + X: + Numpy array of shape (number of structures, number of features) + """ + first_vec = self.featurize_single(structures[0]).flatten() + num_features = len(first_vec) + # if adsorbate featurization, assumes only 1 adsorbate in design space + # (otherwise would require padding) + X = np.zeros((len(structures), num_features)) + X[0, :] = first_vec.copy() + for i in range(1, len(structures)): + X[i, :] = self.featurize_single(structures[i]).flatten() + return X diff --git a/src/autocat/learning/predictors.py b/src/autocat/learning/predictors.py new file mode 100644 index 00000000..7978a849 --- /dev/null +++ b/src/autocat/learning/predictors.py @@ -0,0 +1,309 @@ +import copy +import numpy as np + +from typing import List +from typing import Dict +from typing import Union +from prettytable import PrettyTable + +from ase import Atoms + +from sklearn.gaussian_process import GaussianProcessRegressor +from sklearn.metrics import mean_absolute_error +from sklearn.metrics import mean_squared_error + +from autocat.learning.featurizers import Featurizer +from autocat.learning.featurizers import ( + SUPPORTED_DSCRIBE_CLASSES, + SUPPORTED_MATMINER_CLASSES, +) + + +class PredictorError(Exception): + pass + + +class Predictor: + def __init__( + self, + model_class=None, + model_kwargs: Dict = None, # TODO: kwargs -> options? 
+        featurizer_class=None,  # black
+        featurization_kwargs: Dict = None,
+    ):
+        """
+        Constructor.
+
+        Parameters
+        ----------
+
+        model_class:
+            Class of regression model to be used for training and prediction.
+            If this is changed after initialization, all previously set
+            model_kwargs will be removed.
+            N.B. must have fit and predict methods
+
+        structure_featurizer:
+            String giving featurizer to be used for full structure which will be
+            fed into `autocat.learning.featurizers.full_structure_featurization`
+
+        adsorbate_featurizer:
+            String giving featurizer to be used for full structure which will be
+            fed into `autocat.learning.featurizers.adsorbate_structure_featurization`
+
+        maximum_structure_size:
+            Size of the largest structure to be supported by the representation.
+            Default: number of atoms in largest structure within `structures`
+
+        maximum_adsorbate_size:
+            Integer giving the maximum adsorbate size to be encountered
+            (ie. this determines if zero-padding should be applied and how much).
+            If the provided value is less than the adsorbate size given by
+            `adsorbate_indices`, representation will remain size of the adsorbate.
+            Default: size of adsorbate provided
+
+        species_list:
+            List of species that could be encountered for featurization.
+            Default: Parses over all `structures` and collects all encountered species
+
+        refine_structures:
+            Bool indicating whether the structures should be refined to include
+            only the adsorbate and surface layer.
Requires tags for all structures + to have adsorbate atoms and surface atoms as 0 and 1, respectively + + """ + self.is_fit = False + + self._model_class = GaussianProcessRegressor + self.model_class = model_class + + self._model_kwargs = None + self.model_kwargs = model_kwargs + + self.regressor = self.model_class( + **self.model_kwargs if self.model_kwargs else {} + ) + + self._featurizer_class = None + self._featurization_kwargs = None + + self.featurizer_class = featurizer_class + + self.featurization_kwargs = featurization_kwargs + + self.featurizer = Featurizer( + featurizer_class=self.featurizer_class, + **self.featurization_kwargs if self.featurization_kwargs else {}, + ) + + def __repr__(self) -> str: + pt = PrettyTable() + pt.field_names = ["", "Predictor"] + model_class_name = self.model_class.__module__ + "." + self.model_class.__name__ + pt.add_row(["class", model_class_name]) + pt.add_row(["kwargs", self.model_kwargs]) + pt.add_row(["is fit?", self.is_fit]) + feat_str = str(self.featurizer) + return str(pt) + "\n" + feat_str + + @property + def model_class(self): + return self._model_class + + @model_class.setter + def model_class(self, model_class): + if model_class is not None: + self._model_class = model_class + # removes any model kwargs from previous model + # if changed + self._model_kwargs = None + if self.is_fit: + self.is_fit = False + self.X_ = None + self.y_ = None + # generates new regressor with default settings + self.regressor = self._model_class() + + @property + def model_kwargs(self): + return self._model_kwargs + + @model_kwargs.setter + def model_kwargs(self, model_kwargs): + if model_kwargs is not None: + self._model_kwargs = copy.deepcopy(model_kwargs) + if self.is_fit: + self.is_fit = False + self.X_ = None + self.y_ = None + self.regressor = self.model_class(**model_kwargs) + + @property + def featurizer_class(self): + return self._featurizer_class + + @featurizer_class.setter + def featurizer_class(self, featurizer_class): + 
if featurizer_class is not None: + assert ( + featurizer_class in SUPPORTED_DSCRIBE_CLASSES + or featurizer_class in SUPPORTED_MATMINER_CLASSES + ) + self._featurizer_class = featurizer_class + self._featurization_kwargs = None + self.featurizer = Featurizer(featurizer_class,) + if self.is_fit: + self.is_fit = False + self.X_ = None + self.y_ = None + self.regressor = self.model_class( + **self.model_kwargs if self.model_kwargs else {} + ) + + @property + def featurization_kwargs(self): + return self._featurization_kwargs + + @featurization_kwargs.setter + def featurization_kwargs(self, featurization_kwargs): + if featurization_kwargs is not None: + assert isinstance(featurization_kwargs, dict) + self._featurization_kwargs = featurization_kwargs.copy() + self.featurizer = Featurizer(self.featurizer_class, **featurization_kwargs) + if self.is_fit: + self.is_fit = False + self.X_ = None + self.y_ = None + self.regressor = self.model_class( + **self.model_kwargs if self.model_kwargs else {} + ) + + def copy(self): + """ + Returns a copy + """ + acp = self.__class__( + model_class=self.model_class, featurizer_class=self.featurizer_class, + ) + acp.regressor = copy.deepcopy(self.regressor) + acp.is_fit = self.is_fit + acp.featurization_kwargs = copy.deepcopy(self.featurization_kwargs) + acp.model_kwargs = copy.deepcopy(self.model_kwargs) + + return acp + + def fit( + self, training_structures: List[Union[Atoms, str]], y: np.ndarray, + ): + """ + Given a list of structures and labels will featurize + and train a regression model + + Parameters + ---------- + + training_structures: + List of structures to be trained upon + + y: + Numpy array of labels corresponding to training structures + of shape (# of training structures, # of targets) + + Returns + ------- + + trained_model: + Trained `sklearn` model object + """ + self.X_ = self.featurizer.featurize_multiple(training_structures) + self.y_ = y + self.regressor.fit(self.X_, self.y_) + self.is_fit = True + + def 
predict( + self, testing_structures: List[Atoms], + ): + """ + From a trained model, will predict on given structures + + Parameters + ---------- + + testing_structures: + List of Atoms objects to make predictions on + + Returns + ------- + + predicted_labels: + List of predicted labels for each input structure + + unc: + List of uncertainties for each prediction if available. + Otherwise returns `None` + + """ + assert self.is_fit + featurized_input = self.featurizer.featurize_multiple(testing_structures) + try: + predicted_labels, unc = self.regressor.predict( + featurized_input, return_std=True + ) + except TypeError: + predicted_labels = self.regressor.predict(featurized_input,) + unc = None + + return predicted_labels, unc + + # TODO: "score" -> "get_scores"? + def score( + self, + structures: List[Atoms], + labels: np.ndarray, + metric: str = "mae", + return_predictions: bool = False, + **kwargs, + ): + """ + Returns a prediction score given the actual corrections. + + Parameters + ---------- + + structures: + List of Atoms objects of structures to be tested on + + labels: + Labels for the testing structures + + metric: + How the performance metric should be calculated + Options: + - mae + - mse + + return_predictions: + Bool indicating whether the predictions and uncertainties should + be returned in addition to the score + + Returns + ------- + + score: + Float of calculated test score on the given data + """ + assert self.is_fit + + pred_label, unc = self.predict(structures) + + score_func = {"mae": mean_absolute_error, "mse": mean_squared_error} + + if metric not in score_func: + msg = f"Metric: {metric} is not supported" + raise PredictorError(msg) + + score = score_func[metric](labels, pred_label, **kwargs) + + if return_predictions: + return score, pred_label, unc + return score diff --git a/src/autocat/learning/sequential.py b/src/autocat/learning/sequential.py new file mode 100644 index 00000000..001bde63 --- /dev/null +++ 
b/src/autocat/learning/sequential.py @@ -0,0 +1,1142 @@ +import copy +import os +import json +import importlib +from typing import List +from typing import Dict +from typing import Union + +import numpy as np +from joblib import Parallel, delayed +from prettytable import PrettyTable +from ase import Atoms +from ase.io.jsonio import encode as atoms_encoder +from ase.io.jsonio import decode as atoms_decoder +from scipy import stats +from sklearn.gaussian_process import GaussianProcessRegressor +from dscribe.descriptors import SineMatrix + +from autocat.learning.predictors import Predictor +from autocat.data.hhi import HHI +from autocat.data.segregation_energies import SEGREGATION_ENERGIES + + +Array = List[float] + + +class DesignSpaceError(Exception): + pass + + +class DesignSpace: + def __init__( + self, design_space_structures: List[Atoms], design_space_labels: Array, + ): + """ + Constructor. + + Parameters + ---------- + + design_space_structures: + List of all structures within the design space + + design_space_labels: + Labels corresponding to all structures within the design space. 
+ If label not yet known, set to np.nan + + """ + if len(design_space_structures) != design_space_labels.shape[0]: + msg = f"Number of structures ({len(design_space_structures)})\ + and labels ({design_space_labels.shape[0]}) must match" + raise DesignSpaceError(msg) + + self._design_space_structures = [ + struct.copy() for struct in design_space_structures + ] + self._design_space_labels = design_space_labels.copy() + + def __repr__(self) -> str: + pt = PrettyTable() + pt.field_names = ["", "DesignSpace"] + pt.add_row(["total # of systems", len(self)]) + num_unknown = sum(np.isnan(self.design_space_labels)) + pt.add_row(["# of unlabelled systems", num_unknown]) + pt.add_row(["unique species present", self.species_list]) + max_label = max(self.design_space_labels) + pt.add_row(["maximum label", max_label]) + min_label = min(self.design_space_labels) + pt.add_row(["minimum label", min_label]) + pt.max_width = 70 + return str(pt) + + def __len__(self): + return len(self.design_space_structures) + + # TODO: non-dunder method for deleting systems + def __delitem__(self, i): + """ + Deletes systems from the design space. 
If mask provided, deletes wherever True + """ + if isinstance(i, list): + i = np.array(i) + elif isinstance(i, int): + i = [i] + mask = np.ones(len(self), dtype=bool) + mask[i] = 0 + self._design_space_labels = self.design_space_labels[mask] + structs = self.design_space_structures + masked_structs = [structs[j] for j in range(len(self)) if mask[j]] + self._design_space_structures = masked_structs + + def __eq__(self, other: object) -> bool: + if isinstance(other, DesignSpace): + # check that they are the same length + if len(self) == len(other): + # check all their structures are equal + self_structs = self.design_space_structures + o_structs = other.design_space_structures + if not self_structs == o_structs: + return False + + # check their labels are equal + self_labels = self.design_space_labels + o_labels = other.design_space_labels + return np.array_equal(self_labels, o_labels, equal_nan=True) + return False + + def copy(self): + """ + Returns a copy of the design space + """ + acds = self.__class__( + design_space_structures=self.design_space_structures, + design_space_labels=self.design_space_labels, + ) + return acds + + @property + def design_space_structures(self): + return self._design_space_structures + + @design_space_structures.setter + def design_space_structures(self, design_space_structures): + msg = "Please use `update` method to update the design space." + raise DesignSpaceError(msg) + + @property + def design_space_labels(self): + return self._design_space_labels + + @design_space_labels.setter + def design_space_labels(self, design_space_labels): + msg = "Please use `update` method to update the design space." 
+ raise DesignSpaceError(msg) + + @property + def species_list(self): + species_list = [] + for s in self.design_space_structures: + # get all unique species + found_species = np.unique(s.get_chemical_symbols()).tolist() + new_species = [spec for spec in found_species if spec not in species_list] + species_list.extend(new_species) + return species_list + + def update(self, structures: List[Atoms], labels: Array): + """ + Updates design space given structures and corresponding labels. + If structure already in design space, the label is updated. + + Parameters + ---------- + + structures: + List of Atoms objects structures to be added + + labels: + Corresponding labels to `structures` + """ + if (structures is not None) and (labels is not None): + assert len(structures) == len(labels) + assert all(isinstance(struct, Atoms) for struct in structures) + for i, struct in enumerate(structures): + # if structure already in design space, update label + if struct in self.design_space_structures: + idx = self.design_space_structures.index(struct) + self._design_space_labels[idx] = labels[i] + # otherwise extend design space + else: + self._design_space_structures.append(struct) + self._design_space_labels = np.append( + self.design_space_labels, labels[i] + ) + + def to_jsonified_list(self) -> List: + """ + Returns a jsonified list representation + """ + collected_jsons = [] + for struct in self.design_space_structures: + collected_jsons.append(atoms_encoder(struct)) + # append labels to list of collected jsons + jsonified_labels = [float(x) for x in self.design_space_labels] + collected_jsons.append(jsonified_labels) + return collected_jsons + + def write_json_to_disk( + self, + json_name: str = None, + write_location: str = ".", + write_to_disk: bool = True, + ): + """ + Writes DesignSpace to disk as a json + """ + collected_jsons = self.to_jsonified_list() + # set default json name if needed + if json_name is None: + json_name = "acds.json" + # write out single json + if 
write_to_disk: + json_path = os.path.join(write_location, json_name) + with open(json_path, "w") as f: + json.dump(collected_jsons, f) + + @staticmethod + def from_json(json_name: str): + with open(json_name, "r") as f: + all_data = json.load(f) + structures = [] + for i in range(len(all_data) - 1): + atoms = atoms_decoder(all_data[i]) + structures.append(atoms) + labels = np.array(all_data[-1]) + return DesignSpace( + design_space_structures=structures, design_space_labels=labels, + ) + + +class SequentialLearnerError(Exception): + pass + + +# TODO: "kwargs" -> "options"? +class SequentialLearner: + def __init__( + self, + design_space: DesignSpace, + predictor_kwargs: Dict[str, Union[str, float]] = None, + candidate_selection_kwargs: Dict[str, Union[str, float]] = None, + sl_kwargs: Dict[str, int] = None, + ): + # TODO: move predefined attributes (train_idx, candidate_idxs) to a + # different container (not kwargs) + + self._design_space = None + self.design_space = design_space.copy() + + # predictor arguments to use throughout the SL process + if predictor_kwargs is None: + predictor_kwargs = { + "model_class": GaussianProcessRegressor, + "featurizer_class": SineMatrix, + } + if "model_class" not in predictor_kwargs: + predictor_kwargs["model_class"] = GaussianProcessRegressor + if "featurizer_class" not in predictor_kwargs: + predictor_kwargs["featurizer_class"] = SineMatrix + if "featurization_kwargs" not in predictor_kwargs: + predictor_kwargs["featurization_kwargs"] = {} + ds_structs_kwargs = { + "design_space_structures": design_space.design_space_structures + } + predictor_kwargs["featurization_kwargs"].update(ds_structs_kwargs) + self._predictor_kwargs = None + self.predictor_kwargs = predictor_kwargs + self._predictor = Predictor(**predictor_kwargs) + + # acquisition function arguments to use for candidate selection + if not candidate_selection_kwargs: + candidate_selection_kwargs = {"aq": "Random"} + self._candidate_selection_kwargs = None + 
self.candidate_selection_kwargs = candidate_selection_kwargs + + # other miscellaneous kw arguments + self.sl_kwargs = sl_kwargs if sl_kwargs else {} + + # variables that need to be propagated through the SL process + if "iteration_count" not in self.sl_kwargs: + self.sl_kwargs.update({"iteration_count": 0}) + if "train_idx" not in self.sl_kwargs: + self.sl_kwargs.update({"train_idx": None}) + if "train_idx_history" not in self.sl_kwargs: + self.sl_kwargs.update({"train_idx_history": None}) + if "predictions" not in self.sl_kwargs: + self.sl_kwargs.update({"predictions": None}) + if "predictions_history" not in self.sl_kwargs: + self.sl_kwargs.update({"predictions_history": None}) + if "uncertainties" not in self.sl_kwargs: + self.sl_kwargs.update({"uncertainties": None}) + if "uncertainties_history" not in self.sl_kwargs: + self.sl_kwargs.update({"uncertainties_history": None}) + if "candidate_indices" not in self.sl_kwargs: + self.sl_kwargs.update({"candidate_indices": None}) + if "candidate_index_history" not in self.sl_kwargs: + self.sl_kwargs.update({"candidate_index_history": None}) + if "acquisition_scores" not in self.sl_kwargs: + self.sl_kwargs.update({"acquisition_scores": None}) + + def __repr__(self) -> str: + pt = PrettyTable() + pt.field_names = ["", "Sequential Learner"] + pt.add_row(["iteration count", self.iteration_count]) + if self.candidate_structures is not None: + cand_formulas = [ + s.get_chemical_formula() for s in self.candidate_structures + ] + else: + cand_formulas = None + pt.add_row(["next candidate system structures", cand_formulas]) + pt.add_row(["next candidate system indices", self.candidate_indices]) + pt.add_row(["acquisition function", self.candidate_selection_kwargs.get("aq")]) + pt.add_row( + [ + "# of candidates to pick", + self.candidate_selection_kwargs.get("num_candidates_to_pick", 1), + ] + ) + pt.add_row( + ["target maximum", self.candidate_selection_kwargs.get("target_max")] + ) + pt.add_row( + ["target minimum", 
self.candidate_selection_kwargs.get("target_min")] + ) + pt.add_row( + ["include hhi?", self.candidate_selection_kwargs.get("include_hhi", False)] + ) + pt.add_row( + [ + "include segregation energies?", + self.candidate_selection_kwargs.get("include_seg_ener", False), + ] + ) + return str(pt) + "\n" + str(self.design_space) + "\n" + str(self.predictor) + + @property + def design_space(self): + return self._design_space + + @design_space.setter + def design_space(self, design_space): + self._design_space = design_space + + @property + def predictor_kwargs(self): + return self._predictor_kwargs + + @predictor_kwargs.setter + def predictor_kwargs(self, predictor_kwargs): + if predictor_kwargs is None: + predictor_kwargs = { + "model_class": GaussianProcessRegressor, + "featurizer_class": SineMatrix, + } + if "model_class" not in predictor_kwargs: + predictor_kwargs["model_class"] = GaussianProcessRegressor + if "featurizer_class" not in predictor_kwargs: + predictor_kwargs["featurizer_class"] = SineMatrix + if "featurization_kwargs" not in predictor_kwargs: + predictor_kwargs["featurization_kwargs"] = {} + ds_structs_kwargs = { + "design_space_structures": self.design_space.design_space_structures + } + predictor_kwargs["featurization_kwargs"].update(ds_structs_kwargs) + self._predictor_kwargs = copy.deepcopy(predictor_kwargs) + self._predictor = Predictor(**predictor_kwargs) + + @property + def predictor(self): + return self._predictor + + @property + def candidate_selection_kwargs(self): + return self._candidate_selection_kwargs + + @candidate_selection_kwargs.setter + def candidate_selection_kwargs(self, candidate_selection_kwargs): + if not candidate_selection_kwargs: + candidate_selection_kwargs = {} + self._candidate_selection_kwargs = candidate_selection_kwargs.copy() + + @property + def iteration_count(self): + return self.sl_kwargs.get("iteration_count", 0) + + @property + def train_idx(self): + return self.sl_kwargs.get("train_idx") + + @property + def 
train_idx_history(self): + return self.sl_kwargs.get("train_idx_history", None) + + @property + def predictions(self): + return self.sl_kwargs.get("predictions") + + @property + def uncertainties(self): + return self.sl_kwargs.get("uncertainties") + + @property + def candidate_indices(self): + return self.sl_kwargs.get("candidate_indices") + + @property + def acquisition_scores(self): + return self.sl_kwargs.get("acquisition_scores", None) + + @property + def candidate_structures(self): + idxs = self.candidate_indices + if idxs is not None: + return [self.design_space.design_space_structures[i] for i in idxs] + + @property + def candidate_index_history(self): + return self.sl_kwargs.get("candidate_index_history", None) + + @property + def predictions_history(self): + return self.sl_kwargs.get("predictions_history", None) + + @property + def uncertainties_history(self): + return self.sl_kwargs.get("uncertainties_history", None) + + def copy(self): + """ + Returns a copy + """ + acsl = self.__class__(design_space=self.design_space,) + acsl.predictor_kwargs = copy.deepcopy(self.predictor_kwargs) + acsl.sl_kwargs = copy.deepcopy(self.sl_kwargs) + return acsl + + def iterate(self): + """Runs the next iteration of sequential learning. 
+ + This process consists of: + - retraining the predictor + - predicting candidate properties and calculating candidate scores (if + fully explored returns None) + - selecting the next batch of candidates for objective evaluation (if + fully explored returns None) + """ + + dstructs = self.design_space.design_space_structures + dlabels = self.design_space.design_space_labels + + mask_nans = ~np.isnan(dlabels) + masked_structs = [struct for i, struct in enumerate(dstructs) if mask_nans[i]] + masked_labels = dlabels[np.where(mask_nans)] + + self.predictor.fit(masked_structs, masked_labels) + + train_idx = np.zeros(len(dlabels), dtype=bool) + train_idx[np.where(mask_nans)] = 1 + self.sl_kwargs.update({"train_idx": train_idx}) + train_idx_hist = self.sl_kwargs.get("train_idx_history") + if train_idx_hist is None: + train_idx_hist = [] + train_idx_hist.append(train_idx) + self.sl_kwargs.update({"train_idx_history": train_idx_hist}) + + preds, unc = self.predictor.predict(dstructs) + + # update predictions and store in history + self.sl_kwargs.update({"predictions": preds}) + pred_hist = self.sl_kwargs.get("predictions_history") + if pred_hist is None: + pred_hist = [] + pred_hist.append(preds) + self.sl_kwargs.update({"predictions_history": pred_hist}) + + # update uncertainties and store in history + self.sl_kwargs.update({"uncertainties": unc}) + unc_hist = self.sl_kwargs.get("uncertainties_history") + if unc_hist is None: + unc_hist = [] + unc_hist.append(unc) + self.sl_kwargs.update({"uncertainties_history": unc_hist}) + + # make sure haven't fully searched design space + if any([np.isnan(label) for label in dlabels]): + candidate_idx, _, aq_scores = choose_next_candidate( + dstructs, + dlabels, + train_idx, + preds, + unc, + **self.candidate_selection_kwargs, + ) + # if fully searched, no more candidate structures + else: + candidate_idx = None + aq_scores = None + self.sl_kwargs.update({"candidate_indices": candidate_idx}) + 
self.sl_kwargs.update({"acquisition_scores": aq_scores}) + + # update the candidate index history if new candidate + if candidate_idx is not None: + cand_idx_hist = self.sl_kwargs.get("candidate_index_history") + if cand_idx_hist is None: + cand_idx_hist = [] + cand_idx_hist.append(candidate_idx) + self.sl_kwargs.update({"candidate_index_history": cand_idx_hist}) + + # update the SL iteration count + itc = self.sl_kwargs.get("iteration_count", 0) + self.sl_kwargs.update({"iteration_count": itc + 1}) + + def to_jsonified_list(self) -> List: + """ + Returns a jsonified list representation + """ + jsonified_list = self.design_space.to_jsonified_list() + # append kwargs for predictor + jsonified_pred_kwargs = {} + for k in self.predictor_kwargs: + if k in ["model_class", "featurizer_class"]: + mod_string = self.predictor_kwargs[k].__module__ + class_string = self.predictor_kwargs[k].__name__ + jsonified_pred_kwargs[k] = [mod_string, class_string] + elif k == "featurization_kwargs": + jsonified_pred_kwargs[k] = copy.deepcopy(self.predictor_kwargs[k]) + # assumes design space will always match DesignSpace + del jsonified_pred_kwargs[k]["design_space_structures"] + else: + jsonified_pred_kwargs[k] = self.predictor_kwargs[k] + jsonified_list.append(jsonified_pred_kwargs) + # append kwargs for candidate selection + jsonified_list.append(self.candidate_selection_kwargs) + # append the acsl kwargs + jsonified_sl_kwargs = {} + for k in self.sl_kwargs: + if k != "iteration_count" and self.sl_kwargs[k] is not None: + jsonified_sl_kwargs[k] = [arr.tolist() for arr in self.sl_kwargs[k]] + elif k == "iteration_count": + jsonified_sl_kwargs["iteration_count"] = self.sl_kwargs[ + "iteration_count" + ] + elif self.sl_kwargs[k] is None: + jsonified_sl_kwargs[k] = None + jsonified_list.append(jsonified_sl_kwargs) + return jsonified_list + + def write_json_to_disk(self, write_location: str = ".", json_name: str = None): + """ + Writes `SequentialLearner` to disk as a json + """ + 
jsonified_list = self.to_jsonified_list() + + if json_name is None: + json_name = "acsl.json" + + json_path = os.path.join(write_location, json_name) + + with open(json_path, "w") as f: + json.dump(jsonified_list, f) + + @staticmethod + def from_json(json_name: str): + with open(json_name, "r") as f: + all_data = json.load(f) + structures = [] + for i in range(len(all_data) - 4): + atoms = atoms_decoder(all_data[i]) + structures.append(atoms) + labels = np.array(all_data[-4]) + acds = DesignSpace( + design_space_structures=structures, design_space_labels=labels, + ) + predictor_kwargs = all_data[-3] + for k in predictor_kwargs: + if k in ["model_class", "featurizer_class"]: + mod = importlib.import_module(predictor_kwargs[k][0]) + predictor_kwargs[k] = getattr(mod, predictor_kwargs[k][1]) + candidate_selection_kwargs = all_data[-2] + raw_sl_kwargs = all_data[-1] + sl_kwargs = {} + for k in raw_sl_kwargs: + if raw_sl_kwargs[k] is not None: + if k in [ + "predictions", + "uncertainties", + "acquisition_scores", + "candidate_indices", + ]: + sl_kwargs[k] = np.array(raw_sl_kwargs[k]) + elif k in [ + "predictions_history", + "uncertainties_history", + "candidate_index_history", + ]: + sl_kwargs[k] = [np.array(i) for i in raw_sl_kwargs[k]] + elif k == "iteration_count": + sl_kwargs[k] = raw_sl_kwargs[k] + elif k == "train_idx": + sl_kwargs[k] = np.array(raw_sl_kwargs[k], dtype=bool) + elif k == "train_idx_history": + sl_kwargs[k] = [np.array(i, dtype=bool) for i in raw_sl_kwargs[k]] + else: + sl_kwargs[k] = None + + return SequentialLearner( + design_space=acds, + predictor_kwargs=predictor_kwargs, + candidate_selection_kwargs=candidate_selection_kwargs, + sl_kwargs=sl_kwargs, + ) + + +def multiple_simulated_sequential_learning_runs( + full_design_space: DesignSpace, + number_of_runs: int = 5, + number_parallel_jobs: int = None, + predictor_kwargs: Dict[str, Union[str, float]] = None, + candidate_selection_kwargs: Dict[str, Union[str, float]] = None, + 
init_training_size: int = 10,
+    number_of_sl_loops: int = None,
+    write_to_disk: bool = False,
+    write_location: str = ".",
+    json_name_prefix: str = None,
+) -> List[SequentialLearner]:
+    """
+    Conducts multiple simulated sequential learning runs
+
+    Parameters
+    ----------
+
+    full_design_space:
+        Fully labelled DesignSpace to simulate
+        being searched over
+
+    predictor_kwargs:
+        Kwargs to be used in setting up the predictor.
+        This is where model class, model hyperparameters, etc.
+        are specified.
+
+    candidate_selection_kwargs:
+        Kwargs that specify the settings for candidate selection.
+        This is where acquisition function, targets, etc. are
+        specified.
+
+    init_training_size:
+        Size of the initial training set to be selected from
+        the full space.
+        Default: 10
+
+    number_of_sl_loops:
+        Integer specifying the number of sequential learning loops to be conducted.
+        This value cannot be greater than
+        `(DESIGN_SPACE_SIZE - init_training_size)/batch_size_to_add`
+        Default: maximum number of sl loops calculated above
+
+    number_of_runs:
+        Integer of number of runs to be done
+        Default: 5
+
+    number_parallel_jobs:
+        Integer giving the number of cores to be parallelized across
+        using `joblib`
+        Default: None (ie. will run in serial)
+
+    write_to_disk:
+        Boolean specifying whether runs history should be written to disk as jsons.
+        Default: False
+
+    write_location:
+        String with the location where runs history jsons should be written to disk.
+ Default: current directory + + json_name_prefix: + Prefix used when writing out each simulated run as a json + The naming convention is `{json_name_prefix}_{run #}.json` + Default: acsl_run + + Returns + ------- + + runs_history: + List of SequentialLearner objects for each simulated run + """ + + if number_parallel_jobs is not None: + runs_history = Parallel(n_jobs=number_parallel_jobs)( + delayed(simulated_sequential_learning)( + full_design_space=full_design_space, + predictor_kwargs=predictor_kwargs, + candidate_selection_kwargs=candidate_selection_kwargs, + number_of_sl_loops=number_of_sl_loops, + init_training_size=init_training_size, + ) + for i in range(number_of_runs) + ) + + else: + runs_history = [ + simulated_sequential_learning( + full_design_space=full_design_space, + predictor_kwargs=predictor_kwargs, + candidate_selection_kwargs=candidate_selection_kwargs, + number_of_sl_loops=number_of_sl_loops, + init_training_size=init_training_size, + ) + for i in range(number_of_runs) + ] + + # TODO: separate dictionary representation and writing to disk + if write_to_disk: + if not os.path.isdir(write_location): + os.makedirs(write_location) + if json_name_prefix is None: + json_name_prefix = "acsl_run" + for i, run in enumerate(runs_history): + name = json_name_prefix + "_" + str(i) + ".json" + run.write_json_to_disk(write_location=write_location, json_name=name) + print(f"SL histories written to {write_location}") + + return runs_history + + +def simulated_sequential_learning( + full_design_space: DesignSpace, + predictor_kwargs: Dict[str, Union[str, float]] = None, + candidate_selection_kwargs: Dict[str, Union[str, float]] = None, + init_training_size: int = 10, + number_of_sl_loops: int = None, + write_to_disk: bool = False, + write_location: str = ".", + json_name: str = None, +) -> SequentialLearner: + """ + Conducts a simulated sequential learning loop for a + fully labelled design space to explore. 
+
+    Parameters
+    ----------
+
+    full_design_space:
+        Fully labelled DesignSpace to simulate
+        being searched over
+
+    predictor_kwargs:
+        Kwargs to be used in setting up the predictor.
+        This is where model class, model hyperparameters, etc.
+        are specified.
+
+    candidate_selection_kwargs:
+        Kwargs that specify the settings for candidate selection.
+        This is where acquisition function, targets, etc. are
+        specified.
+
+    init_training_size:
+        Size of the initial training set to be selected from
+        the full space.
+        Default: 10
+
+    number_of_sl_loops:
+        Integer specifying the number of sequential learning loops to be conducted.
+        This value cannot be greater than
+        `(DESIGN_SPACE_SIZE - init_training_size)/batch_size_to_add`
+        Default: maximum number of sl loops calculated above
+
+    write_to_disk:
+        Boolean specifying whether the resulting sequential learner should be
+        written to disk as a json.
+        Defaults to False.
+
+    write_location:
+        String with the location where the resulting sequential learner
+        should be written to disk.
+        Defaults to current directory.
+
+    Returns
+    -------
+
+    sl:
+        Sequential Learner after having been iterated as specified
+        by the input settings. Contains candidate, prediction,
+        and uncertainty histories for further analysis as desired.
+    """
+
+    ds_size = len(full_design_space)
+
+    # check fully explored
+    if True in np.isnan(full_design_space.design_space_labels):
+        missing_label_idx = np.where(np.isnan(full_design_space.design_space_labels))[0]
+        msg = (
+            f"Design space must be fully explored."
+ f" Missing labels at indices: {missing_label_idx}" + ) + raise SequentialLearnerError(msg) + + # check that specified initial training size makes sense + if init_training_size > ds_size: + msg = f"Initial training size ({init_training_size})\ + larger than design space ({ds_size})" + raise SequentialLearnerError(msg) + + batch_size_to_add = candidate_selection_kwargs.get("num_candidates_to_pick", 1) + max_num_sl_loops = int(np.ceil((ds_size - init_training_size) / batch_size_to_add)) + + if number_of_sl_loops is None: + number_of_sl_loops = max_num_sl_loops + + # check that specified number of loops is feasible + if number_of_sl_loops > max_num_sl_loops: + msg = ( + f"Number of SL loops ({number_of_sl_loops}) cannot be greater than" + f" ({max_num_sl_loops})" + ) + raise SequentialLearnerError(msg) + + # generate initial training set + init_idx = np.zeros(ds_size, dtype=bool) + init_idx[np.random.choice(ds_size, init_training_size, replace=False)] = 1 + + init_structs = [ + full_design_space.design_space_structures[idx] + for idx, b in enumerate(init_idx) + if b + ] + init_labels = full_design_space.design_space_labels.copy() + init_labels = init_labels[np.where(init_idx)] + + # set up learner that is used for iteration + dummy_labels = np.empty(len(full_design_space)) + dummy_labels[:] = np.nan + ds = DesignSpace(full_design_space.design_space_structures, dummy_labels) + ds.update(init_structs, init_labels) + sl = SequentialLearner( + design_space=ds, + predictor_kwargs=predictor_kwargs, + candidate_selection_kwargs=candidate_selection_kwargs, + ) + # first iteration on initial dataset + sl.iterate() + + # start simulated sequential learning loop + for i in range(number_of_sl_loops): + print(f"Sequential Learning Iteration #{i+1}") + if sl.candidate_indices is not None: + next_structs = sl.candidate_structures + next_labels = full_design_space.design_space_labels.take( + sl.candidate_indices + ) + sl.design_space.update(next_structs, next_labels) + sl.iterate() 
+ + if write_to_disk: + sl.write_json_to_disk(write_location=write_location, json_name=json_name) + print(f"SL dictionary written to {write_location}") + + return sl + + +def choose_next_candidate( + structures: List[Atoms] = None, + labels: Array = None, + train_idx: Array = None, + pred: Array = None, + unc: Array = None, + aq: str = "MLI", + num_candidates_to_pick: int = None, + target_min: float = None, + target_max: float = None, + include_hhi: bool = False, + hhi_type: str = "production", + include_seg_ener: bool = False, +): + """ + Chooses the next candidate(s) from a given acquisition function + + Parameters + ---------- + + structures: + List of `Atoms` objects to be used for HHI weighting if desired + + labels: + Array of the labels for the data + + train_idx: + Indices of all data entries already in the training set + Default: consider entire training set + + pred: + Predictions for all structures in the dataset + + unc: + Uncertainties for all structures in the dataset + + aq: + Acquisition function to be used to select the next candidates + Options + - MLI: maximum likelihood of improvement (default) + - Random + - MU: maximum uncertainty + + num_candidates_to_pick: + Number of candidates to choose from the dataset + + target_min: + Minimum target value to optimize for + + target_max: + Maximum target value to optimize for + + include_hhi: + Whether HHI scores should be used to weight aq scores + + hhi_type: + Type of HHI index to be used for weighting + Options + - production (default) + - reserves + + include_seg_ener: + Whether segregation energies should be used to weight aq scores + + Returns + ------- + + parent_idx: + Index/indices of the selected candidates + + max_scores: + Maximum scores (corresponding to the selected candidates for given `aq`) + + aq_scores: + Calculated scores based on the selected `aq` for the entire training set + """ + hhi_scores = None + if include_hhi: + if structures is None: + msg = "Structures must be provided to 
include HHI scores" + raise SequentialLearnerError(msg) + hhi_scores = calculate_hhi_scores(structures, hhi_type) + + segreg_energy_scores = None + if include_seg_ener: + if structures is None: + msg = "Structures must be provided to include segregation energy scores" + raise SequentialLearnerError(msg) + segreg_energy_scores = calculate_segregation_energy_scores(structures) + + if aq == "Random": + if labels is None: + msg = "For aq = 'Random', the labels must be supplied" + raise SequentialLearnerError(msg) + + if train_idx is None: + train_idx = np.zeros(len(labels), dtype=bool) + + if hhi_scores is None: + hhi_scores = np.ones(len(train_idx)) + + if segreg_energy_scores is None: + segreg_energy_scores = np.ones(len(train_idx)) + + aq_scores = ( + np.random.choice(len(labels), size=len(labels), replace=False) + * hhi_scores + * segreg_energy_scores + ) + + elif aq == "MU": + if unc is None: + msg = "For aq = 'MU', the uncertainties must be supplied" + raise SequentialLearnerError(msg) + + if train_idx is None: + train_idx = np.zeros(len(unc), dtype=bool) + + if hhi_scores is None: + hhi_scores = np.ones(len(train_idx)) + + if segreg_energy_scores is None: + segreg_energy_scores = np.ones(len(train_idx)) + + aq_scores = unc.copy() * hhi_scores * segreg_energy_scores + + elif aq == "MLI": + if unc is None or pred is None: + msg = "For aq = 'MLI', both uncertainties and predictions must be supplied" + raise SequentialLearnerError(msg) + + if train_idx is None: + train_idx = np.zeros(len(unc), dtype=bool) + + if hhi_scores is None: + hhi_scores = np.ones(len(train_idx)) + + if segreg_energy_scores is None: + segreg_energy_scores = np.ones(len(train_idx)) + + aq_scores = ( + np.array( + [ + get_overlap_score(mean, std, x2=target_max, x1=target_min) + for mean, std in zip(pred, unc) + ] + ) + * hhi_scores + * segreg_energy_scores + ) + + else: + msg = f"Acquisition function {aq} is not supported" + raise NotImplementedError(msg) + + if num_candidates_to_pick is None: 
+ next_idx = np.array([np.argmax(aq_scores[~train_idx])]) + max_scores = [np.max(aq_scores[~train_idx])] + + else: + next_idx = np.argsort(aq_scores[~train_idx])[-num_candidates_to_pick:] + sorted_array = aq_scores[~train_idx][next_idx] + max_scores = list(sorted_array[-num_candidates_to_pick:]) + parent_idx = np.arange(aq_scores.shape[0])[~train_idx][next_idx] + + return parent_idx, max_scores, aq_scores + + +def get_overlap_score(mean: float, std: float, x2: float = None, x1: float = None): + """Calculate overlap score given targets x2 (max) and x1 (min)""" + if x1 is None and x2 is None: + msg = "Please specify at least either a minimum or maximum target for MLI" + raise SequentialLearnerError(msg) + + if x1 is None: + x1 = -np.inf + + if x2 is None: + x2 = np.inf + + norm_dist = stats.norm(loc=mean, scale=std) + return norm_dist.cdf(x2) - norm_dist.cdf(x1) + + +def calculate_hhi_scores(structures: List[Atoms], hhi_type: str = "production"): + """ + Calculates HHI scores for structures weighted by their composition. 
+    The scores are normalized and inverted such that these should
+    be maximized in the interest of finding a low cost system
+
+    Parameters
+    ----------
+
+    structures:
+        List of Atoms objects for which to calculate the scores
+
+    hhi_type:
+        Type of HHI index to be used for the score
+        Options
+        - production (default)
+        - reserves
+
+    Returns
+    -------
+
+    hhi_scores:
+        Scores corresponding to each of the provided structures
+
+    """
+    if structures is None:
+        msg = "To include HHI, the structures must be provided"
+        raise SequentialLearnerError(msg)
+
+    raw_hhi_data = HHI
+    max_hhi = np.max([raw_hhi_data[hhi_type][r] for r in raw_hhi_data[hhi_type]])
+    min_hhi = np.min([raw_hhi_data[hhi_type][r] for r in raw_hhi_data[hhi_type]])
+    # normalize and invert (so that this score is to be maximized)
+    norm_hhi_data = {
+        el: 1.0 - (raw_hhi_data[hhi_type][el] - min_hhi) / (max_hhi - min_hhi)
+        for el in raw_hhi_data[hhi_type]
+    }
+
+    hhi_scores = np.zeros(len(structures))
+    for idx, struct in enumerate(structures):
+        hhi = 0
+        el_counts = struct.symbols.formula.count()
+        tot_size = len(struct)
+        # weight calculated hhi score by composition
+        for el in el_counts:
+            hhi += norm_hhi_data[el] * el_counts[el] / tot_size
+        hhi_scores[idx] = hhi
+    return hhi_scores
+
+
+def calculate_segregation_energy_scores(
+    structures: List[Atoms], data_source: str = "raban1999"
+):
+    """
+    Calculates segregation energy scores for structures based on their
+    host and single-atom dopant species. The scores are normalized and
+    inverted such that these should be maximized
+
+    Parameters
+    ----------
+
+    structures:
+        List of Atoms objects for which to calculate the scores
+
+    data_source:
+        Which tabulated data should the segregation energies be pulled from.
+        Options:
+        - "raban1999": A.V. Raban, et. al. Phys. Rev. B 59, 15990
+        - "rao2020": K. K. Rao, et. al. 
Topics in Catalysis volume 63, pages 728-741 (2020)
+
+    Returns
+    -------
+
+    seg_ener_scores:
+        Scores corresponding to each of the provided structures
+
+    """
+    if structures is None:
+        msg = "To include segregation energies, the structures must be provided"
+        raise SequentialLearnerError(msg)
+
+    if data_source == "raban1999":
+        # won't consider surface energies (ie. dop == host) for normalization
+        max_seg_ener = SEGREGATION_ENERGIES["raban1999"]["Pd"]["W"]
+        min_seg_ener = SEGREGATION_ENERGIES["raban1999"]["Fe_100"]["Ag"]
+        # normalize and invert (so that this score is to be maximized)
+        norm_seg_ener_data = {}
+        for hsp in SEGREGATION_ENERGIES["raban1999"]:
+            norm_seg_ener_data[hsp] = {}
+            for dsp in SEGREGATION_ENERGIES["raban1999"][hsp]:
+                norm_seg_ener_data[hsp][dsp] = 1.0 - (
+                    SEGREGATION_ENERGIES["raban1999"][hsp][dsp] - min_seg_ener
+                ) / (max_seg_ener - min_seg_ener)
+    elif data_source == "rao2020":
+        norm_seg_ener_data = SEGREGATION_ENERGIES["rao2020"]
+    else:
+        msg = f"Unknown data source {data_source}"
+        raise SequentialLearnerError(msg)
+
+    seg_ener_scores = np.zeros(len(structures))
+    for idx, struct in enumerate(structures):
+        el_counts = struct.symbols.formula.count()
+        assert len(el_counts) == 2
+        for el in el_counts:
+            if el_counts[el] == 1:
+                dsp = el
+            else:
+                hsp = el
+        seg_ener_scores[idx] = norm_seg_ener_data[hsp][dsp]
+    return seg_ener_scores
diff --git a/src/autocat/saa.py b/src/autocat/saa.py
index 9aa6350f..2b539821 100644
--- a/src/autocat/saa.py
+++ b/src/autocat/saa.py
@@ -9,7 +9,7 @@ from ase.data import atomic_numbers
 from ase.data import ground_state_magnetic_moments
 
 from pymatgen.io.ase import AseAtomsAdaptor
-from pymatgen.analysis.adsorption import AdsorbateSiteFinder
+from pymatgen.analysis.structure_matcher import StructureMatcher
 
 from autocat.surface import generate_surface_structures
 
@@ -34,6 +34,19 @@ def _find_dopant_index(structure, dopant_element):
     return dopant_index[0][0]
 
 
+def _find_all_surface_atom_indices(structure, 
tol: float = 0.5) -> List[int]: + """Helper function to find all surface atom indices + within a tolerance distance of the highest atom""" + all_heights = structure.positions[:, 2] + highest_atom_idx = np.argmax(all_heights) + height_of_highest_atom = structure[highest_atom_idx].z + surface_atom_indices = [] + for idx, atom in enumerate(structure): + if height_of_highest_atom - atom.z < tol: + surface_atom_indices.append(idx) + return surface_atom_indices + + def generate_saa_structures( host_species: List[str], dopant_species: List[str], @@ -310,33 +323,34 @@ def substitute_single_atom_on_surface( substitution functionality is folded into a more general form. """ - tags = host_structure.get_tags() - constraints = host_structure.constraints - host_magmoms = host_structure.get_initial_magnetic_moments() - # convert ase substrate to pymatgen structure - converter = AseAtomsAdaptor() - pmg_structure = converter.get_structure(host_structure) + all_surface_indices = _find_all_surface_atom_indices(host_structure) - # find all symmetrically unique site to substitute on - finder = AdsorbateSiteFinder(pmg_structure) + ase_all_doped_structures = [] + for idx in all_surface_indices: + dop_struct = host_structure.copy() + dop_struct[idx].symbol = dopant_element + dop_struct[idx].magmom = dopant_magnetic_moment + ase_all_doped_structures.append(dop_struct) - # collect all substitution structures and convert them back into ase.Atoms - pmg_substituted_structures = finder.generate_substitution_structures(dopant_element) - if len(pmg_substituted_structures) > 1: + # convert ase substrate to pymatgen structure + converter = AseAtomsAdaptor() + pmg_doped_structures = [ + converter.get_structure(struct) for struct in ase_all_doped_structures + ] + + # check that only one unique surface doped structure + matcher = StructureMatcher() + pmg_symm_equiv_doped_structure = [ + s[0] for s in matcher.group_structures(pmg_doped_structures) + ] + if len(pmg_symm_equiv_doped_structure) > 1: msg = 
"Multiple symmetrically unique sites to dope found." raise NotImplementedError(msg) - ase_substituted_structure = converter.get_atoms(pmg_substituted_structures[0]) - ase_substituted_structure.set_tags(tags) - # ensure pbc in xy only - ase_substituted_structure.pbc = (1, 1, 0) - # propagate constraints and host magnetization - ase_substituted_structure.constraints = constraints - ase_substituted_structure.set_initial_magnetic_moments(host_magmoms) - # set initial magnetic moment for the dopant atom - dopant_idx = _find_dopant_index(ase_substituted_structure, dopant_element) - ase_substituted_structure[dopant_idx].magmom = dopant_magnetic_moment + # assumes only a single unique doped structure + ase_substituted_structure = ase_all_doped_structures[0] + # center the single-atom dopant if place_dopant_at_center: cent_x = ( diff --git a/src/autocat/utils.py b/src/autocat/utils.py index fc05b721..6b53f4a8 100644 --- a/src/autocat/utils.py +++ b/src/autocat/utils.py @@ -1,5 +1,6 @@ import os from contextlib import contextmanager +from ase import Atoms @contextmanager @@ -10,3 +11,13 @@ def change_working_dir(new_dir: str): yield finally: os.chdir(current_dir) + + +def flatten_structures_dict(autocat_dict: dict): + structure_list = [] + for element in autocat_dict: + if isinstance(autocat_dict[element], dict): + structure_list.extend(flatten_structures_dict(autocat_dict[element])) + elif isinstance(autocat_dict[element], Atoms): + structure_list.append(autocat_dict[element]) + return structure_list diff --git a/tests/learning/__init__.py b/tests/learning/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/tests/learning/test_featurizers.py b/tests/learning/test_featurizers.py new file mode 100644 index 00000000..a23df132 --- /dev/null +++ b/tests/learning/test_featurizers.py @@ -0,0 +1,348 @@ +"""Unit tests for the `autocat.learning.featurizersi` module.""" + +import numpy as np + +from dscribe.descriptors import SineMatrix +from dscribe.descriptors 
import CoulombMatrix +from dscribe.descriptors import ACSF +from dscribe.descriptors import SOAP + +from matminer.featurizers.composition import ElementProperty +from matminer.featurizers.site import ChemicalSRO +from matminer.featurizers.site import OPSiteFingerprint +from matminer.featurizers.site import CrystalNNFingerprint + +from autocat.adsorption import generate_adsorbed_structures +from autocat.surface import generate_surface_structures +from autocat.saa import generate_saa_structures +from autocat.learning.featurizers import Featurizer +from autocat.utils import flatten_structures_dict + +from pymatgen.io.ase import AseAtomsAdaptor +from pymatgen.analysis.local_env import VoronoiNN + + +def test_eq_featurizer(): + # test comparing featurizers + + f = Featurizer( + SOAP, + max_size=5, + species_list=["Fe", "O", "H"], + kwargs={"rcut": 12, "nmax": 8, "lmax": 8}, + ) + f1 = Featurizer( + SOAP, + max_size=5, + species_list=["Fe", "O", "H"], + kwargs={"rcut": 12, "nmax": 8, "lmax": 8}, + ) + assert f == f1 + + f1.kwargs.update({"rcut": 13}) + assert f != f1 + + surfs = flatten_structures_dict(generate_surface_structures(["Fe", "V"])) + surfs.extend( + flatten_structures_dict( + generate_surface_structures(["Au", "Ag"], supercell_dim=(1, 1, 5)) + ) + ) + f = Featurizer(SineMatrix, design_space_structures=surfs,) + + f1 = Featurizer(SineMatrix, species_list=["Fe", "V", "Au", "Ag"], max_size=36) + assert f == f1 + + +def test_featurizer_species_list(): + # test default species list + f = Featurizer(SineMatrix) + assert f.species_list == ["Fe", "Ni", "Pt", "Pd", "Cu", "C", "N", "O", "H"] + + # test updating species list manually and sorting + f.species_list = ["Li", "Na", "K"] + assert f.species_list == ["K", "Na", "Li"] + + # test getting species list from design space structures + surfs = flatten_structures_dict(generate_surface_structures(["Fe", "V", "Ti"])) + saas = flatten_structures_dict(generate_saa_structures(["Cu", "Au"], ["Fe", "Pt"])) + 
surfs.extend(saas) + f.design_space_structures = surfs + assert f.species_list == ["Ti", "V", "Fe", "Pt", "Au", "Cu"] + + +def test_featurizer_max_size(): + # test default max size + f = Featurizer(SOAP, kwargs={"rcut": 12, "nmax": 8, "lmax": 8}) + assert f.max_size == 100 + + # test updating max size manually + f.max_size = 50 + assert f.max_size == 50 + + # test getting max size from design space structures + surfs = flatten_structures_dict( + generate_surface_structures(["Ru"], supercell_dim=(2, 2, 4)) + ) + surfs.extend( + flatten_structures_dict( + generate_surface_structures(["Fe"], supercell_dim=(4, 4, 4)) + ) + ) + f.design_space_structures = surfs + assert f.max_size == 64 + + +def test_featurizer_design_space_structures(): + # tests giving design space structures + surfs = flatten_structures_dict(generate_surface_structures(["Li", "Na"])) + surfs.extend( + flatten_structures_dict( + generate_surface_structures(["Cu", "Ni"], supercell_dim=(1, 1, 5)) + ) + ) + f = Featurizer( + SineMatrix, design_space_structures=surfs, max_size=20, species_list=["H"] + ) + assert f.design_space_structures == surfs + # make sure design space is prioritized over max size and species list + assert f.max_size == 36 + assert f.species_list == ["Na", "Li", "Ni", "Cu"] + + +def test_featurizer_featurizer_kwargs(): + # test specifying kwargs + f = Featurizer(CoulombMatrix, kwargs={"flatten": False}) + assert f.kwargs == {"flatten": False} + assert f.featurization_object.flatten == False + + # test updating kwargs + f.kwargs.update({"sparse": True}) + assert f.featurization_object.sparse == True + + # test rm kwargs when updating class + f.featurizer_class = SineMatrix + assert f.kwargs is None + + +def test_featurizer_featurizer_class(): + # test changing featurizer class + f = Featurizer(SOAP, kwargs={"rcut": 12, "nmax": 8, "lmax": 8}) + assert f.featurizer_class == SOAP + assert isinstance(f.featurization_object, SOAP) + f.featurizer_class = SineMatrix + assert 
f.featurizer_class == SineMatrix + assert isinstance(f.featurization_object, SineMatrix) + + +def test_featurizer_preset(): + # tests specifying preset for class object + f = Featurizer(ElementProperty, preset="magpie") + assert f.preset == "magpie" + assert "Electronegativity" in f.featurization_object.features + assert not "melting_point" in f.featurization_object.features + + f.preset = "matminer" + assert f.preset == "matminer" + assert not "NdUnfilled" in f.featurization_object.features + assert "coefficient_of_linear_thermal_expansion" in f.featurization_object.features + + +def test_featurizer_featurize_single(): + # tests featurizing single structure at a time + + conv = AseAtomsAdaptor() + + # TEST STRUCTURE FEATURIZERS + + # test ElementProperty + saa = flatten_structures_dict(generate_saa_structures(["Cu"], ["Pt"]))[0] + f = Featurizer(ElementProperty, preset="magpie", max_size=len(saa)) + acf = f.featurize_single(saa) + ep = ElementProperty.from_preset("magpie") + pymat = conv.get_structure(saa) + manual_elem_prop = ep.featurize(pymat.composition) + assert np.array_equal(acf, manual_elem_prop) + + # test SineMatrix + f.featurizer_class = SineMatrix + acf = f.featurize_single(saa) + sm = SineMatrix(n_atoms_max=len(saa), permutation="none") + manual_sm = sm.create(saa).reshape(-1,) + assert np.array_equal(acf, manual_sm) + + # test CoulombMatrix + f.featurizer_class = CoulombMatrix + acf = f.featurize_single(saa) + cm = CoulombMatrix(n_atoms_max=len(saa), permutation="none") + manual_cm = cm.create(saa).reshape(-1,) + assert np.array_equal(acf, manual_cm) + + # TEST SITE FEATURIZERS + ads_struct = flatten_structures_dict( + generate_adsorbed_structures( + surface=saa, + adsorbates=["OH"], + adsorption_sites={"custom": [(0.0, 0.0)]}, + use_all_sites=False, + ) + )[0] + f.max_size = len(ads_struct) + species = np.unique(ads_struct.get_chemical_symbols()).tolist() + f.species_list = species + + # test ACSF + f.featurizer_class = ACSF + f.kwargs = {"rcut": 
6.0} + acf = f.featurize_single(ads_struct) + acsf = ACSF(rcut=6.0, species=species) + manual_acsf = acsf.create(ads_struct, [36, 37]) + assert np.array_equal(acf, manual_acsf) + + # test SOAP + f.featurizer_class = SOAP + f.kwargs = {"rcut": 6.0, "lmax": 6, "nmax": 6} + acf = f.featurize_single(ads_struct) + soap = SOAP(rcut=6.0, species=species, nmax=6, lmax=6) + manual_soap = soap.create(ads_struct, [36, 37]) + assert np.array_equal(acf, manual_soap) + + # test ChemicalSRO + f.featurizer_class = ChemicalSRO + vnn = VoronoiNN(cutoff=10.0, allow_pathological=True) + f.kwargs = {"nn": vnn, "includes": species} + acf = f.featurize_single(ads_struct) + csro = ChemicalSRO(vnn, includes=species) + pym_struct = conv.get_structure(ads_struct) + csro.fit([[pym_struct, 36], [pym_struct, 37]]) + manual_csro = csro.featurize(pym_struct, -2) + manual_csro = np.concatenate((manual_csro, csro.featurize(pym_struct, -1))) + assert np.array_equal(acf, manual_csro) + + # test OPSiteFingerprint + f.featurizer_class = OPSiteFingerprint + acf = f.featurize_single(ads_struct) + pym_struct = conv.get_structure(ads_struct) + opsf = OPSiteFingerprint() + manual_opsf = opsf.featurize(pym_struct, -2) + manual_opsf = np.concatenate((manual_opsf, opsf.featurize(pym_struct, -1))) + assert np.array_equal(acf, manual_opsf) + + # test CrystalNNFingerprint + f.featurizer_class = CrystalNNFingerprint + f.preset = "cn" + acf = f.featurize_single(ads_struct) + pym_struct = conv.get_structure(ads_struct) + cnn = CrystalNNFingerprint.from_preset("cn") + manual_cnn = cnn.featurize(pym_struct, -2) + manual_cnn = np.concatenate((manual_cnn, cnn.featurize(pym_struct, -1))) + assert np.array_equal(acf, manual_cnn) + + +def test_featurizer_featurize_multiple(): + # tests featurizing multiple structures at a time + + # TEST STRUCTURE FEATURIZER + + # test ElementProperty + saas = flatten_structures_dict( + generate_saa_structures( + ["Au", "Cu"], ["Pd", "Pt"], facets={"Au": ["111"], "Cu": ["111"]} + ) + ) + f 
= Featurizer(ElementProperty, preset="magpie", design_space_structures=saas) + acf = f.featurize_multiple(saas) + manual_mat = [] + for i in range(len(saas)): + manual_mat.append(f.featurize_single(saas[i])) + manual_mat = np.array(manual_mat) + assert np.array_equal(acf, manual_mat) + + # test SineMatrix + f.featurizer_class = SineMatrix + acf = f.featurize_multiple(saas) + manual_mat = [] + for i in range(len(saas)): + manual_mat.append(f.featurize_single(saas[i])) + manual_mat = np.array(manual_mat) + assert np.array_equal(acf, manual_mat) + + # test CoulombMatrix + f.featurizer_class = CoulombMatrix + acf = f.featurize_multiple(saas) + manual_mat = [] + for i in range(len(saas)): + manual_mat.append(f.featurize_single(saas[i])) + manual_mat = np.array(manual_mat) + assert np.array_equal(acf, manual_mat) + + # TEST SITE FEATURIZER + ads_structs = [] + for struct in saas: + ads_structs.append( + flatten_structures_dict( + generate_adsorbed_structures( + surface=struct, + adsorbates=["NNH"], + adsorption_sites={"custom": [(0.0, 0.0)]}, + use_all_sites=False, + ) + )[0] + ) + species_list = [] + for s in ads_structs: + # get all unique species + found_species = np.unique(s.get_chemical_symbols()).tolist() + new_species = [spec for spec in found_species if spec not in species_list] + species_list.extend(new_species) + + # test SOAP + f.featurizer_class = SOAP + f.design_space_structures = ads_structs + f.kwargs = {"rcut": 6.0, "lmax": 6, "nmax": 6} + acf = f.featurize_multiple(ads_structs) + manual_mat = [] + for i in range(len(ads_structs)): + manual_mat.append(f.featurize_single(ads_structs[i]).flatten()) + manual_mat = np.array(manual_mat) + assert np.array_equal(acf, manual_mat) + + # test ACSF + f.featurizer_class = ACSF + f.kwargs = {"rcut": 6.0} + acf = f.featurize_multiple(ads_structs) + manual_mat = [] + for i in range(len(ads_structs)): + manual_mat.append(f.featurize_single(ads_structs[i]).flatten()) + manual_mat = np.array(manual_mat) + assert 
np.array_equal(acf, manual_mat) + + # test ChemicalSRO + f.featurizer_class = ChemicalSRO + vnn = VoronoiNN(cutoff=10.0, allow_pathological=True) + f.kwargs = {"nn": vnn, "includes": species_list} + acf = f.featurize_multiple(ads_structs) + manual_mat = [] + for i in range(len(ads_structs)): + manual_mat.append(f.featurize_single(ads_structs[i]).flatten()) + manual_mat = np.array(manual_mat) + assert np.array_equal(acf, manual_mat) + + # test OPSiteFingerprint + f.featurizer_class = OPSiteFingerprint + acf = f.featurize_multiple(ads_structs) + manual_mat = [] + for i in range(len(ads_structs)): + manual_mat.append(f.featurize_single(ads_structs[i]).flatten()) + manual_mat = np.array(manual_mat) + assert np.array_equal(acf, manual_mat) + + # test CrystalNNFingerprint + f.featurizer_class = CrystalNNFingerprint + f.preset = "cn" + acf = f.featurize_multiple(ads_structs) + manual_mat = [] + for i in range(len(ads_structs)): + manual_mat.append(f.featurize_single(ads_structs[i]).flatten()) + manual_mat = np.array(manual_mat) + assert np.array_equal(acf, manual_mat) diff --git a/tests/learning/test_predictors.py b/tests/learning/test_predictors.py new file mode 100644 index 00000000..471738dd --- /dev/null +++ b/tests/learning/test_predictors.py @@ -0,0 +1,229 @@ +"""Unit tests for the `autocat.learning.predictors` module""" + +import pytest +import numpy as np + +from sklearn.kernel_ridge import KernelRidge +from sklearn.gaussian_process import GaussianProcessRegressor +from sklearn.gaussian_process.kernels import RBF +from sklearn.utils.validation import check_is_fitted +from sklearn.exceptions import NotFittedError + +from dscribe.descriptors import SineMatrix +from dscribe.descriptors import SOAP + +from matminer.featurizers.composition import ElementProperty + +from ase import Atoms + +from autocat.adsorption import generate_adsorbed_structures, place_adsorbate +from autocat.surface import generate_surface_structures +from autocat.learning.predictors import 
Predictor +from autocat.learning.predictors import PredictorError +from autocat.utils import flatten_structures_dict + + +def test_fit(): + # Test returns a fit model + subs = flatten_structures_dict(generate_surface_structures(["Pt", "Fe", "Ru"])) + structs = [] + for sub in subs: + ads_struct = flatten_structures_dict( + generate_adsorbed_structures( + surface=sub, + adsorbates=["OH"], + adsorption_sites={"origin": [(0.0, 0.0)]}, + use_all_sites=False, + ) + )[0] + structs.append(ads_struct) + labels = np.random.rand(len(structs)) + acsc = Predictor( + featurizer_class=SOAP, + featurization_kwargs={ + "species_list": ["Pt", "Fe", "Ru", "O", "H"], + "kwargs": {"rcut": 6.0, "nmax": 6, "lmax": 6}, + }, + model_class=GaussianProcessRegressor, + ) + acsc.fit( + training_structures=structs, y=labels, + ) + assert acsc.is_fit + assert check_is_fitted(acsc.regressor) is None + + # check no longer fit after changing featurization kwargs + acsc.featurization_kwargs = { + "species_list": ["Pt", "Fe", "Ru", "O", "H", "N"], + "kwargs": {"rcut": 7.0, "nmax": 8, "lmax": 8}, + } + assert not acsc.is_fit + with pytest.raises(NotFittedError): + check_is_fitted(acsc.regressor) + + acsc.fit( + training_structures=structs, y=labels, + ) + + # check no longer fit after changing featurization class + acsc.featurizer_class = SineMatrix + assert not acsc.is_fit + with pytest.raises(NotFittedError): + check_is_fitted(acsc.regressor) + + acsc.fit( + training_structures=structs, y=labels, + ) + + # check no longer fit after changing model class + acsc.model_class = KernelRidge + assert not acsc.is_fit + with pytest.raises(NotFittedError): + check_is_fitted(acsc.regressor) + + acsc.fit( + training_structures=structs, y=labels, + ) + + # check no longer fit after changing model kwargs + kernel = RBF() + acsc.model_kwargs = {"kernel": kernel} + assert not acsc.is_fit + with pytest.raises(NotFittedError): + check_is_fitted(acsc.regressor) + + +def test_predict(): + # Test outputs are returned 
as expected + subs = flatten_structures_dict(generate_surface_structures(["Pt", "Fe", "Ru"])) + structs = [] + for sub in subs: + ads_struct = flatten_structures_dict( + generate_adsorbed_structures( + surface=sub, + adsorbates=["OH"], + adsorption_sites={"origin": [(0.0, 0.0)]}, + use_all_sites=False, + ) + )[0] + structs.append(ads_struct) + labels = np.random.rand(len(structs)) + acsc = Predictor( + featurizer_class=SOAP, + featurization_kwargs={ + "species_list": ["Pt", "Fe", "Ru", "O", "H"], + "kwargs": {"rcut": 6.0, "nmax": 6, "lmax": 6}, + }, + model_class=GaussianProcessRegressor, + ) + acsc.fit( + training_structures=structs[:-3], y=labels[:-3], + ) + pred, unc = acsc.predict([structs[-3]],) + assert len(pred) == 1 + # check dimension of uncertainty estimates + assert len(unc) == 1 + + pred, unc = acsc.predict(structs[-3:],) + assert len(pred) == 3 + # check dimension of uncertainty estimates + assert len(unc) == 3 + + # Test prediction on model without uncertainty + acsc.model_class = KernelRidge + acsc.fit( + training_structures=structs[:-3], y=labels[:-3], + ) + pred, unc = acsc.predict([structs[-2]],) + assert len(pred) == 1 + assert unc is None + + +def test_score(): + # Tests the score method + subs = flatten_structures_dict(generate_surface_structures(["Pt", "Fe", "Ru"])) + structs = [] + for sub in subs: + ads_struct = flatten_structures_dict( + generate_adsorbed_structures( + surface=sub, + adsorbates=["OH"], + adsorption_sites={"origin": [(0.0, 0.0)]}, + use_all_sites=False, + ) + )[0] + structs.append(ads_struct) + labels = np.random.rand(len(structs)) + acsc = Predictor( + featurizer_class=SOAP, + featurization_kwargs={ + "species_list": ["Pt", "Fe", "Ru", "O", "H"], + "kwargs": {"rcut": 6.0, "nmax": 6, "lmax": 6}, + }, + model_class=GaussianProcessRegressor, + ) + acsc.fit( + training_structures=structs[:-3], y=labels[:-3], + ) + mae = acsc.score(structs[-3:], labels[-3:]) + assert isinstance(mae, float) + mse = acsc.score(structs[-2:], 
labels[-2:], metric="mse") + assert isinstance(mse, float) + + # Test returning predictions + _, preds, uncs = acsc.score(structs[-2:], labels[-2:], return_predictions=True) + assert len(preds) == 2 + assert len(uncs) == 2 + # check catches unknown metric + with pytest.raises(PredictorError): + acsc.score(structs, labels, metric="msd") + + +def test_class_and_kwargs_logic(): + # Tests providing regression model class and kwargs + featurization_kwargs = { + "species_list": ["Pt", "Fe", "Ru", "O", "H"], + "kwargs": {"rcut": 6.0, "nmax": 6, "lmax": 6, "sparse": True}, + } + acsc = Predictor( + model_class=KernelRidge, + model_kwargs={"gamma": 0.5}, + featurizer_class=SOAP, + featurization_kwargs=featurization_kwargs, + ) + assert isinstance(acsc.regressor, KernelRidge) + # check that regressor created with correct kwargs + assert acsc.regressor.gamma == 0.5 + assert acsc.model_kwargs == {"gamma": 0.5} + assert acsc.featurization_kwargs == featurization_kwargs + assert acsc.featurizer.featurization_object.sparse + + # check that model kwargs are removed when model class is changed + acsc.model_class = GaussianProcessRegressor + assert acsc.model_kwargs is None + assert acsc.featurizer_class == SOAP + assert acsc.featurization_kwargs == featurization_kwargs + + # check that regressor is updated when model kwargs updated + acsc.model_kwargs = {"alpha": 5e-10} + assert acsc.regressor.alpha == 5e-10 + + # check that featurization kwargs removed when featurization class changed + acsc.featurizer_class = ElementProperty + assert acsc.featurization_kwargs is None + + # check that featurizer is updated when featurization kwargs updated + acsc.featurization_kwargs = {"preset": "magpie"} + assert "Electronegativity" in acsc.featurizer.featurization_object.features + + acsc.featurization_kwargs = {"preset": "matminer"} + assert ( + "coefficient_of_linear_thermal_expansion" + in acsc.featurizer.featurization_object.features + ) + + acsc.featurizer_class = SineMatrix + 
acsc.featurization_kwargs = {"kwargs": {"flatten": False}} + assert not acsc.featurizer.featurization_object.flatten + acsc.featurization_kwargs = {"kwargs": {"flatten": True}} + assert acsc.featurizer.featurization_object.flatten diff --git a/tests/learning/test_sequential.py b/tests/learning/test_sequential.py new file mode 100644 index 00000000..1f1386c8 --- /dev/null +++ b/tests/learning/test_sequential.py @@ -0,0 +1,1180 @@ +"""Unit tests for the `autocat.learning.sequential` module""" + +import os +import pytest +import numpy as np +import json + +import tempfile + +from sklearn.gaussian_process import GaussianProcessRegressor + +from dscribe.descriptors import SOAP +from dscribe.descriptors import SineMatrix +from matminer.featurizers.composition import ElementProperty + +from scipy import stats +from ase.io.jsonio import decode as ase_decoder +from ase import Atoms +from autocat.data.hhi import HHI +from autocat.data.segregation_energies import SEGREGATION_ENERGIES +from autocat.learning.predictors import Predictor +from autocat.learning.sequential import ( + DesignSpace, + DesignSpaceError, + SequentialLearnerError, + SequentialLearner, + calculate_segregation_energy_scores, + choose_next_candidate, + get_overlap_score, +) +from autocat.learning.sequential import simulated_sequential_learning +from autocat.learning.sequential import multiple_simulated_sequential_learning_runs +from autocat.learning.sequential import calculate_hhi_scores +from autocat.surface import generate_surface_structures +from autocat.adsorption import place_adsorbate +from autocat.saa import generate_saa_structures +from autocat.utils import flatten_structures_dict + + +def test_sequential_learner_from_json(): + # Tests generation of an SequentialLearner from a json + sub1 = generate_surface_structures(["Au"], facets={"Au": ["110"]})["Au"]["fcc110"][ + "structure" + ] + sub1 = place_adsorbate(sub1, Atoms("C")) + sub2 = generate_surface_structures(["Li"], facets={"Li": 
["100"]})["Li"]["bcc100"][ + "structure" + ] + sub2 = place_adsorbate(sub2, Atoms("Mg")) + sub3 = generate_surface_structures(["Ru"], facets={"Ru": ["0001"]})["Ru"][ + "hcp0001" + ]["structure"] + sub3 = place_adsorbate(sub3, Atoms("N")) + structs = [sub1, sub2, sub3] + labels = np.array([0.1, np.nan, 0.3]) + acds = DesignSpace(structs, labels) + featurization_kwargs = {"kwargs": {"rcut": 5.0, "lmax": 6, "nmax": 6}} + predictor_kwargs = { + "model_class": GaussianProcessRegressor, + "featurizer_class": SOAP, + "featurization_kwargs": featurization_kwargs, + } + + candidate_selection_kwargs = {"aq": "Random", "num_candidates_to_pick": 3} + acsl = SequentialLearner( + acds, + predictor_kwargs=predictor_kwargs, + candidate_selection_kwargs=candidate_selection_kwargs, + ) + acsl.iterate() + with tempfile.TemporaryDirectory() as _tmp_dir: + acsl.write_json_to_disk(_tmp_dir, "testing_acsl.json") + json_path = os.path.join(_tmp_dir, "testing_acsl.json") + written_acsl = SequentialLearner.from_json(json_path) + assert np.array_equal( + written_acsl.design_space.design_space_labels, + acds.design_space_labels, + equal_nan=True, + ) + assert ( + written_acsl.design_space.design_space_structures + == acds.design_space_structures + ) + predictor_kwargs["featurization_kwargs"][ + "design_space_structures" + ] = acds.design_space_structures + assert written_acsl.predictor_kwargs == predictor_kwargs + assert written_acsl.candidate_selection_kwargs == candidate_selection_kwargs + assert written_acsl.iteration_count == 1 + assert np.array_equal(written_acsl.train_idx, acsl.train_idx) + assert written_acsl.train_idx[0] in [True, False] + assert np.array_equal(written_acsl.train_idx_history, acsl.train_idx_history) + assert written_acsl.train_idx_history[0][0] in [True, False] + assert np.array_equal(written_acsl.predictions, acsl.predictions) + assert np.array_equal( + written_acsl.predictions_history, acsl.predictions_history + ) + assert np.array_equal(written_acsl.uncertainties, 
acsl.uncertainties) + assert np.array_equal( + written_acsl.uncertainties_history, acsl.uncertainties_history + ) + assert np.array_equal(written_acsl.candidate_indices, acsl.candidate_indices) + assert np.array_equal( + written_acsl.candidate_index_history, acsl.candidate_index_history + ) + assert np.array_equal(written_acsl.acquisition_scores, acsl.acquisition_scores) + + +def test_sequential_learner_write_json(): + # Tests writing a SequentialLearner to disk as a json + sub1 = generate_surface_structures(["Ag"], facets={"Ag": ["110"]})["Ag"]["fcc110"][ + "structure" + ] + sub1 = place_adsorbate(sub1, Atoms("B")) + sub2 = generate_surface_structures(["Li"], facets={"Li": ["100"]})["Li"]["bcc100"][ + "structure" + ] + sub2 = place_adsorbate(sub2, Atoms("Al")) + sub3 = generate_surface_structures(["Ti"], facets={"Ti": ["0001"]})["Ti"][ + "hcp0001" + ]["structure"] + sub3 = place_adsorbate(sub3, Atoms("H")) + structs = [sub1, sub2, sub3] + labels = np.array([0.1, 0.2, np.nan]) + featurization_kwargs = {"preset": "magpie"} + predictor_kwargs = { + "model_class": GaussianProcessRegressor, + "featurizer_class": ElementProperty, + "featurization_kwargs": featurization_kwargs, + } + + candidate_selection_kwargs = {"aq": "MU", "num_candidates_to_pick": 2} + acds = DesignSpace(structs, labels) + acsl = SequentialLearner( + acds, + predictor_kwargs=predictor_kwargs, + candidate_selection_kwargs=candidate_selection_kwargs, + ) + with tempfile.TemporaryDirectory() as _tmp_dir: + acsl.write_json_to_disk(_tmp_dir, "testing_acsl.json") + with open(os.path.join(_tmp_dir, "testing_acsl.json"), "r") as f: + sl = json.load(f) + written_structs = [ase_decoder(sl[i]) for i in range(3)] + assert structs == written_structs + assert np.array_equal(labels, sl[3], equal_nan=True) + # check predictor kwargs kept + predictor_kwargs["model_class"] = [ + "sklearn.gaussian_process._gpr", + "GaussianProcessRegressor", + ] + predictor_kwargs["featurizer_class"] = [ + 
"matminer.featurizers.composition.composite", + "ElementProperty", + ] + del predictor_kwargs["featurization_kwargs"]["design_space_structures"] + assert sl[4] == predictor_kwargs + # check candidate selection kwargs kept + assert sl[-2] == candidate_selection_kwargs + assert sl[-1] == { + "iteration_count": 0, + "train_idx": None, + "train_idx_history": None, + "predictions": None, + "predictions_history": None, + "uncertainties": None, + "uncertainties_history": None, + "candidate_indices": None, + "candidate_index_history": None, + "acquisition_scores": None, + } + + # test after iteration + acsl.iterate() + with tempfile.TemporaryDirectory() as _tmp_dir: + acsl.write_json_to_disk(_tmp_dir, "testing_acsl.json") + with open(os.path.join(_tmp_dir, "testing_acsl.json"), "r") as f: + sl = json.load(f) + written_structs = [ase_decoder(sl[i]) for i in range(3)] + assert structs == written_structs + assert np.array_equal(labels, sl[3], equal_nan=True) + # check predictor kwargs kept + predictor_kwargs["model_class"] = [ + "sklearn.gaussian_process._gpr", + "GaussianProcessRegressor", + ] + predictor_kwargs["featurizer_class"] = [ + "matminer.featurizers.composition.composite", + "ElementProperty", + ] + assert sl[4] == predictor_kwargs + # check candidate selection kwargs kept + assert sl[-2] == candidate_selection_kwargs + assert sl[-1].get("iteration_count") == 1 + assert sl[-1].get("train_idx") == acsl.train_idx.tolist() + assert sl[-1].get("train_idx_history") == [ + ti.tolist() for ti in acsl.train_idx_history + ] + assert isinstance(sl[-1].get("train_idx_history")[0][0], bool) + assert sl[-1].get("predictions") == acsl.predictions.tolist() + assert sl[-1].get("predictions_history") == [ + p.tolist() for p in acsl.predictions_history + ] + assert sl[-1].get("uncertainties") == acsl.uncertainties.tolist() + assert sl[-1].get("uncertainties_history") == [ + u.tolist() for u in acsl.uncertainties_history + ] + assert sl[-1].get("candidate_indices") == 
acsl.candidate_indices.tolist() + assert sl[-1].get("candidate_index_history") == [ + c.tolist() for c in acsl.candidate_index_history + ] + assert sl[-1].get("acquisition_scores") == acsl.acquisition_scores.tolist() + assert sl[-1].get("acquisition_scores") is not None + + +def test_sequential_learner_to_jsonified_list(): + # Tests writing a SequentialLearner to disk as a json + sub1 = generate_surface_structures(["Ag"], facets={"Ag": ["110"]})["Ag"]["fcc110"][ + "structure" + ] + sub1 = place_adsorbate(sub1, Atoms("B")) + sub2 = generate_surface_structures(["Li"], facets={"Li": ["100"]})["Li"]["bcc100"][ + "structure" + ] + sub2 = place_adsorbate(sub2, Atoms("Al")) + sub3 = generate_surface_structures(["Ti"], facets={"Ti": ["0001"]})["Ti"][ + "hcp0001" + ]["structure"] + sub3 = place_adsorbate(sub3, Atoms("H")) + structs = [sub1, sub2, sub3] + labels = np.array([0.1, 0.2, np.nan]) + featurization_kwargs = {"preset": "magpie"} + predictor_kwargs = { + "model_class": GaussianProcessRegressor, + "featurizer_class": ElementProperty, + "featurization_kwargs": featurization_kwargs, + } + + candidate_selection_kwargs = {"aq": "MU", "num_candidates_to_pick": 2} + acds = DesignSpace(structs, labels) + acsl = SequentialLearner( + acds, + predictor_kwargs=predictor_kwargs, + candidate_selection_kwargs=candidate_selection_kwargs, + ) + jsonified_list = acsl.to_jsonified_list() + json_structs = [ase_decoder(jsonified_list[i]) for i in range(3)] + assert structs == json_structs + assert np.array_equal(labels, jsonified_list[3], equal_nan=True) + # check predictor kwargs kept + predictor_kwargs["model_class"] = [ + "sklearn.gaussian_process._gpr", + "GaussianProcessRegressor", + ] + predictor_kwargs["featurizer_class"] = [ + "matminer.featurizers.composition.composite", + "ElementProperty", + ] + del predictor_kwargs["featurization_kwargs"]["design_space_structures"] + assert jsonified_list[4] == predictor_kwargs + # check candidate selection kwargs kept + assert 
jsonified_list[-2] == candidate_selection_kwargs + assert jsonified_list[-1] == { + "iteration_count": 0, + "train_idx": None, + "train_idx_history": None, + "predictions": None, + "predictions_history": None, + "uncertainties": None, + "uncertainties_history": None, + "candidate_indices": None, + "candidate_index_history": None, + "acquisition_scores": None, + } + + # test after iteration + acsl.iterate() + jsonified_list = acsl.to_jsonified_list() + json_structs = [ase_decoder(jsonified_list[i]) for i in range(3)] + assert structs == json_structs + assert np.array_equal(labels, jsonified_list[3], equal_nan=True) + # check predictor kwargs kept + predictor_kwargs["model_class"] = [ + "sklearn.gaussian_process._gpr", + "GaussianProcessRegressor", + ] + predictor_kwargs["featurizer_class"] = [ + "matminer.featurizers.composition.composite", + "ElementProperty", + ] + assert jsonified_list[4] == predictor_kwargs + # check candidate selection kwargs kept + assert jsonified_list[-2] == candidate_selection_kwargs + assert jsonified_list[-1].get("iteration_count") == 1 + assert jsonified_list[-1].get("train_idx") == acsl.train_idx.tolist() + assert jsonified_list[-1].get("train_idx_history") == [ + ti.tolist() for ti in acsl.train_idx_history + ] + assert isinstance(jsonified_list[-1].get("train_idx_history")[0][0], bool) + assert jsonified_list[-1].get("predictions") == acsl.predictions.tolist() + assert jsonified_list[-1].get("predictions_history") == [ + p.tolist() for p in acsl.predictions_history + ] + assert jsonified_list[-1].get("uncertainties") == acsl.uncertainties.tolist() + assert jsonified_list[-1].get("uncertainties_history") == [ + u.tolist() for u in acsl.uncertainties_history + ] + assert ( + jsonified_list[-1].get("candidate_indices") == acsl.candidate_indices.tolist() + ) + assert jsonified_list[-1].get("candidate_index_history") == [ + c.tolist() for c in acsl.candidate_index_history + ] + assert ( + jsonified_list[-1].get("acquisition_scores") == 
acsl.acquisition_scores.tolist() + ) + assert jsonified_list[-1].get("acquisition_scores") is not None + + +def test_sequential_learner_iterate(): + # Tests iterate method + sub1 = generate_surface_structures(["Ca"], facets={"Ca": ["111"]})["Ca"]["fcc111"][ + "structure" + ] + sub1 = place_adsorbate(sub1, Atoms("Na")) + sub2 = generate_surface_structures(["Nb"], facets={"Nb": ["110"]})["Nb"]["bcc110"][ + "structure" + ] + sub2 = place_adsorbate(sub2, Atoms("K")) + sub3 = generate_surface_structures(["Ta"], facets={"Ta": ["110"]})["Ta"]["bcc110"][ + "structure" + ] + sub3 = place_adsorbate(sub3, Atoms("H")) + sub4 = generate_surface_structures(["Sr"], facets={"Sr": ["110"]})["Sr"]["fcc110"][ + "structure" + ] + sub4 = place_adsorbate(sub4, Atoms("Fe")) + structs = [sub1, sub2, sub3, sub4] + labels = np.array([11.0, 25.0, np.nan, np.nan]) + acds = DesignSpace(structs, labels) + acsl = SequentialLearner(acds, predictor_kwargs={"featurizer_class": SineMatrix}) + + assert acsl.iteration_count == 0 + + acsl.iterate() + assert acsl.iteration_count == 1 + assert acsl.predictions is not None + assert len(acsl.predictions_history) == 1 + assert len(acsl.predictions_history[0]) == len(acds) + assert acsl.uncertainties is not None + assert len(acsl.uncertainties_history) == 1 + assert len(acsl.uncertainties_history[0]) == len(acds) + assert acsl.candidate_indices is not None + assert acsl.candidate_index_history is not None + assert acsl.candidate_index_history == [acsl.candidate_indices] + assert len(acsl.train_idx_history) == 1 + assert np.count_nonzero(acsl.train_idx_history[-1]) == 2 + + cand_ind1 = acsl.candidate_indices[0] + acsl.design_space.update([structs[cand_ind1]], np.array([13.0])) + + acsl.iterate() + assert acsl.iteration_count == 2 + + # checks being iterated a second time to fully explore the design space + cand_ind2 = acsl.candidate_indices[0] + assert cand_ind1 != cand_ind2 + assert acsl.candidate_index_history == [[cand_ind1], [cand_ind2]] + assert 
len(acsl.uncertainties_history) == 2 + assert len(acsl.predictions_history) == 2 + assert len(acsl.train_idx_history) == 2 + assert np.count_nonzero(acsl.train_idx_history[-1]) == 3 + + acsl.design_space.update([structs[cand_ind2]], np.array([17.0])) + acsl.iterate() + + assert acsl.iteration_count == 3 + assert acsl.candidate_structures is None + assert acsl.candidate_indices is None + assert acsl.candidate_index_history == [[cand_ind1], [cand_ind2]] + assert len(acsl.uncertainties_history) == 3 + assert len(acsl.predictions_history) == 3 + assert len(acsl.train_idx_history) == 3 + assert np.count_nonzero(acsl.train_idx_history[-1]) == 4 + + +def test_sequential_learner_setup(): + # Tests setting up an SL object + sub1 = generate_surface_structures(["Ir"], facets={"Ir": ["100"]})["Ir"]["fcc100"][ + "structure" + ] + sub1 = place_adsorbate(sub1, Atoms("S")) + sub2 = generate_surface_structures(["Mo"], facets={"Mo": ["110"]})["Mo"]["bcc110"][ + "structure" + ] + sub2 = place_adsorbate(sub2, Atoms("H")) + sub3 = generate_surface_structures(["Fe"], facets={"Fe": ["110"]})["Fe"]["bcc110"][ + "structure" + ] + sub3 = place_adsorbate(sub3, Atoms("O")) + sub4 = generate_surface_structures(["Re"], facets={"Re": ["0001"]})["Re"][ + "hcp0001" + ]["structure"] + sub4 = place_adsorbate(sub4, Atoms("N")) + structs = [sub1, sub2, sub3, sub4] + labels = np.array([4.0, np.nan, 6.0, np.nan]) + acds = DesignSpace(structs, labels) + acsl = SequentialLearner(acds, predictor_kwargs={"featurizer_class": SineMatrix}) + + assert acsl.design_space.design_space_structures == acds.design_space_structures + assert np.array_equal( + acsl.design_space.design_space_labels, acds.design_space_labels, equal_nan=True + ) + assert acsl.iteration_count == 0 + assert acsl.predictions == None + assert acsl.candidate_indices == None + assert acsl.candidate_selection_kwargs == {"aq": "Random"} + # test specifying more kwargs + predictor_kwargs = { + "featurizer_class": SOAP, + "model_kwargs": 
{"n_restarts_optimizer": 9}, + "featurization_kwargs": {"kwargs": {"rcut": 5.0, "lmax": 6, "nmax": 6}}, + } + acsl = SequentialLearner( + acds, + predictor_kwargs=predictor_kwargs, + candidate_selection_kwargs={"aq": "MU", "num_candidates_to_pick": 2}, + ) + # test passing predictor kwargs + assert acsl.predictor_kwargs == predictor_kwargs + assert isinstance(acsl.predictor.featurizer.featurization_object, SOAP) + assert acsl.predictor.featurization_kwargs["kwargs"] == { + "rcut": 5.0, + "lmax": 6, + "nmax": 6, + } + + # test passing candidate selection kwargs + assert acsl.candidate_selection_kwargs == {"aq": "MU", "num_candidates_to_pick": 2} + + +def test_design_space_setup(): + # test setting up an DesignSpace + sub1 = generate_surface_structures( + ["Pt"], supercell_dim=[2, 2, 5], facets={"Pt": ["100"]} + )["Pt"]["fcc100"]["structure"] + sub1 = place_adsorbate(sub1, Atoms("H")) + sub2 = generate_surface_structures(["Na"], facets={"Na": ["110"]})["Na"]["bcc110"][ + "structure" + ] + sub2 = place_adsorbate(sub2, Atoms("F")) + structs = [sub1, sub2] + labels = np.array([3.0, 7.0]) + acds = DesignSpace(structs, labels) + assert acds.design_space_structures == [sub1, sub2] + assert acds.design_space_structures is not structs + assert np.array_equal(acds.design_space_labels, labels) + assert acds.design_space_labels is not labels + assert len(acds) == 2 + # test different number of structures and labels + with pytest.raises(DesignSpaceError): + acds = DesignSpace([sub1], labels) + + +def test_delitem_design_space(): + # tests deleting items from the design space + sub0 = generate_surface_structures(["Pd"], facets={"Pd": ["100"]})["Pd"]["fcc100"][ + "structure" + ] + sub0 = place_adsorbate(sub0, Atoms("O")) + sub1 = generate_surface_structures(["V"], facets={"V": ["110"]})["V"]["bcc110"][ + "structure" + ] + sub1 = place_adsorbate(sub1, Atoms("H")) + sub2 = generate_surface_structures(["Fe"], facets={"Fe": ["110"]})["Fe"]["bcc110"][ + "structure" + ] + sub2 = 
place_adsorbate(sub2, Atoms("S")) + sub3 = generate_surface_structures(["Ru"], facets={"Ru": ["0001"]})["Ru"][ + "hcp0001" + ]["structure"] + sub3 = place_adsorbate(sub3, Atoms("P")) + structs = [sub0, sub1, sub2] + labels = np.array([-2.5, np.nan, 600.0]) + # test deleting by single idx + acds = DesignSpace(structs, labels) + del acds[1] + assert len(acds) == 2 + assert np.array_equal(acds.design_space_labels, np.array([-2.5, 600.0])) + assert acds.design_space_structures == [sub0, sub2] + # test deleting using a mask + acds = DesignSpace(structs, labels) + mask = np.zeros(len(acds), bool) + mask[0] = 1 + mask[1] = 1 + # n.b. deletes wherever mask is True + del acds[mask] + assert len(acds) == 1 + assert acds.design_space_structures == [sub2] + assert np.array_equal(acds.design_space_labels, np.array([600.0])) + # test deleting by providing list of idx + structs = [sub0, sub1, sub2, sub3] + labels = np.array([-20, 8, np.nan, 0.3]) + acds = DesignSpace(structs, labels) + del acds[[1, 3]] + assert len(acds) == 2 + assert np.array_equal( + acds.design_space_labels, np.array([-20, np.nan]), equal_nan=True + ) + assert acds.design_space_structures == [sub0, sub2] + # test deleting by providing list with a single idx + acds = DesignSpace(structs, labels) + del acds[[0]] + assert len(acds) == 3 + assert np.array_equal( + acds._design_space_labels, np.array([8, np.nan, 0.3]), equal_nan=True + ) + assert acds.design_space_structures == [sub1, sub2, sub3] + + +def test_eq_design_space(): + # test comparing design spaces + sub0 = generate_surface_structures(["Pd"], facets={"Pd": ["100"]})["Pd"]["fcc100"][ + "structure" + ] + sub0 = place_adsorbate(sub0, Atoms("O")) + sub1 = generate_surface_structures(["V"], facets={"V": ["110"]})["V"]["bcc110"][ + "structure" + ] + sub1 = place_adsorbate(sub1, Atoms("H")) + sub2 = generate_surface_structures(["Fe"], facets={"Fe": ["110"]})["Fe"]["bcc110"][ + "structure" + ] + sub2 = place_adsorbate(sub2, Atoms("S")) + sub3 = 
generate_surface_structures(["Ru"], facets={"Ru": ["0001"]})["Ru"][ + "hcp0001" + ]["structure"] + sub3 = place_adsorbate(sub3, Atoms("P")) + structs = [sub0, sub1, sub2] + labels = np.array([-2.5, np.nan, 600.0]) + + # test trivial case + acds = DesignSpace(structs, labels) + acds0 = DesignSpace(structs, labels) + assert acds == acds0 + + # test comparing when different length + acds1 = DesignSpace(structs[:-1], labels[:-1]) + assert acds != acds1 + + # test same structures, different labels + acds2 = DesignSpace(structs, labels) + acds2.update([structs[1]], labels=np.array([0.2])) + assert acds != acds2 + + # test diff structures, same labels + structs[0][0].symbol = "Ni" + acds3 = DesignSpace(structs, labels) + assert acds != acds3 + + +def test_updating_design_space(): + sub1 = generate_surface_structures(["Ag"], facets={"Ag": ["100"]})["Ag"]["fcc100"][ + "structure" + ] + sub2 = generate_surface_structures(["Li"], facets={"Li": ["110"]})["Li"]["bcc110"][ + "structure" + ] + sub3 = generate_surface_structures(["Na"], facets={"Na": ["110"]})["Na"]["bcc110"][ + "structure" + ] + sub4 = generate_surface_structures(["Ru"], facets={"Ru": ["0001"]})["Ru"][ + "hcp0001" + ]["structure"] + structs = [sub1, sub2, sub3] + labels = np.array([4.0, 5.0, 6.0]) + acds = DesignSpace(structs, labels) + + # Test trying to update just structures + with pytest.raises(DesignSpaceError): + acds.design_space_structures = [sub4] + + # Test trying to update just labels + with pytest.raises(DesignSpaceError): + acds.design_space_structures = np.array([4.0]) + + # Test updating label already in DS and extending + acds.update([sub1, sub4], np.array([10.0, 20.0])) + assert np.isclose(acds.design_space_labels[0], 10.0) + assert sub4 in acds.design_space_structures + assert np.isclose(acds.design_space_labels[-1], 20.0) + + # Test trying to give structures that are not Atoms objects + with pytest.raises(AssertionError): + acds.update([sub1, np.array(20.0)], np.array([3.0, 4.0])) + + +def 
test_write_design_space_as_json(): + # Tests writing out the DesignSpace to disk + sub1 = generate_surface_structures(["Pd"], facets={"Pd": ["111"]})["Pd"]["fcc111"][ + "structure" + ] + sub2 = generate_surface_structures(["V"], facets={"V": ["110"]})["V"]["bcc110"][ + "structure" + ] + structs = [sub1, sub2] + labels = np.array([0.3, 0.8]) + with tempfile.TemporaryDirectory() as _tmp_dir: + acds = DesignSpace(design_space_structures=structs, design_space_labels=labels,) + acds.write_json_to_disk(write_location=_tmp_dir) + # loads back written json + with open(os.path.join(_tmp_dir, "acds.json"), "r") as f: + ds = json.load(f) + written_structs = [ase_decoder(ds[i]) for i in range(2)] + assert structs == written_structs + assert np.array_equal(labels, ds[-1]) + + +def test_design_space_to_jsonified_list(): + # Tests returning the DesignSpace as a jsonified list + sub1 = generate_surface_structures(["Pd"], facets={"Pd": ["111"]})["Pd"]["fcc111"][ + "structure" + ] + sub2 = generate_surface_structures(["V"], facets={"V": ["110"]})["V"]["bcc110"][ + "structure" + ] + structs = [sub1, sub2] + labels = np.array([0.3, 0.8]) + acds = DesignSpace(design_space_structures=structs, design_space_labels=labels,) + jsonified_list = acds.to_jsonified_list() + json_structs = [ase_decoder(jsonified_list[i]) for i in range(2)] + assert structs == json_structs + assert np.array_equal(labels, jsonified_list[-1]) + + +def test_get_design_space_from_json(): + # Tests generating DesignSpace from a json + sub1 = generate_surface_structures(["Au"], facets={"Au": ["100"]})["Au"]["fcc100"][ + "structure" + ] + sub2 = generate_surface_structures(["Fe"], facets={"Fe": ["110"]})["Fe"]["bcc110"][ + "structure" + ] + sub3 = generate_surface_structures(["Ru"], facets={"Ru": ["0001"]})["Ru"][ + "hcp0001" + ]["structure"] + structs = [sub1, sub2, sub3] + labels = np.array([30.0, 900.0, np.nan]) + with tempfile.TemporaryDirectory() as _tmp_dir: + acds = DesignSpace(design_space_structures=structs, 
design_space_labels=labels,) + acds.write_json_to_disk("testing.json", write_location=_tmp_dir) + + tmp_json_dir = os.path.join(_tmp_dir, "testing.json") + acds_from_json = DesignSpace.from_json(tmp_json_dir) + assert acds_from_json.design_space_structures == structs + assert np.array_equal( + acds_from_json.design_space_labels, labels, equal_nan=True + ) + + +def test_simulated_sequential_histories(): + # Test output sl has appropriate histories + sub1 = generate_surface_structures(["Pt"], facets={"Pt": ["111"]})["Pt"]["fcc111"][ + "structure" + ] + sub2 = generate_surface_structures(["Cu"], facets={"Cu": ["100"]})["Cu"]["fcc100"][ + "structure" + ] + base_struct1 = place_adsorbate(sub1, Atoms("O")) + base_struct2 = place_adsorbate(sub2, Atoms("N")) + base_struct3 = place_adsorbate(sub2, Atoms("H")) + ds_structs = [ + base_struct1, + base_struct2, + base_struct3, + sub1, + sub2, + ] + ds_labels = np.array([0.0, 1.0, 2.0, 3.0, 4.0]) + acds = DesignSpace(ds_structs, ds_labels) + candidate_selection_kwargs = { + "target_min": 0.9, + "target_max": 2.1, + "aq": "MLI", + "num_candidates_to_pick": 2, + } + predictor_kwargs = {"featurizer_class": SineMatrix} + sl = simulated_sequential_learning( + full_design_space=acds, + init_training_size=1, + number_of_sl_loops=2, + candidate_selection_kwargs=candidate_selection_kwargs, + predictor_kwargs=predictor_kwargs, + ) + + # Test number of sl loops + assert sl.iteration_count == 3 + + # Test initial training size + assert sl.train_idx_history[0].sum() == 1 + + # Test keeping track of pred and unc history + assert len(sl.uncertainties_history) == 3 + assert len(sl.uncertainties_history[0]) == len(acds) + assert len(sl.predictions_history) == 3 + assert len(sl.predictions_history[-1]) == len(acds) + assert len(sl.candidate_index_history) == 2 + + +def test_simulated_sequential_batch_added(): + # Tests adding N candidates on each loop + sub1 = generate_surface_structures(["Pt"], facets={"Pt": ["111"]})["Pt"]["fcc111"][ + 
"structure" + ] + sub2 = generate_surface_structures(["Cu"], facets={"Cu": ["100"]})["Cu"]["fcc100"][ + "structure" + ] + base_struct1 = place_adsorbate(sub1, Atoms("O")) + base_struct2 = place_adsorbate(sub2, Atoms("N")) + candidate_selection_kwargs = {"num_candidates_to_pick": 2, "aq": "Random"} + predictor_kwargs = {"featurizer_class": SineMatrix} + num_loops = 2 + ds_structs = [base_struct1, base_struct2, sub1, sub2] + ds_labels = np.array([5.0, 6.0, 7.0, 8.0]) + acds = DesignSpace(ds_structs, ds_labels) + sl = simulated_sequential_learning( + full_design_space=acds, + predictor_kwargs=predictor_kwargs, + candidate_selection_kwargs=candidate_selection_kwargs, + number_of_sl_loops=num_loops, + init_training_size=1, + ) + # should add 2 candidates on first loop + assert len(sl.candidate_index_history[0]) == 2 + # since only 1 left, should add it on the next + assert len(sl.candidate_index_history[1]) == 1 + + +def test_simulated_sequential_num_loops(): + # Tests the number of loops + sub1 = generate_surface_structures(["Fe"], facets={"Fe": ["110"]})["Fe"]["bcc110"][ + "structure" + ] + sub2 = generate_surface_structures(["Cu"], facets={"Cu": ["100"]})["Cu"]["fcc100"][ + "structure" + ] + base_struct1 = place_adsorbate(sub1, Atoms("H")) + base_struct2 = place_adsorbate(sub2, Atoms("N")) + predictor_kwargs = {"featurizer_class": SineMatrix} + candidate_selection_kwargs = {"num_candidates_to_pick": 3, "aq": "Random"} + ds_structs = [base_struct1, base_struct2, sub1, sub2] + ds_labels = np.array([5.0, 6.0, 7.0, 8.0]) + acds = DesignSpace(ds_structs, ds_labels) + # Test default number of loops + sl = simulated_sequential_learning( + full_design_space=acds, + predictor_kwargs=predictor_kwargs, + candidate_selection_kwargs=candidate_selection_kwargs, + init_training_size=1, + ) + assert len(sl.predictions_history) == 2 + assert sl.iteration_count == 2 + + # Test catches maximum number of loops + with pytest.raises(SequentialLearnerError): + sl = 
simulated_sequential_learning( + full_design_space=acds, + predictor_kwargs=predictor_kwargs, + candidate_selection_kwargs=candidate_selection_kwargs, + init_training_size=1, + number_of_sl_loops=3, + ) + + # Test with default num loops and default num candidates + ds_structs = [base_struct1, base_struct2, sub2] + ds_labels = np.array([5.0, 6.0, 7.0]) + acds = DesignSpace(ds_structs, ds_labels) + candidate_selection_kwargs.update({"num_candidates_to_pick": 1}) + + sl = simulated_sequential_learning( + full_design_space=acds, + predictor_kwargs=predictor_kwargs, + candidate_selection_kwargs=candidate_selection_kwargs, + init_training_size=1, + ) + assert len(sl.uncertainties_history) == 3 + assert sl.iteration_count == 3 + + +def test_simulated_sequential_write_to_disk(): + # Test writing out sl dict + with tempfile.TemporaryDirectory() as _tmp_dir: + sub1 = generate_surface_structures(["Pt"], facets={"Pt": ["111"]})["Pt"][ + "fcc111" + ]["structure"] + sub2 = generate_surface_structures(["Cu"], facets={"Cu": ["100"]})["Cu"][ + "fcc100" + ]["structure"] + base_struct1 = place_adsorbate(sub1, Atoms("O")) + base_struct2 = place_adsorbate(sub2, Atoms("S")) + base_struct3 = place_adsorbate(sub2, Atoms("N")) + predictor_kwargs = {"featurizer_class": SineMatrix} + candidate_selection_kwargs = {"num_candidates_to_pick": 2, "aq": "Random"} + ds_structs = [base_struct1, base_struct2, base_struct3] + ds_labels = np.array([0, 1, 2]) + acds = DesignSpace(ds_structs, ds_labels) + sl = simulated_sequential_learning( + full_design_space=acds, + init_training_size=2, + number_of_sl_loops=1, + predictor_kwargs=predictor_kwargs, + candidate_selection_kwargs=candidate_selection_kwargs, + write_to_disk=True, + write_location=_tmp_dir, + ) + # check data written as json + json_path = os.path.join(_tmp_dir, "acsl.json") + sl_written = SequentialLearner.from_json(json_path) + assert sl.iteration_count == sl_written.iteration_count + assert np.array_equal(sl.predictions_history, 
sl_written.predictions_history) + assert np.array_equal( + sl.uncertainties_history, sl_written.uncertainties_history + ) + assert np.array_equal( + sl.candidate_index_history, sl_written.candidate_index_history + ) + assert np.array_equal(sl.candidate_indices, sl_written.candidate_indices) + assert np.array_equal(sl.predictions, sl_written.predictions) + assert np.array_equal(sl.uncertainties, sl_written.uncertainties) + assert np.array_equal(sl.predictor_kwargs, sl_written.predictor_kwargs) + assert sl.candidate_selection_kwargs == sl_written.candidate_selection_kwargs + assert ( + sl.design_space.design_space_structures + == sl_written.design_space.design_space_structures + ) + assert np.array_equal( + sl.design_space.design_space_labels, + sl_written.design_space.design_space_labels, + ) + + +def test_simulated_sequential_learning_fully_explored(): + # Checks that catches if ds not fully explored + sub1 = generate_surface_structures(["Pt"], facets={"Pt": ["111"]})["Pt"]["fcc111"][ + "structure" + ] + sub2 = generate_surface_structures(["Cu"], facets={"Cu": ["100"]})["Cu"]["fcc100"][ + "structure" + ] + base_struct1 = place_adsorbate(sub1, Atoms("OH")) + base_struct2 = place_adsorbate(sub2, Atoms("NH")) + predictor_kwargs = {"structure_featurizer": "elemental_property"} + ds_structs = [base_struct1, base_struct2, sub2] + ds_labels = np.array([0.0, np.nan, 4.0]) + acds = DesignSpace(ds_structs, ds_labels) + candidate_selection_kwargs = {"aq": "MU"} + with pytest.raises(SequentialLearnerError): + sl = simulated_sequential_learning( + full_design_space=acds, + init_training_size=1, + number_of_sl_loops=2, + predictor_kwargs=predictor_kwargs, + candidate_selection_kwargs=candidate_selection_kwargs, + ) + + +def test_multiple_sequential_learning_serial(): + # Tests serial implementation + sub1 = generate_surface_structures(["Pt"], facets={"Pt": ["111"]})["Pt"]["fcc111"][ + "structure" + ] + base_struct1 = place_adsorbate(sub1, Atoms("O")) + predictor_kwargs = 
{"featurizer_class": SineMatrix} + ds_structs = [base_struct1, sub1] + ds_labels = np.array([0.0, 0.0]) + acds = DesignSpace(ds_structs, ds_labels) + candidate_selection_kwargs = {"aq": "MU"} + runs_history = multiple_simulated_sequential_learning_runs( + full_design_space=acds, + number_of_runs=3, + predictor_kwargs=predictor_kwargs, + candidate_selection_kwargs=candidate_selection_kwargs, + number_of_sl_loops=1, + init_training_size=1, + ) + assert len(runs_history) == 3 + assert isinstance(runs_history[0], SequentialLearner) + assert len(runs_history[1].predictions_history) == 2 + + +def test_multiple_sequential_learning_parallel(): + # Tests parallel implementation + sub1 = generate_surface_structures(["Cu"], facets={"Cu": ["111"]})["Cu"]["fcc111"][ + "structure" + ] + base_struct1 = place_adsorbate(sub1, Atoms("Li")) + predictor_kwargs = {"featurizer_class": SineMatrix} + ds_structs = [base_struct1, sub1] + ds_labels = np.array([0.0, 0.0]) + acds = DesignSpace(ds_structs, ds_labels) + candidate_selection_kwargs = {"aq": "Random"} + runs_history = multiple_simulated_sequential_learning_runs( + full_design_space=acds, + number_of_runs=3, + number_parallel_jobs=2, + predictor_kwargs=predictor_kwargs, + candidate_selection_kwargs=candidate_selection_kwargs, + number_of_sl_loops=1, + init_training_size=1, + ) + assert len(runs_history) == 3 + assert isinstance(runs_history[2], SequentialLearner) + assert len(runs_history[1].uncertainties_history) == 2 + + +def test_multiple_sequential_learning_write_to_disk(): + # Tests writing run history to disk + _tmp_dir = tempfile.TemporaryDirectory().name + sub1 = generate_surface_structures(["Pt"], facets={"Pt": ["111"]})["Pt"]["fcc111"][ + "structure" + ] + base_struct1 = place_adsorbate(sub1, Atoms("N")) + predictor_kwargs = {"featurizer_class": SineMatrix} + ds_structs = [base_struct1, sub1] + ds_labels = np.array([0.0, 0.0]) + acds = DesignSpace(ds_structs, ds_labels) + candidate_selection_kwargs = 
{"num_candidates_to_pick": 2, "aq": "Random"} + runs_history = multiple_simulated_sequential_learning_runs( + full_design_space=acds, + predictor_kwargs=predictor_kwargs, + candidate_selection_kwargs=candidate_selection_kwargs, + number_of_runs=3, + number_parallel_jobs=2, + init_training_size=1, + number_of_sl_loops=1, + write_to_disk=True, + write_location=_tmp_dir, + json_name_prefix="test_multi", + ) + + # check data history in each run + for i in range(3): + written_run = SequentialLearner.from_json( + os.path.join(_tmp_dir, f"test_multi_{i}.json") + ) + written_ds = written_run.design_space + assert written_ds.design_space_structures == ds_structs + assert np.array_equal(written_ds.design_space_labels, ds_labels) + assert written_run.iteration_count == runs_history[i].iteration_count + assert np.array_equal(written_run.predictions, runs_history[i].predictions) + assert np.array_equal( + written_run.predictions_history, runs_history[i].predictions_history + ) + assert np.array_equal(written_run.uncertainties, runs_history[i].uncertainties) + assert np.array_equal( + written_run.uncertainties_history, runs_history[i].uncertainties_history + ) + assert np.array_equal( + written_run.train_idx_history, runs_history[i].train_idx_history + ) + assert np.array_equal(written_run.train_idx, runs_history[i].train_idx) + assert np.array_equal( + written_run.candidate_indices, runs_history[i].candidate_indices + ) + assert np.array_equal( + written_run.candidate_index_history, runs_history[i].candidate_index_history + ) + assert written_run.predictor_kwargs == runs_history[i].predictor_kwargs + assert ( + written_run.candidate_selection_kwargs + == runs_history[i].candidate_selection_kwargs + ) + + +def test_choose_next_candidate_input_minimums(): + # Tests that appropriately catches minimum necessary inputs + labels = np.random.rand(5) + train_idx = np.zeros(5, dtype=bool) + train_idx[np.random.choice(5, size=2, replace=False)] = 1 + unc = np.random.rand(5) + pred = 
np.random.rand(5) + + with pytest.raises(SequentialLearnerError): + choose_next_candidate() + + with pytest.raises(SequentialLearnerError): + choose_next_candidate(unc=unc, pred=pred, num_candidates_to_pick=2, aq="Random") + + with pytest.raises(SequentialLearnerError): + choose_next_candidate( + labels=labels, pred=pred, num_candidates_to_pick=2, aq="MU" + ) + + with pytest.raises(SequentialLearnerError): + choose_next_candidate(pred=pred, num_candidates_to_pick=2, aq="MLI") + + with pytest.raises(SequentialLearnerError): + choose_next_candidate(unc=unc, num_candidates_to_pick=2, aq="MLI") + + +def test_choose_next_candidate_hhi_weighting(): + # Tests that the HHI weighting is properly applied + unc = np.array([0.1, 0.1]) + pred = np.array([4.0, 4.0]) + # Tests using production HHI values and MU + y_struct = generate_surface_structures(["Y"], facets={"Y": ["0001"]})["Y"][ + "hcp0001" + ]["structure"] + ni_struct = generate_surface_structures(["Ni"], facets={"Ni": ["111"]})["Ni"][ + "fcc111" + ]["structure"] + parent_idx, _, aq_scores = choose_next_candidate( + [y_struct, ni_struct], + unc=unc, + include_hhi=True, + aq="MU", + include_seg_ener=False, + ) + assert parent_idx[0] == 1 + assert aq_scores[0] < aq_scores[1] + + # Tests using reserves HHI values and MLI + nb_struct = generate_surface_structures(["Nb"], facets={"Nb": ["111"]})["Nb"][ + "bcc111" + ]["structure"] + na_struct = generate_surface_structures(["Na"], facets={"Na": ["110"]})["Na"][ + "bcc110" + ]["structure"] + parent_idx, _, aq_scores = choose_next_candidate( + [na_struct, nb_struct], + unc=unc, + pred=pred, + target_min=3, + target_max=5, + include_hhi=True, + hhi_type="reserves", + include_seg_ener=False, + ) + assert parent_idx[0] == 0 + assert aq_scores[0] > aq_scores[1] + + +def test_choose_next_candidate_segregation_energy_weighting(): + # Tests that the segregation energy weighting is properly applied + unc = np.array([0.3, 0.3]) + pred = np.array([2.0, 2.0]) + structs = 
flatten_structures_dict( + generate_saa_structures(["Cr"], ["Rh"], facets={"Cr": ["110"]}) + ) + structs.extend( + flatten_structures_dict( + generate_saa_structures(["Co"], ["Re"], facets={"Co": ["0001"]}) + ) + ) + parent_idx, _, aq_scores = choose_next_candidate( + structs, + unc=unc, + pred=pred, + target_min=0, + target_max=4, + include_hhi=False, + include_seg_ener=True, + ) + assert parent_idx[0] == 0 + assert aq_scores[0] > aq_scores[1] + + +def test_get_overlap_score(): + # Tests default behavior + mean = 0.0 + std = 0.1 + x1 = -0.4 + x2 = 0.8 + norm = stats.norm(loc=mean, scale=std) + + # checks that at least target min or max is provided + with pytest.raises(SequentialLearnerError): + get_overlap_score(mean, std) + + # test default min + overlap_score = get_overlap_score(mean, std, x2=x2) + assert np.isclose(overlap_score, norm.cdf(x2)) + + # test default max + overlap_score = get_overlap_score(mean, std, x1=x1) + assert np.isclose(overlap_score, 1.0 - norm.cdf(x1)) + + # test both max and min + overlap_score = get_overlap_score(mean, std, x1=x1, x2=x2) + assert np.isclose(overlap_score, norm.cdf(x2) - norm.cdf(x1)) + + +def test_calculate_hhi_scores(): + # Tests calculating the HHI scores + saa_dict = generate_saa_structures( + ["Pt", "Cu", "Ni"], + ["Ru"], + facets={"Pt": ["111"], "Cu": ["111"], "Ni": ["111"]}, + ) + saa_structs = [saa_dict[host]["Ru"]["fcc111"]["structure"] for host in saa_dict] + # test production + hhi_prod_scores = calculate_hhi_scores(saa_structs) + norm_hhi_prod = { + el: 1.0 - (HHI["production"][el] - 500.0) / 9300.0 for el in HHI["production"] + } + # check approach properly normalizes and inverts + assert np.isclose(norm_hhi_prod["Y"], 0.0) + assert np.isclose(norm_hhi_prod["O"], 1.0) + # test scores calculated on SAAs + assert np.isclose( + hhi_prod_scores[0], (35 * norm_hhi_prod["Pt"] + norm_hhi_prod["Ru"]) / 36 + ) + assert np.isclose( + hhi_prod_scores[1], (35 * norm_hhi_prod["Cu"] + norm_hhi_prod["Ru"]) / 36 + ) + assert 
np.isclose( + hhi_prod_scores[2], (35 * norm_hhi_prod["Ni"] + norm_hhi_prod["Ru"]) / 36 + ) + # check scores normalized + assert (hhi_prod_scores <= 1.0).all() + assert (hhi_prod_scores >= 0.0).all() + # test reserves + hhi_res_scores = calculate_hhi_scores(saa_structs, "reserves") + norm_hhi_res = { + el: 1.0 - (HHI["reserves"][el] - 500.0) / 8600.0 for el in HHI["reserves"] + } + # check approach properly normalizes and inverts + assert np.isclose(norm_hhi_res["Pt"], 0.0) + assert np.isclose(norm_hhi_res["C"], 1.0) + assert np.isclose( + hhi_res_scores[0], (35 * norm_hhi_res["Pt"] + norm_hhi_res["Ru"]) / 36 + ) + assert np.isclose( + hhi_res_scores[1], (35 * norm_hhi_res["Cu"] + norm_hhi_res["Ru"]) / 36 + ) + assert np.isclose( + hhi_res_scores[2], (35 * norm_hhi_res["Ni"] + norm_hhi_res["Ru"]) / 36 + ) + # check normalized + assert (hhi_res_scores <= 1.0).all() + assert (hhi_res_scores >= 0.0).all() + + +def test_calculate_segregation_energy_scores(): + # Tests calculating segregation energy scores + saa_structs = flatten_structures_dict( + generate_saa_structures( + ["Ag", "Ni"], ["Pt"], facets={"Ag": ["111"], "Ni": ["111"]}, + ) + ) + saa_structs.extend( + flatten_structures_dict( + generate_saa_structures(["Pd"], ["W"], facets={"Pd": ["111"]}) + ) + ) + # test calculating scores from RABAN1999 + se_scores = calculate_segregation_energy_scores(saa_structs) + assert np.isclose(se_scores[-1], 0.0) + min_seg = SEGREGATION_ENERGIES["raban1999"]["Fe_100"]["Ag"] + max_seg = SEGREGATION_ENERGIES["raban1999"]["Pd"]["W"] + assert np.isclose( + se_scores[0], + 1.0 + - (SEGREGATION_ENERGIES["raban1999"]["Ag"]["Pt"] - min_seg) + / (max_seg - min_seg), + ) + assert np.isclose( + se_scores[1], + 1.0 + - (SEGREGATION_ENERGIES["raban1999"]["Ni"]["Pt"] - min_seg) + / (max_seg - min_seg), + ) + + # test getting scores from RAO2020 + se_scores = calculate_segregation_energy_scores(saa_structs, data_source="rao2020") + assert np.isclose(se_scores[0], 
SEGREGATION_ENERGIES["rao2020"]["Ag"]["Pt"]) + assert np.isclose(se_scores[0], 0.8) + assert np.isclose(se_scores[1], SEGREGATION_ENERGIES["rao2020"]["Ni"]["Pt"]) + assert np.isclose(se_scores[1], 1.0) + assert np.isclose(se_scores[-1], SEGREGATION_ENERGIES["rao2020"]["Pd"]["W"]) + assert np.isclose(se_scores[-1], 0.0) diff --git a/tests/test_saa.py b/tests/test_saa.py index 4b738e53..1642e1c5 100644 --- a/tests/test_saa.py +++ b/tests/test_saa.py @@ -10,6 +10,7 @@ from autocat.saa import generate_saa_structures from autocat.saa import substitute_single_atom_on_surface from autocat.saa import _find_dopant_index +from autocat.saa import _find_all_surface_atom_indices from autocat.saa import AutocatSaaGenerationError from autocat.surface import generate_surface_structures @@ -128,3 +129,31 @@ def test_find_dopant_index(): host[32].symbol = "Au" with raises(NotImplementedError): _find_dopant_index(host, "Au") + + +def test_find_all_surface_atom_indices(): + # Test helper function for finding all surface atoms + # clean elemental surface + ru = generate_surface_structures(["Ru"], supercell_dim=(2, 2, 4))["Ru"]["hcp0001"][ + "structure" + ] + indices = _find_all_surface_atom_indices(ru) + assert indices == [12, 13, 14, 15] + + pt110 = generate_surface_structures(["Pt"], supercell_dim=(1, 1, 4))["Pt"][ + "fcc110" + ]["structure"] + indices = _find_all_surface_atom_indices(pt110) + assert indices == [3] + + # check increasing tolerance + indices = _find_all_surface_atom_indices(pt110, tol=1.4) + assert indices == [2, 3] + + pt100 = generate_surface_structures(["Pt"], supercell_dim=(3, 3, 4))["Pt"][ + "fcc100" + ]["structure"] + pt100[27].z += 0.3 + pt100[30].z -= 0.4 + indices = _find_all_surface_atom_indices(pt100, tol=0.6) + assert indices == [27, 28, 29, 31, 32, 33, 34, 35] diff --git a/tests/test_utils.py b/tests/test_utils.py new file mode 100644 index 00000000..5b3190b6 --- /dev/null +++ b/tests/test_utils.py @@ -0,0 +1,60 @@ +"""Unit tests for the `autocat.utils` 
module""" + +from ase import Atoms +from ase.build import fcc100 +from ase.build import fcc111 +from ase.build import bcc110 + +from autocat.surface import generate_surface_structures +from autocat.saa import generate_saa_structures +from autocat.adsorption import generate_adsorbed_structures +from autocat.utils import flatten_structures_dict + + +def test_extract_surfaces(): + # Tests extracting structures from `autocat.surface.generate_surface_structures` + surfaces = generate_surface_structures( + ["Pt", "Cu", "Li"], facets={"Pt": ["100", "111"], "Cu": ["111"], "Li": ["110"]} + ) + ex_structures = flatten_structures_dict(surfaces) + assert all(isinstance(struct, Atoms) for struct in ex_structures) + # checks atoms objects left untouched during extraction + pt_struct100 = fcc100("Pt", size=(3, 3, 4), vacuum=10) + assert pt_struct100 in ex_structures + pt_struct111 = fcc111("Pt", size=(3, 3, 4), vacuum=10) + assert pt_struct111 in ex_structures + cu_struct = fcc111("Cu", size=(3, 3, 4), vacuum=10) + assert cu_struct in ex_structures + li_struct = bcc110("Li", size=(3, 3, 4), vacuum=10) + assert li_struct in ex_structures + + +def test_extract_saas(): + # Tests extracting saa structures + saas = generate_saa_structures( + ["Cu", "Au"], + ["Fe"], + facets={"Cu": ["110"], "Au": ["100"]}, + supercell_dim=(2, 2, 5), + ) + ex_structures = flatten_structures_dict(saas) + assert all(isinstance(struct, Atoms) for struct in ex_structures) + assert saas["Cu"]["Fe"]["fcc110"]["structure"] in ex_structures + assert saas["Au"]["Fe"]["fcc100"]["structure"] in ex_structures + + +def test_extract_adsorption(): + # Test extracting adsorption structures + saa = generate_saa_structures(["Ru"], ["Pd"], supercell_dim=(2, 2, 5),)["Ru"]["Pd"][ + "hcp0001" + ]["structure"] + ads_dict = generate_adsorbed_structures( + saa, + adsorbates=["NH2", "Li"], + adsorption_sites={"custom": [(0.0, 0.0)]}, + use_all_sites=False, + ) + ex_structures = flatten_structures_dict(ads_dict) + assert 
all(isinstance(struct, Atoms) for struct in ex_structures) + assert ads_dict["NH2"]["custom"]["0.0_0.0"]["structure"] in ex_structures + assert ads_dict["Li"]["custom"]["0.0_0.0"]["structure"] in ex_structures