diff --git a/.github/workflows/deploy-pages.yml b/.github/workflows/deploy-pages.yml new file mode 100644 index 00000000..52f825e9 --- /dev/null +++ b/.github/workflows/deploy-pages.yml @@ -0,0 +1,17 @@ +name: deploy-pages +on: + push: + branches: + - master + +jobs: + deploy: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v2 + - uses: actions/setup-python@v2 + with: + python-version: 3.x + - run: pip install mkdocs-material + - run: pip install mkdocstrings + - run: mkdocs gh-deploy --force \ No newline at end of file diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 2e81144a..b3c541d8 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -68,8 +68,8 @@ A pre-commit hook is available to auto-format code with 3. Install pre-commit: ``$ pip install pre-commit`` 4. Install git hooks in your ``.git`` directory: ``$ pre-commit install`` -Names for functions, arguments, classes, and methods should be as descriptive as possible, -even if it means making them a little longer. For example, `generate_surface_structures` is +Names for functions, arguments, classes, and methods should be as descriptive as possible, +even if it means making them a little longer. For example, `generate_surface_structures` is a preferred function name to `gen_surfs`. All class names should adhere to [upper CamelCase](https://en.wikipedia.org/wiki/Camel_case). @@ -86,16 +86,16 @@ A passing build requires the following: * Every line of code is executed by a test (100% coverage) * Documentation has been updated or extended (as needed) and builds -PR descriptions should describe the motivation and context of the code changes in the PR, -both for the reviewer and also for future developers. If there's a Github issue, the PR should +PR descriptions should describe the motivation and context of the code changes in the PR, -both for the reviewer and also for future developers. If there's a Github issue, the PR should be linked to the issue to provide that context. 
## Documentation -`AutoCat` documentation is built using `mkdocs` via -[`mkdocs-material`](https://squidfunk.github.io/mkdocs-material/) -and +`AutoCat` documentation is built using `mkdocs` via +[`mkdocs-material`](https://squidfunk.github.io/mkdocs-material/) +and [`mkdocstrings`](https://mkdocstrings.github.io/). -All custom documentation should be written as `.md` files, appropriately placed within +All custom documentation should be written as `.md` files, appropriately placed within `docs/`, and referenced within the `mkdocs.yml` file. With `mkdocs` the docs webpage can be hosted locally with the command: @@ -106,3 +106,4 @@ which will give an `html` link that can be pasted in a web-browser. API documentation is automatically generated with `mkdocstrings` which parses the docstrings. Please ensure that all docstrings follow the Numpy style. + diff --git a/MANIFEST.in b/MANIFEST.in index 9d3f36c3..eda156cb 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -1,3 +1,4 @@ include src/autocat/data/**/*.json include src/autocat/VERSION.txt -include bin/autocat \ No newline at end of file +include bin/autocat +include CONTRIBUTING.md diff --git a/README.md b/README.md index 53d863ed..4b766559 100644 --- a/README.md +++ b/README.md @@ -1,9 +1,9 @@ -# AutoCat +# AutoCat -AutoCat is a suite of python tools for **sequential learning for materials applications** +AutoCat is a suite of python tools for **sequential learning for materials applications** and **automating structure generation for DFT catalysis studies.** -Development of this package stems from [ACED](https://www.cmu.edu/aced/), as part of the +Development of this package stems from [ACED](https://www.cmu.edu/aced/), as part of the ARPA-E DIFFERENTIATE program. 
## Installation @@ -13,10 +13,9 @@ There are two options for installation, either via `pip` or from the repo direct ### `pip` (recommended) If you are planning on strictly using AutoCat rather than contributing to development, - we recommend using `pip` within a virtual environment (e.g. + we recommend using `pip` within a virtual environment (e.g. [`conda`](https://docs.conda.io/en/latest/) - ). This can be done -as follows: + ). This can be done as follows: ``` pip install autocat @@ -29,10 +28,10 @@ AutoCat can be installed via a clone from Github. First, you'll need to clone th github repo to your local machine (or wherever you'd like to use AutoCat) using `git clone`. Once the repo has been cloned, you can install AutoCat as an editable package by changing into the created directory (the one with `setup.py`) and installing -via: +via: ``` pip install -e . ``` ## Contributing -Contributions through issues, feature requests, and pull requests are welcome. -Guidelines are provided [here](CONTRIBUTING.md). \ No newline at end of file +Contributions through issues, feature requests, and pull requests are welcome. +Guidelines are provided [here](CONTRIBUTING.md). 
diff --git a/docs/API/Learning/featurizers.md b/docs/API/Learning/featurizers.md new file mode 100644 index 00000000..e0cf1037 --- /dev/null +++ b/docs/API/Learning/featurizers.md @@ -0,0 +1 @@ +::: autocat.learning.featurizers diff --git a/docs/API/Learning/predictors.md b/docs/API/Learning/predictors.md new file mode 100644 index 00000000..2ce437cc --- /dev/null +++ b/docs/API/Learning/predictors.md @@ -0,0 +1 @@ +::: autocat.learning.predictors diff --git a/docs/API/Learning/sequential.md b/docs/API/Learning/sequential.md new file mode 100644 index 00000000..ca81144c --- /dev/null +++ b/docs/API/Learning/sequential.md @@ -0,0 +1 @@ +::: autocat.learning.sequential diff --git a/docs/API/Structure_Generation/adsorption.md b/docs/API/Structure_Generation/adsorption.md new file mode 100644 index 00000000..daca8e34 --- /dev/null +++ b/docs/API/Structure_Generation/adsorption.md @@ -0,0 +1 @@ +::: autocat.adsorption diff --git a/docs/API/Structure_Generation/bulk.md b/docs/API/Structure_Generation/bulk.md new file mode 100644 index 00000000..80e0ed2b --- /dev/null +++ b/docs/API/Structure_Generation/bulk.md @@ -0,0 +1 @@ +::: autocat.bulk diff --git a/docs/API/Structure_Generation/saa.md b/docs/API/Structure_Generation/saa.md new file mode 100644 index 00000000..f67a49c6 --- /dev/null +++ b/docs/API/Structure_Generation/saa.md @@ -0,0 +1,3 @@ +# Single Atom Alloys + +::: autocat.saa diff --git a/docs/API/Structure_Generation/surface.md b/docs/API/Structure_Generation/surface.md new file mode 100644 index 00000000..d826fc1f --- /dev/null +++ b/docs/API/Structure_Generation/surface.md @@ -0,0 +1 @@ +::: autocat.surface diff --git a/docs/Makefile b/docs/Makefile deleted file mode 100644 index c7c06965..00000000 --- a/docs/Makefile +++ /dev/null @@ -1,20 +0,0 @@ -# Minimal makefile for Sphinx documentation -# - -# You can set these variables from the command line, and also -# from the environment for the first two. 
-SPHINXOPTS ?= -SPHINXBUILD ?= sphinx-build -SOURCEDIR = src -BUILDDIR = _build - -# Put it first so that "make" without argument is like "make help". -help: - @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) - -.PHONY: help Makefile - -# Catch-all target: route all unknown targets to Sphinx using the new -# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). -%: Makefile - @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) diff --git a/docs/README.md b/docs/README.md new file mode 100644 index 00000000..b8cd9c30 --- /dev/null +++ b/docs/README.md @@ -0,0 +1,91 @@ +# AutoCat Documentation + +![AutoCat Logo](img/autocat_logo.png){ align=right } + +AutoCat is a suite of python tools for **sequential learning for materials applications** +and **automating structure generation for DFT catalysis studies.** + +Development of this package stems from [ACED](https://www.cmu.edu/aced/), as part of the +ARPA-E DIFFERENTIATE program. + +Below we provide an overview of the key functionalities of AutoCat. +For additional details please see the User Guide, Tutorials, and API sections. + +## Sequential Learning + +One of the core philosophies of AutoCat is to provide modular and extensible tooling to +facilitate closed-loop computational materials discovery workflows. Within this submodule +are classes for defining a design space, featurization, +regression, and defining a closed-loop sequential learning iterator. 
The +key classes intended for each of these purposes are: + +- [**`DesignSpace`**](User_Guide/Learning/sequential#designspace): define a design space to explore + +- [**`Featurizer`**](User_Guide/Learning/featurizers): featurize the systems for regression + +- [**`Predictor`**](User_Guide/Learning/predictors): a regressor for predicting materials properties + +- [**`SequentialLearner`**](User_Guide/Learning/sequential#sequentiallearner): define a closed-loop iterator + + +## Structure Generation + +![Adsorption Figure](img/struct_gen_figs/adsorption.png){ align=right } + +This submodule contains functions for automating atomic structure generation +within the context of a catalysis study using density functional theory. +Specifically, this includes generating bulk structures, surfaces, and +placing adsorbates. In addition, functions for generating the single-atom alloys +material class are also included. These functions are organized within AutoCat as follows: + +- [**`autocat.bulk`**](User_Guide/Structure_Generation/bulk): generation of periodic +mono-elemental bulk structures + +- [**`autocat.surface`**](User_Guide/Structure_Generation/surface): mono-elemental surface slab generation + +- [**`autocat.adsorption`**](User_Guide/Structure_Generation/adsorption): placement of adsorbates onto surfaces + +- [**`autocat.saa`**](User_Guide/Structure_Generation/saa): generation of single-atom alloy surfaces + +Structures generated or read with this package are typically of the form of +[`ase.Atoms`](https://wiki.fysik.dtu.dk/ase/ase/atoms.html#module-ase.atoms) +objects. + +When opting to write structures to +disk using these functions, they are automatically organized into a clean, scalable directory organization. +All structures are written in the +[`ase.io.Trajectory`](https://wiki.fysik.dtu.dk/ase/ase/io/trajectory.html#trajectory) +file format. +For further details on the directory structure, see the User Guide. 
+ +## Installation + +There are two options for installation, either via `pip` or from the repo directly. + +### `pip` (recommended) + +If you are planning on strictly using AutoCat rather than contributing to development, + we recommend using `pip` within a virtual environment (e.g. + [`conda`](https://docs.conda.io/en/latest/) + ). This can be done +as follows: + +``` +pip install autocat +``` + +### Github (for developers) + +Alternatively, if you would like to contribute to the development of this software, +AutoCat can be installed via a clone from Github. First, you'll need to clone the +github repo to your local machine (or wherever you'd like to use AutoCat) using +`git clone`. Once the repo has been cloned, you can install AutoCat as an editable +package by changing into the created directory (the one with `setup.py`) and installing +via: +``` +pip install -e . +``` + +## Contributing +Contributions through issues, feature requests, and pull requests are welcome. +Guidelines are provided here. diff --git a/docs/Tutorials/pred_h.md b/docs/Tutorials/pred_h.md new file mode 100644 index 00000000..95850397 --- /dev/null +++ b/docs/Tutorials/pred_h.md @@ -0,0 +1,131 @@ +In this tutorial we are going to show how to use the learning tools within +AutoCat to train a regressor that can predict adsorption energies of hydrogen +on a set of single-atom alloys. + +## Creating a `DesignSpace` + +Let's start by creating a `DesignSpace`. Normally each of these +structures would be optimized via DFT, but for demo purposes +we'll use the generated structures directly. First we need to generate the single-atom +alloys. Here, we can use AutoCat's +[`generate_saa_structures`](../API/Structure_Generation/saa.md#autocat.saa.generate_saa_structures) +function. + +```py +>>> # Generate the clean single-atom alloy structures +>>> from autocat.saa import generate_saa_structures +>>> from autocat.utils import extract_structures +>>> saa_struct_dict = generate_saa_structures( +... 
["Fe", "Cu", "Au"], +... ["Pt", "Pd", "Ni"], +... facets={"Fe":["110"], "Cu":["111"], "Au":["111"]}, +... n_fixed_layers=2, +... ) +>>> saa_structs = extract_structures(saa_struct_dict) +``` + +Now that we have the clean structures, let's adsorb hydrogen on the surface. +For convenience let's place H at the origin instead of considering all symmetry sites. +To accomplish this we can make use of AutoCat's +[`place_adsorbate`](../API/Structure_Generation/adsorption.md#autocat.adsorption.place_adsorbate) +function. + +```py +>>> # Adsorb hydrogen onto each of the generated SAA surfaces +>>> from autocat.adsorption import place_adsorbate +>>> ads_structs = [] +>>> for clean_struct in saa_structs: +... ads_dict = place_adsorbate( +... clean_struct, +... "H", +... (0.,0.) +... ) +... ads_struct = extract_structures(ads_dict)[0] +... ads_structs.append(ads_struct) +``` + +This has collected all of the single-atom alloys with hydrogen adsorbed into +a single list of `ase.Atoms` objects, `ads_structs`. Ideally at this stage we'd have +adsorption energies for each of the generated structures after relaxation. As a proxy +in this demo we'll create random labels, but this should be adsorption energies if you +want to train a meaningful Predictor! + +```py +>>> # Generate the labels for each structure +>>> import numpy as np +>>> labels = np.random.uniform(-1.5,1.5,size=len(ads_structs)) +``` + +Finally, using both our structures and labels we can define a `DesignSpace`. In practice, +if any of the labels for a structure are unknown, it can be included as a `numpy.nan` + +```py +>>> from autocat.learning.sequential import DesignSpace +>>> design_space = DesignSpace(ads_structs, labels) +``` + +## Setting up a `Predictor` + +When setting up our `Predictor` we now have two choices to make: + +1. The technique to be used for featurizing the systems +2. 
The regression model to be used for training and predictions + +Internally, the `Predictor` will contain a `Featurizer` object which contains all of +our choices for how to featurize the systems. Our choice of featurizer class and +the associated kwargs are specified via the `featurizer_class` and +`featurization_kwargs` arguments, respectively. By providing the design space structures +some of the kwargs related to the featurization (e.g. maximum structure size) can be +automatically obtained. + +Similarly, we can specify the regressor to be used within the `model_class` and +`model_kwargs` arguments. The class should be "`sklearn`-like" with `fit` and +`predict` methods. + +Let's featurize the hydrogen environment via `dscribe`'s `SOAP` class with +`sklearn`'s `GaussianProcessRegressor` for regression. + +```py +>>> from sklearn.gaussian_process import GaussianProcessRegressor +>>> from sklearn.gaussian_process.kernels import RBF +>>> from dscribe.descriptors import SOAP +>>> from autocat.learning.predictors import Predictor +>>> kernel = RBF(1.5) +>>> model_kwargs={"kernel": kernel} +>>> featurization_kwargs={ +... "design_space_structures": design_space.design_space_structures, +... "kwargs": {"rcut": 7.0, "nmax": 8, "lmax": 8} +... } +>>> predictor = Predictor( +... model_class=GaussianProcessRegressor, +... model_kwargs=model_kwargs, +... featurizer_class=SOAP, +... featurization_kwargs=featurization_kwargs, +... ) +``` + +## Training and making predictions + +With our newly defined `Predictor` we can train it using data from our +`DesignSpace` and the `fit` method. + +```py +>>> train_structures = design_space.design_space_structures[:5] +>>> train_labels = design_space.design_space_labels[:5] +>>> predictor.fit(train_structures, train_labels) +``` + +Making predictions is a similar process except using the `predict` method. 
+ +```py +>>> test_structures = design_space.design_space_structures[5:] +>>> predicted_labels = predictor.predict(test_structures) +``` + +In this example, since we already have the labels for the test structures, we can +also use the `score` method to calculate a prediction score. + +```py +>>> test_labels = design_space.design_space_labels[5:] +>>> mae = predictor.score(test_structures, test_labels) +``` \ No newline at end of file diff --git a/docs/Tutorials/sl.md b/docs/Tutorials/sl.md new file mode 100644 index 00000000..d3169349 --- /dev/null +++ b/docs/Tutorials/sl.md @@ -0,0 +1,112 @@ +In this tutorial we will show how to conduct a simulated sequential learning +run over a fully explored design space. + +## Creating a fully explored `DesignSpace` +Following a similar procedure as in the previous tutorial, we will create +a fully explored `DesignSpace` (ie. no unknown labels). This time +the structures will be clean mono-elemental surfaces which we can generate via +`generate_surface_structures`. + +```py +>>> # Generate the clean surfaces +>>> from autocat.surface import generate_surface_structures +>>> from autocat.utils import extract_structures +>>> surfs_dict = generate_surface_structures( +... ["Pt", "Cu", "Li", "Ti"], +... n_fixed_layers=2, +... default_lat_param_lib="pbe_fd" +... ) +>>> surfs = extract_structures(surfs_dict) +``` + +In this case we specified that the default lattice parameters +should be taken from the library calculated with the PBE XC functional and +a finite difference basis set. + +As before, we will create random labels for all structures. But if you +want meaningful sequential learning runs these must be actual labels relevant +to your design space! + +```py +>>> # Generate the labels for each structure +>>> import numpy as np +>>> labels = np.random.uniform(-1.5,1.5,size=len(surfs)) +``` + +Taking the structures and labels we can define our `DesignSpace`. 
+ +```py +>>> from autocat.learning.sequential import DesignSpace +>>> design_space = DesignSpace(surfs, labels) +``` + +## Doing a single simulated sequential learning run + +Given our fully explored `DesignSpace`, we can simulate a sequential learning +search over it to gain insights into guided searches within this context. +To do this simulated run we can make use of the `simulated_sequential_learning` +function. This will internally drive a `SequentialLearner` object which will be +returned at the end of the run. + +As before, we will need to make choices with regard to the `Predictor` settings. +In this case we will use a `SineMatrix` featurizer alongside a `GaussianProcessRegressor`. + +We also need to select parameters with regard to candidate selection. +This includes the acquisition function to be used, +target window (if applicable), and number of candidates to pick at each iteration. +Let's use a maximum uncertainty acquisition function to pick candidates based on their +associated uncertainty values. We'll also restrict the run to conduct 5 iterations. + +```py +>>> from sklearn.gaussian_process import GaussianProcessRegressor +>>> from sklearn.gaussian_process.kernels import RBF +>>> from dscribe.descriptors import SineMatrix +>>> from autocat.learning.sequential import simulated_sequential_learning +>>> kernel = RBF(1.5) +>>> model_kwargs = {"kernel": kernel} +>>> featurization_kwargs = { +... "design_space_structures": design_space.design_space_structures, +... } +>>> predictor_kwargs = { +... "model_class": GaussianProcessRegressor, +... "model_kwargs": model_kwargs, +... "featurizer_class": SineMatrix, +... "featurization_kwargs": featurization_kwargs +... } +>>> candidate_selection_kwargs = {"aq": "MU"} +>>> sim_seq_learn = simulated_sequential_learning( +... full_design_space=design_space, +... init_training_size=1, +... number_of_sl_loops=5, +... candidate_selection_kwargs=candidate_selection_kwargs, +... predictor_kwargs=predictor_kwargs, +... 
) +``` + +Within the returned `SequentialLearner` object we now have information we can use +for further analysis including prediction and uncertainty histories as well as the candidate +selection history. + +## Doing multiple simulated sequential learning runs + +It is often useful to consider the statistics of multiple independent simulated +sequential learning runs. For this purpose we can make use of the +`multiple_simulated_sequential_learning_runs` function. This acts in the same manner +as for the single run version, but will return a `SequentialLearner` object for each of the +independent runs in a list. Moreover, the inputs remain the same except with the added option +of running in parallel (since this is an embarrassingly parallel operation). Here we will conduct +three independent runs in serial. + +```py +>>> from autocat.learning.sequential import multiple_simulated_sequential_learning_runs +>>> runs_history = multiple_simulated_sequential_learning_runs( +... full_design_space=design_space, +... init_training_size=1, +... number_of_sl_loops=5, +... candidate_selection_kwargs=candidate_selection_kwargs, +... predictor_kwargs=predictor_kwargs, +... number_of_runs=3, +... # number_of_parallel_jobs=N if you wanted to run in parallel +... ) +``` + +Taking the `SequentialLearner`s from within `runs_history`, their histories +may be used to calculate more robust statistics on the simulated searches. \ No newline at end of file diff --git a/docs/User_Guide/Data/hhi.md b/docs/User_Guide/Data/hhi.md new file mode 100644 index 00000000..6f08cc05 --- /dev/null +++ b/docs/User_Guide/Data/hhi.md @@ -0,0 +1,6 @@ +The Herfindahl-Hirschman Index (HHI) is an index that measures market concentration. +Thus, in the context of different elements, it can be used as a proxy for cost, +as proposed by [M. Gaultois, et. al.](https://pubs.acs.org/doi/10.1021/cm400893e). + +From the tabulated values in the reference above, we provide HHI values for both +reserves as well as production. 
diff --git a/docs/User_Guide/Data/intermediates.md b/docs/User_Guide/Data/intermediates.md new file mode 100644 index 00000000..49a3df06 --- /dev/null +++ b/docs/User_Guide/Data/intermediates.md @@ -0,0 +1,48 @@ +When characterizing a surface in the context of a +specific reaction, calculating adsorption energies +for all of the reaction intermediates is often important. + +Here, AutoCat has default structures for adsorbates +of both the oxygen reduction reaction (ORR) and +nitrogen reduction reaction (NRR) intermediates. + +The names of all of the reaction intermediates can +be imported and fed directly into +AutoCat functions: +```py +>>> from autocat.data.intermediates import ORR_INTERMEDIATE_NAMES +>>> from autocat.data.intermediates import NRR_INTERMEDIATE_NAMES +>>> from autocat.surface import generate_surface_structures +>>> from autocat.utils import extract_structures +>>> from autocat.adsorption import generate_adsorbed_structures +>>> pt_dict = generate_surface_structures(["Pt"]) +>>> pt_struct = extract_structures(pt_dict)[0] +>>> orr_structs = generate_adsorbed_structures( +... surface=pt_struct, +... adsorbates=ORR_INTERMEDIATE_NAMES, +... use_all_sites=True +... ) +>>> nrr_structs = generate_adsorbed_structures( +... surface=pt_struct, +... adsorbates=NRR_INTERMEDIATE_NAMES, +... use_all_sites=True +... ) +``` +In the above example, `orr_structs` and `nrr_structs` have all of the corresponding +intermediates at every identified unique surface site. 
+ +Alternatively, if you would like to access the +`ase.Atoms` objects for the intermediates directly, +they can be imported as a `dict`: +```py +>>> from autocat.data.intermediates import ORR_MOLS +>>> from autocat.data.intermediates import NRR_MOLS +``` + +**ORR Intermediates**: + +OOH\*, O\*, OH\* + +**NRR Intermediates**: + +NNH\*, NNH$_2$\*, N\*, NH\*, NH$_2$\*, NHNH\*, NHNH$_2$\*, NH$_2$NH$_2$\* diff --git a/docs/User_Guide/Data/lattice_parameters.md b/docs/User_Guide/Data/lattice_parameters.md new file mode 100644 index 00000000..136a6cce --- /dev/null +++ b/docs/User_Guide/Data/lattice_parameters.md @@ -0,0 +1,44 @@ +In some codes, optimizing cell parameters on the fly +during geometry relaxations is not available. +For this reason we have compiled +calculated lattice parameters +using multiple different +calculation schemes as a convenience for high-throughput +studies. Every calculation was conducted with +[`GPAW`](https://wiki.fysik.dtu.dk/gpaw/index.html). + +There are two axes to the settings applied here: + +- exchange-correlation functional +- basis set mode (finite difference or plane-wave). + +Available sets are as follows: + +- `BULK_PBE_FD`/`BULK_BEEFVDW_FD`: +``` +These are parameters using the finite difference scheme +and PBE / BEEF-vdW XC functionals. 
Obtained via fits to an +equation of state (https://wiki.fysik.dtu.dk/ase/ase/eos.html) + +FCC/BCC +h = 0.16, kpts = (12,12,12) +fit to an SJ EOS + +HCP +h=0.16, kpts = (12,12,6) +fit to a Birch-Murnaghan EO +``` +- `BULK_PBE_PW`/`BULK_BEEFVDW_PW`: +``` +These are parameters are obatined with a plane-wave basis set and +using the Exponential Cell Filter to minimize the stress tensor and atomic forces +(https://wiki.fysik.dtu.dk/ase/ase/constraints.html#the-expcellfilter-class) + +FCC/BCC +mode=PW(550), kpts = (12,12,12), fmax = 0.05 eV/A + +HCP +mode=PW(550), kpts = (12,12,6), fmax = 0.05 eV/A +``` + +All of these lattice parameters are available within `autocat.data.lattice_parameters` diff --git a/docs/User_Guide/Data/segregation_energies.md b/docs/User_Guide/Data/segregation_energies.md new file mode 100644 index 00000000..e9933fb1 --- /dev/null +++ b/docs/User_Guide/Data/segregation_energies.md @@ -0,0 +1,25 @@ +When determining the stability of dopants within a host, one +important factor to consider is the segregation energy. This +predicts the thermodynamic preference towards pinning the +dopant at the surface of the host versus burying itself in +the bulk. + +Segregation energy values are tabulated as reported by +[A.V. Ruban, et. al.](https://journals.aps.org/prb/abstract/10.1103/PhysRevB.59.15990) +for multiple combinations of transition metal hosts and dopants. +By definition more negative values indicate more stability towards +keeping the dopant at the surface. +Values where the host is the same as the dopant is the surface energy for +that species. + +In addition, for specifically SAAs, [K. K. Rao, et. al.](https://doi.org/10.1007/s11244-020-01267-2) +studied the stability of various different host and dopant combinations. The different configurations +included SAA, subsurface, dimers, adatoms, and adatom + SAA. 
Here, for the most preferential configuration +we attributed the following scores as per the results shown in figure 3 of the above reference: + +- SAA is the most stable: 1 +- SAA is not the most stable but is within: + - <0.1 eV: 0.9 + - <0.2 eV: 0.8 + - <0.5 eV: 0.5 +- SAA is not the most stable by >0.5 eV: 0 \ No newline at end of file diff --git a/docs/User_Guide/Learning/featurizers.md b/docs/User_Guide/Learning/featurizers.md new file mode 100644 index 00000000..de743a9f --- /dev/null +++ b/docs/User_Guide/Learning/featurizers.md @@ -0,0 +1,91 @@ +The `Featurizer` object allows for the featurization of +systems into a format that can be fed into machine learning +models. Specified within this object are all the desired +settings for when featurizing systems. More specifically this +includes: + +- `featurizer_class`: the desired class for featurization + +- `preset`: if the featurizer class can be instantiated by +a preset, that preset can be specified here. (e.g. the `magpie` feature +set for the `ElementProperty` featurizer class) + +- `design_space_structures`: if the design space is already known, +the structures can be specified here to extract the `max_size` and +`species_list` parameters. Supersedes `max_size` and `species_list` +upon instantiation + +- `max_size`: the largest structure size that the featurizer can +encounter + +- `species_list`: all possible species that the featurizer can +encounter + +Applying the `Featurizer` there are two main methods: +`featurize_single` and `featurize_multiple`. The former is intended +for featurizing a single structure. On the other hand, the latter +can take multiple structures and returns them in a single feature +matrix. 
+ +Below are three examples using structure, site, and compositional +featurization methods: + +```py +>>> from autocat.learning.featurizers import Featurizer +>>> from autocat.utils import extract_structures +>>> from autocat.surface import generate_surface_structures +>>> from dscribe.descriptors import SineMatrix +>>> surfs = extract_structures(generate_surface_structures(["Li", "Na"])) +>>> f = Featurizer(SineMatrix, design_space_structures=surfs) +>>> f.max_size +36 +>>> f.species_list +['Li', 'Na'] +>>> X = f.featurize_multiple(surfs) +``` + +```py +>>> from autocat.learning.featurizers import Featurizer +>>> from autocat.utils import extract_structures +>>> from autocat.surface import generate_surface_structures +>>> from autocat.adsorption import place_adsorbate +>>> from dscribe.descriptors import SOAP +>>> surf = extract_structures(generate_surface_structures(["Cu"]))[0] +>>> ads_struct = extract_structures(place_adsorbate(surf, "OH", position=(0.0, 0.0)))[0] +>>> f = Featurizer( +... SOAP, +... max_size=36, +... species_list=["Cu", "O", "H"], +... kwargs={"rcut": 6., "lmax": 8, "nmax": 8} +... ) +>>> X = f.featurize_single(ads_struct) +``` + +```py +>>> from autocat.learning.featurizers import Featurizer +>>> from autocat.utils import extract_structures +>>> from autocat.saa import generate_saa_structures +>>> from matminer.featurizers.composition import ElementProperty +>>> saas = extract_structures(generate_saa_structures(["Cu", "Au"],["Pt", "Pd"])) +>>> f = Featurizer(ElementProperty, preset="magpie", design_space_structures=saas) +>>> f.species_list +['Cu', 'Pt', 'Pd', 'Au'] +>>> X = f.featurize_multiple(saas) +``` + +The goal of this `Featurizer` object is to provide a unified class across different +featurization techniques. 
+ +At present the following featurizer classes are supported: + +- [`dscribe`](https://singroup.github.io/dscribe/latest/): + - `SineMatrix` + - `CoulombMatrix` + - `ACSF` + - `SOAP` + +- [`matminer`](https://hackingmaterials.lbl.gov/matminer/): + - `ElementProperty` + - `ChemicalSRO` + - `OPSiteFingerprint` + - `CrystalNNFingerprint` \ No newline at end of file diff --git a/docs/User_Guide/Learning/predictors.md b/docs/User_Guide/Learning/predictors.md new file mode 100644 index 00000000..65ad67e1 --- /dev/null +++ b/docs/User_Guide/Learning/predictors.md @@ -0,0 +1,79 @@ +In order to iterate a sequential learning pipeline, +a regressor is needed to select subsequent candidate systems. +For this purpose, there is the +[`Predictor`](../../API/Learning/predictors.md#autocat.learning.predictors.Predictor) +object class. This contains two key attributes: + +- a regressor that can be fit to data and used for predictions +(the class provided must have `fit` and `predict` methods) +- featurizer class and kwargs to instantiate a [`Featurizer`](featurizers.md). + In particular there are two currently implemented approaches, +structure methods that featurize the entire structure (e.g. `SineMatrix`, `ElementProperty`) + and adsorbate methods that featurize locally (e.g. `SOAP`). + +Generally, this predictor object behaves similarly to regressors found in +[`sklearn`](https://scikit-learn.org/stable/) +with its own +[`fit`](../../API/Learning/predictors.md#autocat.learning.predictors.Predictor.fit), +[`predict`](../../API/Learning/predictors.md#autocat.learning.predictors.Predictor.predict), +and +[`score`](../../API/Learning/predictors.md#autocat.learning.predictors.Predictor.score) +methods. + +As an example, let's train a random forest regressor on some +single atom alloys. 
+ +```py +>>> import numpy as np +>>> from autocat.learning.predictors import Predictor +>>> from autocat.saa import generate_saa_structures +>>> from autocat.utils import extract_structures +>>> from dscribe.descriptors import SineMatrix +>>> from sklearn.ensemble import RandomForestRegressor +>>> saa_dict = generate_saa_structures(["Cu", "Au", "Fe"], ["Pt", "Ru", "Ni"]) +>>> saa_structs = extract_structures(saa_dict) +>>> labels = np.random.randint(1, size=(len(saa_structs) - 1)) +>>> acp = Predictor( +... model_class=RandomForestRegressor, +... featurizer_class=SineMatrix, +... ) +>>> acp.fit(saa_structs[:-1], labels) +>>> pred, _ = acp.predict([saa_structs[-1]]) +>>> pred +array([0.]) +``` +Here we have chosen to featurize the structures as a `SineMatrix`. + +Note as well that the `predict` method will return uncertainty estimates +if available. To see this, let's train a gaussian process regressor with an RBF + kernel. Let's also featurize using `SOAP` to see how featurization kwargs are passed + +```py +>>> import numpy as np +>>> from autocat.learning.predictors import Predictor +>>> from autocat.surface import generate_surface_structures +>>> from autocat.utils import extract_structures +>>> from autocat.adsorption import place_adsorbate +>>> from dscribe.descriptors import SOAP +>>> from sklearn.gaussian_process import GaussianProcessRegressor +>>> from sklearn.gaussian_process.kernels import RBF +>>> subs = extract_structures(generate_surface_structures(["Pt", "Fe", "Ru"])) +>>> structs = [extract_structures(place_adsorbate(s, "OH"))[0] for s in subs] +>>> labels = np.random.randint(1, size=(len(structs) - 1)) +>>> kernel = RBF() +>>> acp = Predictor( +... model_class=GaussianProcessRegressor, +... model_kwargs={"kernel": kernel}, +... featurizer_class=SOAP, +... featurization_kwargs={ +... "design_space_structures": structs, +... "kwargs": {"rcut": 6.0, "nmax": 6, "lmax": 6}, +... } +... 
) +>>> acp.fit(structs[:-1], labels) +>>> pred, unc = acp.predict([structs[-1]]) +>>> pred +array([0.]) +>>> unc +array([1.]) +``` diff --git a/docs/User_Guide/Learning/sequential.md b/docs/User_Guide/Learning/sequential.md new file mode 100644 index 00000000..cbdd4569 --- /dev/null +++ b/docs/User_Guide/Learning/sequential.md @@ -0,0 +1,223 @@ +## DesignSpace + +The +[`DesignSpace`](../../API/Learning/sequential.md#autocat.learning.sequential.DesignSpace) +class object is intended to store the +*entire* design space. As the sequential learning +loop is iterated, this can be continuously updated +with the newly found labels. + +There are two key components required for this object: + +1. `design_space_structures`: *all* systems to be considered as +[`ase.Atoms`](https://wiki.fysik.dtu.dk/ase/ase/atoms.html#module-ase.atoms) +objects in a `list` +2. `design_space_labels`: `numpy array` of the same length as the above list +with the corresponding labels. If the label is not yet +known, set it to `numpy.nan` + +**NB:** The order of the list of design space structures must +be in the same order as the labels given in the +design space labels. + +```py +>>> import numpy as np +>>> from autocat.surface import generate_surface_structures +>>> from autocat.utils import extract_structures +>>> from autocat.learning.sequential import DesignSpace +>>> surf_dict = generate_surface_structures(["Pt", "Pd", "Cu", "Ni"]) +>>> surf_structs = extract_structures(surf_dict) +>>> labels = np.array([0.95395024, 0.63504885, np.nan, 0.08320879, np.nan, +... 0.32423194, 0.55570785, np.nan, np.nan, np.nan, +... 
0.18884186, np.nan]) +>>> acds = DesignSpace(surf_structs, labels) +>>> len(acds) +12 +>>> acds.design_space_structures +[Atoms(...), + Atoms(...), + Atoms(...), + Atoms(...), + Atoms(...), + Atoms(...), + Atoms(...), + Atoms(...), + Atoms(...), + Atoms(...), + Atoms(...), + Atoms(...)] +>>> acds.design_space_labels +array([0.95395024, 0.63504885, nan, 0.08320879, nan, + 0.32423194, 0.55570785, nan, nan, nan, + 0.18884186, nan]) +``` + + +## SequentialLearner + +The +[`SequentialLearner`](../../API/Learning/sequential.md#autocat.learning.sequential.SequentialLearner) +object stores information regarding the latest +iteration of the sequential learning loop including: + +1. A [`Predictor`](predictors.md) (and its kwargs for both the regressor and featurizer) +2. Candidate selection kwargs for score calculation (e.g. acquisition functions) +3. Iteration number +4. Latest `DesignSpace` +5. Candidate system that is identified for the next loop. +6. Histories for predictions, uncertainties, and training indices + +This object can be thought of as a central hub for the +sequential learning workflow, with an external driver +(either automated or manual) triggering iteration. The first +`iterate` trains the model and identifies candidate(s) to +start the loop. 
+
+```py
+>>> import numpy as np
+>>> from autocat.surface import generate_surface_structures
+>>> from autocat.utils import extract_structures
+>>> from autocat.adsorption import place_adsorbate
+>>> from autocat.learning.sequential import DesignSpace
+>>> from autocat.learning.sequential import SequentialLearner
+>>> from dscribe.descriptors import SOAP
+>>> from sklearn.gaussian_process import GaussianProcessRegressor
+>>> from sklearn.gaussian_process.kernels import RBF
+>>> subs_dict = generate_surface_structures(["Pt", "Pd", "Cu", "Ni"])
+>>> subs = extract_structures(subs_dict)
+>>> ads_structs = [extract_structures(place_adsorbate(s, "H"))[0] for s in subs]
+>>> labels = np.array([0.95395024, 0.63504885, np.nan, 0.08320879, np.nan,
+... 0.32423194, 0.55570785, np.nan, np.nan, np.nan,
+... 0.18884186, np.nan])
+>>> acds = DesignSpace(ads_structs, labels)
+>>> kernel = RBF()
+>>> acsl = SequentialLearner(
+... acds,
+... predictor_kwargs={
+... "model_class": GaussianProcessRegressor,
+... "model_kwargs": {"kernel": kernel},
+... "featurizer_class": SOAP,
+... "featurization_kwargs": {"kwargs": {"rcut": 5.0, "lmax": 6, "nmax": 6}}
+... },
+... candidate_selection_kwargs={
+... "aq": "MLI",
+... "target_min": -2.25,
+... "target_max": -1.5,
+... "include_hhi": True,
+... "hhi_type": "reserves",
+... "include_seg_ener": False,
+... },
+... )
+>>> acsl.iteration_count
+0
+>>> acsl.iterate()
+>>> acsl.iteration_count
+1
+```
+
+## Simulated Sequential Learning
+
+If you already have a fully explored design space and want
+to simulate exploration over it, the
+[`simulated_sequential_learning`](../../API/Learning/sequential.md#autocat.learning.sequential.simulated_sequential_learning)
+function may be used.
+
+Internally this function acts as a driver on a `SequentialLearner` object, and can be
+viewed as an example for how a driver can be set up for an exploratory simulated
+sequential learning loop. 
As inputs it requires all parameters needed to instantiate +a `SequentialLearner` and returns the object that has been iterated. For further analysis +of the search, histories of the predictions, uncertainties, and the training indices for +each iteration are kept. + +```py +>>> import numpy as np +>>> from autocat.surface import generate_surface_structures +>>> from autocat.utils import extract_structures +>>> from autocat.learning.sequential import DesignSpace +>>> from autocat.learning.sequential import simulated_sequential_learning +>>> from dscribe.descriptors import SineMatrix +>>> from sklearn.gaussian_process import GaussianProcessRegressor +>>> from sklearn.gaussian_process.kernels import RBF +>>> surf_dict = generate_surface_structures(["Pt", "Pd", "Cu", "Ni"]) +>>> surf_structs = extract_structures(surf_dict) +>>> labels = np.array([0.95395024, 0.63504885, 0.4567, 0.08320879, 0.87779, +... 0.32423194, 0.55570785, 0.325, 0.43616, 0.321632, +... 0.18884186, 0.1114]) +>>> acds = DesignSpace(surf_structs, labels) +>>> kernel = RBF() +>>> sim_sl = simulated_sequential_learning( +... full_design_space=acds, +... predictor_kwargs={ +... "model_class": GaussianProcessRegressor, +... "model_kwargs": {"kernel": kernel}, +... "featurizer_class": SineMatrix, +... }, +... candidate_selection_kwargs={ +... "aq": "MLI", +... "target_min": -2.25, +... "target_max": -1.5, +... "include_hhi": True, +... "hhi_type": "reserves", +... "include_seg_ener": False, +... }, +... init_training_size=5, +... number_of_sl_loops=3, +... ) +Sequential Learning Iteration #1 +Sequential Learning Iteration #2 +Sequential Learning Iteration #3 +``` + +Additionally, simulated searches are typically most useful when repeated to obtain +statistics that are less dependent on the initialization of the design space. 
For this +purpose there is the +[`multiple_simulated_sequential_learning_runs`](../../API/Learning/sequential.md#autocat.learning.sequential.multiple_simulated_sequential_learning_runs) +function. This returns a list of `SequentialLearner` corresponding to each individual run. Optionally, +this function can also initiate the multiple runs across parallel processes via the +`number_of_parallel_jobs` parameter. + +```py +>>> import numpy as np +>>> from autocat.surface import generate_surface_structures +>>> from autocat.utils import extract_structures +>>> from autocat.learning.sequential import DesignSpace +>>> from autocat.learning.sequential import multiple_simulated_sequential_learning_runs +>>> from matminer.featurizers.composition import ElementProperty +>>> from sklearn.gaussian_process import GaussianProcessRegressor +>>> from sklearn.gaussian_process.kernels import RBF +>>> surf_dict = generate_surface_structures(["Pt", "Pd", "Cu", "Ni"]) +>>> surf_structs = extract_structures(surf_dict) +>>> labels = np.array([0.95395024, 0.63504885, 0.4567, 0.08320879, 0.87779, +... 0.32423194, 0.55570785, 0.325, 0.43616, 0.321632, +... 0.18884186, 0.1114]) +>>> acds = DesignSpace(surf_structs, labels) +>>> kernel = RBF() +>>> multi_sim_sl = multiple_simulated_sequential_learning_runs( +... full_design_space=acds, +... predictor_kwargs={ +... "model_class": GaussianProcessRegressor, +... "model_kwargs": {"kernel": kernel}, +... "featurizer_class": ElementProperty, +... "featurization_kwargs": {"preset": "matminer"}, +... }, +... candidate_selection_kwargs={ +... "aq": "MLI", +... "target_min": -2.25, +... "target_max": -1.5, +... "include_hhi": True, +... "hhi_type": "reserves", +... "include_seg_ener": False, +... }, +... init_training_size=5, +... number_of_sl_loops=2, +... number_of_runs=3, +... 
)
+Sequential Learning Iteration #1
+Sequential Learning Iteration #2
+Sequential Learning Iteration #1
+Sequential Learning Iteration #2
+Sequential Learning Iteration #1
+Sequential Learning Iteration #2
+>>> len(multi_sim_sl)
+3
+```
\ No newline at end of file
diff --git a/docs/User_Guide/Structure_Generation/adsorption.md b/docs/User_Guide/Structure_Generation/adsorption.md
new file mode 100644
index 00000000..a20bfd57
--- /dev/null
+++ b/docs/User_Guide/Structure_Generation/adsorption.md
@@ -0,0 +1,180 @@
+![Adsorption Figure](../../img/struct_gen_figs/adsorption.png){ align=right }
+Tools within
+[`autocat.adsorption`](../../API/Structure_Generation/adsorption.md)
+are geared towards generating structures with adsorbates placed on
+a candidate catalyst surface.
+
+The core function of this module is
+[`generate_adsorbed_structures`](../../API/Structure_Generation/adsorption.md#autocat.adsorption.generate_adsorbed_structures)
+ for generating multiple adsorbed structures with a single function call.
+
+For the oxygen reduction (ORR) and nitrogen reduction (NRR) reactions,
+AutoCat has default starting geometries for all of these intermediates
+which can be found in [`autocat.data.intermediates`](../Data/intermediates.md).
+
+In addition, by default initial heights of the adsorbates are guessed based
+upon the vdW radii of the nearest neighbors to the anchoring atom.
+
+In the example below we are generating adsorption structures for all ORR intermediates
+on all of the identified unique symmetry sites on a Pt111 slab. The unique sites are
+identified using the Delaunay triangulation, as implemented in `pymatgen`.
+
+```py
+>>> from autocat.surface import generate_surface_structures
+>>> from autocat.data.intermediates import ORR_INTERMEDIATE_NAMES
+>>> from autocat.adsorption import generate_adsorbed_structures
+>>> surface_dict = generate_surface_structures(
+... species_list=["Pt"], facets={"Pt": ["111"]}, n_fixed_layers=2
+... )
+>>> surface = surface_dict["Pt"]["fcc111"]["structure"]
+>>> ads_dict = generate_adsorbed_structures(
+... surface=surface,
+... use_all_sites=True,
+... adsorbates=ORR_INTERMEDIATE_NAMES,
+... write_to_disk=True,
+... )
+Structure with OOH adsorbed at ontop/0.0_0.0 written to ./adsorbates/OOH/ontop/0.0_0.0/input.traj
+Structure with OOH adsorbed at bridge/7.623_6.001 written to ./adsorbates/OOH/bridge/7.623_6.001/input.traj
+Structure with OOH adsorbed at hollow/6.93_5.601 written to ./adsorbates/OOH/hollow/6.93_5.601/input.traj
+Structure with OOH adsorbed at hollow/9.702_4.001 written to ./adsorbates/OOH/hollow/9.702_4.001/input.traj
+Structure with O adsorbed at ontop/0.0_0.0 written to ./adsorbates/O/ontop/0.0_0.0/input.traj
+Structure with O adsorbed at bridge/7.623_6.001 written to ./adsorbates/O/bridge/7.623_6.001/input.traj
+Structure with O adsorbed at hollow/6.93_5.601 written to ./adsorbates/O/hollow/6.93_5.601/input.traj
+Structure with O adsorbed at hollow/9.702_4.001 written to ./adsorbates/O/hollow/9.702_4.001/input.traj
+Structure with OH adsorbed at ontop/0.0_0.0 written to ./adsorbates/OH/ontop/0.0_0.0/input.traj
+Structure with OH adsorbed at bridge/7.623_6.001 written to ./adsorbates/OH/bridge/7.623_6.001/input.traj
+Structure with OH adsorbed at hollow/6.93_5.601 written to ./adsorbates/OH/hollow/6.93_5.601/input.traj
+Structure with OH adsorbed at hollow/9.702_4.001 written to ./adsorbates/OH/hollow/9.702_4.001/input.traj
+>>> ads_dict
+{'OOH': {'ontop': {'0.0_0.0': {'structure': Atoms(...),
+ 'traj_file_path': './adsorbates/OOH/ontop/0.0_0.0/input.traj'}},
+ 'bridge': {'7.623_6.001': {'structure': Atoms(...),
+ 'traj_file_path': 
'./adsorbates/OOH/bridge/7.623_6.001/input.traj'}}, + 'hollow': {'6.93_5.601': {'structure': Atoms(...), + 'traj_file_path': './adsorbates/OOH/hollow/6.93_5.601/input.traj'}, + '9.702_4.001': {'structure': Atoms(...), + 'traj_file_path': './adsorbates/OOH/hollow/9.702_4.001/input.traj'}}}, + 'O': {'ontop': {'0.0_0.0': {'structure': Atoms(...), + 'traj_file_path': './adsorbates/O/ontop/0.0_0.0/input.traj'}}, + 'bridge': {'7.623_6.001': {'structure': Atoms(...), + 'traj_file_path': './adsorbates/O/bridge/7.623_6.001/input.traj'}}, + 'hollow': {'6.93_5.601': {'structure': Atoms(...), + 'traj_file_path': './adsorbates/O/hollow/6.93_5.601/input.traj'}, + '9.702_4.001': {'structure': Atoms(...), + 'traj_file_path': './adsorbates/O/hollow/9.702_4.001/input.traj'}}}, + 'OH': {'ontop': {'0.0_0.0': {'structure': Atoms(...), + 'traj_file_path': './adsorbates/OH/ontop/0.0_0.0/input.traj'}}, + 'bridge': {'7.623_6.001': {'structure': Atoms(...), + 'traj_file_path': './adsorbates/OH/bridge/7.623_6.001/input.traj'}}, + 'hollow': {'6.93_5.601': {'structure': Atoms(...), + 'traj_file_path': './adsorbates/OH/hollow/6.93_5.601/input.traj'}, + '9.702_4.001': {'structure': Atoms(...), + 'traj_file_path': './adsorbates/OH/hollow/9.702_4.001/input.traj'}}}, +``` + +In general the dictionary generated has the following organization: + +``` +{ADSORBATE_SPECIES: + {SITE_LABEL: + {XY: {"structure": Atoms, "traj_file_path": TRAJFILEPATH}}}, +``` +When writing these adsorbed structures to disk it is done with the following subdirectory +format (mimicing the organization of the dictionary). + +``` +. 
+├── adsorbates
+│   ├── O
+│   │   ├── bridge
+│   │   │   └── 7.623_6.001
+│   │   │   └── input.traj
+│   │   ├── hollow
+│   │   │   ├── 6.93_5.601
+│   │   │   │   └── input.traj
+│   │   │   └── 9.702_4.001
+│   │   │   └── input.traj
+│   │   └── ontop
+│   │   └── 0.0_0.0
+│   │   └── input.traj
+│   ├── OH
+│   │   ├── bridge
+│   │   │   └── 7.623_6.001
+│   │   │   └── input.traj
+│   │   ├── hollow
+│   │   │   ├── 6.93_5.601
+│   │   │   │   └── input.traj
+│   │   │   └── 9.702_4.001
+│   │   │   └── input.traj
+│   │   └── ontop
+│   │   └── 0.0_0.0
+│   │   └── input.traj
+│   └── OOH
+│   ├── bridge
+│   │   └── 7.623_6.001
+│   │   └── input.traj
+│   ├── hollow
+│   │   ├── 6.93_5.601
+│   │   │   └── input.traj
+│   │   └── 9.702_4.001
+│   │   └── input.traj
+│   └── ontop
+│   └── 0.0_0.0
+│   └── input.traj
+```
+
+Instead of generating the adsorption structures for all unique sites,
+the xy coordinates of individual sites may be specified using the `adsorption_sites`
+ parameter. Here we can give each of these sites custom labels to be used for referencing
+ and writing to disk.
+
+```py
+>>> from autocat.surface import generate_surface_structures
+>>> from autocat.adsorption import generate_adsorbed_structures
+>>> surface_dict = generate_surface_structures(
+... species_list=["Pt"], facets={"Pt": ["111"]}, n_fixed_layers=2
+... )
+>>> surface = surface_dict["Pt"]["fcc111"]["structure"]
+>>> x = surface[15].x
+>>> x
+4.1577878733769
+>>> y = surface[15].y
+>>> y
+5.6011665451642
+>>> sites = {"Li": {"custom": [(x,y)]}}
+>>> ads_dict = generate_adsorbed_structures(
+... surface=surface,
+... adsorbates=["Li"],
+... use_all_sites=False,
+... adsorption_sites=sites,
+... write_to_disk=True,
+... 
)
+Structure with Li adsorbed at custom/4.158_5.601 written to ./adsorbates/Li/custom/4.158_5.601/input.traj
+>>> ads_dict
+{'Li': {'custom': {'4.158_5.601': {'structure': Atoms(...),
+ 'traj_file_path': './adsorbates/Li/custom/4.158_5.601/input.traj'}}}}
+```
+
+If we are dealing with multiple adsorbates, adsorption sites, heights, etc. that we want to
+treat differently depending on the combination, we can leverage the `dict` option for each of these
+inputs. The example below illustrates this capability, where a `dict` can be used to specify settings per adsorbate.
+
+```py
+>>> from autocat.surface import generate_surface_structures
+>>> from autocat.adsorption import generate_adsorbed_structures
+>>> surface_dict = generate_surface_structures(
+... species_list=["Pt"], facets={"Pt": ["111"]}, n_fixed_layers=2
+... )
+>>> surface = surface_dict["Pt"]["fcc111"]["structure"]
+>>> sites = {"Li": {"origin": [(0.,0.)]}, "H": {"custom": [(0.5, 0.5)]}}
+>>> ads_dict = generate_adsorbed_structures(
+... surface=surface,
+... adsorbates=["Li", "H", "N"],
+... use_all_sites={"Li": False, "H": False, "N": True},
+... heights={"H": 1.2},
+... adsorption_sites=sites,
+... write_to_disk=True,
+... )
+```
\ No newline at end of file
diff --git a/docs/User_Guide/Structure_Generation/bulk.md b/docs/User_Guide/Structure_Generation/bulk.md
new file mode 100644
index 00000000..ad95cce1
--- /dev/null
+++ b/docs/User_Guide/Structure_Generation/bulk.md
@@ -0,0 +1,45 @@
+[`autocat.bulk`](../../API/Structure_Generation/bulk.md)
+provides tools to automatically generate mono-element
+bulk structures. These are structures containing only a single
+chemical species with no vacuum and 3D periodicity.
+
+Multiple of these systems can be generated and written to
+disk via a single call of
+[`generate_bulk_structures`](../../API/Structure_Generation/bulk.md#autocat.bulk.generate_bulk_structures).
+
+``` py
+>>> from autocat.bulk import generate_bulk_structures
+>>> bulk_dict = generate_bulk_structures(["Pt", "Fe", "Ru"], write_to_disk=True)
+Pt_bulk_fcc structure written to ./Pt_bulk_fcc/input.traj
+Fe_bulk_bcc structure written to ./Fe_bulk_bcc/input.traj
+Ru_bulk_hcp structure written to ./Ru_bulk_hcp/input.traj
+>>> bulk_dict
+{'Pt': {'crystal_structure': Atoms(...),
+ 'traj_file_path': './Pt_bulk_fcc/input.traj'},
+ 'Fe': {'crystal_structure': Atoms(...),
+ 'traj_file_path': './Fe_bulk_bcc/input.traj'},
+ 'Ru': {'crystal_structure': Atoms(...),
+ 'traj_file_path': './Ru_bulk_hcp/input.traj'}}
+```
+
+In general the following structure of the resulting dict is generated:
+
+`{SPECIES: {"crystal_structure": Atoms, "traj_file_path": TRAJFILEPATH}}`
+
+If writing structures to disk via
+`#!python write_to_disk=True`,
+then a similar organization is maintained in the resulting directory structure:
+
+```
+.
+├── Fe_bulk_bcc
+│   └── input.traj
+├── Pt_bulk_fcc
+│   └── input.traj
+├── Ru_bulk_hcp
+│   └── input.traj
+```
+where each `input.traj` contains the bulk structure.
+
+**N.B.** by default initial magnetic moments will be set for Fe, Co, and Ni, otherwise no spin
+will be given
diff --git a/docs/User_Guide/Structure_Generation/saa.md b/docs/User_Guide/Structure_Generation/saa.md
new file mode 100644
index 00000000..aec3e32b
--- /dev/null
+++ b/docs/User_Guide/Structure_Generation/saa.md
@@ -0,0 +1,65 @@
+![SAA Figure](../../img/struct_gen_figs/saa.png){ align=right }
+Single atom alloys (SAA) consist of a transition-metal host
+with lone dopant atoms embedded at the surface. This
+dispersion leads to unique electronic properties.
+
+With the [`autocat.saa`](../../API/Structure_Generation/saa.md)
+module, we can generate structures of these
+systems to study them further. 
The main function for this purpose +is [`generate_saa_structures`](../../API/Structure_Generation/saa.md#autocat.saa.generate_saa_structures) +where multiple SAA structures can +be generated simultaneously. + +```py +>>> from autocat.saa import generate_saa_structures +>>> saa_dict = generate_saa_structures( +... host_species=["Fe", "Cu"], +... dopant_species=["Pt", "Au"], +... facets={"Fe": ["110"], "Cu": ["111"]}, +... n_fixed_layers=2, +... write_to_disk=True, +... ) +Pt1/Fe(bcc110) structure written to ./Fe/Pt/bcc110/substrate/input.traj +Au1/Fe(bcc110) structure written to ./Fe/Au/bcc110/substrate/input.traj +Pt1/Cu(fcc111) structure written to ./Cu/Pt/fcc111/substrate/input.traj +Au1/Cu(fcc111) structure written to ./Cu/Au/fcc111/substrate/input.traj +>>> saa_dict +{'Fe': {'Pt': {'bcc110': {'structure': Atoms(...), + 'traj_file_path': './Fe/Pt/bcc110/substrate/input.traj'}}, + 'Au': {'bcc110': {'structure': Atoms(...), + 'traj_file_path': './Fe/Au/bcc110/substrate/input.traj'}}}, + 'Cu': {'Pt': {'fcc111': {'structure': Atoms(...), + 'traj_file_path': './Cu/Pt/fcc111/substrate/input.traj'}}, + 'Au': {'fcc111': {'structure': Atoms(...), + 'traj_file_path': './Cu/Au/fcc111/substrate/input.traj'}}}} +``` +Here we generated SAA slabs with Fe and Cu as hosts and Pt and Au dopants under the following conditions: + +- for Fe (Cu) we only need the 110 (111) facet +- the bottom 2 layers are held fixed + +When writing to disk the following directory structure is used: +``` +. 
+├── Cu +│   ├── Au +│   │   └── fcc111 +│   │   └── substrate +│   │   └── input.traj +│   └── Pt +│   └── fcc111 +│   └── substrate +│   └── input.traj +├── Fe +│   ├── Au +│   │   └── bcc110 +│   │   └── substrate +│   │   └── input.traj +│   └── Pt +│   └── bcc110 +│   └── substrate +│   └── input.traj +``` + +**N.B.** by default, initial magnetic moments are given to the dopant species based upon +the ground state magnetic moment of the species diff --git a/docs/User_Guide/Structure_Generation/surface.md b/docs/User_Guide/Structure_Generation/surface.md new file mode 100644 index 00000000..6788082c --- /dev/null +++ b/docs/User_Guide/Structure_Generation/surface.md @@ -0,0 +1,71 @@ +![Surface Figure](../../img/struct_gen_figs/surface.png){ align=right } +It is crucial for many heterogeneous catalysis studies to be +able to model a catalyst surface where the desired reaction +can take place. +[`autocat.surface`](../../API/Structure_Generation/surface.md) +provides tools for generating +low miller index surfaces for mono-element surfaces with a vacuum +in the $z$-direction. + +The core function of this module is +[`generate_surface_structures`](../../API/Structure_Generation/surface.md#autocat.surface.generate_surface_structures) +where multiple slabs can be generated at once. + +```py +>>> from autocat.surface import generate_surface_structures +>>> surf_dict = generate_surface_structures( +... ["Li", "Cu"], +... facets={"Li": ["110"]}, +... supercell_dim=[5, 5, 4], +... n_fixed_layers=2, +... default_lat_param_lib="beefvdw_fd", +... write_to_disk=True, +... 
) +Li_bcc110 structure written to ./Li/bcc110/substrate/input.traj +Cu_fcc100 structure written to ./Cu/fcc100/substrate/input.traj +Cu_fcc111 structure written to ./Cu/fcc111/substrate/input.traj +Cu_fcc110 structure written to ./Cu/fcc110/substrate/input.traj +>>> surf_dict +{'Li': {'bcc110': {'structure': Atoms(...), + 'traj_file_path': './Li/bcc110/substrate/input.traj'}}, + 'Cu': {'fcc100': {'structure': Atoms(...), + 'traj_file_path': './Cu/fcc100/substrate/input.traj'}, + 'fcc111': {'structure': Atoms(...), + 'traj_file_path': './Cu/fcc111/substrate/input.traj'}, + 'fcc110': {'structure': Atoms(...), + 'traj_file_path': './Cu/fcc110/substrate/input.traj'}}} +``` +Here we generated surface slabs for Cu and Li under the following conditions: + +- for Li we only need the 110 facet +- generate all default facets for Cu + * fcc/bcc: ["100", "110", "111"] + * hcp: ["0001"] +- the supercell dimensions of the slabs are 5 $\times$ 5 $\times$ 4 +- the bottom 2 layers are held fixed +- for structures where the lattice parameter is not explicitly specified, +their default values are pulled from the +[`autocat.data.lattice_parameters`](../Data/lattice_parameters.md) +library that used a BEEF-vdW XC and finite difference basis set + +When using the `write_to_disk` functionality the structures +will be written into the following directory structure: + +``` +. 
+├── Cu +│   ├── fcc100 +│   │   └── substrate +│   │   └── input.traj +│   ├── fcc110 +│   │   └── substrate +│   │   └── input.traj +│   └── fcc111 +│   └── substrate +│   └── input.traj +├── Li +│   └── bcc110 +│   └── substrate +│   └── input.traj +``` +**N.B.** by default, initial magnetic moments are given to Fe, Ni and Co diff --git a/docs/img/autocat_icon.png b/docs/img/autocat_icon.png new file mode 100644 index 00000000..df146db3 Binary files /dev/null and b/docs/img/autocat_icon.png differ diff --git a/docs/img/autocat_logo.png b/docs/img/autocat_logo.png new file mode 100644 index 00000000..49ec457c Binary files /dev/null and b/docs/img/autocat_logo.png differ diff --git a/docs/img/struct_gen_figs/adsorption.png b/docs/img/struct_gen_figs/adsorption.png new file mode 100644 index 00000000..5d2de928 Binary files /dev/null and b/docs/img/struct_gen_figs/adsorption.png differ diff --git a/docs/img/struct_gen_figs/saa.png b/docs/img/struct_gen_figs/saa.png new file mode 100644 index 00000000..2234d3e5 Binary files /dev/null and b/docs/img/struct_gen_figs/saa.png differ diff --git a/docs/img/struct_gen_figs/surface.png b/docs/img/struct_gen_figs/surface.png new file mode 100644 index 00000000..62e5157c Binary files /dev/null and b/docs/img/struct_gen_figs/surface.png differ diff --git a/docs/javascripts/mathjax.js b/docs/javascripts/mathjax.js new file mode 100644 index 00000000..5bf8e9aa --- /dev/null +++ b/docs/javascripts/mathjax.js @@ -0,0 +1,17 @@ +window.MathJax = { + tex: { + inlineMath: [["\\(", "\\)"]], + displayMath: [["\\[", "\\]"]], + processEscapes: true, + processEnvironments: true + }, + options: { + ignoreHtmlClass: ".*|", + processHtmlClass: "arithmatex" + } +}; + +document$.subscribe(() => { // + + MathJax.typesetPromise() +}) diff --git a/docs/make.bat b/docs/make.bat deleted file mode 100644 index c3592594..00000000 --- a/docs/make.bat +++ /dev/null @@ -1,35 +0,0 @@ -@ECHO OFF - -pushd %~dp0 - -REM Command file for Sphinx documentation - 
-if "%SPHINXBUILD%" == "" ( - set SPHINXBUILD=sphinx-build -) -set SOURCEDIR=src -set BUILDDIR=_build - -if "%1" == "" goto help - -%SPHINXBUILD% >NUL 2>NUL -if errorlevel 9009 ( - echo. - echo.The 'sphinx-build' command was not found. Make sure you have Sphinx - echo.installed, then set the SPHINXBUILD environment variable to point - echo.to the full path of the 'sphinx-build' executable. Alternatively you - echo.may add the Sphinx directory to PATH. - echo. - echo.If you don't have Sphinx installed, grab it from - echo.http://sphinx-doc.org/ - exit /b 1 -) - -%SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% -goto end - -:help -%SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% - -:end -popd diff --git a/docs/src/conf.py b/docs/src/conf.py deleted file mode 100644 index a182ecc6..00000000 --- a/docs/src/conf.py +++ /dev/null @@ -1,55 +0,0 @@ -# Configuration file for the Sphinx documentation builder. -# -# This file only contains a selection of the most common options. For a full -# list see the documentation: -# https://www.sphinx-doc.org/en/master/usage/configuration.html - -# -- Path setup -------------------------------------------------------------- - -# If extensions (or modules to document with autodoc) are in another directory, -# add these directories to sys.path here. If the directory is relative to the -# documentation root, use os.path.abspath to make it absolute, like shown here. -# -import os -import sys - -sys.path.insert(0, os.path.abspath("../src/autocat")) - - -# -- Project information ----------------------------------------------------- - -project = "autocat" -copyright = "2020, Lance Kavalsky" -author = "Lance Kavalsky" - - -# -- General configuration --------------------------------------------------- - -# Add any Sphinx extension module names here, as strings. They can be -# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom -# ones. 
-extensions = [ - "sphinx.ext.autodoc", - "sphinx.ext.napoleon", -] - -# Add any paths that contain templates here, relative to this directory. -templates_path = ["_templates"] - -# List of patterns, relative to source directory, that match files and -# directories to ignore when looking for source files. -# This pattern also affects html_static_path and html_extra_path. -exclude_patterns = ["_build", "Thumbs.db", ".DS_Store"] - - -# -- Options for HTML output ------------------------------------------------- - -# The theme to use for HTML and HTML Help pages. See the documentation for -# a list of builtin themes. -# -html_theme = "bizstyle" - -# Add any paths that contain custom static files (such as style sheets) here, -# relative to this directory. They are copied after the builtin static files, -# so a file named "default.css" will overwrite the builtin "default.css". -html_static_path = ["_static"] diff --git a/docs/src/index.rst b/docs/src/index.rst deleted file mode 100644 index b5c77c5f..00000000 --- a/docs/src/index.rst +++ /dev/null @@ -1,22 +0,0 @@ -.. autocat documentation master file, created by - sphinx-quickstart on Tue Nov 24 18:52:19 2020. - You can adapt this file completely to your liking, but it should at least - contain the root `toctree` directive. - -Welcome to autocat's documentation! -=================================== - -Tools for automated structure generation of catalyst systems. - -.. toctree:: - :maxdepth: 2 - :hidden: - - module_reference/index - -Indices and tables -================== - -* :ref:`genindex` -* :ref:`modindex` -* :ref:`search` diff --git a/docs/src/module_reference/adsorption.rst b/docs/src/module_reference/adsorption.rst deleted file mode 100644 index 09813f40..00000000 --- a/docs/src/module_reference/adsorption.rst +++ /dev/null @@ -1,10 +0,0 @@ -.. _sec-adsorption: - -autocat.adsorption -++++++++++++++++++ - -Tools for automating adsorption on a given surface. - -.. 
automodule:: autocat.adsorption - :members: - :undoc-members: diff --git a/docs/src/module_reference/bulk.rst b/docs/src/module_reference/bulk.rst deleted file mode 100644 index dcdf8ef3..00000000 --- a/docs/src/module_reference/bulk.rst +++ /dev/null @@ -1,10 +0,0 @@ -.. _sec-bulk: - -autocat.bulk -++++++++++++ - -Tools for automating the generation of bulk mono-elemental systems - -.. automodule:: autocat.bulk - :members: - :undoc-members: diff --git a/docs/src/module_reference/index.rst b/docs/src/module_reference/index.rst deleted file mode 100644 index 7a6ab439..00000000 --- a/docs/src/module_reference/index.rst +++ /dev/null @@ -1,14 +0,0 @@ -.. _sec-module-reference: - -Module reference -++++++++++++++++ - -.. toctree:: - :maxdepth: 3 - :caption: Contents - - bulk - surface - saa - mpea - adsorption diff --git a/docs/src/module_reference/mpea.rst b/docs/src/module_reference/mpea.rst deleted file mode 100644 index 47e571f4..00000000 --- a/docs/src/module_reference/mpea.rst +++ /dev/null @@ -1,10 +0,0 @@ -.. _sec-mpea: - -autocat.mpea -++++++++++++ - -Tools for automating the generation of Multi-Principal Element Alloys (a.k.a high-entropy alloys) - -.. automodule:: autocat.mpea - :members: - :undoc-members: diff --git a/docs/src/module_reference/saa.rst b/docs/src/module_reference/saa.rst deleted file mode 100644 index a1b0396f..00000000 --- a/docs/src/module_reference/saa.rst +++ /dev/null @@ -1,10 +0,0 @@ -.. _sec-saa: - -autocat.saa -+++++++++++ - -Tools for generating single-atom alloy structures - -.. automodule:: autocat.saa - :members: - :undoc-members: diff --git a/docs/src/module_reference/surface.rst b/docs/src/module_reference/surface.rst deleted file mode 100644 index c5963b52..00000000 --- a/docs/src/module_reference/surface.rst +++ /dev/null @@ -1,10 +0,0 @@ -.. _sec-surface: - -autocat.surface -+++++++++++++++ - -Tools for automatically generating mono-elemental slabs - -.. 
automodule:: autocat.surface - :members: - :undoc-members: diff --git a/examples/adsorbing_molecules_on_surfaces.ipynb b/examples/adsorbing_molecules_on_surfaces.ipynb new file mode 100644 index 00000000..9e295aab --- /dev/null +++ b/examples/adsorbing_molecules_on_surfaces.ipynb @@ -0,0 +1,328 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 36, + "id": "eaee58ba", + "metadata": {}, + "outputs": [], + "source": [ + "from autocat.surface import generate_surface_structures\n", + "\n", + "from autocat.adsorption import generate_adsorbed_structures\n", + "from autocat.adsorption import generate_molecule\n", + "\n", + "from autocat.data.intermediates import ORR_INTERMEDIATE_NAMES" + ] + }, + { + "cell_type": "markdown", + "id": "f89805bd", + "metadata": {}, + "source": [ + "In this example we show how to use `AutoCat` to generate adsorption structures given a surface structure" + ] + }, + { + "cell_type": "markdown", + "id": "69d3233d", + "metadata": {}, + "source": [ + "# Generating Reaction Structures" + ] + }, + { + "cell_type": "markdown", + "id": "ccd545c4", + "metadata": {}, + "source": [ + "Let's start by making a `Pt111` slab for demonstration purposes. But in general this can be any surface you'd like as long as you have it in the form of an `ase.Atoms` object or written to disk in an `ase` readable format." + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "6711809e", + "metadata": {}, + "outputs": [], + "source": [ + "slab_dictionary = generate_surface_structures(\n", + " species_list = [\"Pt\"],\n", + " facets = {\"Pt\": [\"111\"]},\n", + " n_fixed_layers = 2\n", + ")\n", + "\n", + "clean_slab = slab_dictionary[\"Pt\"][\"fcc111\"][\"structure\"]" + ] + }, + { + "cell_type": "markdown", + "id": "e5bc6f7e", + "metadata": {}, + "source": [ + "Now that we have our clean slab, we can start adsorbing molecules onto the surface." 
+ ] + }, + { + "cell_type": "markdown", + "id": "75ab5a3d", + "metadata": {}, + "source": [ + "To fully characterize this surface for its activity toward evolving hydrogen (HER), we'd need to adsorb `H` onto every symmetry site of the surface. As the choice of `Pt111` was arbitrary, the function demo'd here works for any surface (through `pymatgen`'s implementation of Delaunay Triangulation)." + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "4d48cb62", + "metadata": {}, + "outputs": [], + "source": [ + "h_adsorption_structure_dictionary = generate_adsorbed_structures(\n", + " surface=clean_slab,\n", + " use_all_sites=True, # to consider all identified sites\n", + " adsorbates=[\"H\"],\n", + " height={\"H\" : 1.5}, # manually specify height. default guess based on covalent radii of nearest neighbors\n", + " write_to_disk = False\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "e7e1e3ce", + "metadata": {}, + "source": [ + "This will generate a dictionary of all generated adsorption structures with the following structure:\n", + "\n", + "- Adsorbate Names\n", + " - Symmetry Site types (ie. 
hollow, ontop, bridge) or Custom Label\n", + " - `x-y` coordinate of each site\n", + " - `ase.Atoms` structure\n", + " - Path to structure file (in the `ase.traj` format)" + ] + }, + { + "cell_type": "markdown", + "id": "830e6d48", + "metadata": {}, + "source": [ + "Here we have all three types of symmetry sites present" + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "id": "6afb4a44", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "dict_keys(['ontop', 'bridge', 'hollow'])\n" + ] + } + ], + "source": [ + "print(h_adsorption_structure_dictionary[\"H\"].keys())" + ] + }, + { + "cell_type": "markdown", + "id": "70b5335a", + "metadata": {}, + "source": [ + "And we can confirm that it identified both hollow sites:" + ] + }, + { + "cell_type": "code", + "execution_count": 32, + "id": "e846d5a8", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "dict_keys(['6.93_5.601', '9.702_4.001'])\n" + ] + } + ], + "source": [ + "print(h_adsorption_structure_dictionary[\"H\"][\"hollow\"].keys())" + ] + }, + { + "cell_type": "markdown", + "id": "5c3bc7ac", + "metadata": {}, + "source": [ + "Instead of exhaustively considering all sites, it can be restricted to specific types via `site_types`. \n", + "\n", + "Or alternatively, if we want to consider only manually specified sites, that can be done via `adsorption_sites`. When specifying the sites manually in this way, we need to provide them as a dictionary with keys as to how we'd like the site labelled. 
This is solely used for organizing the output dictionary" + ] + }, + { + "cell_type": "code", + "execution_count": 43, + "id": "eae177e2", + "metadata": {}, + "outputs": [], + "source": [ + "h_manual_adsorption_structure_dictionary = generate_adsorbed_structures(\n", + " surface=clean_slab,\n", + " use_all_sites=False,\n", + " adsorbates=[\"H\"],\n", + " adsorption_sites={\"custom\": [(0.,0.)]},\n", + " write_to_disk = False\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 45, + "id": "7eeddd64", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "dict_keys(['custom'])\n", + "dict_keys(['0.0_0.0'])\n" + ] + } + ], + "source": [ + "print(h_manual_adsorption_structure_dictionary[\"H\"].keys())\n", + "print(h_manual_adsorption_structure_dictionary[\"H\"][\"custom\"].keys())" + ] + }, + { + "cell_type": "markdown", + "id": "13e24624", + "metadata": {}, + "source": [ + "`AutoCat` also has some defaults for generating structures for considering the Oxygen Reduction/Evolution Reaction as well as Nitrogen Reduction. These can be found within `autocat.data.intermediates`. Let's generate the ORR adsorption structures on this slab as an example." + ] + }, + { + "cell_type": "code", + "execution_count": 34, + "id": "dd3d8ea1", + "metadata": {}, + "outputs": [], + "source": [ + "orr_adsorption_structure_dictionary = generate_adsorbed_structures(\n", + " surface=clean_slab,\n", + " use_all_sites = True, # to consider all identified sites (can also manually specify via `sites`)\n", + " adsorbates=ORR_INTERMEDIATE_NAMES,\n", + " write_to_disk = False\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "da19d412", + "metadata": {}, + "source": [ + "This places all of the relevant adsorbate molecules at all of the identified sites." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 35, + "id": "78748e9b", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "dict_keys(['OOH', 'O', 'OH', 'references'])\n" + ] + } + ], + "source": [ + "print(orr_adsorption_structure_dictionary.keys())" + ] + }, + { + "cell_type": "markdown", + "id": "51561b67", + "metadata": {}, + "source": [ + "It's important to note that if you already have the adsorbate molecule you'd like to consider as an `ase.Atoms` object, that can be supplied as well via a `dict`. We are going to use `autocat.adsorption.generate_molecule_object` to generate an example, but this can be anything (e.g. an `*.sdf` read by `ase.io.read`)" + ] + }, + { + "cell_type": "code", + "execution_count": 39, + "id": "7be25d8e", + "metadata": {}, + "outputs": [], + "source": [ + "nh2_mol = generate_molecule(\"NH2\")[\"NH2\"].get(\"structure\")\n", + "\n", + "nh2_adsorption_structure_dictionary = generate_adsorbed_structures(\n", + " surface=clean_slab,\n", + " use_all_sites = True,\n", + " adsorbates = {\"NH2\": nh2_mol},\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 41, + "id": "99a12fe8", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "dict_keys(['H2N'])\n" + ] + } + ], + "source": [ + "print(nh2_adsorption_structure_dictionary.keys())" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.9" + }, + "toc": { + "base_numbering": 1, + "nav_menu": {}, + "number_sections": true, + "sideBar": true, + "skip_h1_title": false, + "title_cell": "Table of Contents", + "title_sidebar": "Contents", + "toc_cell": false, + 
"toc_position": {}, + "toc_section_display": true, + "toc_window_display": false + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/examples/generating_surface_slabs.ipynb b/examples/generating_surface_slabs.ipynb new file mode 100644 index 00000000..2cd86a3b --- /dev/null +++ b/examples/generating_surface_slabs.ipynb @@ -0,0 +1,183 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "id": "319554e9", + "metadata": {}, + "outputs": [], + "source": [ + "from autocat.surface import generate_surface_structures" + ] + }, + { + "cell_type": "markdown", + "id": "3bbcebd9", + "metadata": {}, + "source": [ + "In this tutorial we show how to generate slabs using `AutoCat`" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "6d42c7dc", + "metadata": {}, + "outputs": [], + "source": [ + "slab_dictionary = generate_surface_structures(\n", + " species_list = [\"Pt\", \"Fe\", \"Ru\"],\n", + " facets = {\"Pt\": [\"100\"], \"Fe\": [\"111\"]}, # If we want to specify only specific facets\n", + " supercell_dim = (2,2,5), # dimensions of the supercell\n", + " default_lat_param_lib = \"pbe_pw\", # where default lattice parameters are pulled from\n", + " vacuum = 10.,\n", + " n_fixed_layers = 3, # fixes bottom 3 layers\n", + " write_to_disk = False # if we want to write the slabs to disk in the AutoCat directory format\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "1818f3f0", + "metadata": {}, + "source": [ + "This generates a dictionary containing the structures of the slabs. 
The organization of this dictionary is as follows:\n", + "\n", + "- Species\n", + " - Crystal Structure/Facet\n", + " - `ase.Atoms` structure\n", + " - Path to structure file (in the `ase.traj` format)" + ] + }, + { + "cell_type": "markdown", + "id": "d49b437a", + "metadata": {}, + "source": [ + "Thus, going layer by layer for this example, the first keys correspond to:" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "91f530b4", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "dict_keys(['Pt', 'Fe', 'Ru'])\n" + ] + } + ], + "source": [ + "print(slab_dictionary.keys())" + ] + }, + { + "cell_type": "markdown", + "id": "7c4a89fd", + "metadata": {}, + "source": [ + "Continuing down `Pt` for example, the next level is then:" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "17fb6812", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "dict_keys(['fcc100'])\n" + ] + } + ], + "source": [ + "print(slab_dictionary[\"Pt\"].keys())" + ] + }, + { + "cell_type": "markdown", + "id": "8521948b", + "metadata": {}, + "source": [ + "Going down another level, we get both the `ase.Atoms` structure object as well as the file location. 
Since we didn't write to disk, the latter returns `None`" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "db7b9248", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Atoms(symbols='Pt20', pbc=[True, True, False], cell=[5.612606335552851, 5.612606335552851, 27.937424], tags=..., constraint=FixAtoms(indices=[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]))\n" + ] + } + ], + "source": [ + "print(slab_dictionary[\"Pt\"][\"fcc100\"][\"structure\"])" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "f9d94169", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "None\n" + ] + } + ], + "source": [ + "print(slab_dictionary[\"Pt\"][\"fcc100\"][\"traj_file_path\"])" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.9" + }, + "toc": { + "base_numbering": 1, + "nav_menu": {}, + "number_sections": true, + "sideBar": true, + "skip_h1_title": false, + "title_cell": "Table of Contents", + "title_sidebar": "Contents", + "toc_cell": false, + "toc_position": {}, + "toc_section_display": true, + "toc_window_display": false + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/examples/learning/conducting_simulated_sl_search.ipynb b/examples/learning/conducting_simulated_sl_search.ipynb new file mode 100644 index 00000000..c96507e4 --- /dev/null +++ b/examples/learning/conducting_simulated_sl_search.ipynb @@ -0,0 +1,185 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np\n", + "\n", + "from sklearn.gaussian_process import 
GaussianProcessRegressor\n", + "\n", + "from matminer.featurizers.composition import ElementProperty\n", + "\n", + "from autocat.saa import generate_saa_structures\n", + "\n", + "from autocat.utils import extract_structures\n", + "\n", + "from autocat.learning.sequential import DesignSpace\n", + "from autocat.learning.sequential import simulated_sequential_learning" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "In this example we show how to conduct a simulated sequential learning run." + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "saa_dictionary = generate_saa_structures(\n", + " host_species=[\"Cu\", \"Au\", \"Fe\", \"Ag\", \"Ti\"],\n", + " dopant_species=[\"Pt\", \"Pd\", \"Co\", \"Ni\"],\n", + " facets={\"Cu\": [\"111\"], \"Au\": [\"111\"], \"Fe\": [\"110\"], \"Ag\": [\"111\"], \"Ti\": [\"0001\"]}\n", + ")\n", + "\n", + "saa_structures = extract_structures(saa_dictionary)" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "labels = np.random.randint(-15,15,size=len(saa_structures))" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "saa_design_space = DesignSpace(\n", + " design_space_structures=saa_structures,\n", + " design_space_labels=labels\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Sequential Learning Iteration #1\n", + "Sequential Learning Iteration #2\n", + "Sequential Learning Iteration #3\n", + "Sequential Learning Iteration #4\n", + "Sequential Learning Iteration #5\n" + ] + } + ], + "source": [ + "sl_history = simulated_sequential_learning(\n", + " full_design_space=saa_design_space,\n", + " init_training_size=2,\n", + " predictor_kwargs={\n", + " \"featurizer_class\": ElementProperty, \n", + " 
\"featurization_kwargs\":{\"preset\": \"magpie\"}, \n", + " \"model_class\": GaussianProcessRegressor\n", + " },\n", + " candidate_selection_kwargs={\"aq\": \"MU\", \"include_hhi\": True},\n", + " number_of_sl_loops=5\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "+----------------------------------+--------------------+\n", + "| | Sequential Learner |\n", + "+----------------------------------+--------------------+\n", + "| iteration count | 6 |\n", + "| next candidate system structures | ['PdTi35'] |\n", + "| next candidate system indices | [17] |\n", + "| acquisition function | MU |\n", + "| # of candidates to pick | 1 |\n", + "| target maximum | None |\n", + "| target minimum | None |\n", + "| include hhi? | True |\n", + "| include segregation energies? | False |\n", + "+----------------------------------+--------------------+\n", + "+-------------------------+--------------------------------------------------------+\n", + "| | DesignSpace |\n", + "+-------------------------+--------------------------------------------------------+\n", + "| total # of systems | 20 |\n", + "| # of unlabelled systems | 13 |\n", + "| unique species present | ['Cu', 'Pt', 'Pd', 'Co', 'Ni', 'Au', 'Fe', 'Ag', 'Ti'] |\n", + "| maximum label | nan |\n", + "| minimum label | nan |\n", + "+-------------------------+--------------------------------------------------------+\n", + "+---------+--------------------------------------------------------+\n", + "| | Predictor |\n", + "+---------+--------------------------------------------------------+\n", + "| class | sklearn.gaussian_process._gpr.GaussianProcessRegressor |\n", + "| kwargs | None |\n", + "| is fit? 
| True |\n", + "+---------+--------------------------------------------------------+\n", + "+-----------------------------------+------------------------------------------------------------+\n", + "| | Featurizer |\n", + "+-----------------------------------+------------------------------------------------------------+\n", + "| class | matminer.featurizers.composition.composite.ElementProperty |\n", + "| kwargs | None |\n", + "| species list | ['Ti', 'Fe', 'Co', 'Ni', 'Pt', 'Pd', 'Au', 'Ag', 'Cu'] |\n", + "| maximum structure size | 36 |\n", + "| preset | magpie |\n", + "| design space structures provided? | True |\n", + "+-----------------------------------+------------------------------------------------------------+\n" + ] + } + ], + "source": [ + "print(sl_history)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "interpreter": { + "hash": "bbcedd833a666dedd7a02add1ace8ad982a80fa78e4f715016af545a1fb42dd5" + }, + "kernelspec": { + "display_name": "Python 3.9.10 ('autocat39')", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.10" + }, + "orig_nbformat": 4 + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/examples/learning/defining_a_design_space.ipynb b/examples/learning/defining_a_design_space.ipynb new file mode 100644 index 00000000..c96d9f2b --- /dev/null +++ b/examples/learning/defining_a_design_space.ipynb @@ -0,0 +1,138 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np\n", + "\n", + "from autocat.saa import generate_saa_structures\n", + "\n", + "from autocat.utils import extract_structures\n", + "\n", + "from 
autocat.learning.sequential import DesignSpace" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "In this example we show how to define a `DesignSpace` containing single-atom alloys and corresponding labels.\n", + "\n", + "The data is as follows:\n", + "\n", + "| SAA | Label |\n", + "| --- | --- |\n", + "| Ni1/Cu111 | -0.3 |\n", + "| Ni1/Au111 | Unknown |\n", + "| Pd1/Cu111 | 0.2 |\n", + "| Pd1/Au111 | -0.1 |" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "# Create single-atom alloy structures\n", + "saa_dictionary = generate_saa_structures(\n", + " host_species=[\"Cu\", \"Au\"],\n", + " dopant_species=[\"Ni\", \"Pd\"],\n", + " n_fixed_layers=2,\n", + " facets={\"Cu\":[\"111\"], \"Au\":[\"111\"]}\n", + ")\n", + "\n", + "saa_structures = extract_structures(saa_dictionary)" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "0 Cu35Ni\n", + "1 Cu35Pd\n", + "2 Au35Ni\n", + "3 Au35Pd\n" + ] + } + ], + "source": [ + "# Get indices of each structure\n", + "for idx, struct in enumerate(saa_structures):\n", + " print(idx, struct.get_chemical_formula())" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [], + "source": [ + "# Generate labels in the correct ordering as per above\n", + " # Ni1Cu Pd1Cu Ni1Au Pd1Au\n", + "labels = np.array([-0.3, 0.2, np.nan, -0.1])" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "+-------------------------+--------------------------+\n", + "| | DesignSpace |\n", + "+-------------------------+--------------------------+\n", + "| total # of systems | 4 |\n", + "| # of unlabelled systems | 1 |\n", + "| unique species present | ['Cu', 'Ni', 'Pd', 'Au'] |\n", + "| maximum label | 0.2 |\n", + "| minimum 
label | -0.3 |\n", + "+-------------------------+--------------------------+\n" + ] + } + ], + "source": [ + "# Define the design space\n", + "saa_design_space = DesignSpace(design_space_structures=saa_structures, design_space_labels=labels)\n", + "\n", + "print(saa_design_space)" + ] + } + ], + "metadata": { + "interpreter": { + "hash": "bbcedd833a666dedd7a02add1ace8ad982a80fa78e4f715016af545a1fb42dd5" + }, + "kernelspec": { + "display_name": "Python 3.9.10 ('autocat39')", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.10" + }, + "orig_nbformat": 4 + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/examples/learning/featurizing_structures.ipynb b/examples/learning/featurizing_structures.ipynb new file mode 100644 index 00000000..c0674baf --- /dev/null +++ b/examples/learning/featurizing_structures.ipynb @@ -0,0 +1,236 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "from autocat.surface import generate_surface_structures\n", + "from autocat.saa import generate_saa_structures\n", + "\n", + "from autocat.utils import extract_structures\n", + "\n", + "from autocat.learning.featurizers import Featurizer\n", + "\n", + "from dscribe.descriptors import CoulombMatrix\n", + "from matminer.featurizers.composition import ElementProperty" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "In this example we show how to use `AutoCat` to featurize structures with the `Featurizer` class.\n", + "\n", + "Here we will be featurizing mono-elemental surfaces." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "# Generate structures to be featurized\n", + "mono_surfaces_dictionary = generate_surface_structures(\n", + " species_list=[\"Fe\", \"Ru\", \"Cu\", \"Pd\"],\n", + " facets={\"Fe\": [\"110\"], \"Ru\":[\"0001\"], \"Cu\":[\"111\"], \"Pd\":[\"111\"]}\n", + ")\n", + "mono_surfaces_structures = extract_structures(mono_surfaces_dictionary)\n", + "\n", + "saa_surfaces_dictionary = generate_saa_structures(\n", + " host_species=[\"Cu\", \"Au\"],\n", + " dopant_species=[\"Pt\", \"Pd\"],\n", + " facets={\"Cu\":[\"111\"], \"Au\":[\"111\"]}\n", + ")\n", + "saa_surfaces_structures = extract_structures(saa_surfaces_dictionary)\n", + "\n", + "all_structures = mono_surfaces_structures.copy()\n", + "all_structures.extend(saa_surfaces_structures)" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Fe36\n" + ] + } + ], + "source": [ + "print(all_structures[0].get_chemical_formula())" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "+-----------------------------------+-------------------------------------------------+\n", + "| | Featurizer |\n", + "+-----------------------------------+-------------------------------------------------+\n", + "| class | dscribe.descriptors.coulombmatrix.CoulombMatrix |\n", + "| kwargs | None |\n", + "| species list | ['Fe', 'Ru', 'Pt', 'Pd', 'Au', 'Cu'] |\n", + "| maximum structure size | 36 |\n", + "| preset | None |\n", + "| design space structures provided? 
| True |\n", + "+-----------------------------------+-------------------------------------------------+\n" + ] + } + ], + "source": [ + "# Instantiate featurizer based on Coulomb Matrix\n", + "coulomb_featurizer = Featurizer(\n", + " featurizer_class=CoulombMatrix, \n", + " design_space_structures=all_structures\n", + ")\n", + "print(coulomb_featurizer)" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "(1296,)\n" + ] + } + ], + "source": [ + "# Featurize just Fe\n", + "fe_feature_vector = coulomb_featurizer.featurize_single(all_structures[0])\n", + "print(fe_feature_vector.shape)" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "(8, 1296)\n" + ] + } + ], + "source": [ + "# Featurize all structures into a single matrix\n", + "feature_matrix = coulomb_featurizer.featurize_multiple(all_structures)\n", + "print(feature_matrix.shape)" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "+-----------------------------------+------------------------------------------------------------+\n", + "| | Featurizer |\n", + "+-----------------------------------+------------------------------------------------------------+\n", + "| class | matminer.featurizers.composition.composite.ElementProperty |\n", + "| kwargs | None |\n", + "| species list | ['Fe', 'Ru', 'Pt', 'Pd', 'Au', 'Cu'] |\n", + "| maximum structure size | 36 |\n", + "| preset | matminer |\n", + "| design space structures provided? 
| True |\n", + "+-----------------------------------+------------------------------------------------------------+\n" + ] + } + ], + "source": [ + "# Instantiate element property featurizer\n", + "element_featurizer = Featurizer(\n", + " featurizer_class=ElementProperty,\n", + " design_space_structures=all_structures,\n", + " preset=\"matminer\"\n", + ")\n", + "\n", + "print(element_featurizer)" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "(65,)\n" + ] + } + ], + "source": [ + "# Featurize just Fe\n", + "fe_feature_vector = element_featurizer.featurize_single(all_structures[0])\n", + "print(fe_feature_vector.shape)" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "(8, 65)\n" + ] + } + ], + "source": [ + "# Featurize all structures at once\n", + "feature_matrix = element_featurizer.featurize_multiple(all_structures)\n", + "print(feature_matrix.shape)" + ] + } + ], + "metadata": { + "interpreter": { + "hash": "bbcedd833a666dedd7a02add1ace8ad982a80fa78e4f715016af545a1fb42dd5" + }, + "kernelspec": { + "display_name": "Python 3.9.10 ('autocat39')", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.10" + }, + "orig_nbformat": 4 + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/examples/learning/making_predictions.ipynb b/examples/learning/making_predictions.ipynb new file mode 100644 index 00000000..cc668ae3 --- /dev/null +++ b/examples/learning/making_predictions.ipynb @@ -0,0 +1,199 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": 
[ + "import numpy as np\n", + "\n", + "from sklearn.gaussian_process import GaussianProcessRegressor\n", + "from sklearn.gaussian_process.kernels import RBF\n", + "\n", + "from dscribe.descriptors import SineMatrix\n", + "\n", + "from autocat.surface import generate_surface_structures\n", + "from autocat.adsorption import generate_adsorbed_structures\n", + "\n", + "from autocat.utils import extract_structures\n", + "\n", + "from autocat.learning.predictors import Predictor" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "In this example we show how to train a `Predictor` and use it to make predictions for adsorbates on Pt." + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "# Generate adsorption structures\n", + "substrates_dictionary = generate_surface_structures(\n", + " species_list=[\"Pt\"],\n", + " facets={\"Pt\":[\"100\"]}\n", + ")\n", + "\n", + "substrate = extract_structures(substrates_dictionary)[0]\n", + "\n", + "adsorbed_dictionary = generate_adsorbed_structures(\n", + " surface=substrate,\n", + " adsorbates=[\"H\", \"O\", \"N\", \"C\", \"Na\"],\n", + " use_all_sites=False,\n", + ")\n", + "\n", + "adsorbed_structures = extract_structures(adsorbed_dictionary)" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "# Generate labels\n", + "# N.B. 
here they are random for convenience, but should be actual values to train a meaningful `Predictor`\n", + "\n", + "labels = np.random.randint(-10,10,size=len(adsorbed_structures))" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "+---------+--------------------------------------------------------+\n", + "| | Predictor |\n", + "+---------+--------------------------------------------------------+\n", + "| class | sklearn.gaussian_process._gpr.GaussianProcessRegressor |\n", + "| kwargs | {'kernel': RBF(length_scale=0.5)} |\n", + "| is fit? | False |\n", + "+---------+--------------------------------------------------------+\n", + "+-----------------------------------+-------------------------------------------+\n", + "| | Featurizer |\n", + "+-----------------------------------+-------------------------------------------+\n", + "| class | dscribe.descriptors.sinematrix.SineMatrix |\n", + "| kwargs | None |\n", + "| species list | ['Na', 'Pt', 'C', 'N', 'O', 'H'] |\n", + "| maximum structure size | 37 |\n", + "| preset | None |\n", + "| design space structures provided? 
| True |\n", + "+-----------------------------------+-------------------------------------------+\n" + ] + } + ], + "source": [ + "kernel = RBF(0.5)\n", + "\n", + "predictor = Predictor(\n", + " model_class=GaussianProcessRegressor,\n", + " model_kwargs={\"kernel\": kernel},\n", + " featurizer_class=SineMatrix,\n", + " featurization_kwargs={\"design_space_structures\": adsorbed_structures}\n", + ")\n", + "\n", + "print(predictor)" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "True\n" + ] + } + ], + "source": [ + "predictor.fit(\n", + " training_structures=adsorbed_structures,\n", + " y=labels\n", + ")\n", + "\n", + "print(predictor.is_fit)" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [], + "source": [ + "test_dictionary = generate_adsorbed_structures(\n", + " surface=substrate,\n", + " adsorbates=[\"S\", \"Li\", \"P\"],\n", + " use_all_sites=False\n", + ")\n", + "\n", + "test_structures = extract_structures(test_dictionary)" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "(3,)\n", + "(3,)\n" + ] + } + ], + "source": [ + "# Make predictions on unseen data\n", + "predictions, uncertainties = predictor.predict(testing_structures=test_structures)\n", + "print(predictions.shape)\n", + "print(uncertainties.shape)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "interpreter": { + "hash": "bbcedd833a666dedd7a02add1ace8ad982a80fa78e4f715016af545a1fb42dd5" + }, + "kernelspec": { + "display_name": "Python 3.9.10 ('autocat39')", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + 
"name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.10" + }, + "orig_nbformat": 4 + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/mkdocs.yml b/mkdocs.yml new file mode 100644 index 00000000..03259b57 --- /dev/null +++ b/mkdocs.yml @@ -0,0 +1,63 @@ +site_name: AutoCat Documentation +theme: + name: material + features: + - navigation.tabs + - navigation.tabs.sticky + - navigation.sections + - navigation.top + - toc.integrate + logo: img/autocat_icon.png +markdown_extensions: + - pymdownx.highlight + - pymdownx.superfences + - pymdownx.inlinehilite + - pymdownx.arithmatex: + generic: true + - attr_list +extra_javascript: + - javascripts/mathjax.js + - https://polyfill.io/v3/polyfill.min.js?features=es6 + - https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-mml-chtml.js + +plugins: + - search + - mkdocstrings: + default_handler: python + handlers: + python: + selection: + docstring_style: "numpy" + rendering: + show_source: true + +nav: + - Home: README.md + - User Guide: + - Sequential Learning: + - Featurizers: User_Guide/Learning/featurizers.md + - Predictors: User_Guide/Learning/predictors.md + - Sequential: User_Guide/Learning/sequential.md + - Structure Generation: + - Bulk: User_Guide/Structure_Generation/bulk.md + - Surfaces: User_Guide/Structure_Generation/surface.md + - Adsorption: User_Guide/Structure_Generation/adsorption.md + - Single Atom Alloys: User_Guide/Structure_Generation/saa.md + - Data: + - HHI: User_Guide/Data/hhi.md + - Segregation Energies: User_Guide/Data/segregation_energies.md + - Lattice Parameters: User_Guide/Data/lattice_parameters.md + - Reaction Intermediates: User_Guide/Data/intermediates.md + - Tutorials: + - Training a Predictor on hydrogen adsorption energies: Tutorials/pred_h.md + - Conducting a simulated sequential learning run: Tutorials/sl.md + - API: + - Sequential Learning: + - autocat.learning.featurizers: API/Learning/featurizers.md + - autocat.learning.predictors: 
API/Learning/predictors.md + - autocat.learning.sequential: API/Learning/sequential.md + - Structure Generation: + - autocat.bulk: API/Structure_Generation/bulk.md + - autocat.surface: API/Structure_Generation/surface.md + - autocat.adsorption: API/Structure_Generation/adsorption.md + - autocat.saa: API/Structure_Generation/saa.md diff --git a/requirements.txt b/requirements.txt index 6d717763..54f8cb3d 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,4 +1,7 @@ -numpy==1.22.0 -ase==3.21.1 -pymatgen==2022.0.17 +numpy==1.22.3 +ase==3.22.1 +pymatgen==2022.3.29 fire==0.4.0 +matminer==0.7.3 +dscribe==0.4.0 +prettytable==3.2.0 \ No newline at end of file diff --git a/setup.py b/setup.py index 4ccfeaa0..94e174f0 100644 --- a/setup.py +++ b/setup.py @@ -22,6 +22,6 @@ ], package_dir={"": "src"}, packages=find_packages(where="src"), - install_requires=["numpy<=1.22.0", "ase", "pymatgen<=2022.0.17", "fire",], + install_requires=["numpy", "ase", "pymatgen", "fire",], include_package_data=True, ) diff --git a/src/autocat/VERSION.txt b/src/autocat/VERSION.txt index b5d4d818..6988b62d 100644 --- a/src/autocat/VERSION.txt +++ b/src/autocat/VERSION.txt @@ -1 +1 @@ -2022.3.31 +2022.5.23 diff --git a/src/autocat/data/hhi/__init__.py b/src/autocat/data/hhi/__init__.py new file mode 100644 index 00000000..7504ee06 --- /dev/null +++ b/src/autocat/data/hhi/__init__.py @@ -0,0 +1,26 @@ +import json +import pkg_resources + +__all__ = ["HHI"] +""" +Values obtained from dx.doi.org/10.1021/cm400893e + +Keys: + production: + Calculated based on elemental production + + reserves: + Calculated based on known elemental reserves +""" + +raw_hhi_p = pkg_resources.resource_filename("autocat.data.hhi", "hhi_p.json") + +with open(raw_hhi_p) as fr: + HHI_PRODUCTION = json.load(fr) + +raw_hhi_r = pkg_resources.resource_filename("autocat.data.hhi", "hhi_r.json") + +with open(raw_hhi_r) as fr: + HHI_RESERVES = json.load(fr) + +HHI = {"production": HHI_PRODUCTION, "reserves": HHI_RESERVES} diff 
--git a/src/autocat/data/hhi/hhi_p.json b/src/autocat/data/hhi/hhi_p.json new file mode 100644 index 00000000..3681cacf --- /dev/null +++ b/src/autocat/data/hhi/hhi_p.json @@ -0,0 +1,79 @@ +{ + "He": 3200, + "Li": 2900, + "Be": 8000, + "B": 2900, + "C": 500, + "N": 1300, + "O": 500, + "F": 1500, + "Na": 1100, + "Mg": 5300, + "Al": 1600, + "Si": 4700, + "P": 2000, + "S": 700, + "Cl": 1500, + "K": 1700, + "Ca": 3900, + "Sc": 5500, + "Ti": 1100, + "V": 3300, + "Cr": 3100, + "Mn": 1600, + "Fe": 2400, + "Co": 3100, + "Ni": 1000, + "Cu": 1600, + "Zn": 1600, + "Ga": 5500, + "Ge": 5300, + "As": 3300, + "Se": 2200, + "Br": 3300, + "Rb": 6000, + "Sr": 4200, + "Y": 9800, + "Zr": 3400, + "Nb": 8500, + "Mo": 2400, + "Ru": 3200, + "Rh": 3200, + "Pd": 3200, + "Ag": 1200, + "Cd": 1700, + "In": 3300, + "Sn": 2600, + "Sb": 7900, + "Te": 2900, + "I": 4900, + "Cs": 6000, + "Ba": 3000, + "La": 9500, + "Ce": 9500, + "Pr": 9500, + "Nd": 9500, + "Pm": 9500, + "Sm": 9500, + "Eu": 9500, + "Gd": 9500, + "Tb": 9500, + "Dy": 9500, + "Ho": 9500, + "Er": 9500, + "Tm": 9500, + "Yb": 9500, + "Lu": 9500, + "Hf": 3400, + "Ta": 2300, + "W": 7000, + "Re": 3300, + "Os": 5500, + "Ir": 5500, + "Pt": 5500, + "Au": 1100, + "Hg": 5500, + "Tl": 6500, + "Pb": 2700, + "Bi": 5300 +} diff --git a/src/autocat/data/hhi/hhi_r.json b/src/autocat/data/hhi/hhi_r.json new file mode 100644 index 00000000..79079bd1 --- /dev/null +++ b/src/autocat/data/hhi/hhi_r.json @@ -0,0 +1,79 @@ +{ + "He": 3900, + "Li": 4200, + "Be": 4000, + "B": 2000, + "C": 500, + "N": 500, + "O": 500, + "F": 1500, + "Na": 500, + "Mg": 500, + "Al": 1000, + "Si": 1000, + "P": 5100, + "S": 1000, + "Cl": 1500, + "K": 7200, + "Ca": 1500, + "Sc": 4500, + "Ti": 1600, + "V": 3400, + "Cr": 4100, + "Mn": 1800, + "Fe": 1400, + "Co": 2700, + "Ni": 1500, + "Cu": 1500, + "Zn": 1900, + "Ga": 1900, + "Ge": 1900, + "As": 4000, + "Se": 1900, + "Br": 6900, + "Rb": 6000, + "Sr": 3000, + "Y": 2600, + "Zr": 2600, + "Nb": 8800, + "Mo": 5300, + "Ru": 8000, + "Rh": 8000, 
+ "Pd": 8000, + "Ag": 1400, + "Cd": 1300, + "In": 2000, + "Sn": 1600, + "Sb": 3400, + "Te": 4900, + "I": 4800, + "Cs": 6000, + "Ba": 2300, + "La": 3100, + "Ce": 3100, + "Pr": 3100, + "Nd": 3100, + "Pm": 3100, + "Sm": 3100, + "Eu": 3100, + "Gd": 3100, + "Tb": 3100, + "Dy": 3100, + "Ho": 3100, + "Er": 3100, + "Tm": 3100, + "Yb": 3100, + "Lu": 3100, + "Hf": 2600, + "Ta": 4800, + "W": 4300, + "Re": 3300, + "Os": 9100, + "Ir": 9100, + "Pt": 9100, + "Au": 1000, + "Hg": 3100, + "Tl": 6500, + "Pb": 1800, + "Bi": 6000 +} diff --git a/src/autocat/data/lattice_parameters/bulk_beefvdw_fd.json b/src/autocat/data/lattice_parameters/bulk_beefvdw_fd.json index 920ad171..5b9b211f 100644 --- a/src/autocat/data/lattice_parameters/bulk_beefvdw_fd.json +++ b/src/autocat/data/lattice_parameters/bulk_beefvdw_fd.json @@ -1,7 +1,75 @@ { - "Pt": {"a": 4.013732}, - "Pd": {"a": 3.998072}, - "Fe": {"a": 2.893316}, - "Ni": {"a": 3.577644}, - "Ru": {"a": 2.748451, "c": 4.314765} -} + "Pt": { + "a": 4.013732 + }, + "Pd": { + "a": 3.998072 + }, + "Fe": { + "a": 2.893316 + }, + "Ni": { + "a": 3.577644 + }, + "Ag": { + "a": 4.26015 + }, + "Au": { + "a": 4.235948 + }, + "Cu": { + "a": 3.70834 + }, + "Rh": { + "a": 3.882724 + }, + "V": { + "a": 3.01497 + }, + "W": { + "a": 3.192674 + }, + "Mo": { + "a": 3.17331 + }, + "Nb": { + "a": 3.316336 + }, + "Cr": { + "a": 2.863204 + }, + "Ir": { + "a": 3.893 + }, + "Ta": { + "a": 3.334218 + }, + "Hf": { + "a": 3.218418, + "c": 5.081832 + }, + "Zr": { + "a": 3.246790, + "c": 5.172850 + }, + "Co": { + "a": 2.500673, + "c": 4.097051 + }, + "Ti": { + "a": 2.942273, + "c": 4.667081 + }, + "Os": { + "a": 2.773357, + "c": 4.360051 + }, + "Re": { + "a": 2.798497, + "c": 4.497198 + }, + "Ru": { + "a": 2.748451, + "c": 4.314765 + } +} \ No newline at end of file diff --git a/src/autocat/data/segregation_energies/__init__.py b/src/autocat/data/segregation_energies/__init__.py new file mode 100644 index 00000000..f059fc0a --- /dev/null +++ 
b/src/autocat/data/segregation_energies/__init__.py @@ -0,0 +1,35 @@ +import json +import pkg_resources + +__all__ = ["SEGREGATION_ENERGIES"] +""" + +Keys: + raban1999: + Values obtained from https://doi.org/10.1103/PhysRevB.59.15990 + Segregation energies for different host/dopant combinations + For hosts used fcc: 111, bcc:110 (Fe100 also available), hcp:0001 + + rao2020: + Values obtained from https://doi.org/10.1007/s11244-020-01267-2 + Segregation energies for different host/dopant combinations +""" + +raw_raban_seg_ener = pkg_resources.resource_filename( + "autocat.data.segregation_energies", "raban1999.json" +) + +with open(raw_raban_seg_ener) as fr: + RABAN1999_SEGREGATION_ENERGIES = json.load(fr) + +raw_rao_seg_ener = pkg_resources.resource_filename( + "autocat.data.segregation_energies", "rao2020.json" +) + +with open(raw_rao_seg_ener) as fr: + RAO2020_SEGREGATION_ENERGIES = json.load(fr) + +SEGREGATION_ENERGIES = { + "raban1999": RABAN1999_SEGREGATION_ENERGIES, + "rao2020": RAO2020_SEGREGATION_ENERGIES, +} diff --git a/src/autocat/data/segregation_energies/raban1999.json b/src/autocat/data/segregation_energies/raban1999.json new file mode 100644 index 00000000..a8a4e6d1 --- /dev/null +++ b/src/autocat/data/segregation_energies/raban1999.json @@ -0,0 +1,652 @@ +{ + "Ti": { + "Ti": 1.2, + "V": 0.1, + "Cr": -0.24, + "Mn": -0.34, + "Fe": -0.41, + "Co": -0.56, + "Ni": -0.75, + "Cu": -0.94, + "Zr": -0.38, + "Nb": 0.03, + "Mo": 0.09, + "Tc": -0.06, + "Ru": -0.31, + "Rh": -0.62, + "Pd": -0.93, + "Ag": -1.18, + "Hf": -0.14, + "Ta": 0.25, + "W": 0.35, + "Re": 0.2, + "Os": -0.04, + "Ir": -0.37, + "Pt": -0.72, + "Au": -1.05 + }, + "V": { + "Ti": -0.49, + "V": 1.16, + "Cr": 0.3, + "Mn": 0.41, + "Fe": 0.36, + "Co": 0.15, + "Ni": -0.12, + "Cu": -0.54, + "Zr": -1.08, + "Nb": -0.41, + "Mo": 0.1, + "Tc": 0.36, + "Ru": 0.39, + "Rh": 0.13, + "Pd": -0.28, + "Ag": -0.75, + "Hf": -1.0, + "Ta": -0.23, + "W": 0.31, + "Re": 0.62, + "Os": 0.68, + "Ir": 0.51, + "Pt": 0.09, + "Au": 
-0.39 + }, + "Cr": { + "Ti": -0.72, + "V": -0.15, + "Cr": 1.46, + "Mn": -0.14, + "Fe": -0.44, + "Co": -0.67, + "Ni": -0.8, + "Cu": -1.02, + "Zr": -2.05, + "Nb": -1.15, + "Mo": -0.62, + "Tc": -0.45, + "Ru": -0.68, + "Rh": -1.25, + "Pd": -1.7, + "Ag": -1.9, + "Hf": -1.55, + "Ta": -0.98, + "W": -0.4, + "Re": -0.17, + "Os": -0.29, + "Ir": -0.81, + "Pt": -1.58, + "Au": -1.98 + }, + "Mn": { + "Ti": -0.83, + "V": -0.32, + "Cr": -0.1, + "Mn": 1.24, + "Fe": -0.12, + "Co": -0.26, + "Ni": -0.47, + "Cu": -0.77, + "Zr": -2.15, + "Nb": -1.28, + "Mo": -0.73, + "Tc": -0.48, + "Ru": -0.52, + "Rh": -0.69, + "Pd": -0.93, + "Ag": -1.31, + "Hf": -1.83, + "Ta": -1.03, + "W": -0.56, + "Re": -0.31, + "Os": -0.32, + "Ir": -0.53, + "Pt": -0.83, + "Au": -1.23 + }, + "Fe": { + "Ti": -0.39, + "V": 0.06, + "Cr": 0.1, + "Mn": -0.16, + "Fe": 1.2, + "Co": -0.14, + "Ni": -0.65, + "Cu": -0.83, + "Zr": -1.6, + "Nb": -0.65, + "Mo": -0.06, + "Tc": 0.1, + "Ru": -0.2, + "Rh": -0.52, + "Pd": -1.05, + "Ag": -1.55, + "Hf": -1.5, + "Ta": -0.35, + "W": 0.2, + "Re": 0.45, + "Os": 0.25, + "Ir": -0.15, + "Pt": -0.66, + "Au": -1.36 + }, + "Fe_100": { + "Ti": -0.69, + "V": 0.19, + "Cr": 0.16, + "Mn": -0.38, + "Fe": 1.73, + "Co": -0.03, + "Ni": -0.77, + "Cu": -1.37, + "Zr": -2.22, + "Nb": -0.83, + "Mo": 0.03, + "Tc": 0.24, + "Ru": 0.0, + "Rh": -0.53, + "Pd": -1.43, + "Ag": -2.37, + "Hf": -2.15, + "Ta": -0.5, + "W": 0.42, + "Re": 0.78, + "Os": 0.6, + "Ir": 0.08, + "Pt": -0.78, + "Au": -1.93 + }, + "Co": { + "Ti": -0.33, + "V": 0.13, + "Cr": 0.19, + "Mn": 0.1, + "Fe": -0.01, + "Co": 1.07, + "Ni": -0.13, + "Cu": -0.48, + "Zr": -1.4, + "Nb": -0.45, + "Mo": 0.0, + "Tc": 0.49, + "Ru": 0.12, + "Rh": -0.4, + "Pd": -0.6, + "Ag": -0.93, + "Hf": -0.56, + "Ta": -0.24, + "W": 0.34, + "Re": 0.72, + "Os": 0.56, + "Ir": -0.1, + "Pt": -0.38, + "Au": -0.76 + }, + "Ni": { + "Ti": -0.12, + "V": 0.2, + "Cr": 0.25, + "Mn": 0.0, + "Fe": 0.13, + "Co": 0.13, + "Ni": 0.95, + "Cu": -0.25, + "Zr": -1.16, + "Nb": -0.26, + "Mo": 0.18, + "Tc": 
0.31, + "Ru": 0.1, + "Rh": -0.1, + "Pd": -0.4, + "Ag": -0.8, + "Hf": -0.74, + "Ta": -0.01, + "W": 0.45, + "Re": 0.53, + "Os": 0.37, + "Ir": 0.16, + "Pt": -0.17, + "Au": -0.69 + }, + "Cu": { + "Ti": 0.01, + "V": 0.25, + "Cr": 0.1, + "Mn": 0.07, + "Fe": 0.28, + "Co": 0.33, + "Ni": 0.17, + "Cu": 0.77, + "Zr": -0.64, + "Nb": 0.0, + "Mo": 0.28, + "Tc": 0.3, + "Ru": 0.2, + "Rh": 0.05, + "Pd": -0.2, + "Ag": -0.42, + "Hf": -0.35, + "Ta": 0.22, + "W": 0.57, + "Re": 0.62, + "Os": 0.48, + "Ir": 0.23, + "Pt": -0.04, + "Au": -0.29 + }, + "Zr": { + "Ti": 0.06, + "V": 0.01, + "Cr": -0.47, + "Mn": -0.36, + "Fe": -0.4, + "Co": -0.45, + "Ni": -0.55, + "Cu": -0.72, + "Zr": 1.22, + "Nb": 0.19, + "Mo": 0.13, + "Tc": -0.01, + "Ru": -0.19, + "Rh": -0.41, + "Pd": -0.68, + "Ag": -0.88, + "Hf": 0.15, + "Ta": 0.33, + "W": 0.3, + "Re": 0.15, + "Os": -0.02, + "Ir": -0.25, + "Pt": -0.5, + "Au": -0.8 + }, + "Nb": { + "Ti": -0.24, + "V": 0.12, + "Cr": 0.32, + "Mn": 0.23, + "Fe": 0.29, + "Co": 0.31, + "Ni": 0.08, + "Cu": -0.2, + "Zr": -0.65, + "Nb": 1.21, + "Mo": 0.48, + "Tc": 0.7, + "Ru": 0.65, + "Rh": 0.42, + "Pd": 0.05, + "Ag": -0.32, + "Hf": -0.47, + "Ta": 0.17, + "W": 0.7, + "Re": 0.98, + "Os": 1.0, + "Ir": 0.77, + "Pt": 0.4, + "Au": -0.03 + }, + "Mo": { + "Ti": -0.14, + "V": 0.08, + "Cr": -0.01, + "Mn": -0.5, + "Fe": -0.52, + "Co": -0.72, + "Ni": -0.82, + "Cu": -1.16, + "Zr": -0.9, + "Nb": -0.22, + "Mo": 1.6, + "Tc": -0.21, + "Ru": -0.81, + "Rh": -1.28, + "Pd": -1.47, + "Ag": -1.75, + "Hf": -0.98, + "Ta": -0.02, + "W": 0.22, + "Re": 0.1, + "Os": -0.45, + "Ir": -1.15, + "Pt": -1.6, + "Au": -1.94 + }, + "Tc": { + "Ti": -0.82, + "V": -0.4, + "Cr": -0.11, + "Mn": -0.2, + "Fe": -0.11, + "Co": -0.01, + "Ni": -0.24, + "Cu": -0.7, + "Zr": -1.57, + "Nb": -0.77, + "Mo": -0.27, + "Tc": 1.47, + "Ru": 0.02, + "Rh": -0.11, + "Pd": -0.46, + "Ag": -0.97, + "Hf": -1.26, + "Ta": -0.65, + "W": -0.06, + "Re": 0.26, + "Os": 0.37, + "Ir": 0.21, + "Pt": -0.16, + "Au": -0.7 + }, + "Ru": { + "Ti": -0.3, + "V": 0.15, 
+ "Cr": 0.24, + "Mn": -0.4, + "Fe": -0.39, + "Co": -0.37, + "Ni": -0.71, + "Cu": -1.21, + "Zr": -1.12, + "Nb": -0.31, + "Mo": 0.1, + "Tc": 0.17, + "Ru": 1.48, + "Rh": -0.43, + "Pd": -1.03, + "Ag": -1.72, + "Hf": -0.83, + "Ta": -0.17, + "W": 0.24, + "Re": 0.37, + "Os": 0.23, + "Ir": -0.2, + "Pt": -0.82, + "Au": -1.62 + }, + "Rh": { + "Ti": 0.12, + "V": 0.35, + "Cr": 0.31, + "Mn": -0.08, + "Fe": -0.01, + "Co": 0.02, + "Ni": -0.08, + "Cu": -0.38, + "Zr": -0.46, + "Nb": 0.09, + "Mo": 0.44, + "Tc": 0.46, + "Ru": 0.31, + "Rh": 1.15, + "Pd": -0.45, + "Ag": -0.92, + "Hf": -0.15, + "Ta": 0.36, + "W": 0.66, + "Re": 0.71, + "Os": 0.56, + "Ir": 0.23, + "Pt": -0.27, + "Au": -0.87 + }, + "Pd": { + "Ti": 0.58, + "V": 0.78, + "Cr": 0.3, + "Mn": 0.3, + "Fe": 0.35, + "Co": 0.29, + "Ni": 0.21, + "Cu": 0.04, + "Zr": 0.32, + "Nb": 0.87, + "Mo": 1.08, + "Tc": 1.02, + "Ru": 0.74, + "Rh": 0.36, + "Pd": 0.84, + "Ag": -0.26, + "Hf": 0.44, + "Ta": 1.04, + "W": 1.37, + "Re": 1.34, + "Os": 1.11, + "Ir": 0.7, + "Pt": 0.19, + "Au": -0.22 + }, + "Ag": { + "Ti": 0.45, + "V": 0.63, + "Cr": 0.29, + "Mn": 0.23, + "Fe": 0.41, + "Co": 0.48, + "Ni": 0.49, + "Cu": 0.22, + "Zr": 0.33, + "Nb": 0.67, + "Mo": 0.74, + "Tc": 0.69, + "Ru": 0.6, + "Rh": 0.42, + "Pd": 0.28, + "Ag": 0.58, + "Hf": 0.4, + "Ta": 0.83, + "W": 0.93, + "Re": 0.88, + "Os": 0.72, + "Ir": 0.55, + "Pt": 0.34, + "Au": 0.03 + }, + "Hf": { + "Ti": -0.03, + "V": -0.04, + "Cr": -0.52, + "Mn": -0.49, + "Fe": -0.51, + "Co": -0.62, + "Ni": -0.75, + "Cu": -0.93, + "Zr": -0.14, + "Nb": 0.08, + "Mo": -0.12, + "Tc": -0.25, + "Ru": -0.44, + "Rh": -0.68, + "Pd": -0.92, + "Ag": -1.16, + "Hf": 1.35, + "Ta": 0.25, + "W": 0.17, + "Re": -0.01, + "Os": -0.26, + "Ir": -0.53, + "Pt": -0.8, + "Au": -1.11 + }, + "Ta": { + "Ti": -0.45, + "V": -0.03, + "Cr": 0.16, + "Mn": 0.1, + "Fe": 0.13, + "Co": 0.06, + "Ni": -0.18, + "Cu": -0.52, + "Zr": -0.85, + "Nb": -0.21, + "Mo": 0.25, + "Tc": 0.44, + "Ru": 0.4, + "Rh": 0.11, + "Pd": -0.26, + "Ag": -0.67, + "Hf": -0.6, + 
"Ta": 1.37, + "W": 0.47, + "Re": 0.75, + "Os": 0.73, + "Ir": 0.49, + "Pt": 0.11, + "Au": -0.37 + }, + "W": { + "Ti": 0.02, + "V": 0.04, + "Cr": -0.14, + "Mn": -0.25, + "Fe": -0.35, + "Co": -0.45, + "Ni": -0.42, + "Cu": -0.75, + "Zr": -0.81, + "Nb": -0.31, + "Mo": -0.24, + "Tc": -0.55, + "Ru": -1.07, + "Rh": -1.22, + "Pd": -1.27, + "Ag": -1.56, + "Hf": -0.78, + "Ta": -0.13, + "W": 1.87, + "Re": -0.27, + "Os": -0.85, + "Ir": -1.34, + "Pt": -1.66, + "Au": -1.85 + }, + "Re": { + "Ti": -0.89, + "V": -0.42, + "Cr": -0.13, + "Mn": -0.28, + "Fe": -0.18, + "Co": -0.14, + "Ni": -0.36, + "Cu": -0.83, + "Zr": -1.75, + "Nb": -0.94, + "Mo": -0.43, + "Tc": -0.19, + "Ru": -0.17, + "Rh": -0.32, + "Pd": -0.68, + "Ag": -1.24, + "Hf": -1.51, + "Ta": -0.77, + "W": -0.27, + "Re": 1.69, + "Os": 0.04, + "Ir": -0.11, + "Pt": -0.46, + "Au": -1.05 + }, + "Os": { + "Ti": -0.22, + "V": 0.18, + "Cr": 0.36, + "Mn": -0.21, + "Fe": -0.31, + "Co": -0.3, + "Ni": -0.62, + "Cu": -1.21, + "Zr": -1.07, + "Nb": -0.27, + "Mo": 0.07, + "Tc": 0.09, + "Ru": -0.2, + "Rh": -0.7, + "Pd": -1.31, + "Ag": -2.0, + "Hf": -1.04, + "Ta": -0.17, + "W": 0.13, + "Re": 0.23, + "Os": 1.81, + "Ir": -0.48, + "Pt": -1.25, + "Au": -2.14 + }, + "Ir": { + "Ti": 0.29, + "V": 0.51, + "Cr": 0.35, + "Mn": 0.09, + "Fe": 0.11, + "Co": 0.16, + "Ni": 0.12, + "Cu": -0.12, + "Zr": -0.43, + "Nb": 0.1, + "Mo": 0.35, + "Tc": 0.35, + "Ru": 0.23, + "Rh": -0.08, + "Pd": -0.55, + "Ag": -1.0, + "Hf": -0.17, + "Ta": 0.26, + "W": 0.47, + "Re": 0.48, + "Os": 0.32, + "Ir": 1.44, + "Pt": -0.58, + "Au": -1.2 + }, + "Pt": { + "Ti": 0.66, + "V": 0.98, + "Cr": 0.6, + "Mn": 0.38, + "Fe": 0.37, + "Co": 0.46, + "Ni": 0.43, + "Cu": 0.32, + "Zr": 0.3, + "Nb": 0.76, + "Mo": 0.93, + "Tc": 0.85, + "Ru": 0.6, + "Rh": 0.26, + "Pd": 0.0, + "Ag": -0.27, + "Hf": 0.47, + "Ta": 0.95, + "W": 1.16, + "Re": 1.11, + "Os": 0.86, + "Ir": 0.44, + "Pt": 1.03, + "Au": -0.36 + }, + "Au": { + "Ti": 0.46, + "V": 0.59, + "Cr": 0.33, + "Mn": 0.3, + "Fe": 0.45, + "Co": 0.54, + "Ni": 
0.56, + "Cu": 0.34, + "Zr": 0.3, + "Nb": 0.61, + "Mo": 0.67, + "Tc": 0.59, + "Ru": 0.52, + "Rh": 0.44, + "Pd": 0.28, + "Ag": 0.0, + "Hf": 0.47, + "Ta": 0.79, + "W": 0.92, + "Re": 0.81, + "Os": 0.65, + "Ir": 0.5, + "Pt": 0.34, + "Au": 0.72 + } +} \ No newline at end of file diff --git a/src/autocat/data/segregation_energies/rao2020.json b/src/autocat/data/segregation_energies/rao2020.json new file mode 100644 index 00000000..d2f0236f --- /dev/null +++ b/src/autocat/data/segregation_energies/rao2020.json @@ -0,0 +1,842 @@ +{ + "Al": { + "Al": 0, + "Sc": 1, + "Ti": 0.5, + "V": 0.5, + "Cr": 0, + "Fe": 0.5, + "Co": 0.5, + "Ni": 0.8, + "Cu": 0.9, + "Zn": 0.9, + "Y": 1, + "Zr": 0.8, + "Nb": 0.5, + "Mo": 0, + "Ru": 0.5, + "Rh": 0.8, + "Pd": 0.9, + "Ag": 0.9, + "Cd": 0.9, + "Hf": 0.5, + "Ta": 0, + "W": 0, + "Re": 0, + "Os": 0.5, + "Ir": 0.5, + "Pt": 0.9, + "Au": 0.9, + "Pb": 1 + }, + "Sc": { + "Al": 1, + "Sc": 0, + "Ti": 0.5, + "V": 0.5, + "Cr": 0.5, + "Fe": 0, + "Co": 0, + "Ni": 0.5, + "Cu": 0.9, + "Zn": 1, + "Y": 1, + "Zr": 0.5, + "Nb": 0.5, + "Mo": 0.5, + "Ru": 0, + "Rh": 0.5, + "Pd": 0.8, + "Ag": 0.9, + "Cd": 1, + "Hf": 0.5, + "Ta": 0, + "W": 0.5, + "Re": 0.5, + "Os": 0, + "Ir": 0, + "Pt": 0.5, + "Au": 0.9, + "Pb": 1 + }, + "Ti": { + "Al": 1, + "Sc": 1, + "Ti": 0, + "V": 0.9, + "Cr": 0.9, + "Fe": 0.8, + "Co": 0.5, + "Ni": 0.8, + "Cu": 1, + "Zn": 1, + "Y": 1, + "Zr": 1, + "Nb": 1, + "Mo": 1, + "Ru": 1, + "Rh": 0.8, + "Pd": 0.9, + "Ag": 0.9, + "Cd": 1, + "Hf": 1, + "Ta": 0.8, + "W": 1, + "Re": 1, + "Os": 0.9, + "Ir": 0.8, + "Pt": 0.9, + "Au": 1, + "Pb": 1 + }, + "V": { + "Al": 1, + "Sc": 1, + "Ti": 1, + "V": 0, + "Cr": 0.8, + "Fe": 1, + "Co": 1, + "Ni": 1, + "Cu": 1, + "Zn": 1, + "Y": 0.9, + "Zr": 1, + "Nb": 1, + "Mo": 1, + "Ru": 1, + "Rh": 1, + "Pd": 1, + "Ag": 0.8, + "Cd": 0.8, + "Hf": 1, + "Ta": 1, + "W": 0.9, + "Re": 0.5, + "Os": 0.8, + "Ir": 1, + "Pt": 1, + "Au": 1, + "Pb": 1 + }, + "Cr": { + "Al": 1, + "Sc": 1, + "Ti": 1, + "V": 1, + "Cr": 0, + "Fe": 0.8, + "Co": 0, 
+ "Ni": 0.8, + "Cu": 0.8, + "Zn": 0.9, + "Y": 0.8, + "Zr": 1, + "Nb": 1, + "Mo": 1, + "Ru": 1, + "Rh": 0.9, + "Pd": 0.5, + "Ag": 0, + "Cd": 0.5, + "Hf": 1, + "Ta": 1, + "W": 1, + "Re": 1, + "Os": 1, + "Ir": 1, + "Pt": 1, + "Au": 0.5, + "Pb": 1 + }, + "Fe": { + "Al": 1, + "Sc": 1, + "Ti": 1, + "V": 0.8, + "Cr": 1, + "Fe": 0, + "Co": 1, + "Ni": 1, + "Cu": 0.9, + "Zn": 0.9, + "Y": 0.9, + "Zr": 1, + "Nb": 1, + "Mo": 0.9, + "Ru": 1, + "Rh": 1, + "Pd": 1, + "Ag": 0.5, + "Cd": 0.5, + "Hf": 1, + "Ta": 1, + "W": 0.5, + "Re": 0.5, + "Os": 1, + "Ir": 1, + "Pt": 1, + "Au": 1, + "Pb": 1 + }, + "Co": { + "Al": 1, + "Sc": 1, + "Ti": 1, + "V": 0.5, + "Cr": 0.9, + "Fe": 0.9, + "Co": 0, + "Ni": 0.9, + "Cu": 0.9, + "Zn": 1, + "Y": 1, + "Zr": 1, + "Nb": 1, + "Mo": 0.5, + "Ru": 1, + "Rh": 1, + "Pd": 1, + "Ag": 0.5, + "Cd": 0.8, + "Hf": 1, + "Ta": 0.8, + "W": 0.5, + "Re": 0.5, + "Os": 0.9, + "Ir": 1, + "Pt": 1, + "Au": 1, + "Pb": 1 + }, + "Ni": { + "Al": 1, + "Sc": 1, + "Ti": 0.5, + "V": 0, + "Cr": 0, + "Fe": 0.5, + "Co": 0.8, + "Ni": 0, + "Cu": 0.9, + "Zn": 1, + "Y": 1, + "Zr": 1, + "Nb": 0.5, + "Mo": 0, + "Ru": 0.9, + "Rh": 1, + "Pd": 1, + "Ag": 1, + "Cd": 1, + "Hf": 1, + "Ta": 0, + "W": 0, + "Re": 0, + "Os": 0.5, + "Ir": 1, + "Pt": 1, + "Au": 1, + "Pb": 1 + }, + "Cu": { + "Al": 1, + "Sc": 1, + "Ti": 0.5, + "V": 0, + "Cr": 0, + "Fe": 0, + "Co": 0.5, + "Ni": 0.8, + "Cu": 0, + "Zn": 1, + "Y": 1, + "Zr": 1, + "Nb": 0.5, + "Mo": 0, + "Ru": 0.5, + "Rh": 0.9, + "Pd": 1, + "Ag": 1, + "Cd": 1, + "Hf": 0.8, + "Ta": 0.5, + "W": 0, + "Re": 0, + "Os": 0.5, + "Ir": 0.9, + "Pt": 1, + "Au": 1, + "Pb": 1 + }, + "Zn": { + "Al": 0.9, + "Sc": 0.8, + "Ti": 0.5, + "V": 0.5, + "Cr": 0, + "Fe": 0.5, + "Co": 0.5, + "Ni": 0.5, + "Cu": 0.5, + "Zn": 0, + "Y": 1, + "Zr": 0.5, + "Nb": 0, + "Mo": 0, + "Ru": 0, + "Rh": 0.5, + "Pd": 0.5, + "Ag": 1, + "Cd": 1, + "Hf": 0.5, + "Ta": 0, + "W": 0, + "Re": 0, + "Os": 0, + "Ir": 0.5, + "Pt": 0.5, + "Au": 1, + "Pb": 1 + }, + "Y": { + "Al": 0.9, + "Sc": 0.8, + "Ti": 0.5, + 
"V": 0.5, + "Cr": 0, + "Fe": 0.5, + "Co": 0, + "Ni": 0, + "Cu": 0.5, + "Zn": 0.9, + "Y": 0, + "Zr": 0.5, + "Nb": 0.5, + "Mo": 0, + "Ru": 0, + "Rh": 0, + "Pd": 0.5, + "Ag": 0.9, + "Cd": 1, + "Hf": 0.5, + "Ta": 0.5, + "W": 0, + "Re": 0, + "Os": 0, + "Ir": 0, + "Pt": 0, + "Au": 0.5, + "Pb": 1 + }, + "Zr": { + "Al": 1, + "Sc": 1, + "Ti": 0.8, + "V": 0.8, + "Cr": 0.5, + "Fe": 0, + "Co": 0, + "Ni": 0.5, + "Cu": 0.9, + "Zn": 1, + "Y": 1, + "Zr": 0, + "Nb": 0.9, + "Mo": 0.5, + "Ru": 0.5, + "Rh": 0.5, + "Pd": 0.9, + "Ag": 0.9, + "Cd": 1, + "Hf": 0.8, + "Ta": 0.5, + "W": 0.5, + "Re": 0.5, + "Os": 0, + "Ir": 0, + "Pt": 0.5, + "Au": 0.9, + "Pb": 1 + }, + "Nb": { + "Al": 1, + "Sc": 0.9, + "Ti": 0.9, + "V": 0.8, + "Cr": 0.8, + "Fe": 1, + "Co": 1, + "Ni": 1, + "Cu": 0.9, + "Zn": 0.9, + "Y": 1, + "Zr": 1, + "Nb": 0, + "Mo": 0.5, + "Ru": 1, + "Rh": 1, + "Pd": 1, + "Ag": 0.9, + "Cd": 0.9, + "Hf": 1, + "Ta": 0.8, + "W": 0.5, + "Re": 0.5, + "Os": 0.5, + "Ir": 1, + "Pt": 1, + "Au": 1, + "Pb": 1 + }, + "Mo": { + "Al": 1, + "Sc": 1, + "Ti": 1, + "V": 0.5, + "Cr": 0.9, + "Fe": 0.9, + "Co": 0.9, + "Ni": 0.8, + "Cu": 0.8, + "Zn": 0.9, + "Y": 1, + "Zr": 1, + "Nb": 1, + "Mo": 0, + "Ru": 1, + "Rh": 0.9, + "Pd": 0.5, + "Ag": 0.5, + "Cd": 0.8, + "Hf": 1, + "Ta": 1, + "W": 0.8, + "Re": 1, + "Os": 1, + "Ir": 1, + "Pt": 0.9, + "Au": 0.5, + "Pb": 1 + }, + "Ru": { + "Al": 1, + "Sc": 1, + "Ti": 1, + "V": 0.5, + "Cr": 0.5, + "Fe": 1, + "Co": 0.9, + "Ni": 0.9, + "Cu": 0.9, + "Zn": 0.9, + "Y": 1, + "Zr": 1, + "Nb": 1, + "Mo": 0.5, + "Ru": 0, + "Rh": 0.9, + "Pd": 0.9, + "Ag": 0.5, + "Cd": 0.9, + "Hf": 1, + "Ta": 0.9, + "W": 0.5, + "Re": 0.5, + "Os": 0.8, + "Ir": 1, + "Pt": 0.9, + "Au": 0.9, + "Pb": 1 + }, + "Rh": { + "Al": 1, + "Sc": 1, + "Ti": 0, + "V": 0, + "Cr": 0, + "Fe": 0, + "Co": 0.8, + "Ni": 1, + "Cu": 1, + "Zn": 1, + "Y": 1, + "Zr": 0.9, + "Nb": 0, + "Mo": 0, + "Ru": 0.5, + "Rh": 0, + "Pd": 0.9, + "Ag": 0.9, + "Cd": 1, + "Hf": 0.5, + "Ta": 0, + "W": 0, + "Re": 0, + "Os": 0, + "Ir": 0.8, + "Pt": 
1, + "Au": 0.9, + "Pb": 1 + }, + "Pd": { + "Al": 0.5, + "Sc": 0, + "Ti": 0, + "V": 0, + "Cr": 0, + "Fe": 1, + "Co": 0.5, + "Ni": 0.5, + "Cu": 0.8, + "Zn": 0.8, + "Y": 0.8, + "Zr": 0, + "Nb": 0, + "Mo": 0, + "Ru": 0, + "Rh": 0.5, + "Pd": 0, + "Ag": 0.9, + "Cd": 1, + "Hf": 0, + "Ta": 0, + "W": 0, + "Re": 0, + "Os": 0, + "Ir": 0.5, + "Pt": 0.9, + "Au": 1, + "Pb": 1 + }, + "Ag": { + "Al": 0.9, + "Sc": 0.5, + "Ti": 0, + "V": 0, + "Cr": 0, + "Fe": 0.5, + "Co": 0.9, + "Ni": 0.5, + "Cu": 0.8, + "Zn": 1, + "Y": 0.5, + "Zr": 0, + "Nb": 0, + "Mo": 0, + "Ru": 0, + "Rh": 0.5, + "Pd": 0.5, + "Ag": 0, + "Cd": 1, + "Hf": 0, + "Ta": 0, + "W": 0, + "Re": 0, + "Os": 0, + "Ir": 0.5, + "Pt": 0.8, + "Au": 1, + "Pb": 1 + }, + "Cd": { + "Al": 0.8, + "Sc": 0.5, + "Ti": 0.5, + "V": 0, + "Cr": 0, + "Fe": 0.8, + "Co": 0.8, + "Ni": 0.8, + "Cu": 0.8, + "Zn": 0.8, + "Y": 0.5, + "Zr": 0.5, + "Nb": 0, + "Mo": 0, + "Ru": 0.5, + "Rh": 0.5, + "Pd": 0.8, + "Ag": 0.9, + "Cd": 0, + "Hf": 0.5, + "Ta": 0.5, + "W": 0, + "Re": 0, + "Os": 0.5, + "Ir": 0.5, + "Pt": 0.5, + "Au": 0.8, + "Pb": 0.9 + }, + "Hf": { + "Al": 1, + "Sc": 0.9, + "Ti": 0.9, + "V": 0.9, + "Cr": 0.8, + "Fe": 0.5, + "Co": 0.5, + "Ni": 0.8, + "Cu": 0.9, + "Zn": 1, + "Y": 1, + "Zr": 1, + "Nb": 0.9, + "Mo": 0.9, + "Ru": 0.5, + "Rh": 0.5, + "Pd": 0.8, + "Ag": 0.9, + "Cd": 1, + "Hf": 0, + "Ta": 0.8, + "W": 0.9, + "Re": 0.5, + "Os": 0.5, + "Ir": 0.5, + "Pt": 0.5, + "Au": 0.9, + "Pb": 1 + }, + "Ta": { + "Al": 0.9, + "Sc": 0.9, + "Ti": 0.9, + "V": 0.9, + "Cr": 0.8, + "Fe": 1, + "Co": 1, + "Ni": 1, + "Cu": 0.8, + "Zn": 0.9, + "Y": 1, + "Zr": 1, + "Nb": 1, + "Mo": 0.9, + "Ru": 1, + "Rh": 1, + "Pd": 1, + "Ag": 0.5, + "Cd": 0.5, + "Hf": 1, + "Ta": 0, + "W": 0.5, + "Re": 0.5, + "Os": 0.9, + "Ir": 1, + "Pt": 1, + "Au": 0.9, + "Pb": 1 + }, + "W": { + "Al": 1, + "Sc": 1, + "Ti": 1, + "V": 0.8, + "Cr": 0.9, + "Fe": 0.8, + "Co": 0.8, + "Ni": 0, + "Cu": 0.5, + "Zn": 0.5, + "Y": 0.9, + "Zr": 1, + "Nb": 1, + "Mo": 1, + "Ru": 1, + "Rh": 0.5, + "Pd": 0, + "Ag": 
0, + "Cd": 0.5, + "Hf": 1, + "Ta": 1, + "W": 0, + "Re": 1, + "Os": 1, + "Ir": 1, + "Pt": 0.5, + "Au": 0, + "Pb": 0.5 + }, + "Re": { + "Al": 1, + "Sc": 0.5, + "Ti": 1, + "V": 1, + "Cr": 0.9, + "Fe": 0.9, + "Co": 1, + "Ni": 1, + "Cu": 0.8, + "Zn": 0.5, + "Y": 0.5, + "Zr": 1, + "Nb": 1, + "Mo": 1, + "Ru": 1, + "Rh": 1, + "Pd": 1, + "Ag": 0, + "Cd": 0, + "Hf": 1, + "Ta": 1, + "W": 1, + "Re": 0, + "Os": 1, + "Ir": 1, + "Pt": 1, + "Au": 0.9, + "Pb": 0 + }, + "Os": { + "Al": 0.9, + "Sc": 1, + "Ti": 1, + "V": 0.5, + "Cr": 0.5, + "Fe": 1, + "Co": 0.9, + "Ni": 0.9, + "Cu": 0.5, + "Zn": 0.8, + "Y": 1, + "Zr": 1, + "Nb": 1, + "Mo": 0.8, + "Ru": 0.9, + "Rh": 0.9, + "Pd": 0.8, + "Ag": 0, + "Cd": 0.5, + "Hf": 1, + "Ta": 1, + "W": 0.8, + "Re": 0.5, + "Os": 0, + "Ir": 1, + "Pt": 0.9, + "Au": 0.5, + "Pb": 0 + }, + "Ir": { + "Al": 1, + "Sc": 1, + "Ti": 0, + "V": 0, + "Cr": 0, + "Fe": 0, + "Co": 0.5, + "Ni": 0.9, + "Cu": 0.9, + "Zn": 1, + "Y": 1, + "Zr": 1, + "Nb": 0, + "Mo": 0, + "Ru": 0.5, + "Rh": 0.9, + "Pd": 0.9, + "Ag": 0.5, + "Cd": 1, + "Hf": 0.9, + "Ta": 0, + "W": 0, + "Re": 0, + "Os": 0.5, + "Ir": 0, + "Pt": 0.9, + "Au": 0.9, + "Pb": 0.5 + }, + "Pt": { + "Al": 0, + "Sc": 0, + "Ti": 0, + "V": 0, + "Cr": 0, + "Fe": 0, + "Co": 0, + "Ni": 0, + "Cu": 0.5, + "Zn": 0.5, + "Y": 0.9, + "Zr": 0, + "Nb": 0, + "Mo": 0, + "Ru": 0, + "Rh": 0.5, + "Pd": 0.9, + "Ag": 0.9, + "Cd": 1, + "Hf": 0, + "Ta": 0, + "W": 0, + "Re": 0, + "Os": 0, + "Ir": 0.5, + "Pt": 0, + "Au": 0.9, + "Pb": 0.5 + }, + "Au": { + "Al": 0.9, + "Sc": 0, + "Ti": 0, + "V": 0.5, + "Cr": 0, + "Fe": 0.5, + "Co": 0.9, + "Ni": 0.5, + "Cu": 0.5, + "Zn": 0.8, + "Y": 0.5, + "Zr": 0, + "Nb": 0, + "Mo": 0, + "Ru": 0.5, + "Rh": 0.5, + "Pd": 0.5, + "Ag": 0.8, + "Cd": 1, + "Hf": 0, + "Ta": 0, + "W": 0, + "Re": 0, + "Os": 0, + "Ir": 0.5, + "Pt": 0.5, + "Au": 0, + "Pb": 1 + }, + "Pb": { + "Al": 0.9, + "Sc": 0.5, + "Ti": 0.5, + "V": 0, + "Cr": 0, + "Fe": 0, + "Co": 0, + "Ni": 0.5, + "Cu": 0.5, + "Zn": 0.9, + "Y": 0.5, + "Zr": 0.5, + "Nb": 0, 
+ "Mo": 0, + "Ru": 0, + "Rh": 0, + "Pd": 0.5, + "Ag": 0.9, + "Cd": 0.9, + "Hf": 0.5, + "Ta": 0, + "W": 0, + "Re": 0, + "Os": 0, + "Ir": 0, + "Pt": 0.5, + "Au": 0.8, + "Pb": 0 + } +} \ No newline at end of file diff --git a/src/autocat/learning/__init__.py b/src/autocat/learning/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/src/autocat/learning/featurizers.py b/src/autocat/learning/featurizers.py new file mode 100644 index 00000000..72f14df3 --- /dev/null +++ b/src/autocat/learning/featurizers.py @@ -0,0 +1,295 @@ +import copy +from typing import List, Dict + +import numpy as np +from prettytable import PrettyTable + +from ase import Atoms +from dscribe.descriptors import SineMatrix +from dscribe.descriptors import CoulombMatrix +from dscribe.descriptors import ACSF +from dscribe.descriptors import SOAP +from matminer.featurizers.composition import ElementProperty +from matminer.featurizers.site import ChemicalSRO +from matminer.featurizers.site import OPSiteFingerprint +from matminer.featurizers.site import CrystalNNFingerprint +from pymatgen.io.ase import AseAtomsAdaptor +from pymatgen.core.periodic_table import Element + + +SUPPORTED_MATMINER_CLASSES = [ + ElementProperty, + ChemicalSRO, + OPSiteFingerprint, + CrystalNNFingerprint, +] + +SUPPORTED_DSCRIBE_CLASSES = [SineMatrix, CoulombMatrix, ACSF, SOAP] + + +class FeaturizerError(Exception): + pass + + +class Featurizer: + def __init__( + self, + featurizer_class=None, # black + design_space_structures: List[Atoms] = None, + species_list: List[str] = None, + max_size: int = None, + preset: str = None, + kwargs: Dict = None, + ): + + self._featurizer_class = SineMatrix + self.featurizer_class = featurizer_class + + self._preset = None + self.preset = preset + + self._kwargs = None + self.kwargs = kwargs + + self._max_size = 100 + self.max_size = max_size + + self._species_list = ["Fe", "Ni", "Pt", "Pd", "Cu", "C", "N", "O", "H"] + self.species_list = species_list + + # overrides max_size 
and species_list if given + self._design_space_structures = None + self.design_space_structures = design_space_structures + + def __eq__(self, other: object) -> bool: + if isinstance(other, Featurizer): + for attr in [ + "featurizer_class", + "species_list", + "max_size", + "preset", + "kwargs", + ]: + if getattr(self, attr) != getattr(other, attr): + return False + return True + return False + + def __repr__(self) -> str: + pt = PrettyTable() + pt.field_names = ["", "Featurizer"] + class_name = ( + self.featurizer_class.__module__ + "." + self.featurizer_class.__name__ + ) + pt.add_row(["class", class_name]) + pt.add_row(["kwargs", self.kwargs]) + pt.add_row(["species list", self.species_list]) + pt.add_row(["maximum structure size", self.max_size]) + pt.add_row(["preset", self.preset]) + pt.add_row( + [ + "design space structures provided?", + self.design_space_structures is not None, + ] + ) + pt.max_width = 70 + return str(pt) + + def copy(self): + """ + Returns a copy of the featurizer + """ + ds_structs_copy = ( + [struct.copy() for struct in self.design_space_structures] + if self.design_space_structures + else None + ) + feat = self.__class__( + featurizer_class=self.featurizer_class, + design_space_structures=ds_structs_copy, + species_list=self.species_list.copy(), + max_size=self.max_size, + kwargs=copy.deepcopy(self.kwargs) if self.kwargs else None, + ) + return feat + + @property + def featurizer_class(self): + return self._featurizer_class + + @featurizer_class.setter + def featurizer_class(self, featurizer_class): + if ( + featurizer_class in SUPPORTED_MATMINER_CLASSES + or featurizer_class in SUPPORTED_DSCRIBE_CLASSES + ): + self._featurizer_class = featurizer_class + self._preset = None + self._kwargs = None + else: + msg = f"Featurization class {featurizer_class} is not currently supported." 
+ raise FeaturizerError(msg) + + @property + def preset(self): + return self._preset + + @preset.setter + def preset(self, preset): + if self.featurizer_class in [CrystalNNFingerprint, ElementProperty]: + self._preset = preset + elif preset is None: + self._preset = preset + else: + msg = f"Presets are not supported for {self.featurizer_class.__module__}" + raise FeaturizerError(msg) + + @property + def kwargs(self): + return self._kwargs + + @kwargs.setter + def kwargs(self, kwargs): + if kwargs is not None: + self._kwargs = kwargs.copy() + + @property + def design_space_structures(self): + return self._design_space_structures + + @design_space_structures.setter + def design_space_structures(self, design_space_structures: List[Atoms]): + if design_space_structures is not None: + self._design_space_structures = [ + struct.copy() for struct in design_space_structures + ] + # analyze new design space + ds_structs = design_space_structures + _species_list = [] + for s in ds_structs: + # get all unique species + found_species = np.unique(s.get_chemical_symbols()).tolist() + new_species = [ + spec for spec in found_species if spec not in _species_list + ] + _species_list.extend(new_species) + # sort species list + sorted_species_list = sorted( + _species_list, key=lambda el: Element(el).mendeleev_no + ) + + self._max_size = max([len(s) for s in ds_structs]) + self._species_list = sorted_species_list + + @property + def max_size(self): + return self._max_size + + @max_size.setter + def max_size(self, max_size): + if max_size is not None: + self._max_size = max_size + + @property + def species_list(self): + return self._species_list + + @species_list.setter + def species_list(self, species_list: List[str]): + if species_list is not None: + _species_list = species_list.copy() + # sort species list by mendeleev number + sorted_species_list = sorted( + _species_list, key=lambda el: Element(el).mendeleev_no + ) + self._species_list = sorted_species_list + + # TODO: 
"get_featurization_object" -> "get_featurizer" + @property + def featurization_object(self): + return self._get_featurization_object() + + def _get_featurization_object(self): + # instantiate featurizer object + if hasattr(self.featurizer_class, "from_preset") and self.preset is not None: + return self.featurizer_class.from_preset(self.preset) + if self.featurizer_class in [SineMatrix, CoulombMatrix]: + return self.featurizer_class( + n_atoms_max=self.max_size, permutation="none", **self.kwargs or {}, + ) + if self.featurizer_class in [SOAP, ACSF]: + return self.featurizer_class(species=self.species_list, **self.kwargs or {}) + return self.featurizer_class(**self.kwargs or {}) + + def featurize_single(self, structure: Atoms): + """ + Featurize a single structure. Returns a single vector + + Parameters + ---------- + + structure: + ase.Atoms object of structure to be featurized + + Returns + ------- + + representation: + Numpy array of feature vector (not flattened) + """ + feat_class = self.featurizer_class + featurization_object = self.featurization_object + # dscribe classes + if feat_class in [SOAP, ACSF]: + adsorbate_indices = np.where(structure.get_tags() <= 0)[0].tolist() + return featurization_object.create(structure, positions=adsorbate_indices,) + if feat_class in [SineMatrix, CoulombMatrix]: + return featurization_object.create(structure).reshape(-1,) + + # matminer classes + pym_struct = AseAtomsAdaptor().get_structure(structure) + if feat_class == ElementProperty: + return np.array(featurization_object.featurize(pym_struct.composition)) + representation = np.array([]) + if feat_class in [CrystalNNFingerprint, OPSiteFingerprint]: + adsorbate_indices = np.where(structure.get_tags() <= 0)[0].tolist() + for idx in adsorbate_indices: + feat = featurization_object.featurize(pym_struct, idx) + representation = np.concatenate((representation, feat)) + return representation + if feat_class == ChemicalSRO: + adsorbate_indices = np.where(structure.get_tags() <= 
0)[0].tolist() + formatted_list = [[pym_struct, idx] for idx in adsorbate_indices] + featurization_object.fit(formatted_list) + for idx in adsorbate_indices: + feat = featurization_object.featurize(pym_struct, idx) + representation = np.concatenate((representation, feat)) + return representation + return None + + def featurize_multiple(self, structures: List[Atoms]): + """ + Featurize multiple structures. Returns a matrix where each + row is the flattened feature vector of each system + + Parameters + ---------- + + structures: + List of ase.Atoms structures to be featurized + + Returns + ------- + + X: + Numpy array of shape (number of structures, number of features) + """ + first_vec = self.featurize_single(structures[0]).flatten() + num_features = len(first_vec) + # if adsorbate featurization, assumes only 1 adsorbate in design space + # (otherwise would require padding) + X = np.zeros((len(structures), num_features)) + X[0, :] = first_vec.copy() + for i in range(1, len(structures)): + X[i, :] = self.featurize_single(structures[i]).flatten() + return X diff --git a/src/autocat/learning/predictors.py b/src/autocat/learning/predictors.py new file mode 100644 index 00000000..7978a849 --- /dev/null +++ b/src/autocat/learning/predictors.py @@ -0,0 +1,309 @@ +import copy +import numpy as np + +from typing import List +from typing import Dict +from typing import Union +from prettytable import PrettyTable + +from ase import Atoms + +from sklearn.gaussian_process import GaussianProcessRegressor +from sklearn.metrics import mean_absolute_error +from sklearn.metrics import mean_squared_error + +from autocat.learning.featurizers import Featurizer +from autocat.learning.featurizers import ( + SUPPORTED_DSCRIBE_CLASSES, + SUPPORTED_MATMINER_CLASSES, +) + + +class PredictorError(Exception): + pass + + +class Predictor: + def __init__( + self, + model_class=None, + model_kwargs: Dict = None, # TODO: kwargs -> options? 
+        featurizer_class=None,  # black
+        featurization_kwargs: Dict = None,
+    ):
+        """
+        Constructor.
+
+        Parameters
+        ----------
+
+        model_class:
+            Class of regression model to be used for training and prediction.
+            If this is changed after initialization, all previously set
+            model_kwargs will be removed.
+            N.B. must have fit and predict methods
+
+        structure_featurizer:
+            String giving featurizer to be used for full structure which will be
+            fed into `autocat.learning.featurizers.full_structure_featurization`
+
+        adsorbate_featurizer:
+            String giving featurizer to be used for full structure which will be
+            fed into `autocat.learning.featurizers.adsorbate_structure_featurization`
+
+        maximum_structure_size:
+            Size of the largest structure to be supported by the representation.
+            Default: number of atoms in largest structure within `structures`
+
+        maximum_adsorbate_size:
+            Integer giving the maximum adsorbate size to be encountered
+            (ie. this determines if zero-padding should be applied and how much).
+            If the provided value is less than the adsorbate size given by
+            `adsorbate_indices`, representation will remain size of the adsorbate.
+            Default: size of adsorbate provided
+
+        species_list:
+            List of species that could be encountered for featurization.
+            Default: Parses over all `structures` and collects all encountered species
+
+        refine_structures:
+            Bool indicating whether the structures should be refined to include
+            only the adsorbate and surface layer.
Requires tags for all structures + to have adsorbate atoms and surface atoms as 0 and 1, respectively + + """ + self.is_fit = False + + self._model_class = GaussianProcessRegressor + self.model_class = model_class + + self._model_kwargs = None + self.model_kwargs = model_kwargs + + self.regressor = self.model_class( + **self.model_kwargs if self.model_kwargs else {} + ) + + self._featurizer_class = None + self._featurization_kwargs = None + + self.featurizer_class = featurizer_class + + self.featurization_kwargs = featurization_kwargs + + self.featurizer = Featurizer( + featurizer_class=self.featurizer_class, + **self.featurization_kwargs if self.featurization_kwargs else {}, + ) + + def __repr__(self) -> str: + pt = PrettyTable() + pt.field_names = ["", "Predictor"] + model_class_name = self.model_class.__module__ + "." + self.model_class.__name__ + pt.add_row(["class", model_class_name]) + pt.add_row(["kwargs", self.model_kwargs]) + pt.add_row(["is fit?", self.is_fit]) + feat_str = str(self.featurizer) + return str(pt) + "\n" + feat_str + + @property + def model_class(self): + return self._model_class + + @model_class.setter + def model_class(self, model_class): + if model_class is not None: + self._model_class = model_class + # removes any model kwargs from previous model + # if changed + self._model_kwargs = None + if self.is_fit: + self.is_fit = False + self.X_ = None + self.y_ = None + # generates new regressor with default settings + self.regressor = self._model_class() + + @property + def model_kwargs(self): + return self._model_kwargs + + @model_kwargs.setter + def model_kwargs(self, model_kwargs): + if model_kwargs is not None: + self._model_kwargs = copy.deepcopy(model_kwargs) + if self.is_fit: + self.is_fit = False + self.X_ = None + self.y_ = None + self.regressor = self.model_class(**model_kwargs) + + @property + def featurizer_class(self): + return self._featurizer_class + + @featurizer_class.setter + def featurizer_class(self, featurizer_class): + 
if featurizer_class is not None: + assert ( + featurizer_class in SUPPORTED_DSCRIBE_CLASSES + or featurizer_class in SUPPORTED_MATMINER_CLASSES + ) + self._featurizer_class = featurizer_class + self._featurization_kwargs = None + self.featurizer = Featurizer(featurizer_class,) + if self.is_fit: + self.is_fit = False + self.X_ = None + self.y_ = None + self.regressor = self.model_class( + **self.model_kwargs if self.model_kwargs else {} + ) + + @property + def featurization_kwargs(self): + return self._featurization_kwargs + + @featurization_kwargs.setter + def featurization_kwargs(self, featurization_kwargs): + if featurization_kwargs is not None: + assert isinstance(featurization_kwargs, dict) + self._featurization_kwargs = featurization_kwargs.copy() + self.featurizer = Featurizer(self.featurizer_class, **featurization_kwargs) + if self.is_fit: + self.is_fit = False + self.X_ = None + self.y_ = None + self.regressor = self.model_class( + **self.model_kwargs if self.model_kwargs else {} + ) + + def copy(self): + """ + Returns a copy + """ + acp = self.__class__( + model_class=self.model_class, featurizer_class=self.featurizer_class, + ) + acp.regressor = copy.deepcopy(self.regressor) + acp.is_fit = self.is_fit + acp.featurization_kwargs = copy.deepcopy(self.featurization_kwargs) + acp.model_kwargs = copy.deepcopy(self.model_kwargs) + + return acp + + def fit( + self, training_structures: List[Union[Atoms, str]], y: np.ndarray, + ): + """ + Given a list of structures and labels will featurize + and train a regression model + + Parameters + ---------- + + training_structures: + List of structures to be trained upon + + y: + Numpy array of labels corresponding to training structures + of shape (# of training structures, # of targets) + + Returns + ------- + + trained_model: + Trained `sklearn` model object + """ + self.X_ = self.featurizer.featurize_multiple(training_structures) + self.y_ = y + self.regressor.fit(self.X_, self.y_) + self.is_fit = True + + def 
predict( + self, testing_structures: List[Atoms], + ): + """ + From a trained model, will predict on given structures + + Parameters + ---------- + + testing_structures: + List of Atoms objects to make predictions on + + Returns + ------- + + predicted_labels: + List of predicted labels for each input structure + + unc: + List of uncertainties for each prediction if available. + Otherwise returns `None` + + """ + assert self.is_fit + featurized_input = self.featurizer.featurize_multiple(testing_structures) + try: + predicted_labels, unc = self.regressor.predict( + featurized_input, return_std=True + ) + except TypeError: + predicted_labels = self.regressor.predict(featurized_input,) + unc = None + + return predicted_labels, unc + + # TODO: "score" -> "get_scores"? + def score( + self, + structures: List[Atoms], + labels: np.ndarray, + metric: str = "mae", + return_predictions: bool = False, + **kwargs, + ): + """ + Returns a prediction score given the actual corrections. + + Parameters + ---------- + + structures: + List of Atoms objects of structures to be tested on + + labels: + Labels for the testing structures + + metric: + How the performance metric should be calculated + Options: + - mae + - mse + + return_predictions: + Bool indicating whether the predictions and uncertainties should + be returned in addition to the score + + Returns + ------- + + score: + Float of calculated test score on the given data + """ + assert self.is_fit + + pred_label, unc = self.predict(structures) + + score_func = {"mae": mean_absolute_error, "mse": mean_squared_error} + + if metric not in score_func: + msg = f"Metric: {metric} is not supported" + raise PredictorError(msg) + + score = score_func[metric](labels, pred_label, **kwargs) + + if return_predictions: + return score, pred_label, unc + return score diff --git a/src/autocat/learning/sequential.py b/src/autocat/learning/sequential.py new file mode 100644 index 00000000..001bde63 --- /dev/null +++ 
b/src/autocat/learning/sequential.py @@ -0,0 +1,1142 @@ +import copy +import os +import json +import importlib +from typing import List +from typing import Dict +from typing import Union + +import numpy as np +from joblib import Parallel, delayed +from prettytable import PrettyTable +from ase import Atoms +from ase.io.jsonio import encode as atoms_encoder +from ase.io.jsonio import decode as atoms_decoder +from scipy import stats +from sklearn.gaussian_process import GaussianProcessRegressor +from dscribe.descriptors import SineMatrix + +from autocat.learning.predictors import Predictor +from autocat.data.hhi import HHI +from autocat.data.segregation_energies import SEGREGATION_ENERGIES + + +Array = List[float] + + +class DesignSpaceError(Exception): + pass + + +class DesignSpace: + def __init__( + self, design_space_structures: List[Atoms], design_space_labels: Array, + ): + """ + Constructor. + + Parameters + ---------- + + design_space_structures: + List of all structures within the design space + + design_space_labels: + Labels corresponding to all structures within the design space. 
+ If label not yet known, set to np.nan + + """ + if len(design_space_structures) != design_space_labels.shape[0]: + msg = f"Number of structures ({len(design_space_structures)})\ + and labels ({design_space_labels.shape[0]}) must match" + raise DesignSpaceError(msg) + + self._design_space_structures = [ + struct.copy() for struct in design_space_structures + ] + self._design_space_labels = design_space_labels.copy() + + def __repr__(self) -> str: + pt = PrettyTable() + pt.field_names = ["", "DesignSpace"] + pt.add_row(["total # of systems", len(self)]) + num_unknown = sum(np.isnan(self.design_space_labels)) + pt.add_row(["# of unlabelled systems", num_unknown]) + pt.add_row(["unique species present", self.species_list]) + max_label = max(self.design_space_labels) + pt.add_row(["maximum label", max_label]) + min_label = min(self.design_space_labels) + pt.add_row(["minimum label", min_label]) + pt.max_width = 70 + return str(pt) + + def __len__(self): + return len(self.design_space_structures) + + # TODO: non-dunder method for deleting systems + def __delitem__(self, i): + """ + Deletes systems from the design space. 
If mask provided, deletes wherever True + """ + if isinstance(i, list): + i = np.array(i) + elif isinstance(i, int): + i = [i] + mask = np.ones(len(self), dtype=bool) + mask[i] = 0 + self._design_space_labels = self.design_space_labels[mask] + structs = self.design_space_structures + masked_structs = [structs[j] for j in range(len(self)) if mask[j]] + self._design_space_structures = masked_structs + + def __eq__(self, other: object) -> bool: + if isinstance(other, DesignSpace): + # check that they are the same length + if len(self) == len(other): + # check all their structures are equal + self_structs = self.design_space_structures + o_structs = other.design_space_structures + if not self_structs == o_structs: + return False + + # check their labels are equal + self_labels = self.design_space_labels + o_labels = other.design_space_labels + return np.array_equal(self_labels, o_labels, equal_nan=True) + return False + + def copy(self): + """ + Returns a copy of the design space + """ + acds = self.__class__( + design_space_structures=self.design_space_structures, + design_space_labels=self.design_space_labels, + ) + return acds + + @property + def design_space_structures(self): + return self._design_space_structures + + @design_space_structures.setter + def design_space_structures(self, design_space_structures): + msg = "Please use `update` method to update the design space." + raise DesignSpaceError(msg) + + @property + def design_space_labels(self): + return self._design_space_labels + + @design_space_labels.setter + def design_space_labels(self, design_space_labels): + msg = "Please use `update` method to update the design space." 
+ raise DesignSpaceError(msg) + + @property + def species_list(self): + species_list = [] + for s in self.design_space_structures: + # get all unique species + found_species = np.unique(s.get_chemical_symbols()).tolist() + new_species = [spec for spec in found_species if spec not in species_list] + species_list.extend(new_species) + return species_list + + def update(self, structures: List[Atoms], labels: Array): + """ + Updates design space given structures and corresponding labels. + If structure already in design space, the label is updated. + + Parameters + ---------- + + structures: + List of Atoms objects structures to be added + + labels: + Corresponding labels to `structures` + """ + if (structures is not None) and (labels is not None): + assert len(structures) == len(labels) + assert all(isinstance(struct, Atoms) for struct in structures) + for i, struct in enumerate(structures): + # if structure already in design space, update label + if struct in self.design_space_structures: + idx = self.design_space_structures.index(struct) + self._design_space_labels[idx] = labels[i] + # otherwise extend design space + else: + self._design_space_structures.append(struct) + self._design_space_labels = np.append( + self.design_space_labels, labels[i] + ) + + def to_jsonified_list(self) -> List: + """ + Returns a jsonified list representation + """ + collected_jsons = [] + for struct in self.design_space_structures: + collected_jsons.append(atoms_encoder(struct)) + # append labels to list of collected jsons + jsonified_labels = [float(x) for x in self.design_space_labels] + collected_jsons.append(jsonified_labels) + return collected_jsons + + def write_json_to_disk( + self, + json_name: str = None, + write_location: str = ".", + write_to_disk: bool = True, + ): + """ + Writes DesignSpace to disk as a json + """ + collected_jsons = self.to_jsonified_list() + # set default json name if needed + if json_name is None: + json_name = "acds.json" + # write out single json + if 
write_to_disk: + json_path = os.path.join(write_location, json_name) + with open(json_path, "w") as f: + json.dump(collected_jsons, f) + + @staticmethod + def from_json(json_name: str): + with open(json_name, "r") as f: + all_data = json.load(f) + structures = [] + for i in range(len(all_data) - 1): + atoms = atoms_decoder(all_data[i]) + structures.append(atoms) + labels = np.array(all_data[-1]) + return DesignSpace( + design_space_structures=structures, design_space_labels=labels, + ) + + +class SequentialLearnerError(Exception): + pass + + +# TODO: "kwargs" -> "options"? +class SequentialLearner: + def __init__( + self, + design_space: DesignSpace, + predictor_kwargs: Dict[str, Union[str, float]] = None, + candidate_selection_kwargs: Dict[str, Union[str, float]] = None, + sl_kwargs: Dict[str, int] = None, + ): + # TODO: move predefined attributes (train_idx, candidate_idxs) to a + # different container (not kwargs) + + self._design_space = None + self.design_space = design_space.copy() + + # predictor arguments to use throughout the SL process + if predictor_kwargs is None: + predictor_kwargs = { + "model_class": GaussianProcessRegressor, + "featurizer_class": SineMatrix, + } + if "model_class" not in predictor_kwargs: + predictor_kwargs["model_class"] = GaussianProcessRegressor + if "featurizer_class" not in predictor_kwargs: + predictor_kwargs["featurizer_class"] = SineMatrix + if "featurization_kwargs" not in predictor_kwargs: + predictor_kwargs["featurization_kwargs"] = {} + ds_structs_kwargs = { + "design_space_structures": design_space.design_space_structures + } + predictor_kwargs["featurization_kwargs"].update(ds_structs_kwargs) + self._predictor_kwargs = None + self.predictor_kwargs = predictor_kwargs + self._predictor = Predictor(**predictor_kwargs) + + # acquisition function arguments to use for candidate selection + if not candidate_selection_kwargs: + candidate_selection_kwargs = {"aq": "Random"} + self._candidate_selection_kwargs = None + 
self.candidate_selection_kwargs = candidate_selection_kwargs + + # other miscellaneous kw arguments + self.sl_kwargs = sl_kwargs if sl_kwargs else {} + + # variables that need to be propagated through the SL process + if "iteration_count" not in self.sl_kwargs: + self.sl_kwargs.update({"iteration_count": 0}) + if "train_idx" not in self.sl_kwargs: + self.sl_kwargs.update({"train_idx": None}) + if "train_idx_history" not in self.sl_kwargs: + self.sl_kwargs.update({"train_idx_history": None}) + if "predictions" not in self.sl_kwargs: + self.sl_kwargs.update({"predictions": None}) + if "predictions_history" not in self.sl_kwargs: + self.sl_kwargs.update({"predictions_history": None}) + if "uncertainties" not in self.sl_kwargs: + self.sl_kwargs.update({"uncertainties": None}) + if "uncertainties_history" not in self.sl_kwargs: + self.sl_kwargs.update({"uncertainties_history": None}) + if "candidate_indices" not in self.sl_kwargs: + self.sl_kwargs.update({"candidate_indices": None}) + if "candidate_index_history" not in self.sl_kwargs: + self.sl_kwargs.update({"candidate_index_history": None}) + if "acquisition_scores" not in self.sl_kwargs: + self.sl_kwargs.update({"acquisition_scores": None}) + + def __repr__(self) -> str: + pt = PrettyTable() + pt.field_names = ["", "Sequential Learner"] + pt.add_row(["iteration count", self.iteration_count]) + if self.candidate_structures is not None: + cand_formulas = [ + s.get_chemical_formula() for s in self.candidate_structures + ] + else: + cand_formulas = None + pt.add_row(["next candidate system structures", cand_formulas]) + pt.add_row(["next candidate system indices", self.candidate_indices]) + pt.add_row(["acquisition function", self.candidate_selection_kwargs.get("aq")]) + pt.add_row( + [ + "# of candidates to pick", + self.candidate_selection_kwargs.get("num_candidates_to_pick", 1), + ] + ) + pt.add_row( + ["target maximum", self.candidate_selection_kwargs.get("target_max")] + ) + pt.add_row( + ["target minimum", 
self.candidate_selection_kwargs.get("target_min")] + ) + pt.add_row( + ["include hhi?", self.candidate_selection_kwargs.get("include_hhi", False)] + ) + pt.add_row( + [ + "include segregation energies?", + self.candidate_selection_kwargs.get("include_seg_ener", False), + ] + ) + return str(pt) + "\n" + str(self.design_space) + "\n" + str(self.predictor) + + @property + def design_space(self): + return self._design_space + + @design_space.setter + def design_space(self, design_space): + self._design_space = design_space + + @property + def predictor_kwargs(self): + return self._predictor_kwargs + + @predictor_kwargs.setter + def predictor_kwargs(self, predictor_kwargs): + if predictor_kwargs is None: + predictor_kwargs = { + "model_class": GaussianProcessRegressor, + "featurizer_class": SineMatrix, + } + if "model_class" not in predictor_kwargs: + predictor_kwargs["model_class"] = GaussianProcessRegressor + if "featurizer_class" not in predictor_kwargs: + predictor_kwargs["featurizer_class"] = SineMatrix + if "featurization_kwargs" not in predictor_kwargs: + predictor_kwargs["featurization_kwargs"] = {} + ds_structs_kwargs = { + "design_space_structures": self.design_space.design_space_structures + } + predictor_kwargs["featurization_kwargs"].update(ds_structs_kwargs) + self._predictor_kwargs = copy.deepcopy(predictor_kwargs) + self._predictor = Predictor(**predictor_kwargs) + + @property + def predictor(self): + return self._predictor + + @property + def candidate_selection_kwargs(self): + return self._candidate_selection_kwargs + + @candidate_selection_kwargs.setter + def candidate_selection_kwargs(self, candidate_selection_kwargs): + if not candidate_selection_kwargs: + candidate_selection_kwargs = {} + self._candidate_selection_kwargs = candidate_selection_kwargs.copy() + + @property + def iteration_count(self): + return self.sl_kwargs.get("iteration_count", 0) + + @property + def train_idx(self): + return self.sl_kwargs.get("train_idx") + + @property + def 
train_idx_history(self): + return self.sl_kwargs.get("train_idx_history", None) + + @property + def predictions(self): + return self.sl_kwargs.get("predictions") + + @property + def uncertainties(self): + return self.sl_kwargs.get("uncertainties") + + @property + def candidate_indices(self): + return self.sl_kwargs.get("candidate_indices") + + @property + def acquisition_scores(self): + return self.sl_kwargs.get("acquisition_scores", None) + + @property + def candidate_structures(self): + idxs = self.candidate_indices + if idxs is not None: + return [self.design_space.design_space_structures[i] for i in idxs] + + @property + def candidate_index_history(self): + return self.sl_kwargs.get("candidate_index_history", None) + + @property + def predictions_history(self): + return self.sl_kwargs.get("predictions_history", None) + + @property + def uncertainties_history(self): + return self.sl_kwargs.get("uncertainties_history", None) + + def copy(self): + """ + Returns a copy + """ + acsl = self.__class__(design_space=self.design_space,) + acsl.predictor_kwargs = copy.deepcopy(self.predictor_kwargs) + acsl.sl_kwargs = copy.deepcopy(self.sl_kwargs) + return acsl + + def iterate(self): + """Runs the next iteration of sequential learning. 
+ + This process consists of: + - retraining the predictor + - predicting candidate properties and calculating candidate scores (if + fully explored returns None) + - selecting the next batch of candidates for objective evaluation (if + fully explored returns None) + """ + + dstructs = self.design_space.design_space_structures + dlabels = self.design_space.design_space_labels + + mask_nans = ~np.isnan(dlabels) + masked_structs = [struct for i, struct in enumerate(dstructs) if mask_nans[i]] + masked_labels = dlabels[np.where(mask_nans)] + + self.predictor.fit(masked_structs, masked_labels) + + train_idx = np.zeros(len(dlabels), dtype=bool) + train_idx[np.where(mask_nans)] = 1 + self.sl_kwargs.update({"train_idx": train_idx}) + train_idx_hist = self.sl_kwargs.get("train_idx_history") + if train_idx_hist is None: + train_idx_hist = [] + train_idx_hist.append(train_idx) + self.sl_kwargs.update({"train_idx_history": train_idx_hist}) + + preds, unc = self.predictor.predict(dstructs) + + # update predictions and store in history + self.sl_kwargs.update({"predictions": preds}) + pred_hist = self.sl_kwargs.get("predictions_history") + if pred_hist is None: + pred_hist = [] + pred_hist.append(preds) + self.sl_kwargs.update({"predictions_history": pred_hist}) + + # update uncertainties and store in history + self.sl_kwargs.update({"uncertainties": unc}) + unc_hist = self.sl_kwargs.get("uncertainties_history") + if unc_hist is None: + unc_hist = [] + unc_hist.append(unc) + self.sl_kwargs.update({"uncertainties_history": unc_hist}) + + # make sure haven't fully searched design space + if any([np.isnan(label) for label in dlabels]): + candidate_idx, _, aq_scores = choose_next_candidate( + dstructs, + dlabels, + train_idx, + preds, + unc, + **self.candidate_selection_kwargs, + ) + # if fully searched, no more candidate structures + else: + candidate_idx = None + aq_scores = None + self.sl_kwargs.update({"candidate_indices": candidate_idx}) + 
self.sl_kwargs.update({"acquisition_scores": aq_scores}) + + # update the candidate index history if new candidate + if candidate_idx is not None: + cand_idx_hist = self.sl_kwargs.get("candidate_index_history") + if cand_idx_hist is None: + cand_idx_hist = [] + cand_idx_hist.append(candidate_idx) + self.sl_kwargs.update({"candidate_index_history": cand_idx_hist}) + + # update the SL iteration count + itc = self.sl_kwargs.get("iteration_count", 0) + self.sl_kwargs.update({"iteration_count": itc + 1}) + + def to_jsonified_list(self) -> List: + """ + Returns a jsonified list representation + """ + jsonified_list = self.design_space.to_jsonified_list() + # append kwargs for predictor + jsonified_pred_kwargs = {} + for k in self.predictor_kwargs: + if k in ["model_class", "featurizer_class"]: + mod_string = self.predictor_kwargs[k].__module__ + class_string = self.predictor_kwargs[k].__name__ + jsonified_pred_kwargs[k] = [mod_string, class_string] + elif k == "featurization_kwargs": + jsonified_pred_kwargs[k] = copy.deepcopy(self.predictor_kwargs[k]) + # assumes design space will always match DesignSpace + del jsonified_pred_kwargs[k]["design_space_structures"] + else: + jsonified_pred_kwargs[k] = self.predictor_kwargs[k] + jsonified_list.append(jsonified_pred_kwargs) + # append kwargs for candidate selection + jsonified_list.append(self.candidate_selection_kwargs) + # append the acsl kwargs + jsonified_sl_kwargs = {} + for k in self.sl_kwargs: + if k != "iteration_count" and self.sl_kwargs[k] is not None: + jsonified_sl_kwargs[k] = [arr.tolist() for arr in self.sl_kwargs[k]] + elif k == "iteration_count": + jsonified_sl_kwargs["iteration_count"] = self.sl_kwargs[ + "iteration_count" + ] + elif self.sl_kwargs[k] is None: + jsonified_sl_kwargs[k] = None + jsonified_list.append(jsonified_sl_kwargs) + return jsonified_list + + def write_json_to_disk(self, write_location: str = ".", json_name: str = None): + """ + Writes `SequentialLearner` to disk as a json + """ + 
jsonified_list = self.to_jsonified_list() + + if json_name is None: + json_name = "acsl.json" + + json_path = os.path.join(write_location, json_name) + + with open(json_path, "w") as f: + json.dump(jsonified_list, f) + + @staticmethod + def from_json(json_name: str): + with open(json_name, "r") as f: + all_data = json.load(f) + structures = [] + for i in range(len(all_data) - 4): + atoms = atoms_decoder(all_data[i]) + structures.append(atoms) + labels = np.array(all_data[-4]) + acds = DesignSpace( + design_space_structures=structures, design_space_labels=labels, + ) + predictor_kwargs = all_data[-3] + for k in predictor_kwargs: + if k in ["model_class", "featurizer_class"]: + mod = importlib.import_module(predictor_kwargs[k][0]) + predictor_kwargs[k] = getattr(mod, predictor_kwargs[k][1]) + candidate_selection_kwargs = all_data[-2] + raw_sl_kwargs = all_data[-1] + sl_kwargs = {} + for k in raw_sl_kwargs: + if raw_sl_kwargs[k] is not None: + if k in [ + "predictions", + "uncertainties", + "acquisition_scores", + "candidate_indices", + ]: + sl_kwargs[k] = np.array(raw_sl_kwargs[k]) + elif k in [ + "predictions_history", + "uncertainties_history", + "candidate_index_history", + ]: + sl_kwargs[k] = [np.array(i) for i in raw_sl_kwargs[k]] + elif k == "iteration_count": + sl_kwargs[k] = raw_sl_kwargs[k] + elif k == "train_idx": + sl_kwargs[k] = np.array(raw_sl_kwargs[k], dtype=bool) + elif k == "train_idx_history": + sl_kwargs[k] = [np.array(i, dtype=bool) for i in raw_sl_kwargs[k]] + else: + sl_kwargs[k] = None + + return SequentialLearner( + design_space=acds, + predictor_kwargs=predictor_kwargs, + candidate_selection_kwargs=candidate_selection_kwargs, + sl_kwargs=sl_kwargs, + ) + + +def multiple_simulated_sequential_learning_runs( + full_design_space: DesignSpace, + number_of_runs: int = 5, + number_parallel_jobs: int = None, + predictor_kwargs: Dict[str, Union[str, float]] = None, + candidate_selection_kwargs: Dict[str, Union[str, float]] = None, + 
init_training_size: int = 10,
+    number_of_sl_loops: int = None,
+    write_to_disk: bool = False,
+    write_location: str = ".",
+    json_name_prefix: str = None,
+) -> List[SequentialLearner]:
+    """
+    Conducts multiple simulated sequential learning runs
+
+    Parameters
+    ----------
+
+    full_design_space:
+        Fully labelled DesignSpace to simulate
+        being searched over
+
+    predictor_kwargs:
+        Kwargs to be used in setting up the predictor.
+        This is where model class, model hyperparameters, etc.
+        are specified.
+
+    candidate_selection_kwargs:
+        Kwargs that specify the settings for candidate selection.
+        This is where acquisition function, targets, etc. are
+        specified.
+
+    init_training_size:
+        Size of the initial training set to be selected from
+        the full space.
+        Default: 10
+
+    number_of_sl_loops:
+        Integer specifying the number of sequential learning loops to be conducted.
+        This value cannot be greater than
+        `(DESIGN_SPACE_SIZE - init_training_size)/batch_size_to_add`
+        Default: maximum number of sl loops calculated above
+
+    number_of_runs:
+        Integer of number of runs to be done
+        Default: 5
+
+    number_parallel_jobs:
+        Integer giving the number of cores to be parallelized across
+        using `joblib`
+        Default: None (ie. will run in serial)
+
+    write_to_disk:
+        Boolean specifying whether runs history should be written to disk as jsons.
+        Default: False
+
+    write_location:
+        String with the location where runs history jsons should be written to disk.
+ Default: current directory + + json_name_prefix: + Prefix used when writing out each simulated run as a json + The naming convention is `{json_name_prefix}_{run #}.json` + Default: acsl_run + + Returns + ------- + + runs_history: + List of SequentialLearner objects for each simulated run + """ + + if number_parallel_jobs is not None: + runs_history = Parallel(n_jobs=number_parallel_jobs)( + delayed(simulated_sequential_learning)( + full_design_space=full_design_space, + predictor_kwargs=predictor_kwargs, + candidate_selection_kwargs=candidate_selection_kwargs, + number_of_sl_loops=number_of_sl_loops, + init_training_size=init_training_size, + ) + for i in range(number_of_runs) + ) + + else: + runs_history = [ + simulated_sequential_learning( + full_design_space=full_design_space, + predictor_kwargs=predictor_kwargs, + candidate_selection_kwargs=candidate_selection_kwargs, + number_of_sl_loops=number_of_sl_loops, + init_training_size=init_training_size, + ) + for i in range(number_of_runs) + ] + + # TODO: separate dictionary representation and writing to disk + if write_to_disk: + if not os.path.isdir(write_location): + os.makedirs(write_location) + if json_name_prefix is None: + json_name_prefix = "acsl_run" + for i, run in enumerate(runs_history): + name = json_name_prefix + "_" + str(i) + ".json" + run.write_json_to_disk(write_location=write_location, json_name=name) + print(f"SL histories written to {write_location}") + + return runs_history + + +def simulated_sequential_learning( + full_design_space: DesignSpace, + predictor_kwargs: Dict[str, Union[str, float]] = None, + candidate_selection_kwargs: Dict[str, Union[str, float]] = None, + init_training_size: int = 10, + number_of_sl_loops: int = None, + write_to_disk: bool = False, + write_location: str = ".", + json_name: str = None, +) -> SequentialLearner: + """ + Conducts a simulated sequential learning loop for a + fully labelled design space to explore. 
+
+    Parameters
+    ----------
+
+    full_design_space:
+        Fully labelled DesignSpace to simulate
+        being searched over
+
+    predictor_kwargs:
+        Kwargs to be used in setting up the predictor.
+        This is where model class, model hyperparameters, etc.
+        are specified.
+
+    candidate_selection_kwargs:
+        Kwargs that specify the settings for candidate selection.
+        This is where acquisition function, targets, etc. are
+        specified.
+
+    init_training_size:
+        Size of the initial training set to be selected from
+        the full space.
+        Default: 10
+
+    number_of_sl_loops:
+        Integer specifying the number of sequential learning loops to be conducted.
+        This value cannot be greater than
+        `(DESIGN_SPACE_SIZE - init_training_size)/batch_size_to_add`
+        Default: maximum number of sl loops calculated above
+
+    write_to_disk:
+        Boolean specifying whether the resulting sequential learner should be
+        written to disk as a json.
+        Defaults to False.
+
+    write_location:
+        String with the location where the resulting sequential learner
+        should be written to disk.
+        Defaults to current directory.
+
+    Returns
+    -------
+
+    sl:
+        Sequential Learner after having been iterated as specified
+        by the input settings. Contains candidate, prediction,
+        and uncertainty histories for further analysis as desired.
+    """
+
+    ds_size = len(full_design_space)
+
+    # check fully explored
+    if True in np.isnan(full_design_space.design_space_labels):
+        missing_label_idx = np.where(np.isnan(full_design_space.design_space_labels))[0]
+        msg = (
+            f"Design space must be fully explored."
+ f" Missing labels at indices: {missing_label_idx}" + ) + raise SequentialLearnerError(msg) + + # check that specified initial training size makes sense + if init_training_size > ds_size: + msg = f"Initial training size ({init_training_size})\ + larger than design space ({ds_size})" + raise SequentialLearnerError(msg) + + batch_size_to_add = candidate_selection_kwargs.get("num_candidates_to_pick", 1) + max_num_sl_loops = int(np.ceil((ds_size - init_training_size) / batch_size_to_add)) + + if number_of_sl_loops is None: + number_of_sl_loops = max_num_sl_loops + + # check that specified number of loops is feasible + if number_of_sl_loops > max_num_sl_loops: + msg = ( + f"Number of SL loops ({number_of_sl_loops}) cannot be greater than" + f" ({max_num_sl_loops})" + ) + raise SequentialLearnerError(msg) + + # generate initial training set + init_idx = np.zeros(ds_size, dtype=bool) + init_idx[np.random.choice(ds_size, init_training_size, replace=False)] = 1 + + init_structs = [ + full_design_space.design_space_structures[idx] + for idx, b in enumerate(init_idx) + if b + ] + init_labels = full_design_space.design_space_labels.copy() + init_labels = init_labels[np.where(init_idx)] + + # set up learner that is used for iteration + dummy_labels = np.empty(len(full_design_space)) + dummy_labels[:] = np.nan + ds = DesignSpace(full_design_space.design_space_structures, dummy_labels) + ds.update(init_structs, init_labels) + sl = SequentialLearner( + design_space=ds, + predictor_kwargs=predictor_kwargs, + candidate_selection_kwargs=candidate_selection_kwargs, + ) + # first iteration on initial dataset + sl.iterate() + + # start simulated sequential learning loop + for i in range(number_of_sl_loops): + print(f"Sequential Learning Iteration #{i+1}") + if sl.candidate_indices is not None: + next_structs = sl.candidate_structures + next_labels = full_design_space.design_space_labels.take( + sl.candidate_indices + ) + sl.design_space.update(next_structs, next_labels) + sl.iterate() 
+ + if write_to_disk: + sl.write_json_to_disk(write_location=write_location, json_name=json_name) + print(f"SL dictionary written to {write_location}") + + return sl + + +def choose_next_candidate( + structures: List[Atoms] = None, + labels: Array = None, + train_idx: Array = None, + pred: Array = None, + unc: Array = None, + aq: str = "MLI", + num_candidates_to_pick: int = None, + target_min: float = None, + target_max: float = None, + include_hhi: bool = False, + hhi_type: str = "production", + include_seg_ener: bool = False, +): + """ + Chooses the next candidate(s) from a given acquisition function + + Parameters + ---------- + + structures: + List of `Atoms` objects to be used for HHI weighting if desired + + labels: + Array of the labels for the data + + train_idx: + Indices of all data entries already in the training set + Default: consider entire training set + + pred: + Predictions for all structures in the dataset + + unc: + Uncertainties for all structures in the dataset + + aq: + Acquisition function to be used to select the next candidates + Options + - MLI: maximum likelihood of improvement (default) + - Random + - MU: maximum uncertainty + + num_candidates_to_pick: + Number of candidates to choose from the dataset + + target_min: + Minimum target value to optimize for + + target_max: + Maximum target value to optimize for + + include_hhi: + Whether HHI scores should be used to weight aq scores + + hhi_type: + Type of HHI index to be used for weighting + Options + - production (default) + - reserves + + include_seg_ener: + Whether segregation energies should be used to weight aq scores + + Returns + ------- + + parent_idx: + Index/indices of the selected candidates + + max_scores: + Maximum scores (corresponding to the selected candidates for given `aq`) + + aq_scores: + Calculated scores based on the selected `aq` for the entire training set + """ + hhi_scores = None + if include_hhi: + if structures is None: + msg = "Structures must be provided to 
include HHI scores" + raise SequentialLearnerError(msg) + hhi_scores = calculate_hhi_scores(structures, hhi_type) + + segreg_energy_scores = None + if include_seg_ener: + if structures is None: + msg = "Structures must be provided to include segregation energy scores" + raise SequentialLearnerError(msg) + segreg_energy_scores = calculate_segregation_energy_scores(structures) + + if aq == "Random": + if labels is None: + msg = "For aq = 'Random', the labels must be supplied" + raise SequentialLearnerError(msg) + + if train_idx is None: + train_idx = np.zeros(len(labels), dtype=bool) + + if hhi_scores is None: + hhi_scores = np.ones(len(train_idx)) + + if segreg_energy_scores is None: + segreg_energy_scores = np.ones(len(train_idx)) + + aq_scores = ( + np.random.choice(len(labels), size=len(labels), replace=False) + * hhi_scores + * segreg_energy_scores + ) + + elif aq == "MU": + if unc is None: + msg = "For aq = 'MU', the uncertainties must be supplied" + raise SequentialLearnerError(msg) + + if train_idx is None: + train_idx = np.zeros(len(unc), dtype=bool) + + if hhi_scores is None: + hhi_scores = np.ones(len(train_idx)) + + if segreg_energy_scores is None: + segreg_energy_scores = np.ones(len(train_idx)) + + aq_scores = unc.copy() * hhi_scores * segreg_energy_scores + + elif aq == "MLI": + if unc is None or pred is None: + msg = "For aq = 'MLI', both uncertainties and predictions must be supplied" + raise SequentialLearnerError(msg) + + if train_idx is None: + train_idx = np.zeros(len(unc), dtype=bool) + + if hhi_scores is None: + hhi_scores = np.ones(len(train_idx)) + + if segreg_energy_scores is None: + segreg_energy_scores = np.ones(len(train_idx)) + + aq_scores = ( + np.array( + [ + get_overlap_score(mean, std, x2=target_max, x1=target_min) + for mean, std in zip(pred, unc) + ] + ) + * hhi_scores + * segreg_energy_scores + ) + + else: + msg = f"Acquisition function {aq} is not supported" + raise NotImplementedError(msg) + + if num_candidates_to_pick is None: 
+ next_idx = np.array([np.argmax(aq_scores[~train_idx])]) + max_scores = [np.max(aq_scores[~train_idx])] + + else: + next_idx = np.argsort(aq_scores[~train_idx])[-num_candidates_to_pick:] + sorted_array = aq_scores[~train_idx][next_idx] + max_scores = list(sorted_array[-num_candidates_to_pick:]) + parent_idx = np.arange(aq_scores.shape[0])[~train_idx][next_idx] + + return parent_idx, max_scores, aq_scores + + +def get_overlap_score(mean: float, std: float, x2: float = None, x1: float = None): + """Calculate overlap score given targets x2 (max) and x1 (min)""" + if x1 is None and x2 is None: + msg = "Please specify at least either a minimum or maximum target for MLI" + raise SequentialLearnerError(msg) + + if x1 is None: + x1 = -np.inf + + if x2 is None: + x2 = np.inf + + norm_dist = stats.norm(loc=mean, scale=std) + return norm_dist.cdf(x2) - norm_dist.cdf(x1) + + +def calculate_hhi_scores(structures: List[Atoms], hhi_type: str = "production"): + """ + Calculates HHI scores for structures weighted by their composition. 
+    The scores are normalized and inverted such that these should
+    be maximized in the interest of finding a low cost system
+
+    Parameters
+    ----------
+
+    structures:
+        List of Atoms objects for which to calculate the scores
+
+    hhi_type:
+        Type of HHI index to be used for the score
+        Options
+        - production (default)
+        - reserves
+
+    Returns
+    -------
+
+    hhi_scores:
+        Scores corresponding to each of the provided structures
+
+    """
+    if structures is None:
+        msg = "To include HHI, the structures must be provided"
+        raise SequentialLearnerError(msg)
+
+    raw_hhi_data = HHI
+    max_hhi = np.max([raw_hhi_data[hhi_type][r] for r in raw_hhi_data[hhi_type]])
+    min_hhi = np.min([raw_hhi_data[hhi_type][r] for r in raw_hhi_data[hhi_type]])
+    # normalize and invert (so that this score is to be maximized)
+    norm_hhi_data = {
+        el: 1.0 - (raw_hhi_data[hhi_type][el] - min_hhi) / (max_hhi - min_hhi)
+        for el in raw_hhi_data[hhi_type]
+    }
+
+    hhi_scores = np.zeros(len(structures))
+    for idx, struct in enumerate(structures):
+        hhi = 0
+        el_counts = struct.symbols.formula.count()
+        tot_size = len(struct)
+        # weight calculated hhi score by composition
+        for el in el_counts:
+            hhi += norm_hhi_data[el] * el_counts[el] / tot_size
+        hhi_scores[idx] = hhi
+    return hhi_scores
+
+
+def calculate_segregation_energy_scores(
+    structures: List[Atoms], data_source: str = "raban1999"
+):
+    """
+    Calculates segregation energy scores for structures based on their
+    host and single-atom dopant species. The scores are normalized and
+    inverted such that these should be maximized
+
+    Parameters
+    ----------
+
+    structures:
+        List of Atoms objects for which to calculate the scores
+
+    data_source:
+        Which tabulated data should the segregation energies be pulled from.
+        Options:
+        - "raban1999": A.V. Raban, et. al. Phys. Rev. B 59, 15990
+        - "rao2020": K. K. Rao, et. al. 
Topics in Catalysis volume 63, pages 728-741 (2020)
+
+    Returns
+    -------
+
+    seg_ener_scores:
+        Scores corresponding to each of the provided structures
+
+    """
+    if structures is None:
+        msg = "To include segregation energies, the structures must be provided"
+        raise SequentialLearnerError(msg)
+
+    if data_source == "raban1999":
+        # won't consider surface energies (ie. dop == host) for normalization
+        max_seg_ener = SEGREGATION_ENERGIES["raban1999"]["Pd"]["W"]
+        min_seg_ener = SEGREGATION_ENERGIES["raban1999"]["Fe_100"]["Ag"]
+        # normalize and invert (so that this score is to be maximized)
+        norm_seg_ener_data = {}
+        for hsp in SEGREGATION_ENERGIES["raban1999"]:
+            norm_seg_ener_data[hsp] = {}
+            for dsp in SEGREGATION_ENERGIES["raban1999"][hsp]:
+                norm_seg_ener_data[hsp][dsp] = 1.0 - (
+                    SEGREGATION_ENERGIES["raban1999"][hsp][dsp] - min_seg_ener
+                ) / (max_seg_ener - min_seg_ener)
+    elif data_source == "rao2020":
+        norm_seg_ener_data = SEGREGATION_ENERGIES["rao2020"]
+    else:
+        msg = f"Unknown data source {data_source}"
+        raise SequentialLearnerError(msg)
+
+    seg_ener_scores = np.zeros(len(structures))
+    for idx, struct in enumerate(structures):
+        el_counts = struct.symbols.formula.count()
+        assert len(el_counts) == 2
+        for el in el_counts:
+            if el_counts[el] == 1:
+                dsp = el
+            else:
+                hsp = el
+        seg_ener_scores[idx] = norm_seg_ener_data[hsp][dsp]
+    return seg_ener_scores
diff --git a/src/autocat/saa.py b/src/autocat/saa.py
index 9aa6350f..2b539821 100644
--- a/src/autocat/saa.py
+++ b/src/autocat/saa.py
@@ -9,7 +9,7 @@ from ase.data import atomic_numbers
 from ase.data import ground_state_magnetic_moments
 
 from pymatgen.io.ase import AseAtomsAdaptor
-from pymatgen.analysis.adsorption import AdsorbateSiteFinder
+from pymatgen.analysis.structure_matcher import StructureMatcher
 
 from autocat.surface import generate_surface_structures
 
@@ -34,6 +34,19 @@ def _find_dopant_index(structure, dopant_element):
     return dopant_index[0][0]
 
 
+def _find_all_surface_atom_indices(structure, 
tol: float = 0.5) -> List[int]: + """Helper function to find all surface atom indices + within a tolerance distance of the highest atom""" + all_heights = structure.positions[:, 2] + highest_atom_idx = np.argmax(all_heights) + height_of_highest_atom = structure[highest_atom_idx].z + surface_atom_indices = [] + for idx, atom in enumerate(structure): + if height_of_highest_atom - atom.z < tol: + surface_atom_indices.append(idx) + return surface_atom_indices + + def generate_saa_structures( host_species: List[str], dopant_species: List[str], @@ -310,33 +323,34 @@ def substitute_single_atom_on_surface( substitution functionality is folded into a more general form. """ - tags = host_structure.get_tags() - constraints = host_structure.constraints - host_magmoms = host_structure.get_initial_magnetic_moments() - # convert ase substrate to pymatgen structure - converter = AseAtomsAdaptor() - pmg_structure = converter.get_structure(host_structure) + all_surface_indices = _find_all_surface_atom_indices(host_structure) - # find all symmetrically unique site to substitute on - finder = AdsorbateSiteFinder(pmg_structure) + ase_all_doped_structures = [] + for idx in all_surface_indices: + dop_struct = host_structure.copy() + dop_struct[idx].symbol = dopant_element + dop_struct[idx].magmom = dopant_magnetic_moment + ase_all_doped_structures.append(dop_struct) - # collect all substitution structures and convert them back into ase.Atoms - pmg_substituted_structures = finder.generate_substitution_structures(dopant_element) - if len(pmg_substituted_structures) > 1: + # convert ase substrate to pymatgen structure + converter = AseAtomsAdaptor() + pmg_doped_structures = [ + converter.get_structure(struct) for struct in ase_all_doped_structures + ] + + # check that only one unique surface doped structure + matcher = StructureMatcher() + pmg_symm_equiv_doped_structure = [ + s[0] for s in matcher.group_structures(pmg_doped_structures) + ] + if len(pmg_symm_equiv_doped_structure) > 1: msg = 
"Multiple symmetrically unique sites to dope found." raise NotImplementedError(msg) - ase_substituted_structure = converter.get_atoms(pmg_substituted_structures[0]) - ase_substituted_structure.set_tags(tags) - # ensure pbc in xy only - ase_substituted_structure.pbc = (1, 1, 0) - # propagate constraints and host magnetization - ase_substituted_structure.constraints = constraints - ase_substituted_structure.set_initial_magnetic_moments(host_magmoms) - # set initial magnetic moment for the dopant atom - dopant_idx = _find_dopant_index(ase_substituted_structure, dopant_element) - ase_substituted_structure[dopant_idx].magmom = dopant_magnetic_moment + # assumes only a single unique doped structure + ase_substituted_structure = ase_all_doped_structures[0] + # center the single-atom dopant if place_dopant_at_center: cent_x = ( diff --git a/src/autocat/utils.py b/src/autocat/utils.py index fc05b721..6b53f4a8 100644 --- a/src/autocat/utils.py +++ b/src/autocat/utils.py @@ -1,5 +1,6 @@ import os from contextlib import contextmanager +from ase import Atoms @contextmanager @@ -10,3 +11,13 @@ def change_working_dir(new_dir: str): yield finally: os.chdir(current_dir) + + +def flatten_structures_dict(autocat_dict: dict): + structure_list = [] + for element in autocat_dict: + if isinstance(autocat_dict[element], dict): + structure_list.extend(flatten_structures_dict(autocat_dict[element])) + elif isinstance(autocat_dict[element], Atoms): + structure_list.append(autocat_dict[element]) + return structure_list diff --git a/tests/learning/__init__.py b/tests/learning/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/tests/learning/test_featurizers.py b/tests/learning/test_featurizers.py new file mode 100644 index 00000000..a23df132 --- /dev/null +++ b/tests/learning/test_featurizers.py @@ -0,0 +1,348 @@ +"""Unit tests for the `autocat.learning.featurizersi` module.""" + +import numpy as np + +from dscribe.descriptors import SineMatrix +from dscribe.descriptors 
import CoulombMatrix +from dscribe.descriptors import ACSF +from dscribe.descriptors import SOAP + +from matminer.featurizers.composition import ElementProperty +from matminer.featurizers.site import ChemicalSRO +from matminer.featurizers.site import OPSiteFingerprint +from matminer.featurizers.site import CrystalNNFingerprint + +from autocat.adsorption import generate_adsorbed_structures +from autocat.surface import generate_surface_structures +from autocat.saa import generate_saa_structures +from autocat.learning.featurizers import Featurizer +from autocat.utils import flatten_structures_dict + +from pymatgen.io.ase import AseAtomsAdaptor +from pymatgen.analysis.local_env import VoronoiNN + + +def test_eq_featurizer(): + # test comparing featurizers + + f = Featurizer( + SOAP, + max_size=5, + species_list=["Fe", "O", "H"], + kwargs={"rcut": 12, "nmax": 8, "lmax": 8}, + ) + f1 = Featurizer( + SOAP, + max_size=5, + species_list=["Fe", "O", "H"], + kwargs={"rcut": 12, "nmax": 8, "lmax": 8}, + ) + assert f == f1 + + f1.kwargs.update({"rcut": 13}) + assert f != f1 + + surfs = flatten_structures_dict(generate_surface_structures(["Fe", "V"])) + surfs.extend( + flatten_structures_dict( + generate_surface_structures(["Au", "Ag"], supercell_dim=(1, 1, 5)) + ) + ) + f = Featurizer(SineMatrix, design_space_structures=surfs,) + + f1 = Featurizer(SineMatrix, species_list=["Fe", "V", "Au", "Ag"], max_size=36) + assert f == f1 + + +def test_featurizer_species_list(): + # test default species list + f = Featurizer(SineMatrix) + assert f.species_list == ["Fe", "Ni", "Pt", "Pd", "Cu", "C", "N", "O", "H"] + + # test updating species list manually and sorting + f.species_list = ["Li", "Na", "K"] + assert f.species_list == ["K", "Na", "Li"] + + # test getting species list from design space structures + surfs = flatten_structures_dict(generate_surface_structures(["Fe", "V", "Ti"])) + saas = flatten_structures_dict(generate_saa_structures(["Cu", "Au"], ["Fe", "Pt"])) + 
surfs.extend(saas) + f.design_space_structures = surfs + assert f.species_list == ["Ti", "V", "Fe", "Pt", "Au", "Cu"] + + +def test_featurizer_max_size(): + # test default max size + f = Featurizer(SOAP, kwargs={"rcut": 12, "nmax": 8, "lmax": 8}) + assert f.max_size == 100 + + # test updating max size manually + f.max_size = 50 + assert f.max_size == 50 + + # test getting max size from design space structures + surfs = flatten_structures_dict( + generate_surface_structures(["Ru"], supercell_dim=(2, 2, 4)) + ) + surfs.extend( + flatten_structures_dict( + generate_surface_structures(["Fe"], supercell_dim=(4, 4, 4)) + ) + ) + f.design_space_structures = surfs + assert f.max_size == 64 + + +def test_featurizer_design_space_structures(): + # tests giving design space structures + surfs = flatten_structures_dict(generate_surface_structures(["Li", "Na"])) + surfs.extend( + flatten_structures_dict( + generate_surface_structures(["Cu", "Ni"], supercell_dim=(1, 1, 5)) + ) + ) + f = Featurizer( + SineMatrix, design_space_structures=surfs, max_size=20, species_list=["H"] + ) + assert f.design_space_structures == surfs + # make sure design space is prioritized over max size and species list + assert f.max_size == 36 + assert f.species_list == ["Na", "Li", "Ni", "Cu"] + + +def test_featurizer_featurizer_kwargs(): + # test specifying kwargs + f = Featurizer(CoulombMatrix, kwargs={"flatten": False}) + assert f.kwargs == {"flatten": False} + assert f.featurization_object.flatten == False + + # test updating kwargs + f.kwargs.update({"sparse": True}) + assert f.featurization_object.sparse == True + + # test rm kwargs when updating class + f.featurizer_class = SineMatrix + assert f.kwargs is None + + +def test_featurizer_featurizer_class(): + # test changing featurizer class + f = Featurizer(SOAP, kwargs={"rcut": 12, "nmax": 8, "lmax": 8}) + assert f.featurizer_class == SOAP + assert isinstance(f.featurization_object, SOAP) + f.featurizer_class = SineMatrix + assert 
f.featurizer_class == SineMatrix + assert isinstance(f.featurization_object, SineMatrix) + + +def test_featurizer_preset(): + # tests specifying preset for class object + f = Featurizer(ElementProperty, preset="magpie") + assert f.preset == "magpie" + assert "Electronegativity" in f.featurization_object.features + assert not "melting_point" in f.featurization_object.features + + f.preset = "matminer" + assert f.preset == "matminer" + assert not "NdUnfilled" in f.featurization_object.features + assert "coefficient_of_linear_thermal_expansion" in f.featurization_object.features + + +def test_featurizer_featurize_single(): + # tests featurizing single structure at a time + + conv = AseAtomsAdaptor() + + # TEST STRUCTURE FEATURIZERS + + # test ElementProperty + saa = flatten_structures_dict(generate_saa_structures(["Cu"], ["Pt"]))[0] + f = Featurizer(ElementProperty, preset="magpie", max_size=len(saa)) + acf = f.featurize_single(saa) + ep = ElementProperty.from_preset("magpie") + pymat = conv.get_structure(saa) + manual_elem_prop = ep.featurize(pymat.composition) + assert np.array_equal(acf, manual_elem_prop) + + # test SineMatrix + f.featurizer_class = SineMatrix + acf = f.featurize_single(saa) + sm = SineMatrix(n_atoms_max=len(saa), permutation="none") + manual_sm = sm.create(saa).reshape(-1,) + assert np.array_equal(acf, manual_sm) + + # test CoulombMatrix + f.featurizer_class = CoulombMatrix + acf = f.featurize_single(saa) + cm = CoulombMatrix(n_atoms_max=len(saa), permutation="none") + manual_cm = cm.create(saa).reshape(-1,) + assert np.array_equal(acf, manual_cm) + + # TEST SITE FEATURIZERS + ads_struct = flatten_structures_dict( + generate_adsorbed_structures( + surface=saa, + adsorbates=["OH"], + adsorption_sites={"custom": [(0.0, 0.0)]}, + use_all_sites=False, + ) + )[0] + f.max_size = len(ads_struct) + species = np.unique(ads_struct.get_chemical_symbols()).tolist() + f.species_list = species + + # test ACSF + f.featurizer_class = ACSF + f.kwargs = {"rcut": 
6.0} + acf = f.featurize_single(ads_struct) + acsf = ACSF(rcut=6.0, species=species) + manual_acsf = acsf.create(ads_struct, [36, 37]) + assert np.array_equal(acf, manual_acsf) + + # test SOAP + f.featurizer_class = SOAP + f.kwargs = {"rcut": 6.0, "lmax": 6, "nmax": 6} + acf = f.featurize_single(ads_struct) + soap = SOAP(rcut=6.0, species=species, nmax=6, lmax=6) + manual_soap = soap.create(ads_struct, [36, 37]) + assert np.array_equal(acf, manual_soap) + + # test ChemicalSRO + f.featurizer_class = ChemicalSRO + vnn = VoronoiNN(cutoff=10.0, allow_pathological=True) + f.kwargs = {"nn": vnn, "includes": species} + acf = f.featurize_single(ads_struct) + csro = ChemicalSRO(vnn, includes=species) + pym_struct = conv.get_structure(ads_struct) + csro.fit([[pym_struct, 36], [pym_struct, 37]]) + manual_csro = csro.featurize(pym_struct, -2) + manual_csro = np.concatenate((manual_csro, csro.featurize(pym_struct, -1))) + assert np.array_equal(acf, manual_csro) + + # test OPSiteFingerprint + f.featurizer_class = OPSiteFingerprint + acf = f.featurize_single(ads_struct) + pym_struct = conv.get_structure(ads_struct) + opsf = OPSiteFingerprint() + manual_opsf = opsf.featurize(pym_struct, -2) + manual_opsf = np.concatenate((manual_opsf, opsf.featurize(pym_struct, -1))) + assert np.array_equal(acf, manual_opsf) + + # test CrystalNNFingerprint + f.featurizer_class = CrystalNNFingerprint + f.preset = "cn" + acf = f.featurize_single(ads_struct) + pym_struct = conv.get_structure(ads_struct) + cnn = CrystalNNFingerprint.from_preset("cn") + manual_cnn = cnn.featurize(pym_struct, -2) + manual_cnn = np.concatenate((manual_cnn, cnn.featurize(pym_struct, -1))) + assert np.array_equal(acf, manual_cnn) + + +def test_featurizer_featurize_multiple(): + # tests featurizing multiple structures at a time + + # TEST STRUCTURE FEATURIZER + + # test ElementProperty + saas = flatten_structures_dict( + generate_saa_structures( + ["Au", "Cu"], ["Pd", "Pt"], facets={"Au": ["111"], "Cu": ["111"]} + ) + ) + f 
= Featurizer(ElementProperty, preset="magpie", design_space_structures=saas) + acf = f.featurize_multiple(saas) + manual_mat = [] + for i in range(len(saas)): + manual_mat.append(f.featurize_single(saas[i])) + manual_mat = np.array(manual_mat) + assert np.array_equal(acf, manual_mat) + + # test SineMatrix + f.featurizer_class = SineMatrix + acf = f.featurize_multiple(saas) + manual_mat = [] + for i in range(len(saas)): + manual_mat.append(f.featurize_single(saas[i])) + manual_mat = np.array(manual_mat) + assert np.array_equal(acf, manual_mat) + + # test CoulombMatrix + f.featurizer_class = CoulombMatrix + acf = f.featurize_multiple(saas) + manual_mat = [] + for i in range(len(saas)): + manual_mat.append(f.featurize_single(saas[i])) + manual_mat = np.array(manual_mat) + assert np.array_equal(acf, manual_mat) + + # TEST SITE FEATURIZER + ads_structs = [] + for struct in saas: + ads_structs.append( + flatten_structures_dict( + generate_adsorbed_structures( + surface=struct, + adsorbates=["NNH"], + adsorption_sites={"custom": [(0.0, 0.0)]}, + use_all_sites=False, + ) + )[0] + ) + species_list = [] + for s in ads_structs: + # get all unique species + found_species = np.unique(s.get_chemical_symbols()).tolist() + new_species = [spec for spec in found_species if spec not in species_list] + species_list.extend(new_species) + + # test SOAP + f.featurizer_class = SOAP + f.design_space_structures = ads_structs + f.kwargs = {"rcut": 6.0, "lmax": 6, "nmax": 6} + acf = f.featurize_multiple(ads_structs) + manual_mat = [] + for i in range(len(ads_structs)): + manual_mat.append(f.featurize_single(ads_structs[i]).flatten()) + manual_mat = np.array(manual_mat) + assert np.array_equal(acf, manual_mat) + + # test ACSF + f.featurizer_class = ACSF + f.kwargs = {"rcut": 6.0} + acf = f.featurize_multiple(ads_structs) + manual_mat = [] + for i in range(len(ads_structs)): + manual_mat.append(f.featurize_single(ads_structs[i]).flatten()) + manual_mat = np.array(manual_mat) + assert 
np.array_equal(acf, manual_mat) + + # test ChemicalSRO + f.featurizer_class = ChemicalSRO + vnn = VoronoiNN(cutoff=10.0, allow_pathological=True) + f.kwargs = {"nn": vnn, "includes": species_list} + acf = f.featurize_multiple(ads_structs) + manual_mat = [] + for i in range(len(ads_structs)): + manual_mat.append(f.featurize_single(ads_structs[i]).flatten()) + manual_mat = np.array(manual_mat) + assert np.array_equal(acf, manual_mat) + + # test OPSiteFingerprint + f.featurizer_class = OPSiteFingerprint + acf = f.featurize_multiple(ads_structs) + manual_mat = [] + for i in range(len(ads_structs)): + manual_mat.append(f.featurize_single(ads_structs[i]).flatten()) + manual_mat = np.array(manual_mat) + assert np.array_equal(acf, manual_mat) + + # test CrystalNNFingerprint + f.featurizer_class = CrystalNNFingerprint + f.preset = "cn" + acf = f.featurize_multiple(ads_structs) + manual_mat = [] + for i in range(len(ads_structs)): + manual_mat.append(f.featurize_single(ads_structs[i]).flatten()) + manual_mat = np.array(manual_mat) + assert np.array_equal(acf, manual_mat) diff --git a/tests/learning/test_predictors.py b/tests/learning/test_predictors.py new file mode 100644 index 00000000..471738dd --- /dev/null +++ b/tests/learning/test_predictors.py @@ -0,0 +1,229 @@ +"""Unit tests for the `autocat.learning.predictors` module""" + +import pytest +import numpy as np + +from sklearn.kernel_ridge import KernelRidge +from sklearn.gaussian_process import GaussianProcessRegressor +from sklearn.gaussian_process.kernels import RBF +from sklearn.utils.validation import check_is_fitted +from sklearn.exceptions import NotFittedError + +from dscribe.descriptors import SineMatrix +from dscribe.descriptors import SOAP + +from matminer.featurizers.composition import ElementProperty + +from ase import Atoms + +from autocat.adsorption import generate_adsorbed_structures, place_adsorbate +from autocat.surface import generate_surface_structures +from autocat.learning.predictors import 
Predictor +from autocat.learning.predictors import PredictorError +from autocat.utils import flatten_structures_dict + + +def test_fit(): + # Test returns a fit model + subs = flatten_structures_dict(generate_surface_structures(["Pt", "Fe", "Ru"])) + structs = [] + for sub in subs: + ads_struct = flatten_structures_dict( + generate_adsorbed_structures( + surface=sub, + adsorbates=["OH"], + adsorption_sites={"origin": [(0.0, 0.0)]}, + use_all_sites=False, + ) + )[0] + structs.append(ads_struct) + labels = np.random.rand(len(structs)) + acsc = Predictor( + featurizer_class=SOAP, + featurization_kwargs={ + "species_list": ["Pt", "Fe", "Ru", "O", "H"], + "kwargs": {"rcut": 6.0, "nmax": 6, "lmax": 6}, + }, + model_class=GaussianProcessRegressor, + ) + acsc.fit( + training_structures=structs, y=labels, + ) + assert acsc.is_fit + assert check_is_fitted(acsc.regressor) is None + + # check no longer fit after changing featurization kwargs + acsc.featurization_kwargs = { + "species_list": ["Pt", "Fe", "Ru", "O", "H", "N"], + "kwargs": {"rcut": 7.0, "nmax": 8, "lmax": 8}, + } + assert not acsc.is_fit + with pytest.raises(NotFittedError): + check_is_fitted(acsc.regressor) + + acsc.fit( + training_structures=structs, y=labels, + ) + + # check no longer fit after changing featurization class + acsc.featurizer_class = SineMatrix + assert not acsc.is_fit + with pytest.raises(NotFittedError): + check_is_fitted(acsc.regressor) + + acsc.fit( + training_structures=structs, y=labels, + ) + + # check no longer fit after changing model class + acsc.model_class = KernelRidge + assert not acsc.is_fit + with pytest.raises(NotFittedError): + check_is_fitted(acsc.regressor) + + acsc.fit( + training_structures=structs, y=labels, + ) + + # check no longer fit after changing model kwargs + kernel = RBF() + acsc.model_kwargs = {"kernel": kernel} + assert not acsc.is_fit + with pytest.raises(NotFittedError): + check_is_fitted(acsc.regressor) + + +def test_predict(): + # Test outputs are returned 
as expected + subs = flatten_structures_dict(generate_surface_structures(["Pt", "Fe", "Ru"])) + structs = [] + for sub in subs: + ads_struct = flatten_structures_dict( + generate_adsorbed_structures( + surface=sub, + adsorbates=["OH"], + adsorption_sites={"origin": [(0.0, 0.0)]}, + use_all_sites=False, + ) + )[0] + structs.append(ads_struct) + labels = np.random.rand(len(structs)) + acsc = Predictor( + featurizer_class=SOAP, + featurization_kwargs={ + "species_list": ["Pt", "Fe", "Ru", "O", "H"], + "kwargs": {"rcut": 6.0, "nmax": 6, "lmax": 6}, + }, + model_class=GaussianProcessRegressor, + ) + acsc.fit( + training_structures=structs[:-3], y=labels[:-3], + ) + pred, unc = acsc.predict([structs[-3]],) + assert len(pred) == 1 + # check dimension of uncertainty estimates + assert len(unc) == 1 + + pred, unc = acsc.predict(structs[-3:],) + assert len(pred) == 3 + # check dimension of uncertainty estimates + assert len(unc) == 3 + + # Test prediction on model without uncertainty + acsc.model_class = KernelRidge + acsc.fit( + training_structures=structs[:-3], y=labels[:-3], + ) + pred, unc = acsc.predict([structs[-2]],) + assert len(pred) == 1 + assert unc is None + + +def test_score(): + # Tests the score method + subs = flatten_structures_dict(generate_surface_structures(["Pt", "Fe", "Ru"])) + structs = [] + for sub in subs: + ads_struct = flatten_structures_dict( + generate_adsorbed_structures( + surface=sub, + adsorbates=["OH"], + adsorption_sites={"origin": [(0.0, 0.0)]}, + use_all_sites=False, + ) + )[0] + structs.append(ads_struct) + labels = np.random.rand(len(structs)) + acsc = Predictor( + featurizer_class=SOAP, + featurization_kwargs={ + "species_list": ["Pt", "Fe", "Ru", "O", "H"], + "kwargs": {"rcut": 6.0, "nmax": 6, "lmax": 6}, + }, + model_class=GaussianProcessRegressor, + ) + acsc.fit( + training_structures=structs[:-3], y=labels[:-3], + ) + mae = acsc.score(structs[-3:], labels[-3:]) + assert isinstance(mae, float) + mse = acsc.score(structs[-2:], 
labels[-2:], metric="mse") + assert isinstance(mse, float) + + # Test returning predictions + _, preds, uncs = acsc.score(structs[-2:], labels[-2:], return_predictions=True) + assert len(preds) == 2 + assert len(uncs) == 2 + # check catches unknown metric + with pytest.raises(PredictorError): + acsc.score(structs, labels, metric="msd") + + +def test_class_and_kwargs_logic(): + # Tests providing regression model class and kwargs + featurization_kwargs = { + "species_list": ["Pt", "Fe", "Ru", "O", "H"], + "kwargs": {"rcut": 6.0, "nmax": 6, "lmax": 6, "sparse": True}, + } + acsc = Predictor( + model_class=KernelRidge, + model_kwargs={"gamma": 0.5}, + featurizer_class=SOAP, + featurization_kwargs=featurization_kwargs, + ) + assert isinstance(acsc.regressor, KernelRidge) + # check that regressor created with correct kwargs + assert acsc.regressor.gamma == 0.5 + assert acsc.model_kwargs == {"gamma": 0.5} + assert acsc.featurization_kwargs == featurization_kwargs + assert acsc.featurizer.featurization_object.sparse + + # check that model kwargs are removed when model class is changed + acsc.model_class = GaussianProcessRegressor + assert acsc.model_kwargs is None + assert acsc.featurizer_class == SOAP + assert acsc.featurization_kwargs == featurization_kwargs + + # check that regressor is updated when model kwargs updated + acsc.model_kwargs = {"alpha": 5e-10} + assert acsc.regressor.alpha == 5e-10 + + # check that featurization kwargs removed when featurization class changed + acsc.featurizer_class = ElementProperty + assert acsc.featurization_kwargs is None + + # check that featurizer is updated when featurization kwargs updated + acsc.featurization_kwargs = {"preset": "magpie"} + assert "Electronegativity" in acsc.featurizer.featurization_object.features + + acsc.featurization_kwargs = {"preset": "matminer"} + assert ( + "coefficient_of_linear_thermal_expansion" + in acsc.featurizer.featurization_object.features + ) + + acsc.featurizer_class = SineMatrix + 
acsc.featurization_kwargs = {"kwargs": {"flatten": False}} + assert not acsc.featurizer.featurization_object.flatten + acsc.featurization_kwargs = {"kwargs": {"flatten": True}} + assert acsc.featurizer.featurization_object.flatten diff --git a/tests/learning/test_sequential.py b/tests/learning/test_sequential.py new file mode 100644 index 00000000..1f1386c8 --- /dev/null +++ b/tests/learning/test_sequential.py @@ -0,0 +1,1180 @@ +"""Unit tests for the `autocat.learning.sequential` module""" + +import os +import pytest +import numpy as np +import json + +import tempfile + +from sklearn.gaussian_process import GaussianProcessRegressor + +from dscribe.descriptors import SOAP +from dscribe.descriptors import SineMatrix +from matminer.featurizers.composition import ElementProperty + +from scipy import stats +from ase.io.jsonio import decode as ase_decoder +from ase import Atoms +from autocat.data.hhi import HHI +from autocat.data.segregation_energies import SEGREGATION_ENERGIES +from autocat.learning.predictors import Predictor +from autocat.learning.sequential import ( + DesignSpace, + DesignSpaceError, + SequentialLearnerError, + SequentialLearner, + calculate_segregation_energy_scores, + choose_next_candidate, + get_overlap_score, +) +from autocat.learning.sequential import simulated_sequential_learning +from autocat.learning.sequential import multiple_simulated_sequential_learning_runs +from autocat.learning.sequential import calculate_hhi_scores +from autocat.surface import generate_surface_structures +from autocat.adsorption import place_adsorbate +from autocat.saa import generate_saa_structures +from autocat.utils import flatten_structures_dict + + +def test_sequential_learner_from_json(): + # Tests generation of an SequentialLearner from a json + sub1 = generate_surface_structures(["Au"], facets={"Au": ["110"]})["Au"]["fcc110"][ + "structure" + ] + sub1 = place_adsorbate(sub1, Atoms("C")) + sub2 = generate_surface_structures(["Li"], facets={"Li": 
["100"]})["Li"]["bcc100"][ + "structure" + ] + sub2 = place_adsorbate(sub2, Atoms("Mg")) + sub3 = generate_surface_structures(["Ru"], facets={"Ru": ["0001"]})["Ru"][ + "hcp0001" + ]["structure"] + sub3 = place_adsorbate(sub3, Atoms("N")) + structs = [sub1, sub2, sub3] + labels = np.array([0.1, np.nan, 0.3]) + acds = DesignSpace(structs, labels) + featurization_kwargs = {"kwargs": {"rcut": 5.0, "lmax": 6, "nmax": 6}} + predictor_kwargs = { + "model_class": GaussianProcessRegressor, + "featurizer_class": SOAP, + "featurization_kwargs": featurization_kwargs, + } + + candidate_selection_kwargs = {"aq": "Random", "num_candidates_to_pick": 3} + acsl = SequentialLearner( + acds, + predictor_kwargs=predictor_kwargs, + candidate_selection_kwargs=candidate_selection_kwargs, + ) + acsl.iterate() + with tempfile.TemporaryDirectory() as _tmp_dir: + acsl.write_json_to_disk(_tmp_dir, "testing_acsl.json") + json_path = os.path.join(_tmp_dir, "testing_acsl.json") + written_acsl = SequentialLearner.from_json(json_path) + assert np.array_equal( + written_acsl.design_space.design_space_labels, + acds.design_space_labels, + equal_nan=True, + ) + assert ( + written_acsl.design_space.design_space_structures + == acds.design_space_structures + ) + predictor_kwargs["featurization_kwargs"][ + "design_space_structures" + ] = acds.design_space_structures + assert written_acsl.predictor_kwargs == predictor_kwargs + assert written_acsl.candidate_selection_kwargs == candidate_selection_kwargs + assert written_acsl.iteration_count == 1 + assert np.array_equal(written_acsl.train_idx, acsl.train_idx) + assert written_acsl.train_idx[0] in [True, False] + assert np.array_equal(written_acsl.train_idx_history, acsl.train_idx_history) + assert written_acsl.train_idx_history[0][0] in [True, False] + assert np.array_equal(written_acsl.predictions, acsl.predictions) + assert np.array_equal( + written_acsl.predictions_history, acsl.predictions_history + ) + assert np.array_equal(written_acsl.uncertainties, 
acsl.uncertainties) + assert np.array_equal( + written_acsl.uncertainties_history, acsl.uncertainties_history + ) + assert np.array_equal(written_acsl.candidate_indices, acsl.candidate_indices) + assert np.array_equal( + written_acsl.candidate_index_history, acsl.candidate_index_history + ) + assert np.array_equal(written_acsl.acquisition_scores, acsl.acquisition_scores) + + +def test_sequential_learner_write_json(): + # Tests writing a SequentialLearner to disk as a json + sub1 = generate_surface_structures(["Ag"], facets={"Ag": ["110"]})["Ag"]["fcc110"][ + "structure" + ] + sub1 = place_adsorbate(sub1, Atoms("B")) + sub2 = generate_surface_structures(["Li"], facets={"Li": ["100"]})["Li"]["bcc100"][ + "structure" + ] + sub2 = place_adsorbate(sub2, Atoms("Al")) + sub3 = generate_surface_structures(["Ti"], facets={"Ti": ["0001"]})["Ti"][ + "hcp0001" + ]["structure"] + sub3 = place_adsorbate(sub3, Atoms("H")) + structs = [sub1, sub2, sub3] + labels = np.array([0.1, 0.2, np.nan]) + featurization_kwargs = {"preset": "magpie"} + predictor_kwargs = { + "model_class": GaussianProcessRegressor, + "featurizer_class": ElementProperty, + "featurization_kwargs": featurization_kwargs, + } + + candidate_selection_kwargs = {"aq": "MU", "num_candidates_to_pick": 2} + acds = DesignSpace(structs, labels) + acsl = SequentialLearner( + acds, + predictor_kwargs=predictor_kwargs, + candidate_selection_kwargs=candidate_selection_kwargs, + ) + with tempfile.TemporaryDirectory() as _tmp_dir: + acsl.write_json_to_disk(_tmp_dir, "testing_acsl.json") + with open(os.path.join(_tmp_dir, "testing_acsl.json"), "r") as f: + sl = json.load(f) + written_structs = [ase_decoder(sl[i]) for i in range(3)] + assert structs == written_structs + assert np.array_equal(labels, sl[3], equal_nan=True) + # check predictor kwargs kept + predictor_kwargs["model_class"] = [ + "sklearn.gaussian_process._gpr", + "GaussianProcessRegressor", + ] + predictor_kwargs["featurizer_class"] = [ + 
"matminer.featurizers.composition.composite", + "ElementProperty", + ] + del predictor_kwargs["featurization_kwargs"]["design_space_structures"] + assert sl[4] == predictor_kwargs + # check candidate selection kwargs kept + assert sl[-2] == candidate_selection_kwargs + assert sl[-1] == { + "iteration_count": 0, + "train_idx": None, + "train_idx_history": None, + "predictions": None, + "predictions_history": None, + "uncertainties": None, + "uncertainties_history": None, + "candidate_indices": None, + "candidate_index_history": None, + "acquisition_scores": None, + } + + # test after iteration + acsl.iterate() + with tempfile.TemporaryDirectory() as _tmp_dir: + acsl.write_json_to_disk(_tmp_dir, "testing_acsl.json") + with open(os.path.join(_tmp_dir, "testing_acsl.json"), "r") as f: + sl = json.load(f) + written_structs = [ase_decoder(sl[i]) for i in range(3)] + assert structs == written_structs + assert np.array_equal(labels, sl[3], equal_nan=True) + # check predictor kwargs kept + predictor_kwargs["model_class"] = [ + "sklearn.gaussian_process._gpr", + "GaussianProcessRegressor", + ] + predictor_kwargs["featurizer_class"] = [ + "matminer.featurizers.composition.composite", + "ElementProperty", + ] + assert sl[4] == predictor_kwargs + # check candidate selection kwargs kept + assert sl[-2] == candidate_selection_kwargs + assert sl[-1].get("iteration_count") == 1 + assert sl[-1].get("train_idx") == acsl.train_idx.tolist() + assert sl[-1].get("train_idx_history") == [ + ti.tolist() for ti in acsl.train_idx_history + ] + assert isinstance(sl[-1].get("train_idx_history")[0][0], bool) + assert sl[-1].get("predictions") == acsl.predictions.tolist() + assert sl[-1].get("predictions_history") == [ + p.tolist() for p in acsl.predictions_history + ] + assert sl[-1].get("uncertainties") == acsl.uncertainties.tolist() + assert sl[-1].get("uncertainties_history") == [ + u.tolist() for u in acsl.uncertainties_history + ] + assert sl[-1].get("candidate_indices") == 
acsl.candidate_indices.tolist() + assert sl[-1].get("candidate_index_history") == [ + c.tolist() for c in acsl.candidate_index_history + ] + assert sl[-1].get("acquisition_scores") == acsl.acquisition_scores.tolist() + assert sl[-1].get("acquisition_scores") is not None + + +def test_sequential_learner_to_jsonified_list(): + # Tests writing a SequentialLearner to disk as a json + sub1 = generate_surface_structures(["Ag"], facets={"Ag": ["110"]})["Ag"]["fcc110"][ + "structure" + ] + sub1 = place_adsorbate(sub1, Atoms("B")) + sub2 = generate_surface_structures(["Li"], facets={"Li": ["100"]})["Li"]["bcc100"][ + "structure" + ] + sub2 = place_adsorbate(sub2, Atoms("Al")) + sub3 = generate_surface_structures(["Ti"], facets={"Ti": ["0001"]})["Ti"][ + "hcp0001" + ]["structure"] + sub3 = place_adsorbate(sub3, Atoms("H")) + structs = [sub1, sub2, sub3] + labels = np.array([0.1, 0.2, np.nan]) + featurization_kwargs = {"preset": "magpie"} + predictor_kwargs = { + "model_class": GaussianProcessRegressor, + "featurizer_class": ElementProperty, + "featurization_kwargs": featurization_kwargs, + } + + candidate_selection_kwargs = {"aq": "MU", "num_candidates_to_pick": 2} + acds = DesignSpace(structs, labels) + acsl = SequentialLearner( + acds, + predictor_kwargs=predictor_kwargs, + candidate_selection_kwargs=candidate_selection_kwargs, + ) + jsonified_list = acsl.to_jsonified_list() + json_structs = [ase_decoder(jsonified_list[i]) for i in range(3)] + assert structs == json_structs + assert np.array_equal(labels, jsonified_list[3], equal_nan=True) + # check predictor kwargs kept + predictor_kwargs["model_class"] = [ + "sklearn.gaussian_process._gpr", + "GaussianProcessRegressor", + ] + predictor_kwargs["featurizer_class"] = [ + "matminer.featurizers.composition.composite", + "ElementProperty", + ] + del predictor_kwargs["featurization_kwargs"]["design_space_structures"] + assert jsonified_list[4] == predictor_kwargs + # check candidate selection kwargs kept + assert 
jsonified_list[-2] == candidate_selection_kwargs + assert jsonified_list[-1] == { + "iteration_count": 0, + "train_idx": None, + "train_idx_history": None, + "predictions": None, + "predictions_history": None, + "uncertainties": None, + "uncertainties_history": None, + "candidate_indices": None, + "candidate_index_history": None, + "acquisition_scores": None, + } + + # test after iteration + acsl.iterate() + jsonified_list = acsl.to_jsonified_list() + json_structs = [ase_decoder(jsonified_list[i]) for i in range(3)] + assert structs == json_structs + assert np.array_equal(labels, jsonified_list[3], equal_nan=True) + # check predictor kwargs kept + predictor_kwargs["model_class"] = [ + "sklearn.gaussian_process._gpr", + "GaussianProcessRegressor", + ] + predictor_kwargs["featurizer_class"] = [ + "matminer.featurizers.composition.composite", + "ElementProperty", + ] + assert jsonified_list[4] == predictor_kwargs + # check candidate selection kwargs kept + assert jsonified_list[-2] == candidate_selection_kwargs + assert jsonified_list[-1].get("iteration_count") == 1 + assert jsonified_list[-1].get("train_idx") == acsl.train_idx.tolist() + assert jsonified_list[-1].get("train_idx_history") == [ + ti.tolist() for ti in acsl.train_idx_history + ] + assert isinstance(jsonified_list[-1].get("train_idx_history")[0][0], bool) + assert jsonified_list[-1].get("predictions") == acsl.predictions.tolist() + assert jsonified_list[-1].get("predictions_history") == [ + p.tolist() for p in acsl.predictions_history + ] + assert jsonified_list[-1].get("uncertainties") == acsl.uncertainties.tolist() + assert jsonified_list[-1].get("uncertainties_history") == [ + u.tolist() for u in acsl.uncertainties_history + ] + assert ( + jsonified_list[-1].get("candidate_indices") == acsl.candidate_indices.tolist() + ) + assert jsonified_list[-1].get("candidate_index_history") == [ + c.tolist() for c in acsl.candidate_index_history + ] + assert ( + jsonified_list[-1].get("acquisition_scores") == 
acsl.acquisition_scores.tolist() + ) + assert jsonified_list[-1].get("acquisition_scores") is not None + + +def test_sequential_learner_iterate(): + # Tests iterate method + sub1 = generate_surface_structures(["Ca"], facets={"Ca": ["111"]})["Ca"]["fcc111"][ + "structure" + ] + sub1 = place_adsorbate(sub1, Atoms("Na")) + sub2 = generate_surface_structures(["Nb"], facets={"Nb": ["110"]})["Nb"]["bcc110"][ + "structure" + ] + sub2 = place_adsorbate(sub2, Atoms("K")) + sub3 = generate_surface_structures(["Ta"], facets={"Ta": ["110"]})["Ta"]["bcc110"][ + "structure" + ] + sub3 = place_adsorbate(sub3, Atoms("H")) + sub4 = generate_surface_structures(["Sr"], facets={"Sr": ["110"]})["Sr"]["fcc110"][ + "structure" + ] + sub4 = place_adsorbate(sub4, Atoms("Fe")) + structs = [sub1, sub2, sub3, sub4] + labels = np.array([11.0, 25.0, np.nan, np.nan]) + acds = DesignSpace(structs, labels) + acsl = SequentialLearner(acds, predictor_kwargs={"featurizer_class": SineMatrix}) + + assert acsl.iteration_count == 0 + + acsl.iterate() + assert acsl.iteration_count == 1 + assert acsl.predictions is not None + assert len(acsl.predictions_history) == 1 + assert len(acsl.predictions_history[0]) == len(acds) + assert acsl.uncertainties is not None + assert len(acsl.uncertainties_history) == 1 + assert len(acsl.uncertainties_history[0]) == len(acds) + assert acsl.candidate_indices is not None + assert acsl.candidate_index_history is not None + assert acsl.candidate_index_history == [acsl.candidate_indices] + assert len(acsl.train_idx_history) == 1 + assert np.count_nonzero(acsl.train_idx_history[-1]) == 2 + + cand_ind1 = acsl.candidate_indices[0] + acsl.design_space.update([structs[cand_ind1]], np.array([13.0])) + + acsl.iterate() + assert acsl.iteration_count == 2 + + # checks being iterated a second time to fully explore the design space + cand_ind2 = acsl.candidate_indices[0] + assert cand_ind1 != cand_ind2 + assert acsl.candidate_index_history == [[cand_ind1], [cand_ind2]] + assert 
len(acsl.uncertainties_history) == 2 + assert len(acsl.predictions_history) == 2 + assert len(acsl.train_idx_history) == 2 + assert np.count_nonzero(acsl.train_idx_history[-1]) == 3 + + acsl.design_space.update([structs[cand_ind2]], np.array([17.0])) + acsl.iterate() + + assert acsl.iteration_count == 3 + assert acsl.candidate_structures is None + assert acsl.candidate_indices is None + assert acsl.candidate_index_history == [[cand_ind1], [cand_ind2]] + assert len(acsl.uncertainties_history) == 3 + assert len(acsl.predictions_history) == 3 + assert len(acsl.train_idx_history) == 3 + assert np.count_nonzero(acsl.train_idx_history[-1]) == 4 + + +def test_sequential_learner_setup(): + # Tests setting up an SL object + sub1 = generate_surface_structures(["Ir"], facets={"Ir": ["100"]})["Ir"]["fcc100"][ + "structure" + ] + sub1 = place_adsorbate(sub1, Atoms("S")) + sub2 = generate_surface_structures(["Mo"], facets={"Mo": ["110"]})["Mo"]["bcc110"][ + "structure" + ] + sub2 = place_adsorbate(sub2, Atoms("H")) + sub3 = generate_surface_structures(["Fe"], facets={"Fe": ["110"]})["Fe"]["bcc110"][ + "structure" + ] + sub3 = place_adsorbate(sub3, Atoms("O")) + sub4 = generate_surface_structures(["Re"], facets={"Re": ["0001"]})["Re"][ + "hcp0001" + ]["structure"] + sub4 = place_adsorbate(sub4, Atoms("N")) + structs = [sub1, sub2, sub3, sub4] + labels = np.array([4.0, np.nan, 6.0, np.nan]) + acds = DesignSpace(structs, labels) + acsl = SequentialLearner(acds, predictor_kwargs={"featurizer_class": SineMatrix}) + + assert acsl.design_space.design_space_structures == acds.design_space_structures + assert np.array_equal( + acsl.design_space.design_space_labels, acds.design_space_labels, equal_nan=True + ) + assert acsl.iteration_count == 0 + assert acsl.predictions == None + assert acsl.candidate_indices == None + assert acsl.candidate_selection_kwargs == {"aq": "Random"} + # test specifying more kwargs + predictor_kwargs = { + "featurizer_class": SOAP, + "model_kwargs": 
{"n_restarts_optimizer": 9}, + "featurization_kwargs": {"kwargs": {"rcut": 5.0, "lmax": 6, "nmax": 6}}, + } + acsl = SequentialLearner( + acds, + predictor_kwargs=predictor_kwargs, + candidate_selection_kwargs={"aq": "MU", "num_candidates_to_pick": 2}, + ) + # test passing predictor kwargs + assert acsl.predictor_kwargs == predictor_kwargs + assert isinstance(acsl.predictor.featurizer.featurization_object, SOAP) + assert acsl.predictor.featurization_kwargs["kwargs"] == { + "rcut": 5.0, + "lmax": 6, + "nmax": 6, + } + + # test passing candidate selection kwargs + assert acsl.candidate_selection_kwargs == {"aq": "MU", "num_candidates_to_pick": 2} + + +def test_design_space_setup(): + # test setting up an DesignSpace + sub1 = generate_surface_structures( + ["Pt"], supercell_dim=[2, 2, 5], facets={"Pt": ["100"]} + )["Pt"]["fcc100"]["structure"] + sub1 = place_adsorbate(sub1, Atoms("H")) + sub2 = generate_surface_structures(["Na"], facets={"Na": ["110"]})["Na"]["bcc110"][ + "structure" + ] + sub2 = place_adsorbate(sub2, Atoms("F")) + structs = [sub1, sub2] + labels = np.array([3.0, 7.0]) + acds = DesignSpace(structs, labels) + assert acds.design_space_structures == [sub1, sub2] + assert acds.design_space_structures is not structs + assert np.array_equal(acds.design_space_labels, labels) + assert acds.design_space_labels is not labels + assert len(acds) == 2 + # test different number of structures and labels + with pytest.raises(DesignSpaceError): + acds = DesignSpace([sub1], labels) + + +def test_delitem_design_space(): + # tests deleting items from the design space + sub0 = generate_surface_structures(["Pd"], facets={"Pd": ["100"]})["Pd"]["fcc100"][ + "structure" + ] + sub0 = place_adsorbate(sub0, Atoms("O")) + sub1 = generate_surface_structures(["V"], facets={"V": ["110"]})["V"]["bcc110"][ + "structure" + ] + sub1 = place_adsorbate(sub1, Atoms("H")) + sub2 = generate_surface_structures(["Fe"], facets={"Fe": ["110"]})["Fe"]["bcc110"][ + "structure" + ] + sub2 = 
place_adsorbate(sub2, Atoms("S")) + sub3 = generate_surface_structures(["Ru"], facets={"Ru": ["0001"]})["Ru"][ + "hcp0001" + ]["structure"] + sub3 = place_adsorbate(sub3, Atoms("P")) + structs = [sub0, sub1, sub2] + labels = np.array([-2.5, np.nan, 600.0]) + # test deleting by single idx + acds = DesignSpace(structs, labels) + del acds[1] + assert len(acds) == 2 + assert np.array_equal(acds.design_space_labels, np.array([-2.5, 600.0])) + assert acds.design_space_structures == [sub0, sub2] + # test deleting using a mask + acds = DesignSpace(structs, labels) + mask = np.zeros(len(acds), bool) + mask[0] = 1 + mask[1] = 1 + # n.b. deletes wherever mask is True + del acds[mask] + assert len(acds) == 1 + assert acds.design_space_structures == [sub2] + assert np.array_equal(acds.design_space_labels, np.array([600.0])) + # test deleting by providing list of idx + structs = [sub0, sub1, sub2, sub3] + labels = np.array([-20, 8, np.nan, 0.3]) + acds = DesignSpace(structs, labels) + del acds[[1, 3]] + assert len(acds) == 2 + assert np.array_equal( + acds.design_space_labels, np.array([-20, np.nan]), equal_nan=True + ) + assert acds.design_space_structures == [sub0, sub2] + # test deleting by providing list with a single idx + acds = DesignSpace(structs, labels) + del acds[[0]] + assert len(acds) == 3 + assert np.array_equal( + acds._design_space_labels, np.array([8, np.nan, 0.3]), equal_nan=True + ) + assert acds.design_space_structures == [sub1, sub2, sub3] + + +def test_eq_design_space(): + # test comparing design spaces + sub0 = generate_surface_structures(["Pd"], facets={"Pd": ["100"]})["Pd"]["fcc100"][ + "structure" + ] + sub0 = place_adsorbate(sub0, Atoms("O")) + sub1 = generate_surface_structures(["V"], facets={"V": ["110"]})["V"]["bcc110"][ + "structure" + ] + sub1 = place_adsorbate(sub1, Atoms("H")) + sub2 = generate_surface_structures(["Fe"], facets={"Fe": ["110"]})["Fe"]["bcc110"][ + "structure" + ] + sub2 = place_adsorbate(sub2, Atoms("S")) + sub3 = 
generate_surface_structures(["Ru"], facets={"Ru": ["0001"]})["Ru"][ + "hcp0001" + ]["structure"] + sub3 = place_adsorbate(sub3, Atoms("P")) + structs = [sub0, sub1, sub2] + labels = np.array([-2.5, np.nan, 600.0]) + + # test trivial case + acds = DesignSpace(structs, labels) + acds0 = DesignSpace(structs, labels) + assert acds == acds0 + + # test comparing when different length + acds1 = DesignSpace(structs[:-1], labels[:-1]) + assert acds != acds1 + + # test same structures, different labels + acds2 = DesignSpace(structs, labels) + acds2.update([structs[1]], labels=np.array([0.2])) + assert acds != acds2 + + # test diff structures, same labels + structs[0][0].symbol = "Ni" + acds3 = DesignSpace(structs, labels) + assert acds != acds3 + + +def test_updating_design_space(): + sub1 = generate_surface_structures(["Ag"], facets={"Ag": ["100"]})["Ag"]["fcc100"][ + "structure" + ] + sub2 = generate_surface_structures(["Li"], facets={"Li": ["110"]})["Li"]["bcc110"][ + "structure" + ] + sub3 = generate_surface_structures(["Na"], facets={"Na": ["110"]})["Na"]["bcc110"][ + "structure" + ] + sub4 = generate_surface_structures(["Ru"], facets={"Ru": ["0001"]})["Ru"][ + "hcp0001" + ]["structure"] + structs = [sub1, sub2, sub3] + labels = np.array([4.0, 5.0, 6.0]) + acds = DesignSpace(structs, labels) + + # Test trying to update just structures + with pytest.raises(DesignSpaceError): + acds.design_space_structures = [sub4] + + # Test trying to update just labels + with pytest.raises(DesignSpaceError): + acds.design_space_structures = np.array([4.0]) + + # Test updating label already in DS and extending + acds.update([sub1, sub4], np.array([10.0, 20.0])) + assert np.isclose(acds.design_space_labels[0], 10.0) + assert sub4 in acds.design_space_structures + assert np.isclose(acds.design_space_labels[-1], 20.0) + + # Test trying to give structures that are not Atoms objects + with pytest.raises(AssertionError): + acds.update([sub1, np.array(20.0)], np.array([3.0, 4.0])) + + +def 
test_write_design_space_as_json(): + # Tests writing out the DesignSpace to disk + sub1 = generate_surface_structures(["Pd"], facets={"Pd": ["111"]})["Pd"]["fcc111"][ + "structure" + ] + sub2 = generate_surface_structures(["V"], facets={"V": ["110"]})["V"]["bcc110"][ + "structure" + ] + structs = [sub1, sub2] + labels = np.array([0.3, 0.8]) + with tempfile.TemporaryDirectory() as _tmp_dir: + acds = DesignSpace(design_space_structures=structs, design_space_labels=labels,) + acds.write_json_to_disk(write_location=_tmp_dir) + # loads back written json + with open(os.path.join(_tmp_dir, "acds.json"), "r") as f: + ds = json.load(f) + written_structs = [ase_decoder(ds[i]) for i in range(2)] + assert structs == written_structs + assert np.array_equal(labels, ds[-1]) + + +def test_design_space_to_jsonified_list(): + # Tests returning the DesignSpace as a jsonified list + sub1 = generate_surface_structures(["Pd"], facets={"Pd": ["111"]})["Pd"]["fcc111"][ + "structure" + ] + sub2 = generate_surface_structures(["V"], facets={"V": ["110"]})["V"]["bcc110"][ + "structure" + ] + structs = [sub1, sub2] + labels = np.array([0.3, 0.8]) + acds = DesignSpace(design_space_structures=structs, design_space_labels=labels,) + jsonified_list = acds.to_jsonified_list() + json_structs = [ase_decoder(jsonified_list[i]) for i in range(2)] + assert structs == json_structs + assert np.array_equal(labels, jsonified_list[-1]) + + +def test_get_design_space_from_json(): + # Tests generating DesignSpace from a json + sub1 = generate_surface_structures(["Au"], facets={"Au": ["100"]})["Au"]["fcc100"][ + "structure" + ] + sub2 = generate_surface_structures(["Fe"], facets={"Fe": ["110"]})["Fe"]["bcc110"][ + "structure" + ] + sub3 = generate_surface_structures(["Ru"], facets={"Ru": ["0001"]})["Ru"][ + "hcp0001" + ]["structure"] + structs = [sub1, sub2, sub3] + labels = np.array([30.0, 900.0, np.nan]) + with tempfile.TemporaryDirectory() as _tmp_dir: + acds = DesignSpace(design_space_structures=structs, 
design_space_labels=labels,) + acds.write_json_to_disk("testing.json", write_location=_tmp_dir) + + tmp_json_dir = os.path.join(_tmp_dir, "testing.json") + acds_from_json = DesignSpace.from_json(tmp_json_dir) + assert acds_from_json.design_space_structures == structs + assert np.array_equal( + acds_from_json.design_space_labels, labels, equal_nan=True + ) + + +def test_simulated_sequential_histories(): + # Test output sl has appropriate histories + sub1 = generate_surface_structures(["Pt"], facets={"Pt": ["111"]})["Pt"]["fcc111"][ + "structure" + ] + sub2 = generate_surface_structures(["Cu"], facets={"Cu": ["100"]})["Cu"]["fcc100"][ + "structure" + ] + base_struct1 = place_adsorbate(sub1, Atoms("O")) + base_struct2 = place_adsorbate(sub2, Atoms("N")) + base_struct3 = place_adsorbate(sub2, Atoms("H")) + ds_structs = [ + base_struct1, + base_struct2, + base_struct3, + sub1, + sub2, + ] + ds_labels = np.array([0.0, 1.0, 2.0, 3.0, 4.0]) + acds = DesignSpace(ds_structs, ds_labels) + candidate_selection_kwargs = { + "target_min": 0.9, + "target_max": 2.1, + "aq": "MLI", + "num_candidates_to_pick": 2, + } + predictor_kwargs = {"featurizer_class": SineMatrix} + sl = simulated_sequential_learning( + full_design_space=acds, + init_training_size=1, + number_of_sl_loops=2, + candidate_selection_kwargs=candidate_selection_kwargs, + predictor_kwargs=predictor_kwargs, + ) + + # Test number of sl loops + assert sl.iteration_count == 3 + + # Test initial training size + assert sl.train_idx_history[0].sum() == 1 + + # Test keeping track of pred and unc history + assert len(sl.uncertainties_history) == 3 + assert len(sl.uncertainties_history[0]) == len(acds) + assert len(sl.predictions_history) == 3 + assert len(sl.predictions_history[-1]) == len(acds) + assert len(sl.candidate_index_history) == 2 + + +def test_simulated_sequential_batch_added(): + # Tests adding N candidates on each loop + sub1 = generate_surface_structures(["Pt"], facets={"Pt": ["111"]})["Pt"]["fcc111"][ + 
"structure" + ] + sub2 = generate_surface_structures(["Cu"], facets={"Cu": ["100"]})["Cu"]["fcc100"][ + "structure" + ] + base_struct1 = place_adsorbate(sub1, Atoms("O")) + base_struct2 = place_adsorbate(sub2, Atoms("N")) + candidate_selection_kwargs = {"num_candidates_to_pick": 2, "aq": "Random"} + predictor_kwargs = {"featurizer_class": SineMatrix} + num_loops = 2 + ds_structs = [base_struct1, base_struct2, sub1, sub2] + ds_labels = np.array([5.0, 6.0, 7.0, 8.0]) + acds = DesignSpace(ds_structs, ds_labels) + sl = simulated_sequential_learning( + full_design_space=acds, + predictor_kwargs=predictor_kwargs, + candidate_selection_kwargs=candidate_selection_kwargs, + number_of_sl_loops=num_loops, + init_training_size=1, + ) + # should add 2 candidates on first loop + assert len(sl.candidate_index_history[0]) == 2 + # since only 1 left, should add it on the next + assert len(sl.candidate_index_history[1]) == 1 + + +def test_simulated_sequential_num_loops(): + # Tests the number of loops + sub1 = generate_surface_structures(["Fe"], facets={"Fe": ["110"]})["Fe"]["bcc110"][ + "structure" + ] + sub2 = generate_surface_structures(["Cu"], facets={"Cu": ["100"]})["Cu"]["fcc100"][ + "structure" + ] + base_struct1 = place_adsorbate(sub1, Atoms("H")) + base_struct2 = place_adsorbate(sub2, Atoms("N")) + predictor_kwargs = {"featurizer_class": SineMatrix} + candidate_selection_kwargs = {"num_candidates_to_pick": 3, "aq": "Random"} + ds_structs = [base_struct1, base_struct2, sub1, sub2] + ds_labels = np.array([5.0, 6.0, 7.0, 8.0]) + acds = DesignSpace(ds_structs, ds_labels) + # Test default number of loops + sl = simulated_sequential_learning( + full_design_space=acds, + predictor_kwargs=predictor_kwargs, + candidate_selection_kwargs=candidate_selection_kwargs, + init_training_size=1, + ) + assert len(sl.predictions_history) == 2 + assert sl.iteration_count == 2 + + # Test catches maximum number of loops + with pytest.raises(SequentialLearnerError): + sl = 
simulated_sequential_learning( + full_design_space=acds, + predictor_kwargs=predictor_kwargs, + candidate_selection_kwargs=candidate_selection_kwargs, + init_training_size=1, + number_of_sl_loops=3, + ) + + # Test with default num loops and default num candidates + ds_structs = [base_struct1, base_struct2, sub2] + ds_labels = np.array([5.0, 6.0, 7.0]) + acds = DesignSpace(ds_structs, ds_labels) + candidate_selection_kwargs.update({"num_candidates_to_pick": 1}) + + sl = simulated_sequential_learning( + full_design_space=acds, + predictor_kwargs=predictor_kwargs, + candidate_selection_kwargs=candidate_selection_kwargs, + init_training_size=1, + ) + assert len(sl.uncertainties_history) == 3 + assert sl.iteration_count == 3 + + +def test_simulated_sequential_write_to_disk(): + # Test writing out sl dict + with tempfile.TemporaryDirectory() as _tmp_dir: + sub1 = generate_surface_structures(["Pt"], facets={"Pt": ["111"]})["Pt"][ + "fcc111" + ]["structure"] + sub2 = generate_surface_structures(["Cu"], facets={"Cu": ["100"]})["Cu"][ + "fcc100" + ]["structure"] + base_struct1 = place_adsorbate(sub1, Atoms("O")) + base_struct2 = place_adsorbate(sub2, Atoms("S")) + base_struct3 = place_adsorbate(sub2, Atoms("N")) + predictor_kwargs = {"featurizer_class": SineMatrix} + candidate_selection_kwargs = {"num_candidates_to_pick": 2, "aq": "Random"} + ds_structs = [base_struct1, base_struct2, base_struct3] + ds_labels = np.array([0, 1, 2]) + acds = DesignSpace(ds_structs, ds_labels) + sl = simulated_sequential_learning( + full_design_space=acds, + init_training_size=2, + number_of_sl_loops=1, + predictor_kwargs=predictor_kwargs, + candidate_selection_kwargs=candidate_selection_kwargs, + write_to_disk=True, + write_location=_tmp_dir, + ) + # check data written as json + json_path = os.path.join(_tmp_dir, "acsl.json") + sl_written = SequentialLearner.from_json(json_path) + assert sl.iteration_count == sl_written.iteration_count + assert np.array_equal(sl.predictions_history, 
sl_written.predictions_history) + assert np.array_equal( + sl.uncertainties_history, sl_written.uncertainties_history + ) + assert np.array_equal( + sl.candidate_index_history, sl_written.candidate_index_history + ) + assert np.array_equal(sl.candidate_indices, sl_written.candidate_indices) + assert np.array_equal(sl.predictions, sl_written.predictions) + assert np.array_equal(sl.uncertainties, sl_written.uncertainties) + assert np.array_equal(sl.predictor_kwargs, sl_written.predictor_kwargs) + assert sl.candidate_selection_kwargs == sl_written.candidate_selection_kwargs + assert ( + sl.design_space.design_space_structures + == sl_written.design_space.design_space_structures + ) + assert np.array_equal( + sl.design_space.design_space_labels, + sl_written.design_space.design_space_labels, + ) + + +def test_simulated_sequential_learning_fully_explored(): + # Checks that catches if ds not fully explored + sub1 = generate_surface_structures(["Pt"], facets={"Pt": ["111"]})["Pt"]["fcc111"][ + "structure" + ] + sub2 = generate_surface_structures(["Cu"], facets={"Cu": ["100"]})["Cu"]["fcc100"][ + "structure" + ] + base_struct1 = place_adsorbate(sub1, Atoms("OH")) + base_struct2 = place_adsorbate(sub2, Atoms("NH")) + predictor_kwargs = {"structure_featurizer": "elemental_property"} + ds_structs = [base_struct1, base_struct2, sub2] + ds_labels = np.array([0.0, np.nan, 4.0]) + acds = DesignSpace(ds_structs, ds_labels) + candidate_selection_kwargs = {"aq": "MU"} + with pytest.raises(SequentialLearnerError): + sl = simulated_sequential_learning( + full_design_space=acds, + init_training_size=1, + number_of_sl_loops=2, + predictor_kwargs=predictor_kwargs, + candidate_selection_kwargs=candidate_selection_kwargs, + ) + + +def test_multiple_sequential_learning_serial(): + # Tests serial implementation + sub1 = generate_surface_structures(["Pt"], facets={"Pt": ["111"]})["Pt"]["fcc111"][ + "structure" + ] + base_struct1 = place_adsorbate(sub1, Atoms("O")) + predictor_kwargs = 
{"featurizer_class": SineMatrix} + ds_structs = [base_struct1, sub1] + ds_labels = np.array([0.0, 0.0]) + acds = DesignSpace(ds_structs, ds_labels) + candidate_selection_kwargs = {"aq": "MU"} + runs_history = multiple_simulated_sequential_learning_runs( + full_design_space=acds, + number_of_runs=3, + predictor_kwargs=predictor_kwargs, + candidate_selection_kwargs=candidate_selection_kwargs, + number_of_sl_loops=1, + init_training_size=1, + ) + assert len(runs_history) == 3 + assert isinstance(runs_history[0], SequentialLearner) + assert len(runs_history[1].predictions_history) == 2 + + +def test_multiple_sequential_learning_parallel(): + # Tests parallel implementation + sub1 = generate_surface_structures(["Cu"], facets={"Cu": ["111"]})["Cu"]["fcc111"][ + "structure" + ] + base_struct1 = place_adsorbate(sub1, Atoms("Li")) + predictor_kwargs = {"featurizer_class": SineMatrix} + ds_structs = [base_struct1, sub1] + ds_labels = np.array([0.0, 0.0]) + acds = DesignSpace(ds_structs, ds_labels) + candidate_selection_kwargs = {"aq": "Random"} + runs_history = multiple_simulated_sequential_learning_runs( + full_design_space=acds, + number_of_runs=3, + number_parallel_jobs=2, + predictor_kwargs=predictor_kwargs, + candidate_selection_kwargs=candidate_selection_kwargs, + number_of_sl_loops=1, + init_training_size=1, + ) + assert len(runs_history) == 3 + assert isinstance(runs_history[2], SequentialLearner) + assert len(runs_history[1].uncertainties_history) == 2 + + +def test_multiple_sequential_learning_write_to_disk(): + # Tests writing run history to disk + _tmp_dir = tempfile.TemporaryDirectory().name + sub1 = generate_surface_structures(["Pt"], facets={"Pt": ["111"]})["Pt"]["fcc111"][ + "structure" + ] + base_struct1 = place_adsorbate(sub1, Atoms("N")) + predictor_kwargs = {"featurizer_class": SineMatrix} + ds_structs = [base_struct1, sub1] + ds_labels = np.array([0.0, 0.0]) + acds = DesignSpace(ds_structs, ds_labels) + candidate_selection_kwargs = 
{"num_candidates_to_pick": 2, "aq": "Random"} + runs_history = multiple_simulated_sequential_learning_runs( + full_design_space=acds, + predictor_kwargs=predictor_kwargs, + candidate_selection_kwargs=candidate_selection_kwargs, + number_of_runs=3, + number_parallel_jobs=2, + init_training_size=1, + number_of_sl_loops=1, + write_to_disk=True, + write_location=_tmp_dir, + json_name_prefix="test_multi", + ) + + # check data history in each run + for i in range(3): + written_run = SequentialLearner.from_json( + os.path.join(_tmp_dir, f"test_multi_{i}.json") + ) + written_ds = written_run.design_space + assert written_ds.design_space_structures == ds_structs + assert np.array_equal(written_ds.design_space_labels, ds_labels) + assert written_run.iteration_count == runs_history[i].iteration_count + assert np.array_equal(written_run.predictions, runs_history[i].predictions) + assert np.array_equal( + written_run.predictions_history, runs_history[i].predictions_history + ) + assert np.array_equal(written_run.uncertainties, runs_history[i].uncertainties) + assert np.array_equal( + written_run.uncertainties_history, runs_history[i].uncertainties_history + ) + assert np.array_equal( + written_run.train_idx_history, runs_history[i].train_idx_history + ) + assert np.array_equal(written_run.train_idx, runs_history[i].train_idx) + assert np.array_equal( + written_run.candidate_indices, runs_history[i].candidate_indices + ) + assert np.array_equal( + written_run.candidate_index_history, runs_history[i].candidate_index_history + ) + assert written_run.predictor_kwargs == runs_history[i].predictor_kwargs + assert ( + written_run.candidate_selection_kwargs + == runs_history[i].candidate_selection_kwargs + ) + + +def test_choose_next_candidate_input_minimums(): + # Tests that appropriately catches minimum necessary inputs + labels = np.random.rand(5) + train_idx = np.zeros(5, dtype=bool) + train_idx[np.random.choice(5, size=2, replace=False)] = 1 + unc = np.random.rand(5) + pred = 
np.random.rand(5) + + with pytest.raises(SequentialLearnerError): + choose_next_candidate() + + with pytest.raises(SequentialLearnerError): + choose_next_candidate(unc=unc, pred=pred, num_candidates_to_pick=2, aq="Random") + + with pytest.raises(SequentialLearnerError): + choose_next_candidate( + labels=labels, pred=pred, num_candidates_to_pick=2, aq="MU" + ) + + with pytest.raises(SequentialLearnerError): + choose_next_candidate(pred=pred, num_candidates_to_pick=2, aq="MLI") + + with pytest.raises(SequentialLearnerError): + choose_next_candidate(unc=unc, num_candidates_to_pick=2, aq="MLI") + + +def test_choose_next_candidate_hhi_weighting(): + # Tests that the HHI weighting is properly applied + unc = np.array([0.1, 0.1]) + pred = np.array([4.0, 4.0]) + # Tests using production HHI values and MU + y_struct = generate_surface_structures(["Y"], facets={"Y": ["0001"]})["Y"][ + "hcp0001" + ]["structure"] + ni_struct = generate_surface_structures(["Ni"], facets={"Ni": ["111"]})["Ni"][ + "fcc111" + ]["structure"] + parent_idx, _, aq_scores = choose_next_candidate( + [y_struct, ni_struct], + unc=unc, + include_hhi=True, + aq="MU", + include_seg_ener=False, + ) + assert parent_idx[0] == 1 + assert aq_scores[0] < aq_scores[1] + + # Tests using reserves HHI values and MLI + nb_struct = generate_surface_structures(["Nb"], facets={"Nb": ["111"]})["Nb"][ + "bcc111" + ]["structure"] + na_struct = generate_surface_structures(["Na"], facets={"Na": ["110"]})["Na"][ + "bcc110" + ]["structure"] + parent_idx, _, aq_scores = choose_next_candidate( + [na_struct, nb_struct], + unc=unc, + pred=pred, + target_min=3, + target_max=5, + include_hhi=True, + hhi_type="reserves", + include_seg_ener=False, + ) + assert parent_idx[0] == 0 + assert aq_scores[0] > aq_scores[1] + + +def test_choose_next_candidate_segregation_energy_weighting(): + # Tests that the segregation energy weighting is properly applied + unc = np.array([0.3, 0.3]) + pred = np.array([2.0, 2.0]) + structs = 
flatten_structures_dict( + generate_saa_structures(["Cr"], ["Rh"], facets={"Cr": ["110"]}) + ) + structs.extend( + flatten_structures_dict( + generate_saa_structures(["Co"], ["Re"], facets={"Co": ["0001"]}) + ) + ) + parent_idx, _, aq_scores = choose_next_candidate( + structs, + unc=unc, + pred=pred, + target_min=0, + target_max=4, + include_hhi=False, + include_seg_ener=True, + ) + assert parent_idx[0] == 0 + assert aq_scores[0] > aq_scores[1] + + +def test_get_overlap_score(): + # Tests default behavior + mean = 0.0 + std = 0.1 + x1 = -0.4 + x2 = 0.8 + norm = stats.norm(loc=mean, scale=std) + + # checks that at least target min or max is provided + with pytest.raises(SequentialLearnerError): + get_overlap_score(mean, std) + + # test default min + overlap_score = get_overlap_score(mean, std, x2=x2) + assert np.isclose(overlap_score, norm.cdf(x2)) + + # test default max + overlap_score = get_overlap_score(mean, std, x1=x1) + assert np.isclose(overlap_score, 1.0 - norm.cdf(x1)) + + # test both max and min + overlap_score = get_overlap_score(mean, std, x1=x1, x2=x2) + assert np.isclose(overlap_score, norm.cdf(x2) - norm.cdf(x1)) + + +def test_calculate_hhi_scores(): + # Tests calculating the HHI scores + saa_dict = generate_saa_structures( + ["Pt", "Cu", "Ni"], + ["Ru"], + facets={"Pt": ["111"], "Cu": ["111"], "Ni": ["111"]}, + ) + saa_structs = [saa_dict[host]["Ru"]["fcc111"]["structure"] for host in saa_dict] + # test production + hhi_prod_scores = calculate_hhi_scores(saa_structs) + norm_hhi_prod = { + el: 1.0 - (HHI["production"][el] - 500.0) / 9300.0 for el in HHI["production"] + } + # check approach properly normalizes and inverts + assert np.isclose(norm_hhi_prod["Y"], 0.0) + assert np.isclose(norm_hhi_prod["O"], 1.0) + # test scores calculated on SAAs + assert np.isclose( + hhi_prod_scores[0], (35 * norm_hhi_prod["Pt"] + norm_hhi_prod["Ru"]) / 36 + ) + assert np.isclose( + hhi_prod_scores[1], (35 * norm_hhi_prod["Cu"] + norm_hhi_prod["Ru"]) / 36 + ) + assert 
np.isclose( + hhi_prod_scores[2], (35 * norm_hhi_prod["Ni"] + norm_hhi_prod["Ru"]) / 36 + ) + # check scores normalized + assert (hhi_prod_scores <= 1.0).all() + assert (hhi_prod_scores >= 0.0).all() + # test reserves + hhi_res_scores = calculate_hhi_scores(saa_structs, "reserves") + norm_hhi_res = { + el: 1.0 - (HHI["reserves"][el] - 500.0) / 8600.0 for el in HHI["reserves"] + } + # check approach properly normalizes and inverts + assert np.isclose(norm_hhi_res["Pt"], 0.0) + assert np.isclose(norm_hhi_res["C"], 1.0) + assert np.isclose( + hhi_res_scores[0], (35 * norm_hhi_res["Pt"] + norm_hhi_res["Ru"]) / 36 + ) + assert np.isclose( + hhi_res_scores[1], (35 * norm_hhi_res["Cu"] + norm_hhi_res["Ru"]) / 36 + ) + assert np.isclose( + hhi_res_scores[2], (35 * norm_hhi_res["Ni"] + norm_hhi_res["Ru"]) / 36 + ) + # check normalized + assert (hhi_res_scores <= 1.0).all() + assert (hhi_res_scores >= 0.0).all() + + +def test_calculate_segregation_energy_scores(): + # Tests calculating segregation energy scores + saa_structs = flatten_structures_dict( + generate_saa_structures( + ["Ag", "Ni"], ["Pt"], facets={"Ag": ["111"], "Ni": ["111"]}, + ) + ) + saa_structs.extend( + flatten_structures_dict( + generate_saa_structures(["Pd"], ["W"], facets={"Pd": ["111"]}) + ) + ) + # test calculating scores from RABAN1999 + se_scores = calculate_segregation_energy_scores(saa_structs) + assert np.isclose(se_scores[-1], 0.0) + min_seg = SEGREGATION_ENERGIES["raban1999"]["Fe_100"]["Ag"] + max_seg = SEGREGATION_ENERGIES["raban1999"]["Pd"]["W"] + assert np.isclose( + se_scores[0], + 1.0 + - (SEGREGATION_ENERGIES["raban1999"]["Ag"]["Pt"] - min_seg) + / (max_seg - min_seg), + ) + assert np.isclose( + se_scores[1], + 1.0 + - (SEGREGATION_ENERGIES["raban1999"]["Ni"]["Pt"] - min_seg) + / (max_seg - min_seg), + ) + + # test getting scores from RAO2020 + se_scores = calculate_segregation_energy_scores(saa_structs, data_source="rao2020") + assert np.isclose(se_scores[0], 
SEGREGATION_ENERGIES["rao2020"]["Ag"]["Pt"]) + assert np.isclose(se_scores[0], 0.8) + assert np.isclose(se_scores[1], SEGREGATION_ENERGIES["rao2020"]["Ni"]["Pt"]) + assert np.isclose(se_scores[1], 1.0) + assert np.isclose(se_scores[-1], SEGREGATION_ENERGIES["rao2020"]["Pd"]["W"]) + assert np.isclose(se_scores[-1], 0.0) diff --git a/tests/test_saa.py b/tests/test_saa.py index 4b738e53..1642e1c5 100644 --- a/tests/test_saa.py +++ b/tests/test_saa.py @@ -10,6 +10,7 @@ from autocat.saa import generate_saa_structures from autocat.saa import substitute_single_atom_on_surface from autocat.saa import _find_dopant_index +from autocat.saa import _find_all_surface_atom_indices from autocat.saa import AutocatSaaGenerationError from autocat.surface import generate_surface_structures @@ -128,3 +129,31 @@ def test_find_dopant_index(): host[32].symbol = "Au" with raises(NotImplementedError): _find_dopant_index(host, "Au") + + +def test_find_all_surface_atom_indices(): + # Test helper function for finding all surface atoms + # clean elemental surface + ru = generate_surface_structures(["Ru"], supercell_dim=(2, 2, 4))["Ru"]["hcp0001"][ + "structure" + ] + indices = _find_all_surface_atom_indices(ru) + assert indices == [12, 13, 14, 15] + + pt110 = generate_surface_structures(["Pt"], supercell_dim=(1, 1, 4))["Pt"][ + "fcc110" + ]["structure"] + indices = _find_all_surface_atom_indices(pt110) + assert indices == [3] + + # check increasing tolerance + indices = _find_all_surface_atom_indices(pt110, tol=1.4) + assert indices == [2, 3] + + pt100 = generate_surface_structures(["Pt"], supercell_dim=(3, 3, 4))["Pt"][ + "fcc100" + ]["structure"] + pt100[27].z += 0.3 + pt100[30].z -= 0.4 + indices = _find_all_surface_atom_indices(pt100, tol=0.6) + assert indices == [27, 28, 29, 31, 32, 33, 34, 35] diff --git a/tests/test_utils.py b/tests/test_utils.py new file mode 100644 index 00000000..5b3190b6 --- /dev/null +++ b/tests/test_utils.py @@ -0,0 +1,60 @@ +"""Unit tests for the `autocat.utils` 
module""" + +from ase import Atoms +from ase.build import fcc100 +from ase.build import fcc111 +from ase.build import bcc110 + +from autocat.surface import generate_surface_structures +from autocat.saa import generate_saa_structures +from autocat.adsorption import generate_adsorbed_structures +from autocat.utils import flatten_structures_dict + + +def test_extract_surfaces(): + # Tests extracting structures from `autocat.surface.generate_surface_structures` + surfaces = generate_surface_structures( + ["Pt", "Cu", "Li"], facets={"Pt": ["100", "111"], "Cu": ["111"], "Li": ["110"]} + ) + ex_structures = flatten_structures_dict(surfaces) + assert all(isinstance(struct, Atoms) for struct in ex_structures) + # checks atoms objects left untouched during extraction + pt_struct100 = fcc100("Pt", size=(3, 3, 4), vacuum=10) + assert pt_struct100 in ex_structures + pt_struct111 = fcc111("Pt", size=(3, 3, 4), vacuum=10) + assert pt_struct111 in ex_structures + cu_struct = fcc111("Cu", size=(3, 3, 4), vacuum=10) + assert cu_struct in ex_structures + li_struct = bcc110("Li", size=(3, 3, 4), vacuum=10) + assert li_struct in ex_structures + + +def test_extract_saas(): + # Tests extracting saa structures + saas = generate_saa_structures( + ["Cu", "Au"], + ["Fe"], + facets={"Cu": ["110"], "Au": ["100"]}, + supercell_dim=(2, 2, 5), + ) + ex_structures = flatten_structures_dict(saas) + assert all(isinstance(struct, Atoms) for struct in ex_structures) + assert saas["Cu"]["Fe"]["fcc110"]["structure"] in ex_structures + assert saas["Au"]["Fe"]["fcc100"]["structure"] in ex_structures + + +def test_extract_adsorption(): + # Test extracting adsorption structures + saa = generate_saa_structures(["Ru"], ["Pd"], supercell_dim=(2, 2, 5),)["Ru"]["Pd"][ + "hcp0001" + ]["structure"] + ads_dict = generate_adsorbed_structures( + saa, + adsorbates=["NH2", "Li"], + adsorption_sites={"custom": [(0.0, 0.0)]}, + use_all_sites=False, + ) + ex_structures = flatten_structures_dict(ads_dict) + assert 
all(isinstance(struct, Atoms) for struct in ex_structures) + assert ads_dict["NH2"]["custom"]["0.0_0.0"]["structure"] in ex_structures + assert ads_dict["Li"]["custom"]["0.0_0.0"]["structure"] in ex_structures