diff --git a/.codespellignore b/.codespellignore
index 2df21c0..89fc3e0 100644
--- a/.codespellignore
+++ b/.codespellignore
@@ -1,2 +1,3 @@
 cachable
-parma
\ No newline at end of file
+parma
+mater
\ No newline at end of file
diff --git a/.codespellrc b/.codespellrc
index bbc2c8b..c7e03fd 100644
--- a/.codespellrc
+++ b/.codespellrc
@@ -1,3 +1,3 @@
 [codespell]
-skip = ./docs/_build,./docs/_static,./docs/31_sparta/*.sh
-ignore-words-list = fom
+skip = ./docs/_build,./docs/_static,./docs/31_sparta/*.sh,./docs/32_lammpsACE/*.sh
+ignore-words-list = fom,Mater
diff --git a/.gitignore b/.gitignore
index a8069db..eb9e77c 100644
--- a/.gitignore
+++ b/.gitignore
@@ -43,6 +43,17 @@ output.log
 
 lisp
 
+# LAMMPS
+docs/32_lammpsACE/lammps
+docs/32_lammpsACE/kokkos-tools
+docs/32_lammpsACE/lammps_build.log
+docs/32_lammpsACE/kokkos_tools_build.log
+docs/32_lammpsACE/templatedir/kokkos_tools_env_elcapitan.sh
+docs/32_lammpsACE/templatedir/Cu-PBE-core-rep.ace
+docs/32_lammpsACE/templatedir/lammps_env_elcapitan.sh
+docs/32_lammpsACE/templatedir/lammps
+docs/32_lammpsACE/check*
+
 \#*#
 .[#]*[#]
 
diff --git a/docs/31_sparta/sparta.rst b/docs/31_sparta/sparta.rst
index 8dfb5a9..30e2690 100644
--- a/docs/31_sparta/sparta.rst
+++ b/docs/31_sparta/sparta.rst
@@ -426,12 +426,12 @@ A script (``sparta_clone.sh``) is provided to clone the SPARTA
 repository within the "sparta" folder. Instructions are provided on
 how to build SPARTA for the following systems:
 
-* Generic (see :ref:`BuildGeneric`)
+* Generic (see :ref:`BuildSpartaGeneric`)
 * Advanced Technology System 4 (ATS-4), also known as El Capitan (see
-  :ref:`BuildATS4`)
+  :ref:`BuildSpartaATS4`)
 
 
-.. _BuildGeneric:
+.. _BuildSpartaGeneric:
 
 Generic
 -------
@@ -440,7 +440,7 @@ Refer to SPARTA's [sparta-build]_ documentation for generic
 instructions.
 
 
-.. _BuildATS4:
+.. _BuildSpartaATS4:
 
 El Capitan
 ----------
@@ -547,12 +547,12 @@ Additional information:
 Single-node results from SPARTA are provided on the following systems:
 
 * Advanced Technology System 4 (ATS-4), also known as El Capitan (see
-  :ref:`ResultsATS4`)
+  :ref:`ResultsSpartaATS4`)
 
 Multi-node results from SPARTA are provided on the following system(s):
 
 * Advanced Technology System 4 (ATS-4), also known as El Capitan (see
-  :ref:`ResultsScaleATS4`)
+  :ref:`ResultsSpartaScaleATS4`)
 
 
 .. _SPARTAComputeFOM:
@@ -582,7 +582,7 @@ example were unnecessary.
    INFO - 2026-02-16 20:54:44,673 - File = /path/to/llnl-benchmarks/docs/31_sparta/checks-10--nodes-001--L-2.0--ktst/log.sparta
 
 
-.. _ResultsATS4:
+.. _ResultsSpartaATS4:
 
 El Capitan - Single Node
 ------------------------
@@ -610,7 +610,7 @@ as part of the output.
    INFO - 2026-02-16 20:54:44,673 - File = /path/to/llnl-benchmarks/docs/31_sparta/checks-10--nodes-001--L-2.0--ktst/log.sparta
 
 
-.. _ResultsScaleATS4:
+.. _ResultsSpartaScaleATS4:
 
 El Capitan - Many Nodes
 -----------------------
diff --git a/docs/32_lammpsACE/kokkos_tools_build_elcapitan.sh b/docs/32_lammpsACE/kokkos_tools_build_elcapitan.sh
new file mode 100755
index 0000000..cad3f00
--- /dev/null
+++ b/docs/32_lammpsACE/kokkos_tools_build_elcapitan.sh
@@ -0,0 +1,50 @@
+#!/usr/bin/env bash
+# Build the Kokkos Tools space-time-stack profiler; run from docs/32_lammpsACE.
+# set top-level script parameters: abort on first error and trace commands
+umask 022
+set -e
+set -x
+
+# create vars for common directories and files
+dir_root="$(git rev-parse --show-toplevel)"  # unused below; fails fast outside a git checkout
+dir_pwd="$(pwd -P)"
+dir_src="${dir_pwd}/kokkos-tools/profiling/space-time-stack"
+dir_build="${dir_src}"  # in-source build: gmake runs inside the source tree
+file_log="${dir_pwd}/kokkos_tools_build.log"
+
+# send STDOUT and STDERR to the terminal and a timestamped copy to the log
+exec &> >(tee >(ts '[%Y-%m-%d %H:%M:%S]' > "${file_log}"))
+
+# let's turn on verbosity now
+set -v
+
+# output for posterity
+hostname
+uptime
+lscpu
+
+# clean and reset source
+pushd "${dir_src}"
+git clean -fdx
+git reset --hard
+popd
+
+# no separate build directory is created (in-source build); kept for reference:
+# test -d "${dir_build}" && rm -rf "${dir_build}"
+# mkdir -p "${dir_build}"
+
+# build
+# list current environment
+module list
+# alter environment; explicit path so `.` cannot resolve the name through PATH
+. "${dir_pwd}/lammps_env_elcapitan.sh"
+# list current environment
+module list
+pushd "${dir_build}"
+/usr/bin/time --verbose -- \
+    nice -n 1 \
+        gmake CXX=CC
+popd
+
+# gracefully exit
+exit 0
diff --git a/docs/32_lammpsACE/kokkos_tools_clone.sh b/docs/32_lammpsACE/kokkos_tools_clone.sh
new file mode 100755
index 0000000..22e8521
--- /dev/null
+++ b/docs/32_lammpsACE/kokkos_tools_clone.sh
@@ -0,0 +1,2 @@
+#!/bin/sh
+git clone git@github.com:kokkos/kokkos-tools.git  # creates ./kokkos-tools; NOTE(review): no revision is pinned - confirm intended
diff --git a/docs/32_lammpsACE/kokkos_tools_env_elcapitan.sh b/docs/32_lammpsACE/kokkos_tools_env_elcapitan.sh
new file mode 100644
index 0000000..6596b09
--- /dev/null
+++ b/docs/32_lammpsACE/kokkos_tools_env_elcapitan.sh
@@ -0,0 +1,4 @@
+#!/bin/bash
+# Sourced by the batch script: export the path of the space-time-stack profiling library.
+dir_root="$(git rev-parse --show-toplevel)"
+export KOKKOS_TOOLS_LIBS="${dir_root}/docs/32_lammpsACE/kokkos-tools/profiling/space-time-stack/kp_space_time_stack.so"
diff --git a/docs/32_lammpsACE/lammpsACE.rst b/docs/32_lammpsACE/lammpsACE.rst
index f398eb7..7beec84 100644
--- a/docs/32_lammpsACE/lammpsACE.rst
+++ b/docs/32_lammpsACE/lammpsACE.rst
@@ -2,55 +2,557 @@
 LAMMPS ACE
 **********
 
-https://docs.lammps.org/pair_pace.html
+.. note::
+   The documentation herein needs to be updated for current
+   performance.
+
+This is the documentation for the benchmark [LAMMPS]_, specifically
+KOKKOS-LAMMPS (see [KOKKOS-LAMMPS]_). The content herein was created
+by the following authors (in alphabetical order).
+
+- `Anthony M. Agelastos <mailto:amagela@sandia.gov>`_
+- `Stan Moore <mailto:stamoor@sandia.gov>`_
+
+This material is based upon work supported by the Sandia National
+Laboratories (SNL), a multimission laboratory managed and operated by
+National Technology and Engineering Solutions of Sandia under the
+U.S. Department of Energy's National Nuclear Security Administration
+under contract DE-NA0003525. Content herein considered unclassified
+with unlimited distribution under SAND2023-01070O.
 
 
 Purpose
 =======
 
+Heavily pulled from their [lammps-site]_:
+
+   LAMMPS is a classical molecular dynamics code with a focus on
+   materials modeling. It's an acronym for Large-scale
+   Atomic/Molecular Massively Parallel Simulator. LAMMPS has
+   potentials for solid-state materials (metals, semiconductors) and
+   soft matter (biomolecules, polymers) and coarse-grained or
+   mesoscopic systems. It can be used to model atoms or, more
+   generically, as a parallel particle simulator at the atomic, meso,
+   or continuum scale. LAMMPS runs on single processors or in parallel
+   using message-passing techniques and a spatial-decomposition of the
+   simulation domain. Many of its models have versions that provide
+   accelerated performance on CPUs, GPUs, and Intel Xeon Phis. The
+   code is designed to be easy to modify or extend with new
+   functionality.
+
 
 Characteristics
 ===============
 
-Problems
---------
+The goal is to utilize the specified version of LAMMPS (see
+:ref:`LAMMPSApplicationVersion`) that runs the benchmark problem (see
+:ref:`LAMMPSProblem`) correctly (see :ref:`LAMMPSCorrectness` if
+changes are made to LAMMPS).
+
+
+.. _LAMMPSApplicationVersion:
+
+Application Version
+-------------------
+
+The command to clone is provided below.
+
+.. literalinclude:: lammps_clone.sh
+   :language: sh
+   :lines: 2-
+
+.. note::
+   The Git SHA will be updated with a tag soon.
+
+The script to clone can be downloaded from :download:`lammps_clone.sh
+<lammps_clone.sh>`. It can also be executed in place to clone into
+``docs/32_lammpsACE/lammps``.
+ 
+.. code-block:: bash
+
+   cd docs/32_lammpsACE
+   ./lammps_clone.sh
+
+
+.. _LAMMPSProblem:
+
+Problem
+-------
+
+This problem runs an ACE (atomic cluster expansion) machine-learned
+potential for a copper crystal using a face-centered cubic (fcc)
+lattice at 300 K. Please refer to [pace-site]_ and [pace-article]_ for
+more information.
+
+This problem is *mostly* present within the upstream LAMMPS
+repository. The components of this problem are listed below (paths
+given are within LAMMPS repository). Each of these files will need to
+be copied into a run directory for the simulation.
+
+``examples/PACKAGES/pace/Cu-PBE-core-rep.ace``
+   This is an input needed for the simulation.
+
+``examples/PACKAGES/pace/in.pace.product``
+   This is the default input file that controls the simulation. Some
+   parameters within this file may need to be changed depending upon
+   what is being run (i.e., these parameters control how much memory
+   it uses). The modified version of this within the template
+   directory should be preferred; more on this below.
+
+A template run directory was created to help ease performing a
+simulation; this directory is ``templatedir``. There are some key
+files within it.
+
+``templatedir/in.pace.product``
+   This is a modified version of the input file with some key
+   parameters changed to be more appropriate as a benchmark. It is
+   designed to run for approximately 11 minutes in 2 phases of 5.5
+   minutes each. LAMMPS already directly computes the FOM and outputs
+   it for each of the phases. This second phase of 5.5 minutes is the
+   FOM that is to be tracked.
+
+``templatedir/lammps_ln.sh``
+   This file creates symbolic links to files and folders needed for
+   the simulation.
+
+``templatedir/lammps_batch_elcapitan.sh``
+   This is a batch script compatible with El Capitan. It has
+   capabilities for setting key job parameters from the command line;
+   more on that below.
+
+
+An excerpt from this input file that has its key parameters is
+provided below.
+
+.. code-block::
+   :emphasize-lines: 2,7
+
+   <snip>
+   variable        L index 64.0
+   region          box block 0 ${L} 0 ${L} 0 ${L}
+   <snip>
+   pair_style      pace product chunksize 49152
+   <snip>
+   thermo          10
+   thermo_style    custom step cpu temp epair etotal press v_delenergy v_delpress
+   <snip>
+   ##################################
+   ### Benchmarking modifications ###
+   ##################################
+   
+   # Add a thermostat to keep temperature from falling
+   variable        tdamp equal $(dt)
+   fix             mynvt all nvt temp 300.0 300.0 ${tdamp}
+   
+   # Some systems buffer extensively
+   thermo_modify   flush yes
+   
+   # Print out the value of L for parsing ease
+   print "The value of L is $L" 
+   
+   ### Throw out first 5.5 minutes for hardware equilibrium
+   
+   # Stop after 5.5 minutes
+   fix             2 all halt 10 tlimit > 330.0 message no error continue
+   run             10000000
+   
+   ### Run another 5.5 minutes for final FOM
+   unfix           2
+   
+   # Stop after 5.5 minutes
+   fix             3 all halt 10 tlimit > 330.0 message no
+   run             10000000
+
+These parameters are described below.
+
+``L``
+   This corresponds to the **l**\ ength scale factor. This will scale
+   the dimensions of the problem.
+
+``thermo``
+   Compute and print thermodynamic info (e.g., temperature, energy,
+   pressure) on timesteps that are a multiple of this parameter and at
+   the beginning and end of a simulation.
+
+This problem exhibits different runtime characteristics whether or not
+Kokkos is enabled. Specifically, there is some work that is performed
+within Kokkos that helps to keep this problem as well behaved from a
+throughput perspective as possible. Ergo, Kokkos must be enabled for
+the simulations regardless of the hardware being used (the cases
+herein have configurations that enable it for reference).
+
 
 Figure of Merit
 ---------------
 
+Each LAMMPS simulation writes out a file named "log.lammps". At the
+end of this file is a block that resembles the following
+example.
+
+.. code-block::
+   :emphasize-lines: 11
+
+   Step         CPU        Temp       E_pair       TotEng       Press      v_delenergy       v_delpress  
+    640   0           299.7264    -3834241     -3793616.4   62562.774   -3.7252903e-08    4.8748916e-10
+    650   5.1882405   300.1416    -3834085.9   -3793405     62656.487    3.7252903e-08    2.2555469e-10
+    660   10.389581   300.04536   -3834003.9   -3793336     62705.836   -1.4901161e-08    2.910383e-11 
+   <snip>
+   1260   323.38353   300.55705   -3834187.5   -3793450.4   62842.117    9.778887e-09     1.5279511e-10
+   1270   328.58739   300.25528   -3834141.7   -3793445.4   62861.607    1.0244548e-08   -5.0931703e-10
+   1280   333.79045   300.1357    -3834154.7   -3793474.6   62856.262   -1.1641532e-08    1.6734703e-10
+   Loop time of 333.812 on 4 procs for 640 steps with 1048576 atoms
+
+   Performance: 0.083 ns/day, 289.767 hours/ns, 1.917 timesteps/s, 2.010 Matom-step/s
+   45.1% CPU use with 4 MPI tasks x 1 OpenMP threads
+
+The quantity of interest (QOI) is "Mega atom steps per second," which
+is directly computed as ``Matom-step/s`` in the example above.
+
+It is desired to capture the FOM for varying problem sizes that
+encompass utilizing 50% to 80% of available memory (when all PEs are
+utilized). The ultimate goal is to maximize this throughput FOM while
+utilizing at least 50% of available memory.
+
+
+.. _LAMMPSCorrectness:
+
+Correctness
+-----------
+
+The aforementioned relevant block of output within "log.lammps" is
+replicated below.
+
+.. code-block::
+   :emphasize-lines: 2,3,4,6,7,8
+
+   Step         CPU        Temp       E_pair       TotEng       Press      v_delenergy       v_delpress  
+    640   0           299.7264    -3834241     -3793616.4   62562.774   -3.7252903e-08    4.8748916e-10
+    650   5.1882405   300.1416    -3834085.9   -3793405     62656.487    3.7252903e-08    2.2555469e-10
+    660   10.389581   300.04536   -3834003.9   -3793336     62705.836   -1.4901161e-08    2.910383e-11 
+   <snip>
+   1260   323.38353   300.55705   -3834187.5   -3793450.4   62842.117    9.778887e-09     1.5279511e-10
+   1270   328.58739   300.25528   -3834141.7   -3793445.4   62861.607    1.0244548e-08   -5.0931703e-10
+   1280   333.79045   300.1357    -3834154.7   -3793474.6   62856.262   -1.1641532e-08    1.6734703e-10
+   Loop time of 333.812 on 4 procs for 640 steps with 1048576 atoms
+
+   Performance: 0.083 ns/day, 289.767 hours/ns, 1.917 timesteps/s, 2.010 Matom-step/s
+   45.1% CPU use with 4 MPI tasks x 1 OpenMP threads
+
+There are several columns of interest regarding correctness; these are
+listed below.
+
+``Step``
+   This is the step number and is the first column.
+
+``Temp``
+   This tracks the temperature aspect of the simulation.
+
+``Press``
+   This tracks the pressure aspect of the simulation.
+
+Assessing the correctness will involve comparing these quantities
+across modified (henceforth denoted with "mod" subscript) and
+unmodified ("unmod" subscript) LAMMPS subject to the methodology
+below.
+
+The **first** step is to adjust the ``thermo`` parameter
+to a value of 1 so fine-grained output is generated; if this is
+significantly slowing down computation, then it can be increased to a
+value of 10. Then, produce output from LAMMPS\ :sub:`unmod` with the
+same settings.
+
+The **second** step is to compute the absolute differences between
+modified and unmodified LAMMPS for ``Temp`` and ``Press`` for each
+row, *i*, whose ``Step`` is relevant for the FOM for LAMMPS\
+:sub:`mod`,
+
+.. math::
+   \Delta \texttt{Temp}_i &= | \texttt{Temp}_{\textrm{mod},i}-\texttt{Temp}_{\textrm{unmod},i} | \\
+   \Delta \texttt{Press}_i &= | \texttt{Press}_{\textrm{mod},i}-\texttt{Press}_{\textrm{unmod},i} | \\
+
+where
+
+* *i* is each line whose ``CPU`` time is part of the second phase for LAMMPS\ :sub:`mod`
+
+The **third** step is to compute the arithmetic mean of each of the
+aforementioned quantities over the *n* rows,
+
+.. math::
+   \mu _{\Delta \texttt{Temp}} &= \frac{\sum_{i} \Delta \texttt{Temp}_i}{n} \\
+   \mu _{\Delta \texttt{Press}} &= \frac{\sum_{i} \Delta \texttt{Press}_i}{n} \\
+
+where
+
+.. math::
+   n = \sum_{i} 1
+
+The **fourth** step is to compute the arithmetic mean of the *n*
+matching rows of the unmodified LAMMPS,
+
+.. math::
+   \mu _{\texttt{Temp},\textrm{unmod}} &= \frac{\sum_{i} \texttt{Temp}_{\textrm{unmod},i}}{n} \\
+   \mu _{\texttt{Press},\textrm{unmod}} &= \frac{\sum_{i} \texttt{Press}_{\textrm{unmod},i}}{n} \\
+
+The **fifth** step is to normalize the differences with the baseline
+values to create the error ratios,
+
+.. math::
+   \varepsilon _{\texttt{Temp}} &= \frac{\mu _{\Delta \texttt{Temp}}}{\mu _{\texttt{Temp},\textrm{unmod}}} \\
+   \varepsilon _{\texttt{Press}} &= \frac{\mu _{\Delta \texttt{Press}}}{\mu _{\texttt{Press},\textrm{unmod}}} \\
+
+The **sixth** and final step is to check over all of the error ratios
+and if any of them exceed 5%, then the modifications are not approved
+without discussing them with this benchmark's authors. The success
+criteria are:
+
+.. math::
+   \varepsilon _{\texttt{Temp}} &\le 5\% \\
+   \varepsilon _{\texttt{Press}} &\le 5\%
 
-Source code modifications
+ 
+Source Code Modifications
 =========================
 
-Please see :ref:`GlobalRunRules` for general guidance on allowed modifications. 
+Please see :ref:`GlobalRunRules` for general guidance on allowed
+modifications.
+
+
+System Information
+==================
+
+The platforms utilized for benchmarking activities are listed and
+described below.
+
+* Advanced Technology System 4 (ATS-4), also known as El Capitan (see
+  :ref:`ElCapitanSystemDescription`)
+
 
 Building
 ========
 
+A script (``lammps_clone.sh``) is provided to clone the LAMMPS
+repository within the "lammps" folder. Instructions are provided on
+how to build LAMMPS for the following systems:
+
+* Generic (see :ref:`BuildLammpsGeneric`)
+* Advanced Technology System 4 (ATS-4), also known as El Capitan (see
+  :ref:`BuildLammpsATS4`)
+
+
+.. _BuildLammpsGeneric:
+
+Generic
+-------
+
+Refer to LAMMPS's [lammps-build]_ documentation for generic
+instructions.
+
+
+.. _BuildLammpsATS4:
+
+El Capitan
+----------
+
+Instructions for building on El Capitan are provided below. These
+instructions assume this repository has been cloned and that the
+current working directory is at the top level of this repository. 
+
+.. code-block:: bash
+
+   cd docs/32_lammpsACE
+   ./lammps_build_elcapitan.sh
+
+The script discussed above is :download:`lammps_build_elcapitan.sh
+<lammps_build_elcapitan.sh>` and is produced below for convenience and
+reference.
+
+.. literalinclude:: lammps_build_elcapitan.sh
+   :language: bash
+
 
 Running
 =======
 
+Instructions are provided on how to run LAMMPS for the following
+systems:
 
-Validation
-==========
+* Advanced Technology System 4 (ATS-4), also known as El Capitan (see :ref:`LAMMPSRunATS4`)
+
+  * Profiling with Kokkos Tools on El Capitan (see
+    :ref:`LAMMPSProfileKokkosToolsElCapitan`)
+
+
+.. _LAMMPSRunATS4:
+
+El Capitan
+----------
+
+.. note::
+
+   This section will be updated with some more content soon.
+
+An example for performing simulations on El Capitan is
+provided below.
+
+.. code-block:: bash
+
+   # first, copy templatedir into something useful
+   cp -a templatedir useful
+
+   # next, go into the run folder
+   cd useful
+
+   # submit job and set parameters on command line if desired
+   #   this example sets L (aka lammps_len) to 64
+   #   this example turns on Kokkos Tools profiling (aka is_kokkos_tools)
+   #   this example runs on 1 node (aka --nodes=1)
+   lammps_len=64 is_kokkos_tools=1 flux batch --nodes=1 lammps_batch_elcapitan.sh
+
+
+.. _LAMMPSProfileKokkosToolsElCapitan:
+
+Profiling with Kokkos Tools
+^^^^^^^^^^^^^^^^^^^^^^^^^^^
 
+Scripts are provided to clone and build Kokkos Tools. The steps to do
+both are provided below.
 
-Example Scalability Results
-===========================
+.. code-block:: bash
 
+   # go into the LAMMPS documentation folder
+   cd docs/32_lammpsACE
 
-Memory Usage
-============
+   # clone Kokkos Tools
+   ./kokkos_tools_clone.sh
 
+   # build Kokkos Tools' Space Time
+   ./kokkos_tools_build_elcapitan.sh
 
-Strong Scaling on El Capitan
-============================
+Once built, the command line variable ``is_kokkos_tools`` can be set
+to ``1`` for the batch script to turn it on. After a successful run,
+it will output additional memory information. An example of this (for
+``L`` equal to 64) on El Capitan is provided below that shows
+approximately 99.6 GB of memory allocated on each GPU.
 
+.. code-block::
 
-Weak Scaling on El Capitan
-==========================
+   KOKKOS HIP SPACE:
+   ===================
+   MAX MEMORY ALLOCATED: 99615719.6 kB
+
+
+.. _LAMMPSResults:
+
+Verification of Results
+=======================
+
+Additional information:
+
+* The sub-section :ref:`LAMMPSComputeFOM` describes how to compute the
+  FOM
+
+Single-node results from LAMMPS are provided on the following systems:
+
+* Advanced Technology System 4 (ATS-4), also known as El Capitan (see
+  :ref:`ResultsLammpsATS4`)
+
+Multi-node results from LAMMPS are provided on the following system(s):
+
+* Advanced Technology System 4 (ATS-4), also known as El Capitan (see
+  :ref:`ResultsLammpsScaleATS4`)
+
+
+.. _LAMMPSComputeFOM:
+
+Compute Figure of Merit
+-----------------------
+
+The figure of merit (FOM) is automatically computed by LAMMPS. The
+benchmark run is broken into two phases; extract the FOM from the last
+phase. The relevant excerpt from the "log.lammps" output is below.
+
+.. code-block::
+   :emphasize-lines: 11
+
+   Step         CPU        Temp       E_pair       TotEng       Press      v_delenergy       v_delpress  
+    640   0           299.7264    -3834241     -3793616.4   62562.774   -3.7252903e-08    4.8748916e-10
+    650   5.1882405   300.1416    -3834085.9   -3793405     62656.487    3.7252903e-08    2.2555469e-10
+    660   10.389581   300.04536   -3834003.9   -3793336     62705.836   -1.4901161e-08    2.910383e-11 
+   <snip>
+   1260   323.38353   300.55705   -3834187.5   -3793450.4   62842.117    9.778887e-09     1.5279511e-10
+   1270   328.58739   300.25528   -3834141.7   -3793445.4   62861.607    1.0244548e-08   -5.0931703e-10
+   1280   333.79045   300.1357    -3834154.7   -3793474.6   62856.262   -1.1641532e-08    1.6734703e-10
+   Loop time of 333.812 on 4 procs for 640 steps with 1048576 atoms
+
+   Performance: 0.083 ns/day, 289.767 hours/ns, 1.917 timesteps/s, 2.010 Matom-step/s
+   45.1% CPU use with 4 MPI tasks x 1 OpenMP threads
+
+The FOM is the quantity ``Matom-step/s``, which in this example is 2.010. 
+
+
+.. _ResultsLammpsATS4:
+
+El Capitan - Single Node
+------------------------
+
+.. note::
+
+   This section will be updated with some more content soon.
+
+A single-node example is below that showcases 2.010 Mega atom
+steps per second per node. The other relevant parameters are displayed
+as part of the output.
+
+.. code-block::
+   :emphasize-lines: 11
+
+   Step         CPU        Temp       E_pair       TotEng       Press      v_delenergy       v_delpress  
+    640   0           299.7264    -3834241     -3793616.4   62562.774   -3.7252903e-08    4.8748916e-10
+    650   5.1882405   300.1416    -3834085.9   -3793405     62656.487    3.7252903e-08    2.2555469e-10
+    660   10.389581   300.04536   -3834003.9   -3793336     62705.836   -1.4901161e-08    2.910383e-11 
+   <snip>
+   1260   323.38353   300.55705   -3834187.5   -3793450.4   62842.117    9.778887e-09     1.5279511e-10
+   1270   328.58739   300.25528   -3834141.7   -3793445.4   62861.607    1.0244548e-08   -5.0931703e-10
+   1280   333.79045   300.1357    -3834154.7   -3793474.6   62856.262   -1.1641532e-08    1.6734703e-10
+   Loop time of 333.812 on 4 procs for 640 steps with 1048576 atoms
+
+   Performance: 0.083 ns/day, 289.767 hours/ns, 1.917 timesteps/s, 2.010 Matom-step/s
+   45.1% CPU use with 4 MPI tasks x 1 OpenMP threads
+
+
+.. _ResultsLammpsScaleATS4:
+
+El Capitan - Many Nodes
+-----------------------
+
+.. note::
+
+   This section will be updated with some more content soon.
 
 
 References
 ==========
+
+.. [LAMMPS] LAMMPS - a flexible simulation tool for particle-based
+            materials modeling at the atomic, meso, and continuum scales,
+            A. P. Thompson, H. M. Aktulga, R. Berger, D. S. Bolintineanu,
+            W. M. Brown, P. S. Crozier, P. J. in't Veld, A. Kohlmeyer,
+            S. G. Moore, T. D. Nguyen, R. Shan, M. J. Stevens, J. Tranchida,
+            C. Trott, S. J. Plimpton, Comp Phys Comm, 271 (2022) 10817.
+.. [lammps-site] LAMMPS Developers, 'LAMMPS Molecular Dynamics Simulator', 2026.
+                 [Online]. Available: https://lammps.org. [Accessed: 15- Feb- 2026]
+.. [lammps-build] LAMMPS Developers, 'LAMMPS Documentation', 2026.
+                 [Online]. Available: https://docs.lammps.org/Manual.html.
+                 [Accessed: 15- Feb- 2026]
+.. [pace-site] LAMMPS Developers, 'pair_style pace command - LAMMPS Documentation', 2026.
+               [Online]. Available: https://docs.lammps.org/pair_pace.html#description
+.. [pace-article] Lysogorskiy, Y., Oord, C.v.d., Bochkarev, A. et al.,
+                  Performant implementation of the atomic cluster expansion (PACE)
+                  and application to copper and silicon. NPJ Comput. Mater. 7, 97 (2021). # codespell:ignore
+                  https://doi.org/10.1038/s41524-021-00559-9
+.. [KOKKOS-LAMMPS] Anders Johansson, Evan Weinberg, Christian Trott, Megan McCarthy, and Stan Moore.
+                   2025. LAMMPS-KOKKOS: Performance Portable Molecular Dynamics Across Exascale Architectures.
+                   In Proceedings of the SC '25 Workshops of the International Conference for High Performance
+                   Computing, Networking, Storage and Analysis (SC Workshops '25).
+                   Association for Computing Machinery, New York, NY, USA, 1217–1232.
+                   https://doi.org/10.1145/3731599.3767498
diff --git a/docs/32_lammpsACE/lammps_build_elcapitan.sh b/docs/32_lammpsACE/lammps_build_elcapitan.sh
new file mode 100755
index 0000000..30fd4f5
--- /dev/null
+++ b/docs/32_lammpsACE/lammps_build_elcapitan.sh
@@ -0,0 +1,56 @@
+#!/usr/bin/env bash
+
+# set top-level script parameters
+umask 022
+set -e
+set -x
+
+# create vars for common directories and files
+dir_root="`git rev-parse --show-toplevel`"
+dir_pwd="` pwd -P `"
+dir_src="${dir_pwd}/lammps"
+dir_build="${dir_pwd}/lammps/_build"
+file_log="${dir_pwd}/lammps_build.log"
+
+# redirect STDOUT and STDERR through tee
+exec &> >(tee >(ts '[%Y-%m-%d %H:%M:%S]' > "${file_log}"))
+
+# let's turn on verbosity now
+set -v
+
+# output for posterity
+hostname
+uptime
+lscpu
+
+# clean and reset source
+pushd "${dir_src}"
+git clean -fdx
+git reset --hard
+popd
+
+# create build directory
+test -d "${dir_build}" && rm -rf "${dir_build}"
+mkdir -p "${dir_build}"
+
+# build
+#   list current environment
+module list
+#   alter environment
+. lammps_env_elcapitan.sh
+#   list current environment
+module list
+pushd "${dir_build}"
+cmake \
+    -C ../cmake/presets/elcapitan_kokkos.cmake \
+    -DPKG_ML-PACE=on \
+    -DBUILD_MPI=on \
+    -D CMAKE_BUILD_TYPE=Release \
+    ../cmake
+/usr/bin/time --verbose -- \
+    nice -n 1 \
+        gmake -j 64
+popd
+
+# gracefully exit
+exit 0
diff --git a/docs/32_lammpsACE/lammps_clone.sh b/docs/32_lammpsACE/lammps_clone.sh
new file mode 100755
index 0000000..b4879a8
--- /dev/null
+++ b/docs/32_lammpsACE/lammps_clone.sh
@@ -0,0 +1,4 @@
+#!/bin/sh
+git clone git@github.com:lammps/lammps.git
+cd lammps || exit 1  # without this guard a failed clone would run the checkout in the enclosing repository
+git checkout a51f9ba0e719be544293987bb3cbd9939f1b01ee
diff --git a/docs/32_lammpsACE/lammps_env_elcapitan.sh b/docs/32_lammpsACE/lammps_env_elcapitan.sh
new file mode 100644
index 0000000..5dad5e9
--- /dev/null
+++ b/docs/32_lammpsACE/lammps_env_elcapitan.sh
@@ -0,0 +1,28 @@
+#!/bin/bash
+# Environment for building and running LAMMPS on El Capitan; meant to be sourced.
+module load craype-accel-amd-gfx942
+module load PrgEnv-cray
+module load rocm/6.2.1
+module load python
+
+export LD_LIBRARY_PATH="${CRAY_LD_LIBRARY_PATH}:${LD_LIBRARY_PATH}"
+
+export MPICH_GPU_SUPPORT_ENABLED=1
+export MPICH_OFI_NIC_POLICY=GPU
+
+### FIXME ### Replace with a system-wide install of libfabric from SHS 11 (or newer)
+export LD_LIBRARY_PATH="/usr/workspace/wsb/accept/packages-2024/SHS11_lib:${LD_LIBRARY_PATH}"
+
+export HIP_PATH="$(hipconfig -p)"
+export LD_LIBRARY_PATH="${HIP_PATH}/lib:${LD_LIBRARY_PATH}"
+
+### libfabric should look only for the ROCm runtime (not CUDA, etc.)
+export FI_HMEM=rocr
+
+# malloc() calls are backed by huge pages
+export HUGETLB_MORECORE=yes
+
+# libhugetlbfs is enabled only for these executables:
+export HUGETLB_RESTRICT_EXE=defrag:lmp
+
+export HSA_XNACK=1
diff --git a/docs/32_lammpsACE/templatedir/in.pace.product b/docs/32_lammpsACE/templatedir/in.pace.product
new file mode 100644
index 0000000..c8d6dee
--- /dev/null
+++ b/docs/32_lammpsACE/templatedir/in.pace.product
@@ -0,0 +1,61 @@
+# simple test of fcc Cu with ACE product
+
+units           metal
+atom_style      atomic
+
+neighbor        0.3 bin
+neigh_modify    every 2 delay 10 check yes
+
+variable        a equal 3.597
+lattice         fcc $a
+variable        L index 64.0
+region          box block 0 ${L} 0 ${L} 0 ${L}
+create_box      1 box
+create_atoms    1 box
+
+mass            1 26.98  # NOTE(review): 26.98 is the atomic mass of Al (Cu is ~63.55) - presumably inherited from upstream; confirm
+
+pair_style      pace product chunksize 49152
+pair_coeff  * * Cu-PBE-core-rep.ace Cu
+
+velocity        all create 300 8728 loop geom
+timestep        0.0005
+fix             1 all nve
+
+compute         eatom all pe/atom
+compute         energy all reduce sum c_eatom
+variable        delenergy equal c_energy-pe
+
+compute         satom all stress/atom NULL
+compute         str all reduce sum c_satom[1] c_satom[2] c_satom[3]
+variable        delpress equal -(c_str[1]+c_str[2]+c_str[3])/(3*vol)-press
+
+thermo          10
+thermo_style    custom step cpu temp epair etotal press v_delenergy v_delpress
+
+##################################
+### Benchmarking modifications ###
+##################################
+
+# Add a thermostat to keep temperature from falling; NOTE(review): fix 1 (nve) above stays active too - confirm intended
+variable        tdamp equal $(dt)
+fix             mynvt all nvt temp 300.0 300.0 ${tdamp}
+
+# Some systems buffer extensively
+thermo_modify   flush yes
+
+# Print out the value of L for parsing ease
+print "The value of L is $L" 
+
+### Throw out first 5.5 minutes for hardware equilibrium
+
+# Stop after 5.5 minutes (tlimit is 330.0 s); error continue allows the next run command to execute
+fix             2 all halt 10 tlimit > 330.0 message no error continue
+run             10000000
+
+### Run another 5.5 minutes for final FOM
+unfix           2
+
+# Stop after 5.5 minutes
+fix             3 all halt 10 tlimit > 330.0 message no
+run             10000000
diff --git a/docs/32_lammpsACE/templatedir/lammps_batch_elcapitan.sh b/docs/32_lammpsACE/templatedir/lammps_batch_elcapitan.sh
new file mode 100755
index 0000000..6ffdf96
--- /dev/null
+++ b/docs/32_lammpsACE/templatedir/lammps_batch_elcapitan.sh
@@ -0,0 +1,45 @@
+#!/usr/bin/env bash
+
+#flux: # --nodes=1
+#flux: -u
+#flux: --exclusive
+#flux: -q pbatch
+#flux: -t 20
+#flux: --job-name=lammps-fcr-fy30
+#flux: --setattr=thp=always
+#flux: --setattr=hugepages=512GB
+
+# e.g., to set the L parameter to a different value: lammps_len=2.0    flux batch lammps_batch.sh
+# e.g., to turn on Kokkos Tools Space Time:          is_kokkos_tools=1 flux batch lammps_batch.sh
+
+# define runtime params
+lammps_len=${lammps_len:-1}
+is_kokkos_tools=${is_kokkos_tools:-0}
+flux_job_nodes=${flux_job_nodes:-`flux resource list -s up -no {nnodes}`}
+echo "lammps_len=${lammps_len}"
+echo "is_kokkos_tools=${is_kokkos_tools}"
+echo "flux_job_nodes=${flux_job_nodes}"
+
+# define useful locations
+dir_base="` pwd -P `"
+
+# set up environment appropriately
+. lammps_env_elcapitan.sh
+test ${is_kokkos_tools} -eq 1 && . kokkos_tools_env_elcapitan.sh
+
+# run on 4 GPUs per node
+flux run \
+     -u \
+     --exclusive \
+     --verbose \
+     -N ${flux_job_nodes} \
+     -n $((4 * flux_job_nodes)) \
+     -x \
+     -c 24 \
+     -o cpu-affinity=off \
+     -o gpu-affinity=off \
+     -o mpibind=on,smt:1,verbose:0 \
+     "${dir_base}/lammps/_build/lmp" \
+         -sf kk -k on g 1 -pk kokkos neigh half newton on \
+         -in in.pace.product \
+         -var L ${lammps_len}
diff --git a/docs/32_lammpsACE/templatedir/lammps_ln.sh b/docs/32_lammpsACE/templatedir/lammps_ln.sh
new file mode 100755
index 0000000..3a85645
--- /dev/null
+++ b/docs/32_lammpsACE/templatedir/lammps_ln.sh
@@ -0,0 +1,10 @@
+#!/bin/sh
+# Link the env scripts, the LAMMPS checkout, and the ACE potential into this directory.
+set -v
+
+[ -e lammps_env_elcapitan.sh ]       || ln -s ../lammps_env_elcapitan.sh
+[ -e kokkos_tools_env_elcapitan.sh ] || ln -s ../kokkos_tools_env_elcapitan.sh
+[ -e lammps ]                        || ln -s ../lammps
+[ -e Cu-PBE-core-rep.ace ]           || ln -s ../lammps/examples/PACKAGES/pace/Cu-PBE-core-rep.ace
+
+exit 0