diff --git a/README.md b/README.md index 540ebdda..43b3ca21 100644 --- a/README.md +++ b/README.md @@ -22,6 +22,11 @@ indice= username= password= +[.crucible] +url= +username= +password= + [ocp-server] port=8000 @@ -53,7 +58,7 @@ indice= username= password= ``` -**Note: The below applies only for the elastic search at the moment** +**Note: The below applies only for the elastic search at the moment** If you also have an archived internal instance that keeps track of older data, it can be specified with '.internal' suffix. Example of our `OCP` internal archived instance's configuration. ```toml [ocp.elasticsearch.internal] diff --git a/backend/app/api/api.py b/backend/app/api/api.py index b74b8ad4..c37735dc 100644 --- a/backend/app/api/api.py +++ b/backend/app/api/api.py @@ -1,3 +1,4 @@ +import sys from fastapi import APIRouter from app.api.v1.endpoints.ocp import results @@ -11,6 +12,7 @@ from app.api.v1.endpoints.telco import telcoJobs from app.api.v1.endpoints.telco import telcoGraphs from app.api.v1.endpoints.ocm import ocmJobs +from app.api.v1.endpoints.ilab import ilab router = APIRouter() @@ -39,3 +41,6 @@ # OCM endpoint router.include_router(ocmJobs.router, tags=['ocm']) + +# InstructLab endpoint +router.include_router(router=ilab.router, tags=['ilab']) diff --git a/backend/app/api/v1/endpoints/ilab/ilab.py b/backend/app/api/v1/endpoints/ilab/ilab.py new file mode 100644 index 00000000..a5b4b825 --- /dev/null +++ b/backend/app/api/v1/endpoints/ilab/ilab.py @@ -0,0 +1,764 @@ +"""Access RHEL AI InstructLab performance data through Crucible + +This defines an API to expose and filter performance data from InstructLab +CPT runs via a persistent Crucuble controller instance as defined in the +configuration path "ilab.crucible". +""" + +from datetime import datetime, timedelta, timezone +from typing import Annotated, Any, Optional + +from fastapi import APIRouter, Depends, Query + +from app.services.crucible_svc import CrucibleService, Graph, GraphList + +router = APIRouter() + + +CONFIGPATH = "ilab.crucible" + + +def example_response(response) -> dict[str, Any]: + return {"content": {"application/json": {"example": response}}} + + +def example_error(message: str) -> dict[str, Any]: + return example_response({"message": message}) + + +async def crucible_svc(): + crucible = None + try: + crucible = CrucibleService(CONFIGPATH) + yield crucible + finally: + if crucible: + await crucible.close() + + +@router.get( + "/api/v1/ilab/runs/filters", + summary="Returns possible filters", + description=( + "Returns a nested JSON object with all parameter and tag filter terms" + ), + responses={ + 200: example_response( + { + "param": { + "model": [ + "/home/models/granite-7b-redhat-lab", + "/home/models/granite-7b-lab/", + "/home/models/Mixtral-8x7B-Instruct-v0.1", + ], + "gpus": ["4"], + "workflow": ["train", "sdg", "train+eval"], + "data-path": [ + "/home/data/training/jun12-phase05.jsonl", + "/home/data/training/knowledge_data.jsonl", + "/home/data/training/jul19-knowledge-26k.jsonl", + "/home/data/jun12-phase05.jsonl", + ], + "nnodes": ["1"], + "train-until": ["checkpoint:1", "complete"], + "save-samples": ["5000", "2500", "10000"], + "deepspeed-cpu-offload-optimizer": ["0", "1"], + "deepspeed-cpu-offload-optimizer-pin-memory": ["0", "1"], + "batch-size": ["4", "8", "16", "12", "0"], + "cpu-offload-optimizer": ["1"], + "cpu-offload-pin-memory": ["1"], + "nproc-per-node": ["4"], + "num-runavg-samples": ["2", "6"], + "num-cpus": ["30"], + }, + "tag": {"topology": ["none"]}, + } + ) + }, +) +async 
def run_filters(crucible: Annotated[CrucibleService, Depends(crucible_svc)]): + return await crucible.get_run_filters() + + +@router.get( + "/api/v1/ilab/runs", + summary="Returns a list of InstructLab runs", + description="Returns a list of runs summary documents.", + responses={ + 200: example_response( + { + "results": [ + { + "benchmark": "ilab", + "email": "rhel-ai-user@example.com", + "id": "bd72561c-cc20-400b-b6f6-d9534a60033a", + "name": '"RHEL-AI User"', + "source": "n42-h01-b01-mx750c.example.com//var/lib/crucible/run/ilab--2024-09-11_19:43:53_UTC--bd72561c-cc20-400b-b6f6-d9534a60033a", + "status": "pass", + "begin_date": "1970-01-01 00:00:00+00:00", + "end_date": "1970-01-01 00:00:00+00:00", + "params": { + "gpus": "4", + "model": "/home/models/Mixtral-8x7B-Instruct-v0.1", + "workflow": "sdg", + }, + "iterations": [ + { + "iteration": 1, + "primary_metric": "ilab::sdg-samples-sec", + "primary_period": "measurement", + "status": "pass", + "params": { + "gpus": "4", + "model": "/home/models/Mixtral-8x7B-Instruct-v0.1", + "workflow": "sdg", + }, + } + ], + "primary_metrics": ["ilab::sdg-samples-sec"], + "tags": {"topology": "none"}, + } + ], + "count": 5, + "total": 21, + "startDate": "2024-08-19 20:42:52.239000+00:00", + "endDate": "2024-09-18 20:42:52.239000+00:00", + } + ), + 400: example_error( + "sort key 'bad' must be one of begin,benchmark,email,end,id,name,source,status" + ), + 422: example_error( + "invalid date format, start_date must be less than end_date" + ), + }, +) +async def runs( + crucible: Annotated[CrucibleService, Depends(crucible_svc)], + start_date: Annotated[ + Optional[str], + Query(description="Start time for search", examples=["2020-11-10"]), + ] = None, + end_date: Annotated[ + Optional[str], + Query(description="End time for search", examples=["2020-11-10"]), + ] = None, + filter: Annotated[ + Optional[list[str]], + Query( + description="Filter terms", examples=["tag:name=value", "param:name=value"] + ), + ] = None, + sort: Annotated[ + Optional[list[str]], + Query(description="Sort terms", examples=["start:asc", "status:desc"]), + ] = None, + size: Annotated[ + Optional[int], Query(description="Number of runs in a page", examples=[10]) + ] = None, + offset: Annotated[ + int, + Query(description="Page offset to start", examples=[10]), + ] = 0, +): + if start_date is None and end_date is None: + now = datetime.now(timezone.utc) + start = now - timedelta(days=30) + end = now + else: + start = start_date + end = end_date + return await crucible.get_runs( + start=start, end=end, filter=filter, sort=sort, size=size, offset=offset + ) + + +@router.get( + "/api/v1/ilab/runs/{run}/tags", + summary="Returns the Crucible tags for a run", + description="Returns tags for a specified Run ID.", + responses={ + 200: example_response({"topology": "none"}), + 400: example_error("Parameter error"), + }, +) +async def tags(crucible: Annotated[CrucibleService, Depends(crucible_svc)], run: str): + return await crucible.get_tags(run) + + +@router.get( + "/api/v1/ilab/runs/{run}/params", + summary="Returns the InstructLab parameters for a run", + description="Returns params for a specified Run ID by iteration plus common params.", + responses={ + 200: example_response( + { + "9D5AB7D6-510A-11EF-84ED-CCA69E6B5B5B": { + "num-runavg-samples": "2", + "cpu-offload-pin-memory": "1", + "nnodes": "1", + "cpu-offload-optimizer": "1", + "data-path": "/home/data/training/knowledge_data.jsonl", + "model": "/home/models/granite-7b-lab/", + "nproc-per-node": "4", + }, + "common": { + 
"num-runavg-samples": "2", + "cpu-offload-pin-memory": "1", + "nnodes": "1", + "cpu-offload-optimizer": "1", + "data-path": "/home/data/training/knowledge_data.jsonl", + "model": "/home/models/granite-7b-lab/", + "nproc-per-node": "4", + }, + } + ), + 400: example_error("Parameter error"), + }, +) +async def params(crucible: Annotated[CrucibleService, Depends(crucible_svc)], run: str): + return await crucible.get_params(run) + + +@router.get( + "/api/v1/ilab/runs/{run}/iterations", + summary="Returns a list of InstructLab run iterations", + description="Returns a list of iterations for a specified Run ID.", + responses={ + 200: example_response( + [ + { + "id": "6B98F650-7139-11EF-BB69-98B53E962BD1", + "num": 2, + "path": None, + "primary-metric": "ilab::sdg-samples-sec", + "primary-period": "measurement", + "status": "pass", + }, + { + "id": "6B99173E-7139-11EF-9434-F8BB3B1B9CFC", + "num": 5, + "path": None, + "primary-metric": "ilab::sdg-samples-sec", + "primary-period": "measurement", + "status": "pass", + }, + ] + ), + 400: example_error("Parameter error"), + }, +) +async def iterations( + crucible: Annotated[CrucibleService, Depends(crucible_svc)], run: str +): + return await crucible.get_iterations(run) + + +@router.get( + "/api/v1/ilab/runs/{run}/samples", + summary="Returns a list of InstructLab run samples", + description="Returns a list of samples for a specified Run ID.", + responses={ + 200: example_response( + [ + { + "id": "6BBE6872-7139-11EF-BFAA-8569A9399D61", + "num": "1", + "path": None, + "status": "pass", + "iteration": 5, + "primary_metric": "ilab::sdg-samples-sec", + }, + { + "id": "6BACDFA8-7139-11EF-9F33-8185DD5B4869", + "num": "1", + "path": None, + "status": "pass", + "iteration": 2, + "primary_metric": "ilab::sdg-samples-sec", + }, + ] + ), + 400: example_error("Parameter error"), + }, +) +async def run_samples( + crucible: Annotated[CrucibleService, Depends(crucible_svc)], run: str +): + return await crucible.get_samples(run) + + +@router.get( + "/api/v1/ilab/runs/{run}/periods", + summary="Returns a list of InstructLab run periods", + description="Returns a list of periods for a specified Run ID.", + responses={ + 200: example_response( + [ + { + "begin": "2024-09-12 17:40:27.982000+00:00", + "end": "2024-09-12 18:03:23.132000+00:00", + "id": "6BA57EF2-7139-11EF-A80B-E5037504B9B1", + "name": "measurement", + "iteration": 1, + "sample": "1", + "primary_metric": "ilab::sdg-samples-sec", + "status": "pass", + }, + { + "begin": "2024-09-12 18:05:03.229000+00:00", + "end": "2024-09-12 18:27:55.419000+00:00", + "id": "6BB93622-7139-11EF-A6C0-89A48E630F9D", + "name": "measurement", + "iteration": 4, + "sample": "1", + "primary_metric": "ilab::sdg-samples-sec", + "status": "pass", + }, + ] + ), + 400: example_error("Parameter error"), + }, +) +async def run_periods( + crucible: Annotated[CrucibleService, Depends(crucible_svc)], run: str +): + return await crucible.get_periods(run) + + +@router.get( + "/api/v1/ilab/iterations/{iteration}/samples", + summary="Returns a list of InstructLab iteration samples", + description="Returns a list of iterations for a specified iteration ID.", + responses={ + 200: example_response( + [ + { + "id": "6BBE6872-7139-11EF-BFAA-8569A9399D61", + "num": "1", + "path": None, + "status": "pass", + "iteration": 5, + "primary_metric": "ilab::sdg-samples-sec", + }, + { + "id": "6BACDFA8-7139-11EF-9F33-8185DD5B4869", + "num": "1", + "path": None, + "status": "pass", + "iteration": 2, + "primary_metric": "ilab::sdg-samples-sec", + }, + ] + ), + 
400: example_error("Parameter error"), + }, +) +async def iteration_samples( + crucible: Annotated[CrucibleService, Depends(crucible_svc)], iteration: str +): + return await crucible.get_samples(iteration=iteration) + + +@router.get( + "/api/v1/ilab/runs/{run}/timeline", + summary="Returns the 'timeline' of a run", + description="Describes the sequence of iterations, samples, and periods.", + responses={ + 200: example_response( + { + "run": { + "id": "70d3b53f-c588-49a3-91c2-7fcf3927be7e", + "iterations": [ + { + "id": "BFC16DA6-60C8-11EF-AB10-CF940109872B", + "num": 1, + "path": None, + "primary-metric": "ilab::train-samples-sec", + "primary-period": "measurement", + "status": "pass", + "samples": [ + { + "id": "C021BECC-60C8-11EF-A619-E0BC70D6C320", + "num": "1", + "path": None, + "status": "pass", + "periods": [ + { + "begin": "2024-08-22 19:09:08.642000+00:00", + "end": "2024-08-22 20:04:32.889000+00:00", + "id": "C022CDC6-60C8-11EF-BA80-AFE7B4B2692B", + "name": "measurement", + } + ], + } + ], + } + ], + "begin": "2024-08-22 19:09:08.642000+00:00", + "end": "2024-08-22 20:04:32.889000+00:00", + } + } + ), + 400: example_error("Parameter error"), + }, +) +async def timeline( + crucible: Annotated[CrucibleService, Depends(crucible_svc)], run: str +): + return await crucible.get_timeline(run) + + +@router.get( + "/api/v1/ilab/runs/{run}/metrics", + summary="Describe the metrics collected for a run", + description="Returns metric labels along with breakout names and values.", + responses={ + 200: example_response( + { + "sar-net::packets-sec": { + "periods": [], + "breakouts": { + "benchmark-name": ["none"], + "benchmark-role": ["none"], + "csid": ["remotehosts-1-sysstat-1"], + "cstype": ["profiler"], + "dev": ["lo", "eno8303", "eno12399", "eno12409"], + "direction": ["rx", "tx"], + "endpoint-label": ["remotehosts-1"], + "engine-id": ["remotehosts-1-sysstat-1"], + "engine-role": ["profiler"], + "engine-type": ["profiler"], + "hosted-by": ["x.example.com"], + "hostname": ["x.example.com"], + "hypervisor-host": ["none"], + "osruntime": ["podman"], + "tool-name": ["sysstat"], + "type": ["virtual", "physical"], + "userenv": ["rhel-ai"], + }, + }, + }, + ), + 400: example_error("Parameter error"), + }, +) +async def metrics( + crucible: Annotated[CrucibleService, Depends(crucible_svc)], run: str +): + return await crucible.get_metrics_list(run) + + +@router.get( + "/api/v1/ilab/runs/{run}/breakouts/{metric}", + summary="Returns breakout options for a metric", + description="Describes the breakout names and available values for a run.", + responses={ + 200: example_response( + { + "label": "mpstat::Busy-CPU", + "class": ["throughput"], + "type": "Busy-CPU", + "source": "mpstat", + "breakouts": {"num": ["8", "72"], "thread": [0, 1]}, + } + ), + 400: example_error("Metric name not found for run "), + }, +) +async def metric_breakouts( + crucible: Annotated[CrucibleService, Depends(crucible_svc)], + run: str, + metric: str, + name: Annotated[ + Optional[list[str]], + Query( + description="List of name[=key] to match", + examples=["cpu=10", "cpu=10,cpu=110"], + ), + ] = None, + period: Annotated[ + Optional[list[str]], + Query( + description="List of periods to match", + examples=["", ","], + ), + ] = None, +): + return await crucible.get_metric_breakouts(run, metric, names=name, periods=period) + + +@router.get( + "/api/v1/ilab/runs/{run}/data/{metric}", + summary="Returns metric data collected for a run", + description="Returns data collected for a specified Run ID metric.", + responses={ + 200: 
example_response( + [ + { + "begin": "2024-08-22 20:04:05.072000+00:00", + "end": "2024-08-22 20:04:19.126000+00:00", + "duration": 14.055, + "value": 9.389257233311497, + }, + { + "begin": "2024-08-22 20:04:19.127000+00:00", + "end": "2024-08-22 20:04:32.889000+00:00", + "duration": 13.763, + "value": 9.552584444155011, + }, + ] + ), + 400: example_error("No matches for ilab::train-samples-sc+cpu=10"), + 422: example_response( + response={ + "detail": [ + { + "message": "More than one metric (2) probably means you should add filters", + "names": {"dev": ["sdb", "sdb3"]}, + "periods": [], + } + ] + } + ), + }, +) +async def metric_data( + crucible: Annotated[CrucibleService, Depends(crucible_svc)], + run: str, + metric: str, + name: Annotated[ + Optional[list[str]], + Query( + description="List of name[=key] to match", + examples=["cpu=10", "cpu=10,cpu=110"], + ), + ] = None, + period: Annotated[ + Optional[list[str]], + Query( + description="List of periods to match", + examples=["", ","], + ), + ] = None, + aggregate: Annotated[ + bool, Query(description="Allow aggregation of metrics") + ] = False, +): + return await crucible.get_metrics_data( + run, metric, names=name, periods=period, aggregate=aggregate + ) + + +@router.get( + "/api/v1/ilab/runs/{run}/summary/{metric}", + summary="Returns metric data collected for a run", + description="Returns data collected for a specified Run ID metric.", + responses={ + 200: example_response( + { + "count": 234, + "min": 7.905045031896648, + "max": 9.666444615077308, + "avg": 9.38298722585416, + "sum": 2195.6190108498736, + } + ), + 400: example_error("No matches for ilab::train-samples-sc+cpu=10"), + 422: example_response( + response={ + "detail": [ + { + "message": "More than one metric (2) probably means you should add filters", + "names": {"dev": ["sdb", "sdb3"]}, + "periods": [], + } + ] + } + ), + }, +) +async def metric_summary( + crucible: Annotated[CrucibleService, Depends(crucible_svc)], + run: str, + metric: str, + name: Annotated[ + Optional[list[str]], + Query( + description="List of name[=key] to match", + examples=["cpu=10", "cpu=10,cpu=110"], + ), + ] = None, + period: Annotated[ + Optional[list[str]], + Query( + description="List of periods to match", + examples=["", ","], + ), + ] = None, +): + return await crucible.get_metrics_summary(run, metric, names=name, periods=period) + + +@router.post( + "/api/v1/ilab/runs/multigraph", + summary="Returns overlaid Plotly graph objects", + description="Returns metric data in a form usable by the Plot React component.", + responses={ + 200: example_response( + response={ + "data": [ + { + "x": [ + "2024-09-05 21:50:07+00:00", + "2024-09-05 21:56:37+00:00", + "2024-09-05 21:56:37.001000+00:00", + "2024-09-05 21:56:52+00:00", + "2024-09-05 21:56:52.001000+00:00", + "2024-09-05 22:01:52+00:00", + ], + "y": [0.0, 0.0, 0.33, 0.33, 0.0, 0.0], + "name": "iostat::operations-merged-sec [cmd=read,dev=sdb]", + "type": "scatter", + "mode": "line", + "marker": {"color": "black"}, + "labels": {"x": "sample timestamp", "y": "samples / second"}, + "yaxis": "y", + }, + { + "x": [ + "2024-09-05 21:50:07+00:00", + "2024-09-05 21:56:37+00:00", + "2024-09-05 21:56:37.001000+00:00", + "2024-09-05 21:56:52+00:00", + "2024-09-05 21:56:52.001000+00:00", + "2024-09-05 22:01:52+00:00", + ], + "y": [0.0, 0.0, 0.33, 0.33, 0.0, 0.0], + "name": "iostat::operations-merged-sec [dev=sdb,cmd=read]", + "type": "scatter", + "mode": "line", + "marker": {"color": "purple"}, + "labels": {"x": "sample timestamp", "y": "samples / 
second"}, + "yaxis": "y", + }, + ], + "layout": { + "width": "1500", + "yaxis": { + "title": "iostat::operations-merged-sec", + "color": "black", + }, + }, + } + ), + 400: example_error("No matches for ilab::train-samples-sec"), + 422: example_response( + response={ + "detail": [ + { + "message": "More than one metric (2) probably means you should add filters", + "names": {"dev": ["sdb", "sdb3"]}, + "periods": [], + } + ] + } + ), + }, +) +async def metric_graph_body( + crucible: Annotated[CrucibleService, Depends(crucible_svc)], graphs: GraphList +): + return await crucible.get_metrics_graph(graphs) + + +@router.get( + "/api/v1/ilab/runs/{run}/graph/{metric}", + summary="Returns a single Plotly graph object for a run", + description="Returns metric data in a form usable by the Plot React component.", + responses={ + 200: example_response( + response={ + "data": [ + { + "x": [ + "2024-09-12 16:49:01+00:00", + "2024-09-12 18:04:31+00:00", + "2024-09-12 18:04:31.001000+00:00", + "2024-09-12 18:04:46+00:00", + "2024-09-12 18:04:46.001000+00:00", + "2024-09-12 18:53:16+00:00", + ], + "y": [0.0, 0.0, 1.4, 1.4, 0.0, 0.0], + "name": "iostat::operations-merged-sec [cmd=read,dev=sda]", + "type": "scatter", + "mode": "line", + "marker": {"color": "black"}, + "labels": { + "x": "sample timestamp", + "y": "samples / second", + }, + "yaxis": "y", + } + ], + "layout": { + "width": "1500", + "yaxis": { + "title": "iostat::operations-merged-sec", + "color": "black", + }, + }, + } + ), + 400: example_error("No matches for ilab::train-samples-sec"), + 422: example_response( + response={ + "detail": [ + { + "message": "More than one metric (2) probably means you should add filters", + "names": {"dev": ["sdb", "sdb3"]}, + "periods": [], + } + ] + } + ), + }, +) +async def metric_graph_param( + crucible: Annotated[CrucibleService, Depends(crucible_svc)], + run: str, + metric: str, + aggregate: Annotated[ + bool, Query(description="Allow aggregation of metrics") + ] = False, + name: Annotated[ + Optional[list[str]], + Query( + description="List of name[=key] to match", + examples=["cpu=10", "cpu=10,cpu=110"], + ), + ] = None, + period: Annotated[ + Optional[list[str]], + Query( + description="List of periods to match", + examples=["", ","], + ), + ] = None, + title: Annotated[Optional[str], Query(description="Title for graph")] = None, +): + return await crucible.get_metrics_graph( + GraphList( + run=run, + name=metric, + graphs=[ + Graph( + metric=metric, + aggregate=aggregate, + names=name, + periods=period, + title=title, + ) + ], + ) + ) diff --git a/backend/app/services/crucible_readme.md b/backend/app/services/crucible_readme.md new file mode 100644 index 00000000..0ac2e71f --- /dev/null +++ b/backend/app/services/crucible_readme.md @@ -0,0 +1,164 @@ +Crucible divides data across a set of OpenSearch (or ElasticSearch) indices, +each with a specific document mapping. CDM index names include a "root" name +(like "run") with a versioned prefix, like "cdmv7dev-run". + +Crucible timestamps are integers in "millisecond-from-the-epoch" format. 
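As a quick illustration (a sketch only, not part of the service API; the helper names are invented), these two conventions map to Python roughly like this:

```python
from datetime import datetime, timezone

def cdm_index(root: str) -> str:
    # "run" becomes "cdmv7dev-run", matching the versioned prefix described above
    return "cdmv7dev-" + root

def from_crucible_timestamp(ms: int) -> datetime:
    # Crucible timestamps are integer milliseconds-from-the-epoch
    return datetime.fromtimestamp(ms / 1000.0, timezone.utc)

print(cdm_index("run"))                        # cdmv7dev-run
print(from_crucible_timestamp(1726165775123))  # 2024-09-12 18:29:35.123000+00:00
```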
The Crucible CDM hierarchy is roughly:

- RUN (an instrumented benchmark run)
  - TAG (metadata)
  - ITERATION (a benchmark interval)
    - PARAM (execution parameters)
    - SAMPLE
      - PERIOD (time range where data is recorded)
        - METRIC_DESC (description of a specific recorded metric)
          - METRIC_DATA (a specific recorded data point)

OpenSearch doesn't support the concept of a SQL "join", but many of the indices
contain documents that could be considered a static "join" with parent documents
for convenience. For example, each `iteration` document contains a copy of its
parent `run` document, while the `period` document contains copies of its parent
`sample`, `iteration`, and `run` documents. This means, for example, that it's
possible to make a single query returning all `period` documents for a specific
iteration number of a specific run.
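A minimal sketch of such a query, using the same `elasticsearch` async client the
service itself uses (the helper name is illustrative):

```python
from elasticsearch import AsyncElasticsearch

async def periods_for_iteration(es: AsyncElasticsearch, run_id: str, num: int):
    # The embedded parent "run" and "iteration" copies act as a pre-computed
    # join, so a single query on the period index is enough.
    return await es.search(
        index="cdmv7dev-period",
        body={
            "query": {
                "bool": {
                    "filter": [
                        {"term": {"run.id": run_id}},
                        {"term": {"iteration.num": num}},
                    ]
                }
            }
        },
    )
```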
In more detail, the document types are:

- **RUN**: the basic information about a performance run, including a
  generated UUID, begin and end timestamps, a benchmark name, a user name and
  email, the (host/directory) "source" of the indexed data (which is usable on
  the controller's local file system), plus host and test harness names.

- **TAG**: a general purpose "tag" that associates some arbitrary context with
  a run, for example software versions, hardware, or other metadata. This can
  be considered a SQL JOIN with the run document, adding a tag UUID, name, and
  value.

- **ITERATION**: basic information about a performance run iteration,
  including the iteration UUID, number, the primary (benchmark) metric
  associated with the iteration, plus the primary "period" of the iteration,
  and the iteration status.

- **PARAM**: a key/value pair specifying behavior of the benchmark script for
  an iteration. Parameters are iteration-specific, but parameters that don't
  vary between iterations are often represented as run parameters.

- **SAMPLE**: basic information about a sample of an iteration, including a
  sample UUID and sample number, along with a "path" for sample data and a
  sample status.

- **PERIOD**: basic information about a period during which data is collected
  within a sample, including the period UUID, name, and begin and end
  timestamps. A set of periods can be "linked" through a "prev_id" field.

- **METRIC_DESC**: descriptive data about a specific series of metric values
  within a specific period of a run, including the metric UUID, the metric
  "class", type, and source, along with a set of "names" (key/value pairs)
  defining the metric breakout details that narrow down a specific source and
  type. For example, source:mpstat, type:Busy-CPU data is broken down by
  package, cpu, core, and other breakouts, which can be isolated or aggregated
  for data reporting.

- **METRIC_DATA**: a specific data point, sampled over a specified duration
  with a fixed begin and end timestamp, plus a floating point value. Each is
  tied to a specific metric_desc UUID. Depending on the semantics of the
  metric_desc breakouts, it's often valid to aggregate these across a set of
  related metric_desc IDs, based on source and type, for example to get
  aggregate CPU load across all modes, cores, or across all modes within a
  core. This service allows arbitrary aggregation within a given metric source
  and type, but by default it will direct the caller to specify a set of
  breakouts that resolve to a single metric_desc ID.

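To make the metric documents concrete, here is a hypothetical sketch of the two
document shapes. The field names mirror the queries in `crucible_svc.py`; the
values are invented for illustration:

```python
# Hypothetical examples only: field names follow the queries in crucible_svc.py,
# values are made up.
metric_desc_doc = {
    "run": {"id": "<run UUID>"},
    "period": {"id": "<period UUID>"},  # absent for non-periodic tool data
    "metric_desc": {
        "id": "<metric_desc UUID>",
        "class": "throughput",
        "source": "mpstat",
        "type": "Busy-CPU",
        "names": {"package": "1", "cpu": "12", "type": "sys"},
    },
}

metric_data_doc = {
    "metric_desc": {"id": "<metric_desc UUID>"},
    "metric_data": {
        "begin": "1726165775123",  # stringified milliseconds-from-epoch
        "end": "1726165789123",
        "duration": "14000",
        "value": "9.38",
    },
}
```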
+ +The `crucible_svc` allows CPT project APIs to access a Crucible CDM backing +store to find information about runs, tags, params, iterations, samples, +periods, plus various ways to expose and aggregate metric data both for +primary benchmarks and non-periodic tools. + +The `get_runs` API is the primary entry point, returning an object that +supports filtering, sorting, and pagination of the Crucible run data decorated +with useful iteration, tag, and parameter data. + +The metrics data APIs (data, breakouts, summary, and graph) now allow +filtering by the metric "name" data. This allows "drilling down" through +the non-periodic "tool data". For example, IO data is per-disk, CPU +information is broken down by core and package. You can now aggregate +all global data (e.g., total system CPU), or filter by breakout names to +select by CPU, mode (usr, sys, irq), etc. + +For example, to return `Busy-CPU` ("type") graph data from the `mpstat` +("source") tool for system mode on one core, you might query: + +``` +/api/v1/ilab/runs//graph/mpstat::Busy-CPU?name=core=12,package=1,num=77,type=sys +``` + +If you make a `graph`, `data`, or `summary` query that doesn't translate +to a unique metric, and don't select aggregation, you'll get a diagnostic +message identifying possible additional filters. For example, with +`type=sys` removed, that same query will show the available values for +the `type` breakout name: + +``` +{ + "detail": [ + { + "message": "More than one metric (5) probably means you should add filters", + "names": { + "type": [ + "guest", + "irq", + "soft", + "sys", + "usr" + ] + }, + "periods": [] + } + ] +} +``` + +This capability can be used to build an interactive exploratory UI to +allow displaying breakout details. The `get_metrics` API will show all +recorded metrics, along with information the names and values used in +those. Metrics that show "names" with more than one value will need to be +filtered to produce meaningful summaries or graphs. + +You can instead aggregate metrics across breakouts using the `?aggregate` +query parameter, like `GET /api/v1/ilab/runs//graph/mpstat::Busy-CPU?aggregate` +which will aggregate all CPU busy data for the system. + +Normally you'll want to display data based on sample periods, for example the +primary period of an iteration, using `?period=`. This will +implicitly constrain the metric data based on the period ID associated with +the `metric_desc` document *and* the begin/end time period of the selected +periods. Normally, a benchmark will will separate iterations because each is +run with a different parameter value, and the default graph labeling will +look for a set of distinct parameters not used by other iterations: for +example, `mpstat::Busy-CPU (batch-size=16)`. + +The `get_breakouts` API can be used to explore the namespace recorded for that +metric in the specified run. For example, + +``` +GET /api/v1/ilab/runs//breakouts/sar-net::packets-sec?name=direction=rx +{ + "label": "sar-net::packets-sec", + "source": "sar-net", + "type": "packets-sec", + "class": [], + "names": { + "dev": [ + "lo", + "eno12409", + "eno12399" + ], + "type": [ + "physical", + "virtual" + ] + } +} +``` + +The `get_filters` API reports all the tag and param filter tags and +values for the runs. These can be used for the `filters` query parameter +on `get_runs` to restrict the set of runs reported; for example, +`/api/v1/ilab/runs?filter=param:workflow=sdg` shows only runs with the param +arg `workflow` set to the value `sdg`. 
You can search for a subset of the +string value using the operator "~" instead of "=". For example, +`?filter=param:user~user` will match `user` values of "A user" or "The user". diff --git a/backend/app/services/crucible_svc.py b/backend/app/services/crucible_svc.py new file mode 100644 index 00000000..7ae9ac12 --- /dev/null +++ b/backend/app/services/crucible_svc.py @@ -0,0 +1,1988 @@ +"""Service to pull data from a Crucible CDM OpenSearch data store + +A set of helper methods to enable a project API to easily process data from a +Crucible controller's OpenSearch data backend. + +This includes paginated, filtered, and sorted lists of benchmark runs, along +access to the associated Crucible documents such as iterations, samples, and +periods. Metric data can be accessed by breakout names, or aggregated by +breakout subsets or collection periods as either raw data points, statistical +aggregate, or Plotly graph format for UI display. +""" + +import time +from collections import defaultdict +from dataclasses import dataclass +from datetime import datetime, timezone +from typing import Any, Iterator, Optional, Tuple, Union + +from elasticsearch import AsyncElasticsearch, NotFoundError +from fastapi import HTTPException, status +from pydantic import BaseModel + +from app import config + + +class Graph(BaseModel): + """Describe a single graph + + This represents a JSON object provided by a caller through the get_graph + API to describe a specific metric graph. + + The default title (if the field is omitted) is the metric label with a + suffix denoting breakout values selected, any unique parameter values + in a selected iteration, and (if multiple runs are selected in any Graph + list) an indication of the run index. For example, + "mpstat::Busy-CPU [core=2,type=usr] (batch-size=16)". + + Fields: + metric: the metric label, "ilab::train-samples-sec" + aggregate: True to aggregate unspecified breakouts + color: CSS color string ("green" or "#008000") + names: Lock in breakouts + periods: Select metrics for specific test period(s) + run: Override the default run ID from GraphList + title: Provide a title for the graph. The default is a generated title + """ + + metric: str + aggregate: bool = False + color: Optional[str] = None + names: Optional[list[str]] = None + periods: Optional[list[str]] = None + run: Optional[str] = None + title: Optional[str] = None + + +class GraphList(BaseModel): + """Describe a set of overlaid graphs + + This represents a JSON object provided by a caller through the get_graph + API to introduce a set of constrained metrics to be graphed. The "run + ID" here provides a default for the embedded Graph objects, and can be + omitted if all Graph objects specify a run ID. (This is most useful to + select a set of graphs all for a single run ID.) 
+ + Fields: + run: Specify the (default) run ID + name: Specify a name for the set of graphs + graphs: a list of Graph objects + """ + + run: Optional[str] = None + name: str + graphs: list[Graph] + + +@dataclass +class Point: + """Graph point + + Record the start & end timestamp and value of a metric data point + """ + + begin: int + end: int + value: float + + +colors = [ + "black", + "aqua", + "blue", + "fuschia", + "gray", + "green", + "maroon", + "navy", + "olive", + "teal", + "silver", + "lightskyblue", + "mediumspringgreen", + "mistyrose", + "darkgoldenrod", + "cadetblue", + "chocolate", + "coral", + "brown", + "bisque", + "deeppink", + "sienna", +] + + +@dataclass +class Term: + namespace: str + key: str + value: str + + +class Parser: + """Help parsing filter expressions.""" + + def __init__(self, term: str): + """Construct an instance to help parse query parameter expressions + + These consist of a sequence of tokens separated by delimiters. Each + token may be quoted to allow matching against strings with spaces. + + For example, `param:name="A string"` + + Args: + term: A filter expression to parse + """ + self.buffer = term + self.context = term + self.offset = 0 + + def _next_token( + self, delimiters: list[str] = [], optional: bool = False + ) -> Tuple[str, Union[str, None]]: + """Extract the next token from an expression + + Tokens may be quoted; the quotes are removed. for example, the two + expressions `'param':"workflow"='"sdg"'` and `param:workflow:sdg` are + identical. + + Args: + delimiters: a list of delimiter characters + optional: whether the terminating delimiter is optional + + Returns: + A tuple consisting of the token and the delimiter (or None if + parsing reached the end of the expression and the delimiter was + optional) + """ + + @dataclass + class Quote: + open: int + quote: str + + quoted: list[Quote] = [] + next_char = None + token = "" + first_quote = None + for o in range(len(self.buffer)): + next_char = self.buffer[o] + if next_char in delimiters and not quoted: + self.buffer = self.buffer[o + 1 :] + self.offset += o + 1 + break + elif next_char in ('"', "'"): + if o == 0: + first_quote = next_char + if quoted and quoted[-1].quote == next_char: + quoted.pop() + else: + quoted.append(Quote(o, next_char)) + token += next_char + else: + next_char = None + if quoted: + q = quoted[-1] + c = self.context + i = q.open + self.offset + annotated = c[:i] + "[" + c[i] + "]" + c[i + 1 :] + raise HTTPException( + status.HTTP_400_BAD_REQUEST, f"Unterminated quote at {annotated!r}" + ) + + # If delimiters are specified, and not optional, then we didn't + # find one, and that's an error. + if not optional and delimiters: + raise HTTPException( + status.HTTP_400_BAD_REQUEST, + f"Missing delimiter from {','.join(delimiters)} after {token!r}", + ) + self.buffer = "" + self.offset = len(self.context) + return (token, next_char) if not first_quote else (token[1:-1], next_char) + + +class CommonParams: + """Help with sorting out parameters + + Parameter values are associated with iterations, but often a set of + parameters is common across all iterations of a run, and that set can + provide useful context. + + This helps to filter out identical parameters across a set of + iterations. 
+ """ + + def __init__(self): + self.common: dict[str, Any] = {} + self.omit = set() + + def add(self, params: dict[str, Any]): + """Add a new iteration into the param set + + Mark all parameter keys which don't appear in all iterations, or which + have different values in at least one iteration, to be omitted from the + merged "common" param set. + + Args: + params: the param dictionary of an iteration + """ + if not self.common: + self.common.update(params) + else: + for k, v in self.common.items(): + if k not in self.omit and (k not in params or v != params[k]): + self.omit.add(k) + + def render(self) -> dict[str, Any]: + """Return a new param set with only common params""" + return {k: v for k, v in self.common.items() if k not in self.omit} + + +class CrucibleService: + """Support convenient generalized access to Crucible data + + This implements access to the "v7" Crucible "Common Data Model" through + OpenSearch queries. + """ + + # OpenSearch massive limit on hits in a single query + BIGQUERY = 262144 + + # Define the 'run' document fields that support general filtering via + # `?filter=:` + # + # TODO: this excludes 'desc', which isn't used by the ilab runs, and needs + # different treatment as it's a text field rather than a term. It's not an + # immediate priority for ilab, but may be important for general use. + RUN_FILTERS = ("benchmark", "email", "name", "source", "harness", "host") + + # Define the keywords for sorting. + DIRECTIONS = ("asc", "desc") + FIELDS = ( + "begin", + "benchmark", + "desc", + "email", + "end", + "harness", + "host", + "id", + "name", + "source", + ) + + def __init__(self, configpath: str = "crucible"): + """Initialize a Crucible CDM (OpenSearch) connection. + + Generally the `configpath` should be scoped, like `ilab.crucible` so + that multiple APIs based on access to distinct Crucible controllers can + coexist. + + Initialization includes making an "info" call to confirm and record the + server response. + + Args: + configpath: The Vyper config path (e.g., "ilab.crucible") + """ + self.cfg = config.get_config() + self.user = self.cfg.get(configpath + ".username") + self.password = self.cfg.get(configpath + ".password") + self.auth = (self.user, self.password) if self.user or self.password else None + self.url = self.cfg.get(configpath + ".url") + self.elastic = AsyncElasticsearch(self.url, basic_auth=self.auth) + + @staticmethod + def _get_index(root: str) -> str: + return "cdmv7dev-" + root + + @staticmethod + def _split_list(alist: Optional[list[str]] = None) -> list[str]: + """Split a list of parameters + + For simplicity, the APIs supporting "list" query parameters allow + each element in the list to be a comma-separated list of strings. + For example, ["a", "b", "c"] is logically the same as ["a,b,c"]. + + This method normalizes the second form into first to simplify life for + consumers. + + Args: + alist: list of names or name lists + + Returns: + A simple list of options + """ + l: list[str] = [] + if alist: + for n in alist: + l.extend(n.split(",")) + return l + + @staticmethod + def _normalize_date(value: Optional[Union[int, str, datetime]]) -> int: + """Normalize date parameters + + The Crucible data model stores dates as string representations of an + integer "millseconds-from-epoch" value. To allow flexibility, this + Crucible service allows incoming dates to be specified as ISO-format + strings, as integers, or as the stringified integer. 
+ + That is, "2024-09-12 18:29:35.123000+00:00", "1726165775123", and + 1726165775123 are identical. + + Args: + value: Representation of a date-time value + + Returns: + The integer milliseconds-from-epoch equivalent + """ + try: + if isinstance(value, int): + return value + elif isinstance(value, datetime): + return int(value.timestamp() * 1000.0) + elif isinstance(value, str): + try: + return int(value) + except ValueError: + pass + try: + d = datetime.fromisoformat(value) + return int(d.timestamp() * 1000.0) + except ValueError: + pass + except Exception as e: + print(f"normalizing {type(value).__name__} {value} failed with {str(e)}") + raise HTTPException( + status.HTTP_400_BAD_REQUEST, + f"Date representation {value} is not a date string or timestamp", + ) + + @staticmethod + def _hits( + payload: dict[str, Any], fields: Optional[list[str]] = None + ) -> Iterator[dict[str, Any]]: + """Helper to iterate through OpenSearch query matches + + Iteratively yields the "_source" of each hit. As a convenience, can + yield a sub-object of "_source" ... for example, specifying the + optional "fields" as ["metric_desc", "id"] will yield the equivalent of + hit["_source"]["metric_desc"]["id"] + + Args: + payload: OpenSearch reponse payload + fields: Optional sub-fields of "_source" + + Returns: + Yields each object from the "greatest hits" list + """ + if "hits" not in payload: + raise HTTPException( + status_code=500, detail=f"Attempt to iterate hits for {payload}" + ) + hits = payload.get("hits", {}).get("hits", []) + for h in hits: + source = h["_source"] + if fields: + for f in fields: + source = source[f] + yield source + + @staticmethod + def _aggs(payload: dict[str, Any], aggregation: str) -> Iterator[dict[str, Any]]: + """Helper to access OpenSearch aggregations + + Iteratively yields the name and value of each aggregation returned + by an OpenSearch query. This can also be used for nested aggregations + by specifying an aggregation object. + + Args: + payload: A JSON dict containing an "aggregations" field + + Returns: + Yields each aggregation from an aggregation bucket list + """ + if "aggregations" not in payload: + raise HTTPException( + status_code=500, + detail=f"Attempt to iterate missing aggregations for {payload}", + ) + aggs = payload["aggregations"] + if aggregation not in aggs: + raise HTTPException( + status_code=500, + detail=f"Attempt to iterate missing aggregation {aggregation} for {payload}", + ) + for agg in aggs[aggregation]["buckets"]: + yield agg + + @staticmethod + def _format_timestamp(timestamp: Union[str, int]) -> str: + """Convert stringified integer milliseconds-from-epoch to ISO date""" + try: + ts = int(timestamp) + except Exception as e: + print(f"ERROR: invalid {timestamp!r}: {str(e)!r}") + ts = 0 + return str(datetime.fromtimestamp(ts / 1000.00, timezone.utc)) + + @classmethod + def _format_data(cls, data: dict[str, Any]) -> dict[str, Any]: + """Helper to format a "metric_data" object + + Crucible stores the date, duration, and value as strings, so this + converts them to more useful values. The end timestamp is converted + to an ISO date-time string; the duration and value to floating point + numbers. 
+ + Args: + data: a "metric_data" object + + Returns: + A neatly formatted "metric_data" object + """ + return { + "begin": cls._format_timestamp(data["begin"]), + "end": cls._format_timestamp(data["end"]), + "duration": int(data["duration"]) / 1000, + "value": float(data["value"]), + } + + @classmethod + def _format_period(cls, period: dict[str, Any]) -> dict[str, Any]: + """Helper to format a "period" object + + Crucible stores the date values as stringified integers, so this + converts the begin and end timestamps to ISO date-time strings. + + Args: + period: a "period" object + + Returns: + A neatly formatted "period" object + """ + return { + "begin": cls._format_timestamp(timestamp=period["begin"]), + "end": cls._format_timestamp(period["end"]), + "id": period["id"], + "name": period["name"], + } + + @classmethod + def _build_filter_options(cls, filter: Optional[list[str]] = None) -> Tuple[ + Optional[list[dict[str, Any]]], + Optional[list[dict[str, Any]]], + Optional[list[dict[str, Any]]], + ]: + """Build filter terms for tag and parameter filter terms + + Each term has the form ":". Any term + may be quoted: quotes are stripped and ignored. (This is generally only + useful on the to include spaces.) + + We support three namespaces: + param: Match against param index arg/val + tag: Match against tag index name/val + run: Match against run index fields + + We support two operators: + =: Exact match + ~: Partial match + + Args: + filter: list of filter terms like "param:key=value" + + Returns: + A set of OpenSearch filter object lists to detect missing + and matching documents for params, tags, and run fields. For + example, to select param:batch-size=12 results in the + following param filter list: + + [ + {' + dis_max': { + 'queries': [ + { + 'bool': { + 'must': [ + {'term': {'param.arg': 'batch-size'}}, + {'term': {'param.val': '12'}} + ] + } + } + ] + } + } + ] + """ + terms = defaultdict(list) + for term in cls._split_list(filter): + p = Parser(term) + namespace, _ = p._next_token([":"]) + key, operation = p._next_token(["=", "~"]) + value, _ = p._next_token() + if operation == "~": + value = f".*{value}.*" + matcher = "regexp" + else: + matcher = "term" + if namespace in ("param", "tag"): + if namespace == "param": + key_field = "param.arg" + value_field = "param.val" + else: + key_field = "tag.name" + value_field = "tag.val" + terms[namespace].append( + { + "bool": { + "must": [ + {"term": {key_field: key}}, + {matcher: {value_field: value}}, + ] + } + } + ) + elif namespace == "run": + terms[namespace].append({matcher: {f"run.{key}": value}}) + else: + raise HTTPException( + status.HTTP_400_BAD_REQUEST, + f"unknown filter namespace {namespace!r}", + ) + param_filter = None + tag_filter = None + if "param" in terms: + param_filter = [{"dis_max": {"queries": terms["param"]}}] + if "tag" in terms: + tag_filter = [{"dis_max": {"queries": terms["tag"]}}] + return param_filter, tag_filter, terms.get("run") + + @classmethod + def _build_name_filters( + cls, namelist: Optional[list[str]] = None + ) -> list[dict[str, Any]]: + """Build filter terms for metric breakout names + + for example, "cpu=10" filters for metric data descriptors where the + breakout name "cpu" exists and has a value of 10. 
+ + Args: + namelist: list of possibly comma-separated list values + + Returns: + A list of filters to match breakout terms + """ + names: list[str] = cls._split_list(namelist) + filters = [] + for e in names: + try: + n, v = e.split("=", maxsplit=1) + except ValueError: + raise HTTPException( + status.HTTP_400_BAD_REQUEST, f"Filter item {e} must be '='" + ) + filters.append({"term": {f"metric_desc.names.{n}": v}}) + return filters + + @classmethod + def _build_period_filters( + cls, periodlist: Optional[list[str]] = None + ) -> list[dict[str, Any]]: + """Build period filters + + Generate metric_desc filter terms to match against a list of period IDs. + + Note that not all metric descriptions are periodic, and we don't want + these filters to exclude them -- so the filter will exclude only + documents that have a period and don't match. (That is, we won't drop + any non-periodic metrics. We expect those to be filtered by timestamp + instead.) + + Args: + period: list of possibly comma-separated period IDs + + Returns: + A filter term that requires a period.id match only for metric_desc + documents with a period. + """ + pl: list[str] = cls._split_list(periodlist) + if pl: + return [ + { + "dis_max": { + "queries": [ + {"bool": {"must_not": {"exists": {"field": "period"}}}}, + {"terms": {"period.id": pl}}, + ] + } + } + ] + else: + return [] + + @classmethod + def _build_metric_filters( + cls, + run: str, + metric: str, + names: Optional[list[str]] = None, + periods: Optional[list[str]] = None, + ) -> list[dict[str, Any]]: + """Helper for filtering metric descriptions + + We normally filter by run, metric "label", and optionally by breakout + names and periods. This encapsulates the filter construction. + + Args: + run: run ID + metric: metric label (ilab::sdg-samples-sec) + names: list of "name=value" filters + periods: list of period IDs + + Returns: + A list of OpenSearch filter expressions + """ + msource, mtype = metric.split("::") + return ( + [ + {"term": {"run.id": run}}, + {"term": {"metric_desc.source": msource}}, + {"term": {"metric_desc.type": mtype}}, + ] + + cls._build_name_filters(names) + + cls._build_period_filters(periods) + ) + + @classmethod + def _build_sort_terms(cls, sorters: Optional[list[str]]) -> list[dict[str, str]]: + """Build sort term list + + Sorters may reference any native `run` index field and must specify + either "asc"(ending) or "desc"(ending) sort order. 
Any number of + sorters may be combined, like ["name:asc,benchmark:desc", "end:desc"] + + Args: + sorters: list of : sort terms + + Returns: + list of OpenSearch sort terms + """ + if sorters: + sort_terms = [] + for s in sorters: + key, dir = s.split(":", maxsplit=1) + if dir not in cls.DIRECTIONS: + raise HTTPException( + status.HTTP_400_BAD_REQUEST, + f"Sort direction {dir!r} must be one of {','.join(DIRECTIONS)}", + ) + if key not in cls.FIELDS: + raise HTTPException( + status.HTTP_400_BAD_REQUEST, + f"Sort key {key!r} must be one of {','.join(FIELDS)}", + ) + sort_terms.append({f"run.{key}": dir}) + else: + sort_terms = [{"run.begin": "asc"}] + return sort_terms + + async def _search( + self, index: str, query: Optional[dict[str, Any]] = None, **kwargs + ) -> dict[str, Any]: + """Issue an OpenSearch query + + Args: + index: The "base" CDM index name, e.g., "run", "metric_desc" + query: An OpenSearch query object + kwargs: Additional OpenSearch parameters + + Returns: + The OpenSearch response payload (JSON dict) + """ + idx = self._get_index(index) + start = time.time() + value = await self.elastic.search(index=idx, body=query, **kwargs) + print( + f"QUERY on {idx} took {time.time() - start} seconds, " + f"hits: {value.get('hits', {}).get('total')}" + ) + return value + + async def close(self): + """Close the OpenSearch connection""" + if self.elastic: + await self.elastic.close() + self.elastic = None + + async def search( + self, + index: str, + filters: Optional[list[dict[str, Any]]] = None, + aggregations: Optional[dict[str, Any]] = None, + sort: Optional[list[dict[str, str]]] = None, + source: Optional[str] = None, + size: Optional[int] = None, + offset: Optional[int] = None, + **kwargs, + ) -> dict[str, Any]: + """OpenSearch query helper + + Combine index, filters, aggregations, sort, and pagination options + into an OpenSearch query. + + Args: + index: "root" CDM index name ("run", "metric_desc", ...) + filters: list of JSON dict filter terms {"term": {"name": "value}} + aggregations: list of JSON dict aggregations {"name": {"term": "name"}} + sort: list of JSON dict sort terms ("name": "asc") + size: The number of hits to return; defaults to "very large" + offset: The number of hits to skip, for pagination + kwargs: Additional OpenSearch options + + Returns: + The OpenSearch response + """ + f = filters if filters else [] + query = { + "size": self.BIGQUERY if size is None else size, + "query": {"bool": {"filter": f}}, + } + if sort: + query.update({"sort": sort}) + if source: + query.update({"_source": source}) + if offset: + query.update({"from": offset}) + if aggregations: + query.update({"aggs": aggregations}) + return await self._search(index, query, **kwargs) + + async def _get_metric_ids( + self, + run: str, + metric: str, + namelist: Optional[list[str]] = None, + periodlist: Optional[list[str]] = None, + aggregate: bool = False, + ) -> list[str]: + """Generate a list of matching metric_desc IDs + + Given a specific run and metric name, and a set of breakout filters, + returns a list of metric desc IDs that match. + + If a single ID is required to produce a consistent metric, and the + supplied filters produce more than one without aggregation, raise a + 422 HTTP error (UNPROCESSABLE CONTENT) with a response body showing + the unsatisfied breakouts (name and available values). 
+ + Args: + run: run ID + metric: combined metric name (e.g., sar-net::packets-sec) + namelist: a list of breakout filters like "type=physical" + periodlist: a list of period IDs + aggregate: if True, allow multiple metric IDs + + Returns: + A list of matching metric_desc ID value(s) + """ + filters = self._build_metric_filters(run, metric, namelist, periodlist) + metrics = await self.search( + "metric_desc", + filters=filters, + ignore_unavailable=True, + ) + if len(metrics["hits"]["hits"]) < 1: + raise HTTPException( + status.HTTP_400_BAD_REQUEST, + ( + f"No matches for {metric}" + f"{('+' + ','.join(namelist) if namelist else '')}" + ), + ) + ids = [h["metric_desc"]["id"] for h in self._hits(metrics)] + if len(ids) < 2 or aggregate: + return ids + + # If we get here, the client asked for breakout data that doesn't + # resolve to a single metric stream, and didn't specify aggregation. + # Offer some help. + names = defaultdict(set) + periods = set() + response = { + "message": f"More than one metric ({len(ids)}) means " + "you should add breakout filters or aggregate." + } + for m in self._hits(metrics): + if "period" in m: + periods.add(m["period"]["id"]) + for n, v in m["metric_desc"]["names"].items(): + names[n].add(v) + + # We want to help filter a consistent summary, so only show those + # breakout names with more than one value. + response["names"] = {n: sorted(v) for n, v in names.items() if v and len(v) > 1} + response["periods"] = list(periods) + raise HTTPException( + status_code=status.HTTP_422_UNPROCESSABLE_ENTITY, detail=response + ) + + async def _build_timestamp_range_filters( + self, periods: Optional[list[str]] = None + ) -> list[dict[str, Any]]: + """Create a timestamp range filter + + This extracts the begin and end timestamps from the list of periods and + builds a timestamp filter range to select documents on or after the + earliest begin timestamp and on or before the latest end timestamp. + + Args: + periods: a list of CDM period IDs + + Returns: + Constructs a range filter for the earliest begin timestamp and the + latest end timestamp among the specified periods. + """ + if periods: + ps = self._split_list(periods) + matches = await self.search( + "period", filters=[{"terms": {"period.id": ps}}] + ) + start = None + end = None + for h in self._hits(matches): + p = h["period"] + st = p["begin"] + et = p["end"] + + # If any period is missing a timestamp, use the run's timestamp + # instead to avoid blowing up on a CDM error. + if st is None: + st = h["run"]["begin"] + if et is None: + et = h["run"]["end"] + if st and (not start or st < start): + start = st + if et and (not end or et > end): + end = et + if start is None or end is None: + name = ( + f"{h['run']['benchmark']}:{h['run']['begin']}-" + f"{h['iteration']['num']}-{h['sample']['num']}-" + f"{p['name']}" + ) + raise HTTPException( + status.HTTP_422_UNPROCESSABLE_ENTITY, + f"Unable to compute {name!r} time range {start!r} -> {end!r}", + ) + return [ + {"range": {"metric_data.begin": {"gte": str(start)}}}, + {"range": {"metric_data.end": {"lte": str(end)}}}, + ] + else: + return [] + + async def _get_run_ids( + self, index: str, filters: Optional[list[dict[str, Any]]] = None + ) -> set[str]: + """Return a set of run IDs matching a filter + + Documents in the specified index must have "run.id" fields. Returns + a set of unique run IDs matched by the filter in the specified index. 
+ + Args: + index: root CDM index name + filters: a list of OpenSearch filter terms + + Returns: + a set of unique run ID values + """ + filtered = await self.search( + index, source="run.id", filters=filters, ignore_unavailable=True + ) + print(f"HITS: {filtered['hits']['hits']}") + return set([x for x in self._hits(filtered, ["run", "id"])]) + + async def get_run_filters(self) -> dict[str, dict[str, list[str]]]: + """Return possible tag and filter terms + + Return a description of tag and param filter terms meaningful + across all datasets. TODO: we should support date-range and benchmark + filtering. Consider supporting all `run` API filtering, which would + allow adjusting the filter popups to drop options no longer relevant + to a given set. + + { + "param": { + {"gpus": [4", "8"]} + } + } + + Returns: + A two-level JSON dict; the first level is the namespace (param or + tag), the second level key is the param/tag/field name and its value + is the set of values defined for that key. + """ + tags = await self.search( + "tag", + size=0, + aggregations={ + "key": { + "terms": {"field": "tag.name", "size": self.BIGQUERY}, + "aggs": { + "values": {"terms": {"field": "tag.val", "size": self.BIGQUERY}} + }, + } + }, + ignore_unavailable=True, + ) + params = await self.search( + "param", + size=0, + aggregations={ + "key": { + "terms": {"field": "param.arg", "size": self.BIGQUERY}, + "aggs": { + "values": { + "terms": {"field": "param.val", "size": self.BIGQUERY} + } + }, + } + }, + ignore_unavailable=True, + ) + aggs = { + k: {"terms": {"field": f"run.{k}", "size": self.BIGQUERY}} + for k in self.RUN_FILTERS + } + runs = await self.search( + "run", + size=0, + aggregations=aggs, + ) + result = defaultdict(lambda: defaultdict(lambda: set())) + for p in self._aggs(params, "key"): + for v in p["values"]["buckets"]: + result["param"][p["key"]].add(v["key"]) + for t in self._aggs(tags, "key"): + for v in t["values"]["buckets"]: + result["tag"][t["key"]].add(v["key"]) + for name in self.RUN_FILTERS: + for f in self._aggs(runs, name): + result["run"][name].add(f["key"]) + return {s: {k: list(v) for k, v in keys.items()} for s, keys in result.items()} + + async def get_runs( + self, + filter: Optional[list[str]] = None, + start: Optional[Union[int, str, datetime]] = None, + end: Optional[Union[int, str, datetime]] = None, + offset: int = 0, + sort: Optional[list[str]] = None, + size: Optional[int] = None, + **kwargs, + ) -> dict[str, Any]: + """Return matching Crucible runs + + Filtered and sorted list of runs. 
+ + { + "sort": [], + "startDate": "2024-01-01T05:00:00+00:00", + "size": 1, + "offset": 0, + "results": [ + { + "begin": "1722878906342", + "benchmark": "ilab", + "email": "A@email", + "end": "1722880503544", + "id": "4e1d2c3c-b01c-4007-a92d-23a561af2c11", + "name": "\"A User\"", + "source": "node.example.com//var/lib/crucible/run/ilab--2024-08-05_17:17:13_UTC--4e1d2c3c-b01c-4007-a92d-23a561af2c11", + "tags": { + "topology": "none" + }, + "iterations": [ + { + "iteration": 1, + "primary_metric": "ilab::train-samples-sec", + "primary_period": "measurement", + "status": "pass", + "params": { + "cpu-offload-pin-memory": "1", + "model": "/home/models/granite-7b-lab/", + "data-path": "/home/data/training/knowledge_data.jsonl", + "cpu-offload-optimizer": "1", + "nnodes": "1", + "nproc-per-node": "4", + "num-runavg-samples": "2" + } + } + ], + "primary_metrics": [ + "ilab::train-samples-sec" + ], + "status": "pass", + "params": { + "cpu-offload-pin-memory": "1", + "model": "/home/models/granite-7b-lab/", + "data-path": "/home/data/training/knowledge_data.jsonl", + "cpu-offload-optimizer": "1", + "nnodes": "1", + "nproc-per-node": "4", + "num-runavg-samples": "2" + }, + "begin_date": "2024-08-05 17:28:26.342000+00:00", + "end_date": "2024-08-05 17:55:03.544000+00:00" + } + ], + "count": 1, + "total": 15, + "next_offset": 1 + } + + Args: + start: Include runs starting at timestamp + end: Include runs ending no later than timestamp + filter: List of tag/param filter terms (parm:key=value) + sort: List of sort terms (column:) + size: Include up to runs in output + offset: Use size/from pagination instead of search_after + + Returns: + JSON object with "results" list and "housekeeping" fields + """ + + # We need to remove runs which don't match against 'tag' or 'param' + # filter terms. The CDM schema doesn't make it possible to do this in + # one shot. Instead, we run queries against the param and tag indices + # separately, producing a list of run IDs which we'll exclude from the + # final collection. + # + # If there are no matches, we can exit early. (TODO: should this be an + # error, or just a success with an empty list?) + results = {} + filters = [] + sorters = self._split_list(sort) + results["sort"] = sorters + sort_terms = self._build_sort_terms(sorters) + param_filters, tag_filters, run_filters = self._build_filter_options(filter) + if run_filters: + filters.extend(run_filters) + if start or end: + s = None + e = None + if start: + s = self._normalize_date(start) + results["startDate"] = datetime.fromtimestamp( + s / 1000.0, tz=timezone.utc + ) + if end: + e = self._normalize_date(end) + results["endDate"] = datetime.fromtimestamp(e / 1000.0, tz=timezone.utc) + + if s and e and s > e: + raise HTTPException( + status_code=422, + detail={ + "error": "Invalid date format, start_date must be less than end_date" + }, + ) + cond = {} + if s: + cond["gte"] = str(s) + if e: + cond["lte"] = str(e) + filters.append({"range": {"run.begin": cond}}) + if size: + results["size"] = size + results["offset"] = offset if offset is not None else 0 + + # In order to filter by param or tag values, we need to produce a list + # of matching RUN IDs from each index. We'll then drop any RUN ID that's + # not on both lists. + if tag_filters: + tagids = await self._get_run_ids("tag", tag_filters) + if param_filters: + paramids = await self._get_run_ids("param", param_filters) + + # If it's obvious we can't produce any matches at this point, exit. 
+ if (tag_filters and len(tagids) == 0) or (param_filters and len(paramids) == 0): + results.update({"results": [], "count": 0, "total": 0}) + return results + + hits = await self.search( + "run", + size=size, + offset=offset, + sort=sort_terms, + filters=filters, + **kwargs, + ignore_unavailable=True, + ) + rawiterations = await self.search("iteration", ignore_unavailable=True) + rawtags = await self.search("tag", ignore_unavailable=True) + rawparams = await self.search("param", ignore_unavailable=True) + + iterations = defaultdict(list) + tags = defaultdict(defaultdict) + params = defaultdict(defaultdict) + run_params = defaultdict(list) + + for i in self._hits(rawiterations): + iterations[i["run"]["id"]].append(i["iteration"]) + + # Organize tags by run ID + for t in self._hits(rawtags): + tags[t["run"]["id"]][t["tag"]["name"]] = t["tag"]["val"] + + # Organize params by iteration ID + for p in self._hits(rawparams): + run_params[p["run"]["id"]].append(p) + params[p["iteration"]["id"]][p["param"]["arg"]] = p["param"]["val"] + + runs = {} + for h in self._hits(hits): + run = h["run"] + rid = run["id"] + + # Filter the runs by our tag and param queries + if param_filters and rid not in paramids: + continue + + if tag_filters and rid not in tagids: + continue + + # Collect unique runs: the status is "fail" if any iteration for + # that run ID failed. + runs[rid] = run + run["tags"] = tags.get(rid, {}) + run["iterations"] = [] + run["primary_metrics"] = set() + common = CommonParams() + for i in iterations.get(rid, []): + iparams = params.get(i["id"], {}) + if "status" not in run: + run["status"] = i["status"] + else: + if i["status"] != "pass": + run["status"] = i["status"] + common.add(iparams) + run["primary_metrics"].add(i["primary-metric"]) + run["iterations"].append( + { + "iteration": i["num"], + "primary_metric": i["primary-metric"], + "primary_period": i["primary-period"], + "status": i["status"], + "params": iparams, + } + ) + run["params"] = common.render() + try: + run["begin_date"] = self._format_timestamp(run["begin"]) + run["end_date"] = self._format_timestamp(run["end"]) + except KeyError as e: + print(f"Missing 'run' key {str(e)} in {run}") + run["begin_date"] = self._format_timestamp("0") + run["end_date"] = self._format_timestamp("0") + + count = len(runs) + total = hits["hits"]["total"]["value"] + results.update( + { + "results": list(runs.values()), + "count": count, + "total": total, + } + ) + if size and (offset + count < total): + results["next_offset"] = offset + size + return results + + async def get_tags(self, run: str, **kwargs) -> dict[str, str]: + """Return the set of tags associated with a run + + Args: + run: run ID + + Returns: + JSON dict with "tag" keys showing each value + """ + tags = await self.search( + index="tag", + filters=[{"term": {"run.id": run}}], + **kwargs, + ignore_unavailable=True, + ) + return {t["name"]: t["val"] for t in self._hits(tags, ["tag"])} + + async def get_params( + self, run: Optional[str] = None, iteration: Optional[str] = None, **kwargs + ) -> dict[str, dict[str, str]]: + """Return the set of parameters for a run or iteration + + Parameters are technically associated with an iteration, but can be + aggregated for a run. This will return a set of parameters for each + iteration; plus, if a "run" was specified, a filtered list of param + values that are common across all iterations. 
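+        Parameters that are missing from some iterations, or whose values
+        differ between iterations, are omitted from the "common" set.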
+ + Args: + run: run ID + iteration: iteration ID + kwargs: additional OpenSearch keywords + + Returns: + JSON dict of param values by iteration (plus "common" if by run ID) + """ + if not run and not iteration: + raise HTTPException( + status.HTTP_400_BAD_REQUEST, + "A params query requires either a run or iteration ID", + ) + match = {"run.id" if run else "iteration.id": run if run else iteration} + params = await self.search( + index="param", + filters=[{"term": match}], + **kwargs, + ignore_unavailable=True, + ) + response = defaultdict(defaultdict) + for param in self._hits(params): + iter = param["iteration"]["id"] + arg = param["param"]["arg"] + val = param["param"]["val"] + if response.get(iter) and response.get(iter).get(arg): + print(f"Duplicate param {arg} for iteration {iter}") + response[iter][arg] = val + + # Filter out all parameter values that don't exist in all or which have + # different values. + if run: + common = CommonParams() + for params in response.values(): + common.add(params) + response["common"] = common.render() + return response + + async def get_iterations(self, run: str, **kwargs) -> list[dict[str, Any]]: + """Return a list of iterations for a run + + Args: + run: run ID + kwargs: additional OpenSearch keywords + + Returns: + A list of iteration documents + """ + iterations = await self.search( + index="iteration", + filters=[{"term": {"run.id": run}}], + sort=[{"iteration.num": "asc"}], + **kwargs, + ignore_unavailable=True, + ) + return [i["iteration"] for i in self._hits(iterations)] + + async def get_samples( + self, run: Optional[str] = None, iteration: Optional[str] = None, **kwargs + ): + """Return a list of samples for a run or iteration + + Args: + run: run ID + iteration: iteration ID + kwargs: additional OpenSearch keywords + + Returns: + A list of sample documents. + """ + if not run and not iteration: + raise HTTPException( + status.HTTP_400_BAD_REQUEST, + "A sample query requires either a run or iteration ID", + ) + match = {"run.id" if run else "iteration.id": run if run else iteration} + hits = await self.search( + index="sample", + filters=[{"term": match}], + **kwargs, + ignore_unavailable=True, + ) + samples = [] + for s in self._hits(hits): + print(f"SAMPLE's ITERATION {s['iteration']}") + sample = s["sample"] + sample["iteration"] = s["iteration"]["num"] + sample["primary_metric"] = s["iteration"]["primary-metric"] + sample["status"] = s["iteration"]["status"] + samples.append(sample) + return samples + + async def get_periods( + self, + run: Optional[str] = None, + iteration: Optional[str] = None, + sample: Optional[str] = None, + **kwargs, + ): + """Return a list of periods associated with a run, an iteration, or a + sample + + The "period" document is normalized to represent timestamps using ISO + strings. 
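+        Each period is also annotated with the iteration number, sample
+        number, primary metric, and status of the iteration it belongs to.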
+ + Args: + run: run ID + iteration: iteration ID + sample: sample ID + kwargs: additional OpenSearch parameters + + Returns: + a list of normalized period documents + """ + if not any((run, iteration, sample)): + raise HTTPException( + status.HTTP_400_BAD_REQUEST, + "A period query requires a run, iteration, or sample ID", + ) + match = None + if sample: + match = {"sample.id": sample} + elif iteration: + match = {"iteration.id": iteration} + else: + match = {"run.id": run} + periods = await self.search( + index="period", + filters=[{"term": match}], + sort=[{"period.begin": "asc"}], + **kwargs, + ignore_unavailable=True, + ) + body = [] + for h in self._hits(periods): + period = self._format_period(period=h["period"]) + period["iteration"] = h["iteration"]["num"] + period["sample"] = h["sample"]["num"] + period["primary_metric"] = h["iteration"]["primary-metric"] + period["status"] = h["iteration"]["status"] + body.append(period) + return body + + async def get_timeline(self, run: str, **kwargs) -> dict[str, Any]: + """Report the relative timeline of a run + + With nested object lists, show runs to iterations to samples to + periods. + + Args: + run: run ID + kwargs: additional OpenSearch parameters + """ + itr = await self.search( + index="iteration", + filters=[{"term": {"run.id": run}}], + **kwargs, + ignore_unavailable=True, + ) + sam = await self.search( + index="sample", + filters=[{"term": {"run.id": run}}], + **kwargs, + ignore_unavailable=True, + ) + per = await self.search( + index="period", + filters=[{"term": {"run.id": run}}], + **kwargs, + ignore_unavailable=True, + ) + samples = defaultdict(list) + periods = defaultdict(list) + + for s in self._hits(sam): + samples[s["iteration"]["id"]].append(s) + for p in self._hits(per): + periods[p["sample"]["id"]].append(p) + + iterations = [] + robj = {"id": run, "iterations": iterations} + body = {"run": robj} + for i in self._hits(itr): + if "begin" not in robj: + robj["begin"] = self._format_timestamp(i["run"]["begin"]) + robj["end"] = self._format_timestamp(i["run"]["end"]) + iteration = i["iteration"] + iterations.append(iteration) + iteration["samples"] = [] + for s in samples.get(iteration["id"], []): + sample = s["sample"] + sample["periods"] = [] + for pr in periods.get(sample["id"], []): + period = self._format_period(pr["period"]) + sample["periods"].append(period) + iteration["samples"].append(sample) + return body + + async def get_metrics_list(self, run: str, **kwargs) -> dict[str, Any]: + """Return a list of metrics available for a run + + Each run may have multiple performance metrics stored. This API allows + retrieving a sorted list of the metrics available for a given run, with + the "names" selection criteria available for each and, for "periodic" + (benchmark) metrics, the defined periods for which data was gathered. + + { + "ilab::train-samples-sec": { + "periods": [{"id": , "name": "measurement"}], + "breakouts": {"benchmark-group" ["unknown"], ...} + }, + "iostat::avg-queue-length": { + "periods": [], + "breakouts": {"benchmark-group": ["unknown"], ...}, + }, + ... 
+ } + + Args: + run: run ID + + Returns: + List of metrics available for the run + """ + hits = await self.search( + index="metric_desc", + filters=[{"term": {"run.id": run}}], + ignore_unavailable=True, + **kwargs, + ) + met = {} + for h in self._hits(hits): + desc = h["metric_desc"] + name = desc["source"] + "::" + desc["type"] + if name in met: + record = met[name] + else: + record = {"periods": [], "breakouts": defaultdict(set)} + met[name] = record + if "period" in h: + record["periods"].append(h["period"]["id"]) + for n, v in desc["names"].items(): + record["breakouts"][n].add(v) + return met + + async def get_metric_breakouts( + self, + run: str, + metric: str, + names: Optional[list[str]] = None, + periods: Optional[list[str]] = None, + ) -> dict[str, Any]: + """Help explore available metric breakouts + + Args: + run: run ID + metric: metric label (e.g., "mpstat::Busy-CPU") + names: list of name filters ("cpu=3") + periods: list of period IDs + + Returns: + A description of all breakout names and values, which can be + specified to narrow down metrics returns by the data, summary, and + graph APIs. + + { + "label": "mpstat::Busy-CPU", + "class": [ + "throughput" + ], + "type": "Busy-CPU", + "source": "mpstat", + "breakouts": { + "num": [ + "8", + "72" + ], + "thread": [ + 0, + 1 + ] + } + } + """ + start = time.time() + filters = self._build_metric_filters(run, metric, names, periods) + metric_name = metric + ("" if not names else ("+" + ",".join(names))) + metrics = await self.search( + "metric_desc", + filters=filters, + ignore_unavailable=True, + ) + if len(metrics["hits"]["hits"]) < 1: + raise HTTPException( + status.HTTP_400_BAD_REQUEST, + f"Metric name {metric_name} not found for run {run}", + ) + classes = set() + response = {"label": metric, "class": classes} + breakouts = defaultdict(set) + pl = set() + for m in self._hits(metrics): + desc = m["metric_desc"] + response["type"] = desc["type"] + response["source"] = desc["source"] + if desc.get("class"): + classes.add(desc["class"]) + if "period" in m: + pl.add(m["period"]["id"]) + for n, v in desc["names"].items(): + breakouts[n].add(v) + # We want to help filter a consistent summary, so only show those + # names with more than one value. + if len(pl) > 1: + response["periods"] = pl + response["breakouts"] = {n: v for n, v in breakouts.items() if len(v) > 1} + duration = time.time() - start + print(f"Processing took {duration} seconds") + return response + + async def get_metrics_data( + self, + run: str, + metric: str, + names: Optional[list[str]] = None, + periods: Optional[list[str]] = None, + aggregate: bool = False, + ) -> list[Any]: + """Return a list of metric data + + The "aggregate" option allows aggregating various metrics across + breakout streams and periods: be careful, as this is meaningful only if + the breakout streams are sufficiently related. + + Args: + run: run ID + metric: metric label (e.g., "mpstat::Busy-CPU") + names: list of name filters ("cpu=3") + periods: list of period IDs + aggregate: aggregate multiple metric data streams + + Returns: + A sequence of data samples, showing the aggregate sample along with + the duration and end timestamp of each sample interval. 
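+            When multiple metric streams are selected, their values are
+            summed into uniform intervals derived from the smallest sample
+            duration observed across the matching data.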
+ + [ + { + "begin": "2024-08-22 20:03:23.028000+00:00", + "end": "2024-08-22 20:03:37.127000+00:00", + "duration": 14.1, + "value": 9.35271216694379 + }, + { + "begin": "2024-08-22 20:03:37.128000+00:00", + "end": "2024-08-22 20:03:51.149000+00:00", + "duration": 14.022, + "value": 9.405932330557683 + }, + { + "begin": "2024-08-22 20:03:51.150000+00:00", + "end": "2024-08-22 20:04:05.071000+00:00", + "duration": 13.922, + "value": 9.478773265522682 + } + ] + """ + start = time.time() + ids = await self._get_metric_ids( + run, metric, names, periodlist=periods, aggregate=aggregate + ) + + # If we're searching by periods, filter metric data by the period + # timestamp range rather than just relying on the metric desc IDs as + # we also want to filter non-periodic tool data. + filters = [{"terms": {"metric_desc.id": ids}}] + filters.extend(await self._build_timestamp_range_filters(periods)) + + response = [] + if len(ids) > 1: + # Find the minimum sample interval of the selected metrics + aggdur = await self.search( + "metric_data", + size=0, + filters=filters, + aggregations={"duration": {"stats": {"field": "metric_data.duration"}}}, + ) + if aggdur["aggregations"]["duration"]["count"] > 0: + interval = int(aggdur["aggregations"]["duration"]["min"]) + data = await self.search( + index="metric_data", + size=0, + filters=filters, + aggregations={ + "interval": { + "histogram": { + "field": "metric_data.end", + "interval": interval, + }, + "aggs": {"value": {"sum": {"field": "metric_data.value"}}}, + } + }, + ) + for h in self._aggs(data, "interval"): + response.append( + { + "begin": self._format_timestamp(h["key"] - interval), + "end": self._format_timestamp(h["key"]), + "value": h["value"]["value"], + "duration": interval / 1000.0, + } + ) + else: + data = await self.search("metric_data", filters=filters) + for h in self._hits(data, ["metric_data"]): + response.append(self._format_data(h)) + response.sort(key=lambda a: a["end"]) + duration = time.time() - start + print(f"Processing took {duration} seconds") + return response + + async def get_metrics_summary( + self, + run: str, + metric: str, + names: Optional[list[str]] = None, + periods: Optional[list[str]] = None, + ) -> dict[str, Any]: + """Return a statistical summary of metric data + + Provides a statistical summary of selected data samples. + + Args: + run: run ID + metric: metric label (e.g., "mpstat::Busy-CPU") + names: list of name filters ("cpu=3") + periods: list of period IDs + + Returns: + A statistical summary of the selected metric data + + { + "count": 71, + "min": 0.0, + "max": 0.3296, + "avg": 0.02360704225352113, + "sum": 1.676self.BIGQUERY00000001 + } + """ + start = time.time() + ids = await self._get_metric_ids(run, metric, names, periodlist=periods) + filters = [{"terms": {"metric_desc.id": ids}}] + filters.extend(await self._build_timestamp_range_filters(periods)) + data = await self.search( + "metric_data", + size=0, + filters=filters, + aggregations={"score": {"stats": {"field": "metric_data.value"}}}, + ) + duration = time.time() - start + print(f"Processing took {duration} seconds") + return data["aggregations"]["score"] + + async def _graph_title( + self, + run_id: str, + run_id_list: list[str], + graph: Graph, + params_by_run: dict[str, Any], + periods_by_run: dict[str, Any], + ) -> str: + """Compute a default title for a graph + + Use the period, breakout name selections, run list, and iteration + parameters to construct a meaningful name for a graph. 
+ + For example, "ilab::sdg-samples-sec (batch-size=4) {run 1}", or + "mpstat::Busy-CPU [cpu=4]" + + Args: + run_id: the Crucible run ID + run_id_list: ordered list of run IDs in our list of graphs + graph: the current Graph object + params_by_run: initially empty dict used to cache parameters + periods_by_run: initially empty dict used to cache periods + + Returns: + A string title + """ + names = graph.names + metric = graph.metric + if run_id not in params_by_run: + # Gather iteration parameters outside the loop for help in + # generating useful labels. + all_params = await self.search( + "param", filters=[{"term": {"run.id": run_id}}] + ) + collector = defaultdict(defaultdict) + for h in self._hits(all_params): + collector[h["iteration"]["id"]][h["param"]["arg"]] = h["param"]["val"] + params_by_run[run_id] = collector + else: + collector = params_by_run[run_id] + + if run_id not in periods_by_run: + periods = await self.search( + "period", filters=[{"term": {"run.id": run_id}}] + ) + iteration_periods = defaultdict(set) + for p in self._hits(periods): + iteration_periods[p["iteration"]["id"]].add(p["period"]["id"]) + periods_by_run[run_id] = iteration_periods + else: + iteration_periods = periods_by_run[run_id] + + # We can easily end up with multiple graphs across distinct + # periods or iterations, so we want to be able to provide some + # labeling to the graphs. We do this by looking for unique + # iteration parameters values, since the iteration number and + # period name aren't useful by themselves. + name_suffix = "" + if graph.periods: + iteration = None + for i, pset in iteration_periods.items(): + if set(graph.periods) <= pset: + iteration = i + break + + # If the period(s) we're graphing resolve to a single + # iteration in a run with multiple iterations, then we can + # try to find a unique title suffix based on distinct param + # values for that iteration. + if iteration and len(collector) > 1: + unique = collector[iteration].copy() + for i, params in collector.items(): + if i != iteration: + for p in list(unique.keys()): + if p in params and unique[p] == params[p]: + del unique[p] + if unique: + name_suffix = ( + " (" + ",".join([f"{p}={v}" for p, v in unique.items()]) + ")" + ) + + if len(run_id_list) > 1: + name_suffix += f" {{run {run_id_list.index(run_id) + 1}}}" + + options = (" [" + ",".join(names) + "]") if names else "" + return metric + options + name_suffix + + async def get_metrics_graph(self, graphdata: GraphList) -> dict[str, Any]: + """Return metrics data for a run + + Each run may have multiple performance metrics stored. This API allows + retrieving graphable time-series representation of a metric over the + period of the run, in the format defined by Plotly as configuration + settings plus an x value array and a y value array. + + { + "data": [ + { + "x": [ + "2024-08-27 09:16:27.371000", + ... + ], + "y": [ + 10.23444312132161, + ... 
+ ], + "name": "Metric ilab::train-samples-sec", + "type": "scatter", + "mode": "line", + "marker": {"color": "black"}, + "labels": {"x": "sample timestamp", "y": "samples / second"} + } + ] + "layout": { + "width": 1500, + "yaxis": { + "title": "mpstat::Busy-CPU core=2,package=0,num=112,type=usr", + "color": "black" + } + } + } + + Args: + graphdata: A GraphList object + + Returns: + A Plotly object with layout + """ + start = time.time() + graphlist = [] + default_run_id = graphdata.run + layout: dict[str, Any] = {"width": "1500"} + axes = {} + yaxis = None + cindex = 0 + params_by_run = {} + periods_by_run = {} + + # Construct a de-duped ordered list of run IDs, starting with the + # default. + run_id_list = [] + if default_run_id: + run_id_list.append(default_run_id) + run_id_missing = False + for g in graphdata.graphs: + if g.run: + if g.run not in run_id_list: + run_id_list.append(g.run) + else: + run_id_missing = True + + if run_id_missing and not default_run_id: + raise HTTPException( + status.HTTP_400_BAD_REQUEST, "each graph request must have a run ID" + ) + + for g in graphdata.graphs: + run_id = g.run if g.run else default_run_id + names = g.names + metric: str = g.metric + + # The caller can provide a title for each graph; but, if not, we + # journey down dark overgrown pathways to fabricate a default with + # reasonable context, including unique iteration parameters, + # breakdown selections, and which run provided the data. + if g.title: + title = g.title + else: + title = await self._graph_title( + run_id, run_id_list, g, params_by_run, periods_by_run + ) + + ids = await self._get_metric_ids( + run_id, + metric, + names, + periodlist=g.periods, + aggregate=g.aggregate, + ) + filters = [{"terms": {"metric_desc.id": ids}}] + filters.extend(await self._build_timestamp_range_filters(g.periods)) + y_max = 0.0 + points: list[Point] = [] + + # If we're pulling multiple breakouts, e.g., total CPU across modes + # or cores, we want to aggregate by timestamp interval. Sample + # timstamps don't necessarily align, so the "histogram" aggregation + # normalizes within the interval (based on the minimum actual + # interval duration). + if len(ids) > 1: + # Find the minimum sample interval of the selected metrics + aggdur = await self.search( + "metric_data", + size=0, + filters=filters, + aggregations={ + "duration": {"stats": {"field": "metric_data.duration"}} + }, + ) + if aggdur["aggregations"]["duration"]["count"] > 0: + interval = int(aggdur["aggregations"]["duration"]["min"]) + data = await self.search( + index="metric_data", + size=0, + filters=filters, + aggregations={ + "interval": { + "histogram": { + "field": "metric_data.begin", + "interval": interval, + }, + "aggs": { + "value": {"sum": {"field": "metric_data.value"}} + }, + } + }, + ) + for h in self._aggs(data, "interval"): + begin = int(h["key"]) + end = begin + interval - 1 + points.append(Point(begin, end, float(h["value"]["value"]))) + else: + data = await self.search("metric_data", filters=filters) + for h in self._hits(data, ["metric_data"]): + points.append( + Point(int(h["begin"]), int(h["end"]), float(h["value"])) + ) + + # Sort the graph points by timestamp so that Ploty will draw nice + # lines. We graph both the "begin" and "end" timestamp of each + # sample against the value to more clearly show the sampling + # interval. 
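+            # Each Point therefore contributes two (x, y) pairs, one at its
+            # begin timestamp and one at its end timestamp, both with the
+            # same y value, which produces a step-like trace.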
+ x = [] + y = [] + + for p in sorted(points, key=lambda a: a.begin): + x.extend( + [self._format_timestamp(p.begin), self._format_timestamp(p.end)] + ) + y.extend([p.value, p.value]) + y_max = max(y_max, p.value) + + if g.color: + color = g.color + else: + color = colors[cindex] + cindex += 1 + if cindex >= len(colors): + cindex = 0 + graphitem = { + "x": x, + "y": y, + "name": title, + "type": "scatter", + "mode": "line", + "marker": {"color": color}, + "labels": { + "x": "sample timestamp", + "y": "samples / second", + }, + } + + # Y-axis scaling and labeling is divided by benchmark label; + # so store each we've created to reuse. (E.g., if we graph + # 5 different mpstat::Busy-CPU periods, they'll share a single + # Y axis.) + if metric in axes: + yref = axes[metric] + else: + if yaxis: + name = f"yaxis{yaxis}" + yref = f"y{yaxis}" + yaxis += 1 + layout[name] = { + "title": metric, + "color": color, + "autorange": True, + "anchor": "free", + "autoshift": True, + "overlaying": "y", + } + else: + name = "yaxis" + yref = "y" + yaxis = 2 + layout[name] = { + "title": metric, + "color": color, + } + axes[metric] = yref + graphitem["yaxis"] = yref + graphlist.append(graphitem) + duration = time.time() - start + print(f"Processing took {duration} seconds") + return {"data": graphlist, "layout": layout} diff --git a/backend/skeleton.toml b/backend/skeleton.toml index 81662a55..2aac4574 100644 --- a/backend/skeleton.toml +++ b/backend/skeleton.toml @@ -15,3 +15,8 @@ personal_access_token= url= username= password= + +[crucible] +url= +username= +password=