From 08f30aef0a58f55339c4f5337d4ebd333206d621 Mon Sep 17 00:00:00 2001 From: Eddie Bergman Date: Fri, 2 Aug 2024 18:23:41 +0200 Subject: [PATCH] refactor: modularize file state (#126) --- .gitignore | 3 + .pre-commit-config.yaml | 6 +- neps/api.py | 64 +- neps/env.py | 89 ++ neps/exceptions.py | 47 + neps/optimizers/__init__.py | 5 +- neps/optimizers/base_optimizer.py | 148 +- .../acquisition_samplers/base_acq_sampler.py | 14 +- .../acquisition_samplers/mutation_sampler.py | 14 +- .../bayesian_optimization/cost_cooling.py | 48 +- .../bayesian_optimization/kernels/encoding.py | 10 +- .../bayesian_optimization/mf_tpe.py | 11 +- .../bayesian_optimization/models/deepGP.py | 53 +- .../models/gp_hierarchy.py | 49 +- .../bayesian_optimization/optimizer.py | 7 +- neps/optimizers/grid_search/optimizer.py | 8 +- neps/optimizers/multi_fidelity/_dyhpo.py | 9 +- neps/optimizers/multi_fidelity/dyhpo.py | 7 +- neps/optimizers/multi_fidelity/hyperband.py | 15 +- .../multi_fidelity/successive_halving.py | 21 +- .../multi_fidelity_prior/async_priorband.py | 15 +- .../prototype_optimizer.py | 7 +- neps/optimizers/random_search/optimizer.py | 8 +- .../regularized_evolution/optimizer.py | 17 +- neps/plot/tensorboard_eval.py | 20 +- neps/runtime.py | 1372 ++++++----------- neps/search_spaces/architecture/graph.py | 22 +- neps/search_spaces/search_space.py | 36 +- neps/search_spaces/yaml_search_space_utils.py | 84 +- neps/state/__init__.py | 19 + neps/state/_eval.py | 195 +++ neps/state/err_dump.py | 77 + neps/state/filebased.py | 672 ++++++++ neps/state/neps_state.py | 231 +++ neps/state/optimizer.py | 57 + neps/state/protocols.py | 560 +++++++ neps/state/seed_snapshot.py | 115 ++ neps/state/settings.py | 171 ++ neps/state/trial.py | 289 ++++ neps/status/status.py | 37 +- neps/utils/_rng.py | 176 --- neps/utils/common.py | 62 +- neps/utils/data_loading.py | 93 +- neps/utils/files.py | 34 +- neps/utils/types.py | 9 +- neps/utils/validation.py | 31 + neps_examples/basic_usage/hyperparameters.py | 2 +- neps_examples/efficiency/multi_fidelity.py | 4 +- pyproject.toml | 4 +- tests/test_neps_api/test_api.py | 10 +- tests/test_runtime/__init__.py | 0 .../test_default_report_values.py | 170 ++ .../test_error_handling_strategies.py | 200 +++ tests/test_runtime/test_locking.py | 105 -- tests/test_runtime/test_stopping_criterion.py | 481 ++++++ tests/test_state/__init__.py | 0 tests/test_state/test_filebased_neps_state.py | 123 ++ tests/test_state/test_neps_state.py | 205 +++ tests/{ => test_state}/test_rng.py | 34 +- tests/test_state/test_synced.py | 432 ++++++ tests/test_state/test_trial.py | 301 ++++ .../test_declarative_usage_docs.py | 35 +- .../test_neps_run.py | 76 +- 63 files changed, 5523 insertions(+), 1696 deletions(-) create mode 100644 neps/env.py create mode 100644 neps/exceptions.py create mode 100644 neps/state/__init__.py create mode 100644 neps/state/_eval.py create mode 100644 neps/state/err_dump.py create mode 100644 neps/state/filebased.py create mode 100644 neps/state/neps_state.py create mode 100644 neps/state/optimizer.py create mode 100644 neps/state/protocols.py create mode 100644 neps/state/seed_snapshot.py create mode 100644 neps/state/settings.py create mode 100644 neps/state/trial.py delete mode 100644 neps/utils/_rng.py create mode 100644 neps/utils/validation.py create mode 100644 tests/test_runtime/__init__.py create mode 100644 tests/test_runtime/test_default_report_values.py create mode 100644 tests/test_runtime/test_error_handling_strategies.py delete mode 100644 
tests/test_runtime/test_locking.py create mode 100644 tests/test_runtime/test_stopping_criterion.py create mode 100644 tests/test_state/__init__.py create mode 100644 tests/test_state/test_filebased_neps_state.py create mode 100644 tests/test_state/test_neps_state.py rename tests/{ => test_state}/test_rng.py (52%) create mode 100644 tests/test_state/test_synced.py create mode 100644 tests/test_state/test_trial.py diff --git a/.gitignore b/.gitignore index 1471b0e8..e8be93e7 100644 --- a/.gitignore +++ b/.gitignore @@ -44,3 +44,6 @@ jahs_bench_data/ # MacOS *.DS_Store + +# Yaml tests +path diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 7228325e..92ff2356 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -27,7 +27,7 @@ repos: files: '^src/.*\.py$' - repo: https://github.com/pre-commit/mirrors-mypy - rev: v1.10.0 + rev: v1.11.1 hooks: - id: mypy files: | @@ -42,7 +42,7 @@ repos: - "--show-traceback" - repo: https://github.com/python-jsonschema/check-jsonschema - rev: 0.28.2 + rev: 0.29.1 hooks: - id: check-github-workflows files: '^github/workflows/.*\.ya?ml$' @@ -51,7 +51,7 @@ repos: files: '^\.github/dependabot\.ya?ml$' - repo: https://github.com/charliermarsh/ruff-pre-commit - rev: v0.4.2 + rev: v0.5.5 hooks: - id: ruff args: [--fix, --exit-non-zero-on-fix, --no-cache] diff --git a/neps/api.py b/neps/api.py index e523fe44..196e371d 100644 --- a/neps/api.py +++ b/neps/api.py @@ -1,5 +1,5 @@ -"""API for the neps package. -""" +"""API for the neps package.""" + from __future__ import annotations import inspect @@ -12,7 +12,7 @@ from neps.utils.run_args import Settings, Default from neps.utils.common import instance_from_map -from neps.runtime import launch_runtime +from neps.runtime import _launch_runtime from neps.optimizers import BaseOptimizer, SearcherMapping from neps.search_spaces.parameter import Parameter from neps.search_spaces.search_space import ( @@ -24,6 +24,8 @@ from neps.utils.common import get_searcher_data, get_value from neps.optimizers.info import SearcherConfigs +logger = logging.getLogger(__name__) + def run( run_pipeline: Callable | None = Default(None), @@ -59,7 +61,8 @@ def run( "asha", "regularized_evolution", ] - | BaseOptimizer | Path + | BaseOptimizer + | Path ) = Default("default"), **searcher_kwargs, ) -> None: @@ -144,13 +147,11 @@ def run( ) max_cost_total = searcher_kwargs["budget"] del searcher_kwargs["budget"] + settings = Settings(locals(), run_args) # TODO: check_essentials, - logger = logging.getLogger("neps") - # DO NOT use any neps arguments directly; instead, access them via the Settings class. 
- if settings.pre_load_hooks is None: settings.pre_load_hooks = [] @@ -175,8 +176,9 @@ def run( # TODO habe hier searcher kwargs gedroppt, sprich das merging muss davor statt # finden searcher_info["searcher_args"] = settings.searcher_kwargs - settings.searcher = settings.searcher(search_space, - **settings.searcher_kwargs) + settings.searcher = settings.searcher( + search_space, **settings.searcher_kwargs + ) else: # Raise an error if searcher is not a subclass of BaseOptimizer raise TypeError( @@ -200,7 +202,6 @@ def run( ignore_errors=settings.ignore_errors, loss_value_on_error=settings.loss_value_on_error, cost_value_on_error=settings.cost_value_on_error, - logger=logger, searcher=settings.searcher, **settings.searcher_kwargs, ) @@ -220,23 +221,25 @@ def run( ) if settings.task_id is not None: - settings.root_directory = Path(settings.root_directory) / (f"task_" - f"{settings.task_id}") + settings.root_directory = Path(settings.root_directory) / ( + f"task_" f"{settings.task_id}" + ) if settings.development_stage_id is not None: - settings.root_directory = (Path(settings.root_directory) / - f"dev_{settings.development_stage_id}") + settings.root_directory = ( + Path(settings.root_directory) / f"dev_{settings.development_stage_id}" + ) - launch_runtime( + _launch_runtime( evaluation_fn=settings.run_pipeline, - sampler=searcher_instance, + optimizer=searcher_instance, optimizer_info=searcher_info, - optimization_dir=settings.root_directory, + max_cost_total=settings.max_cost_total, + optimization_dir=Path(settings.root_directory), max_evaluations_total=settings.max_evaluations_total, - max_evaluations_per_run=settings.max_evaluations_per_run, - continue_until_max_evaluation_completed - =settings.continue_until_max_evaluation_completed, - logger=logger, + max_evaluations_for_worker=settings.max_evaluations_per_run, + continue_until_max_evaluation_completed=settings.continue_until_max_evaluation_completed, loss_value_on_error=settings.loss_value_on_error, + cost_value_on_error=settings.cost_value_on_error, ignore_errors=settings.ignore_errors, overwrite_optimization_dir=settings.overwrite_working_directory, pre_load_hooks=settings.pre_load_hooks, @@ -260,7 +263,6 @@ def _run_args( ignore_errors: bool = False, loss_value_on_error: None | float = None, cost_value_on_error: None | float = None, - logger=None, searcher: ( Literal[ "default", @@ -306,13 +308,17 @@ def _run_args( raise TypeError(message) from e # Load the information of the optimizer - if isinstance(searcher, (str, Path)) and searcher not in \ - SearcherConfigs.get_searchers() and searcher != "default": + if ( + isinstance(searcher, (str, Path)) + and searcher not in SearcherConfigs.get_searchers() + and searcher != "default" + ): # The users have their own custom searcher provided via yaml. logging.info("Preparing to run user created searcher") - searcher_config, file_name = get_searcher_data(searcher, - loading_custom_searcher=True) + searcher_config, file_name = get_searcher_data( + searcher, loading_custom_searcher=True + ) # name defined via key or the filename of the yaml searcher_name = searcher_config.pop("name", file_name) searcher_info["searcher_selection"] = "user-yaml" @@ -351,21 +357,19 @@ def _run_args( warnings.warn( "The 'algorithm' argument is deprecated and will be removed in " "future versions. 
Please use 'strategy' instead.", - DeprecationWarning + DeprecationWarning, ) # Map the old 'algorithm' argument to 'strategy' - searcher_config['strategy'] = searcher_config.pop("algorithm") + searcher_config["strategy"] = searcher_config.pop("algorithm") if "strategy" in searcher_config: searcher_alg = searcher_config.pop("strategy") else: raise KeyError(f"Missing key strategy in searcher config:{searcher_config}") - logger.info(f"Running {searcher_name} as the searcher") logger.info(f"Strategy: {searcher_alg}") - # Used to create the yaml holding information about the searcher. # Also important for testing and debugging the api. searcher_info["searcher_name"] = searcher_name diff --git a/neps/env.py b/neps/env.py new file mode 100644 index 00000000..7cb8eada --- /dev/null +++ b/neps/env.py @@ -0,0 +1,89 @@ +"""Environment variable parsing for the state.""" + +from __future__ import annotations + +import os +from typing import Callable, TypeVar + +T = TypeVar("T") +V = TypeVar("V") + + +def get_env(key: str, parse: Callable[[str], T], default: V) -> T | V: + """Get an environment variable or return a default value.""" + if (e := os.environ.get(key)) is not None: + return parse(e) + + return default + + +def is_nullable(e: str) -> bool: + """Check if an environment variable is nullable.""" + return e.lower() in ("none", "n", "null") + + +TRIAL_FILELOCK_POLL = get_env( + "NEPS_TRIAL_FILELOCK_POLL", + parse=float, + default=0.05, +) +TRIAL_FILELOCK_TIMEOUT = get_env( + "NEPS_TRIAL_FILELOCK_TIMEOUT", + parse=lambda e: None if is_nullable(e) else float(e), + default=None, +) + +JOBQUEUE_FILELOCK_POLL = get_env( + "NEPS_JOBQUEUE_FILELOCK_POLL", + parse=float, + default=0.05, +) +JOBQUEUE_FILELOCK_TIMEOUT = get_env( + "NEPS_JOBQUEUE_FILELOCK_TIMEOUT", + parse=lambda e: None if is_nullable(e) else float(e), + default=None, +) + +SEED_SNAPSHOT_FILELOCK_POLL = get_env( + "NEPS_SEED_SNAPSHOT_FILELOCK_POLL", + parse=float, + default=0.05, +) +SEED_SNAPSHOT_FILELOCK_TIMEOUT = get_env( + "NEPS_SEED_SNAPSHOT_FILELOCK_TIMEOUT", + parse=lambda e: None if is_nullable(e) else float(e), + default=None, +) + +OPTIMIZER_INFO_FILELOCK_POLL = get_env( + "NEPS_OPTIMIZER_INFO_FILELOCK_POLL", + parse=float, + default=0.05, +) +OPTIMIZER_INFO_FILELOCK_TIMEOUT = get_env( + "NEPS_OPTIMIZER_INFO_FILELOCK_TIMEOUT", + parse=lambda e: None if is_nullable(e) else float(e), + default=None, +) + +OPTIMIZER_STATE_FILELOCK_POLL = get_env( + "NEPS_OPTIMIZER_STATE_FILELOCK_POLL", + parse=float, + default=0.05, +) +OPTIMIZER_STATE_FILELOCK_TIMEOUT = get_env( + "NEPS_OPTIMIZER_STATE_FILELOCK_TIMEOUT", + parse=lambda e: None if is_nullable(e) else float(e), + default=None, +) + +GLOBAL_ERR_FILELOCK_POLL = get_env( + "NEPS_GLOBAL_ERR_FILELOCK_POLL", + parse=float, + default=0.05, +) +GLOBAL_ERR_FILELOCK_TIMEOUT = get_env( + "NEPS_GLOBAL_ERR_FILELOCK_TIMEOUT", + parse=lambda e: None if is_nullable(e) else float(e), + default=None, +) diff --git a/neps/exceptions.py b/neps/exceptions.py new file mode 100644 index 00000000..597dfb1f --- /dev/null +++ b/neps/exceptions.py @@ -0,0 +1,47 @@ +"""Exceptions for NePS that don't belong in a specific module.""" + +from __future__ import annotations + + +class NePSError(Exception): + """Base class for all NePS exceptions. + + This allows an easier way to catch all NePS exceptions + if we inherit all exceptions from this class. 
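Because every exception above and below derives from `NePSError`, callers can catch all NePS-specific failures with a single handler. A minimal usage sketch, not part of this commit; the pipeline function and search space are made up for illustration:

    import logging

    import neps
    from neps.exceptions import NePSError

    def my_pipeline(lr: float) -> float:
        # Hypothetical objective; anything that returns a loss works here.
        return (lr - 0.01) ** 2

    try:
        neps.run(
            run_pipeline=my_pipeline,
            pipeline_space={"lr": neps.FloatParameter(lower=1e-4, upper=1e-1, log=True)},
            root_directory="results/example",
            max_evaluations_total=5,
        )
    except NePSError as err:
        # Version mismatches, lock failures, missing trials, etc. all land here;
        # unrelated exceptions propagate unchanged.
        logging.exception("NePS run failed: %s", err)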
+ """ + + +class VersionMismatchError(NePSError): + """Raised when the version of a resource does not match the expected version.""" + + +class VersionedResourceAlreadyExistsError(NePSError): + """Raised when a version already exists when trying to create a new versioned + data. + """ + + +class VersionedResourceRemovedError(NePSError): + """Raised when a version already exists when trying to create a new versioned + data. + """ + + +class VersionedResourceDoesNotExistsError(NePSError): + """Raised when a versioned resource does not exist at a location.""" + + +class LockFailedError(NePSError): + """Raised when a lock cannot be acquired.""" + + +class TrialAlreadyExistsError(VersionedResourceAlreadyExistsError): + """Raised when a trial already exists in the store.""" + + +class TrialNotFoundError(VersionedResourceDoesNotExistsError): + """Raised when a trial already exists in the store.""" + + +class WorkerFailedToGetPendingTrialsError(NePSError): + """Raised when a worker failed to get pending trials.""" diff --git a/neps/optimizers/__init__.py b/neps/optimizers/__init__.py index 0493078a..31cb4c4a 100644 --- a/neps/optimizers/__init__.py +++ b/neps/optimizers/__init__.py @@ -1,7 +1,7 @@ from __future__ import annotations from functools import partial -from typing import Callable +from typing import Callable, Mapping from .base_optimizer import BaseOptimizer from .bayesian_optimization.cost_cooling import CostCooling @@ -26,7 +26,8 @@ from .random_search.optimizer import RandomSearch from .regularized_evolution.optimizer import RegularizedEvolution -SearcherMapping: dict[str, Callable] = { +# TODO: Rename Searcher to Optimizer... +SearcherMapping: Mapping[str, Callable[..., BaseOptimizer]] = { "bayesian_optimization": BayesianOptimization, "pibo": partial(BayesianOptimization, disable_priors=False), "cost_cooling_bayesian_optimization": CostCooling, diff --git a/neps/optimizers/base_optimizer.py b/neps/optimizers/base_optimizer.py index 33f1a804..34804626 100644 --- a/neps/optimizers/base_optimizer.py +++ b/neps/optimizers/base_optimizer.py @@ -2,15 +2,21 @@ import logging from abc import abstractmethod -from typing import Any, Iterator, Mapping -from typing_extensions import Self -from contextlib import contextmanager -from pathlib import Path +from typing import Any, Mapping +from dataclasses import asdict, dataclass +from neps.state.optimizer import BudgetInfo from neps.utils.types import ConfigResult, RawConfig, ERROR, ResultDict -from neps.utils.files import serialize, deserialize from neps.search_spaces.search_space import SearchSpace from neps.utils.data_loading import _get_cost, _get_learning_curve, _get_loss +from neps.state.trial import Trial + + +@dataclass +class SampledConfig: + id: Trial.ID + config: Mapping[str, Any] + previous_config_id: Trial.ID | None class BaseOptimizer: @@ -41,10 +47,12 @@ def __init__( self.ignore_errors = ignore_errors @abstractmethod - def load_results( + def load_optimization_state( self, previous_results: dict[str, ConfigResult], pending_evaluations: dict[str, SearchSpace], + budget_info: BudgetInfo | None, + optimizer_state: dict[str, Any], ) -> None: raise NotImplementedError @@ -60,45 +68,125 @@ def get_config_and_ids(self) -> tuple[RawConfig, str, str | None]: """ raise NotImplementedError - def get_state(self) -> Any: - _state = {"used_budget": self.used_budget} - if self.budget is not None: - # TODO(eddiebergman): Seems like this isn't used anywhere, - # A fuzzy find search for `remaining_budget` shows this as the - # only use point. 
- _state["remaining_budget"] = self.budget - self.used_budget + def ask( + self, + trials: Mapping[str, Trial], + budget_info: BudgetInfo | None, + optimizer_state: dict[str, Any], + ) -> tuple[SampledConfig, dict[str, Any]]: + """Sample a new configuration + + !!! note - return _state + The plan is this method replaces the two-step procedure of `load_optimization_state` + and `get_config_and_ids` in the future, replacing both with a single method `ask` + which would be easier for developer of NePS optimizers to implement. - def load_state(self, state: Any) -> None: - self.used_budget = state["used_budget"] + !!! note - def load_config(self, config_dict: Mapping[str, Any]) -> SearchSpace: - config = self.pipeline_space.clone() - config.load_from(config_dict) - return config + The `optimizer_state` right now is just a `dict` that optimizers are free to mutate + as desired. A `dict` is not ideal as its _stringly_ typed but this was the least + invasive way to add this at the moment. It's actually an existing feature no + optimizer uses except _cost-cooling_ which basically just took a value from + `budget_info`. - def get_loss(self, result: ERROR | ResultDict | float) -> float | Any: + Ideally an optimizer overwriting this can decide what to return instead of having + to rely on them mutating it, however this is the best work-around I could come up with + for now. + + Args: + trials: All of the trials that are known about. + budget_info: information about the budget + optimizer_state: extra state the optimizer would like to keep between calls + + Returns: + SampledConfig: a sampled configuration + dict: state the optimizer would like to keep between calls + """ + completed: dict[Trial.ID, ConfigResult] = {} + pending: dict[Trial.ID, SearchSpace] = {} + for trial_id, trial in trials.items(): + if trial.report is not None: + completed[trial_id] = ConfigResult( + id=trial_id, + config=self.pipeline_space.from_dict(trial.config), + result=trial.report, + # TODO: Better if we could just pass around this metadata + # object instead of converting to a dict each time. + metadata=asdict(trial.metadata), + ) + elif trial.state in ( + Trial.State.PENDING, + Trial.State.SUBMITTED, + Trial.State.EVALUATING, + ): + pending[trial_id] = self.pipeline_space.from_dict(trial.config) + + self.load_optimization_state( + previous_results=completed, + pending_evaluations=pending, + budget_info=budget_info, + optimizer_state=optimizer_state, + ) + config, config_id, previous_config_id = self.get_config_and_ids() + return SampledConfig( + id=config_id, config=config, previous_config_id=previous_config_id + ), optimizer_state + + def update_state_post_evaluation( + self, state: dict[str, Any], report: Trial.Report + ) -> dict[str, Any]: + # TODO: There's a slot in `OptimizerState` to store extra things + # required for the optimizer but is currently not used + # state["key"] = "value" + return state + + def get_loss( + self, result: ERROR | ResultDict | float | Trial.Report + ) -> float | ERROR: """Calls result.utils.get_loss() and passes the error handling through. Please use self.get_loss() instead of get_loss() in all optimizer classes.""" + + # TODO(eddiebergman): This is a forward change for whenever we can have optimizers + # use `Trial` and `Report`, they already take care of this and save having to do this + # `_get_loss` at every call. 
We can also then just use `None` instead of the string `"error"` + if isinstance(result, Trial.Report): + return result.loss if result.loss is not None else "error" + return _get_loss( result, loss_value_on_error=self.loss_value_on_error, ignore_errors=self.ignore_errors, ) - def get_cost(self, result: ERROR | ResultDict | float) -> float | Any: + def get_cost( + self, result: ERROR | ResultDict | float | Trial.Report + ) -> float | ERROR: """Calls result.utils.get_cost() and passes the error handling through. Please use self.get_cost() instead of get_cost() in all optimizer classes.""" + # TODO(eddiebergman): This is a forward change for whenever we can have optimizers + # use `Trial` and `Report`, they already take care of this and save having to do this + # `_get_loss` at every call + if isinstance(result, Trial.Report): + return result.loss if result.loss is not None else "error" + return _get_cost( result, cost_value_on_error=self.cost_value_on_error, ignore_errors=self.ignore_errors, ) - def get_learning_curve(self, result: str | dict | float) -> float | Any: + def get_learning_curve( + self, result: str | dict | float | Trial.Report + ) -> list[float] | Any: """Calls result.utils.get_loss() and passes the error handling through. Please use self.get_loss() instead of get_loss() in all optimizer classes.""" + # TODO(eddiebergman): This is a forward change for whenever we can have optimizers + # use `Trial` and `Report`, they already take care of this and save having to do this + # `_get_loss` at every call + if isinstance(result, Trial.Report): + return result.learning_curve + return _get_learning_curve( result, learning_curve_on_error=self.learning_curve_on_error, @@ -107,17 +195,3 @@ def get_learning_curve(self, result: str | dict | float) -> float | Any: def whoami(self) -> str: return type(self).__name__ - - @contextmanager - def using_state(self, state_file: Path) -> Iterator[Self]: - if state_file.exists(): - optimizer_state = deserialize(state_file) - self.load_state(optimizer_state) - - yield self - - serialize(self.get_state(), path=state_file) - - def is_out_of_budget(self) -> bool: - """Check if the optimizer has used all of its budget, if any.""" - return self.budget is not None and self.used_budget >= self.budget diff --git a/neps/optimizers/bayesian_optimization/acquisition_samplers/base_acq_sampler.py b/neps/optimizers/bayesian_optimization/acquisition_samplers/base_acq_sampler.py index 8fbc572a..adf47b82 100644 --- a/neps/optimizers/bayesian_optimization/acquisition_samplers/base_acq_sampler.py +++ b/neps/optimizers/bayesian_optimization/acquisition_samplers/base_acq_sampler.py @@ -3,9 +3,9 @@ from abc import abstractmethod from typing import TYPE_CHECKING, Sequence, Callable -from neps.utils.types import Array - if TYPE_CHECKING: + import numpy as np + import torch from neps.search_spaces.search_space import SearchSpace @@ -17,16 +17,20 @@ def __init__(self, pipeline_space: SearchSpace, patience: int = 50): self.pipeline_space = pipeline_space self.acquisition_function = None self.x: list[SearchSpace] = [] - self.y: Sequence[float] | Array = [] + self.y: Sequence[float] | np.ndarray | torch.Tensor = [] self.patience = patience @abstractmethod def sample(self, acquisition_function: Callable) -> SearchSpace: raise NotImplementedError - def sample_batch(self, acquisition_function: Callable, batch: int) -> list[SearchSpace]: + def sample_batch( + self, acquisition_function: Callable, batch: int + ) -> list[SearchSpace]: return [self.sample(acquisition_function) for _ in 
range(batch)] - def set_state(self, x: list[SearchSpace], y: Sequence[float] | Array) -> None: + def set_state( + self, x: list[SearchSpace], y: Sequence[float] | np.ndarray | torch.Tensor + ) -> None: self.x = x self.y = y diff --git a/neps/optimizers/bayesian_optimization/acquisition_samplers/mutation_sampler.py b/neps/optimizers/bayesian_optimization/acquisition_samplers/mutation_sampler.py index 869d5e91..4c6b17df 100644 --- a/neps/optimizers/bayesian_optimization/acquisition_samplers/mutation_sampler.py +++ b/neps/optimizers/bayesian_optimization/acquisition_samplers/mutation_sampler.py @@ -7,11 +7,14 @@ from more_itertools import first from typing_extensions import override -from neps.optimizers.bayesian_optimization.acquisition_samplers.base_acq_sampler import AcquisitionSampler -from neps.optimizers.bayesian_optimization.acquisition_samplers.random_sampler import RandomSampler +from neps.optimizers.bayesian_optimization.acquisition_samplers.base_acq_sampler import ( + AcquisitionSampler, +) +from neps.optimizers.bayesian_optimization.acquisition_samplers.random_sampler import ( + RandomSampler, +) if TYPE_CHECKING: - from neps.utils.types import Array from neps.search_spaces.search_space import SearchSpace @@ -64,7 +67,9 @@ def __init__( ) @override - def set_state(self, x: list[SearchSpace], y: Sequence[float] | Array) -> None: + def set_state( + self, x: list[SearchSpace], y: Sequence[float] | np.ndarray | torch.Tensor + ) -> None: super().set_state(x, y) self.random_sampling.set_state(x, y) @@ -108,6 +113,7 @@ def create_pool( ][:n_best] seen: set[int] = set() + def _hash(_config: SearchSpace) -> int: return hash(_config.hp_values().values()) diff --git a/neps/optimizers/bayesian_optimization/cost_cooling.py b/neps/optimizers/bayesian_optimization/cost_cooling.py index ea6dffff..f2878fe9 100644 --- a/neps/optimizers/bayesian_optimization/cost_cooling.py +++ b/neps/optimizers/bayesian_optimization/cost_cooling.py @@ -1,7 +1,9 @@ from __future__ import annotations from typing import Any +from typing_extensions import override +from neps.state.optimizer import BudgetInfo from neps.utils.types import ConfigResult from neps.utils.common import instance_from_map from neps.optimizers.bayesian_optimization.acquisition_functions.cost_cooling import ( @@ -9,11 +11,19 @@ ) from neps.search_spaces.search_space import SearchSpace from neps.optimizers.bayesian_optimization.acquisition_functions import AcquisitionMapping -from neps.optimizers.bayesian_optimization.acquisition_functions.base_acquisition import BaseAcquisition -from neps.optimizers.bayesian_optimization.acquisition_functions.prior_weighted import DecayingPriorWeightedAcquisition -from neps.optimizers.bayesian_optimization.acquisition_samplers import AcquisitionSamplerMapping -from neps.optimizers.bayesian_optimization.acquisition_samplers.base_acq_sampler import AcquisitionSampler -from neps.optimizers.bayesian_optimization.kernels import get_kernels +from neps.optimizers.bayesian_optimization.acquisition_functions.base_acquisition import ( + BaseAcquisition, +) +from neps.optimizers.bayesian_optimization.acquisition_functions.prior_weighted import ( + DecayingPriorWeightedAcquisition, +) +from neps.optimizers.bayesian_optimization.acquisition_samplers import ( + AcquisitionSamplerMapping, +) +from neps.optimizers.bayesian_optimization.acquisition_samplers.base_acq_sampler import ( + AcquisitionSampler, +) +from neps.optimizers.bayesian_optimization.kernels.get_kernels import get_kernels from 
neps.optimizers.bayesian_optimization.models import SurrogateModelMapping from neps.optimizers.bayesian_optimization.optimizer import BayesianOptimization @@ -133,9 +143,9 @@ def __init__( raise ValueError("No kernels are provided!") if "vectorial_features" not in surrogate_model_args: - surrogate_model_args[ - "vectorial_features" - ] = self.pipeline_space.get_vectorial_dim() + surrogate_model_args["vectorial_features"] = ( + self.pipeline_space.get_vectorial_dim() + ) self.surrogate_model = instance_from_map( SurrogateModelMapping, @@ -153,9 +163,9 @@ def __init__( raise ValueError("No kernels are provided!") if "vectorial_features" not in cost_model_args: - cost_model_args[ - "vectorial_features" - ] = self.pipeline_space.get_vectorial_dim() + cost_model_args["vectorial_features"] = ( + self.pipeline_space.get_vectorial_dim() + ) self.cost_model = instance_from_map( SurrogateModelMapping, @@ -184,12 +194,23 @@ def __init__( kwargs={"patience": self.patience, "pipeline_space": self.pipeline_space}, ) - def load_results( + @override + def load_optimization_state( self, previous_results: dict[str, ConfigResult], pending_evaluations: dict[str, SearchSpace], + budget_info: BudgetInfo | None, + optimizer_state: dict[str, Any], ) -> None: # TODO(Jan): read out cost and fit cost model + if budget_info is None: + raise ValueError( + "Used budget is not set in the optimizer state but is required" + " for cost cooling, please return a `'cost'` when you return results" + " and/or a `max_cost_budget` when running NePS!" + ) + self.used_budget = budget_info.used_cost_budget + train_x = [el.config for el in previous_results.values()] train_y = [self.get_loss(el.result) for el in previous_results.values()] train_cost = [self.get_cost(el.result) for el in previous_results.values()] @@ -214,7 +235,8 @@ def load_results( # TODO: set acquisition state self.acquisition.set_state( self.surrogate_model, - alpha=1 - (self.used_budget / self.budget), + alpha=1 + - (budget_info.used_cost_budget / budget_info.max_cost_budget), cost_model=self.cost_model, ) self.acquisition_sampler.set_state(x=train_x, y=train_y) diff --git a/neps/optimizers/bayesian_optimization/kernels/encoding.py b/neps/optimizers/bayesian_optimization/kernels/encoding.py index 0e7e35df..419b6926 100644 --- a/neps/optimizers/bayesian_optimization/kernels/encoding.py +++ b/neps/optimizers/bayesian_optimization/kernels/encoding.py @@ -65,7 +65,6 @@ def _compute_kernel(self, dist, l=None): return 0.0 if l is None: l = self.lengthscale - # print(dist) return np.exp(-dist / (l**2)) def _compute_dist( @@ -141,10 +140,7 @@ def fit_transform( save_gram_matrix: bool = False, **kwargs, ): - if ( - not rebuild_model - and self._gram is not None - ): + if not rebuild_model and self._gram is not None: return self._gram K = self.forward(*gr, l=l) if save_gram_matrix: @@ -152,9 +148,7 @@ def fit_transform( self._train_x = gr[:] return K - def transform( - self, gr: list, l: float = None, **kwargs - ): + def transform(self, gr: list, l: float = None, **kwargs): if self._gram is None: raise ValueError("The kernel has not been fitted. 
Run fit_transform first") n = len(gr) diff --git a/neps/optimizers/bayesian_optimization/mf_tpe.py b/neps/optimizers/bayesian_optimization/mf_tpe.py index 4c38352e..45e4adc4 100644 --- a/neps/optimizers/bayesian_optimization/mf_tpe.py +++ b/neps/optimizers/bayesian_optimization/mf_tpe.py @@ -2,13 +2,14 @@ import random from copy import deepcopy -from typing import Iterable +from typing import Any, Iterable import numpy as np import torch from scipy.stats import spearmanr -from typing_extensions import Literal +from typing_extensions import Literal, override +from neps.state.optimizer import BudgetInfo, OptimizationState from neps.utils.types import ConfigResult, RawConfig from neps.utils.common import instance_from_map from neps.search_spaces import ( @@ -457,10 +458,13 @@ def is_init_phase(self) -> bool: return False return True - def load_results( + @override + def load_optimization_state( self, previous_results: dict[str, ConfigResult], pending_evaluations: dict[str, SearchSpace], + budget_info: BudgetInfo | None, + optimizer_state: dict[str, Any], ) -> None: # TODO remove doubles from previous results train_y = [self.get_loss(el.result) for el in previous_results.values()] @@ -637,7 +641,6 @@ def get_config_and_ids(self) -> tuple[RawConfig, str, str | None]: else: config = self.acquisition_sampler.sample(self.acquisition) - print([hp.value for hp in config.hyperparameters.values()]) config.fidelity.set_value(self.rung_map[self.min_rung]) config_id = str(self._num_train_x + len(self._pending_evaluations) + 1) diff --git a/neps/optimizers/bayesian_optimization/models/deepGP.py b/neps/optimizers/bayesian_optimization/models/deepGP.py index 06745184..d5145043 100644 --- a/neps/optimizers/bayesian_optimization/models/deepGP.py +++ b/neps/optimizers/bayesian_optimization/models/deepGP.py @@ -317,9 +317,7 @@ def __reset_xy( normalize_y: bool = False, normalize_budget: bool = True, ): - self.normalize_budget = ( - normalize_budget - ) + self.normalize_budget = normalize_budget self.normalize_y = normalize_y x_train, train_budgets, learning_curves = self._preprocess_input( @@ -329,12 +327,8 @@ def __reset_xy( y_train = self._preprocess_y(y_train, normalize_y) self.x_train = x_train - self.train_budgets = ( - train_budgets - ) - self.learning_curves = ( - learning_curves - ) + self.train_budgets = train_budgets + self.learning_curves = learning_curves self.y_train = y_train def _preprocess_input( @@ -446,13 +440,11 @@ def __train_model( self.model.train() self.likelihood.train() self.nn.train() - self.optimizer = ( - torch.optim.Adam( - [ - dict({"params": self.model.parameters()}, **optimizer_args), - dict({"params": self.nn.parameters()}, **optimizer_args), - ] - ) + self.optimizer = torch.optim.Adam( + [ + dict({"params": self.model.parameters()}, **optimizer_args), + dict({"params": self.nn.parameters()}, **optimizer_args), + ] ) count_down = patience @@ -547,10 +539,8 @@ def __train_model( # break def set_prediction_learning_curves(self, learning_curves: list[list[float]]): - self.prediction_learning_curves = learning_curves - def predict( self, x: list[SearchSpace], learning_curves: list[list[float]] | None = None ): @@ -642,30 +632,3 @@ def get_state(self) -> dict[str, dict]: def delete_checkpoint(self): self.checkpoint_path.unlink(missing_ok=True) - - -if __name__ == "__main__": - print(torch.version.__version__) - - pipe_space = SearchSpace( - float_=FloatParameter(lower=0.0, upper=5.0), - e=IntegerParameter(lower=0, upper=10, is_fidelity=True), - ) - - configs = 
[pipe_space.sample(ignore_fidelity=False) for _ in range(100)] - - y = np.random.random(100).tolist() - - lcs = [ - np.random.random(size=np.random.randint(low=1, high=50)).tolist() - for _ in range(100) - ] - - deep_gp = DeepGP(pipe_space, neural_network_args={}) - - deep_gp.fit(x_train=configs, learning_curves=lcs, y_train=y) - - means, stds = deep_gp.predict(configs, lcs) - - print(list(zip(means, y))) - print(stds) diff --git a/neps/optimizers/bayesian_optimization/models/gp_hierarchy.py b/neps/optimizers/bayesian_optimization/models/gp_hierarchy.py index 14f5cda5..a359b937 100644 --- a/neps/optimizers/bayesian_optimization/models/gp_hierarchy.py +++ b/neps/optimizers/bayesian_optimization/models/gp_hierarchy.py @@ -15,6 +15,10 @@ from ..kernels.vectorial_kernels import Stationary from ..kernels.weisfilerlehman import WeisfilerLehman +import logging + +logger = logging.getLogger(__name__) + # Code for psd_safe_cholesky from gypytorch class _value_context: @@ -69,20 +73,12 @@ def _set_value(cls, float_value, double_value, half_value): if half_value is not None: cls._global_half_value = half_value - def __init__( - self, float=None, double=None, half=None - ): - self._orig_float_value = ( - self.__class__.value() - ) + def __init__(self, float=None, double=None, half=None): + self._orig_float_value = self.__class__.value() self._instance_float_value = float - self._orig_double_value = ( - self.__class__.value() - ) + self._orig_double_value = self.__class__.value() self._instance_double_value = double - self._orig_half_value = ( - self.__class__.value() - ) + self._orig_half_value = self.__class__.value() self._instance_half_value = half def __enter__( @@ -459,7 +455,6 @@ def _optimize_graph_kernels(self, h_: int, lengthscale_): K, self.likelihood, self.gpytorch_kinv ) nlml = -compute_log_marginal_likelihood(K_i, logDetK, train_y) - # print(i, nlml) if nlml < best_nlml: best_nlml = nlml best_subtree_depth = h_i @@ -468,9 +463,7 @@ def _optimize_graph_kernels(self, h_: int, lengthscale_): self.combined_kernel.kernels[0].change_kernel_params( {"h": best_subtree_depth} ) - self.combined_kernel._gram = ( - best_K - ) + self.combined_kernel._gram = best_K def fit(self, train_x: Iterable, train_y: Union[Iterable, torch.Tensor]): self._fit(train_x, train_y, **self.surrogate_model_fit_args) @@ -483,7 +476,8 @@ def _fit( optimizer: str = "adam", wl_subtree_candidates: tuple = tuple(range(5)), wl_lengthscales: tuple = tuple( - np.e**i for i in range(-2, 3) # type: ignore[name-defined] + np.e**i + for i in range(-2, 3) # type: ignore[name-defined] ), optimize_lik: bool = True, max_lik: float = 0.5, @@ -589,7 +583,7 @@ def _fit( nlml = -compute_log_marginal_likelihood(K_i, logDetK, self.y) nlml.backward(create_graph=True) if self.verbose and i % 10 == 0: - print( + logger.info( "Iteration:", i, "/", @@ -646,20 +640,6 @@ def _fit( k.update_hyperparameters(lengthscale=torch.exp(theta_vector)) self.combined_kernel.weights = weights.clone() - if self.verbose: - print("Optimisation summary: ") - print("Optimal NLML: ", nlml) - print("Lengthscales: ", torch.exp(theta_vector)) - try: - print( - "Optimal h: ", - self.domain_kernels[0]._h, - ) - except AttributeError: - pass - print("Weights: ", self.weights) - print("Lik:", self.likelihood) - print("Optimal layer weights", layer_weights) def predict(self, x_configs, preserve_comp_graph: bool = False): """Kriging predictions""" @@ -993,17 +973,12 @@ def _grid_search_wl_kernel( k.change_se_params({"lengthscale": i[1]}) k.change_kernel_params({"h": i[0]}) K = 
k.fit_transform(train_x, rebuild_model=True, save_gram_matrix=True) - # print(K) K_i, logDetK = compute_pd_inverse(K, lik, gpytorch_kinv) - # print(train_y) nlml = -compute_log_marginal_likelihood(K_i, logDetK, train_y) - # print(i, nlml) if nlml < best_nlml: best_nlml = nlml best_subtree_depth, best_lengthscale = i best_K = torch.clone(K) - # print("h: ", best_subtree_depth, "theta: ", best_lengthscale) - # print(best_subtree_depth) k.change_kernel_params({"h": best_subtree_depth}) if k.se is not None: k.change_se_params({"lengthscale": best_lengthscale}) diff --git a/neps/optimizers/bayesian_optimization/optimizer.py b/neps/optimizers/bayesian_optimization/optimizer.py index 9c9ab5fe..9fc3aeae 100644 --- a/neps/optimizers/bayesian_optimization/optimizer.py +++ b/neps/optimizers/bayesian_optimization/optimizer.py @@ -2,7 +2,9 @@ import random from typing import Any, TYPE_CHECKING, Literal +from typing_extensions import override +from neps.state.optimizer import BudgetInfo, OptimizationState from neps.utils.types import ConfigResult, RawConfig from neps.utils.common import instance_from_map from neps.search_spaces import ( @@ -228,10 +230,13 @@ def is_init_phase(self) -> bool: return False return True - def load_results( + @override + def load_optimization_state( self, previous_results: dict[str, ConfigResult], pending_evaluations: dict[str, SearchSpace], + budget_info: BudgetInfo | None, + optimizer_state: dict[str, Any], ) -> None: train_x = [el.config for el in previous_results.values()] train_y = [self.get_loss(el.result) for el in previous_results.values()] diff --git a/neps/optimizers/grid_search/optimizer.py b/neps/optimizers/grid_search/optimizer.py index fc082df1..4f5ff24e 100644 --- a/neps/optimizers/grid_search/optimizer.py +++ b/neps/optimizers/grid_search/optimizer.py @@ -1,7 +1,10 @@ from __future__ import annotations import random +from typing import Any +from typing_extensions import override +from neps.state.optimizer import BudgetInfo from neps.utils.types import ConfigResult, RawConfig from neps.search_spaces.search_space import SearchSpace from neps.optimizers.base_optimizer import BaseOptimizer @@ -19,10 +22,13 @@ def __init__( ) random.shuffle(self.configs_list) - def load_results( + @override + def load_optimization_state( self, previous_results: dict[str, ConfigResult], pending_evaluations: dict[str, SearchSpace], + budget_info: BudgetInfo | None, + optimizer_state: dict[str, Any], ) -> None: self._num_previous_configs = len(previous_results) + len(pending_evaluations) diff --git a/neps/optimizers/multi_fidelity/_dyhpo.py b/neps/optimizers/multi_fidelity/_dyhpo.py index e61e9d33..da3e36bf 100644 --- a/neps/optimizers/multi_fidelity/_dyhpo.py +++ b/neps/optimizers/multi_fidelity/_dyhpo.py @@ -1,9 +1,11 @@ from __future__ import annotations from typing import Any, List, Union +from typing_extensions import override import numpy as np +from neps.state.optimizer import BudgetInfo, OptimizationState from neps.utils.types import ConfigResult, RawConfig from neps.search_spaces.search_space import FloatParameter, IntegerParameter, SearchSpace from neps.optimizers.base_optimizer import BaseOptimizer @@ -206,10 +208,13 @@ def is_init_phase(self) -> bool: def num_train_configs(self): return len(self.observed_configs.completed_runs) - def load_results( + @override + def load_optimization_state( self, previous_results: dict[str, ConfigResult], pending_evaluations: dict[str, SearchSpace], + budget_info: BudgetInfo | None, + optimizer_state: dict[str, Any], ) -> None: """This is 
basically the fit method. @@ -386,7 +391,6 @@ def get_config_and_ids(self) -> tuple[RawConfig, str, Union[str, None]]: # if the returned config already observed, # set the fidelity to the next budget level if not max already # else set the fidelity to the minimum budget level - # print(config_condition) else: config = self.pipeline_space.sample( patience=self.patience, user_priors=True, ignore_fidelity=False @@ -402,5 +406,4 @@ def get_config_and_ids(self) -> tuple[RawConfig, str, Union[str, None]]: else 0 ) config_id = f"{_config_id}_{self.get_budget_level(config)}" - # print(self.observed_configs) return config.hp_values(), config_id, None diff --git a/neps/optimizers/multi_fidelity/dyhpo.py b/neps/optimizers/multi_fidelity/dyhpo.py index 7176c2d5..59804637 100755 --- a/neps/optimizers/multi_fidelity/dyhpo.py +++ b/neps/optimizers/multi_fidelity/dyhpo.py @@ -1,9 +1,11 @@ from __future__ import annotations from typing import Any +from typing_extensions import override import numpy as np +from neps.state.optimizer import BudgetInfo from neps.utils.types import ConfigResult, RawConfig from neps.utils.common import instance_from_map from neps.search_spaces.search_space import FloatParameter, IntegerParameter, SearchSpace @@ -282,10 +284,13 @@ def is_init_phase(self, budget_based: bool = True) -> bool: def num_train_configs(self): return len(self.observed_configs.completed_runs) - def load_results( + @override + def load_optimization_state( self, previous_results: dict[str, ConfigResult], pending_evaluations: dict[str, SearchSpace], + budget_info: BudgetInfo | None, + optimizer_state: dict[str, Any], ) -> None: """This is basically the fit method. diff --git a/neps/optimizers/multi_fidelity/hyperband.py b/neps/optimizers/multi_fidelity/hyperband.py index 8823e21a..dde96c56 100644 --- a/neps/optimizers/multi_fidelity/hyperband.py +++ b/neps/optimizers/multi_fidelity/hyperband.py @@ -5,8 +5,9 @@ from typing import Any import numpy as np -from typing_extensions import Literal +from typing_extensions import Literal, override +from neps.state.optimizer import BudgetInfo from neps.utils.types import ConfigResult, RawConfig from neps.search_spaces.search_space import SearchSpace from neps.optimizers.bayesian_optimization.acquisition_functions.base_acquisition import ( @@ -123,12 +124,20 @@ def _handle_promotions(self): # overloaded function disables the need for retrieving promotions for HB overall return - def load_results( + @override + def load_optimization_state( self, previous_results: dict[str, ConfigResult], pending_evaluations: dict[str, SearchSpace], + budget_info: BudgetInfo | None, + optimizer_state: dict[str, Any], ) -> None: - super().load_results(previous_results, pending_evaluations) + super().load_optimization_state( + previous_results=previous_results, + pending_evaluations=pending_evaluations, + budget_info=budget_info, + optimizer_state=optimizer_state + ) # important for the global HB to run the right SH self._update_sh_bracket_state() diff --git a/neps/optimizers/multi_fidelity/successive_halving.py b/neps/optimizers/multi_fidelity/successive_halving.py index 16c93fb0..a936b0a2 100644 --- a/neps/optimizers/multi_fidelity/successive_halving.py +++ b/neps/optimizers/multi_fidelity/successive_halving.py @@ -8,7 +8,7 @@ import numpy as np import pandas as pd -from typing_extensions import Literal +from typing_extensions import Literal, override from neps.utils.types import ConfigResult, RawConfig from neps.search_spaces import ( @@ -16,7 +16,7 @@ ConstantParameter, FloatParameter, 
IntegerParameter, - SearchSpace + SearchSpace, ) from neps.optimizers.base_optimizer import BaseOptimizer from neps.optimizers.multi_fidelity.promotion_policy import ( @@ -31,7 +31,9 @@ CUSTOM_FLOAT_CONFIDENCE_SCORES = dict(FloatParameter.DEFAULT_CONFIDENCE_SCORES) CUSTOM_FLOAT_CONFIDENCE_SCORES.update({"ultra": 0.05}) -CUSTOM_CATEGORICAL_CONFIDENCE_SCORES = dict(CategoricalParameter.DEFAULT_CONFIDENCE_SCORES) +CUSTOM_CATEGORICAL_CONFIDENCE_SCORES = dict( + CategoricalParameter.DEFAULT_CONFIDENCE_SCORES +) CUSTOM_CATEGORICAL_CONFIDENCE_SCORES.update({"ultra": 8}) @@ -317,18 +319,15 @@ def _fit_models(self): # if adding model-based search to the basic multi-fidelity algorithm return - def load_results( + @override + def load_optimization_state( self, previous_results: dict[str, ConfigResult], pending_evaluations: dict[str, SearchSpace], + budget_info: BudgetInfo | None, + optimizer_state: dict[str, Any], ) -> None: - """This is basically the fit method. - - Args: - previous_results (dict[str, ConfigResult]): [description] - pending_evaluations (dict[str, ConfigResult]): [description] - """ - + """This is basically the fit method.""" self.rung_histories = { rung: {"config": [], "perf": []} for rung in range(self.min_rung, self.max_rung + 1) diff --git a/neps/optimizers/multi_fidelity_prior/async_priorband.py b/neps/optimizers/multi_fidelity_prior/async_priorband.py index 5ab55139..40f6cb29 100644 --- a/neps/optimizers/multi_fidelity_prior/async_priorband.py +++ b/neps/optimizers/multi_fidelity_prior/async_priorband.py @@ -3,8 +3,9 @@ import typing import numpy as np -from typing_extensions import Literal +from typing_extensions import Literal, override +from neps.state.optimizer import BudgetInfo, OptimizationState from neps.utils.types import ConfigResult, RawConfig from neps.search_spaces.search_space import SearchSpace from neps.optimizers.bayesian_optimization.acquisition_functions.base_acquisition import ( @@ -245,12 +246,20 @@ def _update_sh_bracket_state(self) -> None: bracket.observed_configs = self.observed_configs.copy() bracket.rung_histories = self.rung_histories - def load_results( + @override + def load_optimization_state( self, previous_results: dict[str, ConfigResult], pending_evaluations: dict[str, SearchSpace], + budget_info: BudgetInfo | None, + optimizer_state: dict[str, typing.Any], ) -> None: - super().load_results(previous_results, pending_evaluations) + super().load_optimization_state( + previous_results=previous_results, + pending_evaluations=pending_evaluations, + budget_info=budget_info, + optimizer_state=optimizer_state + ) # important for the global HB to run the right SH self._update_sh_bracket_state() diff --git a/neps/optimizers/multiple_knowledge_sources/prototype_optimizer.py b/neps/optimizers/multiple_knowledge_sources/prototype_optimizer.py index f0bd46df..845552ea 100644 --- a/neps/optimizers/multiple_knowledge_sources/prototype_optimizer.py +++ b/neps/optimizers/multiple_knowledge_sources/prototype_optimizer.py @@ -2,7 +2,9 @@ import logging from typing import Any +from typing_extensions import override +from neps.state.optimizer import BudgetInfo, OptimizationState from neps.utils.types import ConfigResult, RawConfig from neps.search_spaces.search_space import SearchSpace from neps.utils.data_loading import read_tasks_and_dev_stages_from_disk @@ -43,10 +45,13 @@ def calculate_defaults(self): hp_values, delete_previous_defaults=True, delete_previous_values=True ) - def load_results( + @override + def load_optimization_state( self, previous_results: 
dict[str, ConfigResult], pending_evaluations: dict[str, SearchSpace], + budget_info: BudgetInfo | None, + optimizer_state: dict[str, Any], ) -> None: self._num_previous_configs = len(previous_results) + len(pending_evaluations) diff --git a/neps/optimizers/random_search/optimizer.py b/neps/optimizers/random_search/optimizer.py index fbd5b7be..5aeaff33 100644 --- a/neps/optimizers/random_search/optimizer.py +++ b/neps/optimizers/random_search/optimizer.py @@ -1,5 +1,8 @@ from __future__ import annotations +from typing import Any +from typing_extensions import override +from neps.state.optimizer import BudgetInfo, OptimizationState from neps.utils.types import ConfigResult, RawConfig from neps.search_spaces.search_space import SearchSpace from neps.optimizers.base_optimizer import BaseOptimizer @@ -12,10 +15,13 @@ def __init__(self, use_priors=False, ignore_fidelity=True, **optimizer_kwargs): self.use_priors = use_priors self.ignore_fidelity = ignore_fidelity - def load_results( + @override + def load_optimization_state( self, previous_results: dict[str, ConfigResult], pending_evaluations: dict[str, SearchSpace], + budget_info: BudgetInfo | None, + optimizer_state: dict[str, Any], ) -> None: self._num_previous_configs = len(previous_results) + len(pending_evaluations) diff --git a/neps/optimizers/regularized_evolution/optimizer.py b/neps/optimizers/regularized_evolution/optimizer.py index 2117a226..0860ba1c 100644 --- a/neps/optimizers/regularized_evolution/optimizer.py +++ b/neps/optimizers/regularized_evolution/optimizer.py @@ -4,12 +4,14 @@ import os import random from pathlib import Path -from typing import Callable +from typing import Any, Callable +from typing_extensions import override import numpy as np import yaml -from neps.utils.types import RawConfig +from neps.state.optimizer import BudgetInfo, OptimizationState +from neps.utils.types import ConfigResult, RawConfig from neps.search_spaces.search_space import SearchSpace from neps.optimizers.base_optimizer import BaseOptimizer @@ -52,7 +54,14 @@ def __init__( self.assisted_init_population_dir = Path(assisted_init_population_dir) self.assisted_init_population_dir.mkdir(exist_ok=True) - def load_results(self, previous_results: dict, pending_evaluations: dict) -> None: + @override + def load_optimization_state( + self, + previous_results: dict[str, ConfigResult], + pending_evaluations: dict[str, SearchSpace], + budget_info: BudgetInfo | None, + optimizer_state: dict[str, Any], + ) -> None: train_x = [el.config for el in previous_results.values()] train_y = [self.get_loss(el.result) for el in previous_results.values()] self.num_train_x = len(train_x) @@ -68,7 +77,6 @@ def get_config_and_ids(self) -> tuple[RawConfig, str, str | None]: if len(self.population) < self.population_size: if self.assisted: if 0 == len(os.listdir(self.assisted_init_population_dir)): - print("Generate initial design with assistance") cur_population_size = self.population_size - len(self.population) configs = [ self.pipeline_space.sample( @@ -93,7 +101,6 @@ def get_config_and_ids(self) -> tuple[RawConfig, str, str | None]: encoding="utf-8", ) as f: yaml.dump(configs[config_idx].serialize(), f) - print("Pick config from pre-computed population") config_yaml = sorted(os.listdir(self.assisted_init_population_dir))[0] with open( self.assisted_init_population_dir / config_yaml, encoding="utf-8" diff --git a/neps/plot/tensorboard_eval.py b/neps/plot/tensorboard_eval.py index 9259952c..e77329b4 100644 --- a/neps/plot/tensorboard_eval.py +++ 
b/neps/plot/tensorboard_eval.py @@ -12,7 +12,7 @@ from torch.utils.tensorboard.summary import hparams from torch.utils.tensorboard.writer import SummaryWriter -from neps.runtime import get_in_progress_trial +from neps.runtime import get_in_progress_trial, get_workers_neps_state from neps.status.status import get_summary_dict from neps.utils.common import get_initial_directory @@ -85,13 +85,19 @@ def _initiate_internal_configurations() -> None: operating on. """ trial = get_in_progress_trial() - assert trial is not None + neps_state = get_workers_neps_state() - # TODO(eddiebergman): We could just save the instance of the trial - # on this object, OR even just use `get_in_process_trial()` in each call directly. - tblogger.config_working_directory = trial.pipeline_dir - tblogger.config_previous_directory = trial.disk.previous_pipeline_dir - tblogger.optimizer_dir = trial.disk.optimization_dir.parent + # We are assuming that neps state is all filebased here + root_dir = Path(neps_state.location) + assert root_dir.exists() + + tblogger.config_working_directory = Path(trial.metadata.location) + tblogger.config_previous_directory = ( + Path(trial.metadata.previous_trial_location) + if trial.metadata.previous_trial_location is not None + else None + ) + tblogger.optimizer_dir = root_dir tblogger.config = trial.config @staticmethod diff --git a/neps/runtime.py b/neps/runtime.py index 6d76ef28..c9988f70 100644 --- a/neps/runtime.py +++ b/neps/runtime.py @@ -1,1042 +1,534 @@ -"""Module for the runtime of a single instance of NePS running. - -An important advantage of NePS with a running instance per worker and no -multiprocessing is that we can reliably use globals to store information such -as the currently running configuration, without interfering with other -workers which have launched. - -This allows us to have a global `Trial` object which can be accessed -using `import neps.runtime; neps.get_in_progress_trial()`. - ---- - -This module primarily handles the worker loop where important concepts are: -* **State**: The state of optimization is all of the configurations, their results and - the current state of the optimizer. -* **Shared State**: Whenever a worker wishes to read or write any state, they will _lock_ -the shared state, declaring themselves as operating on it. At this point, no other worker -can access the shared state. -* **Optimizer Hydration**: This is the process through which an optimizer instance is -_hydrated_ with the Shared State so it can make a decision, i.e. for sampling. -Equally we _serialize_ the optimizer when writing it back to Shared State -* **Trial Lock**: When evaluating a configuration, a worker must _lock_ it to declared -itself as evaluating it. This communicates to other workers that this configuration is -in progress. - -### Loop -We mark lines with `+` as the worker having locked the Shared State and `~` as the worker -having locked the Trial. The trial lock `~` is allowed to fail, in which case all steps -with a `~` are skipped and the loop continues. - -1. + Check exit conditions -2. + Hydrate the optimizer -3. + Sample a new Trial -3. Unlock the Shared State -4. ~ Obtain a Trial Lock -5. ~ Set the global trial for this work to the current trial -6. ~ Evaluate the trial -7. ~+ Lock the shared state -8. ~+ Write the results of the config to disk -9. ~+ Update the optimizer if required (used budget for evaluating trial) -10. ~ Unlock the shared state -11. 
Unlock Trial Lock -""" +"""TODO.""" from __future__ import annotations -import inspect +import datetime import logging import os import shutil import time -import traceback -import warnings from contextlib import contextmanager -from dataclasses import dataclass, field -from enum import Enum +from dataclasses import dataclass from pathlib import Path from typing import ( TYPE_CHECKING, Any, Callable, + Generic, Iterable, Iterator, + Literal, Mapping, - Union, + TypeVar, ) -from typing_extensions import Self, TypeAlias - -import numpy as np - -from neps.utils._locker import Locker -from neps.utils._rng import SeedState -from neps.utils.files import deserialize, empty_file, serialize -from neps.utils.types import ( - ERROR, - ConfigID, - ConfigResult, - RawConfig, + +from neps.exceptions import ( + NePSError, + VersionMismatchError, + WorkerFailedToGetPendingTrialsError, ) +from neps.state._eval import evaluate_trial +from neps.state.filebased import create_or_load_filebased_neps_state +from neps.state.optimizer import BudgetInfo, OptimizationState, OptimizerInfo +from neps.state.settings import DefaultReportValues, OnErrorPossibilities, WorkerSettings +from neps.state.trial import Trial if TYPE_CHECKING: from neps.optimizers.base_optimizer import BaseOptimizer - from neps.search_spaces.search_space import SearchSpace + from neps.state.neps_state import NePSState logger = logging.getLogger(__name__) -# Wait time between each successive poll to see if state can be grabbed -DEFAULT_STATE_POLL: float = 0.1 -ENVIRON_STATE_POLL_KEY = "NEPS_STATE_POLL" -# Timeout before giving up on trying to grab the state, raising an error -DEFAULT_STATE_TIMEOUT: float | None = None -ENVIRON_STATE_TIMEOUT_KEY = "NEPS_STATE_TIMEOUT" +def _default_worker_name() -> str: + isoformat = datetime.datetime.now(datetime.timezone.utc).isoformat() + return f"{os.getpid()}-{isoformat}" -# TODO(eddiebergman): We should not do this... -warnings.simplefilter("always", DeprecationWarning) +N_FAILED_GET_NEXT_PENDING_ATTEMPTS_BEFORE_ERROR = 10 +N_FAILED_TO_SET_TRIAL_STATE = 10 +Loc = TypeVar("Loc") -# NOTE: As each NEPS process is only ever evaluating a single trial, -# this global can be retrieved in NePS and refers to what this process -# is currently evaluating. -_CURRENTLY_RUNNING_TRIAL_IN_PROCESS: Trial | None = None +# NOTE: As each NEPS process is only ever evaluating a single trial, this global can +# be retrieved in NePS and refers to what this process is currently evaluating. +# Note that before `_set_in_progress_trial` is called, this should be cleared +# with `_clear_in_progress_trial` to ensure that we are not in some erroneuous state. +# Prefer to call `_clear_in_progress_trial` after a trial has finished evaluating and +# not just before `_set_in_progress_trial`, as the latter defeats the purpose of this +# assertion. +_CURRENTLY_RUNNING_TRIAL_IN_PROCESS: Trial | None = None +_WORKER_NEPS_STATE: NePSState | None = None -def get_in_progress_trial() -> Trial | None: - """Get the currently running trial in this process.""" - return _CURRENTLY_RUNNING_TRIAL_IN_PROCESS +# TODO: This only works with a filebased nepsstate +def get_workers_neps_state() -> NePSState[Path]: + """Get the worker's NePS state.""" + if _WORKER_NEPS_STATE is None: + raise RuntimeError( + "The worker's NePS state has not been set! This should only be called" + " from within a `run_pipeline` context. 
If you are not running a pipeline" + " and you did not call this function (`get_workers_neps_state`) yourself," + " this is a bug and should be reported to NePS." + ) + return _WORKER_NEPS_STATE -def _set_in_progress_trial(trial: Trial | None) -> None: - global _CURRENTLY_RUNNING_TRIAL_IN_PROCESS # noqa: PLW0603 - _CURRENTLY_RUNNING_TRIAL_IN_PROCESS = trial +def _set_workers_neps_state(state: NePSState[Path]) -> None: + global _WORKER_NEPS_STATE # noqa: PLW0603 + _WORKER_NEPS_STATE = state -def get_shared_state_poll_and_timeout() -> tuple[float, float | None]: - """Get the poll and timeout for the shared state.""" - poll = float(os.environ.get(ENVIRON_STATE_POLL_KEY, DEFAULT_STATE_POLL)) - timeout = os.environ.get(ENVIRON_STATE_TIMEOUT_KEY, DEFAULT_STATE_TIMEOUT) - timeout = float(timeout) if timeout is not None else None - return poll, timeout +def get_in_progress_trial() -> Trial: + """Get the currently running trial in this process.""" + if _CURRENTLY_RUNNING_TRIAL_IN_PROCESS is None: + raise RuntimeError( + "The worker's NePS state has not been set! This should only be called" + " from within a `run_pipeline` context. If you are not running a pipeline" + " and you did not call this function (`get_workers_neps_state`) yourself," + " this is a bug and should be reported to NePS." + ) + return _CURRENTLY_RUNNING_TRIAL_IN_PROCESS -@dataclass -class SuccessReport: - """A successful report of the evaluation of a configuration.""" - loss: float - cost: float | None - account_for_cost: bool - results: Mapping[str, Any] +@contextmanager +def _set_global_trial(trial: Trial) -> Iterator[None]: + global _CURRENTLY_RUNNING_TRIAL_IN_PROCESS # noqa: PLW0603 + if _CURRENTLY_RUNNING_TRIAL_IN_PROCESS is not None: + raise NePSError( + "A trial was already set to run in this process, yet some other trial was" + " attempted to be set as the global trial in progress." + " We assume that each process will only ever have one trial running at a time" + " to allow functionality like `neps.get_in_progress_trial()`," + " `load_checkpoint()` and `save_checkpoint()` to work." + "\n\nThis is most likely a bug and should be reported to NePS!" + ) + _CURRENTLY_RUNNING_TRIAL_IN_PROCESS = trial + yield + _CURRENTLY_RUNNING_TRIAL_IN_PROCESS = None +# NOTE: This class is quite stateful and has been split up quite a bit to make testing +# interleaving of workers easier. This comes at the cost of more fragmented code. @dataclass -class ErrorReport: - """A failed report of the evaluation of a configuration.""" - - err: Exception - tb: str | None - loss: float | None - cost: float | None - account_for_cost: bool - results: Mapping[str, Any] +class DefaultWorker(Generic[Loc]): + """A default worker for the NePS system. - -Report: TypeAlias = Union[SuccessReport, ErrorReport] - - -@dataclass -class Trial: - """A trial is a configuration and it's associated data. - - The object is considered mutable and the global trial currently being - evaluated can be access using `get_in_progress_trial()`. - - Attributes: - id: Unique identifier for the configuration - config: The configuration to evaluate - pipeline_dir: Directory where the configuration is evaluated - previous: The previous trial before this trial. - time_sampled: The time the configuration was sampled - metadata: Additional metadata about the configuration + This is the worker that is used by default in the neps.run() loop. 
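+
+    A rough usage sketch (illustrative only; `neps.run()` normally builds all of this
+    for you via `_launch_runtime`, and the `neps_state`, `optimizer`, `settings` and
+    `run_pipeline` objects below are assumed to exist):
+
+        worker = DefaultWorker.new(
+            state=neps_state,            # a NePSState, e.g. a file-based one
+            optimizer=optimizer,         # a BaseOptimizer instance
+            settings=settings,           # WorkerSettings with the stopping criteria
+            evaluation_fn=run_pipeline,  # the user's evaluation function
+        )
+        worker.run()                     # loops until a stopping criterion is met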
""" - id: ConfigID - config: Mapping[str, Any] - pipeline_dir: Path - previous: Trial | None - report: Report | None - time_sampled: float - metadata: dict[str, Any] - _lock: Locker = field(init=False) - disk: Trial.Disk = field(init=False) - - def to_config_result( - self, - config_to_search_space: Callable[[RawConfig], SearchSpace], - ) -> ConfigResult: - """Convert the report to a `ConfigResult` object.""" - result: ERROR | Mapping[str, Any] = ( - "error" - if self.report is None or isinstance(self.report, ErrorReport) - else self.report.results - ) - return ConfigResult( - self.id, - config=config_to_search_space(self.config), - result=result, - metadata=self.metadata, - ) + state: NePSState + """The state of the NePS system.""" - class State(Enum): - """The state of a trial.""" + settings: WorkerSettings + """The settings for the worker.""" - PENDING = "pending" - IN_PROGRESS = "in_progress" - SUCCESS = "success" - ERROR = "error" - CORRUPTED = "corrupted" + evaluation_fn: Callable[..., float | Mapping[str, Any]] + """The evaluation function to use for the worker.""" - def __post_init__(self) -> None: - if "time_sampled" not in self.metadata: - self.metadata["time_sampled"] = self.time_sampled - self.pipeline_dir.mkdir(exist_ok=True, parents=True) - self._lock = Locker(self.pipeline_dir / ".config_lock") - self.disk = Trial.Disk(pipeline_dir=self.pipeline_dir) + optimizer: BaseOptimizer + """The optimizer that is in use by the worker.""" - @property - def config_file(self) -> Path: - """The path to the configuration file.""" - return self.pipeline_dir / "config.yaml" + worker_id: str + """The id of the worker.""" - @property - def metadata_file(self) -> Path: - """The path to the metadata file.""" - return self.pipeline_dir / "metadata.yaml" + _pre_sample_hooks: list[Callable[[BaseOptimizer], BaseOptimizer]] | None = None + """Hooks to run before sampling a new trial.""" - @classmethod - def from_dir(cls, pipeline_dir: Path, *, previous: Trial | None = None) -> Self: - """Create a `Trial` object from a directory. + worker_cumulative_eval_count: int = 0 + """The number of evaluations done by this worker.""" - Args: - pipeline_dir: The directory where the trial is stored - previous: The previous trial before this trial. - You can use this to prevent loading the previous trial from disk, - if it exists, i.e. a caching shortcut. + worker_cumulative_eval_cost: float = 0.0 + """The cost of the evaluations done by this worker.""" - Returns: - The trial object. - """ - return cls.from_disk( - Trial.Disk.from_dir(pipeline_dir), - previous=previous, - ) + worker_cumulative_evaluation_time_seconds: float = 0.0 + """The time spent evaluating configurations by this worker.""" @classmethod - def from_disk(cls, disk: Trial.Disk, *, previous: Trial | None = None) -> Self: - """Create a `Trial` object from a disk. - - Args: - disk: The disk information of the trial. - previous: The previous trial before this trial. - You can use this to prevent loading the previous trial from disk, - if it exists, i.e. a caching shortcut. - - Returns: - The trial object. 
- """ - try: - config = deserialize(disk.config_file) - except Exception as e: - logger.error( - f"Error loading config from {disk.config_file}: {e}", - exc_info=True, - ) - config = {} - - try: - metadata = deserialize(disk.metadata_file) - time_sampled = metadata["time_sampled"] - except Exception as e: - logger.error( - f"Error loading metadata from {disk.metadata_file}: {e}", - exc_info=True, - ) - metadata = {} - time_sampled = float("nan") - - try: - result: dict[str, Any] | tuple[Exception, str | None] | None - report: Report | None - if not empty_file(disk.result_file): - result = deserialize(disk.result_file) - - assert isinstance(result, dict) - report = SuccessReport( - loss=result["loss"], - cost=result.get("cost", None), - account_for_cost=result.get("account_for_cost", True), - results=result, - ) - elif not empty_file(disk.error_file): - error_tb = deserialize(disk.error_file) - result = deserialize(disk.result_file) - report = ErrorReport( - # NOTE: Not sure we can easily get the original exception type, - # once serialized - err=Exception(error_tb["err"]), - tb=error_tb.get("tb"), - loss=result.get("loss", None), - cost=result.get("cost", None), - account_for_cost=result.get("account_for_cost", True), - results=result, - ) - else: - report = None - except Exception as e: - logger.error( - f"Error loading result from {disk.result_file}: {e}", - exc_info=True, - ) - report = None - - try: - if previous is None and disk.previous_pipeline_dir is not None: - previous = Trial.from_dir(disk.previous_pipeline_dir) - except Exception as e: - logger.error( - f"Error loading previous from {disk.previous_pipeline_dir}: {e}", - exc_info=True, - ) - previous = None - - return cls( - id=disk.config_id, - config=config, - pipeline_dir=disk.pipeline_dir, - report=report, - previous=previous, - time_sampled=time_sampled, - metadata=metadata, + def new( + cls, + *, + state: NePSState, + optimizer: BaseOptimizer, + settings: WorkerSettings, + evaluation_fn: Callable[..., float | Mapping[str, Any]], + _pre_sample_hooks: list[Callable[[BaseOptimizer], BaseOptimizer]] | None = None, + worker_id: str | None = None, + ) -> DefaultWorker: + """Create a new worker.""" + return DefaultWorker( + state=state, + optimizer=optimizer, + settings=settings, + evaluation_fn=evaluation_fn, + worker_id=worker_id if worker_id is not None else _default_worker_name(), + _pre_sample_hooks=_pre_sample_hooks, ) - @property - def previous_config_id_file(self) -> Path: - """The path to the previous configuration id file.""" - return self.pipeline_dir / "previous_config.id" - - def create_error_report(self, err: Exception, tb: str | None = None) -> ErrorReport: - """Create a [`Report`][neps.runtime.Report] object with an error.""" - # TODO(eddiebergman): For now we assume the loss and cost for an error is None - # and that we don't account for cost and there are no possible results. 
- return ErrorReport( - loss=None, - cost=None, - account_for_cost=False, - results={}, - err=err, - tb=tb, - ) + def _get_next_trial_from_state(self) -> Trial: + nxt_trial = self.state.get_next_pending_trial() - def create_success_report(self, result: float | Mapping[str, Any]) -> SuccessReport: - """Check if the trial has succeeded.""" - _result: dict[str, Any] = {} - if isinstance(result, Mapping): - if "loss" not in result: - raise KeyError("The 'loss' should be provided in the evaluation result") + # If we have a trial, we will use it + if nxt_trial is not None: + logger.info( + f"Worker '{self.worker_id}' got previosly sampled trial: {nxt_trial}" + ) - _result = dict(result) - loss = _result["loss"] + # Otherwise sample a new one else: - loss = result - - try: - _result["loss"] = float(loss) - except (TypeError, ValueError) as e: - raise ValueError( - "The evaluation result should be a dictionnary or a float but got" - f" a `{type(loss)}` with value of {loss}", - ) from e - - # TODO(eddiebergman): For now we have no access to the cost for crash - # so we just set it to None. - _cost: float | None = _result.get("cost", None) - if _cost is not None: - try: - _result["cost"] = float(_cost) - except (TypeError, ValueError) as e: - raise ValueError( - "The evaluation result should be a dictionnary or a float but got" - f" a `{type(_cost)}` with value of {_cost}", - ) from e - - # TODO(eddiebergman): Should probably be a global user setting for this. - _account_for_cost = _result.get("account_for_cost", True) - - return SuccessReport( - loss=_result["loss"], - cost=_cost, - account_for_cost=_account_for_cost, - results=_result, - ) - - @dataclass - class Disk: - """The disk information of a trial. - - Attributes: - pipeline_dir: The directory where the trial is stored - id: The unique identifier of the trial - config_file: The path to the configuration file - result_file: The path to the result file - metadata_file: The path to the metadata file - optimization_dir: The directory from which optimization is running - previous_config_id_file: The path to the previous config id file - previous_pipeline_dir: The directory of the previous configuration - lock: The lock for the trial. Obtaining this lock indicates the worker - is evaluating this trial. - """ - - pipeline_dir: Path - - config_id: ConfigID = field(init=False) - config_file: Path = field(init=False) - result_file: Path = field(init=False) - error_file: Path = field(init=False) - metadata_file: Path = field(init=False) - optimization_dir: Path = field(init=False) - previous_config_id_file: Path = field(init=False) - previous_config_id: ConfigID | None = field(init=False) - previous_pipeline_dir: Path | None = field(init=False) - lock: Locker = field(init=False) - - def __post_init__(self) -> None: - self.config_id = self.pipeline_dir.name[len("config_") :] - self.config_file = self.pipeline_dir / "config.yaml" - self.result_file = self.pipeline_dir / "result.yaml" - self.error_file = self.pipeline_dir / "error.yaml" - self.metadata_file = self.pipeline_dir / "metadata.yaml" - - # NOTE: This is a bit of an assumption! 
- self.optimization_dir = self.pipeline_dir.parent - - self.previous_config_id_file = self.pipeline_dir / "previous_config.id" - if not empty_file(self.previous_config_id_file): - with self.previous_config_id_file.open("r") as f: - self.previous_config_id = f.read().strip() - - self.previous_pipeline_dir = ( - self.pipeline_dir.parent / f"config_{self.previous_config_id}" - ) - else: - self.previous_pipeline_dir = None - self.previous_config_id = None - - self.pipeline_dir.mkdir(exist_ok=True, parents=True) - self.lock = Locker(self.pipeline_dir / ".config_lock") - - def raw_config(self) -> dict[str, Any]: - """Deserialize the configuration from disk.""" - return deserialize(self.config_file) - - def state(self) -> Trial.State: # noqa: PLR0911 - """The state of the trial.""" - result_file_exists = not empty_file(self.result_file) - error_file_exists = not empty_file(self.error_file) - config_file_exists = not empty_file(self.config_file) - - # NOTE: We don't handle the case where it's locked and there is a result - # or error file existing, namely as this might introduce a race condition, - # where the result/error is being written to while the lock still exists. - - if error_file_exists: - # Should not have a results file if there is an error file - if result_file_exists: - return Trial.State.CORRUPTED - - # Should have a config file if there is an error file - if not config_file_exists: - return Trial.State.CORRUPTED - - return Trial.State.ERROR - - if result_file_exists: - # Should have a config file if there is a results file - if not config_file_exists: - return Trial.State.CORRUPTED - - return Trial.State.SUCCESS - - if self.lock.is_locked(): - # Should have a config to evaluate if it's locked - if not config_file_exists: - return Trial.State.CORRUPTED - - return Trial.State.IN_PROGRESS - - return Trial.State.PENDING - - @classmethod - def from_dir(cls, pipeline_dir: Path) -> Trial.Disk: - """Create a `Trial.Disk` object from a directory.""" - return cls(pipeline_dir=pipeline_dir) - - -@dataclass -class StatePaths: - """The paths used for the state of the optimization process. - - Most important method is [`config_dir`][neps.runtime.StatePaths.config_dir], - which gives the directory to use for a configuration. - - Attributes: - root: The root directory of the optimization process. - create_dirs: Whether to create the directories if they do not exist. - optimizer_state_file: The path to the optimizer state file. - optimizer_info_file: The path to the optimizer info file. - seed_state_dir: The directory where the seed state is stored. - results_dir: The directory where results are stored. - """ - - root: Path - create_dirs: bool = False - - optimizer_state_file: Path = field(init=False) - optimizer_info_file: Path = field(init=False) - seed_state_dir: Path = field(init=False) - results_dir: Path = field(init=False) - - def __post_init__(self) -> None: - if self.create_dirs: - self.root.mkdir(parents=True, exist_ok=True) - - self.results_dir = self.root / "results" - - if self.create_dirs: - self.results_dir.mkdir(exist_ok=True) - - self.optimizer_state_file = self.root / ".optimizer_state.yaml" - self.optimizer_info_file = self.root / ".optimizer_info.yaml" - self.seed_state_dir = self.root / ".seed_state" - - def config_dir(self, config_id: ConfigID) -> Path: - """Get the directory for a configuration.""" - return self.results_dir / f"config_{config_id}" - - -@dataclass -class SharedState: - """The shared state of the optimization process that workers communicate through. 
- - Attributes: - base_dir: The base directory from which the optimization is running. - create_dirs: Whether to create the directories if they do not exist. - lock: The lock to signify that a worker is operating on the shared state. - optimizer_state_file: The path to the optimizers state. - optimizer_info_file: The path to the file containing information about the - optimizer's setup. - seed_state_dir: Directory where the seed state is stored. - results_dir: Directory where results for configurations are stored. - """ - - base_dir: Path - paths: StatePaths = field(init=False) - create_dirs: bool = False - lock: Locker = field(init=False) - - trials: dict[ConfigID, tuple[Trial, Trial.State]] = field(default_factory=dict) - """Mapping from a configid to the trial and it's last known state, including if - it's been evaluated.""" - - def __post_init__(self) -> None: - self.paths = StatePaths(root=self.base_dir, create_dirs=self.create_dirs) - self.lock = Locker(self.base_dir / ".decision_lock") + nxt_trial = self.state.sample_trial( + worker_id=self.worker_id, + optimizer=self.optimizer, + _sample_hooks=self._pre_sample_hooks, + ) + logger.info(f"Worker '{self.worker_id}' sampled a new trial: {nxt_trial}") - def trials_by_state(self) -> dict[Trial.State, list[Trial]]: - """Get the trials grouped by their state.""" - _dict: dict[Trial.State, list[Trial]] = {state: [] for state in Trial.State} - for trial, state in self.trials.values(): - _dict[state].append(trial) - return _dict + return nxt_trial - def check_optimizer_info_on_disk_matches( + def _check_if_should_stop( # noqa: C901, PLR0912, PLR0911 self, - optimizer_info: dict[str, Any], *, - excluded_keys: Iterable[str] = ("searcher_name",), - ) -> None: - """Sanity check that the provided info matches the one on disk (if any). - - Args: - optimizer_info: The optimizer info to check. - excluded_keys: Any keys to exclude during the comparison. - - Raises: - ValueError: If there is optimizer info on disk and it does not match the - provided info. - """ - optimizer_info_copy = optimizer_info.copy() - loaded_info = deserialize(self.paths.optimizer_info_file) - - for key in excluded_keys: - optimizer_info_copy.pop(key, None) - loaded_info.pop(key, None) - - if optimizer_info_copy != loaded_info: - raise ValueError( - f"The sampler_info in the file {self.paths.optimizer_info_file} is not" - f" valid. Expected: {optimizer_info_copy}, Found: {loaded_info}", + time_monotonic_start: float, + error_from_this_worker: Exception | None, + ) -> str | Literal[False]: + # NOTE: Sorry this code is kind of ugly but it's pretty straightforward, just a + # lot of conditional checking and making sure to check cheaper conditions first. + # It would look a little nicer with a match statement but we've got to wait + # for python 3.10 for that. + + # First check for stopping criterion for this worker in particular as it's + # cheaper and doesn't require anything from the state. + if error_from_this_worker and self.settings.on_error in ( + OnErrorPossibilities.RAISE_WORKER_ERROR, + OnErrorPossibilities.RAISE_ANY_ERROR, + OnErrorPossibilities.STOP_WORKER_ERROR, + OnErrorPossibilities.STOP_ANY_ERROR, + ): + if self.settings.on_error in ( + OnErrorPossibilities.RAISE_WORKER_ERROR, + OnErrorPossibilities.RAISE_ANY_ERROR, + ): + raise error_from_this_worker + return ( + "Error occurred while evaluating a configuration with this worker and" + f" the worker is set to stop with {self.settings.on_error}." 
) - @contextmanager - def use_sampler( - self, - sampler: BaseOptimizer, - *, - serialize_seed: bool = True, - ) -> Iterator[BaseOptimizer]: - """Use the sampler with the shared state.""" - if serialize_seed: - with SeedState.use(self.paths.seed_state_dir), sampler.using_state( - self.paths.optimizer_state_file - ): - yield sampler - else: - with sampler.using_state(self.paths.optimizer_state_file): - yield sampler - - def update_from_disk(self) -> None: - """Update the shared state from disk.""" - trial_dirs = (p for p in self.paths.results_dir.iterdir() if p.is_dir()) - _disks = [Trial.Disk.from_dir(p) for p in trial_dirs] - _disk_lookup = {disk.config_id: disk for disk in _disks} - - # NOTE: We sort all trials such that we process previous trials first, i.e. - # if trial_3 has trial_2 as previous, we process trial_2 first, which - # requires trial_1 to have been processed first. - def _depth(trial: Trial.Disk) -> int: - depth = 0 - previous = trial.previous_config_id - while previous is not None: - depth += 1 - previous_trial = _disk_lookup.get(previous) - if previous_trial is None: - raise RuntimeError( - "Previous trial not found on disk when processing a trial." - " This should not happen as if a tria has a previous trial," - " then it should be present and evaluated on disk.", - ) - previous = previous_trial.previous_config_id + if ( + self.settings.max_evaluations_for_worker is not None + and self.worker_cumulative_eval_count + >= self.settings.max_evaluations_for_worker + ): + return ( + "Worker has reached the maximum number of evaluations it is allowed to do" + f" as given by `{self.settings.max_evaluations_for_worker=}`." + "\nTo allow more evaluations, increase this value or use a different" + " stopping criterion." + ) - return depth + if ( + self.settings.max_cost_for_worker is not None + and self.worker_cumulative_eval_cost >= self.settings.max_cost_for_worker + ): + return ( + "Worker has reached the maximum cost it is allowed to spend" + f" which is given by `{self.settings.max_cost_for_worker=}`." + f" This worker has spend '{self.worker_cumulative_eval_cost}'." + "\n To allow more evaluations, increase this value or use a different" + " stopping criterion." + ) - # This allows is to traverse linearly and used cached values of previous - # trial data loading, as done below. - _disks.sort(key=_depth) + if self.settings.max_wallclock_time_for_worker_seconds is not None and ( + time.monotonic() - time_monotonic_start + >= self.settings.max_wallclock_time_for_worker_seconds + ): + return ( + "Worker has reached the maximum wallclock time it is allowed to spend" + f", given by `{self.settings.max_wallclock_time_for_worker_seconds=}`." + ) - for disk in _disks: - config_id = disk.config_id - state = disk.state() + if self.settings.max_evaluation_time_for_worker_seconds is not None and ( + self.worker_cumulative_evaluation_time_seconds + >= self.settings.max_evaluation_time_for_worker_seconds + ): + return ( + "Worker has reached the maximum evaluation time it is allowed to spend" + f", given by `{self.settings.max_evaluation_time_for_worker_seconds=}`." + ) - if state is Trial.State.CORRUPTED: - logger.warning(f"Trial {config_id} was corrupted somehow!") + # We check this global error stopping criterion as it's much + # cheaper than sweeping the state from all trials. 
+ if self.settings.on_error in ( + OnErrorPossibilities.RAISE_ANY_ERROR, + OnErrorPossibilities.STOP_ANY_ERROR, + ): + err = self.state._shared_errors.synced().latest_err_as_raisable() + if err is not None: + if self.settings.on_error == OnErrorPossibilities.RAISE_ANY_ERROR: + raise err + + return ( + "An error occurred in another worker and this worker is set to stop" + f" with {self.settings.on_error}." + "\n To allow more evaluations, use a different stopping criterion." + ) - previous: Trial | None = None - if disk.previous_config_id is not None: - previous, _ = self.trials.get(disk.previous_config_id, (None, None)) - if previous is None: - raise RuntimeError( - "Previous trial not found in memory when processing a trial." - " This should not happen as if a trial has a previous trial," - " then it should be present and evaluated in memory.", - ) + # If there are no global stopping criterion, we can no just return early. + if ( + self.settings.max_evaluations_total is None + and self.settings.max_cost_total is None + and self.settings.max_evaluation_time_total_seconds is None + ): + return False + + # At this point, if we have some global stopping criterion, we need to sweep + # the current state of trials to determine if we should stop + # NOTE: If these `sum` turn out to somehow be a bottleneck, these could + # be precomputed and accumulated over time. This would have to be handled + # in the `NePSState` class. + trials = self.state.get_all_trials() + if self.settings.max_evaluations_total is not None: + if self.settings.include_in_progress_evaluations_towards_maximum: + count = sum( + 1 + for _, trial in trials.items() + if trial.report is not None + or trial.state in (Trial.State.EVALUATING, Trial.State.SUBMITTED) + ) + else: + count = sum(1 for _, trial in trials.items() if trial.report is not None) + + if count >= self.settings.max_evaluations_total: + return ( + "The total number of evaluations has reached the maximum allowed of" + f" `{self.settings.max_evaluations_total=}`." + " To allow more evaluations, increase this value or use a different" + " stopping criterion." + ) - cached_trial = self.trials.get(config_id, None) - - # If not currently cached or it was and had a state change - if cached_trial is None or cached_trial[1] != state: - trial = Trial.from_disk(disk, previous=previous) - self.trials[config_id] = (trial, state) - - @contextmanager - def sync(self, *, lock: bool = True) -> Iterator[None]: - """Sync up with what's on disk.""" - if lock: - _poll, _timeout = get_shared_state_poll_and_timeout() - with self.lock(poll=_poll, timeout=_timeout): - self.update_from_disk() - yield - else: - yield + if self.settings.max_cost_total is not None: + cost = sum( + trial.report.cost + for _, trial in trials.items() + if trial.report is not None and trial.report.cost is not None + ) + if cost >= self.settings.max_cost_total: + return ( + f"The maximum cost `{self.settings.max_cost_total=}` has been" + " reached by all of the evaluated trials. To allow more evaluations," + " increase this value or use a different stopping criterion." + ) + if self.settings.max_evaluation_time_total_seconds is not None: + time_spent = sum( + trial.report.evaluation_duration + for _, trial in trials.items() + if trial.report is not None + if trial.report.evaluation_duration is not None + ) + if time_spent >= self.settings.max_evaluation_time_total_seconds: + return ( + "The maximum evaluation time of" + f" `{self.settings.max_evaluation_time_total_seconds=}` has been" + " reached. 
To allow more evaluations, increase this value or use" + " a different stopping criterion." + ) -def _evaluate_config( - trial: Trial, - evaluation_fn: Callable[..., float | Mapping[str, Any]], - logger: logging.Logger, -) -> float | Mapping[str, Any]: - config = trial.config - config_id = trial.id - pipeline_directory = trial.pipeline_dir - previous_pipeline_directory = ( - None if trial.previous is None else trial.previous.pipeline_dir - ) + return False - logger.info(f"Start evaluating config {config_id}") + def run(self) -> None: # noqa: C901, PLR0915 + """Run the worker. - # If pipeline_directory and previous_pipeline_directory are included in the - # signature we supply their values, otherwise we simply do nothing. - directory_params: list[Path | None] = [] + Will keep running until one of the criterion defined by the `WorkerSettings` + is met. + """ + _set_workers_neps_state(self.state) - evaluation_fn_params = inspect.signature(evaluation_fn).parameters - if "pipeline_directory" in evaluation_fn_params: - directory_params.append(pipeline_directory) - if "previous_pipeline_directory" in evaluation_fn_params: - directory_params.append(previous_pipeline_directory) + logger.info("Launching NePS") - return evaluation_fn(*directory_params, **config) + _time_monotonic_start = time.monotonic() + _error_from_evaluation: Exception | None = None + _repeated_fail_get_next_trial_count = 0 + while True: + # NOTE: We rely on this function to do logging and raising errors if it should + should_stop = self._check_if_should_stop( + time_monotonic_start=_time_monotonic_start, + error_from_this_worker=_error_from_evaluation, + ) + if should_stop is not False: + logger.info(should_stop) + break -def _worker_should_continue( - max_evaluations_total: int | None, - *, - n_inprogress: int, - n_evaluated: int, - continue_until_max_evaluation_completed: bool, -) -> bool: - # Check if we have reached the total amount of configurations to evaluated - # (including pending evaluations possibly) - if max_evaluations_total is None: - return True - - n_counter = ( - n_evaluated - if continue_until_max_evaluation_completed - else n_evaluated + n_inprogress - ) - return n_counter < max_evaluations_total + try: + trial_to_eval = self._get_next_trial_from_state() + _repeated_fail_get_next_trial_count = 0 + except Exception as e: + _repeated_fail_get_next_trial_count += 1 + logger.error( + "Error while trying to get the next trial to evaluate.", exc_info=True + ) + # NOTE: This is to prevent any infinite loops if we can't get a trial + if ( + _repeated_fail_get_next_trial_count + >= N_FAILED_GET_NEXT_PENDING_ATTEMPTS_BEFORE_ERROR + ): + raise WorkerFailedToGetPendingTrialsError( + "Worker '%s' failed to get pending trials %d times in a row." + " Bailing!" 
+ ) from e -def _sample_trial_from_optimizer( - optimizer: BaseOptimizer, - config_dir_f: Callable[[ConfigID], Path], - evaluated_trials: Mapping[ConfigID, Trial], - pending_trials: Mapping[ConfigID, Trial], -) -> Trial: - optimizer.load_results( - previous_results={ - config_id: report.to_config_result(optimizer.load_config) - for config_id, report in evaluated_trials.items() - }, - pending_evaluations={ - config_id: optimizer.load_config(trial.config) - for config_id, trial in pending_trials.items() - }, - ) - config, config_id, prev_config_id = optimizer.get_config_and_ids() - previous = None - if prev_config_id is not None: - previous = evaluated_trials[prev_config_id] - - time_sampled = time.time() - return Trial( - id=config_id, - config=config, - report=None, - time_sampled=time_sampled, - pipeline_dir=config_dir_f(config_id), - previous=previous, - metadata={"time_sampled": time_sampled}, - ) + continue + # If we can't set this working to evaluating, then just retry the loop + try: + trial_to_eval.set_evaluating( + time_started=time.time(), + worker_id=self.worker_id, + ) + self.state.put_updated_trial(trial_to_eval) + n_failed_set_trial_state = 0 + except VersionMismatchError: + n_failed_set_trial_state += 1 + logger.debug( + f"Another worker has managed to change trial '{trial_to_eval.id}'" + " to evaluate and put back into state. This is fine and likely means" + " the other worker is evaluating it.", + exc_info=True, + ) + except Exception: + n_failed_set_trial_state += 1 + logger.error( + f"Error trying to set trial '{trial_to_eval.id}' to evaluating.", + exc_info=True, + ) -def _post_evaluation_hook( # type: ignore - trial: Trial, - result: ERROR | dict[str, Any], - logger: logging.Logger, - loss_value_on_error: float | None, - ignore_errors, -) -> None: - # We import here to avoid circular imports - from neps.plot.tensorboard_eval import tblogger - from neps.utils.data_loading import _get_loss - - working_directory = Path(trial.pipeline_dir, "../../") - loss = _get_loss(result, loss_value_on_error, ignore_errors=ignore_errors) - - # 1. Write all configs and losses - all_configs_losses = Path(working_directory, "all_losses_and_configs.txt") - - def write_loss_and_config(file_handle, loss_, config_id_, config_): # type: ignore - file_handle.write(f"Loss: {loss_}\n") - file_handle.write(f"Config ID: {config_id_}\n") - file_handle.write(f"Config: {config_}\n") - file_handle.write(79 * "-" + "\n") - - with all_configs_losses.open("a", encoding="utf-8") as f: - write_loss_and_config(f, loss, trial.id, trial.config) - - # no need to handle best loss cases if an error occurred - if result == "error": - return - - # The "best" loss exists only in the pareto sense for multi-objective - is_multi_objective = isinstance(loss, dict) - if is_multi_objective: - logger.info(f"Finished evaluating config {trial.id}") - return - - # 2. Write best losses/configs - best_loss_trajectory_file = Path(working_directory, "best_loss_trajectory.txt") - best_loss_config_trajectory_file = Path( - working_directory, "best_loss_with_config_trajectory.txt" - ) + # NOTE: This is to prevent infinite looping if it somehow keeps getting + # the same trial and can't set it to evaluating. + if n_failed_set_trial_state != 0: + if n_failed_set_trial_state >= N_FAILED_TO_SET_TRIAL_STATE: + raise WorkerFailedToGetPendingTrialsError( + "Worker '%s' failed to set trial to evaluating %d times in a row." + " Bailing!" 
+ ) + continue - if not best_loss_trajectory_file.exists(): - is_new_best = result != "error" - else: - best_loss_trajectory: str | list[str] - best_loss_trajectory = best_loss_trajectory_file.read_text(encoding="utf-8") - best_loss_trajectory = list(best_loss_trajectory.rstrip("\n").split("\n")) - best_loss = best_loss_trajectory[-1] - is_new_best = float(best_loss) > loss # type: ignore + # We (this worker) has managed to set it to evaluating, now we can evaluate it + with _set_global_trial(trial_to_eval): + evaluated_trial, report = evaluate_trial( + trial=trial_to_eval, + evaluation_fn=self.evaluation_fn, + default_report_values=self.settings.default_report_values, + ) + evaluation_duration = evaluated_trial.metadata.evaluation_duration + assert evaluation_duration is not None + self.worker_cumulative_evaluation_time_seconds += evaluation_duration - if is_new_best: - with best_loss_trajectory_file.open("a", encoding="utf-8") as f: - f.write(f"{loss}\n") + self.worker_cumulative_eval_count += 1 - with best_loss_config_trajectory_file.open("a", encoding="utf-8") as f: - write_loss_and_config(f, loss, trial.id, trial.config) + logger.info( + "Worker '%s' evaluated trial: %s as %s.", + self.worker_id, + evaluated_trial.id, + evaluated_trial.state, + ) - logger.info( - f"Finished evaluating config {trial.id}" - f" -- new best with loss {float(loss) :.6f}" - ) + if report.cost is not None: + self.worker_cumulative_eval_cost += report.cost - else: - logger.info(f"Finished evaluating config {trial.id}") + if report.err is not None: + logger.error( + f"Error during evaluation of '{evaluated_trial.id}'" + f" : {evaluated_trial.config}." + ) + logger.exception(report.err) + _error_from_evaluation = report.err + + self.state.report_trial_evaluation( + optimizer=self.optimizer, + trial=evaluated_trial, + report=report, + worker_id=self.worker_id, + ) - tblogger.end_of_config() + logger.debug("Config %s: %s", evaluated_trial.id, evaluated_trial.config) + logger.debug("Loss %s: %s", evaluated_trial.id, report.loss) + logger.debug("Cost %s: %s", evaluated_trial.id, report.loss) + logger.debug( + "Learning Curve %s: %s", evaluated_trial.id, report.learning_curve + ) -def launch_runtime( # noqa: PLR0913, C901, PLR0915 +# TODO: This should be done directly in `api.run` at some point to make it clearer at an +# entryy point how the woerer is set up to run if someone reads the entry point code. +def _launch_runtime( # noqa: PLR0913 *, evaluation_fn: Callable[..., float | Mapping[str, Any]], - sampler: BaseOptimizer, + optimizer: BaseOptimizer, optimizer_info: dict, - optimization_dir: Path | str, - max_evaluations_total: int | None = None, - max_evaluations_per_run: int | None = None, - continue_until_max_evaluation_completed: bool = False, - logger: logging.Logger | None = None, + optimization_dir: Path, + max_cost_total: float | None, ignore_errors: bool = False, - loss_value_on_error: None | float = None, - overwrite_optimization_dir: bool = False, - pre_load_hooks: Iterable[Callable[[BaseOptimizer], BaseOptimizer]] | None = None, + loss_value_on_error: float | None, + cost_value_on_error: float | None, + continue_until_max_evaluation_completed: bool, + overwrite_optimization_dir: bool, + max_evaluations_total: int | None, + max_evaluations_for_worker: int | None, + pre_load_hooks: Iterable[Callable[[BaseOptimizer], BaseOptimizer]] | None, ) -> None: - """Launch the runtime of a single instance of NePS. - - Please refer to the module docstring for a detailed explanation of the runtime. 
- Runs until some exit condition is met. - - Args: - evaluation_fn: The evaluation function to use. - sampler: The optimizer to use for sampling configurations. - optimizer_info: Information about the optimizer. - optimization_dir: The directory where the optimization is running. - max_evaluations_total: The maximum number of evaluations to run. - max_evaluations_per_run: The maximum number of evaluations to run in a single run. - continue_until_max_evaluation_completed: Whether to continue until the maximum - evaluations are completed. - logger: The logger to use. - loss_value_on_error: Setting this and cost_value_on_error to any float will - supress any error and will use given loss value instead. default: None - ignore_errors: Ignore hyperparameter settings that threw an error and do not raise - an error. Error configs still count towards max_evaluations_total. - overwrite_optimization_dir: Whether to overwrite the optimization directory. - pre_load_hooks: Hooks to run before loading the results. - """ - # NOTE(eddiebergman): This was deprecated a while ago and called at - # evaluate, now we just crash immediatly instead. Should probably - # promote this check closer to the user, i.e. `neps.run()` - evaluation_fn_params = inspect.signature(evaluation_fn).parameters - if "previous_working_directory" in evaluation_fn_params: - raise RuntimeError( - "the argument: 'previous_working_directory' was deprecated. " - f"In the function: '{evaluation_fn.__name__}', please, " - "use 'previous_pipeline_directory' instead. ", - ) - if "working_directory" in evaluation_fn_params: - raise RuntimeError( - "the argument: 'working_directory' was deprecated. " - f"In the function: '{evaluation_fn.__name__}', please, " - "use 'pipeline_directory' instead. ", - ) - - if logger is None: - logger = logging.getLogger("neps") - - optimization_dir = Path(optimization_dir) - - # TODO(eddiebergman): Not sure how overwriting works with multiple workers.... if overwrite_optimization_dir and optimization_dir.exists(): - logger.warning("Overwriting working_directory") + logger.info( + f"Overwriting optimization directory '{optimization_dir}' as" + " `overwrite_optimization_dir=True`." + ) shutil.rmtree(optimization_dir) - shared_state = SharedState(optimization_dir, create_dirs=True) - - _poll, _timeout = get_shared_state_poll_and_timeout() - with shared_state.sync(lock=True): - if not shared_state.paths.optimizer_info_file.exists(): - serialize( - optimizer_info, - shared_state.paths.optimizer_info_file, - sort_keys=False, - ) - else: - shared_state.check_optimizer_info_on_disk_matches(optimizer_info) - - _max_evals_this_run = ( - max_evaluations_per_run if max_evaluations_per_run is not None else np.inf + neps_state = create_or_load_filebased_neps_state( + directory=optimization_dir, + optimizer_info=OptimizerInfo(optimizer_info), + optimizer_state=OptimizationState( + budget=( + BudgetInfo(max_cost_budget=max_cost_total, used_cost_budget=0) + if max_cost_total is not None + else None + ), + shared_state={}, # TODO: Unused for the time being... 
+ ), ) - evaluations_in_this_run = 0 - while True: - if evaluations_in_this_run >= _max_evals_this_run: - logger.info("Maximum evaluations per run is reached, shutting down") - break - - with shared_state.sync(lock=True): - trials_by_state = shared_state.trials_by_state() - if not _worker_should_continue( - max_evaluations_total, - n_inprogress=len(trials_by_state[Trial.State.IN_PROGRESS]), - n_evaluated=( - len(trials_by_state[Trial.State.SUCCESS]) - + len(trials_by_state[Trial.State.ERROR]) - ), - continue_until_max_evaluation_completed=continue_until_max_evaluation_completed, - ): - logger.info("Maximum total evaluations is reached, shutting down") - break - - # While we have the decision lock, we will now sample - # with the optimizer in this process - with shared_state.use_sampler(sampler) as sampler: - if sampler.is_out_of_budget(): - logger.info("Maximum budget reached, shutting down") - break - - if pre_load_hooks is not None: - for hook in pre_load_hooks: - sampler = hook(sampler) # noqa: PLW2901 - - logger.debug("Sampling a new configuration") - - evaluated = ( - trials_by_state[Trial.State.SUCCESS] - + trials_by_state[Trial.State.ERROR] - ) - pending = ( - trials_by_state[Trial.State.PENDING] - + trials_by_state[Trial.State.IN_PROGRESS] - ) - trial = _sample_trial_from_optimizer( - sampler, - shared_state.paths.config_dir, - evaluated_trials={trial.id: trial for trial in evaluated}, - pending_trials={trial.id: trial for trial in pending}, - ) - serialize(trial.config, trial.config_file) - serialize(trial.metadata, trial.metadata_file) - if trial.previous is not None: - trial.previous_config_id_file.write_text(trial.previous.id) - - logger.debug(f"Sampled config {trial.id}") - - # Obtain the lock on this trial and evaluate it, - # otherwise continue back to waiting to sampling - with trial._lock.try_lock() as acquired: - if not acquired: - continue - - # Inform the global state that this trial is being evaluated - _set_in_progress_trial(trial) - - # TODO(eddiebergman): Right now if a trial crashes, it's cost is not accounted - # for, this should probably removed from BaseOptimizer as it does not need - # to know this and the runtime can fill this in for it. - try: - user_result = _evaluate_config(trial, evaluation_fn, logger) - except Exception as e: # noqa: BLE001 - # TODO(eddiebergman): Right now this never accounts for cost! - # NOTE: It's important to lock the shared state such that any - # sampling done is with taking this result into account - # accidentally reads this config as un-evaluated - with shared_state.lock(poll=_poll, timeout=_timeout): - # TODO(eddiebergman): We should add an option to just crash here - # if something goes wrong and raise up this error to the top. - logger.error( - f"Error during evaluation of '{trial.id}': {trial.config}." - ) - logger.exception(e) - tb = traceback.format_exc() - - trial.report = trial.create_error_report(e, tb=tb) - trial.metadata["time_end"] = time.time() - - shared_state.trials[trial.id] = (trial, Trial.State.ERROR) - - serialize({"err": str(e), "tb": tb}, trial.disk.error_file) - serialize(trial.metadata, trial.disk.metadata_file) - else: - trial.report = trial.create_success_report(user_result) - trial.metadata["time_end"] = time.time() - if sampler.budget is not None and trial.report.cost is None: - raise ValueError( - "The evaluation function result should contain a 'cost'" - f"field when used with a budget. 
Got {trial.report.results}", - ) - - with shared_state.lock(poll=_poll, timeout=_timeout): - shared_state.trials[trial.id] = (trial, Trial.State.SUCCESS) - - eval_cost = trial.report.cost - account_for_cost = False - if eval_cost is not None: - account_for_cost = trial.report.account_for_cost - budget_metadata = { - "max": sampler.budget, - "used": sampler.used_budget, - "eval_cost": eval_cost, - "account_for_cost": account_for_cost, - } - trial.metadata.update(budget_metadata) - - serialize(trial.metadata, trial.disk.metadata_file) - serialize(trial.report.results, trial.disk.result_file) - if account_for_cost: - assert eval_cost is not None - with shared_state.use_sampler(sampler, serialize_seed=False): - sampler.used_budget += eval_cost - - _result: ERROR | dict[str, Any] - report = trial.report - if isinstance(report, ErrorReport): - _result = "error" - elif isinstance(report, SuccessReport): - _result = dict(report.results) - else: - _type = type(report) - raise TypeError(f"Unknown result type '{_type}' for report: {report}") - - _post_evaluation_hook( - trial, - _result, - logger, - loss_value_on_error, - ignore_errors, - ) + settings = WorkerSettings( + on_error=( + OnErrorPossibilities.IGNORE + if ignore_errors + else OnErrorPossibilities.RAISE_ANY_ERROR + ), + default_report_values=DefaultReportValues( + loss_value_on_error=loss_value_on_error, + cost_value_on_error=cost_value_on_error, + cost_if_not_provided=None, # TODO: User can't specify yet + learning_curve_on_error=None, # TODO: User can't specify yet + learning_curve_if_not_provided="loss", # report the loss as single value LC + ), + max_evaluations_total=max_evaluations_total, + include_in_progress_evaluations_towards_maximum=( + not continue_until_max_evaluation_completed + ), + max_cost_total=max_cost_total, + max_evaluations_for_worker=max_evaluations_for_worker, + max_evaluation_time_total_seconds=None, # TODO: User can't specify yet + max_wallclock_time_for_worker_seconds=None, # TODO: User can't specify yet + max_evaluation_time_for_worker_seconds=None, # TODO: User can't specify yet + max_cost_for_worker=None, # TODO: User can't specify yet + ) - evaluations_in_this_run += 1 + worker = DefaultWorker.new( + state=neps_state, + optimizer=optimizer, + evaluation_fn=evaluation_fn, + settings=settings, + _pre_sample_hooks=list(pre_load_hooks) if pre_load_hooks is not None else None, + ) + worker.run() diff --git a/neps/search_spaces/architecture/graph.py b/neps/search_spaces/architecture/graph.py index b1cd2e8b..f776b231 100644 --- a/neps/search_spaces/architecture/graph.py +++ b/neps/search_spaces/architecture/graph.py @@ -300,9 +300,7 @@ def copy_dict(d): if isinstance(v, Graph): copied_dict[k] = v.copy() elif isinstance(v, list): - copied_dict[k] = [ - i.copy() if isinstance(i, Graph) else i for i in v - ] + copied_dict[k] = [i.copy() if isinstance(i, Graph) else i for i in v] elif isinstance(v, torch.nn.Module) or isinstance(v, AbstractPrimitive): copied_dict[k] = copy.deepcopy(v) return copied_dict @@ -634,6 +632,7 @@ def parse(self): f"{self.name}-comb_op_at({node_idx})", self.nodes[node_idx]["comb_op"], ) + for neigbor_idx in self.neighbors(node_idx): edge_data = self.get_edge_data(node_idx, neigbor_idx) if isinstance(edge_data.op, Graph): @@ -642,6 +641,7 @@ def parse(self): for primitive in edge_data.op.get_embedded_ops(): if isinstance(primitive, Graph): primitive.parse() + self.add_module( f"{self.name}-edge({node_idx},{neigbor_idx})", edge_data.op, @@ -705,9 +705,7 @@ def _get_child_graphs(self, 
single_instances: bool = False) -> list: node_data = self.nodes[node_idx] if "subgraph" in node_data: graphs.append(node_data["subgraph"]) - graphs.append( - node_data["subgraph"]._get_child_graphs() - ) + graphs.append(node_data["subgraph"]._get_child_graphs()) for _, _, edge_data in self.edges.data(): if isinstance(edge_data.op, Graph): @@ -724,16 +722,12 @@ def _get_child_graphs(self, single_instances: bool = False) -> list: if embedded_ops is not None: if isinstance(embedded_ops, Graph): graphs.append(embedded_ops) - graphs.append( - embedded_ops._get_child_graphs() - ) + graphs.append(embedded_ops._get_child_graphs()) elif isinstance(embedded_ops, list): for child_op in edge_data.op.get_embedded_ops(): if isinstance(child_op, Graph): graphs.append(child_op) - graphs.append( - child_op._get_child_graphs() - ) + graphs.append(child_op._get_child_graphs()) else: logger.debug( "Got embedded op, but is neither a graph nor a list: {}".format( @@ -971,9 +965,7 @@ def update_nodes( in_edges = [ (v, data) for v, u, data in in_edges if not data.is_final() ] # u is same for all - out_edges = list( - graph.out_edges(node_idx, data=True) - ) # (v, u, data) + out_edges = list(graph.out_edges(node_idx, data=True)) # (v, u, data) out_edges = [ (u, data) for v, u, data in out_edges if not data.is_final() ] # v is same for all diff --git a/neps/search_spaces/search_space.py b/neps/search_spaces/search_space.py index b02a7662..1b990802 100644 --- a/neps/search_spaces/search_space.py +++ b/neps/search_spaces/search_space.py @@ -94,23 +94,21 @@ def pipeline_space_from_configspace( return pipeline_space -def pipeline_space_from_yaml( # noqa: C901, PLR0912 +def pipeline_space_from_yaml( # noqa: C901 config: str | Path | dict, ) -> dict[str, Parameter]: """Reads configuration details from a YAML file or a dictionary and constructs a pipeline space dictionary. Args: - config (str | Path | dict): Path to the YAML file or a dictionary containing - parameter configurations. + config: Path to the YAML file or a dictionary containing parameter configurations. Returns: - dict[str, Parameter]: A dictionary where keys are parameter names and values - are parameter objects. + A dictionary where keys are parameter names and values are parameter objects. Raises: SearchSpaceFromYamlFileError: Raised if there are issues with the YAML file's - format, contents, or if the dictionary is invalid. + format, contents, or if the dictionary is invalid. 
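+
+    A small illustrative example (hypothetical parameter names and values), which
+    works the same whether the mapping comes from a YAML file or a plain dict:
+
+        space = pipeline_space_from_yaml(
+            {
+                "lr": {"type": "float", "lower": 1e-5, "upper": 1e-1},
+                "epochs": {"type": "int", "lower": 1, "upper": 10},
+                "optimizer": {"type": "cat", "choices": ["adam", "sgd"]},
+                "batch_size": 64,  # plain values become a ConstantParameter
+            }
+        )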
""" try: if isinstance(config, (str, Path)): @@ -134,31 +132,23 @@ def pipeline_space_from_yaml( # noqa: C901, PLR0912 except yaml.YAMLError as e: raise ValueError(f"The file at {config} is not a valid YAML file.") from e - # Initialize the pipeline space pipeline_space: dict[str, Parameter] = {} - # Iterate over the items in the YAML configuration for name, details in config.items(): - # get parameter type param_type = deduce_type(name, details) - # init parameter by checking type if param_type in ("int", "integer"): - # Integer Parameter formatted_details = formatting_int(name, details) pipeline_space[name] = IntegerParameter(**formatted_details) elif param_type == "float": - # Float Parameter formatted_details = formatting_float(name, details) pipeline_space[name] = FloatParameter(**formatted_details) elif param_type in ("cat", "categorical"): - # Categorical parameter formatted_details = formatting_cat(name, details) pipeline_space[name] = CategoricalParameter(**formatted_details) elif param_type == "const": - # Constant parameter - formatted_details = formatting_const(details) # type: ignore - pipeline_space[name] = ConstantParameter(formatted_details) + const_details = formatting_const(details) + pipeline_space[name] = ConstantParameter(const_details) else: # Handle unknown parameter type raise TypeError( @@ -408,7 +398,6 @@ def _smbo_mutation(self, *, patience: int = 5, **kwargs: Any) -> Self: mutated_param = hp.mutate(**kwargs) except Exception as e: # noqa: BLE001 logger.warning(f"{chosen_hp_name} failed to mutate! Error: {e}, {kwargs}") - # !- print(traceback.format_exc()) # noq-a: T201 continue new_params = { @@ -683,10 +672,17 @@ def serialize(self) -> dict[str, Hashable]: serialized_config[name] = hp.serialize_value(hp.value) return serialized_config - def load_from(self, config: Mapping[str, Any | GraphParameter]) -> None: - """Load a configuration from a dictionary, setting all the values.""" + def from_dict(self, config: Mapping[str, Any | GraphParameter]) -> SearchSpace: + """Create a new instance of this search space with parameters set from the config. + + Args: + config: The dictionary of hyperparameters to set with values. + """ + new = self.clone() for name, val in config.items(): - self.hyperparameters[name].load_from(val) + new.hyperparameters[name].load_from(val) + + return new def clone(self, *, _with_tabular: bool = False) -> SearchSpace: """Create a copy of the search space.""" diff --git a/neps/search_spaces/yaml_search_space_utils.py b/neps/search_spaces/yaml_search_space_utils.py index 9bcfcb11..8b25b1b0 100644 --- a/neps/search_spaces/yaml_search_space_utils.py +++ b/neps/search_spaces/yaml_search_space_utils.py @@ -1,12 +1,27 @@ from __future__ import annotations + import logging import re +from typing import Literal, overload logger = logging.getLogger("neps") -def convert_scientific_notation(value: str | int | float, show_usage_flag=False) \ - -> float | (float, bool): +@overload +def convert_scientific_notation( + value: str | int | float, show_usage_flag: Literal[False] = False +) -> float: ... + + +@overload +def convert_scientific_notation( + value: str | int | float, show_usage_flag: Literal[True] +) -> tuple[float, bool]: ... + + +def convert_scientific_notation( + value: str | int | float, show_usage_flag: bool = False +) -> float | tuple[float, bool]: """ Convert a given value to a float if it's a string that matches scientific e notation. 
This is especially useful for numbers like "3.3e-5" which YAML parsers may not @@ -72,7 +87,7 @@ class SearchSpaceFromYamlFileError(Exception): raise SearchSpaceFromYamlFileError(e) """ - def __init__(self, exception): + def __init__(self, exception: Exception) -> None: self.exception_type = type(exception).__name__ self.message = ( f"Error occurred during initialization of search space from " @@ -84,33 +99,34 @@ def __init__(self, exception): def deduce_type( name: str, details: dict[str, str | int | float] | str | int | float ) -> str: - """ - Deduces the parameter type from details. + """Deduces the parameter type from details. Args: - name (str): The name of the parameter. - details (dict | str | int | float): A dictionary containing parameter - specifications or a direct value (string, integer, or float). + name: The name of the parameter. + details: A dictionary containing parameter specifications or + a direct value (string, integer, or float). Returns: - str: The deduced parameter type ('int', 'float', 'categorical', or 'constant'). + The deduced parameter type ('int', 'float', 'categorical', or 'constant'). Raises: TypeError: If the type cannot be deduced or the details don't align with expected constraints. - """ - if isinstance(details, (str, int, float)): - param_type = "const" - elif isinstance(details, dict): + """ + if isinstance(details, (str, int, float)): + return "const" + + if isinstance(details, dict): if "type" in details: - param_type = details.pop("type").lower() - else: - param_type = deduce_param_type(name, details) - else: - raise TypeError( - f"Unable to deduce parameter type for '{name}' with details '{details}'.") + param_type = details.pop("type") + assert isinstance(param_type, str) + return param_type.lower() - return param_type + return deduce_param_type(name, details) + + raise TypeError( + f"Unable to deduce parameter type for '{name}' with details '{details}'." + ) def deduce_param_type(name: str, details: dict[str, int | str | float]) -> str: @@ -284,7 +300,7 @@ def formatting_float(name: str, details: dict[str, str | int | float]) -> dict: return details -def formatting_cat(name: str, details: dict[str, str | int | float]) -> dict: +def formatting_cat(name: str, details: dict[str, list | str | int | float]) -> dict: """ This function ensures that the 'choices' key in the details is a list and attempts to convert any elements expressed in scientific notation to floats. It also handles @@ -303,34 +319,44 @@ def formatting_cat(name: str, details: dict[str, str | int | float]) -> dict: """ if not isinstance(details["choices"], list): raise TypeError(f"The 'choices' for '{name}' must be a list.") + for i, element in enumerate(details["choices"]): try: converted_value, e_flag = convert_scientific_notation( element, show_usage_flag=True ) + if e_flag: - details["choices"][ - i - ] = converted_value # Replace the element at the same position + # Replace the element at the same position + details["choices"][i] = converted_value except ValueError: pass # If a ValueError occurs, simply continue to the next element + if "default" in details: e_flag = False + extracted_default = details["default"] + if not isinstance(extracted_default, (str, int, float)): + raise TypeError( + f"The 'default' value for '{name}' must be a string, integer, or float." + f" Got {type(extracted_default).__name__}." 
+ ) + try: # check if e notation, if then convert to number default, e_flag = convert_scientific_notation( - details["default"], show_usage_flag=True + extracted_default, show_usage_flag=True ) except ValueError: pass # if default value is not in a numeric format, Value Error occurs + if e_flag is True: details["default"] = default + return details def formatting_const(details: str | int | float) -> str | int | float: - """ - Validates and converts a constant parameter. + """Validates and converts a constant parameter. This function checks if the 'details' parameter contains a value expressed in scientific notation and converts it to a float. It ensures that the input @@ -354,8 +380,8 @@ def formatting_const(details: str | int | float) -> str | int | float: # if the value is not able to convert to float a ValueError get raised by # convert_scientific_notation function pass + if e_flag: details = converted_value - return details - + return details diff --git a/neps/state/__init__.py b/neps/state/__init__.py new file mode 100644 index 00000000..6508dba2 --- /dev/null +++ b/neps/state/__init__.py @@ -0,0 +1,19 @@ +from neps.state.protocols import ( + Locker, + ReaderWriter, + Synced, + VersionedResource, + Versioner, +) +from neps.state.seed_snapshot import SeedSnapshot +from neps.state.trial import Trial + +__all__ = [ + "Locker", + "SeedSnapshot", + "Synced", + "Trial", + "ReaderWriter", + "Versioner", + "VersionedResource", +] diff --git a/neps/state/_eval.py b/neps/state/_eval.py new file mode 100644 index 00000000..0d08dfdd --- /dev/null +++ b/neps/state/_eval.py @@ -0,0 +1,195 @@ +from __future__ import annotations + +import inspect +import logging +import time +import traceback +from pathlib import Path +from typing import TYPE_CHECKING, Any, Callable, Literal, Mapping, TypeVar + +from neps.exceptions import NePSError + +if TYPE_CHECKING: + from neps.state.settings import DefaultReportValues + from neps.state.trial import Trial + +logger = logging.getLogger(__name__) + +Loc = TypeVar("Loc") +_notset = object() + + +class GotNonePendingTrialForEvalautionError(NePSError): + """Raised when trying to evaluate a trial that is not in a pending state.""" + + def __init__( + self, + trial_id: Trial.ID, + state: Trial.State, + worker_id: str, + *args: Any, + ): + """Initialize the error. + + Args: + trial_id: The ID of the trial that was not in a pending state. + state: The state of the trial. + worker_id: The ID of the worker that picked up this trial. + *args: Additional arguments to pass to the parent class. + """ + super().__init__(trial_id, state, worker_id, *args) + self.trial_id = trial_id + self.state = state + self.worker_id = worker_id + + def __str__(self) -> str: + return ( + f"Trial '{self.trial_id}' is not in a pending state but in '{self.state}'." + f"This trial was picked up for evaluation by worker '{self.worker_id}'." 
+ ) + + +def _check_float(value: Any, name: str) -> float: + try: + return float(value) + except (TypeError, ValueError) as e: + raise ValueError( + f"The '{name}' should be a float but got a `{type(value)}`" + f" with value of {value}", + ) from e + + +def parse_user_result( + user_result: float | dict[str, Any], + *, + default_cost_value: float | None = None, + default_learning_curve: Literal["loss"] | list[float] | None = None, +) -> tuple[float, float | None, list[float] | None, dict[str, Any]]: + """Check if the trial has succeeded.""" + if isinstance(user_result, Mapping): + extracted_loss = user_result.pop("loss", _notset) + if extracted_loss is _notset: + raise KeyError( + "The 'loss' should be provided in the evaluation result if providing" + " a dictionary." + ) + extracted_cost = user_result.pop("cost", default_cost_value) + + extracted_learning_curve = user_result.pop("learning_curve", _notset) + + if extracted_learning_curve is _notset: + # HACK: Backwards compat, check if it's in the "info_dict" key + if "info_dict" in user_result: + extracted_learning_curve = user_result["info_dict"].pop( + "learning_curve", + default_learning_curve, + ) + else: + extracted_learning_curve = default_learning_curve + + if extracted_learning_curve == "loss": + extracted_learning_curve = [extracted_loss] + + extra = user_result + else: + extracted_loss = user_result + extracted_learning_curve = ( + None + if default_learning_curve is None + else [user_result] + if default_learning_curve == "loss" + else default_learning_curve + ) + extracted_cost = default_cost_value + extra = {} + + loss = _check_float(extracted_loss, "loss") + cost = _check_float(extracted_cost, "cost") if extracted_cost is not None else None + learning_curve = ( + [float(v) for v in extracted_learning_curve] + if extracted_learning_curve is not None + else None + ) + return loss, cost, learning_curve, extra + + +def _eval_trial( + *, + trial: Trial, + default_report_values: DefaultReportValues, + fn: Callable[..., Any], + **kwargs: Any, +) -> Trial.Report: + start = time.monotonic() + try: + user_result = fn(**kwargs, **trial.config) + # Something went wrong in evaluation + except Exception as e: + duration = time.monotonic() - start + time_end = time.time() + logger.error(f"Error during evaluation of '{trial.id}': {trial.config}.") + logger.exception(e) + report = trial.set_complete( + report_as="crashed", + loss=default_report_values.loss_value_on_error, + cost=default_report_values.cost_value_on_error, + learning_curve=default_report_values.learning_curve_on_error, + extra=None, + err=e, + tb=traceback.format_exc(), + time_end=time_end, + evaluation_duration=duration, + ) + else: + duration = time.monotonic() - start + time_end = time.time() + logger.info(f"Successful evaluation of '{trial.id}': {user_result}.") + + loss, cost, learning_curve, extra = parse_user_result( + dict(user_result) if isinstance(user_result, Mapping) else user_result, + default_cost_value=default_report_values.cost_if_not_provided, + default_learning_curve=default_report_values.learning_curve_if_not_provided, + ) + report = trial.set_complete( + report_as="success", + loss=loss, + cost=cost, + learning_curve=learning_curve, + err=None, + tb=None, + extra=extra, + time_end=time_end, + evaluation_duration=duration, + ) + + return report + + +def evaluate_trial( + trial: Trial, + *, + evaluation_fn: Callable[..., Any], + default_report_values: DefaultReportValues, +) -> tuple[Trial, Trial.Report]: + # NOTE: For now we are assuming everything is on a 
shared filesystem
+    # will have to revisit if the location can be elsewhere
+    trial_location = Path(trial.metadata.location)
+    prev_trial_location = (
+        Path(trial.metadata.previous_trial_location)
+        if trial.metadata.previous_trial_location is not None
+        else None
+    )
+
+    params = {
+        "pipeline_directory": trial_location,
+        "previous_pipeline_directory": prev_trial_location,
+    }
+    sigkeys = inspect.signature(evaluation_fn).parameters.keys()
+    injectable_params = {key: val for key, val in params.items() if key in sigkeys}
+    report = _eval_trial(
+        trial=trial,
+        fn=evaluation_fn,
+        default_report_values=default_report_values,
+        **injectable_params,
+    )
+    return trial, report
diff --git a/neps/state/err_dump.py b/neps/state/err_dump.py
new file mode 100644
index 00000000..167ab48f
--- /dev/null
+++ b/neps/state/err_dump.py
@@ -0,0 +1,77 @@
+"""Error dump for serializing errors.
+
+This resource is used to store errors that can be serialized and deserialized,
+such that they can be shared between workers.
+"""
+
+from __future__ import annotations
+
+from dataclasses import dataclass, field
+from typing import ClassVar
+
+from neps.exceptions import NePSError
+
+
+class SerializedError(NePSError):
+    """An error that is serialized."""
+
+
+@dataclass
+class SerializableTrialError:
+    """Error information for a trial."""
+
+    trial_id: str
+    """The ID of the trial."""
+
+    worker_id: str
+    """The ID of the worker that evaluated the trial which caused the error."""
+
+    err_type: str
+    """The type of the error."""
+
+    err: str
+    """The error message."""
+
+    tb: str | None
+    """The traceback of the error."""
+
+    def as_raisable(self) -> SerializedError:
+        """Convert the error to a raisable error."""
+        return SerializedError(
+            f"An error occurred during the evaluation of a trial '{self.trial_id}' which"
+            f" was evaluated by worker '{self.worker_id}'. The original error could not"
+            " be deserialized but had the following information:"
+            "\n"
+            f"{self.err_type}: {self.err}"
+            "\n\n"
+            f"{self.tb}"
+        )
+
+
+@dataclass
+class ErrDump:
+    """A collection of errors that can be serialized and deserialized."""
+
+    SerializableTrialError: ClassVar = SerializableTrialError
+
+    errs: list[SerializableTrialError] = field(default_factory=list)
+
+    def append(self, err: SerializableTrialError) -> None:
+        """Append an error to the reported errors."""
+        return self.errs.append(err)
+
+    def __len__(self) -> int:
+        return len(self.errs)
+
+    def __bool__(self) -> bool:
+        return bool(self.errs)
+
+    def empty(self) -> bool:
+        """Check if the error dump is empty."""
+        return not self.errs
+
+    def latest_err_as_raisable(self) -> SerializedError | None:
+        """Get the latest error."""
+        if self.errs:
+            return self.errs[-1].as_raisable()
+        return None
diff --git a/neps/state/filebased.py b/neps/state/filebased.py
new file mode 100644
index 00000000..6940016d
--- /dev/null
+++ b/neps/state/filebased.py
@@ -0,0 +1,672 @@
+"""This module houses the implementation of a NePSState that
+does everything on the filesystem, i.e. locking, versioning and
+storing/loading.
+
+The main components are:
+* [`FileVersioner`][neps.state.filebased.FileVersioner]: A versioner that
+  stores a version tag on disk, usually for a resource like a Trial.
+* [`FileLocker`][neps.state.filebased.FileLocker]: A locker that uses a file
+  to lock between processes.
+* [`TrialRepoInDirectory`][neps.state.filebased.TrialRepoInDirectory]: A
+  repository of Trials that are stored in a directory.
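# A minimal, illustrative sketch (values are made up) of the evaluation-result
# formats that parse_user_result() in neps/state/_eval.py above accepts: a bare
# float is taken as the loss, while a mapping must contain "loss" and may carry
# "cost", "learning_curve" and arbitrary extra keys.
from neps.state._eval import parse_user_result

# Bare float: only the loss is known; cost and learning curve fall back to defaults.
loss, cost, curve, extra = parse_user_result(0.42)
assert (loss, cost, curve, extra) == (0.42, None, None, {})

# Mapping: "cost" and "learning_curve" are extracted, remaining keys become "extra".
loss, cost, curve, extra = parse_user_result(
    {"loss": 0.42, "cost": 3.0, "learning_curve": [0.9, 0.5, 0.42], "n_params": 1000}
)
assert cost == 3.0 and extra == {"n_params": 1000}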
+* `ReaderWriterXXX`: Reader/writers for various resources NePSState needs +* [`load_filebased_neps_state`][neps.state.filebased.load_filebased_neps_state]: + A function to load a NePSState from a directory. +* [`create_filebased_neps_state`][neps.state.filebased.create_filebased_neps_state]: + A function to create a new NePSState in a directory. +""" + +from __future__ import annotations + +import json +import logging +from contextlib import contextmanager +from dataclasses import asdict, dataclass, field +from pathlib import Path +from typing import ClassVar, Iterable, Iterator, TypeVar +from typing_extensions import override +from uuid import uuid4 + +import numpy as np +import portalocker as pl + +from neps.env import ( + GLOBAL_ERR_FILELOCK_POLL, + GLOBAL_ERR_FILELOCK_TIMEOUT, + SEED_SNAPSHOT_FILELOCK_POLL, + SEED_SNAPSHOT_FILELOCK_TIMEOUT, + TRIAL_FILELOCK_POLL, + TRIAL_FILELOCK_TIMEOUT, +) +from neps.exceptions import NePSError +from neps.state.err_dump import ErrDump +from neps.state.neps_state import NePSState +from neps.state.optimizer import BudgetInfo, OptimizationState, OptimizerInfo +from neps.state.protocols import Locker, ReaderWriter, Synced, TrialRepo, Versioner +from neps.state.seed_snapshot import SeedSnapshot +from neps.state.trial import Trial +from neps.utils.files import deserialize, serialize + +logger = logging.getLogger(__name__) +K = TypeVar("K") +T = TypeVar("T") + + +def make_sha() -> str: + """Generate a str hex sha.""" + return uuid4().hex + + +@dataclass +class FileVersioner(Versioner): + """A versioner that stores a version tag on disk.""" + + version_file: Path + + @override + def current(self) -> str | None: + if not self.version_file.exists(): + return None + return self.version_file.read_text() + + @override + def bump(self) -> str: + sha = make_sha() + self.version_file.write_text(sha) + return sha + + +@dataclass +class TrialRepoInDirectory(TrialRepo[Path]): + """A repository of Trials that are stored in a directory.""" + + directory: Path + _cache: dict[Trial.ID, Synced[Trial, Path]] = field(default_factory=dict) + + @override + def all_trial_ids(self) -> set[Trial.ID]: + """List all the trial ids in this trial Repo.""" + return { + config_path.name.replace("config_", "") + for config_path in self.directory.iterdir() + if config_path.name.startswith("config_") and config_path.is_dir() + } + + @override + def get_by_id( + self, + trial_id: Trial.ID, + *, + lock_poll: float = TRIAL_FILELOCK_POLL, + lock_timeout: float | None = TRIAL_FILELOCK_TIMEOUT, + ) -> Synced[Trial, Path]: + """Get a Trial by its ID. + + !!! note + + This will **not** explicitly sync the trial and it is up to the caller + to do so. Most of the time, the caller should be a NePSState + object which will do that for you. However if the trial is not in the + cache, then it will be loaded from disk which requires syncing. + + Args: + trial_id: The ID of the trial to get. + lock_poll: The poll time for the file lock. + lock_timeout: The timeout for the file lock. + + Returns: + The trial with the given ID. 
+ """ + trial = self._cache.get(trial_id) + if trial is not None: + return trial + + config_path = self.directory / f"config_{trial_id}" + if not config_path.exists(): + raise TrialRepo.TrialNotFoundError(trial_id, config_path) + + trial = Synced.load( + location=config_path, + locker=FileLocker( + lock_path=config_path / ".lock", + poll=lock_poll, + timeout=lock_timeout, + ), + versioner=FileVersioner(version_file=config_path / ".version"), + reader_writer=ReaderWriterTrial(), + ) + self._cache[trial_id] = trial + return trial + + @override + def get_by_ids(self, trial_ids: Iterable[Trial.ID]) -> dict[str, Synced[Trial, Path]]: + """Get multiple Trials by their IDs. + + !!! note + See [`get_by_id()`][neps.state.filebased.TrialRepoInDirectory.get_by_id] + for notes on the trials syncing. + + Args: + trial_ids: The IDs of the trials to get. + + Returns: + A dictionary of the trials with the given IDs. + + Raises: + TrialRepo.TrialNotFoundError: If a trial is not found. + """ + return {trial_id: self.get_by_id(trial_id) for trial_id in trial_ids} + + @override + def put_new( + self, + trial: Trial, + *, + lock_poll: float = TRIAL_FILELOCK_POLL, + lock_timeout: float | None = TRIAL_FILELOCK_TIMEOUT, + ) -> Synced[Trial, Path]: + """Put a new Trial into the repository. + + Args: + trial: The trial to put. + lock_poll: The poll time for the file lock. + lock_timeout: The timeout for the file lock. + + Returns: + The synced trial. + + Raises: + TrialRepo.TrialAlreadyExistsError: If the trial already exists in the + repository. + """ + config_path = self.directory / f"config_{trial.metadata.id}" + if config_path.exists(): + raise TrialRepo.TrialAlreadyExistsError( + f"Trial '{trial.metadata.id}' already exists as '{config_path}'." + ) + + # HACK: We do this here as there is no way to know where a Trial will + # be located when it's created... + trial.metadata.location = str(config_path) + shared_trial = Synced.new( + data=trial, + location=config_path, + locker=FileLocker( + lock_path=config_path / ".lock", + poll=lock_poll, + timeout=lock_timeout, + ), + versioner=FileVersioner(version_file=config_path / ".version"), + reader_writer=ReaderWriterTrial(), + ) + self._cache[trial.metadata.id] = shared_trial + return shared_trial + + @override + def all(self) -> dict[Trial.ID, Synced[Trial, Path]]: + """Get a dictionary of all the Trials in the repository. + + !!! note + See [`get_by_id()`][neps.state.filebased.TrialRepoInDirectory.get_by_id] + for notes on the trials syncing. 
+ """ + return {trial_id: self.get_by_id(trial_id) for trial_id in self.all_trial_ids()} + + @override + def pending(self) -> Iterable[tuple[Trial.ID, Synced[Trial, Path]]]: + pending = [ + (_id, t, trial.metadata.time_sampled) + for (_id, t) in self.all().items() + if (trial := t.synced()).state == Trial.State.PENDING + ] + return iter((_id, t) for _id, t, _ in sorted(pending, key=lambda x: x[2])) + + +@dataclass +class ReaderWriterTrial(ReaderWriter[Trial, Path]): + """ReaderWriter for Trial objects.""" + + CONFIG_FILENAME = "config.yaml" + METADATA_FILENAME = "metadata.yaml" + STATE_FILENAME = "state.txt" + REPORT_FILENAME = "report.yaml" + PREVIOUS_TRIAL_ID_FILENAME = "previous_trial_id.txt" + + @override + @classmethod + def read(cls, directory: Path) -> Trial: + config_path = directory / cls.CONFIG_FILENAME + metadata_path = directory / cls.METADATA_FILENAME + state_path = directory / cls.STATE_FILENAME + report_path = directory / cls.REPORT_FILENAME + + return Trial( + config=deserialize(config_path), + metadata=Trial.MetaData(**deserialize(metadata_path)), + state=Trial.State(state_path.read_text(encoding="utf-8").strip()), + report=( + Trial.Report(**deserialize(report_path)) if report_path.exists() else None + ), + ) + + @override + @classmethod + def write(cls, trial: Trial, directory: Path) -> None: + config_path = directory / cls.CONFIG_FILENAME + metadata_path = directory / cls.METADATA_FILENAME + state_path = directory / cls.STATE_FILENAME + + serialize(trial.config, config_path) + serialize(asdict(trial.metadata), metadata_path) + state_path.write_text(trial.state.value, encoding="utf-8") + + if trial.metadata.previous_trial_id is not None: + previous_trial_path = directory / cls.PREVIOUS_TRIAL_ID_FILENAME + previous_trial_path.write_text(trial.metadata.previous_trial_id) + + if trial.report is not None: + report_path = directory / cls.REPORT_FILENAME + serialize(asdict(trial.report), report_path) + + +@dataclass +class ReaderWriterSeedSnapshot(ReaderWriter[SeedSnapshot, Path]): + """ReaderWriter for SeedSnapshot objects.""" + + # It seems like they're all uint32 but I can't be sure. 
+ PY_RNG_STATE_DTYPE: ClassVar = np.int64 + + PY_RNG_TUPLE_FILENAME: ClassVar = "py_rng.npy" + NP_RNG_STATE_FILENAME: ClassVar = "np_rng_state.npy" + TORCH_RNG_STATE_FILENAME: ClassVar = "torch_rng_state.pt" + TORCH_CUDA_RNG_STATE_FILENAME: ClassVar = "torch_cuda_rng_state.pt" + SEED_INFO_FILENAME: ClassVar = "seed_info.json" + + @override + @classmethod + def read(cls, directory: Path) -> SeedSnapshot: + seedinfo_path = directory / cls.SEED_INFO_FILENAME + py_rng_path = directory / cls.PY_RNG_TUPLE_FILENAME + np_rng_path = directory / cls.NP_RNG_STATE_FILENAME + torch_rng_path = directory / cls.TORCH_RNG_STATE_FILENAME + torch_cuda_rng_path = directory / cls.TORCH_CUDA_RNG_STATE_FILENAME + + # Load and set pythons rng + py_rng_state = tuple( + int(x) for x in np.fromfile(py_rng_path, dtype=cls.PY_RNG_STATE_DTYPE) + ) + np_rng_state = np.fromfile(np_rng_path, dtype=np.uint32) + seed_info = deserialize(seedinfo_path) + + torch_exists = torch_rng_path.exists() or torch_cuda_rng_path.exists() + + # By specifying `weights_only=True`, it disables arbitrary object loading + torch_rng_state = None + torch_cuda_rng = None + if torch_exists: + import torch + + if torch_rng_path.exists(): + torch_rng_state = torch.load(torch_rng_path, weights_only=True) + + if torch_cuda_rng_path.exists(): + # By specifying `weights_only=True`, it disables arbitrary object loading + torch_cuda_rng = torch.load(torch_cuda_rng_path, weights_only=True) + + return SeedSnapshot( + np_rng=( + seed_info["np_rng_kind"], + np_rng_state, + seed_info["np_pos"], + seed_info["np_has_gauss"], + seed_info["np_cached_gauss"], + ), + py_rng=( + seed_info["py_rng_version"], + py_rng_state, + seed_info["py_guass_next"], + ), + torch_rng=torch_rng_state, + torch_cuda_rng=torch_cuda_rng, + ) + + @override + @classmethod + def write(cls, snapshot: SeedSnapshot, directory: Path) -> None: + seedinfo_path = directory / cls.SEED_INFO_FILENAME + py_rng_path = directory / cls.PY_RNG_TUPLE_FILENAME + np_rng_path = directory / cls.NP_RNG_STATE_FILENAME + torch_rng_path = directory / cls.TORCH_RNG_STATE_FILENAME + torch_cuda_rng_path = directory / cls.TORCH_CUDA_RNG_STATE_FILENAME + + py_rng_version, py_rng_state, py_guass_next = snapshot.py_rng + + np.array(py_rng_state, dtype=cls.PY_RNG_STATE_DTYPE).tofile(py_rng_path) + + seed_info = { + "np_rng_kind": snapshot.np_rng[0], + "np_pos": snapshot.np_rng[2], + "np_has_gauss": snapshot.np_rng[3], + "np_cached_gauss": snapshot.np_rng[4], + "py_rng_version": py_rng_version, + "py_guass_next": py_guass_next, + } + serialize(seed_info, seedinfo_path) + np_rng_state = snapshot.np_rng[1] + np_rng_state.tofile(np_rng_path) + + if snapshot.torch_rng is not None: + import torch + + torch.save(snapshot.torch_rng, torch_rng_path) + + if snapshot.torch_cuda_rng is not None: + import torch + + torch.save(snapshot.torch_cuda_rng, torch_cuda_rng_path) + + +@dataclass +class ReaderWriterOptimizerInfo(ReaderWriter[OptimizerInfo, Path]): + """ReaderWriter for OptimizerInfo objects.""" + + INFO_FILENAME: ClassVar = "info.yaml" + + @override + @classmethod + def read(cls, directory: Path) -> OptimizerInfo: + info_path = directory / cls.INFO_FILENAME + return OptimizerInfo(info=deserialize(info_path)) + + @override + @classmethod + def write(cls, optimizer_info: OptimizerInfo, directory: Path) -> None: + info_path = directory / cls.INFO_FILENAME + serialize(optimizer_info.info, info_path) + + +# TODO(eddiebergman): If an optimizer wants to store some hefty state, i.e. 
a numpy array +# or something, this is horribly inefficient and we would need to adapt OptimizerState to +# handle this. +# TODO(eddiebergman): May also want to consider serializing budget into a seperate entity +@dataclass +class ReaderWriterOptimizationState(ReaderWriter[OptimizationState, Path]): + """ReaderWriter for OptimizationState objects.""" + + STATE_FILE_NAME: ClassVar = "state.yaml" + + @override + @classmethod + def read(cls, directory: Path) -> OptimizationState: + state_path = directory / cls.STATE_FILE_NAME + state = deserialize(state_path) + budget_info = state.get("budget") + budget = BudgetInfo(**budget_info) if budget_info is not None else None + return OptimizationState( + shared_state=state.get("shared_state") or {}, + budget=budget, + ) + + @override + @classmethod + def write(cls, info: OptimizationState, directory: Path) -> None: + info_path = directory / cls.STATE_FILE_NAME + serialize(asdict(info), info_path) + + +@dataclass +class ReaderWriterErrDump(ReaderWriter[ErrDump, Path]): + """ReaderWriter for shared error lists.""" + + name: str + + @override + def read(self, directory: Path) -> ErrDump: + errors_path = directory / f"{self.name}-errors.jsonl" + with errors_path.open("r") as f: + data = [json.loads(line) for line in f] + + return ErrDump([ErrDump.SerializableTrialError(**d) for d in data]) + + @override + def write(self, err_dump: ErrDump, directory: Path) -> None: + errors_path = directory / f"{self.name}-errors.jsonl" + with errors_path.open("w") as f: + lines = [json.dumps(asdict(trial_err)) for trial_err in err_dump.errs] + f.write("\n".join(lines)) + + +FILELOCK_EXCLUSIVE_NONE_BLOCKING = pl.LOCK_EX | pl.LOCK_NB + + +@dataclass +class FileLocker(Locker): + """File-based locker using `portalocker`. + + [`FileLocker`][neps.state.locker.file.FileLocker] implements + the [`Locker`][neps.state.locker.locker.Locker] protocol using + `portalocker` to lock a file between processes with a shared + filesystem. + """ + + lock_path: Path + poll: float + timeout: float | None + + def __post_init__(self) -> None: + self.lock_path = self.lock_path.resolve().absolute() + + @override + def is_locked(self) -> bool: + if not self.lock_path.exists(): + return False + try: + with self.lock(fail_if_locked=True): + pass + return False + except pl.exceptions.LockException: + return True + + @override + @contextmanager + def lock( + self, + *, + fail_if_locked: bool = False, + ) -> Iterator[None]: + self.lock_path.parent.mkdir(parents=True, exist_ok=True) + self.lock_path.touch(exist_ok=True) + logger.debug("Acquiring lock on %s", self.lock_path) + with pl.Lock( + self.lock_path, + check_interval=self.poll, + timeout=self.timeout, + flags=FILELOCK_EXCLUSIVE_NONE_BLOCKING, + fail_when_locked=fail_if_locked, + ): + yield + logger.debug("Released lock on %s", self.lock_path) + + +def load_filebased_neps_state(directory: Path) -> NePSState[Path]: + """Load a NePSState from a directory. + + Args: + directory: The directory to load the state from. + + Returns: + The loaded NePSState. + + Raises: + FileNotFoundError: If no NePSState is found at the given directory. 
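# Minimal sketch of guarding a critical section with the FileLocker defined above;
# the lock path and timings are illustrative placeholders. Workers on a shared
# filesystem that point at the same lock file exclude each other.
from pathlib import Path

from neps.state.filebased import FileLocker

locker = FileLocker(lock_path=Path("run_dir/.lock"), poll=0.1, timeout=10.0)

with locker.lock():
    # Only one process at a time runs this block; others poll every 0.1s and
    # give up after roughly 10 seconds.
    ...

assert not locker.is_locked()  # the lock is released when the context exits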
+ """ + if not directory.exists(): + raise FileNotFoundError(f"No NePSState found at '{directory}'.") + directory.mkdir(parents=True, exist_ok=True) + config_dir = directory / "configs" + config_dir.mkdir(parents=True, exist_ok=True) + seed_dir = directory / ".seed_state" + seed_dir.mkdir(parents=True, exist_ok=True) + error_dir = directory / ".errors" + error_dir.mkdir(parents=True, exist_ok=True) + optimizer_state_dir = directory / ".optimizer_state" + optimizer_state_dir.mkdir(parents=True, exist_ok=True) + optimizer_info_dir = directory / ".optimizer_info" + optimizer_info_dir.mkdir(parents=True, exist_ok=True) + + return NePSState( + location=str(directory.absolute().resolve()), + _trials=TrialRepoInDirectory(config_dir), + _optimizer_info=Synced.load( + location=optimizer_info_dir, + versioner=FileVersioner(version_file=optimizer_info_dir / ".version"), + locker=FileLocker( + lock_path=optimizer_info_dir / ".lock", + poll=0.01, + timeout=None, + ), + reader_writer=ReaderWriterOptimizerInfo(), + ), + _seed_state=Synced.load( + location=seed_dir, + reader_writer=ReaderWriterSeedSnapshot(), + versioner=FileVersioner(version_file=seed_dir / ".version"), + locker=FileLocker( + lock_path=seed_dir / ".lock", + poll=SEED_SNAPSHOT_FILELOCK_POLL, + timeout=SEED_SNAPSHOT_FILELOCK_TIMEOUT, + ), + ), + _shared_errors=Synced.load( + location=error_dir, + reader_writer=ReaderWriterErrDump("all"), + versioner=FileVersioner(version_file=error_dir / ".all.version"), + locker=FileLocker( + lock_path=error_dir / ".all.lock", + poll=GLOBAL_ERR_FILELOCK_POLL, + timeout=GLOBAL_ERR_FILELOCK_TIMEOUT, + ), + ), + _optimizer_state=Synced.load( + location=optimizer_state_dir, + reader_writer=ReaderWriterOptimizationState(), + versioner=FileVersioner(version_file=optimizer_state_dir / ".version"), + locker=FileLocker( + lock_path=optimizer_state_dir / ".lock", + poll=GLOBAL_ERR_FILELOCK_POLL, + timeout=GLOBAL_ERR_FILELOCK_TIMEOUT, + ), + ), + ) + + +def create_or_load_filebased_neps_state( + directory: Path, + *, + optimizer_info: OptimizerInfo, + optimizer_state: OptimizationState, +) -> NePSState[Path]: + """Create a new NePSState in a directory or load the existing one + if it already exists. + + !!! warning + + We check that the optimizer info in the NePSState on disk matches + the one that is passed. However we do not lock this check so it + is possible that if two processes try to create a NePSState at the + same time, both with different optimizer infos, that one will fail + to create the NePSState. This is a limitation of the current design. + + In principal, we could allow multiple optimizers to be run and share + the same set of trials. + + Args: + directory: The directory to create the state in. + optimizer_info: The optimizer info to use. + optimizer_state: The optimizer state to use. + + Returns: + The NePSState. + + Raises: + NePSError: If the optimizer info on disk does not match the one provided. 
+ """ + is_new = not directory.exists() + directory.mkdir(parents=True, exist_ok=True) + config_dir = directory / "configs" + config_dir.mkdir(parents=True, exist_ok=True) + seed_dir = directory / ".seed_state" + seed_dir.mkdir(parents=True, exist_ok=True) + error_dir = directory / ".errors" + error_dir.mkdir(parents=True, exist_ok=True) + optimizer_state_dir = directory / ".optimizer_state" + optimizer_state_dir.mkdir(parents=True, exist_ok=True) + optimizer_info_dir = directory / ".optimizer_info" + optimizer_info_dir.mkdir(parents=True, exist_ok=True) + + # We have to do one bit of sanity checking to ensure that the optimzier + # info on disk manages the one we have recieved, otherwise we are unsure which + # optimizer is being used. + # NOTE: We assume that we do not have to worry about a race condition + # here where we have two different NePSState objects with two different optimizer + # infos trying to be created at the same time. This avoids the need to lock to + # check the optimizer info. If this assumption changes, then we would have + # to first lock before we do this check + optimizer_info_reader_writer = ReaderWriterOptimizerInfo() + if not is_new: + existing_info = optimizer_info_reader_writer.read(optimizer_info_dir) + if existing_info != optimizer_info: + raise NePSError( + "The optimizer info on disk does not match the one provided." + f"\nOn disk: {existing_info}\nProvided: {optimizer_info}" + f"\n\nLoaded the one on disk from {optimizer_info_dir}." + ) + + return NePSState( + location=str(directory.absolute().resolve()), + _trials=TrialRepoInDirectory(config_dir), + _optimizer_info=Synced.new_or_load( + data=optimizer_info, # type: ignore + location=optimizer_info_dir, + versioner=FileVersioner(version_file=optimizer_info_dir / ".version"), + locker=FileLocker( + lock_path=optimizer_info_dir / ".lock", + poll=0.01, + timeout=None, + ), + reader_writer=ReaderWriterOptimizerInfo(), + ), + _seed_state=Synced.new_or_load( + data=SeedSnapshot.new_capture(), + location=seed_dir, + reader_writer=ReaderWriterSeedSnapshot(), + versioner=FileVersioner(version_file=seed_dir / ".version"), + locker=FileLocker( + lock_path=seed_dir / ".lock", + poll=SEED_SNAPSHOT_FILELOCK_POLL, + timeout=SEED_SNAPSHOT_FILELOCK_TIMEOUT, + ), + ), + _shared_errors=Synced.new_or_load( + data=ErrDump(), + location=error_dir, + reader_writer=ReaderWriterErrDump("all"), + versioner=FileVersioner(version_file=error_dir / ".all.version"), + locker=FileLocker( + lock_path=error_dir / ".all.lock", + poll=GLOBAL_ERR_FILELOCK_POLL, + timeout=GLOBAL_ERR_FILELOCK_TIMEOUT, + ), + ), + _optimizer_state=Synced.new_or_load( + data=optimizer_state, + location=optimizer_state_dir, + reader_writer=ReaderWriterOptimizationState(), + versioner=FileVersioner(version_file=optimizer_state_dir / ".version"), + locker=FileLocker( + lock_path=optimizer_state_dir / ".lock", + poll=GLOBAL_ERR_FILELOCK_POLL, + timeout=GLOBAL_ERR_FILELOCK_TIMEOUT, + ), + ), + ) diff --git a/neps/state/neps_state.py b/neps/state/neps_state.py new file mode 100644 index 00000000..8afaee62 --- /dev/null +++ b/neps/state/neps_state.py @@ -0,0 +1,231 @@ +"""The main state object that holds all the shared state objects. + +This object is used to interact with the shared state objects in a safe atomic +manner, such that each worker can create an identical NePSState and interact with +it without having to worry about locking or out-dated information. 
+ +For an actual instantiation of this object, see +[`create_or_load_filebased_neps_state`][neps.state.filebased.create_or_load_filebased_neps_state]. +""" + +from __future__ import annotations + +import logging +import time +from dataclasses import dataclass, field +from typing import TYPE_CHECKING, Callable, Generic, TypeVar, overload + +from more_itertools import take + +from neps.state.err_dump import ErrDump +from neps.state.optimizer import OptimizationState, OptimizerInfo +from neps.state.trial import Trial + +if TYPE_CHECKING: + from neps.optimizers.base_optimizer import BaseOptimizer + from neps.state.protocols import Synced, TrialRepo + from neps.state.seed_snapshot import SeedSnapshot + +logger = logging.getLogger(__name__) + +# TODO: Technically we don't need the same Location type for all shared objects. +Loc = TypeVar("Loc") +T = TypeVar("T") + + +@dataclass +class NePSState(Generic[Loc]): + """The main state object that holds all the shared state objects.""" + + location: str + + _trials: TrialRepo[Loc] = field(repr=False) + _optimizer_info: Synced[OptimizerInfo, Loc] + _seed_state: Synced[SeedSnapshot, Loc] = field(repr=False) + _optimizer_state: Synced[OptimizationState, Loc] + _shared_errors: Synced[ErrDump, Loc] = field(repr=False) + + def put_updated_trial(self, trial: Trial, /) -> None: + """Update the trial with the new information. + + Args: + trial: The trial to update. + + Raises: + VersionMismatchError: If the trial has been updated since it was last + fetched by the worker using this state. This indicates that some other + worker has updated the trial in the meantime and the changes from + this worker are rejected. + """ + shared_trial = self._trials.get_by_id(trial.id) + shared_trial.put(trial) + + def get_trial_by_id(self, trial_id: str, /) -> Trial: + """Get a trial by its id.""" + return self._trials.get_by_id(trial_id).synced() + + def get_trials_by_ids(self, trial_ids: list[str], /) -> dict[str, Trial | None]: + """Get trials by their ids.""" + return { + _id: shared_trial.synced() + for _id, shared_trial in self._trials.get_by_ids(trial_ids).items() + } + + def sample_trial( + self, + optimizer: BaseOptimizer, + *, + worker_id: str, + _sample_hooks: list[Callable] | None = None, + ) -> Trial: + """Sample a new trial from the optimizer. + + Args: + optimizer: The optimizer to sample the trial from. + worker_id: The worker that is sampling the trial. + _sample_hooks: A list of hooks to apply to the optimizer before sampling. + + Returns: + The new trial. + """ + with self._optimizer_state.acquire() as ( + opt_state, + put_opt, + ), self._seed_state.acquire() as (seed_state, put_seed_state): + trials: dict[Trial.ID, Trial] = {} + for trial_id, shared_trial in self._trials.all().items(): + trial = shared_trial.synced() + trials[trial_id] = trial + + seed_state.set_as_global_seed_state() + + # TODO: Not sure if any existing pre_load hooks required + # it to be done after `load_results`... I hope not. 
+ if _sample_hooks is not None: + for hook in _sample_hooks: + optimizer = hook(optimizer) + + # NOTE: We don't want optimizers mutating this before serialization + budget = opt_state.budget.clone() if opt_state.budget is not None else None + sampled_config, new_opt_state = optimizer.ask( + trials=trials, + budget_info=budget, + optimizer_state=opt_state.shared_state, + ) + + if sampled_config.previous_config_id is not None: + previous_trial = trials.get(sampled_config.previous_config_id) + if previous_trial is None: + raise ValueError( + f"Previous trial '{sampled_config.previous_config_id}' not found." + ) + previous_trial_location = previous_trial.metadata.location + else: + previous_trial_location = None + + trial = Trial.new( + trial_id=sampled_config.id, + location="", # HACK: This will be set by the `TrialRepo` + config=sampled_config.config, + previous_trial=sampled_config.previous_config_id, + previous_trial_location=previous_trial_location, + time_sampled=time.time(), + worker_id=worker_id, + ) + shared_trial = self._trials.put_new(trial) + seed_state.recapture() + put_seed_state(seed_state) + put_opt( + OptimizationState(budget=opt_state.budget, shared_state=new_opt_state) + ) + + return trial + + def report_trial_evaluation( + self, + trial: Trial, + report: Trial.Report, + optimizer: BaseOptimizer, + *, + worker_id: str, + ) -> None: + """Update the trial with the evaluation report and update the optimizer state + accordingly. + + Args: + trial: The trial that was evaluated. + report: The evaluation report. + optimizer: The optimizer to update and get the state from + worker_id: The worker that evaluated the trial. + """ + shared_trial = self._trials.get_by_id(trial.id) + # TODO: This would fail if some other worker has already updated the trial. + + # IMPORTANT: We need to attach the report to the trial before updating the things. + trial.report = report + shared_trial.put(trial) + logger.debug("Updated trial '%s' with status '%s'", trial.id, trial.state) + with self._optimizer_state.acquire() as (opt_state, put_opt_state): + optimizer.update_state_post_evaluation(opt_state.shared_state, report) + + # TODO: If an optimizer doesn't use the state, this is a waste of time. + # Update the budget if we have one. + if opt_state.budget is not None: + budget_info = opt_state.budget + + if report.cost is not None: + budget_info.used_cost_budget += report.cost + put_opt_state(opt_state) + + if report.err is not None: + with self._shared_errors.acquire() as (errs, put_errs): + trial_err = ErrDump.SerializableTrialError( + trial_id=trial.id, + worker_id=worker_id, + err_type=type(report.err).__name__, + err=str(report.err), + tb=report.tb, + ) + errs.append(trial_err) + put_errs(errs) + + def get_errors(self) -> ErrDump: + """Get all the errors that have occurred during the optimization.""" + return self._shared_errors.synced() + + @overload + def get_next_pending_trial(self) -> Trial | None: ... + @overload + def get_next_pending_trial(self, n: int | None = None) -> list[Trial]: ... + + def get_next_pending_trial(self, n: int | None = None) -> Trial | list[Trial] | None: + """Get the next pending trial to evaluate. + + Args: + n: The number of trials to get. If `None`, get the next trial. + + Returns: + The next trial or a list of trials if `n` is not `None`. 
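# Sketch of how the pieces combine in a single worker step: sample a trial,
# evaluate it with the user's pipeline function, then report the result back.
# `neps_state` is a NePSState (e.g. from create_or_load_filebased_neps_state),
# while `my_optimizer` and `my_pipeline` are placeholders for a BaseOptimizer
# and a user evaluation function respectively.
from neps.state._eval import evaluate_trial
from neps.state.settings import DefaultReportValues

defaults = DefaultReportValues()  # every field defaults to None

trial = neps_state.sample_trial(my_optimizer, worker_id="worker-0")
trial, report = evaluate_trial(
    trial,
    evaluation_fn=my_pipeline,
    default_report_values=defaults,
)
neps_state.report_trial_evaluation(trial, report, my_optimizer, worker_id="worker-0")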
+ """ + _pending_itr = ( + shared_trial.synced() for _, shared_trial in self._trials.pending() + ) + if n is not None: + return take(n, _pending_itr) + return next(_pending_itr, None) + + def all_trial_ids(self) -> set[Trial.ID]: + """Get all the trial ids that are known about.""" + return self._trials.all_trial_ids() + + def get_all_trials(self) -> dict[Trial.ID, Trial]: + """Get all the trials that are known about.""" + return {_id: trial.synced() for _id, trial in self._trials.all().items()} + + def optimizer_info(self) -> OptimizerInfo: + """Get the optimizer information.""" + return self._optimizer_info.synced() + + def optimizer_state(self) -> OptimizationState: + """Get the optimizer state.""" + return self._optimizer_state.synced() diff --git a/neps/state/optimizer.py b/neps/state/optimizer.py new file mode 100644 index 00000000..f4000b07 --- /dev/null +++ b/neps/state/optimizer.py @@ -0,0 +1,57 @@ +"""Optimizer state and info dataclasses.""" + +from __future__ import annotations + +from dataclasses import dataclass +from typing import Any, Mapping + + +@dataclass +class BudgetInfo: + """Information about the budget of an optimizer.""" + + max_cost_budget: float + used_cost_budget: float + + @property + def remaining_cost_budget(self) -> float: + """The remaining budget.""" + return self.max_cost_budget - self.used_cost_budget + + def clone(self) -> BudgetInfo: + """Clone the budget info.""" + return BudgetInfo( + max_cost_budget=self.max_cost_budget, + used_cost_budget=self.used_cost_budget, + ) + + +@dataclass +class OptimizationState: + """The current state of an optimizer.""" + + budget: BudgetInfo | None + """Information regarind the budget used by the optimization trajectory.""" + + shared_state: dict[str, Any] + """Any information the optimizer wants to store between calls + to sample and post evaluations. + + For example, an optimizer may wish to store running totals here or various other + bits of information that may be expensive to recompute. + + Right now there's no support for tensors/arrays and almost no optimizer uses this + feature. Only cost-cooling uses information out of `.budget`. + + Please reach out to @eddiebergman if you have a use case for this so we can make + it more robust. + """ + + +@dataclass +class OptimizerInfo: + """Meta-information about an optimizer.""" + + # TODO(eddiebergman): What are the common keywords + # we can use that don't have to be crammed into mapping + info: Mapping[str, Any] diff --git a/neps/state/protocols.py b/neps/state/protocols.py new file mode 100644 index 00000000..78fcee0d --- /dev/null +++ b/neps/state/protocols.py @@ -0,0 +1,560 @@ +"""This module defines the protocols used by +[`NePSState`][neps.state.neps_state.NePSState] and +[`Synced`][neps.state.synced.Synced] to ensure atomic operations to the state itself. 
+""" + +from __future__ import annotations + +import logging +from contextlib import contextmanager +from copy import deepcopy +from dataclasses import dataclass +from typing import TYPE_CHECKING, Callable, ClassVar, Generic, Iterable, Iterator, TypeVar +from typing_extensions import Protocol, Self + +from neps.exceptions import ( + LockFailedError, + TrialAlreadyExistsError, + TrialNotFoundError, + VersionedResourceAlreadyExistsError, + VersionedResourceDoesNotExistsError, + VersionedResourceRemovedError, + VersionMismatchError, +) + +if TYPE_CHECKING: + from neps.state import Trial + +logger = logging.getLogger(__name__) + +T = TypeVar("T") +K = TypeVar("K") + +# https://github.com/MaT1g3R/option/issues/40 +K2 = TypeVar("K2") +T2 = TypeVar("T2") + +Loc_contra = TypeVar("Loc_contra", contravariant=True) + + +class Versioner(Protocol): + """A versioner that can bump the version of a resource. + + It should have some [`current()`][neps.state.protocols.Versioner.current] method + to give the current version tag of a resource and a + [`bump()`][neps.state.protocols.Versioner.bump] method to provide a new version tag. + + These [`current()`][neps.state.protocols.Versioner.current] and + [`bump()`][neps.state.protocols.Versioner.bump] methods do not need to be atomic + but they should read/write to external state, i.e. file-system, database, etc. + """ + + def current(self) -> str | None: + """Return the current version as defined by the external state, i.e. + the version of the tag on disk. + + Returns: + The current version if there is one written. + """ + ... + + def bump(self) -> str: + """Create a new external version tag. + + Returns: + The new version tag. + """ + ... + + +class Locker(Protocol): + """A locker that can be used to communicate between workers.""" + + LockFailedError: ClassVar = LockFailedError + + @contextmanager + def lock(self) -> Iterator[None]: + """Initiate the lock as a context manager, releasing it when done.""" + ... + + def is_locked(self) -> bool: + """Check if lock is...well, locked. + + Should return True if the resource is locked, even if the lock is held by the + current worker/process. + """ + ... + + +class ReaderWriter(Protocol[T, Loc_contra]): + """A reader-writer that can read and write some resource T with location Loc. + + For example, a `ReaderWriter[Trial, Path]` indicates a class that can read and write + trials, given some `Path`. + """ + + def read(self, loc: Loc_contra, /) -> T: + """Read the resource at the given location.""" + ... + + def write(self, value: T, loc: Loc_contra, /) -> None: + """Write the resource at the given location.""" + ... + + +class TrialRepo(Protocol[K]): + """A repository of trials. + + The primary purpose of this protocol is to ensure consistent access to trial, + the ability to put in a new trial and know about the trials that are stored there. + """ + + TrialAlreadyExistsError: ClassVar = TrialAlreadyExistsError + TrialNotFoundError: ClassVar = TrialNotFoundError + + def all_trial_ids(self) -> set[Trial.ID]: + """List all the trial ids in this trial Repo.""" + ... + + def get_by_id(self, trial_id: Trial.ID) -> Synced[Trial, K]: + """Get a trial by its id.""" + ... + + def get_by_ids(self, trial_ids: list[Trial.ID]) -> dict[str, Synced[Trial, K]]: + """Get trials by their ids.""" + ... + + def put_new(self, trial: Trial) -> Synced[Trial, K]: + """Put a new trial in the repo.""" + ... + + def all(self) -> dict[Trial.ID, Synced[Trial, K]]: + """Get all trials in the repo.""" + ... 
+ + def pending(self) -> Iterable[tuple[Trial.ID, Synced[Trial, K]]]: + """Get all pending trials in the repo. + + !!! note + This should return trials in the order in which they should be next evaluated, + usually the order in which they were put in the repo. + """ + ... + + +@dataclass +class VersionedResource(Generic[T, K]): + """A resource that will be read if it needs to update to the latest version. + + Relies on 3 main components: + * A [`Versioner`][neps.state.protocols.Versioner] to manage the versioning of the + resource. + * A [`ReaderWriter`][neps.state.protocols.ReaderWriter] to read and write the + resource. + * The location of the resource that can be used for the reader-writer. + """ + + VersionMismatchError: ClassVar = VersionMismatchError + VersionedResourceDoesNotExistsError: ClassVar = VersionedResourceDoesNotExistsError + VersionedResourceAlreadyExistsError: ClassVar = VersionedResourceAlreadyExistsError + VersionedResourceRemovedError: ClassVar = VersionedResourceRemovedError + + _current: T + _location: K + _version: str + _versioner: Versioner + _reader_writer: ReaderWriter[T, K] + + @staticmethod + def new( + *, + data: T2, + location: K2, + versioner: Versioner, + reader_writer: ReaderWriter[T2, K2], + ) -> VersionedResource[T2, K2]: + """Create a new VersionedResource. + + This will create a new resource if it doesn't exist, otherwise, + if it already exists, it will raise an error. + + Use [`load()`][neps.state.protocols.VersionedResource.load] if you want to + load an existing resource. + + Args: + data: The data to be stored. + location: The location where the data will be stored. + versioner: The versioner to be used. + reader_writer: The reader-writer to be used. + + Returns: + A new VersionedResource + + Raises: + VersionedResourceAlreadyExistsError: If a versioned resource already exists + at the given location. + """ + current_version = versioner.current() + if current_version is not None: + raise VersionedResourceAlreadyExistsError( + f"A versioend resource already already exists at '{location}'" + f" with version '{current_version}'" + ) + + version = versioner.bump() + reader_writer.write(data, location) + return VersionedResource( + _current=data, + _location=location, + _version=version, + _versioner=versioner, + _reader_writer=reader_writer, + ) + + @classmethod + def load( + cls, + *, + location: K2, + versioner: Versioner, + reader_writer: ReaderWriter[T2, K2], + ) -> VersionedResource[T2, K2]: + """Load an existing VersionedResource. + + This will load an existing resource if it exists, otherwise, it will raise an + error. + + Use [`new()`][neps.state.protocols.VersionedResource.new] if you want to + create a new resource. + + Args: + location: The location of the resource. + versioner: The versioner to be used. + reader_writer: The reader-writer to be used. + + Returns: + A VersionedResource + + Raises: + VersionedResourceDoesNotExistsError: If no versioned resource exists at + the given location. + """ + version = versioner.current() + if version is None: + raise cls.VersionedResourceDoesNotExistsError( + f"No versioned resource exists at '{location}'." 
+            )
+        data = reader_writer.read(location)
+        return VersionedResource(
+            _current=data,
+            _location=location,
+            _version=version,
+            _versioner=versioner,
+            _reader_writer=reader_writer,
+        )
+
+    def sync_and_get(self) -> T:
+        """Get the data and version of the resource."""
+        self.sync()
+        return self._current
+
+    def sync(self) -> None:
+        """Sync the resource with the latest version."""
+        current_version = self._versioner.current()
+        if current_version is None:
+            raise self.VersionedResourceRemovedError(
+                f"Versioned resource at '{self._location}' has been removed!"
+                f" Last known version was '{self._version}'."
+            )
+
+        if self._version != current_version:
+            self._current = self._reader_writer.read(self._location)
+            self._version = current_version
+
+    def put(self, data: T) -> None:
+        """Put the data and version of the resource.
+
+        Raises:
+            VersionMismatchError: If the version of the resource is not the same as the
+                current version. This implies that the resource has been updated by
+                another worker.
+        """
+        current_version = self._versioner.current()
+        if self._version != current_version:
+            raise self.VersionMismatchError(
+                f"Version mismatch - ours: '{self._version}', remote: '{current_version}'"
+                f" Tried to put data at '{self._location}'. Doing so would overwrite"
+                " changes made by another worker. The solution is to pull the latest"
+                " version of the resource and try again."
+                " The most likely reason for this error is that a lock was not"
+                " utilized when getting this resource before putting it back."
+            )
+
+        self._reader_writer.write(data, self._location)
+        self._current = data
+        self._version = self._versioner.bump()
+
+    def current(self) -> T:
+        """Get the current data of the resource."""
+        return self._current
+
+    def is_stale(self) -> bool:
+        """Check if the resource is stale."""
+        return self._version != self._versioner.current()
+
+    def location(self) -> K:
+        """Get the location of the resource."""
+        return self._location
+
+
+@dataclass
+class Synced(Generic[T, K]):
+    """Manages a versioned resource, but its methods also implement locking procedures
+    for accessing it.
+
+    Its types are parametrized by two type variables:
+
+    * `T` is the type of the data stored in the resource.
+    * `K` is the type of the location of the resource, for example `Path`.
+
+    This wraps a [`VersionedResource`][neps.state.protocols.VersionedResource] and
+    additionally provides utility to perform atomic operations on it using a
+    [`Locker`][neps.state.protocols.Locker].
+
+    This is used by [`NePSState`][neps.state.neps_state.NePSState] to manage the state
+    of trials and other shared resources.
+
+    It consists of 2 main components:
+
+    * A [`VersionedResource`][neps.state.protocols.VersionedResource] to manage the
+      versioning of the resource.
+    * A [`Locker`][neps.state.protocols.Locker] to manage the locking of the resource.
+
+    The primary methods to interact with a resource that is behind a `Synced` are:
+
+    * [`synced()`][neps.state.protocols.Synced.synced] to get the data of the resource
+      after syncing it to its latest version.
+    * [`acquire()`][neps.state.protocols.Synced.acquire] context manager to get the latest
+      version of the data while also maintaining a lock on it. This additionally provides
+      a `put()` operation to put the data back. This can primarily be used to get the
+      data, perform some mutation on it and then put it back, while not allowing other
+      workers access to the data.
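# Sketch of the acquire()/put pattern described above, mirroring how
# NePSState.sample_trial and report_trial_evaluation use it: lock the resource,
# sync to the latest version, mutate, then put the data back. `synced_state`
# stands in for a Synced[OptimizationState, Path] instance and the
# "evaluations_seen" key is an illustrative entry in the free-form shared_state.
with synced_state.acquire() as (opt_state, put):
    counter = opt_state.shared_state.get("evaluations_seen", 0)
    opt_state.shared_state["evaluations_seen"] = counter + 1
    put(opt_state)
# The lock is released here; other workers now observe the updated counter.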
+ """ + + LockFailedError: ClassVar = Locker.LockFailedError + VersionedResourceRemovedError: ClassVar = ( + VersionedResource.VersionedResourceRemovedError + ) + VersionMismatchError: ClassVar = VersionedResource.VersionMismatchError + VersionedResourceAlreadyExistsError: ClassVar = ( + VersionedResource.VersionedResourceAlreadyExistsError + ) + VersionedResourceDoesNotExistsError: ClassVar = ( + VersionedResource.VersionedResourceDoesNotExistsError + ) + + _resource: VersionedResource[T, K] + _locker: Locker + + @classmethod + def new( + cls, + *, + locker: Locker, + data: T2, + location: K2, + versioner: Versioner, + reader_writer: ReaderWriter[T2, K2], + ) -> Synced[T2, K2]: + """Create a new Synced resource. + + This will create a new resource if it doesn't exist, otherwise, + if it already exists, it will raise an error. + + Use [`load()`][neps.state.protocols.Synced.load] if you want to load an existing + resource. Use [`new_or_load()`][neps.state.protocols.Synced.new_or_load] if you + want to create a new resource if it doesn't exist, otherwise load an existing + resource. + + Args: + locker: The locker to be used. + data: The data to be stored. + location: The location where the data will be stored. + versioner: The versioner to be used. + reader_writer: The reader-writer to be used. + + Returns: + A new Synced resource. + + Raises: + VersionedResourceAlreadyExistsError: If a versioned resource already exists + at the given location. + """ + with locker.lock(): + vr = VersionedResource.new( + data=data, + location=location, + versioner=versioner, + reader_writer=reader_writer, + ) + return Synced(_resource=vr, _locker=locker) + + @classmethod + def load( + cls, + *, + locker: Locker, + location: K2, + versioner: Versioner, + reader_writer: ReaderWriter[T2, K2], + ) -> Synced[T2, K2]: + """Load an existing Synced resource. + + This will load an existing resource if it exists, otherwise, it will raise an + error. + + Use [`new()`][neps.state.protocols.Synced.new] if you want to create a new + resource. Use [`new_or_load()`][neps.state.protocols.Synced.new_or_load] if you + want to create a new resource if it doesn't exist, otherwise load an existing + resource. + + Args: + locker: The locker to be used. + location: The location of the resource. + versioner: The versioner to be used. + reader_writer: The reader-writer to be used. + + Returns: + A Synced resource. + + Raises: + VersionedResourceDoesNotExistsError: If no versioned resource exists at + the given location. + """ + with locker.lock(): + return Synced( + _resource=VersionedResource.load( + location=location, + versioner=versioner, + reader_writer=reader_writer, + ), + _locker=locker, + ) + + @classmethod + def new_or_load( + cls, + *, + locker: Locker, + data: T2, + location: K2, + versioner: Versioner, + reader_writer: ReaderWriter[T2, K2], + ) -> Synced[T2, K2]: + """Create a new Synced resource if it doesn't exist, otherwise load it. + + This will create a new resource if it doesn't exist, otherwise, it will load + an existing resource. + + Use [`new()`][neps.state.protocols.Synced.new] if you want to create a new + resource and fail otherwise. Use [`load()`][neps.state.protocols.Synced.load] + if you want to load an existing resource and fail if it doesn't exist. + + Args: + locker: The locker to be used. + data: The data to be stored. + + !!! warning + + This will be ignored if the data already exists. + + location: The location where the data will be stored. + versioner: The versioner to be used. 
+ reader_writer: The reader-writer to be used. + + Returns: + A Synced resource. + """ + try: + return Synced.new( + locker=locker, + data=data, + location=location, + versioner=versioner, + reader_writer=reader_writer, + ) + except VersionedResourceAlreadyExistsError: + return Synced.load( + locker=locker, + location=location, + versioner=versioner, + reader_writer=reader_writer, + ) + + def synced(self) -> T: + """Get the data of the resource atomically.""" + with self._locker.lock(): + return self._resource.sync_and_get() + + def location(self) -> K: + """Get the location of the resource.""" + return self._resource.location() + + def put(self, data: T) -> None: + """Update the data atomically.""" + with self._locker.lock(): + self._resource.put(data) + + @contextmanager + def acquire(self) -> Iterator[tuple[T, Callable[[T], None]]]: + """Acquire the lock and get the data of the resource. + + This is a context manager that returns the data of the resource and a function + to put the data back. + + !!! note + This is the primary way to get the resource, mutate it and put it back. + Otherwise you likely want [`synced()`][neps.state.protocols.Synced.synced] + or [`put()`][neps.state.protocols.Synced.put]. + + Yields: + A tuple containing the data of the resource and a function to put the data + back. + """ + with self._locker.lock(): + self._resource.sync() + yield self._resource.current(), self._put_unsafe + + def deepcopy(self) -> Self: + """Create a deep copy of the shared resource.""" + return deepcopy(self) + + def _components(self) -> tuple[T, K, Versioner, ReaderWriter[T, K], Locker]: + """Get the components of the shared resource.""" + return ( + self._resource.current(), + self._resource.location(), + self._resource._versioner, + self._resource._reader_writer, + self._locker, + ) + + def _unsynced(self) -> T: + """Get the current data of the resource **without** locking and syncing it.""" + return self._resource.current() + + def _is_stale(self) -> bool: + """Check if the data held currently is not the latest version.""" + return self._resource.is_stale() + + def _is_locked(self) -> bool: + """Check if the resource is locked.""" + return self._locker.is_locked() + + def _put_unsafe(self, data: T) -> None: + """Put the data without checking for staleness or acquiring the lock. + + !!! warning + This should only really be called if you know what you're doing. + """ + self._resource.put(data) diff --git a/neps/state/seed_snapshot.py b/neps/state/seed_snapshot.py new file mode 100644 index 00000000..0f9fad87 --- /dev/null +++ b/neps/state/seed_snapshot.py @@ -0,0 +1,115 @@ +"""Snapshot of the global rng state.""" + +from __future__ import annotations + +import contextlib +import random +from dataclasses import dataclass +from typing import TYPE_CHECKING, Any, List, Tuple, Union +from typing_extensions import TypeAlias + +import numpy as np + +if TYPE_CHECKING: + import torch + + NP_RNG_STATE: TypeAlias = Tuple[str, np.ndarray, int, int, float] + PY_RNG_STATE: TypeAlias = Tuple[int, Tuple[int, ...], Union[int, None]] + TORCH_RNG_STATE: TypeAlias = torch.Tensor + TORCH_CUDA_RNG_STATE: TypeAlias = List[torch.Tensor] + + +@dataclass +class SeedSnapshot: + """State of the global rng. + + Primarly enables storing of the rng state to disk using a binary format + native to each library, allowing for potential version mistmatches between + processes loading the state, as long as they can read the binary format. 
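# Small sketch of the capture/restore cycle SeedSnapshot provides: snapshot the
# global rng, draw some numbers, rewind, and draw the same numbers again.
import random

from neps.state.seed_snapshot import SeedSnapshot

snapshot = SeedSnapshot.new_capture()
first = [random.random() for _ in range(3)]

snapshot.set_as_global_seed_state()
second = [random.random() for _ in range(3)]

assert first == second  # the global state was rewound to the captured point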
+ """ + + np_rng: NP_RNG_STATE + py_rng: PY_RNG_STATE + torch_rng: TORCH_RNG_STATE | None + torch_cuda_rng: TORCH_CUDA_RNG_STATE | None + + @classmethod + def new_capture(cls) -> SeedSnapshot: + """Current state of the global rng. + + Takes a snapshot, including cloning or copying any arrays, tensors, etc. + """ + self = cls(None, None, None, None) # type: ignore + self.recapture() + return self + + def recapture(self) -> None: + """Reread the state of the global rng into this snapshot.""" + # https://numpy.org/doc/stable/reference/random/generated/numpy.random.get_state.html + + self.py_rng = random.getstate() + + np_keys = np.random.get_state(legacy=True) + assert np_keys[0] == "MT19937" # type: ignore + self.np_rng = (np_keys[0], np_keys[1].copy(), *np_keys[2:]) # type: ignore + + with contextlib.suppress(Exception): + import torch + + self.torch_rng = torch.random.get_rng_state().clone() + torch_cuda_keys: list[torch.Tensor] | None = None + if torch.cuda.is_available(): + torch_cuda_keys = [c.clone() for c in torch.cuda.get_rng_state_all()] + self.torch_cuda_rng = torch_cuda_keys + + def set_as_global_seed_state(self) -> None: + """Set the global rng to the given state.""" + np.random.set_state(self.np_rng) + random.setstate(self.py_rng) + + if self.torch_rng is not None or self.torch_cuda_rng is not None: + import torch + + if self.torch_rng is not None: + torch.random.set_rng_state(self.torch_rng) + + if self.torch_cuda_rng is not None and torch.cuda.is_available(): + torch.cuda.set_rng_state_all(self.torch_cuda_rng) + + def __eq__(self, other: Any, /) -> bool: # noqa: PLR0911 + if not isinstance(other, SeedSnapshot): + return False + + if not (self.py_rng == other.py_rng): + return False + + if not ( + self.np_rng[0] == other.np_rng[0] + and self.np_rng[2] == other.np_rng[2] + and self.np_rng[3] == other.np_rng[3] + and self.np_rng[4] == other.np_rng[4] + ): + return False + + if not np.array_equal(self.np_rng[1], other.np_rng[1]): + return False + + if self.torch_rng is not None and other.torch_rng is not None: + import torch + + if not torch.equal(self.torch_rng, other.torch_rng): + return False + + if self.torch_cuda_rng is not None and other.torch_cuda_rng is not None: + import torch + + if not all( + torch.equal(a, b) + for a, b in zip(self.torch_cuda_rng, other.torch_cuda_rng) + ): + return False + + if not isinstance(self.torch_rng, type(other.torch_rng)): + return False + + return isinstance(self.torch_cuda_rng, type(other.torch_cuda_rng)) diff --git a/neps/state/settings.py b/neps/state/settings.py new file mode 100644 index 00000000..f34a9435 --- /dev/null +++ b/neps/state/settings.py @@ -0,0 +1,171 @@ +"""Settings for the worker and the global state of NePS.""" + +from __future__ import annotations + +from dataclasses import dataclass +from enum import Enum +from typing import Literal + + +@dataclass +class DefaultReportValues: + """Values to use when an error occurs.""" + + loss_value_on_error: float | None = None + """The value to use for the loss when an error occurs.""" + + cost_value_on_error: float | None = None + """The value to use for the cost when an error occurs.""" + + cost_if_not_provided: float | None = None + """The value to use for the cost when the evaluation function does not provide one.""" + + learning_curve_on_error: list[float] | None = None + """The value to use for the learning curve when an error occurs. + + If `'loss'`, the learning curve will be set to the loss value but as + a list with a single value. 
+ """ + + learning_curve_if_not_provided: Literal["loss"] | list[float] | None = None + """The value to use for the learning curve when the evaluation function does + not provide one.""" + + +class OnErrorPossibilities(Enum): + """Possible values for what to do when an error occurs.""" + + RAISE_WORKER_ERROR = "raise_worker_error" + """Raise an error only if the error occurs in the worker.""" + + STOP_WORKER_ERROR = "stop_worker_error" + """Stop the worker if an error occurs in the worker, without raising""" + + RAISE_ANY_ERROR = "raise_any_error" + """Raise an error if there was an error from any worker, i.e. there is a trial in the + NePSState that has an error.""" + + STOP_ANY_ERROR = "stop_any_error" + """Stop the workers if any error occured from any worker, i.e. there is a trial in the + NePSState that has an error.""" + + IGNORE = "ignore" + """Ignore all errors and continue running.""" + + +# TODO: We can extend this over time +# For now this is what was needed for the backend state and workers. +@dataclass +class WorkerSettings: + """Settings for a running instance of NePS.""" + + # --------- Evaluation --------- + on_error: OnErrorPossibilities + """What to do when an error occurs. + + - `'raise_worker_error'`: Raise an error only if the error occurs in the worker. + - `'raise_any_error'`: Raise an error if any error occurs from any worker, i.e. + there is a trial in the NePSState that has an error. + - `'ignore'`: Ignore all errors and continue running. + """ + + default_report_values: DefaultReportValues + """Values to use when an error occurs or was not specified.""" + + # --------- Global Stopping Criterion --------- + max_evaluations_total: int | None + """The maximum number of evaluations to run in total. + + Once this evaluation total is reached, **all** workers will stop evaluating + new configurations. + + To control whether currently evaluating configurations are included in this + total, see + [`include_in_progress_evaluations_towards_maximum`][neps.state.settings.WorkerSettings.include_in_progress_evaluations_towards_maximum]. + + If `None`, there is no limit and workers will continue to evaluate + indefinitely. + """ + + include_in_progress_evaluations_towards_maximum: bool + """Whether to include currently evaluating configurations towards the + stopping criterion + [`max_evaluations_total`][neps.state.settings.WorkerSettings.max_evaluations_total] + """ + + max_cost_total: float | None + """The maximum cost to run in total. + + Once this cost total is reached, **all** workers will stop evaluating new + configurations. + + This cost is the sum of `'cost'` values that are returned by evaluation + of the target function. + + If `None`, there is no limit and workers will continue to evaluate + indefinitely or until another stopping criterion is met. + """ + + max_evaluation_time_total_seconds: float | None + """The maximum wallclock time allowed for evaluation in total. + + !!! note + This does not include time for sampling new configurations. + + Once this wallclock time is reached, **all** workers will stop once their + current evaluation is finished. + + If `None`, there is no limit and workers will continue to evaluate + indefinitely or until another stopping criterion is met. + """ + + # --------- Local Worker Stopping Criterion --------- + max_evaluations_for_worker: int | None + """The maximum number of evaluations to run for the worker. + + This count is specific to each worker spawned by NePS. 
+ **only** the current worker will stop evaluating new configurations once + this limit is reached. + + If `None`, there is no limit and this worker will continue to evaluate + indefinitely or until another stopping criterion is met. + """ + + max_cost_for_worker: float | None + """The maximum cost incurred by a worker before finisihng. + + Once this cost total is reached, **only** this worker will stop evaluating new + configurations. + + This cost is the sum of `'cost'` values that are returned by evaluation + of the target function. + + If `None`, there is no limit and the worker will continue to evaluate + indefinitely or until another stopping criterion is met. + """ + + max_evaluation_time_for_worker_seconds: float | None + """The maximum time to allow this worker for evaluating configurations. + + !!! note + This does not include time for sampling new configurations. + + If `None`, there is no limit and this worker will continue to evaluate + indefinitely or until another stopping criterion is met. + """ + + max_wallclock_time_for_worker_seconds: float | None + """The maximum wallclock time to run for this worker. + + Once this wallclock time is reached, **only** this worker will stop evaluating + new configurations. + + !!! warning + This will not stop the worker if it is currently evaluating a configuration. + + This is useful when the worker is deployed on some managed resource where + there is a time limit. + + If `None`, there is no limit and this worker will continue to evaluate + indefinitely or until another stopping criterion is met. + """ diff --git a/neps/state/trial.py b/neps/state/trial.py new file mode 100644 index 00000000..862e2bbb --- /dev/null +++ b/neps/state/trial.py @@ -0,0 +1,289 @@ +"""A trial is a configuration and it's associated data.""" + +from __future__ import annotations + +import logging +from dataclasses import asdict, dataclass +from enum import Enum +from typing import TYPE_CHECKING, Any, Callable, ClassVar, Literal, Mapping +from typing_extensions import Self + +import numpy as np + +from neps.exceptions import NePSError +from neps.utils.types import ConfigResult + +if TYPE_CHECKING: + from neps.search_spaces import SearchSpace + from neps.utils.types import ERROR, RawConfig + + +logger = logging.getLogger(__name__) + + +class NotReportedYetError(NePSError): + """Raised when trying to access a report that has not been reported yet.""" + + +class State(Enum): + """The state of a trial.""" + + PENDING = "pending" + SUBMITTED = "submitted" + EVALUATING = "evaluating" + SUCCESS = "success" + FAILED = "failed" + CRASHED = "crashed" + CORRUPTED = "corrupted" + UNKNOWN = "unknown" + + +@dataclass +class MetaData: + """Metadata for a trial.""" + + id: str + location: str + previous_trial_id: Trial.ID | None + previous_trial_location: str | None + sampling_worker_id: str + time_sampled: float + + evaluating_worker_id: str | None = None + evaluation_duration: float | None = None + + time_submitted: float | None = None + time_started: float | None = None + time_end: float | None = None + + +@dataclass +class Report: + """A failed report of the evaluation of a configuration.""" + + trial_id: Trial.ID + loss: float | None + cost: float | None + learning_curve: list[float] | None # TODO: Serializing a large list into yaml sucks! 
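
For reference, a hedged sketch of constructing the settings objects defined in neps/state/settings.py above. The field names mirror the dataclass definitions (no defaults are declared, so every field is passed explicitly, as the new tests later in this patch also do); the concrete values are illustrative only.

from neps.state.settings import (
    DefaultReportValues,
    OnErrorPossibilities,
    WorkerSettings,
)

settings = WorkerSettings(
    on_error=OnErrorPossibilities.STOP_ANY_ERROR,
    default_report_values=DefaultReportValues(loss_value_on_error=100.0),
    max_evaluations_total=50,                        # global budget across all workers
    include_in_progress_evaluations_towards_maximum=False,
    max_cost_total=None,
    max_evaluation_time_total_seconds=None,
    max_evaluations_for_worker=10,                   # local budget for this worker only
    max_cost_for_worker=None,
    max_evaluation_time_for_worker_seconds=None,
    max_wallclock_time_for_worker_seconds=None,
)
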
+ extra: Mapping[str, Any] + err: Exception | None + tb: str | None + reported_as: Literal["success", "failed", "crashed"] + evaluation_duration: float | None + + def __post_init__(self) -> None: + if isinstance(self.err, str): + self.err = Exception(self.err) # type: ignore + + def to_deprecate_result_dict(self) -> dict[str, Any] | ERROR: + """Return the report as a dictionary.""" + if self.reported_as == "success": + d = {"loss": self.loss, "cost": self.cost, **self.extra} + + # HACK: Backwards compatibility. Not sure how much this is needed + # but it should be removed once optimizers stop calling the + # `get_loss`, `get_cost`, `get_learning_curve` methods of `BaseOptimizer` + # and just use the `Report` directly. + if "info_dict" not in d or "learning_curve" not in d["info_dict"]: + d.setdefault("info_dict", {})["learning_curve"] = self.learning_curve + return d + + return "error" + + def __eq__(self, value: Any, /) -> bool: + # HACK : Since it could be probably that one of loss or cost is nan, + # we need a custom comparator for this object + # HACK : We also have to skip over the `Err` object since when it's deserialized, + # we can not recover the original object/type. + if not isinstance(value, Report): + return False + + other_items = value.__dict__ + for k, v in self.__dict__.items(): + other_v = other_items[k] + + # HACK: Deserialization of `Err` means we can only compare + # the string representation of the error. + if k == "err": + if str(v) != str(other_v): + return False + elif k in ("loss", "cost"): + if v is not None and np.isnan(v): + if other_v is None or not np.isnan(other_v): + return False + elif v != other_v: + return False + elif v != other_v: + return False + + return True + + +@dataclass +class Trial: + """A trial is a configuration and it's associated data.""" + + ID: ClassVar = str + State: ClassVar = State + Report: ClassVar = Report + MetaData: ClassVar = MetaData + NotReportedYetError: ClassVar = NotReportedYetError + + config: Mapping[str, Any] + metadata: MetaData + state: State + report: Report | None + + @classmethod + def new( + cls, + *, + trial_id: Trial.ID, + config: Mapping[str, Any], + location: str, + previous_trial: Trial.ID | None, + previous_trial_location: str | None, + time_sampled: float, + worker_id: int | str, + ) -> Self: + """Create a new trial object that was just sampled.""" + worker_id = str(worker_id) + return cls( + state=State.PENDING, + config=config, + metadata=MetaData( + id=trial_id, + location=location, + time_sampled=time_sampled, + previous_trial_id=previous_trial, + previous_trial_location=previous_trial_location, + sampling_worker_id=worker_id, + ), + report=None, + ) + + @property + def id(self) -> Trial.ID: + """Return the id of the trial.""" + return self.metadata.id + + def into_config_result( + self, + config_to_search_space: Callable[[RawConfig], SearchSpace], + ) -> ConfigResult: + """Convert the trial and report to a `ConfigResult` object.""" + if self.report is None: + raise self.NotReportedYetError("The trial has not been reported yet.") + + result: dict[str, Any] | ERROR + if self.report.reported_as == "success": + result = { + **self.report.extra, + "loss": self.report.loss, + "cost": self.report.cost, + } + else: + result = "error" + + return ConfigResult( + self.id, + config=config_to_search_space(self.config), + result=result, + metadata=asdict(self.metadata), + ) + + def set_submitted(self, *, time_submitted: float) -> None: + """Set the trial as submitted.""" + self.metadata.time_submitted = time_submitted 
+ self.state = State.SUBMITTED + + def set_evaluating(self, *, time_started: float, worker_id: int | str) -> None: + """Set the trial as in progress.""" + self.metadata.time_started = time_started + self.metadata.evaluating_worker_id = str(worker_id) + self.state = State.EVALUATING + + def set_complete( + self, + *, + report_as: Literal["success", "failed", "crashed"], + time_end: float, + loss: float | None, + cost: float | None, + learning_curve: list[float] | None, + err: Exception | None, + tb: str | None, + extra: Mapping[str, Any] | None, + evaluation_duration: float | None, + ) -> Report: + """Set the report for the trial.""" + if report_as == "success": + self.state = State.SUCCESS + elif report_as == "failed": + self.state = State.FAILED + elif report_as == "crashed": + self.state = State.CRASHED + else: + raise ValueError(f"Invalid report_as: '{report_as}'") + + self.metadata.time_end = time_end + self.metadata.evaluation_duration = evaluation_duration + + extra = {} if extra is None else extra + + loss = float(loss) if loss is not None else None + cost = float(cost) if cost is not None else None + if learning_curve is not None: + learning_curve = [float(v) for v in learning_curve] + + return Report( + trial_id=self.metadata.id, + reported_as=report_as, + evaluation_duration=evaluation_duration, + loss=loss, + cost=cost, + learning_curve=learning_curve, + extra=extra, + err=err, + tb=tb, + ) + + def set_corrupted(self) -> None: + """Set the trial as corrupted.""" + self.state = State.CORRUPTED + + def reset(self) -> None: + """Reset the trial to a pending state.""" + self.state = State.PENDING + self.metadata = MetaData( + id=self.metadata.id, + location=self.metadata.location, + previous_trial_id=self.metadata.previous_trial_id, + previous_trial_location=self.metadata.previous_trial_location, + time_sampled=self.metadata.time_sampled, + sampling_worker_id=self.metadata.sampling_worker_id, + ) + + +def to_config_result( + trial: Trial, + report: Report, + config_to_search_space: Callable[[RawConfig], SearchSpace], +) -> ConfigResult: + """Convert the trial and report to a `ConfigResult` object.""" + result: dict[str, Any] | ERROR + if report.reported_as == "success": + result = { + **report.extra, + "loss": report.loss, + "cost": report.cost, + } + else: + result = "error" + + return ConfigResult( + trial.id, + config=config_to_search_space(trial.config), + result=result, + metadata=asdict(trial.metadata), + ) diff --git a/neps/status/status.py b/neps/status/status.py index 0199a911..e2f43eb6 100644 --- a/neps/status/status.py +++ b/neps/status/status.py @@ -3,13 +3,14 @@ # ruff: noqa: T201 from __future__ import annotations -from itertools import chain +from dataclasses import asdict from pathlib import Path from typing import TYPE_CHECKING, Any import pandas as pd -from neps.runtime import ErrorReport, SharedState, Trial +from neps.state.filebased import load_filebased_neps_state +from neps.state.trial import Trial from neps.utils._locker import Locker from neps.utils.types import ConfigID, _ConfigResultForStats @@ -36,30 +37,34 @@ def get_summary_dict( # NOTE: We don't lock the shared state since we are just reading and don't need to # make decisions based on the state - shared_state = SharedState(root_directory) - shared_state.update_from_disk() + shared_state = load_filebased_neps_state(root_directory) - trials_by_state = shared_state.trials_by_state() + trials = shared_state.get_all_trials() evaluated: dict[ConfigID, _ConfigResultForStats] = {} - for trial in chain( - 
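
A hedged sketch of the trial lifecycle defined in neps/state/trial.py above: sampled, submitted, evaluating, then completed with a report. In normal operation the worker in neps/runtime.py drives these transitions and stores the returned report; here they are called directly, and the id, config and location values are illustrative only.

import time

from neps.state.trial import Trial

trial = Trial.new(
    trial_id="1",
    config={"a": 0.5},
    location="results/config_1",      # illustrative location
    previous_trial=None,
    previous_trial_location=None,
    time_sampled=time.time(),
    worker_id="worker-0",
)
trial.set_submitted(time_submitted=time.time())
trial.set_evaluating(time_started=time.time(), worker_id="worker-0")

# set_complete returns a Report; assigning it back onto the trial mirrors
# what the runtime is assumed to do after an evaluation finishes.
trial.report = trial.set_complete(
    report_as="success",
    time_end=time.time(),
    loss=0.42,
    cost=None,
    learning_curve=None,
    err=None,
    tb=None,
    extra={},
    evaluation_duration=1.2,
)
assert trial.state == Trial.State.SUCCESS
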
trials_by_state[Trial.State.SUCCESS], - trials_by_state[Trial.State.ERROR], - ): - assert trial.report is not None + for trial in trials.values(): + if trial.report is None: + continue + _result_for_stats = _ConfigResultForStats( - trial.id, - trial.config, - "error" if isinstance(trial.report, ErrorReport) else trial.report.results, - trial.metadata, + id=trial.id, + config=trial.config, + result=trial.report.to_deprecate_result_dict(), + metadata=asdict(trial.metadata), ) evaluated[trial.id] = _result_for_stats in_progress = { - trial.id: trial.config for trial in trials_by_state[Trial.State.IN_PROGRESS] + trial.id: trial.config + for trial in trials.values() + if trial.State == Trial.State.EVALUATING + } + pending = { + trial.id: trial.config + for trial in trials.values() + if trial.State == Trial.State.PENDING } - pending = {trial.id: trial.config for trial in trials_by_state[Trial.State.PENDING]} summary: dict[str, Any] = {} diff --git a/neps/utils/_rng.py b/neps/utils/_rng.py deleted file mode 100644 index 0705837f..00000000 --- a/neps/utils/_rng.py +++ /dev/null @@ -1,176 +0,0 @@ -from __future__ import annotations - -import json -import random -from contextlib import contextmanager -from dataclasses import dataclass -from pathlib import Path -from typing import Iterator, List, Tuple, Union -from typing_extensions import TypeAlias - -import numpy as np -import torch - -NP_RNG_STATE: TypeAlias = Tuple[str, np.ndarray, int, int, float] -PY_RNG_STATE: TypeAlias = Tuple[int, Tuple[int, ...], Union[int, None]] -TORCH_RNG_STATE: TypeAlias = torch.Tensor -TORCH_CUDA_RNG_STATE: TypeAlias = List[torch.Tensor] - - -@dataclass -class SeedState: - """State of the global rng. - - Primarly enables storing of the rng state to disk using a binary format - native to each library, allowing for potential version mistmatches between - processes loading the state, as long as they can read the binary format. - """ - - # It seems like they're all uint32 but I can't be sure. - PY_RNG_STATE_DTYPE = np.int64 - - np_rng: NP_RNG_STATE - py_rng: PY_RNG_STATE - torch_rng: TORCH_RNG_STATE - torch_cuda_rng: TORCH_CUDA_RNG_STATE | None - - @classmethod - def get(cls) -> SeedState: - """Current state of the global rng. - - Takes a snapshot, including cloning or copying any arrays, tensors, etc. 
- """ - # https://numpy.org/doc/stable/reference/random/generated/numpy.random.get_state.html - np_keys = np.random.get_state(legacy=True) - assert np_keys[0] == "MT19937" # type: ignore - np_keys = (np_keys[0], np_keys[1].copy(), *np_keys[2:]) # type: ignore - - py_rng = random.getstate() - torch_rng = torch.random.get_rng_state().clone() - torch_cuda_keys: list[torch.Tensor] | None = None - if torch.cuda.is_available(): - torch_cuda_keys = [c.clone() for c in torch.cuda.get_rng_state_all()] - - return cls( - np_rng=np_keys, # type: ignore - py_rng=py_rng, - torch_rng=torch_rng, - torch_cuda_rng=torch_cuda_keys, - ) - - def set_as_global_state(self) -> None: - """Set the global rng to the given state.""" - np.random.set_state(self.np_rng) - random.setstate(self.py_rng) - torch.random.set_rng_state(self.torch_rng) - if self.torch_cuda_rng and torch.cuda.is_available(): - torch.cuda.set_rng_state_all(self.torch_cuda_rng) - - def dump(self, path: Path) -> None: - """Save the state to a directory.""" - if path.exists(): - assert path.is_dir() - else: - path.mkdir(parents=True) - - py_rng_version, py_rng_state, py_guass_next = self.py_rng - np_rng_kind, np_rng_state, np_pos, np_has_gauss, np_cached_gauss = self.np_rng - - seed_info = { - "np_rng_kind": np_rng_kind, - "np_pos": np_pos, - "np_has_gauss": np_has_gauss, - "np_cached_gauss": np_cached_gauss, - "py_rng_version": py_rng_version, - "py_guass_next": py_guass_next, - } - - # NOTE(eddiebergman): Chose JSON since it's fast and non-injectable - with (path / "seed_info.json").open("w") as f: - json.dump(seed_info, f) - - py_rng_state_arr = np.array(py_rng_state, dtype=self.PY_RNG_STATE_DTYPE) - with (path / "py_rng.npy").open("wb") as f: - py_rng_state_arr.tofile(f) - - with (path / "np_rng_state.npy").open("wb") as f: - np_rng_state.tofile(f) - - torch.save(self.torch_rng, path / "torch_rng_state.pt") - - if self.torch_cuda_rng: - torch.save(self.torch_cuda_rng, path / "torch_cuda_rng_state.pt") - - @classmethod - def load(cls, path: Path) -> SeedState: - assert path.is_dir() - - with (path / "seed_info.json").open("r") as f: - seed_info = json.load(f) - - # Load and set pythons rng - py_rng_state = tuple( - int(x) for x in np.fromfile(path / "py_rng.npy", dtype=cls.PY_RNG_STATE_DTYPE) - ) - np_rng_state = np.fromfile(path / "np_rng_state.npy", dtype=np.uint32) - - # By specifying `weights_only=True`, it disables arbitrary object loading - torch_rng_state = torch.load(path / "torch_rng_state.pt", weights_only=True) - - torch_cuda_rng = None - torch_cuda_rng_path = path / "torch_cuda_rng_state.pt" - if torch_cuda_rng_path.exists(): - # By specifying `weights_only=True`, it disables arbitrary object loading - torch_cuda_rng = torch.load( - path / "torch_cuda_rng_state.pt", - weights_only=True, - ) - - return cls( - np_rng=( - seed_info["np_rng_kind"], - np_rng_state, - seed_info["np_pos"], - seed_info["np_has_gauss"], - seed_info["np_cached_gauss"], - ), - py_rng=( - seed_info["py_rng_version"], - py_rng_state, - seed_info["py_guass_next"], - ), - torch_rng=torch_rng_state, - torch_cuda_rng=torch_cuda_rng, - ) - - @classmethod - @contextmanager - def use( - cls, - path: Path, - *, - update_on_exit: bool = True, - ) -> Iterator[SeedState]: - """Context manager to use a seed state. - - If the path exists, load the seed state from the path and set it as the - global state. Otherwise, use the current global state. - - Args: - path: Path to the seed state. 
- update_on_exit: If True, get the seed state after the context manager returns - and save it to the path. - - Yields: - SeedState: The seed state in use. - """ - if path.exists(): - seed_state = cls.load(path) - seed_state.set_as_global_state() - else: - seed_state = cls.get() - - yield seed_state - - if update_on_exit: - cls.get().dump(path) diff --git a/neps/utils/common.py b/neps/utils/common.py index f80a01ea..2a9ca586 100644 --- a/neps/utils/common.py +++ b/neps/utils/common.py @@ -10,7 +10,7 @@ import torch import yaml -from neps.runtime import get_in_progress_trial +from neps.runtime import get_in_progress_trial, get_workers_neps_state # TODO(eddiebergman): I feel like this function should throw an error if it can't @@ -36,13 +36,10 @@ def load_checkpoint( """ if directory is None: trial = get_in_progress_trial() - - if trial is None: - return None - - directory = trial.disk.previous_pipeline_dir + directory = trial.metadata.previous_trial_location if directory is None: return None + assert isinstance(directory, str) directory = Path(directory) checkpoint_path = (directory / checkpoint_name).with_suffix(".pth") @@ -79,14 +76,7 @@ def save_checkpoint( """ if directory is None: in_progress_trial = get_in_progress_trial() - - if in_progress_trial is None: - raise ValueError( - "No current trial was found to save the checkpoint! This should not" - " happen. Please report this issue and in the meantime you may provide a" - " directory manually." - ) - directory = in_progress_trial.pipeline_dir + directory = in_progress_trial.metadata.location directory = Path(directory) checkpoint_path = (directory / checkpoint_name).with_suffix(".pth") @@ -115,8 +105,8 @@ def load_lightning_checkpoint( checkpoint data. Args: - previous_pipeline_directory: The previous pipeline directory. checkpoint_dir: The directory where checkpoint files are stored. + previous_pipeline_directory: The previous pipeline directory. Returns: A tuple containing the checkpoint path (str) and the loaded checkpoint data (dict) @@ -124,9 +114,7 @@ def load_lightning_checkpoint( """ if previous_pipeline_directory is None: trial = get_in_progress_trial() - if trial is not None: - previous_pipeline_directory = trial.disk.previous_pipeline_dir - + previous_pipeline_directory = trial.metadata.previous_trial_location if previous_pipeline_directory is None: return None, None @@ -151,6 +139,9 @@ def load_lightning_checkpoint( return checkpoint_path, checkpoint +# TODO: We should have a better way to have a shared folder between trials. +# Right now, the fidelity lineage is linear, however this will be a difficulty +# when/if we have a tree structure. def get_initial_directory(pipeline_directory: Path | str | None = None) -> Path: """Find the initial directory based on its existence and the presence of the "previous_config.id" file. @@ -161,35 +152,24 @@ def get_initial_directory(pipeline_directory: Path | str | None = None) -> Path: Returns: The initial directory. """ + neps_state = get_workers_neps_state() if pipeline_directory is not None: pipeline_directory = Path(pipeline_directory) + # TODO: Hard coded assumption + config_id = pipeline_directory.name.split("_", maxsplit=1)[-1] + trial = neps_state.get_trial_by_id(config_id) else: trial = get_in_progress_trial() - if trial is None: - raise ValueError( - "No current trial was found to get the initial directory! This should not" - " happen. Please report this issue and in the meantime you may provide" - " a directory manually." 
- ) - pipeline_directory = trial.pipeline_dir - - # TODO(eddiebergman): Can we just make this a method of the Trial class somehow? - # This relies on the fact it's always called "previous_config.id" which could subtly - # break, if it were to be updated. # Recursively find the initial directory - current_pipeline_directory = pipeline_directory - while True: - previous_pipeline_directory_id = current_pipeline_directory / "previous_config.id" - if not previous_pipeline_directory_id.exists(): - # Initial directory found - return pipeline_directory + while (prev_trial_id := trial.metadata.previous_trial_id) is not None: + trial = neps_state.get_trial_by_id(prev_trial_id) - optim_result_dir = pipeline_directory.parent - with previous_pipeline_directory_id.open("r") as config_id_file: - config_id = config_id_file.read() + initial_dir = trial.metadata.location - current_pipeline_directory = optim_result_dir / f"config_{config_id}" + # TODO: Hard coded assumption that we are operating in a filebased neps + assert isinstance(initial_dir, str) + return Path(initial_dir) def get_searcher_data( @@ -363,14 +343,14 @@ def instance_from_map( # noqa: C901, PLR0912 # Give the arguments to the class if args_dict: - instance = partial(instance, **args_dict) + instance = partial(instance, **args_dict) # type: ignore if as_class: return instance if is_partial_class(instance): try: - instance = instance() + instance = instance() # type: ignore except TypeError as e: raise TypeError(f"{e} when calling {instance} with {args_dict}") from e diff --git a/neps/utils/data_loading.py b/neps/utils/data_loading.py index 0bdb15e3..a0f86210 100644 --- a/neps/utils/data_loading.py +++ b/neps/utils/data_loading.py @@ -5,6 +5,7 @@ import json import os import re +from dataclasses import asdict from itertools import chain from pathlib import Path from typing import Any, Mapping, TypedDict @@ -12,7 +13,7 @@ import numpy as np import yaml -from neps.runtime import ErrorReport, SharedState, Trial +from neps.state.filebased import load_filebased_neps_state from neps.utils.types import ERROR, ConfigID, ResultDict, _ConfigResultForStats @@ -135,24 +136,20 @@ def read_tasks_and_dev_stages_from_disk( if dev_id is None: continue - state = SharedState(Path(dev_dir_path)) - state.update_from_disk() - trials_by_state = state.trials_by_state() + state = load_filebased_neps_state(Path(dev_dir_path)) + trials = state.get_all_trials() evaluated: dict[ConfigID, _ConfigResultForStats] = {} - for trial in chain( - trials_by_state[Trial.State.SUCCESS], - trials_by_state[Trial.State.ERROR], - ): - assert trial.report is not None + for trial in trials.values(): + if trial.report is None: + continue + _result_for_stats = _ConfigResultForStats( trial.id, trial.config, - "error" - if isinstance(trial.report, ErrorReport) - else trial.report.results, - trial.metadata, + trial.report.to_deprecate_result_dict(), + asdict(trial.metadata), ) evaluated[trial.id] = _result_for_stats @@ -181,27 +178,24 @@ def read_user_prior_results_from_disk( if not prior_dir.is_dir(): continue - state = SharedState(prior_dir) - with state.sync(lock=False): - evaluated: dict[ConfigID, _ConfigResultForStats] = {} - trials_by_state = state.trials_by_state() + state = load_filebased_neps_state(Path(prior_dir)) + trials = state.get_all_trials() + evaluated: dict[ConfigID, _ConfigResultForStats] = {} - for trial in chain( - trials_by_state[Trial.State.SUCCESS], - trials_by_state[Trial.State.ERROR], - ): - assert trial.report is not None - _result_for_stats = 
_ConfigResultForStats( - trial.id, - trial.config, - "error" - if isinstance(trial.report, ErrorReport) - else trial.report.results, - trial.metadata, - ) - evaluated[trial.id] = _result_for_stats + for trial in trials.values(): + if trial.report is None: + continue - results[prior_dir.name] = evaluated + assert trial.report is not None + _result_for_stats = _ConfigResultForStats( + trial.id, + trial.config, + trial.report.to_deprecate_result_dict(), + asdict(trial.metadata), + ) + evaluated[trial.id] = _result_for_stats + + results[prior_dir.name] = evaluated return results @@ -328,25 +322,22 @@ def summarize_results( # noqa: C901 # TODO(unknown): only use IDs if provided final_results = results[final_task_id][final_dev_id] else: - state = SharedState(Path(seed_dir)) - with state.sync(lock=False): - trials_by_state = state.trials_by_state() - - final_results = {} - for trial in chain( - trials_by_state[Trial.State.SUCCESS], - trials_by_state[Trial.State.ERROR], - ): - assert trial.report is not None - _result_for_stats = _ConfigResultForStats( - trial.id, - trial.config, - "error" - if isinstance(trial.report, ErrorReport) - else trial.report.results, - trial.metadata, - ) - final_results[trial.id] = _result_for_stats + state = load_filebased_neps_state(Path(seed_dir)) + trials = state.get_all_trials() + + final_results = {} + for trial in trials.values(): + if trial.report is None: + continue + + assert trial.report is not None + _result_for_stats = _ConfigResultForStats( + trial.id, + trial.config, + trial.report.to_deprecate_result_dict(), + asdict(trial.metadata), + ) + final_results[trial.id] = _result_for_stats # This part is copied from neps.status() best_loss: float = float("inf") diff --git a/neps/utils/files.py b/neps/utils/files.py index 0111f2a7..ddb0627c 100644 --- a/neps/utils/files.py +++ b/neps/utils/files.py @@ -2,35 +2,47 @@ from __future__ import annotations +import dataclasses +from enum import Enum from pathlib import Path from typing import Any, Iterable, Mapping import yaml -def _serializable_format(data: Any) -> Any: +def serializable_format(data: Any) -> Any: # noqa: PLR0911 + """Format data to be serializable.""" if hasattr(data, "serialize"): - return _serializable_format(data.serialize()) + return serializable_format(data.serialize()) + + if dataclasses.is_dataclass(data) and not isinstance(data, type): + return serializable_format(dataclasses.asdict(data)) # type: ignore + + if isinstance(data, Exception): + return str(data) + + if isinstance(data, Enum): + return data.value if isinstance(data, Mapping): - return {key: _serializable_format(val) for key, val in data.items()} + return {key: serializable_format(val) for key, val in data.items()} if not isinstance(data, str) and isinstance(data, Iterable): - return [_serializable_format(val) for val in data] + return [serializable_format(val) for val in data] if type(data).__module__ in ["numpy", "torch"]: data = data.tolist() # type: ignore if type(data).__module__ == "numpy": data = data.item() - return _serializable_format(data) + return serializable_format(data) return data def serialize(data: Any, path: Path | str, *, sort_keys: bool = True) -> None: """Serialize data to a yaml file.""" - data = _serializable_format(data) + data = serializable_format(data) path = Path(path) with path.open("w") as file_stream: try: @@ -45,7 +57,15 @@ def serialize(data: Any, path: Path | str, *, sort_keys: bool = True) -> None: def deserialize(path: Path | str) -> dict[str, Any]: """Deserialize data from a yaml file.""" with 
Path(path).open("r") as file_stream: - return yaml.full_load(file_stream) # type: ignore + data = yaml.full_load(file_stream) # type: ignore + + if not isinstance(data, dict): + raise TypeError( + f"Deserialized data at {path} is not a dictionary!" + f" Got {type(data)} instead.\n{data}" + ) + + return data def empty_file(file_path: Path) -> bool: diff --git a/neps/utils/types.py b/neps/utils/types.py index 3316eb03..a6b6c540 100644 --- a/neps/utils/types.py +++ b/neps/utils/types.py @@ -7,18 +7,15 @@ from typing_extensions import TypeAlias import numpy as np -import torch if TYPE_CHECKING: from neps.search_spaces.search_space import SearchSpace + from neps.state.trial import Trial # TODO(eddiebergman): We can turn this to an enum at some # point to prevent having to isinstance and str match ERROR: TypeAlias = Literal["error"] - Number: TypeAlias = Union[int, float, np.number] -Array: TypeAlias = Union[np.ndarray, torch.Tensor] - ConfigID: TypeAlias = str RawConfig: TypeAlias = Mapping[str, Any] Metadata: TypeAlias = Dict[str, Any] @@ -54,9 +51,7 @@ class ConfigResult: config: SearchSpace """Configuration that was evaluated.""" - # TODO(eddiebergman): Check about using a `TypedDict` here since I'm pretty sure - # there's always a "loss" key - result: ResultDict | ERROR + result: Trial.Report | ResultDict | ERROR """Some dictionary of results.""" metadata: dict diff --git a/neps/utils/validation.py b/neps/utils/validation.py new file mode 100644 index 00000000..884df0c5 --- /dev/null +++ b/neps/utils/validation.py @@ -0,0 +1,31 @@ +"""Validation utilities for the NePS package.""" + +from __future__ import annotations + +import inspect +from typing import Any, Callable + +from neps.exceptions import NePSError + + +class DeprecatedArgumentError(NePSError): + """Raised when a deprecated argument is used.""" + + +def validate_run_pipeline_arguments(f: Callable[..., Any]) -> None: + """Validate the arguments of a run pipeline function to see if deprcated arguments + are used. + """ + evaluation_fn_params = inspect.signature(f).parameters + if "previous_working_directory" in evaluation_fn_params: + raise RuntimeError( + "the argument: 'previous_working_directory' was deprecated. " + f"In the function: '{f.__name__}', please, " + "use 'previous_pipeline_directory' instead. ", + ) + if "working_directory" in evaluation_fn_params: + raise RuntimeError( + "the argument: 'working_directory' was deprecated. " + f"In the function: '{f.__name__}', please, " + "use 'pipeline_directory' instead. 
", + ) diff --git a/neps_examples/basic_usage/hyperparameters.py b/neps_examples/basic_usage/hyperparameters.py index b254e16b..164b49cb 100644 --- a/neps_examples/basic_usage/hyperparameters.py +++ b/neps_examples/basic_usage/hyperparameters.py @@ -8,7 +8,7 @@ def run_pipeline(float1, float2, categorical, integer1, integer2): loss = -float(np.sum([float1, float2, int(categorical), integer1, integer2])) - time.sleep(0.7) # For demonstration purposes + # time.sleep(0.7) # For demonstration purposes return loss diff --git a/neps_examples/efficiency/multi_fidelity.py b/neps_examples/efficiency/multi_fidelity.py index 0731b1b5..bdbcc965 100644 --- a/neps_examples/efficiency/multi_fidelity.py +++ b/neps_examples/efficiency/multi_fidelity.py @@ -82,7 +82,7 @@ def run_pipeline(pipeline_directory, previous_pipeline_directory, learning_rate, run_pipeline=run_pipeline, pipeline_space=pipeline_space, root_directory="results/multi_fidelity_example", - # Optional: Do not start another evaluation after <=100 epochs, corresponds to cost + # Optional: Do not start another evaluation after <=50 epochs, corresponds to cost # field above. - max_cost_total=100, + max_cost_total=50, ) diff --git a/pyproject.toml b/pyproject.toml index 37cf020d..06b4baa4 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -71,6 +71,7 @@ ruff = "^0.4" pre-commit = "^3" mypy = "^1" pytest = "^7" +pytest-cases = "^3" types-PyYAML = "^6" mkdocs-material = "*" mkdocs-autorefs = "*" @@ -261,7 +262,7 @@ convention = "google" max-args = 10 # Changed from default of 5 [tool.pytest.ini_options] -addopts = "--basetemp ./tests_tmpdir -m 'neps_api or core_examples'" +addopts = "--basetemp ./tests_tmpdir -m 'not ci_examples'" markers = [ "ci_examples", "core_examples", @@ -302,7 +303,6 @@ module = [ "neps.api", "neps.optimizers.*", "neps.search_spaces.architecture.*", - "neps.search_spaces.yaml_search_space_utils", "neps.utils.run_args_from_yaml", ] ignore_errors = true diff --git a/tests/test_neps_api/test_api.py b/tests/test_neps_api/test_api.py index a50b91d1..32408007 100644 --- a/tests/test_neps_api/test_api.py +++ b/tests/test_neps_api/test_api.py @@ -1,3 +1,5 @@ +from __future__ import annotations + import logging import os import runpy @@ -54,11 +56,11 @@ def test_default_examples(tmp_path): assert os.path.exists(folder_path), f"Directory does not exist: {folder_path}" - info_yaml_path = os.path.join(folder_path, ".optimizer_info.yaml") + info_yaml_path = os.path.join(folder_path, ".optimizer_info", "info.yaml") assert os.path.exists( str(info_yaml_path) - ), f"File does not exist: {info_yaml_path}" + ), f"File does not exist: {info_yaml_path}\n{os.listdir(folder_path)}" # Load the YAML file with open(str(info_yaml_path)) as yaml_config: @@ -85,7 +87,7 @@ def test_baseoptimizer_examples(tmp_path): assert os.path.exists(folder_path), f"Directory does not exist: {folder_path}" - info_yaml_path = os.path.join(folder_path, ".optimizer_info.yaml") + info_yaml_path = os.path.join(folder_path, ".optimizer_info", "info.yaml") assert os.path.exists( str(info_yaml_path) @@ -114,7 +116,7 @@ def test_user_created_yaml_examples(tmp_path): assert os.path.exists(folder_path), f"Directory does not exist: {folder_path}" - info_yaml_path = os.path.join(folder_path, ".optimizer_info.yaml") + info_yaml_path = os.path.join(folder_path, ".optimizer_info", "info.yaml") assert os.path.exists( str(info_yaml_path) diff --git a/tests/test_runtime/__init__.py b/tests/test_runtime/__init__.py new file mode 100644 index 00000000..e69de29b diff --git 
a/tests/test_runtime/test_default_report_values.py b/tests/test_runtime/test_default_report_values.py new file mode 100644 index 00000000..652db9de --- /dev/null +++ b/tests/test_runtime/test_default_report_values.py @@ -0,0 +1,170 @@ +from __future__ import annotations + +from pathlib import Path +from pytest_cases import fixture + +from neps.optimizers.random_search.optimizer import RandomSearch +from neps.runtime import DefaultWorker +from neps.search_spaces.search_space import SearchSpace +from neps.state.filebased import create_or_load_filebased_neps_state +from neps.state.neps_state import NePSState +from neps.state.optimizer import OptimizationState, OptimizerInfo +from neps.state.settings import DefaultReportValues, OnErrorPossibilities, WorkerSettings +from neps.search_spaces import FloatParameter +from neps.state.trial import Trial + + +@fixture +def neps_state(tmp_path: Path) -> NePSState[Path]: + return create_or_load_filebased_neps_state( + directory=tmp_path / "neps_state", + optimizer_info=OptimizerInfo(info={"nothing": "here"}), + optimizer_state=OptimizationState(budget=None, shared_state={}), + ) + + +def test_default_values_on_error( + neps_state: NePSState, +) -> None: + optimizer = RandomSearch(pipeline_space=SearchSpace(a=FloatParameter(0, 1))) + settings = WorkerSettings( + on_error=OnErrorPossibilities.IGNORE, + default_report_values=DefaultReportValues( + loss_value_on_error=2.4, # <- Highlight + cost_value_on_error=2.4, # <- Highlight + learning_curve_on_error=[2.4, 2.5], # <- Highlight + ), + max_evaluations_total=None, + include_in_progress_evaluations_towards_maximum=False, + max_cost_total=None, + max_evaluations_for_worker=1, + max_evaluation_time_total_seconds=None, + max_wallclock_time_for_worker_seconds=None, + max_evaluation_time_for_worker_seconds=None, + max_cost_for_worker=None, + ) + + def eval_function(*args, **kwargs) -> float: + raise ValueError("This is an error") + + worker = DefaultWorker.new( + state=neps_state, + optimizer=optimizer, + evaluation_fn=eval_function, + settings=settings, + _pre_sample_hooks=None, + ) + worker.run() + + trials = neps_state.get_all_trials() + n_crashed = sum( + trial.state == Trial.State.CRASHED is not None for trial in trials.values() + ) + assert len(trials) == 1 + assert n_crashed == 1 + + assert neps_state.get_next_pending_trial() is None + assert len(neps_state.get_errors()) == 1 + + trial = trials.popitem()[1] + assert trial.state == Trial.State.CRASHED + assert trial.report is not None + assert trial.report.loss == 2.4 + assert trial.report.cost == 2.4 + assert trial.report.learning_curve == [2.4, 2.5] + + +def test_default_values_on_not_specified( + neps_state: NePSState, +) -> None: + optimizer = RandomSearch(pipeline_space=SearchSpace(a=FloatParameter(0, 1))) + settings = WorkerSettings( + on_error=OnErrorPossibilities.IGNORE, + default_report_values=DefaultReportValues( + cost_if_not_provided=2.4, + learning_curve_if_not_provided=[2.4, 2.5], + ), + max_evaluations_total=None, + include_in_progress_evaluations_towards_maximum=False, + max_cost_total=None, + max_evaluations_for_worker=1, + max_evaluation_time_total_seconds=None, + max_wallclock_time_for_worker_seconds=None, + max_evaluation_time_for_worker_seconds=None, + max_cost_for_worker=None, + ) + + def eval_function(*args, **kwargs) -> float: + return 1.0 + + worker = DefaultWorker.new( + state=neps_state, + optimizer=optimizer, + evaluation_fn=eval_function, + settings=settings, + _pre_sample_hooks=None, + ) + worker.run() + + trials = 
neps_state.get_all_trials() + n_sucess = sum( + trial.state == Trial.State.SUCCESS is not None for trial in trials.values() + ) + assert len(trials) == 1 + assert n_sucess == 1 + + assert neps_state.get_next_pending_trial() is None + assert len(neps_state.get_errors()) == 0 + + trial = trials.popitem()[1] + assert trial.state == Trial.State.SUCCESS + assert trial.report is not None + assert trial.report.cost == 2.4 + assert trial.report.learning_curve == [2.4, 2.5] + + +def test_default_value_loss_curve_take_loss_value( + neps_state: NePSState, +) -> None: + optimizer = RandomSearch(pipeline_space=SearchSpace(a=FloatParameter(0, 1))) + settings = WorkerSettings( + on_error=OnErrorPossibilities.IGNORE, + default_report_values=DefaultReportValues(learning_curve_if_not_provided="loss"), + max_evaluations_total=None, + include_in_progress_evaluations_towards_maximum=False, + max_cost_total=None, + max_evaluations_for_worker=1, + max_evaluation_time_total_seconds=None, + max_wallclock_time_for_worker_seconds=None, + max_evaluation_time_for_worker_seconds=None, + max_cost_for_worker=None, + ) + + LOSS = 1.0 + + def eval_function(*args, **kwargs) -> float: + return LOSS + + worker = DefaultWorker.new( + state=neps_state, + optimizer=optimizer, + evaluation_fn=eval_function, + settings=settings, + _pre_sample_hooks=None, + ) + worker.run() + + trials = neps_state.get_all_trials() + n_sucess = sum( + trial.state == Trial.State.SUCCESS is not None for trial in trials.values() + ) + assert len(trials) == 1 + assert n_sucess == 1 + + assert neps_state.get_next_pending_trial() is None + assert len(neps_state.get_errors()) == 0 + + trial = trials.popitem()[1] + assert trial.state == Trial.State.SUCCESS + assert trial.report is not None + assert trial.report.learning_curve == [LOSS] diff --git a/tests/test_runtime/test_error_handling_strategies.py b/tests/test_runtime/test_error_handling_strategies.py new file mode 100644 index 00000000..5e819448 --- /dev/null +++ b/tests/test_runtime/test_error_handling_strategies.py @@ -0,0 +1,200 @@ +from __future__ import annotations + +import pytest +from dataclasses import dataclass +from pandas.core.common import contextlib +from pathlib import Path +from pytest_cases import fixture, parametrize + +from neps.optimizers.random_search.optimizer import RandomSearch +from neps.runtime import DefaultWorker +from neps.search_spaces.search_space import SearchSpace +from neps.state.err_dump import SerializedError +from neps.state.filebased import create_or_load_filebased_neps_state +from neps.state.neps_state import NePSState +from neps.state.optimizer import OptimizationState, OptimizerInfo +from neps.state.settings import DefaultReportValues, OnErrorPossibilities, WorkerSettings +from neps.search_spaces import FloatParameter +from neps.state.trial import Trial + + +@fixture +def neps_state(tmp_path: Path) -> NePSState[Path]: + return create_or_load_filebased_neps_state( + directory=tmp_path / "neps_state", + optimizer_info=OptimizerInfo(info={"nothing": "here"}), + optimizer_state=OptimizationState(budget=None, shared_state={}), + ) + + +@parametrize( + "on_error", + [OnErrorPossibilities.RAISE_ANY_ERROR, OnErrorPossibilities.RAISE_WORKER_ERROR], +) +def test_worker_raises_when_error_in_self( + neps_state: NePSState, + on_error: OnErrorPossibilities, +) -> None: + optimizer = RandomSearch(pipeline_space=SearchSpace(a=FloatParameter(0, 1))) + settings = WorkerSettings( + on_error=on_error, # <- Highlight + default_report_values=DefaultReportValues(), + 
max_evaluations_total=None, + include_in_progress_evaluations_towards_maximum=False, + max_cost_total=None, + max_evaluations_for_worker=1, + max_evaluation_time_total_seconds=None, + max_wallclock_time_for_worker_seconds=None, + max_evaluation_time_for_worker_seconds=None, + max_cost_for_worker=None, + ) + + def eval_function(*args, **kwargs) -> float: + raise ValueError("This is an error") + + worker = DefaultWorker.new( + state=neps_state, + optimizer=optimizer, + evaluation_fn=eval_function, + settings=settings, + _pre_sample_hooks=None, + ) + with pytest.raises(ValueError, match="This is an error"): + worker.run() + + trials = neps_state.get_all_trials() + n_crashed = sum( + trial.state == Trial.State.CRASHED is not None for trial in trials.values() + ) + assert len(trials) == 1 + assert n_crashed == 1 + + assert neps_state.get_next_pending_trial() is None + assert len(neps_state.get_errors()) == 1 + + +def test_worker_raises_when_error_in_other_worker(neps_state: NePSState) -> None: + optimizer = RandomSearch(pipeline_space=SearchSpace(a=FloatParameter(0, 1))) + settings = WorkerSettings( + on_error=OnErrorPossibilities.RAISE_ANY_ERROR, # <- Highlight + default_report_values=DefaultReportValues(), + max_evaluations_total=None, + include_in_progress_evaluations_towards_maximum=False, + max_cost_total=None, + max_evaluations_for_worker=1, + max_evaluation_time_total_seconds=None, + max_wallclock_time_for_worker_seconds=None, + max_evaluation_time_for_worker_seconds=None, + max_cost_for_worker=None, + ) + + def evaler(*args, **kwargs) -> float: + raise ValueError("This is an error") + + worker1 = DefaultWorker.new( + state=neps_state, + optimizer=optimizer, + evaluation_fn=evaler, + settings=settings, + _pre_sample_hooks=None, + ) + worker2 = DefaultWorker.new( + state=neps_state, + optimizer=optimizer, + evaluation_fn=evaler, + settings=settings, + _pre_sample_hooks=None, + ) + + # Worker1 should run 1 and error out + with contextlib.suppress(ValueError): + worker1.run() + + # Worker2 should not run and immeditaly error out, however + # it will have loaded in a serialized error + with pytest.raises(SerializedError): + worker2.run() + + trials = neps_state.get_all_trials() + n_crashed = sum( + trial.state == Trial.State.CRASHED is not None for trial in trials.values() + ) + assert len(trials) == 1 + assert n_crashed == 1 + + assert neps_state.get_next_pending_trial() is None + assert len(neps_state.get_errors()) == 1 + + +@pytest.mark.parametrize( + "on_error", + [OnErrorPossibilities.IGNORE, OnErrorPossibilities.RAISE_WORKER_ERROR], +) +def test_worker_does_not_raise_when_error_in_other_worker( + neps_state: NePSState, + on_error: OnErrorPossibilities, +) -> None: + optimizer = RandomSearch(pipeline_space=SearchSpace(a=FloatParameter(0, 1))) + settings = WorkerSettings( + on_error=OnErrorPossibilities.RAISE_WORKER_ERROR, # <- Highlight + default_report_values=DefaultReportValues(), + max_evaluations_total=None, + include_in_progress_evaluations_towards_maximum=False, + max_cost_total=None, + max_evaluations_for_worker=1, + max_evaluation_time_total_seconds=None, + max_wallclock_time_for_worker_seconds=None, + max_evaluation_time_for_worker_seconds=None, + max_cost_for_worker=None, + ) + + @dataclass + class _Eval: + do_raise: bool + + def __call__(self, *args, **kwargs) -> float: + if self.do_raise: + raise ValueError("This is an error") + return 10 + + evaler = _Eval(do_raise=True) + + worker1 = DefaultWorker.new( + state=neps_state, + optimizer=optimizer, + evaluation_fn=evaler, + 
settings=settings, + _pre_sample_hooks=None, + ) + worker2 = DefaultWorker.new( + state=neps_state, + optimizer=optimizer, + evaluation_fn=evaler, + settings=settings, + _pre_sample_hooks=None, + ) + + # Worker1 should run 1 and error out + evaler.do_raise = True + with contextlib.suppress(ValueError): + worker1.run() + assert worker1.worker_cumulative_eval_count == 1 + + # Worker2 should run successfully + evaler.do_raise = False + worker2.run() + assert worker2.worker_cumulative_eval_count == 1 + + trials = neps_state.get_all_trials() + n_success = sum( + trial.state == Trial.State.SUCCESS is not None for trial in trials.values() + ) + n_crashed = sum( + trial.state == Trial.State.CRASHED is not None for trial in trials.values() + ) + assert n_success == 1 + assert n_crashed == 1 + assert len(trials) == 2 + + assert neps_state.get_next_pending_trial() is None + assert len(neps_state.get_errors()) == 1 diff --git a/tests/test_runtime/test_locking.py b/tests/test_runtime/test_locking.py deleted file mode 100644 index a79dfa23..00000000 --- a/tests/test_runtime/test_locking.py +++ /dev/null @@ -1,105 +0,0 @@ -import re -import shutil -import subprocess -from pathlib import Path - -import pandas as pd -import pytest -from more_itertools import first_true - - -def launch_example_processes(n_workers: int = 3) -> list: - processes = [] - for _ in range(n_workers): - processes.append( - subprocess.Popen( - "python -m neps_examples.basic_usage.hyperparameters && python -m neps_examples.basic_usage.analyse", - stdout=subprocess.PIPE, - shell=True, - text=True, - ) - ) - return processes - - -@pytest.mark.runtime -def test_filelock() -> None: - """Test that the filelocking method of parallelization works as intended.""" - # Note: Not using tmpdir - # - # Unfortunatly we can't control this from launching the subprocess so we handle - # clean up manualy. This is likely to break if doing multi-processing testing - # with something like pytest-forked - # - # Note: dependancy on basic_usage example - # - # Not a great idea incase the example changes, ideally each process here would - # perform some predefined operation which is known to this test. 
If the example - # changes in some unexpected way, it may break this test - results_dir = Path("results") / "hyperparameters_example" / "results" - assert not results_dir.exists(), "Please delete this directory before running the test" - try: - # Wait for them - p_list = launch_example_processes(n_workers=2) - for p in p_list: - p.wait() - out, _ = p.communicate() - lines = out.splitlines() - - pending_re = r"#Pending configs with worker:\s+(\d+)" - eval_re = r"#Evaluated configs:\s+(\d+)" - - evaluated = first_true((re.match(eval_re, l) for l in lines), default=0) # noqa - pending = first_true((re.match(pending_re, l) for l in lines), default=0) # noqa - - assert evaluated is not None - assert pending is not None - - if evaluated == 0: - evaluated_configs = 0 - else: - evaluated_configs = int(evaluated.groups()[0]) # type: ignore - - if pending == 0: - pending_configs = 0 - else: - pending_configs = int(pending.groups()[0]) # type: ignore - - # Make sure the evaluated configs and the ones pending add up to 15 - assert evaluated_configs + pending_configs == 15 - - # Make sure there are 15 completed configurations - expected = sorted(f"config_{i}" for i in range(1, 16)) - folders = sorted(f.name for f in results_dir.iterdir()) - assert folders == expected - - except Exception as e: - raise e - finally: - if results_dir.exists(): - shutil.rmtree(results_dir.parent) - - -@pytest.mark.summary_csv -def test_summary_csv(): - # Testing the csv files output. - summary_dir = Path("results") / "hyperparameters_example" / "summary_csv" - try: - if not summary_dir.exists(): - p_list = launch_example_processes(n_workers=2) - for p in p_list: - p.wait() - assert summary_dir.is_dir() - run_data_df = pd.read_csv(summary_dir / "run_status.csv") - run_data_df.set_index("description", inplace=True) - num_evaluated_configs_csv = run_data_df.loc["num_evaluated_configs", "value"] - assert num_evaluated_configs_csv == 15 - - config_data_df = pd.read_csv(summary_dir / "config_data.csv") - assert config_data_df.shape[0] == 15 - assert (config_data_df["status"] == "complete").all() - except Exception as e: - raise e - finally: - if summary_dir.exists(): - shutil.rmtree(summary_dir.parent) diff --git a/tests/test_runtime/test_stopping_criterion.py b/tests/test_runtime/test_stopping_criterion.py new file mode 100644 index 00000000..28426a1f --- /dev/null +++ b/tests/test_runtime/test_stopping_criterion.py @@ -0,0 +1,481 @@ +from __future__ import annotations + +import time +from pathlib import Path +from pytest_cases import fixture + +from neps.optimizers.random_search.optimizer import RandomSearch +from neps.runtime import DefaultWorker +from neps.search_spaces.search_space import SearchSpace +from neps.state.filebased import create_or_load_filebased_neps_state +from neps.state.neps_state import NePSState +from neps.state.optimizer import OptimizationState, OptimizerInfo +from neps.state.settings import DefaultReportValues, OnErrorPossibilities, WorkerSettings +from neps.search_spaces import FloatParameter +from neps.state.trial import Trial + + +@fixture +def neps_state(tmp_path: Path) -> NePSState[Path]: + return create_or_load_filebased_neps_state( + directory=tmp_path / "neps_state", + optimizer_info=OptimizerInfo(info={"nothing": "here"}), + optimizer_state=OptimizationState(budget=None, shared_state={}), + ) + + +def test_max_evaluations_total_stopping_criterion( + neps_state: NePSState, +) -> None: + optimizer = RandomSearch(pipeline_space=SearchSpace(a=FloatParameter(0, 1))) + settings = WorkerSettings( + 
on_error=OnErrorPossibilities.IGNORE, + default_report_values=DefaultReportValues(), + max_evaluations_total=3, # <- Highlight + include_in_progress_evaluations_towards_maximum=False, + max_cost_total=None, + max_evaluations_for_worker=None, + max_evaluation_time_total_seconds=None, + max_wallclock_time_for_worker_seconds=None, + max_evaluation_time_for_worker_seconds=None, + max_cost_for_worker=None, + ) + + def eval_function(*args, **kwargs) -> float: + return 1.0 + + worker = DefaultWorker.new( + state=neps_state, + optimizer=optimizer, + evaluation_fn=eval_function, + settings=settings, + _pre_sample_hooks=None, + ) + worker.run() + + assert worker.worker_cumulative_eval_count == 3 + assert neps_state.get_next_pending_trial() is None + assert len(neps_state.get_errors()) == 0 + + trials = neps_state.get_all_trials() + for _, trial in trials.items(): + assert trial.state == Trial.State.SUCCESS + assert trial.report is not None + assert trial.report.loss == 1.0 + + # New worker has the same total number of evaluations so it should not run anything. + new_worker = DefaultWorker.new( + state=neps_state, + optimizer=optimizer, + evaluation_fn=eval_function, + settings=settings, + _pre_sample_hooks=None, + ) + new_worker.run() + assert new_worker.worker_cumulative_eval_count == 0 + assert neps_state.get_next_pending_trial() is None + assert len(neps_state.get_errors()) == 0 + + +def test_worker_evaluations_total_stopping_criterion( + neps_state: NePSState, +) -> None: + optimizer = RandomSearch(pipeline_space=SearchSpace(a=FloatParameter(0, 1))) + settings = WorkerSettings( + on_error=OnErrorPossibilities.IGNORE, + default_report_values=DefaultReportValues(), + max_evaluations_total=None, + include_in_progress_evaluations_towards_maximum=False, + max_cost_total=None, + max_evaluations_for_worker=2, + max_evaluation_time_total_seconds=None, + max_wallclock_time_for_worker_seconds=None, + max_evaluation_time_for_worker_seconds=None, + max_cost_for_worker=None, + ) + + def eval_function(*args, **kwargs) -> float: + return 1.0 + + worker = DefaultWorker.new( + state=neps_state, + optimizer=optimizer, + evaluation_fn=eval_function, + settings=settings, + _pre_sample_hooks=None, + ) + worker.run() + + assert worker.worker_cumulative_eval_count == 2 + assert neps_state.get_next_pending_trial() is None + assert len(neps_state.get_errors()) == 0 + + trials = neps_state.get_all_trials() + assert len(trials) == 2 + for _, trial in trials.items(): + assert trial.state == Trial.State.SUCCESS + assert trial.report is not None + assert trial.report.loss == 1.0 + + # New worker should run 2 more evaluations + new_worker = DefaultWorker.new( + state=neps_state, + optimizer=optimizer, + evaluation_fn=eval_function, + settings=settings, + _pre_sample_hooks=None, + ) + new_worker.run() + + assert worker.worker_cumulative_eval_count == 2 + assert neps_state.get_next_pending_trial() is None + assert len(neps_state.get_errors()) == 0 + + trials = neps_state.get_all_trials() + assert len(trials) == 4 # Now we should have 4 of them + for _, trial in trials.items(): + assert trial.state == Trial.State.SUCCESS + assert trial.report is not None + assert trial.report.loss == 1.0 + + +def test_include_in_progress_evaluations_towards_maximum_with_work_eval_count( + neps_state: NePSState, +) -> None: + optimizer = RandomSearch(pipeline_space=SearchSpace(a=FloatParameter(0, 1))) + settings = WorkerSettings( + on_error=OnErrorPossibilities.IGNORE, + default_report_values=DefaultReportValues(), + max_evaluations_total=2, # <- 
Highlight, only 2 maximum evaluations allowed + include_in_progress_evaluations_towards_maximum=True, # <- include the inprogress trial + max_cost_total=None, + max_evaluations_for_worker=None, + max_evaluation_time_total_seconds=None, + max_wallclock_time_for_worker_seconds=None, + max_evaluation_time_for_worker_seconds=None, + max_cost_for_worker=None, + ) + + # We put in one trial as being inprogress + pending_trial = neps_state.sample_trial(optimizer, worker_id="dummy") + pending_trial.set_evaluating(time_started=0.0, worker_id="dummy") + neps_state.put_updated_trial(pending_trial) + + def eval_function(*args, **kwargs) -> float: + return 1.0 + + worker = DefaultWorker.new( + state=neps_state, + optimizer=optimizer, + evaluation_fn=eval_function, + settings=settings, + _pre_sample_hooks=None, + ) + worker.run() + + assert worker.worker_cumulative_eval_count == 1 + assert ( + neps_state.get_next_pending_trial() is None + ) # should have no pending trials to be picked up + assert len(neps_state.get_errors()) == 0 + + trials = neps_state.get_all_trials() + assert len(trials) == 2 + + the_pending_trial = trials[pending_trial.id] + assert the_pending_trial == pending_trial + assert the_pending_trial.state == Trial.State.EVALUATING + assert the_pending_trial.report is None + + the_completed_trial_id = next(iter(trials.keys() - {pending_trial.id})) + the_completed_trial = trials[the_completed_trial_id] + + assert the_completed_trial.state == Trial.State.SUCCESS + assert the_completed_trial.report is not None + assert the_completed_trial.report.loss == 1.0 + + +def test_max_cost_total( + neps_state: NePSState, +) -> None: + optimizer = RandomSearch(pipeline_space=SearchSpace(a=FloatParameter(0, 1))) + settings = WorkerSettings( + on_error=OnErrorPossibilities.IGNORE, + default_report_values=DefaultReportValues(), + max_evaluations_total=10, # Safety incase it doesn't work that we eventually stop + include_in_progress_evaluations_towards_maximum=False, + max_cost_total=2, # <- Highlight, only 2 maximum evaluations allowed + max_evaluations_for_worker=None, + max_evaluation_time_total_seconds=None, + max_wallclock_time_for_worker_seconds=None, + max_evaluation_time_for_worker_seconds=None, + max_cost_for_worker=None, + ) + + def eval_function(*args, **kwargs) -> dict: + return {"loss": 1.0, "cost": 1.0} + + worker = DefaultWorker.new( + state=neps_state, + optimizer=optimizer, + evaluation_fn=eval_function, + settings=settings, + _pre_sample_hooks=None, + ) + worker.run() + + assert worker.worker_cumulative_eval_count == 2 + assert worker.worker_cumulative_eval_cost == 2.0 + assert ( + neps_state.get_next_pending_trial() is None + ) # should have no pending trials to be picked up + assert len(neps_state.get_errors()) == 0 + + trials = neps_state.get_all_trials() + assert len(trials) == 2 + + # New worker should now not run anything as the total cost has been reached. 
+ new_worker = DefaultWorker.new( + state=neps_state, + optimizer=optimizer, + evaluation_fn=eval_function, + settings=settings, + _pre_sample_hooks=None, + ) + new_worker.run() + assert new_worker.worker_cumulative_eval_count == 0 + + +def test_worker_cost_total( + neps_state: NePSState, +) -> None: + optimizer = RandomSearch(pipeline_space=SearchSpace(a=FloatParameter(0, 1))) + settings = WorkerSettings( + on_error=OnErrorPossibilities.IGNORE, + default_report_values=DefaultReportValues(), + max_evaluations_total=10, # Safety net so we eventually stop in case it doesn't work + include_in_progress_evaluations_towards_maximum=False, + max_cost_total=None, + max_evaluations_for_worker=None, + max_evaluation_time_total_seconds=None, + max_wallclock_time_for_worker_seconds=None, + max_evaluation_time_for_worker_seconds=None, + max_cost_for_worker=2, # <- Highlight, only a cost of 2 allowed per worker + ) + + def eval_function(*args, **kwargs) -> dict: + return {"loss": 1.0, "cost": 1.0} + + worker = DefaultWorker.new( + state=neps_state, + optimizer=optimizer, + evaluation_fn=eval_function, + settings=settings, + _pre_sample_hooks=None, + ) + worker.run() + + assert worker.worker_cumulative_eval_count == 2 + assert worker.worker_cumulative_eval_cost == 2.0 + assert ( + neps_state.get_next_pending_trial() is None + ) # should have no pending trials to be picked up + assert len(neps_state.get_errors()) == 0 + + trials = neps_state.get_all_trials() + assert len(trials) == 2 + + # New worker should also run 2 more trials + new_worker = DefaultWorker.new( + state=neps_state, + optimizer=optimizer, + evaluation_fn=eval_function, + settings=settings, + _pre_sample_hooks=None, + ) + new_worker.run() + assert new_worker.worker_cumulative_eval_count == 2 + assert new_worker.worker_cumulative_eval_cost == 2.0 + assert ( + neps_state.get_next_pending_trial() is None + ) # should have no pending trials to be picked up + assert len(neps_state.get_errors()) == 0 + + trials = neps_state.get_all_trials() + assert len(trials) == 4 # 2 more trials were run + + +def test_worker_wallclock_time( + neps_state: NePSState, +) -> None: + optimizer = RandomSearch(pipeline_space=SearchSpace(a=FloatParameter(0, 1))) + settings = WorkerSettings( + on_error=OnErrorPossibilities.IGNORE, + default_report_values=DefaultReportValues(), + max_evaluations_total=1000, # Safety net so we eventually stop in case it doesn't work + include_in_progress_evaluations_towards_maximum=False, + max_cost_total=None, + max_evaluations_for_worker=None, + max_evaluation_time_total_seconds=None, + max_wallclock_time_for_worker_seconds=1, # <- highlight, 1 second + max_evaluation_time_for_worker_seconds=None, + max_cost_for_worker=None, + ) + + def eval_function(*args, **kwargs) -> float: + return 1.0 + + worker = DefaultWorker.new( + state=neps_state, + optimizer=optimizer, + evaluation_fn=eval_function, + settings=settings, + _pre_sample_hooks=None, + worker_id="dummy", + ) + worker.run() + + assert worker.worker_cumulative_eval_count > 0 + assert worker.worker_cumulative_evaluation_time_seconds <= 2.0 + assert ( + neps_state.get_next_pending_trial() is None + ) # should have no pending trials to be picked up + assert len(neps_state.get_errors()) == 0 + len_trials_on_first_worker = len(neps_state.get_all_trials()) + + # New worker should also run some more trials + new_worker = DefaultWorker.new( + state=neps_state, + optimizer=optimizer, + evaluation_fn=eval_function, + settings=settings, + _pre_sample_hooks=None, + worker_id="dummy2", + ) +
new_worker.run() + assert new_worker.worker_cumulative_eval_count > 0 + assert new_worker.worker_cumulative_evaluation_time_seconds <= 2.0 + assert ( + neps_state.get_next_pending_trial() is None + ) # should have no pending trials to be picked up + assert len(neps_state.get_errors()) == 0 + len_trials_on_second_worker = len(neps_state.get_all_trials()) + assert len_trials_on_second_worker > len_trials_on_first_worker + + +def test_max_worker_evaluation_time( + neps_state: NePSState, +) -> None: + optimizer = RandomSearch(pipeline_space=SearchSpace(a=FloatParameter(0, 1))) + settings = WorkerSettings( + on_error=OnErrorPossibilities.IGNORE, + default_report_values=DefaultReportValues(), + max_evaluations_total=10, # Safety net so we eventually stop in case it doesn't work + include_in_progress_evaluations_towards_maximum=False, + max_cost_total=None, + max_evaluations_for_worker=None, + max_evaluation_time_total_seconds=None, + max_wallclock_time_for_worker_seconds=None, + max_evaluation_time_for_worker_seconds=0.5, + max_cost_for_worker=None, + ) + + def eval_function(*args, **kwargs) -> float: + time.sleep(0.6) + return 1.0 + + worker = DefaultWorker.new( + state=neps_state, + optimizer=optimizer, + evaluation_fn=eval_function, + settings=settings, + _pre_sample_hooks=None, + worker_id="dummy", + ) + worker.run() + + assert worker.worker_cumulative_eval_count > 0 + assert worker.worker_cumulative_evaluation_time_seconds <= 1.0 + assert ( + neps_state.get_next_pending_trial() is None + ) # should have no pending trials to be picked up + assert len(neps_state.get_errors()) == 0 + len_trials_on_first_worker = len(neps_state.get_all_trials()) + + # New worker should also run some more trials + new_worker = DefaultWorker.new( + state=neps_state, + optimizer=optimizer, + evaluation_fn=eval_function, + settings=settings, + _pre_sample_hooks=None, + worker_id="dummy2", + ) + new_worker.run() + assert new_worker.worker_cumulative_eval_count > 0 + assert new_worker.worker_cumulative_evaluation_time_seconds <= 1.0 + assert ( + neps_state.get_next_pending_trial() is None + ) # should have no pending trials to be picked up + assert len(neps_state.get_errors()) == 0 + len_trials_on_second_worker = len(neps_state.get_all_trials()) + assert len_trials_on_second_worker > len_trials_on_first_worker + + +def test_max_evaluation_time_global( + neps_state: NePSState, +) -> None: + optimizer = RandomSearch(pipeline_space=SearchSpace(a=FloatParameter(0, 1))) + settings = WorkerSettings( + on_error=OnErrorPossibilities.IGNORE, + default_report_values=DefaultReportValues(), + max_evaluations_total=10, # Safety net so we eventually stop in case it doesn't work + include_in_progress_evaluations_towards_maximum=False, + max_cost_total=None, + max_evaluations_for_worker=None, + max_evaluation_time_total_seconds=0.5, # <- Highlight + max_wallclock_time_for_worker_seconds=None, + max_evaluation_time_for_worker_seconds=None, + max_cost_for_worker=None, + ) + + def eval_function(*args, **kwargs) -> float: + time.sleep(0.6) + return 1.0 + + worker = DefaultWorker.new( + state=neps_state, + optimizer=optimizer, + evaluation_fn=eval_function, + settings=settings, + _pre_sample_hooks=None, + worker_id="dummy", + ) + worker.run() + + assert worker.worker_cumulative_eval_count > 0 + assert worker.worker_cumulative_evaluation_time_seconds <= 1.0 + assert ( + neps_state.get_next_pending_trial() is None + ) # should have no pending trials to be picked up + assert len(neps_state.get_errors()) == 0 + len_trials_on_first_worker =
len(neps_state.get_all_trials()) + + # New worker should not run anything as the global evaluation time budget has been reached + new_worker = DefaultWorker.new( + state=neps_state, + optimizer=optimizer, + evaluation_fn=eval_function, + settings=settings, + _pre_sample_hooks=None, + worker_id="dummy2", + ) + new_worker.run() + assert new_worker.worker_cumulative_eval_count == 0 + assert new_worker.worker_cumulative_evaluation_time_seconds == 0 + assert ( + neps_state.get_next_pending_trial() is None + ) # should have no pending trials to be picked up + assert len(neps_state.get_errors()) == 0 + len_trials_on_second_worker = len(neps_state.get_all_trials()) + assert len_trials_on_second_worker == len_trials_on_first_worker diff --git a/tests/test_state/__init__.py b/tests/test_state/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/tests/test_state/test_filebased_neps_state.py b/tests/test_state/test_filebased_neps_state.py new file mode 100644 index 00000000..a3385141 --- /dev/null +++ b/tests/test_state/test_filebased_neps_state.py @@ -0,0 +1,123 @@ +"""NOTE: These tests are pretty specific to the filebased state implementation. +This could be generalized if we end up with a server based implementation but +for now we're just testing the filebased implementation.""" + +from __future__ import annotations + +from pathlib import Path +from typing import Any +from neps.exceptions import NePSError, TrialNotFoundError +from neps.state.err_dump import ErrDump +from neps.state.filebased import ( + create_or_load_filebased_neps_state, + load_filebased_neps_state, +) + +import pytest +from pytest_cases import fixture, parametrize +from neps.state.optimizer import BudgetInfo, OptimizationState, OptimizerInfo + + +@fixture +@parametrize("budget", [BudgetInfo(max_cost_budget=10, used_cost_budget=0), None]) +@parametrize("shared_state", [{"a": "b"}, {}]) +def optimizer_state( + budget: BudgetInfo | None, + shared_state: dict[str, Any], +) -> OptimizationState: + return OptimizationState(budget=budget, shared_state=shared_state) + + +@fixture +@parametrize("optimizer_info", [OptimizerInfo({"a": "b"}), OptimizerInfo({})]) +def optimizer_info(optimizer_info: OptimizerInfo) -> OptimizerInfo: + return optimizer_info + + +def test_create_with_new_filebased_neps_state( + tmp_path: Path, + optimizer_info: OptimizerInfo, + optimizer_state: OptimizationState, +) -> None: + new_path = tmp_path / "neps_state" + neps_state = create_or_load_filebased_neps_state( + directory=new_path, + optimizer_info=optimizer_info, + optimizer_state=optimizer_state, + ) + assert neps_state.optimizer_info() == optimizer_info + assert neps_state.optimizer_state() == optimizer_state + assert neps_state.all_trial_ids() == set() + assert neps_state.get_all_trials() == {} + assert neps_state.get_errors() == ErrDump(errs=[]) + assert neps_state.get_next_pending_trial() is None + assert neps_state.get_next_pending_trial(n=10) == [] + + with pytest.raises(TrialNotFoundError): + assert neps_state.get_trial_by_id("1") + + with pytest.raises(TrialNotFoundError): + assert neps_state.get_trials_by_ids(["1", "2"]) + + +def test_create_or_load_with_load_filebased_neps_state( + tmp_path: Path, + optimizer_info: OptimizerInfo, + optimizer_state: OptimizationState, +) -> None: + new_path = tmp_path / "neps_state" + neps_state = create_or_load_filebased_neps_state( + directory=new_path, + optimizer_info=optimizer_info, + optimizer_state=optimizer_state, + ) + + # NOTE: This isn't a defined way to do this but we should check + # that we prioritize what's in the
existing data over what + # was passed in. + different_state = OptimizationState( + budget=BudgetInfo(max_cost_budget=20, used_cost_budget=10), + shared_state={"c": "d"}, + ) + neps_state2 = create_or_load_filebased_neps_state( + directory=new_path, + optimizer_info=optimizer_info, + optimizer_state=different_state, + ) + assert neps_state == neps_state2 + + +def test_load_on_existing_neps_state( + tmp_path: Path, + optimizer_info: OptimizerInfo, + optimizer_state: OptimizationState, +) -> None: + new_path = tmp_path / "neps_state" + neps_state = create_or_load_filebased_neps_state( + directory=new_path, + optimizer_info=optimizer_info, + optimizer_state=optimizer_state, + ) + + neps_state2 = load_filebased_neps_state(directory=new_path) + assert neps_state == neps_state2 + + +def test_new_or_load_on_existing_neps_state_with_different_optimizer_info( + tmp_path: Path, + optimizer_info: OptimizerInfo, + optimizer_state: OptimizationState, +) -> None: + new_path = tmp_path / "neps_state" + create_or_load_filebased_neps_state( + directory=new_path, + optimizer_info=optimizer_info, + optimizer_state=optimizer_state, + ) + + with pytest.raises(NePSError): + create_or_load_filebased_neps_state( + directory=new_path, + optimizer_info=OptimizerInfo({"e": "f"}), + optimizer_state=optimizer_state, + ) diff --git a/tests/test_state/test_neps_state.py b/tests/test_state/test_neps_state.py new file mode 100644 index 00000000..0d0891ce --- /dev/null +++ b/tests/test_state/test_neps_state.py @@ -0,0 +1,205 @@ +"""NOTE: These tests are pretty specific to the filebased state implementation. +This could be generalized if we end up with a server based implementation but +for now we're just testing the filebased implementation.""" + +from __future__ import annotations + +import time +from pathlib import Path +from typing import Any + +import pytest +from neps.optimizers.base_optimizer import BaseOptimizer +from neps.search_spaces.hyperparameters import ( + FloatParameter, + IntegerParameter, + ConstantParameter, + CategoricalParameter, +) +from neps.search_spaces.search_space import SearchSpace +from neps.state.filebased import ( + create_or_load_filebased_neps_state, +) + +from pytest_cases import fixture, parametrize, parametrize_with_cases, case +from neps.state.neps_state import NePSState +from neps.state.optimizer import BudgetInfo, OptimizationState, OptimizerInfo +from neps.optimizers import SearcherMapping +from neps.utils.common import MissingDependencyError + + +@case +def case_search_space_no_fid() -> SearchSpace: + return SearchSpace( + a=FloatParameter(0, 1), + b=CategoricalParameter(["a", "b", "c"]), + c=ConstantParameter("a"), + d=IntegerParameter(0, 10), + ) + + +@case +def case_search_space_with_fid() -> SearchSpace: + return SearchSpace( + a=FloatParameter(0, 1), + b=CategoricalParameter(["a", "b", "c"]), + c=ConstantParameter("a"), + d=IntegerParameter(0, 10), + e=IntegerParameter(1, 10, is_fidelity=True), + ) + + +@case +def case_search_space_no_fid_with_prior() -> SearchSpace: + return SearchSpace( + a=FloatParameter(0, 1, default=0.5), + b=CategoricalParameter(["a", "b", "c"], default="a"), + c=ConstantParameter("a"), + d=IntegerParameter(0, 10, default=5), + ) + + +@case +def case_search_space_fid_with_prior() -> SearchSpace: + return SearchSpace( + a=FloatParameter(0, 1, default=0.5), + b=CategoricalParameter(["a", "b", "c"], default="a"), + c=ConstantParameter("a"), + d=IntegerParameter(0, 10, default=5), + e=IntegerParameter(1, 10, is_fidelity=True), + ) + + +# See issue #118 
+NON_INSTANTIABLE_SEARCH_SPACES_WITHOUT_SPECIFIC_KWARGS = "assisted_regularized_evolution" + +# See issue #121 +JUST_SKIP = [ + "multifidelity_tpe", +] + +# +OPTIMIZER_FAILS_WITH_FIDELITY = [ + "random_search", +] + +# There's no programmatic way to check if a class requires a fidelity. +# See issue #118, #119, #120 +OPTIMIZER_REQUIRES_FIDELITY = [ + "successive_halving", + "successive_halving_prior", + "asha", + "asha_prior", + "hyperband", + "hyperband_custom_default", + "priorband", + "mobster", + "mf_ei_bo", +] +OPTIMIZER_REQUIRES_BUDGET = [ + "successive_halving_prior", + "hyperband_custom_default", + "asha", + "priorband", + "hyperband", + "asha_prior", + "mobster", +] +REQUIRES_PRIOR = { + "priorband", +} +REQUIRES_COST = ["cost_cooling_bayesian_optimization", "cost_cooling"] + + +@fixture +@parametrize( + "key", + [ + k + for k in SearcherMapping.keys() + if k not in NON_INSTANTIABLE_SEARCH_SPACES_WITHOUT_SPECIFIC_KWARGS + ], +) +@parametrize_with_cases("search_space", cases=".", prefix="case_search_space") +def optimizer_and_key(key: str, search_space: SearchSpace) -> tuple[BaseOptimizer, str]: + if key in JUST_SKIP: + pytest.xfail(f"{key} is not instantiable") + + if key in REQUIRES_PRIOR and search_space.hyperparameters["a"].default is None: + pytest.xfail(f"{key} requires a prior") + + if search_space.has_fidelity and key in OPTIMIZER_FAILS_WITH_FIDELITY: + pytest.xfail(f"{key} crashed with a fidelity") + + if key in OPTIMIZER_REQUIRES_FIDELITY and not search_space.has_fidelity: + pytest.xfail(f"{key} requires a fidelity parameter") + kwargs: dict[str, Any] = { + "pipeline_space": search_space, + } + if key in OPTIMIZER_REQUIRES_BUDGET: + kwargs["budget"] = 10 + + optimizer_cls = SearcherMapping[key] + + try: + return optimizer_cls(**kwargs), key + except MissingDependencyError as e: + pytest.xfail(f"{key} requires {e.dep} to run.") + + +@parametrize("optimizer_info", [OptimizerInfo({"a": "b"}), OptimizerInfo({})]) +@parametrize("budget", [BudgetInfo(max_cost_budget=10, used_cost_budget=0), None]) +@parametrize("shared_state", [{"a": "b"}, {}]) +def case_neps_state_filebased( + tmp_path: Path, + budget: BudgetInfo | None, + optimizer_info: OptimizerInfo, + shared_state: dict[str, Any], +) -> NePSState: + new_path = tmp_path / "neps_state" + return create_or_load_filebased_neps_state( + directory=new_path, + optimizer_info=optimizer_info, + optimizer_state=OptimizationState(budget=budget, shared_state=shared_state), + ) + + +@parametrize_with_cases("neps_state", cases=".", prefix="case_neps_state") +def test_sample_trial( + neps_state: NePSState, + optimizer_and_key: tuple[BaseOptimizer, str], +) -> None: + optimizer, key = optimizer_and_key + if key in REQUIRES_COST and neps_state.optimizer_state().budget is None: + pytest.xfail(f"{key} requires a cost budget") + + assert neps_state.get_all_trials() == {} + assert neps_state.get_next_pending_trial() is None + assert neps_state.get_next_pending_trial(n=10) == [] + assert neps_state.all_trial_ids() == set() + + trial1 = neps_state.sample_trial(optimizer=optimizer, worker_id="1") + for k, v in trial1.config.items(): + assert k in optimizer.pipeline_space.hyperparameters + assert v is not None, f"'{k}' is None in {trial1.config}" + + # HACK: Unfortunately, due to Windows, whose time.time() is not very + # precise, we need to introduce a sleep -_- + time.sleep(0.1) + + assert neps_state.get_all_trials() == {trial1.id: trial1} + assert neps_state.get_next_pending_trial() == trial1 + assert neps_state.get_next_pending_trial(n=10) ==
[trial1] + assert neps_state.all_trial_ids() == {trial1.id} + + trial2 = neps_state.sample_trial(optimizer=optimizer, worker_id="1") + for k, v in trial2.config.items(): + assert k in optimizer.pipeline_space.hyperparameters + assert v is not None, f"'{k}' is None in {trial2.config}" + + assert trial1 != trial2 + + assert neps_state.get_all_trials() == {trial1.id: trial1, trial2.id: trial2} + assert neps_state.get_next_pending_trial() == trial1 + assert neps_state.get_next_pending_trial(n=10) == [trial1, trial2] + assert neps_state.all_trial_ids() == {trial1.id, trial2.id} diff --git a/tests/test_rng.py b/tests/test_state/test_rng.py similarity index 52% rename from tests/test_rng.py rename to tests/test_state/test_rng.py index 0af38b49..1f1318d3 100644 --- a/tests/test_rng.py +++ b/tests/test_state/test_rng.py @@ -7,36 +7,50 @@ import torch import pytest -from neps.utils._rng import SeedState +from neps.state.seed_snapshot import SeedSnapshot +from neps.state.filebased import ReaderWriterSeedSnapshot + @pytest.mark.parametrize( - "make_ints", ( + "make_ints", + ( lambda: [random.randint(0, 100) for _ in range(10)], lambda: list(np.random.randint(0, 100, (10,))), lambda: list(torch.randint(0, 100, (10,))), - ) + ), ) -def test_randomstate_consistent(tmp_path: Path, make_ints: Callable[[], list[int]]) -> None: +def test_randomstate_consistent( + tmp_path: Path, make_ints: Callable[[], list[int]] +) -> None: random.seed(42) np.random.seed(42) torch.manual_seed(42) seed_dir = tmp_path / "seed_dir" + seed_dir.mkdir(exist_ok=True, parents=True) - seed_state = SeedState.get() + seed_state = SeedSnapshot.new_capture() integers_1 = make_ints() - seed_state.set_as_global_state() - integers_2 = make_ints() + seed_state.set_as_global_seed_state() + integers_2 = make_ints() assert integers_1 == integers_2 - SeedState.get().dump(seed_dir) - integers_3 = make_ints() + ReaderWriterSeedSnapshot.write(SeedSnapshot.new_capture(), seed_dir) + integers_3 = make_ints() assert integers_3 != integers_2, "Ensure we have actually changed random state" - SeedState.load(seed_dir).set_as_global_state() + ReaderWriterSeedSnapshot.read(seed_dir).set_as_global_seed_state() integers_4 = make_ints() assert integers_3 == integers_4 + + before = SeedSnapshot.new_capture() + after = SeedSnapshot.new_capture() + + _ = make_ints() + + after.recapture() + assert before != after diff --git a/tests/test_state/test_synced.py b/tests/test_state/test_synced.py new file mode 100644 index 00000000..3a28b724 --- /dev/null +++ b/tests/test_state/test_synced.py @@ -0,0 +1,432 @@ +from __future__ import annotations + +from pytest_cases import parametrize, parametrize_with_cases, case +import copy +import numpy as np +import random +from neps.state.err_dump import ErrDump, SerializableTrialError +from neps.state.filebased import ( + ReaderWriterErrDump, + ReaderWriterOptimizationState, + ReaderWriterOptimizerInfo, + ReaderWriterSeedSnapshot, + ReaderWriterTrial, + FileVersioner, + FileLocker, +) +from neps.state.optimizer import BudgetInfo, OptimizationState, OptimizerInfo +from neps.state.protocols import Synced +from neps.state.trial import Trial +import pytest +from typing import Any, Callable +from pathlib import Path +from neps.state import SeedSnapshot, Synced, Trial + + +@case +def case_trial_1(tmp_path: Path) -> tuple[Synced[Trial, Path], Callable[[Trial], None]]: + trial_id = "1" + trial = Trial.new( + trial_id=trial_id, + location="", + config={"a": "b"}, + time_sampled=0, + previous_trial=None, + previous_trial_location=None, +
worker_id=0, + ) + + def _mutate(trial: Trial) -> None: + trial.set_submitted(time_submitted=1) + + x = Synced.new( + data=trial, + location=tmp_path / "1", + locker=FileLocker(lock_path=tmp_path / "1" / ".lock", poll=0.1, timeout=None), + versioner=FileVersioner(version_file=tmp_path / "1" / ".version"), + reader_writer=ReaderWriterTrial(), + ) + return x, _mutate + + +@case +def case_trial_2(tmp_path: Path) -> tuple[Synced[Trial, Path], Callable[[Trial], None]]: + trial_id = "1" + trial = Trial.new( + trial_id=trial_id, + location="", + config={"a": "b"}, + time_sampled=0, + previous_trial=None, + previous_trial_location=None, + worker_id=0, + ) + trial.set_submitted(time_submitted=1) + + def _mutate(trial: Trial) -> None: + trial.set_evaluating(time_started=2, worker_id="1") + + x = Synced.new( + data=trial, + location=tmp_path / "1", + locker=FileLocker(lock_path=tmp_path / "1" / ".lock", poll=0.1, timeout=None), + versioner=FileVersioner(version_file=tmp_path / "1" / ".version"), + reader_writer=ReaderWriterTrial(), + ) + return x, _mutate + + +@case +def case_trial_3(tmp_path: Path) -> tuple[Synced[Trial, Path], Callable[[Trial], None]]: + trial_id = "1" + trial = Trial.new( + trial_id=trial_id, + config={"a": "b"}, + location="", + time_sampled=0, + previous_trial=None, + previous_trial_location=None, + worker_id=0, + ) + trial.set_submitted(time_submitted=1) + trial.set_evaluating(time_started=2, worker_id="1") + + def _mutate(trial: Trial) -> None: + trial.set_complete( + time_end=3, + loss=1, + cost=1, + extra={"hi": [1, 2, 3]}, + learning_curve=[1], + report_as="success", + evaluation_duration=1, + err=None, + tb=None, + ) + + x = Synced.new( + data=trial, + location=tmp_path / "1", + locker=FileLocker(lock_path=tmp_path / "1" / ".lock", poll=0.1, timeout=None), + versioner=FileVersioner(version_file=tmp_path / "1" / ".version"), + reader_writer=ReaderWriterTrial(), + ) + return x, _mutate + + +@case +def case_trial_4(tmp_path: Path) -> tuple[Synced[Trial, Path], Callable[[Trial], None]]: + trial_id = "1" + trial = Trial.new( + trial_id=trial_id, + config={"a": "b"}, + location="", + time_sampled=0, + previous_trial=None, + previous_trial_location=None, + worker_id=0, + ) + trial.set_submitted(time_submitted=1) + trial.set_evaluating(time_started=2, worker_id="1") + + def _mutate(trial: Trial) -> None: + trial.set_complete( + time_end=3, + loss=np.nan, + cost=np.inf, + extra={"hi": [1, 2, 3]}, + report_as="failed", + learning_curve=None, + evaluation_duration=2, + err=None, + tb=None, + ) + + x = Synced.new( + data=trial, + location=tmp_path / "1", + locker=FileLocker(lock_path=tmp_path / "1" / ".lock", poll=0.1, timeout=None), + versioner=FileVersioner(version_file=tmp_path / "1" / ".version"), + reader_writer=ReaderWriterTrial(), + ) + return x, _mutate + + +@case +def case_trial_5(tmp_path: Path) -> tuple[Synced[Trial, Path], Callable[[Trial], None]]: + trial_id = "1" + trial = Trial.new( + trial_id=trial_id, + config={"a": "b"}, + location="", + time_sampled=0, + previous_trial=None, + previous_trial_location=None, + worker_id=0, + ) + trial.set_submitted(time_submitted=1) + trial.set_evaluating(time_started=2, worker_id=1) + + def _mutate(trial: Trial) -> None: + trial.set_complete( + time_end=3, + loss=np.nan, + cost=np.inf, + extra={"hi": [1, 2, 3]}, + learning_curve=None, + evaluation_duration=2, + report_as="failed", + err=ValueError("hi"), + tb="something something traceback", + ) + + x = Synced.new( + data=trial, + location=tmp_path / "1", + 
locker=FileLocker(lock_path=tmp_path / "1" / ".lock", poll=0.1, timeout=None), + versioner=FileVersioner(version_file=tmp_path / "1" / ".version"), + reader_writer=ReaderWriterTrial(), + ) + return x, _mutate + + +@case +def case_trial_6(tmp_path: Path) -> tuple[Synced[Trial, Path], Callable[[Trial], None]]: + trial_id = "1" + trial = Trial.new( + trial_id=trial_id, + config={"a": "b"}, + location="", + time_sampled=0, + previous_trial=None, + previous_trial_location=None, + worker_id=0, + ) + trial.set_submitted(time_submitted=1) + trial.set_evaluating(time_started=2, worker_id=1) + + def _mutate(trial: Trial) -> None: + trial.set_corrupted() + + x = Synced.new( + data=trial, + location=tmp_path / "1", + locker=FileLocker(lock_path=tmp_path / "1" / ".lock", poll=0.1, timeout=None), + versioner=FileVersioner(version_file=tmp_path / "1" / ".version"), + reader_writer=ReaderWriterTrial(), + ) + return x, _mutate + + +@case +def case_trial_7(tmp_path: Path) -> tuple[Synced[Trial, Path], Callable[[Trial], None]]: + trial_id = "1" + trial = Trial.new( + trial_id=trial_id, + config={"a": "b"}, + location="", + time_sampled=0, + previous_trial=None, + previous_trial_location=None, + worker_id=0, + ) + trial.set_submitted(time_submitted=1) + trial.set_evaluating(time_started=2, worker_id=1) + trial.set_complete( + time_end=3, + loss=np.nan, + cost=np.inf, + extra={"hi": [1, 2, 3]}, + learning_curve=[1, 2, 3], + report_as="failed", + evaluation_duration=2, + err=ValueError("hi"), + tb="something something traceback", + ) + + def _mutate(trial: Trial) -> None: + trial.reset() + + x = Synced.new( + data=trial, + location=tmp_path / "1", + locker=FileLocker(lock_path=tmp_path / "1" / ".lock", poll=0.1, timeout=None), + versioner=FileVersioner(version_file=tmp_path / "1" / ".version"), + reader_writer=ReaderWriterTrial(), + ) + return x, _mutate + + +@case +def case_seed_snapshot( + tmp_path: Path, +) -> tuple[Synced[SeedSnapshot, Path], Callable[[SeedSnapshot], None]]: + seed = SeedSnapshot.new_capture() + + def _mutate(seed: SeedSnapshot) -> None: + random.randint(0, 100) + seed.recapture() + + x = Synced.new( + data=seed, + location=tmp_path / "seeds", + locker=FileLocker(lock_path=tmp_path / "seeds" / ".lock", poll=0.1, timeout=None), + versioner=FileVersioner(version_file=tmp_path / "seeds" / ".version"), + reader_writer=ReaderWriterSeedSnapshot(), + ) + return x, _mutate + + +@case +@parametrize( + "err", + [ + None, + SerializableTrialError( + trial_id="1", + worker_id="2", + err_type="ValueError", + err="hi", + tb="traceback\nmore", + ), + ], +) +def case_err_dump( + tmp_path: Path, + err: None | SerializableTrialError, +) -> tuple[Synced[ErrDump, Path], Callable[[ErrDump], None]]: + err_dump = ErrDump() if err is None else ErrDump(errs=[err]) + + def _mutate(err_dump: ErrDump) -> None: + new_err = SerializableTrialError( + trial_id="2", + worker_id="2", + err_type="RuntimeError", + err="hi", + tb="traceback\nless", + ) + err_dump.append(new_err) + + x = Synced.new( + data=err_dump, + location=tmp_path / "err_dump", + locker=FileLocker( + lock_path=tmp_path / "err_dump" / ".lock", poll=0.1, timeout=None + ), + versioner=FileVersioner(version_file=tmp_path / "err_dump" / ".version"), + reader_writer=ReaderWriterErrDump("all"), + ) + return x, _mutate + + +@case +def case_optimizer_info( + tmp_path: Path, +) -> tuple[Synced[OptimizerInfo, Path], Callable[[OptimizerInfo], None]]: + optimizer_info = OptimizerInfo(info={"a": "b"}) + + def _mutate(optimizer_info: OptimizerInfo) -> None: + 
optimizer_info.info["b"] = "c" # type: ignore # NOTE: We shouldn't be mutating but anywho... + + x = Synced.new( + data=optimizer_info, + location=tmp_path / "optimizer_info", + locker=FileLocker( + lock_path=tmp_path / "optimizer_info" / ".lock", poll=0.1, timeout=None + ), + versioner=FileVersioner(version_file=tmp_path / "optimizer_info" / ".version"), + reader_writer=ReaderWriterOptimizerInfo(), + ) + return x, _mutate + + +@case +@pytest.mark.parametrize( + "budget", (None, BudgetInfo(max_cost_budget=10, used_cost_budget=0)) +) +@pytest.mark.parametrize("shared_state", ({}, {"a": "b"})) +def case_optimization_state( + tmp_path: Path, + budget: BudgetInfo | None, + shared_state: dict[str, Any], +) -> tuple[Synced[OptimizationState, Path], Callable[[OptimizationState], None]]: + optimization_state = OptimizationState(budget=budget, shared_state=shared_state) + + def _mutate(optimization_state: OptimizationState) -> None: + optimization_state.shared_state["a"] = "c" # type: ignore # NOTE: We shouldn't be mutating but anywho... + optimization_state.budget = BudgetInfo(max_cost_budget=10, used_cost_budget=5) + + x = Synced.new( + data=optimization_state, + location=tmp_path / "optimizer_info", + locker=FileLocker( + lock_path=tmp_path / "optimizer_info" / ".lock", poll=0.1, timeout=None + ), + versioner=FileVersioner(version_file=tmp_path / "optimizer_info" / ".version"), + reader_writer=ReaderWriterOptimizationState(), + ) + return x, _mutate + + +@parametrize_with_cases("shared, mutate", cases=".") +def test_initial_state(shared: Synced, mutate: Callable) -> None: + assert shared._is_locked() == False + assert shared._is_stale() == False + assert shared._unsynced() == shared.synced() + + +@parametrize_with_cases("shared, mutate", cases=".") +def test_put_updates_current_data_and_is_not_stale( + shared: Synced, mutate: Callable +) -> None: + current_data = shared._unsynced() + + new_data = copy.deepcopy(current_data) + mutate(new_data) + assert new_data != current_data + + shared.put(new_data) + assert shared._unsynced() == new_data + assert shared._is_stale() == False + assert shared._is_locked() == False + + +@parametrize_with_cases("shared1, mutate", cases=".") +def test_share_synced_mutate_and_put(shared1: Synced, mutate: Callable) -> None: + shared2 = shared1.deepcopy() + assert shared1 == shared2 + assert not shared1._is_locked() + assert not shared2._is_locked() + + with shared2.acquire() as (data2, put2): + assert shared1._is_locked() + assert shared2._is_locked() + mutate(data2) + put2(data2) + + assert not shared1._is_locked() + assert not shared2._is_locked() + + assert shared1 != shared2 + assert shared1._unsynced() != shared2._unsynced() + assert shared1._is_stale() + + shared1.synced() + assert not shared1._is_stale() + assert not shared2._is_stale() + assert shared1._unsynced() == shared2._unsynced() + + +@parametrize_with_cases("shared, mutate", cases=".") +def test_shared_new_fails_if_done_on_existing_resource( + shared: Synced, mutate: Callable +) -> None: + data, location, versioner, rw, lock = shared._components() + with pytest.raises(Synced.VersionedResourceAlreadyExistsError): + Synced.new( + data=data, + location=location, + versioner=versioner, + reader_writer=rw, + locker=lock, + ) diff --git a/tests/test_state/test_trial.py b/tests/test_state/test_trial.py new file mode 100644 index 00000000..0ddc9e34 --- /dev/null +++ b/tests/test_state/test_trial.py @@ -0,0 +1,301 @@ +from __future__ import annotations +from neps.state import Trial +import os +import numpy as 
np + + +def test_trial_creation() -> None: + trial_id = "1" + time_sampled = 0 + previous_trial = "0" + worker_id = str(os.getpid()) + + trial = Trial.new( + trial_id=trial_id, + config={"a": "b"}, + location="1", + previous_trial_location=None, + time_sampled=time_sampled, + previous_trial=previous_trial, + worker_id=worker_id, + ) + assert trial.state == Trial.State.PENDING + assert trial.id == trial_id + assert trial.config == {"a": "b"} + assert trial.metadata == Trial.MetaData( + id="1", + time_sampled=time_sampled, + location="1", + previous_trial_location=None, + previous_trial_id=previous_trial, + sampling_worker_id=worker_id, + time_started=None, + time_submitted=None, + time_end=None, + ) + + +def test_trial_as_submitted() -> None: + trial_id = "1" + time_sampled = 0 + time_submitted = 1 + previous_trial = "0" + worker_id = str(os.getpid()) + + trial = Trial.new( + trial_id=trial_id, + config={"a": "b"}, + previous_trial_location="0", + location="1", + time_sampled=time_sampled, + previous_trial=previous_trial, + worker_id=worker_id, + ) + trial.set_submitted(time_submitted=time_submitted) + + assert trial.state == Trial.State.SUBMITTED + assert trial.id == trial_id + assert trial.config == {"a": "b"} + assert trial.metadata == Trial.MetaData( + id=trial_id, + time_sampled=time_sampled, + previous_trial_location="0", + location="1", + previous_trial_id=previous_trial, + sampling_worker_id=worker_id, + time_submitted=time_submitted, + time_started=None, + time_end=None, + ) + + +def test_trial_as_in_progress_with_different_evaluating_worker() -> None: + trial_id = "1" + time_sampled = 0 + time_submitted = 1 + time_started = 2 + previous_trial = "0" + sampling_worker_id = "42" + evaluating_worker_id = "43" + + trial = Trial.new( + trial_id=trial_id, + config={"a": "b"}, + location="1", + previous_trial_location="0", + time_sampled=time_sampled, + previous_trial=previous_trial, + worker_id=sampling_worker_id, + ) + trial.set_submitted(time_submitted=time_submitted) + trial.set_evaluating(time_started=time_started, worker_id=evaluating_worker_id) + + assert trial.state == Trial.State.EVALUATING + assert trial.id == trial_id + assert trial.config == {"a": "b"} + assert trial.metadata == Trial.MetaData( + id=trial_id, + time_sampled=time_sampled, + previous_trial_id=previous_trial, + previous_trial_location="0", + location="1", + sampling_worker_id=sampling_worker_id, + evaluating_worker_id=evaluating_worker_id, + time_submitted=time_submitted, + time_started=time_started, + time_end=None, + ) + + +def test_trial_as_success_after_being_progress() -> None: + trial_id = "1" + time_sampled = 0 + time_submitted = 1 + time_started = 2 + time_end = 3 + previous_trial = "0" + sampling_worker_id = "42" + evaluating_worker_id = "43" + loss = 427 + cost = -123.6 + extra = {"picnic": "basket", "counts": [1, 2, 3]} + + trial = Trial.new( + trial_id=trial_id, + config={"a": "b"}, + time_sampled=time_sampled, + previous_trial=previous_trial, + location="1", + previous_trial_location="0", + worker_id=sampling_worker_id, + ) + trial.set_submitted(time_submitted=time_submitted) + trial.set_evaluating(time_started=time_started, worker_id=evaluating_worker_id) + report = trial.set_complete( + report_as="success", + loss=loss, + cost=cost, + err=None, + tb=None, + learning_curve=None, + evaluation_duration=time_end - time_started, + extra=extra, + time_end=time_end, + ) + + assert trial.state == Trial.State.SUCCESS + assert trial.id == trial_id + assert trial.config == {"a": "b"} + assert trial.metadata 
== Trial.MetaData( + id=trial_id, + time_sampled=time_sampled, + previous_trial_location="0", + location="1", + previous_trial_id=previous_trial, + sampling_worker_id=sampling_worker_id, + evaluating_worker_id=evaluating_worker_id, + evaluation_duration=time_end - time_started, + time_submitted=time_submitted, + time_started=time_started, + time_end=time_end, + ) + assert report == Trial.Report( + trial_id=trial_id, + loss=loss, + cost=cost, + learning_curve=None, + evaluation_duration=1, + extra=extra, + err=None, + tb=None, + reported_as="success", + ) + + +def test_trial_as_failed_with_nan_loss_and_in_cost() -> None: + trial_id = "1" + time_sampled = 0 + time_submitted = 1 + time_started = 2 + time_end = 3 + previous_trial = "0" + sampling_worker_id = "42" + evaluating_worker_id = "43" + loss = np.nan + cost = np.inf + extra = {"picnic": "basket", "counts": [1, 2, 3]} + + trial = Trial.new( + trial_id=trial_id, + config={"a": "b"}, + location="1", + previous_trial_location="0", + time_sampled=time_sampled, + previous_trial=previous_trial, + worker_id=sampling_worker_id, + ) + trial.set_submitted(time_submitted=time_submitted) + trial.set_evaluating(time_started=time_started, worker_id=evaluating_worker_id) + report = trial.set_complete( + report_as="failed", + loss=loss, + cost=cost, + learning_curve=None, + evaluation_duration=time_end - time_started, + err=None, + tb=None, + extra=extra, + time_end=time_end, + ) + assert trial.state == Trial.State.FAILED + assert trial.id == trial_id + assert trial.config == {"a": "b"} + assert trial.metadata == Trial.MetaData( + id=trial_id, + time_sampled=time_sampled, + previous_trial_id=previous_trial, + sampling_worker_id=sampling_worker_id, + evaluating_worker_id=evaluating_worker_id, + time_submitted=time_submitted, + previous_trial_location="0", + location="1", + time_started=time_started, + time_end=time_end, + evaluation_duration=time_end - time_started, + ) + assert report == Trial.Report( + trial_id=trial_id, + loss=loss, + cost=cost, + learning_curve=None, + evaluation_duration=time_end - time_started, + extra=extra, + err=None, + tb=None, + reported_as="failed", + ) + + +def test_trial_as_crashed_with_err_and_tb() -> None: + trial_id = "1" + time_sampled = 0 + time_submitted = 1 + time_started = 2 + time_end = 3 + err = ValueError("Something went wrong") + tb = "some traceback" + previous_trial = "0" + sampling_worker_id = "42" + evaluating_worker_id = "43" + extra = {"picnic": "basket", "counts": [1, 2, 3]} + + trial = Trial.new( + trial_id=trial_id, + config={"a": "b"}, + time_sampled=time_sampled, + location="1", + previous_trial_location="42", + previous_trial=previous_trial, + worker_id=sampling_worker_id, + ) + trial.set_submitted(time_submitted=time_submitted) + trial.set_evaluating(time_started=time_started, worker_id=evaluating_worker_id) + report = trial.set_complete( + report_as="crashed", + loss=None, + cost=None, + learning_curve=None, + evaluation_duration=time_end - time_started, + err=err, + tb=tb, + extra=extra, + time_end=time_end, + ) + + assert trial.state == Trial.State.CRASHED + assert trial.id == trial_id + assert trial.config == {"a": "b"} + assert trial.metadata == Trial.MetaData( + id=trial_id, + time_sampled=time_sampled, + previous_trial_id=previous_trial, + sampling_worker_id=sampling_worker_id, + evaluating_worker_id=evaluating_worker_id, + time_submitted=time_submitted, + previous_trial_location="42", + location="1", + time_started=time_started, + time_end=time_end, + evaluation_duration=time_end - 
time_started, + ) + assert report == Trial.Report( + trial_id=trial_id, + loss=None, + cost=None, + learning_curve=None, + evaluation_duration=time_end - time_started, + extra=extra, + err=err, + tb=tb, + reported_as="crashed", + ) diff --git a/tests/test_yaml_run_args/test_declarative_usage_docs/test_declarative_usage_docs.py b/tests/test_yaml_run_args/test_declarative_usage_docs/test_declarative_usage_docs.py index 5d6d8368..ea2ca6ec 100644 --- a/tests/test_yaml_run_args/test_declarative_usage_docs/test_declarative_usage_docs.py +++ b/tests/test_yaml_run_args/test_declarative_usage_docs/test_declarative_usage_docs.py @@ -2,20 +2,24 @@ import os import subprocess import sys + BASE_PATH = "tests/test_yaml_run_args/test_declarative_usage_docs/" @pytest.mark.neps_api -@pytest.mark.parametrize("yaml_file", [ - "simple_example_including_run_pipeline.yaml", - "full_configuration_template.yaml", - "defining_hooks.yaml", - "customizing_neps_optimizer.yaml", - "loading_own_optimizer.yaml", - "loading_pipeline_space_dict.yaml", - "outsourcing_optimizer.yaml", - "outsourcing_pipeline_space.yaml" -]) +@pytest.mark.parametrize( + "yaml_file", + [ + "simple_example_including_run_pipeline.yaml", + "full_configuration_template.yaml", + "defining_hooks.yaml", + "customizing_neps_optimizer.yaml", + "loading_own_optimizer.yaml", + "loading_pipeline_space_dict.yaml", + "outsourcing_optimizer.yaml", + "outsourcing_pipeline_space.yaml", + ], +) def test_run_with_yaml(yaml_file: str) -> None: """ Test 'neps.run' with various run_args.yaml settings to simulate loading options @@ -25,11 +29,11 @@ def test_run_with_yaml(yaml_file: str) -> None: assert os.path.exists(yaml_path), f"{yaml_file} does not exist." try: - subprocess.check_call( - [sys.executable, BASE_PATH + 'neps_run.py', yaml_path]) + subprocess.check_call([sys.executable, BASE_PATH + "neps_run.py", yaml_path]) except subprocess.CalledProcessError as e: pytest.fail( - f"NePS run failed for configuration: {yaml_file} with error: {str(e)}") + f"NePS run failed for configuration: {yaml_file} with error: {str(e)}" + ) @pytest.mark.neps_api @@ -43,8 +47,9 @@ def test_run_with_yaml_and_run_pipeline() -> None: try: subprocess.check_call( - [sys.executable, BASE_PATH + 'neps_run.py', yaml_path, "--run_pipeline"] + [sys.executable, BASE_PATH + "neps_run.py", yaml_path, "--run_pipeline"] ) except subprocess.CalledProcessError as e: pytest.fail( - f"NePS run failed for configuration: simple_example.yaml with error: {str(e)}") + f"NePS run failed for configuration: simple_example.yaml with error: {str(e)}" + ) diff --git a/tests/test_yaml_run_args/test_run_args_by_neps_run/test_neps_run.py b/tests/test_yaml_run_args/test_run_args_by_neps_run/test_neps_run.py index 30d6e178..4995a14c 100644 --- a/tests/test_yaml_run_args/test_run_args_by_neps_run/test_neps_run.py +++ b/tests/test_yaml_run_args/test_run_args_by_neps_run/test_neps_run.py @@ -3,39 +3,54 @@ import os import sys import yaml + BASE_PATH = "tests/test_yaml_run_args/test_run_args_by_neps_run/" @pytest.mark.neps_api -@pytest.mark.parametrize("config", [ - {"file_name": "config.yaml"}, - {"file_name": "loading_pipeline_space.yaml"}, - {"file_name": "loading_optimizer.yaml"}, - {"file_name": "config_select_bo.yaml", "check_optimizer": True, "optimizer_path": - "select_bo_run_args.yaml", - "result_path": "tests_tmpdir/test_run_args_by_neps_run/optimizer_bo" - "/.optimizer_info.yaml"}, - {"file_name": "config_priorband_with_args.yaml", "check_optimizer": True, - "optimizer_path": "priorband_args_run_args.yaml", 
- "result_path": "tests_tmpdir/test_run_args_by_neps_run/optimizer_priorband" - "/.optimizer_info.yaml"}, - {"file_name": "config_hyperband_mixed_args.yaml", "check_optimizer": True, - "optimizer_path": "hyperband_searcher_kwargs_yaml_args.yaml", - "result_path": "tests_tmpdir/test_run_args_by_neps_run/optimizer_hyperband" - "/.optimizer_info.yaml", "args": True} -]) +@pytest.mark.parametrize( + "config", + [ + {"file_name": "config.yaml"}, + {"file_name": "loading_pipeline_space.yaml"}, + {"file_name": "loading_optimizer.yaml"}, + { + "file_name": "config_select_bo.yaml", + "check_optimizer": True, + "optimizer_path": "select_bo_run_args.yaml", + "result_path": "tests_tmpdir/test_run_args_by_neps_run/optimizer_bo/.optimizer_info/info.yaml", + }, + { + "file_name": "config_priorband_with_args.yaml", + "check_optimizer": True, + "optimizer_path": "priorband_args_run_args.yaml", + "result_path": "tests_tmpdir/test_run_args_by_neps_run/optimizer_priorband/.optimizer_info/info.yaml", + }, + { + "file_name": "config_hyperband_mixed_args.yaml", + "check_optimizer": True, + "optimizer_path": "hyperband_searcher_kwargs_yaml_args.yaml", + "result_path": "tests_tmpdir/test_run_args_by_neps_run/optimizer_hyperband/.optimizer_info/info.yaml", + "args": True, + }, + ], +) def test_run_with_yaml(config: dict) -> None: """Test "neps.run" with various run_args.yaml settings to simulate loading options for variables.""" file_name = config["file_name"] check_optimizer = config.pop("check_optimizer", False) - assert os.path.exists(os.path.join(BASE_PATH, file_name)), (f"{file_name} " - f"does not exist.") + assert os.path.exists(os.path.join(BASE_PATH, file_name)), ( + f"{file_name} " f"does not exist." + ) - cmd = [sys.executable, os.path.join(BASE_PATH, 'neps_run.py'), - os.path.join(BASE_PATH, file_name)] + cmd = [ + sys.executable, + os.path.join(BASE_PATH, "neps_run.py"), + os.path.join(BASE_PATH, file_name), + ] if "args" in config: - cmd.append('--kwargs_flag') + cmd.append("--kwargs_flag") try: subprocess.check_call(cmd) @@ -50,17 +65,18 @@ def test_run_with_yaml(config: dict) -> None: def compare_generated_yaml(result_path, optimizer_path): """compare generated optimizer settings and solution settings""" - assert os.path.exists(result_path), \ - "Generated YAML file does not exist." + assert os.path.exists(result_path), "Generated YAML file does not exist." - assert os.path.exists(BASE_PATH + "optimizer_yamls/" + optimizer_path), \ - "Solution YAML file does not exist." + assert os.path.exists( + BASE_PATH + "optimizer_yamls/" + optimizer_path + ), "Solution YAML file does not exist." - with open(result_path, 'r') as gen_file: + with open(result_path, "r") as gen_file: generated_content = yaml.safe_load(gen_file) - with open(BASE_PATH + "optimizer_yamls/" + optimizer_path, 'r') as ref_file: + with open(BASE_PATH + "optimizer_yamls/" + optimizer_path, "r") as ref_file: reference_content = yaml.safe_load(ref_file) - assert generated_content == reference_content, \ - "The generated YAML does not match the reference YAML" + assert ( + generated_content == reference_content + ), "The generated YAML does not match the reference YAML"