From c2de687542280f21d27c7f5b299243203b9e3770 Mon Sep 17 00:00:00 2001
From: Kavya Sree Kaitepalli <kkaitepalli@microsoft.com>
Date: Wed, 3 Jun 2026 08:38:41 +0000
Subject: [PATCH] Add auto_memory module with configuration, data models, and
 error handling

---
 src/microbots/auto_memory/__init__.py    |   1 +
 src/microbots/auto_memory/config.py      | 146 ++++++++++++++++
 src/microbots/auto_memory/data_models.py |  54 ++++++
 src/microbots/auto_memory/errors.py      |  22 +++
 test/auto_memory/test_config.py          | 212 +++++++++++++++++++++++
 test/auto_memory/test_models.py          |  86 +++++++++
 6 files changed, 521 insertions(+)
 create mode 100644 src/microbots/auto_memory/__init__.py
 create mode 100644 src/microbots/auto_memory/config.py
 create mode 100644 src/microbots/auto_memory/data_models.py
 create mode 100644 src/microbots/auto_memory/errors.py
 create mode 100644 test/auto_memory/test_config.py
 create mode 100644 test/auto_memory/test_models.py

diff --git a/src/microbots/auto_memory/__init__.py b/src/microbots/auto_memory/__init__.py
new file mode 100644
index 0000000..7eb3b65
--- /dev/null
+++ b/src/microbots/auto_memory/__init__.py
@@ -0,0 +1 @@
+# auto_memory — iterative agent loop with memory feedback
diff --git a/src/microbots/auto_memory/config.py b/src/microbots/auto_memory/config.py
new file mode 100644
index 0000000..9febd31
--- /dev/null
+++ b/src/microbots/auto_memory/config.py
@@ -0,0 +1,146 @@
+from __future__ import annotations
+
+import yaml
+from dataclasses import dataclass, field
+from logging import getLogger
+from pathlib import Path
+
+from microbots.auto_memory.errors import ConfigError
+from microbots.auto_memory.data_models import CallbackSpec, ReferenceInput
+
+logger = getLogger(__name__)
+
+
+@dataclass
+class TaskConfig:
+    """All configuration for one auto_memory run, loaded from a YAML file."""
+
+    # --- required ---
+    task_definition: str
+    prompt_template: str
+    callbacks: list[CallbackSpec]
+
+    # --- optional with defaults ---
+    reference_inputs: list[ReferenceInput] = field(default_factory=list)
+    output_format: str = "dir"          # "file" | "dir" | "stdout"
+    output_path: str = "candidate"      # relative to iteration dir
+    max_iterations: int = 5
+    timeout_min: int = 60
+    per_iteration_timeout: int = 600    # seconds
+
+    @classmethod
+    def load_from_yaml(cls, path: str) -> "TaskConfig":
+        """Parse a task YAML file into a validated TaskConfig.
+
+        Raises:
+            ConfigError: if the file is missing or unreadable, is not valid
+                         YAML, lacks required fields, or has malformed entries.
+        """
+        yaml_path = Path(path)
+        if not yaml_path.exists():
+            raise ConfigError(f"Task config file not found: {path}")
+
+        try:
+            with yaml_path.open("rb") as f:  # bytes: PyYAML detects UTF-8/16 itself
+                data = yaml.safe_load(f)
+        except yaml.YAMLError as exc:
+            raise ConfigError(f"Failed to parse YAML from {path}: {exc}") from exc
+        except OSError as exc:
+            raise ConfigError(f"Failed to read task config file {path}: {exc}") from exc
+
+        if not isinstance(data, dict):
+            raise ConfigError(f"Expected a YAML mapping at the top level in {path}")
+
+        # required fields
+        for required in ("task_definition", "prompt_template"):
+            if required not in data:
+                raise ConfigError(f"Missing required field '{required}' in {path}")
+
+        # A missing key, wrong container type, or non-numeric value in the YAML
+        # must surface as ConfigError, not as a bare KeyError/TypeError/ValueError.
+        try:
+            reference_inputs = [
+                ReferenceInput(
+                    name=item["name"],
+                    value=item["value"],
+                    is_path=bool(item.get("is_path", False)),
+                )
+                for item in (data.get("reference_inputs") or [])
+            ]
+            callbacks = [
+                CallbackSpec(
+                    name=cb["name"],
+                    command=cb["command"],
+                    timeout_s=int(cb.get("timeout_s", 120)),
+                    expected_return_code=int(cb.get("expected_return_code", 0)),
+                )
+                for cb in (data.get("callbacks") or [])
+            ]
+            config = cls(
+                task_definition=data["task_definition"].strip(),
+                prompt_template=data["prompt_template"],
+                callbacks=callbacks,
+                reference_inputs=reference_inputs,
+                output_format=data.get("output_format", "dir"),
+                output_path=data.get("output_path", "candidate"),
+                max_iterations=int(data.get("max_iterations", 5)),
+                timeout_min=int(data.get("timeout_min", 60)),
+                per_iteration_timeout=int(data.get("per_iteration_timeout", 600)),
+            )
+        except (AttributeError, KeyError, TypeError, ValueError) as exc:
+            raise ConfigError(f"Malformed task config in {path}: {exc!r}") from exc
+
+        config.validate()
+        return config
+
+    def validate(self) -> None:
+        """Check every field and raise on the first invalid one.
+
+        Raises:
+            ConfigError: on any invalid field.
+        """
+        if not self.task_definition:
+            raise ConfigError("'task_definition' must not be empty")
+
+        if not self.prompt_template:
+            raise ConfigError("'prompt_template' must not be empty")
+
+        if self.max_iterations < 1:
+            raise ConfigError(f"'max_iterations' must be >= 1, got {self.max_iterations}")
+
+        if self.timeout_min < 1:
+            raise ConfigError(f"'timeout_min' must be >= 1, got {self.timeout_min}")
+
+        if self.per_iteration_timeout < 1:
+            raise ConfigError(
+                f"'per_iteration_timeout' must be >= 1, got {self.per_iteration_timeout}"
+            )
+
+        if self.output_format not in ("file", "dir", "stdout"):
+            raise ConfigError(
+                f"'output_format' must be 'file', 'dir', or 'stdout', got '{self.output_format}'"
+            )
+
+        if Path(self.output_path).is_absolute():
+            raise ConfigError(
+                f"'output_path' must be a relative path, got '{self.output_path}'"
+            )
+
+        if not self.callbacks:
+            raise ConfigError("'callbacks' must contain at least one entry")
+
+        for cb in self.callbacks:
+            if not cb.name:
+                raise ConfigError("Each callback must have a non-empty 'name'")
+            if not cb.command:
+                raise ConfigError(
+                    f"Callback '{cb.name}' must have a non-empty 'command'"
+                )
+
+        for ri in self.reference_inputs:
+            if not ri.name:
+                raise ConfigError("Each reference_input must have a non-empty 'name'")
+            if not ri.value:
+                raise ConfigError(
+                    f"reference_input '{ri.name}' must have a non-empty 'value'"
+                )
diff --git a/src/microbots/auto_memory/data_models.py b/src/microbots/auto_memory/data_models.py
new file mode 100644
index 0000000..312de23
--- /dev/null
+++ b/src/microbots/auto_memory/data_models.py
@@ -0,0 +1,54 @@
+from __future__ import annotations
+
+from dataclasses import dataclass, field
+from enum import StrEnum
+
+
+# Enums (shared across the orchestrator, callbacks, and CLI)
+
+class IterationStatus(StrEnum):  # str-valued: members compare equal to their literals
+    PASSED = "passed"
+    FAILED = "failed"
+    TIMEOUT = "timeout"
+    ERROR = "error"
+    SKIPPED = "skipped"
+
+
+class FinalStatus(StrEnum):  # str-valued; has LIMIT_REACHED but no FAILED/SKIPPED
+    PASSED = "passed"
+    LIMIT_REACHED = "limit_reached"
+    TIMEOUT = "timeout"
+    ERROR = "error"
+
+
+# Shared value objects (task inputs used by config.py, plus iteration feedback)
+
+@dataclass(frozen=True)  # frozen: assigning to a field after construction raises
+class ReferenceInput:
+    """A named input to the task — either a literal value or a file path."""
+    name: str
+    value: str  # the literal itself, or the path text when is_path is True
+    is_path: bool = False  # marks ``value`` as a file path instead of a literal
+
+
+@dataclass(frozen=True)  # frozen: assigning to a field after construction raises
+class CallbackSpec:
+    """Specification for a single validator callback command.
+    Shared between config.py (declaration) and callbacks.py (execution).
+    """
+    name: str
+    command: str  # NOTE(review): looks like a shell command line — confirm in callbacks.py
+    timeout_s: int = 120  # presumably seconds, per the ``_s`` suffix
+    expected_return_code: int = 0  # presumably the exit code treated as success — confirm
+
+
+@dataclass  # not frozen, so fields (including the lists) can be changed after creation
+class Feedback:
+    """Structured failure summary produced by analyze_failure().
+    Written into MemoryStore so the agent reads it on the next iteration.
+    """
+    iteration_idx: int  # iteration this feedback belongs to (0- or 1-based not shown here)
+    summary: str
+    root_causes: list[str] = field(default_factory=list)  # fresh list per instance
+    validator_failures: list[str] = field(default_factory=list)  # fresh list per instance
+    suggested_actions: list[str] = field(default_factory=list)  # fresh list per instance
diff --git a/src/microbots/auto_memory/errors.py b/src/microbots/auto_memory/errors.py
new file mode 100644
index 0000000..e002c16
--- /dev/null
+++ b/src/microbots/auto_memory/errors.py
@@ -0,0 +1,22 @@
+class AutoMemoryError(Exception):
+    """Base class for all auto_memory errors; catch it to handle any of them."""
+
+
+class ConfigError(AutoMemoryError):
+    """Raised when TaskConfig is invalid or cannot be loaded."""
+
+
+class AgentError(AutoMemoryError):
+    """Raised when the agent runner encounters an unrecoverable error."""
+
+
+class CallbackError(AutoMemoryError):
+    """Raised when a callback cannot be spawned or set up (not a failing assertion)."""
+
+
+class TimeoutError(AutoMemoryError):  # noqa: A001 — intentional shadow of builtin
+    """Raised on per-iteration or total-run timeout; not a builtins.TimeoutError."""
+
+
+class MemoryStoreError(AutoMemoryError):
+    """Raised when the memory store cannot be read from or written to."""
diff --git a/test/auto_memory/test_config.py b/test/auto_memory/test_config.py
new file mode 100644
index 0000000..f159be0
--- /dev/null
+++ b/test/auto_memory/test_config.py
@@ -0,0 +1,212 @@
+import textwrap
+import pytest
+from pathlib import Path
+
+from microbots.auto_memory.config import TaskConfig
+from microbots.auto_memory.errors import ConfigError
+
+
+MINIMAL_YAML = textwrap.dedent("""\
+    task_definition: Fix the bug
+    prompt_template: "Goal: {{ task }}"
+    callbacks:
+      - name: tests
+        command: pytest "$CANDIDATE"
+""")
+
+FULL_YAML = textwrap.dedent("""\
+    task_definition: Fix the auth bug
+    prompt_template: |
+      Goal: {{ task }}
+      {% if feedback %}Feedback: {{ feedback.summary }}{% endif %}
+
+    reference_inputs:
+      - name: spec
+        value: ./spec.md
+        is_path: true
+      - name: style
+        value: snake_case
+
+    output_format: dir
+    output_path: candidate
+    max_iterations: 3
+    timeout_min: 30
+    per_iteration_timeout: 300
+
+    callbacks:
+      - name: unit_tests
+        command: pytest "$CANDIDATE"
+        timeout_s: 180
+      - name: lint
+        command: ruff check "$CANDIDATE"
+        expected_return_code: 0
+""")
+
+
+@pytest.fixture
+def tmp_yaml(tmp_path):
+    """Provide a writer that stores text as task.yml under tmp_path and returns its path."""
+    def _dump(content: str) -> str:
+        target = tmp_path / "task.yml"
+        target.write_text(content)
+        return str(target)
+    return _dump
+
+
+@pytest.mark.unit
+class TestLoadFromYaml:
+    def test_minimal_valid(self, tmp_yaml):
+        config = TaskConfig.load_from_yaml(tmp_yaml(MINIMAL_YAML))
+        assert "{{ task }}" in config.prompt_template
+        assert config.task_definition == "Fix the bug"
+
+    def test_full_yaml(self, tmp_yaml):
+        config = TaskConfig.load_from_yaml(tmp_yaml(FULL_YAML))
+        assert len(config.callbacks) == 2
+        assert len(config.reference_inputs) == 2
+        assert config.max_iterations == 3
+        assert config.timeout_min == 30
+        assert config.per_iteration_timeout == 300
+
+    def test_reference_inputs_parsed(self, tmp_yaml):
+        config = TaskConfig.load_from_yaml(tmp_yaml(FULL_YAML))
+        path_input = config.reference_inputs[0]
+        assert path_input.name == "spec"
+        assert path_input.value == "./spec.md"
+        assert path_input.is_path is True
+        literal_input = config.reference_inputs[1]  # entry without an is_path key
+        assert literal_input.is_path is False
+
+    def test_callbacks_parsed(self, tmp_yaml):
+        config = TaskConfig.load_from_yaml(tmp_yaml(FULL_YAML))
+        first_callback = config.callbacks[0]
+        assert first_callback.name == "unit_tests"
+        assert first_callback.timeout_s == 180
+        assert first_callback.expected_return_code == 0
+
+    def test_defaults_applied(self, tmp_yaml):
+        config = TaskConfig.load_from_yaml(tmp_yaml(MINIMAL_YAML))
+        assert config.reference_inputs == []
+        assert config.output_format == "dir"
+        assert config.output_path == "candidate"
+        assert config.max_iterations == 5
+        assert config.timeout_min == 60
+        assert config.per_iteration_timeout == 600
+
+    def test_missing_callbacks(self, tmp_yaml):
+        text = textwrap.dedent("""\
+            task_definition: Fix the bug
+            prompt_template: "Goal: {{ task }}"
+        """)
+        with pytest.raises(ConfigError, match="callbacks"):
+            TaskConfig.load_from_yaml(tmp_yaml(text))
+
+    def test_file_not_found(self):
+        with pytest.raises(ConfigError, match="not found"):
+            TaskConfig.load_from_yaml("/nonexistent/path/task.yml")
+
+    def test_invalid_yaml(self, tmp_yaml):
+        with pytest.raises(ConfigError, match="Failed to parse YAML"):
+            TaskConfig.load_from_yaml(tmp_yaml("}{invalid yaml}{"))
+
+    def test_missing_task_definition(self, tmp_yaml):
+        with pytest.raises(ConfigError, match="task_definition"):
+            TaskConfig.load_from_yaml(tmp_yaml("prompt_template: hello"))
+
+    def test_missing_prompt_template(self, tmp_yaml):
+        with pytest.raises(ConfigError, match="prompt_template"):
+            TaskConfig.load_from_yaml(tmp_yaml("task_definition: hello"))
+
+    def test_not_a_mapping(self, tmp_yaml):
+        with pytest.raises(ConfigError, match="mapping"):
+            TaskConfig.load_from_yaml(tmp_yaml("- item1\n- item2\n"))
+
+
+@pytest.mark.unit
+class TestValidate:
+    def _base(self) -> TaskConfig:
+        from microbots.auto_memory.data_models import CallbackSpec
+        return TaskConfig(
+            task_definition="Fix the bug",
+            prompt_template="Goal: {{ task }}",
+            callbacks=[CallbackSpec(name="tests", command="pytest")],
+        )
+
+    def test_valid_passes(self):
+        self._base().validate()  # a known-good config must validate silently
+
+    def test_empty_task_definition(self):
+        config = self._base()
+        config.task_definition = ""
+        with pytest.raises(ConfigError, match="task_definition"):
+            config.validate()
+
+    def test_empty_prompt_template(self):
+        config = self._base()
+        config.prompt_template = ""
+        with pytest.raises(ConfigError, match="prompt_template"):
+            config.validate()
+
+    def test_max_iterations_zero(self):
+        config = self._base()
+        config.max_iterations = 0
+        with pytest.raises(ConfigError, match="max_iterations"):
+            config.validate()
+
+    def test_timeout_min_zero(self):
+        config = self._base()
+        config.timeout_min = 0
+        with pytest.raises(ConfigError, match="timeout_min"):
+            config.validate()
+
+    def test_per_iteration_timeout_zero(self):
+        config = self._base()
+        config.per_iteration_timeout = 0
+        with pytest.raises(ConfigError, match="per_iteration_timeout"):
+            config.validate()
+
+    def test_invalid_output_format(self):
+        config = self._base()
+        config.output_format = "json"
+        with pytest.raises(ConfigError, match="output_format"):
+            config.validate()
+
+    def test_absolute_output_path(self):
+        config = self._base()
+        config.output_path = "/absolute/path"
+        with pytest.raises(ConfigError, match="output_path"):
+            config.validate()
+
+    def test_callback_empty_name(self):
+        from microbots.auto_memory.data_models import CallbackSpec
+        config = self._base()
+        config.callbacks = [CallbackSpec(name="", command="pytest")]
+        with pytest.raises(ConfigError, match="name"):
+            config.validate()
+
+    def test_callback_empty_command(self):
+        from microbots.auto_memory.data_models import CallbackSpec
+        config = self._base()
+        config.callbacks = [CallbackSpec(name="tests", command="")]
+        with pytest.raises(ConfigError, match="command"):
+            config.validate()
+
+    def test_reference_input_empty_name(self):
+        from microbots.auto_memory.data_models import ReferenceInput
+        config = self._base()
+        config.reference_inputs = [ReferenceInput(name="", value="val")]
+        with pytest.raises(ConfigError, match="name"):
+            config.validate()
+
+    def test_reference_input_empty_value(self):
+        from microbots.auto_memory.data_models import ReferenceInput
+        config = self._base()
+        config.reference_inputs = [ReferenceInput(name="spec", value="")]
+        with pytest.raises(ConfigError, match="value"):
+            config.validate()
+
+    def test_empty_callbacks_list(self):
+        config = self._base()
+        config.callbacks = []
+        with pytest.raises(ConfigError, match="callbacks"):
+            config.validate()
diff --git a/test/auto_memory/test_models.py b/test/auto_memory/test_models.py
new file mode 100644
index 0000000..2fcf046
--- /dev/null
+++ b/test/auto_memory/test_models.py
@@ -0,0 +1,86 @@
+import pytest
+from microbots.auto_memory.data_models import (
+    CallbackSpec,
+    Feedback,
+    FinalStatus,
+    IterationStatus,
+    ReferenceInput,
+)
+
+
+@pytest.mark.unit
+class TestReferenceInput:
+    def test_defaults(self):
+        ref = ReferenceInput(name="spec", value="./spec.md")
+        assert ref.is_path is False
+
+    def test_is_path_flag(self):
+        ref = ReferenceInput(name="spec", value="./spec.md", is_path=True)
+        assert ref.is_path is True
+
+    def test_frozen(self):
+        ref = ReferenceInput(name="spec", value="./spec.md")
+        with pytest.raises(AttributeError):
+            ref.name = "other"  # type: ignore[misc]
+
+
+@pytest.mark.unit
+class TestCallbackSpec:
+    def test_defaults(self):
+        spec = CallbackSpec(name="tests", command="pytest $CANDIDATE")
+        assert spec.expected_return_code == 0
+        assert spec.timeout_s == 120
+
+    def test_custom_return_code(self):
+        spec = CallbackSpec(name="custom", command="check.sh", expected_return_code=1)
+        assert spec.expected_return_code == 1
+
+    def test_frozen(self):
+        spec = CallbackSpec(name="tests", command="pytest")
+        with pytest.raises(AttributeError):
+            spec.name = "other"  # type: ignore[misc]
+
+
+@pytest.mark.unit
+class TestFeedback:
+    def test_defaults(self):
+        feedback = Feedback(iteration_idx=1, summary="Tests failed")
+        assert feedback.suggested_actions == []
+        assert feedback.validator_failures == []
+        assert feedback.root_causes == []
+
+    def test_full(self):
+        feedback = Feedback(
+            iteration_idx=2,
+            summary="Two callbacks failed",
+            root_causes=["null pointer"],
+            validator_failures=["unit_tests", "lint"],
+            suggested_actions=["add null check"],
+        )
+        assert len(feedback.validator_failures) == 2
+
+    def test_mutable_lists_are_independent(self):
+        mutated = Feedback(iteration_idx=1, summary="a")
+        untouched = Feedback(iteration_idx=2, summary="b")
+        mutated.root_causes.append("x")
+        assert untouched.root_causes == []
+
+
+@pytest.mark.unit
+class TestIterationStatus:
+    def test_all_values(self):
+        assert IterationStatus.SKIPPED == "skipped"
+        assert IterationStatus.ERROR == "error"
+        assert IterationStatus.TIMEOUT == "timeout"
+        assert IterationStatus.FAILED == "failed"
+        assert IterationStatus.PASSED == "passed"
+
+
+@pytest.mark.unit
+class TestFinalStatus:
+    def test_all_values(self):
+        assert FinalStatus.ERROR == "error"
+        assert FinalStatus.TIMEOUT == "timeout"
+        assert FinalStatus.LIMIT_REACHED == "limit_reached"
+        assert FinalStatus.PASSED == "passed"
+