diff --git a/apps/analysis.py b/apps/analysis.py
index 4e651752..9bb15520 100644
--- a/apps/analysis.py
+++ b/apps/analysis.py
@@ -75,7 +75,7 @@ def _build_metric_group_map() -> dict[str, str]:
     "Other": "#AAAAAA",
 }
 
-_NON_NORMALIZED_METRICS = {"response_speed"}
+_NON_NORMALIZED_METRICS = {"response_speed", "response_speed_with_tool_calls", "response_speed_no_tool_calls"}
 
 # EVA composite scores to show in the bar chart
 _EVA_BAR_COMPOSITES = ["EVA-A_pass", "EVA-X_pass", "EVA-A_mean", "EVA-X_mean"]
diff --git a/pyproject.toml b/pyproject.toml
index 561cba2d..47827e98 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -101,7 +101,7 @@ ignore = ["D203", "D206", "D213", "D400", "D401", "D413", "D415", "E1", "E501"]
 simulation_version = "0.1.0"
 # Bump when metrics pipeline changes (metrics code, judge prompts, pricing,
 # postprocessor). Old metric results become stale — cheap to recompute.
-metrics_version = "0.1.1"
+metrics_version = "0.1.2"
 
 [tool.mypy]
 python_version = "3.11"
diff --git a/src/eva/__init__.py b/src/eva/__init__.py
index 6796f4aa..03f1f13b 100644
--- a/src/eva/__init__.py
+++ b/src/eva/__init__.py
@@ -11,4 +11,4 @@
 
 # Bump metrics_version when changes affect metric computation (metrics code,
 # judge prompts, pricing tables, postprocessor).
-metrics_version = "0.1.1"
+metrics_version = "0.1.2"
diff --git a/src/eva/metrics/diagnostic/response_speed.py b/src/eva/metrics/diagnostic/response_speed.py
index 0dd4fb53..224ebc49 100644
--- a/src/eva/metrics/diagnostic/response_speed.py
+++ b/src/eva/metrics/diagnostic/response_speed.py
@@ -4,54 +4,93 @@
 final evaluation scores.
 """
 
+import json
+from abc import abstractmethod
+from pathlib import Path
+
 from eva.metrics.base import CodeMetric, MetricContext
 from eva.metrics.registry import register_metric
 from eva.models.results import MetricScore
 
 
-@register_metric
-class ResponseSpeedMetric(CodeMetric):
-    """Response speed metric.
+def _split_turn_taking_latencies_by_tool_calls(
+    context: MetricContext,
+) -> tuple[list[float], list[float]]:
+    """Partition turn_taking per_turn_latency values into (with_tool_calls, no_tool_calls).
 
-    Measures the elapsed time between the end of the user's utterance
-    and the beginning of the assistant's response.
+    Reads metrics/turn_taking/details/per_turn_latency from the record's
+    metrics.json, then checks conversation_trace to determine which turn_ids
+    had at least one tool call (turn ids are compared as strings).
 
-    Reports raw latency values in seconds — no normalization applied.
+    Returns:
+        (with_tool_latencies, no_tool_latencies); both empty if data is missing.
+    """
+    if not context.output_dir:
+        return [], []
 
-    This is a diagnostic metric used for diagnosing model performance issues.
-    It is not directly used in final evaluation scores.
+    metrics_path = Path(context.output_dir) / "metrics.json"
+    if not metrics_path.exists():
+        return [], []
+
+    with open(metrics_path, encoding="utf-8") as f:
+        data = json.load(f)
+
+    per_turn_latency: dict[str, float] = (
+        data.get("metrics", {}).get("turn_taking", {}).get("details", {}).get("per_turn_latency", {})
+    )
+    if not per_turn_latency:
+        return [], []
+
+    # JSON object keys are always strings, so normalise trace turn ids to str too.
+    tool_call_turn_ids = {
+        str(entry["turn_id"])
+        for entry in (context.conversation_trace or [])
+        if entry.get("type") == "tool_call" and "turn_id" in entry
+    }
+
+    with_tool: list[float] = []
+    no_tool: list[float] = []
+    for turn_id_str, latency in per_turn_latency.items():
+        (with_tool if turn_id_str in tool_call_turn_ids else no_tool).append(latency)
+
+    return with_tool, no_tool
+
+
+class _ResponseSpeedBase(CodeMetric):
+    """Base class for response-speed metrics.
+
+    Subclasses implement `_get_latencies` to return the subset of latencies
+    to compute over; everything else is shared.
     """
 
-    name = "response_speed"
-    description = "Debug metric: latency between user utterance end and assistant response start"
     category = "diagnostic"
     exclude_from_pass_at_k = True
 
+    @abstractmethod
+    def _get_latencies(self, context: MetricContext) -> tuple[list[float], str]:
+        """Return (latencies, error_if_empty) for this metric variant."""
+
     async def compute(self, context: MetricContext) -> MetricScore:
-        """Compute response speed from Pipecat's UserBotLatencyObserver measurements."""
         try:
-            # Check if we have response speed latencies from UserBotLatencyObserver
-            if not context.response_speed_latencies:
+            latencies, empty_error = self._get_latencies(context)
+
+            if not latencies:
                 return MetricScore(
                     name=self.name,
                     score=0.0,
                     normalized_score=None,
-                    error="No response latencies available (UserBotLatencyObserver data missing)",
+                    error=empty_error,
                 )
 
-            # Use latencies measured by Pipecat's UserBotLatencyObserver
-            # These measure the time from user stopped speaking to assistant started speaking
             speeds = []
             per_turn_speeds = []
-
-            for response_speed in context.response_speed_latencies:
-                # Filter out invalid values (negative or extremely large)
-                if 0 < response_speed < 1000:  # Sanity check: under 1000 seconds
-                    speeds.append(response_speed)
-                    per_turn_speeds.append(round(response_speed, 3))
+            for latency in latencies:
+                if 0 < latency < 1000:
+                    speeds.append(latency)
+                    per_turn_speeds.append(round(latency, 3))
                 else:
                     self.logger.warning(
-                        f"[{context.record_id}] Unusual response speed detected and dropped: {response_speed} seconds"
+                        f"[{context.record_id}] Unusual response speed detected and dropped: {latency} seconds"
                     )
 
             if not speeds:
@@ -63,15 +102,14 @@ async def compute(self, context: MetricContext) -> MetricScore:
                 )
 
             mean_speed = sum(speeds) / len(speeds)
-            max_speed = max(speeds)
 
             return MetricScore(
                 name=self.name,
-                score=round(mean_speed, 3),  # Mean response speed in seconds
-                normalized_score=None,  # Raw latency in seconds; not normalizable to [0,1]
+                score=round(mean_speed, 3),
+                normalized_score=None,
                 details={
                     "mean_speed_seconds": round(mean_speed, 3),
-                    "max_speed_seconds": round(max_speed, 3),
+                    "max_speed_seconds": round(max(speeds), 3),
                     "num_turns": len(speeds),
                     "per_turn_speeds": per_turn_speeds,
                 },
@@ -79,3 +117,61 @@ async def compute(self, context: MetricContext) -> MetricScore:
 
         except Exception as e:
             return self._handle_error(e, context)
+
+
+@register_metric
+class ResponseSpeedMetric(_ResponseSpeedBase):
+    """Overall response speed across every measured turn.
+
+    The latency of a turn is the time from the end of the user's utterance
+    to the start of the assistant's response, taken from Pipecat's
+    UserBotLatencyObserver measurements.
+
+    Scores are raw seconds; no normalization is applied.
+
+    Diagnostic only: meant for investigating model performance issues and
+    not directly used in final evaluation scores.
+    """
+
+    name = "response_speed"
+    description = "Debug metric: latency between user utterance end and assistant response start"
+
+    def _get_latencies(self, context: MetricContext) -> tuple[list[float], str]:
+        """Return the context's observer latencies and the message used when there are none."""
+        # Values are passed through as-is; compute() applies the sanity filter.
+        missing_message = "No response latencies available (UserBotLatencyObserver data missing)"
+        return context.response_speed_latencies, missing_message
+
+
+@register_metric
+class ResponseSpeedWithToolCallsMetric(_ResponseSpeedBase):
+    """Response speed over only those turns in which a tool call was made.
+
+    Latencies come from the turn_taking metric's per_turn_latency; a turn is
+    kept when the conversation trace holds a tool_call entry for its turn id.
+    Diagnostic only: not used in final evaluation scores.
+    """
+
+    name = "response_speed_with_tool_calls"
+    description = "Debug metric: response latency for turns that included a tool call"
+
+    def _get_latencies(self, context: MetricContext) -> tuple[list[float], str]:
+        latencies = _split_turn_taking_latencies_by_tool_calls(context)[0]
+        return latencies, "No turns with tool calls found (or turn_taking latency data unavailable)"
+
+
+@register_metric
+class ResponseSpeedNoToolCallsMetric(_ResponseSpeedBase):
+    """Response speed over only those turns in which no tool call was made.
+
+    Latencies come from the turn_taking metric's per_turn_latency; a turn is
+    kept when the conversation trace holds no tool_call entry for its turn id.
+    Diagnostic only: not used in final evaluation scores.
+    """
+
+    name = "response_speed_no_tool_calls"
+    description = "Debug metric: response latency for turns that did not include a tool call"
+
+    def _get_latencies(self, context: MetricContext) -> tuple[list[float], str]:
+        latencies = _split_turn_taking_latencies_by_tool_calls(context)[1]
+        return latencies, "No turns without tool calls found (or turn_taking latency data unavailable)"
diff --git a/src/eva/metrics/processor.py b/src/eva/metrics/processor.py
index 660e7ce9..94aa6e44 100644
--- a/src/eva/metrics/processor.py
+++ b/src/eva/metrics/processor.py
@@ -24,6 +24,19 @@
 
 logger = get_logger(__name__)
 
+
+def _resolve_path(stored: str | None, fallback: Path) -> str | Path:
+    """Pick the usable location of a file: *stored* when present on disk, else *fallback*.
+
+    A run directory may be relocated after the run, leaving *stored* pointing
+    at the old location. In that case the caller-supplied *fallback*
+    (i.e. output_dir / filename) is returned instead, without an existence check.
+    """
+    if not stored:
+        return fallback
+    return stored if Path(stored).exists() else fallback
+
+
 # Elevenlabs audio user field → _ProcessorContext attribute name
 AUDIO_ATTR = {
     "pipecat_agent": "audio_timestamps_assistant_turns",
@@ -824,8 +837,10 @@ def _build_history(
         Each entry: {timestamp_ms, source, event_type, data}.
         """
         history = self._load_audit_log_transcript(output_dir)
-        history.extend(self._load_pipecat_logs(result.pipecat_logs_path))
-        history.extend(self._load_elevenlabs_logs(result.elevenlabs_logs_path))
+        pipecat_path = _resolve_path(result.pipecat_logs_path, output_dir / "pipecat_logs.jsonl")
+        history.extend(self._load_pipecat_logs(pipecat_path))
+        elevenlabs_path = _resolve_path(result.elevenlabs_logs_path, output_dir / "elevenlabs_events.jsonl")
+        history.extend(self._load_elevenlabs_logs(elevenlabs_path))
 
         history.sort(key=lambda e: e["timestamp_ms"])
         context.history = history
diff --git a/src/eva/models/config.py b/src/eva/models/config.py
index e08783bd..f3885c54 100644
--- a/src/eva/models/config.py
+++ b/src/eva/models/config.py
@@ -648,10 +648,17 @@ def apply_env_overrides(self, live: "RunConfig") -> None:
             if not has_redacted:
                 continue
             if name not in live_by_name:
-                raise ValueError(
-                    f"Cannot restore secrets: deployment {name!r} not found in "
-                    f"current EVA_MODEL_LIST (available: {list(live_by_name)})"
+                active_llm = getattr(self.model, "llm", None)
+                if name == active_llm:
+                    raise ValueError(
+                        f"Cannot restore secrets: deployment {name!r} not found in "
+                        f"current EVA_MODEL_LIST (available: {list(live_by_name)})"
+                    )
+                logger.warning(
+                    f"Deployment {name!r} has redacted secrets but is not in the current "
+                    f"EVA_MODEL_LIST — skipping (not used in this run)."
                 )
+                continue
             live_params = live_by_name[name].get("litellm_params", {})
             for key, value in saved_params.items():
                 if value == "***" and key in live_params:
diff --git a/tests/unit/metrics/test_response_speed.py b/tests/unit/metrics/test_response_speed.py
index 8cb3ecfc..343e73ee 100644
--- a/tests/unit/metrics/test_response_speed.py
+++ b/tests/unit/metrics/test_response_speed.py
@@ -1,11 +1,51 @@
 """Tests for the ResponseSpeedMetric."""
 
+import json
+
 import pytest
 
-from eva.metrics.diagnostic.response_speed import ResponseSpeedMetric
+from eva.metrics.diagnostic.response_speed import (
+    ResponseSpeedMetric,
+    ResponseSpeedNoToolCallsMetric,
+    ResponseSpeedWithToolCallsMetric,
+)
 
 from .conftest import make_metric_context
 
+# ---------------------------------------------------------------------------
+# Helpers
+# ---------------------------------------------------------------------------
+
+
+def _write_metrics_json(tmp_path, per_turn_latency: dict) -> None:
+    """Create tmp_path/metrics.json holding only turn_taking per_turn_latency details.
+
+    The nesting written is metrics -> turn_taking -> details -> per_turn_latency,
+    which is the path the tool-call response-speed metrics read.
+    """
+    # Build the document from the innermost level outwards.
+    details = {"per_turn_latency": per_turn_latency}
+    turn_taking = {"details": details}
+    payload = {"metrics": {"turn_taking": turn_taking}}
+    target = tmp_path / "metrics.json"
+    target.write_text(json.dumps(payload))
+
+
+def _make_trace(tool_call_turn_ids: set[int], all_turn_ids: set[int]) -> list[dict]:
+    """Return a trace with one user entry per turn, plus a tool call/response pair for tool turns."""
+    entries: list[dict] = []
+    for turn in sorted(all_turn_ids):
+        entries.append({"turn_id": turn, "type": "transcribed", "content": "user utterance"})
+        # Tool turns additionally get a call entry followed by its response.
+        for kind in ("tool_call", "tool_response") if turn in tool_call_turn_ids else ():
+            entries.append({"turn_id": turn, "type": kind, "tool_name": "some_tool"})
+    return entries
+
+
+# ---------------------------------------------------------------------------
+# ResponseSpeedMetric
+# ---------------------------------------------------------------------------
+
 
 class TestResponseSpeedMetric:
     @pytest.mark.asyncio
@@ -91,3 +131,193 @@ async def test_single_latency_value(self):
         assert result.details["max_speed_seconds"] == pytest.approx(0.75)
         assert result.details["num_turns"] == 1
         assert result.details["per_turn_speeds"] == [0.75]
+
+
+# ---------------------------------------------------------------------------
+# ResponseSpeedWithToolCallsMetric
+# ---------------------------------------------------------------------------
+
+
+class TestResponseSpeedWithToolCallsMetric:
+    @pytest.mark.asyncio
+    async def test_no_output_dir(self):
+        """Context built without an output_dir — score is 0.0 with an error."""
+        metric = ResponseSpeedWithToolCallsMetric()
+        ctx = make_metric_context()
+
+        result = await metric.compute(ctx)
+
+        assert result.score == 0.0
+        assert result.error is not None
+
+    @pytest.mark.asyncio
+    async def test_missing_metrics_json(self, tmp_path):
+        """output_dir exists but holds no metrics.json — score is 0.0 with an error."""
+        metric = ResponseSpeedWithToolCallsMetric()
+        ctx = make_metric_context(output_dir=tmp_path)
+
+        result = await metric.compute(ctx)
+
+        assert result.score == 0.0
+        assert result.error is not None
+
+    @pytest.mark.asyncio
+    async def test_missing_turn_taking_data(self, tmp_path):
+        """metrics.json lacks a turn_taking entry — score is 0.0 with an error."""
+        (tmp_path / "metrics.json").write_text(json.dumps({"metrics": {}}))
+        metric = ResponseSpeedWithToolCallsMetric()
+        ctx = make_metric_context(output_dir=tmp_path)
+
+        result = await metric.compute(ctx)
+
+        assert result.score == 0.0
+        assert result.error is not None
+
+    @pytest.mark.asyncio
+    async def test_no_turns_with_tool_calls(self, tmp_path):
+        """No turn in the trace has a tool call — the 'No turns with tool calls' error is returned."""
+        _write_metrics_json(tmp_path, {"1": 1.0, "2": 2.0, "3": 3.0})
+        trace = _make_trace(tool_call_turn_ids=set(), all_turn_ids={1, 2, 3})
+        metric = ResponseSpeedWithToolCallsMetric()
+        ctx = make_metric_context(output_dir=tmp_path, conversation_trace=trace)
+
+        result = await metric.compute(ctx)
+
+        assert result.score == 0.0
+        assert result.error is not None
+        assert "No turns with tool calls" in result.error
+
+    @pytest.mark.asyncio
+    async def test_mixed_turns(self, tmp_path):
+        """Only the latencies of tool-call turns contribute to the result."""
+        _write_metrics_json(tmp_path, {"1": 1.0, "2": 5.0, "3": 3.0, "4": 7.0})
+        # Turns 2 and 4 have tool calls; turns 1 and 3 do not
+        trace = _make_trace(tool_call_turn_ids={2, 4}, all_turn_ids={1, 2, 3, 4})
+        metric = ResponseSpeedWithToolCallsMetric()
+        ctx = make_metric_context(output_dir=tmp_path, conversation_trace=trace)
+
+        result = await metric.compute(ctx)
+
+        assert result.error is None
+        assert result.details["num_turns"] == 2
+        assert result.score == pytest.approx((5.0 + 7.0) / 2)
+        assert result.details["max_speed_seconds"] == pytest.approx(7.0)
+        assert result.details["per_turn_speeds"] == [5.0, 7.0]
+
+    @pytest.mark.asyncio
+    async def test_all_turns_have_tool_calls(self, tmp_path):
+        """Every turn has a tool call, so all latencies are included in the mean."""
+        _write_metrics_json(tmp_path, {"1": 2.0, "2": 4.0})
+        trace = _make_trace(tool_call_turn_ids={1, 2}, all_turn_ids={1, 2})
+        metric = ResponseSpeedWithToolCallsMetric()
+        ctx = make_metric_context(output_dir=tmp_path, conversation_trace=trace)
+
+        result = await metric.compute(ctx)
+
+        assert result.error is None
+        assert result.details["num_turns"] == 2
+        assert result.score == pytest.approx(3.0)
+
+    @pytest.mark.asyncio
+    async def test_filters_invalid_latency_values(self, tmp_path):
+        """Sanity filter (0 < x < 1000) drops -1.0 and 2000.0 from the per_turn_latency values."""
+        _write_metrics_json(tmp_path, {"1": -1.0, "2": 5.0, "3": 2000.0, "4": 3.0})
+        trace = _make_trace(tool_call_turn_ids={1, 2, 3, 4}, all_turn_ids={1, 2, 3, 4})
+        metric = ResponseSpeedWithToolCallsMetric()
+        ctx = make_metric_context(output_dir=tmp_path, conversation_trace=trace)
+
+        result = await metric.compute(ctx)
+
+        assert result.error is None
+        assert result.details["num_turns"] == 2  # only 5.0 and 3.0 pass
+        assert result.score == pytest.approx((5.0 + 3.0) / 2)
+
+
+# ---------------------------------------------------------------------------
+# ResponseSpeedNoToolCallsMetric
+# ---------------------------------------------------------------------------
+
+
+class TestResponseSpeedNoToolCallsMetric:
+    @pytest.mark.asyncio
+    async def test_no_output_dir(self):
+        """Context built without an output_dir — score is 0.0 with an error."""
+        metric = ResponseSpeedNoToolCallsMetric()
+        ctx = make_metric_context()
+
+        result = await metric.compute(ctx)
+
+        assert result.score == 0.0
+        assert result.error is not None
+
+    @pytest.mark.asyncio
+    async def test_missing_metrics_json(self, tmp_path):
+        """output_dir exists but holds no metrics.json — score is 0.0 with an error."""
+        metric = ResponseSpeedNoToolCallsMetric()
+        ctx = make_metric_context(output_dir=tmp_path)
+
+        result = await metric.compute(ctx)
+
+        assert result.score == 0.0
+        assert result.error is not None
+
+    @pytest.mark.asyncio
+    async def test_all_turns_have_tool_calls(self, tmp_path):
+        """Every turn has a tool call — the 'No turns without tool calls' error is returned."""
+        _write_metrics_json(tmp_path, {"1": 2.0, "2": 4.0})
+        trace = _make_trace(tool_call_turn_ids={1, 2}, all_turn_ids={1, 2})
+        metric = ResponseSpeedNoToolCallsMetric()
+        ctx = make_metric_context(output_dir=tmp_path, conversation_trace=trace)
+
+        result = await metric.compute(ctx)
+
+        assert result.score == 0.0
+        assert result.error is not None
+        assert "No turns without tool calls" in result.error
+
+    @pytest.mark.asyncio
+    async def test_mixed_turns(self, tmp_path):
+        """Only the latencies of turns without a tool call contribute to the result."""
+        _write_metrics_json(tmp_path, {"1": 1.0, "2": 5.0, "3": 3.0, "4": 7.0})
+        # Turns 2 and 4 have tool calls; turns 1 and 3 do not
+        trace = _make_trace(tool_call_turn_ids={2, 4}, all_turn_ids={1, 2, 3, 4})
+        metric = ResponseSpeedNoToolCallsMetric()
+        ctx = make_metric_context(output_dir=tmp_path, conversation_trace=trace)
+
+        result = await metric.compute(ctx)
+
+        assert result.error is None
+        assert result.details["num_turns"] == 2
+        assert result.score == pytest.approx((1.0 + 3.0) / 2)
+        assert result.details["max_speed_seconds"] == pytest.approx(3.0)
+        assert result.details["per_turn_speeds"] == [1.0, 3.0]
+
+    @pytest.mark.asyncio
+    async def test_no_turns_with_tool_calls(self, tmp_path):
+        """No turn has a tool call, so all three latencies are included in the mean."""
+        _write_metrics_json(tmp_path, {"1": 1.0, "2": 2.0, "3": 3.0})
+        trace = _make_trace(tool_call_turn_ids=set(), all_turn_ids={1, 2, 3})
+        metric = ResponseSpeedNoToolCallsMetric()
+        ctx = make_metric_context(output_dir=tmp_path, conversation_trace=trace)
+
+        result = await metric.compute(ctx)
+
+        assert result.error is None
+        assert result.details["num_turns"] == 3
+        assert result.score == pytest.approx(2.0)
+
+    @pytest.mark.asyncio
+    async def test_with_and_no_tool_split_is_exhaustive(self, tmp_path):
+        """per_turn_speeds of the with-tool and no-tool metrics together equal all per_turn_latency values."""
+        per_turn = {"1": 1.0, "2": 5.0, "3": 3.0, "4": 7.0, "5": 2.0}
+        _write_metrics_json(tmp_path, per_turn)
+        trace = _make_trace(tool_call_turn_ids={2, 4}, all_turn_ids={1, 2, 3, 4, 5})
+
+        ctx_with = make_metric_context(output_dir=tmp_path, conversation_trace=trace)
+        ctx_no = make_metric_context(output_dir=tmp_path, conversation_trace=trace)
+
+        result_with = await ResponseSpeedWithToolCallsMetric().compute(ctx_with)
+        result_no = await ResponseSpeedNoToolCallsMetric().compute(ctx_no)
+
+        combined = result_with.details["per_turn_speeds"] + result_no.details["per_turn_speeds"]
+        assert sorted(combined) == sorted(per_turn.values())