diff --git a/python/packages/orchestrations/agent_framework_orchestrations/_handoff.py b/python/packages/orchestrations/agent_framework_orchestrations/_handoff.py
index 4352a8af47..3570e46f93 100644
--- a/python/packages/orchestrations/agent_framework_orchestrations/_handoff.py
+++ b/python/packages/orchestrations/agent_framework_orchestrations/_handoff.py
@@ -40,7 +40,7 @@
 
 from agent_framework import Agent, SupportsAgentRun
 from agent_framework._middleware import FunctionInvocationContext, FunctionMiddleware
-from agent_framework._sessions import AgentSession
+from agent_framework._sessions import AgentSession, BaseHistoryProvider, InMemoryHistoryProvider
 from agent_framework._tools import FunctionTool, tool
 from agent_framework._types import AgentResponse, Content, Message
 from agent_framework._workflows._agent_executor import AgentExecutor, AgentExecutorRequest
@@ -368,12 +368,32 @@ def _clone_chat_agent(self, agent: Agent[Any]) -> Agent[Any]:
             # restore the original tools, in case they are shared between agents
             options["tools"] = tools_from_options
 
+        # Filter out history providers to prevent duplicate messages.
+        # The HandoffAgentExecutor manages conversation history via _full_conversation,
+        # so history providers would re-inject previously stored messages on each
+        # agent.run() call, causing the entire conversation to appear twice.
+        # A no-op InMemoryHistoryProvider placeholder prevents the agent from
+        # auto-injecting a default one at runtime.
+        filtered_providers = [
+            p for p in agent.context_providers
+            if not isinstance(p, BaseHistoryProvider)
+        ]
+        # Always add a no-op placeholder to prevent the agent from
+        # auto-injecting a default InMemoryHistoryProvider at runtime.
+        filtered_providers.append(
+            InMemoryHistoryProvider(
+                load_messages=False,
+                store_inputs=False,
+                store_outputs=False,
+            )
+        )
+
         return Agent(
             client=agent.client,
             id=agent.id,
             name=agent.name,
             description=agent.description,
-            context_providers=agent.context_providers,
+            context_providers=filtered_providers,
             middleware=agent.agent_middleware,
             default_options=cloned_options,  # type: ignore[assignment]
         )
diff --git a/python/packages/orchestrations/tests/test_handoff.py b/python/packages/orchestrations/tests/test_handoff.py
index 43c2f9153a..71e17c6389 100644
--- a/python/packages/orchestrations/tests/test_handoff.py
+++ b/python/packages/orchestrations/tests/test_handoff.py
@@ -1117,3 +1117,113 @@ def get_session(self, *, service_session_id, **kwargs):
 
     with pytest.raises(TypeError, match="Participants must be Agent instances"):
         HandoffBuilder().participants([fake])
+
+
+class CapturingChatClient(ChatMiddlewareLayer[Any], FunctionInvocationLayer[Any], BaseChatClient[Any]):
+    """Mock client that records the messages it receives on each call."""
+
+    def __init__(
+        self,
+        *,
+        name: str = "",
+        handoff_to: str | None = None,
+    ) -> None:
+        ChatMiddlewareLayer.__init__(self)
+        FunctionInvocationLayer.__init__(self)
+        BaseChatClient.__init__(self)
+        self._name = name
+        self._handoff_to = handoff_to
+        self._call_index = 0
+        self.captured_calls: list[list[Message]] = []
+
+    def _inner_get_response(
+        self,
+        *,
+        messages: Sequence[Message],
+        stream: bool,
+        options: Mapping[str, Any],
+        **kwargs: Any,
+    ) -> Awaitable[ChatResponse] | ResponseStream[ChatResponseUpdate, ChatResponse]:
+        self.captured_calls.append(list(messages))
+        contents = _build_reply_contents(self._name, self._handoff_to, self._next_call_id())
+        reply = Message(role="assistant", contents=contents)
+        if stream:
+            return self._build_streaming_response(contents, dict(options))
+
+        async def _get() -> ChatResponse:
+            return ChatResponse(messages=reply, response_id="mock_response")
+
+        return _get()
+
+    def _build_streaming_response(
+        self, contents: list[Content], options: dict[str, Any]
+    ) -> ResponseStream[ChatResponseUpdate, ChatResponse]:
+        async def _stream() -> AsyncIterable[ChatResponseUpdate]:
+            yield ChatResponseUpdate(contents=contents, role="assistant", finish_reason="stop")
+
+        def _finalize(updates: Sequence[ChatResponseUpdate]) -> ChatResponse:
+            response_format = options.get("response_format")
+            output_format_type = response_format if isinstance(response_format, type) else None
+            return ChatResponse.from_updates(updates, output_format_type=output_format_type)
+
+        return ResponseStream(_stream(), finalizer=_finalize)
+
+    def _next_call_id(self) -> str | None:
+        if not self._handoff_to:
+            return None
+        call_id = f"{self._name}-handoff-{self._call_index}"
+        self._call_index += 1
+        # Only handoff on first call
+        self._handoff_to = None
+        return call_id
+
+
+async def test_no_duplicate_messages_after_handoff_and_resume() -> None:
+    """Regression test for issue #4695: duplicate messages in Handoff workflow.
+
+    When InMemoryHistoryProvider is active on handoff agents, it re-injects
+    previously stored messages alongside the executor's _full_conversation,
+    causing the entire conversation to appear twice in the API call.
+    """
+    triage_client = CapturingChatClient(name="triage", handoff_to="specialist")
+    specialist_client = CapturingChatClient(name="specialist")
+
+    triage = Agent(client=triage_client, name="triage", id="triage")
+    specialist = Agent(client=specialist_client, name="specialist", id="specialist")
+
+    workflow = (
+        HandoffBuilder(
+            participants=[triage, specialist],
+            termination_condition=lambda _: False,
+        )
+        .with_start_agent(triage)
+        .build()
+    )
+
+    # First run: triage hands off to specialist, specialist responds
+    first_events = await _drain(workflow.run("Hello, I need help.", stream=True))
+    first_request = _latest_request_info_event(first_events)
+
+    # Second run: user replies, specialist responds again
+    await _drain(
+        workflow.run(
+            stream=True,
+            responses={first_request.request_id: [Message(role="user", text="More details please.")]},
+        )
+    )
+
+    # Specialist should have been called twice
+    assert len(specialist_client.captured_calls) == 2
+
+    # On the second call, verify no duplicate messages.
+    # Use a structural fingerprint (role + content types/text) to detect duplicates
+    # rather than comparing .text alone, which can miss non-text duplicates
+    # and false-fail on legitimately identical text.
+    second_call_messages = specialist_client.captured_calls[1]
+    fingerprints = [
+        (m.role, tuple((c.type, c.text) for c in m.contents))
+        for m in second_call_messages
+    ]
+    assert len(fingerprints) == len(set(fingerprints)), (
+        f"Duplicate messages detected in specialist's second call: {fingerprints}"
+    )