GetStream · Nash0x7E2 · Jan 7, 2026 · Jan 7, 2026
diff --git a/examples/03_phone_and_rag_example/uv.lock b/examples/03_phone_and_rag_example/uv.lock
diff --git a/plugins/nemotron/README.md b/plugins/nemotron/README.md
@@ -0,0 +1,57 @@
+# Vision Agents - NVIDIA Nemotron Speech Plugin
+
+NVIDIA Nemotron Speech STT integration for Vision Agents.
+
+## Installation
+
+```bash
+pip install vision-agents-plugins-nemotron
+```
+
+## Quick Start
+
+### 1. Start the Nemotron Server
+
+```bash
+cd plugins/nemotron/server
+
+# Option A: Direct Python (requires NeMo)
+pip install -r requirements.txt
+python nemotron_server.py
+
+# Option B: Docker
+docker build -t nemotron-server .
+docker run -p 8765:8765 nemotron-server
+```
+
+### 2. Use the Plugin
+
+```python
+from vision_agents.plugins import nemotron
+
+stt = nemotron.STT(server_url="http://localhost:8765")
+await stt.start()
+
+await stt.process_audio(pcm_data)
+```
+
+## Configuration
+
+| Parameter | Default | Description |
+|-----------|---------|-------------|
+| `server_url` | `http://localhost:8765` | Nemotron server URL |
+| `timeout` | `30.0` | HTTP request timeout (seconds) |
+
+## Server Configuration
+
+Set via environment variables:
+
+| Variable | Default | Description |
+|----------|---------|-------------|
+| `NEMOTRON_DEVICE` | `cpu` | Device: `cpu` or `cuda` |
+| `NEMOTRON_MODEL` | `nvidia/nemotron-speech-streaming-en-0.6b` | HuggingFace model |
+
+## Links
+
+- [Nemotron Speech on HuggingFace](https://huggingface.co/nvidia/nemotron-speech-streaming-en-0.6b)
+- [NVIDIA NeMo](https://github.com/NVIDIA/NeMo)
diff --git a/plugins/nemotron/example/README.md b/plugins/nemotron/example/README.md
@@ -0,0 +1,85 @@
+# Nemotron STT Example
+
+Example demonstrating NVIDIA Nemotron Speech-to-Text with Vision Agents.
+
+## Architecture
+
+Because the NeMo toolkit's dependencies conflict with other Vision Agents dependencies,
+Nemotron runs as a **separate server**:
+
+```
+┌─────────────────┐         HTTP         ┌─────────────────┐
+│  Vision Agents  │ ──────────────────▶  │ Nemotron Server │
+│  (this example) │                      │  (NeMo toolkit) │
+└─────────────────┘                      └─────────────────┘
+```
+
+## Setup
+
+### 1. Start the Nemotron Server
+
+In a separate terminal/environment with NeMo installed:
+
+```bash
+cd plugins/nemotron/server
+
+# Create a separate virtual environment for NeMo
+python -m venv .venv
+source .venv/bin/activate
+pip install -r requirements.txt
+
+# Start the server
+python nemotron_server.py
+```
+
+Or use Docker:
+
+```bash
+cd plugins/nemotron/server
+docker build -t nemotron-server .
+docker run -p 8765:8765 nemotron-server
+```
+
+### 2. Run the Example
+
+```bash
+cd plugins/nemotron/example
+
+# Sync dependencies (this installs vision-agents and plugins)
+uv sync
+
+# Create .env with your API keys
+cat > .env << EOF
+STREAM_API_KEY=your_stream_api_key
+STREAM_API_SECRET=your_stream_api_secret
+GOOGLE_API_KEY=your_google_api_key
+ELEVENLABS_API_KEY=your_elevenlabs_api_key
+EOF
+
+# Run the example
+uv run python main.py
+```
+
+**Note:** If you get a `ModuleNotFoundError`, make sure you have run `uv sync` from the example directory first. The example uses editable path dependencies that point to other packages in this repository.
+
+## Configuration
+
+```python
+# Connect to a remote server
+stt = nemotron.STT(server_url="http://your-server:8765")
+
+# Custom timeout
+stt = nemotron.STT(timeout=60.0)
+```
+
+## Server Environment Variables
+
+| Variable | Default | Description |
+|----------|---------|-------------|
+| `NEMOTRON_DEVICE` | `cpu` | Device: `cpu` or `cuda` |
+| `NEMOTRON_MODEL` | `nvidia/nemotron-speech-streaming-en-0.6b` | Model name |
+
+## Links
+
+- [Nemotron Speech on HuggingFace](https://huggingface.co/nvidia/nemotron-speech-streaming-en-0.6b)
+- [Vision Agents Documentation](https://visionagents.ai)
diff --git a/plugins/nemotron/example/__init__.py b/plugins/nemotron/example/__init__.py
diff --git a/plugins/nemotron/example/main.py b/plugins/nemotron/example/main.py
@@ -0,0 +1,57 @@
+#!/usr/bin/env python3
+"""
+Example: Speech-to-Text with NVIDIA Nemotron using the Agent class
+
+Requires the Nemotron server running separately (see ../server/).
+
+Usage::
+    # First start the server (in separate terminal with NeMo installed):
+    cd plugins/nemotron/server
+    python nemotron_server.py
+
+    # Then run this example:
+    python main.py
+"""
+
+import logging
+
+from dotenv import load_dotenv
+
+from vision_agents.core import User, Agent, cli
+from vision_agents.core.agents import AgentLauncher
+from vision_agents.plugins import nemotron, getstream, gemini, elevenlabs, vogent
+
+logger = logging.getLogger(__name__)
+
+load_dotenv()
+
+
+async def create_agent(**kwargs) -> Agent:
+    agent = Agent(
+        edge=getstream.Edge(),
+        agent_user=User(name="Nemotron Bot", id="agent"),
+        instructions="You're a helpful voice assistant. Keep responses short.",
+        llm=gemini.LLM(model="gemini-2.0-flash"),
+        tts=elevenlabs.TTS(),
+        stt=nemotron.STT(
+            server_url="http://localhost:8765",
+        ),
+        turn_detection=vogent.TurnDetection(),
+    )
+
+    return agent
+
+
+async def join_call(agent: Agent, call_type: str, call_id: str, **kwargs) -> None:
+    await agent.create_user()
+    call = await agent.create_call(call_type, call_id)
+
+    with await agent.join(call):
+        await agent.simple_response(
+            "Hello! I'm using NVIDIA Nemotron for speech recognition."
+        )
+        await agent.finish()
+
+
+if __name__ == "__main__":
+    cli(AgentLauncher(create_agent=create_agent, join_call=join_call))
diff --git a/plugins/nemotron/example/pyproject.toml b/plugins/nemotron/example/pyproject.toml
@@ -0,0 +1,22 @@
+[project]
+name = "nemotron-example"
+version = "0.0.0"
+requires-python = ">=3.10"
+
+dependencies = [
+    "vision-agents",
+    "python-dotenv>=1.0",
+    "vision-agents-plugins-nemotron",
+    "vision-agents-plugins-getstream",
+    "vision-agents-plugins-gemini",
+    "vision-agents-plugins-elevenlabs",
+    "vision-agents-plugins-vogent",
+]
+
+[tool.uv.sources]
+"vision-agents" = {path = "../../../agents-core", editable=true}
+"vision-agents-plugins-nemotron" = {path = "..", editable=true}
+"vision-agents-plugins-getstream" = {path = "../../getstream", editable=true}
+"vision-agents-plugins-gemini" = {path = "../../gemini", editable=true}
+"vision-agents-plugins-elevenlabs" = {path = "../../elevenlabs", editable=true}
+"vision-agents-plugins-vogent" = {path = "../../vogent", editable=true}