Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions examples/other/text-to-speech/requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -3,4 +3,5 @@ livekit-plugins-openai>=0.12.2
livekit-plugins-cartesia>=0.4.11
livekit-plugins-elevenlabs>=0.8.1
livekit-plugins-speechify>=0.1.0
livekit-plugins-typecast>=0.1.0
python-dotenv~=1.0
131 changes: 131 additions & 0 deletions examples/other/text-to-speech/typecast_tts.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,131 @@
import asyncio
import logging

from dotenv import load_dotenv

from livekit import rtc
from livekit.agents import AutoSubscribe, JobContext, WorkerOptions, cli
from livekit.plugins import typecast

load_dotenv()

logger = logging.getLogger("typecast-tts-demo")
logger.setLevel(logging.INFO)


async def entrypoint(job: JobContext):
    """Demo agent showcasing the Typecast TTS plugin.

    Walks through voice listing, emotion presets, audio output tuning, and
    seeded (reproducible) synthesis, streaming every synthesized frame into
    the room over a published microphone track.
    """
    logger.info("starting Typecast TTS example agent")

    # Example 0: List available voices (optional)
    logger.info("Listing available Typecast voices...")

    # Create TTS instance with default voice
    # You can also specify a voice ID: typecast.TTS(voice="tc_your_voice_id")
    tts = typecast.TTS(language="eng")  # Uses DEFAULT_VOICE_ID

    try:
        voices = await tts.list_voices()
        logger.info(f"Found {len(voices)} available voices")
        # Show the first three voices as a sample of what is available.
        for idx, voice in enumerate(voices[:3], 1):
            logger.info(
                f" {idx}. {voice.name} ({voice.id}) - Emotions: {', '.join(voice.emotions[:3])}..."
            )
        # You can filter by model
        # voices_filtered = await tts.list_voices(model="ssfm-v21")
    except Exception as e:
        # Voice listing is optional for the demo, so failures are non-fatal.
        logger.warning(f"Could not list voices: {e}")

    # Optionally, select a specific voice from the list
    # For this demo, we'll use the default voice
    logger.info(f"Using default voice: {typecast.DEFAULT_VOICE_ID}")

    # Publish an audio track the synthesized frames will be written to.
    source = rtc.AudioSource(tts.sample_rate, tts.num_channels)
    track = rtc.LocalAudioTrack.create_audio_track("agent-mic", source)
    options = rtc.TrackPublishOptions()
    options.source = rtc.TrackSource.SOURCE_MICROPHONE

    await job.connect(auto_subscribe=AutoSubscribe.SUBSCRIBE_NONE)
    publication = await job.room.local_participant.publish_track(track, options)
    await publication.wait_for_subscription()

    async def speak(text: str) -> None:
        # Synthesize `text` and push each resulting audio frame into the room.
        async for output in tts.synthesize(text):
            await source.capture_frame(output.frame)

    # Example 1: Basic synthesis
    logger.info("Example 1: Basic synthesis")
    await speak("Hello! Welcome to Typecast text-to-speech demonstration.")
    await asyncio.sleep(1)

    # Example 2: Happy emotion
    logger.info("Example 2: Synthesizing with happy emotion")
    tts.update_options(
        prompt_options=typecast.PromptOptions(emotion_preset="happy", emotion_intensity=1.5)
    )
    await speak("This is great! I'm so excited to demonstrate emotional expression!")
    await asyncio.sleep(1)

    # Example 3: Sad emotion
    logger.info("Example 3: Synthesizing with sad emotion")
    tts.update_options(
        prompt_options=typecast.PromptOptions(emotion_preset="sad", emotion_intensity=1.2)
    )
    await speak("Sometimes things don't go as planned, and that's okay.")
    await asyncio.sleep(1)

    # Example 4: Back to normal with audio adjustments
    logger.info("Example 4: Normal emotion with audio adjustments")
    tts.update_options(
        prompt_options=typecast.PromptOptions(emotion_preset="normal", emotion_intensity=1.0),
        output_options=typecast.OutputOptions(
            volume=110,  # Louder volume
            audio_pitch=1,  # Higher pitch
            audio_tempo=1.1,  # Faster tempo
        ),
    )
    await speak("Now I'm speaking faster with a higher pitch!")
    await asyncio.sleep(1)

    # Example 5: Reproducible synthesis with seed
    logger.info("Example 5: Using seed for reproducible synthesis")
    tts.update_options(
        seed=42,  # Same seed will produce the same output
        prompt_options=typecast.PromptOptions(emotion_preset="normal", emotion_intensity=1.0),
        output_options=typecast.OutputOptions(volume=100, audio_pitch=0, audio_tempo=1.0),
    )
    await speak("This synthesis can be reproduced with the same seed value.")

    logger.info("Typecast TTS demonstration completed!")


if __name__ == "__main__":
    # Launch the LiveKit agent worker using this module's entrypoint.
    worker_options = WorkerOptions(entrypoint_fnc=entrypoint)
    cli.run_app(worker_options)
15 changes: 15 additions & 0 deletions livekit-plugins/livekit-plugins-typecast/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
# Typecast plugin for LiveKit Agents

Support for voice synthesis with [Typecast](https://typecast.ai/).

## Installation

```bash
pip install livekit-plugins-typecast
```

## Prerequisites

You'll need an API key from Typecast. Visit the [Typecast API](https://typecast.ai/developers/api) page to get started.

The API key can be set as an environment variable: `TYPECAST_API_KEY`
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
# Copyright 2023 LiveKit, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Typecast TTS plugin for LiveKit Agents

Typecast provides high-quality, emotionally expressive text-to-speech synthesis
with support for multiple languages and voice styles.

See https://typecast.ai for more information.
"""

from .models import (
DEFAULT_VOICE_ID,
AudioFormat,
OutputOptions,
PromptOptions,
TTSLanguages,
TTSModels,
Voice,
)
from .tts import TTS
from .version import __version__

# Public API of the livekit.plugins.typecast package.
__all__ = [
    "TTS",
    "Voice",
    "DEFAULT_VOICE_ID",
    "TTSModels",
    "TTSLanguages",
    "AudioFormat",
    "PromptOptions",
    "OutputOptions",
    "__version__",
]

from livekit.agents import Plugin

from .log import logger


class TypecastPlugin(Plugin):
    """Plugin descriptor that registers Typecast TTS with the LiveKit Agents framework."""

    def __init__(self) -> None:
        super().__init__(__name__, __version__, __package__, logger)


# Register at import time so the agents framework discovers the plugin
# as soon as the package is imported.
Plugin.register_plugin(TypecastPlugin())

# Cleanup docs of unexported modules: hide every module-level name that is
# not part of __all__ from pdoc-generated documentation.
_module = dir()
NOT_IN_ALL = [name for name in _module if name not in __all__]

__pdoc__ = {name: False for name in NOT_IN_ALL}
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
import logging

# Shared logger for all modules in the Typecast plugin package.
logger = logging.getLogger("livekit.plugins.typecast")
Original file line number Diff line number Diff line change
@@ -0,0 +1,112 @@
# Copyright 2023 LiveKit, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from dataclasses import dataclass
from typing import Literal

# Typecast TTS models supported by this plugin (Literal alias for static checking)
TTSModels = Literal["ssfm-v21"]

# Default voice ID (Olivia - supports multiple emotions)
DEFAULT_VOICE_ID = "tc_62a8975e695ad26f7fb514d1"

# Audio format options for synthesized output
AudioFormat = Literal["wav", "mp3"]

# Supported languages (ISO 639-3 codes)
TTSLanguages = Literal[
    "eng",  # English
    "kor",  # Korean
    "jpn",  # Japanese
    "zho",  # Chinese
    "spa",  # Spanish
    "deu",  # German
    "fra",  # French
    "ita",  # Italian
    "rus",  # Russian
    "ara",  # Arabic
    "por",  # Portuguese
    "nld",  # Dutch
    "pol",  # Polish
    "swe",  # Swedish
    "tur",  # Turkish
    "hin",  # Hindi
    "tha",  # Thai
    "vie",  # Vietnamese
    "ind",  # Indonesian
]


@dataclass
class Voice:
    """
    Typecast voice model information.

    Presumably mirrors one entry of the Typecast voice-catalog API response
    (see TTS.list_voices) — confirm against the API schema.

    Attributes:
        id: Unique voice identifier (e.g., "tc_62a8975e695ad26f7fb514d1")
        name: Human-readable voice name (e.g., "Olivia")
        model: TTS model type (e.g., "ssfm-v21")
        emotions: List of supported emotions (e.g., ["normal", "happy", "sad", "angry"])
    """

    id: str
    name: str
    model: str
    emotions: list[str]


@dataclass
class PromptOptions:
    """Emotion controls for Typecast TTS synthesis.

    Attributes:
        emotion_preset: Emotion type (e.g., "normal", "happy", "sad", "angry")
        emotion_intensity: Intensity of the emotion (0.0 ~ 2.0, default: 1.0)
    """

    emotion_preset: str = "normal"
    emotion_intensity: float = 1.0

    def to_dict(self) -> dict:
        """Serialize to the payload shape expected by the Typecast API."""
        return dict(
            emotion_preset=self.emotion_preset,
            emotion_intensity=self.emotion_intensity,
        )


@dataclass
class OutputOptions:
    """Audio output characteristics for Typecast TTS synthesis.

    Attributes:
        volume: Volume level (0 ~ 200, default: 100)
        audio_pitch: Pitch adjustment in semitones (-12 ~ +12, default: 0)
        audio_tempo: Speed multiplier (0.5x ~ 2.0x, default: 1.0)
        audio_format: Output format ("wav" or "mp3", default: "wav")
    """

    volume: int = 100
    audio_pitch: int = 0
    audio_tempo: float = 1.0
    audio_format: AudioFormat = "wav"

    def to_dict(self) -> dict:
        """Serialize to the payload shape expected by the Typecast API."""
        keys = ("volume", "audio_pitch", "audio_tempo", "audio_format")
        return {key: getattr(self, key) for key in keys}
Loading