diff --git a/packages/typescript/ai-gemini/src/adapters/live.ts b/packages/typescript/ai-gemini/src/adapters/live.ts new file mode 100644 index 00000000..42c0f61a --- /dev/null +++ b/packages/typescript/ai-gemini/src/adapters/live.ts @@ -0,0 +1,149 @@ +import { BaseRealtimeAdapter } from '@tanstack/ai/adapters' +import { Modality } from '@google/genai' +import { createGeminiClient, getGeminiApiKeyFromEnv } from '../utils' +import type { GeminiClientConfig} from '../utils'; +import type { GoogleGenAI} from '@google/genai'; +import type { GEMINI_LIVE_MODELS, GeminiLiveAPIVoice } from '../model-meta' +import type { RealtimeOptions, RealtimeResult } from '@tanstack/ai' + +/** + * Provider-specific options for the Gemini Live API + * + * @experimental The Gemini Live API is an experimental feature. + * @see https://ai.google.dev/gemini-api/docs/live + */ +export interface GeminiLiveAPIProviderOptions { + /** + * Voice configuration for Gemini Live API; the adapter falls back to the prebuilt 'Kore' voice when omitted. + * Only the prebuilt voices named by `GeminiLiveAPIVoice` are accepted. + */ + voiceConfig?: { + prebuiltVoiceConfig?: { + /** + * The voice name to use for speech synthesis. + * @see https://ai.google.dev/gemini-api/docs/speech-generation#voices + */ + voiceName?: GeminiLiveAPIVoice + } + } + /** Language code forwarded to `speechConfig.languageCode`. */ + languageCode?: string + /** System instruction forwarded to the session `config`. */ + systemInstruction?: string +} + +export interface GeminiLiveAPIConfig extends GeminiClientConfig {} + +export type GeminiLiveAPIModel = (typeof GEMINI_LIVE_MODELS)[number] + +export class GeminiLiveAPIAdapter< + TModel extends GeminiLiveAPIModel +> extends BaseRealtimeAdapter { + readonly name = 'gemini' as const + + private client: GoogleGenAI + + constructor(config: GeminiLiveAPIConfig, model: TModel) { + super(config, model); + + this.client = createGeminiClient(config); + } + + /** + * Opens a Gemini Live API session for `options.model` and closes it again before returning. + * + * @experimental This implementation is experimental and may change. 
+ * @see https://ai.google.dev/gemini-api/docs/live + * @returns Placeholder result: `audio` is empty because nothing is captured before the session is closed. + */ + async connectRealtime( + options: RealtimeOptions, + ): Promise { + const { model, modelOptions } = options; + + const voiceConfig = modelOptions?.voiceConfig ?? { + prebuiltVoiceConfig: { + voiceName: 'Kore', + }, + } + + const liveSession = await this.client.live.connect({ + model, + config: { + responseModalities: [Modality.AUDIO], + speechConfig: { + voiceConfig, + ...(modelOptions?.languageCode && { + languageCode: modelOptions.languageCode, + }), + }, + ...(modelOptions?.systemInstruction && { + systemInstruction: modelOptions.systemInstruction, + }), + }, + callbacks: { + onopen: function() { + console.debug('Opened'); + }, + onmessage: function(message) { + console.debug(message); + }, + onerror: function(e) { + console.error('Error:', e.message); + }, + onclose: function(e) { + console.debug('Close:', e.reason); + } + } + }); + + liveSession.close(); + + return { id: this.generateId(), model, audio: '', format: 'pcm' }; + } +} + +/** + * Creates a Gemini Live API adapter with explicit API key. + * Type resolution happens here at the call site. + * + * @experimental Gemini Live API is an experimental feature and may change. + * + * @param model - The model name (e.g., 'gemini-2.5-flash-native-audio-preview-12-2025') + * @param apiKey - Your Google API key + * @param config - Optional additional configuration + * @returns Configured Gemini Live API adapter instance with resolved types + * + */ +export function createGeminiLiveApi( + model: TModel, + apiKey: string, + config?: Omit +): GeminiLiveAPIAdapter { + return new GeminiLiveAPIAdapter({ apiKey, ...config }, model); +} + +/** + * Creates a Gemini Live API adapter with automatic API key detection from environment variables. + * Type resolution happens here at the call site. + * + * @experimental Gemini Live API is an experimental feature and may change. 
+ * + * Looks for `GOOGLE_API_KEY` or `GEMINI_API_KEY` in: + * - `process.env` (Node.js) + * - `window.env` (Browser with injected env) + * + * @param model - The model name (e.g., 'gemini-2.5-flash-native-audio-preview-12-2025') + * @param config - Optional configuration (excluding apiKey which is auto-detected) + * @returns Configured Gemini Live API adapter instance with resolved types + * @throws Error if GOOGLE_API_KEY or GEMINI_API_KEY is not found in environment + * + */ +export function geminiLiveAPI( + model: TModel, + config?: Omit +) { + const apiKey = getGeminiApiKeyFromEnv(); + + return createGeminiLiveApi(model, apiKey, config); +} diff --git a/packages/typescript/ai-gemini/src/model-meta.ts b/packages/typescript/ai-gemini/src/model-meta.ts index 5fca6c16..2f91bc15 100644 --- a/packages/typescript/ai-gemini/src/model-meta.ts +++ b/packages/typescript/ai-gemini/src/model-meta.ts @@ -26,6 +26,7 @@ interface ModelMeta { | 'structured_output' | 'thinking' | 'url_context' + | 'gemini_live_api' > } max_input_tokens?: number @@ -76,11 +77,11 @@ const GEMINI_3_PRO = { }, } as const satisfies ModelMeta< GeminiToolConfigOptions & - GeminiSafetyOptions & - GeminiGenerationConfigOptions & - GeminiCachedContentOptions & - GeminiStructuredOutputOptions & - GeminiThinkingOptions + GeminiSafetyOptions & + GeminiGenerationConfigOptions & + GeminiCachedContentOptions & + GeminiStructuredOutputOptions & + GeminiThinkingOptions > const GEMINI_2_5_PRO = { @@ -114,11 +115,11 @@ const GEMINI_2_5_PRO = { }, } as const satisfies ModelMeta< GeminiToolConfigOptions & - GeminiSafetyOptions & - GeminiGenerationConfigOptions & - GeminiCachedContentOptions & - GeminiStructuredOutputOptions & - GeminiThinkingOptions + GeminiSafetyOptions & + GeminiGenerationConfigOptions & + GeminiCachedContentOptions & + GeminiStructuredOutputOptions & + GeminiThinkingOptions > const GEMINI_2_5_PRO_TTS = { @@ -141,9 +142,9 @@ const GEMINI_2_5_PRO_TTS = { }, } as const satisfies ModelMeta< 
GeminiToolConfigOptions & - GeminiSafetyOptions & - GeminiGenerationConfigOptions & - GeminiCachedContentOptions + GeminiSafetyOptions & + GeminiGenerationConfigOptions & + GeminiCachedContentOptions > const GEMINI_2_5_FLASH = { @@ -177,11 +178,11 @@ const GEMINI_2_5_FLASH = { }, } as const satisfies ModelMeta< GeminiToolConfigOptions & - GeminiSafetyOptions & - GeminiGenerationConfigOptions & - GeminiCachedContentOptions & - GeminiStructuredOutputOptions & - GeminiThinkingOptions + GeminiSafetyOptions & + GeminiGenerationConfigOptions & + GeminiCachedContentOptions & + GeminiStructuredOutputOptions & + GeminiThinkingOptions > const GEMINI_2_5_FLASH_PREVIEW = { @@ -214,11 +215,11 @@ const GEMINI_2_5_FLASH_PREVIEW = { }, } as const satisfies ModelMeta< GeminiToolConfigOptions & - GeminiSafetyOptions & - GeminiGenerationConfigOptions & - GeminiCachedContentOptions & - GeminiStructuredOutputOptions & - GeminiThinkingOptions + GeminiSafetyOptions & + GeminiGenerationConfigOptions & + GeminiCachedContentOptions & + GeminiStructuredOutputOptions & + GeminiThinkingOptions > const GEMINI_2_5_FLASH_IMAGE = { @@ -245,47 +246,13 @@ const GEMINI_2_5_FLASH_IMAGE = { normal: 0.4, }, }, -} as const satisfies ModelMeta< - GeminiToolConfigOptions & - GeminiSafetyOptions & - GeminiGenerationConfigOptions & - GeminiCachedContentOptions -> -/** -const GEMINI_2_5_FLASH_LIVE = { - name: 'gemini-2.5-flash-native-audio-preview-09-2025', - max_input_tokens: 141_072, - max_output_tokens: 8_192, - knowledge_cutoff: '2025-01-01', - supports: { - input: ['text', 'audio', 'video'], - output: ['text', 'audio'], - capabilities: [ - 'audio_generation', - 'file_search', - 'function_calling', - 'live_api', - 'search_grounding', - 'thinking', - ], - }, - pricing: { - // todo find this info - input: { - normal: 0, - }, - output: { - normal: 0, - }, - }, } as const satisfies ModelMeta< GeminiToolConfigOptions & GeminiSafetyOptions & GeminiGenerationConfigOptions & - GeminiCachedContentOptions & - 
GeminiThinkingOptions + GeminiCachedContentOptions > -*/ + const GEMINI_2_5_FLASH_TTS = { name: 'gemini-2.5-flash-preview-tts', max_input_tokens: 8_192, @@ -306,9 +273,9 @@ const GEMINI_2_5_FLASH_TTS = { }, } as const satisfies ModelMeta< GeminiToolConfigOptions & - GeminiSafetyOptions & - GeminiGenerationConfigOptions & - GeminiCachedContentOptions + GeminiSafetyOptions & + GeminiGenerationConfigOptions & + GeminiCachedContentOptions > const GEMINI_2_5_FLASH_LITE = { @@ -341,11 +308,11 @@ const GEMINI_2_5_FLASH_LITE = { }, } as const satisfies ModelMeta< GeminiToolConfigOptions & - GeminiSafetyOptions & - GeminiGenerationConfigOptions & - GeminiCachedContentOptions & - GeminiStructuredOutputOptions & - GeminiThinkingOptions + GeminiSafetyOptions & + GeminiGenerationConfigOptions & + GeminiCachedContentOptions & + GeminiStructuredOutputOptions & + GeminiThinkingOptions > const GEMINI_2_5_FLASH_LITE_PREVIEW = { @@ -377,13 +344,44 @@ const GEMINI_2_5_FLASH_LITE_PREVIEW = { }, } as const satisfies ModelMeta< GeminiToolConfigOptions & - GeminiSafetyOptions & - GeminiGenerationConfigOptions & - GeminiCachedContentOptions & - GeminiStructuredOutputOptions & - GeminiThinkingOptions + GeminiSafetyOptions & + GeminiGenerationConfigOptions & + GeminiCachedContentOptions & + GeminiStructuredOutputOptions & + GeminiThinkingOptions > +export const GEMINI_2_5_FLASH_NATIVE_AUDIO = { + name: 'gemini-2.5-flash-native-audio-preview-12-2025', + max_input_tokens: 131_072, + max_output_tokens: 8_192, + supports: { + input: ['text', 'image', 'audio', 'video'], + output: ['text', 'audio'], + capabilities: [ + 'function_calling', + 'search_grounding', + 'gemini_live_api', + ], + }, + pricing: { + input: { + normal: 0.5, + }, + output: { + normal: 2, + }, + }, +} as const satisfies ModelMeta< + GeminiToolConfigOptions & + GeminiSafetyOptions & + GeminiGenerationConfigOptions & + GeminiCachedContentOptions & + GeminiStructuredOutputOptions & + GeminiThinkingOptions +> + + const GEMINI_2_FLASH = 
{ name: 'gemini-2.0-flash', max_input_tokens: 1_048_576, @@ -413,10 +411,10 @@ const GEMINI_2_FLASH = { }, } as const satisfies ModelMeta< GeminiToolConfigOptions & - GeminiSafetyOptions & - GeminiGenerationConfigOptions & - GeminiCachedContentOptions & - GeminiStructuredOutputOptions + GeminiSafetyOptions & + GeminiGenerationConfigOptions & + GeminiCachedContentOptions & + GeminiStructuredOutputOptions > const GEMINI_2_FLASH_IMAGE = { @@ -444,11 +442,11 @@ const GEMINI_2_FLASH_IMAGE = { }, } as const satisfies ModelMeta< GeminiToolConfigOptions & - GeminiSafetyOptions & - GeminiGenerationConfigOptions & - GeminiCachedContentOptions + GeminiSafetyOptions & + GeminiGenerationConfigOptions & + GeminiCachedContentOptions > -/* +/* const GEMINI_2_FLASH_LIVE = { name: 'gemini-2.0-flash-live-001', max_input_tokens: 1_048_576, @@ -508,10 +506,10 @@ const GEMINI_2_FLASH_LITE = { }, } as const satisfies ModelMeta< GeminiToolConfigOptions & - GeminiSafetyOptions & - GeminiGenerationConfigOptions & - GeminiCachedContentOptions & - GeminiStructuredOutputOptions + GeminiSafetyOptions & + GeminiGenerationConfigOptions & + GeminiCachedContentOptions & + GeminiStructuredOutputOptions > const IMAGEN_4_GENERATE = { @@ -532,9 +530,9 @@ const IMAGEN_4_GENERATE = { }, } as const satisfies ModelMeta< GeminiToolConfigOptions & - GeminiSafetyOptions & - GeminiGenerationConfigOptions & - GeminiCachedContentOptions + GeminiSafetyOptions & + GeminiGenerationConfigOptions & + GeminiCachedContentOptions > const IMAGEN_4_GENERATE_ULTRA = { @@ -555,9 +553,9 @@ const IMAGEN_4_GENERATE_ULTRA = { }, } as const satisfies ModelMeta< GeminiToolConfigOptions & - GeminiSafetyOptions & - GeminiGenerationConfigOptions & - GeminiCachedContentOptions + GeminiSafetyOptions & + GeminiGenerationConfigOptions & + GeminiCachedContentOptions > const IMAGEN_4_GENERATE_FAST = { @@ -578,9 +576,9 @@ const IMAGEN_4_GENERATE_FAST = { }, } as const satisfies ModelMeta< GeminiToolConfigOptions & - GeminiSafetyOptions & - 
GeminiGenerationConfigOptions & - GeminiCachedContentOptions + GeminiSafetyOptions & + GeminiGenerationConfigOptions & + GeminiCachedContentOptions > const IMAGEN_3 = { @@ -598,126 +596,126 @@ const IMAGEN_3 = { normal: 0.03, }, }, -} as const satisfies ModelMeta< - GeminiToolConfigOptions & - GeminiSafetyOptions & - GeminiGenerationConfigOptions & - GeminiCachedContentOptions -> -/** -const VEO_3_1_PREVIEW = { - name: 'veo-3.1-generate-preview', - max_input_tokens: 1024, - max_output_tokens: 1, - supports: { - input: ['text', 'image'], - output: ['video', 'audio'], - }, - pricing: { - input: { - normal: 0, - }, - output: { - normal: 0.4, - }, - }, -} as const satisfies ModelMeta< - GeminiToolConfigOptions & - GeminiSafetyOptions & - GeminiGenerationConfigOptions & - GeminiCachedContentOptions -> - -const VEO_3_1_FAST_PREVIEW = { - name: 'veo-3.1-fast-generate-preview', - max_input_tokens: 1024, - max_output_tokens: 1, - supports: { - input: ['text', 'image'], - output: ['video', 'audio'], - }, - pricing: { - input: { - normal: 0, - }, - output: { - normal: 0.15, - }, - }, } as const satisfies ModelMeta< GeminiToolConfigOptions & GeminiSafetyOptions & GeminiGenerationConfigOptions & GeminiCachedContentOptions > - -const VEO_3 = { - name: 'veo-3.0-generate-001', - max_input_tokens: 1024, - max_output_tokens: 1, - supports: { - input: ['text', 'image'], - output: ['video', 'audio'], - }, - pricing: { - input: { - normal: 0, - }, - output: { - normal: 0.4, - }, - }, -} as const satisfies ModelMeta< - GeminiToolConfigOptions & - GeminiSafetyOptions & - GeminiGenerationConfigOptions & - GeminiCachedContentOptions -> - -const VEO_3_FAST = { - name: 'veo-3.0-fast-generate-001', - max_input_tokens: 1024, - max_output_tokens: 1, - supports: { - input: ['text', 'image'], - output: ['video', 'audio'], - }, - pricing: { - input: { - normal: 0, - }, - output: { - normal: 0.15, - }, - }, -} as const satisfies ModelMeta< - GeminiToolConfigOptions & - GeminiSafetyOptions & - 
GeminiGenerationConfigOptions & - GeminiCachedContentOptions -> - -const VEO_2 = { - name: 'veo-2.0-generate-001', - max_output_tokens: 2, - supports: { - input: ['text', 'image'], - output: ['video'], - }, - pricing: { - input: { - normal: 0, - }, - output: { - normal: 0.35, - }, - }, -} as const satisfies ModelMeta< - GeminiToolConfigOptions & - GeminiSafetyOptions & - GeminiGenerationConfigOptions & - GeminiCachedContentOptions -> */ +/** + const VEO_3_1_PREVIEW = { + name: 'veo-3.1-generate-preview', + max_input_tokens: 1024, + max_output_tokens: 1, + supports: { + input: ['text', 'image'], + output: ['video', 'audio'], + }, + pricing: { + input: { + normal: 0, + }, + output: { + normal: 0.4, + }, + }, + } as const satisfies ModelMeta< + GeminiToolConfigOptions & + GeminiSafetyOptions & + GeminiGenerationConfigOptions & + GeminiCachedContentOptions + > + + const VEO_3_1_FAST_PREVIEW = { + name: 'veo-3.1-fast-generate-preview', + max_input_tokens: 1024, + max_output_tokens: 1, + supports: { + input: ['text', 'image'], + output: ['video', 'audio'], + }, + pricing: { + input: { + normal: 0, + }, + output: { + normal: 0.15, + }, + }, + } as const satisfies ModelMeta< + GeminiToolConfigOptions & + GeminiSafetyOptions & + GeminiGenerationConfigOptions & + GeminiCachedContentOptions + > + + const VEO_3 = { + name: 'veo-3.0-generate-001', + max_input_tokens: 1024, + max_output_tokens: 1, + supports: { + input: ['text', 'image'], + output: ['video', 'audio'], + }, + pricing: { + input: { + normal: 0, + }, + output: { + normal: 0.4, + }, + }, + } as const satisfies ModelMeta< + GeminiToolConfigOptions & + GeminiSafetyOptions & + GeminiGenerationConfigOptions & + GeminiCachedContentOptions + > + + const VEO_3_FAST = { + name: 'veo-3.0-fast-generate-001', + max_input_tokens: 1024, + max_output_tokens: 1, + supports: { + input: ['text', 'image'], + output: ['video', 'audio'], + }, + pricing: { + input: { + normal: 0, + }, + output: { + normal: 0.15, + }, + }, + } as const 
satisfies ModelMeta< + GeminiToolConfigOptions & + GeminiSafetyOptions & + GeminiGenerationConfigOptions & + GeminiCachedContentOptions + > + + const VEO_2 = { + name: 'veo-2.0-generate-001', + max_output_tokens: 2, + supports: { + input: ['text', 'image'], + output: ['video'], + }, + pricing: { + input: { + normal: 0, + }, + output: { + normal: 0.35, + }, + }, + } as const satisfies ModelMeta< + GeminiToolConfigOptions & + GeminiSafetyOptions & + GeminiGenerationConfigOptions & + GeminiCachedContentOptions + > */ /* const GEMINI_MODEL_META = { [GEMINI_3_PRO.name]: GEMINI_3_PRO, @@ -769,6 +767,28 @@ export const GEMINI_IMAGE_MODELS = [ IMAGEN_4_GENERATE_ULTRA.name, ] as const +/** + * Live API models (names accepted by the Gemini Live API adapter) + * @experimental Gemini Live API is an experimental feature and may change. + */ +export const GEMINI_LIVE_MODELS = [ + GEMINI_2_5_FLASH_NATIVE_AUDIO.name +] as const; + +/** + * Prebuilt voice names selectable for the Gemini Live API (source of the `GeminiLiveAPIVoice` type) + * @see https://ai.google.dev/gemini-api/docs/speech-generation + */ +export const GEMINI_LIVE_VOICES = [ + 'Zephyr', + 'Puck', + 'Charon', + 'Kore', + 'Fenrir' +] as const; + +export type GeminiLiveAPIVoice = (typeof GEMINI_LIVE_VOICES)[number] + /** * Text-to-speech models * @experimental Gemini TTS is an experimental feature and may change. 
@@ -871,6 +891,12 @@ export type GeminiChatModelProviderOptionsByName = { GeminiCachedContentOptions & GeminiStructuredOutputOptions & GeminiThinkingOptions + [GEMINI_2_5_FLASH_NATIVE_AUDIO.name]: GeminiToolConfigOptions & + GeminiSafetyOptions & + GeminiGenerationConfigOptions & + GeminiCachedContentOptions & + GeminiStructuredOutputOptions & + GeminiThinkingOptions // Models with structured output but no thinking support [GEMINI_2_FLASH.name]: GeminiToolConfigOptions & GeminiSafetyOptions & @@ -909,4 +935,5 @@ export type GeminiModelInputModalitiesByName = { [GEMINI_2_5_FLASH_PREVIEW.name]: typeof GEMINI_2_5_FLASH_PREVIEW.supports.input [GEMINI_2_FLASH.name]: typeof GEMINI_2_FLASH.supports.input [GEMINI_2_FLASH_LITE.name]: typeof GEMINI_2_FLASH_LITE.supports.input + [GEMINI_2_5_FLASH_NATIVE_AUDIO.name]: typeof GEMINI_2_5_FLASH_NATIVE_AUDIO.supports.input } diff --git a/packages/typescript/ai/src/activities/connectRealtime/adapter.ts b/packages/typescript/ai/src/activities/connectRealtime/adapter.ts new file mode 100644 index 00000000..6177014b --- /dev/null +++ b/packages/typescript/ai/src/activities/connectRealtime/adapter.ts @@ -0,0 +1,82 @@ +import type { RealtimeOptions, RealtimeResult } from '../../types' + +/** + * Configuration accepted by Realtime adapter constructors; every field is optional + */ +export interface RealtimeAdapterConfig { + apiKey?: string; + baseUrl?: string; + timeout?: number; + maxRetries?: number; + headers?: Record +} + +/** + * Realtime adapter interface with pre-resolved generics. + * + * An adapter is created by a provider function: `provider('model')` → `adapter` + * All type resolution happens at the provider call site, not in this interface. 
+ * + * Generic parameters: + * - TModel: The specific model name (e.g., 'realtime-1') + * - TProviderOptions: Provider-specific options (already resolved) + */ +export interface RealtimeAdapter< + TModel extends string = string, + TProviderOptions extends object = Record +> { + /** Discriminator for adapter kind - always 'realtime' for this interface */ + readonly kind: 'realtime' + /** Adapter name identifier */ + readonly name: string + /** The model this adapter is configured for */ + readonly model: TModel + + /** + * @internal Type-only properties for inference. Not assigned at runtime. + */ + '~types': { + providerOptions: TProviderOptions + } + + /** + * Open a realtime connection using the given options and resolve with the adapter's result + */ + connectRealtime: (options: RealtimeOptions) => Promise +} + +export type AnyRealtimeAdapter = RealtimeAdapter + +/** + * Abstract base class for Realtime adapters. + * Extend this class to implement a Realtime adapter for a specific provider. + * + * Generic parameters match RealtimeAdapter - all pre-resolved by the provider function. 
+ */ +export abstract class BaseRealtimeAdapter< + TModel extends string = string, + TProviderOptions extends object = Record +> implements RealtimeAdapter { + readonly kind = 'realtime' as const + abstract readonly name: string + readonly model: TModel + + declare '~types': { + providerOptions: TProviderOptions + } + + protected config: RealtimeAdapterConfig + + constructor(config: RealtimeAdapterConfig = {}, model: TModel) { + this.config = config + this.model = model + } + + abstract connectRealtime( + options: RealtimeOptions + ): Promise + + protected generateId(): string { + return `${this.name}-${Date.now()}-${Math.random().toString(36).substring(7)}` + } +} diff --git a/packages/typescript/ai/src/activities/connectRealtime/index.ts b/packages/typescript/ai/src/activities/connectRealtime/index.ts new file mode 100644 index 00000000..5114c128 --- /dev/null +++ b/packages/typescript/ai/src/activities/connectRealtime/index.ts @@ -0,0 +1,96 @@ +/** + * Realtime Activity + * + * Opens a connection to a realtime model through a Realtime adapter. + * This is a self-contained module with implementation, types, and JSDoc. + */ + +import type { + RealtimeAdapter, +} from './adapter' +import type { RealtimeResult } from '../../types' + +// =========================== +// Activity Kind +// =========================== + +/** The adapter kind this activity handles */ +export const kind = 'realtime' as const + +// =========================== +// Type Extraction Helpers +// =========================== + +/** + * Extracts the provider options type from a RealtimeAdapter via its `~types` property; falls back to `object`. + */ +export type RealtimeProviderOptions = + TAdapter extends RealtimeAdapter + ? TAdapter['~types']['providerOptions'] + : object + +// =========================== +// Activity Options Type +// =========================== + +/** + * Options for the Realtime activity. + * The model is extracted from the adapter's model property. 
+ * + * @template TAdapter - The Realtime adapter type + */ +export interface RealtimeActivityOptions< + TAdapter extends RealtimeAdapter, +> { + /** The Realtime adapter to use (must be created with a model) */ + adapter: TAdapter & { kind: typeof kind } + /** The voice to use for generation */ + voice?: string + /** The output audio format */ + format?: 'mp3' | 'opus' | 'aac' | 'flac' | 'wav' | 'pcm' + /** Provider-specific options for the realtime connection */ + modelOptions?: RealtimeProviderOptions +} + +// =========================== +// Activity Result Type +// =========================== + +/** Result type for the Realtime activity */ +export type RealtimeActivityResult = Promise + +// =========================== +// Activity Implementation +// =========================== + +/** + * Realtime activity - connects to a realtime model for speech-to-speech real-time conversations. + * + * Forwards every option except `adapter` to `adapter.connectRealtime`, adding the adapter's configured `model`. + * + */ +export async function connectRealtime< + TAdapter extends RealtimeAdapter, +>(options: RealtimeActivityOptions): RealtimeActivityResult { + const { adapter, ...rest } = options; + const model = adapter.model; + + return adapter.connectRealtime({ ...rest, model }); +} + +// =========================== +// Options Factory +// =========================== + +/** + * Create typed options for the connectRealtime() function without executing. 
+ */ +export function createRealtimeOptions< + TAdapter extends RealtimeAdapter, +>(options: RealtimeActivityOptions): RealtimeActivityOptions { + return options +} + +// Re-export adapter types +export type { RealtimeAdapter, RealtimeAdapterConfig, AnyRealtimeAdapter } from './adapter'; +export { BaseRealtimeAdapter } from './adapter' diff --git a/packages/typescript/ai/src/activities/index.ts b/packages/typescript/ai/src/activities/index.ts index 521675a7..05b5505d 100644 --- a/packages/typescript/ai/src/activities/index.ts +++ b/packages/typescript/ai/src/activities/index.ts @@ -20,6 +20,7 @@ import type { AnyImageAdapter } from './generateImage/adapter' import type { AnyVideoAdapter } from './generateVideo/adapter' import type { AnyTTSAdapter } from './generateSpeech/adapter' import type { AnyTranscriptionAdapter } from './generateTranscription/adapter' +import type { AnyRealtimeAdapter } from './connectRealtime/adapter' // =========================== // Chat Activity @@ -141,6 +142,25 @@ export { type AnyTranscriptionAdapter, } from './generateTranscription/adapter' +// =========================== +// Realtime Activity +// =========================== + +export { + kind as realtime, + connectRealtime, + type RealtimeActivityOptions, + type RealtimeActivityResult, + type RealtimeProviderOptions, +} from './connectRealtime/index' + +export { + BaseRealtimeAdapter, + type RealtimeAdapter, + type RealtimeAdapterConfig, + type AnyRealtimeAdapter, +} from './connectRealtime/adapter' + // =========================== // Adapter Union Types // =========================== @@ -153,6 +173,7 @@ export type AIAdapter = | AnyVideoAdapter | AnyTTSAdapter | AnyTranscriptionAdapter + | AnyRealtimeAdapter /** Union type of all adapter kinds */ export type AdapterKind = @@ -162,3 +183,4 @@ export type AdapterKind = | 'video' | 'tts' | 'transcription' + | 'realtime' diff --git a/packages/typescript/ai/src/types.ts b/packages/typescript/ai/src/types.ts index 9df621c6..bd91c982 100644 --- 
a/packages/typescript/ai/src/types.ts +++ b/packages/typescript/ai/src/types.ts @@ -1028,3 +1028,32 @@ export interface DefaultMessageMetadataByModality { video: unknown document: unknown } + +// ============================================================================ +// Realtime Types +// ============================================================================ +export interface RealtimeOptions { + /** The model to open the realtime connection with */ + model: string + /** The voice to use for generation */ + voice?: string + /** The output audio format */ + format?: 'mp3' | 'opus' | 'aac' | 'flac' | 'wav' | 'pcm' + /** Provider-specific options for the realtime connection */ + modelOptions?: TProviderOptions +} + +export interface RealtimeResult { + /** Unique identifier for this realtime result */ + id: string + /** Model the realtime connection was opened with */ + model: string + /** Base64-encoded audio data */ + audio: string + /** Audio format of the generated audio */ + format: string + /** Duration of the audio in seconds, if available */ + duration?: number + /** Content type of the audio (e.g., 'audio/mpeg') */ + contentType?: string +}