Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions .changeset/major-teeth-greet.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
---
'@tanstack/ai-openai': minor
---

Introduces a single-source-of-truth model registry for all OpenAI models, preventing silent drift between capability declarations and runtime validation. Significantly expands model coverage across text, image, video, and audio categories.
9 changes: 5 additions & 4 deletions packages/typescript/ai-openai/src/adapters/image.ts
Original file line number Diff line number Diff line change
Expand Up @@ -9,12 +9,12 @@ import {
validateNumberOfImages,
validatePrompt,
} from '../image/image-provider-options'
import type { OpenAIImageModel } from '../model-meta'
import type {
OpenAIImageModel,
OpenAIImageModelProviderOptionsByName,
OpenAIImageModelSizeByName,
OpenAIImageProviderOptions,
} from '../image/image-provider-options'
} from '../model-meta'
import type { OpenAIImageProviderOptions } from '../image/image-provider-options'
import type {
GeneratedImage,
ImageGenerationOptions,
Expand All @@ -32,7 +32,8 @@ export interface OpenAIImageConfig extends OpenAIClientConfig {}
* OpenAI Image Generation Adapter
*
* Tree-shakeable adapter for OpenAI image generation functionality.
* Supports gpt-image-1, gpt-image-1-mini, dall-e-3, and dall-e-2 models.
* Supports gpt-image-1.5, chatgpt-image-latest, gpt-image-1, gpt-image-1-mini,
* dall-e-3, and dall-e-2 models.
*
* Features:
* - Model-specific type-safe provider options
Expand Down
12 changes: 5 additions & 7 deletions packages/typescript/ai-openai/src/adapters/text.ts
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,6 @@ import type {
TextOptions,
} from '@tanstack/ai'
import type {
ExternalTextProviderOptions,
InternalTextProviderOptions,
} from '../text/text-provider-options'
import type {
Expand All @@ -47,20 +46,19 @@ export interface OpenAITextConfig extends OpenAIClientConfig {}
/**
 * Provider options for a given OpenAI chat model, resolved from the
 * registry's per-model options map. When no model is supplied, the type
 * parameter defaults to the full OpenAIChatModel union, so the result is
 * the union of every model's options.
 */
export type OpenAITextProviderOptions<
  TModel extends OpenAIChatModel = OpenAIChatModel,
> = OpenAIChatModelProviderOptionsByName[TModel]

// ===========================
// Type Resolution Helpers
// ===========================

/**
 * Resolve provider options for a specific model by indexing directly into
 * the per-model options map. Every OpenAIChatModel has an entry in the map
 * (single-source-of-truth registry), so no fallback branch is needed.
 */
type ResolveProviderOptions<TModel extends OpenAIChatModel> =
  OpenAIChatModelProviderOptionsByName[TModel]

/**
* Resolve input modalities for a specific model.
Expand Down
32 changes: 25 additions & 7 deletions packages/typescript/ai-openai/src/adapters/transcription.ts
Original file line number Diff line number Diff line change
Expand Up @@ -54,12 +54,14 @@ export class OpenAITranscriptionAdapter<
const file = this.prepareAudioFile(audio)

// Build request
const request: OpenAI_SDK.Audio.TranscriptionCreateParams = {
const requestBase: Omit<
OpenAI_SDK.Audio.TranscriptionCreateParamsNonStreaming,
'response_format'
> = {
model,
file,
language,
prompt,
response_format: this.mapResponseFormat(responseFormat),
...modelOptions,
}

Expand All @@ -69,9 +71,14 @@ export class OpenAITranscriptionAdapter<
(!responseFormat && model !== 'whisper-1')

if (useVerbose) {
const verboseRequest: OpenAI_SDK.Audio.TranscriptionCreateParamsNonStreaming<'verbose_json'> =
{
...requestBase,
response_format: 'verbose_json',
stream: false,
}
const response = await this.client.audio.transcriptions.create({
...request,
response_format: 'verbose_json',
...verboseRequest,
})

return {
Expand All @@ -96,12 +103,23 @@ export class OpenAITranscriptionAdapter<
})),
}
} else {
const request: OpenAI_SDK.Audio.TranscriptionCreateParamsNonStreaming =
{
...requestBase,
response_format: this.mapResponseFormat(responseFormat),
stream: false,
}
const response = await this.client.audio.transcriptions.create(request)

return {
id: generateId(this.name),
model,
text: typeof response === 'string' ? response : response.text,
text:
typeof response === 'string'
? response
: 'text' in response
? response.text
: '',
language,
}
}
Expand Down Expand Up @@ -157,9 +175,9 @@ export class OpenAITranscriptionAdapter<

/**
 * Maps the adapter's response-format name onto the OpenAI SDK's
 * AudioResponseFormat type.
 *
 * @param format - optional caller-facing format name
 * @returns the same format, or 'json' when unset; no cast is needed because
 * every member of the parameter union is a valid AudioResponseFormat
 */
private mapResponseFormat(
  format?: 'json' | 'text' | 'srt' | 'verbose_json' | 'vtt',
): OpenAI_SDK.Audio.AudioResponseFormat {
  if (!format) return 'json'
  return format
}
}

Expand Down
2 changes: 1 addition & 1 deletion packages/typescript/ai-openai/src/adapters/tts.ts
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ export interface OpenAITTSConfig extends OpenAIClientConfig {}
* OpenAI Text-to-Speech Adapter
*
* Tree-shakeable adapter for OpenAI TTS functionality.
* Supports tts-1, tts-1-hd, and gpt-4o-audio-preview models.
* Supports gpt-4o-mini-tts, tts-1, and tts-1-hd models.
*
* Features:
* - Multiple voice options: alloy, ash, ballad, coral, echo, fable, onyx, nova, sage, shimmer, verse
Expand Down
13 changes: 8 additions & 5 deletions packages/typescript/ai-openai/src/adapters/video.ts
Original file line number Diff line number Diff line change
Expand Up @@ -6,12 +6,12 @@ import {
validateVideoSize,
} from '../video/video-provider-options'
import type { VideoModel } from 'openai/resources'
import type { OpenAIVideoModel } from '../model-meta'
import type {
OpenAIVideoModel,
OpenAIVideoModelProviderOptionsByName,
OpenAIVideoModelSizeByName,
OpenAIVideoProviderOptions,
} from '../video/video-provider-options'
} from '../model-meta'
import type { OpenAIVideoProviderOptions } from '../video/video-provider-options'
import type {
VideoGenerationOptions,
VideoJobResult,
Expand Down Expand Up @@ -53,9 +53,11 @@ export class OpenAIVideoAdapter<
readonly name = 'openai' as const

private client: OpenAI_SDK
private readonly clientConfig: OpenAIVideoConfig

constructor(config: OpenAIVideoConfig, model: TModel) {
super(config, model)
this.clientConfig = config
this.client = createOpenAIClient(config)
}

Expand Down Expand Up @@ -212,8 +214,9 @@ export class OpenAIVideoAdapter<
// Option 3: Return a proxy URL through our server

// Let's try fetching and returning a data URL for now
const baseUrl = this.config.baseUrl || 'https://api.openai.com/v1'
const apiKey = this.config.apiKey
const baseUrl =
this.clientConfig.baseURL || 'https://api.openai.com/v1'
const apiKey = this.clientConfig.apiKey

const contentResponse = await fetch(
`${baseUrl}/videos/${jobId}/content`,
Expand Down
35 changes: 31 additions & 4 deletions packages/typescript/ai-openai/src/audio/audio-provider-options.ts
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
import { TTS_MODELS } from '../models/audio'

export interface AudioProviderOptions {
/**
* The text to generate audio for. The maximum length is 4096 characters.
Expand Down Expand Up @@ -46,13 +48,28 @@ export interface AudioProviderOptions {
stream_format?: 'sse' | 'audio'
}

/**
* Validates the requested stream format against the selected TTS model.
*/
export const validateStreamFormat = (options: AudioProviderOptions) => {
const unsupportedModels = ['tts-1', 'tts-1-hd']
if (options.stream_format && unsupportedModels.includes(options.model)) {
if (!Object.hasOwn(TTS_MODELS, options.model)) {
if (options.stream_format) {
console.warn(
`Unknown TTS model: ${options.model}. stream_format may not be supported.`,
)
}
return
}

const modelMeta = TTS_MODELS[options.model as keyof typeof TTS_MODELS]
if (options.stream_format && !modelMeta.supportsStreaming) {
throw new Error(`The model ${options.model} does not support streaming.`)
}
}

/**
* Validates that the requested speech speed falls within OpenAI's supported range.
*/
export const validateSpeed = (options: AudioProviderOptions) => {
if (options.speed) {
if (options.speed < 0.25 || options.speed > 4.0) {
Expand All @@ -61,13 +78,23 @@ export const validateSpeed = (options: AudioProviderOptions) => {
}
}

/**
* Validates that the selected TTS model supports voice instructions.
*/
export const validateInstructions = (options: AudioProviderOptions) => {
const unsupportedModels = ['tts-1', 'tts-1-hd']
if (options.instructions && unsupportedModels.includes(options.model)) {
if (!Object.hasOwn(TTS_MODELS, options.model)) {
throw new Error(`Unknown TTS model: ${options.model}`)
}

const modelMeta = TTS_MODELS[options.model as keyof typeof TTS_MODELS]
if (options.instructions && !modelMeta.supportsInstructions) {
throw new Error(`The model ${options.model} does not support instructions.`)
}
}

/**
* Validates the maximum input length for text-to-speech requests.
*/
export const validateAudioInput = (options: AudioProviderOptions) => {
if (options.input.length > 4096) {
throw new Error('Input text exceeds maximum length of 4096 characters.')
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,8 @@ export interface OpenAITranscriptionProviderOptions {
* Additional information to include in the transcription response. logprobs will return the log probabilities
* of the tokens in the response to understand the model's confidence in the transcription.
* logprobs only works with response_format set to json and only with the models gpt-4o-transcribe,
* gpt-4o-mini-transcribe, and gpt-4o-mini-transcribe-2025-12-15.
* gpt-4o-mini-transcribe, gpt-4o-mini-transcribe-2025-12-15, and
* gpt-4o-mini-transcribe-2025-03-20.
* This field is not supported when using gpt-4o-transcribe-diarize.
*/
include?: OpenAI.Audio.TranscriptionCreateParams['include']
Expand Down
89 changes: 28 additions & 61 deletions packages/typescript/ai-openai/src/image/image-provider-options.ts
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
import { IMAGE_MODELS } from '../models/image'

/**
* OpenAI Image Generation Provider Options
*
Expand Down Expand Up @@ -176,27 +178,6 @@ export type OpenAIImageProviderOptions =
| DallE3ProviderOptions
| DallE2ProviderOptions

/**
* Type-only map from model name to its specific provider options.
* Used by the core AI types to narrow providerOptions based on the selected model.
*/
export type OpenAIImageModelProviderOptionsByName = {
'gpt-image-1': GptImage1ProviderOptions
'gpt-image-1-mini': GptImage1MiniProviderOptions
'dall-e-3': DallE3ProviderOptions
'dall-e-2': DallE2ProviderOptions
}

/**
* Type-only map from model name to its supported sizes.
*/
export type OpenAIImageModelSizeByName = {
'gpt-image-1': GptImageSize
'gpt-image-1-mini': GptImageSize
'dall-e-3': DallE3Size
'dall-e-2': DallE2Size
}

/**
* Internal options interface for validation
*/
Expand All @@ -206,6 +187,14 @@ interface ImageValidationOptions {
background?: 'transparent' | 'opaque' | 'auto' | null
}

/**
 * Looks up registry metadata for an image model name.
 *
 * @param model - image model name as supplied by the caller
 * @returns the model's IMAGE_MODELS registry entry
 * @throws when the name has no entry in IMAGE_MODELS
 */
function getImageModelMeta(model: string) {
  const isKnownModel = Object.hasOwn(IMAGE_MODELS, model)
  if (!isKnownModel) {
    throw new Error(`Unknown image model: ${model}`)
  }
  return IMAGE_MODELS[model as keyof typeof IMAGE_MODELS]
}

/**
* Validates that the provided size is supported by the model.
* Throws a descriptive error if the size is not supported.
Expand All @@ -216,19 +205,10 @@ export function validateImageSize(
): void {
if (!size || size === 'auto') return

const validSizes: Record<string, Array<string>> = {
'gpt-image-1': ['1024x1024', '1536x1024', '1024x1536', 'auto'],
'gpt-image-1-mini': ['1024x1024', '1536x1024', '1024x1536', 'auto'],
'dall-e-3': ['1024x1024', '1792x1024', '1024x1792'],
'dall-e-2': ['256x256', '512x512', '1024x1024'],
}
const modelMeta = getImageModelMeta(model)
const modelSizes = modelMeta.sizes

const modelSizes = validSizes[model]
if (!modelSizes) {
throw new Error(`Unknown image model: ${model}`)
}

if (!modelSizes.includes(size)) {
if (!(modelSizes as ReadonlyArray<string>).includes(size)) {
throw new Error(
`Size "${size}" is not supported by model "${model}". ` +
`Supported sizes: ${modelSizes.join(', ')}`,
Expand All @@ -245,53 +225,40 @@ export function validateNumberOfImages(
): void {
if (numberOfImages === undefined) return

// dall-e-3 only supports n=1
if (model === 'dall-e-3' && numberOfImages !== 1) {
throw new Error(
`Model "dall-e-3" only supports generating 1 image at a time. ` +
`Requested: ${numberOfImages}`,
)
}
const modelMeta = getImageModelMeta(model)

// Other models support 1-10
if (numberOfImages < 1 || numberOfImages > 10) {
if (numberOfImages < 1 || numberOfImages > modelMeta.maxImages) {
throw new Error(
`Number of images must be between 1 and 10. Requested: ${numberOfImages}`,
`Number of images must be between 1 and ${modelMeta.maxImages}. Requested: ${numberOfImages}`,
)
}
}

/**
* Validates that the selected image model supports background control.
*/
export const validateBackground = (options: ImageValidationOptions) => {
if (options.background) {
const supportedModels = ['gpt-image-1', 'gpt-image-1-mini']
if (!supportedModels.includes(options.model)) {
if (options.background != null) {
const modelMeta = getImageModelMeta(options.model)
if (!('supportsBackground' in modelMeta)) {
throw new Error(
`The model ${options.model} does not support background option.`,
)
}
}
}

/**
* Validates prompt presence and model-specific prompt length limits.
*/
export const validatePrompt = (options: ImageValidationOptions) => {
if (options.prompt.length === 0) {
throw new Error('Prompt cannot be empty.')
}
if (
(options.model === 'gpt-image-1' || options.model === 'gpt-image-1-mini') &&
options.prompt.length > 32000
) {
throw new Error(
'For gpt-image-1/gpt-image-1-mini, prompt length must be less than or equal to 32000 characters.',
)
}
if (options.model === 'dall-e-2' && options.prompt.length > 1000) {
throw new Error(
'For dall-e-2, prompt length must be less than or equal to 1000 characters.',
)
}
if (options.model === 'dall-e-3' && options.prompt.length > 4000) {
const modelMeta = getImageModelMeta(options.model)
if (options.prompt.length > modelMeta.maxPromptLength) {
throw new Error(
'For dall-e-3, prompt length must be less than or equal to 4000 characters.',
`For ${options.model}, prompt length must be less than or equal to ${modelMeta.maxPromptLength} characters.`,
)
}
}
Loading