diff --git a/models/spring-ai-azure-openai/src/main/java/org/springframework/ai/azure/openai/AzureOpenAiAudioTranscriptionModel.java b/models/spring-ai-azure-openai/src/main/java/org/springframework/ai/azure/openai/AzureOpenAiAudioTranscriptionModel.java
index 1d1e4afd941..39649a7db71 100644
--- a/models/spring-ai-azure-openai/src/main/java/org/springframework/ai/azure/openai/AzureOpenAiAudioTranscriptionModel.java
+++ b/models/spring-ai-azure-openai/src/main/java/org/springframework/ai/azure/openai/AzureOpenAiAudioTranscriptionModel.java
@@ -29,8 +29,8 @@
 import org.springframework.ai.azure.openai.AzureOpenAiAudioTranscriptionOptions.StructuredResponse.Word;
 import org.springframework.ai.azure.openai.AzureOpenAiAudioTranscriptionOptions.TranscriptResponseFormat;
 import org.springframework.ai.azure.openai.metadata.AzureOpenAiAudioTranscriptionResponseMetadata;
-import org.springframework.ai.model.Model;
 import org.springframework.ai.model.ModelOptionsUtils;
+import org.springframework.ai.model.TranscriptionModel;
 import org.springframework.core.io.Resource;
 import org.springframework.util.Assert;
 import org.springframework.util.StringUtils;
@@ -45,7 +45,7 @@
  *
  * @author Piotr Olaszewski
  */
-public class AzureOpenAiAudioTranscriptionModel implements Model {
+public class AzureOpenAiAudioTranscriptionModel implements TranscriptionModel {
 
 	private static final List JSON_FORMATS = List.of(AudioTranscriptionFormat.JSON,
 			AudioTranscriptionFormat.VERBOSE_JSON);
diff --git a/models/spring-ai-openai/src/main/java/org/springframework/ai/openai/OpenAiAudioTranscriptionModel.java b/models/spring-ai-openai/src/main/java/org/springframework/ai/openai/OpenAiAudioTranscriptionModel.java
index fbf51bb78ed..516f680ce16 100644
--- a/models/spring-ai-openai/src/main/java/org/springframework/ai/openai/OpenAiAudioTranscriptionModel.java
+++ b/models/spring-ai-openai/src/main/java/org/springframework/ai/openai/OpenAiAudioTranscriptionModel.java
@@ -35,7 +35,7 @@
 import org.slf4j.LoggerFactory;
 
 import org.springframework.ai.chat.metadata.RateLimit;
-import org.springframework.ai.model.Model;
+import org.springframework.ai.model.TranscriptionModel;
 import org.springframework.ai.openai.api.OpenAiAudioApi;
 import org.springframework.ai.openai.api.OpenAiAudioApi.StructuredResponse;
 import org.springframework.ai.audio.transcription.AudioTranscription;
@@ -60,7 +60,7 @@
  * @see OpenAiAudioApi
  * @since 0.8.1
  */
-public class OpenAiAudioTranscriptionModel implements Model {
+public class OpenAiAudioTranscriptionModel implements TranscriptionModel {
 
 	private final Logger logger = LoggerFactory.getLogger(getClass());
 
diff --git a/spring-ai-core/src/main/java/org/springframework/ai/model/TranscriptionModel.java b/spring-ai-core/src/main/java/org/springframework/ai/model/TranscriptionModel.java
new file mode 100644
index 00000000000..475b5f49fcc
--- /dev/null
+++ b/spring-ai-core/src/main/java/org/springframework/ai/model/TranscriptionModel.java
@@ -0,0 +1,46 @@
+package org.springframework.ai.model;
+
+import org.springframework.ai.audio.transcription.AudioTranscriptionOptions;
+import org.springframework.ai.audio.transcription.AudioTranscriptionPrompt;
+import org.springframework.ai.audio.transcription.AudioTranscriptionResponse;
+import org.springframework.core.io.Resource;
+
+/**
+ * Model contract for audio transcription: takes an {@link AudioTranscriptionPrompt}
+ * and produces an {@link AudioTranscriptionResponse}.
+ * <p>
+ * The {@code transcribe} overloads are shortcuts that build the prompt from a
+ * {@link Resource} and return only the output of the response's result.
+ */
+public interface TranscriptionModel extends Model {
+
+	/**
+	 * Processes the given transcription prompt.
+	 * @param transcriptionPrompt the prompt handed to the model
+	 * @return the response produced for the prompt
+	 */
+	AudioTranscriptionResponse call(AudioTranscriptionPrompt transcriptionPrompt);
+
+	/**
+	 * Shortcut that wraps {@code resource} in an {@link AudioTranscriptionPrompt},
+	 * passes it to {@link #call(AudioTranscriptionPrompt)} and unwraps the output.
+	 * @param resource the resource used to construct the prompt
+	 * @return the output of the response's result
+	 */
+	default String transcribe(Resource resource) {
+		// NOTE(review): getResult() is dereferenced unchecked - confirm it is never null.
+		return call(new AudioTranscriptionPrompt(resource)).getResult().getOutput();
+	}
+
+	/**
+	 * Same shortcut as {@link #transcribe(Resource)}, but the prompt is constructed
+	 * with the supplied options as well.
+	 * @param resource the resource used to construct the prompt
+	 * @param options the options passed to the prompt constructor
+	 * @return the output of the response's result
+	 */
+	default String transcribe(Resource resource, AudioTranscriptionOptions options) {
+		return call(new AudioTranscriptionPrompt(resource, options)).getResult().getOutput();
+	}
+
+}