Skip to content

feat: text to speech support (#103) #113

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 21 additions & 0 deletions Runtime/DataTypes.cs
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
using System.Collections.Generic;
using Newtonsoft.Json;
using UnityEngine;

namespace OpenAI
{
Expand Down Expand Up @@ -162,6 +163,26 @@ public struct CreateAudioResponse: IResponse
}
#endregion

#region Text To Speech Data Types

/// <summary>
/// Fields carried by a text-to-speech request. An instance is handed to
/// <c>CreatePayload</c> by <c>OpenAIApi.CreateTextToSpeech</c> to build the request body
/// that is sent to the <c>/audio/speech</c> path.
/// </summary>
public class CreateTextToSpeechRequestBase
{
/// <summary>Presumably the text to be converted to speech — confirm against the speech endpoint's parameters.</summary>
public string Input { get; set; }
/// <summary>Presumably the identifier of the voice to synthesize with — confirm the accepted values against the API reference.</summary>
public string Voice { get; set; }
/// <summary>Presumably the identifier of the speech model to use — confirm the accepted values against the API reference.</summary>
public string Model { get; set; }
}

/// <summary>
/// Request type accepted by <c>OpenAIApi.CreateTextToSpeech</c>. Declares no members of its own;
/// everything comes from <see cref="CreateTextToSpeechRequestBase"/>.
/// </summary>
public class CreateTextToSpeechRequest: CreateTextToSpeechRequestBase { }

/// <summary>
/// Result of <c>OpenAIApi.CreateTextToSpeech</c>. Exactly one of <see cref="AudioClip"/> or
/// <see cref="Error"/> is assigned by the audio dispatch routine, depending on whether the
/// web request reported success.
/// </summary>
public struct CreateTextToSpeechResponse: IAudioResponse
{
/// <summary>Set when the web request did not succeed; left null otherwise.</summary>
public ApiError Error { get; set; }
/// <summary>Optional warning text; the audio dispatch routine logs it when it is non-null.</summary>
public string Warning { get; set; }
/// <summary>The downloaded audio, set only when the web request succeeded.</summary>
public AudioClip AudioClip { get; set; }
}

#endregion

#region Images API Data Types
public class CreateImageRequestBase
{
Expand Down
9 changes: 9 additions & 0 deletions Runtime/Interfaces/IAudioResponse.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
using UnityEngine;

namespace OpenAI
{
/// <summary>
/// An <see cref="IResponse"/> that additionally carries downloaded audio.
/// </summary>
public interface IAudioResponse : IResponse
{
    /// <summary>
    /// The audio clip attached to this response.
    /// </summary>
    AudioClip AudioClip { get; set; }
}
}
3 changes: 3 additions & 0 deletions Runtime/Interfaces/IAudioResponse.cs.meta

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

62 changes: 62 additions & 0 deletions Runtime/OpenAIApi.cs
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,55 @@ private async Task<T> DispatchRequest<T>(string path, string method, byte[] payl
return data;
}

/// <summary>
/// Dispatches an HTTP request for an audio file to the specified path with the specified method and optional payload.
/// </summary>
/// <remarks>
/// The response body is decoded as <see cref="AudioType.MPEG"/>. When the request does not succeed, the
/// returned object carries an <see cref="ApiError"/> built from the HTTP status code and the request's
/// error string instead of an audio clip. Any error or warning on the result is also written to the Unity log.
/// </remarks>
/// <param name="path">The path to send the request to.</param>
/// <param name="method">The HTTP method to use for the request.</param>
/// <param name="payload">An optional byte array of json payload to include in the request.</param>
/// <typeparam name="T">
/// Response type of the request. Must be a struct or a class with a public parameterless constructor,
/// because an instance is created here to hold the result.
/// </typeparam>
/// <returns>A Task containing the response from the request as the specified type.</returns>
private async Task<T> DispatchAudioRequest<T>(string path, string method, byte[] payload = null) where T: IAudioResponse
{
    T data = default;

    // default(T) is null when T is a reference type. Without an instance, the clip or error
    // assigned below would be silently dropped and the caller would receive null.
    if (data == null)
    {
        data = System.Activator.CreateInstance<T>();
    }

    using (var request = UnityWebRequest.Put(path, payload))
    {
        // Put() is used only to attach the raw payload; the actual verb is overridden here.
        request.method = method;
        request.SetHeaders(Configuration, ContentType.ApplicationJson);

        // Replace the default download handler so the body is decoded as an audio clip.
        request.downloadHandler = new DownloadHandlerAudioClip(string.Empty, AudioType.MPEG);

        var operation = request.SendWebRequest();

        while (!operation.isDone)
        {
            await Task.Yield();
        }

        if (request.result == UnityWebRequest.Result.Success)
        {
            // Must be read before the request (and its download handler) is disposed.
            data.AudioClip = DownloadHandlerAudioClip.GetContent(request);
        }
        else
        {
            data.Error = new ApiError
            {
                Code = request.responseCode,
                Message = request.error,
                Type = request.error
            };
        }
    }

    ApiError error = data.Error;
    if (error != null)
    {
        Debug.LogError($"Error Message: {error.Message}\nError Type: {error.Type}\n");
    }

    if (data.Warning != null)
    {
        Debug.LogWarning(data.Warning);
    }

    return data;
}

/// <summary>
/// Dispatches an HTTP request to the specified path with the specified method and optional payload.
/// </summary>
Expand Down Expand Up @@ -308,6 +357,19 @@ public async Task<CreateEmbeddingsResponse> CreateEmbeddings(CreateEmbeddingsReq
var payload = CreatePayload(request);
return await DispatchRequest<CreateEmbeddingsResponse>(path, UnityWebRequest.kHttpVerbPOST, payload);
}

/// <summary>
/// Requests synthesized speech for the text described by <paramref name="request"/>.
/// The request is serialized with <c>CreatePayload</c> and POSTed to the <c>/audio/speech</c> path.
/// </summary>
/// <param name="request">The speech parameters; see <see cref="CreateTextToSpeechRequest"/>.</param>
/// <returns>
/// A <see cref="CreateTextToSpeechResponse"/> as produced by the audio dispatch routine.
/// </returns>
public async Task<CreateTextToSpeechResponse> CreateTextToSpeech(CreateTextToSpeechRequest request)
{
    string endpoint = $"{BASE_PATH}/audio/speech";
    byte[] body = CreatePayload(request);

    CreateTextToSpeechResponse response =
        await DispatchAudioRequest<CreateTextToSpeechResponse>(endpoint, UnityWebRequest.kHttpVerbPOST, body);

    return response;
}

/// <summary>
/// Transcribes audio into the input language.
Expand Down
8 changes: 8 additions & 0 deletions Samples~/Text To Speech.meta

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading