diff --git a/README.md b/README.md index 8aff073..9ad8171 100644 --- a/README.md +++ b/README.md @@ -22,7 +22,7 @@ Add the [AssemblyAI.SemanticKernel NuGet package](https://www.nuget.org/packages dotnet add package AssemblyAI.SemanticKernel ``` -Next, register the `TranscriptPlugin` into your kernel: +Next, register the `AssemblyAI` plugin into your kernel: ```csharp using AssemblyAI.SemanticKernel; @@ -37,6 +37,7 @@ string apiKey = Environment.GetEnvironmentVariable("ASSEMBLYAI_API_KEY") kernel.ImportPluginFromObject( - new TranscriptPlugin(apiKey: apiKey) + new AssemblyAIPlugin(apiKey: apiKey), + nameof(AssemblyAIPlugin) ); ``` @@ -45,8 +46,8 @@ kernel.ImportPluginFromObject( Get the `Transcribe` function from the transcript plugin and invoke it with the context variables. ```csharp var result = await kernel.InvokeAsync( - nameof(TranscriptPlugin), - TranscriptPlugin.TranscribeFunctionName, + nameof(AssemblyAIPlugin), + AssemblyAIPlugin.TranscribeFunctionName, new KernelArguments { ["INPUT"] = "https://storage.googleapis.com/aai-docs-samples/espn.m4a" @@ -58,7 +59,7 @@ Console.WriteLine(result.GetValue()); You can get the transcript using `result.GetValue()`. You can also upload local audio and video file. To do this: -- Set the `TranscriptPlugin.AllowFileSystemAccess` property to `true`. +- Set the `AssemblyAI:Plugin:AllowFileSystemAccess` configuration to `true`. - Configure the `INPUT` variable with a local file path. ```csharp @@ -69,8 +70,8 @@ kernel.ImportPluginFromObject( } ); var result = await kernel.InvokeAsync( - nameof(TranscriptPlugin), - TranscriptPlugin.TranscribeFunctionName, + nameof(AssemblyAIPlugin), + AssemblyAIPlugin.TranscribeFunctionName, new KernelArguments { ["INPUT"] = "https://storage.googleapis.com/aai-docs-samples/espn.m4a" @@ -84,7 +85,7 @@ You can also invoke the function from within a semantic function like this. 
```csharp const string prompt = """ Here is a transcript: - {{TranscriptPlugin.Transcribe "https://storage.googleapis.com/aai-docs-samples/espn.m4a"}} + {{AssemblyAIPlugin.Transcribe "https://storage.googleapis.com/aai-docs-samples/espn.m4a"}} --- Summarize the transcript. """; diff --git a/src/AssemblyAI.SemanticKernel/AssemblyAI.SemanticKernel.csproj b/src/AssemblyAI.SemanticKernel/AssemblyAI.SemanticKernel.csproj index 376369c..0c9a3a6 100644 --- a/src/AssemblyAI.SemanticKernel/AssemblyAI.SemanticKernel.csproj +++ b/src/AssemblyAI.SemanticKernel/AssemblyAI.SemanticKernel.csproj @@ -11,9 +11,9 @@ SemanticKernel;AI;AssemblyAI;transcript AssemblyAI AssemblyAI - 1.0.3.0 - 1.0.3.0 - 1.0.3 + 1.1.0.0 + 1.1.0.0 + 1.1.0 Library MIT https://github.com/AssemblyAI/assemblyai-semantic-kernel @@ -31,6 +31,12 @@ true + + 8.0.0 + + + 8.0.0 + 1.0.1 diff --git a/src/AssemblyAI.SemanticKernel/AssemblyAIPlugin.cs b/src/AssemblyAI.SemanticKernel/AssemblyAIPlugin.cs new file mode 100644 index 0000000..8f6f533 --- /dev/null +++ b/src/AssemblyAI.SemanticKernel/AssemblyAIPlugin.cs @@ -0,0 +1,161 @@ +using System; +using System.ComponentModel; +using System.IO; +using System.Net.Http; +using System.Net.Http.Headers; +using System.Net.Http.Json; +using System.Text; +using System.Text.Json; +using System.Threading.Tasks; +using Microsoft.Extensions.DependencyInjection; +using Microsoft.Extensions.Options; +using Microsoft.SemanticKernel; + +namespace AssemblyAI.SemanticKernel +{ + public class AssemblyAIPlugin + { + internal AssemblyAIPluginOptions Options { get; } + + private string ApiKey => Options.ApiKey; + + private bool AllowFileSystemAccess => Options.AllowFileSystemAccess; + + public AssemblyAIPlugin(string apiKey) + { + Options = new AssemblyAIPluginOptions + { + ApiKey = apiKey + }; + } + + public AssemblyAIPlugin(string apiKey, bool allowFileSystemAccess) + { + Options = new AssemblyAIPluginOptions + { + ApiKey = apiKey, + AllowFileSystemAccess = allowFileSystemAccess + }; + } + 
+ [ActivatorUtilitiesConstructor] + public AssemblyAIPlugin(IOptions options) + { + Options = options.Value; + } + + public const string TranscribeFunctionName = nameof(Transcribe); + + [KernelFunction, Description("Transcribe an audio or video file to text.")] + public async Task Transcribe( + [Description("The public URL or the local path of the audio or video file to transcribe.")] + string input + ) + { + if (string.IsNullOrEmpty(input)) + { + throw new Exception("The INPUT parameter is required."); + } + + using (var httpClient = new HttpClient()) + { + httpClient.DefaultRequestHeaders.Authorization = new AuthenticationHeaderValue(ApiKey); + string audioUrl; + if (TryGetPath(input, out var filePath)) + { + if (AllowFileSystemAccess == false) + { + throw new Exception( + "You need to allow file system access to upload files. Set AssemblyAI:Plugin:AllowFileSystemAccess to true." + ); + } + + audioUrl = await UploadFileAsync(filePath, httpClient); + } + else + { + audioUrl = input; + } + + var transcript = await CreateTranscriptAsync(audioUrl, httpClient); + transcript = await WaitForTranscriptToProcess(transcript, httpClient); + return transcript.Text ?? throw new Exception("Transcript text is null. 
This should not happen."); + } + } + + private static bool TryGetPath(string input, out string filePath) + { + if (Uri.TryCreate(input, UriKind.Absolute, out var inputUrl)) + { + if (inputUrl.IsFile) + { + filePath = inputUrl.LocalPath; + return true; + } + + filePath = null; + return false; + } + + filePath = input; + return true; + } + + private static async Task UploadFileAsync(string path, HttpClient httpClient) + { + using (var fileStream = File.OpenRead(path)) + using (var fileContent = new StreamContent(fileStream)) + { + fileContent.Headers.ContentType = new MediaTypeHeaderValue("application/octet-stream"); + using (var response = await httpClient.PostAsync("https://api.assemblyai.com/v2/upload", fileContent)) + { + response.EnsureSuccessStatusCode(); + var jsonDoc = await response.Content.ReadFromJsonAsync(); + return jsonDoc?.RootElement.GetProperty("upload_url").GetString(); + } + } + } + + private static async Task CreateTranscriptAsync(string audioUrl, HttpClient httpClient) + { + var jsonString = JsonSerializer.Serialize(new + { + audio_url = audioUrl + }); + + var content = new StringContent(jsonString, Encoding.UTF8, "application/json"); + using (var response = await httpClient.PostAsync("https://api.assemblyai.com/v2/transcript", content)) + { + response.EnsureSuccessStatusCode(); + var transcript = await response.Content.ReadFromJsonAsync(); + if (transcript.Status == "error") throw new Exception(transcript.Error); + return transcript; + } + } + + private static async Task WaitForTranscriptToProcess(Transcript transcript, HttpClient httpClient) + { + var pollingEndpoint = $"https://api.assemblyai.com/v2/transcript/{transcript.Id}"; + + while (true) + { + var pollingResponse = await httpClient.GetAsync(pollingEndpoint); + pollingResponse.EnsureSuccessStatusCode(); + transcript = (await pollingResponse.Content.ReadFromJsonAsync()); + switch (transcript.Status) + { + case "processing": + case "queued": + await Task.Delay(TimeSpan.FromSeconds(3)); + 
+ break; + case "completed": + return transcript; + case "error": + throw new Exception(transcript.Error); + default: + throw new Exception("This code shouldn't be reachable."); + } + } + } + } +} \ No newline at end of file diff --git a/src/AssemblyAI.SemanticKernel/AssemblyAIPluginOptions.cs b/src/AssemblyAI.SemanticKernel/AssemblyAIPluginOptions.cs new file mode 100644 index 0000000..3f1f23a --- /dev/null +++ b/src/AssemblyAI.SemanticKernel/AssemblyAIPluginOptions.cs @@ -0,0 +1,26 @@ +namespace AssemblyAI.SemanticKernel +{ + /// + /// Options to configure the AssemblyAI plugin with. + /// + public class AssemblyAIPluginOptions + { + /// + /// The name of the plugin registered into Semantic Kernel. + /// Defaults to "AssemblyAIPlugin". + /// + public string PluginName { get; set; } + + /// + /// The AssemblyAI API key. Find your API key at https://www.assemblyai.com/app/account + /// + public string ApiKey { get; set; } + + /// + /// If true, you can transcribe audio files from disk. + /// The file will be uploaded to AssemblyAI's server to transcribe and deleted when transcription is completed. + /// If false, an exception will be thrown when trying to transcribe files from disk. + /// + public bool AllowFileSystemAccess { get; set; } + } +} \ No newline at end of file diff --git a/src/AssemblyAI.SemanticKernel/Extensions.cs b/src/AssemblyAI.SemanticKernel/Extensions.cs new file mode 100644 index 0000000..b88666e --- /dev/null +++ b/src/AssemblyAI.SemanticKernel/Extensions.cs @@ -0,0 +1,121 @@ +using System; +using Microsoft.Extensions.Configuration; +using Microsoft.Extensions.DependencyInjection; +using Microsoft.Extensions.Options; +using Microsoft.SemanticKernel; + +// ReSharper disable UnusedMember.Global +// ReSharper disable MemberCanBePrivate.Global + +namespace AssemblyAI.SemanticKernel +{ + public static class Extensions + { + /// + /// Configure the AssemblyAI plugins using the specified configuration section path. 
+ /// + /// + /// The configuration to bind options to + /// + public static IKernelBuilder AddAssemblyAIPlugin( + this IKernelBuilder builder, + IConfiguration configuration + ) + { + var pluginConfigurationSection = configuration.GetSection("AssemblyAI:Plugin"); + // If the "AssemblyAI:Plugin" section exists, bind to it; otherwise bind to the configuration that was passed in. + if (pluginConfigurationSection.Exists()) + { + configuration = pluginConfigurationSection; + } + + var services = builder.Services; + var optionsBuilder = services.AddOptions(); + optionsBuilder.Bind(configuration); + ValidateOptions(optionsBuilder); + AddPlugin(builder); + return builder; + } + + /// + /// Configure the AssemblyAI plugins using the specified options. + /// + /// + /// Options to configure plugin with + /// + public static IKernelBuilder AddAssemblyAIPlugin( + this IKernelBuilder builder, + AssemblyAIPluginOptions options + ) + { + var services = builder.Services; + var optionsBuilder = services.AddOptions(); + optionsBuilder.Configure(optionsToConfigure => + { + // Copy every option, including PluginName, because AddPlugin reads PluginName from the registered options. + optionsToConfigure.PluginName = options.PluginName; + optionsToConfigure.ApiKey = options.ApiKey; + optionsToConfigure.AllowFileSystemAccess = options.AllowFileSystemAccess; + }); + ValidateOptions(optionsBuilder); + AddPlugin(builder); + return builder; + } + + /// + /// Configure the AssemblyAI plugins using the specified options. + /// + /// + /// Action to configure options + /// + public static IKernelBuilder AddAssemblyAIPlugin( + this IKernelBuilder builder, + Action configureOptions + ) + { + var services = builder.Services; + var optionsBuilder = services.AddOptions(); + optionsBuilder.Configure(configureOptions); + ValidateOptions(optionsBuilder); + AddPlugin(builder); + return builder; + } + + /// + /// Configure the AssemblyAI plugins using the specified options. 
+ /// + /// + /// Action to configure options + /// + public static IKernelBuilder AddAssemblyAIPlugin( + this IKernelBuilder builder, + Action configureOptions + ) + { + var services = builder.Services; + var optionsBuilder = services.AddOptions(); + optionsBuilder.Configure((options, provider) => configureOptions(provider, options)); + ValidateOptions(optionsBuilder); + AddPlugin(builder); + return builder; + } + + private static void ValidateOptions(OptionsBuilder optionsBuilder) + { + optionsBuilder.Validate( + options => !string.IsNullOrEmpty(options.ApiKey), + "AssemblyAI:Plugin:ApiKey must be configured." + ); + } + + private static void AddPlugin(IKernelBuilder builder) + { + using (var sp = builder.Services.BuildServiceProvider()) + { + var config = sp.GetRequiredService>().Value; + var pluginName = string.IsNullOrEmpty(config.PluginName) ? null : config.PluginName; + builder.Plugins.AddFromType(pluginName); + } + } + } +} \ No newline at end of file diff --git a/src/AssemblyAI.SemanticKernel/Transcript.cs b/src/AssemblyAI.SemanticKernel/Transcript.cs new file mode 100644 index 0000000..72e1ec3 --- /dev/null +++ b/src/AssemblyAI.SemanticKernel/Transcript.cs @@ -0,0 +1,12 @@ +namespace AssemblyAI.SemanticKernel +{ + // ReSharper disable once ClassNeverInstantiated.Global + public class Transcript + { + public string Id { get; set; } = null; + public string Status { get; set; } = null; + public string Text { get; set; } + + public string Error { get; set; } + } +} \ No newline at end of file diff --git a/src/AssemblyAI.SemanticKernel/TranscriptPlugin.cs b/src/AssemblyAI.SemanticKernel/TranscriptPlugin.cs index eea1bc7..7fd566f 100644 --- a/src/AssemblyAI.SemanticKernel/TranscriptPlugin.cs +++ b/src/AssemblyAI.SemanticKernel/TranscriptPlugin.cs @@ -1,148 +1,24 @@ using System; -using System.ComponentModel; -using System.IO; -using System.Net.Http; -using System.Net.Http.Headers; -using System.Net.Http.Json; -using System.Text; -using System.Text.Json; -using 
System.Threading.Tasks; -using Microsoft.SemanticKernel; namespace AssemblyAI.SemanticKernel { - public class TranscriptPlugin + [Obsolete("Use AssemblyAIPlugin instead.")] + public class TranscriptPlugin : AssemblyAIPlugin { - public const string PluginName = nameof(TranscriptPlugin); - private readonly string _apiKey; - public bool AllowFileSystemAccess { get; set; } + public const string PluginName = nameof(TranscriptPlugin); - public TranscriptPlugin(string apiKey) + public bool AllowFileSystemAccess { - _apiKey = apiKey; + get => Options.AllowFileSystemAccess; + set => Options.AllowFileSystemAccess = value; } - public const string TranscribeFunctionName = nameof(Transcribe); - - [KernelFunction, Description("Transcribe an audio or video file to text.")] - public async Task Transcribe( - [Description("The public URL or the local path of the audio or video file to transcribe.")] - string input - ) + public TranscriptPlugin(string apiKey) : base(apiKey) { - if (string.IsNullOrEmpty(input)) - { - throw new Exception("The INPUT parameter is required."); - } - - using (var httpClient = new HttpClient()) - { - httpClient.DefaultRequestHeaders.Authorization = new AuthenticationHeaderValue(_apiKey); - string audioUrl; - if (TryGetPath(input, out var filePath)) - { - if (AllowFileSystemAccess == false) - { - throw new Exception( - "You need to allow file system access to upload files. Set TranscriptPlugin.AllowFileSystemAccess to true." - ); - } - - audioUrl = await UploadFileAsync(filePath, httpClient); - } - else - { - audioUrl = input; - } - - var transcript = await CreateTranscriptAsync(audioUrl, httpClient); - transcript = await WaitForTranscriptToProcess(transcript, httpClient); - return transcript.Text ?? throw new Exception("Transcript text is null. 
This should not happen."); - } } - private static bool TryGetPath(string input, out string filePath) + public TranscriptPlugin(string apiKey, bool allowFileSystemAccess) : base(apiKey, allowFileSystemAccess) { - if (Uri.TryCreate(input, UriKind.Absolute, out var inputUrl)) - { - if (inputUrl.IsFile) - { - filePath = inputUrl.LocalPath; - return true; - } - - filePath = null; - return false; - } - - filePath = input; - return true; } - - private static async Task UploadFileAsync(string path, HttpClient httpClient) - { - using (var fileStream = File.OpenRead(path)) - using (var fileContent = new StreamContent(fileStream)) - { - fileContent.Headers.ContentType = new MediaTypeHeaderValue("application/octet-stream"); - using (var response = await httpClient.PostAsync("https://api.assemblyai.com/v2/upload", fileContent)) - { - response.EnsureSuccessStatusCode(); - var jsonDoc = await response.Content.ReadFromJsonAsync(); - return jsonDoc?.RootElement.GetProperty("upload_url").GetString(); - } - } - } - - private static async Task CreateTranscriptAsync(string audioUrl, HttpClient httpClient) - { - var jsonString = JsonSerializer.Serialize(new - { - audio_url = audioUrl - }); - - var content = new StringContent(jsonString, Encoding.UTF8, "application/json"); - using (var response = await httpClient.PostAsync("https://api.assemblyai.com/v2/transcript", content)) - { - response.EnsureSuccessStatusCode(); - var transcript = await response.Content.ReadFromJsonAsync(); - if (transcript.Status == "error") throw new Exception(transcript.Error); - return transcript; - } - } - - private static async Task WaitForTranscriptToProcess(Transcript transcript, HttpClient httpClient) - { - var pollingEndpoint = $"https://api.assemblyai.com/v2/transcript/{transcript.Id}"; - - while (true) - { - var pollingResponse = await httpClient.GetAsync(pollingEndpoint); - pollingResponse.EnsureSuccessStatusCode(); - transcript = (await pollingResponse.Content.ReadFromJsonAsync()); - switch 
(transcript.Status) - { - case "processing": - case "queued": - await Task.Delay(TimeSpan.FromSeconds(3)); - break; - case "completed": - return transcript; - case "error": - throw new Exception(transcript.Error); - default: - throw new Exception("This code shouldn't be reachable."); - } - } - } - } - - public class Transcript - { - public string Id { get; set; } = null; - public string Status { get; set; } = null; - public string Text { get; set; } - - public string Error { get; set; } } } \ No newline at end of file diff --git a/src/Sample/Program.cs b/src/Sample/Program.cs index fef6e7e..6d91a64 100644 --- a/src/Sample/Program.cs +++ b/src/Sample/Program.cs @@ -18,31 +18,10 @@ public static async Task Main(string[] args) await TranscribeFileUsingPlan(kernel); } - private static Kernel BuildKernel(IConfiguration config) - { - var kernel = Kernel.CreateBuilder() - .AddOpenAIChatCompletion( - "gpt-3.5-turbo", - config["OpenAI:ApiKey"] ?? throw new Exception("OpenAI:ApiKey configuration is required.") - ) - .Build(); - - var apiKey = config["AssemblyAI:ApiKey"] ?? throw new Exception("AssemblyAI:ApiKey configuration is required."); - - kernel.ImportPluginFromObject( - new TranscriptPlugin(apiKey: apiKey) - { - AllowFileSystemAccess = true - } - ); - - kernel.ImportPluginFromType(); - return kernel; - } - private static IConfigurationRoot BuildConfig(string[] args) { var config = new ConfigurationBuilder() + .AddJsonFile("appsettings.json") .AddEnvironmentVariables() .AddUserSecrets() .AddCommandLine(args) @@ -50,12 +29,26 @@ private static IConfigurationRoot BuildConfig(string[] args) return config; } + private static Kernel BuildKernel(IConfiguration config) + { + var kernelBuilder = Kernel.CreateBuilder(); + kernelBuilder.AddOpenAIChatCompletion( + "gpt-3.5-turbo", + config["OpenAI:ApiKey"] ?? 
throw new Exception("OpenAI:ApiKey configuration is required.") + ) + .AddAssemblyAIPlugin(config); + var kernel = kernelBuilder.Build(); + + kernel.ImportPluginFromType(); + return kernel; + } + private static async Task TranscribeFileUsingPluginDirectly(Kernel kernel) { Console.WriteLine("Transcribing file using plugin directly"); var result = await kernel.InvokeAsync( - nameof(TranscriptPlugin), - TranscriptPlugin.TranscribeFunctionName, + nameof(AssemblyAIPlugin), + AssemblyAIPlugin.TranscribeFunctionName, new KernelArguments { ["INPUT"] = "https://storage.googleapis.com/aai-docs-samples/espn.m4a" @@ -73,7 +66,7 @@ private static async Task TranscribeFileUsingPluginFromSemanticFunction(Kernel k // If `INPUT` is a URL, it'll use `INPUT` as `audioUrl`, otherwise, it'll use `INPUT` as `filePath`. const string prompt = """ Here is a transcript: - {{TranscriptPlugin.Transcribe "https://storage.googleapis.com/aai-docs-samples/espn.m4a"}} + {{AssemblyAIPlugin.Transcribe "https://storage.googleapis.com/aai-docs-samples/espn.m4a"}} --- Summarize the transcript. """; diff --git a/src/Sample/Sample.csproj b/src/Sample/Sample.csproj index 2b6a0d4..b542721 100644 --- a/src/Sample/Sample.csproj +++ b/src/Sample/Sample.csproj @@ -40,4 +40,10 @@ 1.0.1-preview + + + + Always + + diff --git a/src/Sample/appsettings.json b/src/Sample/appsettings.json new file mode 100644 index 0000000..1de348a --- /dev/null +++ b/src/Sample/appsettings.json @@ -0,0 +1,8 @@ +{ + "AssemblyAI": { + "Plugin": { + "ApiKey": "", + "AllowFileSystemAccess": true + } + } +} \ No newline at end of file