Skip to content

Commit

Permalink
Support INPUT variable
Browse files Browse the repository at this point in the history
  • Loading branch information
Swimburger committed Oct 2, 2023
1 parent e921fce commit bf6567d
Show file tree
Hide file tree
Showing 4 changed files with 194 additions and 84 deletions.
72 changes: 49 additions & 23 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
[![AssemblyAI Twitter](https://img.shields.io/twitter/follow/AssemblyAI?label=%40AssemblyAI&style=social "AssemblyAI Twitter")](https://twitter.com/AssemblyAI)
[![AssemblyAI YouTube](https://img.shields.io/youtube/channel/subscribers/UCtatfZMf-8EkIwASXM4ts0A "AssemblyAI YouTube")](https://www.youtube.com/@AssemblyAI)

# AssemblyAI plugins for Semantic Kernel
# AssemblyAI integration for Semantic Kernel

Transcribe audio using AssemblyAI with Semantic Kernel plugins.

Expand Down Expand Up @@ -35,21 +35,19 @@ string apiKey = Environment.GetEnvironmentVariable("ASSEMBLYAI_API_KEY")

var transcriptPlugin = kernel.ImportSkill(
new TranscriptPlugin(apiKey: apiKey),
"TranscriptPlugin"
TranscriptPlugin.PluginName
);
```

## Usage

Get the `Transcribe` function from the transcript plugin and invoke it with the context variables.
```csharp
var variables = new ContextVariables
{
["audioUrl"] = "https://storage.googleapis.com/aai-docs-samples/espn.m4a"
};

var context = await kernel.Skills
.GetFunction("TranscriptPlugin", "Transcribe")
.InvokeAsync(variables);

var function = kernel.Skills
.GetFunction(TranscriptPlugin.PluginName, TranscriptPlugin.TranscribeFunctionName);
var context = kernel.CreateNewContext();
context.Variables["audioUrl"] = "https://storage.googleapis.com/aai-docs-samples/espn.m4a";
await function.InvokeAsync(context);
Console.WriteLine(context.Result);
```

Expand All @@ -65,22 +63,50 @@ var transcriptPlugin = kernel.ImportSkill(
{
AllowFileSystemAccess = true
},
"TranscriptPlugin"
TranscriptPlugin.PluginName
);
var function = kernel.Skills
.GetFunction(TranscriptPlugin.PluginName, TranscriptPlugin.TranscribeFunctionName);
var context = kernel.CreateNewContext();
context.Variables["filePath"] = "./espn.m4a";
await function.InvokeAsync(context);
Console.WriteLine(context.Result);
```

var variables = new ContextVariables
{
["filePath"] = "./espn.m4a"
};
If both `filePath` and `audioUrl` are specified, `filePath` is used to upload the file, and the uploaded file's URL overrides `audioUrl`.

var context = await kernel.Skills
.GetFunction("TranscriptPlugin", "Transcribe")
.InvokeAsync(variables);

Lastly, you can also pass the file path or URL through the `INPUT` variable, so you can transcribe a file like this.

```csharp
var function = kernel.Skills
.GetFunction(TranscriptPlugin.PluginName, TranscriptPlugin.TranscribeFunctionName);
var context = await function.InvokeAsync("./espn.m4a");
```

Or from within a semantic function like this.

```csharp
var prompt = """
Here is a transcript:
{{TranscriptPlugin.Transcribe "https://storage.googleapis.com/aai-docs-samples/espn.m4a"}}
---
Summarize the transcript.
""";
var context = kernel.CreateNewContext();
var function = kernel.CreateSemanticFunction(prompt);
await function.InvokeAsync(context);
Console.WriteLine(context.Result);
```

If both `filePath` and `audioUrl` are specified, `filePath` is used to upload the file, and the uploaded file's URL overrides `audioUrl`.
If the `INPUT` variable is a URL, it'll be used as the `audioUrl`; otherwise, it'll be used as the `filePath`.
If either `audioUrl` or `filePath` is configured, `INPUT` is ignored.

All the code above explicitly invokes the transcript plugin, but it can also be invoked as part of a plan.
Check out [the Sample project](./src/Sample/Program.cs#L50) which uses a plan to transcribe an audio file in addition to explicit invocation.

## Notes

The code above explicitly invokes the transcript plugin, but it can also be invoked as part of a plan.
Check out [the Sample project](./src/Sample/Program.cs#L54) which uses a plan to transcribe an audio file in addition to explicit invocation.
- The AssemblyAI integration only supports Semantic Kernel with .NET at the moment.
If there's demand, we will extend support to other platforms, so let us know!
- Semantic Kernel itself is still in pre-release, and changes frequently, so we'll keep our integration in pre-release until SK is GA'd.
- Feel free to [file an issue](https://github.com/AssemblyAI/assemblyai-semantic-kernel/issues) in case of bugs or feature requests.
57 changes: 48 additions & 9 deletions src/AssemblyAI.SemanticKernel/TranscriptPlugin.cs
Original file line number Diff line number Diff line change
Expand Up @@ -30,16 +30,15 @@ public TranscriptPlugin(string apiKey)
If filePath is configured, the file will be uploaded to AssemblyAI, and then used as the audioUrl to transcribe.
Optional if audioUrl is configured. The uploaded file will override the audioUrl parameter.")]
[SKParameter("audioUrl", @"The public URL of the audio or video file to transcribe.
Optional if filePath is configured.
""")]
Optional if filePath is configured.")]
public async Task<string> Transcribe(SKContext context)
{
SetPathAndUrl(context, out var filePath, out var audioUrl);
using (var httpClient = new HttpClient())
{
httpClient.DefaultRequestHeaders.Authorization = new AuthenticationHeaderValue(_apiKey);

string audioUrl;
if (context.Variables.TryGetValue("filePath", out var filePath))
if (filePath != null)
{
if (AllowFileSystemAccess == false)
{
Expand All @@ -50,16 +49,56 @@ public async Task<string> Transcribe(SKContext context)

audioUrl = await UploadFileAsync(filePath, httpClient);
}

var transcript = await CreateTranscriptAsync(audioUrl, httpClient);
transcript = await WaitForTranscriptToProcess(transcript, httpClient);
return transcript.Text ?? throw new Exception("Transcript text is null. This should not happen.");
}
}

private static void SetPathAndUrl(SKContext context, out string filePath, out string audioUrl)
{
filePath = null;
audioUrl = null;
if (context.Variables.TryGetValue("filePath", out filePath))
{
return;
}

if (context.Variables.TryGetValue("audioUrl", out audioUrl))
{
var uri = new Uri(audioUrl);
if (uri.IsFile)
{
filePath = uri.LocalPath;
audioUrl = null;
}
else
{
context.Variables.TryGetValue("audioUrl", out audioUrl);
return;
}
}

if (audioUrl is null) throw new Exception("You have to pass in the filePath or audioUrl parameter.");
context.Variables.TryGetValue("INPUT", out var input);
if (input == null)
{
throw new Exception("You must pass in INPUT, filePath, or audioUrl parameter.");
}

var transcript = await CreateTranscriptAsync(audioUrl, httpClient);
transcript = await WaitForTranscriptToProcess(transcript, httpClient);
return transcript.Text ?? throw new Exception("Transcript text is null. This should not happen.");
if (Uri.TryCreate(input, UriKind.Absolute, out var inputUrl))
{
if (inputUrl.IsFile)
{
filePath = inputUrl.LocalPath;
}
else
{
audioUrl = input;
}
}
else
{
filePath = input;
}
}

Expand Down
4 changes: 4 additions & 0 deletions src/Sample/FindFilePlugin.cs
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ namespace AssemblyAI.SemanticKernel.Sample;

public class FindFilePlugin
{
public const string PluginName = "FindFilePlugin";
private readonly IKernel _kernel;

public FindFilePlugin(IKernel kernel)
Expand All @@ -32,6 +33,9 @@ public FindFilePlugin(IKernel kernel)
return matches.LastOrDefault()?.Value ?? null;
}


public const string LocateFileFunctionName = nameof(LocateFile);

[SKFunction, Description("Find files in common folders.")]
[SKParameter("fileName", "The name of the file")]
[SKParameter("commonFolderName", "The name of the common folder")]
Expand Down
145 changes: 93 additions & 52 deletions src/Sample/Program.cs
Original file line number Diff line number Diff line change
@@ -1,70 +1,111 @@
using System.Text.Json;
using Microsoft.Extensions.Configuration;
using Microsoft.Extensions.Logging;
using Microsoft.SemanticKernel;
using Microsoft.SemanticKernel.Orchestration;
using Microsoft.SemanticKernel.Planning;
using AssemblyAI.SemanticKernel;
using AssemblyAI.SemanticKernel.Sample;
using Microsoft.Extensions.Logging;

var config = new ConfigurationBuilder()
.AddEnvironmentVariables()
.AddUserSecrets<Program>()
.AddCommandLine(args)
.Build();

using var loggerFactory = LoggerFactory.Create(builder => { builder.SetMinimumLevel(0); });
var kernel = new KernelBuilder()
.WithOpenAIChatCompletionService(
"gpt-3.5-turbo",
config["OpenAI:ApiKey"] ?? throw new Exception("OpenAI:ApiKey configuration is required.")
)
.WithLoggerFactory(loggerFactory)
.Build();

var apiKey = config["AssemblyAI:ApiKey"] ?? throw new Exception("AssemblyAI:ApiKey configuration is required.");

var transcriptPlugin = kernel.ImportSkill(
new TranscriptPlugin(apiKey: apiKey)
namespace AssemblyAI.SemanticKernel.Sample;

internal class Program
{
public static async Task Main(string[] args)
{
AllowFileSystemAccess = true
},
TranscriptPlugin.PluginName
);
var config = BuildConfig(args);

await TranscribeFileUsingPlugin(kernel);
var kernel = BuildKernel(config);

async Task TranscribeFileUsingPlugin(IKernel kernel)
{
var variables = new ContextVariables
await TranscribeFileUsingPluginDirectly(kernel);
//await TranscribeFileUsingPluginFromSemanticFunction(kernel);
//await TranscribeFileUsingPlan(kernel);
}

private static IKernel BuildKernel(IConfiguration config)
{
["audioUrl"] = "https://storage.googleapis.com/aai-docs-samples/espn.m4a",
};
var loggerFactory = LoggerFactory.Create(builder => { builder.SetMinimumLevel(0); });
var kernel = new KernelBuilder()
.WithOpenAIChatCompletionService(
"gpt-3.5-turbo",
config["OpenAI:ApiKey"] ?? throw new Exception("OpenAI:ApiKey configuration is required.")
)
.WithLoggerFactory(loggerFactory)
.Build();

var result = await kernel.Skills
.GetFunction(TranscriptPlugin.PluginName, TranscriptPlugin.TranscribeFunctionName)
.InvokeAsync(variables);
Console.WriteLine(result.Result);
}
var apiKey = config["AssemblyAI:ApiKey"] ?? throw new Exception("AssemblyAI:ApiKey configuration is required.");

var findFilePlugin = kernel.ImportSkill(
new FindFilePlugin(kernel: kernel),
"FindFilePlugin"
);
kernel.ImportSkill(
new TranscriptPlugin(apiKey: apiKey)
{
AllowFileSystemAccess = true
},
TranscriptPlugin.PluginName
);

await TranscribeFileUsingPlan(kernel);
kernel.ImportSkill(
new FindFilePlugin(kernel: kernel),
FindFilePlugin.PluginName
);
return kernel;
}

async Task TranscribeFileUsingPlan(IKernel kernel)
{
var planner = new SequentialPlanner(kernel);
private static IConfigurationRoot BuildConfig(string[] args)
{
var config = new ConfigurationBuilder()
.AddEnvironmentVariables()
.AddUserSecrets<Program>()
.AddCommandLine(args)
.Build();
return config;
}

private static async Task TranscribeFileUsingPluginDirectly(IKernel kernel)
{
Console.WriteLine("Transcribing file using plugin directly");
var variables = new ContextVariables
{
["audioUrl"] = "https://storage.googleapis.com/aai-docs-samples/espn.m4a",
// ["filePath"] = "./espn.m4a" // you can also use `filePath` which will upload the file and override `audioUrl`
};

var result = await kernel.Skills
.GetFunction(TranscriptPlugin.PluginName, TranscriptPlugin.TranscribeFunctionName)
.InvokeAsync(variables);

Console.WriteLine(result.Result);
Console.WriteLine();
}

private static async Task TranscribeFileUsingPluginFromSemanticFunction(IKernel kernel)
{
Console.WriteLine("Transcribing file and summarizing from within a semantic function");
// This will pass the URL to the `INPUT` variable.
// If `INPUT` is a URL, it'll use `INPUT` as `audioUrl`, otherwise, it'll use `INPUT` as `filePath`.
const string prompt = """
Here is a transcript:
{{TranscriptPlugin.Transcribe "https://storage.googleapis.com/aai-docs-samples/espn.m4a"}}
---
Summarize the transcript.
""";
var context = kernel.CreateNewContext();
var function = kernel.CreateSemanticFunction(prompt);
await function.InvokeAsync(context);
Console.WriteLine(context.Result);
Console.WriteLine();
}

private static async Task TranscribeFileUsingPlan(IKernel kernel)
{
Console.WriteLine("Transcribing file from a plan");
var planner = new SequentialPlanner(kernel);

const string prompt = "Transcribe the espn.m4a in my downloads folder.";
var plan = await planner.CreatePlanAsync(prompt);
const string prompt = "Transcribe the espn.m4a in my downloads folder.";
var plan = await planner.CreatePlanAsync(prompt);

Console.WriteLine("Plan:\n");
Console.WriteLine(JsonSerializer.Serialize(plan, new JsonSerializerOptions { WriteIndented = true }));
Console.WriteLine("Plan:\n");
Console.WriteLine(JsonSerializer.Serialize(plan, new JsonSerializerOptions { WriteIndented = true }));

var transcript = (await kernel.RunAsync(plan)).Result;
Console.WriteLine("Transcript:");
Console.WriteLine(transcript);
var transcript = (await kernel.RunAsync(plan)).Result;
Console.WriteLine(transcript);
Console.WriteLine();
}
}

0 comments on commit bf6567d

Please sign in to comment.