From 3258198a0a7a3039e349cdbbdf1b34064f019a04 Mon Sep 17 00:00:00 2001 From: aniturza Date: Wed, 10 Jan 2024 21:29:31 +0100 Subject: [PATCH] finished audio transcription feature --- .gitignore | 1 + .../AudioTranscriptionController.cs | 226 ++++++++---------- .../AudioTranscription.cshtml | 20 +- src/AIHub/Views/Shared/_Layout.cshtml | 2 +- src/AIHub/appsettings.Development.json | 47 +++- src/AIHub/appsettings.template.json | 5 + src/AIHub/wwwroot/images/icon1.svg | 1 - src/AIHub/wwwroot/images/icon2.svg | 1 - src/AIHub/wwwroot/images/icon3.svg | 1 - src/AIHub/wwwroot/images/icon4.svg | 1 - src/AIHub/wwwroot/images/icon5.svg | 1 - src/AIHub/wwwroot/images/icon6.svg | 1 - src/AIHub/wwwroot/images/icon8.svg | 1 - src/AIHub/wwwroot/images/icon9.svg | 1 - 14 files changed, 158 insertions(+), 151 deletions(-) delete mode 100644 src/AIHub/wwwroot/images/icon1.svg delete mode 100644 src/AIHub/wwwroot/images/icon2.svg delete mode 100644 src/AIHub/wwwroot/images/icon3.svg delete mode 100644 src/AIHub/wwwroot/images/icon4.svg delete mode 100644 src/AIHub/wwwroot/images/icon5.svg delete mode 100644 src/AIHub/wwwroot/images/icon6.svg delete mode 100644 src/AIHub/wwwroot/images/icon8.svg delete mode 100644 src/AIHub/wwwroot/images/icon9.svg diff --git a/.gitignore b/.gitignore index 92938cf..05db607 100644 --- a/.gitignore +++ b/.gitignore @@ -407,3 +407,4 @@ FodyWeavers.xsd # JetBrains Rider *.sln.iml +src/AIHub/appsettings.Development.json \ No newline at end of file diff --git a/src/AIHub/Controllers/AudioTranscriptionController.cs b/src/AIHub/Controllers/AudioTranscriptionController.cs index 2ca5394..ee8ff40 100644 --- a/src/AIHub/Controllers/AudioTranscriptionController.cs +++ b/src/AIHub/Controllers/AudioTranscriptionController.cs @@ -1,13 +1,20 @@ namespace MVCWeb.Controllers; - -public class AudioTrancriptionController : Controller +using System; +using System.Net.Http; +using System.Net.Http.Headers; +using System.Threading.Tasks; +using Newtonsoft.Json; +using Microsoft.AspNetCore.Mvc; +using System.Net; +using Newtonsoft.Json.Linq; +using Microsoft.VisualBasic; + +public class AudioTranscriptionController : Controller { private readonly ILogger _logger; private readonly IConfiguration _config; - private string FormRecogEndpoint; - private string FormRecogSubscriptionKey; - private string AOAIendpoint; - private string AOAIsubscriptionKey; + private string SpeechRegion; + private string SpeechSubscriptionKey; private string storageconnstring; private readonly BlobServiceClient blobServiceClient; private readonly BlobContainerClient containerClient; @@ -16,28 +23,25 @@ public class AudioTrancriptionController : Controller //Results - string result_image_front; string result_message_front; - private FormAnalyzerModel model; + private AudioTranscriptionModel model; - public AudioTrancriptionController(IConfiguration config) + public AudioTranscriptionController(IConfiguration config) { _config = config; - FormRecogEndpoint = _config.GetValue("FormAnalyzer:FormRecogEndpoint"); - FormRecogSubscriptionKey = _config.GetValue("FormAnalyzer:FormRecogSubscriptionKey"); - AOAIendpoint = _config.GetValue("FormAnalyzer:OpenAIEndpoint"); - AOAIsubscriptionKey = _config.GetValue("FormAnalyzer:OpenAISubscriptionKey"); + SpeechRegion = _config.GetValue("AudioTranscription:SpeechLocation"); + SpeechSubscriptionKey = _config.GetValue("AudioTranscription:SpeechSubscriptionKey"); storageconnstring = _config.GetValue("Storage:ConnectionString"); BlobServiceClient blobServiceClient = new BlobServiceClient(storageconnstring); - containerClient = blobServiceClient.GetBlobContainerClient(_config.GetValue("FormAnalyzer:ContainerName")); + containerClient = blobServiceClient.GetBlobContainerClient(_config.GetValue("AudioTranscription:ContainerName")); sasUri = containerClient.GenerateSasUri(Azure.Storage.Sas.BlobContainerSasPermissions.Read, DateTimeOffset.UtcNow.AddHours(1)); // Obtiene una lista de blobs en el contenedor blobs = containerClient.GetBlobs(); - model = new FormAnalyzerModel(); + model = new AudioTranscriptionModel(); } public IActionResult AudioTranscription() @@ -46,127 +50,95 @@ public IActionResult AudioTranscription() } [HttpPost] - public async Task TranscribeAudio(string image_url, string prompt) + public async Task TranscribeAudio(string audio_url, IFormFile imageFile) { + string audio = audio_url + sasUri.Query; - //1. Get Image - string image = image_url + sasUri.Query; - Console.WriteLine(image); - //ViewBag.PdfUrl = "http://docs.google.com/gview?url="+image+"&embedded=true"; - ViewBag.PdfUrl = image; - string output_result; - - HttpClient client = new HttpClient(); - client.BaseAddress = new Uri(FormRecogEndpoint); - - // Add an Accept header for JSON format. - client.DefaultRequestHeaders.Accept.Add( - new MediaTypeWithQualityHeaderValue("application/json")); - client.DefaultRequestHeaders.Add("Ocp-Apim-Subscription-Key", FormRecogSubscriptionKey); + // CALL 1: STT 3.1 - var content = new - { - urlSource = image - }; - var json = System.Text.Json.JsonSerializer.Serialize(content); - // Crear un HttpContent con el JSON y el tipo de contenido - HttpContent content_body = new StringContent(json, Encoding.UTF8, "application/json"); - // List data response. - HttpResponseMessage response = await client.PostAsync(FormRecogEndpoint, content_body); // Blocking call! Program will wait here until a response is received or a timeout occurs. + var client = new HttpClient(); + var request = new HttpRequestMessage(HttpMethod.Post, "https://"+SpeechRegion+".api.cognitive.microsoft.com/speechtotext/v3.1/transcriptions"); + request.Headers.Add("Ocp-Apim-Subscription-Key", SpeechSubscriptionKey); + var content = new StringContent("{\r\n\"contentUrls\": [\r\n \"" + audio + "\"\r\n ],\r\n \"locale\": \"es-es\",\r\n \"displayName\": \"My Transcription\",\r\n \"model\": null,\r\n \"properties\": {\r\n \"wordLevelTimestampsEnabled\": true,\r\n \"languageIdentification\": {\r\n \"candidateLocales\": [\r\n \"en-US\", \"de-DE\", \"es-ES\"\r\n ]\r\n }\r\n }\r\n}", null, "application/json"); + request.Content = content; + var response = await client.SendAsync(request); response.EnsureSuccessStatusCode(); - - //string responseBody = await response.Content.ReadAsStringAsync(); - string operation_location_url = response.Headers.GetValues("Operation-Location").FirstOrDefault(); - + //Console.WriteLine(await response.Content.ReadAsStringAsync()); + var responsejson = JsonConvert.DeserializeObject(await response.Content.ReadAsStringAsync()); + Console.WriteLine(responsejson); + var output_result = responsejson.self.ToString(); + Console.WriteLine("SELF: "+output_result); client.Dispose(); - - //llamar a GET OPERATION - HttpClient client2 = new HttpClient(); - client2.BaseAddress = new Uri(operation_location_url); - - // Add an Accept header for JSON format. - client2.DefaultRequestHeaders.Accept.Add( - new MediaTypeWithQualityHeaderValue("application/json")); - client2.DefaultRequestHeaders.Add("Ocp-Apim-Subscription-Key", FormRecogSubscriptionKey); - - // Crear un HttpContent con el JSON y el tipo de contenido - // List data response. - HttpResponseMessage response2 = await client2.GetAsync(operation_location_url); // Blocking call! Program will wait here until a response is received or a timeout occurs. - Console.WriteLine(response2); + // CALL 2: CHECK FOR FINISH + var client2 = new HttpClient(); + var request2 = new HttpRequestMessage(HttpMethod.Get, output_result); + client2.DefaultRequestHeaders.Add("Ocp-Apim-Subscription-Key", SpeechSubscriptionKey); + var content2 = new StringContent(string.Empty); + content2.Headers.ContentType = new MediaTypeHeaderValue("application/json"); + request2.Content = content2; + var response2 = await client2.SendAsync(request2); response2.EnsureSuccessStatusCode(); - var responseBody = await response2.Content.ReadAsStringAsync(); - var responsejson = JsonConvert.DeserializeObject(await response2.Content.ReadAsStringAsync()); - - //var analyzeresult = responseBody.analyzeResult; - while (responsejson.status != "succeeded") - { - Thread.Sleep(10000); - response2 = await client2.GetAsync(operation_location_url); - responsejson = JsonConvert.DeserializeObject(await response2.Content.ReadAsStringAsync()); - } - output_result = responsejson.analyzeResult.content.ToString(); - - // Above three lines can be replaced with new helper method below - // string responseBody = await client.GetStringAsync(uri); - - // Parse the response as JSON - // var operationLocation= await response.Headers.ReadAsStringAsync(); - + //Console.WriteLine(await response2.Content.ReadAsStringAsync()); + var responsejson2 = JsonConvert.DeserializeObject(await response.Content.ReadAsStringAsync()); + Console.WriteLine(responsejson2); + while (responsejson2.status != "Succeeded") + { + Thread.Sleep(10000); + response2 = await client2.GetAsync(output_result); + responsejson2 = JsonConvert.DeserializeObject(await response2.Content.ReadAsStringAsync()); + Console.WriteLine(responsejson2.status); + } client2.Dispose(); - try - { + // CALL 3: GET RESULTS URL + + var client3 = new HttpClient(); + var request3 = new HttpRequestMessage(HttpMethod.Get, output_result+"/files/"); + request3.Headers.Add("Ocp-Apim-Subscription-Key", SpeechSubscriptionKey); + var content3 = new StringContent(string.Empty); + content3.Headers.ContentType = new MediaTypeHeaderValue("application/json"); + request3.Content = content3; + var response3 = await client3.SendAsync(request3); + response3.EnsureSuccessStatusCode(); + var responsejson3 = JsonConvert.DeserializeObject(await response3.Content.ReadAsStringAsync()); + Console.WriteLine(responsejson3); + // Extract contentUrl field + string output_result3 = (string)responsejson3["values"][0]["links"]["contentUrl"]; + Console.WriteLine(output_result3); + client3.Dispose(); + + // CALL 4: GET RESULTS (TRANSCRIPTION) + + var client4 = new HttpClient(); + var request4 = new HttpRequestMessage(HttpMethod.Get, output_result3); + request4.Headers.Add("Ocp-Apim-Subscription-Key", SpeechSubscriptionKey); + var content4 = new StringContent(string.Empty); + content4.Headers.ContentType = new MediaTypeHeaderValue("application/json"); + request4.Content = content4; + var response4 = await client4.SendAsync(request4); + response4.EnsureSuccessStatusCode(); + Console.WriteLine(await response4.Content.ReadAsStringAsync()); + var jsonObject4 = JsonConvert.DeserializeObject(await response4.Content.ReadAsStringAsync()); + string output_result4 = (string)jsonObject4["combinedRecognizedPhrases"][0]["lexical"]; + Console.WriteLine(output_result4); + client4.Dispose(); + + + //Show transcript results + ViewBag.Message = "TRANSCRIPTION RESULTS: \n\n"+output_result4; - OpenAIClient client_oai = new OpenAIClient( - new Uri(AOAIendpoint), - new AzureKeyCredential(AOAIsubscriptionKey)); - - // ### If streaming is not selected - Response responseWithoutStream = await client_oai.GetChatCompletionsAsync( - "DemoBuild", - new ChatCompletionsOptions() - { - Messages = - { - new ChatMessage(ChatRole.System, @"You are specialized in understanding PDFs and answering questions about it. Document OCR result is: "+output_result), - new ChatMessage(ChatRole.User, @"User question: "+prompt ), - }, - Temperature = (float)0.7, - MaxTokens = 1000, - NucleusSamplingFactor = (float)0.95, - FrequencyPenalty = 0, - PresencePenalty = 0, - }); - - ChatCompletions completions = responseWithoutStream.Value; - ChatChoice results_analisis = completions.Choices[0]; - ViewBag.Message = - //"Hate severity: " + (response.Value.HateResult?.Severity ?? 0); - results_analisis.Message.Content - ; - - /* result_image_front=image; - Console.WriteLine("1) "+result_image_front); - Console.WriteLine("2) "+result_message_front); - /* ViewBag.Message = - results_analisis.Message.Content - ; */ - //ViewBag.Image=result_image_front+".jpg"; - } - catch (RequestFailedException ex) - { - throw; - } - - // var result = await _service.GetBuildingHomeAsync(); - // return Ok(result); return View("AudioTranscription", model); } + public class SpeechToTextResponse + { + [JsonProperty("text")] + public string Text { get; set; } + } //Upload a file to my azure storage account [HttpPost] @@ -176,19 +148,19 @@ public async Task UploadFile(IFormFile imageFile, string prompt) if (CheckNullValues(imageFile)) { - ViewBag.Message = "You must upload an image"; + ViewBag.Message = "You must upload an mp3 audio file"; return View("AudioTranscription"); } //Upload file to azure storage account string url = imageFile.FileName.ToString(); - Console.WriteLine(url); + //Console.WriteLine(url); url = url.Replace(" ", ""); - Console.WriteLine(url); + //Console.WriteLine(url); BlobClient blobClient = containerClient.GetBlobClient(url); var httpHeaders = new BlobHttpHeaders { - ContentType = "application/pdf", + ContentType = "audio/mpeg", }; await blobClient.UploadAsync(imageFile.OpenReadStream(), new BlobUploadOptions { HttpHeaders = httpHeaders }); @@ -197,13 +169,13 @@ public async Task UploadFile(IFormFile imageFile, string prompt) if (CheckImageExtension(blobUrl.ToString())) { - ViewBag.Message = "You must upload a document with .mp3 extension"; + ViewBag.Message = "You must upload an audio file with .mp3 extension"; return View("AudioTranscription", model); } //Call EvaluateImage with the url - await TranscribeAudio(blobUrl.ToString(), prompt); + await TranscribeAudio(blobUrl.ToString(), imageFile); ViewBag.Waiting = null; return View("AudioTranscription", model); @@ -229,7 +201,7 @@ private bool CheckNullValues(IFormFile imageFile) private bool CheckImageExtension(string blobUri) { string uri_lower = blobUri; - if (uri_lower.Contains(".pdf", StringComparison.OrdinalIgnoreCase)) + if (uri_lower.Contains(".mp3", StringComparison.OrdinalIgnoreCase)) { return false; } diff --git a/src/AIHub/Views/AudioTranscription/AudioTranscription.cshtml b/src/AIHub/Views/AudioTranscription/AudioTranscription.cshtml index a8252bb..d610a85 100644 --- a/src/AIHub/Views/AudioTranscription/AudioTranscription.cshtml +++ b/src/AIHub/Views/AudioTranscription/AudioTranscription.cshtml @@ -6,10 +6,10 @@ + -

Audio Trancription

-

Analiza tus audios usando Azure Speech Service

+

Audio Transcription

+

Analiza tus audios usando Azure AI Speech

Sólo necesitas subir un audio (.mp3).

@@ -22,11 +22,6 @@ @Html.Raw(ViewBag.Message.Replace("\n", "
")) -
- -
}
@@ -42,15 +37,12 @@
-
+

-
-
- - + +
diff --git a/src/AIHub/appsettings.Development.json b/src/AIHub/appsettings.Development.json index 0c208ae..1563ad8 100644 --- a/src/AIHub/appsettings.Development.json +++ b/src/AIHub/appsettings.Development.json @@ -4,5 +4,50 @@ "Default": "Information", "Microsoft.AspNetCore": "Warning" } - } + }, + "ContentModerator": { + "Endpoint": "", + "SubscriptionKey": "" + }, + "BrandAnalyzer": { + "BingEndpoint": "https://api.bing.microsoft.com/v7.0/search", + "BingKey": "", + "OpenAIEndpoint": "", + "OpenAISubscriptionKey": "" + }, + "CallCenter": { + "OpenAIEndpoint": "", + "OpenAISubscriptionKey": "" + }, + "AudioTranscription": { + "SpeechLocation": "westeurope", + "SpeechSubscriptionKey": "", + "ContainerName": "audio-files" + }, + "ImageAnalyzer": { + "VisionEndpoint": "", + "OCREndpoint": "", + "VisionSubscriptionKey": "", + "OpenAIEndpoint": "", + "OpenAISubscriptionKey": "KEY", + "ContainerName": "image-analyzer" + }, + "FormAnalyzer": { + "FormRecogEndpoint": "", + "FormRecogSubscriptionKey": "", + "OpenAIEndpoint": "", + "OpenAISubscriptionKey": "", + "ContainerName": "form-analyzer" + }, + "ChatOnYourData": { + "Link": "" + }, + "PBIReport": { + "Link": ">Link to PBIReport>" + }, + "Storage": { + "ConnectionString": "", + "ContainerName": "image-moderator" + }, + "AllowedHosts": "*" } diff --git a/src/AIHub/appsettings.template.json b/src/AIHub/appsettings.template.json index 0204b61..1563ad8 100644 --- a/src/AIHub/appsettings.template.json +++ b/src/AIHub/appsettings.template.json @@ -19,6 +19,11 @@ "OpenAIEndpoint": "", "OpenAISubscriptionKey": "" }, + "AudioTranscription": { + "SpeechLocation": "westeurope", + "SpeechSubscriptionKey": "", + "ContainerName": "audio-files" + }, "ImageAnalyzer": { "VisionEndpoint": "", "OCREndpoint": "", diff --git a/src/AIHub/wwwroot/images/icon1.svg b/src/AIHub/wwwroot/images/icon1.svg deleted file mode 100644 index 2dfb953..0000000 --- a/src/AIHub/wwwroot/images/icon1.svg +++ /dev/null @@ -1 +0,0 @@ - \ No newline at end of file diff --git a/src/AIHub/wwwroot/images/icon2.svg b/src/AIHub/wwwroot/images/icon2.svg deleted file mode 100644 index bda7e97..0000000 --- a/src/AIHub/wwwroot/images/icon2.svg +++ /dev/null @@ -1 +0,0 @@ - \ No newline at end of file diff --git a/src/AIHub/wwwroot/images/icon3.svg b/src/AIHub/wwwroot/images/icon3.svg deleted file mode 100644 index 8a87a13..0000000 --- a/src/AIHub/wwwroot/images/icon3.svg +++ /dev/null @@ -1 +0,0 @@ - \ No newline at end of file diff --git a/src/AIHub/wwwroot/images/icon4.svg b/src/AIHub/wwwroot/images/icon4.svg deleted file mode 100644 index 068af1b..0000000 --- a/src/AIHub/wwwroot/images/icon4.svg +++ /dev/null @@ -1 +0,0 @@ - \ No newline at end of file diff --git a/src/AIHub/wwwroot/images/icon5.svg b/src/AIHub/wwwroot/images/icon5.svg deleted file mode 100644 index d1be82e..0000000 --- a/src/AIHub/wwwroot/images/icon5.svg +++ /dev/null @@ -1 +0,0 @@ - \ No newline at end of file diff --git a/src/AIHub/wwwroot/images/icon6.svg b/src/AIHub/wwwroot/images/icon6.svg deleted file mode 100644 index 0692e0a..0000000 --- a/src/AIHub/wwwroot/images/icon6.svg +++ /dev/null @@ -1 +0,0 @@ - \ No newline at end of file diff --git a/src/AIHub/wwwroot/images/icon8.svg b/src/AIHub/wwwroot/images/icon8.svg deleted file mode 100644 index a94a78b..0000000 --- a/src/AIHub/wwwroot/images/icon8.svg +++ /dev/null @@ -1 +0,0 @@ - \ No newline at end of file diff --git a/src/AIHub/wwwroot/images/icon9.svg b/src/AIHub/wwwroot/images/icon9.svg deleted file mode 100644 index e27f62e..0000000 --- a/src/AIHub/wwwroot/images/icon9.svg +++ /dev/null @@ -1 +0,0 @@ - \ No newline at end of file