From d5fd31d69ca49122ad1afd249356acc65b63fed4 Mon Sep 17 00:00:00 2001 From: heblasco Date: Thu, 7 Mar 2024 15:56:11 +0100 Subject: [PATCH] Add Microsoft.CognitiveServices.Speech package and update audio file parameter name in audiotranscription.js --- src/AIHub/AIHub.csproj | 1 + .../AudioTranscriptionController.cs | 28 ++++++++++-------- src/AIHub/GlobalUsings.cs | 2 ++ .../AudioTranscription.cshtml | 29 ++++++++++++------- src/AIHub/wwwroot/js/ui/audiotranscription.js | 4 +-- 5 files changed, 40 insertions(+), 24 deletions(-) diff --git a/src/AIHub/AIHub.csproj b/src/AIHub/AIHub.csproj index 598fb8d..793cb58 100644 --- a/src/AIHub/AIHub.csproj +++ b/src/AIHub/AIHub.csproj @@ -13,6 +13,7 @@ + diff --git a/src/AIHub/Controllers/AudioTranscriptionController.cs b/src/AIHub/Controllers/AudioTranscriptionController.cs index 927c898..6b9cd83 100644 --- a/src/AIHub/Controllers/AudioTranscriptionController.cs +++ b/src/AIHub/Controllers/AudioTranscriptionController.cs @@ -31,7 +31,7 @@ public IActionResult AudioTranscription() } [HttpPost] - public async Task TranscribeAudio(string audio_url, IFormFile imageFile) + public async Task TranscribeAudio(string audio_url) { string audio = audio_url + sasUri.Query; @@ -42,11 +42,15 @@ public async Task TranscribeAudio(string audio_url, IFormFile ima request.Content = content; var response = await httpClient.SendAsync(request); response.EnsureSuccessStatusCode(); - var responsejson = JsonSerializer.Deserialize(await response.Content.ReadAsStringAsync())!; - Console.WriteLine(responsejson); - var output_result = responsejson.self.ToString(); + var responsejson = JsonSerializer.Deserialize(await response.Content.ReadAsStringAsync())!; + Console.WriteLine(responsejson["self"]!.ToString()); + if (responsejson["self"] == null || responsejson["self"]!.ToString() == string.Empty) + { + ViewBag.Message = "Error in the transcription process"; + return View("AudioTranscription", model); + } + var output_result = responsejson["self"]!.ToString(); Console.WriteLine("SELF: " + output_result); - // CALL 2: CHECK FOR FINISH var request2 = new HttpRequestMessage(HttpMethod.Get, output_result); httpClient.DefaultRequestHeaders.Add("Ocp-Apim-Subscription-Key", speechSubscriptionKey); @@ -56,14 +60,14 @@ public async Task TranscribeAudio(string audio_url, IFormFile ima var response2 = await httpClient.SendAsync(request2); response2.EnsureSuccessStatusCode(); //Console.WriteLine(await response2.Content.ReadAsStringAsync()); - var responsejson2 = JsonSerializer.Deserialize(await response.Content.ReadAsStringAsync())!; + var responsejson2 = JsonSerializer.Deserialize(await response.Content.ReadAsStringAsync())!; Console.WriteLine(responsejson2); - while (responsejson2.status != "Succeeded") + while (responsejson2["status"]!.ToString() != "Succeeded") { Thread.Sleep(10000); response2 = await httpClient.GetAsync(output_result); - responsejson2 = JsonSerializer.Deserialize(await response2.Content.ReadAsStringAsync())!; - Console.WriteLine(responsejson2.status); + responsejson2 = JsonSerializer.Deserialize(await response2.Content.ReadAsStringAsync())!; + Console.WriteLine(responsejson2["status"]!.ToString()); } // CALL 3: GET RESULTS URL @@ -74,10 +78,10 @@ public async Task TranscribeAudio(string audio_url, IFormFile ima request3.Content = content3; var response3 = await httpClient.SendAsync(request3); response3.EnsureSuccessStatusCode(); - var responsejson3 = JsonSerializer.Deserialize(await response3.Content.ReadAsStringAsync())!; + var responsejson3 = JsonSerializer.Deserialize(await response3.Content.ReadAsStringAsync())!; Console.WriteLine(responsejson3); // Extract contentUrl field - string output_result3 = (string)responsejson3["values"][0]["links"]["contentUrl"]; + string output_result3 = (string)responsejson3["values"]![0]!["links"]!["contentUrl"]!; Console.WriteLine(output_result3); // CALL 4: GET RESULTS (TRANSCRIPTION) @@ -140,7 +144,7 @@ public async Task UploadFile(IFormFile audioFile, string prompt) } // Call EvaluateImage with the url - await TranscribeAudio(blobUrl.ToString(), audioFile); + await TranscribeAudio(blobUrl.ToString()); ViewBag.Waiting = null; // return View("AudioTranscription", model); diff --git a/src/AIHub/GlobalUsings.cs b/src/AIHub/GlobalUsings.cs index 8395264..1523421 100644 --- a/src/AIHub/GlobalUsings.cs +++ b/src/AIHub/GlobalUsings.cs @@ -15,3 +15,5 @@ global using System.Text.Json; global using System.Text.Json.Nodes; global using System.Text.Json.Serialization; +global using Microsoft.CognitiveServices.Speech; +global using Microsoft.CognitiveServices.Speech.Audio; diff --git a/src/AIHub/Views/AudioTranscription/AudioTranscription.cshtml b/src/AIHub/Views/AudioTranscription/AudioTranscription.cshtml index b683c90..d09d7e6 100644 --- a/src/AIHub/Views/AudioTranscription/AudioTranscription.cshtml +++ b/src/AIHub/Views/AudioTranscription/AudioTranscription.cshtml @@ -4,7 +4,7 @@
- +
@@ -27,20 +27,26 @@

Audio Transcription with Azure AI Speech

- Optimize your business efficiency with our Azure AI Speech-to-Text audio analysis service. Automatically convert speech to text with high accuracy and save valuable time on manual transcriptions. + Optimize your business efficiency with our Azure AI Speech-to-Text audio analysis service. + Automatically convert speech to text with high accuracy and save valuable time on manual + transcriptions.

-
+
- +

Drop files here or click to upload.

- You just need to upload an image (.mp3). + You just need to upload an image + (.mp3).
@@ -73,7 +79,7 @@
- +
@@ -81,17 +87,20 @@
-

Audio Transcription Result Transcription Success

+

Audio Transcription Result Transcription Success

-
+
-
+
@section Scripts { - @{await Html.RenderPartialAsync("_AudioTranscriptionScriptsPartial");} + @{ + await Html.RenderPartialAsync("_AudioTranscriptionScriptsPartial"); + } } diff --git a/src/AIHub/wwwroot/js/ui/audiotranscription.js b/src/AIHub/wwwroot/js/ui/audiotranscription.js index 2394683..d911218 100644 --- a/src/AIHub/wwwroot/js/ui/audiotranscription.js +++ b/src/AIHub/wwwroot/js/ui/audiotranscription.js @@ -1,5 +1,5 @@ -Dropzone.options.dropaiimage = { - paramName: "imageFile", +Dropzone.options.dropaiaudio = { + paramName: "audioFile", maxFilesize: 2, // MB createImageThumbnails:true, previewsContainer: "#file-previews",