Skip to content

Commit

Permalink
Add Microsoft.CognitiveServices.Speech package and update audio file …
Browse files Browse the repository at this point in the history
…parameter name in audiotranscription.js
  • Loading branch information
heblasco committed Mar 7, 2024
1 parent be8c5df commit d5fd31d
Show file tree
Hide file tree
Showing 5 changed files with 40 additions and 24 deletions.
1 change: 1 addition & 0 deletions src/AIHub/AIHub.csproj
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
<PackageReference Include="Azure.AI.Vision.ImageAnalysis" Version="1.0.0-beta.2" />
<PackageReference Include="Azure.Identity" Version="1.10.3" />
<PackageReference Include="Azure.Storage.Blobs" Version="12.18.0" />
<PackageReference Include="Microsoft.CognitiveServices.Speech" Version="1.36.0" />
<PackageReference Include="Newtonsoft.Json" Version="13.0.3" />
</ItemGroup>

Expand Down
28 changes: 16 additions & 12 deletions src/AIHub/Controllers/AudioTranscriptionController.cs
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ public IActionResult AudioTranscription()
}

[HttpPost]
public async Task<IActionResult> TranscribeAudio(string audio_url, IFormFile imageFile)
public async Task<IActionResult> TranscribeAudio(string audio_url)
{
string audio = audio_url + sasUri.Query;

Expand All @@ -42,11 +42,15 @@ public async Task<IActionResult> TranscribeAudio(string audio_url, IFormFile ima
request.Content = content;
var response = await httpClient.SendAsync(request);
response.EnsureSuccessStatusCode();
var responsejson = JsonSerializer.Deserialize<dynamic>(await response.Content.ReadAsStringAsync())!;
Console.WriteLine(responsejson);
var output_result = responsejson.self.ToString();
var responsejson = JsonSerializer.Deserialize<JsonObject>(await response.Content.ReadAsStringAsync())!;
Console.WriteLine(responsejson["self"]!.ToString());
if (responsejson["self"] == null || responsejson["self"]!.ToString() == string.Empty)
{
ViewBag.Message = "Error in the transcription process";
return View("AudioTranscription", model);
}
var output_result = responsejson["self"]!.ToString();
Console.WriteLine("SELF: " + output_result);

// CALL 2: CHECK FOR FINISH
var request2 = new HttpRequestMessage(HttpMethod.Get, output_result);
httpClient.DefaultRequestHeaders.Add("Ocp-Apim-Subscription-Key", speechSubscriptionKey);
Expand All @@ -56,14 +60,14 @@ public async Task<IActionResult> TranscribeAudio(string audio_url, IFormFile ima
var response2 = await httpClient.SendAsync(request2);
response2.EnsureSuccessStatusCode();
//Console.WriteLine(await response2.Content.ReadAsStringAsync());
var responsejson2 = JsonSerializer.Deserialize<dynamic>(await response.Content.ReadAsStringAsync())!;
var responsejson2 = JsonSerializer.Deserialize<JsonObject>(await response.Content.ReadAsStringAsync())!;
Console.WriteLine(responsejson2);
while (responsejson2.status != "Succeeded")
while (responsejson2["status"]!.ToString() != "Succeeded")
{
Thread.Sleep(10000);
response2 = await httpClient.GetAsync(output_result);
responsejson2 = JsonSerializer.Deserialize<dynamic>(await response2.Content.ReadAsStringAsync())!;
Console.WriteLine(responsejson2.status);
responsejson2 = JsonSerializer.Deserialize<JsonObject>(await response2.Content.ReadAsStringAsync())!;
Console.WriteLine(responsejson2["status"]!.ToString());
}

// CALL 3: GET RESULTS URL
Expand All @@ -74,10 +78,10 @@ public async Task<IActionResult> TranscribeAudio(string audio_url, IFormFile ima
request3.Content = content3;
var response3 = await httpClient.SendAsync(request3);
response3.EnsureSuccessStatusCode();
var responsejson3 = JsonSerializer.Deserialize<dynamic>(await response3.Content.ReadAsStringAsync())!;
var responsejson3 = JsonSerializer.Deserialize<JsonObject>(await response3.Content.ReadAsStringAsync())!;
Console.WriteLine(responsejson3);
// Extract contentUrl field
string output_result3 = (string)responsejson3["values"][0]["links"]["contentUrl"];
string output_result3 = (string)responsejson3["values"]![0]!["links"]!["contentUrl"]!;
Console.WriteLine(output_result3);

// CALL 4: GET RESULTS (TRANSCRIPTION)
Expand Down Expand Up @@ -140,7 +144,7 @@ public async Task<IActionResult> UploadFile(IFormFile audioFile, string prompt)
}

// Call TranscribeAudio with the url
await TranscribeAudio(blobUrl.ToString(), audioFile);
await TranscribeAudio(blobUrl.ToString());
ViewBag.Waiting = null;

// return View("AudioTranscription", model);
Expand Down
2 changes: 2 additions & 0 deletions src/AIHub/GlobalUsings.cs
Original file line number Diff line number Diff line change
Expand Up @@ -15,3 +15,5 @@
global using System.Text.Json;
global using System.Text.Json.Nodes;
global using System.Text.Json.Serialization;
global using Microsoft.CognitiveServices.Speech;
global using Microsoft.CognitiveServices.Speech.Audio;
29 changes: 19 additions & 10 deletions src/AIHub/Views/AudioTranscription/AudioTranscription.cshtml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

<!-- Start Content-->
<div class="container-fluid">

<!-- start page title -->
<div class="row">
<div class="col-12">
Expand All @@ -27,20 +27,26 @@
<div class="card-body">
<h4 class="header-title">Audio Transcription with Azure AI Speech</h4>
<p class="text-muted font-14">
Optimize your business efficiency with our Azure AI Speech-to-Text audio analysis service. Automatically convert speech to text with high accuracy and save valuable time on manual transcriptions.
Optimize your business efficiency with our Azure AI Speech-to-Text audio analysis service.
Automatically convert speech to text with high accuracy and save valuable time on manual
transcriptions.
</p>

<div class="tab-content">
<div class="tab-pane show active" id="file-upload-preview">
<form asp-controller="AudioTranscription" asp-action="UploadFile" method="post" enctype="multipart/form-data" id="dropaiimage" class="dropzone" data-plugin="dropzone" data-previews-container="#file-previews" data-upload-preview-template="#uploadPreviewTemplate">
<form asp-controller="AudioTranscription" asp-action="UploadFile" method="post"
enctype="multipart/form-data" id="dropaiaudio" class="dropzone" data-plugin="dropzone"
data-previews-container="#file-previews"
data-upload-preview-template="#uploadPreviewTemplate">
<div class="fallback">
<input name="file" type="file" id="imageFile" name="imageFile"/>
<input name="file" type="file" id="dropaiaudio" name="dropaiaudio" />
</div>

<div class="dz-message needsclick">
<i class="h1 text-muted ri-upload-cloud-2-line"></i>
<h3>Drop files here or click to upload. </h3>
<span class="text-muted font-13"><strong>You just need to upload an image (.mp3).</strong> </span>
<span class="text-muted font-13"><strong>You just need to upload an image
(.mp3).</strong> </span>
</div>

</form>
Expand Down Expand Up @@ -73,25 +79,28 @@
</div>
</div>

<!-- end row -->
<!-- end row -->
<!--- show results -->
<div class="row d-none" id="showresult">
<div class="col-lg-12">
<div class="card">
<div class="row g-0 align-items-center">
<div class="col-md-12">
<div class="card-body">
<h2 class="header-title mt-0 mb-3">Audio Transcription Result <span class="badge bg-success rounded-pill">Transcription Success</span></h2>
<h2 class="header-title mt-0 mb-3">Audio Transcription Result <span
class="badge bg-success rounded-pill">Transcription Success</span></h2>
<p class="text-muted font-16 mb-3" id="show-message-result"></p>
</div>
</div>
</div>
</div>
</div>
</div>
</div>
</div>
</div>
<!-- container -->

@section Scripts {
@{await Html.RenderPartialAsync("_AudioTranscriptionScriptsPartial");}
@{
await Html.RenderPartialAsync("_AudioTranscriptionScriptsPartial");
}
}
4 changes: 2 additions & 2 deletions src/AIHub/wwwroot/js/ui/audiotranscription.js
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
Dropzone.options.dropaiimage = {
paramName: "imageFile",
Dropzone.options.dropaiaudio = {
paramName: "audioFile",
maxFilesize: 2, // MB
createImageThumbnails:true,
previewsContainer: "#file-previews",
Expand Down

0 comments on commit d5fd31d

Please sign in to comment.