diff --git a/infra/modules/ca-aihub/main.tf b/infra/modules/ca-aihub/main.tf
index e03598d..e769474 100644
--- a/infra/modules/ca-aihub/main.tf
+++ b/infra/modules/ca-aihub/main.tf
@@ -119,16 +119,8 @@ resource "azapi_resource" "ca_back" {
             value = var.chat_gpt_deployment
           },
           {
-            name  = "ImageAnalyzer__VisionEndpoint",
-            value = "${var.cognitive_service_endpoint}"
-          },
-          {
-            name  = "ImageAnalyzer__OCREndpoint",
-            value = "${var.cognitive_service_endpoint}"
-          },
-          {
-            name      = "ImageAnalyzer__VisionSubscriptionKey",
-            secretRef = "cognitive-service-key"
+            name  = "ImageAnalyzer__GPT4Vision",
+            value = var.gpt4vision_endpoint
           },
           {
             name = "ImageAnalyzer__OpenAIEndpoint",
diff --git a/infra/modules/ca-aihub/variables.tf b/infra/modules/ca-aihub/variables.tf
index b48eaa9..bf12869 100644
--- a/infra/modules/ca-aihub/variables.tf
+++ b/infra/modules/ca-aihub/variables.tf
@@ -20,6 +20,7 @@ variable "chat_gpt4_vision_model" {}
 variable "embeddings_deployment" {}
 variable "embeddings_model" {}
 variable "openai_endpoint" {}
+variable "gpt4vision_endpoint" {}
 variable "chat_fqdn" {}
 variable "pbi_report_link" {}
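A side note on the container-app setting above: ASP.NET Core maps double underscores in environment-variable names to the ':' configuration separator, so the ImageAnalyzer__GPT4Vision value injected by this module surfaces in the app as ImageAnalyzer:GPT4Vision, the key the controller reads below. A minimal sketch of that mapping (the value itself is a hypothetical placeholder, not taken from this change):

    // Double underscores in environment variable names become ':' section
    // separators in IConfiguration, so ImageAnalyzer__GPT4Vision is read
    // back as ImageAnalyzer:GPT4Vision.
    using Microsoft.Extensions.Configuration;

    var config = new ConfigurationBuilder()
        .AddEnvironmentVariables()   // picks up ImageAnalyzer__GPT4Vision
        .Build();

    string? gpt4Vision = config.GetValue<string>("ImageAnalyzer:GPT4Vision");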
diff --git a/src/AIHub/Controllers/ImageAnalyzerController.cs b/src/AIHub/Controllers/ImageAnalyzerController.cs
index 41338da..39d68b3 100644
--- a/src/AIHub/Controllers/ImageAnalyzerController.cs
+++ b/src/AIHub/Controllers/ImageAnalyzerController.cs
@@ -1,26 +1,28 @@
 using System.ComponentModel.DataAnnotations;
+using Newtonsoft.Json;
+using System;
+using System.IO;
+using System.Net.Http;
+using System.Text;
+using System.Threading.Tasks;
 
 namespace MVCWeb.Controllers;
 
 public class ImageAnalyzerController : Controller
 {
-    private string Visionendpoint;
-    private string OCRendpoint;
-    private string VisionsubscriptionKey;
     private string AOAIendpoint;
     private string AOAIsubscriptionKey;
     private string storageconnstring;
     private string AOAIDeploymentName;
+    private string gpt4Vision;
     private readonly BlobContainerClient containerClient;
     private readonly IEnumerable<BlobItem> blobs;
     private Uri sasUri;
     private ImageAnalyzerModel model;
+    private HttpClient httpClient;
 
-    public ImageAnalyzerController(IConfiguration config)
+    public ImageAnalyzerController(IConfiguration config, IHttpClientFactory clientFactory)
     {
-        Visionendpoint = config.GetValue<string>("ImageAnalyzer:VisionEndpoint") ?? throw new ArgumentNullException("VisionEndpoint");
-        OCRendpoint = config.GetValue<string>("ImageAnalyzer:OCREndpoint") ?? throw new ArgumentNullException("OCREndpoint");
-        VisionsubscriptionKey = config.GetValue<string>("ImageAnalyzer:VisionSubscriptionKey") ?? throw new ArgumentNullException("VisionSubscriptionKey");
         AOAIendpoint = config.GetValue<string>("ImageAnalyzer:OpenAIEndpoint") ?? throw new ArgumentNullException("OpenAIEndpoint");
         AOAIsubscriptionKey = config.GetValue<string>("ImageAnalyzer:OpenAISubscriptionKey") ?? throw new ArgumentNullException("OpenAISubscriptionKey");
         storageconnstring = config.GetValue<string>("Storage:ConnectionString") ?? throw new ArgumentNullException("ConnectionString");
@@ -28,8 +30,10 @@ public ImageAnalyzerController(IConfiguration config)
         containerClient = blobServiceClient.GetBlobContainerClient(config.GetValue<string>("Storage:ContainerName"));
         sasUri = containerClient.GenerateSasUri(Azure.Storage.Sas.BlobContainerSasPermissions.Read, DateTimeOffset.UtcNow.AddHours(1));
         AOAIDeploymentName = config.GetValue<string>("ImageAnalyzer:DeploymentName") ?? throw new ArgumentNullException("DeploymentName");
-        // Obtiene una lista de blobs en el contenedor
+        gpt4Vision = config.GetValue<string>("ImageAnalyzer:GPT4Vision") ?? throw new ArgumentNullException("GPT4Vision");
+        // Obtain the blobs list in the container
         blobs = containerClient.GetBlobs();
+        httpClient = clientFactory.CreateClient();
         model = new ImageAnalyzerModel();
     }
 
@@ -39,93 +43,73 @@ public IActionResult ImageAnalyzer()
     }
 
     [HttpPost]
-    public async Task<IActionResult> DenseCaptionImage(string image_url)
+    public async Task<IActionResult> DenseCaptionImage(string image_url, string prompt)
     {
-        // 1. Get Image
-        model.Image = image_url;
-        // 2. Dense Captioning and OCR
-        var sb = new StringBuilder();
+        string GPT4V_ENDPOINT = AOAIendpoint + gpt4Vision;
+        image_url = image_url + sasUri.Query;
 
-        ImageAnalysisClient client = new(
-            new Uri(Visionendpoint),
-            new AzureKeyCredential(VisionsubscriptionKey));
-
-        ImageAnalysisResult result = client.Analyze(
-            new Uri(model.Image + sasUri.Query),
-            VisualFeatures.DenseCaptions | VisualFeatures.Read,
-            new ImageAnalysisOptions { GenderNeutralCaption = false, Language = "en" });
-
-        foreach (var caption in result.DenseCaptions.Values)
+        using (httpClient = new HttpClient())
         {
-            sb.Append(caption.Text);
-        }
-
-        var captions = sb.ToString();
-
-        var ocr = "there is no text in the image";
-        if (result.Read.Blocks.Count > 0)
-        {
-            ocr = result.Read.Blocks[0].ToString();
-        }
-
-        // 4. Tags
-
-        // 5. Objects
-
-        // 6. Trancript of image
-
-        // 7. Describe Image GPT4
-        try
-        {
-            OpenAIClient aoaiClient;
-            if (string.IsNullOrEmpty(AOAIsubscriptionKey))
+            httpClient.DefaultRequestHeaders.Add("api-key", AOAIsubscriptionKey);
+            var payload = new
+            {
+                enhancements = new
+                {
+                    ocr = new { enabled = true },
+                    grounding = new { enabled = true }
+                },
+                messages = new object[]
+                {
+                    new {
+                        role = "system",
+                        content = new object[] {
+                            new {
+                                type = "text",
+                                text = "You are an AI assistant that helps people find information."
+                            }
+                        }
+                    },
+                    new {
+                        role = "user",
+                        content = new object[] {
+                            new {
+                                type = "image_url",
+                                image_url = new {
+                                    url = image_url
+                                }
+                            },
+                            new {
+                                type = "text",
+                                text = prompt
+                            }
+                        }
+                    }
+                },
+                temperature = 0.7,
+                top_p = 0.95,
+                max_tokens = 800,
+                stream = false
+            };
+            var response = await httpClient.PostAsync(GPT4V_ENDPOINT, new StringContent(JsonConvert.SerializeObject(payload), Encoding.UTF8, "application/json"));
+
+
+            if (response.IsSuccessStatusCode)
             {
-                aoaiClient = new OpenAIClient(
-                    new Uri(AOAIendpoint),
-                    new DefaultAzureCredential());
+                var responseData = JsonConvert.DeserializeObject<dynamic>(await response.Content.ReadAsStringAsync());
+
+                // Get the completion message content from the response
+                var response_final = responseData!.choices[0];
+                string final = response_final.message.content;
+                model.Message = final;
+                model.Image = image_url;
             }
             else
             {
-                aoaiClient = new OpenAIClient(
-                    new Uri(AOAIendpoint),
-                    new AzureKeyCredential(AOAIsubscriptionKey));
+                Console.WriteLine($"Error after GPT4V: {response.StatusCode}, {response.ReasonPhrase}");
             }
-
-            // If streaming is not selected
-            Response responseWithoutStream = await aoaiClient.GetChatCompletionsAsync(
-                new ChatCompletionsOptions()
-                {
-                    DeploymentName = AOAIDeploymentName,
-                    Messages =
-                    {
-                        new ChatRequestSystemMessage(@"The user will provide a list of descriptions of an image. I want you to create a unified and complete description of the image based of the list provided. Each suggested description is separated by a \ symbol. Also, it will provide the text detected in the image, try to associate the text detected (if any) with the rest of the captions of the image. If you are not sure, say to user something like 'MIGHT BE'. "),
-                        new ChatRequestUserMessage($"Descriptions: {captions}. & OCR: {ocr}" ),
-                    },
-                    Temperature = (float)0.7,
-                    MaxTokens = 1000,
-                    NucleusSamplingFactor = (float)0.95,
-                    FrequencyPenalty = 0,
-                    PresencePenalty = 0,
-                });
-
-            ChatCompletions completions = responseWithoutStream.Value;
-            ChatChoice results_analisis = completions.Choices[0];
-            model.Message = results_analisis.Message.Content;
-            ViewBag.Message = results_analisis.Message.Content;
-            ViewBag.Image = model.Image + sasUri.Query;
-            model.Image = model.Image + sasUri.Query;
-            Console.WriteLine(ViewBag.Message);
-            Console.WriteLine(ViewBag.Image);
-        }
-        catch (RequestFailedException)
-        {
-            throw;
         }
-        return Ok(model);
+        return View("ImageAnalyzer");
     }
 
     // Upload a file to my azure storage account
@@ -138,7 +122,12 @@ public async Task<IActionResult> UploadFile(IFormFile imageFile)
         {
             ViewBag.Message = "You must upload an image";
             return View("ImageAnalyzer");
         }
-
+        if (string.IsNullOrEmpty(HttpContext.Request.Form["text"]))
+        {
+            ViewBag.Message = "You must enter a prompt to evaluate";
+            return View("ImageAnalyzer", model);
+        }
+        model.Prompt = HttpContext.Request.Form["text"];
         // Upload file to azure storage account
         string url = imageFile.FileName.ToString();
         Console.WriteLine(url);
@@ -156,7 +145,8 @@ public async Task<IActionResult> UploadFile(IFormFile imageFile)
         }
 
         // Call EvaluateImage with the url
-        await DenseCaptionImage(blobUrl.ToString());
+        Console.WriteLine(blobUrl.ToString());
+        await DenseCaptionImage(blobUrl.ToString(), model.Prompt!);
 
         ViewBag.Waiting = null;
 
         return Ok(model);
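The reworked controller now takes an IHttpClientFactory in its constructor, which only resolves if the factory is registered at startup; Program.cs is not part of this diff, so here is a minimal sketch of the registration it presumably relies on (minimal hosting model assumed, names illustrative):

    // AddHttpClient() registers IHttpClientFactory and pooled message
    // handlers, enabling the constructor injection used by
    // ImageAnalyzerController.
    var builder = WebApplication.CreateBuilder(args);
    builder.Services.AddControllersWithViews();
    builder.Services.AddHttpClient();

    var app = builder.Build();
    app.MapDefaultControllerRoute();
    app.Run();

Worth noting: DenseCaptionImage still wraps a fresh new HttpClient() in a using block, which sidesteps the factory-created client stored by the constructor; reusing the injected client would avoid per-request socket churn.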
"), - new ChatRequestUserMessage($"Descriptions: {captions}. & OCR: {ocr}" ), - }, - Temperature = (float)0.7, - MaxTokens = 1000, - NucleusSamplingFactor = (float)0.95, - FrequencyPenalty = 0, - PresencePenalty = 0, - }); - - ChatCompletions completions = responseWithoutStream.Value; - ChatChoice results_analisis = completions.Choices[0]; - model.Message = results_analisis.Message.Content; - ViewBag.Message = results_analisis.Message.Content; - ViewBag.Image = model.Image + sasUri.Query; - model.Image = model.Image + sasUri.Query; - Console.WriteLine(ViewBag.Message); - Console.WriteLine(ViewBag.Image); - } - catch (RequestFailedException) - { - throw; } - return Ok(model); + return View("ImageAnalyzer"); } // Upload a file to my azure storage account @@ -138,7 +122,12 @@ public async Task UploadFile(IFormFile imageFile) ViewBag.Message = "You must upload an image"; return View("ImageAnalyzer"); } - + if (string.IsNullOrEmpty(HttpContext.Request.Form["text"])) + { + ViewBag.Message = "You must enter a prompt to evaluate"; + return View("ImageAnalyzer", model); + } + model.Prompt = HttpContext.Request.Form["text"]; // Upload file to azure storage account string url = imageFile.FileName.ToString(); Console.WriteLine(url); @@ -156,7 +145,8 @@ public async Task UploadFile(IFormFile imageFile) } // Call EvaluateImage with the url - await DenseCaptionImage(blobUrl.ToString()); + Console.WriteLine(blobUrl.ToString()); + await DenseCaptionImage(blobUrl.ToString(), model.Prompt!); ViewBag.Waiting = null; return Ok(model); diff --git a/src/AIHub/Models/ImageAnalyzerModel.cs b/src/AIHub/Models/ImageAnalyzerModel.cs index af4bb8d..675307c 100644 --- a/src/AIHub/Models/ImageAnalyzerModel.cs +++ b/src/AIHub/Models/ImageAnalyzerModel.cs @@ -10,5 +10,6 @@ public class ImageAnalyzerModel public string? Text { get; set; } public string? Image { get; set; } public string? Message { get; set; } + public string? Prompt { get; set; } } \ No newline at end of file diff --git a/src/AIHub/Views/ImageAnalyzer/ImageAnalyzer.cshtml b/src/AIHub/Views/ImageAnalyzer/ImageAnalyzer.cshtml index d027468..65b5ddc 100644 --- a/src/AIHub/Views/ImageAnalyzer/ImageAnalyzer.cshtml +++ b/src/AIHub/Views/ImageAnalyzer/ImageAnalyzer.cshtml @@ -5,7 +5,7 @@
diff --git a/src/AIHub/Views/ImageAnalyzer/ImageAnalyzer.cshtml b/src/AIHub/Views/ImageAnalyzer/ImageAnalyzer.cshtml
index d027468..65b5ddc 100644
--- a/src/AIHub/Views/ImageAnalyzer/ImageAnalyzer.cshtml
+++ b/src/AIHub/Views/ImageAnalyzer/ImageAnalyzer.cshtml
@@ -5,7 +5,7 @@
-
+
@@ -28,22 +28,43 @@
 
     Image Analyzer with Azure OpenAI Services
 
-    Optimize your visual content strategies using our service, which allows you to analyze your images using the advanced artificial intelligence of GPT-4 and Azure Vision Services. Uncover patterns, gain meaningful insights, and improve your data-driven decision making with accurate and comprehensive data provided by our system. Analyze your image using GPT4 and Azure Vision Services.
+    Optimize your visual content strategies using our service, which allows you to analyze your
+    images using the advanced artificial intelligence of GPT-4 and Azure Vision Services. Uncover
+    patterns, gain meaningful insights, and improve your data-driven decision making with accurate
+    and comprehensive data provided by our system. Analyze your image using GPT4 and Azure Vision
+    Services.
 
-
-
-
-
+
+
+
+
+
+
-
 
+
+
+
 
     Drop files here or click to upload.
 
-    You just need to upload an image (.jpg, .png).
+    You just need to upload an image (.jpg,
+    .png).
+
+
+
@@ -74,7 +95,7 @@
-
+
@@ -85,17 +106,20 @@
-
 
     Text Analyze Result Analysis Success
 
+
 
     Text Analyze Result Analysis Success
 
-
+
-
+
 @section Scripts {
-    @{await Html.RenderPartialAsync("_ImageAnalyzerScriptsPartial");}
+    @{
+        await Html.RenderPartialAsync("_ImageAnalyzerScriptsPartial");
+    }
 }
\ No newline at end of file
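For reference, DenseCaptionImage builds its request URL by plain concatenation of the two settings in the template below, so the endpoint value must end with a trailing slash; validating that at startup would be cheap insurance. A sketch of the resulting shape (resource and deployment names are invented placeholders):

    // ImageAnalyzer:OpenAIEndpoint + ImageAnalyzer:GPT4Vision, concatenated
    // exactly as the controller does. Both values below are placeholders.
    string AOAIendpoint = "https://my-resource.openai.azure.com/";
    string gpt4Vision = "openai/deployments/my-gpt4v/extensions/chat/completions?api-version=2023-07-01-preview";

    string GPT4V_ENDPOINT = AOAIendpoint + gpt4Vision;
    // => https://my-resource.openai.azure.com/openai/deployments/my-gpt4v/extensions/chat/completions?api-version=2023-07-01-preview
    Console.WriteLine(GPT4V_ENDPOINT);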
diff --git a/src/AIHub/appsettings.template.json b/src/AIHub/appsettings.template.json
index d404557..5ffda8b 100644
--- a/src/AIHub/appsettings.template.json
+++ b/src/AIHub/appsettings.template.json
@@ -15,12 +15,12 @@
     "BingKey": "",
     "OpenAIEndpoint": "",
     "OpenAISubscriptionKey": "",
-    "DeploymentName": "DemoBuild"
+    "DeploymentName": "gpt-35-turbo"
   },
   "CallCenter": {
     "OpenAIEndpoint": "",
     "OpenAISubscriptionKey": "",
-    "DeploymentName": "DemoBuild"
+    "DeploymentName": "gpt-35-turbo"
   },
   "AudioTranscription": {
     "SpeechLocation": "westeurope",
@@ -28,13 +28,11 @@
     "ContainerName": "audio-files"
   },
   "ImageAnalyzer": {
-    "VisionEndpoint": "",
-    "OCREndpoint": "",
-    "VisionSubscriptionKey": "",
     "OpenAIEndpoint": "",
+    "GPT4Vision": "openai/deployments/<DEPLOYMENT_NAME>/extensions/chat/completions?api-version=2023-07-01-preview",
     "OpenAISubscriptionKey": "KEY",
     "ContainerName": "image-analyzer",
-    "DeploymentName": "DemoBuild"
+    "DeploymentName": "gpt-35-turbo"
   },
   "FormAnalyzer": {
     "FormRecogEndpoint": "",
@@ -42,7 +40,7 @@
     "OpenAIEndpoint": "",
     "OpenAISubscriptionKey": "",
     "ContainerName": "form-analyzer",
-    "DeploymentName": "DemoBuild"
+    "DeploymentName": "gpt-35-turbo"
   },
   "DocumentComparison": {
     "FormRecogEndpoint": "",