Skip to content

Commit

Permalink
Add gpt4vision_endpoint variable and Prompt property to ImageAnalyzer…
Browse files Browse the repository at this point in the history
…Model
  • Loading branch information
heblasco committed Mar 6, 2024
1 parent 1fd5fde commit 4c6e046
Show file tree
Hide file tree
Showing 6 changed files with 123 additions and 117 deletions.
12 changes: 2 additions & 10 deletions infra/modules/ca-aihub/main.tf
Original file line number Diff line number Diff line change
Expand Up @@ -119,16 +119,8 @@ resource "azapi_resource" "ca_back" {
value = var.chat_gpt_deployment
},
{
name = "ImageAnalyzer__VisionEndpoint",
value = "${var.cognitive_service_endpoint}"
},
{
name = "ImageAnalyzer__OCREndpoint",
value = "${var.cognitive_service_endpoint}"
},
{
name = "ImageAnalyzer__VisionSubscriptionKey",
secretRef = "cognitive-service-key"
name = "ImageAnalyzer__GPT4Vision",
value = var.gpt4vision_endpoint
},
{
name = "ImageAnalyzer__OpenAIEndpoint",
Expand Down
1 change: 1 addition & 0 deletions infra/modules/ca-aihub/variables.tf
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ variable "chat_gpt4_vision_model" {}
variable "embeddings_deployment" {}
variable "embeddings_model" {}
variable "openai_endpoint" {}
variable "gpt4vision_endpoint" {}

variable "chat_fqdn" {}
variable "pbi_report_link" {}
Expand Down
164 changes: 77 additions & 87 deletions src/AIHub/Controllers/ImageAnalyzerController.cs
Original file line number Diff line number Diff line change
@@ -1,35 +1,39 @@
using System.ComponentModel.DataAnnotations;
using Newtonsoft.Json;
using System;
using System.IO;
using System.Net.Http;
using System.Text;
using System.Threading.Tasks;

namespace MVCWeb.Controllers;

public class ImageAnalyzerController : Controller
{
private string Visionendpoint;
private string OCRendpoint;
private string VisionsubscriptionKey;
private string AOAIendpoint;
private string AOAIsubscriptionKey;
private string storageconnstring;
private string AOAIDeploymentName;
private string gpt4Vision;
private readonly BlobContainerClient containerClient;
private readonly IEnumerable<BlobItem> blobs;
private Uri sasUri;
private ImageAnalyzerModel model;
private HttpClient httpClient;

public ImageAnalyzerController(IConfiguration config)
public ImageAnalyzerController(IConfiguration config, IHttpClientFactory clientFactory)
{
Visionendpoint = config.GetValue<string>("ImageAnalyzer:VisionEndpoint") ?? throw new ArgumentNullException("VisionEndpoint");
OCRendpoint = config.GetValue<string>("ImageAnalyzer:OCREndpoint") ?? throw new ArgumentNullException("OCREndpoint");
VisionsubscriptionKey = config.GetValue<string>("ImageAnalyzer:VisionSubscriptionKey") ?? throw new ArgumentNullException("VisionSubscriptionKey");
AOAIendpoint = config.GetValue<string>("ImageAnalyzer:OpenAIEndpoint") ?? throw new ArgumentNullException("OpenAIEndpoint");
AOAIsubscriptionKey = config.GetValue<string>("ImageAnalyzer:OpenAISubscriptionKey") ?? throw new ArgumentNullException("OpenAISubscriptionKey");
storageconnstring = config.GetValue<string>("Storage:ConnectionString") ?? throw new ArgumentNullException("ConnectionString");
BlobServiceClient blobServiceClient = new BlobServiceClient(storageconnstring);
containerClient = blobServiceClient.GetBlobContainerClient(config.GetValue<string>("Storage:ContainerName"));
sasUri = containerClient.GenerateSasUri(Azure.Storage.Sas.BlobContainerSasPermissions.Read, DateTimeOffset.UtcNow.AddHours(1));
AOAIDeploymentName = config.GetValue<string>("ImageAnalyzer:DeploymentName") ?? throw new ArgumentNullException("DeploymentName");
// Obtiene una lista de blobs en el contenedor
gpt4Vision = config.GetValue<string>("ImageAnalyzer:GPT4Vision") ?? throw new ArgumentNullException("GPT4Vision");
// Obtain the blobs list in the container
blobs = containerClient.GetBlobs();
httpClient = clientFactory.CreateClient();
model = new ImageAnalyzerModel();
}

Expand All @@ -39,93 +43,73 @@ public IActionResult ImageAnalyzer()
}

[HttpPost]
public async Task<IActionResult> DenseCaptionImage(string image_url)
public async Task<IActionResult> DenseCaptionImage(string image_url, string prompt)
{
// 1. Get Image
model.Image = image_url;
// 2. Dense Captioning and OCR
var sb = new StringBuilder();
string GPT4V_ENDPOINT = AOAIendpoint + gpt4Vision;
image_url = image_url + sasUri.Query;

ImageAnalysisClient client = new(
new Uri(Visionendpoint),
new AzureKeyCredential(VisionsubscriptionKey));

ImageAnalysisResult result = client.Analyze(
new Uri(model.Image + sasUri.Query),
VisualFeatures.DenseCaptions | VisualFeatures.Read,
new ImageAnalysisOptions { GenderNeutralCaption = false, Language = "en" });

foreach (var caption in result.DenseCaptions.Values)
using (httpClient = new HttpClient())
{
sb.Append(caption.Text);
}

var captions = sb.ToString();

var ocr = "there is no text in the image";
if (result.Read.Blocks.Count > 0)
{
ocr = result.Read.Blocks[0].ToString();
}

// 4. Tags


// 5. Objects


// 6. Transcript of image


// 7. Describe Image GPT4
try
{
OpenAIClient aoaiClient;
if (string.IsNullOrEmpty(AOAIsubscriptionKey))
httpClient.DefaultRequestHeaders.Add("api-key", AOAIsubscriptionKey);
var payload = new
{
enhancements = new
{
ocr = new { enabled = true },
grounding = new { enabled = true }
},
messages = new object[]
{
new {
role = "system",
content = new object[] {
new {
type = "text",
text = "You are an AI assistant that helps people find information."
}
}
},
new {
role = "user",
content = new object[] {
new {
type = "image_url",
image_url = new {
url = image_url
}
},
new {
type = "text",
text = prompt
}
}
}
},
temperature = 0.7,
top_p = 0.95,
max_tokens = 800,
stream = false
};
var response = await httpClient.PostAsync(GPT4V_ENDPOINT, new StringContent(JsonConvert.SerializeObject(payload), Encoding.UTF8, "application/json"));


if (response.IsSuccessStatusCode)
{
aoaiClient = new OpenAIClient(
new Uri(AOAIendpoint),
new DefaultAzureCredential());
var responseData = JsonConvert.DeserializeObject<dynamic>(await response.Content.ReadAsStringAsync());

// Get the web pages from the response
var response_final = responseData!.choices[0];
string final = response_final.message.content;
model.Message = final;
model.Image = image_url;
}
else
{
aoaiClient = new OpenAIClient(
new Uri(AOAIendpoint),
new AzureKeyCredential(AOAIsubscriptionKey));
Console.WriteLine($"Error after GPT4V: {response.StatusCode}, {response.ReasonPhrase}");
}

// If streaming is not selected
Response<ChatCompletions> responseWithoutStream = await aoaiClient.GetChatCompletionsAsync(
new ChatCompletionsOptions()
{
DeploymentName = AOAIDeploymentName,
Messages =
{
new ChatRequestSystemMessage(@"The user will provide a list of descriptions of an image. I want you to create a unified and complete description of the image based of the list provided. Each suggested description is separated by a \ symbol. Also, it will provide the text detected in the image, try to associate the text detected (if any) with the rest of the captions of the image. If you are not sure, say to user something like 'MIGHT BE'. "),
new ChatRequestUserMessage($"Descriptions: {captions}. & OCR: {ocr}" ),
},
Temperature = (float)0.7,
MaxTokens = 1000,
NucleusSamplingFactor = (float)0.95,
FrequencyPenalty = 0,
PresencePenalty = 0,
});

ChatCompletions completions = responseWithoutStream.Value;
ChatChoice results_analisis = completions.Choices[0];
model.Message = results_analisis.Message.Content;
ViewBag.Message = results_analisis.Message.Content;
ViewBag.Image = model.Image + sasUri.Query;
model.Image = model.Image + sasUri.Query;
Console.WriteLine(ViewBag.Message);
Console.WriteLine(ViewBag.Image);
}
catch (RequestFailedException)
{
throw;
}

return Ok(model);
return View("ImageAnalyzer");
}

// Upload a file to my azure storage account
Expand All @@ -138,7 +122,12 @@ public async Task<IActionResult> UploadFile(IFormFile imageFile)
ViewBag.Message = "You must upload an image";
return View("ImageAnalyzer");
}

if (string.IsNullOrEmpty(HttpContext.Request.Form["text"]))
{
ViewBag.Message = "You must enter a prompt to evaluate";
return View("ImageAnalyzer", model);
}
model.Prompt = HttpContext.Request.Form["text"];
// Upload file to azure storage account
string url = imageFile.FileName.ToString();
Console.WriteLine(url);
Expand All @@ -156,7 +145,8 @@ public async Task<IActionResult> UploadFile(IFormFile imageFile)
}

// Call EvaluateImage with the url
await DenseCaptionImage(blobUrl.ToString());
Console.WriteLine(blobUrl.ToString());
await DenseCaptionImage(blobUrl.ToString(), model.Prompt!);
ViewBag.Waiting = null;

return Ok(model);
Expand Down
1 change: 1 addition & 0 deletions src/AIHub/Models/ImageAnalyzerModel.cs
Original file line number Diff line number Diff line change
Expand Up @@ -10,5 +10,6 @@ public class ImageAnalyzerModel
public string? Text { get; set; }
public string? Image { get; set; }
public string? Message { get; set; }
public string? Prompt { get; set; }

}
50 changes: 37 additions & 13 deletions src/AIHub/Views/ImageAnalyzer/ImageAnalyzer.cshtml
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@

<!-- Start Content-->
<div class="container-fluid">

<!-- start page title -->
<div class="row">
<div class="col-12">
Expand All @@ -28,22 +28,43 @@
<div class="card-body">
<h4 class="header-title">Image Analyzer with Azure OpenAI Services</h4>
<p class="text-muted font-14">
Optimize your visual content strategies using our service, which allows you to analyze your images using the advanced artificial intelligence of GPT-4 and Azure Vision Services. Uncover patterns, gain meaningful insights, and improve your data-driven decision making with accurate and comprehensive data provided by our system. Analyze your image using GPT4 and Azure Vision Services.
Optimize your visual content strategies using our service, which allows you to analyze your
images using the advanced artificial intelligence of GPT-4 and Azure Vision Services. Uncover
patterns, gain meaningful insights, and improve your data-driven decision making with accurate
and comprehensive data provided by our system. Analyze your image using GPT4 and Azure Vision
Services.
</p>

<div class="tab-content">
<div class="tab-pane show active" id="file-upload-preview">
<form asp-controller="ImageAnalyzer" asp-action="UploadFile" method="post" enctype="multipart/form-data" id="dropaiimage" class="dropzone" data-plugin="dropzone" data-previews-container="#file-previews" data-upload-preview-template="#uploadPreviewTemplate">
<div class="fallback">
<input name="file" type="file" id="imageFile" name="imageFile"/>
</div>
<form asp-controller="ImageAnalyzer" asp-action="UploadFile" method="post"
enctype="multipart/form-data" id="dropaiimage" class="dropzone" data-plugin="dropzone"
data-previews-container="#file-previews"
data-upload-preview-template="#uploadPreviewTemplate">

<div class="mb-3">
<label for="text" class="form-label">Prompt:</label>
<textarea class="form-control" id="text" name="text" maxlength="225"
rows="3">@(Model.Prompt ?? "Describe the image in detail")</textarea>
</div>
<div class="tab-content form-control">
<div class="tab-pane show active" id="file-upload-preview">

<div class="dz-message needsclick">
<div class="fallback">
<input type="file" id="dropaiimage" name="dropaiimage" />
</div>
<div class="dz-message needsclick">
<i class="h1 text-muted ri-upload-cloud-2-line"></i>
<h3>Drop files here or click to upload. </h3>
<span class="text-muted font-13"><strong>You just need to upload an image (.jpg, .png).</strong> </span>
<span class="text-muted font-13"><strong>You just need to upload an image (.jpg,
.png).</strong> </span>
</div>

<!-- Preview -->
<div class="dropzone-previews mt-3" id="file-previews"></div>

</div> <!-- end preview-->
</div> <!-- end tab-content-->
</form>

<!-- Preview -->
Expand Down Expand Up @@ -74,7 +95,7 @@
</div>
</div>

<!-- end row -->
<!-- end row -->
<!--- show results -->
<div class="row d-none" id="showresult">
<div class="col-lg-12">
Expand All @@ -85,17 +106,20 @@
</div>
<div class="col-md-8">
<div class="card-body">
<h2 class="header-title mt-0 mb-3">Text Analyze Result <span class="badge bg-success rounded-pill">Analysis Success</span></h2>
<h2 class="header-title mt-0 mb-3">Text Analyze Result <span
class="badge bg-success rounded-pill">Analysis Success</span></h2>
<p class="text-muted font-16 mb-3" id="show-message-result"></p>
</div>
</div>
</div>
</div>
</div>
</div>
</div>
</div>
</div>
<!-- container -->

@section Scripts {
@{await Html.RenderPartialAsync("_ImageAnalyzerScriptsPartial");}
@{
await Html.RenderPartialAsync("_ImageAnalyzerScriptsPartial");
}
}
12 changes: 5 additions & 7 deletions src/AIHub/appsettings.template.json
Original file line number Diff line number Diff line change
Expand Up @@ -15,34 +15,32 @@
"BingKey": "<Bing Key>",
"OpenAIEndpoint": "<AOAI EndPoint>",
"OpenAISubscriptionKey": "<KEY>",
"DeploymentName": "DemoBuild"
"DeploymentName": "gpt-35-turbo"
},
"CallCenter": {
"OpenAIEndpoint": "<AOAI EndPoint>",
"OpenAISubscriptionKey": "<KEY>",
"DeploymentName": "DemoBuild"
"DeploymentName": "gpt-35-turbo"
},
"AudioTranscription": {
"SpeechLocation": "westeurope",
"SpeechSubscriptionKey": "<your speech key>",
"ContainerName": "audio-files"
},
"ImageAnalyzer": {
"VisionEndpoint": "<Vision EndPoint>",
"OCREndpoint": "<OCR EndPoint>",
"VisionSubscriptionKey": "<KEY>",
"OpenAIEndpoint": "<AOAI EndPoint>",
"GPT4Vision": "openai/deployments/<deploymentName>/extensions/chat/completions?api-version=2023-07-01-preview",
"OpenAISubscriptionKey": "KEY",
"ContainerName": "image-analyzer",
"DeploymentName": "DemoBuild"
"DeploymentName": "gpt-35-turbo"
},
"FormAnalyzer": {
"FormRecogEndpoint": "<Form Recog EndPoint>",
"FormRecogSubscriptionKey": "<KEY>",
"OpenAIEndpoint": "<AOAI EndPoint>",
"OpenAISubscriptionKey": "<KEY>",
"ContainerName": "form-analyzer",
"DeploymentName": "DemoBuild"
"DeploymentName": "gpt-35-turbo"
},
"DocumentComparison": {
"FormRecogEndpoint": "<Form Recog EndPoint>",
Expand Down

0 comments on commit 4c6e046

Please sign in to comment.