Skip to content

Commit

Permalink
Add gpt4vision_endpoint variable and Prompt property to ImageAnalyzer…
Browse files Browse the repository at this point in the history
…Model
  • Loading branch information
heblasco committed Mar 6, 2024
1 parent 1fd5fde commit 4c6e046
Show file tree
Hide file tree
Showing 6 changed files with 123 additions and 117 deletions.
12 changes: 2 additions & 10 deletions infra/modules/ca-aihub/main.tf
Original file line number Diff line number Diff line change
Expand Up @@ -119,16 +119,8 @@ resource "azapi_resource" "ca_back" {
value = var.chat_gpt_deployment
},
{
name = "ImageAnalyzer__VisionEndpoint",
value = "${var.cognitive_service_endpoint}"
},
{
name = "ImageAnalyzer__OCREndpoint",
value = "${var.cognitive_service_endpoint}"
},
{
name = "ImageAnalyzer__VisionSubscriptionKey",
secretRef = "cognitive-service-key"
name = "ImageAnalyzer__GPT4Vision",
value = var.gpt4vision_endpoint
},
{
name = "ImageAnalyzer__OpenAIEndpoint",
Expand Down
1 change: 1 addition & 0 deletions infra/modules/ca-aihub/variables.tf
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ variable "chat_gpt4_vision_model" {}
variable "embeddings_deployment" {}
variable "embeddings_model" {}
variable "openai_endpoint" {}
variable "gpt4vision_endpoint" {}

variable "chat_fqdn" {}
variable "pbi_report_link" {}
Expand Down
164 changes: 77 additions & 87 deletions src/AIHub/Controllers/ImageAnalyzerController.cs
Original file line number Diff line number Diff line change
@@ -1,35 +1,39 @@
using System.ComponentModel.DataAnnotations;
using Newtonsoft.Json;
using System;
using System.IO;
using System.Net.Http;
using System.Text;
using System.Threading.Tasks;

namespace MVCWeb.Controllers;

public class ImageAnalyzerController : Controller
{
private string Visionendpoint;
private string OCRendpoint;
private string VisionsubscriptionKey;
private string AOAIendpoint;
private string AOAIsubscriptionKey;
private string storageconnstring;
private string AOAIDeploymentName;
private string gpt4Vision;
private readonly BlobContainerClient containerClient;
private readonly IEnumerable<BlobItem> blobs;
private Uri sasUri;
private ImageAnalyzerModel model;
private HttpClient httpClient;

public ImageAnalyzerController(IConfiguration config)
public ImageAnalyzerController(IConfiguration config, IHttpClientFactory clientFactory)
{
Visionendpoint = config.GetValue<string>("ImageAnalyzer:VisionEndpoint") ?? throw new ArgumentNullException("VisionEndpoint");
OCRendpoint = config.GetValue<string>("ImageAnalyzer:OCREndpoint") ?? throw new ArgumentNullException("OCREndpoint");
VisionsubscriptionKey = config.GetValue<string>("ImageAnalyzer:VisionSubscriptionKey") ?? throw new ArgumentNullException("VisionSubscriptionKey");
AOAIendpoint = config.GetValue<string>("ImageAnalyzer:OpenAIEndpoint") ?? throw new ArgumentNullException("OpenAIEndpoint");
AOAIsubscriptionKey = config.GetValue<string>("ImageAnalyzer:OpenAISubscriptionKey") ?? throw new ArgumentNullException("OpenAISubscriptionKey");
storageconnstring = config.GetValue<string>("Storage:ConnectionString") ?? throw new ArgumentNullException("ConnectionString");
BlobServiceClient blobServiceClient = new BlobServiceClient(storageconnstring);
containerClient = blobServiceClient.GetBlobContainerClient(config.GetValue<string>("Storage:ContainerName"));
sasUri = containerClient.GenerateSasUri(Azure.Storage.Sas.BlobContainerSasPermissions.Read, DateTimeOffset.UtcNow.AddHours(1));
AOAIDeploymentName = config.GetValue<string>("ImageAnalyzer:DeploymentName") ?? throw new ArgumentNullException("DeploymentName");
// Obtiene una lista de blobs en el contenedor
gpt4Vision = config.GetValue<string>("ImageAnalyzer:GPT4Vision") ?? throw new ArgumentNullException("GPT4Vision");
// Obtain the blobs list in the container
blobs = containerClient.GetBlobs();
httpClient = clientFactory.CreateClient();
model = new ImageAnalyzerModel();
}

Expand All @@ -39,93 +43,73 @@ public IActionResult ImageAnalyzer()
}

[HttpPost]
public async Task<IActionResult> DenseCaptionImage(string image_url)
public async Task<IActionResult> DenseCaptionImage(string image_url, string prompt)
{
// 1. Get Image
model.Image = image_url;
// 2. Dense Captioning and OCR
var sb = new StringBuilder();
string GPT4V_ENDPOINT = AOAIendpoint + gpt4Vision;
image_url = image_url + sasUri.Query;

ImageAnalysisClient client = new(
new Uri(Visionendpoint),
new AzureKeyCredential(VisionsubscriptionKey));

ImageAnalysisResult result = client.Analyze(
new Uri(model.Image + sasUri.Query),
VisualFeatures.DenseCaptions | VisualFeatures.Read,
new ImageAnalysisOptions { GenderNeutralCaption = false, Language = "en" });

foreach (var caption in result.DenseCaptions.Values)
using (httpClient = new HttpClient())
{
sb.Append(caption.Text);
}

var captions = sb.ToString();

var ocr = "there is no text in the image";
if (result.Read.Blocks.Count > 0)
{
ocr = result.Read.Blocks[0].ToString();
}

// 4. Tags


// 5. Objects


// 6. Transcript of image


// 7. Describe Image GPT4
try
{
OpenAIClient aoaiClient;
if (string.IsNullOrEmpty(AOAIsubscriptionKey))
httpClient.DefaultRequestHeaders.Add("api-key", AOAIsubscriptionKey);
var payload = new
{
enhancements = new
{
ocr = new { enabled = true },
grounding = new { enabled = true }
},
messages = new object[]
{
new {
role = "system",
content = new object[] {
new {
type = "text",
text = "You are an AI assistant that helps people find information."
}
}
},
new {
role = "user",
content = new object[] {
new {
type = "image_url",
image_url = new {
url = image_url
}
},
new {
type = "text",
text = prompt
}
}
}
},
temperature = 0.7,
top_p = 0.95,
max_tokens = 800,
stream = false
};
var response = await httpClient.PostAsync(GPT4V_ENDPOINT, new StringContent(JsonConvert.SerializeObject(payload), Encoding.UTF8, "application/json"));


if (response.IsSuccessStatusCode)
{
aoaiClient = new OpenAIClient(
new Uri(AOAIendpoint),
new DefaultAzureCredential());
var responseData = JsonConvert.DeserializeObject<dynamic>(await response.Content.ReadAsStringAsync());

// Get the web pages from the response
var response_final = responseData!.choices[0];
string final = response_final.message.content;
model.Message = final;
model.Image = image_url;
}
else
{
aoaiClient = new OpenAIClient(
new Uri(AOAIendpoint),
new AzureKeyCredential(AOAIsubscriptionKey));
Console.WriteLine($"Error after GPT4V: {response.StatusCode}, {response.ReasonPhrase}");
}

// If streaming is not selected
Response<ChatCompletions> responseWithoutStream = await aoaiClient.GetChatCompletionsAsync(
new ChatCompletionsOptions()
{
DeploymentName = AOAIDeploymentName,
Messages =
{
new ChatRequestSystemMessage(@"The user will provide a list of descriptions of an image. I want you to create a unified and complete description of the image based of the list provided. Each suggested description is separated by a \ symbol. Also, it will provide the text detected in the image, try to associate the text detected (if any) with the rest of the captions of the image. If you are not sure, say to user something like 'MIGHT BE'. "),
new ChatRequestUserMessage($"Descriptions: {captions}. & OCR: {ocr}" ),
},
Temperature = (float)0.7,
MaxTokens = 1000,
NucleusSamplingFactor = (float)0.95,
FrequencyPenalty = 0,
PresencePenalty = 0,
});

ChatCompletions completions = responseWithoutStream.Value;
ChatChoice results_analisis = completions.Choices[0];
model.Message = results_analisis.Message.Content;
ViewBag.Message = results_analisis.Message.Content;
ViewBag.Image = model.Image + sasUri.Query;
model.Image = model.Image + sasUri.Query;
Console.WriteLine(ViewBag.Message);
Console.WriteLine(ViewBag.Image);
}
catch (RequestFailedException)
{
throw;
}

return Ok(model);
return View("ImageAnalyzer");
}

// Upload a file to my azure storage account
Expand All @@ -138,7 +122,12 @@ public async Task<IActionResult> UploadFile(IFormFile imageFile)
ViewBag.Message = "You must upload an image";
return View("ImageAnalyzer");
}

if (string.IsNullOrEmpty(HttpContext.Request.Form["text"]))
{
ViewBag.Message = "You must enter a prompt to evaluate";
return View("ImageAnalyzer", model);
}
model.Prompt = HttpContext.Request.Form["text"];
// Upload file to azure storage account
string url = imageFile.FileName.ToString();
Console.WriteLine(url);
Expand All @@ -156,7 +145,8 @@ public async Task<IActionResult> UploadFile(IFormFile imageFile)
}

// Call EvaluateImage with the url
await DenseCaptionImage(blobUrl.ToString());
Console.WriteLine(blobUrl.ToString());
await DenseCaptionImage(blobUrl.ToString(), model.Prompt!);
ViewBag.Waiting = null;

return Ok(model);
Expand Down
1 change: 1 addition & 0 deletions src/AIHub/Models/ImageAnalyzerModel.cs
Original file line number Diff line number Diff line change
Expand Up @@ -10,5 +10,6 @@ public class ImageAnalyzerModel
public string? Text { get; set; }
public string? Image { get; set; }
public string? Message { get; set; }
public string? Prompt { get; set; }

}
50 changes: 37 additions & 13 deletions src/AIHub/Views/ImageAnalyzer/ImageAnalyzer.cshtml
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@

<!-- Start Content-->
<div class="container-fluid">

<!-- start page title -->
<div class="row">
<div class="col-12">
Expand All @@ -28,22 +28,43 @@
<div class="card-body">
<h4 class="header-title">Image Analyzer with Azure OpenAI Services</h4>
<p class="text-muted font-14">
Optimize your visual content strategies using our service, which allows you to analyze your images using the advanced artificial intelligence of GPT-4 and Azure Vision Services. Uncover patterns, gain meaningful insights, and improve your data-driven decision making with accurate and comprehensive data provided by our system. Analyze your image using GPT4 and Azure Vision Services.
Optimize your visual content strategies using our service, which allows you to analyze your
images using the advanced artificial intelligence of GPT-4 and Azure Vision Services. Uncover
patterns, gain meaningful insights, and improve your data-driven decision making with accurate
and comprehensive data provided by our system. Analyze your image using GPT4 and Azure Vision
Services.
</p>

<div class="tab-content">
<div class="tab-pane show active" id="file-upload-preview">
<form asp-controller="ImageAnalyzer" asp-action="UploadFile" method="post" enctype="multipart/form-data" id="dropaiimage" class="dropzone" data-plugin="dropzone" data-previews-container="#file-previews" data-upload-preview-template="#uploadPreviewTemplate">
<div class="fallback">
<input name="file" type="file" id="imageFile" name="imageFile"/>
</div>
<form asp-controller="ImageAnalyzer" asp-action="UploadFile" method="post"
enctype="multipart/form-data" id="dropaiimage" class="dropzone" data-plugin="dropzone"
data-previews-container="#file-previews"
data-upload-preview-template="#uploadPreviewTemplate">

<div class="mb-3">
<label for="text" class="form-label">Prompt:</label>
<textarea class="form-control" id="text" name="text" maxlength="225"
rows="3">@(Model.Prompt ?? "Describe the image in detail")</textarea>
</div>
<div class="tab-content form-control">
<div class="tab-pane show active" id="file-upload-preview">

<div class="dz-message needsclick">
<div class="fallback">
<input type="file" id="dropaiimage" name="dropaiimage" />
</div>
<div class="dz-message needsclick">
<i class="h1 text-muted ri-upload-cloud-2-line"></i>
<h3>Drop files here or click to upload. </h3>
<span class="text-muted font-13"><strong>You just need to upload an image (.jpg, .png).</strong> </span>
<span class="text-muted font-13"><strong>You just need to upload an image (.jpg,
.png).</strong> </span>
</div>

<!-- Preview -->
<div class="dropzone-previews mt-3" id="file-previews"></div>

</div> <!-- end preview-->
</div> <!-- end tab-content-->
</form>

<!-- Preview -->
Expand Down Expand Up @@ -74,7 +95,7 @@
</div>
</div>

<!-- end row -->
<!-- end row -->
<!--- show results -->
<div class="row d-none" id="showresult">
<div class="col-lg-12">
Expand All @@ -85,17 +106,20 @@
</div>
<div class="col-md-8">
<div class="card-body">
<h2 class="header-title mt-0 mb-3">Text Analyze Result <span class="badge bg-success rounded-pill">Analysis Success</span></h2>
<h2 class="header-title mt-0 mb-3">Text Analyze Result <span
class="badge bg-success rounded-pill">Analysis Success</span></h2>
<p class="text-muted font-16 mb-3" id="show-message-result"></p>
</div>
</div>
</div>
</div>
</div>
</div>
</div>
</div>
</div>
<!-- container -->

@section Scripts {
@{await Html.RenderPartialAsync("_ImageAnalyzerScriptsPartial");}
@{
await Html.RenderPartialAsync("_ImageAnalyzerScriptsPartial");
}
}
12 changes: 5 additions & 7 deletions src/AIHub/appsettings.template.json
Original file line number Diff line number Diff line change
Expand Up @@ -15,34 +15,32 @@
"BingKey": "<Bing Key>",
"OpenAIEndpoint": "<AOAI EndPoint>",
"OpenAISubscriptionKey": "<KEY>",
"DeploymentName": "DemoBuild"
"DeploymentName": "gpt-35-turbo"
},
"CallCenter": {
"OpenAIEndpoint": "<AOAI EndPoint>",
"OpenAISubscriptionKey": "<KEY>",
"DeploymentName": "DemoBuild"
"DeploymentName": "gpt-35-turbo"
},
"AudioTranscription": {
"SpeechLocation": "westeurope",
"SpeechSubscriptionKey": "<your speech key>",
"ContainerName": "audio-files"
},
"ImageAnalyzer": {
"VisionEndpoint": "<Vision EndPoint>",
"OCREndpoint": "<OCR EndPoint>",
"VisionSubscriptionKey": "<KEY>",
"OpenAIEndpoint": "<AOAI EndPoint>",
"GPT4Vision": "openai/deployments/<deploymentName>/extensions/chat/completions?api-version=2023-07-01-preview",
"OpenAISubscriptionKey": "KEY",
"ContainerName": "image-analyzer",
"DeploymentName": "DemoBuild"
"DeploymentName": "gpt-35-turbo"
},
"FormAnalyzer": {
"FormRecogEndpoint": "<Form Recog EndPoint>",
"FormRecogSubscriptionKey": "<KEY>",
"OpenAIEndpoint": "<AOAI EndPoint>",
"OpenAISubscriptionKey": "<KEY>",
"ContainerName": "form-analyzer",
"DeploymentName": "DemoBuild"
"DeploymentName": "gpt-35-turbo"
},
"DocumentComparison": {
"FormRecogEndpoint": "<Form Recog EndPoint>",
Expand Down

0 comments on commit 4c6e046

Please sign in to comment.