From 41419af63209bde344b00f51a333e805d32c9a02 Mon Sep 17 00:00:00 2001
From: neosouwchuan <55700126+neosouwchuan@users.noreply.github.com>
Date: Sun, 2 Jun 2024 22:25:28 +0000
Subject: [PATCH] Change VLM to use bigger train set

---
 vlm/src/VLMManager.py | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/vlm/src/VLMManager.py b/vlm/src/VLMManager.py
index 117f171..21c0255 100644
--- a/vlm/src/VLMManager.py
+++ b/vlm/src/VLMManager.py
@@ -26,7 +26,7 @@ def __init__(self):
         print([f for f in os.listdir('.') if os.path.isfile(f)])
         self.clipmodel= torch.load(path.join(path.dirname(path.abspath(__file__)), "clip_ft_2.pt"))
         self.objects = ["cargo aircraft","light aircraft","commercial aircraft","drone","missile","helicopter","fighter jet","fighter plane"]
-        self.model = YOLOWorld(path.join(path.dirname(path.abspath(__file__)), "yoloworldbest2.pt")).to(self.device)
+        self.model = YOLOWorld(path.join(path.dirname(path.abspath(__file__)), "800allbest.pt")).to(self.device)
         for i in self.clipmodel.parameters():
             i.requires_grad=False
         for i in self.model.parameters():
@@ -51,7 +51,7 @@ def identify(self, imagebyte: bytes, caption: str):
         tokenizedtext = clip.tokenize([caption]).to(self.device)
         clipprob = []
         maxscore = 0
-        for chosenindex in possible:
+        for chosenindex in range(len(bboxlist)):
             bbox = bboxlist[chosenindex]
             bbox[0]*=1520
             bbox[1]*=870
@@ -59,10 +59,10 @@ def identify(self, imagebyte: bytes, caption: str):
             bbox[3]*=870
             deltax = bbox[2]-bbox[0]
             deltay = bbox[3]-bbox[1]
-            bbox[0]-=deltax/2
-            bbox[1]-=deltay/2
-            bbox[2]-=deltax/2
-            bbox[3]-=deltay/2
+            # bbox[0]-=deltax/2
+            # bbox[1]-=deltay/2
+            # bbox[2]-=deltax/2
+            # bbox[3]-=deltay/2
             croppedimage = inputimage.crop(bbox)
             croppedimage = self.clippreprocess(croppedimage).unsqueeze(0).to(self.device)
             logits_per_image, logits_per_text = self.clipmodel(croppedimage, tokenizedtext)
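
For readers tracing what the last two hunks change, here is a minimal, self-contained sketch of the crop loop in identify() after this patch: every detected box (not just the indices in `possible`) is scaled from normalised coordinates to the 1520x870 canvas implied by the hard-coded factors, and, with the half-width/half-height shift commented out, the boxes are treated as corner (x1, y1, x2, y2) coordinates with no recentring. `crop_candidates` and `demo_image` are hypothetical names for illustration; only Pillow is assumed.

    # Sketch only -- mirrors the post-patch behaviour of identify()'s crop loop.
    from PIL import Image

    IMG_W, IMG_H = 1520, 870  # the scale factors hard-coded in identify()

    def crop_candidates(image, bboxlist):
        # Every detection is considered, matching the change from
        # `for chosenindex in possible` to `range(len(bboxlist))`.
        crops = []
        for bbox in bboxlist:
            # Scale normalised xyxy corners to pixels; the now-commented-out
            # half-width/half-height shift is skipped, so no recentring happens.
            x1 = bbox[0] * IMG_W
            y1 = bbox[1] * IMG_H
            x2 = bbox[2] * IMG_W
            y2 = bbox[3] * IMG_H
            crops.append(image.crop((x1, y1, x2, y2)))
        return crops

    if __name__ == "__main__":
        demo_image = Image.new("RGB", (IMG_W, IMG_H))
        boxes = [[0.10, 0.20, 0.30, 0.50]]  # one normalised x1, y1, x2, y2 box
        print([c.size for c in crop_candidates(demo_image, boxes)])  # [(304, 261)]

In the unchanged tail of the hunk, each such crop is then run through self.clippreprocess and scored against the tokenized caption by the fine-tuned CLIP model.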