From 41419af63209bde344b00f51a333e805d32c9a02 Mon Sep 17 00:00:00 2001
From: neosouwchuan <55700126+neosouwchuan@users.noreply.github.com>
Date: Sun, 2 Jun 2024 22:25:28 +0000
Subject: [PATCH] Change VLM to use bigger train set

---
 vlm/src/VLMManager.py | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/vlm/src/VLMManager.py b/vlm/src/VLMManager.py
index 117f171..21c0255 100644
--- a/vlm/src/VLMManager.py
+++ b/vlm/src/VLMManager.py
@@ -26,7 +26,7 @@ def __init__(self):
         print([f for f in os.listdir('.') if os.path.isfile(f)])
         self.clipmodel= torch.load(path.join(path.dirname(path.abspath(__file__)), "clip_ft_2.pt"))
         self.objects = ["cargo aircraft","light aircraft","commercial aircraft","drone","missile","helicopter","fighter jet","fighter plane"]
-        self.model = YOLOWorld(path.join(path.dirname(path.abspath(__file__)), "yoloworldbest2.pt")).to(self.device)
+        self.model = YOLOWorld(path.join(path.dirname(path.abspath(__file__)), "800allbest.pt")).to(self.device)
         for i in self.clipmodel.parameters():
             i.requires_grad=False
         for i in self.model.parameters():
@@ -51,7 +51,7 @@ def identify(self, imagebyte: bytes, caption: str):
         tokenizedtext = clip.tokenize([caption]).to(self.device)
         clipprob = []
         maxscore = 0
-        for chosenindex in possible:
+        for chosenindex in range(len(bboxlist)):
             bbox = bboxlist[chosenindex]
             bbox[0]*=1520
             bbox[1]*=870
@@ -59,10 +59,10 @@ def identify(self, imagebyte: bytes, caption: str):
             bbox[3]*=870
             deltax = bbox[2]-bbox[0]
             deltay = bbox[3]-bbox[1]
-            bbox[0]-=deltax/2
-            bbox[1]-=deltay/2
-            bbox[2]-=deltax/2
-            bbox[3]-=deltay/2
+            # bbox[0]-=deltax/2
+            # bbox[1]-=deltay/2
+            # bbox[2]-=deltax/2
+            # bbox[3]-=deltay/2
             croppedimage = inputimage.crop(bbox)
             croppedimage = self.clippreprocess(croppedimage).unsqueeze(0).to(self.device)
             logits_per_image, logits_per_text = self.clipmodel(croppedimage, tokenizedtext)
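
For readers tracing what the last two hunks change, here is a minimal, self-contained sketch of the crop loop in identify() after this patch: every detected box (not just the indices in `possible`) is scaled from normalised coordinates to the 1520x870 canvas implied by the hard-coded factors, and, with the half-width/half-height shift commented out, the boxes are treated as corner (x1, y1, x2, y2) coordinates with no recentring. `crop_candidates` and `demo_image` are hypothetical names for illustration; only Pillow is assumed.

    # Sketch only -- mirrors the post-patch behaviour of identify()'s crop loop.
    from PIL import Image

    IMG_W, IMG_H = 1520, 870  # the scale factors hard-coded in identify()

    def crop_candidates(image, bboxlist):
        # Every detection is considered, matching the change from
        # `for chosenindex in possible` to `range(len(bboxlist))`.
        crops = []
        for bbox in bboxlist:
            # Scale normalised xyxy corners to pixels; the now-commented-out
            # half-width/half-height shift is skipped, so no recentring happens.
            x1 = bbox[0] * IMG_W
            y1 = bbox[1] * IMG_H
            x2 = bbox[2] * IMG_W
            y2 = bbox[3] * IMG_H
            crops.append(image.crop((x1, y1, x2, y2)))
        return crops

    if __name__ == "__main__":
        demo_image = Image.new("RGB", (IMG_W, IMG_H))
        boxes = [[0.10, 0.20, 0.30, 0.50]]  # one normalised x1, y1, x2, y2 box
        print([c.size for c in crop_candidates(demo_image, boxes)])  # [(304, 261)]

In the unchanged tail of the hunk, each such crop is then run through self.clippreprocess and scored against the tokenized caption by the fine-tuned CLIP model.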