update of face recognizer to support multiple new faces and add them …

…to its database
tue-robotics · Apr 2, 2024 · e4240d4 · e4240d4
1 parent ba3fbec
commit e4240d4
Showing 1 changed file with 35 additions and 19 deletions.
diff --git a/image_recognition_face_recognition/src/image_recognition_face_recognition/face_recognizer.py b/image_recognition_face_recognition/src/image_recognition_face_recognition/face_recognizer.py
@@ -5,10 +5,12 @@
 import numpy as np
 import pickle
 
+
 class TrainedFace:
     """
     This class serves as a custom struct to store information of people we recognized
     """
+
     def __init__(self, label):
         """
         A custom struct to store the names and the embedded representations (tensors) of people
@@ -33,12 +35,14 @@ class FaceRecognizer:
     """
     This class handles the recognition using the Facenet model.
     """
+
     def __init__(self):
         """
         Constructor for the list which contains the TrainedFace structure
         """
         self._trained_faces = []
-        self.device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
+        self.device = torch.device(
+            "cuda:0" if torch.cuda.is_available() else "cpu")
         rospy.loginfo(f"Running on device: {self.device}")
         self.mtcnn = MTCNN(
             keep_all=True,
@@ -50,15 +54,15 @@ def __init__(self):
             post_process=True,
             device=self.device,
         )
-        
+
     def face_detection(self, img: np.ndarray) -> List[Union[Tuple[float, float, float, float], None]]:
         """
         Returns the index of the trained face
 
         :param img: inpute image
         :return: the bounding boxes of coordinations of the faces it detects
         """
-   
+
         # Keep the landmarks for future use
         boxes, _, landmarks = self.mtcnn.detect(img, landmarks=True)
         return boxes
@@ -79,31 +83,37 @@ def _get_dists(self, embeddings: List[torch.Tensor]) -> Tuple[List[float], List[
         min_index_list_per_emb = []
         min_value_list_per_emb = []
 
-        for e2 in embeddings:
-            for e1 in self._trained_faces:
-                for e3 in e1.representations:
-                    dist_per_emb.append(abs(e3 - e2).norm().item())
+        # Calculate the L2 distance between the embedding and all the stored representations.
+        for idx,emb in enumerate(embeddings):
+            for face in self._trained_faces:
+                for rep in face.representations:
+                    dist_per_emb.append(abs(rep - emb).norm().item())
                 dist.append(dist_per_emb)
-                rospy.loginfo(f"{dist_per_emb} dist_per_emb")
+                rospy.loginfo(f"{dist_per_emb} dist_per_emb for embedded with index {idx}")
                 dist_per_emb = []
             dist_per_emb_final.append(dist)
             dist = []
 
         rospy.loginfo(f"{dist_per_emb_final} dist_per_emb_final")
-        for i in dist_per_emb_final:
-            min_of_emb = [min(j) for j in i]
+
+        # Calculate the minimum distance for each labeled embedding
+        # e.g. the min distance over all observations of label "Jake"
+        for dist in dist_per_emb_final:
+            min_of_emb = [min(j) for j in dist]
             rospy.loginfo(f"{min_of_emb} min_of_emb")
             min_of_emb_final.append(min_of_emb)
         rospy.loginfo(f"{min_of_emb_final} min_of_emb_final")
 
-        for idx in min_of_emb_final:
-            rospy.loginfo(f"{idx} idx")
-            min_index_list_per_emb.append(idx.index(min(idx)))
-            min_value_list_per_emb.append(min(idx))
+        # Iterate through the minimum distances of every label and find the corresponding index
+        for value in min_of_emb_final:
+            rospy.loginfo(f"{value} idx")
+            min_index_list_per_emb.append(value.index(min(value)))
+            min_value_list_per_emb.append(min(value))
             rospy.loginfo(f"{min_index_list_per_emb}, min_index_list_per_emb")
             rospy.loginfo(f"{min_value_list_per_emb}, min_index_list")
 
-        labelling = [self._trained_faces[i].get_label() for i in min_index_list_per_emb]
+        labelling = [self._trained_faces[i].get_label()
+                     for i in min_index_list_per_emb]
         rospy.loginfo(f"{labelling}, {min_value_list_per_emb}")
 
         return min_value_list_per_emb, labelling
@@ -120,12 +130,14 @@ def detection_recognition(self, img: np.ndarray, labels: List[str], train: bool)
         :return: the min distance(s) of the embedded vector compared with the database faces
         :return: the corresponding label(s)
         """
-        resnet = InceptionResnetV1(pretrained="vggface2").eval().to(self.device)
+        resnet = InceptionResnetV1(
+            pretrained="vggface2").eval().to(self.device)
 
         x_aligned = self.mtcnn(img)
         x_aligned = x_aligned.cuda()  # add this line
         embeddings = resnet(x_aligned).detach().cpu()
-        rospy.loginfo(f"{embeddings.size()}, {type(embeddings)}, embeddings size")
+        rospy.loginfo(
+            f"{embeddings.size()}, {type(embeddings)}, embeddings size")
 
         if not self._trained_faces:
             for nam, emb in enumerate(embeddings):
@@ -135,9 +147,13 @@ def detection_recognition(self, img: np.ndarray, labels: List[str], train: bool)
 
                 self._trained_faces[index].representations.append(emb)
 
-        # try:
+        # Calculate the L2 norm and check if the distance is bigger than 1 (face that we have not seen yet)
         dist, labelling = self._get_dists(embeddings)
-        # if dist > 1:
+        for idx, dis in enumerate(dist):
+            rospy.loginfo(f"distances are {dist} and labels are {labelling}")
+            if dis > 1:
+                labelling[idx] = labels[idx] # you can always condider the last label or something similar
+                rospy.loginfo(f"BIG DISTANCE SO...Label: {self._trained_faces[-1].get_label()}, Representations: {len(self._trained_faces[-1].get_representations())}")
         # in this case we should ask for a label
         rospy.loginfo(f"{labels[0]}, {labelling}, label[0],labelling")