diff --git a/image_recognition_face_recognition/src/image_recognition_face_recognition/face_recognizer.py b/image_recognition_face_recognition/src/image_recognition_face_recognition/face_recognizer.py index 6684b3ff..e420f3b0 100644 --- a/image_recognition_face_recognition/src/image_recognition_face_recognition/face_recognizer.py +++ b/image_recognition_face_recognition/src/image_recognition_face_recognition/face_recognizer.py @@ -5,10 +5,12 @@ import numpy as np import pickle + class TrainedFace: """ This class serves as a custom struct to store information of people we recognized """ + def __init__(self, label): """ A custom struct to store the names and the embedded representations (tensors) of people @@ -33,12 +35,14 @@ class FaceRecognizer: """ This class handles the recognition using the Facenet model. """ + def __init__(self): """ Constructor for the list which contains the TrainedFace structure """ self._trained_faces = [] - self.device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") + self.device = torch.device( + "cuda:0" if torch.cuda.is_available() else "cpu") rospy.loginfo(f"Running on device: {self.device}") self.mtcnn = MTCNN( keep_all=True, @@ -50,7 +54,7 @@ def __init__(self): post_process=True, device=self.device, ) - + def face_detection(self, img: np.ndarray) -> List[Union[Tuple[float, float, float, float], None]]: """ Returns the index of the trained face @@ -58,7 +62,7 @@ def face_detection(self, img: np.ndarray) -> List[Union[Tuple[float, float, floa :param img: inpute image :return: the bounding boxes of coordinations of the faces it detects """ - + # Keep the landmarks for future use boxes, _, landmarks = self.mtcnn.detect(img, landmarks=True) return boxes @@ -79,31 +83,37 @@ def _get_dists(self, embeddings: List[torch.Tensor]) -> Tuple[List[float], List[ min_index_list_per_emb = [] min_value_list_per_emb = [] - for e2 in embeddings: - for e1 in self._trained_faces: - for e3 in e1.representations: - 
dist_per_emb.append(abs(e3 - e2).norm().item()) + # Calculate the L2 distance between the embedding and all the stored representations. + for idx,emb in enumerate(embeddings): + for face in self._trained_faces: + for rep in face.representations: + dist_per_emb.append(abs(rep - emb).norm().item()) dist.append(dist_per_emb) - rospy.loginfo(f"{dist_per_emb} dist_per_emb") + rospy.loginfo(f"{dist_per_emb} dist_per_emb for embedded with index {idx}") dist_per_emb = [] dist_per_emb_final.append(dist) dist = [] rospy.loginfo(f"{dist_per_emb_final} dist_per_emb_final") - for i in dist_per_emb_final: - min_of_emb = [min(j) for j in i] + + # Calculate the minimum distance for each labeled embedding + # e.g. the min distance over all observations of label "Jake" + for dist in dist_per_emb_final: + min_of_emb = [min(j) for j in dist] rospy.loginfo(f"{min_of_emb} min_of_emb") min_of_emb_final.append(min_of_emb) rospy.loginfo(f"{min_of_emb_final} min_of_emb_final") - for idx in min_of_emb_final: - rospy.loginfo(f"{idx} idx") - min_index_list_per_emb.append(idx.index(min(idx))) - min_value_list_per_emb.append(min(idx)) + # Iterate through the minimum distances of every label and find the corresponding index + for value in min_of_emb_final: + rospy.loginfo(f"{value} idx") + min_index_list_per_emb.append(value.index(min(value))) + min_value_list_per_emb.append(min(value)) rospy.loginfo(f"{min_index_list_per_emb}, min_index_list_per_emb") rospy.loginfo(f"{min_value_list_per_emb}, min_index_list") - labelling = [self._trained_faces[i].get_label() for i in min_index_list_per_emb] + labelling = [self._trained_faces[i].get_label() + for i in min_index_list_per_emb] rospy.loginfo(f"{labelling}, {min_value_list_per_emb}") return min_value_list_per_emb, labelling @@ -120,12 +130,14 @@ def detection_recognition(self, img: np.ndarray, labels: List[str], train: bool) :return: the min distance(s) of the embedded vector compared with the database faces :return: the corresponding label(s) """ - resnet = 
InceptionResnetV1(pretrained="vggface2").eval().to(self.device) + resnet = InceptionResnetV1( + pretrained="vggface2").eval().to(self.device) x_aligned = self.mtcnn(img) x_aligned = x_aligned.cuda() # add this line embeddings = resnet(x_aligned).detach().cpu() - rospy.loginfo(f"{embeddings.size()}, {type(embeddings)}, embeddings size") + rospy.loginfo( + f"{embeddings.size()}, {type(embeddings)}, embeddings size") if not self._trained_faces: for nam, emb in enumerate(embeddings): @@ -135,9 +147,13 @@ def detection_recognition(self, img: np.ndarray, labels: List[str], train: bool) self._trained_faces[index].representations.append(emb) - # try: + # Calculate the L2 norm and check if the distance is bigger than 1 (face that we have not seen yet) dist, labelling = self._get_dists(embeddings) - # if dist > 1: + for idx, dis in enumerate(dist): + rospy.loginfo(f"distances are {dist} and labels are {labelling}") + if dis > 1: + labelling[idx] = labels[idx] # you can always consider the last label or something similar + rospy.loginfo(f"BIG DISTANCE SO...Label: {self._trained_faces[-1].get_label()}, Representations: {len(self._trained_faces[-1].get_representations())}") # in this case we should ask for a label rospy.loginfo(f"{labels[0]}, {labelling}, label[0],labelling")