diff --git a/README.md b/README.md
index a4549c3f0..d9e0ef905 100644
--- a/README.md
+++ b/README.md
@@ -4,7 +4,7 @@
 
 [![PyPI Downloads](https://static.pepy.tech/personalized-badge/deepface?period=total&units=international_system&left_color=grey&right_color=blue&left_text=pypi%20downloads)](https://pepy.tech/project/deepface)
 [![Conda Downloads](https://img.shields.io/conda/dn/conda-forge/deepface?color=green&label=conda%20downloads)](https://anaconda.org/conda-forge/deepface)
-[![Stars](https://img.shields.io/github/stars/serengil/deepface?color=yellow&style=flat)](https://github.com/serengil/deepface/stargazers)
+[![Stars](https://img.shields.io/github/stars/serengil/deepface?color=yellow&style=flat&label=%E2%AD%90%20stars)](https://github.com/serengil/deepface/stargazers)
 [![License](http://img.shields.io/:license-MIT-green.svg?style=flat)](https://github.com/serengil/deepface/blob/master/LICENSE)
 [![Tests](https://github.com/serengil/deepface/actions/workflows/tests.yml/badge.svg)](https://github.com/serengil/deepface/actions/workflows/tests.yml)
diff --git a/deepface/DeepFace.py b/deepface/DeepFace.py
index 8ab9eb5bd..5722db7db 100644
--- a/deepface/DeepFace.py
+++ b/deepface/DeepFace.py
@@ -45,7 +45,7 @@ def build_model(model_name: str) -> Any:
         VGG-Face, Facenet, OpenFace, DeepFace, DeepID for face recognition
         Age, Gender, Emotion, Race for facial attributes
     Returns:
-        built model with corresponding class
+        built_model
     """
     return modeling.build_model(model_name=model_name)
 
@@ -62,57 +62,37 @@ def verify(
 ) -> Dict[str, Any]:
     """
     Verify if an image pair represents the same person or different persons.
-
-    The verification function converts facial images to vectors and calculates the similarity
-    between those vectors. Vectors of images of the same person should exhibit higher similarity
-    (or lower distance) than vectors of images of different persons.
-
     Args:
         img1_path (str or np.ndarray): Path to the first image. Accepts exact image path
             as a string, numpy array (BGR), or base64 encoded images.
-
         img2_path (str or np.ndarray): Path to the second image. Accepts exact image path as a
             string, numpy array (BGR), or base64 encoded images.
-
         model_name (str): Model for face recognition. Options: VGG-Face, Facenet, Facenet512,
             OpenFace, DeepFace, DeepID, Dlib, ArcFace and SFace (default is VGG-Face).
-
         detector_backend (string): face detector backend. Options: 'opencv', 'retinaface',
             'mtcnn', 'ssd', 'dlib', 'mediapipe', 'yolov8' (default is opencv)
-
         distance_metric (string): Metric for measuring similarity. Options: 'cosine',
             'euclidean', 'euclidean_l2' (default is cosine).
-
         enforce_detection (boolean): If no face is detected in an image, raise an exception.
             Set to False to avoid the exception for low-resolution images (default is True).
-
         align (bool): Flag to enable face alignment (default is True).
-
         normalization (string): Normalize the input image before feeding it to the model.
             Options: base, raw, Facenet, Facenet2018, VGGFace, VGGFace2, ArcFace (default is base)
-
     Returns:
-        result (dict): A dictionary containing verification results.
-
+        result (dict): A dictionary containing verification results with the following keys.
         - 'verified' (bool): Indicates whether the images represent the same person (True)
             or different persons (False).
-
        - 'distance' (float): The distance measure between the face vectors.
             A lower distance indicates higher similarity.
-
        - 'max_threshold_to_verify' (float): The maximum threshold used for verification.
             If the distance is below this threshold, the images are considered a match.
-
        - 'model' (str): The chosen face recognition model.
-
        - 'similarity_metric' (str): The chosen similarity metric for measuring distances.
-
        - 'facial_areas' (dict): Rectangular regions of interest for faces in both images.
            - 'img1': {'x': int, 'y': int, 'w': int, 'h': int}
                  Region of interest for the first image.
            - 'img2': {'x': int, 'y': int, 'w': int, 'h': int}
                  Region of interest for the second image.
-
        - 'time' (float): Time taken for the verification process in seconds.
     """
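
For quick reference while reviewing the docstring changes above, a minimal usage sketch of `verify`; the image paths are placeholders, not files from this repository:

```python
from deepface import DeepFace

# compare two face images; the paths are placeholders
result = DeepFace.verify(
    img1_path="img1.jpg",
    img2_path="img2.jpg",
    model_name="VGG-Face",     # documented default
    distance_metric="cosine",  # documented default
)

# 'verified' is True when 'distance' falls below 'max_threshold_to_verify'
print(result["verified"], result["distance"])
```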
@@ -138,77 +118,59 @@ def analyze(
 ) -> List[Dict[str, Any]]:
     """
     Analyze facial attributes such as age, gender, emotion, and race in the provided image.
-
     Args:
         img_path (str or np.ndarray): The exact path to the image, a numpy array in BGR format,
             or a base64 encoded image. If the source image contains multiple faces, the result will
             include information for each detected face.
-
         actions (tuple): Attributes to analyze. The default is ('age', 'gender', 'emotion', 'race').
             You can exclude some of these attributes from the analysis if needed.
-
         enforce_detection (boolean): If no face is detected in an image, raise an exception.
             Set to False to avoid the exception for low-resolution images (default is True).
-
         detector_backend (string): face detector backend. Options: 'opencv', 'retinaface',
             'mtcnn', 'ssd', 'dlib', 'mediapipe', 'yolov8' (default is opencv).
-
         distance_metric (string): Metric for measuring similarity. Options: 'cosine',
             'euclidean', 'euclidean_l2' (default is cosine).
-
         align (boolean): Perform alignment based on the eye positions (default is True).
-
         silent (boolean): Suppress or allow some log messages for a quieter analysis
             process (default is False).
-
     Returns:
         results (List[Dict[str, Any]]): A list of dictionaries, where each dictionary represents
-            the analysis results for a detected face.
-
-            Each dictionary in the list contains the following keys:
-
-            - 'region' (dict): Represents the rectangular region of the detected face in the image.
-                - 'x': x-coordinate of the top-left corner of the face.
-                - 'y': y-coordinate of the top-left corner of the face.
-                - 'w': Width of the detected face region.
-                - 'h': Height of the detected face region.
-
-            - 'age' (float): Estimated age of the detected face.
-
-            - 'face_confidence' (float): Confidence score for the detected face.
-                Indicates the reliability of the face detection.
-
-            - 'dominant_gender' (str): The dominant gender in the detected face.
-                Either "Man" or "Woman."
-
-            - 'gender' (dict): Confidence scores for each gender category.
-                - 'Man': Confidence score for the male gender.
-                - 'Woman': Confidence score for the female gender.
-
-            - 'dominant_emotion' (str): The dominant emotion in the detected face.
-                Possible values include "sad," "angry," "surprise," "fear," "happy,"
-                "disgust," and "neutral."
-
-            - 'emotion' (dict): Confidence scores for each emotion category.
-                - 'sad': Confidence score for sadness.
-                - 'angry': Confidence score for anger.
-                - 'surprise': Confidence score for surprise.
-                - 'fear': Confidence score for fear.
-                - 'happy': Confidence score for happiness.
-                - 'disgust': Confidence score for disgust.
-                - 'neutral': Confidence score for neutrality.
-
-            - 'dominant_race' (str): The dominant race in the detected face.
-                Possible values include "indian," "asian," "latino hispanic,"
-                "black," "middle eastern," and "white."
-
-            - 'race' (dict): Confidence scores for each race category.
-                - 'indian': Confidence score for Indian ethnicity.
-                - 'asian': Confidence score for Asian ethnicity.
-                - 'latino hispanic': Confidence score for Latino/Hispanic ethnicity.
-                - 'black': Confidence score for Black ethnicity.
-                - 'middle eastern': Confidence score for Middle Eastern ethnicity.
-                - 'white': Confidence score for White ethnicity.
+            the analysis results for a detected face. Each dictionary in the list contains the
+            following keys:
+            - 'region' (dict): Represents the rectangular region of the detected face in the image.
+                - 'x': x-coordinate of the top-left corner of the face.
+                - 'y': y-coordinate of the top-left corner of the face.
+                - 'w': Width of the detected face region.
+                - 'h': Height of the detected face region.
+            - 'age' (float): Estimated age of the detected face.
+            - 'face_confidence' (float): Confidence score for the detected face.
+                Indicates the reliability of the face detection.
+            - 'dominant_gender' (str): The dominant gender in the detected face.
+                Either "Man" or "Woman."
+            - 'gender' (dict): Confidence scores for each gender category.
+                - 'Man': Confidence score for the male gender.
+                - 'Woman': Confidence score for the female gender.
+            - 'dominant_emotion' (str): The dominant emotion in the detected face.
+                Possible values include "sad," "angry," "surprise," "fear," "happy,"
+                "disgust," and "neutral."
+            - 'emotion' (dict): Confidence scores for each emotion category.
+                - 'sad': Confidence score for sadness.
+                - 'angry': Confidence score for anger.
+                - 'surprise': Confidence score for surprise.
+                - 'fear': Confidence score for fear.
+                - 'happy': Confidence score for happiness.
+                - 'disgust': Confidence score for disgust.
+                - 'neutral': Confidence score for neutrality.
+            - 'dominant_race' (str): The dominant race in the detected face.
+                Possible values include "indian," "asian," "latino hispanic,"
+                "black," "middle eastern," and "white."
+            - 'race' (dict): Confidence scores for each race category.
+                - 'indian': Confidence score for Indian ethnicity.
+                - 'asian': Confidence score for Asian ethnicity.
+                - 'latino hispanic': Confidence score for Latino/Hispanic ethnicity.
+                - 'black': Confidence score for Black ethnicity.
+                - 'middle eastern': Confidence score for Middle Eastern ethnicity.
+                - 'white': Confidence score for White ethnicity.
     """
     return demography.analyze(
         img_path=img_path,
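
A minimal usage sketch of `analyze` as documented in this hunk; again the image path is a placeholder:

```python
from deepface import DeepFace

# estimate age, gender, emotion, and race for every face in one image
results = DeepFace.analyze(
    img_path="img1.jpg",
    actions=("age", "gender", "emotion", "race"),  # the documented default set
)

# one dictionary per detected face
for face in results:
    print(face["region"], face["age"], face["dominant_gender"], face["dominant_emotion"])
```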
@@ -233,46 +195,36 @@ def find(
 ) -> List[pd.DataFrame]:
     """
     Identify individuals in a database
-
     Args:
         img_path (str or np.ndarray): The exact path to the image, a numpy array in BGR format,
             or a base64 encoded image. If the source image contains multiple faces, the result will
             include information for each detected face.
-
         db_path (string): Path to the folder containing image files. All detected faces
             in the database will be considered in the decision-making process.
-
         model_name (str): Model for face recognition. Options: VGG-Face, Facenet, Facenet512,
-            OpenFace, DeepFace, DeepID, Dlib, ArcFace and SFace
-
+            OpenFace, DeepFace, DeepID, Dlib, ArcFace and SFace (default is VGG-Face).
         distance_metric (string): Metric for measuring similarity. Options: 'cosine',
-            'euclidean', 'euclidean_l2'.
-
+            'euclidean', 'euclidean_l2' (default is cosine).
         enforce_detection (boolean): If no face is detected in an image, raise an exception.
-            Default is True. Set to False to avoid the exception for low-resolution images.
-
+            Set to False to avoid the exception for low-resolution images (default is True).
         detector_backend (string): face detector backend. Options: 'opencv', 'retinaface',
-            'mtcnn', 'ssd', 'dlib', 'mediapipe', 'yolov8'.
-
-        align (boolean): Perform alignment based on the eye positions.
-
+            'mtcnn', 'ssd', 'dlib', 'mediapipe', 'yolov8' (default is opencv).
+        align (boolean): Perform alignment based on the eye positions (default is True).
         normalization (string): Normalize the input image before feeding it to the model.
-            Default is base. Options: base, raw, Facenet, Facenet2018, VGGFace, VGGFace2, ArcFace
-
-        silent (boolean): Suppress or allow some log messages for a quieter analysis process.
-
+            Options: base, raw, Facenet, Facenet2018, VGGFace, VGGFace2, ArcFace (default is base).
+        silent (boolean): Suppress or allow some log messages for a quieter analysis process
+            (default is False).
     Returns:
         results (List[pd.DataFrame]): A list of pandas dataframes. Each dataframe corresponds
             to the identity information for an individual detected in the source image.
             The DataFrame columns include:
-
-            - 'identity': Identity label of the detected individual.
-
-            - 'target_x', 'target_y', 'target_w', 'target_h': Bounding box coordinates of the
-                    target face in the database.
-
-            - 'source_x', 'source_y', 'source_w', 'source_h': Bounding box coordinates of the
-                    detected face in the source image.
-
-            - '{model_name}_{distance_metric}': Similarity score between the faces based on the
-                    specified model and distance metric
+            - 'identity': Identity label of the detected individual.
+            - 'target_x', 'target_y', 'target_w', 'target_h': Bounding box coordinates of the
+                    target face in the database.
+            - 'source_x', 'source_y', 'source_w', 'source_h': Bounding box coordinates of the
+                    detected face in the source image.
+            - '{model_name}_{distance_metric}': Similarity score between the faces based on the
+                    specified model and distance metric
     """
     return recognition.find(
         img_path=img_path,
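
A minimal usage sketch of `find` as documented above; `img1.jpg` and the `my_db` folder are placeholders:

```python
from deepface import DeepFace

# look a face up in a folder of reference images
dfs = DeepFace.find(
    img_path="img1.jpg",
    db_path="my_db",
    model_name="VGG-Face",
    distance_metric="cosine",
)

# one dataframe per face detected in the source image;
# lower '{model_name}_{distance_metric}' scores mean closer matches
print(dfs[0][["identity", "VGG-Face_cosine"]].head())
```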
@@ -302,25 +254,20 @@ def represent(
         img_path (str or np.ndarray): The exact path to the image, a numpy array in BGR format,
             or a base64 encoded image. If the source image contains multiple faces, the result will
             include information for each detected face.
-
         model_name (str): Model for face recognition. Options: VGG-Face, Facenet, Facenet512,
-            OpenFace, DeepFace, DeepID, Dlib, ArcFace and SFace
-
+            OpenFace, DeepFace, DeepID, Dlib, ArcFace and SFace (default is VGG-Face).
         enforce_detection (boolean): If no face is detected in an image, raise an exception.
-            Default is True. Set to False to avoid the exception for low-resolution images.
-
+            Set to False to avoid the exception for low-resolution images (default is True).
         detector_backend (string): face detector backend. Options: 'opencv', 'retinaface',
-            'mtcnn', 'ssd', 'dlib', 'mediapipe', 'yolov8'.
-
-        align (boolean): Perform alignment based on the eye positions.
-
+            'mtcnn', 'ssd', 'dlib', 'mediapipe', 'yolov8' (default is opencv).
+        align (boolean): Perform alignment based on the eye positions (default is True).
         normalization (string): Normalize the input image before feeding it to the model.
-            Default is base. Options: base, raw, Facenet, Facenet2018, VGGFace, VGGFace2, ArcFace
-
+            Options: base, raw, Facenet, Facenet2018, VGGFace, VGGFace2, ArcFace
+            (default is base).
     Returns:
         results (List[Dict[str, Any]]): A list of dictionaries, each containing the
             following fields:
-
         - embedding (np.array): Multidimensional vector representing facial features.
             The number of dimensions varies based on the reference model
             (e.g., FaceNet returns 128 dimensions, VGG-Face returns 4096 dimensions).
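
A minimal usage sketch of `represent`; the image path is a placeholder:

```python
from deepface import DeepFace

# get the raw embedding vector rather than a verification decision
embedding_objs = DeepFace.represent(img_path="img1.jpg", model_name="VGG-Face")

embedding = embedding_objs[0]["embedding"]
print(len(embedding))  # 4096 for VGG-Face, per the docstring above
```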
@@ -359,13 +306,13 @@ def stream(
             in the database will be considered in the decision-making process.
 
         model_name (str): Model for face recognition. Options: VGG-Face, Facenet, Facenet512,
-            OpenFace, DeepFace, DeepID, Dlib, ArcFace and SFace
+            OpenFace, DeepFace, DeepID, Dlib, ArcFace and SFace (default is VGG-Face).
 
         detector_backend (string): face detector backend. Options: 'opencv', 'retinaface',
-            'mtcnn', 'ssd', 'dlib', 'mediapipe', 'yolov8'.
+            'mtcnn', 'ssd', 'dlib', 'mediapipe', 'yolov8' (default is opencv).
 
         distance_metric (string): Metric for measuring similarity. Options: 'cosine',
-            'euclidean', 'euclidean_l2'.
+            'euclidean', 'euclidean_l2' (default is cosine).
 
         enable_face_analysis (bool): Flag to enable face analysis (default is True).
 
@@ -408,22 +355,15 @@ def extract_faces(
     Args:
         img_path (str or np.ndarray): Path to the first image. Accepts exact image path
             as a string, numpy array (BGR), or base64 encoded images.
-
         target_size (tuple): final shape of facial image. black pixels will be
-            added to resize the image.
-
+            added to resize the image (default is (224, 224)).
         detector_backend (string): face detector backend. Options: 'opencv', 'retinaface',
             'mtcnn', 'ssd', 'dlib', 'mediapipe', 'yolov8' (default is opencv)
-
         enforce_detection (boolean): If no face is detected in an image, raise an exception.
-            Default is True. Set to False to avoid the exception for low-resolution images.
-
+            Set to False to avoid the exception for low-resolution images (default is True).
         align (bool): Flag to enable face alignment (default is True).
-
         grayscale (boolean): Flag to convert the image to grayscale before
             processing (default is False).
-
-
     Returns:
         results (List[Dict[str, Any]]): A list of dictionaries, where each dictionary contains:
         - "face" (np.ndarray): The detected face as a NumPy array.
diff --git a/deepface/basemodels/VGGFace.py b/deepface/basemodels/VGGFace.py
index 80d6d8765..7f204f437 100644
--- a/deepface/basemodels/VGGFace.py
+++ b/deepface/basemodels/VGGFace.py
@@ -2,9 +2,9 @@
 import os
 import gdown
 import numpy as np
-from deepface.commons import functions
-from deepface.commons.logger import Logger
+from deepface.commons import functions, distance
 from deepface.models.FacialRecognition import FacialRecognition
+from deepface.commons.logger import Logger
 
 logger = Logger(module="basemodels.VGGFace")
@@ -20,9 +20,7 @@
         Flatten,
         Dropout,
         Activation,
-        Lambda,
     )
-    from keras import backend as K
 else:
     from tensorflow.keras.models import Model, Sequential
     from tensorflow.keras.layers import (
@@ -32,9 +30,7 @@
         Flatten,
         Dropout,
         Activation,
-        Lambda,
     )
-    from tensorflow.keras import backend as K
 
 # ---------------------------------------
@@ -58,7 +54,11 @@ def find_embeddings(self, img: np.ndarray) -> List[float]:
         """
         # model.predict causes memory issue when it is called in a for loop
         # embedding = model.predict(img, verbose=0)[0].tolist()
-        return self.model(img, training=False).numpy()[0].tolist()
+        # the normalization layer in the descriptor troubles some gpu users (e.g. issues 957, 966);
+        # instead, we now l2-normalize in the traditional way rather than with the keras backend
+        embedding = self.model(img, training=False).numpy()[0].tolist()
+        embedding = distance.l2_normalize(embedding)
+        return embedding.tolist()
 
 
 def base_model() -> Sequential:
@@ -144,9 +144,10 @@ def load_model(
     # as described here: https://github.com/serengil/deepface/issues/944
     base_model_output = Sequential()
     base_model_output = Flatten()(model.layers[-5].output)
-    base_model_output = Lambda(lambda x: K.l2_normalize(x, axis=1), name="norm_layer")(
-        base_model_output
-    )
+    # keras backend's l2 normalization layer troubles some gpu users (e.g. issues 957, 966)
+    # base_model_output = Lambda(lambda x: K.l2_normalize(x, axis=1), name="norm_layer")(
+    #     base_model_output
+    # )
     vgg_face_descriptor = Model(inputs=model.input, outputs=base_model_output)
 
     return vgg_face_descriptor
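
The two VGGFace.py hunks above move l2 normalization out of the Keras graph: the in-graph `Lambda(K.l2_normalize)` layer is commented out and the same scaling is applied to the model's raw output instead. A small numpy sketch (not part of the diff) of the arithmetic involved, showing embeddings still come out unit-length, so cosine and euclidean_l2 comparisons behave as before for nonzero vectors:

```python
import numpy as np

def l2_normalize(x):
    # same arithmetic as the removed Lambda(K.l2_normalize) layer,
    # but in plain numpy, outside the TF graph
    x = np.asarray(x)
    return x / np.sqrt(np.sum(np.multiply(x, x)))

raw = [3.0, 4.0]             # toy descriptor output
unit = l2_normalize(raw)
print(unit)                  # [0.6 0.8]
print(np.linalg.norm(unit))  # 1.0 -- unit length, so distance math is unchanged
```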
diff --git a/deepface/commons/distance.py b/deepface/commons/distance.py
index 174473686..6048b9e40 100644
--- a/deepface/commons/distance.py
+++ b/deepface/commons/distance.py
@@ -32,7 +32,9 @@ def findEuclideanDistance(
     return euclidean_distance
 
 
-def l2_normalize(x: np.ndarray) -> np.ndarray:
+def l2_normalize(x: Union[np.ndarray, list]) -> np.ndarray:
+    if isinstance(x, list):
+        x = np.array(x)
     return x / np.sqrt(np.sum(np.multiply(x, x)))
 
diff --git a/tests/test_find.py b/tests/test_find.py
index e33b31841..4a1007e55 100644
--- a/tests/test_find.py
+++ b/tests/test_find.py
@@ -6,6 +6,8 @@
 
 logger = Logger("tests/test_find.py")
 
+threshold = distance.findThreshold(model_name="VGG-Face", distance_metric="cosine")
+
 
 def test_find_with_exact_path():
     img_path = "dataset/img1.jpg"
@@ -19,7 +21,7 @@ def test_find_with_exact_path():
     assert identity_df.shape[0] > 0
 
     # validate reproducability
-    assert identity_df["VGG-Face_cosine"].values[0] == 0
+    assert identity_df["VGG-Face_cosine"].values[0] < threshold
 
     df = df[df["identity"] != img_path]
     logger.debug(df.head())
@@ -40,7 +42,7 @@ def test_find_with_array_input():
     assert identity_df.shape[0] > 0
 
     # validate reproducability
-    assert identity_df["VGG-Face_cosine"].values[0] == 0
+    assert identity_df["VGG-Face_cosine"].values[0] < threshold
 
     df = df[df["identity"] != img_path]
     logger.debug(df.head())
@@ -63,9 +65,7 @@ def test_find_with_extracted_faces():
     assert identity_df.shape[0] > 0
 
     # validate reproducability
-    assert identity_df["VGG-Face_cosine"].values[0] < (
-        distance.findThreshold(model_name="VGG-Face", distance_metric="cosine")
-    )
+    assert identity_df["VGG-Face_cosine"].values[0] < threshold
 
     df = df[df["identity"] != img_path]
     logger.debug(df.head())
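
Finally, a small usage sketch of the widened `l2_normalize` signature, assuming the deepface package from this diff is importable: it now accepts the plain Python list that `VGGFace.find_embeddings` passes in, as well as an `np.ndarray`:

```python
import numpy as np
from deepface.commons import distance

# l2_normalize now converts a plain list to an array before normalizing
unit = distance.l2_normalize([3.0, 4.0])

print(unit)                  # [0.6 0.8]
print(np.linalg.norm(unit))  # 1.0 -- unit length, so cosine distances stay comparable
```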