Skip to content

Commit

Permalink
vgg normalization layer bug for gpu users
Browse files Browse the repository at this point in the history
  • Loading branch information
serengil committed Jan 23, 2024
1 parent 3265be2 commit 5ffa7bf
Show file tree
Hide file tree
Showing 5 changed files with 84 additions and 141 deletions.
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

[![PyPI Downloads](https://static.pepy.tech/personalized-badge/deepface?period=total&units=international_system&left_color=grey&right_color=blue&left_text=pypi%20downloads)](https://pepy.tech/project/deepface)
[![Conda Downloads](https://img.shields.io/conda/dn/conda-forge/deepface?color=green&label=conda%20downloads)](https://anaconda.org/conda-forge/deepface)
[![Stars](https://img.shields.io/github/stars/serengil/deepface?color=yellow&style=flat)](https://github.com/serengil/deepface/stargazers)
[![Stars](https://img.shields.io/github/stars/serengil/deepface?color=yellow&style=flat&label=%E2%AD%90%20stars)](https://github.com/serengil/deepface/stargazers)
[![License](http://img.shields.io/:license-MIT-green.svg?style=flat)](https://github.com/serengil/deepface/blob/master/LICENSE)
[![Tests](https://github.com/serengil/deepface/actions/workflows/tests.yml/badge.svg)](https://github.com/serengil/deepface/actions/workflows/tests.yml)

Expand Down
188 changes: 64 additions & 124 deletions deepface/DeepFace.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ def build_model(model_name: str) -> Any:
VGG-Face, Facenet, OpenFace, DeepFace, DeepID for face recognition
Age, Gender, Emotion, Race for facial attributes
Returns:
built model with corresponding class
built_model
"""
return modeling.build_model(model_name=model_name)

Expand All @@ -62,57 +62,37 @@ def verify(
) -> Dict[str, Any]:
"""
Verify if an image pair represents the same person or different persons.
The verification function converts facial images to vectors and calculates the similarity
between those vectors. Vectors of images of the same person should exhibit higher similarity
(or lower distance) than vectors of images of different persons.
Args:
img1_path (str or np.ndarray): Path to the first image. Accepts exact image path
as a string, numpy array (BGR), or base64 encoded images.
img2_path (str or np.ndarray): Path to the second image. Accepts exact image path
as a string, numpy array (BGR), or base64 encoded images.
model_name (str): Model for face recognition. Options: VGG-Face, Facenet, Facenet512,
OpenFace, DeepFace, DeepID, Dlib, ArcFace and SFace (default is VGG-Face).
detector_backend (string): face detector backend. Options: 'opencv', 'retinaface',
'mtcnn', 'ssd', 'dlib', 'mediapipe', 'yolov8' (default is opencv)
distance_metric (string): Metric for measuring similarity. Options: 'cosine',
'euclidean', 'euclidean_l2' (default is cosine).
enforce_detection (boolean): If no face is detected in an image, raise an exception.
Set to False to avoid the exception for low-resolution images (default is True).
align (bool): Flag to enable face alignment (default is True).
normalization (string): Normalize the input image before feeding it to the model.
Options: base, raw, Facenet, Facenet2018, VGGFace, VGGFace2, ArcFace (default is base)
Returns:
result (dict): A dictionary containing verification results.
result (dict): A dictionary containing verification results with following keys.
- 'verified' (bool): Indicates whether the images represent the same person (True)
or different persons (False).
- 'distance' (float): The distance measure between the face vectors.
A lower distance indicates higher similarity.
- 'max_threshold_to_verify' (float): The maximum threshold used for verification.
If the distance is below this threshold, the images are considered a match.
- 'model' (str): The chosen face recognition model.
- 'similarity_metric' (str): The chosen similarity metric for measuring distances.
- 'facial_areas' (dict): Rectangular regions of interest for faces in both images.
- 'img1': {'x': int, 'y': int, 'w': int, 'h': int}
Region of interest for the first image.
- 'img2': {'x': int, 'y': int, 'w': int, 'h': int}
Region of interest for the second image.
- 'time' (float): Time taken for the verification process in seconds.
"""

Expand All @@ -138,77 +118,59 @@ def analyze(
) -> List[Dict[str, Any]]:
"""
Analyze facial attributes such as age, gender, emotion, and race in the provided image.
Args:
img_path (str or np.ndarray): The exact path to the image, a numpy array in BGR format,
or a base64 encoded image. If the source image contains multiple faces, the result will
include information for each detected face.
actions (tuple): Attributes to analyze. The default is ('age', 'gender', 'emotion', 'race').
You can exclude some of these attributes from the analysis if needed.
enforce_detection (boolean): If no face is detected in an image, raise an exception.
Set to False to avoid the exception for low-resolution images (default is True).
detector_backend (string): face detector backend. Options: 'opencv', 'retinaface',
'mtcnn', 'ssd', 'dlib', 'mediapipe', 'yolov8' (default is opencv).
distance_metric (string): Metric for measuring similarity. Options: 'cosine',
'euclidean', 'euclidean_l2' (default is cosine).
align (boolean): Perform alignment based on the eye positions (default is True).
silent (boolean): Suppress or allow some log messages for a quieter analysis process
(default is False).
Returns:
results (List[Dict[str, Any]]): A list of dictionaries, where each dictionary represents
the analysis results for a detected face.
Each dictionary in the list contains the following keys:
- 'region' (dict): Represents the rectangular region of the detected face in the image.
- 'x': x-coordinate of the top-left corner of the face.
- 'y': y-coordinate of the top-left corner of the face.
- 'w': Width of the detected face region.
- 'h': Height of the detected face region.
- 'age' (float): Estimated age of the detected face.
- 'face_confidence' (float): Confidence score for the detected face.
Indicates the reliability of the face detection.
- 'dominant_gender' (str): The dominant gender in the detected face.
Either "Man" or "Woman."
- 'gender' (dict): Confidence scores for each gender category.
- 'Man': Confidence score for the male gender.
- 'Woman': Confidence score for the female gender.
- 'dominant_emotion' (str): The dominant emotion in the detected face.
Possible values include "sad," "angry," "surprise," "fear," "happy,"
"disgust," and "neutral."
- 'emotion' (dict): Confidence scores for each emotion category.
- 'sad': Confidence score for sadness.
- 'angry': Confidence score for anger.
- 'surprise': Confidence score for surprise.
- 'fear': Confidence score for fear.
- 'happy': Confidence score for happiness.
- 'disgust': Confidence score for disgust.
- 'neutral': Confidence score for neutrality.
- 'dominant_race' (str): The dominant race in the detected face.
Possible values include "indian," "asian," "latino hispanic,"
"black," "middle eastern," and "white."
- 'race' (dict): Confidence scores for each race category.
- 'indian': Confidence score for Indian ethnicity.
- 'asian': Confidence score for Asian ethnicity.
- 'latino hispanic': Confidence score for Latino/Hispanic ethnicity.
- 'black': Confidence score for Black ethnicity.
- 'middle eastern': Confidence score for Middle Eastern ethnicity.
- 'white': Confidence score for White ethnicity.
the analysis results for a detected face. Each dictionary in the list contains the
following keys:
- 'region' (dict): Represents the rectangular region of the detected face in the image.
- 'x': x-coordinate of the top-left corner of the face.
- 'y': y-coordinate of the top-left corner of the face.
- 'w': Width of the detected face region.
- 'h': Height of the detected face region.
- 'age' (float): Estimated age of the detected face.
- 'face_confidence' (float): Confidence score for the detected face.
Indicates the reliability of the face detection.
- 'dominant_gender' (str): The dominant gender in the detected face.
Either "Man" or "Woman."
- 'gender' (dict): Confidence scores for each gender category.
- 'Man': Confidence score for the male gender.
- 'Woman': Confidence score for the female gender.
- 'dominant_emotion' (str): The dominant emotion in the detected face.
Possible values include "sad," "angry," "surprise," "fear," "happy,"
"disgust," and "neutral."
- 'emotion' (dict): Confidence scores for each emotion category.
- 'sad': Confidence score for sadness.
- 'angry': Confidence score for anger.
- 'surprise': Confidence score for surprise.
- 'fear': Confidence score for fear.
- 'happy': Confidence score for happiness.
- 'disgust': Confidence score for disgust.
- 'neutral': Confidence score for neutrality.
- 'dominant_race' (str): The dominant race in the detected face.
Possible values include "indian," "asian," "latino hispanic,"
"black," "middle eastern," and "white."
- 'race' (dict): Confidence scores for each race category.
- 'indian': Confidence score for Indian ethnicity.
- 'asian': Confidence score for Asian ethnicity.
- 'latino hispanic': Confidence score for Latino/Hispanic ethnicity.
- 'black': Confidence score for Black ethnicity.
- 'middle eastern': Confidence score for Middle Eastern ethnicity.
- 'white': Confidence score for White ethnicity.
"""
return demography.analyze(
img_path=img_path,
Expand All @@ -233,46 +195,36 @@ def find(
) -> List[pd.DataFrame]:
"""
Identify individuals in a database
Args:
img_path (str or np.ndarray): The exact path to the image, a numpy array in BGR format,
or a base64 encoded image. If the source image contains multiple faces, the result will
include information for each detected face.
db_path (string): Path to the folder containing image files. All detected faces
in the database will be considered in the decision-making process.
model_name (str): Model for face recognition. Options: VGG-Face, Facenet, Facenet512,
OpenFace, DeepFace, DeepID, Dlib, ArcFace and SFace
OpenFace, DeepFace, DeepID, Dlib, ArcFace and SFace (default is VGG-Face).
distance_metric (string): Metric for measuring similarity. Options: 'cosine',
'euclidean', 'euclidean_l2'.
'euclidean', 'euclidean_l2' (default is cosine).
enforce_detection (boolean): If no face is detected in an image, raise an exception.
Default is True. Set to False to avoid the exception for low-resolution images.
Set to False to avoid the exception for low-resolution images (default is True).
detector_backend (string): face detector backend. Options: 'opencv', 'retinaface',
'mtcnn', 'ssd', 'dlib', 'mediapipe', 'yolov8'.
align (boolean): Perform alignment based on the eye positions.
'mtcnn', 'ssd', 'dlib', 'mediapipe', 'yolov8' (default is opencv).
align (boolean): Perform alignment based on the eye positions (default is True).
normalization (string): Normalize the input image before feeding it to the model.
Default is base. Options: base, raw, Facenet, Facenet2018, VGGFace, VGGFace2, ArcFace
silent (boolean): Suppress or allow some log messages for a quieter analysis process.
Options: base, raw, Facenet, Facenet2018, VGGFace, VGGFace2, ArcFace (default is base).
silent (boolean): Suppress or allow some log messages for a quieter analysis process
(default is False).
Returns:
results (List[pd.DataFrame]): A list of pandas dataframes. Each dataframe corresponds
to the identity information for an individual detected in the source image.
The DataFrame columns include:
- 'identity': Identity label of the detected individual.
- 'target_x', 'target_y', 'target_w', 'target_h': Bounding box coordinates of the
target face in the database.
- 'source_x', 'source_y', 'source_w', 'source_h': Bounding box coordinates of the
detected face in the source image.
- '{model_name}_{distance_metric}': Similarity score between the faces based on the
specified model and distance metric
- 'identity': Identity label of the detected individual.
- 'target_x', 'target_y', 'target_w', 'target_h': Bounding box coordinates of the
target face in the database.
- 'source_x', 'source_y', 'source_w', 'source_h': Bounding box coordinates of the
detected face in the source image.
- '{model_name}_{distance_metric}': Similarity score between the faces based on the
specified model and distance metric
"""
return recognition.find(
img_path=img_path,
Expand Down Expand Up @@ -302,25 +254,20 @@ def represent(
img_path (str or np.ndarray): The exact path to the image, a numpy array in BGR format,
or a base64 encoded image. If the source image contains multiple faces, the result will
include information for each detected face.
model_name (str): Model for face recognition. Options: VGG-Face, Facenet, Facenet512,
OpenFace, DeepFace, DeepID, Dlib, ArcFace and SFace
OpenFace, DeepFace, DeepID, Dlib, ArcFace and SFace (default is VGG-Face.).
enforce_detection (boolean): If no face is detected in an image, raise an exception.
Default is True. Set to False to avoid the exception for low-resolution images.
Default is True. Set to False to avoid the exception for low-resolution images
(default is True).
detector_backend (string): face detector backend. Options: 'opencv', 'retinaface',
'mtcnn', 'ssd', 'dlib', 'mediapipe', 'yolov8'.
align (boolean): Perform alignment based on the eye positions.
'mtcnn', 'ssd', 'dlib', 'mediapipe', 'yolov8' (default is opencv).
align (boolean): Perform alignment based on the eye positions (default is True).
normalization (string): Normalize the input image before feeding it to the model.
Default is base. Options: base, raw, Facenet, Facenet2018, VGGFace, VGGFace2, ArcFace
(default is base).
Returns:
results (List[Dict[str, Any]]): A list of dictionaries, each containing the
following fields:
- embedding (np.array): Multidimensional vector representing facial features.
The number of dimensions varies based on the reference model
(e.g., FaceNet returns 128 dimensions, VGG-Face returns 4096 dimensions).
Expand Down Expand Up @@ -359,13 +306,13 @@ def stream(
in the database will be considered in the decision-making process.
model_name (str): Model for face recognition. Options: VGG-Face, Facenet, Facenet512,
OpenFace, DeepFace, DeepID, Dlib, ArcFace and SFace
OpenFace, DeepFace, DeepID, Dlib, ArcFace and SFace (default is VGG-Face).
detector_backend (string): face detector backend. Options: 'opencv', 'retinaface',
'mtcnn', 'ssd', 'dlib', 'mediapipe', 'yolov8'.
'mtcnn', 'ssd', 'dlib', 'mediapipe', 'yolov8' (default is opencv).
distance_metric (string): Metric for measuring similarity. Options: 'cosine',
'euclidean', 'euclidean_l2'.
'euclidean', 'euclidean_l2' (default is cosine).
enable_face_analysis (bool): Flag to enable face analysis (default is True).
Expand Down Expand Up @@ -408,22 +355,15 @@ def extract_faces(
Args:
img_path (str or np.ndarray): Path to the first image. Accepts exact image path
as a string, numpy array (BGR), or base64 encoded images.
target_size (tuple): final shape of facial image. black pixels will be
added to resize the image.
added to resize the image (default is (224, 224)).
detector_backend (string): face detector backend. Options: 'opencv', 'retinaface',
'mtcnn', 'ssd', 'dlib', 'mediapipe', 'yolov8' (default is opencv)
enforce_detection (boolean): If no face is detected in an image, raise an exception.
Default is True. Set to False to avoid the exception for low-resolution images.
Set to False to avoid the exception for low-resolution images (default is True).
align (bool): Flag to enable face alignment (default is True).
grayscale (boolean): Flag to convert the image to grayscale before
processing (default is False).
Returns:
results (List[Dict[str, Any]]): A list of dictionaries, where each dictionary contains:
- "face" (np.ndarray): The detected face as a NumPy array.
Expand Down
21 changes: 11 additions & 10 deletions deepface/basemodels/VGGFace.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,9 @@
import os
import gdown
import numpy as np
from deepface.commons import functions
from deepface.commons.logger import Logger
from deepface.commons import functions, distance
from deepface.models.FacialRecognition import FacialRecognition
from deepface.commons.logger import Logger

logger = Logger(module="basemodels.VGGFace")

Expand All @@ -20,9 +20,7 @@
Flatten,
Dropout,
Activation,
Lambda,
)
from keras import backend as K
else:
from tensorflow.keras.models import Model, Sequential
from tensorflow.keras.layers import (
Expand All @@ -32,9 +30,7 @@
Flatten,
Dropout,
Activation,
Lambda,
)
from tensorflow.keras import backend as K

# ---------------------------------------

Expand All @@ -58,7 +54,11 @@ def find_embeddings(self, img: np.ndarray) -> List[float]:
"""
# model.predict causes memory issue when it is called in a for loop
# embedding = model.predict(img, verbose=0)[0].tolist()
return self.model(img, training=False).numpy()[0].tolist()
# having normalization layer in descriptor troubles for some gpu users (e.g. issue 957, 966)
# instead we are now calculating it with traditional way not with keras backend
embedding = self.model(img, training=False).numpy()[0].tolist()
embedding = distance.l2_normalize(embedding)
return embedding.tolist()


def base_model() -> Sequential:
Expand Down Expand Up @@ -144,9 +144,10 @@ def load_model(
# as described here: https://github.com/serengil/deepface/issues/944
base_model_output = Sequential()
base_model_output = Flatten()(model.layers[-5].output)
base_model_output = Lambda(lambda x: K.l2_normalize(x, axis=1), name="norm_layer")(
base_model_output
)
# keras backend's l2 normalization layer troubles some gpu users (e.g. issue 957, 966)
# base_model_output = Lambda(lambda x: K.l2_normalize(x, axis=1), name="norm_layer")(
# base_model_output
# )
vgg_face_descriptor = Model(inputs=model.input, outputs=base_model_output)

return vgg_face_descriptor
Loading

0 comments on commit 5ffa7bf

Please sign in to comment.