# person_detector.py

from dataclasses import dataclass
from os import path
from random import randint
from typing import Optional, Tuple
from ultralytics import YOLO
import cv2
import mediapipe as mp
import numpy as np
import time


@dataclass
class DetectionConfig:
    """Configuration for person detection.

    Consumed by ``PersonDetector``: ``model_path`` selects the model to load,
    the other two fields decide which detection rows are kept.
    """
    # Minimum confidence (inclusive) a detection must reach to be kept.
    confidence_threshold: float = 0.5
    # Passed as-is to the YOLO constructor.
    model_path: str = "yolov8n.pt"
    # Class id a detection must carry to be kept.
    # NOTE(review): presumably the "person" class of the model's label map — confirm.
    person_class_id: int = 0

class VideoSource:
    """Thin wrapper around an OpenCV capture device."""

    def __init__(self, camera_id: int = 0):
        """Open the capture device ``camera_id``.

        Raises:
            RuntimeError: if the device reports that it is not opened.
        """
        self.capture = cv2.VideoCapture(camera_id)
        if self.capture.isOpened():
            return
        raise RuntimeError("Could not open video capture device")

    def read_frame(self) -> Optional[np.ndarray]:
        """Grab the next frame, or return ``None`` when the read fails."""
        ok, image = self.capture.read()
        if not ok:
            return None
        return image

    def release(self):
        """Hand the capture device back to the system."""
        self.capture.release()

class PersonDetector:
    """Runs a YOLOv8 model and keeps only the confident person detections."""

    def __init__(self, config: DetectionConfig):
        """Store ``config`` and load the model named by ``config.model_path``."""
        self.config = config
        self.model = YOLO(config.model_path)

    def detect(self, frame: np.ndarray) -> list:
        """Return the detection rows of ``frame`` that pass the config filters.

        Each row of the first result's ``boxes.data`` is kept when its
        column 5 (read as the class id) equals ``config.person_class_id`` and
        its column 4 (read as the confidence) is at least
        ``config.confidence_threshold``.
        """
        first_result = self.model(frame, verbose=False)[0]
        persons = []
        for row in first_result.boxes.data:
            is_person = int(row[5]) == self.config.person_class_id
            # The confidence is only inspected for rows of the wanted class.
            if is_person and float(row[4]) >= self.config.confidence_threshold:
                persons.append(row)
        return persons

@dataclass
class FaceLandmarkDetector:
    """
    Face landmark detection and coarse head pose estimation using MediaPipe.

    The settings below are dataclass fields, so each can be overridden per
    instance, e.g. ``FaceLandmarkDetector(yaw_threshold=15.0)``; calling the
    constructor without arguments keeps the previous defaults.

    The class is a context manager: leaving the ``with`` block calls
    :meth:`release`. Entering it does not open a camera; call
    :meth:`initialize` explicitly if this object should own one.
    """
    min_detection_confidence: float = 0.3
    min_tracking_confidence: float = 0.5
    # Thresholds for head pose (in degrees)
    yaw_threshold: float = 20.0
    # Kept for backward compatibility; is_facing_camera() does not read it
    # and uses the pitch_min/pitch_max window instead.
    pitch_threshold: float = 20.0
    # Pitch window (exclusive bounds, degrees) in which the face counts as
    # facing the camera. The pitch proxy from estimate_head_pose() is derived
    # from the nose-to-eye-line offset, so it is not centred on zero.
    # NOTE(review): presumably tuned empirically — confirm before changing.
    pitch_min: float = 20.0
    pitch_max: float = 60.0

    # Constant used as the "depth" leg of the arctan2 calls in
    # estimate_head_pose(). Not annotated, so it is a plain class constant
    # rather than a dataclass field.
    _PSEUDO_DEPTH_PX = 30

    def __post_init__(self):
        """Create the MediaPipe face mesh from the configured confidences.

        Runs after the dataclass-generated ``__init__`` has stored the fields,
        so per-instance overrides are honoured.
        """
        self.cap: Optional[cv2.VideoCapture] = None
        self.mp_face_mesh = mp.solutions.face_mesh
        self.mp_drawing = mp.solutions.drawing_utils
        self.drawing_spec = self.mp_drawing.DrawingSpec(thickness=1, circle_radius=1)
        self.face_mesh = self.mp_face_mesh.FaceMesh(
            max_num_faces=1,
            min_detection_confidence=self.min_detection_confidence,
            min_tracking_confidence=self.min_tracking_confidence
        )

    def __enter__(self):
        """Return the detector itself; no extra resources are acquired."""
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        """Release resources on leaving the ``with`` block; never suppresses exceptions."""
        self.release()

    def initialize(self):
        """Open video capture device 0 and store it in ``self.cap``.

        Raises:
            RuntimeError: if the device cannot be opened.
        """
        self.cap = cv2.VideoCapture(0)
        if not self.cap.isOpened():
            raise RuntimeError("Could not open video capture device")

    def release(self):
        """Release resources.

        Closes the face mesh and the capture device (if any) and destroys all
        OpenCV windows. The references are cleared afterwards so that a second
        call does not close the same objects again.
        """
        if self.face_mesh:
            self.face_mesh.close()
            self.face_mesh = None
        if self.cap:
            self.cap.release()
            self.cap = None
        cv2.destroyAllWindows()

    def estimate_head_pose(self, face_landmarks, image_shape) -> Tuple[float, float]:
        """
        Estimate head pose using facial landmarks.

        Args:
            face_landmarks: object whose ``landmark`` sequence holds points
                with normalized ``x``/``y`` attributes (indices 1, 33 and 263
                are read).
            image_shape: shape of the image; the first two entries are read
                as (height, width).

        Returns:
            ``(yaw, pitch)`` in degrees. Yaw is the angle of the horizontal
            nose offset from the eye midpoint; pitch is the angle of the
            vertical offset. Both use a fixed pseudo depth rather than real
            3D geometry.
        """
        # Get image dimensions
        image_height, image_width = image_shape[:2]
        scale = np.array([image_width, image_height])

        # Get key face landmarks (normalized coordinates)
        nose_tip = face_landmarks.landmark[1]
        left_eye = face_landmarks.landmark[33]
        right_eye = face_landmarks.landmark[263]

        # Convert to pixel coordinates
        nose_tip_px = np.array([nose_tip.x, nose_tip.y]) * scale
        left_eye_px = np.array([left_eye.x, left_eye.y]) * scale
        right_eye_px = np.array([right_eye.x, right_eye.y]) * scale

        # Offset of the nose tip from the midpoint between the eyes
        eye_midpoint = (left_eye_px + right_eye_px) / 2
        offset_x, offset_y = nose_tip_px - eye_midpoint
        depth = self._PSEUDO_DEPTH_PX

        # Yaw (horizontal rotation): horizontal offset against the length of
        # the (vertical offset, depth) vector
        yaw = np.arctan2(offset_x, np.linalg.norm([offset_y, depth]))

        # Pitch (vertical rotation): vertical offset against the depth alone
        pitch = np.arctan2(offset_y, depth)

        return float(np.degrees(yaw)), float(np.degrees(pitch))

    def is_facing_camera(self, yaw: float, pitch: float) -> bool:
        """Return True when ``|yaw|`` is below ``yaw_threshold`` and ``pitch``
        lies strictly inside the ``(pitch_min, pitch_max)`` window."""
        return (
            abs(yaw) < self.yaw_threshold and
            self.pitch_min < pitch < self.pitch_max
        )

    def detect(self, frame: np.ndarray) -> list:
        """Detect face landmarks in a BGR frame.

        Returns:
            The face mesh result's ``multi_face_landmarks``, or an empty list
            when that is falsy.

        Raises:
            RuntimeError: if the detector has already been released.
        """
        if self.face_mesh is None:
            raise RuntimeError("FaceLandmarkDetector has been released")
        image = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        results = self.face_mesh.process(image)
        return results.multi_face_landmarks or []

    def draw_detections(self, frame, detected_landmarks) -> np.ndarray:
        """Draw face landmarks and head pose status on the frame.

        The frame is drawn on in place and also returned. Nothing is drawn
        when ``detected_landmarks`` is empty.
        """
        if not detected_landmarks:
            return frame

        for face_landmarks in detected_landmarks:
            # Draw face mesh
            self.mp_drawing.draw_landmarks(
                image=frame,
                landmark_list=face_landmarks,
                connections=self.mp_face_mesh.FACEMESH_TESSELATION,
                landmark_drawing_spec=self.drawing_spec,
                connection_drawing_spec=self.drawing_spec
            )

            # Estimate head pose and determine if facing camera
            yaw, pitch = self.estimate_head_pose(face_landmarks, frame.shape)
            facing_camera = self.is_facing_camera(yaw, pitch)

            # Display head pose status (green when facing, red otherwise)
            status_text = "Looking at camera" if facing_camera else "Looking away"
            color = (0, 255, 0) if facing_camera else (0, 0, 255)
            cv2.putText(frame, f"Status: {status_text}", (10, 30),
                       cv2.FONT_HERSHEY_SIMPLEX, 1, color, 2)
            cv2.putText(frame, f"Yaw: {yaw:.1f}, Pitch: {pitch:.1f}", (10, 60),
                       cv2.FONT_HERSHEY_SIMPLEX, 0.7, (255, 255, 255), 2)

        return frame


class DisplayManager:
    """Renders person detections onto frames."""

    @staticmethod
    def draw_detections(frame: np.ndarray, detections: list) -> np.ndarray:
        """Outline every detection on ``frame`` and label it with its score.

        The first four entries of a detection are read as the box corners
        (x1, y1, x2, y2) and the fifth as the confidence. The frame is drawn
        on in place and also returned.
        """
        green = (0, 255, 0)
        for box in detections:
            left, top, right, bottom = (int(value) for value in box[:4])
            score = float(box[4])

            # Bounding box
            cv2.rectangle(frame, (left, top), (right, bottom), green, 2)

            # Confidence caption, placed 10 px above the box's top-left corner
            caption = f"Person: {score:.2f}"
            cv2.putText(frame, caption, (left, top - 10),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.5, green, 2)

        return frame

class DetectionSystem:
    """Main system that coordinates all components.

    Shows the default portrait while exactly one detected person is facing
    the camera (or when zero or several persons are detected) and swaps to a
    random alternative portrait while that single person looks away.
    """

    def __init__(self, config: "DetectionConfig"):
        """Load the portraits, open the camera and build the detectors.

        Raises:
            FileNotFoundError: if one of the portrait images cannot be read.
            RuntimeError: if the video capture device cannot be opened.
        """
        portrait_files = (
            'MainImage.jpg',
            'Asshole.jpg',
            'Banana.jpg',
            'LaughingStock.jpg',
            'ShhPainting.jpg',
        )
        # Index 0 is the default portrait; the rest are the alternatives.
        self.portraits = [self._load_portrait(name) for name in portrait_files]
        # Individual attributes kept for backward compatibility.
        (self.portrait_1, self.portrait_2, self.portrait_3,
         self.portrait_4, self.portrait_5) = self.portraits
        self.current_portrait = self.portrait_1
        self.portrait_changed = False
        # The two attributes below are stored but not read anywhere in this class.
        self.last_state_change = time.time()
        self.state_change_cooldown = 0.5  # seconds
        self.debug = False

        self.video_source = VideoSource()
        try:
            self.detector = PersonDetector(config)
            self.landmark_detector = FaceLandmarkDetector()
            self.display = DisplayManager()
            self.window_name = "Person Detection"
            cv2.namedWindow(self.window_name)
        except Exception:
            # Do not leave the camera open when a later setup step fails.
            self.video_source.release()
            raise

    @staticmethod
    def _load_portrait(filename):
        """Read ``images/<filename>`` and fail loudly when it cannot be loaded."""
        image_path = path.join('images', filename)
        image = cv2.imread(image_path)
        # cv2.imread signals failure by returning None instead of raising.
        if image is None:
            raise FileNotFoundError(f"Could not load portrait image: {image_path}")
        return image

    def randomize_portrait(self):
        """Change the current portrait to a random non-default one.

        Does nothing when the portrait has already been changed since the
        last call to default_portrait(), or when there is no alternative.
        """
        if self.portrait_changed or len(self.portraits) < 2:
            return

        self.portrait_changed = True
        # Index 0 is the default portrait, so pick among the remaining ones.
        self.current_portrait = self.portraits[randint(1, len(self.portraits) - 1)]

    def default_portrait(self):
        """Change the current portrait to the default one."""
        self.portrait_changed = False
        self.current_portrait = self.portraits[0]

    def _update_portrait(self, frame, detections) -> list:
        """Pick the portrait for this frame and return the face landmarks found."""
        if len(detections) != 1:
            self.default_portrait()
            return []

        landmarks = self.landmark_detector.detect(frame)
        # With one person but no face landmarks the portrait is left as it is.
        if landmarks:
            yaw, pitch = self.landmark_detector.estimate_head_pose(landmarks[0], frame.shape)
            if self.landmark_detector.is_facing_camera(yaw, pitch):
                self.default_portrait()
            else:
                self.randomize_portrait()
        return landmarks

    def run(self):
        """Run the detection loop until the camera stops delivering frames
        or 'q' is pressed; resources are cleaned up in every case."""
        try:
            while True:
                frame = self.video_source.read_frame()
                if frame is None:
                    break

                # Perform detection and update the portrait state
                detections = self.detector.detect(frame)
                landmarks = self._update_portrait(frame, detections)

                # Show the annotated camera frame in debug mode, else the portrait
                if self.debug:
                    frame = self.display.draw_detections(frame, detections)
                    frame = self.landmark_detector.draw_detections(frame, landmarks)
                    cv2.imshow(self.window_name, frame)
                else:
                    cv2.imshow(self.window_name, self.current_portrait)

                # Check for exit command
                if cv2.waitKey(1) & 0xFF == ord('q'):
                    break

        finally:
            self.cleanup()

    def cleanup(self):
        """Clean up resources: landmark detector, camera and OpenCV windows."""
        try:
            self.landmark_detector.release()
        finally:
            # Release the camera even if closing the landmark detector fails.
            self.video_source.release()
            cv2.destroyAllWindows()

def main():
    """Build a detection system with the default configuration and run it."""
    DetectionSystem(DetectionConfig()).run()

# Start the application only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()