import os
import numpy as np
import cv2
import landmark_detection
import gradio as gr
from mtcnn_facedetection import detect_faces


def apply_sunglasses(image, landmarks, sunglasses_img):
    """Overlay a BGRA sunglasses image on every detected face.

    Expects 68-point facial landmarks per face and an overlay image with an
    alpha channel (4 channels); faces without enough landmarks are skipped.
    """
    # If the overlay failed to load or no landmarks were found, return the original image
    if sunglasses_img is None or not landmarks:
        return image

    # Work on a copy so the input image is left untouched
    result = image.copy()

    # Process each face
    for face_landmarks in landmarks:
        # The eye regions use 68-point indices 36-47, so require at least 48 points
        if len(face_landmarks) < 48:
            continue

        # Eye centers from the left (36-41) and right (42-47) eye landmarks
        left_eye_center = np.mean(face_landmarks[36:42], axis=0).astype(int)
        right_eye_center = np.mean(face_landmarks[42:48], axis=0).astype(int)

        # Distance between the eyes sets the overlay scale
        eye_distance = np.linalg.norm(right_eye_center - left_eye_center)

        # Angle between the eyes; negated to correct the rotation direction
        angle = -np.degrees(
            np.arctan2(
                right_eye_center[1] - left_eye_center[1],
                right_eye_center[0] - left_eye_center[0],
            )
        )

        # Size the sunglasses relative to the eye distance, preserving aspect ratio
        width = int(eye_distance * 2.5)
        height = int(width * sunglasses_img.shape[0] / sunglasses_img.shape[1])

        # Resize sunglasses
        sunglasses_resized = cv2.resize(sunglasses_img, (width, height))

        # Rotate the sunglasses image around its center
        center = (width // 2, height // 2)
        rotation_matrix = cv2.getRotationMatrix2D(center, angle, 1.0)

        # Calculate the bounding dimensions after rotation
        cos = np.abs(rotation_matrix[0, 0])
        sin = np.abs(rotation_matrix[0, 1])
        new_width = int((height * sin) + (width * cos))
        new_height = int((height * cos) + (width * sin))

        # Shift the rotation matrix so the rotated image stays centered
        rotation_matrix[0, 2] += (new_width / 2) - center[0]
        rotation_matrix[1, 2] += (new_height / 2) - center[1]

        # Perform the rotation with a transparent border
        rotated_glasses = cv2.warpAffine(
            sunglasses_resized,
            rotation_matrix,
            (new_width, new_height),
            flags=cv2.INTER_LINEAR,
            borderMode=cv2.BORDER_CONSTANT,
            borderValue=(0, 0, 0, 0),
        )

        # Position the sunglasses at the midpoint between the eyes
        eye_center = ((left_eye_center + right_eye_center) // 2).astype(int)
        x = eye_center[0] - new_width // 2
        y = eye_center[1] - new_height // 2

        # Region of interest in the output image, clipped to its bounds
        y1, y2 = max(0, y), min(result.shape[0], y + new_height)
        x1, x2 = max(0, x), min(result.shape[1], x + new_width)

        # Matching region in the rotated glasses image
        g_y1, g_y2 = max(0, -y), max(0, -y) + (y2 - y1)
        g_x1, g_x2 = max(0, -x), max(0, -x) + (x2 - x1)

        # Skip faces whose overlay region falls outside the glasses image
        if g_y2 <= rotated_glasses.shape[0] and g_x2 <= rotated_glasses.shape[1]:
            roi = result[y1:y2, x1:x2]
            glasses_roi = rotated_glasses[g_y1:g_y2, g_x1:g_x2]

            # Alpha-blend the glasses onto the image
            if glasses_roi.shape[2] == 4 and roi.shape[:2] == glasses_roi.shape[:2]:
                alpha = glasses_roi[:, :, 3] / 255.0
                for c in range(3):
                    roi[:, :, c] = (
                        glasses_roi[:, :, c] * alpha + roi[:, :, c] * (1 - alpha)
                    ).astype(np.uint8)
                result[y1:y2, x1:x2] = roi

    return result


def do_facial_landmark_recognition(
    image: np.ndarray, face_boxes: list[landmark_detection.BoundingBox]
):
    faces = landmark_detection.get_faces(image, face_boxes)
    landmarks_batch = landmark_detection.get_landmarks(faces)
    return landmarks_batch


def do_facial_landmark_recognition_with_mtcnn(image: np.ndarray, sunglasses_img):
    face_boxes = detect_faces(image)
    landmarks_batch = do_facial_landmark_recognition(image, face_boxes)
    return apply_sunglasses(image, landmarks_batch, sunglasses_img)
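
# Illustrative single-image usage (a minimal sketch, not part of this module's
# API): the overlay is expected to be a 4-channel BGRA image, so it should be
# loaded with cv2.IMREAD_UNCHANGED. The file names below are placeholders.
#
#   sunglasses = cv2.imread("sunglasses.png", cv2.IMREAD_UNCHANGED)
#   frame = cv2.imread("portrait.jpg")
#   decorated = do_facial_landmark_recognition_with_mtcnn(frame, sunglasses)
#   cv2.imwrite("portrait_with_sunglasses.jpg", decorated)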


def process_video(input_path, sunglasses_img):
    output_path = os.path.join(
        os.path.dirname(input_path), "output_" + os.path.basename(input_path)
    )

    # Open the input video
    cap = cv2.VideoCapture(input_path)
    if not cap.isOpened():
        # gr.Error is an exception; it must be raised to surface in the UI
        raise gr.Error(f"Error opening input video file: {input_path}")

    # Get video properties
    width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    fps = cap.get(cv2.CAP_PROP_FPS)
    total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))

    # Create the VideoWriter for the output file
    fourcc = cv2.VideoWriter_fourcc(*"mp4v")
    out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))

    frame_count = 0

    # Process each frame
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break

        # Detect faces and landmarks, then overlay the sunglasses
        processed_frame = do_facial_landmark_recognition_with_mtcnn(
            frame, sunglasses_img
        )

        # Write the processed frame
        out.write(processed_frame)

    # Release resources
    cap.release()
    out.release()

    gr.Info(f"Video processing complete. Output saved to: {output_path}")
    return output_path
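

if __name__ == "__main__":
    # Minimal standalone sketch for running the video pipeline outside the
    # Gradio app, assuming a local BGRA overlay PNG and an input video; both
    # file names are placeholders and not part of this repository.
    sunglasses = cv2.imread("sunglasses.png", cv2.IMREAD_UNCHANGED)
    result_path = process_video("input.mp4", sunglasses)
    print(f"Processed video written to {result_path}")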