VT45

Running

App Files Files Community

Ashrafb committed on Nov 9, 2024

Commit

22bf4fb

verified ·

1 Parent(s): dcb6047

Update vtoonify_model.py

Browse files

Files changed (1) hide show

vtoonify_model.py +21 -55

vtoonify_model.py CHANGED Viewed

@@ -115,71 +115,37 @@ class Model():
         with torch.no_grad():
             exstyle = self.vtoonify.zplus2wplus(exstyle)
         return exstyle, 'Model of %s loaded.' % (style_type)
     def detect_and_align(self, frame, top, bottom, left, right, return_para=False):
         message = 'Error: no face detected! Please retry or change the photo.'
         instyle = None
         h, w, scale = 0, 0, 0
-        # Use InsightFace for face detection
-        faces = self.face_detector.get(frame)
-        if len(faces) > 0:
-            logging.info(f"Detected {len(faces)} face(s).")
-            face = faces[0]
-            bbox = face.bbox.astype(int)
-            landmarks = face.landmark_2d_106
-            # Align face based on landmarks
-            aligned_face = self.align_face(frame, landmarks)
-            if aligned_face is not None:
-                logging.info(f"Aligned face shape: {aligned_face.shape}")
-                with torch.no_grad():
-                    I = self.transform(aligned_face).unsqueeze(dim=0).to(self.device)
                     instyle = self.pspencoder(I)
                     instyle = self.vtoonify.zplus2wplus(instyle)
-                    message = 'Successfully aligned the face.'
-            else:
-                logging.warning("Failed to align face.")
-                frame = np.zeros((256, 256, 3), np.uint8)
         else:
-            logging.warning("No face detected.")
-            frame = np.zeros((256, 256, 3), np.uint8)
         if return_para:
-            return frame, instyle, message, h, w, top, bottom, left, right, scale
         return frame, instyle, message
-    def align_face(self, image, landmarks):
-        # Calculate auxiliary vectors for alignment
-        eye_left = np.mean(landmarks[36:42], axis=0)
-        eye_right = np.mean(landmarks[42:48], axis=0)
-        mouth_left = landmarks[48]
-        mouth_right = landmarks[54]
-        # Calculate transformation parameters
-        eye_center = (eye_left + eye_right) / 2
-        mouth_center = (mouth_left + mouth_right) / 2
-        eye_to_eye = eye_right - eye_left
-        eye_to_mouth = mouth_center - eye_center
-        # Define the transformation matrix
-        x = eye_to_eye - np.flipud(eye_to_mouth) * [-1, 1]
-        x /= np.hypot(*x)
-        x *= np.hypot(*eye_to_eye) * 2.0
-        y = np.flipud(x) * [-1, 1]
-        c = eye_center + eye_to_mouth * 0.1
-        quad = np.stack([c - x - y, c - x + y, c + x + y, c + x - y])
-        qsize = np.hypot(*x) * 2
-        # Transform and crop the image
-        transform_size = 256
-        output_size = 256
-        img = Image.fromarray(image)
-        img = img.transform((transform_size, transform_size), Image.QUAD, (quad + 0.5).flatten(), Image.BILINEAR)
-        if output_size < transform_size:
-            img = img.resize((output_size, output_size), Image.ANTIALIAS)
-        return np.array(img)
     def detect_and_align_image(self, frame_rgb: np.ndarray, top: int, bottom: int, left: int, right: int) -> tuple:
         if frame_rgb is None:

         with torch.no_grad():
             exstyle = self.vtoonify.zplus2wplus(exstyle)
         return exstyle, 'Model of %s loaded.' % (style_type)
     def detect_and_align(self, frame, top, bottom, left, right, return_para=False):
         message = 'Error: no face detected! Please retry or change the photo.'
+        paras = get_video_crop_parameter(frame, self.landmarkpredictor, [left, right, top, bottom])
         instyle = None
         h, w, scale = 0, 0, 0
+        if paras is not None:
+            h,w,top,bottom,left,right,scale = paras
+            H, W = int(bottom-top), int(right-left)
+            # for HR image, we apply gaussian blur to it to avoid over-sharp stylization results
+            kernel_1d = np.array([[0.125],[0.375],[0.375],[0.125]])
+            if scale <= 0.75:
+                frame = cv2.sepFilter2D(frame, -1, kernel_1d, kernel_1d)
+            if scale <= 0.375:
+                frame = cv2.sepFilter2D(frame, -1, kernel_1d, kernel_1d)
+            frame = cv2.resize(frame, (w, h))[top:bottom, left:right]
+            with torch.no_grad():
+                I = align_face(frame, self.landmarkpredictor)
+                if I is not None:
+                    I = self.transform(I).unsqueeze(dim=0).to(self.device)
                     instyle = self.pspencoder(I)
                     instyle = self.vtoonify.zplus2wplus(instyle)
+                    message = 'Successfully rescale the frame to (%d, %d)'%(bottom-top, right-left)
+                else:
+                    frame = np.zeros((256,256,3), np.uint8)
         else:
+            frame = np.zeros((256,256,3), np.uint8)
         if return_para:
+            return frame, instyle, message, w, h, top, bottom, left, right, scale
         return frame, instyle, message
     def detect_and_align_image(self, frame_rgb: np.ndarray, top: int, bottom: int, left: int, right: int) -> tuple:
         if frame_rgb is None: