Update vtoonify_model.py
vtoonify_model.py  CHANGED  (+8 -109)

@@ -18,7 +18,7 @@ import gc
 import huggingface_hub
 import os
 import logging
-from PIL import Image
+from PIL import Image

 # Configure logging
 logging.basicConfig(level=logging.INFO)
@@ -132,12 +132,14 @@ class Model():
             # Align face based on landmarks
             aligned_face = self.align_face(frame, landmarks)
             if aligned_face is not None:
+                logging.info(f"Aligned face shape: {aligned_face.shape}")
                 with torch.no_grad():
                     I = self.transform(aligned_face).unsqueeze(dim=0).to(self.device)
                     instyle = self.pspencoder(I)
                     instyle = self.vtoonify.zplus2wplus(instyle)
                     message = 'Successfully aligned the face.'
             else:
+                logging.warning("Failed to align face.")
                 frame = np.zeros((256, 256, 3), np.uint8)
         else:
             logging.warning("No face detected.")
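
Note on the encoding step in the hunk above: the aligned face is converted to a normalized tensor with a batch dimension before it reaches the pSp encoder. A minimal, self-contained sketch of that preprocessing, assuming the usual VToonify ToTensor + Normalize(0.5, 0.5) transform (the constants and the all-black input are stand-ins, not taken from this diff):

import numpy as np
from torchvision import transforms

# Assumed transform: scales uint8 HxWx3 to [0, 1], then shifts to [-1, 1].
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]),
])

aligned_face = np.zeros((256, 256, 3), np.uint8)   # stand-in aligned face
I = transform(aligned_face).unsqueeze(dim=0)       # add batch dim: [1, 3, 256, 256]
print(I.shape, I.min().item(), I.max().item())     # all values -1.0 for a black input
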
@@ -172,14 +174,13 @@ class Model():
         # Transform and crop the image
         transform_size = 256
         output_size = 256
-        img = Image.fromarray(image)
+        img = Image.fromarray(image)
         img = img.transform((transform_size, transform_size), Image.QUAD, (quad + 0.5).flatten(), Image.BILINEAR)
         if output_size < transform_size:
             img = img.resize((output_size, output_size), Image.ANTIALIAS)

         return np.array(img)

-    # Other methods remain unchanged
     def detect_and_align_image(self, frame_rgb: np.ndarray, top: int, bottom: int, left: int, right: int) -> tuple:
         if frame_rgb is None:
             return np.zeros((256, 256, 3), np.uint8), None, 'Error: fail to load the image.'
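
For context on the Image.QUAD call above: PIL maps four source corners (upper-left, lower-left, lower-right, upper-right, flattened to eight values) onto the output rectangle, which is how the detected face region is warped to 256x256. A runnable sketch on a synthetic image; the quad corners here are invented for illustration:

import numpy as np
from PIL import Image

transform_size = 256
gradient = np.tile(np.arange(512, dtype=np.uint8), (512, 1))
image = np.stack([gradient] * 3, axis=-1)          # synthetic 512x512 RGB image

# Corner order: upper-left, lower-left, lower-right, upper-right.
quad = np.array([[96, 96], [96, 416], [416, 416], [416, 96]], dtype=np.float32)

img = Image.fromarray(image)
img = img.transform((transform_size, transform_size), Image.QUAD,
                    (quad + 0.5).flatten(), Image.BILINEAR)
print(np.array(img).shape)                         # (256, 256, 3)

As a side note, Image.ANTIALIAS in the resize branch above is deprecated and removed in Pillow 10; Image.LANCZOS is the modern equivalent.
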
@@ -188,61 +189,13 @@ class Model():
         frame_bgr = cv2.cvtColor(frame_rgb, cv2.COLOR_RGB2BGR)
         return self.detect_and_align(frame_bgr, top, bottom, left, right)

-    def detect_and_align_video(self, video: str, top: int, bottom: int, left: int, right: int) -> tuple:
-        if video is None:
-            return np.zeros((256, 256, 3), np.uint8), None, 'Error: fail to load empty file.'
-        video_cap = cv2.VideoCapture(video)
-        if video_cap.get(7) == 0:
-            video_cap.release()
-            return np.zeros((256, 256, 3), np.uint8), torch.zeros(1, 18, 512).to(self.device), 'Error: fail to load the video.'
-        success, frame = video_cap.read()
-        frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
-        video_cap.release()
-        return self.detect_and_align(frame, top, bottom, left, right)
-
-    def detect_and_align_full_video(self, video: str, top: int, bottom: int, left: int, right: int) -> tuple:
-        message = 'Error: no face detected! Please retry or change the video.'
-        instyle = None
-        if video is None:
-            return 'default.mp4', instyle, 'Error: fail to load empty file.'
-        video_cap = cv2.VideoCapture(video)
-        if video_cap.get(7) == 0:
-            video_cap.release()
-            return 'default.mp4', instyle, 'Error: fail to load the video.'
-        num = min(self.video_limit_gpu, int(video_cap.get(7)))
-        if self.device == 'cpu':
-            num = min(self.video_limit_cpu, num)
-        success, frame = video_cap.read()
-        frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
-        frame, instyle, message, w, h, top, bottom, left, right, scale = self.detect_and_align(frame, top, bottom, left, right, True)
-        if instyle is None:
-            return 'default.mp4', instyle, message
-        fourcc = cv2.VideoWriter_fourcc(*'mp4v')
-        videoWriter = cv2.VideoWriter('input.mp4', fourcc, video_cap.get(5), (int(right-left), int(bottom-top)))
-        videoWriter.write(cv2.cvtColor(frame, cv2.COLOR_RGB2BGR))
-        kernel_1d = np.array([[0.125], [0.375], [0.375], [0.125]])
-        for i in range(num-1):
-            success, frame = video_cap.read()
-            frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
-            if scale <= 0.75:
-                frame = cv2.sepFilter2D(frame, -1, kernel_1d, kernel_1d)
-            if scale <= 0.375:
-                frame = cv2.sepFilter2D(frame, -1, kernel_1d, kernel_1d)
-            frame = cv2.resize(frame, (w, h))[top:bottom, left:right]
-            videoWriter.write(cv2.cvtColor(frame, cv2.COLOR_RGB2BGR))
-
-        videoWriter.release()
-        video_cap.release()
-
-        return 'input.mp4', instyle, 'Successfully rescale the video to (%d, %d)' % (bottom-top, right-left)
-
     def image_toonify(self, aligned_face: np.ndarray, instyle: torch.Tensor, exstyle: torch.Tensor, style_degree: float, style_type: str) -> tuple:
         if instyle is None or aligned_face is None:
-            return np.zeros((256, 256, 3), np.uint8), 'Opps, something wrong with the input. Please go to Step 2 and Rescale Image/First Frame again.'
+            return np.zeros((256, 256, 3), np.uint8), 'Oops, something wrong with the input. Please go to Step 2 and Rescale Image/First Frame again.'
         if self.style_name != style_type:
             exstyle, _ = self.load_model(style_type)
         if exstyle is None:
-            return np.zeros((256, 256, 3), np.uint8), 'Opps, something wrong with the style type. Please go to Step 1 and load model again.'
+            return np.zeros((256, 256, 3), np.uint8), 'Oops, something wrong with the style type. Please go to Step 1 and load model again.'
         with torch.no_grad():
             if self.color_transfer:
                 s_w = exstyle
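
The video helpers removed above (and video_toonify, removed in the next hunk) drive OpenCV through bare numeric property ids. For reference, a sketch using the named constants, plus the separable 4-tap binomial blur the rescaler applies before heavy downscaling; 'sample.mp4' and the random frame are stand-ins:

import cv2
import numpy as np

cap = cv2.VideoCapture('sample.mp4')               # stand-in path
n_frames = cap.get(cv2.CAP_PROP_FRAME_COUNT)       # == cap.get(7), the emptiness check
fps      = cap.get(cv2.CAP_PROP_FPS)               # == cap.get(5)
width    = cap.get(cv2.CAP_PROP_FRAME_WIDTH)       # == cap.get(3)
height   = cap.get(cv2.CAP_PROP_FRAME_HEIGHT)      # == cap.get(4)
cap.release()

# Anti-aliasing before downscaling: a separable binomial kernel,
# applied once for scale <= 0.75 and twice for scale <= 0.375.
kernel_1d = np.array([[0.125], [0.375], [0.375], [0.125]])
frame = np.random.randint(0, 256, (480, 640, 3), np.uint8)
blurred = cv2.sepFilter2D(frame, -1, kernel_1d, kernel_1d)
print(blurred.shape)                               # (480, 640, 3)
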
@@ -251,69 +204,15 @@ class Model():
                 s_w[:, :7] = exstyle[:, :7]

             x = self.transform(aligned_face).unsqueeze(dim=0).to(self.device)
+            logging.info(f"Input to VToonify shape: {x.shape}")
             x_p = F.interpolate(self.parsingpredictor(2*(F.interpolate(x, scale_factor=2, mode='bilinear', align_corners=False)))[0],
                                 scale_factor=0.5, recompute_scale_factor=False).detach()
             inputs = torch.cat((x, x_p/16.), dim=1)
             y_tilde = self.vtoonify(inputs, s_w.repeat(inputs.size(0), 1, 1), d_s=style_degree)
             y_tilde = torch.clamp(y_tilde, -1, 1)
-
+            logging.info(f"Output from VToonify shape: {y_tilde.shape}")
         return ((y_tilde[0].cpu().numpy().transpose(1, 2, 0) + 1.0) * 127.5).astype(np.uint8), 'Successfully toonify the image with style of %s' % (self.style_name)

-    def video_toonify(self, aligned_video: str, instyle: torch.Tensor, exstyle: torch.Tensor, style_degree: float, style_type: str) -> tuple:
-        if aligned_video is None:
-            return 'default.mp4', 'Opps, something wrong with the input. Please go to Step 2 and Rescale Video again.'
-        video_cap = cv2.VideoCapture(aligned_video)
-        if instyle is None or aligned_video is None or video_cap.get(7) == 0:
-            video_cap.release()
-            return 'default.mp4', 'Opps, something wrong with the input. Please go to Step 2 and Rescale Video again.'
-        if self.style_name != style_type:
-            exstyle, _ = self.load_model(style_type)
-        if exstyle is None:
-            return 'default.mp4', 'Opps, something wrong with the style type. Please go to Step 1 and load model again.'
-        num = min(self.video_limit_gpu, int(video_cap.get(7)))
-        if self.device == 'cpu':
-            num = min(self.video_limit_cpu, num)
-        fourcc = cv2.VideoWriter_fourcc(*'mp4v')
-        videoWriter = cv2.VideoWriter('output.mp4', fourcc,
-                                      video_cap.get(5), (int(video_cap.get(3)*4),
-                                      int(video_cap.get(4)*4)))
-
-        batch_frames = []
-        if video_cap.get(3) != 0:
-            if self.device == 'cpu':
-                batch_size = max(1, int(4 * 256 * 256 / video_cap.get(3) / video_cap.get(4)))
-            else:
-                batch_size = min(max(1, int(4 * 400 * 360 / video_cap.get(3) / video_cap.get(4))), 4)
-        else:
-            batch_size = 1
-        print('*** Toonify using batch size of %d on %dx%d video of %d frames with style of %s' % (batch_size, int(video_cap.get(3)*4), int(video_cap.get(4)*4), num, style_type))
-        with torch.no_grad():
-            if self.color_transfer:
-                s_w = exstyle
-            else:
-                s_w = instyle.clone()
-                s_w[:, :7] = exstyle[:, :7]
-            for i in range(num):
-                success, frame = video_cap.read()
-                frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
-                batch_frames += [self.transform(frame).unsqueeze(dim=0).to(self.device)]
-                if len(batch_frames) == batch_size or (i+1) == num:
-                    x = torch.cat(batch_frames, dim=0)
-                    batch_frames = []
-                    with torch.no_grad():
-                        x_p = F.interpolate(self.parsingpredictor(2*(F.interpolate(x, scale_factor=2, mode='bilinear', align_corners=False)))[0],
-                                            scale_factor=0.5, recompute_scale_factor=False).detach()
-                    inputs = torch.cat((x, x_p/16.), dim=1)
-                    y_tilde = self.vtoonify(inputs, s_w.repeat(inputs.size(0), 1, 1), style_degree)
-                    y_tilde = torch.clamp(y_tilde, -1, 1)
-                    for k in range(y_tilde.size(0)):
-                        videoWriter.write(tensor2cv2(y_tilde[k].cpu()))
-                    gc.collect()
-
-        videoWriter.release()
-        video_cap.release()
-        return 'output.mp4', 'Successfully toonify video of %d frames with style of %s' % (num, self.style_name)
-
     def tensor2cv2(self, img):
         """Convert a tensor image to OpenCV format."""
         tmp = ((img.cpu().numpy().transpose(1, 2, 0) + 1.0) * 127.5).astype(np.uint8).copy()
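
Shape-wise, the forward pass kept in image_toonify runs the parsing network on a 2x-upsampled input, halves the parsing map back to input size, and stacks it onto the RGB channels. A self-contained sketch of that plumbing; StubParser is a made-up stand-in for the repo's face-parsing model (assumed here to emit 19 classes and, like the real one, return a tuple, hence the [0]), and the random y_tilde stands in for the 4x-upscaled generator output:

import numpy as np
import torch
import torch.nn.functional as F

class StubParser(torch.nn.Module):
    # Hypothetical stand-in for self.parsingpredictor.
    def __init__(self):
        super().__init__()
        self.conv = torch.nn.Conv2d(3, 19, kernel_size=1)
    def forward(self, x):
        return (self.conv(x),)

parsingpredictor = StubParser()
x = torch.rand(1, 3, 256, 256) * 2 - 1             # normalized input frame
with torch.no_grad():
    # Parse at 2x resolution, then bring the map back to input size.
    x_p = F.interpolate(parsingpredictor(2*(F.interpolate(x, scale_factor=2, mode='bilinear', align_corners=False)))[0],
                        scale_factor=0.5, recompute_scale_factor=False).detach()
    inputs = torch.cat((x, x_p/16.), dim=1)        # 3 RGB + 19 parsing channels
    y_tilde = torch.clamp(torch.randn(1, 3, 1024, 1024), -1, 1)   # stand-in output
out = ((y_tilde[0].numpy().transpose(1, 2, 0) + 1.0) * 127.5).astype(np.uint8)
print(inputs.shape, out.shape)                     # [1, 22, 256, 256] (1024, 1024, 3)

The division by 16 presumably compresses the parsing output into a range comparable to the [-1, 1] RGB channels before concatenation.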