DiffuEraser-demo

Runtime error

App Files Community

cocktailpeanut committed on Mar 8

Commit

8081e40

1 Parent(s): ce1de29

update

Browse files

Files changed (6) hide show

diffueraser/diffueraser.py +6 -5
diffueraser/pipeline_diffueraser.py +2 -1
gradio_app.py +3 -2
propainter/inference.py +21 -20
propainter/model/misc.py +9 -3
run_diffueraser.py +3 -2

diffueraser/diffueraser.py CHANGED Viewed

@@ -22,6 +22,7 @@ from libs.unet_motion_model import MotionAdapter, UNetMotionModel
 from libs.brushnet_CA import BrushNetModel
 from libs.unet_2d_condition import UNet2DConditionModel
 from diffueraser.pipeline_diffueraser import StableDiffusionDiffuEraserPipeline
 checkpoints = {
@@ -318,7 +319,7 @@ class DiffuEraser:
                 latents.append(self.vae.encode(pixel_values[i : i + num]).latent_dist.sample())
             latents = torch.cat(latents, dim=0)
         latents = latents * self.vae.config.scaling_factor #[(b f), c1, h, w], c1=4
-        torch.cuda.empty_cache()
         timesteps = torch.tensor([0], device=self.device)
         timesteps = timesteps.long()
@@ -349,7 +350,7 @@ class DiffuEraser:
                     guidance_scale=guidance_scale_final,
                     latents=latents_pre,
                 ).latents
-            torch.cuda.empty_cache()
             def decode_latents(latents, weight_dtype):
                 latents = 1 / self.vae.config.scaling_factor * latents
@@ -363,7 +364,7 @@ class DiffuEraser:
             with torch.no_grad():
                 video_tensor_temp = decode_latents(latents_pre_out, weight_dtype=torch.float16)
                 images_pre_out  = self.image_processor.postprocess(video_tensor_temp, output_type="pil")
-            torch.cuda.empty_cache()
             ## replace input frames with updated frames
             black_image = Image.new('L', validation_masks_input[0].size, color=0)
@@ -376,7 +377,7 @@ class DiffuEraser:
             latents_pre_out=None
             sample_index=None
         gc.collect()
-        torch.cuda.empty_cache()
         ################  Frame-by-frame inference  ################
         ## add priori
@@ -396,7 +397,7 @@ class DiffuEraser:
         images = images[:real_video_length]
         gc.collect()
-        torch.cuda.empty_cache()
         ################ Compose ################
         binary_masks = validation_masks_input_ori

 from libs.brushnet_CA import BrushNetModel
 from libs.unet_2d_condition import UNet2DConditionModel
 from diffueraser.pipeline_diffueraser import StableDiffusionDiffuEraserPipeline
+import devicetorch
 checkpoints = {
                 latents.append(self.vae.encode(pixel_values[i : i + num]).latent_dist.sample())
             latents = torch.cat(latents, dim=0)
         latents = latents * self.vae.config.scaling_factor #[(b f), c1, h, w], c1=4
+        devicetorch.empty_cache(torch)
         timesteps = torch.tensor([0], device=self.device)
         timesteps = timesteps.long()
                     guidance_scale=guidance_scale_final,
                     latents=latents_pre,
                 ).latents
+            devicetorch.empty_cache(torch)
             def decode_latents(latents, weight_dtype):
                 latents = 1 / self.vae.config.scaling_factor * latents
             with torch.no_grad():
                 video_tensor_temp = decode_latents(latents_pre_out, weight_dtype=torch.float16)
                 images_pre_out  = self.image_processor.postprocess(video_tensor_temp, output_type="pil")
+            devicetorch.empty_cache(torch)
             ## replace input frames with updated frames
             black_image = Image.new('L', validation_masks_input[0].size, color=0)
             latents_pre_out=None
             sample_index=None
         gc.collect()
+        devicetorch.empty_cache(torch)
         ################  Frame-by-frame inference  ################
         ## add priori
         images = images[:real_video_length]
         gc.collect()
+        devicetorch.empty_cache(torch)
         ################ Compose ################
         binary_masks = validation_masks_input_ori

diffueraser/pipeline_diffueraser.py CHANGED Viewed

@@ -36,6 +36,7 @@ from diffusers import (
 from libs.unet_2d_condition import UNet2DConditionModel
 from libs.brushnet_CA import BrushNetModel
 logger = logging.get_logger(__name__)  # pylint: disable=invalid-name
@@ -1326,7 +1327,7 @@ class StableDiffusionDiffuEraserPipeline(
         if hasattr(self, "final_offload_hook") and self.final_offload_hook is not None:
             self.unet.to("cpu")
             self.brushnet.to("cpu")
-            torch.cuda.empty_cache()
         if  output_type == "latent":
             image = latents

 from libs.unet_2d_condition import UNet2DConditionModel
 from libs.brushnet_CA import BrushNetModel
+import devicetorch
 logger = logging.get_logger(__name__)  # pylint: disable=invalid-name
         if hasattr(self, "final_offload_hook") and self.final_offload_hook is not None:
             self.unet.to("cpu")
             self.brushnet.to("cpu")
+            devicetorch.empty_cache(torch)
         if  output_type == "latent":
             image = latents

gradio_app.py CHANGED Viewed

@@ -8,6 +8,7 @@ import gradio as gr
 # Download Weights
 from huggingface_hub import snapshot_download
 # List of subdirectories to create inside "checkpoints"
 subfolders = [
@@ -93,7 +94,7 @@ def infer(input_video, input_mask):
     inference_time = end_time - start_time
     print(f"DiffuEraser inference time: {inference_time:.4f} s")
-    torch.cuda.empty_cache()
     return output_path
@@ -150,4 +151,4 @@ demo.queue().launch(show_api=False, show_error=True)

 # Download Weights
 from huggingface_hub import snapshot_download
+import devicetorch
 # List of subdirectories to create inside "checkpoints"
 subfolders = [
     inference_time = end_time - start_time
     print(f"DiffuEraser inference time: {inference_time:.4f} s")
+    devicetorch.empty_cache(torch)
     return output_path

propainter/inference.py CHANGED Viewed

@@ -24,6 +24,7 @@ except:
     from propainter.core.utils import to_tensors
     from propainter.model.misc import get_device
 import warnings
 warnings.filterwarnings("ignore")
@@ -247,15 +248,15 @@ class Propainter:
                     gt_flows_f_list.append(flows_f)
                     gt_flows_b_list.append(flows_b)
-                    torch.cuda.empty_cache()
                 gt_flows_f = torch.cat(gt_flows_f_list, dim=1)
                 gt_flows_b = torch.cat(gt_flows_b_list, dim=1)
                 gt_flows_bi = (gt_flows_f, gt_flows_b)
             else:
                 gt_flows_bi = self.fix_raft(frames, iters=raft_iter)
-                torch.cuda.empty_cache()
-            torch.cuda.empty_cache()
             gc.collect()
             if use_half:
@@ -284,7 +285,7 @@ class Propainter:
                     pred_flows_f.append(pred_flows_bi_sub[0][:, pad_len_s:e_f-s_f-pad_len_e])
                     pred_flows_b.append(pred_flows_bi_sub[1][:, pad_len_s:e_f-s_f-pad_len_e])
-                    torch.cuda.empty_cache()
                 pred_flows_f = torch.cat(pred_flows_f, dim=1)
                 pred_flows_b = torch.cat(pred_flows_b, dim=1)
@@ -292,8 +293,8 @@ class Propainter:
             else:
                 pred_flows_bi, _ = self.fix_flow_complete.forward_bidirect_flow(gt_flows_bi, flow_masks)
                 pred_flows_bi = self.fix_flow_complete.combine_flow(gt_flows_bi, pred_flows_bi, flow_masks)
-                torch.cuda.empty_cache()
-            torch.cuda.empty_cache()
             gc.collect()
@@ -321,15 +322,15 @@ class Propainter:
                         gt_flows_f_list.append(flows_f)
                         gt_flows_b_list.append(flows_b)
-                        torch.cuda.empty_cache()
                     gt_flows_f = torch.cat(gt_flows_f_list, dim=1)
                     gt_flows_b = torch.cat(gt_flows_b_list, dim=1)
                     sample_gt_flows_bi = (gt_flows_f, gt_flows_b)
                 else:
                     sample_gt_flows_bi = self.fix_raft(sample_frames, iters=raft_iter)
-                    torch.cuda.empty_cache()
-                torch.cuda.empty_cache()
                 gc.collect()
                 if use_half:
@@ -356,7 +357,7 @@ class Propainter:
                         pred_flows_f.append(pred_flows_bi_sub[0][:, pad_len_s:e_f-s_f-pad_len_e])
                         pred_flows_b.append(pred_flows_bi_sub[1][:, pad_len_s:e_f-s_f-pad_len_e])
-                        torch.cuda.empty_cache()
                     pred_flows_f = torch.cat(pred_flows_f, dim=1)
                     pred_flows_b = torch.cat(pred_flows_b, dim=1)
@@ -364,8 +365,8 @@ class Propainter:
                 else:
                     sample_pred_flows_bi, _ = self.fix_flow_complete.forward_bidirect_flow(sample_gt_flows_bi, sample_flow_masks)
                     sample_pred_flows_bi = self.fix_flow_complete.combine_flow(sample_gt_flows_bi, sample_pred_flows_bi, sample_flow_masks)
-                    torch.cuda.empty_cache()
-                torch.cuda.empty_cache()
                 gc.collect()
                 masked_frames = sample_frames * (1 - sample_masks_dilated)
@@ -391,7 +392,7 @@ class Propainter:
                         updated_frames.append(updated_frames_sub[:, pad_len_s:e_f-s_f-pad_len_e])
                         updated_masks.append(updated_masks_sub[:, pad_len_s:e_f-s_f-pad_len_e])
-                        torch.cuda.empty_cache()
                     updated_frames = torch.cat(updated_frames, dim=1)
                     updated_masks = torch.cat(updated_masks, dim=1)
@@ -400,7 +401,7 @@ class Propainter:
                     prop_imgs, updated_local_masks = self.model.img_propagation(masked_frames, sample_pred_flows_bi, sample_masks_dilated, 'nearest')
                     updated_frames = sample_frames * (1 - sample_masks_dilated) + prop_imgs.view(b, t, 3, h, w) * sample_masks_dilated
                     updated_masks = updated_local_masks.view(b, t, 1, h, w)
-                    torch.cuda.empty_cache()
                 ## replace input frames/masks with updated frames/masks
                 for i,index in enumerate(index_sample):
@@ -432,7 +433,7 @@ class Propainter:
                     updated_frames.append(updated_frames_sub[:, pad_len_s:e_f-s_f-pad_len_e])
                     updated_masks.append(updated_masks_sub[:, pad_len_s:e_f-s_f-pad_len_e])
-                    torch.cuda.empty_cache()
                 updated_frames = torch.cat(updated_frames, dim=1)
                 updated_masks = torch.cat(updated_masks, dim=1)
@@ -441,7 +442,7 @@ class Propainter:
                 prop_imgs, updated_local_masks = self.model.img_propagation(masked_frames, pred_flows_bi, masks_dilated, 'nearest')
                 updated_frames = frames * (1 - masks_dilated) + prop_imgs.view(b, t, 3, h, w) * masks_dilated
                 updated_masks = updated_local_masks.view(b, t, 1, h, w)
-                torch.cuda.empty_cache()
         comp_frames = [None] * video_length
@@ -451,7 +452,7 @@ class Propainter:
         else:
             ref_num = -1
-        torch.cuda.empty_cache()
         # ---- feature propagation + transformer ----
         for f in tqdm(range(0, video_length, neighbor_stride)):
             neighbor_ids = [
@@ -488,7 +489,7 @@ class Propainter:
                     comp_frames[idx] = comp_frames[idx].astype(np.uint8)
-            torch.cuda.empty_cache()
         ##save composed video##
         comp_frames = [cv2.resize(f, out_size) for f in comp_frames]
@@ -499,7 +500,7 @@ class Propainter:
             writer.write(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
         writer.release()
-        torch.cuda.empty_cache()
         return output_path
@@ -517,4 +518,4 @@ if __name__ == '__main__':
     res = propainter.forward(video, mask, output)

     from propainter.core.utils import to_tensors
     from propainter.model.misc import get_device
+import devicetorch
 import warnings
 warnings.filterwarnings("ignore")
                     gt_flows_f_list.append(flows_f)
                     gt_flows_b_list.append(flows_b)
+                    devicetorch.empty_cache(torch)
                 gt_flows_f = torch.cat(gt_flows_f_list, dim=1)
                 gt_flows_b = torch.cat(gt_flows_b_list, dim=1)
                 gt_flows_bi = (gt_flows_f, gt_flows_b)
             else:
                 gt_flows_bi = self.fix_raft(frames, iters=raft_iter)
+                devicetorch.empty_cache(torch)
+            devicetorch.empty_cache(torch)
             gc.collect()
             if use_half:
                     pred_flows_f.append(pred_flows_bi_sub[0][:, pad_len_s:e_f-s_f-pad_len_e])
                     pred_flows_b.append(pred_flows_bi_sub[1][:, pad_len_s:e_f-s_f-pad_len_e])
+                    devicetorch.empty_cache(torch)
                 pred_flows_f = torch.cat(pred_flows_f, dim=1)
                 pred_flows_b = torch.cat(pred_flows_b, dim=1)
             else:
                 pred_flows_bi, _ = self.fix_flow_complete.forward_bidirect_flow(gt_flows_bi, flow_masks)
                 pred_flows_bi = self.fix_flow_complete.combine_flow(gt_flows_bi, pred_flows_bi, flow_masks)
+                devicetorch.empty_cache(torch)
+            devicetorch.empty_cache(torch)
             gc.collect()
                         gt_flows_f_list.append(flows_f)
                         gt_flows_b_list.append(flows_b)
+                        devicetorch.empty_cache(torch)
                     gt_flows_f = torch.cat(gt_flows_f_list, dim=1)
                     gt_flows_b = torch.cat(gt_flows_b_list, dim=1)
                     sample_gt_flows_bi = (gt_flows_f, gt_flows_b)
                 else:
                     sample_gt_flows_bi = self.fix_raft(sample_frames, iters=raft_iter)
+                    devicetorch.empty_cache(torch)
+                devicetorch.empty_cache(torch)
                 gc.collect()
                 if use_half:
                         pred_flows_f.append(pred_flows_bi_sub[0][:, pad_len_s:e_f-s_f-pad_len_e])
                         pred_flows_b.append(pred_flows_bi_sub[1][:, pad_len_s:e_f-s_f-pad_len_e])
+                        devicetorch.empty_cache(torch)
                     pred_flows_f = torch.cat(pred_flows_f, dim=1)
                     pred_flows_b = torch.cat(pred_flows_b, dim=1)
                 else:
                     sample_pred_flows_bi, _ = self.fix_flow_complete.forward_bidirect_flow(sample_gt_flows_bi, sample_flow_masks)
                     sample_pred_flows_bi = self.fix_flow_complete.combine_flow(sample_gt_flows_bi, sample_pred_flows_bi, sample_flow_masks)
+                    devicetorch.empty_cache(torch)
+                devicetorch.empty_cache(torch)
                 gc.collect()
                 masked_frames = sample_frames * (1 - sample_masks_dilated)
                         updated_frames.append(updated_frames_sub[:, pad_len_s:e_f-s_f-pad_len_e])
                         updated_masks.append(updated_masks_sub[:, pad_len_s:e_f-s_f-pad_len_e])
+                        devicetorch.empty_cache(torch)
                     updated_frames = torch.cat(updated_frames, dim=1)
                     updated_masks = torch.cat(updated_masks, dim=1)
                     prop_imgs, updated_local_masks = self.model.img_propagation(masked_frames, sample_pred_flows_bi, sample_masks_dilated, 'nearest')
                     updated_frames = sample_frames * (1 - sample_masks_dilated) + prop_imgs.view(b, t, 3, h, w) * sample_masks_dilated
                     updated_masks = updated_local_masks.view(b, t, 1, h, w)
+                    devicetorch.empty_cache(torch)
                 ## replace input frames/masks with updated frames/masks
                 for i,index in enumerate(index_sample):
                     updated_frames.append(updated_frames_sub[:, pad_len_s:e_f-s_f-pad_len_e])
                     updated_masks.append(updated_masks_sub[:, pad_len_s:e_f-s_f-pad_len_e])
+                    devicetorch.empty_cache(torch)
                 updated_frames = torch.cat(updated_frames, dim=1)
                 updated_masks = torch.cat(updated_masks, dim=1)
                 prop_imgs, updated_local_masks = self.model.img_propagation(masked_frames, pred_flows_bi, masks_dilated, 'nearest')
                 updated_frames = frames * (1 - masks_dilated) + prop_imgs.view(b, t, 3, h, w) * masks_dilated
                 updated_masks = updated_local_masks.view(b, t, 1, h, w)
+                devicetorch.empty_cache(torch)
         comp_frames = [None] * video_length
         else:
             ref_num = -1
+        devicetorch.empty_cache(torch)
         # ---- feature propagation + transformer ----
         for f in tqdm(range(0, video_length, neighbor_stride)):
             neighbor_ids = [
                     comp_frames[idx] = comp_frames[idx].astype(np.uint8)
+            devicetorch.empty_cache(torch)
         ##save composed video##
         comp_frames = [cv2.resize(f, out_size) for f in comp_frames]
             writer.write(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
         writer.release()
+        devicetorch.empty_cache(torch)
         return output_path
     res = propainter.forward(video, mask, output)

propainter/model/misc.py CHANGED Viewed

@@ -7,6 +7,7 @@ import torch.nn as nn
 import logging
 import numpy as np
 from os import path as osp
 def constant_init(module, val, bias=0):
     if hasattr(module, 'weight') and module.weight is not None:
@@ -81,8 +82,13 @@ def set_random_seed(seed):
     random.seed(seed)
     np.random.seed(seed)
     torch.manual_seed(seed)
-    torch.cuda.manual_seed(seed)
-    torch.cuda.manual_seed_all(seed)
 def get_time_str():
@@ -128,4 +134,4 @@ def scandir(dir_path, suffix=None, recursive=False, full_path=False):
                 else:
                     continue
-    return _scandir(dir_path, suffix=suffix, recursive=recursive)

 import logging
 import numpy as np
 from os import path as osp
+import devicetorch
 def constant_init(module, val, bias=0):
     if hasattr(module, 'weight') and module.weight is not None:
     random.seed(seed)
     np.random.seed(seed)
     torch.manual_seed(seed)
+    if torch.cuda.is_available():
+        torch.cuda.manual_seed(seed)
+        torch.cuda.manual_seed_all(seed)
+    if torch.backends.mps.is_available():
+        torch.mps.manual_seed(seed)
 def get_time_str():
                 else:
                     continue
+    return _scandir(dir_path, suffix=suffix, recursive=recursive)

run_diffueraser.py CHANGED Viewed

@@ -4,6 +4,7 @@ import time
 import argparse
 from diffueraser.diffueraser import DiffuEraser
 from propainter.inference import Propainter, get_device
 def main():
@@ -53,10 +54,10 @@ def main():
     inference_time = end_time - start_time
     print(f"DiffuEraser inference time: {inference_time:.4f} s")
-    torch.cuda.empty_cache()
 if __name__ == '__main__':
     main()

 import argparse
 from diffueraser.diffueraser import DiffuEraser
 from propainter.inference import Propainter, get_device
+import devicetorch
 def main():
     inference_time = end_time - start_time
     print(f"DiffuEraser inference time: {inference_time:.4f} s")
+    devicetorch.empty_cache(torch)
 if __name__ == '__main__':
     main()