Update app.py
app.py CHANGED
@@ -25,6 +25,8 @@ from torchvision import transforms
 
 import subprocess
 
+device = "cuda" if torch.cuda.is_available() else "cpu"
+
 def download_model():
     command = "huggingface-cli download --resume-download THUDM/CogVideoX-5b-I2V --local-dir ./cogvideox-5b-i2v"
     process = subprocess.run(command, shell=True, check=True)
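This hunk makes the target device dynamic, falling back to CPU when no GPU is visible. For reference, the same checkpoint fetch can be done in-process with huggingface_hub.snapshot_download instead of shelling out to the CLI; a minimal sketch, not part of this commit:

from huggingface_hub import snapshot_download

def download_model():
    # Resumable snapshot of the same repo into the same local directory
    # the app expects; equivalent in effect to the huggingface-cli call.
    snapshot_download(
        repo_id="THUDM/CogVideoX-5b-I2V",
        local_dir="./cogvideox-5b-i2v",
    )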
@@ -60,9 +62,9 @@ def init_pipe():
         )
     )
 
-    text_encoder = T5EncoderModel.from_pretrained(f"./cogvideox-5b-{key}/", subfolder="text_encoder", torch_dtype=torch.float16)
-    vae = AutoencoderKLCogVideoX.from_pretrained(f"./cogvideox-5b-{key}/", subfolder="vae", torch_dtype=torch.float16).
-    tokenizer = T5Tokenizer.from_pretrained(f"./cogvideox-5b-{key}/tokenizer", torch_dtype=torch.float16)
+    text_encoder = T5EncoderModel.from_pretrained(f"./cogvideox-5b-{key}/", subfolder="text_encoder", torch_dtype=torch.float16)
+    vae = AutoencoderKLCogVideoX.from_pretrained(f"./cogvideox-5b-{key}/", subfolder="vae", torch_dtype=torch.float16).to(device)
+    tokenizer = T5Tokenizer.from_pretrained(f"./cogvideox-5b-{key}/tokenizer", torch_dtype=torch.float16)
 
 
     config = OmegaConf.to_container(
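The encoder, VAE, and tokenizer are loaded piecemeal here because the app assembles a custom ControlNet pipeline further down; only the VAE is moved to the shared device at load time. For comparison, a stock image-to-video setup would load everything in one call; a hedged sketch using the standard diffusers pipeline class, which is not necessarily what this app wires up:

import torch
from diffusers import CogVideoXImageToVideoPipeline

device = "cuda" if torch.cuda.is_available() else "cpu"

# One-call load of the same checkpoint directory download_model() creates;
# fp16 matches the dtypes used in the hand-rolled loading above.
pipe = CogVideoXImageToVideoPipeline.from_pretrained(
    "./cogvideox-5b-i2v", torch_dtype=torch.float16
)
pipe.to(device)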
@@ -92,8 +94,8 @@ def init_pipe():
     transformer.load_state_dict(transformer_state_dict, strict=True)
     controlnet_transformer.load_state_dict(controlnet_transformer_state_dict, strict=True)
 
-    transformer = transformer.half().to(
-    controlnet_transformer = controlnet_transformer.half().to(
+    transformer = transformer.half().to(device)
+    controlnet_transformer = controlnet_transformer.half().to(device)
 
     vae = vae.eval()
     text_encoder = text_encoder.eval()
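One caveat with this fallback: .half() is kept unconditionally, but several fp16 kernels are unsupported or very slow on CPU, so runs that actually hit the "cpu" branch may fail. A common refinement, sketched below with a toy module standing in for the transformer, is to pick the dtype together with the device:

import torch
import torch.nn as nn

device = "cuda" if torch.cuda.is_available() else "cpu"
# Use fp16 only when a GPU is present; fall back to fp32 on CPU.
dtype = torch.float16 if device == "cuda" else torch.float32

model = nn.Linear(8, 8).to(device=device, dtype=dtype).eval()  # toy stand-in
x = torch.randn(1, 8, device=device, dtype=dtype)              # inputs must match dtype/device
with torch.no_grad():                                          # inference only, no autograd state
    y = model(x)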
@@ -160,7 +162,7 @@ def inference(source_images,
         num_inference_steps = 50,
         interval = 6,
         guidance_scale = guidance_scale,
-        generator = torch.Generator(device=
+        generator = torch.Generator(device=device).manual_seed(random_seed)
     ).frames[0]
 
     return video
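The new generator line binds the noise source to the selected device and seeds it, making repeated runs with the same inputs reproducible. A self-contained illustration of the pattern, with 42 standing in for the app's random_seed:

import torch

device = "cuda" if torch.cuda.is_available() else "cpu"
generator = torch.Generator(device=device).manual_seed(42)

# Two draws from identically seeded generators are identical tensors.
a = torch.randn(4, generator=generator, device=device)
b = torch.randn(4, generator=torch.Generator(device=device).manual_seed(42), device=device)
assert torch.equal(a, b)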
@@ -202,7 +204,7 @@ def process_video(video_file, image_file, positive_prompt, negative_prompt, guid
     first_frame=transforms.ToTensor()(video[-1])
     first_frame = first_frame*255.0
     first_frame = rearrange(first_frame,"c w h -> w h c")
-    source_images=source_images
+    source_images=source_images
     target_images=first_frame[None,None,...]
 
     video+=inference(source_images, \
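The frame prep around the changed line converts the last decoded frame back into the pixel-range, channels-last layout the next inference pass expects. A shape-by-shape sketch with a dummy frame (sizes are arbitrary; the "c w h" pattern merely moves channels last, since ToTensor actually returns (C, H, W)):

import torch
from PIL import Image
from einops import rearrange
from torchvision import transforms

frame = Image.new("RGB", (720, 480))       # dummy 720x480 frame
t = transforms.ToTensor()(frame)           # (3, 480, 720), floats in [0, 1]
t = t * 255.0                              # back to 0-255 pixel range
t = rearrange(t, "c w h -> w h c")         # (480, 720, 3), channels last
target_images = t[None, None, ...]         # (1, 1, 480, 720, 3) batch/time dims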