Update app.py
app.py CHANGED
@@ -25,6 +25,8 @@ from torchvision import transforms
 
 import subprocess
 
+device = "cuda" if torch.cuda.is_available() else "cpu"
+
 def download_model():
     command = "huggingface-cli download --resume-download THUDM/CogVideoX-5b-I2V --local-dir ./cogvideox-5b-i2v"
     process = subprocess.run(command, shell=True, check=True)
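This hunk makes the target device dynamic, falling back to CPU when no GPU is visible. For reference, the same checkpoint fetch can be done in-process with huggingface_hub.snapshot_download instead of shelling out to the CLI; a minimal sketch, not part of this commit:

from huggingface_hub import snapshot_download

def download_model():
    # Resumable snapshot of the same repo into the same local directory
    # the app expects; equivalent in effect to the huggingface-cli call.
    snapshot_download(
        repo_id="THUDM/CogVideoX-5b-I2V",
        local_dir="./cogvideox-5b-i2v",
    )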
@@ -60,9 +62,9 @@ def init_pipe():
         )
     )
 
-    text_encoder = T5EncoderModel.from_pretrained(f"./cogvideox-5b-{key}/", subfolder="text_encoder", torch_dtype=torch.float16)
-    vae = AutoencoderKLCogVideoX.from_pretrained(f"./cogvideox-5b-{key}/", subfolder="vae", torch_dtype=torch.float16).
-    tokenizer = T5Tokenizer.from_pretrained(f"./cogvideox-5b-{key}/tokenizer", torch_dtype=torch.float16)
+    text_encoder = T5EncoderModel.from_pretrained(f"./cogvideox-5b-{key}/", subfolder="text_encoder", torch_dtype=torch.float16)
+    vae = AutoencoderKLCogVideoX.from_pretrained(f"./cogvideox-5b-{key}/", subfolder="vae", torch_dtype=torch.float16).to(device)
+    tokenizer = T5Tokenizer.from_pretrained(f"./cogvideox-5b-{key}/tokenizer", torch_dtype=torch.float16)
 
 
     config = OmegaConf.to_container(
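The encoder, VAE, and tokenizer are loaded piecemeal here because the app assembles a custom ControlNet pipeline further down; only the VAE is moved to the shared device at load time. For comparison, a stock image-to-video setup would load everything in one call; a hedged sketch using the standard diffusers pipeline class, which is not necessarily what this app wires up:

import torch
from diffusers import CogVideoXImageToVideoPipeline

device = "cuda" if torch.cuda.is_available() else "cpu"

# One-call load of the same checkpoint directory download_model() creates;
# fp16 matches the dtypes used in the hand-rolled loading above.
pipe = CogVideoXImageToVideoPipeline.from_pretrained(
    "./cogvideox-5b-i2v", torch_dtype=torch.float16
)
pipe.to(device)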
@@ -92,8 +94,8 @@ def init_pipe():
     transformer.load_state_dict(transformer_state_dict, strict=True)
     controlnet_transformer.load_state_dict(controlnet_transformer_state_dict, strict=True)
 
-    transformer = transformer.half().to(
-    controlnet_transformer = controlnet_transformer.half().to(
+    transformer = transformer.half().to(device)
+    controlnet_transformer = controlnet_transformer.half().to(device)
 
     vae = vae.eval()
     text_encoder = text_encoder.eval()
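One caveat with this fallback: .half() is kept unconditionally, but several fp16 kernels are unsupported or very slow on CPU, so runs that actually hit the "cpu" branch may fail. A common refinement, sketched below with a toy module standing in for the transformer, is to pick the dtype together with the device:

import torch
import torch.nn as nn

device = "cuda" if torch.cuda.is_available() else "cpu"
# Use fp16 only when a GPU is present; fall back to fp32 on CPU.
dtype = torch.float16 if device == "cuda" else torch.float32

model = nn.Linear(8, 8).to(device=device, dtype=dtype).eval()  # toy stand-in
x = torch.randn(1, 8, device=device, dtype=dtype)              # inputs must match dtype/device
with torch.no_grad():                                          # inference only, no autograd state
    y = model(x)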
@@ -160,7 +162,7 @@ def inference(source_images,
         num_inference_steps = 50,
         interval = 6,
         guidance_scale = guidance_scale,
-        generator = torch.Generator(device=
+        generator = torch.Generator(device=device).manual_seed(random_seed)
     ).frames[0]
 
     return video
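The new generator line binds the noise source to the selected device and seeds it, making repeated runs with the same inputs reproducible. A self-contained illustration of the pattern, with 42 standing in for the app's random_seed:

import torch

device = "cuda" if torch.cuda.is_available() else "cpu"
generator = torch.Generator(device=device).manual_seed(42)

# Two draws from identically seeded generators are identical tensors.
a = torch.randn(4, generator=generator, device=device)
b = torch.randn(4, generator=torch.Generator(device=device).manual_seed(42), device=device)
assert torch.equal(a, b)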
@@ -202,7 +204,7 @@ def process_video(video_file, image_file, positive_prompt, negative_prompt, guid
     first_frame=transforms.ToTensor()(video[-1])
     first_frame = first_frame*255.0
     first_frame = rearrange(first_frame,"c w h -> w h c")
-    source_images=source_images
+    source_images=source_images
     target_images=first_frame[None,None,...]
 
     video+=inference(source_images, \
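The frame prep around the changed line converts the last decoded frame back into the pixel-range, channels-last layout the next inference pass expects. A shape-by-shape sketch with a dummy frame (sizes are arbitrary; the "c w h" pattern merely moves channels last, since ToTensor actually returns (C, H, W)):

import torch
from PIL import Image
from einops import rearrange
from torchvision import transforms

frame = Image.new("RGB", (720, 480))       # dummy 720x480 frame
t = transforms.ToTensor()(frame)           # (3, 480, 720), floats in [0, 1]
t = t * 255.0                              # back to 0-255 pixel range
t = rearrange(t, "c w h -> w h c")         # (480, 720, 3), channels last
target_images = t[None, None, ...]         # (1, 1, 480, 720, 3) batch/time dims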