Spaces: Running on Zero
Update app_v2v.py
Browse files
- app_v2v.py +12 -1
app_v2v.py
CHANGED
@@ -34,6 +34,7 @@ from diffusers_helper.gradio.progress_bar import make_progress_bar_css, make_pro
|
|
34 |
from transformers import SiglipImageProcessor, SiglipVisionModel
|
35 |
from diffusers_helper.clip_vision import hf_clip_vision_encode
|
36 |
from diffusers_helper.bucket_tools import find_nearest_bucket
|
|
|
37 |
|
38 |
parser = argparse.ArgumentParser()
|
39 |
parser.add_argument('--share', action='store_true')
|
@@ -50,6 +51,8 @@ high_vram = free_mem_gb > 60
|
|
50 |
print(f'Free VRAM {free_mem_gb} GB')
|
51 |
print(f'High-VRAM Mode: {high_vram}')
|
52 |
|
|
|
|
|
53 |
text_encoder = LlamaModel.from_pretrained("hunyuanvideo-community/HunyuanVideo", subfolder='text_encoder', torch_dtype=torch.float16).cpu()
|
54 |
text_encoder_2 = CLIPTextModel.from_pretrained("hunyuanvideo-community/HunyuanVideo", subfolder='text_encoder_2', torch_dtype=torch.float16).cpu()
|
55 |
tokenizer = LlamaTokenizerFast.from_pretrained("hunyuanvideo-community/HunyuanVideo", subfolder='tokenizer')
|
@@ -59,7 +62,15 @@ vae = AutoencoderKLHunyuanVideo.from_pretrained("hunyuanvideo-community/HunyuanV
|
|
59 |
feature_extractor = SiglipImageProcessor.from_pretrained("lllyasviel/flux_redux_bfl", subfolder='feature_extractor')
|
60 |
image_encoder = SiglipVisionModel.from_pretrained("lllyasviel/flux_redux_bfl", subfolder='image_encoder', torch_dtype=torch.float16).cpu()
|
61 |
|
62 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
63 |
|
64 |
vae.eval()
|
65 |
text_encoder.eval()
|
|
|
34 |
from transformers import SiglipImageProcessor, SiglipVisionModel
|
35 |
from diffusers_helper.clip_vision import hf_clip_vision_encode
|
36 |
from diffusers_helper.bucket_tools import find_nearest_bucket
|
37 |
+
from diffusers import BitsAndBytesConfig as DiffusersBitsAndBytesConfig, HunyuanVideoTransformer3DModel, HunyuanVideoPipeline
|
38 |
|
39 |
parser = argparse.ArgumentParser()
|
40 |
parser.add_argument('--share', action='store_true')
|
|
|
51 |
print(f'Free VRAM {free_mem_gb} GB')
|
52 |
print(f'High-VRAM Mode: {high_vram}')
|
53 |
|
54 |
+
|
55 |
+
|
56 |
text_encoder = LlamaModel.from_pretrained("hunyuanvideo-community/HunyuanVideo", subfolder='text_encoder', torch_dtype=torch.float16).cpu()
|
57 |
text_encoder_2 = CLIPTextModel.from_pretrained("hunyuanvideo-community/HunyuanVideo", subfolder='text_encoder_2', torch_dtype=torch.float16).cpu()
|
58 |
tokenizer = LlamaTokenizerFast.from_pretrained("hunyuanvideo-community/HunyuanVideo", subfolder='tokenizer')
|
|
|
62 |
feature_extractor = SiglipImageProcessor.from_pretrained("lllyasviel/flux_redux_bfl", subfolder='feature_extractor')
|
63 |
image_encoder = SiglipVisionModel.from_pretrained("lllyasviel/flux_redux_bfl", subfolder='image_encoder', torch_dtype=torch.float16).cpu()
|
64 |
|
65 |
+
quant_config = DiffusersBitsAndBytesConfig(load_in_8bit=True)
|
66 |
+
transformer = HunyuanVideoTransformer3DModelPacked.from_pretrained(
|
67 |
+
"lllyasviel/FramePack_F1_I2V_HY_20250503",
|
68 |
+
subfolder="transformer",
|
69 |
+
quantization_config=quant_config,
|
70 |
+
torch_dtype=torch.bfloat16,
|
71 |
+
).cpu()
|
72 |
+
|
73 |
+
# transformer = HunyuanVideoTransformer3DModelPacked.from_pretrained('lllyasviel/FramePack_F1_I2V_HY_20250503', torch_dtype=torch.bfloat16).cpu()
|
74 |
|
75 |
vae.eval()
|
76 |
text_encoder.eval()
|