LPX55 committed
Commit 9bd73d1 · verified · 1 Parent(s): df39dda

Update app_v2v.py

Files changed (1): app_v2v.py +12 -1
app_v2v.py CHANGED
@@ -34,6 +34,7 @@ from diffusers_helper.gradio.progress_bar import make_progress_bar_css, make_pro
 from transformers import SiglipImageProcessor, SiglipVisionModel
 from diffusers_helper.clip_vision import hf_clip_vision_encode
 from diffusers_helper.bucket_tools import find_nearest_bucket
+from diffusers import BitsAndBytesConfig as DiffusersBitsAndBytesConfig, HunyuanVideoTransformer3DModel, HunyuanVideoPipeline
 
 parser = argparse.ArgumentParser()
 parser.add_argument('--share', action='store_true')
@@ -50,6 +51,8 @@ high_vram = free_mem_gb > 60
 print(f'Free VRAM {free_mem_gb} GB')
 print(f'High-VRAM Mode: {high_vram}')
 
+
+
 text_encoder = LlamaModel.from_pretrained("hunyuanvideo-community/HunyuanVideo", subfolder='text_encoder', torch_dtype=torch.float16).cpu()
 text_encoder_2 = CLIPTextModel.from_pretrained("hunyuanvideo-community/HunyuanVideo", subfolder='text_encoder_2', torch_dtype=torch.float16).cpu()
 tokenizer = LlamaTokenizerFast.from_pretrained("hunyuanvideo-community/HunyuanVideo", subfolder='tokenizer')
@@ -59,7 +62,15 @@ vae = AutoencoderKLHunyuanVideo.from_pretrained("hunyuanvideo-community/HunyuanV
 feature_extractor = SiglipImageProcessor.from_pretrained("lllyasviel/flux_redux_bfl", subfolder='feature_extractor')
 image_encoder = SiglipVisionModel.from_pretrained("lllyasviel/flux_redux_bfl", subfolder='image_encoder', torch_dtype=torch.float16).cpu()
 
-transformer = HunyuanVideoTransformer3DModelPacked.from_pretrained('lllyasviel/FramePack_F1_I2V_HY_20250503', torch_dtype=torch.bfloat16).cpu()
+quant_config = DiffusersBitsAndBytesConfig(load_in_8bit=True)
+transformer = HunyuanVideoTransformer3DModelPacked.from_pretrained(
+    "lllyasviel/FramePack_F1_I2V_HY_20250503",
+    subfolder="transformer",
+    quantization_config=quant_config,
+    torch_dtype=torch.bfloat16,
+).cpu()
+
+# transformer = HunyuanVideoTransformer3DModelPacked.from_pretrained('lllyasviel/FramePack_F1_I2V_HY_20250503', torch_dtype=torch.bfloat16).cpu()
 
 vae.eval()
 text_encoder.eval()
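
For context when adapting this change: diffusers exposes bitsandbytes quantization through a BitsAndBytesConfig passed to from_pretrained. A minimal standalone sketch of the same 8-bit loading pattern follows (the repo ID and subfolder are illustrative, and the bitsandbytes package plus a CUDA device are assumed):

import torch
from diffusers import BitsAndBytesConfig, HunyuanVideoTransformer3DModel

# 8-bit weight quantization via bitsandbytes; roughly halves the
# transformer's weight memory relative to bf16 at load time.
quant_config = BitsAndBytesConfig(load_in_8bit=True)

transformer = HunyuanVideoTransformer3DModel.from_pretrained(
    "hunyuanvideo-community/HunyuanVideo",  # illustrative repo ID
    subfolder="transformer",
    quantization_config=quant_config,
    torch_dtype=torch.bfloat16,  # dtype used for the non-quantized modules
)

One caveat worth flagging: bitsandbytes 8-bit weights are materialized on the GPU, and diffusers generally disallows moving a quantized model with .to()/.cpu(), so the trailing .cpu() in the diff above may need to be dropped when the quantized path is active.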