ovi054 committed on
Commit
ed7a1b2
·
verified ·
1 Parent(s): 75f8127

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +82 -165
app.py CHANGED
@@ -1,187 +1,104 @@
1
- import gradio as gr
2
- import spaces
3
  import torch
4
- from diffusers import HiDreamImagePipeline
5
- from transformers import PreTrainedTokenizerFast, LlamaForCausalLM
6
- import random
 
 
7
  import numpy as np
 
 
8
 
9
- # Set data type
10
- dtype = torch.bfloat16
11
- device = "cpu" # Use CPU for model loading to avoid CUDA initialization
12
 
13
- # Load tokenizer and text encoder for Llama
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
14
  try:
15
- tokenizer_4 = PreTrainedTokenizerFast.from_pretrained("meta-llama/Meta-Llama-3.1-8B-Instruct")
16
- text_encoder_4 = LlamaForCausalLM.from_pretrained(
17
- "meta-llama/Meta-Llama-3.1-8B-Instruct",
18
- output_hidden_states=True,
19
- output_attentions=True,
20
- attn_implementation="eager",
21
- torch_dtype=dtype,
22
- ).to(device)
23
  except Exception as e:
24
- raise Exception(f"Failed to load Llama model: {e}. Ensure you have access to 'meta-llama/Meta-Llama-3.1-8B-Instruct' and are logged in via `huggingface-cli login`.")
25
 
26
- # Load the HiDreamImagePipeline
 
 
 
 
27
  try:
28
- pipe = HiDreamImagePipeline.from_pretrained(
29
- "HiDream-ai/HiDream-I1-Fast",
30
- tokenizer_4=tokenizer_4,
31
- text_encoder_4=text_encoder_4,
32
- torch_dtype=dtype,
33
- ).to(device)
34
- pipe.enable_model_cpu_offload() # Offload to CPU, automatically manages GPU placement
35
  except Exception as e:
36
- raise Exception(f"Failed to load HiDreamImagePipeline: {e}. Ensure you have access to 'HiDream-ai/HiDream-I1-Full'.")
 
37
 
38
- # Define maximum values
39
- MAX_SEED = np.iinfo(np.int32).max
40
- MAX_IMAGE_SIZE = 2048
41
 
42
- # Inference function with GPU access
43
  @spaces.GPU()
44
- def infer(prompt, negative_prompt="", seed=42, randomize_seed=False, width=1024, height=1024, num_inference_steps=16, guidance_scale=3.5, progress=gr.Progress(track_tqdm=True)):
45
- pipe.to("cuda")
 
 
 
 
 
 
 
 
 
 
 
46
  try:
47
- if randomize_seed:
48
- seed = random.randint(0, MAX_SEED)
49
- generator = torch.Generator("cuda").manual_seed(seed)
50
-
51
- # Generate the image; offloading handles device placement
52
- image = pipe(
53
  prompt=prompt,
54
  negative_prompt=negative_prompt,
55
  height=height,
56
  width=width,
 
57
  num_inference_steps=num_inference_steps,
58
- guidance_scale=guidance_scale,
59
- generator=generator,
60
- output_type="pil",
61
- ).images[0]
62
-
63
- return image, seed
64
  finally:
65
- # Clear GPU memory
66
- torch.cuda.empty_cache()
 
 
67
 
68
- # Define examples
69
- examples = [
70
- ["A cat holding a sign that says \"Hi-Dreams.ai\".", ""],
71
- ["A futuristic cityscape with flying cars.", "blurry, low quality"],
72
- ["A serene landscape with mountains and a lake.", ""],
73
- ]
74
 
75
- # CSS styling
76
- css = """
77
- #col-container {
78
- margin: 0 auto;
79
- max-width: 960px;
80
- }
81
- .generate-btn {
82
- background: linear-gradient(90deg, #4B79A1 0%, #283E51 100%) !important;
83
- border: none !important;
84
- color: white !important;
85
- }
86
- .generate-btn:hover {
87
- transform: translateY2px);
88
- box-shadow: 0 5px 15px rgba(0,0,0,0.2);
89
- }
90
- """
91
-
92
- # Create Gradio interface
93
- with gr.Blocks(css=css) as app:
94
- gr.HTML("<center><h1>HiDreamImage Generator</h1></center>")
95
- with gr.Column(elem_id="col-container"):
96
- with gr.Row():
97
- with gr.Column():
98
- with gr.Row():
99
- text_prompt = gr.Textbox(
100
- label="Prompt",
101
- placeholder="Enter a prompt here",
102
- lines=3,
103
- elem_id="prompt-text-input"
104
- )
105
- with gr.Row():
106
- negative_prompt = gr.Textbox(
107
- label="Negative Prompt",
108
- placeholder="Enter what to avoid (optional)",
109
- lines=2
110
- )
111
- with gr.Row():
112
- with gr.Accordion("Advanced Settings", open=False):
113
- with gr.Row():
114
- width = gr.Slider(
115
- label="Width",
116
- value=1024,
117
- minimum=64,
118
- maximum=MAX_IMAGE_SIZE,
119
- step=8
120
- )
121
- height = gr.Slider(
122
- label="Height",
123
- value=1024,
124
- minimum=64,
125
- maximum=MAX_IMAGE_SIZE,
126
- step=8
127
- )
128
- with gr.Row():
129
- steps = gr.Slider(
130
- label="Inference Steps",
131
- value=16,
132
- minimum=1,
133
- maximum=100,
134
- step=1
135
- )
136
- cfg = gr.Slider(
137
- label="Guidance Scale",
138
- value=3.5,
139
- minimum=1,
140
- maximum=20,
141
- step=0.5
142
- )
143
- with gr.Row():
144
- seed = gr.Slider(
145
- label="Seed",
146
- value=42,
147
- minimum=0,
148
- maximum=MAX_SEED,
149
- step=1
150
- )
151
- randomize_seed = gr.Checkbox(
152
- label="Randomize Seed",
153
- value=True
154
- )
155
- with gr.Row():
156
- text_button = gr.Button(
157
- "✨ Generate Image",
158
- variant='primary',
159
- elem_classes=["generate-btn"]
160
- )
161
- with gr.Column():
162
- with gr.Row():
163
- image_output = gr.Image(
164
- type="pil",
165
- label="Generated Image",
166
- elem_id="gallery"
167
- )
168
- seed_output = gr.Textbox(
169
- label="Seed Used",
170
- interactive=False
171
- )
172
-
173
- with gr.Column():
174
- gr.Examples(
175
- examples=examples,
176
- inputs=[text_prompt, negative_prompt],
177
- )
178
-
179
- # Connect the button and textbox submit to the inference function
180
- gr.on(
181
- triggers=[text_button.click, text_prompt.submit],
182
- fn=infer,
183
- inputs=[text_prompt, negative_prompt, seed, randomize_seed, width, height, steps, cfg],
184
- outputs=[image_output, seed_output]
185
- )
186
 
187
- app.launch(share=True)
 
 
 
1
  import torch
2
+ from diffusers import UniPCMultistepScheduler
3
+ from diffusers import WanPipeline, AutoencoderKLWan
4
+ from para_attn.first_block_cache.diffusers_adapters import apply_cache_on_pipe
5
+ from huggingface_hub import hf_hub_download
6
+ from PIL import Image
7
  import numpy as np
8
+ import gradio as gr
9
+ import spaces
10
 
11
+ device = "cuda" if torch.cuda.is_available() else "cpu"
 
 
12
 
13
+ # --- MODEL SETUP ---
14
+ model_id = "Wan-AI/Wan2.1-T2V-14B-Diffusers"
15
+ vae = AutoencoderKLWan.from_pretrained(model_id, subfolder="vae", torch_dtype=torch.float32)
16
+ pipe = WanPipeline.from_pretrained(model_id, vae=vae, torch_dtype=torch.bfloat16)
17
+ flow_shift = 1.0
18
+ pipe.scheduler = UniPCMultistepScheduler.from_config(pipe.scheduler.config, flow_shift=flow_shift)
19
+
20
+ pipe.to(device)
21
+
22
+ # --- LORA SETUP ---
23
+ # Define unique names for our adapters
24
+ LORA_1_NAME = "causvid_lora"
25
+ LORA_2_NAME = "person_lora"
26
+
27
+ # 1. Load the first base LoRA ONCE at startup
28
+ print("Loading first LoRA (CausVid)...")
29
+ LORA_1_REPO = "Kijai/WanVideo_comfy"
30
+ LORA_1_FILENAME = "Wan21_CausVid_14B_T2V_lora_rank32_v2.safetensors"
31
  try:
32
+ lora_1_path = hf_hub_download(repo_id=LORA_1_REPO, filename=LORA_1_FILENAME)
33
+ # The `device_map="auto"` can sometimes help in tricky environments
34
+ pipe.load_lora_weights(lora_1_path, adapter_name=LORA_1_NAME, device_map="auto")
35
+ print(f"✅ Default LoRA '{LORA_1_NAME}' loaded successfully.")
 
 
 
 
36
  except Exception as e:
37
+ print(f"⚠️ Default LoRA '{LORA_1_NAME}' could not be loaded: {e}")
38
 
39
+ # 2. Load the second hard-coded LoRA ONCE at startup
40
+ print("Loading second LoRA (Person)...")
41
+ LORA_2_REPO = "ovi054/p3r5onVid1900"
42
+ # Assuming the file is named "pytorch_lora_weights.safetensors" which is standard.
43
+ # If it has a different name, you must specify it with the `weight_name` argument.
44
  try:
45
+ # We load the whole repository and diffusers will find the correct file
46
+ pipe.load_lora_weights(LORA_2_REPO, adapter_name=LORA_2_NAME, device_map="auto")
47
+ print(f"✅ Second LoRA '{LORA_2_NAME}' loaded successfully.")
 
 
 
 
48
  except Exception as e:
49
+ print(f"⚠️ Second LoRA '{LORA_2_NAME}' could not be loaded: {e}")
50
+
51
 
52
+ print("Initialization complete. Gradio is starting...")
 
 
53
 
 
54
  @spaces.GPU()
55
+ def generate(prompt, negative_prompt, width=1024, height=1024, num_inference_steps=30, lora_id=None, progress=gr.Progress(track_tqdm=True)):
56
+
57
+ # --- Activate both hard-coded LoRAs for this run ---
58
+ # We set the adapters at the start of every generation to ensure the state is correct.
59
+ print("Activating both LoRAs for inference...")
60
+ # You can adjust the weights here to change the intensity of each LoRA.
61
+ # For example, [1.0, 0.8] would make the second LoRA less strong.
62
+ pipe.set_adapters([LORA_1_NAME, LORA_2_NAME], adapter_weights=[1.0, 1.0])
63
+
64
+ apply_cache_on_pipe(
65
+ pipe,
66
+ )
67
+
68
  try:
69
+ output = pipe(
 
 
 
 
 
70
  prompt=prompt,
71
  negative_prompt=negative_prompt,
72
  height=height,
73
  width=width,
74
+ num_frames=1,
75
  num_inference_steps=num_inference_steps,
76
+ guidance_scale=1.0,
77
+ )
78
+ image = output.frames[0][0]
79
+ image = (image * 255).astype(np.uint8)
80
+ return Image.fromarray(image)
 
81
  finally:
82
+ # It's good practice to disable the adapters after the run,
83
+ # although set_adapters() at the start also handles this.
84
+ print("Disabling LoRAs after run.")
85
+ pipe.disable_lora()
86
 
 
 
 
 
 
 
87
 
88
+ iface = gr.Interface(
89
+ fn=generate,
90
+ inputs=[
91
+ gr.Textbox(label="Input prompt"),
92
+ ],
93
+ additional_inputs = [
94
+ gr.Textbox(label="Negative prompt", value = "Bright tones, overexposed, static, blurred details, subtitles, style, works, paintings, images, static, overall gray, worst quality, low quality, JPEG compression residue, ugly, incomplete, extra fingers, poorly drawn hands, poorly drawn faces, deformed, disfigured, misshapen limbs, fused fingers, still picture, messy background, three legs, many people in the background, walking backwards"),
95
+ gr.Slider(label="Width", minimum=480, maximum=1280, step=16, value=1024),
96
+ gr.Slider(label="Height", minimum=480, maximum=1280, step=16, value=1024),
97
+ gr.Slider(minimum=1, maximum=80, step=1, label="Inference Steps", value=10),
98
+ gr.Textbox(label="LoRA ID", visible=False), # Hiding the dynamic LoRA input for now
99
+ ],
100
+ outputs=gr.Image(label="output"),
101
+ title="Wan 2.1 Generator with Two Hard-coded LoRAs",
102
+ )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
103
 
104
+ iface.launch()