Spaces:
Running
on
Zero
Running
on
Zero
Update app.py
Browse files
app.py
CHANGED
@@ -9,22 +9,27 @@ from huggingface_hub import hf_hub_download
|
|
9 |
import numpy as np
|
10 |
from PIL import Image
|
11 |
import random
|
12 |
-
import logging
|
13 |
-
import gc
|
14 |
|
15 |
-
# 로깅 설정
|
16 |
-
logging.basicConfig(level=logging.INFO)
|
17 |
-
logger = logging.getLogger(__name__)
|
18 |
-
|
19 |
-
# 모델 설정
|
20 |
MODEL_ID = "Wan-AI/Wan2.1-I2V-14B-480P-Diffusers"
|
21 |
LORA_REPO_ID = "Kijai/WanVideo_comfy"
|
22 |
LORA_FILENAME = "Wan21_CausVid_14B_T2V_lora_rank32.safetensors"
|
23 |
|
24 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
25 |
MOD_VALUE = 32
|
26 |
DEFAULT_H_SLIDER_VALUE = 512
|
27 |
-
DEFAULT_W_SLIDER_VALUE =
|
28 |
NEW_FORMULA_MAX_AREA = 480.0 * 832.0
|
29 |
|
30 |
SLIDER_MIN_H, SLIDER_MAX_H = 128, 896
|
@@ -36,36 +41,166 @@ MIN_FRAMES_MODEL = 8
|
|
36 |
MAX_FRAMES_MODEL = 81
|
37 |
|
38 |
default_prompt_i2v = "make this image come alive, cinematic motion, smooth animation"
|
39 |
-
default_negative_prompt = "static, blurred, low quality, watermark, text"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
40 |
|
41 |
-
|
42 |
-
|
43 |
-
|
44 |
-
|
45 |
-
|
46 |
-
|
47 |
-
|
48 |
-
|
49 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
50 |
|
51 |
-
|
52 |
-
|
53 |
-
|
54 |
-
|
55 |
-
|
56 |
-
|
57 |
-
|
58 |
-
|
59 |
-
|
60 |
-
|
61 |
-
|
62 |
-
|
63 |
-
|
64 |
-
|
65 |
-
|
66 |
-
|
67 |
-
|
68 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
69 |
|
70 |
def _calculate_new_dimensions_wan(pil_image, mod_val, calculation_max_area,
|
71 |
min_slider_h, max_slider_h,
|
@@ -77,22 +212,12 @@ def _calculate_new_dimensions_wan(pil_image, mod_val, calculation_max_area,
|
|
77 |
|
78 |
aspect_ratio = orig_h / orig_w
|
79 |
|
80 |
-
# Zero GPU를 위한 보수적인 계산
|
81 |
-
if hasattr(spaces, 'GPU'):
|
82 |
-
# 더 작은 max_area 사용
|
83 |
-
calculation_max_area = min(calculation_max_area, 320.0 * 320.0)
|
84 |
-
|
85 |
calc_h = round(np.sqrt(calculation_max_area * aspect_ratio))
|
86 |
calc_w = round(np.sqrt(calculation_max_area / aspect_ratio))
|
87 |
|
88 |
calc_h = max(mod_val, (calc_h // mod_val) * mod_val)
|
89 |
calc_w = max(mod_val, (calc_w // mod_val) * mod_val)
|
90 |
|
91 |
-
# Zero GPU 환경에서 추가 제한
|
92 |
-
if hasattr(spaces, 'GPU'):
|
93 |
-
max_slider_h = min(max_slider_h, 640)
|
94 |
-
max_slider_w = min(max_slider_w, 640)
|
95 |
-
|
96 |
new_h = int(np.clip(calc_h, min_slider_h, (max_slider_h // mod_val) * mod_val))
|
97 |
new_w = int(np.clip(calc_w, min_slider_w, (max_slider_w // mod_val) * mod_val))
|
98 |
|
@@ -117,25 +242,12 @@ def get_duration(input_image, prompt, height, width,
|
|
117 |
guidance_scale, steps,
|
118 |
seed, randomize_seed,
|
119 |
progress):
|
120 |
-
|
121 |
-
|
122 |
-
|
123 |
-
|
124 |
-
# Zero GPU 환경에서 더 많은 시간 할당
|
125 |
-
if steps > 4 and duration_seconds > 2:
|
126 |
-
return 90
|
127 |
-
elif steps > 4 or duration_seconds > 2:
|
128 |
-
return 80
|
129 |
-
else:
|
130 |
-
return 70
|
131 |
else:
|
132 |
-
|
133 |
-
if steps > 4 and duration_seconds > 2:
|
134 |
-
return 90
|
135 |
-
elif steps > 4 or duration_seconds > 2:
|
136 |
-
return 75
|
137 |
-
else:
|
138 |
-
return 60
|
139 |
|
140 |
@spaces.GPU(duration=get_duration)
|
141 |
def generate_video(input_image, prompt, height, width,
|
@@ -146,424 +258,148 @@ def generate_video(input_image, prompt, height, width,
|
|
146 |
|
147 |
if input_image is None:
|
148 |
raise gr.Error("Please upload an input image.")
|
149 |
-
|
150 |
-
# Zero GPU 환경에서 추가 검증
|
151 |
-
if hasattr(spaces, 'GPU'):
|
152 |
-
# 픽셀 제한
|
153 |
-
max_pixels = 409600 # 640x640
|
154 |
-
if height * width > max_pixels:
|
155 |
-
raise gr.Error(f"Resolution too high for Zero GPU. Maximum {max_pixels:,} pixels (e.g., 640×640)")
|
156 |
-
|
157 |
-
# Duration 제한
|
158 |
-
if duration_seconds > 2.5:
|
159 |
-
duration_seconds = 2.5
|
160 |
-
gr.Warning("Duration limited to 2.5s in Zero GPU environment")
|
161 |
-
|
162 |
-
# Steps 제한
|
163 |
-
if steps > 8:
|
164 |
-
steps = 8
|
165 |
-
gr.Warning("Steps limited to 8 in Zero GPU environment")
|
166 |
|
167 |
target_h = max(MOD_VALUE, (int(height) // MOD_VALUE) * MOD_VALUE)
|
168 |
target_w = max(MOD_VALUE, (int(width) // MOD_VALUE) * MOD_VALUE)
|
169 |
|
170 |
num_frames = np.clip(int(round(duration_seconds * FIXED_FPS)), MIN_FRAMES_MODEL, MAX_FRAMES_MODEL)
|
171 |
|
172 |
-
# Zero GPU에서 프레임 수 추가 제한
|
173 |
-
if hasattr(spaces, 'GPU'):
|
174 |
-
max_frames_zerogpu = int(2.5 * FIXED_FPS) # 2.5초
|
175 |
-
num_frames = min(num_frames, max_frames_zerogpu)
|
176 |
-
|
177 |
current_seed = random.randint(0, MAX_SEED) if randomize_seed else int(seed)
|
178 |
|
179 |
-
|
180 |
-
|
181 |
-
# 이미지 리사이즈
|
182 |
-
resized_image = input_image.resize((target_w, target_h), Image.Resampling.LANCZOS)
|
183 |
|
184 |
-
|
185 |
-
|
186 |
-
|
187 |
-
|
188 |
-
|
189 |
-
|
190 |
-
|
191 |
-
width=target_w,
|
192 |
-
num_frames=num_frames,
|
193 |
-
guidance_scale=float(guidance_scale),
|
194 |
-
num_inference_steps=int(steps),
|
195 |
-
generator=torch.Generator(device="cuda").manual_seed(current_seed)
|
196 |
-
).frames[0]
|
197 |
-
except torch.cuda.OutOfMemoryError:
|
198 |
-
gc.collect()
|
199 |
-
torch.cuda.empty_cache()
|
200 |
-
raise gr.Error("GPU out of memory. Try smaller resolution or shorter duration.")
|
201 |
-
except Exception as e:
|
202 |
-
logger.error(f"Generation failed: {e}")
|
203 |
-
raise gr.Error(f"Video generation failed: {str(e)[:100]}")
|
204 |
|
205 |
-
# 비디오 저장
|
206 |
with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as tmpfile:
|
207 |
video_path = tmpfile.name
|
208 |
export_to_video(output_frames_list, video_path, fps=FIXED_FPS)
|
209 |
-
|
210 |
-
# 메모리 정리
|
211 |
-
del output_frames_list
|
212 |
-
gc.collect()
|
213 |
-
if torch.cuda.is_available():
|
214 |
-
torch.cuda.empty_cache()
|
215 |
-
|
216 |
return video_path, current_seed
|
217 |
|
218 |
-
|
219 |
-
|
220 |
-
.
|
221 |
-
max-width: 1200px;
|
222 |
-
margin: auto;
|
223 |
-
padding: 20px;
|
224 |
-
}
|
225 |
-
|
226 |
-
.header {
|
227 |
-
text-align: center;
|
228 |
-
margin-bottom: 30px;
|
229 |
-
background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
|
230 |
-
padding: 40px;
|
231 |
-
border-radius: 20px;
|
232 |
-
color: white;
|
233 |
-
box-shadow: 0 10px 30px rgba(0,0,0,0.2);
|
234 |
-
position: relative;
|
235 |
-
overflow: hidden;
|
236 |
-
}
|
237 |
-
|
238 |
-
.header::before {
|
239 |
-
content: '';
|
240 |
-
position: absolute;
|
241 |
-
top: -50%;
|
242 |
-
left: -50%;
|
243 |
-
width: 200%;
|
244 |
-
height: 200%;
|
245 |
-
background: radial-gradient(circle, rgba(255,255,255,0.1) 0%, transparent 70%);
|
246 |
-
animation: pulse 4s ease-in-out infinite;
|
247 |
-
}
|
248 |
-
|
249 |
-
@keyframes pulse {
|
250 |
-
0%, 100% { transform: scale(1); opacity: 0.5; }
|
251 |
-
50% { transform: scale(1.1); opacity: 0.8; }
|
252 |
-
}
|
253 |
-
|
254 |
-
.header h1 {
|
255 |
-
font-size: 3em;
|
256 |
-
margin-bottom: 10px;
|
257 |
-
text-shadow: 2px 2px 4px rgba(0,0,0,0.3);
|
258 |
-
position: relative;
|
259 |
-
z-index: 1;
|
260 |
-
}
|
261 |
-
|
262 |
-
.header p {
|
263 |
-
font-size: 1.2em;
|
264 |
-
opacity: 0.95;
|
265 |
-
position: relative;
|
266 |
-
z-index: 1;
|
267 |
-
}
|
268 |
-
|
269 |
-
.gpu-status {
|
270 |
-
position: absolute;
|
271 |
-
top: 10px;
|
272 |
-
right: 10px;
|
273 |
-
background: rgba(0,0,0,0.3);
|
274 |
-
padding: 5px 15px;
|
275 |
-
border-radius: 20px;
|
276 |
-
font-size: 0.8em;
|
277 |
-
}
|
278 |
-
|
279 |
-
.main-content {
|
280 |
-
background: rgba(255, 255, 255, 0.95);
|
281 |
-
border-radius: 20px;
|
282 |
-
padding: 30px;
|
283 |
-
box-shadow: 0 5px 20px rgba(0,0,0,0.1);
|
284 |
-
backdrop-filter: blur(10px);
|
285 |
-
}
|
286 |
-
|
287 |
-
.input-section {
|
288 |
-
background: linear-gradient(135deg, #f5f7fa 0%, #c3cfe2 100%);
|
289 |
-
padding: 25px;
|
290 |
-
border-radius: 15px;
|
291 |
-
margin-bottom: 20px;
|
292 |
-
}
|
293 |
-
|
294 |
-
.generate-btn {
|
295 |
-
background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
|
296 |
-
color: white;
|
297 |
-
font-size: 1.3em;
|
298 |
-
padding: 15px 40px;
|
299 |
-
border-radius: 30px;
|
300 |
-
border: none;
|
301 |
-
cursor: pointer;
|
302 |
-
transition: all 0.3s ease;
|
303 |
-
box-shadow: 0 5px 15px rgba(102, 126, 234, 0.4);
|
304 |
-
width: 100%;
|
305 |
-
margin-top: 20px;
|
306 |
-
}
|
307 |
-
|
308 |
-
.generate-btn:hover {
|
309 |
-
transform: translateY(-2px);
|
310 |
-
box-shadow: 0 7px 20px rgba(102, 126, 234, 0.6);
|
311 |
-
}
|
312 |
-
|
313 |
-
.generate-btn:active {
|
314 |
-
transform: translateY(0);
|
315 |
-
}
|
316 |
-
|
317 |
-
.video-output {
|
318 |
-
background: #f8f9fa;
|
319 |
-
padding: 20px;
|
320 |
-
border-radius: 15px;
|
321 |
-
text-align: center;
|
322 |
-
min-height: 400px;
|
323 |
-
display: flex;
|
324 |
-
align-items: center;
|
325 |
-
justify-content: center;
|
326 |
-
}
|
327 |
-
|
328 |
-
.accordion {
|
329 |
-
background: rgba(255, 255, 255, 0.7);
|
330 |
-
border-radius: 10px;
|
331 |
-
margin-top: 15px;
|
332 |
-
padding: 15px;
|
333 |
-
}
|
334 |
-
|
335 |
-
.slider-container {
|
336 |
-
background: rgba(255, 255, 255, 0.5);
|
337 |
-
padding: 15px;
|
338 |
-
border-radius: 10px;
|
339 |
-
margin: 10px 0;
|
340 |
-
}
|
341 |
-
|
342 |
-
body {
|
343 |
-
background: linear-gradient(-45deg, #ee7752, #e73c7e, #23a6d5, #23d5ab);
|
344 |
-
background-size: 400% 400%;
|
345 |
-
animation: gradient 15s ease infinite;
|
346 |
-
}
|
347 |
-
|
348 |
-
@keyframes gradient {
|
349 |
-
0% { background-position: 0% 50%; }
|
350 |
-
50% { background-position: 100% 50%; }
|
351 |
-
100% { background-position: 0% 50%; }
|
352 |
-
}
|
353 |
-
|
354 |
-
.warning-box {
|
355 |
-
background: rgba(255, 193, 7, 0.1);
|
356 |
-
border: 1px solid rgba(255, 193, 7, 0.3);
|
357 |
-
border-radius: 10px;
|
358 |
-
padding: 15px;
|
359 |
-
margin: 10px 0;
|
360 |
-
color: #856404;
|
361 |
-
font-size: 0.9em;
|
362 |
-
}
|
363 |
-
|
364 |
-
.info-box {
|
365 |
-
background: rgba(52, 152, 219, 0.1);
|
366 |
-
border: 1px solid rgba(52, 152, 219, 0.3);
|
367 |
-
border-radius: 10px;
|
368 |
-
padding: 15px;
|
369 |
-
margin: 10px 0;
|
370 |
-
color: #2c5282;
|
371 |
-
font-size: 0.9em;
|
372 |
-
}
|
373 |
-
|
374 |
-
.footer {
|
375 |
-
text-align: center;
|
376 |
-
margin-top: 30px;
|
377 |
-
color: #666;
|
378 |
-
font-size: 0.9em;
|
379 |
-
}
|
380 |
-
"""
|
381 |
-
|
382 |
-
# Gradio UI (기존 구조 유지)
|
383 |
-
with gr.Blocks(css=css, theme=gr.themes.Soft()) as demo:
|
384 |
-
with gr.Column(elem_classes="container"):
|
385 |
-
# Header with GPU status
|
386 |
-
gr.HTML("""
|
387 |
-
<div class="header">
|
388 |
-
<h1>🎬 AI Video Magic Studio</h1>
|
389 |
-
<p>Transform your images into captivating videos with Wan 2.1 + CausVid LoRA</p>
|
390 |
-
<div class="gpu-status">🖥️ Zero GPU Optimized</div>
|
391 |
-
</div>
|
392 |
-
""")
|
393 |
-
|
394 |
-
# GPU 메모리 경고
|
395 |
-
if hasattr(spaces, 'GPU'):
|
396 |
-
gr.HTML("""
|
397 |
-
<div class="warning-box">
|
398 |
-
<strong>💡 Zero GPU Performance Tips:</strong>
|
399 |
-
<ul style="margin: 5px 0; padding-left: 20px;">
|
400 |
-
<li>Maximum duration: 2.5 seconds</li>
|
401 |
-
<li>Maximum resolution: 640×640 pixels</li>
|
402 |
-
<li>Recommended: 512×512 at 2 seconds</li>
|
403 |
-
<li>Use 4-6 steps for optimal speed/quality balance</li>
|
404 |
-
<li>Processing time: ~60-90 seconds</li>
|
405 |
-
</ul>
|
406 |
-
</div>
|
407 |
-
""")
|
408 |
-
|
409 |
-
# 정보 박스
|
410 |
-
gr.HTML("""
|
411 |
-
<div class="info-box">
|
412 |
-
<strong>🎯 Quick Start Guide:</strong>
|
413 |
-
<ol style="margin: 5px 0; padding-left: 20px;">
|
414 |
-
<li>Upload your image - AI will calculate optimal dimensions</li>
|
415 |
-
<li>Enter a creative prompt or use the default</li>
|
416 |
-
<li>Adjust duration (2s recommended for best results)</li>
|
417 |
-
<li>Click Generate and wait for completion</li>
|
418 |
-
</ol>
|
419 |
-
</div>
|
420 |
-
""")
|
421 |
|
422 |
-
with gr.Row(
|
423 |
-
with gr.Column(
|
424 |
-
gr.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
425 |
|
426 |
-
with gr.
|
427 |
-
|
428 |
-
|
429 |
-
|
430 |
-
|
431 |
-
)
|
432 |
-
|
433 |
-
prompt_input = gr.Textbox(
|
434 |
-
label="✨ Animation Prompt",
|
435 |
-
value=default_prompt_i2v,
|
436 |
-
placeholder="Describe how you want your image to move...",
|
437 |
-
lines=2
|
438 |
)
|
439 |
-
|
440 |
-
|
441 |
-
minimum=
|
442 |
-
maximum=
|
443 |
-
step=
|
444 |
-
value=
|
445 |
-
|
446 |
-
elem_classes="slider-container"
|
447 |
)
|
448 |
-
|
449 |
-
|
450 |
-
|
451 |
-
|
452 |
-
value=default_negative_prompt,
|
453 |
-
lines=3
|
454 |
)
|
455 |
-
|
456 |
with gr.Row():
|
457 |
-
|
458 |
-
minimum=
|
459 |
-
maximum=
|
460 |
-
step=
|
461 |
-
value=
|
462 |
-
label="
|
463 |
)
|
464 |
-
|
465 |
-
|
466 |
-
|
467 |
-
|
468 |
-
|
469 |
-
|
470 |
-
height_slider = gr.Slider(
|
471 |
-
minimum=SLIDER_MIN_H,
|
472 |
-
maximum=SLIDER_MAX_H if not hasattr(spaces, 'GPU') else 640,
|
473 |
-
step=MOD_VALUE,
|
474 |
-
value=DEFAULT_H_SLIDER_VALUE,
|
475 |
-
label=f"📏 Height (multiple of {MOD_VALUE})"
|
476 |
)
|
477 |
-
width_slider = gr.Slider(
|
478 |
-
minimum=SLIDER_MIN_W,
|
479 |
-
maximum=SLIDER_MAX_W if not hasattr(spaces, 'GPU') else 640,
|
480 |
-
step=MOD_VALUE,
|
481 |
-
value=DEFAULT_W_SLIDER_VALUE,
|
482 |
-
label=f"📐 Width (multiple of {MOD_VALUE})"
|
483 |
-
)
|
484 |
-
|
485 |
steps_slider = gr.Slider(
|
486 |
-
minimum=1,
|
487 |
-
maximum=30
|
488 |
-
step=1,
|
489 |
-
value=4,
|
490 |
-
label="
|
491 |
-
)
|
492 |
-
|
493 |
-
|
494 |
-
|
495 |
-
|
496 |
-
|
497 |
-
|
498 |
-
label="🎯 Guidance Scale",
|
499 |
visible=False
|
500 |
)
|
501 |
-
|
502 |
-
|
503 |
-
"🎬 Generate Video",
|
504 |
variant="primary",
|
505 |
-
elem_classes="generate-btn"
|
506 |
)
|
507 |
-
|
508 |
-
with gr.Column(
|
509 |
-
gr.Markdown("### 🎥 Generated Video")
|
510 |
video_output = gr.Video(
|
511 |
-
label="",
|
512 |
-
autoplay=True,
|
513 |
-
|
514 |
)
|
515 |
-
|
516 |
-
|
517 |
-
|
518 |
-
|
519 |
-
|
520 |
-
|
521 |
|
522 |
-
|
523 |
-
|
524 |
-
|
525 |
-
|
526 |
-
["forg.jpg", "the frog jumps around", 448, 576],
|
527 |
-
],
|
528 |
-
inputs=[input_image, prompt_input, height_slider, width_slider],
|
529 |
-
outputs=[video_output, seed],
|
530 |
-
fn=generate_video,
|
531 |
-
cache_examples=False # 캐시 비활성화로 메모리 절약
|
532 |
)
|
533 |
-
|
534 |
-
|
535 |
-
|
536 |
-
|
537 |
-
|
538 |
-
|
539 |
-
|
540 |
-
|
541 |
-
|
542 |
-
|
543 |
-
|
544 |
-
|
545 |
-
|
546 |
-
|
547 |
-
|
548 |
-
|
549 |
-
|
550 |
-
|
551 |
-
|
552 |
-
|
553 |
-
inputs=[input_image, height_slider, width_slider],
|
554 |
-
outputs=[height_slider, width_slider]
|
555 |
-
)
|
556 |
-
|
557 |
-
generate_btn.click(
|
558 |
-
fn=generate_video,
|
559 |
-
inputs=[
|
560 |
-
input_image, prompt_input, height_slider, width_slider,
|
561 |
-
negative_prompt, duration_input, guidance_scale,
|
562 |
-
steps_slider, seed, randomize_seed
|
563 |
-
],
|
564 |
-
outputs=[video_output, seed]
|
565 |
-
)
|
566 |
|
567 |
if __name__ == "__main__":
|
568 |
-
demo.queue().launch()
|
569 |
-
|
|
|
9 |
import numpy as np
|
10 |
from PIL import Image
|
11 |
import random
|
|
|
|
|
12 |
|
|
|
|
|
|
|
|
|
|
|
13 |
MODEL_ID = "Wan-AI/Wan2.1-I2V-14B-480P-Diffusers"
|
14 |
LORA_REPO_ID = "Kijai/WanVideo_comfy"
|
15 |
LORA_FILENAME = "Wan21_CausVid_14B_T2V_lora_rank32.safetensors"
|
16 |
|
17 |
+
image_encoder = CLIPVisionModel.from_pretrained(MODEL_ID, subfolder="image_encoder", torch_dtype=torch.float32)
|
18 |
+
vae = AutoencoderKLWan.from_pretrained(MODEL_ID, subfolder="vae", torch_dtype=torch.float32)
|
19 |
+
pipe = WanImageToVideoPipeline.from_pretrained(
|
20 |
+
MODEL_ID, vae=vae, image_encoder=image_encoder, torch_dtype=torch.bfloat16
|
21 |
+
)
|
22 |
+
pipe.scheduler = UniPCMultistepScheduler.from_config(pipe.scheduler.config, flow_shift=8.0)
|
23 |
+
pipe.to("cuda")
|
24 |
+
|
25 |
+
causvid_path = hf_hub_download(repo_id=LORA_REPO_ID, filename=LORA_FILENAME)
|
26 |
+
pipe.load_lora_weights(causvid_path, adapter_name="causvid_lora")
|
27 |
+
pipe.set_adapters(["causvid_lora"], adapter_weights=[0.95])
|
28 |
+
pipe.fuse_lora()
|
29 |
+
|
30 |
MOD_VALUE = 32
|
31 |
DEFAULT_H_SLIDER_VALUE = 512
|
32 |
+
DEFAULT_W_SLIDER_VALUE = 896
|
33 |
NEW_FORMULA_MAX_AREA = 480.0 * 832.0
|
34 |
|
35 |
SLIDER_MIN_H, SLIDER_MAX_H = 128, 896
|
|
|
41 |
MAX_FRAMES_MODEL = 81
|
42 |
|
43 |
default_prompt_i2v = "make this image come alive, cinematic motion, smooth animation"
|
44 |
+
default_negative_prompt = "Bright tones, overexposed, static, blurred details, subtitles, style, works, paintings, images, static, overall gray, worst quality, low quality, JPEG compression residue, ugly, incomplete, extra fingers, poorly drawn hands, poorly drawn faces, deformed, disfigured, misshapen limbs, fused fingers, still picture, messy background, three legs, many people in the background, walking backwards, watermark, text, signature"
|
45 |
+
|
46 |
+
# CSS 스타일 정의
|
47 |
+
custom_css = """
|
48 |
+
/* 전체 배경 그라디언트 */
|
49 |
+
.gradio-container {
|
50 |
+
font-family: 'Inter', -apple-system, BlinkMacSystemFont, sans-serif !important;
|
51 |
+
background: linear-gradient(135deg, #667eea 0%, #764ba2 25%, #f093fb 50%, #f5576c 75%, #fa709a 100%) !important;
|
52 |
+
background-size: 400% 400% !important;
|
53 |
+
animation: gradientShift 15s ease infinite !important;
|
54 |
+
}
|
55 |
|
56 |
+
@keyframes gradientShift {
|
57 |
+
0% { background-position: 0% 50%; }
|
58 |
+
50% { background-position: 100% 50%; }
|
59 |
+
100% { background-position: 0% 50%; }
|
60 |
+
}
|
61 |
+
|
62 |
+
/* 메인 컨테이너 스타일 */
|
63 |
+
.main-container {
|
64 |
+
backdrop-filter: blur(10px);
|
65 |
+
background: rgba(255, 255, 255, 0.1) !important;
|
66 |
+
border-radius: 20px !important;
|
67 |
+
padding: 30px !important;
|
68 |
+
box-shadow: 0 8px 32px 0 rgba(31, 38, 135, 0.37) !important;
|
69 |
+
border: 1px solid rgba(255, 255, 255, 0.18) !important;
|
70 |
+
}
|
71 |
+
|
72 |
+
/* 헤더 스타일 */
|
73 |
+
h1 {
|
74 |
+
background: linear-gradient(45deg, #ffffff, #f0f0f0) !important;
|
75 |
+
-webkit-background-clip: text !important;
|
76 |
+
-webkit-text-fill-color: transparent !important;
|
77 |
+
background-clip: text !important;
|
78 |
+
font-weight: 800 !important;
|
79 |
+
font-size: 2.5rem !important;
|
80 |
+
text-align: center !important;
|
81 |
+
margin-bottom: 2rem !important;
|
82 |
+
text-shadow: 2px 2px 4px rgba(0,0,0,0.1) !important;
|
83 |
+
}
|
84 |
+
|
85 |
+
/* 컴포넌트 컨테이너 스타일 */
|
86 |
+
.input-container, .output-container {
|
87 |
+
background: rgba(255, 255, 255, 0.08) !important;
|
88 |
+
border-radius: 15px !important;
|
89 |
+
padding: 20px !important;
|
90 |
+
margin: 10px 0 !important;
|
91 |
+
backdrop-filter: blur(5px) !important;
|
92 |
+
border: 1px solid rgba(255, 255, 255, 0.1) !important;
|
93 |
+
}
|
94 |
+
|
95 |
+
/* 입력 필드 스타일 */
|
96 |
+
input, textarea, .gr-box {
|
97 |
+
background: rgba(255, 255, 255, 0.9) !important;
|
98 |
+
border: 1px solid rgba(255, 255, 255, 0.3) !important;
|
99 |
+
border-radius: 10px !important;
|
100 |
+
color: #333 !important;
|
101 |
+
transition: all 0.3s ease !important;
|
102 |
+
}
|
103 |
+
|
104 |
+
input:focus, textarea:focus {
|
105 |
+
background: rgba(255, 255, 255, 1) !important;
|
106 |
+
border-color: #667eea !important;
|
107 |
+
box-shadow: 0 0 0 3px rgba(102, 126, 234, 0.1) !important;
|
108 |
+
}
|
109 |
+
|
110 |
+
/* 버튼 스타일 */
|
111 |
+
.generate-btn {
|
112 |
+
background: linear-gradient(135deg, #667eea 0%, #764ba2 100%) !important;
|
113 |
+
color: white !important;
|
114 |
+
font-weight: 600 !important;
|
115 |
+
font-size: 1.1rem !important;
|
116 |
+
padding: 12px 30px !important;
|
117 |
+
border-radius: 50px !important;
|
118 |
+
border: none !important;
|
119 |
+
cursor: pointer !important;
|
120 |
+
transition: all 0.3s ease !important;
|
121 |
+
box-shadow: 0 4px 15px rgba(102, 126, 234, 0.4) !important;
|
122 |
+
}
|
123 |
|
124 |
+
.generate-btn:hover {
|
125 |
+
transform: translateY(-2px) !important;
|
126 |
+
box-shadow: 0 6px 20px rgba(102, 126, 234, 0.6) !important;
|
127 |
+
}
|
128 |
+
|
129 |
+
/* 슬라이더 스타일 */
|
130 |
+
input[type="range"] {
|
131 |
+
background: transparent !important;
|
132 |
+
}
|
133 |
+
|
134 |
+
input[type="range"]::-webkit-slider-track {
|
135 |
+
background: rgba(255, 255, 255, 0.3) !important;
|
136 |
+
border-radius: 5px !important;
|
137 |
+
height: 6px !important;
|
138 |
+
}
|
139 |
+
|
140 |
+
input[type="range"]::-webkit-slider-thumb {
|
141 |
+
background: linear-gradient(135deg, #667eea, #764ba2) !important;
|
142 |
+
border: 2px solid white !important;
|
143 |
+
border-radius: 50% !important;
|
144 |
+
cursor: pointer !important;
|
145 |
+
width: 18px !important;
|
146 |
+
height: 18px !important;
|
147 |
+
-webkit-appearance: none !important;
|
148 |
+
}
|
149 |
+
|
150 |
+
/* Accordion 스타일 */
|
151 |
+
.gr-accordion {
|
152 |
+
background: rgba(255, 255, 255, 0.05) !important;
|
153 |
+
border-radius: 10px !important;
|
154 |
+
border: 1px solid rgba(255, 255, 255, 0.1) !important;
|
155 |
+
margin: 15px 0 !important;
|
156 |
+
}
|
157 |
+
|
158 |
+
/* 라벨 스타일 */
|
159 |
+
label {
|
160 |
+
color: #ffffff !important;
|
161 |
+
font-weight: 500 !important;
|
162 |
+
font-size: 0.95rem !important;
|
163 |
+
margin-bottom: 5px !important;
|
164 |
+
}
|
165 |
+
|
166 |
+
/* 이미지 업로드 영역 */
|
167 |
+
.image-upload {
|
168 |
+
border: 2px dashed rgba(255, 255, 255, 0.3) !important;
|
169 |
+
border-radius: 15px !important;
|
170 |
+
background: rgba(255, 255, 255, 0.05) !important;
|
171 |
+
transition: all 0.3s ease !important;
|
172 |
+
}
|
173 |
+
|
174 |
+
.image-upload:hover {
|
175 |
+
border-color: rgba(255, 255, 255, 0.5) !important;
|
176 |
+
background: rgba(255, 255, 255, 0.1) !important;
|
177 |
+
}
|
178 |
+
|
179 |
+
/* 비디오 출력 영역 */
|
180 |
+
video {
|
181 |
+
border-radius: 15px !important;
|
182 |
+
box-shadow: 0 4px 20px rgba(0, 0, 0, 0.3) !important;
|
183 |
+
}
|
184 |
+
|
185 |
+
/* Examples 섹션 스타일 */
|
186 |
+
.gr-examples {
|
187 |
+
background: rgba(255, 255, 255, 0.05) !important;
|
188 |
+
border-radius: 15px !important;
|
189 |
+
padding: 20px !important;
|
190 |
+
margin-top: 20px !important;
|
191 |
+
}
|
192 |
+
|
193 |
+
/* Checkbox 스타일 */
|
194 |
+
input[type="checkbox"] {
|
195 |
+
accent-color: #667eea !important;
|
196 |
+
}
|
197 |
+
|
198 |
+
/* 반응형 애니메이션 */
|
199 |
+
@media (max-width: 768px) {
|
200 |
+
h1 { font-size: 2rem !important; }
|
201 |
+
.main-container { padding: 20px !important; }
|
202 |
+
}
|
203 |
+
"""
|
204 |
|
205 |
def _calculate_new_dimensions_wan(pil_image, mod_val, calculation_max_area,
|
206 |
min_slider_h, max_slider_h,
|
|
|
212 |
|
213 |
aspect_ratio = orig_h / orig_w
|
214 |
|
|
|
|
|
|
|
|
|
|
|
215 |
calc_h = round(np.sqrt(calculation_max_area * aspect_ratio))
|
216 |
calc_w = round(np.sqrt(calculation_max_area / aspect_ratio))
|
217 |
|
218 |
calc_h = max(mod_val, (calc_h // mod_val) * mod_val)
|
219 |
calc_w = max(mod_val, (calc_w // mod_val) * mod_val)
|
220 |
|
|
|
|
|
|
|
|
|
|
|
221 |
new_h = int(np.clip(calc_h, min_slider_h, (max_slider_h // mod_val) * mod_val))
|
222 |
new_w = int(np.clip(calc_w, min_slider_w, (max_slider_w // mod_val) * mod_val))
|
223 |
|
|
|
242 |
guidance_scale, steps,
|
243 |
seed, randomize_seed,
|
244 |
progress):
|
245 |
+
if steps > 4 and duration_seconds > 2:
|
246 |
+
return 90
|
247 |
+
elif steps > 4 or duration_seconds > 2:
|
248 |
+
return 75
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
249 |
else:
|
250 |
+
return 60
|
|
|
|
|
|
|
|
|
|
|
|
|
251 |
|
252 |
@spaces.GPU(duration=get_duration)
|
253 |
def generate_video(input_image, prompt, height, width,
|
|
|
258 |
|
259 |
if input_image is None:
|
260 |
raise gr.Error("Please upload an input image.")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
261 |
|
262 |
target_h = max(MOD_VALUE, (int(height) // MOD_VALUE) * MOD_VALUE)
|
263 |
target_w = max(MOD_VALUE, (int(width) // MOD_VALUE) * MOD_VALUE)
|
264 |
|
265 |
num_frames = np.clip(int(round(duration_seconds * FIXED_FPS)), MIN_FRAMES_MODEL, MAX_FRAMES_MODEL)
|
266 |
|
|
|
|
|
|
|
|
|
|
|
267 |
current_seed = random.randint(0, MAX_SEED) if randomize_seed else int(seed)
|
268 |
|
269 |
+
resized_image = input_image.resize((target_w, target_h))
|
|
|
|
|
|
|
270 |
|
271 |
+
with torch.inference_mode():
|
272 |
+
output_frames_list = pipe(
|
273 |
+
image=resized_image, prompt=prompt, negative_prompt=negative_prompt,
|
274 |
+
height=target_h, width=target_w, num_frames=num_frames,
|
275 |
+
guidance_scale=float(guidance_scale), num_inference_steps=int(steps),
|
276 |
+
generator=torch.Generator(device="cuda").manual_seed(current_seed)
|
277 |
+
).frames[0]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
278 |
|
|
|
279 |
with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as tmpfile:
|
280 |
video_path = tmpfile.name
|
281 |
export_to_video(output_frames_list, video_path, fps=FIXED_FPS)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
282 |
return video_path, current_seed
|
283 |
|
284 |
+
with gr.Blocks(css=custom_css, theme=gr.themes.Soft()) as demo:
|
285 |
+
with gr.Column(elem_classes=["main-container"]):
|
286 |
+
gr.Markdown("# ✨ Fast 4 steps Wan 2.1 I2V (14B) with CausVid LoRA")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
287 |
|
288 |
+
with gr.Row():
|
289 |
+
with gr.Column(elem_classes=["input-container"]):
|
290 |
+
input_image_component = gr.Image(
|
291 |
+
type="pil",
|
292 |
+
label="🖼️ Input Image (auto-resized to target H/W)",
|
293 |
+
elem_classes=["image-upload"]
|
294 |
+
)
|
295 |
+
prompt_input = gr.Textbox(
|
296 |
+
label="✏️ Prompt",
|
297 |
+
value=default_prompt_i2v,
|
298 |
+
lines=2
|
299 |
+
)
|
300 |
+
duration_seconds_input = gr.Slider(
|
301 |
+
minimum=round(MIN_FRAMES_MODEL/FIXED_FPS,1),
|
302 |
+
maximum=round(MAX_FRAMES_MODEL/FIXED_FPS,1),
|
303 |
+
step=0.1,
|
304 |
+
value=2,
|
305 |
+
label="⏱️ Duration (seconds)",
|
306 |
+
info=f"Clamped to model's {MIN_FRAMES_MODEL}-{MAX_FRAMES_MODEL} frames at {FIXED_FPS}fps."
|
307 |
+
)
|
308 |
|
309 |
+
with gr.Accordion("⚙️ Advanced Settings", open=False):
|
310 |
+
negative_prompt_input = gr.Textbox(
|
311 |
+
label="❌ Negative Prompt",
|
312 |
+
value=default_negative_prompt,
|
313 |
+
lines=3
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
314 |
)
|
315 |
+
seed_input = gr.Slider(
|
316 |
+
label="🎲 Seed",
|
317 |
+
minimum=0,
|
318 |
+
maximum=MAX_SEED,
|
319 |
+
step=1,
|
320 |
+
value=42,
|
321 |
+
interactive=True
|
|
|
322 |
)
|
323 |
+
randomize_seed_checkbox = gr.Checkbox(
|
324 |
+
label="🔀 Randomize seed",
|
325 |
+
value=True,
|
326 |
+
interactive=True
|
|
|
|
|
327 |
)
|
|
|
328 |
with gr.Row():
|
329 |
+
height_input = gr.Slider(
|
330 |
+
minimum=SLIDER_MIN_H,
|
331 |
+
maximum=SLIDER_MAX_H,
|
332 |
+
step=MOD_VALUE,
|
333 |
+
value=DEFAULT_H_SLIDER_VALUE,
|
334 |
+
label=f"📏 Output Height (multiple of {MOD_VALUE})"
|
335 |
)
|
336 |
+
width_input = gr.Slider(
|
337 |
+
minimum=SLIDER_MIN_W,
|
338 |
+
maximum=SLIDER_MAX_W,
|
339 |
+
step=MOD_VALUE,
|
340 |
+
value=DEFAULT_W_SLIDER_VALUE,
|
341 |
+
label=f"📐 Output Width (multiple of {MOD_VALUE})"
|
|
|
|
|
|
|
|
|
|
|
|
|
342 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
343 |
steps_slider = gr.Slider(
|
344 |
+
minimum=1,
|
345 |
+
maximum=30,
|
346 |
+
step=1,
|
347 |
+
value=4,
|
348 |
+
label="🚀 Inference Steps"
|
349 |
+
)
|
350 |
+
guidance_scale_input = gr.Slider(
|
351 |
+
minimum=0.0,
|
352 |
+
maximum=20.0,
|
353 |
+
step=0.5,
|
354 |
+
value=1.0,
|
355 |
+
label="🎯 Guidance Scale",
|
|
|
356 |
visible=False
|
357 |
)
|
358 |
+
|
359 |
+
generate_button = gr.Button(
|
360 |
+
"🎬 Generate Video",
|
361 |
variant="primary",
|
362 |
+
elem_classes=["generate-btn"]
|
363 |
)
|
364 |
+
|
365 |
+
with gr.Column(elem_classes=["output-container"]):
|
|
|
366 |
video_output = gr.Video(
|
367 |
+
label="🎥 Generated Video",
|
368 |
+
autoplay=True,
|
369 |
+
interactive=False
|
370 |
)
|
371 |
+
|
372 |
+
input_image_component.upload(
|
373 |
+
fn=handle_image_upload_for_dims_wan,
|
374 |
+
inputs=[input_image_component, height_input, width_input],
|
375 |
+
outputs=[height_input, width_input]
|
376 |
+
)
|
377 |
|
378 |
+
input_image_component.clear(
|
379 |
+
fn=handle_image_upload_for_dims_wan,
|
380 |
+
inputs=[input_image_component, height_input, width_input],
|
381 |
+
outputs=[height_input, width_input]
|
|
|
|
|
|
|
|
|
|
|
|
|
382 |
)
|
383 |
+
|
384 |
+
ui_inputs = [
|
385 |
+
input_image_component, prompt_input, height_input, width_input,
|
386 |
+
negative_prompt_input, duration_seconds_input,
|
387 |
+
guidance_scale_input, steps_slider, seed_input, randomize_seed_checkbox
|
388 |
+
]
|
389 |
+
generate_button.click(fn=generate_video, inputs=ui_inputs, outputs=[video_output, seed_input])
|
390 |
+
|
391 |
+
with gr.Column():
|
392 |
+
gr.Examples(
|
393 |
+
examples=[
|
394 |
+
["peng.png", "a penguin playfully dancing in the snow, Antarctica", 896, 512],
|
395 |
+
["forg.jpg", "the frog jumps around", 448, 832],
|
396 |
+
],
|
397 |
+
inputs=[input_image_component, prompt_input, height_input, width_input],
|
398 |
+
outputs=[video_output, seed_input],
|
399 |
+
fn=generate_video,
|
400 |
+
cache_examples="lazy",
|
401 |
+
label="🌟 Example Gallery"
|
402 |
+
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
403 |
|
404 |
if __name__ == "__main__":
|
405 |
+
demo.queue().launch()
|
|