seawolf2357 committed on
Commit
afae83c
·
verified ·
1 Parent(s): 1b3f0f7

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +288 -452
app.py CHANGED
@@ -9,22 +9,27 @@ from huggingface_hub import hf_hub_download
9
  import numpy as np
10
  from PIL import Image
11
  import random
12
- import logging
13
- import gc
14
 
15
- # 로깅 설정
16
- logging.basicConfig(level=logging.INFO)
17
- logger = logging.getLogger(__name__)
18
-
19
- # 모델 설정
20
  MODEL_ID = "Wan-AI/Wan2.1-I2V-14B-480P-Diffusers"
21
  LORA_REPO_ID = "Kijai/WanVideo_comfy"
22
  LORA_FILENAME = "Wan21_CausVid_14B_T2V_lora_rank32.safetensors"
23
 
24
- # 파라미터 설정
 
 
 
 
 
 
 
 
 
 
 
 
25
  MOD_VALUE = 32
26
  DEFAULT_H_SLIDER_VALUE = 512
27
- DEFAULT_W_SLIDER_VALUE = 512 # Zero GPU를 위해 정사각형 기본값
28
  NEW_FORMULA_MAX_AREA = 480.0 * 832.0
29
 
30
  SLIDER_MIN_H, SLIDER_MAX_H = 128, 896
@@ -36,36 +41,166 @@ MIN_FRAMES_MODEL = 8
36
  MAX_FRAMES_MODEL = 81
37
 
38
  default_prompt_i2v = "make this image come alive, cinematic motion, smooth animation"
39
- default_negative_prompt = "static, blurred, low quality, watermark, text"
 
 
 
 
 
 
 
 
 
 
40
 
41
- # 모델 글로벌 로딩
42
- logger.info("Loading model components...")
43
- image_encoder = CLIPVisionModel.from_pretrained(MODEL_ID, subfolder="image_encoder", torch_dtype=torch.float32)
44
- vae = AutoencoderKLWan.from_pretrained(MODEL_ID, subfolder="vae", torch_dtype=torch.float32)
45
- pipe = WanImageToVideoPipeline.from_pretrained(
46
- MODEL_ID, vae=vae, image_encoder=image_encoder, torch_dtype=torch.bfloat16
47
- )
48
- pipe.scheduler = UniPCMultistepScheduler.from_config(pipe.scheduler.config, flow_shift=8.0)
49
- pipe.to("cuda")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
50
 
51
- # LoRA 로딩
52
- try:
53
- causvid_path = hf_hub_download(repo_id=LORA_REPO_ID, filename=LORA_FILENAME)
54
- pipe.load_lora_weights(causvid_path, adapter_name="causvid_lora")
55
- pipe.set_adapters(["causvid_lora"], adapter_weights=[0.95])
56
- pipe.fuse_lora()
57
- logger.info("LoRA loaded successfully")
58
- except Exception as e:
59
- logger.warning(f"LoRA loading failed: {e}")
60
-
61
- # 메모리 최적화 - WanImageToVideoPipeline에서 지원하는 메서드만 사용
62
- try:
63
- pipe.enable_model_cpu_offload()
64
- logger.info("CPU offload enabled")
65
- except:
66
- logger.info("CPU offload not available")
67
-
68
- logger.info("Model loaded and ready")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
69
 
70
  def _calculate_new_dimensions_wan(pil_image, mod_val, calculation_max_area,
71
  min_slider_h, max_slider_h,
@@ -77,22 +212,12 @@ def _calculate_new_dimensions_wan(pil_image, mod_val, calculation_max_area,
77
 
78
  aspect_ratio = orig_h / orig_w
79
 
80
- # Zero GPU를 위한 보수적인 계산
81
- if hasattr(spaces, 'GPU'):
82
- # 더 작은 max_area 사용
83
- calculation_max_area = min(calculation_max_area, 320.0 * 320.0)
84
-
85
  calc_h = round(np.sqrt(calculation_max_area * aspect_ratio))
86
  calc_w = round(np.sqrt(calculation_max_area / aspect_ratio))
87
 
88
  calc_h = max(mod_val, (calc_h // mod_val) * mod_val)
89
  calc_w = max(mod_val, (calc_w // mod_val) * mod_val)
90
 
91
- # Zero GPU 환경에서 추가 제한
92
- if hasattr(spaces, 'GPU'):
93
- max_slider_h = min(max_slider_h, 640)
94
- max_slider_w = min(max_slider_w, 640)
95
-
96
  new_h = int(np.clip(calc_h, min_slider_h, (max_slider_h // mod_val) * mod_val))
97
  new_w = int(np.clip(calc_w, min_slider_w, (max_slider_w // mod_val) * mod_val))
98
 
@@ -117,25 +242,12 @@ def get_duration(input_image, prompt, height, width,
117
  guidance_scale, steps,
118
  seed, randomize_seed,
119
  progress):
120
- # Zero GPU를 위한 보수적인 시간 할당
121
- base_time = 60
122
-
123
- if hasattr(spaces, 'GPU'):
124
- # Zero GPU 환경에서 더 많은 시간 할당
125
- if steps > 4 and duration_seconds > 2:
126
- return 90
127
- elif steps > 4 or duration_seconds > 2:
128
- return 80
129
- else:
130
- return 70
131
  else:
132
- # 일반 GPU 환경
133
- if steps > 4 and duration_seconds > 2:
134
- return 90
135
- elif steps > 4 or duration_seconds > 2:
136
- return 75
137
- else:
138
- return 60
139
 
140
  @spaces.GPU(duration=get_duration)
141
  def generate_video(input_image, prompt, height, width,
@@ -146,424 +258,148 @@ def generate_video(input_image, prompt, height, width,
146
 
147
  if input_image is None:
148
  raise gr.Error("Please upload an input image.")
149
-
150
- # Zero GPU 환경에서 추가 검증
151
- if hasattr(spaces, 'GPU'):
152
- # 픽셀 제한
153
- max_pixels = 409600 # 640x640
154
- if height * width > max_pixels:
155
- raise gr.Error(f"Resolution too high for Zero GPU. Maximum {max_pixels:,} pixels (e.g., 640×640)")
156
-
157
- # Duration 제한
158
- if duration_seconds > 2.5:
159
- duration_seconds = 2.5
160
- gr.Warning("Duration limited to 2.5s in Zero GPU environment")
161
-
162
- # Steps 제한
163
- if steps > 8:
164
- steps = 8
165
- gr.Warning("Steps limited to 8 in Zero GPU environment")
166
 
167
  target_h = max(MOD_VALUE, (int(height) // MOD_VALUE) * MOD_VALUE)
168
  target_w = max(MOD_VALUE, (int(width) // MOD_VALUE) * MOD_VALUE)
169
 
170
  num_frames = np.clip(int(round(duration_seconds * FIXED_FPS)), MIN_FRAMES_MODEL, MAX_FRAMES_MODEL)
171
 
172
- # Zero GPU에서 프레임 수 추가 제한
173
- if hasattr(spaces, 'GPU'):
174
- max_frames_zerogpu = int(2.5 * FIXED_FPS) # 2.5초
175
- num_frames = min(num_frames, max_frames_zerogpu)
176
-
177
  current_seed = random.randint(0, MAX_SEED) if randomize_seed else int(seed)
178
 
179
- logger.info(f"Generating video: {target_h}x{target_w}, {num_frames} frames, seed={current_seed}")
180
-
181
- # 이미지 리사이즈
182
- resized_image = input_image.resize((target_w, target_h), Image.Resampling.LANCZOS)
183
 
184
- try:
185
- with torch.inference_mode():
186
- output_frames_list = pipe(
187
- image=resized_image,
188
- prompt=prompt,
189
- negative_prompt=negative_prompt,
190
- height=target_h,
191
- width=target_w,
192
- num_frames=num_frames,
193
- guidance_scale=float(guidance_scale),
194
- num_inference_steps=int(steps),
195
- generator=torch.Generator(device="cuda").manual_seed(current_seed)
196
- ).frames[0]
197
- except torch.cuda.OutOfMemoryError:
198
- gc.collect()
199
- torch.cuda.empty_cache()
200
- raise gr.Error("GPU out of memory. Try smaller resolution or shorter duration.")
201
- except Exception as e:
202
- logger.error(f"Generation failed: {e}")
203
- raise gr.Error(f"Video generation failed: {str(e)[:100]}")
204
 
205
- # 비디오 저장
206
  with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as tmpfile:
207
  video_path = tmpfile.name
208
  export_to_video(output_frames_list, video_path, fps=FIXED_FPS)
209
-
210
- # 메모리 정리
211
- del output_frames_list
212
- gc.collect()
213
- if torch.cuda.is_available():
214
- torch.cuda.empty_cache()
215
-
216
  return video_path, current_seed
217
 
218
- # CSS 스타일 (기존 UI 유지)
219
- css = """
220
- .container {
221
- max-width: 1200px;
222
- margin: auto;
223
- padding: 20px;
224
- }
225
-
226
- .header {
227
- text-align: center;
228
- margin-bottom: 30px;
229
- background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
230
- padding: 40px;
231
- border-radius: 20px;
232
- color: white;
233
- box-shadow: 0 10px 30px rgba(0,0,0,0.2);
234
- position: relative;
235
- overflow: hidden;
236
- }
237
-
238
- .header::before {
239
- content: '';
240
- position: absolute;
241
- top: -50%;
242
- left: -50%;
243
- width: 200%;
244
- height: 200%;
245
- background: radial-gradient(circle, rgba(255,255,255,0.1) 0%, transparent 70%);
246
- animation: pulse 4s ease-in-out infinite;
247
- }
248
-
249
- @keyframes pulse {
250
- 0%, 100% { transform: scale(1); opacity: 0.5; }
251
- 50% { transform: scale(1.1); opacity: 0.8; }
252
- }
253
-
254
- .header h1 {
255
- font-size: 3em;
256
- margin-bottom: 10px;
257
- text-shadow: 2px 2px 4px rgba(0,0,0,0.3);
258
- position: relative;
259
- z-index: 1;
260
- }
261
-
262
- .header p {
263
- font-size: 1.2em;
264
- opacity: 0.95;
265
- position: relative;
266
- z-index: 1;
267
- }
268
-
269
- .gpu-status {
270
- position: absolute;
271
- top: 10px;
272
- right: 10px;
273
- background: rgba(0,0,0,0.3);
274
- padding: 5px 15px;
275
- border-radius: 20px;
276
- font-size: 0.8em;
277
- }
278
-
279
- .main-content {
280
- background: rgba(255, 255, 255, 0.95);
281
- border-radius: 20px;
282
- padding: 30px;
283
- box-shadow: 0 5px 20px rgba(0,0,0,0.1);
284
- backdrop-filter: blur(10px);
285
- }
286
-
287
- .input-section {
288
- background: linear-gradient(135deg, #f5f7fa 0%, #c3cfe2 100%);
289
- padding: 25px;
290
- border-radius: 15px;
291
- margin-bottom: 20px;
292
- }
293
-
294
- .generate-btn {
295
- background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
296
- color: white;
297
- font-size: 1.3em;
298
- padding: 15px 40px;
299
- border-radius: 30px;
300
- border: none;
301
- cursor: pointer;
302
- transition: all 0.3s ease;
303
- box-shadow: 0 5px 15px rgba(102, 126, 234, 0.4);
304
- width: 100%;
305
- margin-top: 20px;
306
- }
307
-
308
- .generate-btn:hover {
309
- transform: translateY(-2px);
310
- box-shadow: 0 7px 20px rgba(102, 126, 234, 0.6);
311
- }
312
-
313
- .generate-btn:active {
314
- transform: translateY(0);
315
- }
316
-
317
- .video-output {
318
- background: #f8f9fa;
319
- padding: 20px;
320
- border-radius: 15px;
321
- text-align: center;
322
- min-height: 400px;
323
- display: flex;
324
- align-items: center;
325
- justify-content: center;
326
- }
327
-
328
- .accordion {
329
- background: rgba(255, 255, 255, 0.7);
330
- border-radius: 10px;
331
- margin-top: 15px;
332
- padding: 15px;
333
- }
334
-
335
- .slider-container {
336
- background: rgba(255, 255, 255, 0.5);
337
- padding: 15px;
338
- border-radius: 10px;
339
- margin: 10px 0;
340
- }
341
-
342
- body {
343
- background: linear-gradient(-45deg, #ee7752, #e73c7e, #23a6d5, #23d5ab);
344
- background-size: 400% 400%;
345
- animation: gradient 15s ease infinite;
346
- }
347
-
348
- @keyframes gradient {
349
- 0% { background-position: 0% 50%; }
350
- 50% { background-position: 100% 50%; }
351
- 100% { background-position: 0% 50%; }
352
- }
353
-
354
- .warning-box {
355
- background: rgba(255, 193, 7, 0.1);
356
- border: 1px solid rgba(255, 193, 7, 0.3);
357
- border-radius: 10px;
358
- padding: 15px;
359
- margin: 10px 0;
360
- color: #856404;
361
- font-size: 0.9em;
362
- }
363
-
364
- .info-box {
365
- background: rgba(52, 152, 219, 0.1);
366
- border: 1px solid rgba(52, 152, 219, 0.3);
367
- border-radius: 10px;
368
- padding: 15px;
369
- margin: 10px 0;
370
- color: #2c5282;
371
- font-size: 0.9em;
372
- }
373
-
374
- .footer {
375
- text-align: center;
376
- margin-top: 30px;
377
- color: #666;
378
- font-size: 0.9em;
379
- }
380
- """
381
-
382
- # Gradio UI (기존 구조 유지)
383
- with gr.Blocks(css=css, theme=gr.themes.Soft()) as demo:
384
- with gr.Column(elem_classes="container"):
385
- # Header with GPU status
386
- gr.HTML("""
387
- <div class="header">
388
- <h1>🎬 AI Video Magic Studio</h1>
389
- <p>Transform your images into captivating videos with Wan 2.1 + CausVid LoRA</p>
390
- <div class="gpu-status">🖥️ Zero GPU Optimized</div>
391
- </div>
392
- """)
393
-
394
- # GPU 메모리 경고
395
- if hasattr(spaces, 'GPU'):
396
- gr.HTML("""
397
- <div class="warning-box">
398
- <strong>💡 Zero GPU Performance Tips:</strong>
399
- <ul style="margin: 5px 0; padding-left: 20px;">
400
- <li>Maximum duration: 2.5 seconds</li>
401
- <li>Maximum resolution: 640×640 pixels</li>
402
- <li>Recommended: 512×512 at 2 seconds</li>
403
- <li>Use 4-6 steps for optimal speed/quality balance</li>
404
- <li>Processing time: ~60-90 seconds</li>
405
- </ul>
406
- </div>
407
- """)
408
-
409
- # 정보 박스
410
- gr.HTML("""
411
- <div class="info-box">
412
- <strong>🎯 Quick Start Guide:</strong>
413
- <ol style="margin: 5px 0; padding-left: 20px;">
414
- <li>Upload your image - AI will calculate optimal dimensions</li>
415
- <li>Enter a creative prompt or use the default</li>
416
- <li>Adjust duration (2s recommended for best results)</li>
417
- <li>Click Generate and wait for completion</li>
418
- </ol>
419
- </div>
420
- """)
421
 
422
- with gr.Row(elem_classes="main-content"):
423
- with gr.Column(scale=1):
424
- gr.Markdown("### 📸 Input Settings")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
425
 
426
- with gr.Column(elem_classes="input-section"):
427
- input_image = gr.Image(
428
- type="pil",
429
- label="🖼️ Upload Your Image",
430
- elem_classes="image-upload"
431
- )
432
-
433
- prompt_input = gr.Textbox(
434
- label="✨ Animation Prompt",
435
- value=default_prompt_i2v,
436
- placeholder="Describe how you want your image to move...",
437
- lines=2
438
  )
439
-
440
- duration_input = gr.Slider(
441
- minimum=round(MIN_FRAMES_MODEL/FIXED_FPS, 1),
442
- maximum=round(MAX_FRAMES_MODEL/FIXED_FPS, 1) if not hasattr(spaces, 'GPU') else 2.5,
443
- step=0.1,
444
- value=2,
445
- label=f"⏱️ Video Duration (seconds) - Clamped to {MIN_FRAMES_MODEL}-{MAX_FRAMES_MODEL} frames at {FIXED_FPS}fps",
446
- elem_classes="slider-container"
447
  )
448
-
449
- with gr.Accordion("🎛️ Advanced Settings", open=False, elem_classes="accordion"):
450
- negative_prompt = gr.Textbox(
451
- label="🚫 Negative Prompt",
452
- value=default_negative_prompt,
453
- lines=3
454
  )
455
-
456
  with gr.Row():
457
- seed = gr.Slider(
458
- minimum=0,
459
- maximum=MAX_SEED,
460
- step=1,
461
- value=42,
462
- label="🎲 Seed"
463
  )
464
- randomize_seed = gr.Checkbox(
465
- label="🔀 Randomize",
466
- value=True
467
- )
468
-
469
- with gr.Row():
470
- height_slider = gr.Slider(
471
- minimum=SLIDER_MIN_H,
472
- maximum=SLIDER_MAX_H if not hasattr(spaces, 'GPU') else 640,
473
- step=MOD_VALUE,
474
- value=DEFAULT_H_SLIDER_VALUE,
475
- label=f"📏 Height (multiple of {MOD_VALUE})"
476
  )
477
- width_slider = gr.Slider(
478
- minimum=SLIDER_MIN_W,
479
- maximum=SLIDER_MAX_W if not hasattr(spaces, 'GPU') else 640,
480
- step=MOD_VALUE,
481
- value=DEFAULT_W_SLIDER_VALUE,
482
- label=f"📐 Width (multiple of {MOD_VALUE})"
483
- )
484
-
485
  steps_slider = gr.Slider(
486
- minimum=1,
487
- maximum=30 if not hasattr(spaces, 'GPU') else 8,
488
- step=1,
489
- value=4,
490
- label="🔧 Quality Steps (4-6 recommended)"
491
- )
492
-
493
- guidance_scale = gr.Slider(
494
- minimum=0.0,
495
- maximum=20.0,
496
- step=0.5,
497
- value=1.0,
498
- label="🎯 Guidance Scale",
499
  visible=False
500
  )
501
-
502
- generate_btn = gr.Button(
503
- "🎬 Generate Video",
504
  variant="primary",
505
- elem_classes="generate-btn"
506
  )
507
-
508
- with gr.Column(scale=1):
509
- gr.Markdown("### 🎥 Generated Video")
510
  video_output = gr.Video(
511
- label="",
512
- autoplay=True,
513
- elem_classes="video-output"
514
  )
515
-
516
- gr.HTML("""
517
- <div class="footer">
518
- <p>💡 Tip: For best results, use clear images with good lighting and distinct subjects</p>
519
- </div>
520
- """)
521
 
522
- # Examples
523
- gr.Examples(
524
- examples=[
525
- ["peng.png", "a penguin playfully dancing in the snow, Antarctica", 512, 512],
526
- ["forg.jpg", "the frog jumps around", 448, 576],
527
- ],
528
- inputs=[input_image, prompt_input, height_slider, width_slider],
529
- outputs=[video_output, seed],
530
- fn=generate_video,
531
- cache_examples=False # 캐시 비활성화로 메모리 절약
532
  )
533
-
534
- # 개선사항 요약
535
- gr.HTML("""
536
- <div style="background: rgba(255,255,255,0.9); border-radius: 10px; padding: 15px; margin-top: 20px; font-size: 0.8em; text-align: center;">
537
- <p style="margin: 0; color: #666;">
538
- <strong style="color: #667eea;">Powered by:</strong>
539
- Wan 2.1 I2V (14B) + CausVid LoRA • 🚀 4-8 steps fast inference • 🎬 Up to 81 frames
540
- </p>
541
- </div>
542
- """)
543
-
544
- # Event handlers
545
- input_image.upload(
546
- fn=handle_image_upload_for_dims_wan,
547
- inputs=[input_image, height_slider, width_slider],
548
- outputs=[height_slider, width_slider]
549
- )
550
-
551
- input_image.clear(
552
- fn=handle_image_upload_for_dims_wan,
553
- inputs=[input_image, height_slider, width_slider],
554
- outputs=[height_slider, width_slider]
555
- )
556
-
557
- generate_btn.click(
558
- fn=generate_video,
559
- inputs=[
560
- input_image, prompt_input, height_slider, width_slider,
561
- negative_prompt, duration_input, guidance_scale,
562
- steps_slider, seed, randomize_seed
563
- ],
564
- outputs=[video_output, seed]
565
- )
566
 
567
  if __name__ == "__main__":
568
- demo.queue().launch()
569
-
 
9
  import numpy as np
10
  from PIL import Image
11
  import random
 
 
12
 
 
 
 
 
 
13
  MODEL_ID = "Wan-AI/Wan2.1-I2V-14B-480P-Diffusers"
14
  LORA_REPO_ID = "Kijai/WanVideo_comfy"
15
  LORA_FILENAME = "Wan21_CausVid_14B_T2V_lora_rank32.safetensors"
16
 
17
+ image_encoder = CLIPVisionModel.from_pretrained(MODEL_ID, subfolder="image_encoder", torch_dtype=torch.float32)
18
+ vae = AutoencoderKLWan.from_pretrained(MODEL_ID, subfolder="vae", torch_dtype=torch.float32)
19
+ pipe = WanImageToVideoPipeline.from_pretrained(
20
+ MODEL_ID, vae=vae, image_encoder=image_encoder, torch_dtype=torch.bfloat16
21
+ )
22
+ pipe.scheduler = UniPCMultistepScheduler.from_config(pipe.scheduler.config, flow_shift=8.0)
23
+ pipe.to("cuda")
24
+
25
+ causvid_path = hf_hub_download(repo_id=LORA_REPO_ID, filename=LORA_FILENAME)
26
+ pipe.load_lora_weights(causvid_path, adapter_name="causvid_lora")
27
+ pipe.set_adapters(["causvid_lora"], adapter_weights=[0.95])
28
+ pipe.fuse_lora()
29
+
30
  MOD_VALUE = 32
31
  DEFAULT_H_SLIDER_VALUE = 512
32
+ DEFAULT_W_SLIDER_VALUE = 896
33
  NEW_FORMULA_MAX_AREA = 480.0 * 832.0
34
 
35
  SLIDER_MIN_H, SLIDER_MAX_H = 128, 896
 
41
  MAX_FRAMES_MODEL = 81
42
 
43
  default_prompt_i2v = "make this image come alive, cinematic motion, smooth animation"
44
+ default_negative_prompt = "Bright tones, overexposed, static, blurred details, subtitles, style, works, paintings, images, static, overall gray, worst quality, low quality, JPEG compression residue, ugly, incomplete, extra fingers, poorly drawn hands, poorly drawn faces, deformed, disfigured, misshapen limbs, fused fingers, still picture, messy background, three legs, many people in the background, walking backwards, watermark, text, signature"
45
+
46
+ # CSS 스타일 정의
47
+ custom_css = """
48
+ /* 전체 배경 그라디언트 */
49
+ .gradio-container {
50
+ font-family: 'Inter', -apple-system, BlinkMacSystemFont, sans-serif !important;
51
+ background: linear-gradient(135deg, #667eea 0%, #764ba2 25%, #f093fb 50%, #f5576c 75%, #fa709a 100%) !important;
52
+ background-size: 400% 400% !important;
53
+ animation: gradientShift 15s ease infinite !important;
54
+ }
55
 
56
+ @keyframes gradientShift {
57
+ 0% { background-position: 0% 50%; }
58
+ 50% { background-position: 100% 50%; }
59
+ 100% { background-position: 0% 50%; }
60
+ }
61
+
62
+ /* 메인 컨테이너 스타일 */
63
+ .main-container {
64
+ backdrop-filter: blur(10px);
65
+ background: rgba(255, 255, 255, 0.1) !important;
66
+ border-radius: 20px !important;
67
+ padding: 30px !important;
68
+ box-shadow: 0 8px 32px 0 rgba(31, 38, 135, 0.37) !important;
69
+ border: 1px solid rgba(255, 255, 255, 0.18) !important;
70
+ }
71
+
72
+ /* 헤더 스타일 */
73
+ h1 {
74
+ background: linear-gradient(45deg, #ffffff, #f0f0f0) !important;
75
+ -webkit-background-clip: text !important;
76
+ -webkit-text-fill-color: transparent !important;
77
+ background-clip: text !important;
78
+ font-weight: 800 !important;
79
+ font-size: 2.5rem !important;
80
+ text-align: center !important;
81
+ margin-bottom: 2rem !important;
82
+ text-shadow: 2px 2px 4px rgba(0,0,0,0.1) !important;
83
+ }
84
+
85
+ /* 컴포넌트 컨테이너 스타일 */
86
+ .input-container, .output-container {
87
+ background: rgba(255, 255, 255, 0.08) !important;
88
+ border-radius: 15px !important;
89
+ padding: 20px !important;
90
+ margin: 10px 0 !important;
91
+ backdrop-filter: blur(5px) !important;
92
+ border: 1px solid rgba(255, 255, 255, 0.1) !important;
93
+ }
94
+
95
+ /* 입력 필드 스타일 */
96
+ input, textarea, .gr-box {
97
+ background: rgba(255, 255, 255, 0.9) !important;
98
+ border: 1px solid rgba(255, 255, 255, 0.3) !important;
99
+ border-radius: 10px !important;
100
+ color: #333 !important;
101
+ transition: all 0.3s ease !important;
102
+ }
103
+
104
+ input:focus, textarea:focus {
105
+ background: rgba(255, 255, 255, 1) !important;
106
+ border-color: #667eea !important;
107
+ box-shadow: 0 0 0 3px rgba(102, 126, 234, 0.1) !important;
108
+ }
109
+
110
+ /* 버튼 스타일 */
111
+ .generate-btn {
112
+ background: linear-gradient(135deg, #667eea 0%, #764ba2 100%) !important;
113
+ color: white !important;
114
+ font-weight: 600 !important;
115
+ font-size: 1.1rem !important;
116
+ padding: 12px 30px !important;
117
+ border-radius: 50px !important;
118
+ border: none !important;
119
+ cursor: pointer !important;
120
+ transition: all 0.3s ease !important;
121
+ box-shadow: 0 4px 15px rgba(102, 126, 234, 0.4) !important;
122
+ }
123
 
124
+ .generate-btn:hover {
125
+ transform: translateY(-2px) !important;
126
+ box-shadow: 0 6px 20px rgba(102, 126, 234, 0.6) !important;
127
+ }
128
+
129
+ /* 슬라이더 스타일 */
130
+ input[type="range"] {
131
+ background: transparent !important;
132
+ }
133
+
134
+ input[type="range"]::-webkit-slider-track {
135
+ background: rgba(255, 255, 255, 0.3) !important;
136
+ border-radius: 5px !important;
137
+ height: 6px !important;
138
+ }
139
+
140
+ input[type="range"]::-webkit-slider-thumb {
141
+ background: linear-gradient(135deg, #667eea, #764ba2) !important;
142
+ border: 2px solid white !important;
143
+ border-radius: 50% !important;
144
+ cursor: pointer !important;
145
+ width: 18px !important;
146
+ height: 18px !important;
147
+ -webkit-appearance: none !important;
148
+ }
149
+
150
+ /* Accordion 스타일 */
151
+ .gr-accordion {
152
+ background: rgba(255, 255, 255, 0.05) !important;
153
+ border-radius: 10px !important;
154
+ border: 1px solid rgba(255, 255, 255, 0.1) !important;
155
+ margin: 15px 0 !important;
156
+ }
157
+
158
+ /* 라벨 스타일 */
159
+ label {
160
+ color: #ffffff !important;
161
+ font-weight: 500 !important;
162
+ font-size: 0.95rem !important;
163
+ margin-bottom: 5px !important;
164
+ }
165
+
166
+ /* 이미지 업로드 영역 */
167
+ .image-upload {
168
+ border: 2px dashed rgba(255, 255, 255, 0.3) !important;
169
+ border-radius: 15px !important;
170
+ background: rgba(255, 255, 255, 0.05) !important;
171
+ transition: all 0.3s ease !important;
172
+ }
173
+
174
+ .image-upload:hover {
175
+ border-color: rgba(255, 255, 255, 0.5) !important;
176
+ background: rgba(255, 255, 255, 0.1) !important;
177
+ }
178
+
179
+ /* 비디오 출력 영역 */
180
+ video {
181
+ border-radius: 15px !important;
182
+ box-shadow: 0 4px 20px rgba(0, 0, 0, 0.3) !important;
183
+ }
184
+
185
+ /* Examples 섹션 스타일 */
186
+ .gr-examples {
187
+ background: rgba(255, 255, 255, 0.05) !important;
188
+ border-radius: 15px !important;
189
+ padding: 20px !important;
190
+ margin-top: 20px !important;
191
+ }
192
+
193
+ /* Checkbox 스타일 */
194
+ input[type="checkbox"] {
195
+ accent-color: #667eea !important;
196
+ }
197
+
198
+ /* 반응형 애니메이션 */
199
+ @media (max-width: 768px) {
200
+ h1 { font-size: 2rem !important; }
201
+ .main-container { padding: 20px !important; }
202
+ }
203
+ """
204
 
205
  def _calculate_new_dimensions_wan(pil_image, mod_val, calculation_max_area,
206
  min_slider_h, max_slider_h,
 
212
 
213
  aspect_ratio = orig_h / orig_w
214
 
 
 
 
 
 
215
  calc_h = round(np.sqrt(calculation_max_area * aspect_ratio))
216
  calc_w = round(np.sqrt(calculation_max_area / aspect_ratio))
217
 
218
  calc_h = max(mod_val, (calc_h // mod_val) * mod_val)
219
  calc_w = max(mod_val, (calc_w // mod_val) * mod_val)
220
 
 
 
 
 
 
221
  new_h = int(np.clip(calc_h, min_slider_h, (max_slider_h // mod_val) * mod_val))
222
  new_w = int(np.clip(calc_w, min_slider_w, (max_slider_w // mod_val) * mod_val))
223
 
 
242
  guidance_scale, steps,
243
  seed, randomize_seed,
244
  progress):
245
+ if steps > 4 and duration_seconds > 2:
246
+ return 90
247
+ elif steps > 4 or duration_seconds > 2:
248
+ return 75
 
 
 
 
 
 
 
249
  else:
250
+ return 60
 
 
 
 
 
 
251
 
252
  @spaces.GPU(duration=get_duration)
253
  def generate_video(input_image, prompt, height, width,
 
258
 
259
  if input_image is None:
260
  raise gr.Error("Please upload an input image.")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
261
 
262
  target_h = max(MOD_VALUE, (int(height) // MOD_VALUE) * MOD_VALUE)
263
  target_w = max(MOD_VALUE, (int(width) // MOD_VALUE) * MOD_VALUE)
264
 
265
  num_frames = np.clip(int(round(duration_seconds * FIXED_FPS)), MIN_FRAMES_MODEL, MAX_FRAMES_MODEL)
266
 
 
 
 
 
 
267
  current_seed = random.randint(0, MAX_SEED) if randomize_seed else int(seed)
268
 
269
+ resized_image = input_image.resize((target_w, target_h))
 
 
 
270
 
271
+ with torch.inference_mode():
272
+ output_frames_list = pipe(
273
+ image=resized_image, prompt=prompt, negative_prompt=negative_prompt,
274
+ height=target_h, width=target_w, num_frames=num_frames,
275
+ guidance_scale=float(guidance_scale), num_inference_steps=int(steps),
276
+ generator=torch.Generator(device="cuda").manual_seed(current_seed)
277
+ ).frames[0]
 
 
 
 
 
 
 
 
 
 
 
 
 
278
 
 
279
  with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as tmpfile:
280
  video_path = tmpfile.name
281
  export_to_video(output_frames_list, video_path, fps=FIXED_FPS)
 
 
 
 
 
 
 
282
  return video_path, current_seed
283
 
284
+ with gr.Blocks(css=custom_css, theme=gr.themes.Soft()) as demo:
285
+ with gr.Column(elem_classes=["main-container"]):
286
+ gr.Markdown("# ✨ Fast 4 steps Wan 2.1 I2V (14B) with CausVid LoRA")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
287
 
288
+ with gr.Row():
289
+ with gr.Column(elem_classes=["input-container"]):
290
+ input_image_component = gr.Image(
291
+ type="pil",
292
+ label="🖼️ Input Image (auto-resized to target H/W)",
293
+ elem_classes=["image-upload"]
294
+ )
295
+ prompt_input = gr.Textbox(
296
+ label="✏️ Prompt",
297
+ value=default_prompt_i2v,
298
+ lines=2
299
+ )
300
+ duration_seconds_input = gr.Slider(
301
+ minimum=round(MIN_FRAMES_MODEL/FIXED_FPS,1),
302
+ maximum=round(MAX_FRAMES_MODEL/FIXED_FPS,1),
303
+ step=0.1,
304
+ value=2,
305
+ label="⏱️ Duration (seconds)",
306
+ info=f"Clamped to model's {MIN_FRAMES_MODEL}-{MAX_FRAMES_MODEL} frames at {FIXED_FPS}fps."
307
+ )
308
 
309
+ with gr.Accordion("⚙️ Advanced Settings", open=False):
310
+ negative_prompt_input = gr.Textbox(
311
+ label="❌ Negative Prompt",
312
+ value=default_negative_prompt,
313
+ lines=3
 
 
 
 
 
 
 
314
  )
315
+ seed_input = gr.Slider(
316
+ label="🎲 Seed",
317
+ minimum=0,
318
+ maximum=MAX_SEED,
319
+ step=1,
320
+ value=42,
321
+ interactive=True
 
322
  )
323
+ randomize_seed_checkbox = gr.Checkbox(
324
+ label="🔀 Randomize seed",
325
+ value=True,
326
+ interactive=True
 
 
327
  )
 
328
  with gr.Row():
329
+ height_input = gr.Slider(
330
+ minimum=SLIDER_MIN_H,
331
+ maximum=SLIDER_MAX_H,
332
+ step=MOD_VALUE,
333
+ value=DEFAULT_H_SLIDER_VALUE,
334
+ label=f"📏 Output Height (multiple of {MOD_VALUE})"
335
  )
336
+ width_input = gr.Slider(
337
+ minimum=SLIDER_MIN_W,
338
+ maximum=SLIDER_MAX_W,
339
+ step=MOD_VALUE,
340
+ value=DEFAULT_W_SLIDER_VALUE,
341
+ label=f"📐 Output Width (multiple of {MOD_VALUE})"
 
 
 
 
 
 
342
  )
 
 
 
 
 
 
 
 
343
  steps_slider = gr.Slider(
344
+ minimum=1,
345
+ maximum=30,
346
+ step=1,
347
+ value=4,
348
+ label="🚀 Inference Steps"
349
+ )
350
+ guidance_scale_input = gr.Slider(
351
+ minimum=0.0,
352
+ maximum=20.0,
353
+ step=0.5,
354
+ value=1.0,
355
+ label="🎯 Guidance Scale",
 
356
  visible=False
357
  )
358
+
359
+ generate_button = gr.Button(
360
+ "🎬 Generate Video",
361
  variant="primary",
362
+ elem_classes=["generate-btn"]
363
  )
364
+
365
+ with gr.Column(elem_classes=["output-container"]):
 
366
  video_output = gr.Video(
367
+ label="🎥 Generated Video",
368
+ autoplay=True,
369
+ interactive=False
370
  )
371
+
372
+ input_image_component.upload(
373
+ fn=handle_image_upload_for_dims_wan,
374
+ inputs=[input_image_component, height_input, width_input],
375
+ outputs=[height_input, width_input]
376
+ )
377
 
378
+ input_image_component.clear(
379
+ fn=handle_image_upload_for_dims_wan,
380
+ inputs=[input_image_component, height_input, width_input],
381
+ outputs=[height_input, width_input]
 
 
 
 
 
 
382
  )
383
+
384
+ ui_inputs = [
385
+ input_image_component, prompt_input, height_input, width_input,
386
+ negative_prompt_input, duration_seconds_input,
387
+ guidance_scale_input, steps_slider, seed_input, randomize_seed_checkbox
388
+ ]
389
+ generate_button.click(fn=generate_video, inputs=ui_inputs, outputs=[video_output, seed_input])
390
+
391
+ with gr.Column():
392
+ gr.Examples(
393
+ examples=[
394
+ ["peng.png", "a penguin playfully dancing in the snow, Antarctica", 896, 512],
395
+ ["forg.jpg", "the frog jumps around", 448, 832],
396
+ ],
397
+ inputs=[input_image_component, prompt_input, height_input, width_input],
398
+ outputs=[video_output, seed_input],
399
+ fn=generate_video,
400
+ cache_examples="lazy",
401
+ label="🌟 Example Gallery"
402
+ )
 
 
 
 
 
 
 
 
 
 
 
 
 
403
 
404
  if __name__ == "__main__":
405
+ demo.queue().launch()