atunc29 committed on
Commit
398df78
·
verified ·
1 Parent(s): 220b0b0

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +59 -18
app.py CHANGED
@@ -444,16 +444,41 @@ def process(
444
  input_image, prompt, t2v=False, n_prompt="", seed=31337,
445
  total_second_length=60, latent_window_size=9, steps=25,
446
  cfg=1.0, gs=10.0, rs=0.0, gpu_memory_preservation=6,
447
- use_teacache=True, mp4_crf=16
448
  ):
449
  global stream
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
450
  if t2v:
451
- default_height, default_width = 640, 640
452
- input_image = np.ones((default_height, default_width, 3), dtype=np.uint8) * 255
453
- print("Using blank white image for text-to-video mode")
 
 
 
 
 
 
 
454
  else:
 
455
  if isinstance(input_image, dict) and "composite" in input_image:
456
- # Handle uploaded image with alpha channel
457
  composite_rgba_uint8 = input_image["composite"]
458
  rgb_uint8 = composite_rgba_uint8[:, :, :3]
459
  mask_uint8 = composite_rgba_uint8[:, :, 3]
@@ -467,7 +492,6 @@ def process(
467
  elif input_image is None:
468
  raise ValueError("Please provide an input image or enable Text to Video mode")
469
  else:
470
- # Handle regular RGB image
471
  input_image = input_image.astype(np.uint8)
472
 
473
  yield None, None, '', '', gr.update(interactive=False), gr.update(interactive=True)
@@ -668,10 +692,10 @@ with block:
668
  with gr.Group():
669
  total_second_length = gr.Slider(
670
  label="Duration (Seconds)",
671
- minimum=0.01,
672
- maximum=15,
673
  value=2,
674
- step=0.1,
675
  info='Length of generated video'
676
  )
677
  steps = gr.Slider(
@@ -687,16 +711,22 @@ with block:
687
  minimum=1.0,
688
  maximum=32.0,
689
  value=10.0,
690
- step=1,
691
  info='8-12 recommended'
692
  )
693
- mp4_crf = gr.Slider(
694
- label="Video Quality",
695
- minimum=0,
696
- maximum=51,
697
- value=16,
698
- step=1,
699
- info='15-20 recommended'
 
 
 
 
 
 
700
  )
701
 
702
  gr.Markdown("### Advanced")
@@ -793,7 +823,8 @@ with block:
793
  input_image, prompt, t2v, n_prompt, seed,
794
  total_second_length, latent_window_size,
795
  steps, cfg, gs, rs, gpu_memory_preservation,
796
- use_teacache, mp4_crf
 
797
  ]
798
 
799
  start_button.click(
@@ -816,4 +847,14 @@ with block:
816
  queue=False
817
  )
818
 
 
 
 
 
 
 
 
 
 
 
819
  block.launch(share=True)
 
444
  input_image, prompt, t2v=False, n_prompt="", seed=31337,
445
  total_second_length=60, latent_window_size=9, steps=25,
446
  cfg=1.0, gs=10.0, rs=0.0, gpu_memory_preservation=6,
447
+ use_teacache=True, mp4_crf=16, quality_radio="640x360", aspect_ratio="1:1"
448
  ):
449
  global stream
450
+ quality_map = {
451
+ "360p": (640, 360),
452
+ "480p": (854, 480),
453
+ "540p": (960, 540),
454
+ "720p": (1280, 720),
455
+ "640x360": (640, 360), # fallback for default
456
+ }
457
+ # Aspect ratio map: (width, height)
458
+ aspect_map = {
459
+ "1:1": (1, 1),
460
+ "3:4": (3, 4),
461
+ "4:3": (4, 3),
462
+ "16:9": (16, 9),
463
+ "9:16": (9, 16),
464
+ }
465
+ selected_quality = quality_map.get(quality_radio, (640, 360))
466
+ base_width, base_height = selected_quality
467
+
468
  if t2v:
469
+ # Use aspect ratio to determine final width/height
470
+ ar_w, ar_h = aspect_map.get(aspect_ratio, (1, 1))
471
+ if ar_w >= ar_h:
472
+ target_height = base_height
473
+ target_width = int(round(target_height * ar_w / ar_h))
474
+ else:
475
+ target_width = base_width
476
+ target_height = int(round(target_width * ar_h / ar_w))
477
+ input_image = np.ones((target_height, target_width, 3), dtype=np.uint8) * 255
478
+ print(f"Using blank white image for text-to-video mode, {target_width}x{target_height} ({aspect_ratio})")
479
  else:
480
+ target_width, target_height = selected_quality
481
  if isinstance(input_image, dict) and "composite" in input_image:
 
482
  composite_rgba_uint8 = input_image["composite"]
483
  rgb_uint8 = composite_rgba_uint8[:, :, :3]
484
  mask_uint8 = composite_rgba_uint8[:, :, 3]
 
492
  elif input_image is None:
493
  raise ValueError("Please provide an input image or enable Text to Video mode")
494
  else:
 
495
  input_image = input_image.astype(np.uint8)
496
 
497
  yield None, None, '', '', gr.update(interactive=False), gr.update(interactive=True)
 
692
  with gr.Group():
693
  total_second_length = gr.Slider(
694
  label="Duration (Seconds)",
695
+ minimum=1,
696
+ maximum=10,
697
  value=2,
698
+ step=1,
699
  info='Length of generated video'
700
  )
701
  steps = gr.Slider(
 
711
  minimum=1.0,
712
  maximum=32.0,
713
  value=10.0,
714
+ step=0.1,
715
  info='8-12 recommended'
716
  )
717
+ quality_radio = gr.Radio(
718
+ label="Video Quality (Resolution)",
719
+ choices=["360p", "480p", "540p", "720p"],
720
+ value="640x360",
721
+ info="Choose output video resolution"
722
+ )
723
+ # Aspect ratio dropdown, hidden by default
724
+ aspect_ratio = gr.Dropdown(
725
+ label="Aspect Ratio",
726
+ choices=["1:1", "3:4", "4:3", "16:9", "9:16"],
727
+ value="1:1",
728
+ visible=False,
729
+ info="Only applies to Text to Video mode"
730
  )
731
 
732
  gr.Markdown("### Advanced")
 
823
  input_image, prompt, t2v, n_prompt, seed,
824
  total_second_length, latent_window_size,
825
  steps, cfg, gs, rs, gpu_memory_preservation,
826
+ use_teacache, 16, # mp4_crf default
827
+ quality_radio, aspect_ratio
828
  ]
829
 
830
  start_button.click(
 
847
  queue=False
848
  )
849
 
850
+ # Show/hide aspect ratio dropdown based on t2v checkbox
851
+ def show_aspect_ratio(t2v_checked):
852
+ return gr.update(visible=bool(t2v_checked))
853
+ t2v.change(
854
+ fn=show_aspect_ratio,
855
+ inputs=[t2v],
856
+ outputs=[aspect_ratio],
857
+ queue=False
858
+ )
859
+
860
  block.launch(share=True)