guangzhaoli committed on
Commit
83a6159
·
1 Parent(s): 992009a
Files changed (1) hide show
  1. app.py +303 -133
app.py CHANGED
@@ -1,85 +1,46 @@
1
- # app.py – revised to support both persistent and non‑persistent disks
2
- """
3
- 关键改动
4
- ---------
5
- 1. **自动探测是否存在 `/data`**(Hugging Face Persistent Storage 挂载点)。
6
- * 有 `/data` ⇒ 把模型下载到 `/data/checkpoints` 并把 `HF_HOME` 也指到 `/data/.huggingface`。
7
- * 没 `/data` ⇒ 回退到 `/tmp`(50 GB 临时盘)。容器休眠后会丢失缓存,但代码仍然能正常跑。
8
- 2. 通过 `huggingface_hub.snapshot_download()` 下载并缓存 `Wan-AI/Wan2.1-T2V-1.3B`。
9
- 3. 其余业务逻辑(Gradio UI、视频编辑流程)保持不变。
10
- """
11
-
12
  import os
13
  import sys
14
- import time
15
- import argparse
16
  import datetime
17
- import subprocess
18
- import gradio as gr
19
- import spaces
20
- from huggingface_hub import snapshot_download
21
-
22
- # -----------------------------------------------------------------------------
23
- # ▶ 运行时环境探测 & 路径配置
24
- # -----------------------------------------------------------------------------
25
- PERSIST_ROOT = "/data" if os.path.isdir("/data") else "/tmp" # /data 不存在就回退到 /tmp
26
-
27
- HF_CACHE_DIR = os.path.join(PERSIST_ROOT, ".huggingface") # Transformers 缓存
28
- MODEL_REPO = "Wan-AI/Wan2.1-T2V-1.3B" # Hub 上的模型仓库
29
- MODEL_DIR = os.path.join(PERSIST_ROOT, "checkpoints", "Wan2.1-T2V-1.3B")
30
-
31
- os.makedirs(HF_CACHE_DIR, exist_ok=True)
32
- os.makedirs(MODEL_DIR, exist_ok=True)
33
-
34
- # 让 Transformers / Diffusers 等库把文件缓存到持久或临时目录
35
- os.environ["HF_HOME"] = HF_CACHE_DIR
36
-
37
- # -----------------------------------------------------------------------------
38
- # ▶ 下载 / 准备模型权重(若文件不在本地,则 snapshot_download)
39
- # -----------------------------------------------------------------------------
40
- if not os.path.exists(os.path.join(MODEL_DIR, "model_index.json")):
41
- print(f"[Warm‑up] Downloading model {MODEL_REPO} to {MODEL_DIR} …")
42
- snapshot_download(
43
- repo_id=MODEL_REPO,
44
- local_dir=MODEL_DIR,
45
- local_dir_use_symlinks=False, # 真拷贝,避免 symlink 指向 cache 丢失
46
- resume_download=True, # 断点续传
47
- )
48
- print("[Warm‑up] Model download complete.")
49
 
50
- CKPT_DIR = MODEL_DIR # 供后续 edit.py 使用
51
- EDIT_SCRIPT_PATH = "edit.py"
 
 
52
  OUTPUT_DIR = "gradio_outputs"
53
- VIDEO_EXAMPLES_DIR = "video_list"
54
- PYTHON_EXECUTABLE = sys.executable
55
 
 
56
  os.makedirs(OUTPUT_DIR, exist_ok=True)
57
- os.makedirs(VIDEO_EXAMPLES_DIR, exist_ok=True)
58
-
59
- # -----------------------------------------------------------------------------
60
- # ▶ CLI 参数(保留向后兼容)
61
- # -----------------------------------------------------------------------------
62
 
63
  def _parse_args():
64
- parser = argparse.ArgumentParser(description="Generate an edited video with Wan 2.1‑T2V")
65
- parser.add_argument("--ckpt", type=str, default=CKPT_DIR, help="Custom checkpoint directory (optional)")
66
- return parser.parse_args()
 
 
 
 
 
67
 
68
- # -----------------------------------------------------------------------------
69
- # ▶ 工具函数
70
- # -----------------------------------------------------------------------------
71
 
72
- def generate_safe_filename_part(text: str, max_len: int = 20) -> str:
 
73
  if not text:
74
  return "untitled"
75
- safe_text = "".join(c if c.isalnum() or c in [" ", "_"] else "_" for c in text).strip()
76
- safe_text = "_".join(safe_text.split())
77
  return safe_text[:max_len]
78
 
79
- # -----------------------------------------------------------------------------
80
- # ▶ 核心编辑函数(装饰器 spaces.GPU 依旧保留)
81
- # -----------------------------------------------------------------------------
82
-
83
  @spaces.GPU
84
  def run_video_edit(
85
  source_video_path,
@@ -92,7 +53,7 @@ def run_video_edit(
92
  n_avg_value,
93
  progress=gr.Progress(track_tqdm=True),
94
  ):
95
- """调用 edit.py 执行文本‑到‑视频的定向编辑"""
96
 
97
  # --- 参数校验 -----------------------------------------------------------
98
  if not source_video_path:
@@ -106,7 +67,7 @@ def run_video_edit(
106
  if not target_words:
107
  raise gr.Error("Please provide target words.")
108
 
109
- progress(0, desc="Preparing for video editing…")
110
 
111
  worse_avg_value = n_avg_value // 2
112
  timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
@@ -122,56 +83,32 @@ def run_video_edit(
122
  cmd = [
123
  PYTHON_EXECUTABLE,
124
  EDIT_SCRIPT_PATH,
125
- "--task",
126
- "t2v-1.3B",
127
- "--size",
128
- "832*480",
129
- "--base_seed",
130
- "42",
131
- "--ckpt_dir",
132
- CKPT_DIR,
133
- "--sample_solver",
134
- "unipc",
135
- "--source_video_path",
136
- source_video_path,
137
- "--source_prompt",
138
- source_prompt,
139
- "--source_words",
140
- source_words,
141
- "--prompt",
142
- target_prompt,
143
- "--target_words",
144
- target_words,
145
- "--sample_guide_scale",
146
- "3.5",
147
- "--tar_guide_scale",
148
- "10.5",
149
- "--sample_shift",
150
- "12",
151
- "--sample_steps",
152
- "50",
153
- "--n_max",
154
- str(n_max_value),
155
- "--n_min",
156
- "0",
157
- "--n_avg",
158
- str(n_avg_value),
159
- "--worse_avg",
160
- str(worse_avg_value),
161
- "--omega",
162
- str(omega_value),
163
- "--window_size",
164
- "11",
165
- "--decay_factor",
166
- "0.25",
167
- "--frame_num",
168
- "41",
169
- "--save_file",
170
- output_video_path,
171
  ]
172
 
173
- # --- 调用子进程 & 进度回调 ---------------------------------------------
174
- progress(0.05, desc="Launching edit.py…")
175
  process = subprocess.Popen(
176
  cmd,
177
  stdout=subprocess.PIPE,
@@ -179,33 +116,266 @@ def run_video_edit(
179
  text=True,
180
  bufsize=1,
181
  )
182
-
183
- # 简易心跳进度条(真实项目可解析 stdout)
184
- for i in range(12):
185
- if process.poll() is not None:
186
- break
187
- progress(0.05 + i * 0.07, desc=f"Editing… ({i+1}/12)")
188
- time.sleep(1)
189
-
190
  stdout, stderr = process.communicate()
 
191
  if process.returncode != 0:
 
192
  raise gr.Error(f"Video editing failed.\nStderr: {stderr[:600]}")
193
 
194
  if not os.path.exists(output_video_path):
 
195
  raise gr.Error("edit.py reported success but output file missing.")
196
 
197
  progress(1, desc="Done!")
198
  return output_video_path
 
199
 
200
- # -----------------------------------------------------------------------------
201
- # Gradio UI(与之前相同,略)
202
- # -----------------------------------------------------------------------------
203
- # 由于篇幅,这里省略 UI 部分;逻辑与原版一致,只是依赖上述新路径。
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
204
 
205
  if __name__ == "__main__":
 
 
 
 
 
206
  args = _parse_args()
207
- if args.ckpt: # 允许 CLI 覆盖
208
- CKPT_DIR = args.ckpt
209
- gr.close_all() # 防止在某些环境重复 launch
210
- demo = gr.load("./app.py") # 重新加载自身 Build
211
  demo.launch()
 
1
+ # app.py
2
+ import gradio as gr
3
+ import subprocess
4
+ import spaces
 
 
 
 
 
 
 
5
  import os
6
  import sys
 
 
7
  import datetime
8
+ import shutil
9
+ import time  # NOTE: no longer used after the heartbeat progress loop was removed — consider dropping
10
+ import argparse
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
11
 
12
+ # --- Configuration ---
13
+ # !!! IMPORTANT: Ensure this path is correct for your environment !!!
14
+ CKPT_DIR = "./checkpoints/Wan2.1-T2V-1.3B"
15
+ EDIT_SCRIPT_PATH = "edit.py" # Assumes edit.py is in the same directory
16
  OUTPUT_DIR = "gradio_outputs"
17
+ PYTHON_EXECUTABLE = sys.executable # Uses the same python that runs gradio
18
+ VIDEO_EXAMPLES_DIR = "video_list" # Directory for example videos
19
 
20
+ # Create output directory if it doesn't exist
21
  os.makedirs(OUTPUT_DIR, exist_ok=True)
22
+ os.makedirs(VIDEO_EXAMPLES_DIR, exist_ok=True) # Ensure video_list exists for clarity
 
 
 
 
23
 
24
  def _parse_args():
25
+ parser = argparse.ArgumentParser(
26
+ description="Generate a image or video from a text prompt or image using Wan"
27
+ )
28
+ parser.add_argument(
29
+ "--ckpt",
30
+ type=str,
31
+ default="./checkpoints/Wan2.1-T2V-1.3B",
32
+ help="The path to the checkpoint directory.")
33
 
34
+ return parser.parse_args()
 
 
35
 
36
+ def generate_safe_filename_part(text, max_len=20):
37
+ """Generates a filesystem-safe string from text."""
38
  if not text:
39
  return "untitled"
40
+ safe_text = "".join(c if c.isalnum() or c in [' ', '_'] else '_' for c in text).strip()
41
+ safe_text = "_".join(safe_text.split()) # Replace spaces with underscores
42
  return safe_text[:max_len]
43
 
 
 
 
 
44
  @spaces.GPU
45
  def run_video_edit(
46
  source_video_path,
 
53
  n_avg_value,
54
  progress=gr.Progress(track_tqdm=True),
55
  ):
56
+ """调用 edit.py 执行定向视频编辑,不使用伪进度条。"""
57
 
58
  # --- 参数校验 -----------------------------------------------------------
59
  if not source_video_path:
 
67
  if not target_words:
68
  raise gr.Error("Please provide target words.")
69
 
70
+ progress(0, desc="Launching edit.py this may take a few minutes…")
71
 
72
  worse_avg_value = n_avg_value // 2
73
  timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
 
83
  cmd = [
84
  PYTHON_EXECUTABLE,
85
  EDIT_SCRIPT_PATH,
86
+ "--task", "t2v-1.3B",
87
+ "--size", "832*480",
88
+ "--base_seed", "42",
89
+ "--ckpt_dir", CKPT_DIR,
90
+ "--sample_solver", "unipc",
91
+ "--source_video_path", source_video_path,
92
+ "--source_prompt", source_prompt,
93
+ "--source_words", source_words,
94
+ "--prompt", target_prompt,
95
+ "--target_words", target_words,
96
+ "--sample_guide_scale", "3.5",
97
+ "--tar_guide_scale", "10.5",
98
+ "--sample_shift", "12",
99
+ "--sample_steps", "50",
100
+ "--n_max", str(n_max_value),
101
+ "--n_min", "0",
102
+ "--n_avg", str(n_avg_value),
103
+ "--worse_avg", str(worse_avg_value),
104
+ "--omega", str(omega_value),
105
+ "--window_size", "11",
106
+ "--decay_factor", "0.25",
107
+ "--frame_num", "41",
108
+ "--save_file", output_video_path,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
109
  ]
110
 
111
+ # --- 调用子进程 ---------------------------------------------------------
 
112
  process = subprocess.Popen(
113
  cmd,
114
  stdout=subprocess.PIPE,
 
116
  text=True,
117
  bufsize=1,
118
  )
 
 
 
 
 
 
 
 
119
  stdout, stderr = process.communicate()
120
+
121
  if process.returncode != 0:
122
+ progress(1, desc="Error")
123
  raise gr.Error(f"Video editing failed.\nStderr: {stderr[:600]}")
124
 
125
  if not os.path.exists(output_video_path):
126
+ progress(1, desc="Error")
127
  raise gr.Error("edit.py reported success but output file missing.")
128
 
129
  progress(1, desc="Done!")
130
  return output_video_path
131
+ # --- Gradio UI Definition ---
132
 
133
+ # Define all examples to be loaded
134
+ examples_to_load_definitions = [
135
+ { # Original bear_g example (corresponds to bear_g_03 in YAML)
136
+ "video_base_name": "bear_g",
137
+ "src_prompt": "A large brown bear is walking slowly across a rocky terrain in a zoo enclosure, surrounded by stone walls and scattered greenery. The camera remains fixed, capturing the bear's deliberate movements.",
138
+ "tar_prompt": "A large dinosaur is walking slowly across a rocky terrain in a zoo enclosure, surrounded by stone walls and scattered greenery. The camera remains fixed, capturing the dinosaur's deliberate movements.",
139
+ "src_words": "large brown bear",
140
+ "tar_words": "large dinosaur",
141
+ },
142
+ { # blackswan_02
143
+ "video_base_name": "blackswan",
144
+ "src_prompt": "A black swan with a red beak swimming in a river near a wall and bushes.",
145
+ "tar_prompt": "A white duck with a red beak swimming in a river near a wall and bushes.",
146
+ "src_words": "black swan",
147
+ "tar_words": "white duck",
148
+ },
149
+ { # jeep_01
150
+ "video_base_name": "jeep",
151
+ "src_prompt": "A silver jeep driving down a curvy road in the countryside.",
152
+ "tar_prompt": "A Porsche car driving down a curvy road in the countryside.",
153
+ "src_words": "silver jeep",
154
+ "tar_words": "Porsche car",
155
+ },
156
+ { # woman_02 (additive edit)
157
+ "video_base_name": "woman",
158
+ "src_prompt": "A woman in a black dress is walking along a paved path in a lush green park, with trees and a wooden bench in the background. The camera remains fixed, capturing her steady movement.",
159
+ "tar_prompt": "A woman in a black dress and a red baseball cap is walking along a paved path in a lush green park, with trees and a wooden bench in the background. The camera remains fixed, capturing her steady movement.",
160
+ "src_words": "", # Empty source words for addition
161
+ "tar_words": "a red baseball cap",
162
+ }
163
+ ]
164
+
165
+ examples_data = []
166
+ # Default advanced parameters for all examples
167
+ default_omega = 2.75
168
+ default_n_max = 40
169
+ default_n_avg = 4
170
+
171
+ for ex_def in examples_to_load_definitions:
172
+ # Assuming .mp4 extension for all videos
173
+ video_file_name = f"{ex_def['video_base_name']}.mp4"
174
+ example_video_path = os.path.join(VIDEO_EXAMPLES_DIR, video_file_name)
175
+
176
+ if os.path.exists(example_video_path):
177
+ examples_data.append([
178
+ example_video_path,
179
+ ex_def["src_prompt"],
180
+ ex_def["tar_prompt"],
181
+ ex_def["src_words"],
182
+ ex_def["tar_words"],
183
+ default_omega,
184
+ default_n_max,
185
+ default_n_avg
186
+ ])
187
+ else:
188
+ print(f"Warning: Example video {example_video_path} not found. Example for '{ex_def['video_base_name']}' will be skipped.")
189
+
190
+ if not examples_data:
191
+ print(f"Warning: No example videos found in '{VIDEO_EXAMPLES_DIR}'. Examples section will be empty or not show.")
192
+
193
+
194
+
195
+ with gr.Blocks(theme=gr.themes.Soft(), css="""
196
+ /* Main container - maximize width and improve spacing */
197
+ .gradio-container {
198
+ max-width: 98% !important;
199
+ width: 98% !important;
200
+ margin: 0 auto !important;
201
+ padding: 20px !important;
202
+ min-height: 100vh !important;
203
+ }
204
+
205
+ /* All containers should use full width */
206
+ .contain, .container {
207
+ max-width: 100% !important;
208
+ width: 100% !important;
209
+ padding: 0 !important;
210
+ }
211
+
212
+ /* Remove default padding from main wrapper */
213
+ .main, .wrap, .panel {
214
+ max-width: 100% !important;
215
+ width: 100% !important;
216
+ padding: 0 !important;
217
+ }
218
+
219
+ /* Improve spacing for components */
220
+ .gap, .form {
221
+ gap: 15px !important;
222
+ }
223
+
224
+ /* Make all components full width */
225
+ #component-0, .block {
226
+ max-width: 100% !important;
227
+ width: 100% !important;
228
+ }
229
+
230
+ /* Better padding for groups */
231
+ .group {
232
+ padding: 20px !important;
233
+ margin-bottom: 15px !important;
234
+ border-radius: 8px !important;
235
+ }
236
+
237
+ /* Make rows and columns use full space with better gaps */
238
+ .row {
239
+ gap: 30px !important;
240
+ margin-bottom: 20px !important;
241
+ }
242
+
243
+ /* Improve column spacing */
244
+ .column {
245
+ padding: 0 10px !important;
246
+ }
247
+
248
+ /* Better video component sizing */
249
+ .video-container {
250
+ width: 100% !important;
251
+ }
252
+
253
+ /* Textbox improvements */
254
+ .textbox, .input-field {
255
+ width: 100% !important;
256
+ }
257
+
258
+ /* Button styling */
259
+ .primary {
260
+ width: 100% !important;
261
+ padding: 12px !important;
262
+ font-size: 16px !important;
263
+ margin-top: 20px !important;
264
+ }
265
+
266
+ /* Examples section spacing */
267
+ .examples {
268
+ margin-top: 30px !important;
269
+ padding: 20px !important;
270
+ }
271
+
272
+ /* Accordion improvements */
273
+ .accordion {
274
+ margin: 15px 0 !important;
275
+ }
276
+ """) as demo:
277
+ gr.Markdown(
278
+ """
279
+ <h1 style="text-align: center; font-size: 2.5em;">🪄 FlowDirector Video Edit</h1>
280
+ <p style="text-align: center;">
281
+ Edit videos by providing a source video, descriptive prompts, and specifying words to change.<br>
282
+ Powered by FlowDirector.
283
+ </p>
284
+ """
285
+ )
286
+
287
+ with gr.Row():
288
+ with gr.Column(scale=5): # Input column - increased scale for better space usage
289
+ with gr.Group():
290
+ gr.Markdown("### 🎬 Source Material")
291
+ source_video_input = gr.Video(label="Upload Source Video", height=540)
292
+ source_prompt_input = gr.Textbox(
293
+ label="Source Prompt",
294
+ placeholder="Describe the original video content accurately.",
295
+ lines=3,
296
+ show_label=True
297
+ )
298
+ target_prompt_input = gr.Textbox(
299
+ label="Target Prompt (Desired Edit)",
300
+ placeholder="Describe how you want the video to be after editing.",
301
+ lines=3,
302
+ show_label=True
303
+ )
304
+
305
+ with gr.Group():
306
+ gr.Markdown("### ✍️ Editing Instructions")
307
+ source_words_input = gr.Textbox(
308
+ label="Source Words (to be replaced, or empty for addition)",
309
+ placeholder="e.g., large brown bear (leave empty to add target words globally)"
310
+ )
311
+ target_words_input = gr.Textbox(
312
+ label="Target Words (replacement or addition)",
313
+ placeholder="e.g., large dinosaur OR a red baseball cap"
314
+ )
315
+
316
+ with gr.Accordion("🔧 Advanced Parameters", open=False):
317
+ omega_slider = gr.Slider(
318
+ minimum=0.0, maximum=5.0, step=0.05, value=default_omega, label="Omega (ω)",
319
+ info="Controls the intensity/style of the edit. Higher values might lead to stronger edits."
320
+ )
321
+ n_max_slider = gr.Slider(
322
+ minimum=0, maximum=50, step=1, value=default_n_max, label="N_max",
323
+ info="Max value for an adaptive param. `n_min` is fixed at 0."
324
+ )
325
+ n_avg_slider = gr.Slider(
326
+ minimum=0, maximum=5, step=1, value=default_n_avg, label="N_avg",
327
+ info="Average value for an adaptive param. `worse_avg` will be N_avg // 2."
328
+ )
329
+
330
+ submit_button = gr.Button("✨ Generate Edited Video", variant="primary")
331
+
332
+ with gr.Column(scale=4): # Output column - increased scale for better proportion
333
+ gr.Markdown("### 🖼️ Edited Video Output")
334
+ output_video = gr.Video(label="Result", height=540, show_label=False)
335
+
336
+
337
+ if examples_data: # Only show examples if some were successfully loaded
338
+ gr.Examples(
339
+ examples=examples_data,
340
+ inputs=[
341
+ source_video_input,
342
+ source_prompt_input,
343
+ target_prompt_input,
344
+ source_words_input,
345
+ target_words_input,
346
+ omega_slider,
347
+ n_max_slider,
348
+ n_avg_slider
349
+ ],
350
+ outputs=output_video,
351
+ fn=run_video_edit,
352
+ cache_examples=False # For long processes, False is better
353
+ )
354
+
355
+ all_process_inputs = [
356
+ source_video_input,
357
+ source_prompt_input,
358
+ target_prompt_input,
359
+ source_words_input,
360
+ target_words_input,
361
+ omega_slider,
362
+ n_max_slider,
363
+ n_avg_slider
364
+ ]
365
+
366
+
367
+ submit_button.click(
368
+ fn=run_video_edit,
369
+ inputs=all_process_inputs,
370
+ outputs=output_video
371
+ )
372
 
373
  if __name__ == "__main__":
374
+ # print(f"Make sure your checkpoint directory is correctly set to: {CKPT_DIR}")
375
+ # print(f"And that '{EDIT_SCRIPT_PATH}' is in the same directory as app.py or correctly pathed.")
376
+ # print(f"Outputs will be saved to: {os.path.abspath(OUTPUT_DIR)}")
377
+ # print(f"Place example videos (e.g., bear_g.mp4, blackswan.mp4, etc.) in: {os.path.abspath(VIDEO_EXAMPLES_DIR)}")
378
+
379
  args = _parse_args()
380
+ CKPT_DIR = args.ckpt
 
 
 
381
  demo.launch()