import os
import traceback

import torch
import gradio as gr

from inference_engine import run_inference
from motion_extractor import extract_pkl_from_video

device = "cuda" if torch.cuda.is_available() else "cpu"
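# NOTE: the diffusion sampling below is compute-heavy; on CPU it will be far
# slower than on CUDA (assumption: there is no lighter CPU-specific path here).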

def full_pipeline(video_file, ref_image=None, width=512, height=512, steps=50, scale=3.0, seed=6666):
    """Extract motion from the input video, then animate the reference image."""
    # 1. Extract the motion .pkl from the uploaded video
    video_path = video_file.name
    motion_pkl_path = extract_pkl_from_video(video_path)
    gr.Info("⏳ Motion extraction finished; starting animation...")

    # 2. Handle the reference image (optional); an empty path means the
    #    first frame of the video is used as the reference
    if ref_image is not None:
        ref_path = "temp_ref.png"
        ref_image.save(ref_path)
    else:
        ref_path = ""

    # 3. Run inference
    output_path = run_inference(
        device,
        motion_pkl_path,
        ref_path,
        dst_width=width,
        dst_height=height,
        num_inference_steps=steps,
        guidance_scale=scale,
        seed=seed,
    )
    return output_path
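
# A minimal sketch of calling the pipeline outside Gradio (assumed usage: the
# first argument only needs a `.name` attribute, mirroring what gr.File passes
# in; "dance.mp4" and "ref.png" are hypothetical paths):
#
#   from types import SimpleNamespace
#   from PIL import Image
#   out = full_pipeline(SimpleNamespace(name="dance.mp4"), Image.open("ref.png"))
#   print(out)  # path to the generated video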

def run_pipeline_with_feedback(video_file, ref_image, width, height, steps, scale, seed):
    try:
        if video_file is None:
            raise gr.Error("Please upload a dancing video (.mp4/.mov/.avi).")
        # Show progress feedback in the UI
        gr.Info("⏳ Processing... this may take several minutes.")
        result = full_pipeline(video_file, ref_image, width, height, steps, scale, seed)
        gr.Info("✅ Inference done, please enjoy it!")
        return result
    except Exception as e:
        traceback.print_exc()
        gr.Warning("⚠️ Inference failed: " + str(e))
        return None

# Build the UI
with gr.Blocks(title="MTVCrafter Inference Demo") as demo:
    gr.Markdown(
        """
        # 🎨💃 MTVCrafter Inference Demo

        💡 **Tip:** Upload a dancing video in **MP4/MOV/AVI** format, and optionally a reference image (e.g., PNG or JPG).
        This demo extracts human motion from the input video and animates the reference image accordingly.
        If no reference image is provided, the **first frame** of the video is used as the reference.

        🎞️ **Note:** The generated output video contains exactly **49 frames**.
        """
    )

    with gr.Row():
        with gr.Column(scale=1):
            video_input = gr.File(label="📹 Input Video (Required)", file_types=[".mp4", ".mov", ".avi"])
            video_preview = gr.Video(label="👀 Preview of Uploaded Video", height=280)  # fixed height to keep the columns aligned

            def show_video_preview(video_file):
                # gr.File passes a file object whose .name is the temp path
                return video_file.name if video_file else None

            video_input.change(fn=show_video_preview, inputs=video_input, outputs=video_preview)
        with gr.Column(scale=1):
            ref_image = gr.Image(type="pil", label="🖼️ Reference Image (Optional)", height=538)

    with gr.Accordion("⚙️ Advanced Settings", open=False):
        with gr.Row():
            width = gr.Slider(384, 1024, value=512, step=16, label="Output Width")
            height = gr.Slider(384, 1024, value=512, step=16, label="Output Height")
        with gr.Row():
            steps = gr.Slider(20, 100, value=50, step=5, label="Inference Steps")
            scale = gr.Slider(0.0, 10.0, value=3.0, step=0.25, label="Guidance Scale")
            seed = gr.Number(value=6666, label="Random Seed")

    with gr.Row():
        output_video = gr.Video(label="🎬 Generated Video", interactive=False)

    run_btn = gr.Button("🚀 Run MTVCrafter", variant="primary")

    run_btn.click(
        fn=run_pipeline_with_feedback,
        inputs=[video_input, ref_image, width, height, steps, scale, seed],
        outputs=output_video,
    )

if __name__ == "__main__":
    # Use the HF mirror endpoint and bypass any proxy for local addresses
    os.environ["HF_ENDPOINT"] = "https://hf-mirror.com/"
    os.environ["NO_PROXY"] = "localhost,127.0.0.1/8,::1"
    demo.launch(server_name="0.0.0.0", server_port=7860, share=True)
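
# To run locally (assumed invocation): `python app.py`, then open
# http://localhost:7860. With share=True, Gradio additionally prints a
# temporary public *.gradio.live link in the console.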