import subprocess
import os
import sys
import shutil
from pathlib import Path
import argparse
import gradio as gr
from STT.sst import speech_to_text
from LLM.llm import generate_reply
from TTS_X.tts import generate_voice
from FantasyTalking.infer import load_models, main
# Download the model weights on first run if they are not already present.
if not os.path.exists("./models/fantasytalking_model.ckpt"):
    subprocess.run(["python", "download_models.py"], check=True)
sys.path.append(os.path.abspath("."))
# Default arguments shared by every generation request; the per-request fields
# (image_path, audio_path, prompt, output_dir) are filled in by generate_video().
args_template = argparse.Namespace(
    fantasytalking_model_path="./models/fantasytalking_model.ckpt",
    wav2vec_model_dir="./models/wav2vec2-base-960h",
    wan_model_dir="./models/Wan2.1-I2V-14B-720P",
    image_path="",
    audio_path="",
    prompt="",
    output_dir="./output",
    image_size=512,
    audio_scale=1.0,
    prompt_cfg_scale=5.0,
    audio_cfg_scale=5.0,
    max_num_frames=81,
    inference_steps=20,
    fps=23,
    num_persistent_param_in_dit=None,
    seed=1111,
)
# Load the heavy models once at startup so they are reused across requests.
pipe, fantasytalking, wav2vec_processor, wav2vec = load_models(args_template)
print("✅ Models loaded")
def generate_video(image_path, audio_path, prompt, output_dir="./output"):
    """Run FantasyTalking inference, overriding the per-request fields of the template args."""
    args_dict = vars(args_template).copy()
    args_dict.update({
        "image_path": image_path,
        "audio_path": audio_path,
        "prompt": prompt,
        "output_dir": output_dir,
    })
    args = argparse.Namespace(**args_dict)
    return main(args, pipe, fantasytalking, wav2vec_processor, wav2vec)
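
# Example usage (the file paths here are hypothetical placeholders):
#     generate_video("./input/face.png", "./input/speech.wav", prompt="a person talking")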
def full_pipeline(user_audio, user_image):
    Path("./output").mkdir(parents=True, exist_ok=True)
    video_path = generate_video(
        image_path=user_image,
        audio_path=user_audio,
        prompt="...",  # you can leave this empty or write anything simple
    )
    # The STT/LLM/TTS stages are bypassed for now: the user's own audio drives
    # the avatar directly, so the transcript and reply fields are returned empty.
    return "", "", user_audio, video_path
with gr.Blocks() as demo:
    gr.Markdown("## Realtime Interactive Avatar 🎭")

    with gr.Row():
        with gr.Column():
            audio_input = gr.Audio(label="Upload Voice", type="filepath")
            image_input = gr.Image(label="Upload Image", type="filepath")
            btn = gr.Button("Generate")
        with gr.Column():
            user_text = gr.Textbox(label="Transcribed Text (Speech to Text)")
            reply_text = gr.Textbox(label="Assistant Response (LLM)")
            reply_audio = gr.Audio(label="Spoken Response (Text to Speech)")
            video_output = gr.Video(label="Final Generated Video")

    btn.click(
        fn=full_pipeline,
        inputs=[audio_input, image_input],
        outputs=[user_text, reply_text, reply_audio, video_output],
    )
demo.launch(inbrowser=True, share=True)
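
# To run locally (assuming the repo's dependencies are installed):
#     python app.py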