import subprocess
import os
import sys
import shutil
from pathlib import Path
import argparse
import gradio as gr
from STT.sst import speech_to_text
from LLM.llm import generate_reply
from TTS_X.tts import generate_voice
from FantasyTalking.infer import load_models, main
# Download the model weights on first run if they are not present yet.
if not os.path.exists("./models/fantasytalking_model.ckpt"):
    subprocess.run(["python", "download_models.py"])
sys.path.append(os.path.abspath("."))
# Default inference arguments shared by every request; the per-call fields
# (image_path, audio_path, prompt, output_dir) are filled in later.
args_template = argparse.Namespace(
    fantasytalking_model_path="./models/fantasytalking_model.ckpt",
    wav2vec_model_dir="./models/wav2vec2-base-960h",
    wan_model_dir="./models/Wan2.1-I2V-14B-720P",
    image_path="",
    audio_path="",
    prompt="",
    output_dir="./output",
    image_size=512,
    audio_scale=1.0,
    prompt_cfg_scale=5.0,
    audio_cfg_scale=5.0,
    max_num_frames=81,
    inference_steps=20,
    fps=23,
    num_persistent_param_in_dit=None,
    seed=1111,
)
pipe, fantasytalking, wav2vec_processor, wav2vec = load_models(args_template)
print("✅")
def generate_video(image_path, audio_path, prompt, output_dir="./output"):
    # Copy the shared argument template, override the per-request fields,
    # and run FantasyTalking inference with the already-loaded models.
    args_dict = vars(args_template).copy()
    args_dict.update({
        "image_path": image_path,
        "audio_path": audio_path,
        "prompt": prompt,
        "output_dir": output_dir,
    })
    args = argparse.Namespace(**args_dict)
    return main(args, pipe, fantasytalking, wav2vec_processor, wav2vec)
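# Usage sketch (hypothetical file names, not part of the app flow): generate_video
# can also be called directly for a one-off render, e.g. from a script or notebook:
#     video_path = generate_video("./assets/face.png", "./assets/speech.wav",
#                                 prompt="a person speaking to the camera")
# The return value is whatever FantasyTalking's main() returns, assumed here to be
# the path of the rendered video file (it is passed straight to gr.Video below).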
def full_pipeline(user_audio, user_image):
    # Current behaviour: animate the uploaded image directly with the user's own
    # audio. The STT/LLM/TTS steps are skipped, so the text boxes stay empty and
    # the "spoken response" is simply the uploaded audio (see the sketch below
    # for the full loop).
    Path("./output").mkdir(parents=True, exist_ok=True)
    video_path = generate_video(
        image_path=user_image,
        audio_path=user_audio,
        prompt="..."  # You can leave this empty or write anything simple
    )
    return "", "", user_audio, video_path
with gr.Blocks() as demo:
    gr.Markdown("## Realtime Interactive Avatar 🎭")
    with gr.Row():
        with gr.Column():
            audio_input = gr.Audio(label="Upload Voice", type="filepath")
            image_input = gr.Image(label="Upload Image", type="filepath")
            btn = gr.Button("Generate")
        with gr.Column():
            user_text = gr.Textbox(label="Transcribed Text (Speech to Text)")
            reply_text = gr.Textbox(label="Assistant Response (LLM)")
            reply_audio = gr.Audio(label="Spoken Response (Text to Speech)")
            video_output = gr.Video(label="Final Generated Video")

    btn.click(fn=full_pipeline,
              inputs=[audio_input, image_input],
              outputs=[user_text, reply_text, reply_audio, video_output])
demo.launch(inbrowser=True, share=True)