#!/usr/bin/env bash
#
# Launches infer.py on the bundled sample image/audio pair
# (./assets/images/woman.png + ./assets/audios/woman.wav) with a fixed seed.
#
# Usage: run from the directory that contains infer.py and ./assets, since
# all paths below are relative.
#
# CUDA_VISIBLE_DEVICES=2 is set only for this one command; it limits the
# CUDA devices visible to the python process to physical device index 2.
#
# NOTE: each trailing backslash must be the LAST character on its line.
# A backslash followed by a space is an escaped space, not a line
# continuation, and would glue a leading space onto the next option name
# (e.g. " --image_path"), so the flag would not be recognised as an option.
#
# NOTE(review): the meaning of the individual flags is defined by infer.py,
# which is not visible from this script — consult its argument parser before
# changing values.
CUDA_VISIBLE_DEVICES=2 python infer.py \
  --image_path ./assets/images/woman.png \
  --audio_path ./assets/audios/woman.wav \
  --prompt "A woman is talking." \
  --max_num_frames 81 \
  --image_size 512 \
  --audio_scale 1.0 \
  --prompt_cfg_scale 5.0 \
  --audio_cfg_scale 5.0 \
  --fps 23 \
  --num_persistent_param_in_dit 7000000000 \
  --seed 1111