diff --git a/.gitattributes b/.gitattributes
index a6344aac8c09253b3b630fb776ae94478aa0275b..6e4a65ed6ead41fe32eee3ac8494301742bbe87f 100644
--- a/.gitattributes
+++ b/.gitattributes
@@ -33,3 +33,45 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+docs/api.jpg filter=lfs diff=lfs merge=lfs -text
+docs/picwish.com.jpg filter=lfs diff=lfs merge=lfs -text
+docs/picwish.jpg filter=lfs diff=lfs merge=lfs -text
+docs/reccloud.cn.jpg filter=lfs diff=lfs merge=lfs -text
+docs/reccloud.com.jpg filter=lfs diff=lfs merge=lfs -text
+docs/webui-en.jpg filter=lfs diff=lfs merge=lfs -text
+docs/webui.jpg filter=lfs diff=lfs merge=lfs -text
+resource/fonts/Charm-Bold.ttf filter=lfs diff=lfs merge=lfs -text
+resource/fonts/Charm-Regular.ttf filter=lfs diff=lfs merge=lfs -text
+resource/fonts/MicrosoftYaHeiBold.ttc filter=lfs diff=lfs merge=lfs -text
+resource/fonts/MicrosoftYaHeiNormal.ttc filter=lfs diff=lfs merge=lfs -text
+resource/fonts/STHeitiLight.ttc filter=lfs diff=lfs merge=lfs -text
+resource/fonts/STHeitiMedium.ttc filter=lfs diff=lfs merge=lfs -text
+resource/songs/output000.mp3 filter=lfs diff=lfs merge=lfs -text
+resource/songs/output001.mp3 filter=lfs diff=lfs merge=lfs -text
+resource/songs/output002.mp3 filter=lfs diff=lfs merge=lfs -text
+resource/songs/output003.mp3 filter=lfs diff=lfs merge=lfs -text
+resource/songs/output004.mp3 filter=lfs diff=lfs merge=lfs -text
+resource/songs/output005.mp3 filter=lfs diff=lfs merge=lfs -text
+resource/songs/output006.mp3 filter=lfs diff=lfs merge=lfs -text
+resource/songs/output007.mp3 filter=lfs diff=lfs merge=lfs -text
+resource/songs/output008.mp3 filter=lfs diff=lfs merge=lfs -text
+resource/songs/output009.mp3 filter=lfs diff=lfs merge=lfs -text
+resource/songs/output010.mp3 filter=lfs diff=lfs merge=lfs -text
+resource/songs/output011.mp3 filter=lfs diff=lfs merge=lfs -text
+resource/songs/output012.mp3 filter=lfs diff=lfs merge=lfs -text
+resource/songs/output013.mp3 filter=lfs diff=lfs merge=lfs -text
+resource/songs/output014.mp3 filter=lfs diff=lfs merge=lfs -text
+resource/songs/output015.mp3 filter=lfs diff=lfs merge=lfs -text
+resource/songs/output016.mp3 filter=lfs diff=lfs merge=lfs -text
+resource/songs/output017.mp3 filter=lfs diff=lfs merge=lfs -text
+resource/songs/output018.mp3 filter=lfs diff=lfs merge=lfs -text
+resource/songs/output019.mp3 filter=lfs diff=lfs merge=lfs -text
+resource/songs/output020.mp3 filter=lfs diff=lfs merge=lfs -text
+resource/songs/output021.mp3 filter=lfs diff=lfs merge=lfs -text
+resource/songs/output022.mp3 filter=lfs diff=lfs merge=lfs -text
+resource/songs/output023.mp3 filter=lfs diff=lfs merge=lfs -text
+resource/songs/output024.mp3 filter=lfs diff=lfs merge=lfs -text
+resource/songs/output025.mp3 filter=lfs diff=lfs merge=lfs -text
+resource/songs/output027.mp3 filter=lfs diff=lfs merge=lfs -text
+resource/songs/output028.mp3 filter=lfs diff=lfs merge=lfs -text
+resource/songs/output029.mp3 filter=lfs diff=lfs merge=lfs -text
diff --git a/Dockerfile b/Dockerfile
new file mode 100644
index 0000000000000000000000000000000000000000..d110240ff27ce85953efb015449246284042972f
--- /dev/null
+++ b/Dockerfile
@@ -0,0 +1,44 @@
+# Use an official Python runtime as a parent image
+FROM python:3.11-slim-bullseye
+
+# Set the working directory in the container
+WORKDIR /MoneyPrinterTurbo
+
+# Set permissions on the /MoneyPrinterTurbo directory to 777
+RUN chmod 777 /MoneyPrinterTurbo
+
+ENV PYTHONPATH="/MoneyPrinterTurbo"
+
+# Install system dependencies
+RUN apt-get update && apt-get install -y \
+    git \
+    imagemagick \
+    ffmpeg \
+    && rm -rf /var/lib/apt/lists/*
+
+# Fix security policy for ImageMagick
+RUN sed -i '/<policy domain="path" rights="none" pattern="@\*"/d' /etc/ImageMagick-6/policy.xml
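With a Dockerfile like this in place, the image can also be built and run by hand instead of through `docker-compose`. A minimal sketch using standard Docker commands (the image tag is arbitrary, and the two ports are the WebUI/API ports described in the deployment section further below):

```shell
# Build the image from the repository root
docker build -t moneyprinterturbo .

# Run it, exposing the WebUI (8501) and API (8080) ports
docker run -it --rm -p 8501:8501 -p 8080:8080 moneyprinterturbo
```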

# MoneyPrinterTurbo 💸

English | 简体中文

Simply provide a topic or keyword for a video, and it will automatically generate the video script, video materials, subtitles, and background music, then synthesize them into a high-definition short video.

### WebUI

![](docs/webui-en.jpg)

### API Interface

![](docs/api.jpg)

## Special Thanks 🙏

Deploying and using this project still poses a certain barrier for beginner users, so we would like to express our special thanks to **RecCloud (AI-Powered Multimedia Service Platform)** for providing a free `AI Video Generator` service based on this project. It can be used online without any deployment, which is very convenient.

- Chinese version: https://reccloud.cn
- English version: https://reccloud.com

![](docs/reccloud.com.jpg)

## Thanks for Sponsorship 🙏

Thanks to Picwish (https://picwish.com) for supporting and sponsoring this project, enabling its continuous updates and maintenance.

Picwish focuses on the **image processing field**, providing a rich set of **image processing tools** that radically simplify complex operations, truly making image processing easier.

![picwish.jpg](docs/picwish.com.jpg)

## Features 🎯

- [x] Complete **MVC architecture** with **clearly structured**, easy-to-maintain code, supporting both an `API` and a `Web interface`
- [x] Supports **AI-generated** video scripts as well as **customized scripts**
- [x] Supports multiple **high-definition video** sizes
    - [x] Portrait 9:16, `1080x1920`
    - [x] Landscape 16:9, `1920x1080`
- [x] Supports **batch video generation**, so you can create several videos at once and pick the most satisfactory one
- [x] Supports setting the **duration of video clips**, making it easy to adjust how often the material switches
- [x] Supports video scripts in both **Chinese** and **English**
- [x] Supports **multiple voice** synthesis, with **real-time preview** of the results
- [x] Supports **subtitle generation** with adjustable `font`, `position`, `color`, and `size`, plus optional `subtitle outlining`
- [x] Supports **background music**, either random or from specified music files, with adjustable `background music volume`
- [x] Video material sources are **high-definition** and **royalty-free**, and you can also use your own **local materials**
- [x] Supports integration with various models such as **OpenAI**, **Moonshot**, **Azure**, **gpt4free**, **one-api**, **Qwen**, **Google Gemini**, **Ollama**, **DeepSeek**, **ERNIE**, **Pollinations**, and more

### Future Plans 📅

- [ ] GPT-SoVITS dubbing support
- [ ] Optimize voice synthesis using large models, for more natural and emotionally rich voice output
- [ ] Add video transition effects for a smoother viewing experience
- [ ] Add more video material sources and improve the matching between materials and script
- [ ] Add video length options: short, medium, long
- [ ] Support more voice synthesis providers, such as OpenAI TTS
- [ ] Automate uploads to YouTube

## Video Demos 📺

### Portrait 9:16

- ▶️ How to Add Fun to Your Life
- ▶️ What is the Meaning of Life

### Landscape 16:9

- ▶️ What is the Meaning of Life
- ▶️ Why Exercise

## System Requirements 📦

- Recommended: at least 4 CPU cores and 4 GB of RAM; a GPU is not required
- Windows 10 / macOS 11.0 or later

## Quick Start 🚀

### Run in Google Colab
Want to try MoneyPrinterTurbo without setting up a local environment? Run it directly in Google Colab!

[![Open in Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/harry0703/MoneyPrinterTurbo/blob/main/docs/MoneyPrinterTurbo.ipynb)

### Windows

Google Drive (v1.2.6): https://drive.google.com/file/d/1HsbzfT7XunkrCrHw5ncUjFX8XX4zAuUh/view?usp=sharing

After downloading, it is recommended to **double-click** `update.bat` first to update to the **latest code**, then double-click `start.bat` to launch.

After launching, the browser will open automatically (if it opens to a blank page, it is recommended to use **Chrome** or **Edge**).

### Other Systems

One-click startup packages have not been created yet. See the **Installation & Deployment** section below; **Docker** deployment is recommended as the more convenient option.

## Installation & Deployment 📥

### Prerequisites

#### ① Clone the Project

```shell
git clone https://github.com/harry0703/MoneyPrinterTurbo.git
```

#### ② Modify the Configuration File

- Copy the `config.example.toml` file and rename it to `config.toml`
- Follow the instructions in the `config.toml` file to configure `pexels_api_keys` and `llm_provider`, and set up the corresponding API Key for the llm_provider's service provider

### Docker Deployment 🐳

#### ① Launch the Docker Container

If you haven't installed Docker yet, install it first: https://www.docker.com/products/docker-desktop/

If you are using Windows, please refer to Microsoft's documentation:

1. https://learn.microsoft.com/en-us/windows/wsl/install
2. https://learn.microsoft.com/en-us/windows/wsl/tutorials/wsl-containers

```shell
cd MoneyPrinterTurbo
docker-compose up
```

> Note: recent versions of Docker install Docker Compose as a plugin, so the start command becomes `docker compose up`.

#### ② Access the Web Interface

Open your browser and visit http://0.0.0.0:8501

#### ③ Access the API Interface

Open your browser and visit http://0.0.0.0:8080/docs or http://0.0.0.0:8080/redoc
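Once the API is up, you can also sanity-check it from the command line. A minimal sketch (assuming the default configuration: port 8080 and no `api_key` enforcement; the endpoint paths and field names come from the API code later in this diff, and `<task_id>` is a placeholder for the id returned by the first call):

```shell
# Create a video-generation task (the response contains a task_id)
curl -X POST http://127.0.0.1:8080/api/v1/videos \
  -H "Content-Type: application/json" \
  -d '{"video_subject": "spring flowers", "video_aspect": "9:16"}'

# Poll the task status with the returned task_id
curl http://127.0.0.1:8080/api/v1/tasks/<task_id>
```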

### Manual Deployment 📦

#### ① Create a Python Virtual Environment

It is recommended to create a Python virtual environment using [conda](https://conda.io/projects/conda/en/latest/user-guide/install/index.html)

```shell
git clone https://github.com/harry0703/MoneyPrinterTurbo.git
cd MoneyPrinterTurbo
conda create -n MoneyPrinterTurbo python=3.11
conda activate MoneyPrinterTurbo
pip install -r requirements.txt
```

#### ② Install ImageMagick

###### Windows:

- Download from https://imagemagick.org/script/download.php. Choose the Windows version, and make sure to select the **static library** build, such as ImageMagick-7.1.1-32-Q16-x64-**static**.exe
- Install the downloaded ImageMagick and **do not change the installation path**
- Modify the `config.toml` configuration file, setting `imagemagick_path` to your actual installation path

###### MacOS:

```shell
brew install imagemagick
```

###### Ubuntu

```shell
sudo apt-get install imagemagick
```

###### CentOS

```shell
sudo yum install ImageMagick
```

#### ③ Launch the Web Interface 🌐

Note that you need to execute the following commands in the `root directory` of the MoneyPrinterTurbo project.

###### Windows

```bat
webui.bat
```

###### MacOS or Linux

```shell
sh webui.sh
```

After launching, the browser will open automatically.

#### ④ Launch the API Service 🚀

```shell
python main.py
```

After launching, you can view the `API documentation` at http://127.0.0.1:8080/docs and test the endpoints directly online for a quick hands-on experience.

## Voice Synthesis 🗣

A list of all supported voices can be viewed here: [Voice List](./docs/voice-list.txt)

2024-04-16 v1.1.2: Added 9 new Azure voice synthesis voices, which require an API KEY to be configured. These voices sound more realistic.

## Subtitle Generation 📜

Currently, there are 2 ways to generate subtitles:

- **edge**: faster generation and better performance, with no specific requirements for your computer, but the quality may be unstable
- **whisper**: slower generation and worse performance, with some requirements for your computer, but more reliable quality

You can switch between them by modifying `subtitle_provider` in the `config.toml` configuration file, as shown in the sketch below.

It is recommended to use `edge` mode, and to switch to `whisper` mode if the quality of the generated subtitles is not satisfactory.

> Note:
>
> 1. In whisper mode, you need to download a model file (about 3 GB) from HuggingFace, so please ensure a good internet connection
> 2. If left blank, no subtitles will be generated.
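For example, a minimal `config.toml` snippet for switching providers might look like this (assuming `subtitle_provider` lives in the `[app]` section alongside the other `app` keys shown elsewhere in this document):

```toml
[app]
# "edge" is recommended; switch to "whisper" if subtitle quality is unsatisfactory.
# Leave the value empty to disable subtitle generation.
subtitle_provider = "edge"
```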

> Since HuggingFace is not accessible in China, you can use the following methods to download the `whisper-large-v3` model file.

Download links:

- Baidu Netdisk: https://pan.baidu.com/s/11h3Q6tsDtjQKTjUu3sc5cA?pwd=xjs9
- Quark Netdisk: https://pan.quark.cn/s/3ee3d991d64b

After downloading the model, extract it and place the entire directory in `.\MoneyPrinterTurbo\models`.
The final file path should look like this: `.\MoneyPrinterTurbo\models\whisper-large-v3`

```
MoneyPrinterTurbo
  ├─models
  │   └─whisper-large-v3
  │          config.json
  │          model.bin
  │          preprocessor_config.json
  │          tokenizer.json
  │          vocabulary.json
```

## Background Music 🎵

Background music for videos is located in the project's `resource/songs` directory.
> The current project includes some default music from YouTube videos. If there are copyright issues, please delete them.

## Subtitle Fonts 🅰

Fonts for rendering video subtitles are located in the project's `resource/fonts` directory, and you can also add your own fonts.

## Common Questions 🤔

### ❓RuntimeError: No ffmpeg exe could be found

Normally, ffmpeg is downloaded and detected automatically.
However, if your environment prevents automatic downloads, you may encounter the following error:

```
RuntimeError: No ffmpeg exe could be found.
Install ffmpeg on your system, or set the IMAGEIO_FFMPEG_EXE environment variable.
```

In this case, you can download ffmpeg from https://www.gyan.dev/ffmpeg/builds/, unzip it, and set `ffmpeg_path` to your actual installation path.

```toml
[app]
# Please set according to your actual path, note that Windows path separators are \\
ffmpeg_path = "C:\\Users\\harry\\Downloads\\ffmpeg.exe"
```

### ❓ImageMagick is not installed on your computer

[issue 33](https://github.com/harry0703/MoneyPrinterTurbo/issues/33)

1. Install ImageMagick from the download address given in the example configuration, https://imagemagick.org/archive/binaries/ImageMagick-7.1.1-30-Q16-x64-static.exe, using the static library build
2. Do not install it in a path containing Chinese characters, to avoid unpredictable issues

[issue 54](https://github.com/harry0703/MoneyPrinterTurbo/issues/54#issuecomment-2017842022)

On Linux, you can install it manually; refer to https://cn.linux-console.net/?p=16978

Thanks to [@wangwenqiao666](https://github.com/wangwenqiao666) for their research and exploration

### ❓ImageMagick's security policy prevents operations related to temporary file @/tmp/tmpur5hyyto.txt

You can find these policies in ImageMagick's configuration file policy.xml.
This file is usually located in /etc/ImageMagick-`X`/ or a similar location in the ImageMagick installation directory.
Modify the entry containing `pattern="@"`, changing `rights="none"` to `rights="read|write"` to allow read and write operations on files.

### ❓OSError: [Errno 24] Too many open files

This issue is caused by the system's limit on the number of open files. You can solve it by raising that limit.

Check the current limit:

```shell
ulimit -n
```

If it is too low, increase it, for example:

```shell
ulimit -n 10240
```

### ❓Whisper model download failed, with the following error

LocalEntryNotFoundError: Cannot find an appropriate cached snapshot folder for the specified revision on the local disk and outgoing traffic has been disabled. To enable repo look-ups and downloads online, pass 'local_files_only=False' as input.

or

An error occured while synchronizing the model Systran/faster-whisper-large-v3 from the Hugging Face Hub:
An error happened while trying to locate the files on the Hub and we cannot find the appropriate snapshot folder for the specified revision on the local disk. Please check your internet connection and try again.
Trying to load the model directly from the local cache, if it exists.

Solution: [Click to see how to manually download the model from a netdisk](#subtitle-generation-)

## Feedback & Suggestions 📢

- You can submit an [issue](https://github.com/harry0703/MoneyPrinterTurbo/issues) or a [pull request](https://github.com/harry0703/MoneyPrinterTurbo/pulls).

## License 📝

Click to view the [`LICENSE`](LICENSE) file

## Star History

[![Star History Chart](https://api.star-history.com/svg?repos=harry0703/MoneyPrinterTurbo&type=Date)](https://star-history.com/#harry0703/MoneyPrinterTurbo&Date)

diff --git a/README.md b/README.md
index b42573cdef4ccb25c2560ff354fd9d2929be4973..7812761e480608b1e3c4e1e7addb3f02427e7b01 100644
--- a/README.md
+++ b/README.md
@@ -1,10 +1,369 @@
----
-title: Avfwae
-emoji: 📚
-colorFrom: gray
-colorTo: gray
-sdk: docker
-pinned: false
----
-
-Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
# MoneyPrinterTurbo 💸

简体中文 | English

只需提供一个视频 **主题** 或 **关键词**,就可以全自动生成视频文案、视频素材、视频字幕、视频背景音乐,然后合成一个高清的短视频。

### Web界面

![](docs/webui.jpg)

### API界面

![](docs/api.jpg)

## 特别感谢 🙏

由于该项目的 **部署** 和 **使用**,对于一些小白用户来说,还是 **有一定的门槛**,在此特别感谢 **录咖(AI智能 多媒体服务平台)** 网站基于该项目,提供的免费`AI视频生成器`服务,可以不用部署,直接在线使用,非常方便。

- 中文版:https://reccloud.cn
- 英文版:https://reccloud.com

![](docs/reccloud.cn.jpg)

## 感谢赞助 🙏

感谢佐糖 https://picwish.cn 对该项目的支持和赞助,使得该项目能够持续地更新和维护。

佐糖专注于**图像处理领域**,提供丰富的**图像处理工具**,将复杂操作极致简化,真正实现让图像处理更简单。

![picwish.jpg](docs/picwish.jpg)

## 功能特性 🎯

- [x] 完整的 **MVC架构**,代码 **结构清晰**,易于维护,支持 `API` 和 `Web界面`
- [x] 支持视频文案 **AI自动生成**,也可以**自定义文案**
- [x] 支持多种 **高清视频** 尺寸
    - [x] 竖屏 9:16,`1080x1920`
    - [x] 横屏 16:9,`1920x1080`
- [x] 支持 **批量视频生成**,可以一次生成多个视频,然后选择一个最满意的
- [x] 支持 **视频片段时长** 设置,方便调节素材切换频率
- [x] 支持 **中文** 和 **英文** 视频文案
- [x] 支持 **多种语音** 合成,可 **实时试听** 效果
- [x] 支持 **字幕生成**,可以调整 `字体`、`位置`、`颜色`、`大小`,同时支持`字幕描边`设置
- [x] 支持 **背景音乐**,随机或者指定音乐文件,可设置`背景音乐音量`
- [x] 视频素材来源 **高清**,而且 **无版权**,也可以使用自己的 **本地素材**
- [x] 支持 **OpenAI**、**Moonshot**、**Azure**、**gpt4free**、**one-api**、**通义千问**、**Google Gemini**、**Ollama**、**DeepSeek**、**文心一言**、**Pollinations** 等多种模型接入
    - 中国用户建议使用 **DeepSeek** 或 **Moonshot** 作为大模型提供商(国内可直接访问,不需要VPN。注册就送额度,基本够用)

### 后期计划 📅

- [ ] GPT-SoVITS 配音支持
- [ ] 优化语音合成,利用大模型,使其合成的声音,更加自然,情绪更加丰富
- [ ] 增加视频转场效果,使其看起来更加流畅
- [ ] 增加更多视频素材来源,优化视频素材和文案的匹配度
- [ ] 增加视频长度选项:短、中、长
- [ ] 支持更多的语音合成服务商,比如 OpenAI TTS
- [ ] 自动上传到YouTube平台

## 视频演示 📺

### 竖屏 9:16

- ▶️ 《如何增加生活的乐趣》
- ▶️ 《金钱的作用》(更真实的合成声音)
- ▶️ 《生命的意义是什么》

### 横屏 16:9

- ▶️ 《生命的意义是什么》
- ▶️ 《为什么要运动》

## 配置要求 📦

- 建议最低 CPU **4核** 或以上,内存 **4G** 或以上,显卡非必须
- Windows 10 或 MacOS 11.0 以上系统

## 快速开始 🚀

### 在 Google Colab 中运行
免去本地环境配置,点击即可直接在 Google Colab 中快速体验 MoneyPrinterTurbo

[![Open in Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/harry0703/MoneyPrinterTurbo/blob/main/docs/MoneyPrinterTurbo.ipynb)

### Windows一键启动包

下载一键启动包,解压直接使用(路径不要有 **中文**、**特殊字符**、**空格**)

- 百度网盘(v1.2.6): https://pan.baidu.com/s/1wg0UaIyXpO3SqIpaq790SQ?pwd=sbqx 提取码: sbqx
- Google Drive (v1.2.6): https://drive.google.com/file/d/1HsbzfT7XunkrCrHw5ncUjFX8XX4zAuUh/view?usp=sharing

下载后,建议先**双击执行** `update.bat` 更新到**最新代码**,然后双击 `start.bat` 启动

启动后,会自动打开浏览器(如果打开是空白,建议换成 **Chrome** 或者 **Edge** 打开)

## 安装部署 📥

### 前提条件

- 尽量不要使用 **中文路径**,避免出现一些无法预料的问题
- 请确保你的 **网络** 是正常的,VPN需要打开`全局流量`模式

#### ① 克隆代码

```shell
git clone https://github.com/harry0703/MoneyPrinterTurbo.git
```

#### ② 修改配置文件(可选,启动后也可以在 WebUI 里面配置)

- 将 `config.example.toml` 文件复制一份,命名为 `config.toml`
- 按照 `config.toml` 文件中的说明,配置好 `pexels_api_keys` 和 `llm_provider`,并根据 llm_provider 对应的服务商,配置相关的 API Key

### Docker部署 🐳

#### ① 启动Docker

如果未安装 Docker,请先安装:https://www.docker.com/products/docker-desktop/

如果是Windows系统,请参考微软的文档:

1. https://learn.microsoft.com/zh-cn/windows/wsl/install
2. https://learn.microsoft.com/zh-cn/windows/wsl/tutorials/wsl-containers

```shell
cd MoneyPrinterTurbo
docker-compose up
```

> 注意:最新版的 Docker 安装时会自动以插件的形式安装 Docker Compose,启动命令调整为 `docker compose up`

#### ② 访问Web界面

打开浏览器,访问 http://0.0.0.0:8501

#### ③ 访问API文档

打开浏览器,访问 http://0.0.0.0:8080/docs 或者 http://0.0.0.0:8080/redoc

### 手动部署 📦

> 视频教程

- 完整的使用演示:https://v.douyin.com/iFhnwsKY/
- 如何在Windows上部署:https://v.douyin.com/iFyjoW3M

#### ① 创建虚拟环境

建议使用 [conda](https://conda.io/projects/conda/en/latest/user-guide/install/index.html) 创建 python 虚拟环境

```shell
git clone https://github.com/harry0703/MoneyPrinterTurbo.git
cd MoneyPrinterTurbo
conda create -n MoneyPrinterTurbo python=3.11
conda activate MoneyPrinterTurbo
pip install -r requirements.txt
```

#### ② 安装 ImageMagick

- Windows:
    - 下载 https://imagemagick.org/script/download.php 选择Windows版本,切记一定要选择 **静态库** 版本,比如 ImageMagick-7.1.1-32-Q16-x64-**static**.exe
    - 安装下载好的 ImageMagick,**注意不要修改安装路径**
    - 修改 `配置文件 config.toml` 中的 `imagemagick_path` 为你的 **实际安装路径**

- MacOS:
  ```shell
  brew install imagemagick
  ```
- Ubuntu
  ```shell
  sudo apt-get install imagemagick
  ```
- CentOS
  ```shell
  sudo yum install ImageMagick
  ```

#### ③ 启动Web界面 🌐

注意需要到 MoneyPrinterTurbo 项目 `根目录` 下执行以下命令

###### Windows

```bat
webui.bat
```

###### MacOS or Linux

```shell
sh webui.sh
```

启动后,会自动打开浏览器(如果打开是空白,建议换成 **Chrome** 或者 **Edge** 打开)

#### ④ 启动API服务 🚀

```shell
python main.py
```

启动后,可以查看 `API文档` http://127.0.0.1:8080/docs 或者 http://127.0.0.1:8080/redoc,直接在线调试接口,快速体验。

## 语音合成 🗣

所有支持的声音列表,可以查看:[声音列表](./docs/voice-list.txt)

2024-04-16 v1.1.2 新增了9种Azure的语音合成声音,需要配置 API KEY,该声音合成得更加真实。

## 字幕生成 📜

当前支持2种字幕生成方式:

- **edge**: 生成`速度快`,性能更好,对电脑配置没有要求,但是质量可能不稳定
- **whisper**: 生成`速度慢`,性能较差,对电脑配置有一定要求,但是`质量更可靠`

可以修改 `config.toml` 配置文件中的 `subtitle_provider` 进行切换

建议使用 `edge` 模式,如果生成的字幕质量不好,再切换到 `whisper` 模式

> 注意:
>
> 1. whisper 模式下需要到 HuggingFace 下载一个模型文件,大约 3GB 左右,请确保网络通畅
> 2. 如果留空,表示不生成字幕。

> 由于国内无法访问 HuggingFace,可以使用以下方法下载 `whisper-large-v3` 的模型文件

下载地址:

- 百度网盘: https://pan.baidu.com/s/11h3Q6tsDtjQKTjUu3sc5cA?pwd=xjs9
- 夸克网盘:https://pan.quark.cn/s/3ee3d991d64b

模型下载后解压,整个目录放到 `.\MoneyPrinterTurbo\models` 里面,
最终的文件路径应该是这样: `.\MoneyPrinterTurbo\models\whisper-large-v3`

```
MoneyPrinterTurbo
  ├─models
  │   └─whisper-large-v3
  │          config.json
  │          model.bin
  │          preprocessor_config.json
  │          tokenizer.json
  │          vocabulary.json
```

## 背景音乐 🎵

用于视频的背景音乐,位于项目的 `resource/songs` 目录下。
> 当前项目里面放了一些默认的音乐,来自于 YouTube 视频,如有侵权,请删除。

## 字幕字体 🅰

用于视频字幕的渲染,位于项目的 `resource/fonts` 目录下,你也可以放进去自己的字体。

## 常见问题 🤔

### ❓RuntimeError: No ffmpeg exe could be found

通常情况下,ffmpeg 会被自动下载,并且会被自动检测到。
但是如果你的环境有问题,无法自动下载,可能会遇到如下错误:

```
RuntimeError: No ffmpeg exe could be found.
Install ffmpeg on your system, or set the IMAGEIO_FFMPEG_EXE environment variable.
```

此时你可以从 https://www.gyan.dev/ffmpeg/builds/ 下载 ffmpeg,解压后,设置 `ffmpeg_path` 为你的实际安装路径即可。

```toml
[app]
# 请根据你的实际路径设置,注意 Windows 路径分隔符为 \\
ffmpeg_path = "C:\\Users\\harry\\Downloads\\ffmpeg.exe"
```

### ❓ImageMagick的安全策略阻止了与临时文件@/tmp/tmpur5hyyto.txt相关的操作

可以在 ImageMagick 的配置文件 policy.xml 中找到这些策略。
这个文件通常位于 /etc/ImageMagick-`X`/ 或 ImageMagick 安装目录的类似位置。
修改包含 `pattern="@"` 的条目,将 `rights="none"` 更改为 `rights="read|write"`,以允许对文件的读写操作。

### ❓OSError: [Errno 24] Too many open files

这个问题是由于系统打开文件数限制导致的,可以通过修改系统的文件打开数限制来解决。

查看当前限制:

```shell
ulimit -n
```

如果过低,可以调高一些,比如:

```shell
ulimit -n 10240
```

### ❓Whisper 模型下载失败,出现如下错误

LocalEntryNotFoundError: Cannot find an appropriate cached snapshot folder for the specified revision on the local disk and outgoing traffic has been disabled. To enable repo look-ups and downloads online, pass 'local_files_only=False' as input.

或者

An error occured while synchronizing the model Systran/faster-whisper-large-v3 from the Hugging Face Hub:
An error happened while trying to locate the files on the Hub and we cannot find the appropriate snapshot folder for the specified revision on the local disk. Please check your internet connection and try again.
Trying to load the model directly from the local cache, if it exists.

解决方法:[点击查看如何从网盘手动下载模型](#%E5%AD%97%E5%B9%95%E7%94%9F%E6%88%90-)

## 反馈建议 📢

- 可以提交 [issue](https://github.com/harry0703/MoneyPrinterTurbo/issues) 或者 [pull request](https://github.com/harry0703/MoneyPrinterTurbo/pulls)。

## 许可证 📝

点击查看 [`LICENSE`](LICENSE) 文件

## Star History

[![Star History Chart](https://api.star-history.com/svg?repos=harry0703/MoneyPrinterTurbo&type=Date)](https://star-history.com/#harry0703/MoneyPrinterTurbo&Date)
\ No newline at end of file
diff --git a/app/__init__.py b/app/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/app/asgi.py b/app/asgi.py
new file mode 100644
index 0000000000000000000000000000000000000000..01f8b7da6f1cc2c3c95d91737192c2801808e5fc
--- /dev/null
+++ b/app/asgi.py
@@ -0,0 +1,82 @@
"""Application implementation - ASGI."""

import os

from fastapi import FastAPI, Request
from fastapi.exceptions import RequestValidationError
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import JSONResponse
from fastapi.staticfiles import StaticFiles
from loguru import logger

from app.config import config
from app.models.exception import HttpException
from app.router import root_api_router
from app.utils import utils


def exception_handler(request: Request, e: HttpException):
    return JSONResponse(
        status_code=e.status_code,
        content=utils.get_response(e.status_code, e.data, e.message),
    )


def validation_exception_handler(request: Request, e: RequestValidationError):
    return JSONResponse(
        status_code=400,
        content=utils.get_response(
            status=400, data=e.errors(), message="field required"
        ),
    )


def get_application() -> FastAPI:
    """Initialize FastAPI application.

    Returns:
        FastAPI: Application object instance.

    """
    instance = FastAPI(
        title=config.project_name,
        description=config.project_description,
        version=config.project_version,
        debug=False,
    )
    instance.include_router(root_api_router)
    instance.add_exception_handler(HttpException, exception_handler)
    instance.add_exception_handler(RequestValidationError, validation_exception_handler)
    return instance


app = get_application()

# Configures the CORS middleware for the FastAPI app
cors_allowed_origins_str = os.getenv("CORS_ALLOWED_ORIGINS", "")
origins = cors_allowed_origins_str.split(",") if cors_allowed_origins_str else ["*"]
app.add_middleware(
    CORSMiddleware,
    allow_origins=origins,
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

task_dir = utils.task_dir()
app.mount(
    "/tasks", StaticFiles(directory=task_dir, html=True, follow_symlink=True), name=""
)

public_dir = utils.public_dir()
app.mount("/", StaticFiles(directory=public_dir, html=True), name="")


@app.on_event("shutdown")
def shutdown_event():
    logger.info("shutdown event")


@app.on_event("startup")
def startup_event():
    logger.info("startup event")
diff --git a/app/config/__init__.py b/app/config/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..dd468128b7999ad261d03bf81a7753d5843a882e
--- /dev/null
+++ b/app/config/__init__.py
@@ -0,0 +1,56 @@
import os
import sys

from loguru import logger

from app.config import config
from app.utils import utils


def __init_logger():
    # _log_file = utils.storage_dir("logs/server.log")
    _lvl = config.log_level
    root_dir = os.path.dirname(
        os.path.dirname(os.path.dirname(os.path.realpath(__file__)))
    )

    def format_record(record):
        # Get the full file path from the log record
        file_path = record["file"].path
        # Convert the absolute path into a path relative to the project root
        relative_path = os.path.relpath(file_path, root_dir)
        # Update the file path stored in the record
        record["file"].path = f"./{relative_path}"
        # Return the modified format string
        # (adjust the format here as needed)
        _format = (
            "{time:%Y-%m-%d %H:%M:%S} | "
            + "{level} | "
            + '"{file.path}:{line}": {function} '
            + "- {message}"
            + "\n"
        )
        return _format

    logger.remove()

    logger.add(
        sys.stdout,
        level=_lvl,
        format=format_record,
        colorize=True,
    )

    # logger.add(
    #     _log_file,
    #     level=_lvl,
    #     format=format_record,
    #     rotation="00:00",
    #     retention="3 days",
    #     backtrace=True,
    #     diagnose=True,
    #     enqueue=True,
    # )


__init_logger()
diff --git a/app/config/config.py b/app/config/config.py
new file mode 100644
index 0000000000000000000000000000000000000000..c20a3dc388b4b9c57e403cb13adeb8f85913ceef
--- /dev/null
+++ b/app/config/config.py
@@ -0,0 +1,78 @@
import os
import shutil
import socket

import toml
from loguru import logger

root_dir = os.path.dirname(os.path.dirname(os.path.dirname(os.path.realpath(__file__))))
config_file = f"{root_dir}/config.toml"


def load_config():
    # fix: IsADirectoryError: [Errno 21] Is a directory: '/MoneyPrinterTurbo/config.toml'
    if os.path.isdir(config_file):
        shutil.rmtree(config_file)

    if not os.path.isfile(config_file):
        example_file = f"{root_dir}/config.example.toml"
        if os.path.isfile(example_file):
            shutil.copyfile(example_file, config_file)
            logger.info("copy config.example.toml to config.toml")

    logger.info(f"load config from file: {config_file}")

    try:
        _config_ = toml.load(config_file)
    except Exception as e:
        logger.warning(f"load config failed: {str(e)}, try to load as utf-8-sig")
        with open(config_file, mode="r", encoding="utf-8-sig") as fp:
            _cfg_content = fp.read()
            _config_ = toml.loads(_cfg_content)
    return _config_


def save_config():
    with open(config_file, "w", encoding="utf-8") as f:
        _cfg["app"] = app
        _cfg["azure"] = azure
        _cfg["siliconflow"] = siliconflow
        _cfg["ui"] = ui
        f.write(toml.dumps(_cfg))


_cfg = load_config()
app = _cfg.get("app", {})
whisper = _cfg.get("whisper", {})
proxy = _cfg.get("proxy", {})
azure = _cfg.get("azure", {})
siliconflow = _cfg.get("siliconflow", {})
ui = _cfg.get(
    "ui",
    {
        "hide_log": False,
    },
)

hostname = socket.gethostname()

log_level = _cfg.get("log_level", "DEBUG")
listen_host = _cfg.get("listen_host", "0.0.0.0")
listen_port = _cfg.get("listen_port", 8080)
project_name = _cfg.get("project_name", "MoneyPrinterTurbo")
project_description = _cfg.get(
    "project_description",
    "https://github.com/harry0703/MoneyPrinterTurbo",
)
project_version = _cfg.get("project_version", "1.2.6")
reload_debug = False

imagemagick_path = app.get("imagemagick_path", "")
if imagemagick_path and os.path.isfile(imagemagick_path):
    os.environ["IMAGEMAGICK_BINARY"] = imagemagick_path

ffmpeg_path = app.get("ffmpeg_path", "")
if ffmpeg_path and os.path.isfile(ffmpeg_path):
    os.environ["IMAGEIO_FFMPEG_EXE"] = ffmpeg_path

logger.info(f"{project_name} v{project_version}")
diff --git a/app/controllers/base.py b/app/controllers/base.py
new file mode 100644
index 0000000000000000000000000000000000000000..122e341e07529b720f6a422d57f1f7a4d80a6638
--- /dev/null
+++ b/app/controllers/base.py
@@ -0,0 +1,31 @@
from uuid import uuid4

from fastapi import Request

from app.config import config
from app.models.exception import HttpException


def get_task_id(request: Request):
    task_id = request.headers.get("x-task-id")
    if not task_id:
        task_id = uuid4()
    return str(task_id)


def get_api_key(request: Request):
    api_key = request.headers.get("x-api-key")
    return api_key


def verify_token(request: Request):
    token = get_api_key(request)
    if token != config.app.get("api_key", ""):
        request_id = get_task_id(request)
        request_url = request.url
        user_agent = request.headers.get("user-agent")
        raise HttpException(
            task_id=request_id,
            status_code=401,
            message=f"invalid token: {request_url}, {user_agent}",
        )
diff --git a/app/controllers/manager/base_manager.py b/app/controllers/manager/base_manager.py
new file mode 100644
index 0000000000000000000000000000000000000000..81251419b5a41cb34eca48635c5360e25383e5c5
--- /dev/null
+++ b/app/controllers/manager/base_manager.py
@@ -0,0 +1,64 @@
import threading
from typing import Any, Callable, Dict


class TaskManager:
    def __init__(self, max_concurrent_tasks: int):
        self.max_concurrent_tasks = max_concurrent_tasks
        self.current_tasks = 0
        self.lock = threading.Lock()
        self.queue = self.create_queue()

    def create_queue(self):
        raise NotImplementedError()

    def add_task(self, func: Callable, *args: Any, **kwargs: Any):
        with self.lock:
            if self.current_tasks < self.max_concurrent_tasks:
                print(f"add task: {func.__name__}, current_tasks: {self.current_tasks}")
                self.execute_task(func, *args, **kwargs)
            else:
                print(
                    f"enqueue task: {func.__name__}, current_tasks: {self.current_tasks}"
                )
                self.enqueue({"func": func, "args": args, "kwargs": kwargs})

    def execute_task(self, func: Callable, *args: Any, **kwargs: Any):
        thread = threading.Thread(
            target=self.run_task, args=(func, *args), kwargs=kwargs
        )
        thread.start()

    def run_task(self, func: Callable, *args: Any, **kwargs: Any):
        try:
            with self.lock:
                self.current_tasks += 1
            # Call the queued function, passing through *args and **kwargs
            func(*args, **kwargs)
        finally:
            self.task_done()

    def check_queue(self):
        with self.lock:
            if (
                self.current_tasks < self.max_concurrent_tasks
                and not self.is_queue_empty()
            ):
                task_info = self.dequeue()
                func = task_info["func"]
                args = task_info.get("args", ())
                kwargs = task_info.get("kwargs", {})
                self.execute_task(func, *args, **kwargs)

    def task_done(self):
        with self.lock:
            self.current_tasks -= 1
        # Drain the queue outside the lock: check_queue acquires it again,
        # and threading.Lock is not re-entrant
        self.check_queue()

    def enqueue(self, task: Dict):
        raise NotImplementedError()

    def dequeue(self):
        raise NotImplementedError()

    def is_queue_empty(self):
        raise NotImplementedError()
diff --git a/app/controllers/manager/memory_manager.py b/app/controllers/manager/memory_manager.py
new file mode 100644
index 0000000000000000000000000000000000000000..cf7321f56e3e2dde3177d304f3860185b1ab8baa
--- /dev/null
+++ b/app/controllers/manager/memory_manager.py
@@ -0,0 +1,18 @@
from queue import Queue
from typing import Dict

from app.controllers.manager.base_manager import TaskManager


class InMemoryTaskManager(TaskManager):
    def create_queue(self):
        return Queue()

    def enqueue(self, task: Dict):
        self.queue.put(task)

    def dequeue(self):
        return self.queue.get()

    def is_queue_empty(self):
        return self.queue.empty()
diff --git a/app/controllers/manager/redis_manager.py b/app/controllers/manager/redis_manager.py
new file mode 100644
index 0000000000000000000000000000000000000000..cad1912aefb95fa6a6ad16b3617dac08729c9cd1
--- /dev/null
+++ b/app/controllers/manager/redis_manager.py
@@ -0,0 +1,56 @@
import json
from typing import Dict

import redis

from app.controllers.manager.base_manager import TaskManager
from app.models.schema import VideoParams
from app.services import task as tm

FUNC_MAP = {
    "start": tm.start,
    # 'start_test': tm.start_test
}


class RedisTaskManager(TaskManager):
    def __init__(self, max_concurrent_tasks: int, redis_url: str):
        self.redis_client = redis.Redis.from_url(redis_url)
        super().__init__(max_concurrent_tasks)

    def create_queue(self):
        return "task_queue"

    def enqueue(self, task: Dict):
        task_with_serializable_params = task.copy()

        if "params" in task["kwargs"] and isinstance(
            task["kwargs"]["params"], VideoParams
        ):
            task_with_serializable_params["kwargs"]["params"] = task["kwargs"][
                "params"
            ].dict()

        # Convert the function object to its name
        task_with_serializable_params["func"] = task["func"].__name__
        self.redis_client.rpush(self.queue, json.dumps(task_with_serializable_params))

    def dequeue(self):
        task_json = self.redis_client.lpop(self.queue)
        if task_json:
            task_info = json.loads(task_json)
            # Convert the function name back into a function object
            task_info["func"] = FUNC_MAP[task_info["func"]]

            if "params" in task_info["kwargs"] and isinstance(
                task_info["kwargs"]["params"], dict
            ):
                task_info["kwargs"]["params"] = VideoParams(
                    **task_info["kwargs"]["params"]
                )

            return task_info
        return None

    def is_queue_empty(self):
        return self.redis_client.llen(self.queue) == 0
diff --git a/app/controllers/ping.py b/app/controllers/ping.py
new file mode 100644
index 0000000000000000000000000000000000000000..073247ba3448688814919082faec7339d4718d3f
--- /dev/null
+++ b/app/controllers/ping.py
@@ -0,0 +1,13 @@
from fastapi import APIRouter, Request

router = APIRouter()


@router.get(
    "/ping",
    tags=["Health Check"],
    description="Check service availability",
    response_description="pong",
)
def ping(request: Request) -> str:
    return "pong"
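A quick sketch of how these managers behave (the `render` function here is a hypothetical stand-in for a real job such as `tm.start`; `InMemoryTaskManager` and `add_task` come from the code above). Tasks beyond `max_concurrent_tasks` are parked in the queue and drained by `task_done` as running tasks finish:

```python
import time

from app.controllers.manager.memory_manager import InMemoryTaskManager


def render(task_id: str):
    # Stand-in for a real long-running job
    time.sleep(1)
    print(f"done: {task_id}")


manager = InMemoryTaskManager(max_concurrent_tasks=2)
for i in range(5):
    # The first two start immediately in worker threads; the rest are enqueued
    manager.add_task(render, task_id=f"task-{i}")
```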
diff --git a/app/controllers/v1/base.py b/app/controllers/v1/base.py
new file mode 100644
index 0000000000000000000000000000000000000000..1336e479c92cb7e40c53cda8a114c6bb218eb2f4
--- /dev/null
+++ b/app/controllers/v1/base.py
@@ -0,0 +1,11 @@
from fastapi import APIRouter


def new_router(dependencies=None):
    router = APIRouter()
    router.tags = ["V1"]
    router.prefix = "/api/v1"
    # Apply the authentication dependencies to all routes
    if dependencies:
        router.dependencies = dependencies
    return router
diff --git a/app/controllers/v1/llm.py b/app/controllers/v1/llm.py
new file mode 100644
index 0000000000000000000000000000000000000000..54db26d06f2d9fd7cee7fb91befb3b900fa55a05
--- /dev/null
+++ b/app/controllers/v1/llm.py
@@ -0,0 +1,45 @@
from fastapi import Request

from app.controllers.v1.base import new_router
from app.models.schema import (
    VideoScriptRequest,
    VideoScriptResponse,
    VideoTermsRequest,
    VideoTermsResponse,
)
from app.services import llm
from app.utils import utils

# authentication dependency
# router = new_router(dependencies=[Depends(base.verify_token)])
router = new_router()


@router.post(
    "/scripts",
    response_model=VideoScriptResponse,
    summary="Create a script for the video",
)
def generate_video_script(request: Request, body: VideoScriptRequest):
    video_script = llm.generate_script(
        video_subject=body.video_subject,
        language=body.video_language,
        paragraph_number=body.paragraph_number,
    )
    response = {"video_script": video_script}
    return utils.get_response(200, response)


@router.post(
    "/terms",
    response_model=VideoTermsResponse,
    summary="Generate video terms based on the video script",
)
def generate_video_terms(request: Request, body: VideoTermsRequest):
    video_terms = llm.generate_terms(
        video_subject=body.video_subject,
        video_script=body.video_script,
        amount=body.amount,
    )
    response = {"video_terms": video_terms}
    return utils.get_response(200, response)
diff --git a/app/controllers/v1/video.py b/app/controllers/v1/video.py
new file mode 100644
index 0000000000000000000000000000000000000000..e80d762670405cf7555f11f94fa9dba3ba011c44
--- /dev/null
+++ b/app/controllers/v1/video.py
@@ -0,0 +1,287 @@
import glob
import os
import pathlib
import shutil
from typing import Union

from fastapi import BackgroundTasks, Depends, Path, Query, Request, UploadFile
from fastapi.params import File
from fastapi.responses import FileResponse, StreamingResponse
from loguru import logger

from app.config import config
from app.controllers import base
from app.controllers.manager.memory_manager import InMemoryTaskManager
from app.controllers.manager.redis_manager import RedisTaskManager
from app.controllers.v1.base import new_router
from app.models.exception import HttpException
from app.models.schema import (
    AudioRequest,
    BgmRetrieveResponse,
    BgmUploadResponse,
    SubtitleRequest,
    TaskDeletionResponse,
    TaskQueryRequest,
    TaskQueryResponse,
    TaskResponse,
    TaskVideoRequest,
)
from app.services import state as sm
from app.services import task as tm
from app.utils import utils

# authentication dependency
# router = new_router(dependencies=[Depends(base.verify_token)])
router = new_router()

_enable_redis = config.app.get("enable_redis", False)
_redis_host = config.app.get("redis_host", "localhost")
_redis_port = config.app.get("redis_port", 6379)
_redis_db = config.app.get("redis_db", 0)
_redis_password = config.app.get("redis_password", None)
_max_concurrent_tasks = config.app.get("max_concurrent_tasks", 5)

redis_url = f"redis://:{_redis_password}@{_redis_host}:{_redis_port}/{_redis_db}"
# Choose the appropriate task manager based on the configuration
if _enable_redis:
    task_manager = RedisTaskManager(
        max_concurrent_tasks=_max_concurrent_tasks, redis_url=redis_url
    )
else:
    task_manager = InMemoryTaskManager(max_concurrent_tasks=_max_concurrent_tasks)


@router.post("/videos", response_model=TaskResponse, summary="Generate a short video")
def create_video(
    background_tasks: BackgroundTasks, request: Request, body: TaskVideoRequest
):
    return create_task(request, body, stop_at="video")


@router.post("/subtitle", response_model=TaskResponse, summary="Generate subtitle only")
def create_subtitle(
    background_tasks: BackgroundTasks, request: Request, body: SubtitleRequest
):
    return create_task(request, body, stop_at="subtitle")


@router.post("/audio", response_model=TaskResponse, summary="Generate audio only")
def create_audio(
    background_tasks: BackgroundTasks, request: Request, body: AudioRequest
):
    return create_task(request, body, stop_at="audio")


def create_task(
    request: Request,
    body: Union[TaskVideoRequest, SubtitleRequest, AudioRequest],
    stop_at: str,
):
    task_id = utils.get_uuid()
    request_id = base.get_task_id(request)
    try:
        task = {
            "task_id": task_id,
            "request_id": request_id,
            "params": body.model_dump(),
        }
        sm.state.update_task(task_id)
        task_manager.add_task(tm.start, task_id=task_id, params=body, stop_at=stop_at)
        logger.success(f"Task created: {utils.to_json(task)}")
        return utils.get_response(200, task)
    except ValueError as e:
        raise HttpException(
            task_id=task_id, status_code=400, message=f"{request_id}: {str(e)}"
        )


@router.get("/tasks", response_model=TaskQueryResponse, summary="Get all tasks")
def get_all_tasks(
    request: Request,
    page: int = Query(1, ge=1),
    page_size: int = Query(10, ge=1),
):
    request_id = base.get_task_id(request)
    tasks, total = sm.state.get_all_tasks(page, page_size)

    response = {
        "tasks": tasks,
        "total": total,
        "page": page,
        "page_size": page_size,
    }
    return utils.get_response(200, response)


@router.get(
    "/tasks/{task_id}", response_model=TaskQueryResponse, summary="Query task status"
)
def get_task(
    request: Request,
    task_id: str = Path(..., description="Task ID"),
    query: TaskQueryRequest = Depends(),
):
    endpoint = config.app.get("endpoint", "")
    if not endpoint:
        endpoint = str(request.base_url)
    endpoint = endpoint.rstrip("/")

    request_id = base.get_task_id(request)
    task = sm.state.get_task(task_id)
    if task:
        task_dir = utils.task_dir()

        def file_to_uri(file):
            if not file.startswith(endpoint):
                _uri_path = file.replace(task_dir, "tasks").replace("\\", "/")
                _uri_path = f"{endpoint}/{_uri_path}"
            else:
                _uri_path = file
            return _uri_path

        if "videos" in task:
            videos = task["videos"]
            urls = []
            for v in videos:
                urls.append(file_to_uri(v))
            task["videos"] = urls
        if "combined_videos" in task:
            combined_videos = task["combined_videos"]
            urls = []
            for v in combined_videos:
                urls.append(file_to_uri(v))
            task["combined_videos"] = urls
        return utils.get_response(200, task)

    raise HttpException(
        task_id=task_id, status_code=404, message=f"{request_id}: task not found"
    )


@router.delete(
    "/tasks/{task_id}",
    response_model=TaskDeletionResponse,
    summary="Delete a generated short video task",
)
def delete_video(request: Request, task_id: str = Path(..., description="Task ID")):
    request_id = base.get_task_id(request)
    task = sm.state.get_task(task_id)
    if task:
        tasks_dir = utils.task_dir()
        current_task_dir = os.path.join(tasks_dir, task_id)
        if os.path.exists(current_task_dir):
            shutil.rmtree(current_task_dir)

        sm.state.delete_task(task_id)
        logger.success(f"video deleted: {utils.to_json(task)}")
        return utils.get_response(200)

    raise HttpException(
        task_id=task_id, status_code=404, message=f"{request_id}: task not found"
    )


@router.get(
    "/musics", response_model=BgmRetrieveResponse, summary="Retrieve local BGM files"
)
def get_bgm_list(request: Request):
    suffix = "*.mp3"
    song_dir = utils.song_dir()
    files = glob.glob(os.path.join(song_dir, suffix))
    bgm_list = []
    for file in files:
        bgm_list.append(
            {
                "name": os.path.basename(file),
                "size": os.path.getsize(file),
                "file": file,
            }
        )
    response = {"files": bgm_list}
    return utils.get_response(200, response)


@router.post(
    "/musics",
    response_model=BgmUploadResponse,
    summary="Upload the BGM file to the songs directory",
)
def upload_bgm_file(request: Request, file: UploadFile = File(...)):
    request_id = base.get_task_id(request)
    # check file ext
    if file.filename.endswith("mp3"):
        song_dir = utils.song_dir()
        save_path = os.path.join(song_dir, file.filename)
        # save file
        with open(save_path, "wb+") as buffer:
            # If the file already exists, it will be overwritten
            file.file.seek(0)
            buffer.write(file.file.read())
        response = {"file": save_path}
        return utils.get_response(200, response)

    raise HttpException(
        "", status_code=400, message=f"{request_id}: Only *.mp3 files can be uploaded"
    )


@router.get("/stream/{file_path:path}")
async def stream_video(request: Request, file_path: str):
    tasks_dir = utils.task_dir()
    video_path = os.path.join(tasks_dir, file_path)
    range_header = request.headers.get("Range")
    video_size = os.path.getsize(video_path)
    start, end = 0, video_size - 1

    length = video_size
    if range_header:
        range_ = range_header.split("bytes=")[1]
        start, end = [int(part) if part else None for part in range_.split("-")]
        if start is None:
            start = video_size - end
            end = video_size - 1
        if end is None:
            end = video_size - 1
        length = end - start + 1

    def file_iterator(file_path, offset=0, bytes_to_read=None):
        with open(file_path, "rb") as f:
            f.seek(offset, os.SEEK_SET)
            remaining = bytes_to_read or video_size
            while remaining > 0:
                bytes_to_read = min(4096, remaining)
                data = f.read(bytes_to_read)
                if not data:
                    break
                remaining -= len(data)
                yield data

    response = StreamingResponse(
        file_iterator(video_path, start, length), media_type="video/mp4"
    )
    response.headers["Content-Range"] = f"bytes {start}-{end}/{video_size}"
    response.headers["Accept-Ranges"] = "bytes"
    response.headers["Content-Length"] = str(length)
    response.status_code = 206  # Partial Content

    return response


@router.get("/download/{file_path:path}")
async def download_video(_: Request, file_path: str):
    """
    download video
    :param _: Request request
    :param file_path: video file path, eg: /cd1727ed-3473-42a2-a7da-4faafafec72b/final-1.mp4
    :return: video file
    """
    tasks_dir = utils.task_dir()
    video_path = os.path.join(tasks_dir, file_path)
    file_path = pathlib.Path(video_path)
    filename = file_path.stem
    extension = file_path.suffix
    headers = {"Content-Disposition": f"attachment; filename={filename}{extension}"}
    return FileResponse(
        path=video_path,
        headers=headers,
        filename=f"{filename}{extension}",
        media_type=f"video/{extension[1:]}",
    )
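The `stream` endpoint above honors HTTP Range requests, which is what lets browsers seek inside generated videos. A minimal client-side sketch (assuming the API runs on the default `127.0.0.1:8080`, that the task file from the docstring example exists, and using the `requests` library for illustration):

```python
import requests

# Request only the first 1 KiB of the video; the server replies with 206 Partial Content
url = "http://127.0.0.1:8080/api/v1/stream/cd1727ed-3473-42a2-a7da-4faafafec72b/final-1.mp4"
resp = requests.get(url, headers={"Range": "bytes=0-1023"}, timeout=30)

print(resp.status_code)               # 206
print(resp.headers["Content-Range"])  # e.g. "bytes 0-1023/<video_size>"
print(len(resp.content))              # 1024
```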
diff --git a/app/models/__init__.py b/app/models/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/app/models/const.py b/app/models/const.py
new file mode 100644
index 0000000000000000000000000000000000000000..e7540ef34c134a3687d5d912b69e895b28c772d4
--- /dev/null
+++ b/app/models/const.py
@@ -0,0 +1,25 @@
PUNCTUATIONS = [
    "?",
    ",",
    ".",
    "、",
    ";",
    ":",
    "!",
    "…",
    "?",
    ",",
    "。",
    "、",
    ";",
    ":",
    "!",
    "...",
]

TASK_STATE_FAILED = -1
TASK_STATE_COMPLETE = 1
TASK_STATE_PROCESSING = 4

FILE_TYPE_VIDEOS = ["mp4", "mov", "mkv", "webm"]
FILE_TYPE_IMAGES = ["jpg", "jpeg", "png", "bmp"]
diff --git a/app/models/exception.py b/app/models/exception.py
new file mode 100644
index 0000000000000000000000000000000000000000..d938cb5ac8d32cd082d87a4dffe679fd17626ecc
--- /dev/null
+++ b/app/models/exception.py
@@ -0,0 +1,28 @@
import traceback
from typing import Any

from loguru import logger


class HttpException(Exception):
    def __init__(
        self, task_id: str, status_code: int, message: str = "", data: Any = None
    ):
        self.message = message
        self.status_code = status_code
        self.data = data
        # Retrieve the exception stack trace information.
        tb_str = traceback.format_exc().strip()
        if not tb_str or tb_str == "NoneType: None":
            msg = f"HttpException: {status_code}, {task_id}, {message}"
        else:
            msg = f"HttpException: {status_code}, {task_id}, {message}\n{tb_str}"

        if status_code == 400:
            logger.warning(msg)
        else:
            logger.error(msg)


class FileNotFoundException(Exception):
    pass
diff --git a/app/models/schema.py b/app/models/schema.py
new file mode 100644
index 0000000000000000000000000000000000000000..3696fa3198ab71515182bc7610c1b9f191ca70e7
--- /dev/null
+++ b/app/models/schema.py
@@ -0,0 +1,303 @@
import warnings
from enum import Enum
from typing import Any, List, Optional, Union

import pydantic
from pydantic import BaseModel

# Ignore specific Pydantic warnings
warnings.filterwarnings(
    "ignore",
    category=UserWarning,
    message="Field name.*shadows an attribute in parent.*",
)


class VideoConcatMode(str, Enum):
    random = "random"
    sequential = "sequential"


class VideoTransitionMode(str, Enum):
    none = None
    shuffle = "Shuffle"
    fade_in = "FadeIn"
    fade_out = "FadeOut"
    slide_in = "SlideIn"
    slide_out = "SlideOut"


class VideoAspect(str, Enum):
    landscape = "16:9"
    portrait = "9:16"
    square = "1:1"

    def to_resolution(self):
        if self == VideoAspect.landscape.value:
            return 1920, 1080
        elif self == VideoAspect.portrait.value:
            return 1080, 1920
        elif self == VideoAspect.square.value:
            return 1080, 1080
        return 1080, 1920


class _Config:
    arbitrary_types_allowed = True


@pydantic.dataclasses.dataclass(config=_Config)
class MaterialInfo:
    provider: str = "pexels"
    url: str = ""
    duration: int = 0


class VideoParams(BaseModel):
    """
    {
        "video_subject": "",
        "video_aspect": "横屏 16:9(西瓜视频)",
        "voice_name": "女生-晓晓",
        "bgm_name": "random",
        "font_name": "STHeitiMedium 黑体-中",
        "text_color": "#FFFFFF",
        "font_size": 60,
        "stroke_color": "#000000",
        "stroke_width": 1.5
    }
    """

    video_subject: str
    video_script: str = ""  # Script used to generate the video
    video_terms: Optional[str | list] = None  # Keywords used to generate the video
    video_aspect: Optional[VideoAspect] = VideoAspect.portrait.value
    video_concat_mode: 
Optional[VideoConcatMode] = VideoConcatMode.random.value + video_transition_mode: Optional[VideoTransitionMode] = None + video_clip_duration: Optional[int] = 5 + video_count: Optional[int] = 1 + + video_source: Optional[str] = "pexels" + video_materials: Optional[List[MaterialInfo]] = ( + None # Materials used to generate the video + ) + + video_language: Optional[str] = "" # auto detect + + voice_name: Optional[str] = "" + voice_volume: Optional[float] = 1.0 + voice_rate: Optional[float] = 1.0 + bgm_type: Optional[str] = "random" + bgm_file: Optional[str] = "" + bgm_volume: Optional[float] = 0.2 + + subtitle_enabled: Optional[bool] = True + subtitle_position: Optional[str] = "bottom" # top, bottom, center + custom_position: float = 70.0 + font_name: Optional[str] = "STHeitiMedium.ttc" + text_fore_color: Optional[str] = "#FFFFFF" + text_background_color: Union[bool, str] = True + + font_size: int = 60 + stroke_color: Optional[str] = "#000000" + stroke_width: float = 1.5 + n_threads: Optional[int] = 2 + paragraph_number: Optional[int] = 1 + + +class SubtitleRequest(BaseModel): + video_script: str + video_language: Optional[str] = "" + voice_name: Optional[str] = "zh-CN-XiaoxiaoNeural-Female" + voice_volume: Optional[float] = 1.0 + voice_rate: Optional[float] = 1.2 + bgm_type: Optional[str] = "random" + bgm_file: Optional[str] = "" + bgm_volume: Optional[float] = 0.2 + subtitle_position: Optional[str] = "bottom" + font_name: Optional[str] = "STHeitiMedium.ttc" + text_fore_color: Optional[str] = "#FFFFFF" + text_background_color: Union[bool, str] = True + font_size: int = 60 + stroke_color: Optional[str] = "#000000" + stroke_width: float = 1.5 + video_source: Optional[str] = "local" + subtitle_enabled: Optional[str] = "true" + + +class AudioRequest(BaseModel): + video_script: str + video_language: Optional[str] = "" + voice_name: Optional[str] = "zh-CN-XiaoxiaoNeural-Female" + voice_volume: Optional[float] = 1.0 + voice_rate: Optional[float] = 1.2 + bgm_type: Optional[str] = "random" + bgm_file: Optional[str] = "" + bgm_volume: Optional[float] = 0.2 + video_source: Optional[str] = "local" + + +class VideoScriptParams: + """ + { + "video_subject": "春天的花海", + "video_language": "", + "paragraph_number": 1 + } + """ + + video_subject: Optional[str] = "春天的花海" + video_language: Optional[str] = "" + paragraph_number: Optional[int] = 1 + + +class VideoTermsParams: + """ + { + "video_subject": "", + "video_script": "", + "amount": 5 + } + """ + + video_subject: Optional[str] = "春天的花海" + video_script: Optional[str] = ( + "春天的花海,如诗如画般展现在眼前。万物复苏的季节里,大地披上了一袭绚丽多彩的盛装。金黄的迎春、粉嫩的樱花、洁白的梨花、艳丽的郁金香……" + ) + amount: Optional[int] = 5 + + +class BaseResponse(BaseModel): + status: int = 200 + message: Optional[str] = "success" + data: Any = None + + +class TaskVideoRequest(VideoParams, BaseModel): + pass + + +class TaskQueryRequest(BaseModel): + pass + + +class VideoScriptRequest(VideoScriptParams, BaseModel): + pass + + +class VideoTermsRequest(VideoTermsParams, BaseModel): + pass + + +###################################################################################################### +###################################################################################################### +###################################################################################################### +###################################################################################################### +class TaskResponse(BaseResponse): + class TaskResponseData(BaseModel): + task_id: str + + data: TaskResponseData + + class 
Config: + json_schema_extra = { + "example": { + "status": 200, + "message": "success", + "data": {"task_id": "6c85c8cc-a77a-42b9-bc30-947815aa0558"}, + }, + } + + +class TaskQueryResponse(BaseResponse): + class Config: + json_schema_extra = { + "example": { + "status": 200, + "message": "success", + "data": { + "state": 1, + "progress": 100, + "videos": [ + "http://127.0.0.1:8080/tasks/6c85c8cc-a77a-42b9-bc30-947815aa0558/final-1.mp4" + ], + "combined_videos": [ + "http://127.0.0.1:8080/tasks/6c85c8cc-a77a-42b9-bc30-947815aa0558/combined-1.mp4" + ], + }, + }, + } + + +class TaskDeletionResponse(BaseResponse): + class Config: + json_schema_extra = { + "example": { + "status": 200, + "message": "success", + "data": { + "state": 1, + "progress": 100, + "videos": [ + "http://127.0.0.1:8080/tasks/6c85c8cc-a77a-42b9-bc30-947815aa0558/final-1.mp4" + ], + "combined_videos": [ + "http://127.0.0.1:8080/tasks/6c85c8cc-a77a-42b9-bc30-947815aa0558/combined-1.mp4" + ], + }, + }, + } + + +class VideoScriptResponse(BaseResponse): + class Config: + json_schema_extra = { + "example": { + "status": 200, + "message": "success", + "data": { + "video_script": "春天的花海,是大自然的一幅美丽画卷。在这个季节里,大地复苏,万物生长,花朵争相绽放,形成了一片五彩斑斓的花海..." + }, + }, + } + + +class VideoTermsResponse(BaseResponse): + class Config: + json_schema_extra = { + "example": { + "status": 200, + "message": "success", + "data": {"video_terms": ["sky", "tree"]}, + }, + } + + +class BgmRetrieveResponse(BaseResponse): + class Config: + json_schema_extra = { + "example": { + "status": 200, + "message": "success", + "data": { + "files": [ + { + "name": "output013.mp3", + "size": 1891269, + "file": "/MoneyPrinterTurbo/resource/songs/output013.mp3", + } + ] + }, + }, + } + + +class BgmUploadResponse(BaseResponse): + class Config: + json_schema_extra = { + "example": { + "status": 200, + "message": "success", + "data": {"file": "/MoneyPrinterTurbo/resource/songs/example.mp3"}, + }, + } diff --git a/app/router.py b/app/router.py new file mode 100644 index 0000000000000000000000000000000000000000..cf840374878235215f4f3560e65a844dc55506b2 --- /dev/null +++ b/app/router.py @@ -0,0 +1,17 @@ +"""Application configuration - root APIRouter. + +Defines all FastAPI application endpoints. + +Resources: + 1. 
https://fastapi.tiangolo.com/tutorial/bigger-applications + +""" + +from fastapi import APIRouter + +from app.controllers.v1 import llm, video + +root_api_router = APIRouter() +# v1 +root_api_router.include_router(video.router) +root_api_router.include_router(llm.router) diff --git a/app/services/__init__.py b/app/services/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/app/services/llm.py b/app/services/llm.py new file mode 100644 index 0000000000000000000000000000000000000000..6c954a8d4f4c6cc9ded15e8b0ef632af21509385 --- /dev/null +++ b/app/services/llm.py @@ -0,0 +1,444 @@ +import json +import logging +import re +import requests +from typing import List + +import g4f +from loguru import logger +from openai import AzureOpenAI, OpenAI +from openai.types.chat import ChatCompletion + +from app.config import config + +_max_retries = 5 + + +def _generate_response(prompt: str) -> str: + try: + content = "" + llm_provider = config.app.get("llm_provider", "openai") + logger.info(f"llm provider: {llm_provider}") + if llm_provider == "g4f": + model_name = config.app.get("g4f_model_name", "") + if not model_name: + model_name = "gpt-3.5-turbo-16k-0613" + content = g4f.ChatCompletion.create( + model=model_name, + messages=[{"role": "user", "content": prompt}], + ) + else: + api_version = "" # for azure + if llm_provider == "moonshot": + api_key = config.app.get("moonshot_api_key") + model_name = config.app.get("moonshot_model_name") + base_url = "https://api.moonshot.cn/v1" + elif llm_provider == "ollama": + # api_key = config.app.get("openai_api_key") + api_key = "ollama" # any string works but you are required to have one + model_name = config.app.get("ollama_model_name") + base_url = config.app.get("ollama_base_url", "") + if not base_url: + base_url = "http://localhost:11434/v1" + elif llm_provider == "openai": + api_key = config.app.get("openai_api_key") + model_name = config.app.get("openai_model_name") + base_url = config.app.get("openai_base_url", "") + if not base_url: + base_url = "https://api.openai.com/v1" + elif llm_provider == "oneapi": + api_key = config.app.get("oneapi_api_key") + model_name = config.app.get("oneapi_model_name") + base_url = config.app.get("oneapi_base_url", "") + elif llm_provider == "azure": + api_key = config.app.get("azure_api_key") + model_name = config.app.get("azure_model_name") + base_url = config.app.get("azure_base_url", "") + api_version = config.app.get("azure_api_version", "2024-02-15-preview") + elif llm_provider == "gemini": + api_key = config.app.get("gemini_api_key") + model_name = config.app.get("gemini_model_name") + base_url = "***" + elif llm_provider == "qwen": + api_key = config.app.get("qwen_api_key") + model_name = config.app.get("qwen_model_name") + base_url = "***" + elif llm_provider == "cloudflare": + api_key = config.app.get("cloudflare_api_key") + model_name = config.app.get("cloudflare_model_name") + account_id = config.app.get("cloudflare_account_id") + base_url = "***" + elif llm_provider == "deepseek": + api_key = config.app.get("deepseek_api_key") + model_name = config.app.get("deepseek_model_name") + base_url = config.app.get("deepseek_base_url") + if not base_url: + base_url = "https://api.deepseek.com" + elif llm_provider == "ernie": + api_key = config.app.get("ernie_api_key") + secret_key = config.app.get("ernie_secret_key") + base_url = config.app.get("ernie_base_url") + model_name = "***" + if not secret_key: + raise ValueError( + 
f"{llm_provider}: secret_key is not set, please set it in the config.toml file." + ) + elif llm_provider == "pollinations": + try: + base_url = config.app.get("pollinations_base_url", "") + if not base_url: + base_url = "https://text.pollinations.ai/openai" + model_name = config.app.get("pollinations_model_name", "openai-fast") + + # Prepare the payload + payload = { + "model": model_name, + "messages": [ + {"role": "user", "content": prompt} + ], + "seed": 101 # Optional but helps with reproducibility + } + + # Optional parameters if configured + if config.app.get("pollinations_private"): + payload["private"] = True + if config.app.get("pollinations_referrer"): + payload["referrer"] = config.app.get("pollinations_referrer") + + headers = { + "Content-Type": "application/json" + } + + # Make the API request + response = requests.post(base_url, headers=headers, json=payload) + response.raise_for_status() + result = response.json() + + if result and "choices" in result and len(result["choices"]) > 0: + content = result["choices"][0]["message"]["content"] + return content.replace("\n", "") + else: + raise Exception(f"[{llm_provider}] returned an invalid response format") + + except requests.exceptions.RequestException as e: + raise Exception(f"[{llm_provider}] request failed: {str(e)}") + except Exception as e: + raise Exception(f"[{llm_provider}] error: {str(e)}") + + if llm_provider not in ["pollinations", "ollama"]: # Skip validation for providers that don't require API key + if not api_key: + raise ValueError( + f"{llm_provider}: api_key is not set, please set it in the config.toml file." + ) + if not model_name: + raise ValueError( + f"{llm_provider}: model_name is not set, please set it in the config.toml file." + ) + if not base_url: + raise ValueError( + f"{llm_provider}: base_url is not set, please set it in the config.toml file." 
+ ) + + if llm_provider == "qwen": + import dashscope + from dashscope.api_entities.dashscope_response import GenerationResponse + + dashscope.api_key = api_key + response = dashscope.Generation.call( + model=model_name, messages=[{"role": "user", "content": prompt}] + ) + if response: + if isinstance(response, GenerationResponse): + status_code = response.status_code + if status_code != 200: + raise Exception( + f'[{llm_provider}] returned an error response: "{response}"' + ) + + content = response["output"]["text"] + return content.replace("\n", "") + else: + raise Exception( + f'[{llm_provider}] returned an invalid response: "{response}"' + ) + else: + raise Exception(f"[{llm_provider}] returned an empty response") + + if llm_provider == "gemini": + import google.generativeai as genai + + genai.configure(api_key=api_key, transport="rest") + + generation_config = { + "temperature": 0.5, + "top_p": 1, + "top_k": 1, + "max_output_tokens": 2048, + } + + safety_settings = [ + { + "category": "HARM_CATEGORY_HARASSMENT", + "threshold": "BLOCK_ONLY_HIGH", + }, + { + "category": "HARM_CATEGORY_HATE_SPEECH", + "threshold": "BLOCK_ONLY_HIGH", + }, + { + "category": "HARM_CATEGORY_SEXUALLY_EXPLICIT", + "threshold": "BLOCK_ONLY_HIGH", + }, + { + "category": "HARM_CATEGORY_DANGEROUS_CONTENT", + "threshold": "BLOCK_ONLY_HIGH", + }, + ] + + model = genai.GenerativeModel( + model_name=model_name, + generation_config=generation_config, + safety_settings=safety_settings, + ) + + try: + response = model.generate_content(prompt) + candidates = response.candidates + generated_text = candidates[0].content.parts[0].text + except (AttributeError, IndexError) as e: + raise Exception(f"[{llm_provider}] returned an invalid response: {e}")  # re-raise instead of printing, so generated_text is never read unbound + + return generated_text + + if llm_provider == "cloudflare": + response = requests.post( + f"https://api.cloudflare.com/client/v4/accounts/{account_id}/ai/run/{model_name}", + headers={"Authorization": f"Bearer {api_key}"}, + json={ + "messages": [ + { + "role": "system", + "content": "You are a friendly assistant", + }, + {"role": "user", "content": prompt}, + ] + }, + ) + result = response.json() + logger.info(result) + return result["result"]["response"] + + if llm_provider == "ernie": + response = requests.post( + "https://aip.baidubce.com/oauth/2.0/token", + params={ + "grant_type": "client_credentials", + "client_id": api_key, + "client_secret": secret_key, + } + ) + access_token = response.json().get("access_token") + url = f"{base_url}?access_token={access_token}" + + payload = json.dumps( + { + "messages": [{"role": "user", "content": prompt}], + "temperature": 0.5, + "top_p": 0.8, + "penalty_score": 1, + "disable_search": False, + "enable_citation": False, + "response_format": "text", + } + ) + headers = {"Content-Type": "application/json"} + + response = requests.request( + "POST", url, headers=headers, data=payload + ).json() + return response.get("result") + + if llm_provider == "azure": + client = AzureOpenAI( + api_key=api_key, + api_version=api_version, + azure_endpoint=base_url, + ) + else: + client = OpenAI( + api_key=api_key, + base_url=base_url, + ) + + response = client.chat.completions.create( + model=model_name, messages=[{"role": "user", "content": prompt}] + ) + if response: + if isinstance(response, ChatCompletion): + content = response.choices[0].message.content + else: + raise Exception( + f'[{llm_provider}] returned an invalid response: "{response}", please check your network ' + f"connection and try again." 
+ ) + else: + raise Exception( + f"[{llm_provider}] returned an empty response, please check your network connection and try again." + ) + + return content.replace("\n", "") + except Exception as e: + return f"Error: {str(e)}" + + +def generate_script( + video_subject: str, language: str = "", paragraph_number: int = 1 +) -> str: + prompt = f""" +# Role: Video Script Generator + +## Goals: +Generate a script for a video, depending on the subject of the video. + +## Constrains: +1. the script is to be returned as a string with the specified number of paragraphs. +2. do not under any circumstance reference this prompt in your response. +3. get straight to the point, don't start with unnecessary things like, "welcome to this video". +4. you must not include any type of markdown or formatting in the script, never use a title. +5. only return the raw content of the script. +6. do not include "voiceover", "narrator" or similar indicators of what should be spoken at the beginning of each paragraph or line. +7. you must not mention the prompt, or anything about the script itself. also, never talk about the amount of paragraphs or lines. just write the script. +8. respond in the same language as the video subject. + +# Initialization: +- video subject: {video_subject} +- number of paragraphs: {paragraph_number} +""".strip() + if language: + prompt += f"\n- language: {language}" + + final_script = "" + logger.info(f"subject: {video_subject}") + + def format_response(response): + # Clean the script + # Remove asterisks, hashes + response = response.replace("*", "") + response = response.replace("#", "") + + # Remove markdown syntax + response = re.sub(r"\[.*\]", "", response) + response = re.sub(r"\(.*\)", "", response) + + # Split the script into paragraphs + paragraphs = response.split("\n\n") + + # Select the specified number of paragraphs + # selected_paragraphs = paragraphs[:paragraph_number] + + # Join the selected paragraphs into a single string + return "\n\n".join(paragraphs) + + for i in range(_max_retries): + try: + response = _generate_response(prompt=prompt) + if response: + final_script = format_response(response) + else: + logging.error("gpt returned an empty response") + + # g4f may return an error message + if final_script and "当日额度已消耗完" in final_script: + raise ValueError(final_script) + + if final_script: + break + except Exception as e: + logger.error(f"failed to generate script: {e}") + + if i < _max_retries: + logger.warning(f"failed to generate video script, trying again... {i + 1}") + if "Error: " in final_script: + logger.error(f"failed to generate video script: {final_script}") + else: + logger.success(f"completed: \n{final_script}") + return final_script.strip() + + +def generate_terms(video_subject: str, video_script: str, amount: int = 5) -> List[str]: + prompt = f""" +# Role: Video Search Terms Generator + +## Goals: +Generate {amount} search terms for stock videos, depending on the subject of a video. + +## Constrains: +1. the search terms are to be returned as a json-array of strings. +2. each search term should consist of 1-3 words, always add the main subject of the video. +3. you must only return the json-array of strings. you must not return anything else. you must not return the script. +4. the search terms must be related to the subject of the video. +5. reply with english search terms only. 
+ +## Output Example: +["search term 1", "search term 2", "search term 3","search term 4","search term 5"] + +## Context: +### Video Subject +{video_subject} + +### Video Script +{video_script} + +Please note that you must use English for generating video search terms; Chinese is not accepted. +""".strip() + + logger.info(f"subject: {video_subject}") + + search_terms = [] + response = "" + for i in range(_max_retries): + try: + response = _generate_response(prompt) + if "Error: " in response: + logger.error(f"failed to generate video terms: {response}") + return []  # return an empty list so callers fail cleanly instead of receiving the error string + search_terms = json.loads(response) + if not isinstance(search_terms, list) or not all( + isinstance(term, str) for term in search_terms + ): + logger.error("response is not a list of strings.") + continue + + except Exception as e: + logger.warning(f"failed to generate video terms: {str(e)}") + if response: + match = re.search(r"\[.*]", response) + if match: + try: + search_terms = json.loads(match.group()) + except Exception as e: + logger.warning(f"failed to generate video terms: {str(e)}") + pass + + if search_terms and len(search_terms) > 0: + break + if i < _max_retries - 1: + logger.warning(f"failed to generate video terms, trying again... {i + 1}") + + logger.success(f"completed: \n{search_terms}") + return search_terms + + +if __name__ == "__main__": + video_subject = "生命的意义是什么" + script = generate_script( + video_subject=video_subject, language="zh-CN", paragraph_number=1 + ) + print("######################") + print(script) + search_terms = generate_terms( + video_subject=video_subject, video_script=script, amount=5 + ) + print("######################") + print(search_terms) + \ No newline at end of file diff --git a/app/services/material.py b/app/services/material.py new file mode 100644 index 0000000000000000000000000000000000000000..ae631fd263d819ab9cafa8a86cb5cef420c0c707 --- /dev/null +++ b/app/services/material.py @@ -0,0 +1,267 @@ +import os +import random +from typing import List +from urllib.parse import urlencode + +import requests +from loguru import logger +from moviepy.video.io.VideoFileClip import VideoFileClip + +from app.config import config +from app.models.schema import MaterialInfo, VideoAspect, VideoConcatMode +from app.utils import utils + +requested_count = 0 + + +def get_api_key(cfg_key: str): + api_keys = config.app.get(cfg_key) + if not api_keys: + raise ValueError( + f"\n\n##### {cfg_key} is not set #####\n\nPlease set it in the config.toml file: {config.config_file}\n\n" + f"{utils.to_json(config.app)}" + ) + + # if only one key is provided, return it + if isinstance(api_keys, str): + return api_keys + + global requested_count + requested_count += 1 + return api_keys[requested_count % len(api_keys)] + + +def search_videos_pexels( + search_term: str, + minimum_duration: int, + video_aspect: VideoAspect = VideoAspect.portrait, +) -> List[MaterialInfo]: + aspect = VideoAspect(video_aspect) + video_orientation = aspect.name + video_width, video_height = aspect.to_resolution() + api_key = get_api_key("pexels_api_keys") + headers = { + "Authorization": api_key, + "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/115.0.0.0 Safari/537.36", + } + # Build URL + params = {"query": search_term, "per_page": 20, "orientation": video_orientation} + query_url = f"https://api.pexels.com/videos/search?{urlencode(params)}" + logger.info(f"searching videos: {query_url}, with proxies: {config.proxy}") + + try: + r = requests.get( + query_url, 
headers=headers, + proxies=config.proxy, + verify=False, + timeout=(30, 60), + ) + response = r.json() + video_items = [] + if "videos" not in response: + logger.error(f"search videos failed: {response}") + return video_items + videos = response["videos"] + # loop through each video in the result + for v in videos: + duration = v["duration"] + # check if video has desired minimum duration + if duration < minimum_duration: + continue + video_files = v["video_files"] + # loop through each url to determine the best quality + for video in video_files: + w = int(video["width"]) + h = int(video["height"]) + if w == video_width and h == video_height: + item = MaterialInfo() + item.provider = "pexels" + item.url = video["link"] + item.duration = duration + video_items.append(item) + break + return video_items + except Exception as e: + logger.error(f"search videos failed: {str(e)}") + + return [] + + +def search_videos_pixabay( + search_term: str, + minimum_duration: int, + video_aspect: VideoAspect = VideoAspect.portrait, +) -> List[MaterialInfo]: + aspect = VideoAspect(video_aspect) + + video_width, video_height = aspect.to_resolution() + + api_key = get_api_key("pixabay_api_keys") + # Build URL + params = { + "q": search_term, + "video_type": "all", # Accepted values: "all", "film", "animation" + "per_page": 50, + "key": api_key, + } + query_url = f"https://pixabay.com/api/videos/?{urlencode(params)}" + logger.info(f"searching videos: {query_url}, with proxies: {config.proxy}") + + try: + r = requests.get( + query_url, proxies=config.proxy, verify=False, timeout=(30, 60) + ) + response = r.json() + video_items = [] + if "hits" not in response: + logger.error(f"search videos failed: {response}") + return video_items + videos = response["hits"] + # loop through each video in the result + for v in videos: + duration = v["duration"] + # check if video has desired minimum duration + if duration < minimum_duration: + continue + video_files = v["videos"] + # loop through each url to determine the best quality + for video_type in video_files: + video = video_files[video_type] + w = int(video["width"]) + # h = int(video["height"]) + if w >= video_width: + item = MaterialInfo() + item.provider = "pixabay" + item.url = video["url"] + item.duration = duration + video_items.append(item) + break + return video_items + except Exception as e: + logger.error(f"search videos failed: {str(e)}") + + return [] + + +def save_video(video_url: str, save_dir: str = "") -> str: + if not save_dir: + save_dir = utils.storage_dir("cache_videos") + + if not os.path.exists(save_dir): + os.makedirs(save_dir) + + url_without_query = video_url.split("?")[0] + url_hash = utils.md5(url_without_query) + video_id = f"vid-{url_hash}" + video_path = f"{save_dir}/{video_id}.mp4" + + # if video already exists, return the path + if os.path.exists(video_path) and os.path.getsize(video_path) > 0: + logger.info(f"video already exists: {video_path}") + return video_path + + headers = { + "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/115.0.0.0 Safari/537.36" + } + + # if video does not exist, download it + with open(video_path, "wb") as f: + f.write( + requests.get( + video_url, + headers=headers, + proxies=config.proxy, + verify=False, + timeout=(60, 240), + ).content + ) + + if os.path.exists(video_path) and os.path.getsize(video_path) > 0: + try: + clip = VideoFileClip(video_path) + duration = clip.duration + fps = clip.fps + clip.close() + if duration > 0 and fps > 0: + return 
video_path + except Exception as e: + try: + os.remove(video_path) + except Exception: + pass + logger.warning(f"invalid video file: {video_path} => {str(e)}") + return "" + + +def download_videos( + task_id: str, + search_terms: List[str], + source: str = "pexels", + video_aspect: VideoAspect = VideoAspect.portrait, + video_contact_mode: VideoConcatMode = VideoConcatMode.random, + audio_duration: float = 0.0, + max_clip_duration: int = 5, +) -> List[str]: + valid_video_items = [] + valid_video_urls = [] + found_duration = 0.0 + search_videos = search_videos_pexels + if source == "pixabay": + search_videos = search_videos_pixabay + + for search_term in search_terms: + video_items = search_videos( + search_term=search_term, + minimum_duration=max_clip_duration, + video_aspect=video_aspect, + ) + logger.info(f"found {len(video_items)} videos for '{search_term}'") + + for item in video_items: + if item.url not in valid_video_urls: + valid_video_items.append(item) + valid_video_urls.append(item.url) + found_duration += item.duration + + logger.info( + f"found total videos: {len(valid_video_items)}, required duration: {audio_duration} seconds, found duration: {found_duration} seconds" + ) + video_paths = [] + + material_directory = config.app.get("material_directory", "").strip() + if material_directory == "task": + material_directory = utils.task_dir(task_id) + elif material_directory and not os.path.isdir(material_directory): + material_directory = "" + + if video_contact_mode.value == VideoConcatMode.random.value: + random.shuffle(valid_video_items) + + total_duration = 0.0 + for item in valid_video_items: + try: + logger.info(f"downloading video: {item.url}") + saved_video_path = save_video( + video_url=item.url, save_dir=material_directory + ) + if saved_video_path: + logger.info(f"video saved: {saved_video_path}") + video_paths.append(saved_video_path) + seconds = min(max_clip_duration, item.duration) + total_duration += seconds + if total_duration > audio_duration: + logger.info( + f"total duration of downloaded videos: {total_duration} seconds, skip downloading more" + ) + break + except Exception as e: + logger.error(f"failed to download video: {utils.to_json(item)} => {str(e)}") + logger.success(f"downloaded {len(video_paths)} videos") + return video_paths + + +if __name__ == "__main__": + download_videos( + "test123", ["Money Exchange Medium"], audio_duration=100, source="pixabay" + ) diff --git a/app/services/state.py b/app/services/state.py new file mode 100644 index 0000000000000000000000000000000000000000..21d325fe1cffea132ab9061b29949eb9e7d3f0bd --- /dev/null +++ b/app/services/state.py @@ -0,0 +1,158 @@ +import ast +from abc import ABC, abstractmethod + +from app.config import config +from app.models import const + + +# Base class for state management +class BaseState(ABC): + @abstractmethod + def update_task(self, task_id: str, state: int, progress: int = 0, **kwargs): + pass + + @abstractmethod + def get_task(self, task_id: str): + pass + + @abstractmethod + def get_all_tasks(self, page: int, page_size: int): + pass + + +# Memory state management +class MemoryState(BaseState): + def __init__(self): + self._tasks = {} + + def get_all_tasks(self, page: int, page_size: int): + start = (page - 1) * page_size + end = start + page_size + tasks = list(self._tasks.values()) + total = len(tasks) + return tasks[start:end], total + + def update_task( + self, + task_id: str, + state: int = const.TASK_STATE_PROCESSING, + progress: int = 0, + **kwargs, + ): + progress = int(progress) + if 
progress > 100: + progress = 100 + + self._tasks[task_id] = { + "task_id": task_id, + "state": state, + "progress": progress, + **kwargs, + } + + def get_task(self, task_id: str): + return self._tasks.get(task_id, None) + + def delete_task(self, task_id: str): + if task_id in self._tasks: + del self._tasks[task_id] + + +# Redis state management +class RedisState(BaseState): + def __init__(self, host="localhost", port=6379, db=0, password=None): + import redis + + self._redis = redis.StrictRedis(host=host, port=port, db=db, password=password) + + def get_all_tasks(self, page: int, page_size: int): + start = (page - 1) * page_size + end = start + page_size + tasks = [] + cursor = 0 + total = 0 + while True: + cursor, keys = self._redis.scan(cursor, count=page_size) + total += len(keys) + if total > start: + for key in keys[max(0, start - total):end - total]: + task_data = self._redis.hgetall(key) + task = { + k.decode("utf-8"): self._convert_to_original_type(v) for k, v in task_data.items() + } + tasks.append(task) + if len(tasks) >= page_size: + break + if cursor == 0 or len(tasks) >= page_size: + break + return tasks, total + + def update_task( + self, + task_id: str, + state: int = const.TASK_STATE_PROCESSING, + progress: int = 0, + **kwargs, + ): + progress = int(progress) + if progress > 100: + progress = 100 + + fields = { + "task_id": task_id, + "state": state, + "progress": progress, + **kwargs, + } + + for field, value in fields.items(): + self._redis.hset(task_id, field, str(value)) + + def get_task(self, task_id: str): + task_data = self._redis.hgetall(task_id) + if not task_data: + return None + + task = { + key.decode("utf-8"): self._convert_to_original_type(value) + for key, value in task_data.items() + } + return task + + def delete_task(self, task_id: str): + self._redis.delete(task_id) + + @staticmethod + def _convert_to_original_type(value): + """ + Convert the value from byte string to its original data type. + You can extend this method to handle other data types as needed. 
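+ Examples (editor's illustration): b"[1, 2]" -> [1, 2]; b"42" -> 42; b"abc" -> "abc".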
+ """ + value_str = value.decode("utf-8") + + try: + # try to convert byte string array to list + return ast.literal_eval(value_str) + except (ValueError, SyntaxError): + pass + + if value_str.isdigit(): + return int(value_str) + # Add more conversions here if needed + return value_str + + +# Global state +_enable_redis = config.app.get("enable_redis", False) +_redis_host = config.app.get("redis_host", "localhost") +_redis_port = config.app.get("redis_port", 6379) +_redis_db = config.app.get("redis_db", 0) +_redis_password = config.app.get("redis_password", None) + +state = ( + RedisState( + host=_redis_host, port=_redis_port, db=_redis_db, password=_redis_password + ) + if _enable_redis + else MemoryState() +) diff --git a/app/services/subtitle.py b/app/services/subtitle.py new file mode 100644 index 0000000000000000000000000000000000000000..ca0f2473077d0af120fcf984033b2a4fa45db07a --- /dev/null +++ b/app/services/subtitle.py @@ -0,0 +1,299 @@ +import json +import os.path +import re +from timeit import default_timer as timer + +from faster_whisper import WhisperModel +from loguru import logger + +from app.config import config +from app.utils import utils + +model_size = config.whisper.get("model_size", "large-v3") +device = config.whisper.get("device", "cpu") +compute_type = config.whisper.get("compute_type", "int8") +model = None + + +def create(audio_file, subtitle_file: str = ""): + global model + if not model: + model_path = f"{utils.root_dir()}/models/whisper-{model_size}" + model_bin_file = f"{model_path}/model.bin" + if not os.path.isdir(model_path) or not os.path.isfile(model_bin_file): + model_path = model_size + + logger.info( + f"loading model: {model_path}, device: {device}, compute_type: {compute_type}" + ) + try: + model = WhisperModel( + model_size_or_path=model_path, device=device, compute_type=compute_type + ) + except Exception as e: + logger.error( + f"failed to load model: {e} \n\n" + f"********************************************\n" + f"this may be caused by network issue. \n" + f"please download the model manually and put it in the 'models' folder. \n" + f"see [README.md FAQ](https://github.com/harry0703/MoneyPrinterTurbo) for more details.\n" + f"********************************************\n\n" + ) + return None + + logger.info(f"start, output file: {subtitle_file}") + if not subtitle_file: + subtitle_file = f"{audio_file}.srt" + + segments, info = model.transcribe( + audio_file, + beam_size=5, + word_timestamps=True, + vad_filter=True, + vad_parameters=dict(min_silence_duration_ms=500), + ) + + logger.info( + f"detected language: '{info.language}', probability: {info.language_probability:.2f}" + ) + + start = timer() + subtitles = [] + + def recognized(seg_text, seg_start, seg_end): + seg_text = seg_text.strip() + if not seg_text: + return + + msg = "[%.2fs -> %.2fs] %s" % (seg_start, seg_end, seg_text) + logger.debug(msg) + + subtitles.append( + {"msg": seg_text, "start_time": seg_start, "end_time": seg_end} + ) + + for segment in segments: + words_idx = 0 + words_len = len(segment.words) + + seg_start = 0 + seg_end = 0 + seg_text = "" + + if segment.words: + is_segmented = False + for word in segment.words: + if not is_segmented: + seg_start = word.start + is_segmented = True + + seg_end = word.end + # If it contains punctuation, then break the sentence. 
+ seg_text += word.word + + if utils.str_contains_punctuation(word.word): + # remove last char + seg_text = seg_text[:-1] + if not seg_text: + continue + + recognized(seg_text, seg_start, seg_end) + + is_segmented = False + seg_text = "" + + if words_idx == 0 and segment.start < word.start: + seg_start = word.start + if words_idx == (words_len - 1) and segment.end > word.end: + seg_end = word.end + words_idx += 1 + + if not seg_text: + continue + + recognized(seg_text, seg_start, seg_end) + + end = timer() + + diff = end - start + logger.info(f"complete, elapsed: {diff:.2f} s") + + idx = 1 + lines = [] + for subtitle in subtitles: + text = subtitle.get("msg") + if text: + lines.append( + utils.text_to_srt( + idx, text, subtitle.get("start_time"), subtitle.get("end_time") + ) + ) + idx += 1 + + sub = "\n".join(lines) + "\n" + with open(subtitle_file, "w", encoding="utf-8") as f: + f.write(sub) + logger.info(f"subtitle file created: {subtitle_file}") + + +def file_to_subtitles(filename): + if not filename or not os.path.isfile(filename): + return [] + + times_texts = [] + current_times = None + current_text = "" + index = 0 + with open(filename, "r", encoding="utf-8") as f: + for line in f: + times = re.findall("([0-9]*:[0-9]*:[0-9]*,[0-9]*)", line) + if times: + current_times = line + elif line.strip() == "" and current_times: + index += 1 + times_texts.append((index, current_times.strip(), current_text.strip())) + current_times, current_text = None, "" + elif current_times: + current_text += line + return times_texts + + +def levenshtein_distance(s1, s2): + if len(s1) < len(s2): + return levenshtein_distance(s2, s1) + + if len(s2) == 0: + return len(s1) + + previous_row = range(len(s2) + 1) + for i, c1 in enumerate(s1): + current_row = [i + 1] + for j, c2 in enumerate(s2): + insertions = previous_row[j + 1] + 1 + deletions = current_row[j] + 1 + substitutions = previous_row[j] + (c1 != c2) + current_row.append(min(insertions, deletions, substitutions)) + previous_row = current_row + + return previous_row[-1] + + +def similarity(a, b): + distance = levenshtein_distance(a.lower(), b.lower()) + max_length = max(len(a), len(b)) + return 1 - (distance / max_length) + + +def correct(subtitle_file, video_script): + subtitle_items = file_to_subtitles(subtitle_file) + script_lines = utils.split_string_by_punctuations(video_script) + + corrected = False + new_subtitle_items = [] + script_index = 0 + subtitle_index = 0 + + while script_index < len(script_lines) and subtitle_index < len(subtitle_items): + script_line = script_lines[script_index].strip() + subtitle_line = subtitle_items[subtitle_index][2].strip() + + if script_line == subtitle_line: + new_subtitle_items.append(subtitle_items[subtitle_index]) + script_index += 1 + subtitle_index += 1 + else: + combined_subtitle = subtitle_line + start_time = subtitle_items[subtitle_index][1].split(" --> ")[0] + end_time = subtitle_items[subtitle_index][1].split(" --> ")[1] + next_subtitle_index = subtitle_index + 1 + + while next_subtitle_index < len(subtitle_items): + next_subtitle = subtitle_items[next_subtitle_index][2].strip() + if similarity( + script_line, combined_subtitle + " " + next_subtitle + ) > similarity(script_line, combined_subtitle): + combined_subtitle += " " + next_subtitle + end_time = subtitle_items[next_subtitle_index][1].split(" --> ")[1] + next_subtitle_index += 1 + else: + break + + if similarity(script_line, combined_subtitle) > 0.8: + logger.warning( + f"Merged/Corrected - Script: {script_line}, Subtitle: {combined_subtitle}" 
+ ) + new_subtitle_items.append( + ( + len(new_subtitle_items) + 1, + f"{start_time} --> {end_time}", + script_line, + ) + ) + corrected = True + else: + logger.warning( + f"Mismatch - Script: {script_line}, Subtitle: {combined_subtitle}" + ) + new_subtitle_items.append( + ( + len(new_subtitle_items) + 1, + f"{start_time} --> {end_time}", + script_line, + ) + ) + corrected = True + + script_index += 1 + subtitle_index = next_subtitle_index + + # Process the remaining lines of the script. + while script_index < len(script_lines): + logger.warning(f"Extra script line: {script_lines[script_index]}") + if subtitle_index < len(subtitle_items): + new_subtitle_items.append( + ( + len(new_subtitle_items) + 1, + subtitle_items[subtitle_index][1], + script_lines[script_index], + ) + ) + subtitle_index += 1 + else: + new_subtitle_items.append( + ( + len(new_subtitle_items) + 1, + "00:00:00,000 --> 00:00:00,000", + script_lines[script_index], + ) + ) + script_index += 1 + corrected = True + + if corrected: + with open(subtitle_file, "w", encoding="utf-8") as fd: + for i, item in enumerate(new_subtitle_items): + fd.write(f"{i + 1}\n{item[1]}\n{item[2]}\n\n") + logger.info("Subtitle corrected") + else: + logger.success("Subtitle is correct") + + +if __name__ == "__main__": + task_id = "c12fd1e6-4b0a-4d65-a075-c87abe35a072" + task_dir = utils.task_dir(task_id) + subtitle_file = f"{task_dir}/subtitle.srt" + audio_file = f"{task_dir}/audio.mp3" + + subtitles = file_to_subtitles(subtitle_file) + print(subtitles) + + script_file = f"{task_dir}/script.json" + with open(script_file, "r") as f: + script_content = f.read() + s = json.loads(script_content) + script = s.get("script") + + correct(subtitle_file, script) + + subtitle_file = f"{task_dir}/subtitle-test.srt" + create(audio_file, subtitle_file) diff --git a/app/services/task.py b/app/services/task.py new file mode 100644 index 0000000000000000000000000000000000000000..77ca9082cffa9a80b91a2c3b93fdbe1524e7c4f7 --- /dev/null +++ b/app/services/task.py @@ -0,0 +1,339 @@ +import math +import os.path +import re +from os import path + +from loguru import logger + +from app.config import config +from app.models import const +from app.models.schema import VideoConcatMode, VideoParams +from app.services import llm, material, subtitle, video, voice +from app.services import state as sm +from app.utils import utils + + +def generate_script(task_id, params): + logger.info("\n\n## generating video script") + video_script = params.video_script.strip() + if not video_script: + video_script = llm.generate_script( + video_subject=params.video_subject, + language=params.video_language, + paragraph_number=params.paragraph_number, + ) + else: + logger.debug(f"video script: \n{video_script}") + + if not video_script: + sm.state.update_task(task_id, state=const.TASK_STATE_FAILED) + logger.error("failed to generate video script.") + return None + + return video_script + + +def generate_terms(task_id, params, video_script): + logger.info("\n\n## generating video terms") + video_terms = params.video_terms + if not video_terms: + video_terms = llm.generate_terms( + video_subject=params.video_subject, video_script=video_script, amount=5 + ) + else: + if isinstance(video_terms, str): + video_terms = [term.strip() for term in re.split(r"[,,]", video_terms)] + elif isinstance(video_terms, list): + video_terms = [term.strip() for term in video_terms] + else: + raise ValueError("video_terms must be a string or a list of strings.") + + logger.debug(f"video terms: 
{utils.to_json(video_terms)}") + + if not video_terms: + sm.state.update_task(task_id, state=const.TASK_STATE_FAILED) + logger.error("failed to generate video terms.") + return None + + return video_terms + + +def save_script_data(task_id, video_script, video_terms, params): + script_file = path.join(utils.task_dir(task_id), "script.json") + script_data = { + "script": video_script, + "search_terms": video_terms, + "params": params, + } + + with open(script_file, "w", encoding="utf-8") as f: + f.write(utils.to_json(script_data)) + + +def generate_audio(task_id, params, video_script): + logger.info("\n\n## generating audio") + audio_file = path.join(utils.task_dir(task_id), "audio.mp3") + sub_maker = voice.tts( + text=video_script, + voice_name=voice.parse_voice_name(params.voice_name), + voice_rate=params.voice_rate, + voice_file=audio_file, + ) + if sub_maker is None: + sm.state.update_task(task_id, state=const.TASK_STATE_FAILED) + logger.error( + """failed to generate audio: +1. check if the language of the voice matches the language of the video script. +2. check if the network is available. If you are in China, it is recommended to use a VPN and enable the global traffic mode. + """.strip() + ) + return None, None, None + + audio_duration = math.ceil(voice.get_audio_duration(sub_maker)) + return audio_file, audio_duration, sub_maker + + +def generate_subtitle(task_id, params, video_script, sub_maker, audio_file): + if not params.subtitle_enabled: + return "" + + subtitle_path = path.join(utils.task_dir(task_id), "subtitle.srt") + subtitle_provider = config.app.get("subtitle_provider", "edge").strip().lower() + logger.info(f"\n\n## generating subtitle, provider: {subtitle_provider}") + + subtitle_fallback = False + if subtitle_provider == "edge": + voice.create_subtitle( + text=video_script, sub_maker=sub_maker, subtitle_file=subtitle_path + ) + if not os.path.exists(subtitle_path): + subtitle_fallback = True + logger.warning("subtitle file not found, fallback to whisper") + + if subtitle_provider == "whisper" or subtitle_fallback: + subtitle.create(audio_file=audio_file, subtitle_file=subtitle_path) + logger.info("\n\n## correcting subtitle") + subtitle.correct(subtitle_file=subtitle_path, video_script=video_script) + + subtitle_lines = subtitle.file_to_subtitles(subtitle_path) + if not subtitle_lines: + logger.warning(f"subtitle file is invalid: {subtitle_path}") + return "" + + return subtitle_path + + +def get_video_materials(task_id, params, video_terms, audio_duration): + if params.video_source == "local": + logger.info("\n\n## preprocess local materials") + materials = video.preprocess_video( + materials=params.video_materials, clip_duration=params.video_clip_duration + ) + if not materials: + sm.state.update_task(task_id, state=const.TASK_STATE_FAILED) + logger.error( + "no valid materials found, please check the materials and try again." + ) + return None + return [material_info.url for material_info in materials] + else: + logger.info(f"\n\n## downloading videos from {params.video_source}") + downloaded_videos = material.download_videos( + task_id=task_id, + search_terms=video_terms, + source=params.video_source, + video_aspect=params.video_aspect, + video_contact_mode=params.video_concat_mode, + audio_duration=audio_duration * params.video_count, + max_clip_duration=params.video_clip_duration, + ) + if not downloaded_videos: + sm.state.update_task(task_id, state=const.TASK_STATE_FAILED) + logger.error( + "failed to download videos, maybe the network is not available. 
if you are in China, please use a VPN." + ) + return None + return downloaded_videos + + +def generate_final_videos( + task_id, params, downloaded_videos, audio_file, subtitle_path +): + final_video_paths = [] + combined_video_paths = [] + video_concat_mode = ( + params.video_concat_mode if params.video_count == 1 else VideoConcatMode.random + ) + video_transition_mode = params.video_transition_mode + + _progress = 50 + for i in range(params.video_count): + index = i + 1 + combined_video_path = path.join( + utils.task_dir(task_id), f"combined-{index}.mp4" + ) + logger.info(f"\n\n## combining video: {index} => {combined_video_path}") + video.combine_videos( + combined_video_path=combined_video_path, + video_paths=downloaded_videos, + audio_file=audio_file, + video_aspect=params.video_aspect, + video_concat_mode=video_concat_mode, + video_transition_mode=video_transition_mode, + max_clip_duration=params.video_clip_duration, + threads=params.n_threads, + ) + + _progress += 50 / params.video_count / 2 + sm.state.update_task(task_id, progress=_progress) + + final_video_path = path.join(utils.task_dir(task_id), f"final-{index}.mp4") + + logger.info(f"\n\n## generating video: {index} => {final_video_path}") + video.generate_video( + video_path=combined_video_path, + audio_path=audio_file, + subtitle_path=subtitle_path, + output_file=final_video_path, + params=params, + ) + + _progress += 50 / params.video_count / 2 + sm.state.update_task(task_id, progress=_progress) + + final_video_paths.append(final_video_path) + combined_video_paths.append(combined_video_path) + + return final_video_paths, combined_video_paths + + +def start(task_id, params: VideoParams, stop_at: str = "video"): + logger.info(f"start task: {task_id}, stop_at: {stop_at}") + sm.state.update_task(task_id, state=const.TASK_STATE_PROCESSING, progress=5) + + if type(params.video_concat_mode) is str: + params.video_concat_mode = VideoConcatMode(params.video_concat_mode) + + # 1. Generate script + video_script = generate_script(task_id, params) + if not video_script or "Error: " in video_script: + sm.state.update_task(task_id, state=const.TASK_STATE_FAILED) + return + + sm.state.update_task(task_id, state=const.TASK_STATE_PROCESSING, progress=10) + + if stop_at == "script": + sm.state.update_task( + task_id, state=const.TASK_STATE_COMPLETE, progress=100, script=video_script + ) + return {"script": video_script} + + # 2. Generate terms + video_terms = "" + if params.video_source != "local": + video_terms = generate_terms(task_id, params, video_script) + if not video_terms: + sm.state.update_task(task_id, state=const.TASK_STATE_FAILED) + return + + save_script_data(task_id, video_script, video_terms, params) + + if stop_at == "terms": + sm.state.update_task( + task_id, state=const.TASK_STATE_COMPLETE, progress=100, terms=video_terms + ) + return {"script": video_script, "terms": video_terms} + + sm.state.update_task(task_id, state=const.TASK_STATE_PROCESSING, progress=20) + + # 3. Generate audio + audio_file, audio_duration, sub_maker = generate_audio( + task_id, params, video_script + ) + if not audio_file: + sm.state.update_task(task_id, state=const.TASK_STATE_FAILED) + return + + sm.state.update_task(task_id, state=const.TASK_STATE_PROCESSING, progress=30) + + if stop_at == "audio": + sm.state.update_task( + task_id, + state=const.TASK_STATE_COMPLETE, + progress=100, + audio_file=audio_file, + ) + return {"audio_file": audio_file, "audio_duration": audio_duration} + + # 4. 
Generate subtitle + subtitle_path = generate_subtitle( + task_id, params, video_script, sub_maker, audio_file + ) + + if stop_at == "subtitle": + sm.state.update_task( + task_id, + state=const.TASK_STATE_COMPLETE, + progress=100, + subtitle_path=subtitle_path, + ) + return {"subtitle_path": subtitle_path} + + sm.state.update_task(task_id, state=const.TASK_STATE_PROCESSING, progress=40) + + # 5. Get video materials + downloaded_videos = get_video_materials( + task_id, params, video_terms, audio_duration + ) + if not downloaded_videos: + sm.state.update_task(task_id, state=const.TASK_STATE_FAILED) + return + + if stop_at == "materials": + sm.state.update_task( + task_id, + state=const.TASK_STATE_COMPLETE, + progress=100, + materials=downloaded_videos, + ) + return {"materials": downloaded_videos} + + sm.state.update_task(task_id, state=const.TASK_STATE_PROCESSING, progress=50) + + # 6. Generate final videos + final_video_paths, combined_video_paths = generate_final_videos( + task_id, params, downloaded_videos, audio_file, subtitle_path + ) + + if not final_video_paths: + sm.state.update_task(task_id, state=const.TASK_STATE_FAILED) + return + + logger.success( + f"task {task_id} finished, generated {len(final_video_paths)} videos." + ) + + kwargs = { + "videos": final_video_paths, + "combined_videos": combined_video_paths, + "script": video_script, + "terms": video_terms, + "audio_file": audio_file, + "audio_duration": audio_duration, + "subtitle_path": subtitle_path, + "materials": downloaded_videos, + } + sm.state.update_task( + task_id, state=const.TASK_STATE_COMPLETE, progress=100, **kwargs + ) + return kwargs + + +if __name__ == "__main__": + task_id = "task_id" + params = VideoParams( + video_subject="金钱的作用", + voice_name="zh-CN-XiaoyiNeural-Female", + voice_rate=1.0, + ) + start(task_id, params, stop_at="video") diff --git a/app/services/utils/video_effects.py b/app/services/utils/video_effects.py new file mode 100644 index 0000000000000000000000000000000000000000..6cba8ebfb6ece91851f4b81e9048b64db0874d31 --- /dev/null +++ b/app/services/utils/video_effects.py @@ -0,0 +1,21 @@ +from moviepy import Clip, vfx + + +# FadeIn +def fadein_transition(clip: Clip, t: float) -> Clip: + return clip.with_effects([vfx.FadeIn(t)]) + + +# FadeOut +def fadeout_transition(clip: Clip, t: float) -> Clip: + return clip.with_effects([vfx.FadeOut(t)]) + + +# SlideIn +def slidein_transition(clip: Clip, t: float, side: str) -> Clip: + return clip.with_effects([vfx.SlideIn(t, side)]) + + +# SlideOut +def slideout_transition(clip: Clip, t: float, side: str) -> Clip: + return clip.with_effects([vfx.SlideOut(t, side)]) diff --git a/app/services/video.py b/app/services/video.py new file mode 100644 index 0000000000000000000000000000000000000000..1a79e301a1cdfb5782fc1904aa383f6c6a9ccfbc --- /dev/null +++ b/app/services/video.py @@ -0,0 +1,531 @@ +import glob +import itertools +import os +import random +import gc +import shutil +from typing import List +from loguru import logger +from moviepy import ( + AudioFileClip, + ColorClip, + CompositeAudioClip, + CompositeVideoClip, + ImageClip, + TextClip, + VideoFileClip, + afx, + concatenate_videoclips, +) +from moviepy.video.tools.subtitles import SubtitlesClip +from PIL import ImageFont + +from app.models import const +from app.models.schema import ( + MaterialInfo, + VideoAspect, + VideoConcatMode, + VideoParams, + VideoTransitionMode, +) +from app.services.utils import video_effects +from app.utils import utils + +class SubClippedVideoClip: + def __init__(self, 
file_path, start_time=None, end_time=None, width=None, height=None, duration=None): + self.file_path = file_path + self.start_time = start_time + self.end_time = end_time + self.width = width + self.height = height + if duration is None: + self.duration = end_time - start_time + else: + self.duration = duration + + def __str__(self): + return f"SubClippedVideoClip(file_path={self.file_path}, start_time={self.start_time}, end_time={self.end_time}, duration={self.duration}, width={self.width}, height={self.height})" + + +audio_codec = "aac" +video_codec = "libx264" +fps = 30 + +def close_clip(clip): + if clip is None: + return + + try: + # close main resources + if hasattr(clip, 'reader') and clip.reader is not None: + clip.reader.close() + + # close audio resources + if hasattr(clip, 'audio') and clip.audio is not None: + if hasattr(clip.audio, 'reader') and clip.audio.reader is not None: + clip.audio.reader.close() + del clip.audio + + # close mask resources + if hasattr(clip, 'mask') and clip.mask is not None: + if hasattr(clip.mask, 'reader') and clip.mask.reader is not None: + clip.mask.reader.close() + del clip.mask + + # handle child clips in composite clips + if hasattr(clip, 'clips') and clip.clips: + for child_clip in clip.clips: + if child_clip is not clip: # avoid possible circular references + close_clip(child_clip) + + # clear clip list + if hasattr(clip, 'clips'): + clip.clips = [] + + except Exception as e: + logger.error(f"failed to close clip: {str(e)}") + + del clip + gc.collect() + +def delete_files(files: List[str] | str): + if isinstance(files, str): + files = [files] + + for file in files: + try: + os.remove(file) + except: + pass + +def get_bgm_file(bgm_type: str = "random", bgm_file: str = ""): + if not bgm_type: + return "" + + if bgm_file and os.path.exists(bgm_file): + return bgm_file + + if bgm_type == "random": + suffix = "*.mp3" + song_dir = utils.song_dir() + files = glob.glob(os.path.join(song_dir, suffix)) + return random.choice(files) + + return "" + + +def combine_videos( + combined_video_path: str, + video_paths: List[str], + audio_file: str, + video_aspect: VideoAspect = VideoAspect.portrait, + video_concat_mode: VideoConcatMode = VideoConcatMode.random, + video_transition_mode: VideoTransitionMode = None, + max_clip_duration: int = 5, + threads: int = 2, +) -> str: + audio_clip = AudioFileClip(audio_file) + audio_duration = audio_clip.duration + logger.info(f"audio duration: {audio_duration} seconds") + # Required duration of each clip + req_dur = audio_duration / len(video_paths) + req_dur = max_clip_duration + logger.info(f"maximum clip duration: {req_dur} seconds") + output_dir = os.path.dirname(combined_video_path) + + aspect = VideoAspect(video_aspect) + video_width, video_height = aspect.to_resolution() + + processed_clips = [] + subclipped_items = [] + video_duration = 0 + for video_path in video_paths: + clip = VideoFileClip(video_path) + clip_duration = clip.duration + clip_w, clip_h = clip.size + close_clip(clip) + + start_time = 0 + + while start_time < clip_duration: + end_time = min(start_time + max_clip_duration, clip_duration) + if clip_duration - start_time >= max_clip_duration: + subclipped_items.append(SubClippedVideoClip(file_path= video_path, start_time=start_time, end_time=end_time, width=clip_w, height=clip_h)) + start_time = end_time + if video_concat_mode.value == VideoConcatMode.sequential.value: + break + + # random subclipped_items order + if video_concat_mode.value == VideoConcatMode.random.value: + 
random.shuffle(subclipped_items) + + logger.debug(f"total subclipped items: {len(subclipped_items)}") + + # Add downloaded clips over and over until the duration of the audio (max_duration) has been reached + for i, subclipped_item in enumerate(subclipped_items): + if video_duration > audio_duration: + break + + logger.debug(f"processing clip {i+1}: {subclipped_item.width}x{subclipped_item.height}, current duration: {video_duration:.2f}s, remaining: {audio_duration - video_duration:.2f}s") + + try: + clip = VideoFileClip(subclipped_item.file_path).subclipped(subclipped_item.start_time, subclipped_item.end_time) + clip_duration = clip.duration + # Not all videos are same size, so we need to resize them + clip_w, clip_h = clip.size + if clip_w != video_width or clip_h != video_height: + clip_ratio = clip.w / clip.h + video_ratio = video_width / video_height + logger.debug(f"resizing clip, source: {clip_w}x{clip_h}, ratio: {clip_ratio:.2f}, target: {video_width}x{video_height}, ratio: {video_ratio:.2f}") + + if clip_ratio == video_ratio: + clip = clip.resized(new_size=(video_width, video_height)) + else: + if clip_ratio > video_ratio: + scale_factor = video_width / clip_w + else: + scale_factor = video_height / clip_h + + new_width = int(clip_w * scale_factor) + new_height = int(clip_h * scale_factor) + + background = ColorClip(size=(video_width, video_height), color=(0, 0, 0)).with_duration(clip_duration) + clip_resized = clip.resized(new_size=(new_width, new_height)).with_position("center") + clip = CompositeVideoClip([background, clip_resized]) + + shuffle_side = random.choice(["left", "right", "top", "bottom"]) + if video_transition_mode is None or video_transition_mode.value == VideoTransitionMode.none.value: + pass  # no transition (also guards the default None coming from VideoParams) + elif video_transition_mode.value == VideoTransitionMode.fade_in.value: + clip = video_effects.fadein_transition(clip, 1) + elif video_transition_mode.value == VideoTransitionMode.fade_out.value: + clip = video_effects.fadeout_transition(clip, 1) + elif video_transition_mode.value == VideoTransitionMode.slide_in.value: + clip = video_effects.slidein_transition(clip, 1, shuffle_side) + elif video_transition_mode.value == VideoTransitionMode.slide_out.value: + clip = video_effects.slideout_transition(clip, 1, shuffle_side) + elif video_transition_mode.value == VideoTransitionMode.shuffle.value: + transition_funcs = [ + lambda c: video_effects.fadein_transition(c, 1), + lambda c: video_effects.fadeout_transition(c, 1), + lambda c: video_effects.slidein_transition(c, 1, shuffle_side), + lambda c: video_effects.slideout_transition(c, 1, shuffle_side), + ] + shuffle_transition = random.choice(transition_funcs) + clip = shuffle_transition(clip) + + if clip.duration > max_clip_duration: + clip = clip.subclipped(0, max_clip_duration) + + # write clip to temp file + clip_file = f"{output_dir}/temp-clip-{i+1}.mp4" + clip.write_videofile(clip_file, logger=None, fps=fps, codec=video_codec) + + close_clip(clip) + + processed_clips.append(SubClippedVideoClip(file_path=clip_file, duration=clip.duration, width=clip_w, height=clip_h)) + video_duration += clip.duration + + except Exception as e: + logger.error(f"failed to process clip: {str(e)}") + + # loop processed clips until the video duration matches or exceeds the audio duration.
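+ # (editor's note) cycling re-appends the already-encoded temp clip files rather than re-rendering them; the repeated paths are simply concatenated again during the pairwise merge below.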
+ if video_duration < audio_duration: + logger.warning(f"video duration ({video_duration:.2f}s) is shorter than audio duration ({audio_duration:.2f}s), looping clips to match audio length.") + base_clips = processed_clips.copy() + for clip in itertools.cycle(base_clips): + if video_duration >= audio_duration: + break + processed_clips.append(clip) + video_duration += clip.duration + logger.info(f"video duration: {video_duration:.2f}s, audio duration: {audio_duration:.2f}s, looped {len(processed_clips)-len(base_clips)} clips") + + # merge video clips progressively, avoid loading all videos at once to avoid memory overflow + logger.info("starting clip merging process") + if not processed_clips: + logger.warning("no clips available for merging") + return combined_video_path + + # if there is only one clip, use it directly + if len(processed_clips) == 1: + logger.info("using single clip directly") + shutil.copy(processed_clips[0].file_path, combined_video_path) + delete_files(processed_clips) + logger.info("video combining completed") + return combined_video_path + + # create initial video file as base + base_clip_path = processed_clips[0].file_path + temp_merged_video = f"{output_dir}/temp-merged-video.mp4" + temp_merged_next = f"{output_dir}/temp-merged-next.mp4" + + # copy first clip as initial merged video + shutil.copy(base_clip_path, temp_merged_video) + + # merge remaining video clips one by one + for i, clip in enumerate(processed_clips[1:], 1): + logger.info(f"merging clip {i}/{len(processed_clips)-1}, duration: {clip.duration:.2f}s") + + try: + # load current base video and next clip to merge + base_clip = VideoFileClip(temp_merged_video) + next_clip = VideoFileClip(clip.file_path) + + # merge these two clips + merged_clip = concatenate_videoclips([base_clip, next_clip]) + + # save merged result to temp file + merged_clip.write_videofile( + filename=temp_merged_next, + threads=threads, + logger=None, + temp_audiofile_path=output_dir, + audio_codec=audio_codec, + fps=fps, + ) + close_clip(base_clip) + close_clip(next_clip) + close_clip(merged_clip) + + # replace base file with new merged file + delete_files(temp_merged_video) + os.rename(temp_merged_next, temp_merged_video) + + except Exception as e: + logger.error(f"failed to merge clip: {str(e)}") + continue + + # after merging, rename final result to target file name + os.rename(temp_merged_video, combined_video_path) + + # clean temp files + clip_files = [clip.file_path for clip in processed_clips] + delete_files(clip_files) + + logger.info("video combining completed") + return combined_video_path + + +def wrap_text(text, max_width, font="Arial", fontsize=60): + # Create ImageFont + font = ImageFont.truetype(font, fontsize) + + def get_text_size(inner_text): + inner_text = inner_text.strip() + left, top, right, bottom = font.getbbox(inner_text) + return right - left, bottom - top + + width, height = get_text_size(text) + if width <= max_width: + return text, height + + processed = True + + _wrapped_lines_ = [] + words = text.split(" ") + _txt_ = "" + for word in words: + _before = _txt_ + _txt_ += f"{word} " + _width, _height = get_text_size(_txt_) + if _width <= max_width: + continue + else: + if _txt_.strip() == word.strip(): + processed = False + break + _wrapped_lines_.append(_before) + _txt_ = f"{word} " + _wrapped_lines_.append(_txt_) + if processed: + _wrapped_lines_ = [line.strip() for line in _wrapped_lines_] + result = "\n".join(_wrapped_lines_).strip() + height = len(_wrapped_lines_) * height + return result, height + + 
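# (editor's note) fallback for text that cannot be wrapped word by word (e.g. CJK text without spaces): wrap one character at a time instead. +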
_wrapped_lines_ = [] + chars = list(text) + _txt_ = "" + for word in chars: + _txt_ += word + _width, _height = get_text_size(_txt_) + if _width <= max_width: + continue + else: + _wrapped_lines_.append(_txt_) + _txt_ = "" + _wrapped_lines_.append(_txt_) + result = "\n".join(_wrapped_lines_).strip() + height = len(_wrapped_lines_) * height + return result, height + + +def generate_video( + video_path: str, + audio_path: str, + subtitle_path: str, + output_file: str, + params: VideoParams, +): + aspect = VideoAspect(params.video_aspect) + video_width, video_height = aspect.to_resolution() + + logger.info(f"generating video: {video_width} x {video_height}") + logger.info(f" ① video: {video_path}") + logger.info(f" ② audio: {audio_path}") + logger.info(f" ③ subtitle: {subtitle_path}") + logger.info(f" ④ output: {output_file}") + + # https://github.com/harry0703/MoneyPrinterTurbo/issues/217 + # PermissionError: [WinError 32] The process cannot access the file because it is being used by another process: 'final-1.mp4.tempTEMP_MPY_wvf_snd.mp3' + # write into the same directory as the output file + output_dir = os.path.dirname(output_file) + + font_path = "" + if params.subtitle_enabled: + if not params.font_name: + params.font_name = "STHeitiMedium.ttc" + font_path = os.path.join(utils.font_dir(), params.font_name) + if os.name == "nt": + font_path = font_path.replace("\\", "/") + + logger.info(f" ⑤ font: {font_path}") + + def create_text_clip(subtitle_item): + params.font_size = int(params.font_size) + params.stroke_width = int(params.stroke_width) + phrase = subtitle_item[1] + max_width = video_width * 0.9 + wrapped_txt, txt_height = wrap_text( + phrase, max_width=max_width, font=font_path, fontsize=params.font_size + ) + interline = int(params.font_size * 0.25) + size=(int(max_width), int(txt_height + params.font_size * 0.25 + (interline * (wrapped_txt.count("\n") + 1)))) + + _clip = TextClip( + text=wrapped_txt, + font=font_path, + font_size=params.font_size, + color=params.text_fore_color, + bg_color=params.text_background_color, + stroke_color=params.stroke_color, + stroke_width=params.stroke_width, + # interline=interline, + # size=size, + ) + duration = subtitle_item[0][1] - subtitle_item[0][0] + _clip = _clip.with_start(subtitle_item[0][0]) + _clip = _clip.with_end(subtitle_item[0][1]) + _clip = _clip.with_duration(duration) + if params.subtitle_position == "bottom": + _clip = _clip.with_position(("center", video_height * 0.95 - _clip.h)) + elif params.subtitle_position == "top": + _clip = _clip.with_position(("center", video_height * 0.05)) + elif params.subtitle_position == "custom": + # Ensure the subtitle is fully within the screen bounds + margin = 10 # Additional margin, in pixels + max_y = video_height - _clip.h - margin + min_y = margin + custom_y = (video_height - _clip.h) * (params.custom_position / 100) + custom_y = max( + min_y, min(custom_y, max_y) + ) # Constrain the y value within the valid range + _clip = _clip.with_position(("center", custom_y)) + else: # center + _clip = _clip.with_position(("center", "center")) + return _clip + + video_clip = VideoFileClip(video_path).without_audio() + audio_clip = AudioFileClip(audio_path).with_effects( + [afx.MultiplyVolume(params.voice_volume)] + ) + + def make_textclip(text): + return TextClip( + text=text, + font=font_path, + font_size=params.font_size, + ) + + if subtitle_path and os.path.exists(subtitle_path): + sub = SubtitlesClip( + subtitles=subtitle_path, encoding="utf-8", make_textclip=make_textclip + ) + text_clips = [] + 
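# (editor's note) each parsed .srt entry becomes its own positioned TextClip; SubtitlesClip is used here only to parse the subtitle file. +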
+        for item in sub.subtitles:
+            clip = create_text_clip(subtitle_item=item)
+            text_clips.append(clip)
+        video_clip = CompositeVideoClip([video_clip, *text_clips])
+
+    bgm_file = get_bgm_file(bgm_type=params.bgm_type, bgm_file=params.bgm_file)
+    if bgm_file:
+        try:
+            bgm_clip = AudioFileClip(bgm_file).with_effects(
+                [
+                    afx.MultiplyVolume(params.bgm_volume),
+                    afx.AudioFadeOut(3),
+                    afx.AudioLoop(duration=video_clip.duration),
+                ]
+            )
+            audio_clip = CompositeAudioClip([audio_clip, bgm_clip])
+        except Exception as e:
+            logger.error(f"failed to add bgm: {str(e)}")
+
+    video_clip = video_clip.with_audio(audio_clip)
+    video_clip.write_videofile(
+        output_file,
+        audio_codec=audio_codec,
+        temp_audiofile_path=output_dir,
+        threads=params.n_threads or 2,
+        logger=None,
+        fps=fps,
+    )
+    video_clip.close()
+    del video_clip
+
+
+def preprocess_video(materials: List[MaterialInfo], clip_duration=4):
+    for material in materials:
+        if not material.url:
+            continue
+
+        ext = utils.parse_extension(material.url)
+        try:
+            clip = VideoFileClip(material.url)
+        except Exception:
+            clip = ImageClip(material.url)
+
+        width = clip.size[0]
+        height = clip.size[1]
+        if width < 480 or height < 480:
+            logger.warning(f"low resolution material: {width}x{height}, minimum 480x480 required")
+            continue
+
+        if ext in const.FILE_TYPE_IMAGES:
+            logger.info(f"processing image: {material.url}")
+            # Create an image clip and set its duration to clip_duration seconds
+            clip = (
+                ImageClip(material.url)
+                .with_duration(clip_duration)
+                .with_position("center")
+            )
+            # Apply a zoom effect using the resized method.
+            # A lambda makes the zoom dynamic over time: the clip starts at its
+            # original size (scale 1.0) and scales up linearly, reaching
+            # 1 + 0.03 * clip_duration at the end (e.g. 112% for the default
+            # 4-second clip). t is the current time and clip.duration is the
+            # total duration of the clip.
+            zoom_clip = clip.resized(
+                lambda t: 1 + (clip_duration * 0.03) * (t / clip.duration)
+            )
+
+            # Optionally, create a composite video clip containing the zoomed clip.
+            # This is useful when you want to add other elements to the video.
+            final_clip = CompositeVideoClip([zoom_clip])
+
+            # Output the video to a file.
+            video_file = f"{material.url}.mp4"
+            final_clip.write_videofile(video_file, fps=30, logger=None)
+            close_clip(clip)
+            material.url = video_file
+            logger.success(f"image processed: {video_file}")
+    return materials
\ No newline at end of file
diff --git a/app/services/voice.py b/app/services/voice.py
new file mode 100644
index 0000000000000000000000000000000000000000..e6b4d5971d45c1c8bd8cc2f508b1a7df73bf7149
--- /dev/null
+++ b/app/services/voice.py
@@ -0,0 +1,1566 @@
+import asyncio
+import os
+import re
+from datetime import datetime
+from typing import Union
+from xml.sax.saxutils import unescape
+
+import edge_tts
+import requests
+from edge_tts import SubMaker, submaker
+from edge_tts.submaker import mktimestamp
+from loguru import logger
+from moviepy.video.tools import subtitles
+
+from app.config import config
+from app.utils import utils
+
+
+def get_siliconflow_voices() -> list[str]:
+    """
+    Get the list of SiliconFlow voices.
+
+    Returns:
+        A list of voice names, formatted as
+        ["siliconflow:FunAudioLLM/CosyVoice2-0.5B:alex", ...]
+    """
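+    # Example entry, grounded in the format string returned below:
+    # "siliconflow:FunAudioLLM/CosyVoice2-0.5B:alex-Male". tts() later splits
+    # entries like this on ":" to recover the model and voice names.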
+    # SiliconFlow voice list with each voice's gender (used for display)
+    voices_with_gender = [
+        ("FunAudioLLM/CosyVoice2-0.5B", "alex", "Male"),
+        ("FunAudioLLM/CosyVoice2-0.5B", "anna", "Female"),
+        ("FunAudioLLM/CosyVoice2-0.5B", "bella", "Female"),
+        ("FunAudioLLM/CosyVoice2-0.5B", "benjamin", "Male"),
+        ("FunAudioLLM/CosyVoice2-0.5B", "charles", "Male"),
+        ("FunAudioLLM/CosyVoice2-0.5B", "claire", "Female"),
+        ("FunAudioLLM/CosyVoice2-0.5B", "david", "Male"),
+        ("FunAudioLLM/CosyVoice2-0.5B", "diana", "Female"),
+    ]
+
+    # Prepend the "siliconflow:" prefix and format each entry as a display name
+    return [
+        f"siliconflow:{model}:{voice}-{gender}"
+        for model, voice, gender in voices_with_gender
+    ]
+
+
+def get_all_azure_voices(filter_locals=None) -> list[str]:
+    azure_voices_str = """
+Name: af-ZA-AdriNeural
+Gender: Female
+
+Name: af-ZA-WillemNeural
+Gender: Male
+
+Name: am-ET-AmehaNeural
+Gender: Male
+
+Name: am-ET-MekdesNeural
+Gender: Female
+
+Name: ar-AE-FatimaNeural
+Gender: Female
+
+Name: ar-AE-HamdanNeural
+Gender: Male
+
+Name: ar-BH-AliNeural
+Gender: Male
+
+Name: ar-BH-LailaNeural
+Gender: Female
+
+Name: ar-DZ-AminaNeural
+Gender: Female
+
+Name: ar-DZ-IsmaelNeural
+Gender: Male
+
+Name: ar-EG-SalmaNeural
+Gender: Female
+
+Name: ar-EG-ShakirNeural
+Gender: Male
+
+Name: ar-IQ-BasselNeural
+Gender: Male
+
+Name: ar-IQ-RanaNeural
+Gender: Female
+
+Name: ar-JO-SanaNeural
+Gender: Female
+
+Name: ar-JO-TaimNeural
+Gender: Male
+
+Name: ar-KW-FahedNeural
+Gender: Male
+
+Name: ar-KW-NouraNeural
+Gender: Female
+
+Name: ar-LB-LaylaNeural
+Gender: Female
+
+Name: ar-LB-RamiNeural
+Gender: Male
+
+Name: ar-LY-ImanNeural
+Gender: Female
+
+Name: ar-LY-OmarNeural
+Gender: Male
+
+Name: ar-MA-JamalNeural
+Gender: Male
+
+Name: ar-MA-MounaNeural
+Gender: Female
+
+Name: ar-OM-AbdullahNeural
+Gender: Male
+
+Name: ar-OM-AyshaNeural
+Gender: Female
+
+Name: ar-QA-AmalNeural
+Gender: Female
+
+Name: ar-QA-MoazNeural
+Gender: Male
+
+Name: ar-SA-HamedNeural
+Gender: Male
+
+Name: ar-SA-ZariyahNeural
+Gender: Female
+
+Name: ar-SY-AmanyNeural
+Gender: Female
+
+Name: ar-SY-LaithNeural
+Gender: Male
+
+Name: ar-TN-HediNeural
+Gender: Male
+
+Name: ar-TN-ReemNeural
+Gender: Female
+
+Name: ar-YE-MaryamNeural
+Gender: Female
+
+Name: ar-YE-SalehNeural
+Gender: Male
+
+Name: az-AZ-BabekNeural
+Gender: Male
+
+Name: az-AZ-BanuNeural
+Gender: Female
+
+Name: bg-BG-BorislavNeural
+Gender: Male
+
+Name: bg-BG-KalinaNeural
+Gender: Female
+
+Name: bn-BD-NabanitaNeural
+Gender: Female
+
+Name: bn-BD-PradeepNeural
+Gender: Male
+
+Name: bn-IN-BashkarNeural
+Gender: Male
+
+Name: bn-IN-TanishaaNeural
+Gender: Female
+
+Name: bs-BA-GoranNeural
+Gender: Male
+
+Name: bs-BA-VesnaNeural
+Gender: Female
+
+Name: ca-ES-EnricNeural
+Gender: Male
+
+Name: ca-ES-JoanaNeural
+Gender: Female
+
+Name: cs-CZ-AntoninNeural
+Gender: Male
+
+Name: cs-CZ-VlastaNeural
+Gender: Female
+
+Name: cy-GB-AledNeural
+Gender: Male
+
+Name: cy-GB-NiaNeural
+Gender: Female
+
+Name: da-DK-ChristelNeural
+Gender: Female
+
+Name: da-DK-JeppeNeural
+Gender: Male
+
+Name: de-AT-IngridNeural
+Gender: Female
+
+Name: de-AT-JonasNeural
+Gender: Male
+
+Name: de-CH-JanNeural
+Gender: Male
+
+Name: de-CH-LeniNeural
+Gender: Female
+
+Name: de-DE-AmalaNeural
+Gender: Female
+
+Name: de-DE-ConradNeural
+Gender: Male
+
+Name: de-DE-FlorianMultilingualNeural
+Gender: Male
+
+Name: de-DE-KatjaNeural
+Gender: Female
+
+Name: de-DE-KillianNeural
+Gender: Male
+
+Name: de-DE-SeraphinaMultilingualNeural
+Gender: Female
+
+Name: el-GR-AthinaNeural
+Gender: Female
+
+Name: el-GR-NestorasNeural
+Gender: Male
+
+Name: 
en-AU-NatashaNeural +Gender: Female + +Name: en-AU-WilliamNeural +Gender: Male + +Name: en-CA-ClaraNeural +Gender: Female + +Name: en-CA-LiamNeural +Gender: Male + +Name: en-GB-LibbyNeural +Gender: Female + +Name: en-GB-MaisieNeural +Gender: Female + +Name: en-GB-RyanNeural +Gender: Male + +Name: en-GB-SoniaNeural +Gender: Female + +Name: en-GB-ThomasNeural +Gender: Male + +Name: en-HK-SamNeural +Gender: Male + +Name: en-HK-YanNeural +Gender: Female + +Name: en-IE-ConnorNeural +Gender: Male + +Name: en-IE-EmilyNeural +Gender: Female + +Name: en-IN-NeerjaExpressiveNeural +Gender: Female + +Name: en-IN-NeerjaNeural +Gender: Female + +Name: en-IN-PrabhatNeural +Gender: Male + +Name: en-KE-AsiliaNeural +Gender: Female + +Name: en-KE-ChilembaNeural +Gender: Male + +Name: en-NG-AbeoNeural +Gender: Male + +Name: en-NG-EzinneNeural +Gender: Female + +Name: en-NZ-MitchellNeural +Gender: Male + +Name: en-NZ-MollyNeural +Gender: Female + +Name: en-PH-JamesNeural +Gender: Male + +Name: en-PH-RosaNeural +Gender: Female + +Name: en-SG-LunaNeural +Gender: Female + +Name: en-SG-WayneNeural +Gender: Male + +Name: en-TZ-ElimuNeural +Gender: Male + +Name: en-TZ-ImaniNeural +Gender: Female + +Name: en-US-AnaNeural +Gender: Female + +Name: en-US-AndrewMultilingualNeural +Gender: Male + +Name: en-US-AndrewNeural +Gender: Male + +Name: en-US-AriaNeural +Gender: Female + +Name: en-US-AvaMultilingualNeural +Gender: Female + +Name: en-US-AvaNeural +Gender: Female + +Name: en-US-BrianMultilingualNeural +Gender: Male + +Name: en-US-BrianNeural +Gender: Male + +Name: en-US-ChristopherNeural +Gender: Male + +Name: en-US-EmmaMultilingualNeural +Gender: Female + +Name: en-US-EmmaNeural +Gender: Female + +Name: en-US-EricNeural +Gender: Male + +Name: en-US-GuyNeural +Gender: Male + +Name: en-US-JennyNeural +Gender: Female + +Name: en-US-MichelleNeural +Gender: Female + +Name: en-US-RogerNeural +Gender: Male + +Name: en-US-SteffanNeural +Gender: Male + +Name: en-ZA-LeahNeural +Gender: Female + +Name: en-ZA-LukeNeural +Gender: Male + +Name: es-AR-ElenaNeural +Gender: Female + +Name: es-AR-TomasNeural +Gender: Male + +Name: es-BO-MarceloNeural +Gender: Male + +Name: es-BO-SofiaNeural +Gender: Female + +Name: es-CL-CatalinaNeural +Gender: Female + +Name: es-CL-LorenzoNeural +Gender: Male + +Name: es-CO-GonzaloNeural +Gender: Male + +Name: es-CO-SalomeNeural +Gender: Female + +Name: es-CR-JuanNeural +Gender: Male + +Name: es-CR-MariaNeural +Gender: Female + +Name: es-CU-BelkysNeural +Gender: Female + +Name: es-CU-ManuelNeural +Gender: Male + +Name: es-DO-EmilioNeural +Gender: Male + +Name: es-DO-RamonaNeural +Gender: Female + +Name: es-EC-AndreaNeural +Gender: Female + +Name: es-EC-LuisNeural +Gender: Male + +Name: es-ES-AlvaroNeural +Gender: Male + +Name: es-ES-ElviraNeural +Gender: Female + +Name: es-ES-XimenaNeural +Gender: Female + +Name: es-GQ-JavierNeural +Gender: Male + +Name: es-GQ-TeresaNeural +Gender: Female + +Name: es-GT-AndresNeural +Gender: Male + +Name: es-GT-MartaNeural +Gender: Female + +Name: es-HN-CarlosNeural +Gender: Male + +Name: es-HN-KarlaNeural +Gender: Female + +Name: es-MX-DaliaNeural +Gender: Female + +Name: es-MX-JorgeNeural +Gender: Male + +Name: es-NI-FedericoNeural +Gender: Male + +Name: es-NI-YolandaNeural +Gender: Female + +Name: es-PA-MargaritaNeural +Gender: Female + +Name: es-PA-RobertoNeural +Gender: Male + +Name: es-PE-AlexNeural +Gender: Male + +Name: es-PE-CamilaNeural +Gender: Female + +Name: es-PR-KarinaNeural +Gender: Female + +Name: es-PR-VictorNeural +Gender: Male + +Name: 
es-PY-MarioNeural +Gender: Male + +Name: es-PY-TaniaNeural +Gender: Female + +Name: es-SV-LorenaNeural +Gender: Female + +Name: es-SV-RodrigoNeural +Gender: Male + +Name: es-US-AlonsoNeural +Gender: Male + +Name: es-US-PalomaNeural +Gender: Female + +Name: es-UY-MateoNeural +Gender: Male + +Name: es-UY-ValentinaNeural +Gender: Female + +Name: es-VE-PaolaNeural +Gender: Female + +Name: es-VE-SebastianNeural +Gender: Male + +Name: et-EE-AnuNeural +Gender: Female + +Name: et-EE-KertNeural +Gender: Male + +Name: fa-IR-DilaraNeural +Gender: Female + +Name: fa-IR-FaridNeural +Gender: Male + +Name: fi-FI-HarriNeural +Gender: Male + +Name: fi-FI-NooraNeural +Gender: Female + +Name: fil-PH-AngeloNeural +Gender: Male + +Name: fil-PH-BlessicaNeural +Gender: Female + +Name: fr-BE-CharlineNeural +Gender: Female + +Name: fr-BE-GerardNeural +Gender: Male + +Name: fr-CA-AntoineNeural +Gender: Male + +Name: fr-CA-JeanNeural +Gender: Male + +Name: fr-CA-SylvieNeural +Gender: Female + +Name: fr-CA-ThierryNeural +Gender: Male + +Name: fr-CH-ArianeNeural +Gender: Female + +Name: fr-CH-FabriceNeural +Gender: Male + +Name: fr-FR-DeniseNeural +Gender: Female + +Name: fr-FR-EloiseNeural +Gender: Female + +Name: fr-FR-HenriNeural +Gender: Male + +Name: fr-FR-RemyMultilingualNeural +Gender: Male + +Name: fr-FR-VivienneMultilingualNeural +Gender: Female + +Name: ga-IE-ColmNeural +Gender: Male + +Name: ga-IE-OrlaNeural +Gender: Female + +Name: gl-ES-RoiNeural +Gender: Male + +Name: gl-ES-SabelaNeural +Gender: Female + +Name: gu-IN-DhwaniNeural +Gender: Female + +Name: gu-IN-NiranjanNeural +Gender: Male + +Name: he-IL-AvriNeural +Gender: Male + +Name: he-IL-HilaNeural +Gender: Female + +Name: hi-IN-MadhurNeural +Gender: Male + +Name: hi-IN-SwaraNeural +Gender: Female + +Name: hr-HR-GabrijelaNeural +Gender: Female + +Name: hr-HR-SreckoNeural +Gender: Male + +Name: hu-HU-NoemiNeural +Gender: Female + +Name: hu-HU-TamasNeural +Gender: Male + +Name: id-ID-ArdiNeural +Gender: Male + +Name: id-ID-GadisNeural +Gender: Female + +Name: is-IS-GudrunNeural +Gender: Female + +Name: is-IS-GunnarNeural +Gender: Male + +Name: it-IT-DiegoNeural +Gender: Male + +Name: it-IT-ElsaNeural +Gender: Female + +Name: it-IT-GiuseppeMultilingualNeural +Gender: Male + +Name: it-IT-IsabellaNeural +Gender: Female + +Name: iu-Cans-CA-SiqiniqNeural +Gender: Female + +Name: iu-Cans-CA-TaqqiqNeural +Gender: Male + +Name: iu-Latn-CA-SiqiniqNeural +Gender: Female + +Name: iu-Latn-CA-TaqqiqNeural +Gender: Male + +Name: ja-JP-KeitaNeural +Gender: Male + +Name: ja-JP-NanamiNeural +Gender: Female + +Name: jv-ID-DimasNeural +Gender: Male + +Name: jv-ID-SitiNeural +Gender: Female + +Name: ka-GE-EkaNeural +Gender: Female + +Name: ka-GE-GiorgiNeural +Gender: Male + +Name: kk-KZ-AigulNeural +Gender: Female + +Name: kk-KZ-DauletNeural +Gender: Male + +Name: km-KH-PisethNeural +Gender: Male + +Name: km-KH-SreymomNeural +Gender: Female + +Name: kn-IN-GaganNeural +Gender: Male + +Name: kn-IN-SapnaNeural +Gender: Female + +Name: ko-KR-HyunsuMultilingualNeural +Gender: Male + +Name: ko-KR-InJoonNeural +Gender: Male + +Name: ko-KR-SunHiNeural +Gender: Female + +Name: lo-LA-ChanthavongNeural +Gender: Male + +Name: lo-LA-KeomanyNeural +Gender: Female + +Name: lt-LT-LeonasNeural +Gender: Male + +Name: lt-LT-OnaNeural +Gender: Female + +Name: lv-LV-EveritaNeural +Gender: Female + +Name: lv-LV-NilsNeural +Gender: Male + +Name: mk-MK-AleksandarNeural +Gender: Male + +Name: mk-MK-MarijaNeural +Gender: Female + +Name: ml-IN-MidhunNeural +Gender: Male + +Name: 
ml-IN-SobhanaNeural +Gender: Female + +Name: mn-MN-BataaNeural +Gender: Male + +Name: mn-MN-YesuiNeural +Gender: Female + +Name: mr-IN-AarohiNeural +Gender: Female + +Name: mr-IN-ManoharNeural +Gender: Male + +Name: ms-MY-OsmanNeural +Gender: Male + +Name: ms-MY-YasminNeural +Gender: Female + +Name: mt-MT-GraceNeural +Gender: Female + +Name: mt-MT-JosephNeural +Gender: Male + +Name: my-MM-NilarNeural +Gender: Female + +Name: my-MM-ThihaNeural +Gender: Male + +Name: nb-NO-FinnNeural +Gender: Male + +Name: nb-NO-PernilleNeural +Gender: Female + +Name: ne-NP-HemkalaNeural +Gender: Female + +Name: ne-NP-SagarNeural +Gender: Male + +Name: nl-BE-ArnaudNeural +Gender: Male + +Name: nl-BE-DenaNeural +Gender: Female + +Name: nl-NL-ColetteNeural +Gender: Female + +Name: nl-NL-FennaNeural +Gender: Female + +Name: nl-NL-MaartenNeural +Gender: Male + +Name: pl-PL-MarekNeural +Gender: Male + +Name: pl-PL-ZofiaNeural +Gender: Female + +Name: ps-AF-GulNawazNeural +Gender: Male + +Name: ps-AF-LatifaNeural +Gender: Female + +Name: pt-BR-AntonioNeural +Gender: Male + +Name: pt-BR-FranciscaNeural +Gender: Female + +Name: pt-BR-ThalitaMultilingualNeural +Gender: Female + +Name: pt-PT-DuarteNeural +Gender: Male + +Name: pt-PT-RaquelNeural +Gender: Female + +Name: ro-RO-AlinaNeural +Gender: Female + +Name: ro-RO-EmilNeural +Gender: Male + +Name: ru-RU-DmitryNeural +Gender: Male + +Name: ru-RU-SvetlanaNeural +Gender: Female + +Name: si-LK-SameeraNeural +Gender: Male + +Name: si-LK-ThiliniNeural +Gender: Female + +Name: sk-SK-LukasNeural +Gender: Male + +Name: sk-SK-ViktoriaNeural +Gender: Female + +Name: sl-SI-PetraNeural +Gender: Female + +Name: sl-SI-RokNeural +Gender: Male + +Name: so-SO-MuuseNeural +Gender: Male + +Name: so-SO-UbaxNeural +Gender: Female + +Name: sq-AL-AnilaNeural +Gender: Female + +Name: sq-AL-IlirNeural +Gender: Male + +Name: sr-RS-NicholasNeural +Gender: Male + +Name: sr-RS-SophieNeural +Gender: Female + +Name: su-ID-JajangNeural +Gender: Male + +Name: su-ID-TutiNeural +Gender: Female + +Name: sv-SE-MattiasNeural +Gender: Male + +Name: sv-SE-SofieNeural +Gender: Female + +Name: sw-KE-RafikiNeural +Gender: Male + +Name: sw-KE-ZuriNeural +Gender: Female + +Name: sw-TZ-DaudiNeural +Gender: Male + +Name: sw-TZ-RehemaNeural +Gender: Female + +Name: ta-IN-PallaviNeural +Gender: Female + +Name: ta-IN-ValluvarNeural +Gender: Male + +Name: ta-LK-KumarNeural +Gender: Male + +Name: ta-LK-SaranyaNeural +Gender: Female + +Name: ta-MY-KaniNeural +Gender: Female + +Name: ta-MY-SuryaNeural +Gender: Male + +Name: ta-SG-AnbuNeural +Gender: Male + +Name: ta-SG-VenbaNeural +Gender: Female + +Name: te-IN-MohanNeural +Gender: Male + +Name: te-IN-ShrutiNeural +Gender: Female + +Name: th-TH-NiwatNeural +Gender: Male + +Name: th-TH-PremwadeeNeural +Gender: Female + +Name: tr-TR-AhmetNeural +Gender: Male + +Name: tr-TR-EmelNeural +Gender: Female + +Name: uk-UA-OstapNeural +Gender: Male + +Name: uk-UA-PolinaNeural +Gender: Female + +Name: ur-IN-GulNeural +Gender: Female + +Name: ur-IN-SalmanNeural +Gender: Male + +Name: ur-PK-AsadNeural +Gender: Male + +Name: ur-PK-UzmaNeural +Gender: Female + +Name: uz-UZ-MadinaNeural +Gender: Female + +Name: uz-UZ-SardorNeural +Gender: Male + +Name: vi-VN-HoaiMyNeural +Gender: Female + +Name: vi-VN-NamMinhNeural +Gender: Male + +Name: zh-CN-XiaoxiaoNeural +Gender: Female + +Name: zh-CN-XiaoyiNeural +Gender: Female + +Name: zh-CN-YunjianNeural +Gender: Male + +Name: zh-CN-YunxiNeural +Gender: Male + +Name: zh-CN-YunxiaNeural +Gender: Male + +Name: zh-CN-YunyangNeural +Gender: Male + 
+Name: zh-CN-liaoning-XiaobeiNeural
+Gender: Female
+
+Name: zh-CN-shaanxi-XiaoniNeural
+Gender: Female
+
+Name: zh-HK-HiuGaaiNeural
+Gender: Female
+
+Name: zh-HK-HiuMaanNeural
+Gender: Female
+
+Name: zh-HK-WanLungNeural
+Gender: Male
+
+Name: zh-TW-HsiaoChenNeural
+Gender: Female
+
+Name: zh-TW-HsiaoYuNeural
+Gender: Female
+
+Name: zh-TW-YunJheNeural
+Gender: Male
+
+Name: zu-ZA-ThandoNeural
+Gender: Female
+
+Name: zu-ZA-ThembaNeural
+Gender: Male
+
+
+Name: en-US-AvaMultilingualNeural-V2
+Gender: Female
+
+Name: en-US-AndrewMultilingualNeural-V2
+Gender: Male
+
+Name: en-US-EmmaMultilingualNeural-V2
+Gender: Female
+
+Name: en-US-BrianMultilingualNeural-V2
+Gender: Male
+
+Name: de-DE-FlorianMultilingualNeural-V2
+Gender: Male
+
+Name: de-DE-SeraphinaMultilingualNeural-V2
+Gender: Female
+
+Name: fr-FR-RemyMultilingualNeural-V2
+Gender: Male
+
+Name: fr-FR-VivienneMultilingualNeural-V2
+Gender: Female
+
+Name: zh-CN-XiaoxiaoMultilingualNeural-V2
+Gender: Female
+    """.strip()
+    voices = []
+    # Regex pattern that matches the Name and Gender lines
+    pattern = re.compile(r"Name:\s*(.+)\s*Gender:\s*(.+)\s*", re.MULTILINE)
+    # Find all matches
+    matches = pattern.findall(azure_voices_str)
+
+    for name, gender in matches:
+        # Apply the locale filter, if any
+        if filter_locals and any(
+            name.lower().startswith(fl.lower()) for fl in filter_locals
+        ):
+            voices.append(f"{name}-{gender}")
+        elif not filter_locals:
+            voices.append(f"{name}-{gender}")
+
+    voices.sort()
+    return voices
+
+
+def parse_voice_name(name: str):
+    # zh-CN-XiaoyiNeural-Female
+    # zh-CN-YunxiNeural-Male
+    # zh-CN-XiaoxiaoMultilingualNeural-V2-Female
+    name = name.replace("-Female", "").replace("-Male", "").strip()
+    return name
+
+
+def is_azure_v2_voice(voice_name: str):
+    voice_name = parse_voice_name(voice_name)
+    if voice_name.endswith("-V2"):
+        return voice_name.replace("-V2", "").strip()
+    return ""
+
+
+def is_siliconflow_voice(voice_name: str):
+    """Check whether the voice name belongs to SiliconFlow."""
+    return voice_name.startswith("siliconflow:")
+
+
+def tts(
+    text: str,
+    voice_name: str,
+    voice_rate: float,
+    voice_file: str,
+    voice_volume: float = 1.0,
+) -> Union[SubMaker, None]:
+    if is_azure_v2_voice(voice_name):
+        return azure_tts_v2(text, voice_name, voice_file)
+    elif is_siliconflow_voice(voice_name):
+        # Extract the model and voice from voice_name
+        # Format: siliconflow:model:voice-Gender
+        parts = voice_name.split(":")
+        if len(parts) >= 3:
+            model = parts[1]
+            # Strip the gender suffix, e.g. "alex-Male" -> "alex"
+            voice_with_gender = parts[2]
+            voice = voice_with_gender.split("-")[0]
+            # Build the full voice parameter in the form "model:voice"
+            full_voice = f"{model}:{voice}"
+            return siliconflow_tts(
+                text, model, full_voice, voice_rate, voice_file, voice_volume
+            )
+        else:
+            logger.error(f"Invalid siliconflow voice name format: {voice_name}")
+            return None
+    return azure_tts_v1(text, voice_name, voice_rate, voice_file)
+
+
+def convert_rate_to_percent(rate: float) -> str:
+    if rate == 1.0:
+        return "+0%"
+    percent = round((rate - 1.0) * 100)
+    if percent > 0:
+        return f"+{percent}%"
+    else:
+        return f"{percent}%"
+
+
+def azure_tts_v1(
+    text: str, voice_name: str, voice_rate: float, voice_file: str
+) -> Union[SubMaker, None]:
+    voice_name = parse_voice_name(voice_name)
+    text = text.strip()
+    rate_str = convert_rate_to_percent(voice_rate)
+    for i in range(3):
+        try:
+            logger.info(f"start, voice name: {voice_name}, try: {i + 1}")
+
+            async def _do() -> SubMaker:
+                communicate = edge_tts.Communicate(text, voice_name, rate=rate_str)
+                sub_maker = edge_tts.SubMaker()
+                with open(voice_file, "wb") as file:
+                    async for chunk in communicate.stream():
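+                        # edge-tts streams two kinds of chunks here (observed
+                        # with edge-tts 6.x; treat other versions as an
+                        # assumption): "audio" chunks carry MP3 bytes in
+                        # chunk["data"], while "WordBoundary" chunks carry
+                        # chunk["offset"] and chunk["duration"] in
+                        # 100-nanosecond units plus the spoken chunk["text"].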
+                        if chunk["type"] == "audio":
+                            file.write(chunk["data"])
+                        elif chunk["type"] == "WordBoundary":
+                            sub_maker.create_sub(
+                                (chunk["offset"], chunk["duration"]), chunk["text"]
+                            )
+                return sub_maker
+
+            sub_maker = asyncio.run(_do())
+            if not sub_maker or not sub_maker.subs:
+                logger.warning("failed, sub_maker is None or sub_maker.subs is None")
+                continue
+
+            logger.info(f"completed, output file: {voice_file}")
+            return sub_maker
+        except Exception as e:
+            logger.error(f"failed, error: {str(e)}")
+    return None
+
+
+def siliconflow_tts(
+    text: str,
+    model: str,
+    voice: str,
+    voice_rate: float,
+    voice_file: str,
+    voice_volume: float = 1.0,
+) -> Union[SubMaker, None]:
+    """
+    Generate speech with the SiliconFlow API.
+
+    Args:
+        text: the text to synthesize
+        model: model name, e.g. "FunAudioLLM/CosyVoice2-0.5B"
+        voice: voice name, e.g. "FunAudioLLM/CosyVoice2-0.5B:alex"
+        voice_rate: speech speed, in the range [0.25, 4.0]
+        voice_file: output audio file path
+        voice_volume: speech volume, in the range [0.6, 5.0]; mapped onto
+            SiliconFlow's gain range of [-10, 10]
+
+    Returns:
+        A SubMaker object, or None on failure
+    """
+    text = text.strip()
+    api_key = config.siliconflow.get("api_key", "")
+
+    if not api_key:
+        logger.error("SiliconFlow API key is not set")
+        return None
+
+    # Map voice_volume onto SiliconFlow's gain range.
+    # The default voice_volume of 1.0 corresponds to a gain of 0.
+    gain = voice_volume - 1.0
+    # Clamp gain to [-10, 10]
+    gain = max(-10, min(10, gain))
+
+    url = "https://api.siliconflow.cn/v1/audio/speech"
+
+    payload = {
+        "model": model,
+        "input": text,
+        "voice": voice,
+        "response_format": "mp3",
+        "sample_rate": 32000,
+        "stream": False,
+        "speed": voice_rate,
+        "gain": gain,
+    }
+
+    headers = {"Authorization": f"Bearer {api_key}", "Content-Type": "application/json"}
+
+    for i in range(3):  # up to 3 attempts
+        try:
+            logger.info(
+                f"start siliconflow tts, model: {model}, voice: {voice}, try: {i + 1}"
+            )
+
+            response = requests.post(url, json=payload, headers=headers)
+
+            if response.status_code == 200:
+                # Save the audio file
+                with open(voice_file, "wb") as f:
+                    f.write(response.content)
+
+                # Create an empty SubMaker object
+                sub_maker = SubMaker()
+
+                # Determine the actual duration of the audio file
+                try:
+                    # Try to read the audio duration with moviepy
+                    from moviepy import AudioFileClip
+
+                    audio_clip = AudioFileClip(voice_file)
+                    audio_duration = audio_clip.duration
+                    audio_clip.close()
+
+                    # Convert the duration to 100-nanosecond units (edge_tts compatible)
+                    audio_duration_100ns = int(audio_duration * 10000000)
+
+                    # Split the text into sentences by punctuation to build
+                    # more accurate subtitles
+                    sentences = utils.split_string_by_punctuations(text)
+
+                    if sentences:
+                        # Approximate each sentence's duration proportionally
+                        # to its character count
+                        total_chars = sum(len(s) for s in sentences)
+                        char_duration = (
+                            audio_duration_100ns / total_chars if total_chars > 0 else 0
+                        )
+
+                        current_offset = 0
+                        for sentence in sentences:
+                            if not sentence.strip():
+                                continue
+
+                            # Duration of the current sentence
+                            sentence_chars = len(sentence)
+                            sentence_duration = int(sentence_chars * char_duration)
+
+                            # Append to the SubMaker
+                            sub_maker.subs.append(sentence)
+                            sub_maker.offset.append(
+                                (current_offset, current_offset + sentence_duration)
+                            )
+
+                            # Advance the offset
+                            current_offset += sentence_duration
+                    else:
+                        # If the text cannot be split, use the whole text as a
+                        # single subtitle
+                        sub_maker.subs = [text]
+                        sub_maker.offset = [(0, audio_duration_100ns)]
+
+                except Exception as e:
+                    logger.warning(f"Failed to create accurate subtitles: {str(e)}")
+                    # Fall back to a single subtitle covering the whole text,
+                    # using the measured audio duration if available, otherwise
+                    # a nominal 1 second (10,000,000 x 100 ns)
+                    sub_maker.subs = [text]
+                    sub_maker.offset = [
+                        (
+                            0,
+                            audio_duration_100ns
+                            if "audio_duration_100ns" in locals()
+                            else 10000000,
+                        )
+                    ]
+
+                logger.success(f"siliconflow tts succeeded: {voice_file}")
+                return sub_maker
+            else:
+                logger.error(
+                    f"siliconflow tts failed with status code {response.status_code}: {response.text}"
+                )
+        except Exception as e:
+            logger.error(f"siliconflow tts failed: {str(e)}")
+
+    return None
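+# Illustrative call of siliconflow_tts (a sketch, not part of the module; the
+# output path is hypothetical and a valid [siliconflow] api_key in config.toml
+# is assumed):
+#
+#   sub_maker = siliconflow_tts(
+#       text="Hello world",
+#       model="FunAudioLLM/CosyVoice2-0.5B",
+#       voice="FunAudioLLM/CosyVoice2-0.5B:alex",
+#       voice_rate=1.0,
+#       voice_file="storage/temp/hello.mp3",
+#   )
+#   # On success, sub_maker.offset holds (start, end) pairs in 100-nanosecond
+#   # units, matching what edge_tts's SubMaker produces.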
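+# Illustrative behavior of _format_text: bracket characters are replaced by
+# spaces and the ends are stripped, which can leave extra inner whitespace,
+# e.g. _format_text("[Opening scene] (music)") returns "Opening scene   music".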
+
+
+def azure_tts_v2(text: str, voice_name: str, voice_file: str) -> Union[SubMaker, None]:
+    parsed_voice_name = is_azure_v2_voice(voice_name)
+    if not parsed_voice_name:
+        # Log the name as passed in, before it was blanked by the failed parse
+        logger.error(f"invalid voice name: {voice_name}")
+        raise ValueError(f"invalid voice name: {voice_name}")
+    voice_name = parsed_voice_name
+    text = text.strip()
+
+    def _format_duration_to_offset(duration) -> int:
+        if isinstance(duration, str):
+            time_obj = datetime.strptime(duration, "%H:%M:%S.%f")
+            milliseconds = (
+                (time_obj.hour * 3600000)
+                + (time_obj.minute * 60000)
+                + (time_obj.second * 1000)
+                + (time_obj.microsecond // 1000)
+            )
+            return milliseconds * 10000
+
+        if isinstance(duration, int):
+            return duration
+
+        return 0
+
+    for i in range(3):
+        try:
+            logger.info(f"start, voice name: {voice_name}, try: {i + 1}")
+
+            import azure.cognitiveservices.speech as speechsdk
+
+            sub_maker = SubMaker()
+
+            def speech_synthesizer_word_boundary_cb(evt: speechsdk.SessionEventArgs):
+                # print('WordBoundary event:')
+                # print('\tBoundaryType: {}'.format(evt.boundary_type))
+                # print('\tAudioOffset: {}ms'.format((evt.audio_offset + 5000)))
+                # print('\tDuration: {}'.format(evt.duration))
+                # print('\tText: {}'.format(evt.text))
+                # print('\tTextOffset: {}'.format(evt.text_offset))
+                # print('\tWordLength: {}'.format(evt.word_length))
+
+                duration = _format_duration_to_offset(str(evt.duration))
+                offset = _format_duration_to_offset(evt.audio_offset)
+                sub_maker.subs.append(evt.text)
+                sub_maker.offset.append((offset, offset + duration))
+
+            # Creates an instance of a speech config with specified subscription key and service region.
+            speech_key = config.azure.get("speech_key", "")
+            service_region = config.azure.get("speech_region", "")
+            if not speech_key or not service_region:
+                logger.error("Azure speech key or region is not set")
+                return None
+
+            audio_config = speechsdk.audio.AudioOutputConfig(
+                filename=voice_file, use_default_speaker=True
+            )
+            speech_config = speechsdk.SpeechConfig(
+                subscription=speech_key, region=service_region
+            )
+            speech_config.speech_synthesis_voice_name = voice_name
+            # speech_config.set_property(property_id=speechsdk.PropertyId.SpeechServiceResponse_RequestSentenceBoundary,
+            #                            value='true')
+            speech_config.set_property(
+                property_id=speechsdk.PropertyId.SpeechServiceResponse_RequestWordBoundary,
+                value="true",
+            )
+
+            speech_config.set_speech_synthesis_output_format(
+                speechsdk.SpeechSynthesisOutputFormat.Audio48Khz192KBitRateMonoMp3
+            )
+            speech_synthesizer = speechsdk.SpeechSynthesizer(
+                audio_config=audio_config, speech_config=speech_config
+            )
+            speech_synthesizer.synthesis_word_boundary.connect(
+                speech_synthesizer_word_boundary_cb
+            )
+
+            result = speech_synthesizer.speak_text_async(text).get()
+            if result.reason == speechsdk.ResultReason.SynthesizingAudioCompleted:
+                logger.success(f"azure v2 speech synthesis succeeded: {voice_file}")
+                return sub_maker
+            elif result.reason == speechsdk.ResultReason.Canceled:
+                cancellation_details = result.cancellation_details
+                logger.error(
+                    f"azure v2 speech synthesis canceled: {cancellation_details.reason}"
+                )
+                if cancellation_details.reason == speechsdk.CancellationReason.Error:
+                    logger.error(
+                        f"azure v2 speech synthesis error: {cancellation_details.error_details}"
+                    )
+            logger.info(f"completed, output file: {voice_file}")
+        except Exception as e:
+            logger.error(f"failed, error: {str(e)}")
+    return None
+
+
+def _format_text(text: str) -> str:
+    # text = text.replace("\n", " ")
+    text = text.replace("[", " ")
+    text = text.replace("]", " ")
+    text = text.replace("(", " ")
+    text = text.replace(")", " ")
+    text = text.replace("{", " ")
+    text = text.replace("}", " ")
+    text = text.strip()
+    return text
+
+
+def create_subtitle(sub_maker: submaker.SubMaker, text: str, subtitle_file: str):
+    """
+    Optimize the subtitle file:
+    1. Split the subtitle text into multiple lines by punctuation.
+    2. Match each line against the text recorded in the SubMaker.
+    3. Generate a new subtitle file.
+    """
+
+    text = _format_text(text)
+
+    def formatter(idx: int, start_time: float, end_time: float, sub_text: str) -> str:
+        """
+        1
+        00:00:00,000 --> 00:00:02,360
+        跑步是一项简单易行的运动
+        """
+        start_t = mktimestamp(start_time).replace(".", ",")
+        end_t = mktimestamp(end_time).replace(".", ",")
+        return f"{idx}\n{start_t} --> {end_t}\n{sub_text}\n"
+
+    start_time = -1.0
+    sub_items = []
+    sub_index = 0
+
+    script_lines = utils.split_string_by_punctuations(text)
+
+    def match_line(_sub_line: str, _sub_index: int):
+        if len(script_lines) <= _sub_index:
+            return ""
+
+        _line = script_lines[_sub_index]
+        if _sub_line == _line:
+            return script_lines[_sub_index].strip()
+
+        _sub_line_ = re.sub(r"[^\w\s]", "", _sub_line)
+        _line_ = re.sub(r"[^\w\s]", "", _line)
+        if _sub_line_ == _line_:
+            return _line_.strip()
+
+        _sub_line_ = re.sub(r"\W+", "", _sub_line)
+        _line_ = re.sub(r"\W+", "", _line)
+        if _sub_line_ == _line_:
+            return _line.strip()
+
+        return ""
+
+    sub_line = ""
+
+    try:
+        for _, (offset, sub) in enumerate(zip(sub_maker.offset, sub_maker.subs)):
+            _start_time, end_time = offset
+            if start_time < 0:
+                start_time = _start_time
+
+            sub = unescape(sub)
+            sub_line += sub
+            sub_text = match_line(sub_line, sub_index)
+            if sub_text:
+                sub_index += 1
+                line = formatter(
+                    idx=sub_index,
+                    start_time=start_time,
+                    end_time=end_time,
+                    sub_text=sub_text,
+                )
+                sub_items.append(line)
+                start_time = -1.0
+                sub_line = ""
+
+        if len(sub_items) == len(script_lines):
+            with open(subtitle_file, "w", encoding="utf-8") as file:
+                file.write("\n".join(sub_items) + "\n")
+            try:
+                sbs = subtitles.file_to_subtitles(subtitle_file, encoding="utf-8")
+                duration = max([tb for ((ta, tb), txt) in sbs])
+                logger.info(
+                    f"completed, subtitle file created: {subtitle_file}, duration: {duration}"
+                )
+            except Exception as e:
+                logger.error(f"failed, error: {str(e)}")
+                os.remove(subtitle_file)
+        else:
+            logger.warning(
+                f"failed, sub_items len: {len(sub_items)}, script_lines len: {len(script_lines)}"
+            )
+
+    except Exception as e:
+        logger.error(f"failed, error: {str(e)}")
+
+
+def get_audio_duration(sub_maker: submaker.SubMaker):
+    """
+    Get the audio duration, in seconds.
+    """
+    if not sub_maker.offset:
+        return 0.0
+    return sub_maker.offset[-1][1] / 10000000
+
+
+if __name__ == "__main__":
+    voice_name = "zh-CN-XiaoxiaoMultilingualNeural-V2-Female"
+    voice_name = parse_voice_name(voice_name)
+    voice_name = is_azure_v2_voice(voice_name)
+    print(voice_name)
+
+    voices = get_all_azure_voices()
+    print(len(voices))
+
+    async def _do():
+        temp_dir = utils.storage_dir("temp")
+
+        voice_names = [
+            "zh-CN-XiaoxiaoMultilingualNeural",
+            # Female voices
+            "zh-CN-XiaoxiaoNeural",
+            "zh-CN-XiaoyiNeural",
+            # Male voices
+            "zh-CN-YunyangNeural",
+            "zh-CN-YunxiNeural",
+        ]
+        text = """
+        静夜思是唐代诗人李白创作的一首五言古诗。这首诗描绘了诗人在寂静的夜晚,看到窗前的明月,不禁想起远方的家乡和亲人,表达了他对家乡和亲人的深深思念之情。全诗内容是:“床前明月光,疑是地上霜。举头望明月,低头思故乡。”在这短短的四句诗中,诗人通过“明月”和“思故乡”的意象,巧妙地表达了离乡背井人的孤独与哀愁。首句“床前明月光”设景立意,通过明亮的月光引出诗人的遐想;“疑是地上霜”增添了夜晚的寒冷感,加深了诗人的孤寂之情;“举头望明月”和“低头思故乡”则是情感的升华,展现了诗人内心深处的乡愁和对家的渴望。这首诗简洁明快,情感真挚,是中国古典诗歌中非常著名的一首,也深受后人喜爱和推崇。
+        """
+
+        text = """
+        What is the meaning of life? 
This question has puzzled philosophers, scientists, and thinkers of all kinds for centuries. Throughout history, various cultures and individuals have come up with their interpretations and beliefs around the purpose of life. Some say it's to seek happiness and self-fulfillment, while others believe it's about contributing to the welfare of others and making a positive impact in the world. Despite the myriad of perspectives, one thing remains clear: the meaning of life is a deeply personal concept that varies from one person to another. It's an existential inquiry that encourages us to reflect on our values, desires, and the essence of our existence. + """ + + text = """ + 预计未来3天深圳冷空气活动频繁,未来两天持续阴天有小雨,出门带好雨具; + 10-11日持续阴天有小雨,日温差小,气温在13-17℃之间,体感阴凉; + 12日天气短暂好转,早晚清凉; + """ + + text = "[Opening scene: A sunny day in a suburban neighborhood. A young boy named Alex, around 8 years old, is playing in his front yard with his loyal dog, Buddy.]\n\n[Camera zooms in on Alex as he throws a ball for Buddy to fetch. Buddy excitedly runs after it and brings it back to Alex.]\n\nAlex: Good boy, Buddy! You're the best dog ever!\n\n[Buddy barks happily and wags his tail.]\n\n[As Alex and Buddy continue playing, a series of potential dangers loom nearby, such as a stray dog approaching, a ball rolling towards the street, and a suspicious-looking stranger walking by.]\n\nAlex: Uh oh, Buddy, look out!\n\n[Buddy senses the danger and immediately springs into action. He barks loudly at the stray dog, scaring it away. Then, he rushes to retrieve the ball before it reaches the street and gently nudges it back towards Alex. Finally, he stands protectively between Alex and the stranger, growling softly to warn them away.]\n\nAlex: Wow, Buddy, you're like my superhero!\n\n[Just as Alex and Buddy are about to head inside, they hear a loud crash from a nearby construction site. They rush over to investigate and find a pile of rubble blocking the path of a kitten trapped underneath.]\n\nAlex: Oh no, Buddy, we have to help!\n\n[Buddy barks in agreement and together they work to carefully move the rubble aside, allowing the kitten to escape unharmed. The kitten gratefully nuzzles against Buddy, who responds with a friendly lick.]\n\nAlex: We did it, Buddy! We saved the day again!\n\n[As Alex and Buddy walk home together, the sun begins to set, casting a warm glow over the neighborhood.]\n\nAlex: Thanks for always being there to watch over me, Buddy. You're not just my dog, you're my best friend.\n\n[Buddy barks happily and nuzzles against Alex as they disappear into the sunset, ready to face whatever adventures tomorrow may bring.]\n\n[End scene.]" + + text = "大家好,我是乔哥,一个想帮你把信用卡全部还清的家伙!\n今天我们要聊的是信用卡的取现功能。\n你是不是也曾经因为一时的资金紧张,而拿着信用卡到ATM机取现?如果是,那你得好好看看这个视频了。\n现在都2024年了,我以为现在不会再有人用信用卡取现功能了。前几天一个粉丝发来一张图片,取现1万。\n信用卡取现有三个弊端。\n一,信用卡取现功能代价可不小。会先收取一个取现手续费,比如这个粉丝,取现1万,按2.5%收取手续费,收取了250元。\n二,信用卡正常消费有最长56天的免息期,但取现不享受免息期。从取现那一天开始,每天按照万5收取利息,这个粉丝用了11天,收取了55元利息。\n三,频繁的取现行为,银行会认为你资金紧张,会被标记为高风险用户,影响你的综合评分和额度。\n那么,如果你资金紧张了,该怎么办呢?\n乔哥给你支一招,用破思机摩擦信用卡,只需要少量的手续费,而且还可以享受最长56天的免息期。\n最后,如果你对玩卡感兴趣,可以找乔哥领取一本《卡神秘籍》,用卡过程中遇到任何疑惑,也欢迎找乔哥交流。\n别忘了,关注乔哥,回复用卡技巧,免费领取《2024用卡技巧》,让我们一起成为用卡高手!" 
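+        # Note: these sample texts simply overwrite one another; only the
+        # `text` assigned last before the synthesis loop below is used.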
+ + text = """ + 2023全年业绩速览 +公司全年累计实现营业收入1476.94亿元,同比增长19.01%,归母净利润747.34亿元,同比增长19.16%。EPS达到59.49元。第四季度单季,营业收入444.25亿元,同比增长20.26%,环比增长31.86%;归母净利润218.58亿元,同比增长19.33%,环比增长29.37%。这一阶段 +的业绩表现不仅突显了公司的增长动力和盈利能力,也反映出公司在竞争激烈的市场环境中保持了良好的发展势头。 +2023年Q4业绩速览 +第四季度,营业收入贡献主要增长点;销售费用高增致盈利能力承压;税金同比上升27%,扰动净利率表现。 +业绩解读 +利润方面,2023全年贵州茅台,>归母净利润增速为19%,其中营业收入正贡献18%,营业成本正贡献百分之一,管理费用正贡献百分之一点四。(注:归母净利润增速值=营业收入增速+各科目贡献,展示贡献/拖累的前四名科目,且要求贡献值/净利润增速>15%) +""" + text = "静夜思是唐代诗人李白创作的一首五言古诗。这首诗描绘了诗人在寂静的夜晚,看到窗前的明月,不禁想起远方的家乡和亲人" + + text = _format_text(text) + lines = utils.split_string_by_punctuations(text) + print(lines) + + for voice_name in voice_names: + voice_file = f"{temp_dir}/tts-{voice_name}.mp3" + subtitle_file = f"{temp_dir}/tts.mp3.srt" + sub_maker = azure_tts_v2( + text=text, voice_name=voice_name, voice_file=voice_file + ) + create_subtitle(sub_maker=sub_maker, text=text, subtitle_file=subtitle_file) + audio_duration = get_audio_duration(sub_maker) + print(f"voice: {voice_name}, audio duration: {audio_duration}s") + + loop = asyncio.get_event_loop_policy().get_event_loop() + try: + loop.run_until_complete(_do()) + finally: + loop.close() diff --git a/app/utils/utils.py b/app/utils/utils.py new file mode 100644 index 0000000000000000000000000000000000000000..7efb521a08f12ff922b2b33dc0c8bcbcdf8b031f --- /dev/null +++ b/app/utils/utils.py @@ -0,0 +1,230 @@ +import json +import locale +import os +from pathlib import Path +import threading +from typing import Any +from uuid import uuid4 + +import urllib3 +from loguru import logger + +from app.models import const + +urllib3.disable_warnings() + + +def get_response(status: int, data: Any = None, message: str = ""): + obj = { + "status": status, + } + if data: + obj["data"] = data + if message: + obj["message"] = message + return obj + + +def to_json(obj): + try: + # Define a helper function to handle different types of objects + def serialize(o): + # If the object is a serializable type, return it directly + if isinstance(o, (int, float, bool, str)) or o is None: + return o + # If the object is binary data, convert it to a base64-encoded string + elif isinstance(o, bytes): + return "*** binary data ***" + # If the object is a dictionary, recursively process each key-value pair + elif isinstance(o, dict): + return {k: serialize(v) for k, v in o.items()} + # If the object is a list or tuple, recursively process each element + elif isinstance(o, (list, tuple)): + return [serialize(item) for item in o] + # If the object is a custom type, attempt to return its __dict__ attribute + elif hasattr(o, "__dict__"): + return serialize(o.__dict__) + # Return None for other cases (or choose to raise an exception) + else: + return None + + # Use the serialize function to process the input object + serialized_obj = serialize(obj) + + # Serialize the processed object into a JSON string + return json.dumps(serialized_obj, ensure_ascii=False, indent=4) + except Exception: + return None + + +def get_uuid(remove_hyphen: bool = False): + u = str(uuid4()) + if remove_hyphen: + u = u.replace("-", "") + return u + + +def root_dir(): + return os.path.dirname(os.path.dirname(os.path.dirname(os.path.realpath(__file__)))) + + +def storage_dir(sub_dir: str = "", create: bool = False): + d = os.path.join(root_dir(), "storage") + if sub_dir: + d = os.path.join(d, sub_dir) + if create and not os.path.exists(d): + os.makedirs(d) + + return d + + +def resource_dir(sub_dir: str = ""): + d = os.path.join(root_dir(), "resource") + if sub_dir: + d = os.path.join(d, sub_dir) + return d + + +def 
task_dir(sub_dir: str = ""):
+    d = os.path.join(storage_dir(), "tasks")
+    if sub_dir:
+        d = os.path.join(d, sub_dir)
+    if not os.path.exists(d):
+        os.makedirs(d)
+    return d
+
+
+def font_dir(sub_dir: str = ""):
+    d = resource_dir("fonts")
+    if sub_dir:
+        d = os.path.join(d, sub_dir)
+    if not os.path.exists(d):
+        os.makedirs(d)
+    return d
+
+
+def song_dir(sub_dir: str = ""):
+    d = resource_dir("songs")
+    if sub_dir:
+        d = os.path.join(d, sub_dir)
+    if not os.path.exists(d):
+        os.makedirs(d)
+    return d
+
+
+def public_dir(sub_dir: str = ""):
+    d = resource_dir("public")
+    if sub_dir:
+        d = os.path.join(d, sub_dir)
+    if not os.path.exists(d):
+        os.makedirs(d)
+    return d
+
+
+def run_in_background(func, *args, **kwargs):
+    def run():
+        try:
+            func(*args, **kwargs)
+        except Exception as e:
+            logger.error(f"run_in_background error: {e}")
+
+    thread = threading.Thread(target=run)
+    thread.start()
+    return thread
+
+
+def time_convert_seconds_to_hmsm(seconds) -> str:
+    hours = int(seconds // 3600)
+    seconds = seconds % 3600
+    minutes = int(seconds // 60)
+    milliseconds = int(seconds * 1000) % 1000
+    seconds = int(seconds % 60)
+    return "{:02d}:{:02d}:{:02d},{:03d}".format(hours, minutes, seconds, milliseconds)
+
+
+def text_to_srt(idx: int, msg: str, start_time: float, end_time: float) -> str:
+    start_time = time_convert_seconds_to_hmsm(start_time)
+    end_time = time_convert_seconds_to_hmsm(end_time)
+    # The template ends right after the subtitle text so the SRT block is not
+    # followed by a line of stray whitespace.
+    srt = """%d
+%s --> %s
+%s
+""" % (
+        idx,
+        start_time,
+        end_time,
+        msg,
+    )
+    return srt
+
+
+def str_contains_punctuation(word):
+    for p in const.PUNCTUATIONS:
+        if p in word:
+            return True
+    return False
+
+
+def split_string_by_punctuations(s):
+    result = []
+    txt = ""
+
+    previous_char = ""
+    next_char = ""
+    for i in range(len(s)):
+        char = s[i]
+        if char == "\n":
+            result.append(txt.strip())
+            txt = ""
+            continue
+
+        if i > 0:
+            previous_char = s[i - 1]
+        if i < len(s) - 1:
+            next_char = s[i + 1]
+
+        if char == "." and previous_char.isdigit() and next_char.isdigit():
+            # In a case like "charged at a 2.5% fee", the dot inside "2.5"
+            # should not be treated as a sentence break.
+            txt += char
+            continue
+
+        if char not in const.PUNCTUATIONS:
+            txt += char
+        else:
+            result.append(txt.strip())
+            txt = ""
+    result.append(txt.strip())
+    # filter empty string
+    result = list(filter(None, result))
+    return result
+
+
+def md5(text):
+    import hashlib
+
+    return hashlib.md5(text.encode("utf-8")).hexdigest()
+
+
+def get_system_locale():
+    try:
+        loc = locale.getdefaultlocale()
+        # zh_CN, zh_TW return zh
+        # en_US, en_GB return en
+        language_code = loc[0].split("_")[0]
+        return language_code
+    except Exception:
+        return "en"
+
+
+def load_locales(i18n_dir):
+    _locales = {}
+    for root, dirs, files in os.walk(i18n_dir):
+        for file in files:
+            if file.endswith(".json"):
+                lang = file.split(".")[0]
+                with open(os.path.join(root, file), "r", encoding="utf-8") as f:
+                    _locales[lang] = json.loads(f.read())
+    return _locales
+
+
+def parse_extension(filename):
+    return Path(filename).suffix.lower().lstrip('.')
diff --git a/config.example.toml b/config.example.toml
new file mode 100644
index 0000000000000000000000000000000000000000..ffe250c096f89385d5afbb9870220c65883a97b6
--- /dev/null
+++ b/config.example.toml
@@ -0,0 +1,214 @@
+[app]
+video_source = "pexels" # "pexels" or "pixabay"
+
+# Whether to hide the config panel in the UI
+hide_config = false
+
+# Pexels API Key
+# Register at https://www.pexels.com/api/ to get your API key.
+# You can use multiple keys to avoid rate limits.
+# For example: pexels_api_keys = ["123adsf4567adf89","abd1321cd13efgfdfhi"]
+# Mind the format: each key must be wrapped in double quotes, and multiple keys are separated by commas
+pexels_api_keys = []
+
+# Pixabay API Key
+# Register at https://pixabay.com/api/docs/ to get your API key.
+# You can use multiple keys to avoid rate limits.
+# For example: pixabay_api_keys = ["123adsf4567adf89","abd1321cd13efgfdfhi"]
+# Mind the format: each key must be wrapped in double quotes, and multiple keys are separated by commas
+pixabay_api_keys = []
+
+# Supported providers:
+# openai
+# moonshot (月之暗面)
+# azure
+# qwen (通义千问)
+# deepseek
+# gemini
+# ollama
+# g4f
+# oneapi
+# cloudflare
+# ernie (文心一言)
+llm_provider = "openai"
+
+########## Pollinations AI Settings
+# Visit https://pollinations.ai/ to learn more
+# API Key is optional - leave empty for public access
+pollinations_api_key = ""
+# Default base URL for Pollinations API
+pollinations_base_url = "https://pollinations.ai/api/v1"
+# Default model for text generation
+pollinations_model_name = "openai-fast"
+
+########## Ollama Settings
+# No need to set it unless you want to use your own proxy
+ollama_base_url = ""
+# Check your available models at https://ollama.com/library
+ollama_model_name = ""
+
+########## OpenAI API Key
+# Get your API key at https://platform.openai.com/api-keys
+openai_api_key = ""
+# No need to set it unless you want to use your own proxy
+openai_base_url = ""
+# Check your available models at https://platform.openai.com/account/limits
+openai_model_name = "gpt-4o-mini"
+
+########## Moonshot API Key
+# Visit https://platform.moonshot.cn/console/api-keys to get your API key.
+moonshot_api_key = ""
+moonshot_base_url = "https://api.moonshot.cn/v1"
+moonshot_model_name = "moonshot-v1-8k"
+
+########## OneAPI API Key
+# Visit https://github.com/songquanpeng/one-api to get your API key
+oneapi_api_key = ""
+oneapi_base_url = ""
+oneapi_model_name = ""
+
+########## G4F
+# Visit https://github.com/xtekky/gpt4free to get more details
+# Supported model list: https://github.com/xtekky/gpt4free/blob/main/g4f/models.py
+g4f_model_name = "gpt-3.5-turbo"
+
+########## Azure API Key
+# Visit https://learn.microsoft.com/zh-cn/azure/ai-services/openai/ to get more details
+# API documentation: https://learn.microsoft.com/zh-cn/azure/ai-services/openai/reference
+azure_api_key = ""
+azure_base_url = ""
+azure_model_name = "gpt-35-turbo" # replace with your model deployment name
+azure_api_version = "2024-02-15-preview"
+
+########## Gemini API Key
+gemini_api_key = ""
+gemini_model_name = "gemini-1.0-pro"
+
+########## Qwen API Key
+# Visit https://dashscope.console.aliyun.com/apiKey to get your API key
+# Visit below links to get more details
+# https://tongyi.aliyun.com/qianwen/
+# https://help.aliyun.com/zh/dashscope/developer-reference/model-introduction
+qwen_api_key = ""
+qwen_model_name = "qwen-max"
+
+
+########## DeepSeek API Key
+# Visit https://platform.deepseek.com/api_keys to get your API key
+deepseek_api_key = ""
+deepseek_base_url = "https://api.deepseek.com"
+deepseek_model_name = "deepseek-chat"
+
+# Subtitle Provider, "edge" or "whisper"
+# If empty, the subtitle will not be generated
+subtitle_provider = "edge"
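+# Illustrative minimal setup (hypothetical placeholder values, not real
+# credentials): generating scripts with DeepSeek while keeping edge subtitles
+# would look like:
+#   llm_provider = "deepseek"
+#   deepseek_api_key = "sk-xxxx"
+#   deepseek_model_name = "deepseek-chat"
+#   subtitle_provider = "edge"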
+
+#
+# ImageMagick
+#
+# Once you have installed it, ImageMagick will be automatically detected, except on Windows!
+# On Windows, for example "C:\Program Files (x86)\ImageMagick-7.1.1-Q16-HDRI\magick.exe"
+# Download from https://imagemagick.org/archive/binaries/ImageMagick-7.1.1-29-Q16-x64-static.exe
+
+# imagemagick_path = "C:\\Program Files (x86)\\ImageMagick-7.1.1-Q16\\magick.exe"
+
+
+#
+# FFMPEG
+#
+# Under normal circumstances, ffmpeg is downloaded automatically and detected automatically.
+# However, if there is an issue with your environment that prevents automatic downloading, you might encounter the following error:
+#   RuntimeError: No ffmpeg exe could be found.
+#   Install ffmpeg on your system, or set the IMAGEIO_FFMPEG_EXE environment variable.
+# In such cases, you can manually download ffmpeg and set the ffmpeg_path, download link: https://www.gyan.dev/ffmpeg/builds/
+
+# ffmpeg_path = "C:\\Users\\harry\\Downloads\\ffmpeg.exe"
+#########################################################################################
+
+# When the video is successfully generated, the API service provides a download endpoint for the video, defaulting to the service's current address and listening port.
+# For example, http://127.0.0.1:8080/tasks/6357f542-a4e1-46a1-b4c9-bf3bd0df5285/final-1.mp4
+# If you need to provide the service externally using a domain name (usually done with nginx as a proxy), you can set it to your domain name.
+# For example, https://xxxx.com/tasks/6357f542-a4e1-46a1-b4c9-bf3bd0df5285/final-1.mp4
+# endpoint="https://xxxx.com"
+endpoint = ""
+
+
+# Video material storage location
+# material_directory = ""                    # download video materials to the default folder, ./storage/cache_videos under the current project
+# material_directory = "/user/harry/videos"  # download video materials to a specified folder
+# material_directory = "task"                # download video materials into the current task's folder; this method does not allow sharing of already downloaded materials
+
+material_directory = ""
+
+# Used for state management of the task
+enable_redis = false
+redis_host = "localhost"
+redis_port = 6379
+redis_db = 0
+redis_password = ""
+
+# Maximum number of concurrent text-to-video tasks
+max_concurrent_tasks = 5
+
+
+[whisper]
+# Only effective when subtitle_provider is "whisper"
+
+# Run on GPU with FP16
+# model = WhisperModel(model_size, device="cuda", compute_type="float16")
+
+# Run on GPU with INT8
+# model = WhisperModel(model_size, device="cuda", compute_type="int8_float16")
+
+# Run on CPU with INT8
+# model = WhisperModel(model_size, device="cpu", compute_type="int8")
+
+# recommended model_size: "large-v3"
+model_size = "large-v3"
+# if you want to use GPU, set device="cuda" (faster-whisper expects lowercase device names)
+device = "cpu"
+compute_type = "int8"
+
+
+[proxy]
+### Use a proxy to access the Pexels API
+### Format: "http://<username>:<password>@<host>:<port>"
+### Example: "http://user:pass@proxy:1234"
+### Doc: https://requests.readthedocs.io/en/latest/user/advanced/#proxies
+
+# http = "http://10.10.1.10:3128"
+# https = "http://10.10.1.10:1080"
+
+[azure]
+# Azure Speech API Key
+# Get your API key at https://portal.azure.com/#view/Microsoft_Azure_ProjectOxford/CognitiveServicesHub/~/SpeechServices
+speech_key = ""
+speech_region = ""
+
+[siliconflow]
+# SiliconFlow API Key
+# Get your API key at https://siliconflow.cn
+api_key = ""
+
+[ui]
+# UI related settings
+# Whether to hide logs in the UI
+hide_log = false
diff --git a/docker-compose.yml b/docker-compose.yml
new file mode 100644
index 0000000000000000000000000000000000000000..acad46263535796b11ed3f3423309f52fe3eaed0
--- /dev/null
+++ b/docker-compose.yml
@@ -0,0 +1,24 @@
+x-common-volumes: &common-volumes
+  - ./:/MoneyPrinterTurbo
+
+services:
+  webui:
+    build:
+      context: .
+      dockerfile: Dockerfile
+    container_name: "moneyprinterturbo-webui"
+    ports:
+      - "8501:8501"
+    command: [ "streamlit", "run", "./webui/Main.py","--browser.serverAddress=127.0.0.1","--server.enableCORS=True","--browser.gatherUsageStats=False" ]
+    volumes: *common-volumes
+    restart: always
+  api:
+    build:
+      context: .
+ dockerfile: Dockerfile + container_name: "moneyprinterturbo-api" + ports: + - "8080:8080" + command: [ "python3", "main.py" ] + volumes: *common-volumes + restart: always \ No newline at end of file diff --git a/docs/MoneyPrinterTurbo.ipynb b/docs/MoneyPrinterTurbo.ipynb new file mode 100644 index 0000000000000000000000000000000000000000..9ec1087b8e3595b69c44dee2a92d93453ec36469 --- /dev/null +++ b/docs/MoneyPrinterTurbo.ipynb @@ -0,0 +1,118 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# MoneyPrinterTurbo Setup Guide\n", + "\n", + "This notebook will guide you through the process of setting up [MoneyPrinterTurbo](https://github.com/harry0703/MoneyPrinterTurbo)." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 1. Clone Repository and Install Dependencies\n", + "\n", + "First, we'll clone the repository from GitHub and install all required packages:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "S8Eu-aQarY_B" + }, + "outputs": [], + "source": [ + "!git clone https://github.com/harry0703/MoneyPrinterTurbo.git\n", + "%cd MoneyPrinterTurbo\n", + "!pip install -q -r requirements.txt\n", + "!pip install pyngrok --quiet" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 2. Configure ngrok for Remote Access\n", + "\n", + "We'll use ngrok to create a secure tunnel to expose our local Streamlit server to the internet.\n", + "\n", + "**Important**: You need to get your authentication token from the [ngrok dashboard](https://dashboard.ngrok.com/get-started/your-authtoken) to use this service." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from pyngrok import ngrok\n", + "\n", + "# Terminate any existing ngrok tunnels\n", + "ngrok.kill()\n", + "\n", + "# Set your authentication token\n", + "# Replace \"your_ngrok_auth_token\" with your actual token\n", + "ngrok.set_auth_token(\"your_ngrok_auth_token\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 3. Launch Application and Generate Public URL\n", + "\n", + "Now we'll start the Streamlit server and create an ngrok tunnel to make it accessible online:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "collapsed": true, + "id": "oahsIOXmwjl9", + "outputId": "ee23a96c-af21-4207-deb7-9fab69e0c05e" + }, + "outputs": [], + "source": [ + "import subprocess\n", + "import time\n", + "\n", + "print(\"🚀 Starting MoneyPrinterTurbo...\")\n", + "# Start Streamlit server on port 8501\n", + "streamlit_proc = subprocess.Popen([\n", + " \"streamlit\", \"run\", \"./webui/Main.py\", \"--server.port=8501\"\n", + "])\n", + "\n", + "# Wait for the server to initialize\n", + "time.sleep(5)\n", + "\n", + "print(\"🌐 Creating ngrok tunnel to expose the MoneyPrinterTurbo...\")\n", + "public_url = ngrok.connect(8501, bind_tls=True)\n", + "\n", + "print(\"✅ Deployment complete! 
Access your MoneyPrinterTurbo at:\")\n", + "print(public_url)" + ] + } + ], + "metadata": { + "colab": { + "provenance": [] + }, + "kernelspec": { + "display_name": "Python 3", + "name": "python3" + }, + "language_info": { + "name": "python" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/docs/api.jpg b/docs/api.jpg new file mode 100644 index 0000000000000000000000000000000000000000..427d721009351bb34d287fe55b554eacca545c61 --- /dev/null +++ b/docs/api.jpg @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:faaecfd5a7581f06a6f7193917c8d3dcc84bb2ac365298fe156460f20c64958a +size 115450 diff --git a/docs/picwish.com.jpg b/docs/picwish.com.jpg new file mode 100644 index 0000000000000000000000000000000000000000..498b3fdb93c0ec1272387be5bb03d9437fc66d46 --- /dev/null +++ b/docs/picwish.com.jpg @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:066f53ea8441931adf84d6128cad91b4bc20cbb4ac519048de538ad508cc52b1 +size 151340 diff --git a/docs/picwish.jpg b/docs/picwish.jpg new file mode 100644 index 0000000000000000000000000000000000000000..743568e94769516f70056719c46e04d874d7ec74 --- /dev/null +++ b/docs/picwish.jpg @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:741cad7b8b6c98c486037291ab359066d14132569d3b6562b2f3f3db6e6ce29f +size 182738 diff --git a/docs/reccloud.cn.jpg b/docs/reccloud.cn.jpg new file mode 100644 index 0000000000000000000000000000000000000000..b69ecdef72c2a5d7d1278d20d550a0839e81fa02 --- /dev/null +++ b/docs/reccloud.cn.jpg @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9aac9aac624f86ebcdf1fbe4f5ba73014d868ff229fb584b53dc02cd5d373435 +size 301206 diff --git a/docs/reccloud.com.jpg b/docs/reccloud.com.jpg new file mode 100644 index 0000000000000000000000000000000000000000..3571a9e6642e3690745a7365ab911de0f35459a0 --- /dev/null +++ b/docs/reccloud.com.jpg @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f0e7ca1c3d48a2d8f3921f3dd1cf2c0a2863e28d0950dee2556dbc284b91b7a5 +size 261606 diff --git a/docs/voice-list.txt b/docs/voice-list.txt new file mode 100644 index 0000000000000000000000000000000000000000..4672117c65afe2b5e92cb18caf503aadf1df561d --- /dev/null +++ b/docs/voice-list.txt @@ -0,0 +1,941 @@ +Name: af-ZA-AdriNeural +Gender: Female + +Name: af-ZA-WillemNeural +Gender: Male + +Name: am-ET-AmehaNeural +Gender: Male + +Name: am-ET-MekdesNeural +Gender: Female + +Name: ar-AE-FatimaNeural +Gender: Female + +Name: ar-AE-HamdanNeural +Gender: Male + +Name: ar-BH-AliNeural +Gender: Male + +Name: ar-BH-LailaNeural +Gender: Female + +Name: ar-DZ-AminaNeural +Gender: Female + +Name: ar-DZ-IsmaelNeural +Gender: Male + +Name: ar-EG-SalmaNeural +Gender: Female + +Name: ar-EG-ShakirNeural +Gender: Male + +Name: ar-IQ-BasselNeural +Gender: Male + +Name: ar-IQ-RanaNeural +Gender: Female + +Name: ar-JO-SanaNeural +Gender: Female + +Name: ar-JO-TaimNeural +Gender: Male + +Name: ar-KW-FahedNeural +Gender: Male + +Name: ar-KW-NouraNeural +Gender: Female + +Name: ar-LB-LaylaNeural +Gender: Female + +Name: ar-LB-RamiNeural +Gender: Male + +Name: ar-LY-ImanNeural +Gender: Female + +Name: ar-LY-OmarNeural +Gender: Male + +Name: ar-MA-JamalNeural +Gender: Male + +Name: ar-MA-MounaNeural +Gender: Female + +Name: ar-OM-AbdullahNeural +Gender: Male + +Name: ar-OM-AyshaNeural +Gender: Female + +Name: ar-QA-AmalNeural +Gender: Female + +Name: ar-QA-MoazNeural +Gender: Male + +Name: ar-SA-HamedNeural +Gender: Male + +Name: ar-SA-ZariyahNeural +Gender: Female + +Name: 
ar-SY-AmanyNeural +Gender: Female + +Name: ar-SY-LaithNeural +Gender: Male + +Name: ar-TN-HediNeural +Gender: Male + +Name: ar-TN-ReemNeural +Gender: Female + +Name: ar-YE-MaryamNeural +Gender: Female + +Name: ar-YE-SalehNeural +Gender: Male + +Name: az-AZ-BabekNeural +Gender: Male + +Name: az-AZ-BanuNeural +Gender: Female + +Name: bg-BG-BorislavNeural +Gender: Male + +Name: bg-BG-KalinaNeural +Gender: Female + +Name: bn-BD-NabanitaNeural +Gender: Female + +Name: bn-BD-PradeepNeural +Gender: Male + +Name: bn-IN-BashkarNeural +Gender: Male + +Name: bn-IN-TanishaaNeural +Gender: Female + +Name: bs-BA-GoranNeural +Gender: Male + +Name: bs-BA-VesnaNeural +Gender: Female + +Name: ca-ES-EnricNeural +Gender: Male + +Name: ca-ES-JoanaNeural +Gender: Female + +Name: cs-CZ-AntoninNeural +Gender: Male + +Name: cs-CZ-VlastaNeural +Gender: Female + +Name: cy-GB-AledNeural +Gender: Male + +Name: cy-GB-NiaNeural +Gender: Female + +Name: da-DK-ChristelNeural +Gender: Female + +Name: da-DK-JeppeNeural +Gender: Male + +Name: de-AT-IngridNeural +Gender: Female + +Name: de-AT-JonasNeural +Gender: Male + +Name: de-CH-JanNeural +Gender: Male + +Name: de-CH-LeniNeural +Gender: Female + +Name: de-DE-AmalaNeural +Gender: Female + +Name: de-DE-ConradNeural +Gender: Male + +Name: de-DE-FlorianMultilingualNeural +Gender: Male + +Name: de-DE-KatjaNeural +Gender: Female + +Name: de-DE-KillianNeural +Gender: Male + +Name: de-DE-SeraphinaMultilingualNeural +Gender: Female + +Name: el-GR-AthinaNeural +Gender: Female + +Name: el-GR-NestorasNeural +Gender: Male + +Name: en-AU-NatashaNeural +Gender: Female + +Name: en-AU-WilliamNeural +Gender: Male + +Name: en-CA-ClaraNeural +Gender: Female + +Name: en-CA-LiamNeural +Gender: Male + +Name: en-GB-LibbyNeural +Gender: Female + +Name: en-GB-MaisieNeural +Gender: Female + +Name: en-GB-RyanNeural +Gender: Male + +Name: en-GB-SoniaNeural +Gender: Female + +Name: en-GB-ThomasNeural +Gender: Male + +Name: en-HK-SamNeural +Gender: Male + +Name: en-HK-YanNeural +Gender: Female + +Name: en-IE-ConnorNeural +Gender: Male + +Name: en-IE-EmilyNeural +Gender: Female + +Name: en-IN-NeerjaExpressiveNeural +Gender: Female + +Name: en-IN-NeerjaNeural +Gender: Female + +Name: en-IN-PrabhatNeural +Gender: Male + +Name: en-KE-AsiliaNeural +Gender: Female + +Name: en-KE-ChilembaNeural +Gender: Male + +Name: en-NG-AbeoNeural +Gender: Male + +Name: en-NG-EzinneNeural +Gender: Female + +Name: en-NZ-MitchellNeural +Gender: Male + +Name: en-NZ-MollyNeural +Gender: Female + +Name: en-PH-JamesNeural +Gender: Male + +Name: en-PH-RosaNeural +Gender: Female + +Name: en-SG-LunaNeural +Gender: Female + +Name: en-SG-WayneNeural +Gender: Male + +Name: en-TZ-ElimuNeural +Gender: Male + +Name: en-TZ-ImaniNeural +Gender: Female + +Name: en-US-AnaNeural +Gender: Female + +Name: en-US-AndrewNeural +Gender: Male + +Name: en-US-AriaNeural +Gender: Female + +Name: en-US-AvaNeural +Gender: Female + +Name: en-US-BrianNeural +Gender: Male + +Name: en-US-ChristopherNeural +Gender: Male + +Name: en-US-EmmaNeural +Gender: Female + +Name: en-US-EricNeural +Gender: Male + +Name: en-US-GuyNeural +Gender: Male + +Name: en-US-JennyNeural +Gender: Female + +Name: en-US-MichelleNeural +Gender: Female + +Name: en-US-RogerNeural +Gender: Male + +Name: en-US-SteffanNeural +Gender: Male + +Name: en-ZA-LeahNeural +Gender: Female + +Name: en-ZA-LukeNeural +Gender: Male + +Name: es-AR-ElenaNeural +Gender: Female + +Name: es-AR-TomasNeural +Gender: Male + +Name: es-BO-MarceloNeural +Gender: Male + +Name: es-BO-SofiaNeural +Gender: Female + 
+Name: es-CL-CatalinaNeural +Gender: Female + +Name: es-CL-LorenzoNeural +Gender: Male + +Name: es-CO-GonzaloNeural +Gender: Male + +Name: es-CO-SalomeNeural +Gender: Female + +Name: es-CR-JuanNeural +Gender: Male + +Name: es-CR-MariaNeural +Gender: Female + +Name: es-CU-BelkysNeural +Gender: Female + +Name: es-CU-ManuelNeural +Gender: Male + +Name: es-DO-EmilioNeural +Gender: Male + +Name: es-DO-RamonaNeural +Gender: Female + +Name: es-EC-AndreaNeural +Gender: Female + +Name: es-EC-LuisNeural +Gender: Male + +Name: es-ES-AlvaroNeural +Gender: Male + +Name: es-ES-ElviraNeural +Gender: Female + +Name: es-ES-XimenaNeural +Gender: Female + +Name: es-GQ-JavierNeural +Gender: Male + +Name: es-GQ-TeresaNeural +Gender: Female + +Name: es-GT-AndresNeural +Gender: Male + +Name: es-GT-MartaNeural +Gender: Female + +Name: es-HN-CarlosNeural +Gender: Male + +Name: es-HN-KarlaNeural +Gender: Female + +Name: es-MX-DaliaNeural +Gender: Female + +Name: es-MX-JorgeNeural +Gender: Male + +Name: es-NI-FedericoNeural +Gender: Male + +Name: es-NI-YolandaNeural +Gender: Female + +Name: es-PA-MargaritaNeural +Gender: Female + +Name: es-PA-RobertoNeural +Gender: Male + +Name: es-PE-AlexNeural +Gender: Male + +Name: es-PE-CamilaNeural +Gender: Female + +Name: es-PR-KarinaNeural +Gender: Female + +Name: es-PR-VictorNeural +Gender: Male + +Name: es-PY-MarioNeural +Gender: Male + +Name: es-PY-TaniaNeural +Gender: Female + +Name: es-SV-LorenaNeural +Gender: Female + +Name: es-SV-RodrigoNeural +Gender: Male + +Name: es-US-AlonsoNeural +Gender: Male + +Name: es-US-PalomaNeural +Gender: Female + +Name: es-UY-MateoNeural +Gender: Male + +Name: es-UY-ValentinaNeural +Gender: Female + +Name: es-VE-PaolaNeural +Gender: Female + +Name: es-VE-SebastianNeural +Gender: Male + +Name: et-EE-AnuNeural +Gender: Female + +Name: et-EE-KertNeural +Gender: Male + +Name: fa-IR-DilaraNeural +Gender: Female + +Name: fa-IR-FaridNeural +Gender: Male + +Name: fi-FI-HarriNeural +Gender: Male + +Name: fi-FI-NooraNeural +Gender: Female + +Name: fil-PH-AngeloNeural +Gender: Male + +Name: fil-PH-BlessicaNeural +Gender: Female + +Name: fr-BE-CharlineNeural +Gender: Female + +Name: fr-BE-GerardNeural +Gender: Male + +Name: fr-CA-AntoineNeural +Gender: Male + +Name: fr-CA-JeanNeural +Gender: Male + +Name: fr-CA-SylvieNeural +Gender: Female + +Name: fr-CA-ThierryNeural +Gender: Male + +Name: fr-CH-ArianeNeural +Gender: Female + +Name: fr-CH-FabriceNeural +Gender: Male + +Name: fr-FR-DeniseNeural +Gender: Female + +Name: fr-FR-EloiseNeural +Gender: Female + +Name: fr-FR-HenriNeural +Gender: Male + +Name: fr-FR-RemyMultilingualNeural +Gender: Male + +Name: fr-FR-VivienneMultilingualNeural +Gender: Female + +Name: ga-IE-ColmNeural +Gender: Male + +Name: ga-IE-OrlaNeural +Gender: Female + +Name: gl-ES-RoiNeural +Gender: Male + +Name: gl-ES-SabelaNeural +Gender: Female + +Name: gu-IN-DhwaniNeural +Gender: Female + +Name: gu-IN-NiranjanNeural +Gender: Male + +Name: he-IL-AvriNeural +Gender: Male + +Name: he-IL-HilaNeural +Gender: Female + +Name: hi-IN-MadhurNeural +Gender: Male + +Name: hi-IN-SwaraNeural +Gender: Female + +Name: hr-HR-GabrijelaNeural +Gender: Female + +Name: hr-HR-SreckoNeural +Gender: Male + +Name: hu-HU-NoemiNeural +Gender: Female + +Name: hu-HU-TamasNeural +Gender: Male + +Name: id-ID-ArdiNeural +Gender: Male + +Name: id-ID-GadisNeural +Gender: Female + +Name: is-IS-GudrunNeural +Gender: Female + +Name: is-IS-GunnarNeural +Gender: Male + +Name: it-IT-DiegoNeural +Gender: Male + +Name: it-IT-ElsaNeural +Gender: Female + +Name: 
it-IT-GiuseppeNeural +Gender: Male + +Name: it-IT-IsabellaNeural +Gender: Female + +Name: ja-JP-KeitaNeural +Gender: Male + +Name: ja-JP-NanamiNeural +Gender: Female + +Name: jv-ID-DimasNeural +Gender: Male + +Name: jv-ID-SitiNeural +Gender: Female + +Name: ka-GE-EkaNeural +Gender: Female + +Name: ka-GE-GiorgiNeural +Gender: Male + +Name: kk-KZ-AigulNeural +Gender: Female + +Name: kk-KZ-DauletNeural +Gender: Male + +Name: km-KH-PisethNeural +Gender: Male + +Name: km-KH-SreymomNeural +Gender: Female + +Name: kn-IN-GaganNeural +Gender: Male + +Name: kn-IN-SapnaNeural +Gender: Female + +Name: ko-KR-HyunsuNeural +Gender: Male + +Name: ko-KR-InJoonNeural +Gender: Male + +Name: ko-KR-SunHiNeural +Gender: Female + +Name: lo-LA-ChanthavongNeural +Gender: Male + +Name: lo-LA-KeomanyNeural +Gender: Female + +Name: lt-LT-LeonasNeural +Gender: Male + +Name: lt-LT-OnaNeural +Gender: Female + +Name: lv-LV-EveritaNeural +Gender: Female + +Name: lv-LV-NilsNeural +Gender: Male + +Name: mk-MK-AleksandarNeural +Gender: Male + +Name: mk-MK-MarijaNeural +Gender: Female + +Name: ml-IN-MidhunNeural +Gender: Male + +Name: ml-IN-SobhanaNeural +Gender: Female + +Name: mn-MN-BataaNeural +Gender: Male + +Name: mn-MN-YesuiNeural +Gender: Female + +Name: mr-IN-AarohiNeural +Gender: Female + +Name: mr-IN-ManoharNeural +Gender: Male + +Name: ms-MY-OsmanNeural +Gender: Male + +Name: ms-MY-YasminNeural +Gender: Female + +Name: mt-MT-GraceNeural +Gender: Female + +Name: mt-MT-JosephNeural +Gender: Male + +Name: my-MM-NilarNeural +Gender: Female + +Name: my-MM-ThihaNeural +Gender: Male + +Name: nb-NO-FinnNeural +Gender: Male + +Name: nb-NO-PernilleNeural +Gender: Female + +Name: ne-NP-HemkalaNeural +Gender: Female + +Name: ne-NP-SagarNeural +Gender: Male + +Name: nl-BE-ArnaudNeural +Gender: Male + +Name: nl-BE-DenaNeural +Gender: Female + +Name: nl-NL-ColetteNeural +Gender: Female + +Name: nl-NL-FennaNeural +Gender: Female + +Name: nl-NL-MaartenNeural +Gender: Male + +Name: pl-PL-MarekNeural +Gender: Male + +Name: pl-PL-ZofiaNeural +Gender: Female + +Name: ps-AF-GulNawazNeural +Gender: Male + +Name: ps-AF-LatifaNeural +Gender: Female + +Name: pt-BR-AntonioNeural +Gender: Male + +Name: pt-BR-FranciscaNeural +Gender: Female + +Name: pt-BR-ThalitaNeural +Gender: Female + +Name: pt-PT-DuarteNeural +Gender: Male + +Name: pt-PT-RaquelNeural +Gender: Female + +Name: ro-RO-AlinaNeural +Gender: Female + +Name: ro-RO-EmilNeural +Gender: Male + +Name: ru-RU-DmitryNeural +Gender: Male + +Name: ru-RU-SvetlanaNeural +Gender: Female + +Name: si-LK-SameeraNeural +Gender: Male + +Name: si-LK-ThiliniNeural +Gender: Female + +Name: sk-SK-LukasNeural +Gender: Male + +Name: sk-SK-ViktoriaNeural +Gender: Female + +Name: sl-SI-PetraNeural +Gender: Female + +Name: sl-SI-RokNeural +Gender: Male + +Name: so-SO-MuuseNeural +Gender: Male + +Name: so-SO-UbaxNeural +Gender: Female + +Name: sq-AL-AnilaNeural +Gender: Female + +Name: sq-AL-IlirNeural +Gender: Male + +Name: sr-RS-NicholasNeural +Gender: Male + +Name: sr-RS-SophieNeural +Gender: Female + +Name: su-ID-JajangNeural +Gender: Male + +Name: su-ID-TutiNeural +Gender: Female + +Name: sv-SE-MattiasNeural +Gender: Male + +Name: sv-SE-SofieNeural +Gender: Female + +Name: sw-KE-RafikiNeural +Gender: Male + +Name: sw-KE-ZuriNeural +Gender: Female + +Name: sw-TZ-DaudiNeural +Gender: Male + +Name: sw-TZ-RehemaNeural +Gender: Female + +Name: ta-IN-PallaviNeural +Gender: Female + +Name: ta-IN-ValluvarNeural +Gender: Male + +Name: ta-LK-KumarNeural +Gender: Male + +Name: ta-LK-SaranyaNeural +Gender: Female + 
+Name: ta-MY-KaniNeural +Gender: Female + +Name: ta-MY-SuryaNeural +Gender: Male + +Name: ta-SG-AnbuNeural +Gender: Male + +Name: ta-SG-VenbaNeural +Gender: Female + +Name: te-IN-MohanNeural +Gender: Male + +Name: te-IN-ShrutiNeural +Gender: Female + +Name: th-TH-NiwatNeural +Gender: Male + +Name: th-TH-PremwadeeNeural +Gender: Female + +Name: tr-TR-AhmetNeural +Gender: Male + +Name: tr-TR-EmelNeural +Gender: Female + +Name: uk-UA-OstapNeural +Gender: Male + +Name: uk-UA-PolinaNeural +Gender: Female + +Name: ur-IN-GulNeural +Gender: Female + +Name: ur-IN-SalmanNeural +Gender: Male + +Name: ur-PK-AsadNeural +Gender: Male + +Name: ur-PK-UzmaNeural +Gender: Female + +Name: uz-UZ-MadinaNeural +Gender: Female + +Name: uz-UZ-SardorNeural +Gender: Male + +Name: vi-VN-HoaiMyNeural +Gender: Female + +Name: vi-VN-NamMinhNeural +Gender: Male + +Name: zh-CN-XiaoxiaoNeural +Gender: Female + +Name: zh-CN-XiaoyiNeural +Gender: Female + +Name: zh-CN-YunjianNeural +Gender: Male + +Name: zh-CN-YunxiNeural +Gender: Male + +Name: zh-CN-YunxiaNeural +Gender: Male + +Name: zh-CN-YunyangNeural +Gender: Male + +Name: zh-CN-liaoning-XiaobeiNeural +Gender: Female + +Name: zh-CN-shaanxi-XiaoniNeural +Gender: Female + +Name: zh-HK-HiuGaaiNeural +Gender: Female + +Name: zh-HK-HiuMaanNeural +Gender: Female + +Name: zh-HK-WanLungNeural +Gender: Male + +Name: zh-TW-HsiaoChenNeural +Gender: Female + +Name: zh-TW-HsiaoYuNeural +Gender: Female + +Name: zh-TW-YunJheNeural +Gender: Male + +Name: zu-ZA-ThandoNeural +Gender: Female + +Name: zu-ZA-ThembaNeural +Gender: Male diff --git a/docs/webui-en.jpg b/docs/webui-en.jpg new file mode 100644 index 0000000000000000000000000000000000000000..82988f50ee7c9479b4657489a6197ccb59a1980a --- /dev/null +++ b/docs/webui-en.jpg @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a9e4f07eaab5419bc2847742d5b79719746a72aba79c1400bdc676a60bb8792f +size 683144 diff --git a/docs/webui.jpg b/docs/webui.jpg new file mode 100644 index 0000000000000000000000000000000000000000..7ef8e56a650a26cfaa68b7248ade21533b2a4233 --- /dev/null +++ b/docs/webui.jpg @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:05cfe7b638b347c70f09d5838dc32e38b2f962d68ad26f8128ab9ac16f62a8d7 +size 670211 diff --git a/main.py b/main.py new file mode 100644 index 0000000000000000000000000000000000000000..e84f32ba6c7d9d9db17e4f805abc6a43bbde28f4 --- /dev/null +++ b/main.py @@ -0,0 +1,16 @@ +import uvicorn +from loguru import logger + +from app.config import config + +if __name__ == "__main__": + logger.info( + "start server, docs: http://127.0.0.1:" + str(config.listen_port) + "/docs" + ) + uvicorn.run( + app="app.asgi:app", + host=config.listen_host, + port=config.listen_port, + reload=config.reload_debug, + log_level="warning", + ) diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..a1731f699c4db9c9671b0a95ea9738d15693d74b --- /dev/null +++ b/requirements.txt @@ -0,0 +1,16 @@ +moviepy==2.1.2 +streamlit==1.45.0 +edge_tts==6.1.19 +fastapi==0.115.6 +uvicorn==0.32.1 +openai==1.56.1 +faster-whisper==1.1.0 +loguru==0.7.3 +google.generativeai==0.8.3 +dashscope==1.20.14 +g4f==0.5.2.2 +azure-cognitiveservices-speech==1.41.1 +redis==5.2.0 +python-multipart==0.0.19 +pyyaml +requests>=2.31.0 diff --git a/resource/fonts/Charm-Bold.ttf b/resource/fonts/Charm-Bold.ttf new file mode 100644 index 0000000000000000000000000000000000000000..268c52af009aac1679d7e1a7bfb4a09955309a4d --- /dev/null +++ b/resource/fonts/Charm-Bold.ttf @@ -0,0 
+1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e7b614c116724be24140c25f71a218be04cd7f0c32c33423aa0571963a0027eb +size 135332 diff --git a/resource/fonts/Charm-Regular.ttf b/resource/fonts/Charm-Regular.ttf new file mode 100644 index 0000000000000000000000000000000000000000..89da2aeddbf2ca8e2034437ffb5c08599067c0a9 --- /dev/null +++ b/resource/fonts/Charm-Regular.ttf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e3df8c7ae07f5d8cde91b4a033f4d281c0b9f3014f00c53644a11907b0ad08f6 +size 134560 diff --git a/resource/fonts/MicrosoftYaHeiBold.ttc b/resource/fonts/MicrosoftYaHeiBold.ttc new file mode 100644 index 0000000000000000000000000000000000000000..2416616845f1ce94c9f6b56aa31117698d678dcf --- /dev/null +++ b/resource/fonts/MicrosoftYaHeiBold.ttc @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:519309b7ab0479c4dc3ace5e291de5a8702175be5586e165bc810267bd4619a5 +size 16880832 diff --git a/resource/fonts/MicrosoftYaHeiNormal.ttc b/resource/fonts/MicrosoftYaHeiNormal.ttc new file mode 100644 index 0000000000000000000000000000000000000000..7cd8e2c2258692620b025a70eec5744e6e3c31d0 --- /dev/null +++ b/resource/fonts/MicrosoftYaHeiNormal.ttc @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3084f1f88369af6bf9989c909024164d953d1e38d08734f05f28ef24b2f9d577 +size 19701556 diff --git a/resource/fonts/STHeitiLight.ttc b/resource/fonts/STHeitiLight.ttc new file mode 100644 index 0000000000000000000000000000000000000000..b64d2b9888be8b2234dc7a3cf1ba84af9bab3e86 --- /dev/null +++ b/resource/fonts/STHeitiLight.ttc @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a57b0316cc0544f682b8fb9855e14ade79ae77340ef6a01ba9210e25b4c5a5b7 +size 55783456 diff --git a/resource/fonts/STHeitiMedium.ttc b/resource/fonts/STHeitiMedium.ttc new file mode 100644 index 0000000000000000000000000000000000000000..fbd8d2cf6b093eaf852ccd97b9d6c5069690e0cb --- /dev/null +++ b/resource/fonts/STHeitiMedium.ttc @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f8fa4a63e2cf500e98e64d4c73260daaba049306cf85dec9e3729bc285b7d645 +size 55754164 diff --git a/resource/fonts/UTM Kabel KT.ttf b/resource/fonts/UTM Kabel KT.ttf new file mode 100644 index 0000000000000000000000000000000000000000..399bd07dde4a9961244db6f4c57ab2bf3e96d7e6 Binary files /dev/null and b/resource/fonts/UTM Kabel KT.ttf differ diff --git a/resource/public/index.html b/resource/public/index.html new file mode 100644 index 0000000000000000000000000000000000000000..45e8037e70dd8a6e625c21c29e7e3984f9caa365 --- /dev/null +++ b/resource/public/index.html @@ -0,0 +1,19 @@ + + + + + MoneyPrinterTurbo + + +

+MoneyPrinterTurbo
+https://github.com/harry0703/MoneyPrinterTurbo
+只需提供一个视频主题或关键词,就可以全自动生成视频文案、视频素材、视频字幕、视频背景音乐,然后合成一个高清的短视频。
+Simply provide a topic or keyword for a video, and it will automatically generate the video copy, video materials,
+video subtitles, and video background music before synthesizing a high-definition short video.
+ + \ No newline at end of file diff --git a/resource/songs/output000.mp3 b/resource/songs/output000.mp3 new file mode 100644 index 0000000000000000000000000000000000000000..f9c65912d112a7fa6a4955532026c9f6e760786a --- /dev/null +++ b/resource/songs/output000.mp3 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9bae80da9326e87d4e0c96b08f0f1ee4911c1697ef9b5237ef51351a44453e49 +size 2249517 diff --git a/resource/songs/output001.mp3 b/resource/songs/output001.mp3 new file mode 100644 index 0000000000000000000000000000000000000000..12bf645d4a54f07efb58523116cd65d354cb40fd --- /dev/null +++ b/resource/songs/output001.mp3 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bcdfed1366cfbcf146b99c9f303b8d699ff78e78b0d2b2a406ef07a0a4894741 +size 2091189 diff --git a/resource/songs/output002.mp3 b/resource/songs/output002.mp3 new file mode 100644 index 0000000000000000000000000000000000000000..e90f3888ac23dbbec6491c0c63efca5bdf0fb658 --- /dev/null +++ b/resource/songs/output002.mp3 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:31d03c64eac3c3c5417d33851372013c7ef15ca6780767618166f87c186ee9ee +size 1875813 diff --git a/resource/songs/output003.mp3 b/resource/songs/output003.mp3 new file mode 100644 index 0000000000000000000000000000000000000000..cc7a2b68b3c9a6f8d2b8b59e438ac6ce0c217fb3 --- /dev/null +++ b/resource/songs/output003.mp3 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dceba4a2c5fcf7d0f75e838391bc2e775c4d14f945ac7a4df38fb61ffd035e31 +size 1936821 diff --git a/resource/songs/output004.mp3 b/resource/songs/output004.mp3 new file mode 100644 index 0000000000000000000000000000000000000000..9f3b6e15fe90217d76623380e8ca626d095c5811 --- /dev/null +++ b/resource/songs/output004.mp3 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5673e7f136905078ae9e618bc6bea7f1fa1c7b4614f9d205c4a38698d9e9f5a7 +size 2263293 diff --git a/resource/songs/output005.mp3 b/resource/songs/output005.mp3 new file mode 100644 index 0000000000000000000000000000000000000000..33aa3f3f6ff2720a2b63084e1bac1d2c20e7cd13 --- /dev/null +++ b/resource/songs/output005.mp3 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bf9d13168502a8f390dc5b1802aec4bab13ad3bffc121c54e5c0fd573d28defb +size 1974765 diff --git a/resource/songs/output006.mp3 b/resource/songs/output006.mp3 new file mode 100644 index 0000000000000000000000000000000000000000..3f9744b8246beffb927ffde4df511dbeb1a2659e --- /dev/null +++ b/resource/songs/output006.mp3 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b311af495773569f035ed5668e6e5330c927cbcdb8fd26986ce45c7a69e32783 +size 1889685 diff --git a/resource/songs/output007.mp3 b/resource/songs/output007.mp3 new file mode 100644 index 0000000000000000000000000000000000000000..3fdc2fd3ebb79b1babd38347b3e4a5d462a826d0 --- /dev/null +++ b/resource/songs/output007.mp3 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a77ca1bebde4904aee56b30d97745842b0472488b13457aa270154a8c09ba446 +size 2024157 diff --git a/resource/songs/output008.mp3 b/resource/songs/output008.mp3 new file mode 100644 index 0000000000000000000000000000000000000000..20b343a5590bdb6b2db72ded846a723a8370792c --- /dev/null +++ b/resource/songs/output008.mp3 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fc288f070b9c27fb1e4f332e8f57b63290e6ac31f355059e695e565184f47f3c +size 2194341 diff --git a/resource/songs/output009.mp3 b/resource/songs/output009.mp3 
new file mode 100644 index 0000000000000000000000000000000000000000..6423d0ae7bff96ff1867196ebc3a93583d3b284c --- /dev/null +++ b/resource/songs/output009.mp3 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:99103c97bd4647683ec2115ee2177a3e83ddd9466fddb96af430d3958fdf66c8 +size 1944621 diff --git a/resource/songs/output010.mp3 b/resource/songs/output010.mp3 new file mode 100644 index 0000000000000000000000000000000000000000..047fb1c732a3bb2a3b9cbcba8b8e516b28d6969b --- /dev/null +++ b/resource/songs/output010.mp3 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:711720c4dca1b8db0128c8c4fd62adb1db1cb00709f276f639483e6ec0ef09a7 +size 1896093 diff --git a/resource/songs/output011.mp3 b/resource/songs/output011.mp3 new file mode 100644 index 0000000000000000000000000000000000000000..88151c1ed10f1197a57fd4c12009eba690ef783b --- /dev/null +++ b/resource/songs/output011.mp3 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:060d34b687f4be9f7a364a2fe0135e977af4c3a8820a176b98e9417a63d4bf93 +size 2126445 diff --git a/resource/songs/output012.mp3 b/resource/songs/output012.mp3 new file mode 100644 index 0000000000000000000000000000000000000000..72f73755f13c1831e5c6d864f77e7439654cf933 --- /dev/null +++ b/resource/songs/output012.mp3 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:647c4718d8110def2b63f0a1990e85d0307529f16b5ad04d3a0cf8586e6f9f7a +size 2123949 diff --git a/resource/songs/output013.mp3 b/resource/songs/output013.mp3 new file mode 100644 index 0000000000000000000000000000000000000000..06b7c7b349d09df079f465600355b875e59d7114 --- /dev/null +++ b/resource/songs/output013.mp3 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a323b2b618b65f3ee08364551d35df29ebd0eb6267b0976c559147f453ab55b5 +size 1891269 diff --git a/resource/songs/output014.mp3 b/resource/songs/output014.mp3 new file mode 100644 index 0000000000000000000000000000000000000000..4ca6eac1777624c5346c1af4e945c19c3ac87009 --- /dev/null +++ b/resource/songs/output014.mp3 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:90c8e9f381d31d14ff68bb8889cf7cb61e9db0b58904074645d70c207a5e4206 +size 1899213 diff --git a/resource/songs/output015.mp3 b/resource/songs/output015.mp3 new file mode 100644 index 0000000000000000000000000000000000000000..6f5685e1c1fa9bc23f1ffe0342c7a9c135eeed95 --- /dev/null +++ b/resource/songs/output015.mp3 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c9a2b3c6ec47e3e4cb53a0e36515bb41b83153cf5bd06625c634d724a0ef96c1 +size 2252637 diff --git a/resource/songs/output016.mp3 b/resource/songs/output016.mp3 new file mode 100644 index 0000000000000000000000000000000000000000..a7082ecb64fe5d2b028fb715404a6823680f69d0 --- /dev/null +++ b/resource/songs/output016.mp3 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:74c808294b89c349911a2781e33e83e3f37855c82bea831da1ba570d092fce98 +size 2027517 diff --git a/resource/songs/output017.mp3 b/resource/songs/output017.mp3 new file mode 100644 index 0000000000000000000000000000000000000000..537763c360103b2be761f3c9b6d84efbcfa27650 --- /dev/null +++ b/resource/songs/output017.mp3 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:53d3681058a3b3d2fec4a00e19e9b6eaee368538d815c3d68a503f2b94fd220e +size 1895253 diff --git a/resource/songs/output018.mp3 b/resource/songs/output018.mp3 new file mode 100644 index 
0000000000000000000000000000000000000000..f31619af000741033197034a7aac56770641a87b --- /dev/null +++ b/resource/songs/output018.mp3 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4224198e1286869b8ac93deb988540a69878b0982e2d2d9b151eb73f386aca01 +size 1980285 diff --git a/resource/songs/output019.mp3 b/resource/songs/output019.mp3 new file mode 100644 index 0000000000000000000000000000000000000000..8dede74b8f5f21c198e0ef0a04cf780384aec8ec --- /dev/null +++ b/resource/songs/output019.mp3 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eed14b0acd9e5e29f194443d8b960713c74a1bf1907825852af65262798adec2 +size 2224917 diff --git a/resource/songs/output020.mp3 b/resource/songs/output020.mp3 new file mode 100644 index 0000000000000000000000000000000000000000..dfa8f31370ed59fc59517402d231fe09c519c0f9 --- /dev/null +++ b/resource/songs/output020.mp3 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:345f6bd4ab0cbe93840e13c4a26042693b0aa54233ca47c22dbcb414910b3e55 +size 1979037 diff --git a/resource/songs/output021.mp3 b/resource/songs/output021.mp3 new file mode 100644 index 0000000000000000000000000000000000000000..245e5e1de94714320d33bb61709feeb246776a1c --- /dev/null +++ b/resource/songs/output021.mp3 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:226898a0c9ff75527687e95367e57dd14d1b35e241f5d756b4c0ba63e9d55bdf +size 1883781 diff --git a/resource/songs/output022.mp3 b/resource/songs/output022.mp3 new file mode 100644 index 0000000000000000000000000000000000000000..6fd88124b5b7bf445efd89dbca24d657ec4c02b8 --- /dev/null +++ b/resource/songs/output022.mp3 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:03fae7a69a5e4b47d807cf81a03cc7dbcc7a0de9e7240ca23853e9eafe483baa +size 2069589 diff --git a/resource/songs/output023.mp3 b/resource/songs/output023.mp3 new file mode 100644 index 0000000000000000000000000000000000000000..8dae3367631e2bfb1fcff118b18faf559d01fc80 --- /dev/null +++ b/resource/songs/output023.mp3 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:92b1649064d9291cd55d7403329053cda6b70e2fe43a5c4433cb27b55b32a434 +size 2166117 diff --git a/resource/songs/output024.mp3 b/resource/songs/output024.mp3 new file mode 100644 index 0000000000000000000000000000000000000000..bea4e83dad2b0c2b971dc9d23ada00493246670c --- /dev/null +++ b/resource/songs/output024.mp3 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2982fe163dacddbfb4d278b626e56eaa33b97460de603ac0c3639388dd82d13b +size 1931205 diff --git a/resource/songs/output025.mp3 b/resource/songs/output025.mp3 new file mode 100644 index 0000000000000000000000000000000000000000..cf73a6e6a41c5b93ddfdb9e565632dd597a1ec14 --- /dev/null +++ b/resource/songs/output025.mp3 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dfe361932841c7cd37ef4794ed17961533e2cbf8b387f237a7614d24265fcc00 +size 1861821 diff --git a/resource/songs/output027.mp3 b/resource/songs/output027.mp3 new file mode 100644 index 0000000000000000000000000000000000000000..a6936c70b2b0ab227292bbbf5bec15a2aa2e2ee4 --- /dev/null +++ b/resource/songs/output027.mp3 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b07290cdb8396d7586aef5583acf1f67a899b905adad6be88bdce9dc6039094f +size 2096253 diff --git a/resource/songs/output028.mp3 b/resource/songs/output028.mp3 new file mode 100644 index 0000000000000000000000000000000000000000..aaba1f92c3a4e8b2c6327042f38dacb5372eaf09 --- /dev/null +++ 
b/resource/songs/output028.mp3 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:563c7e8a294b53c58e124bfd5d3ff9fa740f89a7a608600751603cb00d99a6f6 +size 1877901 diff --git a/resource/songs/output029.mp3 b/resource/songs/output029.mp3 new file mode 100644 index 0000000000000000000000000000000000000000..d64b5ef54ef5bc2ad3425b80044c0bb56d68e7b7 --- /dev/null +++ b/resource/songs/output029.mp3 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ebd57c86be424c3e958ceed39f431cec05ae79ef90190fb1af43ae73c37f8deb +size 1361637 diff --git a/test/README.md b/test/README.md new file mode 100644 index 0000000000000000000000000000000000000000..1deb3a9aa702a2bf043115d00511f97152a38b42 --- /dev/null +++ b/test/README.md @@ -0,0 +1,40 @@ +# MoneyPrinterTurbo Test Directory + +This directory contains unit tests for the **MoneyPrinterTurbo** project. + +## Directory Structure + +- `services/`: Tests for components in the `app/services` directory + - `test_video.py`: Tests for the video service + - `test_task.py`: Tests for the task service + - `test_voice.py`: Tests for the voice service + +## Running Tests + +You can run the tests using Python’s built-in `unittest` framework: + +```bash +# Run all tests +python -m unittest discover -s test + +# Run a specific test file +python -m unittest test/services/test_video.py + +# Run a specific test class +python -m unittest test.services.test_video.TestVideoService + +# Run a specific test method +python -m unittest test.services.test_video.TestVideoService.test_preprocess_video +``` + +## Adding New Tests + +To add tests for other components, follow these guidelines: + +1. Create test files prefixed with `test_` in the appropriate subdirectory +2. Use `unittest.TestCase` as the base class for your test classes +3. Name test methods with the `test_` prefix (a minimal sketch is shown at the end of this README) + +## Test Resources + +Place any resource files required for testing in the `test/resources` directory.
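+
+## Example: Adding a New Test
+
+A minimal sketch of a new test that follows the guidelines above. The file name `test/services/test_example.py` is hypothetical; the path setup and resource lookup mirror the existing tests:
+
+```python
+import os
+import sys
+import unittest
+from pathlib import Path
+
+# add project root to python path, same pattern as the existing tests
+sys.path.insert(0, str(Path(__file__).parent.parent.parent))
+
+# resource files live in test/resources, as described above
+resources_dir = os.path.join(os.path.dirname(os.path.dirname(__file__)), "resources")
+
+
+class TestExample(unittest.TestCase):
+    def test_resource_exists(self):
+        # 1.png is one of the images shipped in test/resources
+        self.assertTrue(os.path.exists(os.path.join(resources_dir, "1.png")))
+
+
+if __name__ == "__main__":
+    unittest.main()
+```
+
+Once the file exists, it can be run like the other tests, e.g. `python -m unittest test.services.test_example`.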
\ No newline at end of file diff --git a/test/__init__.py b/test/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..584e3c99ed88482481b73f5061e2560d06399802 --- /dev/null +++ b/test/__init__.py @@ -0,0 +1 @@ +# Unit test package for test diff --git a/test/resources/1.png b/test/resources/1.png new file mode 100644 index 0000000000000000000000000000000000000000..8c62fb6fefa2467962a9e398b1d35e44ad094f75 Binary files /dev/null and b/test/resources/1.png differ diff --git a/test/resources/1.png.mp4 b/test/resources/1.png.mp4 new file mode 100644 index 0000000000000000000000000000000000000000..05e351b8b34d4aca01986a7d84f9f2ec9ef40f86 Binary files /dev/null and b/test/resources/1.png.mp4 differ diff --git a/test/resources/2.png b/test/resources/2.png new file mode 100644 index 0000000000000000000000000000000000000000..2d11adc8cf88d509b99166a3f56fc3b4638ba343 Binary files /dev/null and b/test/resources/2.png differ diff --git a/test/resources/2.png.mp4 b/test/resources/2.png.mp4 new file mode 100644 index 0000000000000000000000000000000000000000..6e622c70f02458e0af2ebdb8a7268dec9c52d5c7 Binary files /dev/null and b/test/resources/2.png.mp4 differ diff --git a/test/resources/3.png b/test/resources/3.png new file mode 100644 index 0000000000000000000000000000000000000000..924dcee8321b5800291fd661258f1b8854ce135d Binary files /dev/null and b/test/resources/3.png differ diff --git a/test/resources/3.png.mp4 b/test/resources/3.png.mp4 new file mode 100644 index 0000000000000000000000000000000000000000..9f06ed3c3535ca529248652117c60d4c08aca097 Binary files /dev/null and b/test/resources/3.png.mp4 differ diff --git a/test/resources/4.png b/test/resources/4.png new file mode 100644 index 0000000000000000000000000000000000000000..9d6516d3a3c0efd5c61b9a1fe819b0b26dc78931 Binary files /dev/null and b/test/resources/4.png differ diff --git a/test/resources/5.png b/test/resources/5.png new file mode 100644 index 0000000000000000000000000000000000000000..fdd5925cdd5272daa56c97750d01afab9acd0321 Binary files /dev/null and b/test/resources/5.png differ diff --git a/test/resources/6.png b/test/resources/6.png new file mode 100644 index 0000000000000000000000000000000000000000..94c89876e0339ea9e6c63789edefe85e3f5a50fc Binary files /dev/null and b/test/resources/6.png differ diff --git a/test/resources/7.png b/test/resources/7.png new file mode 100644 index 0000000000000000000000000000000000000000..54bd1cea813f120ee332ed8eed0e0aa8e0ca43b0 Binary files /dev/null and b/test/resources/7.png differ diff --git a/test/resources/8.png b/test/resources/8.png new file mode 100644 index 0000000000000000000000000000000000000000..a9cf7ed1d249c0de38b4edbfef90adb586161611 Binary files /dev/null and b/test/resources/8.png differ diff --git a/test/resources/9.png b/test/resources/9.png new file mode 100644 index 0000000000000000000000000000000000000000..c39d181f4a5a4ab2876bf5c74759e231f0818201 Binary files /dev/null and b/test/resources/9.png differ diff --git a/test/services/__init__.py b/test/services/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..a07aa6208e5be085d5408f79a4d7e3166ca55f1f --- /dev/null +++ b/test/services/__init__.py @@ -0,0 +1 @@ +# Unit test package for services \ No newline at end of file diff --git a/test/services/test_task.py b/test/services/test_task.py new file mode 100644 index 0000000000000000000000000000000000000000..8b241a97a75d3b28630c04e513aac48772a67bb5 --- /dev/null +++ b/test/services/test_task.py @@ -0,0 +1,66 @@ +import unittest 
+import os +import sys +from pathlib import Path + +# add project root to python path +sys.path.insert(0, str(Path(__file__).parent.parent.parent)) + +from app.services import task as tm +from app.models.schema import MaterialInfo, VideoParams + +resources_dir = os.path.join(os.path.dirname(os.path.dirname(__file__)), "resources") + +class TestTaskService(unittest.TestCase): + def setUp(self): + pass + + def tearDown(self): + pass + + def test_task_local_materials(self): + task_id = "00000000-0000-0000-0000-000000000000" + video_materials=[] + for i in range(1, 4): + video_materials.append(MaterialInfo( + provider="local", + url=os.path.join(resources_dir, f"{i}.png"), + duration=0 + )) + + params = VideoParams( + video_subject="金钱的作用", + video_script="金钱不仅是交换媒介,更是社会资源的分配工具。它能满足基本生存需求,如食物和住房,也能提供教育、医疗等提升生活品质的机会。拥有足够的金钱意味着更多选择权,比如职业自由或创业可能。但金钱的作用也有边界,它无法直接购买幸福、健康或真诚的人际关系。过度追逐财富可能导致价值观扭曲,忽视精神层面的需求。理想的状态是理性看待金钱,将其作为实现目标的工具而非终极目的。", + video_terms="money importance, wealth and society, financial freedom, money and happiness, role of money", + video_aspect="9:16", + video_concat_mode="random", + video_transition_mode="None", + video_clip_duration=3, + video_count=1, + video_source="local", + video_materials=video_materials, + video_language="", + voice_name="zh-CN-XiaoxiaoNeural-Female", + voice_volume=1.0, + voice_rate=1.0, + bgm_type="random", + bgm_file="", + bgm_volume=0.2, + subtitle_enabled=True, + subtitle_position="bottom", + custom_position=70.0, + font_name="MicrosoftYaHeiBold.ttc", + text_fore_color="#FFFFFF", + text_background_color=True, + font_size=60, + stroke_color="#000000", + stroke_width=1.5, + n_threads=2, + paragraph_number=1 + ) + result = tm.start(task_id=task_id, params=params) + print(result) + + +if __name__ == "__main__": + unittest.main() \ No newline at end of file diff --git a/test/services/test_video.py b/test/services/test_video.py new file mode 100644 index 0000000000000000000000000000000000000000..d204acc5daae83005f6b83c28927634f8414fd10 --- /dev/null +++ b/test/services/test_video.py @@ -0,0 +1,85 @@ + +import unittest +import os +import sys +from pathlib import Path +from moviepy import ( + VideoFileClip, +) +# add project root to python path +sys.path.insert(0, str(Path(__file__).parent.parent.parent)) +from app.models.schema import MaterialInfo +from app.services import video as vd +from app.utils import utils + +resources_dir = os.path.join(os.path.dirname(os.path.dirname(__file__)), "resources") + +class TestVideoService(unittest.TestCase): + def setUp(self): + self.test_img_path = os.path.join(resources_dir, "1.png") + + def tearDown(self): + pass + + def test_preprocess_video(self): + if not os.path.exists(self.test_img_path): + self.fail(f"test image not found: {self.test_img_path}") + + # test preprocess_video function + m = MaterialInfo() + m.url = self.test_img_path + m.provider = "local" + print(m) + + materials = vd.preprocess_video([m], clip_duration=4) + print(materials) + + # verify result + self.assertIsNotNone(materials) + self.assertEqual(len(materials), 1) + self.assertTrue(materials[0].url.endswith(".mp4")) + + # moviepy get video info + clip = VideoFileClip(materials[0].url) + print(clip) + + # clean generated test video file + if os.path.exists(materials[0].url): + os.remove(materials[0].url) + + def test_wrap_text(self): + """test text wrapping function""" + try: + font_path = os.path.join(utils.font_dir(), "STHeitiMedium.ttc") + if not os.path.exists(font_path): + self.fail(f"font file not found: {font_path}") + + # test english text 
wrapping + test_text_en = "This is a test text for wrapping long sentences in english language" + + wrapped_text_en, text_height_en = vd.wrap_text( + text=test_text_en, + max_width=300, + font=font_path, + fontsize=30 + ) + print(wrapped_text_en, text_height_en) + # verify text is wrapped + self.assertIn("\n", wrapped_text_en) + + # test chinese text wrapping + test_text_zh = "这是一段用来测试中文长句换行的文本内容,应该会根据宽度限制进行换行处理" + wrapped_text_zh, text_height_zh = vd.wrap_text( + text=test_text_zh, + max_width=300, + font=font_path, + fontsize=30 + ) + print(wrapped_text_zh, text_height_zh) + # verify chinese text is wrapped + self.assertIn("\n", wrapped_text_zh) + except Exception as e: + self.fail(f"test wrap_text failed: {str(e)}") + +if __name__ == "__main__": + unittest.main() \ No newline at end of file diff --git a/test/services/test_voice.py b/test/services/test_voice.py new file mode 100644 index 0000000000000000000000000000000000000000..31f179965772482ffdd93103d98228bf02dd175d --- /dev/null +++ b/test/services/test_voice.py @@ -0,0 +1,107 @@ +import asyncio +import unittest +import os +import sys +from pathlib import Path + +# add project root to python path +sys.path.insert(0, str(Path(__file__).parent.parent.parent)) + +from app.utils import utils +from app.services import voice as vs + +temp_dir = utils.storage_dir("temp") + +text_en = """ +What is the meaning of life? +This question has puzzled philosophers, scientists, and thinkers of all kinds for centuries. +Throughout history, various cultures and individuals have come up with their interpretations and beliefs around the purpose of life. +Some say it's to seek happiness and self-fulfillment, while others believe it's about contributing to the welfare of others and making a positive impact in the world. +Despite the myriad of perspectives, one thing remains clear: the meaning of life is a deeply personal concept that varies from one person to another. +It's an existential inquiry that encourages us to reflect on our values, desires, and the essence of our existence. 
+""" + +text_zh = """ +预计未来3天深圳冷空气活动频繁,未来两天持续阴天有小雨,出门带好雨具; +10-11日持续阴天有小雨,日温差小,气温在13-17℃之间,体感阴凉; +12日天气短暂好转,早晚清凉; +""" + +voice_rate=1.0 +voice_volume=1.0 + +class TestVoiceService(unittest.TestCase): + def setUp(self): + self.loop = asyncio.new_event_loop() + asyncio.set_event_loop(self.loop) + + def tearDown(self): + self.loop.close() + + def test_siliconflow(self): + voice_name = "siliconflow:FunAudioLLM/CosyVoice2-0.5B:alex-Male" + voice_name = vs.parse_voice_name(voice_name) + + async def _do(): + parts = voice_name.split(":") + if len(parts) >= 3: + model = parts[1] + # 移除性别后缀,例如 "alex-Male" -> "alex" + voice_with_gender = parts[2] + voice = voice_with_gender.split("-")[0] + # 构建完整的voice参数,格式为 "model:voice" + full_voice = f"{model}:{voice}" + voice_file = f"{temp_dir}/tts-siliconflow-{voice}.mp3" + subtitle_file = f"{temp_dir}/tts-siliconflow-{voice}.srt" + sub_maker = vs.siliconflow_tts( + text=text_zh, model=model, voice=full_voice, voice_file=voice_file, voice_rate=voice_rate, voice_volume=voice_volume + ) + if not sub_maker: + self.fail("siliconflow tts failed") + vs.create_subtitle(sub_maker=sub_maker, text=text_zh, subtitle_file=subtitle_file) + audio_duration = vs.get_audio_duration(sub_maker) + print(f"voice: {voice_name}, audio duration: {audio_duration}s") + else: + self.fail("siliconflow invalid voice name") + + self.loop.run_until_complete(_do()) + + def test_azure_tts_v1(self): + voice_name = "zh-CN-XiaoyiNeural-Female" + voice_name = vs.parse_voice_name(voice_name) + print(voice_name) + + voice_file = f"{temp_dir}/tts-azure-v1-{voice_name}.mp3" + subtitle_file = f"{temp_dir}/tts-azure-v1-{voice_name}.srt" + sub_maker = vs.azure_tts_v1( + text=text_zh, voice_name=voice_name, voice_file=voice_file, voice_rate=voice_rate + ) + if not sub_maker: + self.fail("azure tts v1 failed") + vs.create_subtitle(sub_maker=sub_maker, text=text_zh, subtitle_file=subtitle_file) + audio_duration = vs.get_audio_duration(sub_maker) + print(f"voice: {voice_name}, audio duration: {audio_duration}s") + + def test_azure_tts_v2(self): + voice_name = "zh-CN-XiaoxiaoMultilingualNeural-V2-Female" + voice_name = vs.parse_voice_name(voice_name) + print(voice_name) + + async def _do(): + voice_file = f"{temp_dir}/tts-azure-v2-{voice_name}.mp3" + subtitle_file = f"{temp_dir}/tts-azure-v2-{voice_name}.srt" + sub_maker = vs.azure_tts_v2( + text=text_zh, voice_name=voice_name, voice_file=voice_file + ) + if not sub_maker: + self.fail("azure tts v2 failed") + vs.create_subtitle(sub_maker=sub_maker, text=text_zh, subtitle_file=subtitle_file) + audio_duration = vs.get_audio_duration(sub_maker) + print(f"voice: {voice_name}, audio duration: {audio_duration}s") + + self.loop.run_until_complete(_do()) + +if __name__ == "__main__": + # python -m unittest test.services.test_voice.TestVoiceService.test_azure_tts_v1 + # python -m unittest test.services.test_voice.TestVoiceService.test_azure_tts_v2 + unittest.main() \ No newline at end of file diff --git a/webui.bat b/webui.bat new file mode 100644 index 0000000000000000000000000000000000000000..fd975146a45a946f53809ea1086d7c0659da98cb --- /dev/null +++ b/webui.bat @@ -0,0 +1,7 @@ +@echo off +set CURRENT_DIR=%CD% +echo ***** Current directory: %CURRENT_DIR% ***** +set PYTHONPATH=%CURRENT_DIR% + +rem set HF_ENDPOINT=https://hf-mirror.com +streamlit run .\webui\Main.py --browser.gatherUsageStats=False --server.enableCORS=True \ No newline at end of file diff --git a/webui.sh b/webui.sh new file mode 100644 index 
0000000000000000000000000000000000000000..89b30899ceaf544142cd1c5edca04985e323889e --- /dev/null +++ b/webui.sh @@ -0,0 +1,8 @@ +# If you could not download the model from the official site, you can use the mirror site. +# Just remove the comment of the following line . +# 如果你无法从官方网站下载模型,你可以使用镜像网站。 +# 只需要移除下面一行的注释即可。 + +# export HF_ENDPOINT=https://hf-mirror.com + +streamlit run ./webui/Main.py --browser.serverAddress="0.0.0.0" --server.enableCORS=True --browser.gatherUsageStats=False \ No newline at end of file diff --git a/webui/.streamlit/config.toml b/webui/.streamlit/config.toml new file mode 100644 index 0000000000000000000000000000000000000000..b690b747918f881f049af4e64ce29f9479004dfc --- /dev/null +++ b/webui/.streamlit/config.toml @@ -0,0 +1,2 @@ +[browser] +gatherUsageStats = false \ No newline at end of file diff --git a/webui/Main.py b/webui/Main.py new file mode 100644 index 0000000000000000000000000000000000000000..1b55abe2daa320d1b14f4f38a43e1bf5f8adb165 --- /dev/null +++ b/webui/Main.py @@ -0,0 +1,985 @@ +import os +import platform +import sys +from uuid import uuid4 + +import streamlit as st +from loguru import logger + +# Add the root directory of the project to the system path to allow importing modules from the project +root_dir = os.path.dirname(os.path.dirname(os.path.realpath(__file__))) +if root_dir not in sys.path: + sys.path.append(root_dir) + print("******** sys.path ********") + print(sys.path) + print("") + +from app.config import config +from app.models.schema import ( + MaterialInfo, + VideoAspect, + VideoConcatMode, + VideoParams, + VideoTransitionMode, +) +from app.services import llm, voice +from app.services import task as tm +from app.utils import utils + +st.set_page_config( + page_title="MoneyPrinterTurbo", + page_icon="🤖", + layout="wide", + initial_sidebar_state="auto", + menu_items={ + "Report a bug": "https://github.com/harry0703/MoneyPrinterTurbo/issues", + "About": "# MoneyPrinterTurbo\nSimply provide a topic or keyword for a video, and it will " + "automatically generate the video copy, video materials, video subtitles, " + "and video background music before synthesizing a high-definition short " + "video.\n\nhttps://github.com/harry0703/MoneyPrinterTurbo", + }, +) + + +streamlit_style = """ + +""" +st.markdown(streamlit_style, unsafe_allow_html=True) + +# 定义资源目录 +font_dir = os.path.join(root_dir, "resource", "fonts") +song_dir = os.path.join(root_dir, "resource", "songs") +i18n_dir = os.path.join(root_dir, "webui", "i18n") +config_file = os.path.join(root_dir, "webui", ".streamlit", "webui.toml") +system_locale = utils.get_system_locale() + + +if "video_subject" not in st.session_state: + st.session_state["video_subject"] = "" +if "video_script" not in st.session_state: + st.session_state["video_script"] = "" +if "video_terms" not in st.session_state: + st.session_state["video_terms"] = "" +if "ui_language" not in st.session_state: + st.session_state["ui_language"] = config.ui.get("language", system_locale) + +# 加载语言文件 +locales = utils.load_locales(i18n_dir) + +# 创建一个顶部栏,包含标题和语言选择 +title_col, lang_col = st.columns([3, 1]) + +with title_col: + st.title(f"MoneyPrinterTurbo v{config.project_version}") + +with lang_col: + display_languages = [] + selected_index = 0 + for i, code in enumerate(locales.keys()): + display_languages.append(f"{code} - {locales[code].get('Language')}") + if code == st.session_state.get("ui_language", ""): + selected_index = i + + selected_language = st.selectbox( + "Language / 语言", + options=display_languages, + 
index=selected_index, + key="top_language_selector", + label_visibility="collapsed", + ) + if selected_language: + code = selected_language.split(" - ")[0].strip() + st.session_state["ui_language"] = code + config.ui["language"] = code + +support_locales = [ + "zh-CN", + "zh-HK", + "zh-TW", + "de-DE", + "en-US", + "fr-FR", + "vi-VN", + "th-TH", +] + + +def get_all_fonts(): + fonts = [] + for root, dirs, files in os.walk(font_dir): + for file in files: + if file.endswith(".ttf") or file.endswith(".ttc"): + fonts.append(file) + fonts.sort() + return fonts + + +def get_all_songs(): + songs = [] + for root, dirs, files in os.walk(song_dir): + for file in files: + if file.endswith(".mp3"): + songs.append(file) + return songs + + +def open_task_folder(task_id): + try: + sys = platform.system() + path = os.path.join(root_dir, "storage", "tasks", task_id) + if os.path.exists(path): + if sys == "Windows": + os.system(f"start {path}") + if sys == "Darwin": + os.system(f"open {path}") + except Exception as e: + logger.error(e) + + +def scroll_to_bottom(): + js = """ + + """ + st.components.v1.html(js, height=0, width=0) + + +def init_log(): + logger.remove() + _lvl = "DEBUG" + + def format_record(record): + # 获取日志记录中的文件全路径 + file_path = record["file"].path + # 将绝对路径转换为相对于项目根目录的路径 + relative_path = os.path.relpath(file_path, root_dir) + # 更新记录中的文件路径 + record["file"].path = f"./{relative_path}" + # 返回修改后的格式字符串 + # 您可以根据需要调整这里的格式 + record["message"] = record["message"].replace(root_dir, ".") + + _format = ( + "{time:%Y-%m-%d %H:%M:%S} | " + + "{level} | " + + '"{file.path}:{line}": {function} ' + + "- {message}" + + "\n" + ) + return _format + + logger.add( + sys.stdout, + level=_lvl, + format=format_record, + colorize=True, + ) + + +init_log() + +locales = utils.load_locales(i18n_dir) + + +def tr(key): + loc = locales.get(st.session_state["ui_language"], {}) + return loc.get("Translation", {}).get(key, key) + + +# 创建基础设置折叠框 +if not config.app.get("hide_config", False): + with st.expander(tr("Basic Settings"), expanded=False): + config_panels = st.columns(3) + left_config_panel = config_panels[0] + middle_config_panel = config_panels[1] + right_config_panel = config_panels[2] + + # 左侧面板 - 日志设置 + with left_config_panel: + # 是否隐藏配置面板 + hide_config = st.checkbox( + tr("Hide Basic Settings"), value=config.app.get("hide_config", False) + ) + config.app["hide_config"] = hide_config + + # 是否禁用日志显示 + hide_log = st.checkbox( + tr("Hide Log"), value=config.ui.get("hide_log", False) + ) + config.ui["hide_log"] = hide_log + + # 中间面板 - LLM 设置 + + with middle_config_panel: + st.write(tr("LLM Settings")) + llm_providers = [ + "OpenAI", + "Moonshot", + "Azure", + "Qwen", + "DeepSeek", + "Gemini", + "Ollama", + "G4f", + "OneAPI", + "Cloudflare", + "ERNIE", + "Pollinations", + ] + saved_llm_provider = config.app.get("llm_provider", "OpenAI").lower() + saved_llm_provider_index = 0 + for i, provider in enumerate(llm_providers): + if provider.lower() == saved_llm_provider: + saved_llm_provider_index = i + break + + llm_provider = st.selectbox( + tr("LLM Provider"), + options=llm_providers, + index=saved_llm_provider_index, + ) + llm_helper = st.container() + llm_provider = llm_provider.lower() + config.app["llm_provider"] = llm_provider + + llm_api_key = config.app.get(f"{llm_provider}_api_key", "") + llm_secret_key = config.app.get( + f"{llm_provider}_secret_key", "" + ) # only for baidu ernie + llm_base_url = config.app.get(f"{llm_provider}_base_url", "") + llm_model_name = config.app.get(f"{llm_provider}_model_name", "") 
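+ # every provider stores its settings in config.app under "<provider>_*" keys (api_key, base_url, model_name; plus secret_key for ERNIE and account_id for Cloudflare)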
+ llm_account_id = config.app.get(f"{llm_provider}_account_id", "") + + tips = "" + if llm_provider == "ollama": + if not llm_model_name: + llm_model_name = "qwen:7b" + if not llm_base_url: + llm_base_url = "http://localhost:11434/v1" + + with llm_helper: + tips = """ + ##### Ollama配置说明 + - **API Key**: 随便填写,比如 123 + - **Base Url**: 一般为 http://localhost:11434/v1 + - 如果 `MoneyPrinterTurbo` 和 `Ollama` **不在同一台机器上**,需要填写 `Ollama` 机器的IP地址 + - 如果 `MoneyPrinterTurbo` 是 `Docker` 部署,建议填写 `http://host.docker.internal:11434/v1` + - **Model Name**: 使用 `ollama list` 查看,比如 `qwen:7b` + """ + + if llm_provider == "openai": + if not llm_model_name: + llm_model_name = "gpt-3.5-turbo" + with llm_helper: + tips = """ + ##### OpenAI 配置说明 + > 需要VPN开启全局流量模式 + - **API Key**: [点击到官网申请](https://platform.openai.com/api-keys) + - **Base Url**: 可以留空 + - **Model Name**: 填写**有权限**的模型,[点击查看模型列表](https://platform.openai.com/settings/organization/limits) + """ + + if llm_provider == "moonshot": + if not llm_model_name: + llm_model_name = "moonshot-v1-8k" + with llm_helper: + tips = """ + ##### Moonshot 配置说明 + - **API Key**: [点击到官网申请](https://platform.moonshot.cn/console/api-keys) + - **Base Url**: 固定为 https://api.moonshot.cn/v1 + - **Model Name**: 比如 moonshot-v1-8k,[点击查看模型列表](https://platform.moonshot.cn/docs/intro#%E6%A8%A1%E5%9E%8B%E5%88%97%E8%A1%A8) + """ + if llm_provider == "oneapi": + if not llm_model_name: + llm_model_name = ( + "claude-3-5-sonnet-20240620" # 默认模型,可以根据需要调整 + ) + with llm_helper: + tips = """ + ##### OneAPI 配置说明 + - **API Key**: 填写您的 OneAPI 密钥 + - **Base Url**: 填写 OneAPI 的基础 URL + - **Model Name**: 填写您要使用的模型名称,例如 claude-3-5-sonnet-20240620 + """ + + if llm_provider == "qwen": + if not llm_model_name: + llm_model_name = "qwen-max" + with llm_helper: + tips = """ + ##### 通义千问Qwen 配置说明 + - **API Key**: [点击到官网申请](https://dashscope.console.aliyun.com/apiKey) + - **Base Url**: 留空 + - **Model Name**: 比如 qwen-max,[点击查看模型列表](https://help.aliyun.com/zh/dashscope/developer-reference/model-introduction#3ef6d0bcf91wy) + """ + + if llm_provider == "g4f": + if not llm_model_name: + llm_model_name = "gpt-3.5-turbo" + with llm_helper: + tips = """ + ##### gpt4free 配置说明 + > [GitHub开源项目](https://github.com/xtekky/gpt4free),可以免费使用GPT模型,但是**稳定性较差** + - **API Key**: 随便填写,比如 123 + - **Base Url**: 留空 + - **Model Name**: 比如 gpt-3.5-turbo,[点击查看模型列表](https://github.com/xtekky/gpt4free/blob/main/g4f/models.py#L308) + """ + if llm_provider == "azure": + with llm_helper: + tips = """ + ##### Azure 配置说明 + > [点击查看如何部署模型](https://learn.microsoft.com/zh-cn/azure/ai-services/openai/how-to/create-resource) + - **API Key**: [点击到Azure后台创建](https://portal.azure.com/#view/Microsoft_Azure_ProjectOxford/CognitiveServicesHub/~/OpenAI) + - **Base Url**: 留空 + - **Model Name**: 填写你实际的部署名 + """ + + if llm_provider == "gemini": + if not llm_model_name: + llm_model_name = "gemini-1.0-pro" + + with llm_helper: + tips = """ + ##### Gemini 配置说明 + > 需要VPN开启全局流量模式 + - **API Key**: [点击到官网申请](https://ai.google.dev/) + - **Base Url**: 留空 + - **Model Name**: 比如 gemini-1.0-pro + """ + + if llm_provider == "deepseek": + if not llm_model_name: + llm_model_name = "deepseek-chat" + if not llm_base_url: + llm_base_url = "https://api.deepseek.com" + with llm_helper: + tips = """ + ##### DeepSeek 配置说明 + - **API Key**: [点击到官网申请](https://platform.deepseek.com/api_keys) + - **Base Url**: 固定为 https://api.deepseek.com + - **Model Name**: 固定为 deepseek-chat + """ + + if llm_provider == "ernie": + with llm_helper: + tips = """ + ##### 百度文心一言 配置说明 + - **API Key**: 
[点击到官网申请](https://console.bce.baidu.com/qianfan/ais/console/applicationConsole/application) + - **Secret Key**: [点击到官网申请](https://console.bce.baidu.com/qianfan/ais/console/applicationConsole/application) + - **Base Url**: 填写 **请求地址** [点击查看文档](https://cloud.baidu.com/doc/WENXINWORKSHOP/s/jlil56u11#%E8%AF%B7%E6%B1%82%E8%AF%B4%E6%98%8E) + """ + + if llm_provider == "pollinations": + if not llm_model_name: + llm_model_name = "default" + with llm_helper: + tips = """ + ##### Pollinations AI Configuration + - **API Key**: Optional - Leave empty for public access + - **Base Url**: Default is https://text.pollinations.ai/openai + - **Model Name**: Use 'openai-fast' or specify a model name + """ + + if tips and config.ui["language"] == "zh": + st.warning( + "中国用户建议使用 **DeepSeek** 或 **Moonshot** 作为大模型提供商\n- 国内可直接访问,不需要VPN \n- 注册就送额度,基本够用" + ) + st.info(tips) + + st_llm_api_key = st.text_input( + tr("API Key"), value=llm_api_key, type="password" + ) + st_llm_base_url = st.text_input(tr("Base Url"), value=llm_base_url) + st_llm_model_name = "" + if llm_provider != "ernie": + st_llm_model_name = st.text_input( + tr("Model Name"), + value=llm_model_name, + key=f"{llm_provider}_model_name_input", + ) + if st_llm_model_name: + config.app[f"{llm_provider}_model_name"] = st_llm_model_name + else: + st_llm_model_name = None + + if st_llm_api_key: + config.app[f"{llm_provider}_api_key"] = st_llm_api_key + if st_llm_base_url: + config.app[f"{llm_provider}_base_url"] = st_llm_base_url + if st_llm_model_name: + config.app[f"{llm_provider}_model_name"] = st_llm_model_name + if llm_provider == "ernie": + st_llm_secret_key = st.text_input( + tr("Secret Key"), value=llm_secret_key, type="password" + ) + config.app[f"{llm_provider}_secret_key"] = st_llm_secret_key + + if llm_provider == "cloudflare": + st_llm_account_id = st.text_input( + tr("Account ID"), value=llm_account_id + ) + if st_llm_account_id: + config.app[f"{llm_provider}_account_id"] = st_llm_account_id + + # 右侧面板 - API 密钥设置 + with right_config_panel: + + def get_keys_from_config(cfg_key): + api_keys = config.app.get(cfg_key, []) + if isinstance(api_keys, str): + api_keys = [api_keys] + api_key = ", ".join(api_keys) + return api_key + + def save_keys_to_config(cfg_key, value): + value = value.replace(" ", "") + if value: + config.app[cfg_key] = value.split(",") + + st.write(tr("Video Source Settings")) + + pexels_api_key = get_keys_from_config("pexels_api_keys") + pexels_api_key = st.text_input( + tr("Pexels API Key"), value=pexels_api_key, type="password" + ) + save_keys_to_config("pexels_api_keys", pexels_api_key) + + pixabay_api_key = get_keys_from_config("pixabay_api_keys") + pixabay_api_key = st.text_input( + tr("Pixabay API Key"), value=pixabay_api_key, type="password" + ) + save_keys_to_config("pixabay_api_keys", pixabay_api_key) + +llm_provider = config.app.get("llm_provider", "").lower() +panel = st.columns(3) +left_panel = panel[0] +middle_panel = panel[1] +right_panel = panel[2] + +params = VideoParams(video_subject="") +uploaded_files = [] + +with left_panel: + with st.container(border=True): + st.write(tr("Video Script Settings")) + params.video_subject = st.text_input( + tr("Video Subject"), + value=st.session_state["video_subject"], + key="video_subject_input", + ).strip() + + video_languages = [ + (tr("Auto Detect"), ""), + ] + for code in support_locales: + video_languages.append((code, code)) + + selected_index = st.selectbox( + tr("Script Language"), + index=0, + options=range( + len(video_languages) + ), # Use the index as the 
internal option value + format_func=lambda x: video_languages[x][ + 0 + ], # The label is displayed to the user + ) + params.video_language = video_languages[selected_index][1] + + if st.button( + tr("Generate Video Script and Keywords"), key="auto_generate_script" + ): + with st.spinner(tr("Generating Video Script and Keywords")): + script = llm.generate_script( + video_subject=params.video_subject, language=params.video_language + ) + terms = llm.generate_terms(params.video_subject, script) + if "Error: " in script: + st.error(tr(script)) + elif "Error: " in terms: + st.error(tr(terms)) + else: + st.session_state["video_script"] = script + st.session_state["video_terms"] = ", ".join(terms) + params.video_script = st.text_area( + tr("Video Script"), value=st.session_state["video_script"], height=280 + ) + if st.button(tr("Generate Video Keywords"), key="auto_generate_terms"): + if not params.video_script: + st.error(tr("Please Enter the Video Subject")) + st.stop() + + with st.spinner(tr("Generating Video Keywords")): + terms = llm.generate_terms(params.video_subject, params.video_script) + if "Error: " in terms: + st.error(tr(terms)) + else: + st.session_state["video_terms"] = ", ".join(terms) + + params.video_terms = st.text_area( + tr("Video Keywords"), value=st.session_state["video_terms"] + ) + +with middle_panel: + with st.container(border=True): + st.write(tr("Video Settings")) + video_concat_modes = [ + (tr("Sequential"), "sequential"), + (tr("Random"), "random"), + ] + video_sources = [ + (tr("Pexels"), "pexels"), + (tr("Pixabay"), "pixabay"), + (tr("Local file"), "local"), + (tr("TikTok"), "douyin"), + (tr("Bilibili"), "bilibili"), + (tr("Xiaohongshu"), "xiaohongshu"), + ] + + saved_video_source_name = config.app.get("video_source", "pexels") + saved_video_source_index = [v[1] for v in video_sources].index( + saved_video_source_name + ) + + selected_index = st.selectbox( + tr("Video Source"), + options=range(len(video_sources)), + format_func=lambda x: video_sources[x][0], + index=saved_video_source_index, + ) + params.video_source = video_sources[selected_index][1] + config.app["video_source"] = params.video_source + + if params.video_source == "local": + uploaded_files = st.file_uploader( + "Upload Local Files", + type=["mp4", "mov", "avi", "flv", "mkv", "jpg", "jpeg", "png"], + accept_multiple_files=True, + ) + + selected_index = st.selectbox( + tr("Video Concat Mode"), + index=1, + options=range( + len(video_concat_modes) + ), # Use the index as the internal option value + format_func=lambda x: video_concat_modes[x][ + 0 + ], # The label is displayed to the user + ) + params.video_concat_mode = VideoConcatMode( + video_concat_modes[selected_index][1] + ) + + # 视频转场模式 + video_transition_modes = [ + (tr("None"), VideoTransitionMode.none.value), + (tr("Shuffle"), VideoTransitionMode.shuffle.value), + (tr("FadeIn"), VideoTransitionMode.fade_in.value), + (tr("FadeOut"), VideoTransitionMode.fade_out.value), + (tr("SlideIn"), VideoTransitionMode.slide_in.value), + (tr("SlideOut"), VideoTransitionMode.slide_out.value), + ] + selected_index = st.selectbox( + tr("Video Transition Mode"), + options=range(len(video_transition_modes)), + format_func=lambda x: video_transition_modes[x][0], + index=0, + ) + params.video_transition_mode = VideoTransitionMode( + video_transition_modes[selected_index][1] + ) + + video_aspect_ratios = [ + (tr("Portrait"), VideoAspect.portrait.value), + (tr("Landscape"), VideoAspect.landscape.value), + ] + selected_index = st.selectbox( + tr("Video Ratio"), + 
options=range( + len(video_aspect_ratios) + ), # Use the index as the internal option value + format_func=lambda x: video_aspect_ratios[x][ + 0 + ], # The label is displayed to the user + ) + params.video_aspect = VideoAspect(video_aspect_ratios[selected_index][1]) + + params.video_clip_duration = st.selectbox( + tr("Clip Duration"), options=[2, 3, 4, 5, 6, 7, 8, 9, 10], index=1 + ) + params.video_count = st.selectbox( + tr("Number of Videos Generated Simultaneously"), + options=[1, 2, 3, 4, 5], + index=0, + ) + with st.container(border=True): + st.write(tr("Audio Settings")) + + # 添加TTS服务器选择下拉框 + tts_servers = [ + ("azure-tts-v1", "Azure TTS V1"), + ("azure-tts-v2", "Azure TTS V2"), + ("siliconflow", "SiliconFlow TTS"), + ] + + # 获取保存的TTS服务器,默认为v1 + saved_tts_server = config.ui.get("tts_server", "azure-tts-v1") + saved_tts_server_index = 0 + for i, (server_value, _) in enumerate(tts_servers): + if server_value == saved_tts_server: + saved_tts_server_index = i + break + + selected_tts_server_index = st.selectbox( + tr("TTS Servers"), + options=range(len(tts_servers)), + format_func=lambda x: tts_servers[x][1], + index=saved_tts_server_index, + ) + + selected_tts_server = tts_servers[selected_tts_server_index][0] + config.ui["tts_server"] = selected_tts_server + + # 根据选择的TTS服务器获取声音列表 + filtered_voices = [] + + if selected_tts_server == "siliconflow": + # 获取硅基流动的声音列表 + filtered_voices = voice.get_siliconflow_voices() + else: + # 获取Azure的声音列表 + all_voices = voice.get_all_azure_voices(filter_locals=None) + + # 根据选择的TTS服务器筛选声音 + for v in all_voices: + if selected_tts_server == "azure-tts-v2": + # V2版本的声音名称中包含"v2" + if "V2" in v: + filtered_voices.append(v) + else: + # V1版本的声音名称中不包含"v2" + if "V2" not in v: + filtered_voices.append(v) + + friendly_names = { + v: v.replace("Female", tr("Female")) + .replace("Male", tr("Male")) + .replace("Neural", "") + for v in filtered_voices + } + + saved_voice_name = config.ui.get("voice_name", "") + saved_voice_name_index = 0 + + # 检查保存的声音是否在当前筛选的声音列表中 + if saved_voice_name in friendly_names: + saved_voice_name_index = list(friendly_names.keys()).index(saved_voice_name) + else: + # 如果不在,则根据当前UI语言选择一个默认声音 + for i, v in enumerate(filtered_voices): + if v.lower().startswith(st.session_state["ui_language"].lower()): + saved_voice_name_index = i + break + + # 如果没有找到匹配的声音,使用第一个声音 + if saved_voice_name_index >= len(friendly_names) and friendly_names: + saved_voice_name_index = 0 + + # 确保有声音可选 + if friendly_names: + selected_friendly_name = st.selectbox( + tr("Speech Synthesis"), + options=list(friendly_names.values()), + index=min(saved_voice_name_index, len(friendly_names) - 1) + if friendly_names + else 0, + ) + + voice_name = list(friendly_names.keys())[ + list(friendly_names.values()).index(selected_friendly_name) + ] + params.voice_name = voice_name + config.ui["voice_name"] = voice_name + else: + # 如果没有声音可选,显示提示信息 + st.warning( + tr( + "No voices available for the selected TTS server. Please select another server." 
+                )
+            )
+            voice_name = ""  # keep voice_name defined for the provider checks below
+            params.voice_name = ""
+            config.ui["voice_name"] = ""
+
+        # Only show the preview button when voices are available
+        if friendly_names and st.button(tr("Play Voice")):
+            play_content = params.video_subject
+            if not play_content:
+                play_content = params.video_script
+            if not play_content:
+                play_content = tr("Voice Example")
+            with st.spinner(tr("Synthesizing Voice")):
+                temp_dir = utils.storage_dir("temp", create=True)
+                audio_file = os.path.join(temp_dir, f"tmp-voice-{str(uuid4())}.mp3")
+                sub_maker = voice.tts(
+                    text=play_content,
+                    voice_name=voice_name,
+                    voice_rate=params.voice_rate,
+                    voice_file=audio_file,
+                    voice_volume=params.voice_volume,
+                )
+                # If voice generation failed, try again with default content.
+                if not sub_maker:
+                    play_content = "This is an example voice. If you hear this, speech synthesis failed with the original content."
+                    sub_maker = voice.tts(
+                        text=play_content,
+                        voice_name=voice_name,
+                        voice_rate=params.voice_rate,
+                        voice_file=audio_file,
+                        voice_volume=params.voice_volume,
+                    )
+
+                if sub_maker and os.path.exists(audio_file):
+                    st.audio(audio_file, format="audio/mp3")
+                    if os.path.exists(audio_file):
+                        os.remove(audio_file)
+
+        # When the V2 server (or a V2 voice) is selected, show the region and API key inputs
+        if selected_tts_server == "azure-tts-v2" or (
+            voice_name and voice.is_azure_v2_voice(voice_name)
+        ):
+            saved_azure_speech_region = config.azure.get("speech_region", "")
+            saved_azure_speech_key = config.azure.get("speech_key", "")
+            azure_speech_region = st.text_input(
+                tr("Speech Region"),
+                value=saved_azure_speech_region,
+                key="azure_speech_region_input",
+            )
+            azure_speech_key = st.text_input(
+                tr("Speech Key"),
+                value=saved_azure_speech_key,
+                type="password",
+                key="azure_speech_key_input",
+            )
+            config.azure["speech_region"] = azure_speech_region
+            config.azure["speech_key"] = azure_speech_key
+
+        # When SiliconFlow is selected, show the API key input and usage notes
+        if selected_tts_server == "siliconflow" or (
+            voice_name and voice.is_siliconflow_voice(voice_name)
+        ):
+            saved_siliconflow_api_key = config.siliconflow.get("api_key", "")
+
+            siliconflow_api_key = st.text_input(
+                tr("SiliconFlow API Key"),
+                value=saved_siliconflow_api_key,
+                type="password",
+                key="siliconflow_api_key_input",
+            )
+
+            # Show the SiliconFlow usage notes
+            st.info(
+                tr("SiliconFlow TTS Settings")
+                + ":\n"
+                + "- "
+                + tr("Speed: Range [0.25, 4.0], default is 1.0")
+                + "\n"
+                + "- "
+                + tr("Volume: Uses Speech Volume setting, default 1.0 maps to gain 0")
+            )
+
+            config.siliconflow["api_key"] = siliconflow_api_key
+
+        params.voice_volume = st.selectbox(
+            tr("Speech Volume"),
+            options=[0.6, 0.8, 1.0, 1.2, 1.5, 2.0, 3.0, 4.0, 5.0],
+            index=2,
+        )
+
+        params.voice_rate = st.selectbox(
+            tr("Speech Rate"),
+            options=[0.8, 0.9, 1.0, 1.1, 1.2, 1.3, 1.5, 1.8, 2.0],
+            index=2,
+        )
+
+        bgm_options = [
+            (tr("No Background Music"), ""),
+            (tr("Random Background Music"), "random"),
+            (tr("Custom Background Music"), "custom"),
+        ]
+        selected_index = st.selectbox(
+            tr("Background Music"),
+            index=1,
+            options=range(
+                len(bgm_options)
+            ),  # Use the index as the internal option value
+            format_func=lambda x: bgm_options[x][
+                0
+            ],  # The label is displayed to the user
+        )
+        # Get the selected background music type
+        params.bgm_type = bgm_options[selected_index][1]
+
+        # Show or hide components based on the selection
+        if params.bgm_type == "custom":
+            custom_bgm_file = st.text_input(
+                tr("Custom Background Music File"), key="custom_bgm_file_input"
+            )
+            if custom_bgm_file and os.path.exists(custom_bgm_file):
+                params.bgm_file = custom_bgm_file
st.write(f":red[已选择自定义背景音乐]:**{custom_bgm_file}**") + params.bgm_volume = st.selectbox( + tr("Background Music Volume"), + options=[0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0], + index=2, + ) + +with right_panel: + with st.container(border=True): + st.write(tr("Subtitle Settings")) + params.subtitle_enabled = st.checkbox(tr("Enable Subtitles"), value=True) + font_names = get_all_fonts() + saved_font_name = config.ui.get("font_name", "MicrosoftYaHeiBold.ttc") + saved_font_name_index = 0 + if saved_font_name in font_names: + saved_font_name_index = font_names.index(saved_font_name) + params.font_name = st.selectbox( + tr("Font"), font_names, index=saved_font_name_index + ) + config.ui["font_name"] = params.font_name + + subtitle_positions = [ + (tr("Top"), "top"), + (tr("Center"), "center"), + (tr("Bottom"), "bottom"), + (tr("Custom"), "custom"), + ] + selected_index = st.selectbox( + tr("Position"), + index=2, + options=range(len(subtitle_positions)), + format_func=lambda x: subtitle_positions[x][0], + ) + params.subtitle_position = subtitle_positions[selected_index][1] + + if params.subtitle_position == "custom": + custom_position = st.text_input( + tr("Custom Position (% from top)"), + value="70.0", + key="custom_position_input", + ) + try: + params.custom_position = float(custom_position) + if params.custom_position < 0 or params.custom_position > 100: + st.error(tr("Please enter a value between 0 and 100")) + except ValueError: + st.error(tr("Please enter a valid number")) + + font_cols = st.columns([0.3, 0.7]) + with font_cols[0]: + saved_text_fore_color = config.ui.get("text_fore_color", "#FFFFFF") + params.text_fore_color = st.color_picker( + tr("Font Color"), saved_text_fore_color + ) + config.ui["text_fore_color"] = params.text_fore_color + + with font_cols[1]: + saved_font_size = config.ui.get("font_size", 60) + params.font_size = st.slider(tr("Font Size"), 30, 100, saved_font_size) + config.ui["font_size"] = params.font_size + + stroke_cols = st.columns([0.3, 0.7]) + with stroke_cols[0]: + params.stroke_color = st.color_picker(tr("Stroke Color"), "#000000") + with stroke_cols[1]: + params.stroke_width = st.slider(tr("Stroke Width"), 0.0, 10.0, 1.5) + +start_button = st.button(tr("Generate Video"), use_container_width=True, type="primary") +if start_button: + config.save_config() + task_id = str(uuid4()) + if not params.video_subject and not params.video_script: + st.error(tr("Video Script and Subject Cannot Both Be Empty")) + scroll_to_bottom() + st.stop() + + if params.video_source not in ["pexels", "pixabay", "local"]: + st.error(tr("Please Select a Valid Video Source")) + scroll_to_bottom() + st.stop() + + if params.video_source == "pexels" and not config.app.get("pexels_api_keys", ""): + st.error(tr("Please Enter the Pexels API Key")) + scroll_to_bottom() + st.stop() + + if params.video_source == "pixabay" and not config.app.get("pixabay_api_keys", ""): + st.error(tr("Please Enter the Pixabay API Key")) + scroll_to_bottom() + st.stop() + + if uploaded_files: + local_videos_dir = utils.storage_dir("local_videos", create=True) + for file in uploaded_files: + file_path = os.path.join(local_videos_dir, f"{file.file_id}_{file.name}") + with open(file_path, "wb") as f: + f.write(file.getbuffer()) + m = MaterialInfo() + m.provider = "local" + m.url = file_path + if not params.video_materials: + params.video_materials = [] + params.video_materials.append(m) + + log_container = st.empty() + log_records = [] + + def log_received(msg): + if config.ui["hide_log"]: + return + 
+        with log_container:
+            log_records.append(msg)
+            st.code("\n".join(log_records))
+
+    logger.add(log_received)
+
+    st.toast(tr("Generating Video"))
+    logger.info(tr("Start Generating Video"))
+    logger.info(utils.to_json(params))
+    scroll_to_bottom()
+
+    result = tm.start(task_id=task_id, params=params)
+    if not result or "videos" not in result:
+        st.error(tr("Video Generation Failed"))
+        logger.error(tr("Video Generation Failed"))
+        scroll_to_bottom()
+        st.stop()
+
+    video_files = result.get("videos", [])
+    st.success(tr("Video Generation Completed"))
+    try:
+        if video_files:
+            player_cols = st.columns(len(video_files) * 2 + 1)
+            for i, url in enumerate(video_files):
+                player_cols[i * 2 + 1].video(url)
+    except Exception:
+        pass
+
+    open_task_folder(task_id)
+    logger.info(tr("Video Generation Completed"))
+    scroll_to_bottom()
+
+config.save_config()
diff --git a/webui/i18n/de.json b/webui/i18n/de.json
new file mode 100644
index 0000000000000000000000000000000000000000..cedc3b74c2f360464c1c741d18f4185512ad1305
--- /dev/null
+++ b/webui/i18n/de.json
@@ -0,0 +1,105 @@
+{
+    "Language": "Deutsch",
+    "Translation": {
+        "Login Required": "Anmeldung erforderlich",
+        "Please login to access settings": "Bitte melden Sie sich an, um auf die Einstellungen zuzugreifen",
+        "Username": "Benutzername",
+        "Password": "Passwort",
+        "Login": "Anmelden",
+        "Login Error": "Anmeldefehler",
+        "Incorrect username or password": "Falscher Benutzername oder falsches Passwort",
+        "Please enter your username and password": "Bitte geben Sie Ihren Benutzernamen und Ihr Passwort ein",
+        "Video Script Settings": "**Drehbuch / Thema des Videos**",
+        "Video Subject": "Worum soll es in dem Video gehen? (Geben Sie ein Keyword an, :red[dank KI wird automatisch ein Drehbuch generiert])",
+        "Script Language": "Welche Sprache soll zum Generieren von Drehbüchern verwendet werden? :red[KI generiert anhand dieses Begriffs das Drehbuch]",
+        "Generate Video Script and Keywords": "Klicken Sie hier, um mithilfe von KI ein [Video Drehbuch] und [Video Keywords] basierend auf dem **Keyword** zu generieren.",
+        "Auto Detect": "Automatisch erkennen",
+        "Video Script": "Drehbuch (Storybook) (:blue[① Optional, KI generiert ② Die richtige Zeichensetzung hilft bei der Erstellung von Untertiteln])",
+        "Generate Video Keywords": "Klicken Sie, um mithilfe von KI [Video Keywords] basierend auf dem **Drehbuch** zu generieren",
+        "Please Enter the Video Subject": "Bitte geben Sie zuerst das Drehbuch an",
+        "Generating Video Script and Keywords": "KI generiert ein Drehbuch und Schlüsselwörter...",
+        "Generating Video Keywords": "KI generiert Video-Schlüsselwörter...",
+        "Video Keywords": "Video Schlüsselwörter (:blue[① Optional, KI generiert ② Verwenden Sie **, (Kommas)** zur Trennung der Wörter, in englischer Sprache])",
+        "Video Settings": "**Video Einstellungen**",
+        "Video Concat Mode": "Videoverkettungsmodus",
+        "Random": "Zufällige Verkettung (empfohlen)",
+        "Sequential": "Sequentielle Verkettung",
+        "Video Transition Mode": "Video Übergangsmodus",
+        "None": "Kein Übergang",
+        "Shuffle": "Zufällige Übergänge",
+        "FadeIn": "FadeIn",
+        "FadeOut": "FadeOut",
+        "SlideIn": "SlideIn",
+        "SlideOut": "SlideOut",
+        "Video Ratio": "Video-Seitenverhältnis",
+        "Portrait": "Portrait 9:16",
+        "Landscape": "Landschaft 16:9",
+        "Clip Duration": "Maximale Dauer einzelner Videoclips in Sekunden",
+        "Number of Videos Generated Simultaneously": "Anzahl der parallel generierten Videos",
+        "Audio Settings": "**Audio Einstellungen**",
+        "Speech Synthesis": "Sprachausgabe",
+        "Speech Region": "Region(:red[Erforderlich,[Region abrufen](https://portal.azure.com/#view/Microsoft_Azure_ProjectOxford/CognitiveServicesHub/~/SpeechServices)])",
+        "Speech Key": "API-Schlüssel(:red[Erforderlich,[API-Schlüssel abrufen](https://portal.azure.com/#view/Microsoft_Azure_ProjectOxford/CognitiveServicesHub/~/SpeechServices)])",
+        "Speech Volume": "Lautstärke der Sprachausgabe",
+        "Speech Rate": "Lesegeschwindigkeit (1,0 bedeutet 1x)",
+        "Male": "Männlich",
+        "Female": "Weiblich",
+        "Background Music": "Hintergrundmusik",
+        "No Background Music": "Ohne Hintergrundmusik",
+        "Random Background Music": "Zufällig erzeugte Hintergrundmusik",
+        "Custom Background Music": "Benutzerdefinierte Hintergrundmusik",
+        "Custom Background Music File": "Bitte geben Sie den Pfad zur Musikdatei an:",
+        "Background Music Volume": "Lautstärke: (0.2 entspricht 20%, sollte nicht zu laut sein)",
+        "Subtitle Settings": "**Untertitel-Einstellungen**",
+        "Enable Subtitles": "Untertitel aktivieren (Wenn diese Option deaktiviert ist, werden die Einstellungen nicht genutzt)",
+        "Font": "Schriftart des Untertitels",
+        "Position": "Ausrichtung des Untertitels",
+        "Top": "Oben",
+        "Center": "Mittig",
+        "Bottom": "Unten (empfohlen)",
+        "Custom": "Benutzerdefinierte Position (70, was 70% von oben bedeutet)",
+        "Font Size": "Schriftgröße für Untertitel",
+        "Font Color": "Schriftfarbe",
+        "Stroke Color": "Konturfarbe",
+        "Stroke Width": "Breite der Untertitelkontur",
+        "Generate Video": "Generiere Videos durch KI",
+        "Video Script and Subject Cannot Both Be Empty": "Das Video-Thema und Drehbuch dürfen nicht beide leer sein",
+        "Generating Video": "Video wird erstellt, bitte warten...",
+        "Start Generating Video": "Beginne mit der Generierung",
+        "Video Generation Completed": "Video erfolgreich generiert",
+        "Video Generation Failed": "Videogenerierung fehlgeschlagen",
"You can download the generated video from the following links": "Sie können das generierte Video über die folgenden Links herunterladen", + "Basic Settings": "**Grundeinstellungen** (:blue[Klicken zum Erweitern])", + "Language": "Sprache", + "Pexels API Key": "Pexels API-Schlüssel ([API-Schlüssel abrufen](https://www.pexels.com/api/))", + "Pixabay API Key": "Pixabay API-Schlüssel ([API-Schlüssel abrufen](https://pixabay.com/api/docs/#api_search_videos))", + "LLM Provider": "KI-Modellanbieter", + "API Key": "API-Schlüssel (:red[Erforderlich])", + "Base Url": "Basis-URL", + "Account ID": "Konto-ID (Aus dem Cloudflare-Dashboard)", + "Model Name": "Modellname", + "Please Enter the LLM API Key": "Bitte geben Sie den **KI-Modell API-Schlüssel** ein", + "Please Enter the Pexels API Key": "Bitte geben Sie den **Pexels API-Schlüssel** ein", + "Please Enter the Pixabay API Key": "Bitte geben Sie den **Pixabay API-Schlüssel** ein", + "Get Help": "Wenn Sie Hilfe benötigen oder Fragen haben, können Sie dem Discord beitreten: https://harryai.cc", + "Video Source": "Videoquelle", + "TikTok": "TikTok (TikTok-Unterstützung kommt bald)", + "Bilibili": "Bilibili (Bilibili-Unterstützung kommt bald)", + "Xiaohongshu": "Xiaohongshu (Xiaohongshu-Unterstützung kommt bald)", + "Local file": "Lokale Datei", + "Play Voice": "Sprachausgabe abspielen", + "Voice Example": "Dies ist ein Beispieltext zum Testen der Sprachsynthese", + "Synthesizing Voice": "Sprachsynthese läuft, bitte warten...", + "TTS Provider": "Sprachsynthese-Anbieter auswählen", + "TTS Servers": "TTS-Server", + "No voices available for the selected TTS server. Please select another server.": "Keine Stimmen für den ausgewählten TTS-Server verfügbar. Bitte wählen Sie einen anderen Server.", + "SiliconFlow API Key": "SiliconFlow API-Schlüssel", + "SiliconFlow TTS Settings": "SiliconFlow TTS-Einstellungen", + "Speed: Range [0.25, 4.0], default is 1.0": "Geschwindigkeit: Bereich [0.25, 4.0], Standardwert ist 1.0", + "Volume: Uses Speech Volume setting, default 1.0 maps to gain 0": "Lautstärke: Verwendet die Sprachlautstärke-Einstellung, Standardwert 1.0 entspricht Verstärkung 0", + "Hide Log": "Protokoll ausblenden", + "Hide Basic Settings": "Basis-Einstellungen ausblenden\n\nWenn diese Option deaktiviert ist, wird die Basis-Einstellungen-Leiste nicht auf der Seite angezeigt.\n\nWenn Sie sie erneut anzeigen möchten, setzen Sie `hide_config = false` in `config.toml`", + "LLM Settings": "**LLM-Einstellungen**", + "Video Source Settings": "**Videoquellen-Einstellungen**" + } +} \ No newline at end of file diff --git a/webui/i18n/en.json b/webui/i18n/en.json new file mode 100644 index 0000000000000000000000000000000000000000..c3c9ac21cf02b648c77bbd77ae9dc50fa42b497a --- /dev/null +++ b/webui/i18n/en.json @@ -0,0 +1,105 @@ +{ + "Language": "English", + "Translation": { + "Login Required": "Login Required", + "Please login to access settings": "Please login to access settings", + "Username": "Username", + "Password": "Password", + "Login": "Login", + "Login Error": "Login Error", + "Incorrect username or password": "Incorrect username or password", + "Please enter your username and password": "Please enter your username and password", + "Video Script Settings": "**Video Script Settings**", + "Video Subject": "Video Subject (Provide a keyword, :red[AI will automatically generate] video script)", + "Script Language": "Language for Generating Video Script (AI will automatically output based on the language of your subject)", + "Generate Video Script and 
Keywords": "Click to use AI to generate [Video Script] and [Video Keywords] based on **subject**", + "Auto Detect": "Auto Detect", + "Video Script": "Video Script (:blue[① Optional, AI generated ② Proper punctuation helps with subtitle generation])", + "Generate Video Keywords": "Click to use AI to generate [Video Keywords] based on **script**", + "Please Enter the Video Subject": "Please Enter the Video Script First", + "Generating Video Script and Keywords": "AI is generating video script and keywords...", + "Generating Video Keywords": "AI is generating video keywords...", + "Video Keywords": "Video Keywords (:blue[① Optional, AI generated ② Use **English commas** for separation, English only])", + "Video Settings": "**Video Settings**", + "Video Concat Mode": "Video Concatenation Mode", + "Random": "Random Concatenation (Recommended)", + "Sequential": "Sequential Concatenation", + "Video Transition Mode": "Video Transition Mode", + "None": "None", + "Shuffle": "Shuffle", + "FadeIn": "FadeIn", + "FadeOut": "FadeOut", + "SlideIn": "SlideIn", + "SlideOut": "SlideOut", + "Video Ratio": "Video Aspect Ratio", + "Portrait": "Portrait 9:16", + "Landscape": "Landscape 16:9", + "Clip Duration": "Maximum Duration of Video Clips (seconds)", + "Number of Videos Generated Simultaneously": "Number of Videos Generated Simultaneously", + "Audio Settings": "**Audio Settings**", + "Speech Synthesis": "Speech Synthesis Voice", + "Speech Region": "Region(:red[Required,[Get Region](https://portal.azure.com/#view/Microsoft_Azure_ProjectOxford/CognitiveServicesHub/~/SpeechServices)])", + "Speech Key": "API Key(:red[Required,[Get API Key](https://portal.azure.com/#view/Microsoft_Azure_ProjectOxford/CognitiveServicesHub/~/SpeechServices)])", + "Speech Volume": "Speech Volume (1.0 represents 100%)", + "Speech Rate": "Speech Rate (1.0 means 1x speed)", + "Male": "Male", + "Female": "Female", + "Background Music": "Background Music", + "No Background Music": "No Background Music", + "Random Background Music": "Random Background Music", + "Custom Background Music": "Custom Background Music", + "Custom Background Music File": "Please enter the file path for custom background music:", + "Background Music Volume": "Background Music Volume (0.2 represents 20%, background music should not be too loud)", + "Subtitle Settings": "**Subtitle Settings**", + "Enable Subtitles": "Enable Subtitles (If unchecked, the settings below will not take effect)", + "Font": "Subtitle Font", + "Position": "Subtitle Position", + "Top": "Top", + "Center": "Center", + "Bottom": "Bottom (Recommended)", + "Custom": "Custom position (70, indicating 70% down from the top)", + "Font Size": "Subtitle Font Size", + "Font Color": "Subtitle Font Color", + "Stroke Color": "Subtitle Outline Color", + "Stroke Width": "Subtitle Outline Width", + "Generate Video": "Generate Video", + "Video Script and Subject Cannot Both Be Empty": "Video Subject and Video Script cannot both be empty", + "Generating Video": "Generating video, please wait...", + "Start Generating Video": "Start Generating Video", + "Video Generation Completed": "Video Generation Completed", + "Video Generation Failed": "Video Generation Failed", + "You can download the generated video from the following links": "You can download the generated video from the following links", + "Pexels API Key": "Pexels API Key ([Get API Key](https://www.pexels.com/api/))", + "Pixabay API Key": "Pixabay API Key ([Get API Key](https://pixabay.com/api/docs/#api_search_videos))", + "Basic Settings": "**Basic 
+        "Basic Settings": "**Basic Settings** (:blue[Click to expand])",
+        "Language": "Language",
+        "LLM Provider": "LLM Provider",
+        "API Key": "API Key (:red[Required])",
+        "Base Url": "Base Url",
+        "Account ID": "Account ID (Get from Cloudflare dashboard)",
+        "Model Name": "Model Name",
+        "Please Enter the LLM API Key": "Please Enter the **LLM API Key**",
+        "Please Enter the Pexels API Key": "Please Enter the **Pexels API Key**",
+        "Please Enter the Pixabay API Key": "Please Enter the **Pixabay API Key**",
+        "Get Help": "If you need help or have any questions, you can join the Discord for help: https://harryai.cc",
+        "Video Source": "Video Source",
+        "TikTok": "TikTok (TikTok support is coming soon)",
+        "Bilibili": "Bilibili (Bilibili support is coming soon)",
+        "Xiaohongshu": "Xiaohongshu (Xiaohongshu support is coming soon)",
+        "Local file": "Local file",
+        "Play Voice": "Play Voice",
+        "Voice Example": "This is an example text for testing speech synthesis",
+        "Synthesizing Voice": "Synthesizing voice, please wait...",
+        "TTS Provider": "Select the voice synthesis provider",
+        "TTS Servers": "TTS Servers",
+        "No voices available for the selected TTS server. Please select another server.": "No voices available for the selected TTS server. Please select another server.",
+        "SiliconFlow API Key": "SiliconFlow API Key [Click to get](https://cloud.siliconflow.cn/account/ak)",
+        "SiliconFlow TTS Settings": "SiliconFlow TTS Settings",
+        "Speed: Range [0.25, 4.0], default is 1.0": "Speed: Range [0.25, 4.0], default is 1.0",
+        "Volume: Uses Speech Volume setting, default 1.0 maps to gain 0": "Volume: Uses Speech Volume setting, default 1.0 maps to gain 0",
+        "Hide Log": "Hide Log",
+        "Hide Basic Settings": "Hide Basic Settings\n\nWhen hidden, the basic settings panel will not be displayed on the page.\n\nIf you need to display it again, please set `hide_config = false` in `config.toml`",
+        "LLM Settings": "**LLM Settings**",
+        "Video Source Settings": "**Video Source Settings**"
+    }
+}
\ No newline at end of file
diff --git a/webui/i18n/pt.json b/webui/i18n/pt.json
new file mode 100644
index 0000000000000000000000000000000000000000..6a9d47b8ee3053f2d73a9cff9315f4bb1d0dcea5
--- /dev/null
+++ b/webui/i18n/pt.json
@@ -0,0 +1,105 @@
+{
+    "Language": "Português Brasileiro",
+    "Translation": {
+        "Login Required": "Login Necessário",
+        "Please login to access settings": "Por favor, faça login para acessar as configurações",
+        "Username": "Nome de usuário",
+        "Password": "Senha",
+        "Login": "Entrar",
+        "Login Error": "Erro de Login",
+        "Incorrect username or password": "Nome de usuário ou senha incorretos",
+        "Please enter your username and password": "Por favor, digite seu nome de usuário e senha",
+        "Video Script Settings": "**Configurações do Roteiro do Vídeo**",
+        "Video Subject": "Tema do Vídeo (Forneça uma palavra-chave, :red[a IA irá gerar automaticamente] o roteiro do vídeo)",
+        "Script Language": "Idioma para Gerar o Roteiro do Vídeo (a IA irá gerar automaticamente com base no idioma do seu tema)",
+        "Generate Video Script and Keywords": "Clique para usar a IA para gerar o [Roteiro do Vídeo] e as [Palavras-chave do Vídeo] com base no **tema**",
+        "Auto Detect": "Detectar Automaticamente",
+        "Video Script": "Roteiro do Vídeo (:blue[① Opcional, gerado pela IA ② Pontuação adequada ajuda na geração de legendas])",
+        "Generate Video Keywords": "Clique para usar a IA para gerar [Palavras-chave do Vídeo] com base no **roteiro**",
+        "Please Enter the Video Subject": "Por favor, insira o Roteiro do Vídeo primeiro",
and Keywords": "A IA está gerando o roteiro do vídeo e as palavras-chave...", + "Generating Video Keywords": "A IA está gerando as palavras-chave do vídeo...", + "Video Keywords": "Palavras-chave do Vídeo (:blue[① Opcional, gerado pela IA ② Use **vírgulas em inglês** para separar, somente em inglês])", + "Video Settings": "**Configurações do Vídeo**", + "Video Concat Mode": "Modo de Concatenação de Vídeo", + "Random": "Concatenação Aleatória (Recomendado)", + "Sequential": "Concatenação Sequencial", + "Video Transition Mode": "Modo de Transição de Vídeo", + "None": "Nenhuma Transição", + "Shuffle": "Transição Aleatória", + "FadeIn": "FadeIn", + "FadeOut": "FadeOut", + "SlideIn": "SlideIn", + "SlideOut": "SlideOut", + "Video Ratio": "Proporção do Vídeo", + "Portrait": "Retrato 9:16", + "Landscape": "Paisagem 16:9", + "Clip Duration": "Duração Máxima dos Clipes de Vídeo (segundos)", + "Number of Videos Generated Simultaneously": "Número de Vídeos Gerados Simultaneamente", + "Audio Settings": "**Configurações de Áudio**", + "Speech Synthesis": "Voz de Síntese de Fala", + "Speech Region": "Região(:red[Obrigatório,[Obter Região](https://portal.azure.com/#view/Microsoft_Azure_ProjectOxford/CognitiveServicesHub/~/SpeechServices)])", + "Speech Key": "Chave da API(:red[Obrigatório,[Obter Chave da API](https://portal.azure.com/#view/Microsoft_Azure_ProjectOxford/CognitiveServicesHub/~/SpeechServices)])", + "Speech Volume": "Volume da Fala (1.0 representa 100%)", + "Speech Rate": "Velocidade da Fala (1.0 significa velocidade 1x)", + "Male": "Masculino", + "Female": "Feminino", + "Background Music": "Música de Fundo", + "No Background Music": "Sem Música de Fundo", + "Random Background Music": "Música de Fundo Aleatória", + "Custom Background Music": "Música de Fundo Personalizada", + "Custom Background Music File": "Por favor, insira o caminho do arquivo para a música de fundo personalizada:", + "Background Music Volume": "Volume da Música de Fundo (0.2 representa 20%, a música de fundo não deve ser muito alta)", + "Subtitle Settings": "**Configurações de Legendas**", + "Enable Subtitles": "Ativar Legendas (Se desmarcado, as configurações abaixo não terão efeito)", + "Font": "Fonte da Legenda", + "Position": "Posição da Legenda", + "Top": "Superior", + "Center": "Centralizar", + "Bottom": "Inferior (Recomendado)", + "Custom": "Posição personalizada (70, indicando 70% abaixo do topo)", + "Font Size": "Tamanho da Fonte da Legenda", + "Font Color": "Cor da Fonte da Legenda", + "Stroke Color": "Cor do Contorno da Legenda", + "Stroke Width": "Largura do Contorno da Legenda", + "Generate Video": "Gerar Vídeo", + "Video Script and Subject Cannot Both Be Empty": "O Tema do Vídeo e o Roteiro do Vídeo não podem estar ambos vazios", + "Generating Video": "Gerando vídeo, por favor aguarde...", + "Start Generating Video": "Começar a Gerar Vídeo", + "Video Generation Completed": "Geração do Vídeo Concluída", + "Video Generation Failed": "Falha na Geração do Vídeo", + "You can download the generated video from the following links": "Você pode baixar o vídeo gerado a partir dos seguintes links", + "Basic Settings": "**Configurações Básicas** (:blue[Clique para expandir])", + "Language": "Idioma", + "Pexels API Key": "Chave da API do Pexels ([Obter Chave da API](https://www.pexels.com/api/))", + "Pixabay API Key": "Chave da API do Pixabay ([Obter Chave da API](https://pixabay.com/api/docs/#api_search_videos))", + "LLM Provider": "Provedor LLM", + "API Key": "Chave da API (:red[Obrigatório])", + "Base Url": "URL 
Base", + "Account ID": "ID da Conta (Obter no painel do Cloudflare)", + "Model Name": "Nome do Modelo", + "Please Enter the LLM API Key": "Por favor, insira a **Chave da API LLM**", + "Please Enter the Pexels API Key": "Por favor, insira a **Chave da API do Pexels**", + "Please Enter the Pixabay API Key": "Por favor, insira a **Chave da API do Pixabay**", + "Get Help": "Se precisar de ajuda ou tiver alguma dúvida, você pode entrar no discord para obter ajuda: https://harryai.cc", + "Video Source": "Fonte do Vídeo", + "TikTok": "TikTok (Suporte para TikTok em breve)", + "Bilibili": "Bilibili (Suporte para Bilibili em breve)", + "Xiaohongshu": "Xiaohongshu (Suporte para Xiaohongshu em breve)", + "Local file": "Arquivo local", + "Play Voice": "Reproduzir Voz", + "Voice Example": "Este é um exemplo de texto para testar a síntese de fala", + "Synthesizing Voice": "Sintetizando voz, por favor aguarde...", + "TTS Provider": "Selecione o provedor de síntese de voz", + "TTS Servers": "Servidores TTS", + "No voices available for the selected TTS server. Please select another server.": "Não há vozes disponíveis para o servidor TTS selecionado. Por favor, selecione outro servidor.", + "SiliconFlow API Key": "Chave API do SiliconFlow", + "SiliconFlow TTS Settings": "Configurações do SiliconFlow TTS", + "Speed: Range [0.25, 4.0], default is 1.0": "Velocidade: Intervalo [0.25, 4.0], o padrão é 1.0", + "Volume: Uses Speech Volume setting, default 1.0 maps to gain 0": "Volume: Usa a configuração de Volume de Fala, o padrão 1.0 corresponde ao ganho 0", + "Hide Log": "Ocultar Log", + "Hide Basic Settings": "Ocultar Configurações Básicas\n\nOculto, o painel de configurações básicas não será exibido na página.\n\nSe precisar exibi-lo novamente, defina `hide_config = false` em `config.toml`", + "LLM Settings": "**Configurações do LLM**", + "Video Source Settings": "**Configurações da Fonte do Vídeo**" + } +} \ No newline at end of file diff --git a/webui/i18n/vi.json b/webui/i18n/vi.json new file mode 100644 index 0000000000000000000000000000000000000000..c1a604b7fd5c6f00ac86a535b248621213cb3b8a --- /dev/null +++ b/webui/i18n/vi.json @@ -0,0 +1,105 @@ +{ + "Language": "Tiếng Việt", + "Translation": { + "Login Required": "Yêu cầu đăng nhập", + "Please login to access settings": "Vui lòng đăng nhập để truy cập cài đặt", + "Username": "Tên đăng nhập", + "Password": "Mật khẩu", + "Login": "Đăng nhập", + "Login Error": "Lỗi đăng nhập", + "Incorrect username or password": "Tên đăng nhập hoặc mật khẩu không chính xác", + "Please enter your username and password": "Vui lòng nhập tên đăng nhập và mật khẩu của bạn", + "Video Script Settings": "**Cài Đặt Kịch Bản Video**", + "Video Subject": "Chủ Đề Video (Cung cấp một từ khóa, :red[AI sẽ tự động tạo ra] kịch bản video)", + "Script Language": "Ngôn Ngữ cho Việc Tạo Kịch Bản Video (AI sẽ tự động xuất ra dựa trên ngôn ngữ của chủ đề của bạn)", + "Generate Video Script and Keywords": "Nhấn để sử dụng AI để tạo [Kịch Bản Video] và [Từ Khóa Video] dựa trên **chủ đề**", + "Auto Detect": "Tự Động Phát Hiện", + "Video Script": "Kịch Bản Video (:blue[① Tùy chọn, AI tạo ra ② Dấu câu chính xác giúp việc tạo phụ đề)", + "Generate Video Keywords": "Nhấn để sử dụng AI để tạo [Từ Khóa Video] dựa trên **kịch bản**", + "Please Enter the Video Subject": "Vui lòng Nhập Kịch Bản Video Trước", + "Generating Video Script and Keywords": "AI đang tạo kịch bản video và từ khóa...", + "Generating Video Keywords": "AI đang tạo từ khóa video...", + "Video Keywords": "Từ Khóa Video (:blue[① Tùy chọn, 
+        "Video Settings": "**Cài Đặt Video**",
+        "Video Concat Mode": "Chế Độ Nối Video",
+        "Random": "Nối Ngẫu Nhiên (Được Khuyến Nghị)",
+        "Sequential": "Nối Theo Thứ Tự",
+        "Video Transition Mode": "Chế Độ Chuyển Đổi Video",
+        "None": "Không Có Chuyển Đổi",
+        "Shuffle": "Chuyển Đổi Ngẫu Nhiên",
+        "FadeIn": "FadeIn",
+        "FadeOut": "FadeOut",
+        "SlideIn": "SlideIn",
+        "SlideOut": "SlideOut",
+        "Video Ratio": "Tỷ Lệ Khung Hình Video",
+        "Portrait": "Dọc 9:16",
+        "Landscape": "Ngang 16:9",
+        "Clip Duration": "Thời Lượng Tối Đa Của Đoạn Video (giây)",
+        "Number of Videos Generated Simultaneously": "Số Video Được Tạo Ra Đồng Thời",
+        "Audio Settings": "**Cài Đặt Âm Thanh**",
+        "Speech Synthesis": "Giọng Đọc Văn Bản",
+        "Speech Region": "Vùng(:red[Bắt Buộc,[Lấy Vùng](https://portal.azure.com/#view/Microsoft_Azure_ProjectOxford/CognitiveServicesHub/~/SpeechServices)])",
+        "Speech Key": "Khóa API(:red[Bắt Buộc,[Lấy Khóa API](https://portal.azure.com/#view/Microsoft_Azure_ProjectOxford/CognitiveServicesHub/~/SpeechServices)])",
+        "Speech Volume": "Âm Lượng Giọng Đọc (1.0 đại diện cho 100%)",
+        "Speech Rate": "Tốc độ đọc (1.0 biểu thị tốc độ gốc)",
+        "Male": "Nam",
+        "Female": "Nữ",
+        "Background Music": "Âm Nhạc Nền",
+        "No Background Music": "Không Có Âm Nhạc Nền",
+        "Random Background Music": "Âm Nhạc Nền Ngẫu Nhiên",
+        "Custom Background Music": "Âm Nhạc Nền Tùy Chỉnh",
+        "Custom Background Music File": "Vui lòng nhập đường dẫn tệp cho âm nhạc nền tùy chỉnh:",
+        "Background Music Volume": "Âm Lượng Âm Nhạc Nền (0.2 đại diện cho 20%, âm nhạc nền không nên quá to)",
+        "Subtitle Settings": "**Cài Đặt Phụ Đề**",
+        "Enable Subtitles": "Bật Phụ Đề (Nếu không chọn, các cài đặt dưới đây sẽ không có hiệu lực)",
+        "Font": "Phông Chữ Phụ Đề",
+        "Position": "Vị Trí Phụ Đề",
+        "Top": "Trên",
+        "Center": "Giữa",
+        "Bottom": "Dưới (Được Khuyến Nghị)",
+        "Custom": "Vị trí tùy chỉnh (70, chỉ ra là cách đầu trang 70%)",
+        "Font Size": "Cỡ Chữ Phụ Đề",
+        "Font Color": "Màu Chữ Phụ Đề",
+        "Stroke Color": "Màu Viền Phụ Đề",
+        "Stroke Width": "Độ Rộng Viền Phụ Đề",
+        "Generate Video": "Tạo Video",
+        "Video Script and Subject Cannot Both Be Empty": "Chủ Đề Video và Kịch Bản Video không thể cùng trống",
+        "Generating Video": "Đang tạo video, vui lòng đợi...",
+        "Start Generating Video": "Bắt Đầu Tạo Video",
+        "Video Generation Completed": "Hoàn Tất Tạo Video",
+        "Video Generation Failed": "Tạo Video Thất Bại",
+        "You can download the generated video from the following links": "Bạn có thể tải video được tạo ra từ các liên kết sau",
+        "Basic Settings": "**Cài Đặt Cơ Bản** (:blue[Nhấp để mở rộng])",
+        "Language": "Ngôn Ngữ",
+        "Pexels API Key": "Khóa API Pexels ([Lấy Khóa API](https://www.pexels.com/api/))",
+        "Pixabay API Key": "Khóa API Pixabay ([Lấy Khóa API](https://pixabay.com/api/docs/#api_search_videos))",
+        "LLM Provider": "Nhà Cung Cấp LLM",
+        "API Key": "Khóa API (:red[Bắt Buộc])",
+        "Base Url": "Url Cơ Bản",
+        "Account ID": "ID Tài Khoản (Lấy từ bảng điều khiển Cloudflare)",
+        "Model Name": "Tên Mô Hình",
+        "Please Enter the LLM API Key": "Vui lòng Nhập **Khóa API LLM**",
+        "Please Enter the Pexels API Key": "Vui lòng Nhập **Khóa API Pexels**",
+        "Please Enter the Pixabay API Key": "Vui lòng Nhập **Khóa API Pixabay**",
+        "Get Help": "Nếu bạn cần giúp đỡ hoặc có bất kỳ câu hỏi nào, bạn có thể tham gia Discord để được giúp đỡ: https://harryai.cc",
+        "Video Source": "Nguồn Video",
+        "TikTok": "TikTok (Hỗ trợ TikTok sắp ra mắt)",
"Bilibili": "Bilibili (Hỗ trợ Bilibili sắp ra mắt)", + "Xiaohongshu": "Xiaohongshu (Hỗ trợ Xiaohongshu sắp ra mắt)", + "Local file": "Tệp cục bộ", + "Play Voice": "Phát Giọng Nói", + "Voice Example": "Đây là văn bản mẫu để kiểm tra tổng hợp giọng nói", + "Synthesizing Voice": "Đang tổng hợp giọng nói, vui lòng đợi...", + "TTS Provider": "Chọn nhà cung cấp tổng hợp giọng nói", + "TTS Servers": "Máy chủ TTS", + "No voices available for the selected TTS server. Please select another server.": "Không có giọng nói nào cho máy chủ TTS đã chọn. Vui lòng chọn máy chủ khác.", + "SiliconFlow API Key": "Khóa API SiliconFlow", + "SiliconFlow TTS Settings": "Cài đặt SiliconFlow TTS", + "Speed: Range [0.25, 4.0], default is 1.0": "Tốc độ: Phạm vi [0.25, 4.0], mặc định là 1.0", + "Volume: Uses Speech Volume setting, default 1.0 maps to gain 0": "Âm lượng: Sử dụng cài đặt Âm lượng Giọng nói, mặc định 1.0 tương ứng với tăng ích 0", + "Hide Log": "Ẩn Nhật Ký", + "Hide Basic Settings": "Ẩn Cài Đặt Cơ Bản\n\nẨn, thanh cài đặt cơ bản sẽ không hiển thị trên trang web.\n\nNếu bạn muốn hiển thị lại, vui lòng đặt `hide_config = false` trong `config.toml`", + "LLM Settings": "**Cài Đặt LLM**", + "Video Source Settings": "**Cài Đặt Nguồn Video**" + } +} \ No newline at end of file diff --git a/webui/i18n/zh.json b/webui/i18n/zh.json new file mode 100644 index 0000000000000000000000000000000000000000..cb6057eb857ff10a7b8281c1bdf44995ab36a491 --- /dev/null +++ b/webui/i18n/zh.json @@ -0,0 +1,105 @@ +{ + "Language": "简体中文", + "Translation": { + "Login Required": "需要登录", + "Please login to access settings": "请登录后访问配置设置 (:gray[默认用户名: admin, 密码: admin, 您可以在 config.toml 中修改])", + "Username": "用户名", + "Password": "密码", + "Login": "登录", + "Login Error": "登录错误", + "Incorrect username or password": "用户名或密码不正确", + "Please enter your username and password": "请输入用户名和密码", + "Video Script Settings": "**文案设置**", + "Video Subject": "视频主题(给定一个关键词,:red[AI自动生成]视频文案)", + "Script Language": "生成视频脚本的语言(一般情况AI会自动根据你输入的主题语言输出)", + "Generate Video Script and Keywords": "点击使用AI根据**主题**生成 【视频文案】 和 【视频关键词】", + "Auto Detect": "自动检测", + "Video Script": "视频文案(:blue[①可不填,使用AI生成 ②合理使用标点断句,有助于生成字幕])", + "Generate Video Keywords": "点击使用AI根据**文案**生成【视频关键词】", + "Please Enter the Video Subject": "请先填写视频文案", + "Generating Video Script and Keywords": "AI正在生成视频文案和关键词...", + "Generating Video Keywords": "AI正在生成视频关键词...", + "Video Keywords": "视频关键词(:blue[①可不填,使用AI生成 ②用**英文逗号**分隔,只支持英文])", + "Video Settings": "**视频设置**", + "Video Concat Mode": "视频拼接模式", + "Random": "随机拼接(推荐)", + "Sequential": "顺序拼接", + "Video Transition Mode": "视频转场模式", + "None": "无转场", + "Shuffle": "随机转场", + "FadeIn": "渐入", + "FadeOut": "渐出", + "SlideIn": "滑动入", + "SlideOut": "滑动出", + "Video Ratio": "视频比例", + "Portrait": "竖屏 9:16(抖音视频)", + "Landscape": "横屏 16:9(西瓜视频)", + "Clip Duration": "视频片段最大时长(秒)(**不是视频总长度**,是指每个**合成片段**的长度)", + "Number of Videos Generated Simultaneously": "同时生成视频数量", + "Audio Settings": "**音频设置**", + "Speech Synthesis": "朗读声音(:red[**与文案语言保持一致**。注意:V2版效果更好,但是需要API KEY])", + "Speech Region": "服务区域 (:red[必填,[点击获取](https://portal.azure.com/#view/Microsoft_Azure_ProjectOxford/CognitiveServicesHub/~/SpeechServices)])", + "Speech Key": "API Key (:red[必填,密钥1 或 密钥2 均可 [点击获取](https://portal.azure.com/#view/Microsoft_Azure_ProjectOxford/CognitiveServicesHub/~/SpeechServices)])", + "Speech Volume": "朗读音量(1.0表示100%)", + "Speech Rate": "朗读速度(1.0表示1倍速)", + "Male": "男性", + "Female": "女性", + "Background Music": "背景音乐", + "No Background Music": "无背景音乐", + "Random Background Music": 
"随机背景音乐", + "Custom Background Music": "自定义背景音乐", + "Custom Background Music File": "请输入自定义背景音乐的文件路径", + "Background Music Volume": "背景音乐音量(0.2表示20%,背景声音不宜过高)", + "Subtitle Settings": "**字幕设置**", + "Enable Subtitles": "启用字幕(若取消勾选,下面的设置都将不生效)", + "Font": "字幕字体", + "Position": "字幕位置", + "Top": "顶部", + "Center": "中间", + "Bottom": "底部(推荐)", + "Custom": "自定义位置(70,表示离顶部70%的位置)", + "Font Size": "字幕大小", + "Font Color": "字幕颜色", + "Stroke Color": "描边颜色", + "Stroke Width": "描边粗细", + "Generate Video": "生成视频", + "Video Script and Subject Cannot Both Be Empty": "视频主题 和 视频文案,不能同时为空", + "Generating Video": "正在生成视频,请稍候...", + "Start Generating Video": "开始生成视频", + "Video Generation Completed": "视频生成完成", + "Video Generation Failed": "视频生成失败", + "You can download the generated video from the following links": "你可以从以下链接下载生成的视频", + "Basic Settings": "**基础设置** (:blue[点击展开])", + "Language": "界面语言", + "Pexels API Key": "Pexels API Key ([点击获取](https://www.pexels.com/api/)) :red[推荐使用]", + "Pixabay API Key": "Pixabay API Key ([点击获取](https://pixabay.com/api/docs/#api_search_videos)) :red[可以不用配置,如果 Pexels 无法使用,再选择Pixabay]", + "LLM Provider": "大模型提供商", + "API Key": "API Key (:red[必填,需要到大模型提供商的后台申请])", + "Base Url": "Base Url (可选)", + "Account ID": "账户ID (Cloudflare的dash面板url中获取)", + "Model Name": "模型名称 (:blue[需要到大模型提供商的后台确认被授权的模型名称])", + "Please Enter the LLM API Key": "请先填写大模型 **API Key**", + "Please Enter the Pexels API Key": "请先填写 **Pexels API Key**", + "Please Enter the Pixabay API Key": "请先填写 **Pixabay API Key**", + "Get Help": "有任何问题或建议,可以加入 **微信群** 求助或讨论:https://harryai.cc", + "Video Source": "视频来源", + "TikTok": "抖音 (TikTok 支持中,敬请期待)", + "Bilibili": "哔哩哔哩 (Bilibili 支持中,敬请期待)", + "Xiaohongshu": "小红书 (Xiaohongshu 支持中,敬请期待)", + "Local file": "本地文件", + "Play Voice": "试听语音合成", + "Voice Example": "这是一段测试语音合成的示例文本", + "Synthesizing Voice": "语音合成中,请稍候...", + "TTS Provider": "语音合成提供商", + "TTS Servers": "TTS服务器", + "No voices available for the selected TTS server. Please select another server.": "当前选择的TTS服务器没有可用的声音,请选择其他服务器。", + "SiliconFlow API Key": "硅基流动API密钥 [点击获取](https://cloud.siliconflow.cn/account/ak)", + "SiliconFlow TTS Settings": "硅基流动TTS设置", + "Speed: Range [0.25, 4.0], default is 1.0": "语速范围 [0.25, 4.0],默认值为1.0", + "Volume: Uses Speech Volume setting, default 1.0 maps to gain 0": "音量:使用朗读音量设置,默认值1.0对应增益0", + "Hide Log": "隐藏日志", + "Hide Basic Settings": "隐藏基础设置\n\n隐藏后,基础设置面板将不会显示在页面中。\n\n如需要再次显示,请在 `config.toml` 中设置 `hide_config = false`", + "LLM Settings": "**大模型设置**", + "Video Source Settings": "**视频源设置**" + } +} \ No newline at end of file