Add speed adjustment feature to audio generation in app.py, including UI slider for playback speed control.
Browse files
app.py
CHANGED
@@ -15,6 +15,7 @@ from promptic import llm
|
|
15 |
from pydantic import BaseModel, ValidationError
|
16 |
from pypdf import PdfReader
|
17 |
from tenacity import retry, retry_if_exception_type
|
|
|
18 |
|
19 |
# Define multiple sets of instruction templates
|
20 |
INSTRUCTION_TEMPLATES = {
|
@@ -576,6 +577,7 @@ def generate_audio(
|
|
576 |
user_feedback: str = None,
|
577 |
original_text: str = None,
|
578 |
debug = False,
|
|
|
579 |
) -> tuple:
|
580 |
# Validate API Key
|
581 |
if not os.getenv("OPENAI_API_KEY") and not openai_api_key:
|
@@ -677,6 +679,17 @@ def generate_audio(
|
|
677 |
temporary_file.write(audio)
|
678 |
temporary_file.close()
|
679 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
680 |
# Delete any files in the temp directory that end with .mp3 and are over a day old
|
681 |
for file in glob.glob(f"{temporary_directory}*.mp3"):
|
682 |
if os.path.isfile(file) and time.time() - os.path.getmtime(file) > 24 * 60 * 60:
|
@@ -782,6 +795,14 @@ with gr.Blocks(title="PDF to Audio", css="""
|
|
782 |
placeholder="カスタム/ローカルモデルを使う場合はAPIベースURLを入力してください...",
|
783 |
info="カスタムやローカルモデルを使う場合、ここにAPIベースURLを入力してください。例: http://localhost:8080/v1 (llama.cpp RESTサーバー用)",
|
784 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
785 |
|
786 |
with gr.Column(scale=3):
|
787 |
template_dropdown = gr.Dropdown(
|
@@ -861,6 +882,7 @@ with gr.Blocks(title="PDF to Audio", css="""
|
|
861 |
prelude_dialog, podcast_dialog_instructions,
|
862 |
edited_transcript, # placeholder for edited_transcript
|
863 |
user_feedback, # placeholder for user_feedback
|
|
|
864 |
],
|
865 |
outputs=[audio_output, transcript_output, original_text_output, error_output]
|
866 |
).then(
|
@@ -880,7 +902,8 @@ with gr.Blocks(title="PDF to Audio", css="""
|
|
880 |
fn=lambda use_edit, edit, *args: validate_and_generate_audio(
|
881 |
*args[:12], # All inputs up to podcast_dialog_instructions
|
882 |
edit if use_edit else "", # Use edited transcript if checkbox is checked, otherwise empty string
|
883 |
-
*args[12:]
|
|
|
884 |
),
|
885 |
inputs=[
|
886 |
use_edited_transcript, edited_transcript,
|
@@ -888,7 +911,8 @@ with gr.Blocks(title="PDF to Audio", css="""
|
|
888 |
speaker_1_voice, speaker_2_voice, api_base,
|
889 |
intro_instructions, text_instructions, scratch_pad_instructions,
|
890 |
prelude_dialog, podcast_dialog_instructions,
|
891 |
-
user_feedback, original_text_output
|
|
|
892 |
],
|
893 |
outputs=[audio_output, transcript_output, original_text_output, error_output]
|
894 |
).then(
|
|
|
15 |
from pydantic import BaseModel, ValidationError
|
16 |
from pypdf import PdfReader
|
17 |
from tenacity import retry, retry_if_exception_type
|
18 |
+
from pydub import AudioSegment
|
19 |
|
20 |
# Define multiple sets of instruction templates
|
21 |
INSTRUCTION_TEMPLATES = {
|
|
|
577 |
user_feedback: str = None,
|
578 |
original_text: str = None,
|
579 |
debug = False,
|
580 |
+
speed: float = 1.0, # 追加
|
581 |
) -> tuple:
|
582 |
# Validate API Key
|
583 |
if not os.getenv("OPENAI_API_KEY") and not openai_api_key:
|
|
|
679 |
temporary_file.write(audio)
|
680 |
temporary_file.close()
|
681 |
|
682 |
+
# ここから再生速度変更処理
|
683 |
+
if speed != 1.0:
|
684 |
+
# pydubでmp3を読み込み、速度変更
|
685 |
+
sound = AudioSegment.from_file(temporary_file.name, format="mp3")
|
686 |
+
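# Change playback speed. NOTE: this frame-rate override technique does NOT preserve pitch; pitch rises/falls in proportion to the speed factor.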
|
687 |
+
sound = sound._spawn(sound.raw_data, overrides={
|
688 |
+
"frame_rate": int(sound.frame_rate * speed)
|
689 |
+
}).set_frame_rate(sound.frame_rate)
|
690 |
+
# 上書き保存
|
691 |
+
sound.export(temporary_file.name, format="mp3")
|
692 |
+
|
693 |
# Delete any files in the temp directory that end with .mp3 and are over a day old
|
694 |
for file in glob.glob(f"{temporary_directory}*.mp3"):
|
695 |
if os.path.isfile(file) and time.time() - os.path.getmtime(file) > 24 * 60 * 60:
|
|
|
795 |
placeholder="カスタム/ローカルモデルを使う場合はAPIベースURLを入力してください...",
|
796 |
info="カスタムやローカルモデルを使う場合、ここにAPIベースURLを入力してください。例: http://localhost:8080/v1 (llama.cpp RESTサーバー用)",
|
797 |
)
|
798 |
+
speed_slider = gr.Slider(
|
799 |
+
minimum=0.5,
|
800 |
+
maximum=2.0,
|
801 |
+
value=1.0,
|
802 |
+
step=0.05,
|
803 |
+
label="再生速度 (0.5x~2.0x)",
|
804 |
+
info="音声の再生速度を調整できます。デフォルトは1.0(等倍)です。"
|
805 |
+
)
|
806 |
|
807 |
with gr.Column(scale=3):
|
808 |
template_dropdown = gr.Dropdown(
|
|
|
882 |
prelude_dialog, podcast_dialog_instructions,
|
883 |
edited_transcript, # placeholder for edited_transcript
|
884 |
user_feedback, # placeholder for user_feedback
|
885 |
+
speed_slider, # 追加
|
886 |
],
|
887 |
outputs=[audio_output, transcript_output, original_text_output, error_output]
|
888 |
).then(
|
|
|
902 |
fn=lambda use_edit, edit, *args: validate_and_generate_audio(
|
903 |
*args[:12], # All inputs up to podcast_dialog_instructions
|
904 |
edit if use_edit else "", # Use edited transcript if checkbox is checked, otherwise empty string
|
905 |
+
*args[12:],
|
906 |
+
speed_slider,
|
907 |
),
|
908 |
inputs=[
|
909 |
use_edited_transcript, edited_transcript,
|
|
|
911 |
speaker_1_voice, speaker_2_voice, api_base,
|
912 |
intro_instructions, text_instructions, scratch_pad_instructions,
|
913 |
prelude_dialog, podcast_dialog_instructions,
|
914 |
+
user_feedback, original_text_output,
|
915 |
+
speed_slider,
|
916 |
],
|
917 |
outputs=[audio_output, transcript_output, original_text_output, error_output]
|
918 |
).then(
|