matsuap committed on
Commit
73cd951
·
1 Parent(s): 63bca4f

Add speed adjustment feature to audio generation in app.py, including UI slider for playback speed control.

Browse files
Files changed (1) hide show
  1. app.py +26 -2
app.py CHANGED
@@ -15,6 +15,7 @@ from promptic import llm
15
  from pydantic import BaseModel, ValidationError
16
  from pypdf import PdfReader
17
  from tenacity import retry, retry_if_exception_type
 
18
 
19
  # Define multiple sets of instruction templates
20
  INSTRUCTION_TEMPLATES = {
@@ -576,6 +577,7 @@ def generate_audio(
576
  user_feedback: str = None,
577
  original_text: str = None,
578
  debug = False,
 
579
  ) -> tuple:
580
  # Validate API Key
581
  if not os.getenv("OPENAI_API_KEY") and not openai_api_key:
@@ -677,6 +679,17 @@ def generate_audio(
677
  temporary_file.write(audio)
678
  temporary_file.close()
679
 
 
 
 
 
 
 
 
 
 
 
 
680
  # Delete any files in the temp directory that end with .mp3 and are over a day old
681
  for file in glob.glob(f"{temporary_directory}*.mp3"):
682
  if os.path.isfile(file) and time.time() - os.path.getmtime(file) > 24 * 60 * 60:
@@ -782,6 +795,14 @@ with gr.Blocks(title="PDF to Audio", css="""
782
  placeholder="カスタム/ローカルモデルを使う場合はAPIベースURLを入力してください...",
783
  info="カスタムやローカルモデルを使う場合、ここにAPIベースURLを入力してください。例: http://localhost:8080/v1 (llama.cpp RESTサーバー用)",
784
  )
 
 
 
 
 
 
 
 
785
 
786
  with gr.Column(scale=3):
787
  template_dropdown = gr.Dropdown(
@@ -861,6 +882,7 @@ with gr.Blocks(title="PDF to Audio", css="""
861
  prelude_dialog, podcast_dialog_instructions,
862
  edited_transcript, # placeholder for edited_transcript
863
  user_feedback, # placeholder for user_feedback
 
864
  ],
865
  outputs=[audio_output, transcript_output, original_text_output, error_output]
866
  ).then(
@@ -880,7 +902,8 @@ with gr.Blocks(title="PDF to Audio", css="""
880
  fn=lambda use_edit, edit, *args: validate_and_generate_audio(
881
  *args[:12], # All inputs up to podcast_dialog_instructions
882
  edit if use_edit else "", # Use edited transcript if checkbox is checked, otherwise empty string
883
- *args[12:] # user_feedback and original_text_output
 
884
  ),
885
  inputs=[
886
  use_edited_transcript, edited_transcript,
@@ -888,7 +911,8 @@ with gr.Blocks(title="PDF to Audio", css="""
888
  speaker_1_voice, speaker_2_voice, api_base,
889
  intro_instructions, text_instructions, scratch_pad_instructions,
890
  prelude_dialog, podcast_dialog_instructions,
891
- user_feedback, original_text_output
 
892
  ],
893
  outputs=[audio_output, transcript_output, original_text_output, error_output]
894
  ).then(
 
15
  from pydantic import BaseModel, ValidationError
16
  from pypdf import PdfReader
17
  from tenacity import retry, retry_if_exception_type
18
+ from pydub import AudioSegment
19
 
20
  # Define multiple sets of instruction templates
21
  INSTRUCTION_TEMPLATES = {
 
577
  user_feedback: str = None,
578
  original_text: str = None,
579
  debug = False,
580
+ speed: float = 1.0, # 追加
581
  ) -> tuple:
582
  # Validate API Key
583
  if not os.getenv("OPENAI_API_KEY") and not openai_api_key:
 
679
  temporary_file.write(audio)
680
  temporary_file.close()
681
 
682
+ # ここから再生速度変更処理
683
+ if speed != 1.0:
684
+ # pydubでmp3を読み込み、速度変更
685
+ sound = AudioSegment.from_file(temporary_file.name, format="mp3")
686
+ # Change playback speed (note: overriding the frame rate also shifts the pitch; it is NOT preserved)
687
+ sound = sound._spawn(sound.raw_data, overrides={
688
+ "frame_rate": int(sound.frame_rate * speed)
689
+ }).set_frame_rate(sound.frame_rate)
690
+ # 上書き保存
691
+ sound.export(temporary_file.name, format="mp3")
692
+
693
  # Delete any files in the temp directory that end with .mp3 and are over a day old
694
  for file in glob.glob(f"{temporary_directory}*.mp3"):
695
  if os.path.isfile(file) and time.time() - os.path.getmtime(file) > 24 * 60 * 60:
 
795
  placeholder="カスタム/ローカルモデルを使う場合はAPIベースURLを入力してください...",
796
  info="カスタムやローカルモデルを使う場合、ここにAPIベースURLを入力してください。例: http://localhost:8080/v1 (llama.cpp RESTサーバー用)",
797
  )
798
+ speed_slider = gr.Slider(
799
+ minimum=0.5,
800
+ maximum=2.0,
801
+ value=1.0,
802
+ step=0.05,
803
+ label="再生速度 (0.5x~2.0x)",
804
+ info="音声の再生速度を調整できます。デフォルトは1.0(等倍)です。"
805
+ )
806
 
807
  with gr.Column(scale=3):
808
  template_dropdown = gr.Dropdown(
 
882
  prelude_dialog, podcast_dialog_instructions,
883
  edited_transcript, # placeholder for edited_transcript
884
  user_feedback, # placeholder for user_feedback
885
+ speed_slider, # 追加
886
  ],
887
  outputs=[audio_output, transcript_output, original_text_output, error_output]
888
  ).then(
 
902
  fn=lambda use_edit, edit, *args: validate_and_generate_audio(
903
  *args[:12], # All inputs up to podcast_dialog_instructions
904
  edit if use_edit else "", # Use edited transcript if checkbox is checked, otherwise empty string
905
+ *args[12:],
906
+ speed_slider,
907
  ),
908
  inputs=[
909
  use_edited_transcript, edited_transcript,
 
911
  speaker_1_voice, speaker_2_voice, api_base,
912
  intro_instructions, text_instructions, scratch_pad_instructions,
913
  prelude_dialog, podcast_dialog_instructions,
914
+ user_feedback, original_text_output,
915
+ speed_slider,
916
  ],
917
  outputs=[audio_output, transcript_output, original_text_output, error_output]
918
  ).then(