matsuap committed on
Commit
63bca4f
·
1 Parent(s): e22b652

Refactor app.py to remove unused functions and comments, update UI labels to Japanese, and reorganize input sections for better clarity.

Browse files
Files changed (1) hide show
  1. app.py +58 -92
app.py CHANGED
@@ -16,19 +16,6 @@ from pydantic import BaseModel, ValidationError
16
  from pypdf import PdfReader
17
  from tenacity import retry, retry_if_exception_type
18
 
19
- import re
20
-
21
- def read_readme():
22
- readme_path = Path("README.md")
23
- if readme_path.exists():
24
- with open(readme_path, "r") as file:
25
- content = file.read()
26
- # Use regex to remove metadata enclosed in -- ... --
27
- content = re.sub(r'--.*?--', '', content, flags=re.DOTALL)
28
- return content
29
- else:
30
- return "README.md not found. Please check the repository for more information."
31
-
32
  # Define multiple sets of instruction templates
33
  INSTRUCTION_TEMPLATES = {
34
  ################# PODCAST ##################
@@ -655,16 +642,6 @@ def generate_audio(
655
  edited_transcript=edited_transcript_processed,
656
  user_feedback=user_feedback_processed
657
  )
658
- # llm_output = generate_dialogue(
659
- # '本ガイドブックは、政府情報システム開発におけるアジャイル開発の適用を支援するために用意されたものです。',
660
- # intro_instructions='',
661
- # text_instructions='',
662
- # scratch_pad_instructions='',
663
- # prelude_dialog='',
664
- # podcast_dialog_instructions='',
665
- # edited_transcript='',
666
- # user_feedback=''
667
- # )
668
  print('llm_output:', llm_output)
669
 
670
  # Generate audio from the transcript
@@ -763,108 +740,101 @@ with gr.Blocks(title="PDF to Audio", css="""
763
  """) as demo:
764
 
765
  with gr.Row(elem_id="header"):
766
- with gr.Column(scale=4):
767
- gr.Markdown("# Convert PDFs into an audio podcast, lecture, summary and others\n\nFirst, upload one or more PDFs, select options, then push Generate Audio.\n\nYou can also select a variety of custom option and direct the way the result is generated.", elem_id="title")
768
- with gr.Column(scale=1):
769
- gr.HTML('''
770
- <div id="logo_container">
771
- <img src="https://huggingface.co/spaces/lamm-mit/PDF2Audio/resolve/main/logo.png" id="logo_image" alt="Logo">
772
- </div>
773
- ''')
774
  #gr.Markdown("")
775
- submit_btn = gr.Button("Generate Audio", elem_id="submit_btn")
776
 
777
  with gr.Row(elem_id="main_container"):
778
  with gr.Column(scale=2):
779
- files = gr.Files(label="PDFs", file_types=[], )
780
 
781
  openai_api_key = gr.Textbox(
782
- label="OpenAI API Key",
783
  visible=True, # Always show the API key field
784
- placeholder="Enter your OpenAI API Key here...",
785
  type="password" # Hide the API key input
786
  )
787
  text_model = gr.Dropdown(
788
- label="Text Generation Model",
789
  choices=STANDARD_TEXT_MODELS,
790
  value="o1-preview-2024-09-12", #"gpt-4o-mini",
791
- info="Select the model to generate the dialogue text.",
792
  )
793
  audio_model = gr.Dropdown(
794
- label="Audio Generation Model",
795
  choices=STANDARD_AUDIO_MODELS,
796
  value="tts-1",
797
- info="Select the model to generate the audio.",
798
  )
799
  speaker_1_voice = gr.Dropdown(
800
- label="Speaker 1 Voice",
801
  choices=STANDARD_VOICES,
802
  value="alloy",
803
- info="Select the voice for Speaker 1.",
804
  )
805
  speaker_2_voice = gr.Dropdown(
806
- label="Speaker 2 Voice",
807
  choices=STANDARD_VOICES,
808
  value="echo",
809
- info="Select the voice for Speaker 2.",
810
  )
811
  api_base = gr.Textbox(
812
- label="Custom API Base",
813
- placeholder="Enter custom API base URL if using a custom/local model...",
814
- info="If you are using a custom or local model, provide the API base URL here, e.g.: http://localhost:8080/v1 for llama.cpp REST server.",
815
  )
816
 
817
  with gr.Column(scale=3):
818
  template_dropdown = gr.Dropdown(
819
- label="Instruction Template",
820
  choices=list(INSTRUCTION_TEMPLATES.keys()),
821
  value="podcast",
822
- info="Select the instruction template to use. You can also edit any of the fields for more tailored results.",
823
- )
824
- intro_instructions = gr.Textbox(
825
- label="Intro Instructions",
826
- lines=10,
827
- value=INSTRUCTION_TEMPLATES["podcast"]["intro"],
828
- info="Provide the introductory instructions for generating the dialogue.",
829
- )
830
- text_instructions = gr.Textbox(
831
- label="Standard Text Analysis Instructions",
832
- lines=10,
833
- placeholder="Enter text analysis instructions...",
834
- value=INSTRUCTION_TEMPLATES["podcast"]["text_instructions"],
835
- info="Provide the instructions for analyzing the raw data and text.",
836
- )
837
- scratch_pad_instructions = gr.Textbox(
838
- label="Scratch Pad Instructions",
839
- lines=15,
840
- value=INSTRUCTION_TEMPLATES["podcast"]["scratch_pad"],
841
- info="Provide the scratch pad instructions for brainstorming presentation/dialogue content.",
842
  )
843
- prelude_dialog = gr.Textbox(
844
- label="Prelude Dialog",
845
- lines=5,
846
- value=INSTRUCTION_TEMPLATES["podcast"]["prelude"],
847
- info="Provide the prelude instructions before the presentation/dialogue is developed.",
848
- )
849
- podcast_dialog_instructions = gr.Textbox(
850
- label="Podcast Dialog Instructions",
851
- lines=20,
852
- value=INSTRUCTION_TEMPLATES["podcast"]["dialog"],
853
- info="Provide the instructions for generating the presentation or podcast dialogue.",
854
- )
855
-
856
- audio_output = gr.Audio(label="Audio", format="mp3", interactive=False, autoplay=False)
857
- transcript_output = gr.Textbox(label="Transcript", lines=20, show_copy_button=True)
858
- original_text_output = gr.Textbox(label="Original Text", lines=10, visible=False)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
859
  error_output = gr.Textbox(visible=False) # Hidden textbox to store error message
860
 
861
- use_edited_transcript = gr.Checkbox(label="Use Edited Transcript (check if you want to make edits to the initially generated transcript)", value=False)
862
- edited_transcript = gr.Textbox(label="Edit Transcript Here. E.g., mark edits in the text with clear instructions. E.g., '[ADD DEFINITION OF MATERIOMICS]'.", lines=20, visible=False,
863
  show_copy_button=True, interactive=False)
864
 
865
- user_feedback = gr.Textbox(label="Provide Feedback or Notes", lines=10, #placeholder="Enter your feedback or notes here..."
866
- )
867
- regenerate_btn = gr.Button("Regenerate Audio with Edits and Feedback")
868
  # Function to update the interactive state of edited_transcript
869
  def update_edit_box(checkbox_value):
870
  return gr.update(interactive=checkbox_value, lines=20 if checkbox_value else 20, visible=True if checkbox_value else False)
@@ -933,10 +903,6 @@ with gr.Blocks(title="PDF to Audio", css="""
933
  inputs=[error_output],
934
  outputs=[]
935
  )
936
-
937
- # Add README content at the bottom
938
- gr.Markdown("---") # Horizontal line to separate the interface from README
939
- gr.Markdown(read_readme())
940
 
941
  # Enable queueing for better performance
942
  demo.queue(max_size=20, default_concurrency_limit=32)
 
16
  from pypdf import PdfReader
17
  from tenacity import retry, retry_if_exception_type
18
 
 
 
 
 
 
 
 
 
 
 
 
 
 
19
  # Define multiple sets of instruction templates
20
  INSTRUCTION_TEMPLATES = {
21
  ################# PODCAST ##################
 
642
  edited_transcript=edited_transcript_processed,
643
  user_feedback=user_feedback_processed
644
  )
 
 
 
 
 
 
 
 
 
 
645
  print('llm_output:', llm_output)
646
 
647
  # Generate audio from the transcript
 
740
  """) as demo:
741
 
742
  with gr.Row(elem_id="header"):
743
+ gr.Markdown("# PDFを音声ポッドキャスト・講義・要約などに変換\n\nまず、1つ以上のPDFをアップロードし、オプションを選択してから「音声を生成」ボタンを押してください。\n\nカスタムオプションも選択でき、生成方法を細かく指示できます。", elem_id="title")
 
 
 
 
 
 
 
744
  #gr.Markdown("")
 
745
 
746
  with gr.Row(elem_id="main_container"):
747
  with gr.Column(scale=2):
748
+ files = gr.Files(label="PDFファイル", file_types=[], )
749
 
750
  openai_api_key = gr.Textbox(
751
+ label="OpenAI APIキー",
752
  visible=True, # Always show the API key field
753
+ placeholder="ここにOpenAI APIキーを入力してください...",
754
  type="password" # Hide the API key input
755
  )
756
  text_model = gr.Dropdown(
757
+ label="テキスト生成モデル",
758
  choices=STANDARD_TEXT_MODELS,
759
  value="o1-preview-2024-09-12", #"gpt-4o-mini",
760
+ info="対話テキストを生成するモデルを選択してください。",
761
  )
762
  audio_model = gr.Dropdown(
763
+ label="音声生成モデル",
764
  choices=STANDARD_AUDIO_MODELS,
765
  value="tts-1",
766
+ info="音声を生成するモデルを選択してください。",
767
  )
768
  speaker_1_voice = gr.Dropdown(
769
+ label="話者1の声",
770
  choices=STANDARD_VOICES,
771
  value="alloy",
772
+ info="話者1の音声を選択してください。",
773
  )
774
  speaker_2_voice = gr.Dropdown(
775
+ label="話者2の声",
776
  choices=STANDARD_VOICES,
777
  value="echo",
778
+ info="話者2の音声を選択してください。",
779
  )
780
  api_base = gr.Textbox(
781
+ label="カスタムAPIベースURL",
782
+ placeholder="カスタム/ローカルモデルを使う場合はAPIベースURLを入力してください...",
783
+ info="カスタムやローカルモデルを使う場合、ここにAPIベースURLを入力してください。例: http://localhost:8080/v1 (llama.cpp RESTサーバー用)",
784
  )
785
 
786
  with gr.Column(scale=3):
787
  template_dropdown = gr.Dropdown(
788
+ label="指示テンプレート",
789
  choices=list(INSTRUCTION_TEMPLATES.keys()),
790
  value="podcast",
791
+ info="使用する指示テンプレートを選択してください。各フィールドを編集してカスタマイズも可能です。",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
792
  )
793
+ with gr.Accordion("プロンプト", open=False):
794
+ intro_instructions = gr.Textbox(
795
+ label="イントロ指示",
796
+ lines=10,
797
+ value=INSTRUCTION_TEMPLATES["podcast"]["intro"],
798
+ info="対話生成のためのイントロ指示を入力してください。",
799
+ )
800
+ text_instructions = gr.Textbox(
801
+ label="テキスト分析指示",
802
+ lines=10,
803
+ placeholder="テキスト分析の指示を入力してください...",
804
+ value=INSTRUCTION_TEMPLATES["podcast"]["text_instructions"],
805
+ info="生データやテキストの分析指示を入力してください。",
806
+ )
807
+ scratch_pad_instructions = gr.Textbox(
808
+ label="ブレインストーミング指示",
809
+ lines=15,
810
+ value=INSTRUCTION_TEMPLATES["podcast"]["scratch_pad"],
811
+ info="プレゼン/対話内容のブレインストーミング指示を入力してください。",
812
+ )
813
+ prelude_dialog = gr.Textbox(
814
+ label="プレリュード指示",
815
+ lines=5,
816
+ value=INSTRUCTION_TEMPLATES["podcast"]["prelude"],
817
+ info="プレゼン/対話作成前のプレリュード指示を入力してください。",
818
+ )
819
+ podcast_dialog_instructions = gr.Textbox(
820
+ label="ポッドキャスト対話指示",
821
+ lines=20,
822
+ value=INSTRUCTION_TEMPLATES["podcast"]["dialog"],
823
+ info="プレゼンやポッドキャスト対話生成の指示を入力してください。",
824
+ )
825
+
826
+ submit_btn = gr.Button("音声を生成", elem_id="submit_btn", variant="primary")
827
+ audio_output = gr.Audio(label="音声", format="mp3", interactive=False, autoplay=False)
828
+ transcript_output = gr.Textbox(label="書き起こしテキスト", lines=20, show_copy_button=True)
829
+ original_text_output = gr.Textbox(label="元テキスト", lines=10, visible=False)
830
  error_output = gr.Textbox(visible=False) # Hidden textbox to store error message
831
 
832
+ use_edited_transcript = gr.Checkbox(label="書き起こしテキストを編集する(チェックすると編集欄が有効化)", value=False)
833
+ edited_transcript = gr.Textbox(label="編集用テキスト欄(例: '[ここに定義を追加]' など明確な指示を記載)", lines=20, visible=False,
834
  show_copy_button=True, interactive=False)
835
 
836
+ user_feedback = gr.Textbox(label="フィードバック・メモ", lines=10)
837
+ regenerate_btn = gr.Button("編集・フィードバックで再生成")
 
838
  # Function to update the interactive state of edited_transcript
839
  def update_edit_box(checkbox_value):
840
  return gr.update(interactive=checkbox_value, lines=20 if checkbox_value else 20, visible=True if checkbox_value else False)
 
903
  inputs=[error_output],
904
  outputs=[]
905
  )
 
 
 
 
906
 
907
  # Enable queueing for better performance
908
  demo.queue(max_size=20, default_concurrency_limit=32)