Futuresony committed on
Commit
4f467e8
·
verified ·
1 Parent(s): 647e0dd

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +57 -0
app.py ADDED
@@ -0,0 +1,57 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import gradio as gr
from transformers import pipeline
import soundfile as sf
from huggingface_hub import InferenceClient

# --- Model setup ---------------------------------------------------------

# Speech-to-text: Facebook MMS multilingual ASR checkpoint.
asr_model = pipeline("automatic-speech-recognition", model="facebook/mms-1b-all")

# Text-to-speech: Facebook MMS TTS.
# NOTE(review): MMS TTS checkpoints are usually per-language
# (e.g. "facebook/mms-tts-eng") — confirm this model id actually loads.
tts_model = pipeline("text-to-speech", model="facebook/mms-tts")

# Chat backend served through the Hugging Face Inference API.
chat_client = InferenceClient("Futuresony/future_ai_12_10_2024.gguf")
15
def asr_chat_tts(audio):
    """Run the full speech -> text -> chat -> speech pipeline.

    Args:
        audio: Path to the recorded input audio file (Gradio ``type="filepath"``).

    Returns:
        tuple: ``(transcription, response, output_file)`` — the ASR transcript,
        the chat model's text reply, and the path to the generated WAV file.
    """
    # Step 1: Transcribe speech using Facebook MMS ASR.
    transcription = asr_model(audio)["text"]

    # Step 2: Process the transcript through the chat model.
    messages = [
        {"role": "system", "content": "You are a helpful AI assistant."},
        {"role": "user", "content": transcription},
    ]

    response = ""
    for msg in chat_client.chat_completion(messages, max_tokens=512, stream=True):
        # Streamed deltas may carry content=None (e.g. role-only or final
        # chunks); guard so the string concatenation never raises TypeError.
        token = msg.choices[0].delta.content
        if token:
            response += token

    # Step 3: Convert the response to speech using Facebook MMS TTS.
    speech = tts_model(response)
    output_file = "generated_speech.wav"
    # NOTE(review): some TTS pipelines return audio shaped (1, n); confirm
    # soundfile accepts this model's array layout as-is.
    sf.write(output_file, speech["audio"], samplerate=speech["sampling_rate"])

    return transcription, response, output_file
40
# Gradio Interface.
# Fix: the original strings were mojibake (UTF-8 bytes decoded as cp1252,
# e.g. "β†’" for "→", "🎀" for "🎤"); restore the intended characters.
with gr.Blocks() as demo:
    gr.Markdown("<h2 style='text-align: center;'>ASR → Chatbot → TTS</h2>")

    with gr.Row():
        # NOTE(review): Gradio 4.x renamed `source=` to `sources=[...]` on
        # gr.Audio — confirm the pinned gradio version before deploying.
        audio_input = gr.Audio(source="microphone", type="filepath", label="🎤 Speak Here")
        text_transcription = gr.Textbox(label="📝 Transcription", interactive=False)
        text_response = gr.Textbox(label="🤖 Chatbot Response", interactive=False)
        audio_output = gr.Audio(label="🔊 Generated Speech")

    submit_button = gr.Button("Process Speech 🔄")

    # Wire the button to the full ASR -> chat -> TTS pipeline.
    submit_button.click(
        fn=asr_chat_tts,
        inputs=[audio_input],
        outputs=[text_transcription, text_response, audio_output],
    )
54
# Launch the Gradio app only when executed as a script (not on import).
if __name__ == "__main__":
    demo.launch()