import gradio as gr
import subprocess
from huggingface_hub import InferenceClient

# Initialize the chatbot model (Futuresony.gguf)
chat_client = InferenceClient("Futuresony/future_ai_12_10_2024.gguf")  # Change if needed


def asr_chat_tts(audio):
    """
    1. Convert speech to text using asr.py
    2. Process the text through the chat model (Futuresony.gguf)
    3. Convert the response to speech using tts.py
    """
    # Step 1: Run ASR (speech-to-text) on the recorded audio file
    asr_output = subprocess.run(
        ["python3", "asr.py", audio], capture_output=True, text=True
    )
    transcription = asr_output.stdout.strip()

    # Step 2: Process the transcription through the chat model
    messages = [
        {"role": "system", "content": "You are a helpful AI assistant."},
        {"role": "user", "content": transcription},
    ]

    response = ""
    for msg in chat_client.chat_completion(messages, max_tokens=512, stream=True):
        token = msg.choices[0].delta.content
        if token:  # the final streamed chunk may carry no content
            response += token

    # Step 3: Run TTS (text-to-speech) on the chatbot response
    tts_output_file = "generated_speech.wav"
    subprocess.run(["python3", "tts.py", response, tts_output_file])

    return transcription, response, tts_output_file


# Gradio interface
with gr.Blocks() as demo:
    gr.Markdown("# ASR → Chatbot → TTS")

    with gr.Row():
        audio_input = gr.Audio(
            sources=["microphone"],  # on Gradio 3.x this parameter is source="microphone"
            type="filepath",
            label="🎤 Speak Here",
        )
        text_transcription = gr.Textbox(label="📝 Transcription", interactive=False)
        text_response = gr.Textbox(label="🤖 Chatbot Response", interactive=False)
        audio_output = gr.Audio(label="🔊 Generated Speech")

    submit_button = gr.Button("Process Speech 🔄")

    submit_button.click(
        fn=asr_chat_tts,
        inputs=[audio_input],
        outputs=[text_transcription, text_response, audio_output],
    )

# Run the app
if __name__ == "__main__":
    demo.launch()