"""Gradio demo: accept typed text or recorded audio, reply with text and speech."""

import gradio as gr
from transformers import pipeline

from asr import transcribe_audio  # Your ASR function
from lid import detect_language  # Your Language Identification function
from tts import text_to_speech  # Your TTS function

# Upper bound on the number of *newly generated* tokens per reply. Using
# max_new_tokens (rather than max_length, which also counts the prompt)
# keeps long prompts from exhausting the generation budget.
MAX_NEW_TOKENS = 100

# Load the text generation model (adjust this based on your model type)
text_generator = pipeline("text-generation", model="Futuresony/12_10_2024.gguf")


def _clean(text):
    """Return *text* without surrounding whitespace; non-strings pass through."""
    return text.strip() if isinstance(text, str) else text


def process_input(input_text=None, audio=None):
    """Generate a text reply and its spoken form from typed text or audio.

    Args:
        input_text: Text typed by the user, or None.
        audio: Value of the audio input (the interface below is configured
            with ``type="filepath"``), or a falsy value when nothing was
            recorded.

    Returns:
        A ``(text, audio)`` tuple. On success, ``text`` is the model's
        ``generated_text`` and ``audio`` is whatever ``text_to_speech``
        returns for it. When no usable input is available the tuple is
        ``("No input provided", None)``.
    """
    typed = _clean(input_text)

    if audio:
        # A recording takes priority over typed text, but if transcription
        # yields nothing we fall back to what the user typed instead of
        # discarding it.
        prompt = _clean(transcribe_audio(audio)) or typed
    else:
        prompt = typed

    # Covers None, "", and whitespace-only input.
    if not prompt:
        return "No input provided", None

    # Detect language
    lang = detect_language(prompt)

    # Generate text using the model
    generated = text_generator(
        prompt,
        max_new_tokens=MAX_NEW_TOKENS,
        do_sample=True,
    )
    output_text = generated[0]["generated_text"]

    # Convert output text to speech
    output_audio = text_to_speech(output_text, lang)

    return output_text, output_audio


# Create Gradio interface
interface = gr.Interface(
    fn=process_input,
    inputs=[
        gr.Textbox(label="Enter Text", placeholder="Type here..."),
        # NOTE(review): newer Gradio releases replaced `source=` with
        # `sources=[...]` -- confirm against the Gradio version this app pins.
        gr.Audio(source="microphone", type="filepath", label="Record Audio"),
    ],
    outputs=[
        gr.Textbox(label="Generated Text"),
        gr.Audio(label="Generated Speech"),
    ],
    title="Speech-to-Text AI Chat",
    description="Input text or record audio, and the AI will respond with generated text and speech.",
)

# Run the demo
interface.launch()