import os
import tempfile

import gradio as gr
import librosa
import spaces
import torch
import transformers
from gtts import gTTS
from transformers import pipeline

def load_pipeline(model_name, **kwargs):
    """Build a transformers pipeline for *model_name* without ever raising.

    Args:
        model_name: Model identifier passed to ``transformers.pipeline`` as
            its ``model`` argument.
        **kwargs: Extra keyword arguments forwarded unchanged to
            ``transformers.pipeline``.

    Returns:
        The constructed pipeline, or ``None`` when construction raised an
        exception (the error is printed rather than propagated).
    """
    try:
        loaded = transformers.pipeline(model=model_name, **kwargs)
    except Exception as err:
        # Best-effort loading: report the failure and let the caller decide
        # what to do with a missing model.
        print(f"Error loading {model_name} pipeline: {err}")
        loaded = None
    return loaded

# NOTE(review): the loader itself is wrapped in spaces.GPU — presumably to
# obtain a GPU on Hugging Face Spaces; confirm that decorating the loader
# (rather than the inference functions) is intended.
@spaces.GPU
def load_shuka():
    """Load the Shuka v1 pipeline used for audio input.

    Returns:
        The pipeline for ``sarvamai/shuka_v1`` (loaded with
        ``trust_remote_code=True`` and float32 weights), or ``None`` if
        loading failed.
    """
    try:
        shuka = load_pipeline(
            'sarvamai/shuka_v1',
            trust_remote_code=True,
            torch_dtype=torch.float32,
        )
    except Exception as err:
        # Second safety net on top of load_pipeline's own error handling.
        print(f"Error loading Shuka v1: {err}")
        return None
    return shuka

# NOTE(review): the loader itself is wrapped in spaces.GPU — presumably to
# obtain a GPU on Hugging Face Spaces; confirm that decorating the loader
# (rather than the inference functions) is intended.
@spaces.GPU
def load_sarvam():
    """Load the sarvam-2b pipeline used for text generation.

    Returns:
        The pipeline for ``sarvamai/sarvam-2b-v0.5``, or ``None`` if loading
        failed.
    """
    try:
        sarvam = load_pipeline('sarvamai/sarvam-2b-v0.5')
    except Exception as err:
        # Second safety net on top of load_pipeline's own error handling.
        print(f"Error loading sarvam-2b: {err}")
        return None
    return sarvam

# Load both models once at import time (Shuka first, then sarvam-2b). Either
# name is None when its model could not be loaded; the request handlers check
# for that before using the pipeline.
shuka_pipe, sarvam_pipe = load_shuka(), load_sarvam()

def process_audio_input(audio):
    """Run an audio file through the Shuka pipeline and return its text output.

    Args:
        audio: Audio source accepted by ``librosa.load`` (the Gradio interface
            passes a file path).

    Returns:
        The ``generated_text`` of the first pipeline result, or a
        human-readable error string when the model is unavailable or any
        step fails. This function never raises.
    """
    if shuka_pipe is None:
        return "Error: Shuka v1 model is not available. Please type your message instead."

    try:
        # Load the audio at a 16 kHz sample rate.
        waveform, sample_rate = librosa.load(audio, sr=16000)
        conversation = [
            {'role': 'system', 'content': 'Respond naturally and informatively.'},
            {'role': 'user', 'content': '<|audio|>'},
        ]
        payload = {'audio': waveform, 'turns': conversation, 'sampling_rate': sample_rate}
        outputs = shuka_pipe(payload, max_new_tokens=512)
        # NOTE(review): assumes the pipeline returns a list of dicts with a
        # 'generated_text' key — confirm against the Shuka model card; any
        # other shape ends up in the error branch below.
        return outputs[0]['generated_text']
    except Exception as err:
        return f"Error processing audio: {str(err)}. Please type your message instead."

def generate_response(text_input):
    """Generate the assistant's reply to *text_input* with sarvam-2b.

    Args:
        text_input: Prompt text (typed by the user or produced from audio).

    Returns:
        The generated continuation only (the prompt is not echoed back), or a
        human-readable error string when the model is unavailable or
        generation fails. This function never raises.
    """
    if sarvam_pipe is None:
        return "Error: sarvam-2b model is not available. The assistant cannot generate responses at this time."

    try:
        outputs = sarvam_pipe(
            text_input,
            max_new_tokens=100,
            # temperature only takes effect when sampling is enabled; without
            # do_sample=True decoding is greedy and the value is ignored.
            do_sample=True,
            temperature=0.7,
            repetition_penalty=1.2,
            # By default the text-generation pipeline returns prompt +
            # continuation; the reply shown and spoken to the user should not
            # repeat their own input.
            return_full_text=False,
        )
        return outputs[0]['generated_text']
    except Exception as e:
        return f"Error generating response: {str(e)}"

def text_to_speech(text, lang='hi'):
    """Synthesize *text* to an MP3 file with gTTS.

    Each call writes to its own uniquely named temporary file, so concurrent
    requests cannot overwrite one another's audio. The caller owns the
    returned file.

    Args:
        text: Text to speak. Non-string, empty, or whitespace-only input
            yields ``None`` without contacting the TTS service.
        lang: gTTS language code to synthesize with.

    Returns:
        Path of the generated MP3 file, or ``None`` when there is nothing to
        speak or synthesis failed (the error is printed).
    """
    if not isinstance(text, str) or not text.strip():
        return None

    output_path = None
    try:
        # Build the gTTS object first so a rejected language code fails
        # before any file is created.
        tts = gTTS(text=text, lang=lang)
        fd, output_path = tempfile.mkstemp(prefix="response_", suffix=".mp3")
        os.close(fd)  # gTTS reopens the path itself when saving.
        tts.save(output_path)
        return output_path
    except Exception as e:
        print(f"Error in text-to-speech: {str(e)}")
        # Do not leave an empty or partial file behind on failure.
        if output_path is not None:
            try:
                os.remove(output_path)
            except OSError:
                pass
        return None

# Unicode blocks of the major Indic scripts, each mapped to the language code
# reported for text written in that script.
_INDIC_SCRIPT_BLOCKS = (
    (0x0900, 0x097F, 'hi'),  # Devanagari (shared with Marathi; Hindi assumed)
    (0x0980, 0x09FF, 'bn'),  # Bengali
    (0x0A00, 0x0A7F, 'pa'),  # Gurmukhi (Punjabi)
    (0x0A80, 0x0AFF, 'gu'),  # Gujarati
    (0x0B00, 0x0B7F, 'or'),  # Oriya
    (0x0B80, 0x0BFF, 'ta'),  # Tamil
    (0x0C00, 0x0C7F, 'te'),  # Telugu
    (0x0C80, 0x0CFF, 'kn'),  # Kannada
    (0x0D00, 0x0D7F, 'ml'),  # Malayalam
)


def detect_language(text):
    """Guess the language code of *text* from the script it is written in.

    Characters are counted per Indic Unicode block and the code of the most
    frequent script is returned; on a tie the script seen first wins. Script
    alone cannot separate languages that share one, so Devanagari text is
    always reported as Hindi.

    Args:
        text: Text to inspect. ``None`` and empty strings are allowed.

    Returns:
        One of ``'hi'``, ``'bn'``, ``'pa'``, ``'gu'``, ``'or'``, ``'ta'``,
        ``'te'``, ``'kn'``, ``'ml'`` when the text contains characters of
        that script, otherwise ``'en'``.
    """
    if not text:
        return 'en'

    script_counts = {}
    for char in text:
        code_point = ord(char)
        for first, last, lang_code in _INDIC_SCRIPT_BLOCKS:
            if first <= code_point <= last:
                script_counts[lang_code] = script_counts.get(lang_code, 0) + 1
                break

    if not script_counts:
        return 'en'  # No Indic script found.
    # Majority vote, so a stray character from another block (e.g. the
    # Devanagari danda used in Bengali text) does not decide the result.
    return max(script_counts, key=script_counts.get)

# process_audio_input() and generate_response() report failures by returning
# strings with these prefixes instead of raising.
_AUDIO_ERROR_PREFIXES = (
    "Error: Shuka v1 model is not available",
    "Error processing audio:",
)
_GENERATION_ERROR_PREFIXES = (
    "Error: sarvam-2b model is not available",
    "Error generating response:",
)


def indic_language_assistant(input_type, audio_input, text_input):
    """Handle one request: obtain the input text, generate a reply, speak it.

    Args:
        input_type: ``"audio"`` or ``"text"``; selects which input is used.
        audio_input: Audio passed to ``process_audio_input`` when
            ``input_type`` is ``"audio"``; ignored otherwise.
        text_input: Typed message used when ``input_type`` is ``"text"``;
            ignored otherwise.

    Returns:
        A ``(transcription, response, audio_path)`` tuple. ``audio_path`` is
        ``None`` when no speech was produced. When no usable input is given,
        or a step fails, the text slots carry an explanatory message instead.
        This function never raises.
    """
    no_input = ("Please provide either audio or text input.", "No input provided.", None)
    try:
        if input_type == "audio" and audio_input is not None:
            transcription = process_audio_input(audio_input)
            # An error message is not user content: do not feed it to the
            # language model as if the user had said it.
            if isinstance(transcription, str) and transcription.startswith(_AUDIO_ERROR_PREFIXES):
                return (
                    transcription,
                    "No response generated because the audio input could not be processed.",
                    None,
                )
        elif input_type == "text" and text_input and text_input.strip():
            transcription = text_input
        else:
            return no_input

        response = generate_response(transcription)
        # Show generation errors as text only; do not read them aloud.
        if isinstance(response, str) and response.startswith(_GENERATION_ERROR_PREFIXES):
            return transcription, response, None

        lang = detect_language(response)
        audio_response = text_to_speech(response, lang)

        return transcription, response, audio_response
    except Exception as e:
        error_message = f"An error occurred: {str(e)}"
        return error_message, error_message, None

# Create Gradio interface
# Gradio 4 renamed the Audio component's `source` argument to `sources` (a
# list of allowed sources), so pass whichever keyword the installed version
# understands.
_gradio_major = int(gr.__version__.split(".")[0])
_microphone_kwargs = (
    {"sources": ["microphone"]} if _gradio_major >= 4 else {"source": "microphone"}
)

iface = gr.Interface(
    fn=indic_language_assistant,
    inputs=[
        gr.Radio(["audio", "text"], label="Input Type", value="audio"),
        gr.Audio(type="filepath", label="Speak (if audio input selected)", **_microphone_kwargs),
        gr.Textbox(label="Type your message (if text input selected)")
    ],
    outputs=[
        gr.Textbox(label="Transcription/Input"),
        gr.Textbox(label="Generated Response"),
        gr.Audio(label="Audio Response")
    ],
    title="Indic Language Virtual Assistant",
    description="Speak or type in any supported Indic language or English. The assistant will respond in text and audio."
)

# Launch the app
iface.launch()