import torch
import librosa
from transformers import pipeline, WhisperProcessor, WhisperForConditionalGeneration
from gtts import gTTS
import gradio as gr
import spaces
from langdetect import detect

# Startup banner only. The actual device choice is made inside the loader
# functions, which test torch.cuda.is_available() each time they run.
print("Using GPU for operations when available")

# Build a transformers pipeline without letting a load failure propagate
@spaces.GPU
def load_pipeline(model_name, **kwargs):
    """Create a ``transformers`` pipeline for ``model_name``.

    The pipeline is placed on CUDA device 0 when ``torch`` reports CUDA as
    available, otherwise on the CPU. Extra keyword arguments are forwarded
    to ``pipeline`` unchanged.

    Returns:
        The pipeline object, or ``None`` (after printing the error) when
        construction raised.
    """
    try:
        if torch.cuda.is_available():
            target_device = 0
        else:
            target_device = "cpu"
        return pipeline(model=model_name, device=target_device, **kwargs)
    except Exception as err:
        print(f"Error loading {model_name} pipeline: {err}")
        return None

# Load the Whisper speech-recognition processor and model
@spaces.GPU
def load_whisper():
    """Load the ``openai/whisper-small`` processor and model.

    The model is moved to CUDA device 0 when ``torch`` reports CUDA as
    available, otherwise it stays on the CPU.

    Returns:
        A ``(processor, model)`` tuple, or ``(None, None)`` (after printing
        the error) when any loading step raised.
    """
    checkpoint = "openai/whisper-small"
    try:
        cuda_available = torch.cuda.is_available()
        whisper_processor = WhisperProcessor.from_pretrained(checkpoint)
        whisper_model = WhisperForConditionalGeneration.from_pretrained(checkpoint)
        whisper_model = whisper_model.to(0 if cuda_available else "cpu")
    except Exception as err:
        print(f"Error loading Whisper model: {err}")
        return None, None
    return whisper_processor, whisper_model

# Load the sarvam-2b checkpoint used for text generation
@spaces.GPU
def load_sarvam():
    """Return a pipeline for ``sarvamai/sarvam-2b-v0.5``.

    Delegates to ``load_pipeline``, so the result is ``None`` when the
    pipeline could not be created.
    """
    sarvam_checkpoint = 'sarvamai/sarvam-2b-v0.5'
    return load_pipeline(sarvam_checkpoint)

# Transcribe an audio file with Whisper
@spaces.GPU
def process_audio_input(audio, whisper_processor, whisper_model):
    """Transcribe the audio file at ``audio`` into text.

    The file is loaded with librosa at 16 kHz, converted to input features
    by ``whisper_processor``, and decoded from the token ids generated by
    ``whisper_model``.

    Args:
        audio: Path (or other source accepted by ``librosa.load``) of the
            recording.
        whisper_processor: Whisper processor, or ``None`` if loading failed.
        whisper_model: Whisper model, or ``None`` if loading failed.

    Returns:
        The transcription of the first decoded sequence. On failure no
        exception is raised; an ``"Error..."`` message string is returned
        instead.
    """
    if whisper_processor is None or whisper_model is None:
        return "Error: Speech recognition model is not available. Please type your message instead."

    try:
        waveform, sample_rate = librosa.load(audio, sr=16000)
        encoded = whisper_processor(waveform, sampling_rate=sample_rate, return_tensors="pt")
        # The features must live on the same device as the model.
        features = encoded.input_features.to(whisper_model.device)
        token_ids = whisper_model.generate(features)
        decoded = whisper_processor.batch_decode(token_ids, skip_special_tokens=True)
        return decoded[0]
    except Exception as err:
        return f"Error processing audio: {str(err)}. Please type your message instead."

# Generate response within a GPU-decorated function
@spaces.GPU
def generate_response(transcription, sarvam_pipe):
    """Generate the assistant's reply to ``transcription``.

    The user text is wrapped in a ``Human: ... Assistant:`` prompt and passed
    to ``sarvam_pipe``, which is called like a transformers text-generation
    pipeline (the ``'generated_text'`` of its first result is read).

    Args:
        transcription: The user's message.
        sarvam_pipe: Callable generation pipeline, or ``None`` if it failed
            to load.

    Returns:
        The assistant's first reply with surrounding whitespace removed, or
        a string starting with ``"Error"`` when the pipeline is missing or
        generation raised.
    """
    if sarvam_pipe is None:
        return "Error: Text generation model is not available."

    try:
        # Prepare the prompt
        prompt = f"Human: {transcription}\n\nAssistant:"

        # max_new_tokens limits only the reply. max_length would also count
        # the prompt tokens, so a long message would leave little or no room
        # for the answer.
        outputs = sarvam_pipe(
            prompt,
            max_new_tokens=200,
            num_return_sequences=1,
            do_sample=True,
            temperature=0.7,
        )
        generated = outputs[0]['generated_text']

        # The generated text normally starts with the prompt itself. Remove
        # it by prefix so an "Assistant:" inside the user's own message
        # cannot confuse the extraction; fall back to the first marker if
        # the echoed prompt does not match exactly.
        if generated.startswith(prompt):
            reply = generated[len(prompt):]
        else:
            reply = generated.split("Assistant:", 1)[-1]

        # A model that keeps writing the dialogue produces further
        # "Human:" / "Assistant:" turns; keep only the first reply.
        reply = reply.split("Human:", 1)[0]

        return reply.strip()
    except Exception as e:
        return f"Error generating response: {str(e)}"

# Text-to-speech function
def text_to_speech(text, lang='hi'):
    """Synthesize ``text`` with gTTS and save it as ``response.mp3``.

    Languages in the Indic list are requested with ``tld='co.in'``; every
    other language uses gTTS's default domain.

    Args:
        text: Text to speak.
        lang: Language code passed to gTTS.

    Returns:
        The output path ``"response.mp3"``, or ``None`` (after printing the
        error) when synthesis or saving raised.
    """
    indic_langs = ('hi', 'bn', 'gu', 'kn', 'ml', 'mr', 'or', 'pa', 'ta', 'te')
    output_path = "response.mp3"
    try:
        tts_options = {'text': text, 'lang': lang}
        if lang in indic_langs:
            tts_options['tld'] = 'co.in'  # Use Indian TLD
        gTTS(**tts_options).save(output_path)
        return output_path
    except Exception as err:
        print(f"Error in text-to-speech: {str(err)}")
        return None

# Language detection function
def detect_language(text):
    """Return the language code to use for ``text``.

    ``langdetect.detect`` is tried first; its answer is kept only when it is
    one of the supported codes, otherwise ``'en'`` is returned. If detection
    raises, the language is guessed from the Unicode block of the characters
    in ``text``.

    Args:
        text: Text to classify. Empty or ``None`` input yields ``'en'``.

    Returns:
        One of ``'bn'``, ``'gu'``, ``'hi'``, ``'kn'``, ``'ml'``, ``'mr'``,
        ``'or'``, ``'pa'``, ``'ta'``, ``'te'`` or ``'en'``.
    """
    lang_codes = {
        'bn': 'Bengali', 'gu': 'Gujarati', 'hi': 'Hindi', 'kn': 'Kannada',
        'ml': 'Malayalam', 'mr': 'Marathi', 'or': 'Oriya', 'pa': 'Punjabi',
        'ta': 'Tamil', 'te': 'Telugu', 'en': 'English'
    }

    # (first code point, last code point, language code), in priority order.
    # Devanagari comes first so that text containing any Devanagari
    # character still maps to 'hi'. Marathi shares that script and cannot be
    # told apart by script alone.
    script_ranges = (
        (0x0900, 0x097F, 'hi'),  # Devanagari
        (0x0980, 0x09FF, 'bn'),  # Bengali
        (0x0A00, 0x0A7F, 'pa'),  # Gurmukhi
        (0x0A80, 0x0AFF, 'gu'),  # Gujarati
        (0x0B00, 0x0B7F, 'or'),  # Oriya
        (0x0B80, 0x0BFF, 'ta'),  # Tamil
        (0x0C00, 0x0C7F, 'te'),  # Telugu
        (0x0C80, 0x0CFF, 'kn'),  # Kannada
        (0x0D00, 0x0D7F, 'ml'),  # Malayalam
    )

    if not text:
        return 'en'  # Nothing to inspect

    try:
        detected_lang = detect(text)
    except Exception:
        # Best-effort fallback: any detector failure leads to the
        # script-based guess rather than an error for the caller.
        code_points = [ord(char) for char in text]
        for first, last, code in script_ranges:
            if any(first <= point <= last for point in code_points):
                return code
        return 'en'  # Default to English if no Indic script is detected
    else:
        return detected_lang if detected_lang in lang_codes else 'en'

@spaces.GPU
def indic_language_assistant(input_type, audio_input, text_input):
    """Run one assistant turn: obtain the user text, reply, and speak it.

    Args:
        input_type: ``"audio"`` or ``"text"``; selects which input is used.
        audio_input: Audio to transcribe when ``input_type`` is ``"audio"``.
        text_input: Message text when ``input_type`` is ``"text"``.

    Returns:
        A ``(transcription, response, audio_path)`` tuple. When the input is
        missing, transcription fails, or an unexpected exception occurs, the
        first two items carry messages and ``audio_path`` is ``None``.
    """
    # Messages process_audio_input returns instead of a transcription.
    transcription_errors = (
        "Error: Speech recognition model is not available.",
        "Error processing audio:",
    )

    try:
        use_audio = input_type == "audio" and audio_input is not None
        use_text = input_type == "text" and bool(text_input)

        # Validate before loading anything so a bad request stays cheap.
        if not (use_audio or use_text):
            return "Please provide either audio or text input.", "No input provided.", None

        if use_audio:
            # Whisper is only needed for audio requests.
            whisper_processor, whisper_model = load_whisper()
            transcription = process_audio_input(audio_input, whisper_processor, whisper_model)
            if transcription.startswith(transcription_errors):
                # Do not feed an error message to the language model as if
                # the user had said it.
                return transcription, transcription, None
        else:
            transcription = text_input

        # NOTE(review): the models are still loaded on every call; confirm
        # whether they can be cached across requests in this deployment.
        sarvam_pipe = load_sarvam()

        response = generate_response(transcription, sarvam_pipe)
        lang = detect_language(response)
        audio_response = text_to_speech(response, lang)

        return transcription, response, audio_response
    except Exception as e:
        error_message = f"An error occurred: {str(e)}"
        return error_message, error_message, None

# Custom CSS handed to gr.Interface through its `css=` argument.
# The `.input-box` rules target the text input, which is created with
# elem_classes="input-box". The heading, `.container` and `.task-*` rules
# match the markup in `custom_html`.
# NOTE(review): no component visible in this file is given the `submit-btn`
# class — confirm whether those rules are still needed.
custom_css = """
body {
    background-color: #1a1a1a;
    color: #ffffff;
    font-family: Arial, sans-serif;
}

.container {
    max-width: 800px;
    margin: 0 auto;
    padding: 20px;
}

h1 {
    font-size: 2.5em;
    background: linear-gradient(45deg, #4a90e2, #f48fb1);
    -webkit-background-clip: text;
    -webkit-text-fill-color: transparent;
    margin-bottom: 10px;
}

h2 {
    color: #a0a0a0;
    font-weight: normal;
}

.task-container {
    display: flex;
    justify-content: space-between;
    flex-wrap: wrap;
    margin-top: 30px;
}

.task-card {
    background-color: #2a2a2a;
    border-radius: 10px;
    padding: 15px;
    margin: 10px 0;
    width: calc(50% - 10px);
    box-shadow: 0 4px 6px rgba(0, 0, 0, 0.1);
    transition: transform 0.3s ease;
}

.task-card:hover {
    transform: translateY(-5px);
}

.task-icon {
    font-size: 24px;
    margin-bottom: 10px;
}

.input-box {
    width: 100%;
    padding: 10px;
    border-radius: 20px;
    border: none;
    background-color: #333;
    color: #fff;
    margin-top: 20px;
}

.submit-btn {
    background-color: #4a90e2;
    color: white;
    border: none;
    padding: 10px 20px;
    border-radius: 20px;
    cursor: pointer;
    margin-top: 10px;
    transition: background-color 0.3s ease;
}

.submit-btn:hover {
    background-color: #3a7bd5;
}
"""

# Custom HTML: a static welcome panel (heading, sub-heading and two hint
# cards) whose class names match the `.container` / `.task-*` rules in
# `custom_css`.
# NOTE(review): nothing visible in this file passes `custom_html` to the
# Gradio interface, so this markup does not appear to be rendered — confirm.
custom_html = """
<div class="container">
    <h1>Hello, User</h1>
    <h2>How can I help you today?</h2>
    <div class="task-container">
        <div class="task-card">
            <div class="task-icon">🎤</div>
            <p>Speak in any Indic language</p>
        </div>
        <div class="task-card">
            <div class="task-icon">⌨️</div>
            <p>Type in any Indic language</p>
        </div>
    </div>
</div>
"""

# Create Gradio interface.
# The three inputs are passed positionally to indic_language_assistant
# (input_type, audio_input, text_input), and its three return values fill
# the three outputs in the order listed.
iface = gr.Interface(
    fn=indic_language_assistant,
    inputs=[
        # Selects which of the two inputs below the handler actually uses.
        gr.Radio(["audio", "text"], label="Input Type", value="audio"),
        # type="filepath": the handler receives a path, not raw samples.
        gr.Audio(type="filepath", label="Speak (if audio input selected)"),
        gr.Textbox(label="Type your message (if text input selected)", elem_classes="input-box")
    ],
    outputs=[
        gr.Textbox(label="Transcription/Input"),
        gr.Textbox(label="Generated Response"),
        gr.Audio(label="Audio Response")
    ],
    title="Indic Language Virtual Assistant",
    description="Speak or type in any supported Indic language or English. The assistant will respond in text and audio.",
    css=custom_css,
    elem_id="indic-assistant",
    # NOTE(review): "dark" does not look like one of Gradio's built-in theme
    # names — confirm it resolves in the installed Gradio version.
    theme="dark"
)

# Launch the app. This runs at import time, since there is no
# `if __name__ == "__main__":` guard.
iface.launch()