import streamlit as st
import torch
from transformers import MBartForConditionalGeneration, MBart50TokenizerFast
from transformers import AutoTokenizer, AutoModelForCausalLM
from diffusers import StableDiffusionPipeline
from rouge_score import rouge_scorer
from PIL import Image
import tempfile
import os
import time
import clip
import torchvision.transforms as transforms

# Use CUDA if available
device = "cuda" if torch.cuda.is_available() else "cpu"


# Streamlit re-executes this script from the top on every widget interaction.
# Without caching, every button click would reload all of the models below.
# show_spinner=False keeps the loader from rendering anything, because
# st.set_page_config() is called further down in this script.
@st.cache_resource(show_spinner=False)
def _load_models(target_device):
    """Load every model used by the app once per server process.

    Args:
        target_device: Torch device string ("cuda" or "cpu") the models are
            moved to.

    Returns:
        A tuple ``(translator_model, translator_tokenizer, gen_model,
        gen_tokenizer, pipe, clip_model, clip_preprocess)``.
    """
    # Load translation model (Tamil to English)
    mbart_name = "facebook/mbart-large-50-many-to-many-mmt"
    mt_model = MBartForConditionalGeneration.from_pretrained(mbart_name).to(target_device)
    mt_tokenizer = MBart50TokenizerFast.from_pretrained(mbart_name)
    mt_tokenizer.src_lang = "ta_IN"

    # Load GPT-2 for creative text generation
    lm_model = AutoModelForCausalLM.from_pretrained("gpt2").to(target_device)
    lm_tokenizer = AutoTokenizer.from_pretrained("gpt2")

    # Load a lightweight image generation model
    sd_pipe = StableDiffusionPipeline.from_pretrained(
        "OFA-Sys/small-stable-diffusion-v0",
        torch_dtype=torch.float32,
        use_auth_token=os.getenv("HF_TOKEN")  # Set in Hugging Face Space secrets
    ).to(target_device)
    sd_pipe.safety_checker = None  # Optional: disable for speed

    # Load CLIP model for image-text similarity
    clip_net, clip_transform = clip.load("ViT-B/32", device=target_device)

    return mt_model, mt_tokenizer, lm_model, lm_tokenizer, sd_pipe, clip_net, clip_transform


(
    translator_model,
    translator_tokenizer,
    gen_model,
    gen_tokenizer,
    pipe,
    clip_model,
    clip_preprocess,
) = _load_models(device)

# Translation Function
def translate_tamil_to_english(text, reference=None):
    """Translate Tamil ``text`` into English and optionally score it.

    Args:
        text: Tamil source text to translate.
        reference: Optional English reference translation. When truthy, a
            ROUGE-L F-measure is computed between the lowercased reference
            and the lowercased model output.

    Returns:
        A tuple ``(translated, duration, rouge_l)``: the decoded English
        string, the time spent translating in seconds (rounded to two
        decimals), and the ROUGE-L F-measure rounded to four decimals, or
        ``None`` when no reference was supplied.
    """
    started_at = time.time()
    encoded = translator_tokenizer(text, return_tensors="pt").to(device)
    # Forcing the first generated token to the English language code makes
    # the many-to-many model emit English.
    english_bos = translator_tokenizer.lang_code_to_id["en_XX"]
    generated = translator_model.generate(**encoded, forced_bos_token_id=english_bos)
    decoded = translator_tokenizer.batch_decode(generated, skip_special_tokens=True)
    translated = decoded[0]
    duration = round(time.time() - started_at, 2)

    # Scoring is skipped for a missing or empty reference.
    if not reference:
        return translated, duration, None

    scorer = rouge_scorer.RougeScorer(["rougeL"], use_stemmer=True)
    scores = scorer.score(reference.lower(), translated.lower())
    return translated, duration, round(scores["rougeL"].fmeasure, 4)

# Creative Text Generator with Perplexity
def generate_creative_text(prompt, max_length=100):
    """Continue ``prompt`` with GPT-2 sampling and compute simple quality metrics.

    Args:
        prompt: Text the model continues from.
        max_length: Forwarded to ``generate()`` as ``max_length``.

    Returns:
        A tuple ``(text, duration, token_count, repetition_rate, perplexity)``:
        the decoded output, generation time in seconds (two decimals), the
        number of whitespace-separated tokens, the fraction of tokens equal
        to their immediate predecessor (four decimals, ``0.0`` for empty
        output), and the model's perplexity on its own output (four decimals,
        NaN when the output is too short to score).
    """
    start = time.time()
    input_ids = gen_tokenizer.encode(prompt, return_tensors="pt").to(device)
    output = gen_model.generate(
        input_ids,
        max_length=max_length,
        do_sample=True,
        top_k=50,
        temperature=0.9,
        # GPT-2 defines no pad token; pass EOS explicitly instead of relying
        # on the library's warn-and-default fallback.
        pad_token_id=gen_tokenizer.eos_token_id,
    )
    text = gen_tokenizer.decode(output[0], skip_special_tokens=True)
    duration = round(time.time() - start, 2)

    tokens = text.split()
    if tokens:
        adjacent_repeats = sum(t1 == t2 for t1, t2 in zip(tokens, tokens[1:]))
        repetition_rate = adjacent_repeats / len(tokens)
    else:
        # Empty or whitespace-only output: avoid dividing by zero.
        repetition_rate = 0.0

    # Perplexity calculation
    scored_ids = gen_tokenizer.encode(text, return_tensors="pt").to(device)
    if scored_ids.shape[-1] < 2:
        # The language-model loss needs at least one (context, next-token)
        # pair; with fewer than two tokens there is nothing to score.
        perplexity = float("nan")
    else:
        with torch.no_grad():
            loss = gen_model(scored_ids, labels=scored_ids).loss
            perplexity = torch.exp(loss).item()

    return text, duration, len(tokens), round(repetition_rate, 4), round(perplexity, 4)

# AI Image Generator with CLIP Similarity
def generate_image(prompt):
    """Generate an image for ``prompt`` and score it against the prompt with CLIP.

    Args:
        prompt: Text description passed to the diffusion pipeline and to CLIP.

    Returns:
        On success, ``(image_path, duration, similarity)``: the path of a
        256x256 PNG saved to a temporary file (the caller owns the file), the
        elapsed seconds rounded to two decimals, and the cosine similarity
        between the CLIP image and text embeddings rounded to four decimals.
        On failure, ``(None, error_message, None)``; note that the message
        occupies the second slot.
    """
    image_path = None
    try:
        start = time.time()
        result = pipe(prompt)
        image = result.images[0].resize((256, 256))

        # Only the reserved file name is needed. Close the handle right away
        # so it is not leaked and the path can be reopened for writing.
        with tempfile.NamedTemporaryFile(delete=False, suffix=".png") as tmp_file:
            image_path = tmp_file.name
        image.save(image_path)

        # CLIP similarity
        image_input = clip_preprocess(image).unsqueeze(0).to(device)
        # truncate=True: without it, a prompt longer than CLIP's context
        # length raises and would discard an already generated image.
        text_input = clip.tokenize([prompt], truncate=True).to(device)

        with torch.no_grad():
            image_features = clip_model.encode_image(image_input)
            text_features = clip_model.encode_text(text_input)
            similarity = torch.cosine_similarity(image_features, text_features).item()

        return image_path, round(time.time() - start, 2), round(similarity, 4)

    except Exception as e:
        # Best-effort cleanup so a failed run does not leave a stray temp file.
        if image_path is not None and os.path.exists(image_path):
            try:
                os.remove(image_path)
            except OSError:
                pass
        return None, f"Image generation failed: {str(e)}", None

# Streamlit UI
# Flow: translate the Tamil input, generate an image from the translation,
# then generate creative text from the same translation.
st.set_page_config(page_title="Tamil → English + AI Art", layout="centered")
st.title("🧠 Tamil → English + 🎨 Creative Text + AI Image")

tamil_input = st.text_area("✍️ Enter Tamil text here", height=150)
reference_input = st.text_input("📘 Optional: Reference English translation for ROUGE-L")

if st.button("🚀 Generate Output"):
    if not tamil_input.strip():
        st.warning("Please enter Tamil text.")
    else:
        with st.spinner("🔄 Translating Tamil to English..."):
            english_text, t_time, rouge_l = translate_tamil_to_english(tamil_input, reference_input)

        st.success(f"✅ Translated in {t_time} seconds")
        st.markdown(f"**📝 English Translation:** `{english_text}`")
        if rouge_l is not None:
            st.markdown(f"📊 **ROUGE-L Score:** `{rouge_l}`")
        else:
            st.info("ℹ️ ROUGE-L not calculated. Reference not provided.")

        with st.spinner("🎨 Generating image..."):
            image_path, img_time, clip_score = generate_image(english_text)

        if image_path:
            st.success(f"🖼️ Image generated in {img_time} seconds")
            # Context manager closes the file handle once the image is rendered.
            with Image.open(image_path) as generated_image:
                st.image(generated_image, caption="AI-Generated Image", use_column_width=True)
            st.markdown(f"🔍 **CLIP Text-Image Similarity:** `{clip_score}`")
        else:
            # On failure generate_image returns (None, <error message>, None),
            # so the message is in the second slot; image_path is None here.
            st.error(img_time)

        with st.spinner("💡 Generating creative text..."):
            creative, c_time, tokens, rep_rate, perplexity = generate_creative_text(english_text)

        st.success(f"✨ Creative text generated in {c_time} seconds")
        st.markdown("**🧠 Creative Output:**")
        st.text(creative)
        st.markdown(f"📌 Tokens: `{tokens}`")
        st.markdown(f"🔁 Repetition Rate: `{rep_rate}`")
        st.markdown(f"📉 Perplexity: `{perplexity}`")

st.markdown("---")
st.caption("Built by Sureshkumar R using MBart, GPT-2 & Stable Diffusion on Hugging Face")