File size: 5,805 Bytes
c4a1141
cbc840b
b88f708
9c3ea11
 
 
 
ab52a13
 
 
 
 
 
 
 
 
 
831a7b4
 
b88f708
ab52a13
396e877
 
ab52a13
 
 
 
 
 
 
b88f708
 
ab52a13
cbc840b
831a7b4
396e877
b88f708
ab52a13
 
 
 
 
 
9c3ea11
831a7b4
ab52a13
831a7b4
 
b88f708
ab52a13
 
cbc840b
 
396e877
ab52a13
 
 
 
cbc840b
 
 
 
 
 
 
 
b88f708
cbc840b
 
b88f708
cbc840b
396e877
831a7b4
cbc840b
 
 
 
ab52a13
cbc840b
f67d206
396e877
f67d206
 
f837ee9
f67d206
831a7b4
b88f708
 
cbc840b
 
831a7b4
 
cbc840b
831a7b4
f837ee9
831a7b4
cbc840b
831a7b4
 
 
 
 
 
 
 
 
b88f708
ab52a13
 
cbc840b
831a7b4
b88f708
ab52a13
c3b581c
b88f708
cbc840b
b88f708
 
 
9c3ea11
cbc840b
 
ab52a13
cbc840b
 
ab52a13
 
 
cbc840b
ab52a13
831a7b4
cbc840b
831a7b4
ab52a13
cbc840b
831a7b4
 
 
9c3ea11
cbc840b
831a7b4
b88f708
cbc840b
f837ee9
b88f708
ab52a13
c3b581c
f837ee9
b88f708
 
ab52a13
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152

import streamlit as st
import torch
import os
import time
import tempfile
from PIL import Image
import torch.nn.functional as F

from transformers import (
    MBartForConditionalGeneration,
    MBart50TokenizerFast,
    AutoTokenizer,
    AutoModelForCausalLM,
    CLIPProcessor,
    CLIPModel,
)
from diffusers import StableDiffusionPipeline
from rouge_score import rouge_scorer

# Set device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")


@st.cache_resource(show_spinner=False)
def _load_models():
    """Load every model/tokenizer used by the app exactly once per process.

    Streamlit re-executes this script from the top on each user interaction;
    without caching, all four models would be re-instantiated (and moved to
    ``device``) on every button click. ``show_spinner=False`` keeps this call
    from emitting a UI element before ``st.set_page_config`` runs further
    down in the script.

    Returns:
        A tuple ``(translator_model, translator_tokenizer, gen_tokenizer,
        gen_model, pipe, clip_model, clip_processor)``.
    """
    # Load MBart tokenizer and model
    mbart_name = "facebook/mbart-large-50-many-to-many-mmt"
    mbart_model = MBartForConditionalGeneration.from_pretrained(mbart_name).to(device)
    mbart_tokenizer = MBart50TokenizerFast.from_pretrained(mbart_name)
    # Source language is fixed to Tamil for this app.
    mbart_tokenizer.src_lang = "ta_IN"

    # Load GPT-2
    gpt2_tokenizer = AutoTokenizer.from_pretrained("gpt2")
    gpt2_model = AutoModelForCausalLM.from_pretrained("gpt2").to(device)
    gpt2_model.eval()

    # Load Stable Diffusion
    sd_pipe = StableDiffusionPipeline.from_pretrained(
        "stabilityai/stable-diffusion-2-1",
        token=os.getenv("HF_TOKEN"),
        torch_dtype=torch.float32,
    ).to(device)
    sd_pipe.safety_checker = None

    # Load CLIP
    clip_name = "openai/clip-vit-base-patch32"
    clip = CLIPModel.from_pretrained(clip_name).to(device)
    clip_proc = CLIPProcessor.from_pretrained(clip_name)

    return (
        mbart_model,
        mbart_tokenizer,
        gpt2_tokenizer,
        gpt2_model,
        sd_pipe,
        clip,
        clip_proc,
    )


# Module-level names are kept so the functions below can keep using them.
(
    translator_model,
    translator_tokenizer,
    gen_tokenizer,
    gen_model,
    pipe,
    clip_model,
    clip_processor,
) = _load_models()

# ---------------- Functions ---------------- #

def translate_tamil_to_english(text, reference=None):
    """Translate ``text`` to English and optionally score it with ROUGE-L.

    Args:
        text: Source text to translate (the module-level tokenizer is
            configured with ``src_lang = "ta_IN"``).
        reference: Optional reference English translation. ROUGE-L is only
            computed when this contains non-whitespace characters.

    Returns:
        A tuple ``(translated, duration, rouge_l)`` where ``translated`` is
        the decoded English string, ``duration`` is the wall-clock
        translation time in seconds rounded to 2 decimals, and ``rouge_l``
        is the ROUGE-L F-measure rounded to 4 decimals, or ``None`` when no
        usable reference was given.
    """
    start = time.time()
    # truncation=True caps the input at the tokenizer's configured maximum
    # length so an over-long text area input cannot overflow the model.
    inputs = translator_tokenizer(
        text, return_tensors="pt", truncation=True
    ).to(device)
    outputs = translator_model.generate(
        **inputs,
        # Force the decoder to start with the English language token.
        forced_bos_token_id=translator_tokenizer.lang_code_to_id["en_XX"],
    )
    translated = translator_tokenizer.batch_decode(outputs, skip_special_tokens=True)[0]
    duration = round(time.time() - start, 2)

    rouge_l = None
    # A whitespace-only reference would be truthy but yields a meaningless
    # score of 0.0, so treat it the same as "no reference".
    reference = reference.strip() if reference else ""
    if reference:
        scorer = rouge_scorer.RougeScorer(['rougeL'], use_stemmer=True)
        score = scorer.score(reference.lower(), translated.lower())
        rouge_l = round(score["rougeL"].fmeasure, 4)

    return translated, duration, rouge_l

def generate_creative_text(prompt, max_length=100):
    """Continue ``prompt`` with sampled GPT-2 text and compute simple metrics.

    Args:
        prompt: Text used to seed generation.
        max_length: Passed to ``generate`` as ``max_length`` (total length in
            tokens, prompt included).

    Returns:
        A tuple ``(text, duration, token_count, repetition_rate, perplexity)``:
        the decoded output, generation time in seconds (2 decimals), the
        number of whitespace-separated tokens, the fraction of tokens equal
        to their immediate successor (4 decimals, ``0.0`` for empty output),
        and ``exp(loss)`` of the model on its own output (4 decimals, ``nan``
        when the output has fewer than two model tokens).
    """
    start = time.time()
    # Calling the tokenizer (instead of .encode) also yields the attention
    # mask, which is forwarded to generate() together with input_ids.
    encoded = gen_tokenizer(prompt, return_tensors="pt").to(device)
    output = gen_model.generate(
        **encoded,
        max_length=max_length,
        do_sample=True,
        top_k=50,
        temperature=0.9,
        # GPT-2 defines no pad token; use EOS explicitly for padding.
        pad_token_id=gen_tokenizer.eos_token_id,
    )
    text = gen_tokenizer.decode(output[0], skip_special_tokens=True)
    duration = round(time.time() - start, 2)

    tokens = text.split()
    if tokens:
        adjacent_repeats = sum(t1 == t2 for t1, t2 in zip(tokens, tokens[1:]))
        repetition_rate = adjacent_repeats / len(tokens)
    else:
        # Empty/whitespace-only output: avoid dividing by zero.
        repetition_rate = 0.0

    with torch.no_grad():
        scoring_ids = gen_tokenizer.encode(text, return_tensors="pt").to(device)
        if scoring_ids.shape[-1] < 2:
            # The LM loss needs at least one (input, next-token) pair.
            perplexity = float("nan")
        else:
            loss = gen_model(scoring_ids, labels=scoring_ids).loss
            perplexity = torch.exp(loss).item()

    return text, duration, len(tokens), round(repetition_rate, 4), round(perplexity, 4)

def generate_image(prompt):
    """Generate an image for ``prompt`` and save a 256x256 PNG copy to disk.

    Args:
        prompt: Text prompt passed to the Stable Diffusion pipeline.

    Returns:
        On success ``(path, duration, image)``: the path of the saved PNG,
        the elapsed time in seconds (2 decimals, including resize and save),
        and the resized PIL image.
        On any failure ``(None, 0, message)`` where ``message`` is a string
        describing the error; callers distinguish the two cases by checking
        the type of the third element.
    """
    try:
        start = time.time()
        result = pipe(prompt)
        image = result.images[0].resize((256, 256))
        # Only reserve a unique path here; the handle is closed on leaving
        # the `with` so no descriptor is leaked and the save below does not
        # write to a file that is still open (which fails on Windows).
        with tempfile.NamedTemporaryFile(delete=False, suffix=".png") as tmp_file:
            image_path = tmp_file.name
        image.save(image_path)
        duration = round(time.time() - start, 2)
        return image_path, duration, image
    except Exception as e:
        # Deliberate best-effort boundary: the error is reported to the
        # caller as a string rather than raised.
        return None, 0, f"Image generation failed: {e}"

def evaluate_clip_similarity(text, image):
    """Score how well ``image`` matches ``text`` using CLIP.

    Args:
        text: Caption/prompt to compare against the image.
        image: Image to score (the caller passes a PIL image).

    Returns:
        The single entry of the model's ``logits_per_image`` output for this
        (image, text) pair, rounded to 4 decimals. This is the raw logit as
        returned by the model, not a value normalised to [0, 1].
    """
    # truncation=True clips the caption to the tokenizer's maximum length;
    # without it a long translation makes the text encoder fail.
    inputs = clip_processor(
        text=[text],
        images=image,
        return_tensors="pt",
        padding=True,
        truncation=True,
    ).to(device)
    with torch.no_grad():
        outputs = clip_model(**inputs)
        logits_per_image = outputs.logits_per_image
        similarity_score = logits_per_image[0][0].item()
    return round(similarity_score, 4)

# ---------------- Streamlit UI ---------------- #
# Page flow: read Tamil text (+ optional reference translation), then on
# button press run translation -> image generation (+ CLIP score) ->
# creative text generation, reporting timing/metrics after each step.
# NOTE(review): the user-facing strings below previously contained
# mis-decoded UTF-8 (mojibake); they are restored to the intended arrow and
# emoji characters. Three sequences had lost a byte and were reconstructed
# from context (memo, magnifying glass, repeat) -- confirm against the
# original design if exact glyphs matter.

st.set_page_config(page_title="Tamil → English + AI Art", layout="centered")
st.title("🧠 Tamil → English + 🎨 Creative Text + AI Image")

tamil_input = st.text_area("✍️ Enter Tamil text here", height=150)
reference_input = st.text_input("📘 Optional: Reference English translation for ROUGE")

if st.button("🚀 Generate Output"):
    if not tamil_input.strip():
        st.warning("Please enter Tamil text.")
    else:
        # Step 1: translation (ROUGE-L only when a non-blank reference exists).
        with st.spinner("🔄 Translating Tamil to English..."):
            english_text, t_time, rouge_l = translate_tamil_to_english(
                tamil_input, reference_input.strip() or None
            )

        st.success(f"✅ Translated in {t_time} seconds")
        st.markdown(f"**📝 English Translation:** `{english_text}`")
        if rouge_l is not None:
            st.markdown(f"📊 **ROUGE-L Score:** `{rouge_l}`")
        else:
            st.info("ℹ️ ROUGE-L not calculated. Reference not provided.")

        # Step 2: image generation from the English translation.
        with st.spinner("🎨 Generating image..."):
            image_path, img_time, image_obj = generate_image(english_text)

        # generate_image returns an error string in the third slot on failure.
        if isinstance(image_obj, Image.Image):
            st.success(f"🖼️ Image generated in {img_time} seconds")
            # Show the in-memory image directly instead of re-opening the
            # saved file, which would leave an extra file handle open.
            st.image(image_obj, caption="AI-Generated Image", use_column_width=True)

            with st.spinner("🔎 Evaluating CLIP similarity..."):
                clip_score = evaluate_clip_similarity(english_text, image_obj)
                st.markdown(f"🔍 **CLIP Text-Image Similarity:** `{clip_score}`")
        else:
            st.error(image_obj)

        # Step 3: creative continuation of the translation.
        with st.spinner("💡 Generating creative text..."):
            creative, c_time, tokens, rep_rate, ppl = generate_creative_text(english_text)

        st.success(f"✨ Creative text generated in {c_time} seconds")
        st.markdown(f"**🧠 Creative Output:** `{creative}`")
        st.markdown(f"📌 Tokens: `{tokens}`, 🔁 Repetition Rate: `{rep_rate}`, 📉 Perplexity: `{ppl}`")

st.markdown("---")
st.caption("Built by Sureshkumar R using MBart, GPT-2, Stable Diffusion 2.1, and CLIP on Hugging Face 🤗")