Spaces:

GAASH-Lab
/

Translator-en-kas

Running

App Files Community

BurhaanZargar committed 30 days ago

Commit

902cd01

1 Parent(s): 89d62bb

Done

Browse files

Files changed (3) hide show

app.py +194 -0
postBuild +1 -0
requirements.txt +8 -0

app.py ADDED Viewed

	@@ -0,0 +1,194 @@

+import os
+from datetime import datetime, timezone
+
+import gradio as gr
+import requests
+import torch
+from IndicTransToolkit.processor import IndicProcessor
+from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
+# Supabase configuration.
+# SUPABASE_URL and SUPABASE_API_KEY may be overridden through environment
+# variables of the same name, so credentials can be rotated without editing
+# this file. The literals remain as defaults so the app behaves exactly as
+# before when no variable is set (an empty variable also falls back).
+# NOTE(review): the default key is committed to the repository. Its JWT payload
+# declares the "anon" role; confirm row-level security protects both tables,
+# and consider dropping the literal once the variable is configured.
+SUPABASE_URL = os.environ.get("SUPABASE_URL") or "https://gptmdbhzblfybdnohqnh.supabase.co"
+SUPABASE_API_KEY = (
+    os.environ.get("SUPABASE_API_KEY")
+    or "eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJpc3MiOiJzdXBhYmFzZSIsInJlZiI6ImdwdG1kYmh6YmxmeWJkbm9ocW5oIiwicm9sZSI6ImFub24iLCJpYXQiOjE3NDc0NjY1NDgsImV4cCI6MjA2MzA0MjU0OH0.CfWArts6Kd_x7Wj0a_nAyGJfrFt8F7Wdy_MdYDj9e7U"
+)
+# Kept for backward compatibility; the functions in this file choose their
+# table name from the translation direction instead of reading this constant.
+SUPABASE_TABLE = "translations"
+# Device configuration: use the GPU when torch reports one, otherwise the CPU
+DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
+# Load both translation models and their tokenizers once at import time so that
+# individual requests do not pay the loading cost
+model_en_to_indic = AutoModelForSeq2SeqLM.from_pretrained("ai4bharat/indictrans2-en-indic-1B", trust_remote_code=True).to(DEVICE)
+tokenizer_en_to_indic = AutoTokenizer.from_pretrained("ai4bharat/indictrans2-en-indic-1B", trust_remote_code=True)
+model_indic_to_en = AutoModelForSeq2SeqLM.from_pretrained("ai4bharat/indictrans2-indic-en-1B", trust_remote_code=True).to(DEVICE)
+tokenizer_indic_to_en = AutoTokenizer.from_pretrained("ai4bharat/indictrans2-indic-en-1B", trust_remote_code=True)
+# Shared pre/post-processor used by translate() for both directions
+ip = IndicProcessor(inference=True)
+# Separate save function (only called if the user clicks the Save button)
+def save_to_supabase(input_text, output_text, direction):
+    """Insert one translation pair into the Supabase table for *direction*.
+
+    Args:
+        input_text: Source text from the input box.
+        output_text: Translation from the output box.
+        direction: "en_to_ks" selects the "translations" table; any other
+            value selects "ks_to_en_translations".
+
+    Returns:
+        A short status message for the UI. Failures are printed and reported
+        through the message; nothing is raised to the caller.
+    """
+    # Treat None like an empty string so the guard itself cannot raise
+    # AttributeError when a textbox value is missing.
+    if not (input_text or "").strip() or not (output_text or "").strip():
+        return "Nothing to save."
+    # Choose table name based on direction
+    table_name = "translations" if direction == "en_to_ks" else "ks_to_en_translations"
+    payload = {
+        # Timezone-aware UTC timestamp (ISO string ends in "+00:00").
+        # datetime.utcnow() returns a naive value and is deprecated since
+        # Python 3.12.
+        "timestamp": datetime.now(timezone.utc).isoformat(),
+        "input_text": input_text,
+        "output_text": output_text,
+    }
+    headers = {
+        "apikey": SUPABASE_API_KEY,
+        "Authorization": f"Bearer {SUPABASE_API_KEY}",
+        "Content-Type": "application/json",
+    }
+    try:
+        response = requests.post(
+            f"{SUPABASE_URL}/rest/v1/{table_name}",
+            headers=headers,
+            json=payload,
+            timeout=10,
+        )
+    except Exception as e:
+        # Best-effort boundary: a failed save must not crash the UI callback.
+        print("SAVE EXCEPTION:", e)
+        return "❌ Save request error."
+    if response.status_code == 201:
+        return "✅ Saved successfully!"
+    print("SAVE ERROR:", response.status_code, response.text)
+    return "❌ Failed to save."
+# Function to retrieve recent translation history from Supabase
+def get_translation_history(direction="en_to_ks"):
+    """Fetch the ten most recent saved translations for *direction*.
+
+    Args:
+        direction: "en_to_ks" reads the "translations" table; any other value
+            reads "ks_to_en_translations".
+
+    Returns:
+        The records formatted as "Input: … → Output: …" entries separated by
+        blank lines, or a short error message if the request fails.
+    """
+    table_name = "translations" if direction == "en_to_ks" else "ks_to_en_translations"
+    headers = {
+        "apikey": SUPABASE_API_KEY,
+        "Authorization": f"Bearer {SUPABASE_API_KEY}",
+    }
+    try:
+        response = requests.get(
+            f"{SUPABASE_URL}/rest/v1/{table_name}",
+            headers=headers,
+            # Let requests build the query string instead of hand-writing it.
+            params={"order": "timestamp.desc", "limit": 10},
+            timeout=10,
+        )
+        if response.status_code != 200:
+            # Log the failure details, matching save_to_supabase's diagnostics.
+            print("HISTORY FETCH ERROR:", response.status_code, response.text)
+            return "Failed to load history."
+        records = response.json()
+        return "\n\n".join(
+            f"Input: {r['input_text']} → Output: {r['output_text']}" for r in records
+        )
+    except Exception as e:
+        # Covers network errors as well as malformed or unexpected payloads.
+        print("HISTORY FETCH ERROR:", e)
+        return "Error loading history."
+# Translation function
+def translate(text, direction):
+    """Translate *text* between English and Kashmiri.
+
+    Args:
+        text: The text to translate.
+        direction: "en_to_ks" translates English to Kashmiri with the
+            en-indic model; any other value translates Kashmiri to English
+            with the indic-en model.
+
+    Returns:
+        A 3-tuple: the translation (or a user-facing message) followed by two
+        argument-less gr.update() values. The Translate button in this file
+        is wired to three outputs, so the tuple length must stay at three.
+    """
+    if not text.strip():
+        return "Please enter some text.", gr.update(), gr.update()
+    to_kashmiri = direction == "en_to_ks"
+    src_lang = "eng_Latn" if to_kashmiri else "kas_Arab"
+    tgt_lang = "kas_Arab" if to_kashmiri else "eng_Latn"
+    model = model_en_to_indic if to_kashmiri else model_indic_to_en
+    tokenizer = tokenizer_en_to_indic if to_kashmiri else tokenizer_indic_to_en
+    generation_options = {
+        "max_length": 256,
+        "num_beams": 5,
+        "num_return_sequences": 1,
+    }
+    try:
+        prepared = ip.preprocess_batch([text], src_lang=src_lang, tgt_lang=tgt_lang)
+        encoded = tokenizer(prepared, return_tensors="pt", padding=True)
+        encoded = encoded.to(DEVICE)
+        # Inference only: no gradients are needed
+        with torch.no_grad():
+            generated = model.generate(**encoded, **generation_options)
+        decoded = tokenizer.batch_decode(generated, skip_special_tokens=True)
+        # A single sentence went in, so take the single result back out
+        result = ip.postprocess_batch(decoded, lang=tgt_lang)[0]
+        return result, gr.update(), gr.update()
+    except Exception as e:
+        print("Translation Error:", e)
+        return "⚠️ Translation failed.", gr.update(), gr.update()
+# Toggle function: flips the direction, relabels both boxes and swaps their text
+def switch_direction(direction, input_text_val, output_text_val):
+    """Flip the translation direction and swap the two textbox contents.
+
+    Args:
+        direction: Current direction; "en_to_ks" flips to "ks_to_en" and any
+            other value flips to "en_to_ks".
+        input_text_val: Current content of the input box.
+        output_text_val: Current content of the output box.
+
+    Returns:
+        A 3-tuple: the new direction, an update for the input box (old output
+        text plus new label) and an update for the output box (old input text
+        plus new label).
+    """
+    if direction == "en_to_ks":
+        flipped = "ks_to_en"
+        source_label, target_label = "Kashmiri Text", "English Translation"
+    else:
+        flipped = "en_to_ks"
+        source_label, target_label = "English Text", "Kashmiri Translation"
+    swapped_input = gr.update(value=output_text_val, label=source_label)
+    swapped_output = gr.update(value=input_text_val, label=target_label)
+    return flipped, swapped_input, swapped_output
+# Gradio UI: layout first, then event wiring
+with gr.Blocks() as interface:
+    # Page header: title flanked by two logos
+    gr.HTML("""
+<div style="display: flex; justify-content: space-between; align-items: center; padding: 10px;">
+    <img src="https://raw.githubusercontent.com/BurhaanRasheedZargar/Images/211321a234613a9c3dd944fe9367cf13d1386239/assets/left_logo.png" style="height:150px; width:auto;">
+    <h2 style="margin: 0; text-align: center;">English ↔ Kashmiri Translator</h2>
+    <img src="https://raw.githubusercontent.com/BurhaanRasheedZargar/Images/77797f7f7cbee328fa0f9d31cf3e290441e04cd3/assets/right_logo.png">
+</div>
+""")
+    # Per-session direction flag; the app starts in English -> Kashmiri mode
+    translation_direction = gr.State(value="en_to_ks")
+    # Source and target text boxes, side by side
+    with gr.Row():
+        input_text = gr.Textbox(lines=2, label="English Text", placeholder="Enter text....")
+        output_text = gr.Textbox(lines=2, label="Kashmiri Translation", placeholder="Translated text....")
+    # Action buttons
+    with gr.Row():
+        translate_button = gr.Button("Translate")
+        save_button = gr.Button("Save Translation")
+        switch_button = gr.Button("Switch")
+    # Read-only feedback areas
+    save_status = gr.Textbox(label="Save Status", interactive=False)
+    history_box = gr.Textbox(lines=10, label="Translation History", interactive=False)
+    # Translate: translate() returns three values, one per listed output
+    translate_button.click(
+        fn=translate,
+        inputs=[input_text, translation_direction],
+        outputs=[output_text, input_text, output_text],
+    )
+    # Save: store the pair first, then refresh the history box
+    save_event = save_button.click(
+        fn=save_to_supabase,
+        inputs=[input_text, output_text, translation_direction],
+        outputs=save_status,
+    )
+    save_event.then(
+        fn=get_translation_history,
+        inputs=translation_direction,
+        outputs=history_box,
+    )
+    # Switch: flip the direction, swap the texts and relabel both boxes
+    switch_button.click(
+        fn=switch_direction,
+        inputs=[translation_direction, input_text, output_text],
+        outputs=[translation_direction, input_text, output_text],
+    )
+if __name__ == "__main__":
+    # Hugging Face Spaces exposes SPACE_ID to the running app. A hosted Space
+    # is already publicly reachable and has no desktop browser to open, so a
+    # share link and browser launch are only requested for local runs.
+    running_on_space = bool(os.environ.get("SPACE_ID"))
+    interface.launch(share=not running_on_space, inbrowser=not running_on_space)

postBuild ADDED Viewed

	@@ -0,0 +1 @@


1	+ python -m nltk.downloader punkt

requirements.txt ADDED Viewed

	@@ -0,0 +1,8 @@

+torch>=1.12
+transformers>=4.30.0
+sentencepiece
+nltk
+mosestokenizer
+gradio
+requests
+git+https://github.com/VarunGumma/IndicTransToolkit.git