# app.py
import gradio as gr
import torch
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer, pipeline
from IndicTransToolkit.processor import IndicProcessor
import requests
import logging
from datetime import datetime
import tempfile
from gtts import gTTS
import os
import shutil

DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

# Load the IndicTrans2 models for both directions, the IndicProcessor for
# pre/post-processing, and a Whisper pipeline for speech recognition.
model_en_to_indic = AutoModelForSeq2SeqLM.from_pretrained(
    "ai4bharat/indictrans2-en-indic-1B", trust_remote_code=True
).to(DEVICE)
tokenizer_en_to_indic = AutoTokenizer.from_pretrained(
    "ai4bharat/indictrans2-en-indic-1B", trust_remote_code=True
)
model_indic_to_en = AutoModelForSeq2SeqLM.from_pretrained(
    "ai4bharat/indictrans2-indic-en-1B", trust_remote_code=True
).to(DEVICE)
tokenizer_indic_to_en = AutoTokenizer.from_pretrained(
    "ai4bharat/indictrans2-indic-en-1B", trust_remote_code=True
)
ip = IndicProcessor(inference=True)
asr = pipeline("automatic-speech-recognition", model="openai/whisper-small")

# --- Supabase settings ---
SUPABASE_URL = "https://gptmdbhzblfybdnohqnh.supabase.co"
SUPABASE_API_KEY = "eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJpc3MiOiJzdXBhYmFzZSIsInJlZiI6ImdwdG1kYmh6YmxmeWJkbm9ocW5oIiwicm9sZSI6ImFub24iLCJpYXQiOjE3NDc0NjY1NDgsImV4cCI6MjA2MzA0MjU0OH0.CfWArts6Kd_x7Wj0a_nAyGJfrFt8F7Wdy_MdYDj9e7U"

# --- Supabase utilities ---
def save_to_supabase(input_text, output_text, direction):
    """Insert a translation record into the direction-specific Supabase table."""
    if not input_text.strip() or not output_text.strip():
        return "Nothing to save."
    table = "translations" if direction == "en_to_ks" else "ks_to_en_translations"
    payload = {
        "timestamp": datetime.utcnow().isoformat(),
        "input_text": input_text,
        "output_text": output_text,
    }
    headers = {
        "apikey": SUPABASE_API_KEY,
        "Authorization": f"Bearer {SUPABASE_API_KEY}",
        "Content-Type": "application/json",
    }
    try:
        response = requests.post(f"{SUPABASE_URL}/rest/v1/{table}", json=payload, headers=headers)
        return "✅ Saved successfully!" if response.status_code == 201 else "❌ Failed to save."
    except Exception as e:
        logging.error("Save error: %s", e)
        return "❌ Save error."


def get_translation_history(direction):
    """Fetch the ten most recent records for the given translation direction."""
    table = "translations" if direction == "en_to_ks" else "ks_to_en_translations"
    headers = {
        "apikey": SUPABASE_API_KEY,
        "Authorization": f"Bearer {SUPABASE_API_KEY}",
    }
    try:
        res = requests.get(
            f"{SUPABASE_URL}/rest/v1/{table}?order=timestamp.desc&limit=10", headers=headers
        )
        if res.status_code == 200:
            data = res.json()
            return "\n\n".join([f"Input: {r['input_text']} → Output: {r['output_text']}" for r in data])
        return "Failed to load history."
    except Exception as e:
        logging.error("History error: %s", e)
        return "Error loading history."
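
# --- Optional: environment-based credentials (sketch) ---
# A minimal sketch, assuming the host (e.g. a Hugging Face Space) may expose
# SUPABASE_URL and SUPABASE_API_KEY as environment variables / secrets; it
# falls back to the hardcoded values above, so behaviour is unchanged when
# these variables are not set.
SUPABASE_URL = os.environ.get("SUPABASE_URL", SUPABASE_URL)
SUPABASE_API_KEY = os.environ.get("SUPABASE_API_KEY", SUPABASE_API_KEY)
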
# --- Translation with TTS integration ---
def translate(text, direction, generate_tts=False):
    """Translate text in the given direction; optionally synthesize English TTS."""
    if not text.strip():
        return "Enter some text.", None

    if direction == "en_to_ks":
        src_lang, tgt_lang = "eng_Latn", "kas_Arab"
        model, tokenizer = model_en_to_indic, tokenizer_en_to_indic
    else:
        src_lang, tgt_lang = "kas_Arab", "eng_Latn"
        model, tokenizer = model_indic_to_en, tokenizer_indic_to_en

    try:
        batch = ip.preprocess_batch([text], src_lang=src_lang, tgt_lang=tgt_lang)
        tokens = tokenizer(batch, return_tensors="pt", padding=True).to(DEVICE)
        with torch.no_grad():
            output = model.generate(**tokens, max_length=256, num_beams=5)
        result = tokenizer.batch_decode(output, skip_special_tokens=True)
        final = ip.postprocess_batch(result, lang=tgt_lang)[0]

        # Generate TTS for the KS→EN direction if requested (the output is English)
        audio_path = None
        if generate_tts and direction == "ks_to_en":
            audio_path = synthesize_tts(final)

        return final, audio_path
    except Exception as e:
        logging.error("Translation error: %s", e)
        return "⚠️ Translation failed.", None


# --- TTS for English output ---
def synthesize_tts(text):
    """Synthesize English speech with gTTS and return the path to an MP3 file."""
    try:
        tts = gTTS(text=text, lang="en")
        with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as f:
            tts.save(f.name)
        return f.name
    except Exception as e:
        logging.error("TTS error: %s", e)
        return None


# --- STT for English audio ---
def transcribe_audio(audio_path):
    """Transcribe an English audio file with Whisper; return (text, error)."""
    try:
        if not audio_path:
            return None, "No audio file provided"

        # Create a persistent copy of the audio file before transcribing
        with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as f:
            temp_path = f.name
        shutil.copy(audio_path, temp_path)

        transcription = asr(temp_path)["text"]
        os.unlink(temp_path)  # Clean up the temporary file
        return transcription, None
    except Exception as e:
        logging.error("STT error: %s", e)
        return None, f"⚠️ Transcription failed: {str(e)}"


# --- Store audio file path ---
def store_audio(audio_path):
    """Store the audio path in state and return it to keep it visible."""
    return audio_path


# --- Handle audio translation ---
def handle_audio_translation(audio_path, direction):
    """Transcribe uploaded English audio and translate it to Kashmiri."""
    if direction != "en_to_ks":
        return "⚠️ Audio input is only supported for English to Kashmiri.", "", "", audio_path
    transcription, error = transcribe_audio(audio_path)
    if error:
        return error, "", "", audio_path
    translated, _ = translate(transcription, direction, generate_tts=False)
    return "", transcription, translated, audio_path


# --- Switch UI direction ---
def switch_direction(direction, input_text_val, output_text_val, audio_path):
    """Flip the translation direction and swap the input/output textbox contents."""
    new_direction = "ks_to_en" if direction == "en_to_ks" else "en_to_ks"
    input_label = "Kashmiri Text" if new_direction == "ks_to_en" else "English Text"
    output_label = "English Translation" if new_direction == "ks_to_en" else "Kashmiri Translation"
    return (
        new_direction,
        gr.update(value=output_text_val, label=input_label),
        gr.update(value=input_text_val, label=output_label),
        None,
    )


# === Gradio Interface ===
with gr.Blocks() as interface:
    gr.HTML("""