"""Translation service: a FastAPI JSON API with a Gradio UI mounted at "/".

Text in Thai, Japanese, Chinese or Vietnamese is translated to English with
Helsinki-NLP MarianMT models.  Terms listed in ``PROTECTED_TERMS`` are swapped
for placeholders before translation and restored afterwards.
"""
import asyncio
import logging
import os
import re
from contextlib import asynccontextmanager
from functools import lru_cache, partial  # noqa: F401  (partial kept for compatibility)
from typing import Dict, Optional

# --- 1. Initial configuration ---
os.makedirs("./cache", exist_ok=True)
os.makedirs("./logs", exist_ok=True)

# Point the Hugging Face cache at ./cache.
# These must be set BEFORE importing transformers / gradio: the Hugging Face
# libraries resolve their cache paths from the environment when first imported,
# so assigning them afterwards does not redirect the cache.
os.environ["HF_HOME"] = "./cache"
os.environ["TRANSFORMERS_CACHE"] = "./cache"

import gradio as gr  # noqa: E402
import langdetect  # noqa: E402
from fastapi import FastAPI, HTTPException  # noqa: E402
from pydantic import BaseModel  # noqa: E402
from transformers import Pipeline, pipeline  # noqa: E402

# langdetect is randomized by default; seed it so that the same text always
# yields the same language (important because results are memoized below).
langdetect.DetectorFactory.seed = 0

# Environment configuration
DEVICE = -1  # Always CPU, for compatibility
MAX_TEXT_LENGTH = int(os.getenv("MAX_TEXT_LENGTH", "5000"))

# Configure logging
logging.basicConfig(
    format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
    level=logging.INFO,
)
logger = logging.getLogger(__name__)

# Supported source languages -> translation model
MODEL_MAP = {
    "th": "Helsinki-NLP/opus-mt-th-en",
    "ja": "Helsinki-NLP/opus-mt-ja-en",
    "zh": "Helsinki-NLP/opus-mt-zh-en",
    "vi": "Helsinki-NLP/opus-mt-vi-en",
}

# Terms that must survive translation unchanged
PROTECTED_TERMS = ["2030 Aspirations", "Griffith"]

# Loaded translation pipelines, keyed by source-language code
translators: Dict[str, Pipeline] = {}


# --- Pydantic Models ---
class TranslationRequest(BaseModel):
    """Request body for ``POST /translate``."""

    text: str
    source_lang_override: Optional[str] = None


class TranslationResponse(BaseModel):
    """Response body for ``POST /translate``."""

    translated_text: str
    source_language: Optional[str] = None


# --- Lifespan Event Handler ---
@asynccontextmanager
async def lifespan(app: FastAPI):
    """Preload every model in ``MODEL_MAP`` before the app starts serving.

    A model that fails to load is logged and skipped; requests for that
    language are later rejected by ``get_translator``.
    """
    logger.info("Memulai prapemuatan model translasi...")
    for lang, model_name in MODEL_MAP.items():
        try:
            logger.info(f"Memuat model untuk bahasa: {lang} ({model_name})")
            translators[lang] = pipeline("translation", model=model_name, device=DEVICE)
            logger.info(f"Model untuk {lang} berhasil dimuat.")
        except Exception as e:
            logger.error(f"Gagal memuat model untuk {lang}: {str(e)}")
    logger.info("Semua model telah dimuat.")
    yield  # The application runs here


# --- FastAPI application with lifespan ---
app = FastAPI(title="Translation Service API", lifespan=lifespan)


# --- Utility functions ---
def get_translator(lang: str) -> Pipeline:
    """Return the preloaded pipeline for ``lang``.

    Raises:
        HTTPException: 500 if no pipeline is loaded for ``lang``.
    """
    translator = translators.get(lang)
    if not translator:
        logger.error(f"Translator untuk bahasa '{lang}' tidak ditemukan. Mungkin gagal dimuat saat startup.")
        raise HTTPException(status_code=500, detail=f"Model terjemahan untuk '{lang}' tidak tersedia.")
    return translator


@lru_cache(maxsize=128)
def detect_language(text: str) -> str:
    """Detect the language of ``text`` (memoized).

    Only the first 500 characters are examined.  Any ``zh-*`` variant is
    normalized to ``"zh"``.  Languages without a model in ``MODEL_MAP``, and
    detection failures, are reported as ``"en"`` (i.e. "do not translate").
    """
    try:
        preview_text = text[:500]
        detected_lang = langdetect.detect(preview_text)
        if detected_lang.startswith("zh"):
            return "zh"
        return detected_lang if detected_lang in MODEL_MAP else "en"
    except Exception as e:
        logger.warning(f"Deteksi bahasa gagal: {str(e)}. Mengasumsikan 'en'.")
        return "en"


def protect_terms(text: str, protected_terms: list) -> tuple[str, dict]:
    """Replace protected terms in ``text`` with placeholders.

    Matching is case-insensitive.  A term matches only when it is not glued to
    other ASCII letters, digits or underscores, so "Griffith" is not matched
    inside "Griffiths".  ASCII look-arounds are used instead of ``\\b``
    because the Unicode-aware ``\\b`` sees no boundary between Latin and CJK
    characters and would miss terms such as "Griffith大学".

    Returns:
        The modified text and a ``{placeholder: term}`` mapping containing
        only the terms that were actually found.
    """
    replacements = {}
    for i, term in enumerate(protected_terms):
        placeholder = f"__PROTECTED_{i}__"
        pattern = r"(?<![A-Za-z0-9_])" + re.escape(term) + r"(?![A-Za-z0-9_])"
        modified_text = re.sub(pattern, placeholder, text, flags=re.IGNORECASE)
        if modified_text != text:
            replacements[placeholder] = term
            text = modified_text
    return text, replacements


def restore_terms(text: str, replacements: dict) -> str:
    """Substitute each placeholder in ``text`` with its original term."""
    for placeholder, term in replacements.items():
        text = text.replace(placeholder, term)
    return text


# --- Core function and API endpoints ---
async def perform_translation(text: str, source_lang_override: Optional[str] = None) -> TranslationResponse:
    """Translate ``text`` to English.

    Args:
        text: Text to translate; must be non-blank and at most
            ``MAX_TEXT_LENGTH`` characters.
        source_lang_override: Source-language code.  Used only when it is a
            key of ``MODEL_MAP``; otherwise the language is auto-detected.

    Returns:
        The translation and the source language used.  Text detected as
        ``"en"`` is returned unchanged.

    Raises:
        HTTPException: 400 for blank input, 413 for over-long input, 500 when
            the model is unavailable or translation fails.
    """
    if not text or not text.strip():
        raise HTTPException(status_code=400, detail="Teks input tidak boleh kosong.")
    if len(text) > MAX_TEXT_LENGTH:
        raise HTTPException(
            status_code=413,
            detail=f"Teks terlalu panjang. Panjang maksimal yang diizinkan: {MAX_TEXT_LENGTH}.",
        )
    try:
        if source_lang_override and source_lang_override in MODEL_MAP:
            source_lang = source_lang_override
        else:
            source_lang = detect_language(text)

        if source_lang == "en":
            return TranslationResponse(translated_text=text, source_language=source_lang)

        translator = get_translator(source_lang)
        modified_text, replacements = protect_terms(text, PROTECTED_TERMS)

        def _translate_task():
            return translator(modified_text, max_length=512, num_beams=4)

        # Model inference is blocking; run it in a worker thread so the event
        # loop stays responsive.
        result = await asyncio.to_thread(_translate_task)
        translated_text = result[0]["translation_text"]
        final_text = restore_terms(translated_text, replacements)
        return TranslationResponse(translated_text=final_text, source_language=source_lang)
    except HTTPException:
        # Already a well-formed API error; pass it through untouched.
        raise
    except Exception as e:
        logger.exception(f"Terjadi kesalahan saat translasi: {str(e)}")
        raise HTTPException(status_code=500, detail=f"Proses translasi gagal: {str(e)}") from e


@app.post("/translate", response_model=TranslationResponse)
async def translate_api(request: TranslationRequest):
    """HTTP endpoint wrapping ``perform_translation``."""
    return await perform_translation(request.text, request.source_lang_override)


@app.get("/health")
async def health_check():
    """Report service status and the languages whose models are loaded."""
    return {"status": "healthy", "loaded_models": list(translators.keys())}


# --- Async Gradio handler ---
async def translate_gradio(text: str, source_lang: str = "auto"):
    """Gradio callback: return ``(translated_text, source_language)``.

    Errors are returned as display strings rather than raised, so the UI
    always gets a pair of values.
    """
    if not text or not text.strip():
        return "Masukkan teks untuk diterjemahkan.", "N/A"
    try:
        source_lang_param = source_lang if source_lang != "auto" else None
        result = await perform_translation(text, source_lang_param)
        return result.translated_text, result.source_language or "Unknown"
    except HTTPException as e:
        return f"Error: {e.detail}", "Error"
    except Exception as e:
        return f"Error: {str(e)}", "Error"


# --- Gradio UI ---
def create_gradio_interface():
    """Build and return the Gradio Blocks interface."""
    with gr.Blocks(
        title="Multi-Language Translation Service",
        theme=gr.themes.Soft(),
        css=".gradio-container { max-width: 1200px !important; }",
    ) as interface:
        gr.Markdown("""
        # 🌐 Multi-Language Translation Service
        Terjemahkan teks dari **Thai**, **Jepang**, **Mandarin**, atau **Vietnam** ke **Inggris**.

        ✨ Fitur: Deteksi bahasa otomatis • Perlindungan istilah • Model Helsinki-NLP yang cepat.
        """)
        with gr.Row():
            with gr.Column(scale=1):
                text_input = gr.Textbox(
                    label="📝 Input Text",
                    placeholder="Enter text to translate...",
                    lines=6,
                    max_lines=10,
                )
                with gr.Row():
                    lang_dropdown = gr.Dropdown(
                        choices=[
                            ("🔍 Auto-detect", "auto"),
                            ("🇹🇭 Thai", "th"),
                            ("🇯🇵 Japanese", "ja"),
                            ("🇨🇳 Chinese", "zh"),
                            ("🇻🇳 Vietnamese", "vi"),
                        ],
                        value="auto",
                        label="Source Language",
                    )
                    translate_btn = gr.Button("🚀 Translate", variant="primary", size="lg")
            with gr.Column(scale=1):
                output_text = gr.Textbox(
                    label="🎯 Translation Result",
                    lines=6,
                    max_lines=10,
                    interactive=False,
                )
                detected_lang = gr.Textbox(
                    label="🔍 Detected Language",
                    interactive=False,
                    max_lines=1,
                )
        gr.Examples(
            examples=[
                ["สวัสดีครับ ยินดีที่ได้รู้จัก การพัฒนา 2030 Aspirations เป็นเป้าหมายสำคัญ", "th"],
                ["こんにちは、はじめまして。Griffith大学での研究が進んでいます。", "ja"],
                ["你好,很高兴认识你。我们正在为2030 Aspirations制定计划。", "zh"],
                ["Xin chào, rất vui được gặp bạn. Griffith là trường đại học tuyệt vời.", "vi"],
            ],
            inputs=[text_input, lang_dropdown],
            outputs=[output_text, detected_lang],
            # Pass the coroutine function itself; Gradio awaits async callbacks.
            # Wrapping it as partial(asyncio.run, ...) would call
            # asyncio.run(func, text, lang), which raises TypeError.
            fn=translate_gradio,
            cache_examples=False,
        )
        translate_btn.click(
            fn=translate_gradio,
            inputs=[text_input, lang_dropdown],
            outputs=[output_text, detected_lang],
        )
        text_input.submit(
            fn=translate_gradio,
            inputs=[text_input, lang_dropdown],
            outputs=[output_text, detected_lang],
        )
    return interface


# Mount Gradio onto the FastAPI app
gradio_app = create_gradio_interface()
app = gr.mount_gradio_app(app, gradio_app, path="/")