# NOTE: page-scrape residue removed from the top of this file: the Hugging Face
# Spaces status banner ("Runtime error"), "File size: 8,881 Bytes", the per-line
# commit-hash column and the 1-212 line-number gutter. None of it was source code.
import gradio as gr
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel
from transformers import pipeline
import langdetect
import logging
import os
from typing import Optional, Dict
import re
from functools import lru_cache, partial
import asyncio
from contextlib import asynccontextmanager
# --- 1. Initial configuration ---
# Working directories for the model cache and for logs.
os.makedirs("./cache", exist_ok=True)
os.makedirs("./logs", exist_ok=True)

# Point the Hugging Face cache at the local ./cache directory.
# NOTE(review): these variables are assigned after `transformers` has already
# been imported at the top of the file; the library may resolve its cache
# location at import time, in which case this has no effect -- confirm.
os.environ.update(HF_HOME="./cache", TRANSFORMERS_CACHE="./cache")

# Environment-driven settings.
DEVICE = -1  # always CPU, for compatibility
MAX_TEXT_LENGTH = int(os.environ.get("MAX_TEXT_LENGTH", "5000"))

# Logging setup.
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
)
logger = logging.getLogger(__name__)

# Supported source languages and the model that translates each into English.
MODEL_MAP = {
    "th": "Helsinki-NLP/opus-mt-th-en",
    "ja": "Helsinki-NLP/opus-mt-ja-en",
    "zh": "Helsinki-NLP/opus-mt-zh-en",
    "vi": "Helsinki-NLP/opus-mt-vi-en",
}

# Terms that must survive translation unchanged.
PROTECTED_TERMS = ["2030 Aspirations", "Griffith"]

# Loaded translation pipelines, keyed by language code.
translators: Dict[str, pipeline] = dict()
# --- Pydantic models ---
# Request body for the /translate endpoint.
# (Documented with comments, not a docstring: pydantic publishes model
# docstrings as the schema description, which would change the API schema.)
class TranslationRequest(BaseModel):
    # Text to translate.
    text: str
    # Optional source-language code; perform_translation uses it instead of
    # auto-detection only when it is one of the MODEL_MAP keys.
    source_lang_override: Optional[str] = None
# Response body returned by the /translate endpoint.
# (Documented with comments, not a docstring: pydantic publishes model
# docstrings as the schema description, which would change the API schema.)
class TranslationResponse(BaseModel):
    # The English output (or the untouched input when the source is "en").
    translated_text: str
    # Language code that was used as the source language.
    source_language: Optional[str] = None
# --- Lifespan event handler ---
@asynccontextmanager
async def lifespan(app: FastAPI):
    """Preload every translation pipeline before the app starts serving.

    Walks MODEL_MAP and stores one pipeline per language code in the
    module-level ``translators`` dict. A model that fails to load is logged
    and skipped, so the remaining languages are still attempted. Nothing runs
    after the ``yield``: there is no shutdown cleanup.
    """
    logger.info("Memulai prapemuatan model translasi...")
    for lang in MODEL_MAP:
        model_name = MODEL_MAP[lang]
        try:
            logger.info(f"Memuat model untuk bahasa: {lang} ({model_name})")
            loaded_pipeline = pipeline("translation", model=model_name, device=DEVICE)
            translators[lang] = loaded_pipeline
            logger.info(f"Model untuk {lang} berhasil dimuat.")
        except Exception as e:
            # Best effort: one broken model must not prevent startup.
            logger.error(f"Gagal memuat model untuk {lang}: {str(e)}")
    logger.info("Semua model telah dimuat.")

    # The application serves requests while suspended here.
    yield
# --- FastAPI application, created with the lifespan handler ---
app = FastAPI(
    lifespan=lifespan,
    title="Translation Service API",
)
# --- Utility functions ---
def get_translator(lang: str) -> pipeline:
    """Return the preloaded pipeline for ``lang`` from ``translators``.

    Raises:
        HTTPException: status 500 when no pipeline is stored for ``lang``.
    """
    translator = translators.get(lang)
    if translator:
        return translator

    # Nothing cached for this language: log it and surface a server error.
    logger.error(f"Translator untuk bahasa '{lang}' tidak ditemukan. Mungkin gagal dimuat saat startup.")
    raise HTTPException(status_code=500, detail=f"Model terjemahan untuk '{lang}' tidak tersedia.")
# langdetect samples randomly, so the same text can yield different languages
# from run to run. Fixing the seed makes detection reproducible; without it the
# lru_cache below would pin whichever guess happened to come first.
langdetect.DetectorFactory.seed = 0


@lru_cache(maxsize=128)
def detect_language(text: str) -> str:
    """Detect the source language of ``text``; results are memoised.

    Only the first 500 characters are passed to langdetect. Any result
    starting with ``zh`` is collapsed to ``"zh"``. A language that is not a
    key of MODEL_MAP, or a detection failure, yields ``"en"``, which callers
    treat as "no translation needed".

    Args:
        text: The text whose language should be detected.

    Returns:
        A MODEL_MAP language code, or ``"en"``.
    """
    preview_text = text[:500]
    try:
        detected_lang = langdetect.detect(preview_text)
    except Exception as e:
        # Deliberate best effort: a detection failure must not fail the request.
        logger.warning(f"Deteksi bahasa gagal: {str(e)}. Mengasumsikan 'en'.")
        return "en"

    if detected_lang.startswith("zh"):
        return "zh"
    return detected_lang if detected_lang in MODEL_MAP else "en"
def protect_terms(text: str, protected_terms: list) -> tuple[str, dict]:
    """Replace protected terms with placeholders before translation.

    Each term is matched case-insensitively and replaced by
    ``__PROTECTED_<index>__``, where ``<index>`` is the term's position in
    ``protected_terms``.

    A term counts as a whole word when it is not directly adjacent to an ASCII
    letter, digit or underscore. ``\\b`` is not used because it is
    Unicode-aware: CJK characters are word characters, so a term embedded in
    unspaced Japanese or Chinese text (``"Griffith大学"``) has no ``\\b``
    boundary and would never be protected.

    Args:
        text: The source text.
        protected_terms: Terms to shield from translation.

    Returns:
        ``(modified_text, replacements)`` where ``replacements`` maps each
        placeholder that was inserted to the canonical term it stands for.
    """
    not_preceded_by_word = r"(?<![0-9A-Za-z_])"
    not_followed_by_word = r"(?![0-9A-Za-z_])"

    replacements = {}
    for i, term in enumerate(protected_terms):
        if not term:
            # An empty term would match at every position in the text.
            continue
        placeholder = f"__PROTECTED_{i}__"
        pattern = not_preceded_by_word + re.escape(term) + not_followed_by_word
        text, hit_count = re.subn(pattern, placeholder, text, flags=re.IGNORECASE)
        if hit_count:
            replacements[placeholder] = term
    return text, replacements
def restore_terms(text: str, replacements: dict) -> str:
    """Put the original terms back in place of their placeholders.

    Args:
        text: Text that may contain placeholder strings.
        replacements: Mapping of placeholder -> term to substitute.

    Returns:
        ``text`` with every occurrence of each placeholder replaced.
    """
    restored = text
    for placeholder in replacements:
        restored = restored.replace(placeholder, replacements[placeholder])
    return restored
# --- Core function and API endpoints ---
async def perform_translation(text: str, source_lang_override: Optional[str] = None) -> TranslationResponse:
    """Translate ``text`` into English.

    The source language is ``source_lang_override`` when that is a MODEL_MAP
    key, otherwise it is auto-detected. Text resolved as ``"en"`` is returned
    unchanged. Protected terms are swapped for placeholders before the model
    runs and restored afterwards. The model call runs in a worker thread so
    the event loop is not blocked.

    Args:
        text: The text to translate.
        source_lang_override: Optional source-language code.

    Returns:
        A TranslationResponse with the translated text and the source language.

    Raises:
        HTTPException: 400 for empty or whitespace-only text, 413 when the
            text exceeds MAX_TEXT_LENGTH, 500 when the model is unavailable
            or translation fails.
    """
    if not text or not text.strip():
        raise HTTPException(status_code=400, detail="Teks input tidak boleh kosong.")
    if len(text) > MAX_TEXT_LENGTH:
        raise HTTPException(
            status_code=413,
            detail=f"Teks terlalu panjang. Panjang maksimal yang diizinkan: {MAX_TEXT_LENGTH}."
        )

    try:
        # None and "" are never MODEL_MAP keys, so they fall through to detection.
        if source_lang_override in MODEL_MAP:
            source_lang = source_lang_override
        else:
            source_lang = detect_language(text)

        if source_lang == "en":
            return TranslationResponse(translated_text=text, source_language=source_lang)

        translator = get_translator(source_lang)
        modified_text, replacements = protect_terms(text, PROTECTED_TERMS)

        # NOTE(review): max_length=512 caps the generated output; inputs near
        # MAX_TEXT_LENGTH may be longer than the model accepts -- confirm
        # whether truncation or chunking is wanted.
        result = await asyncio.to_thread(translator, modified_text, max_length=512, num_beams=4)
        translated_text = result[0]["translation_text"]
        final_text = restore_terms(translated_text, replacements)
        return TranslationResponse(translated_text=final_text, source_language=source_lang)
    except HTTPException:
        # Already a well-formed API error (e.g. from get_translator): re-raise as is.
        raise
    except Exception as e:
        # logger.exception keeps the traceback; `from e` keeps the cause chained.
        logger.exception(f"Terjadi kesalahan saat translasi: {str(e)}")
        raise HTTPException(status_code=500, detail=f"Proses translasi gagal: {str(e)}") from e
@app.post("/translate", response_model=TranslationResponse)
async def translate_api(request: TranslationRequest):
    """POST /translate: translate the request's text via perform_translation."""
    response = await perform_translation(request.text, request.source_lang_override)
    return response
@app.get("/health")
async def health_check():
    """GET /health: report status and the language codes with a loaded model."""
    loaded_models = [*translators]
    return {"status": "healthy", "loaded_models": loaded_models}
# --- Async Gradio handler ---
async def translate_gradio(text: str, source_lang: str = "auto"):
    """Gradio callback: translate ``text`` and return two display strings.

    Args:
        text: Contents of the input textbox.
        source_lang: Dropdown value; ``"auto"`` means auto-detect.

    Returns:
        ``(translated_text, source_language)``. Errors are never raised to the
        UI; they come back as ``("Error: ...", "Error")``.
    """
    if not (text and text.strip()):
        return "Masukkan teks untuk diterjemahkan.", "N/A"

    try:
        # "auto" is a UI-only value; the core function expects None for it.
        source_lang_param = None if source_lang == "auto" else source_lang
        result = await perform_translation(text, source_lang_param)
    except HTTPException as e:
        return f"Error: {e.detail}", "Error"
    except Exception as e:
        return f"Error: {str(e)}", "Error"
    else:
        return result.translated_text, result.source_language or "Unknown"
# --- Gradio UI ---
def create_gradio_interface():
    """Build and return the Gradio Blocks UI for the translation service.

    Layout: an input column (textbox, source-language dropdown, translate
    button) beside an output column (translation, detected language), followed
    by clickable examples. The button click and the textbox submit both call
    ``translate_gradio``.

    Returns:
        The constructed ``gr.Blocks`` instance.
    """
    # NOTE(review): the nesting of the ``with`` blocks was reconstructed from a
    # source whose indentation had been lost -- confirm it matches the
    # intended layout (in particular that the button sits in the dropdown row).
    with gr.Blocks(
        title="Multi-Language Translation Service",
        theme=gr.themes.Soft(),
        css=".gradio-container { max-width: 1200px !important; }"
    ) as interface:
        gr.Markdown("""
# 🌐 Multi-Language Translation Service
Terjemahkan teks dari **Thai**, **Jepang**, **Mandarin**, atau **Vietnam** ke **Inggris**.
✨ Fitur: Deteksi bahasa otomatis • Perlindungan istilah • Model Helsinki-NLP yang cepat.
""")
        with gr.Row():
            with gr.Column(scale=1):
                text_input = gr.Textbox(label="📝 Input Text", placeholder="Enter text to translate...", lines=6, max_lines=10)
                with gr.Row():
                    lang_dropdown = gr.Dropdown(
                        choices=[
                            ("🔍 Auto-detect", "auto"), ("🇹🇭 Thai", "th"), ("🇯🇵 Japanese", "ja"),
                            ("🇨🇳 Chinese", "zh"), ("🇻🇳 Vietnamese", "vi")
                        ],
                        value="auto", label="Source Language"
                    )
                    translate_btn = gr.Button("🚀 Translate", variant="primary", size="lg")
            with gr.Column(scale=1):
                output_text = gr.Textbox(label="🎯 Translation Result", lines=6, max_lines=10, interactive=False)
                detected_lang = gr.Textbox(label="🔍 Detected Language", interactive=False, max_lines=1)
        gr.Examples(
            examples=[
                ["สวัสดีครับ ยินดีที่ได้รู้จัก การพัฒนา 2030 Aspirations เป็นเป้าหมายสำคัญ", "th"],
                ["こんにちは、はじめまして。Griffith大学での研究が進んでいます。", "ja"],
                ["你好,很高兴认识你。我们正在为2030 Aspirations制定计划。", "zh"],
                ["Xin chào, rất vui được gặp bạn. Griffith là trường đại học tuyệt vời.", "vi"],
            ],
            inputs=[text_input, lang_dropdown],
            outputs=[output_text, detected_lang],
            # Pass the async handler directly, as the click/submit wiring does.
            # The previous partial(asyncio.run, translate_gradio) would have
            # called asyncio.run(translate_gradio, text, lang), which raises
            # TypeError: asyncio.run takes a single coroutine object.
            fn=translate_gradio,
            cache_examples=False
        )
        translate_btn.click(fn=translate_gradio, inputs=[text_input, lang_dropdown], outputs=[output_text, detected_lang])
        text_input.submit(fn=translate_gradio, inputs=[text_input, lang_dropdown], outputs=[output_text, detected_lang])
    return interface
# Build the Gradio UI and mount it on the FastAPI app at the root path.
gradio_app = create_gradio_interface()
app = gr.mount_gradio_app(
    app,
    gradio_app,
    path="/",
)