File size: 8,881 Bytes
a50bc7d
acf8bfe
a50bc7d
1fd0997
129257a
acf8bfe
129257a
db6cecb
97596e3
6ddc4c2
97596e3
6ddc4c2
a50bc7d
6ddc4c2
a50bc7d
 
fc58506
6ddc4c2
a50bc7d
 
5dc46ff
a973a24
6ddc4c2
a973a24
 
a50bc7d
97596e3
a973a24
a50bc7d
97596e3
acf8bfe
 
db6cecb
1fd0997
a371d81
db6cecb
e8b7c49
a371d81
1fd0997
 
db6cecb
4996216
54a9930
db6cecb
 
97596e3
6ddc4c2
a50bc7d
 
 
 
 
 
 
 
6ddc4c2
 
 
 
db6cecb
 
a973a24
db6cecb
 
 
a973a24
db6cecb
 
6ddc4c2
 
 
 
db6cecb
6ddc4c2
db6cecb
 
 
 
 
 
 
 
6ddc4c2
129257a
db6cecb
129257a
db6cecb
 
e8b7c49
 
a50bc7d
129257a
db6cecb
129257a
 
54a9930
 
 
1a58b56
db6cecb
 
6ddc4c2
 
db6cecb
54a9930
 
1a58b56
db6cecb
 
a50bc7d
6ddc4c2
db6cecb
a50bc7d
db6cecb
a973a24
 
 
db6cecb
a973a24
acf8bfe
56ffb39
 
 
a50bc7d
e8b7c49
129257a
db6cecb
e8b7c49
97596e3
54a9930
 
db6cecb
 
 
 
e8b7c49
54a9930
e8b7c49
db6cecb
 
6ddc4c2
a50bc7d
db6cecb
 
 
 
 
 
 
 
 
 
 
6ddc4c2
db6cecb
 
 
a50bc7d
 
db6cecb
a50bc7d
e8b7c49
a50bc7d
acf8bfe
a50bc7d
 
6ddc4c2
a50bc7d
 
 
 
db6cecb
a50bc7d
 
 
db6cecb
 
a50bc7d
 
 
db6cecb
a50bc7d
 
 
db6cecb
 
a50bc7d
db6cecb
a50bc7d
db6cecb
a50bc7d
db6cecb
 
 
 
 
 
 
 
 
a50bc7d
db6cecb
6ddc4c2
db6cecb
a50bc7d
db6cecb
 
a50bc7d
 
6ddc4c2
db6cecb
6ddc4c2
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
import gradio as gr
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel
from transformers import pipeline
import langdetect
import logging
import os
from typing import Optional, Dict
import re
from functools import lru_cache, partial
import asyncio
from contextlib import asynccontextmanager

# --- 1. Initial configuration ---
# Create the working directories up front; exist_ok makes this idempotent.
os.makedirs("./cache", exist_ok=True)
os.makedirs("./logs", exist_ok=True)

# Point the Hugging Face cache at the local ./cache directory.
# NOTE(review): these are assigned after `transformers` has already been
# imported at the top of the file — confirm the library still honours them
# when they are set this late.
os.environ["HF_HOME"] = "./cache"
os.environ["TRANSFORMERS_CACHE"] = "./cache"

# Environment configuration
DEVICE = -1  # Always CPU, for compatibility
# Maximum accepted input length in characters (compared against len(text));
# overridable through the MAX_TEXT_LENGTH environment variable.
MAX_TEXT_LENGTH = int(os.getenv("MAX_TEXT_LENGTH", "5000"))

# Configure logging
logging.basicConfig(
    format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
    level=logging.INFO
)
logger = logging.getLogger(__name__)

# Supported models, keyed by source-language code.
MODEL_MAP = {
    "th": "Helsinki-NLP/opus-mt-th-en",
    "ja": "Helsinki-NLP/opus-mt-ja-en",
    "zh": "Helsinki-NLP/opus-mt-zh-en",
    "vi": "Helsinki-NLP/opus-mt-vi-en",
}

# Terms that are shielded from translation (swapped for placeholders before
# the model runs and restored afterwards).
PROTECTED_TERMS = ["2030 Aspirations", "Griffith"]

# Cache of loaded translation pipelines, keyed by language code.
translators: Dict[str, pipeline] = {}

# --- Pydantic Models ---
class TranslationRequest(BaseModel):
    # Request body accepted by the POST /translate endpoint.
    # text: the text to translate.
    text: str
    # source_lang_override: optional source-language code; when omitted (None)
    # the language is auto-detected by the translation routine.
    source_lang_override: Optional[str] = None

class TranslationResponse(BaseModel):
    # Result returned by the translation routine and the /translate endpoint.
    # translated_text: the output text.
    translated_text: str
    # source_language: language code the input was treated as; may be None.
    source_language: Optional[str] = None

# --- Lifespan Event Handler ---
@asynccontextmanager
async def lifespan(app: FastAPI):
    """Preload every translation model at startup, then hand control to the app.

    Each entry of ``MODEL_MAP`` is loaded into the module-level ``translators``
    cache. A model that fails to load is logged with its traceback and skipped
    so the remaining languages stay available. The closing log line reports
    which languages failed instead of always claiming that everything loaded.
    Nothing is torn down after the ``yield``.

    Args:
        app: The FastAPI application this lifespan is attached to (unused).
    """
    logger.info("Memulai prapemuatan model translasi...")
    failed = []
    for lang, model_name in MODEL_MAP.items():
        try:
            logger.info(f"Memuat model untuk bahasa: {lang} ({model_name})")
            translators[lang] = pipeline("translation", model=model_name, device=DEVICE)
            logger.info(f"Model untuk {lang} berhasil dimuat.")
        except Exception as e:
            # Keep going: one broken model must not block the other languages.
            # logger.exception records the traceback as well as the message.
            logger.exception(f"Gagal memuat model untuk {lang}: {str(e)}")
            failed.append(lang)
    if failed:
        logger.warning(f"Pemuatan model selesai, tetapi gagal untuk: {', '.join(failed)}")
    else:
        logger.info("Semua model telah dimuat.")
    yield  # The application runs here

# --- FastAPI application, wired to the lifespan handler above ---
app = FastAPI(title="Translation Service API", lifespan=lifespan)

# --- Utility Functions ---
def get_translator(lang: str) -> pipeline:
    """Return the already-loaded translation pipeline for *lang*.

    Args:
        lang: Language code used as the key into the ``translators`` cache.

    Raises:
        HTTPException: Status 500 when the cache holds no (truthy) entry for
            *lang*, e.g. because loading it failed at startup.
    """
    cached = translators.get(lang)
    if cached:
        return cached
    logger.error(f"Translator untuk bahasa '{lang}' tidak ditemukan. Mungkin gagal dimuat saat startup.")
    raise HTTPException(status_code=500, detail=f"Model terjemahan untuk '{lang}' tidak tersedia.")

@lru_cache(maxsize=128)
def detect_language(text: str) -> str:
    """Detect the source language of *text*, memoising up to 128 results.

    Only the first 500 characters are passed to ``langdetect``; the cache key
    is nevertheless the full *text*.

    Returns:
        ``"zh"`` for any detected code starting with ``zh``, the detected code
        itself when it is a key of ``MODEL_MAP``, and ``"en"`` otherwise —
        including when detection raises.
    """
    try:
        code = langdetect.detect(text[:500])
        if code.startswith('zh'):
            # Collapse regional variants onto the single "zh" model key.
            return 'zh'
        if code in MODEL_MAP:
            return code
        return "en"
    except Exception as e:
        logger.warning(f"Deteksi bahasa gagal: {str(e)}. Mengasumsikan 'en'.")
        return "en"

# Code-point ranges of scripts that are written without spaces between words.
# Python's ``\b`` treats these characters as word characters, so a protected
# term glued to them (e.g. "Griffith大学") has no word boundary and a plain
# ``\bterm\b`` pattern never matches it.
_UNSPACED_SCRIPT_RANGES = (
    r"\u0E00-\u0E7F"  # Thai
    r"\u3005-\u3007"  # CJK iteration mark, closing mark, ideographic zero
    r"\u3040-\u30FF"  # Hiragana and Katakana
    r"\u3400-\u4DBF"  # CJK Unified Ideographs Extension A
    r"\u4E00-\u9FFF"  # CJK Unified Ideographs
    r"\uF900-\uFAFF"  # CJK Compatibility Ideographs
    r"\uFF66-\uFF9F"  # Halfwidth Katakana
)

# Matches one "word neighbour": any ``\w`` character that does NOT belong to
# one of the unspaced scripts above (double negation: not non-word, not listed).
_WORD_NEIGHBOUR = r"[^\W" + _UNSPACED_SCRIPT_RANGES + r"]"


def protect_terms(text: str, protected_terms: list) -> tuple[str, dict]:
    """Swap protected terms for placeholders so the model leaves them alone.

    Every case-insensitive, stand-alone occurrence of a term is replaced by
    ``__PROTECTED_<i>__`` where ``<i>`` is the term's index in
    *protected_terms*. An occurrence is stand-alone when it is not directly
    preceded or followed by a word character of a space-delimited script
    (Latin letters, digits, underscore, ...). Thai, kana and CJK neighbours do
    not block a match, because those scripts put no spaces between words.

    Args:
        text: Input text.
        protected_terms: Terms to shield; empty strings are ignored.

    Returns:
        A ``(modified_text, replacements)`` tuple where *replacements* maps
        each placeholder that was actually inserted to its canonical term.
    """
    replacements = {}
    for i, term in enumerate(protected_terms):
        if not term:
            # An empty pattern would match at every position in the text.
            continue
        placeholder = f"__PROTECTED_{i}__"
        pattern = (
            r"(?<!" + _WORD_NEIGHBOUR + r")"
            + re.escape(term)
            + r"(?!" + _WORD_NEIGHBOUR + r")"
        )
        modified_text = re.sub(pattern, placeholder, text, flags=re.IGNORECASE)
        if modified_text != text:
            replacements[placeholder] = term
            text = modified_text
    return text, replacements

def restore_terms(text: str, replacements: dict) -> str:
    """Put the original terms back in place of their placeholders.

    Args:
        text: Text that may contain placeholder tokens.
        replacements: Mapping of placeholder -> term, applied in dict order.

    Returns:
        *text* with every exact occurrence of each placeholder replaced.
    """
    restored = text
    for token in replacements:
        restored = restored.replace(token, replacements[token])
    return restored

# --- Core Function and API Endpoints ---
async def perform_translation(text: str, source_lang_override: Optional[str] = None) -> TranslationResponse:
    """Translate *text* to English and report the source language used.

    The source language is *source_lang_override* when it is a key of
    ``MODEL_MAP``; otherwise it is auto-detected. Text whose language resolves
    to ``"en"`` is returned unchanged. Protected terms are swapped for
    placeholders before the model runs and restored afterwards. The model call
    runs in a worker thread so the event loop is not blocked.

    Args:
        text: Text to translate; must be non-blank and at most
            ``MAX_TEXT_LENGTH`` characters long.
        source_lang_override: Optional language code; values that are not keys
            of ``MODEL_MAP`` are ignored in favour of auto-detection.

    Returns:
        A ``TranslationResponse`` with the translated text and source language.

    Raises:
        HTTPException: 400 for blank input, 413 for over-long input, 500 when
            the model is unavailable or the translation itself fails.
    """
    if not text or not text.strip():
        raise HTTPException(status_code=400, detail="Teks input tidak boleh kosong.")
    if len(text) > MAX_TEXT_LENGTH:
        raise HTTPException(
            status_code=413,
            detail=f"Teks terlalu panjang. Panjang maksimal yang diizinkan: {MAX_TEXT_LENGTH}."
        )
    try:
        if source_lang_override and source_lang_override in MODEL_MAP:
            source_lang = source_lang_override
        else:
            source_lang = detect_language(text)

        if source_lang == "en":
            # Nothing to translate: echo the input back.
            return TranslationResponse(translated_text=text, source_language=source_lang)

        translator = get_translator(source_lang)
        modified_text, replacements = protect_terms(text, PROTECTED_TERMS)

        # NOTE(review): max_length=512 is passed to the pipeline while inputs of
        # up to MAX_TEXT_LENGTH characters are accepted — confirm long inputs
        # are not truncated by the model.
        result = await asyncio.to_thread(translator, modified_text, max_length=512, num_beams=4)
        translated_text = result[0]["translation_text"]
        final_text = restore_terms(translated_text, replacements)

        return TranslationResponse(translated_text=final_text, source_language=source_lang)
    except HTTPException:
        # Already a well-formed HTTP error; re-raise it untouched.
        raise
    except Exception as e:
        # logger.exception keeps the traceback; chaining keeps the root cause.
        logger.exception(f"Terjadi kesalahan saat translasi: {str(e)}")
        raise HTTPException(status_code=500, detail=f"Proses translasi gagal: {str(e)}") from e

@app.post("/translate", response_model=TranslationResponse)
async def translate_api(request: TranslationRequest):
    # HTTP entry point: unpack the request body and delegate to the shared
    # translation routine; its HTTPExceptions propagate to FastAPI unchanged.
    response = await perform_translation(request.text, request.source_lang_override)
    return response

@app.get("/health")
async def health_check():
    # Always reports "healthy", together with the language codes that
    # currently have a pipeline in the translators cache.
    loaded_languages = list(translators)
    return {"status": "healthy", "loaded_models": loaded_languages}

# --- Async Gradio Handler ---
async def translate_gradio(text: str, source_lang: str = "auto"):
    """Gradio callback: translate *text* and return display strings.

    Args:
        text: Text entered by the user.
        source_lang: Language code chosen in the dropdown; ``"auto"`` means
            no override is passed on, so the language is detected.

    Returns:
        A ``(translation_or_message, language_or_status)`` tuple. Errors are
        turned into ``"Error: ..."`` messages instead of being raised.
    """
    if not (text and text.strip()):
        return "Masukkan teks untuk diterjemahkan.", "N/A"
    try:
        override = None if source_lang == "auto" else source_lang
        outcome = await perform_translation(text, override)
        language_label = outcome.source_language or "Unknown"
        return outcome.translated_text, language_label
    except HTTPException as e:
        return f"Error: {e.detail}", "Error"
    except Exception as e:
        return f"Error: {str(e)}", "Error"

# --- Gradio UI ---
def create_gradio_interface():
    """Build the Gradio Blocks UI for the translation service.

    Layout: an input column (text box, source-language dropdown, translate
    button) beside an output column (translation result, detected language),
    followed by example inputs. The button click and the text-box submit both
    call ``translate_gradio``.

    Returns:
        The constructed ``gr.Blocks`` interface (not launched).
    """
    with gr.Blocks(
        title="Multi-Language Translation Service",
        theme=gr.themes.Soft(),
        css=".gradio-container { max-width: 1200px !important; }"
    ) as interface:
        gr.Markdown("""
        # 🌐 Multi-Language Translation Service
        Terjemahkan teks dari **Thai**, **Jepang**, **Mandarin**, atau **Vietnam** ke **Inggris**.
        ✨ Fitur: Deteksi bahasa otomatis • Perlindungan istilah • Model Helsinki-NLP yang cepat.
        """)
        with gr.Row():
            with gr.Column(scale=1):
                text_input = gr.Textbox(label="📝 Input Text", placeholder="Enter text to translate...", lines=6, max_lines=10)
                with gr.Row():
                    lang_dropdown = gr.Dropdown(
                        choices=[
                            ("🔍 Auto-detect", "auto"), ("🇹🇭 Thai", "th"), ("🇯🇵 Japanese", "ja"),
                            ("🇨🇳 Chinese", "zh"), ("🇻🇳 Vietnamese", "vi")
                        ],
                        value="auto", label="Source Language"
                    )
                    translate_btn = gr.Button("🚀 Translate", variant="primary", size="lg")
            with gr.Column(scale=1):
                output_text = gr.Textbox(label="🎯 Translation Result", lines=6, max_lines=10, interactive=False)
                detected_lang = gr.Textbox(label="🔍 Detected Language", interactive=False, max_lines=1)
        gr.Examples(
            examples=[
                ["สวัสดีครับ ยินดีที่ได้รู้จัก การพัฒนา 2030 Aspirations เป็นเป้าหมายสำคัญ", "th"],
                ["こんにちは、はじめまして。Griffith大学での研究が進んでいます。", "ja"],
                ["你好,很高兴认识你。我们正在为2030 Aspirations制定计划。", "zh"],
                ["Xin chào, rất vui được gặp bạn. Griffith là trường đại học tuyệt vời.", "vi"],
            ],
            inputs=[text_input, lang_dropdown],
            outputs=[output_text, detected_lang],
            # Pass the async handler directly, as the click/submit events do.
            # Wrapping it as partial(asyncio.run, translate_gradio) would call
            # asyncio.run(translate_gradio, text, lang), which fails because
            # asyncio.run expects a single coroutine object.
            fn=translate_gradio,
            cache_examples=False
        )
        translate_btn.click(fn=translate_gradio, inputs=[text_input, lang_dropdown], outputs=[output_text, detected_lang])
        text_input.submit(fn=translate_gradio, inputs=[text_input, lang_dropdown], outputs=[output_text, detected_lang])
    return interface

# Build the Gradio UI and mount it on the FastAPI app at the root path.
# `app` is rebound to whatever gr.mount_gradio_app returns.
gradio_app = create_gradio_interface()
app = gr.mount_gradio_app(app, gradio_app, path="/")