feliksius committed on
Commit
129257a
·
verified ·
1 Parent(s): 2d48165

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +54 -7
app.py CHANGED
@@ -1,7 +1,8 @@
1
- import os
2
  from fastapi import FastAPI, HTTPException
3
- from transformers import pipeline
 
4
  import logging
 
5
 
6
  # Atur direktori cache untuk Hugging Face
7
  os.environ["HF_HOME"] = "/app/cache"
@@ -14,19 +15,65 @@ logger = logging.getLogger(__name__)
14
 
15
  try:
16
  logger.info("Loading translation model...")
17
- translator = pipeline("translation", model="Helsinki-NLP/opus-mt-en-id")
 
 
 
18
  logger.info("Model loaded successfully")
19
  except Exception as e:
20
  logger.error(f"Failed to load model: {str(e)}")
21
  raise Exception(f"Model initialization failed: {str(e)}")
22
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
23
  @app.post("/translate")
24
  async def translate(text: str):
25
  if not text:
26
  raise HTTPException(status_code=400, detail="Text input is required")
27
  try:
28
- result = translator(text)
29
- return {"translated_text": result[0]["translation_text"]}
 
 
 
 
 
 
 
30
  except Exception as e:
31
- logger.error(f"Translation failed: {str(e)}")
32
- raise HTTPException(status_code=500, detail=f"Translation failed: {str(e)}")
 
 
import logging
import os

# Set the Hugging Face cache directory.
# This has to happen BEFORE `transformers` is imported: the Hugging Face
# libraries read HF_HOME when they are first imported, so assigning it
# afterwards does not change where models are cached.
os.environ["HF_HOME"] = "/app/cache"

import langdetect  # noqa: E402
from fastapi import FastAPI, HTTPException  # noqa: E402
from transformers import M2M100ForConditionalGeneration, M2M100Tokenizer  # noqa: E402
 
15
 
# Load the M2M100 model and tokenizer once at import time so every request
# reuses the same instances. A failure here aborts startup.
try:
    logger.info("Loading translation model...")
    model_name = "facebook/m2m100_418M"
    model = M2M100ForConditionalGeneration.from_pretrained(model_name)
    tokenizer = M2M100Tokenizer.from_pretrained(model_name)
    logger.info("Model loaded successfully")
except Exception as e:
    # logger.exception keeps the traceback in the log; `from e` keeps the
    # original error attached as the explicit cause of the re-raised one.
    logger.exception(f"Failed to load model: {str(e)}")
    raise Exception(f"Model initialization failed: {str(e)}") from e
26
 
def detect_language(text: str) -> str:
    """Detect the language of *text* and return it as an M2M100 language code.

    Args:
        text: The text whose language should be detected.

    Returns:
        One of the codes in ``lang_map`` below, or ``"en"`` when the detected
        language is not in the map or detection fails.
    """
    # Maps langdetect codes to M2M100 codes.
    lang_map = {
        "id": "id",  # Indonesian
        "fr": "fr",  # French
        "es": "es",  # Spanish
        "de": "de",  # German
        "ja": "ja",  # Japanese
        "zh": "zh",  # Chinese (kept for backward compatibility)
        # langdetect reports Chinese as "zh-cn" / "zh-tw", never bare "zh";
        # without these entries Chinese text would fall through to "en".
        "zh-cn": "zh",  # Chinese (Simplified)
        "zh-tw": "zh",  # Chinese (Traditional)
        "ru": "ru",  # Russian
        "th": "th",  # Thai
    }
    # Only the detection call can fail, so keep the try body minimal.
    try:
        lang = langdetect.detect(text)
    except Exception as e:
        # Best effort: an undetectable input is treated as English.
        logger.warning(f"Language detection failed: {str(e)}, defaulting to English")
        return "en"
    return lang_map.get(lang, "en")  # Default to English if not recognised
46
+
def translate_to_english(text: str, source_lang: str) -> str:
    """Translate *text* from *source_lang* to English using M2M100.

    Args:
        text: The text to translate.
        source_lang: M2M100 language code of *text*.

    Returns:
        The English translation of *text*.

    Raises:
        HTTPException: With status 500 when tokenisation, generation or
            decoding fails; the original error is attached as the cause.
    """
    try:
        # Set the source language. This mutates the shared module-level
        # tokenizer, so it must be set on every call.
        tokenizer.src_lang = source_lang
        encoded = tokenizer(text, return_tensors="pt")
        # Forcing the first generated token to the English language id is
        # what selects English as the target language.
        generated_tokens = model.generate(
            **encoded,
            forced_bos_token_id=tokenizer.get_lang_id("en"),
        )
        # A single input string yields a batch of one.
        return tokenizer.batch_decode(generated_tokens, skip_special_tokens=True)[0]
    except Exception as e:
        # logger.exception records the traceback alongside the message.
        logger.exception(f"Translation failed: {str(e)}")
        raise HTTPException(status_code=500, detail=f"Translation failed: {str(e)}") from e
62
+
@app.post("/translate")
async def translate(text: str):
    """Translate *text* to English, detecting the source language first.

    Args:
        text: The text to translate.

    Returns:
        A dict ``{"translated_text": ...}``. When ``detect_language`` reports
        ``"en"`` the input text is returned unchanged.

    Raises:
        HTTPException: 400 when *text* is empty; 500 when detection or
            translation fails.
    """
    if not text:
        raise HTTPException(status_code=400, detail="Text input is required")
    try:
        source_lang = detect_language(text)
        logger.info(f"Detected source language: {source_lang}")
        # Already English: return the original text.
        if source_lang == "en":
            return {"translated_text": text}
        translated_text = translate_to_english(text, source_lang)
        return {"translated_text": translated_text}
    except HTTPException:
        # translate_to_english raises a ready-made HTTPException; pass it
        # through untouched instead of re-wrapping it in a second one, which
        # would garble the detail message sent to the client.
        raise
    except Exception as e:
        logger.error(f"Processing failed: {str(e)}")
        raise HTTPException(status_code=500, detail=f"Processing failed: {str(e)}") from e