Commit
·
b8d971b
1
Parent(s):
5ddcc98
add select langs
Browse files
app.py
CHANGED
@@ -12,18 +12,46 @@ def init_model():
|
|
12 |
|
13 |
model, tokenizer = init_model()
|
14 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
15 |
st.title('💿facebook-m2m100_1.2B')
|
16 |
st.subheader('原文')
|
17 |
with st.form('my_form'):
|
18 |
-
text = st.text_area('Enter text:', '
|
19 |
-
|
|
|
|
|
|
|
|
|
|
|
20 |
|
21 |
st.subheader('翻译结果')
|
22 |
placeholder = st.markdown("", unsafe_allow_html=True)
|
23 |
if submitted:
|
24 |
with st.spinner("Translating..."):
|
25 |
-
tokenizer.src_lang =
|
26 |
encoded_zh = tokenizer(text, return_tensors="pt")
|
27 |
-
generated_tokens = model.generate(**encoded_zh, forced_bos_token_id=tokenizer.get_lang_id(
|
28 |
translated = tokenizer.batch_decode(generated_tokens, skip_special_tokens=True)
|
29 |
-
placeholder.markdown(translated[0])
|
|
|
12 |
|
13 |
# Build the model and tokenizer once at module level via init_model();
# the translation code further down uses both objects.
model, tokenizer = init_model()
|
14 |
|
15 |
+
# Languages offered in the "from"/"to" select boxes.
# Maps the language code handed to the tokenizer to the name shown to the user.
# The keys are assigned to `tokenizer.src_lang` and passed to
# `tokenizer.get_lang_id()` unchanged, so each one must be a code the M2M100
# tokenizer actually knows: Asturian, Cebuano and Iloko use the three-letter
# codes "ast", "ceb" and "ilo" (the two-letter forms are rejected).
# "zh" and "en" are listed first so they sit at the top of the select boxes.
langs = {
    "zh": "Chinese", "en": "English", "af": "Afrikaans", "am": "Amharic",
    "ar": "Arabic", "ast": "Asturian", "az": "Azerbaijani", "ba": "Bashkir",
    "be": "Belarusian", "bg": "Bulgarian", "bn": "Bengali", "br": "Breton",
    "bs": "Bosnian", "ca": "Valencian", "ceb": "Cebuano", "cs": "Czech",
    "cy": "Welsh", "da": "Danish", "de": "German", "el": "Greek",
    "es": "Spanish", "et": "Estonian", "fa": "Persian", "ff": "Fulah",
    "fi": "Finnish", "fr": "French", "fy": "Frisian", "ga": "Irish",
    "gd": "Gaelic", "gl": "Galician", "gu": "Gujarati", "ha": "Hausa",
    "he": "Hebrew", "hi": "Hindi", "hr": "Croatian", "ht": "Creole",
    "hu": "Hungarian", "hy": "Armenian", "id": "Indonesian", "ig": "Igbo",
    "ilo": "Iloko", "is": "Icelandic", "it": "Italian", "ja": "Japanese",
    "jv": "Javanese", "ka": "Georgian", "kk": "Kazakh", "km": "Khmer",
    "kn": "Kannada", "ko": "Korean", "lb": "Letzeburgesch", "lg": "Ganda",
    "ln": "Lingala", "lo": "Lao", "lt": "Lithuanian", "lv": "Latvian",
    "mg": "Malagasy", "mk": "Macedonian", "ml": "Malayalam",
    "mn": "Mongolian", "mr": "Marathi", "ms": "Malay", "my": "Burmese",
    "ne": "Nepali", "nl": "Flemish", "no": "Norwegian",
    "ns": "Northern Sotho", "oc": "Occitan", "or": "Oriya", "pa": "Punjabi",
    "pl": "Polish", "ps": "Pashto", "pt": "Portuguese", "ro": "Moldovan",
    "ru": "Russian", "sd": "Sindhi", "si": "Sinhalese", "sk": "Slovak",
    "sl": "Slovenian", "so": "Somali", "sq": "Albanian", "sr": "Serbian",
    "ss": "Swati", "su": "Sundanese", "sv": "Swedish", "sw": "Swahili",
    "ta": "Tamil", "th": "Thai", "tl": "Tagalog", "tn": "Tswana",
    "tr": "Turkish", "uk": "Ukrainian", "ur": "Urdu", "uz": "Uzbek",
    "vi": "Vietnamese", "wo": "Wolof", "xh": "Xhosa", "yi": "Yiddish",
    "yo": "Yoruba", "zu": "Zulu",
}
|
32 |
+
|
33 |
+
|
34 |
+
def chose_lang_format(option):
    """Return the display name for a language code.

    Passed as ``format_func`` to the language select boxes so that they
    show names while their values stay language codes.

    Args:
        option: A key of the module-level ``langs`` mapping.

    Returns:
        The value stored in ``langs`` under ``option``.

    Raises:
        KeyError: If ``option`` is not a key of ``langs``.
    """
    display_name = langs[option]
    return display_name
|
36 |
+
|
37 |
+
|
38 |
st.title('💿facebook-m2m100_1.2B')
st.subheader('原文')
# Input form: source text, the submit button, and the two language pickers.
# Widgets inside a form only report their values when the button is pressed.
with st.form('my_form'):
    text = st.text_area('Enter text:', '')
    # Three columns: submit button | source language | target language.
    cols = st.columns(3)
    submitted = cols[0].form_submit_button('翻译')
    lang_codes = list(langs)
    src = cols[1].selectbox(
        'from', options=lang_codes, format_func=chose_lang_format)
    # The target picker gets its own (third) column; it previously shared
    # cols[1] with the source picker and left cols[2] empty.
    # Default the target to English so the initial pair is not zh -> zh.
    to = cols[2].selectbox(
        'to', options=lang_codes, format_func=chose_lang_format,
        index=lang_codes.index('en'))

st.subheader('翻译结果')
# Empty markdown element that is overwritten with the translation below.
placeholder = st.markdown("", unsafe_allow_html=True)
if submitted:
    with st.spinner("Translating..."):
        # Tell the tokenizer which language the input text is written in.
        tokenizer.src_lang = src
        # NOTE: the name is historical; the input is in whichever language
        # `src` selects, not necessarily Chinese.
        encoded_zh = tokenizer(text, return_tensors="pt")
        # Force the first generated token to be the target-language token.
        generated_tokens = model.generate(
            **encoded_zh, forced_bos_token_id=tokenizer.get_lang_id(to))
        translated = tokenizer.batch_decode(
            generated_tokens, skip_special_tokens=True)
        # A single input text was encoded, so show the first decoded string.
        placeholder.markdown(translated[0])
|