|
import os |
|
import gradio as gr |
|
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline |
|
|
|
# Checkpoint identifiers handed to ``from_pretrained`` below.
en2ko = "KoJLabs/nllb-finetuned-en2ko"
ko2en = "KoJLabs/nllb-finetuned-ko2en"
style = "KoJLabs/bart-speech-style-converter"

# Each checkpoint is loaded once at import time, model and tokenizer
# side by side, so request handlers can reuse the already-loaded objects.

# English -> Korean translation.
en2ko_model = AutoModelForSeq2SeqLM.from_pretrained(en2ko)
en2ko_tokenizer = AutoTokenizer.from_pretrained(en2ko)

# Korean -> English translation.
ko2en_model = AutoModelForSeq2SeqLM.from_pretrained(ko2en)
ko2en_tokenizer = AutoTokenizer.from_pretrained(ko2en)

# Speech-style converter.
style_model = AutoModelForSeq2SeqLM.from_pretrained(style)
style_tokenizer = AutoTokenizer.from_pretrained(style)
|
|
|
def translate(source, target, text):
    """Translate ``text`` between English and Korean.

    Args:
        source: Source language name, ``"English"`` or ``"Korean"``.
        target: Target language name, ``"English"`` or ``"Korean"``.
        text: The text to translate.

    Returns:
        The translated text, or an empty string when ``text`` is empty or
        contains only whitespace.

    Raises:
        KeyError: If ``source`` or ``target`` is not a supported language
            name.
    """
    # Map the UI language names to the codes passed as src_lang / tgt_lang.
    formats = {"English": "eng_Latn", "Korean": "kor_Hang"}
    src = formats[source]
    tgt = formats[target]

    # Nothing to translate: skip building a pipeline and running the model.
    if not text or not text.strip():
        return ""

    # Pair each model with the tokenizer loaded from the *same* checkpoint
    # (the two were previously cross-wired).
    if src == "eng_Latn":
        model, tokenizer = en2ko_model, en2ko_tokenizer
    else:
        model, tokenizer = ko2en_model, ko2en_tokenizer

    translator = pipeline(
        "translation",
        model=model,
        tokenizer=tokenizer,
        src_lang=src,
        tgt_lang=tgt,
    )

    output = translator(text)
    return output[0]["translation_text"]
|
|
|
# Page heading and the language names offered in both dropdowns.
title = 'KoTAN Translator & Speech-style converter'
lang = ['English', 'Korean']

# Input widgets, in the positional order expected by ``translate``:
# source language, target language, text.
_input_widgets = [
    gr.inputs.Dropdown(choices=lang, label='Source Language'),
    gr.inputs.Dropdown(choices=lang, label='Target Language'),
    gr.inputs.Textbox(lines=5, label='Text to Translate'),
]

# Single text box that displays the value returned by ``translate``.
_output_widgets = [gr.outputs.Textbox(label='Translated Text')]

_description = 'KoTAN: Korean Translation and Augmentation with fine-tuned NLLB. If you want to download as pip package, please visit our github. (https://github.com/KoJLabs/KoTAN)'

translator_app = gr.Interface(
    fn=translate,
    inputs=_input_widgets,
    outputs=_output_widgets,
    title=title,
    description=_description,
    article='Jisu, Kim. Juhwan, Lee',
    enable_queue=True,
)

# Start the web app as soon as the script is executed.
translator_app.launch()