# File size: 1,932 Bytes
# ac6d958 4d4c9b0 ac6d958 4d4c9b0 0a7cea6 4d4c9b0 0a7cea6 ac6d958 0a7cea6 ac6d958 0a7cea6 ac6d958 0a7cea6 ac6d958 0a7cea6 ac6d958 |
# 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 |
import os
import gradio as gr
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline
# Checkpoint identifiers handed to ``from_pretrained`` below.
en2ko = "KoJLabs/nllb-finetuned-en2ko"
ko2en = "KoJLabs/nllb-finetuned-ko2en"
style = "KoJLabs/bart-speech-style-converter"

# Load everything once at import time so requests reuse the same objects.
# Generators are consumed left to right, so the load order is unchanged:
# all three models first, then all three tokenizers.
# NOTE: style_model / style_tokenizer are not referenced in the visible
# part of this file.
en2ko_model, ko2en_model, style_model = (
    AutoModelForSeq2SeqLM.from_pretrained(checkpoint)
    for checkpoint in (en2ko, ko2en, style)
)
en2ko_tokenizer, ko2en_tokenizer, style_tokenizer = (
    AutoTokenizer.from_pretrained(checkpoint)
    for checkpoint in (en2ko, ko2en, style)
)
def translate(source, target, text):
    """Translate ``text`` between English and Korean.

    Args:
        source: Source language name, ``"English"`` or ``"Korean"``.
        target: Target language name, ``"English"`` or ``"Korean"``.
        text: The text to translate.

    Returns:
        The translated string. Blank input yields ``""`` and identical
        source/target languages return ``text`` unchanged; neither case
        runs a model.

    Raises:
        KeyError: If ``source`` or ``target`` is not a supported language
            name.
    """
    formats = {"English": "eng_Latn", "Korean": "kor_Hang"}
    # Validate both names first so an unsupported language always raises,
    # even when the text is blank.
    src = formats[source]
    tgt = formats[target]

    # Nothing to translate: skip the model call entirely.
    if not text or (isinstance(text, str) and not text.strip()):
        return ""

    # Each fine-tuned checkpoint handles one direction only, so a
    # same-language request has no matching model; the identity is the
    # correct result.
    if src == tgt:
        return text

    # Pair every model with the tokenizer loaded from the SAME checkpoint
    # (the previous code crossed en2ko with ko2en and vice versa).
    if src == "eng_Latn":
        model, tokenizer = en2ko_model, en2ko_tokenizer
    else:
        model, tokenizer = ko2en_model, ko2en_tokenizer

    translator = pipeline(
        'translation',
        model=model,
        tokenizer=tokenizer,
        src_lang=src,
        tgt_lang=tgt,
    )
    output = translator(text)
    return output[0]['translation_text']
title = 'KoTAN Translator & Speech-style converter'
lang = ['English','Korean']

# Web form wired to ``translate``: two language dropdowns and a text box in,
# one text box out. The input order must match translate(source, target, text).
# NOTE(review): gr.inputs / gr.outputs and enable_queue look like legacy
# Gradio APIs -- confirm the pinned gradio version before upgrading.
translator_app = gr.Interface(
    fn=translate,
    inputs=[
        gr.inputs.Dropdown(choices=lang, label='Source Language'),
        gr.inputs.Dropdown(choices=lang, label='Target Language'),
        gr.inputs.Textbox(lines=5, label='Text to Translate'),
    ],
    outputs=[gr.outputs.Textbox(label='Translated Text')],
    title=title,
    description='KoTAN: Korean Translation and Augmentation with fine-tuned NLLB. If you want to download as pip package, please visit our github. (https://github.com/KoJLabs/KoTAN)',
    article='Jisu, Kim. Juhwan, Lee',
    enable_queue=True,
)

# Start the Gradio server (the stray trailing "|" token was removed; it made
# this statement a syntax error).
translator_app.launch()