|
from json import load as json_load
|
|
from os import getenv
|
|
# Language selection is supplied through environment variables. getenv()
# returns None for a variable that is not set, in which case the config
# lookup below fails.
en2indic_rnn_lang = getenv("en2indic_rnn_lang")
en2indic_lang = getenv("en2indic_lang")
indic2en_lang = getenv("indic2en_lang")

# Read the RNN settings for the selected language. The context manager closes
# the file handle deterministically instead of leaving it to the garbage
# collector, and the explicit encoding keeps the read independent of the
# platform's locale.
with open("xlit_rnn_conf.json", encoding="utf-8") as _xlit_rnn_conf_file:
    xlit_rnn_conf = json_load(_xlit_rnn_conf_file)[en2indic_rnn_lang]
|
|
|
|
from utils_lid import IndicLID
|
|
from utils_xlit_rnn import XlitPiston
|
|
from utils_xlit import BaseEngineTransformer
|
|
from huggingface_hub import hf_hub_download
|
|
from shutil import unpack_archive
|
|
from torch import device as Device
|
|
from torch.cuda import is_available as cuda_is_available
|
|
# Use the GPU when CUDA reports itself as available, otherwise the CPU.
if cuda_is_available():
    device = Device("cuda")
else:
    device = Device("cpu")

# Extract corpus_bin_xlit.zip into the "corpus" directory, which is passed to
# the transformer engines further down.
unpack_archive(filename="corpus_bin_xlit.zip", extract_dir="corpus")

# Repository ids handed to hf_hub_download when fetching model artifacts.
hf_model_RNN = "shethjenil/Indic-Transliteration-RNN"
hf_model_WPD = "shethjenil/Indic-Transliteration-Word-Prob-Dicts"
hf_model_XLIT = "ai4bharat/IndicXlit"
|
|
# Read the language-identification config up front so the file handle is
# closed deterministically rather than leaked by a bare open() call.
with open("lid_langs_conf.json", encoding="utf-8") as _lid_conf_file:
    _lid_langs_conf = json_load(_lid_conf_file)

# Language detector: three downloaded model files, the parsed config and the
# torch device, in the positional order IndicLID expects.
# NOTE(review): "basline_nn_simple.pt" is kept exactly as written; presumably
# it matches the file name published in the repository - confirm.
LID_model = IndicLID(
    hf_hub_download("ai4bharat/IndicLID-BERT", "basline_nn_simple.pt"),
    hf_hub_download("ai4bharat/IndicLID-FTR", "model_baseline_roman.bin"),
    hf_hub_download("ai4bharat/IndicLID-FTN", "model_baseline_roman.bin"),
    _lid_langs_conf,
    device,
)

# RNN transliterator; the three artifact file names come from the
# per-language entry loaded into xlit_rnn_conf.
en2indic_RNN_model = XlitPiston(
    hf_hub_download(hf_model_RNN, xlit_rnn_conf["weight"]),
    hf_hub_download(hf_model_RNN, xlit_rnn_conf["script"]),
    hf_hub_download(hf_model_RNN, xlit_rnn_conf["vocab"]),
    device,
)

# Transformer engine for English -> the configured Indic language.
# NOTE(review): the positional 4 and True are passed through unchanged; their
# meaning is defined by BaseEngineTransformer (presumably beam width and
# rescoring) - confirm against utils_xlit.
en2indic_model = BaseEngineTransformer(
    {
        en2indic_lang: hf_hub_download(
            hf_model_WPD, f"{en2indic_lang}_word_prob_dict.json"
        )
    },
    "corpus",
    "lang_list_xlit.txt",
    hf_hub_download(
        hf_model_XLIT, "indicxlit-en-indic-v1.0/transformer/indicxlit.pt"
    ),
    {en2indic_lang},
    4,
    True,
    device,
)

# Transformer engine for Indic -> English; the target side is always "en".
indic2en_model = BaseEngineTransformer(
    {"en": hf_hub_download(hf_model_WPD, "en_word_prob_dict.json")},
    "corpus",
    "lang_list_xlit.txt",
    hf_hub_download(
        hf_model_XLIT, "indicxlit-indic-en-v1.0/transformer/indicxlit.pt"
    ),
    {"en"},
    4,
    True,
    device,
)
|
|
|
|
import gradio as gr
|
|
def _word_xlit_tab(fn, title):
    """Build one word-transliteration tab.

    Args:
        fn: Callable invoked with (word, variation count).
        title: Heading shown on the tab's interface.

    Returns:
        A gr.Interface with a word textbox and a variation-count input,
        rendering the callable's result in a gr.List.
    """
    return gr.Interface(
        fn,
        [
            gr.Textbox(label="Enter Word"),
            # precision=0 makes Gradio round the value and deliver it as an
            # int, so the count never reaches the model as a float.
            gr.Number(label="Enter Variation Number", value=1, precision=0),
        ],
        gr.List(label="Transliteration Result"),
        title=title,
    )


# Assemble the four tabs (language detection plus three transliteration
# directions) and start the app. Tab labels embed the configured languages.
gr.TabbedInterface(
    [
        gr.Interface(
            LID_model.lang_detection,
            gr.Textbox(label="Enter text"),
            [gr.Textbox(label="Language"), gr.Number(label="Accuracy in %")],
            title="Language Detection",
        ),
        _word_xlit_tab(en2indic_RNN_model.inferencer, "RNN Transliteration"),
        _word_xlit_tab(
            lambda word, topk: en2indic_model._transliterate_word(
                word, "en", en2indic_lang, topk, nativize_numerals=True
            ),
            "En2Indic Transliteration",
        ),
        _word_xlit_tab(
            lambda word, topk: indic2en_model._transliterate_word(
                word, indic2en_lang, "en", topk, nativize_numerals=True
            ),
            "Indic2En Transliteration",
        ),
    ],
    [
        "Indic Language Detection",
        f"RNN en2{en2indic_rnn_lang}",
        f"TRANSFORMER en2{en2indic_lang}",
        f"{indic2en_lang}2en",
    ],
).launch()
|
|
|