File size: 3,495 Bytes
7d68ade
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
from json import load as json_load
from os import getenv
# Target languages for each demo tab are supplied through environment
# variables; getenv() yields None when a variable is unset.
en2indic_rnn_lang = getenv("en2indic_rnn_lang")
en2indic_lang = getenv("en2indic_lang")
indic2en_lang = getenv("indic2en_lang")

# Load the per-language RNN settings and keep only the entry for the selected
# RNN language. The file is opened in a context manager so the handle is
# closed deterministically instead of being left to the garbage collector.
# NOTE(review): an unset or unknown en2indic_rnn_lang raises KeyError here.
with open("xlit_rnn_conf.json", encoding="utf-8") as _xlit_rnn_conf_file:
    xlit_rnn_conf = json_load(_xlit_rnn_conf_file)[en2indic_rnn_lang]

from utils_lid import IndicLID
from utils_xlit_rnn import XlitPiston
from utils_xlit import BaseEngineTransformer
from huggingface_hub import hf_hub_download
from shutil import unpack_archive
from torch import device as Device
from torch.cuda import is_available as cuda_is_available
# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
device = Device("cuda" if cuda_is_available() else "cpu")

# Extract the bundled archive into ./corpus; that directory name is passed to
# both transformer engines below.
unpack_archive("corpus_bin_xlit.zip", "corpus")

# Hugging Face Hub repositories the model files are downloaded from.
hf_model_RNN = "shethjenil/Indic-Transliteration-RNN"
hf_model_WPD = "shethjenil/Indic-Transliteration-Word-Prob-Dicts"
hf_model_XLIT = "ai4bharat/IndicXlit"

# --- Language identification -------------------------------------------------
# The three checkpoint files are fetched first, in the same order as before.
# File names are reproduced exactly as published (including "basline").
_lid_bert_path = hf_hub_download("ai4bharat/IndicLID-BERT", "basline_nn_simple.pt")
_lid_ftr_path = hf_hub_download("ai4bharat/IndicLID-FTR", "model_baseline_roman.bin")
_lid_ftn_path = hf_hub_download("ai4bharat/IndicLID-FTN", "model_baseline_roman.bin")
# Read the language configuration through a context manager so the file
# handle is closed as soon as the JSON has been parsed.
with open("lid_langs_conf.json", encoding="utf-8") as _lid_langs_file:
    _lid_langs_conf = json_load(_lid_langs_file)
LID_model = IndicLID(
    _lid_bert_path,
    _lid_ftr_path,
    _lid_ftn_path,
    _lid_langs_conf,
    device,
)

# --- RNN transliteration (English -> selected Indic language) ----------------
# The file names for weight, script and vocab come from xlit_rnn_conf.
en2indic_RNN_model = XlitPiston(
    hf_hub_download(hf_model_RNN, xlit_rnn_conf["weight"]),
    hf_hub_download(hf_model_RNN, xlit_rnn_conf["script"]),
    hf_hub_download(hf_model_RNN, xlit_rnn_conf["vocab"]),
    device,
)

# --- Transformer transliteration ---------------------------------------------
# NOTE(review): the positional arguments `4` and `True` are presumably the beam
# width and a rescoring flag -- confirm against utils_xlit.BaseEngineTransformer.
en2indic_model = BaseEngineTransformer(
    {
        en2indic_lang: hf_hub_download(
            hf_model_WPD, f"{en2indic_lang}_word_prob_dict.json"
        )
    },
    "corpus",
    "lang_list_xlit.txt",
    hf_hub_download(hf_model_XLIT, "indicxlit-en-indic-v1.0/transformer/indicxlit.pt"),
    {en2indic_lang},
    4,
    True,
    device,
)
indic2en_model = BaseEngineTransformer(
    {"en": hf_hub_download(hf_model_WPD, "en_word_prob_dict.json")},
    "corpus",
    "lang_list_xlit.txt",
    hf_hub_download(hf_model_XLIT, "indicxlit-indic-en-v1.0/transformer/indicxlit.pt"),
    {"en"},
    4,
    True,
    device,
)

import gradio as gr
def _word_and_count_inputs():
    """Return fresh input components shared by the transliteration tabs.

    A new pair is built on every call so each interface owns its own
    component instances. ``precision=0`` makes Gradio round the entered
    number and hand it to the callback as an ``int``; without it a
    fractional entry would arrive as a ``float`` where a count is expected.
    """
    return [
        gr.Textbox(label="Enter Word"),
        gr.Number(label="Enter Variation Number", value=1, precision=0),
    ]


# Assemble one tab per model and start the Gradio server. The tab captions
# embed the languages selected through the environment variables.
gr.TabbedInterface(
    [
        # Tab 1: language identification for free text.
        gr.Interface(
            LID_model.lang_detection,
            gr.Textbox(label="Enter text"),
            [gr.Textbox(label="Language"), gr.Number(label="Accuracy in %")],
            title="Language Detection",
        ),
        # Tab 2: RNN-based English -> Indic transliteration.
        gr.Interface(
            en2indic_RNN_model.inferencer,
            _word_and_count_inputs(),
            gr.List(label="Transliteration Result"),
            title="RNN Transliteration",
        ),
        # Tab 3: transformer-based English -> Indic transliteration.
        gr.Interface(
            lambda word, topk: en2indic_model._transliterate_word(
                word, "en", en2indic_lang, topk, nativize_numerals=True
            ),
            _word_and_count_inputs(),
            gr.List(label="Transliteration Result"),
            title="En2Indic Transliteration",
        ),
        # Tab 4: transformer-based Indic -> English transliteration.
        gr.Interface(
            lambda word, topk: indic2en_model._transliterate_word(
                word, indic2en_lang, "en", topk, nativize_numerals=True
            ),
            _word_and_count_inputs(),
            gr.List(label="Transliteration Result"),
            title="Indic2En Transliteration",
        ),
    ],
    [
        "Indic Language Detection",
        f"RNN en2{en2indic_rnn_lang}",
        f"TRANSFORMER en2{en2indic_lang}",
        f"{indic2en_lang}2en",
    ],
).launch()