File size: 878 Bytes
0eb636f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
from transformers import pipeline
from src.utils.config import DALAT5_MODEL

from typing import List


class DalaTransliterator:
    """
    Simple wrapper for the DalaT5 transliterator model.

    Holds a ``text2text-generation`` pipeline and prepends the
    ``Cyrillic2Latin:`` task prefix to each input before generation.
    """
    def __init__(self, model_name: str = DALAT5_MODEL):
        """
        Build the generation pipeline.

        Args:
            model_name: Value passed as ``model`` to ``pipeline``; defaults
                to the project-configured ``DALAT5_MODEL``.
        """
        self.pipe = pipeline("text2text-generation", model = model_name)


    def transliterate(self, text: str, max_length: int = 128) -> str:
        """
        Transliterate a given text using DalaT5.

        Args:
            text: Input text; surrounding whitespace is stripped before the
                task prefix is added.
            max_length: Forwarded to the pipeline call as ``max_length``.

        Returns:
            The ``generated_text`` of the first pipeline result, or an empty
            string when ``text`` is empty or whitespace-only.
        """
        cleaned = text.strip()

        # A bare task prefix carries nothing to transliterate; skip inference
        # rather than return whatever the model generates for it.
        if not cleaned:
            return ""

        input_text = f"Cyrillic2Latin: {cleaned}"
        result = self.pipe(input_text, max_length = max_length)

        return result[0]["generated_text"]


    def batch_transliterate(self, texts: List[str], max_length: int = 128) -> List[str]:
        """
        Perform batch transliteration using DalaT5.

        Each text is passed through ``transliterate`` in turn, so the output
        has one entry per input, in the same order.

        Args:
            texts: Texts to transliterate.
            max_length: Forwarded to ``transliterate`` for every text.

        Returns:
            The transliterated strings; an empty list for empty ``texts``.
        """
        # typing.List is used for both annotations: the builtin ``list[str]``
        # form is evaluated at definition time and fails on Python < 3.9.
        return [self.transliterate(t, max_length) for t in texts]