semanticdala / src /modelling /transliterate.py
crossroderick's picture
Added all files
0eb636f
raw
history blame contribute delete
878 Bytes
from transformers import pipeline
from src.utils.config import DALAT5_MODEL
from typing import List
class DalaTransliterator:
    """
    Simple wrapper for the DalaT5 transliterator model.

    A ``transformers`` ``text2text-generation`` pipeline is created once in
    the constructor and reused by every transliteration call.
    """

    # Task prefix prepended to every input before it is sent to the pipeline.
    _TASK_PREFIX = "Cyrillic2Latin: "

    def __init__(self, model_name: str = DALAT5_MODEL):
        """
        Create the generation pipeline.

        Args:
            model_name: Model identifier passed to ``pipeline`` as ``model``.
                Defaults to ``DALAT5_MODEL`` from the project config.
        """
        self.pipe = pipeline("text2text-generation", model=model_name)

    def transliterate(self, text: str, max_length: int = 128) -> str:
        """
        Transliterate a given text using DalaT5.

        Args:
            text: Text to transliterate. Leading and trailing whitespace is
                stripped before the task prefix is added.
            max_length: Forwarded to the pipeline as ``max_length``.

        Returns:
            The ``generated_text`` field of the first pipeline result, or an
            empty string when ``text`` is empty or whitespace-only.
        """
        cleaned = text.strip()

        if not cleaned:
            # Nothing to transliterate: skip the model call instead of asking
            # it to generate from the bare task prefix.
            return ""

        result = self.pipe(f"{self._TASK_PREFIX}{cleaned}", max_length=max_length)

        return result[0]["generated_text"]

    def batch_transliterate(self, texts: List[str], max_length: int = 128) -> List[str]:
        """
        Perform batch transliteration using DalaT5.

        Each item is handled by a separate ``transliterate`` call, so the
        output has one entry per input, in the same order.

        Args:
            texts: Texts to transliterate.
            max_length: Forwarded unchanged to ``transliterate`` for each item.

        Returns:
            The transliterated strings, aligned with ``texts``.
        """
        return [self.transliterate(item, max_length) for item in texts]