Spaces:
Runtime error
Runtime error
import re | |
import cn2an | |
from style_bert_vits2.nlp.symbols import PUNCTUATIONS | |
# Punctuation rewrite table used by replace_punctuation(): every key found in
# the text is replaced by its value, collapsing full-width / ASCII variants
# onto the symbols  ,  .  !  ?  …  '  -
# NOTE(review): the non-ASCII keys were reconstructed from mis-decoded UTF-8
# (the garbled keys had collapsed into duplicates, so most entries were being
# silently overwritten) — confirm against the upstream table.
__REPLACE_MAP = {
    # Clause and sentence separators.
    "：": ",",
    "；": ",",
    "，": ",",
    "。": ".",
    "！": "!",
    "？": "?",
    "\n": ".",
    "·": ",",
    "、": ",",
    "...": "…",
    "$": ".",
    # Quotation marks and every kind of bracket become a plain apostrophe.
    "“": "'",
    "”": "'",
    '"': "'",
    "‘": "'",
    "’": "'",
    "（": "'",
    "）": "'",
    "(": "'",
    ")": "'",
    "《": "'",
    "》": "'",
    "【": "'",
    "】": "'",
    "[": "'",
    "]": "'",
    # Dashes and tildes become a hyphen.
    "—": "-",
    "～": "-",
    "~": "-",
    # Corner brackets.
    "「": "'",
    "」": "'",
}
def normalize_text(text: str) -> str:
    """Spell out numbers in Chinese, then normalize punctuation.

    Every run of digits (optionally containing one decimal point followed by
    more digits) is passed to ``cn2an.an2cn`` and replaced by its result. The
    converted text is then handed to ``replace_punctuation``.

    Args:
        text: Raw input text.

    Returns:
        The text with numbers converted and punctuation normalized.
    """
    # One left-to-right pass: each match is substituted where it was found.
    spelled_out = re.sub(
        r"\d+(?:\.?\d+)?",
        lambda match: cn2an.an2cn(match.group()),
        text,
    )
    return replace_punctuation(spelled_out)
def replace_punctuation(text: str) -> str:
    """Normalize punctuation and strip every character that is not allowed.

    Processing order:
      1. Two fixed character substitutions (嗯 -> 恩, 呣 -> 母).
      2. Every key of ``__REPLACE_MAP`` found in the text is replaced by its
         mapped value.
      3. Any run of characters that is neither in the range U+4E00..U+9FA5
         nor listed in ``PUNCTUATIONS`` is removed.

    Args:
        text: Input text.

    Returns:
        The text containing only U+4E00..U+9FA5 characters and the symbols
        in ``PUNCTUATIONS``.
    """
    # NOTE(review): these literals were restored from mis-decoded UTF-8;
    # presumably the targets are characters the downstream pronunciation
    # lookup can handle — confirm against upstream.
    text = text.replace("嗯", "恩").replace("呣", "母")

    # Alternation of all map keys; escaped because keys such as "(", "[",
    # "$" and "..." are regex metacharacters.
    pattern = re.compile("|".join(re.escape(key) for key in __REPLACE_MAP))
    replaced_text = pattern.sub(lambda match: __REPLACE_MAP[match.group()], text)

    # Escape each punctuation symbol so that "-", "]" or "^" inside
    # PUNCTUATIONS cannot be read as character-class syntax.
    allowed_punctuation = "".join(re.escape(p) for p in PUNCTUATIONS)
    return re.sub(
        r"[^\u4e00-\u9fa5" + allowed_punctuation + r"]+", "", replaced_text
    )