# artificial-styletts2 / Utils / text_utils.py
# Dionyssos's picture
# Audionar long form
# a1338da
# -*- coding: utf-8 -*-
import re
import codecs
import textwrap
from num2words import num2words
# IPA Phonemizer: https://github.com/bootphon/phonemizer
import nltk
#nltk.download('punkt', download_dir='./')
#nltk.download('punkt_tab', download_dir='./')
nltk.data.path.append('.')
# Symbol inventory: one padding symbol, punctuation, ASCII letters and IPA
# characters, concatenated in that order.
_pad = "$"
_punctuation = ';:,.!?¡¿—…"«»“” '
_letters = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz'
_letters_ipa = "ɑɐɒæɓʙβɔɕçɗɖðʤəɘɚɛɜɝɞɟʄɡɠɢʛɦɧħɥʜɨɪʝɭɬɫɮʟɱɯɰŋɳɲɴøɵɸθœɶʘɹɺɾɻʀʁɽʂʃʈʧʉʊʋⱱʌɣɤʍχʎʏʑʐʒʔʡʕʢǀǁǂǃˈˌːˑʼʴʰʱʲʷˠˤ˞↓↑→↗↘'̩'ᵻ"

# Export all symbols:
symbols = [_pad, *_punctuation, *_letters, *_letters_ipa]

# Symbol -> position in ``symbols``. A symbol that occurs more than once in
# ``symbols`` keeps the index of its last occurrence.
dicts = {symbol: position for position, symbol in enumerate(symbols)}
class TextCleaner:
    """Convert text into a list of integer symbol indices, one per character."""

    def __init__(self, dummy=None):
        # ``dummy`` is accepted but never read.
        self.word_index_dictionary = dicts
        # Print the number of entries in the symbol table on construction.
        print(len(dicts))

    def __call__(self, text):
        """Return the index of every character of ``text`` found in the table.

        Characters missing from ``self.word_index_dictionary`` are left out of
        the result; for each such character the whole input is printed.
        """
        found = []
        for symbol in text:
            table = self.word_index_dictionary
            if symbol not in table:
                # Unknown symbol: echo the full input and skip this character.
                print(text)
                continue
            found.append(table[symbol])
        return found
def split_into_sentences(text, max_len=120):
    """Split ``text`` into sentence pieces of at most ``max_len`` characters.

    The text is first split with ``nltk.sent_tokenize``; each sentence is then
    wrapped with ``textwrap.wrap(width=max_len)`` and the resulting lines are
    returned as one flat list, in order.
    """
    pieces = []
    for sentence in nltk.sent_tokenize(text):
        pieces.extend(textwrap.wrap(sentence, width=max_len))
    return pieces
def store_ssml(text=None,
               voice=None):
    '''Build an SSML document from sentences and write it to ``_tmp_ssml.txt``.

    Each sentence is wrapped as
    ``<prosody volume><prosody rate><voice><s>...</s></voice></prosody></prosody>``
    and all sentences are enclosed in a single ``<speak>`` element.

    text : list of sentences (``None`` is treated as an empty list)
    voice: https://github.com/MycroftAI/mimic3-voices

    The rate is ``len(sentence) / 76`` clamped to [0.87, 1.14]; the volume is
    a random integer in [24, 97]. Sentence text is XML-escaped so that
    characters such as ``&`` or ``<`` cannot break the markup.

    The function prints a preview and the final SSML, writes the file in the
    current working directory (UTF-8 with BOM) and returns ``None``.
    '''
    # Function-scope imports: neither name is imported at module level.
    from xml.sax.saxutils import escape

    import numpy as np

    sentences = [] if text is None else text
    preview = sentences[0] if len(sentences) > 0 else ''
    print('\n___________________________\n', len(sentences),
          preview, '\n___________________________________\n')

    parts = ['<speak>']
    for short_text in sentences:
        # Author's note on the upper rate bound: "1.44) # 1.24 for bieber".
        rate = min(max(.87, len(short_text) / 76), 1.14)
        volume = int(74 * np.random.rand() + 24)
        # Author's note: "The other voice does not have volume".
        parts.append(
            f'<prosody volume=\'{volume}\'>'
            f'<prosody rate=\'{rate}\'>'
            f'<voice name=\'{voice}\'>'
            '<s>'
            f'{escape(short_text)}'
            '</s>'
            '</voice>'
            '</prosody>'
            '</prosody>'
        )
    parts.append('</speak>')
    _s = ''.join(parts)

    print(len(sentences), '\n\n\n\n\n\n\n', _s)
    with codecs.open('_tmp_ssml.txt', 'w', "utf-8-sig") as f:
        f.write(_s)
def transliterate_number(number_string, lang='en'):
    """
    Converts every numeral in a string to words in the specified language,
    handling decimals, scientific notation, and preserving text
    before and after the numeral.

    Args:
        number_string: Text that may contain numerals such as ``12``,
            ``3.14`` or ``1.5e-3``.
        lang: Language code. ``'rmc-script_latin'``, ``'ron'``, ``'hun'`` and
            ``'deu'`` have dedicated settings; any other value is truncated
            to its first two characters and uses the English connector words.

    Returns:
        The string with each numeral replaced by its spelled-out form.
        Decimals are read as integer part, a separator word, then the
        fractional digits one by one. A numeral whose conversion raises
        ``ValueError`` or ``OverflowError`` is left unchanged.
    """
    # lang -> (num2words language, scientific-notation phrase, decimal word)
    language_settings = {
        'rmc-script_latin': ('sr', ' puta deset na stepen od ', ' tačka '),
        # NOTE(review): this exponent phrase is identical to the 'hun' one
        # below -- it looks copied rather than Romanian; confirm the wording.
        'ron': ('ro', ' tízszer a erejéig ', ' virgulă '),
        'hun': ('hu', ' tízszer a erejéig ', ' virgula '),
        # 'de' is the first two characters of 'deu', the same truncation the
        # fallback below applies to unlisted languages.
        'deu': ('de', ' mal zehn hoch ', ' komma '),
    }
    fallback = (lang[:2], ' times ten to the power of ', ' point ')
    lang, exponential_pronoun, comma = language_settings.get(lang, fallback)

    def replace_number(match):
        number_part = match.group(0)
        try:
            lowered = number_part.lower()
            if 'e' in lowered:
                base, exponent = lowered.split('e')
                return (num2words(float(base), lang=lang)
                        + exponential_pronoun
                        + num2words(int(exponent), lang=lang))
            if '.' in number_part:
                integer_part, decimal_part = number_part.split('.')
                spoken_digits = " ".join(
                    num2words(int(digit), lang=lang) for digit in decimal_part)
                return num2words(int(integer_part), lang=lang) + comma + spoken_digits
            return num2words(int(number_part), lang=lang)
        except (ValueError, OverflowError):
            # Return original if conversion fails (e.g. the number is too
            # large to be spelled out).
            return number_part

    # Match only the numeral itself. Text around it is left in place by
    # re.sub, so there is no need to capture it; capturing it with a greedy
    # non-digit prefix made digit-free input backtrack quadratically.
    pattern = r'\d+(?:\.\d+)?(?:[Ee][+-]?\d+)?'
    return re.sub(pattern, replace_number, number_string)
def discard_leading_numeral(text):
    """Strip a numeral (integer or float) from the start of a string.

    Args:
        text: The input string.

    Returns:
        ``text`` without its leading numeral and the whitespace around it.
        If ``text`` does not begin (after optional whitespace) with a digit,
        it is returned unchanged.
    """
    leading = re.match(r"^\s*(\d+(\.\d*)?)\s*", text)
    if leading is None:
        return text
    remainder = text[leading.end():]
    return remainder.lstrip()