Spaces:
Sleeping
Sleeping
import pathlib | |
try: | |
from lightning.pytorch.utilities.rank_zero import rank_zero_info | |
except ModuleNotFoundError: | |
rank_zero_info = print | |
from utils.hparams import hparams | |
# Flag flipped to True by _initialize() once the lazy load has completed.
_initialized = False

# Phoneme sets populated by _initialize_consonants_and_vowels().
_ALL_CONSONANTS_SET = set()
_ALL_VOWELS_SET = set()

# Entry -> phoneme sequence. Starts with the two built-in entries;
# _build_dict_and_list() adds the entries read from the dictionary file.
_dictionary = {'AP': ['AP'], 'SP': ['SP']}

# Annotation only: the name stays unbound until _build_dict_and_list() runs.
_phoneme_list: list
def locate_dictionary():
    """
    Search and locate the dictionary file.

    Candidates are probed in the order below; the first one that exists on
    disk is returned. Candidates derived from a key that is missing from
    hparams (or whose value is empty) are skipped.
    1. hparams['dictionary']
    2. file with same name as hparams['dictionary'] in hparams['work_dir']
    3. 'dictionary.txt' in hparams['work_dir']
    4. hparams['g2p_dictionary']
    5. file with same name as hparams['g2p_dictionary'] in hparams['work_dir']

    :return: pathlib.Path of the dictionary file
    :raises AssertionError: if neither 'dictionary' nor 'g2p_dictionary'
        is present in hparams
    :raises FileNotFoundError: if none of the candidates exists
    """
    assert 'dictionary' in hparams or 'g2p_dictionary' in hparams, \
        'Please specify a dictionary file in your config.'

    # Either key alone satisfies the check above, so neither may be indexed
    # unconditionally.
    dict_value = hparams['dictionary'] if 'dictionary' in hparams else None
    g2p_value = hparams['g2p_dictionary'] if 'g2p_dictionary' in hparams else None

    config_dict_path = pathlib.Path(dict_value) if dict_value else None
    if config_dict_path is not None and config_dict_path.exists():
        return config_dict_path

    # work_dir is only read once the configured path has failed, so a config
    # with a valid 'dictionary' does not need 'work_dir' at all.
    work_dir = pathlib.Path(hparams['work_dir'])
    candidates = []
    if config_dict_path is not None:
        candidates.append(work_dir / config_dict_path.name)
    candidates.append(work_dir / 'dictionary.txt')
    # The g2p candidates come last so that every configuration that resolved
    # before this fallback existed still resolves to the same file.
    if g2p_value:
        g2p_dict_path = pathlib.Path(g2p_value)
        candidates.append(g2p_dict_path)
        candidates.append(work_dir / g2p_dict_path.name)

    for candidate in candidates:
        if candidate.exists():
            return candidate

    raise FileNotFoundError('Unable to locate the dictionary file. '
                            'Please specify the right dictionary in your config.')
def _build_dict_and_list():
    """
    Load the dictionary file into _dictionary and rebuild _phoneme_list.

    Each non-blank line of the file returned by locate_dictionary() must be
    '<entry>\\t<phonemes>', where the phonemes are separated by whitespace.
    Entries are added to (or overwrite entries of) the module-level
    _dictionary. _phoneme_list is then set to the sorted list of all distinct
    phonemes found in _dictionary, and reported through rank_zero_info.

    :raises ValueError: if a non-blank line does not consist of exactly two
        tab-separated fields
    """
    global _phoneme_list

    with open(locate_dictionary(), 'r', encoding='utf8') as dict_file:
        for line_no, raw_line in enumerate(dict_file, start=1):
            entry = raw_line.strip()
            if not entry:
                # Blank lines (e.g. a trailing empty line) carry no entry.
                continue
            key, tab, ph_str = entry.partition('\t')
            if not tab or '\t' in ph_str:
                raise ValueError(
                    'Invalid dictionary entry at line {}: expected '
                    "'<entry>\\t<phonemes>', got {!r}".format(line_no, entry)
                )
            _dictionary[key] = ph_str.split()

    phoneme_set = set()
    for ph_list in _dictionary.values():
        phoneme_set.update(ph_list)
    _phoneme_list = sorted(phoneme_set)
    rank_zero_info('| load phoneme set: ' + str(_phoneme_list))
def _initialize_consonants_and_vowels():
    """
    Fill _ALL_CONSONANTS_SET and _ALL_VOWELS_SET from _dictionary.

    Empty phoneme sequences and sequences starting with 'AP' or 'SP' are
    ignored. A single-phoneme sequence contributes its phoneme as a vowel;
    any longer sequence contributes its first phoneme as a consonant and its
    second phoneme as a vowel.
    """
    # Currently we only support two-part consonant-vowel phoneme systems.
    for phonemes in _dictionary.values():
        if not phonemes:
            continue
        head = phonemes[0]
        if head in ('AP', 'SP'):
            continue
        if len(phonemes) == 1:
            _ALL_VOWELS_SET.add(head)
            continue
        _ALL_CONSONANTS_SET.add(head)
        _ALL_VOWELS_SET.add(phonemes[1])
def _initialize():
    """
    Run the one-time module setup.

    On the first call this loads the dictionary and phoneme list, derives the
    consonant and vowel sets, and then sets _initialized. Later calls return
    immediately.
    """
    global _initialized
    if _initialized:
        return
    _build_dict_and_list()
    _initialize_consonants_and_vowels()
    # Only reached when both steps above completed without raising.
    _initialized = True
def get_all_consonants():
    """
    Return the contents of _ALL_CONSONANTS_SET as a new sorted list.

    Calls _initialize() first, so the module setup runs on first use.
    """
    _initialize()
    consonants = list(_ALL_CONSONANTS_SET)
    consonants.sort()
    return consonants
def get_all_vowels():
    """
    Return the contents of _ALL_VOWELS_SET as a new sorted list.

    Calls _initialize() first, so the module setup runs on first use.
    """
    _initialize()
    vowels = list(_ALL_VOWELS_SET)
    vowels.sort()
    return vowels
def build_dictionary() -> dict:
    """
    Return the module-level dictionary, running _initialize() first.

    The returned object is the shared _dictionary itself, not a copy.
    """
    _initialize()
    loaded = _dictionary
    return loaded
def build_phoneme_list() -> list:
    """
    Return the module-level phoneme list, running _initialize() first.

    The returned object is the shared _phoneme_list itself, not a copy.
    """
    _initialize()
    # Read only after _initialize(): the name is unbound before the first load.
    phonemes = _phoneme_list
    return phonemes