Spaces:

liampond
/

CantusSVS-hf

Sleeping

CantusSVS-hf / utils /phoneme_utils.py

liampond

Clean deploy snapshot

c42fe7e about 1 month ago

2.78 kB

	import pathlib

	try:
	from lightning.pytorch.utilities.rank_zero import rank_zero_info
	except ModuleNotFoundError:
	rank_zero_info = print

	from utils.hparams import hparams

	# True once _initialize() has loaded the dictionary and derived the phoneme sets.
	_initialized = False
	# Both sets are filled by _initialize_consonants_and_vowels() from the entries of _dictionary.
	_ALL_CONSONANTS_SET = set()
	_ALL_VOWELS_SET = set()
	# Maps a dictionary key to its list of phonemes. 'AP' and 'SP' are built in and map to
	# themselves; _build_dict_and_list() adds the entries read from the dictionary file.
	_dictionary = {
	'AP': ['AP'],
	'SP': ['SP']
	}
	# Sorted list of every phoneme found in _dictionary. Declared here without a value;
	# it is only assigned by _build_dict_and_list().
	_phoneme_list: list


	def locate_dictionary():
	    """
	    Search and locate the dictionary file.

	    The configured path is hparams['dictionary'] when that key is present,
	    otherwise hparams['g2p_dictionary']. Candidates are tried in this order:

	    1. the configured path itself
	    2. a file with the same name as the configured path in hparams['work_dir']
	    3. 'dictionary.txt' in hparams['work_dir']

	    :return: pathlib.Path of the dictionary file
	    :raises AssertionError: if neither 'dictionary' nor 'g2p_dictionary' is configured
	    :raises FileNotFoundError: if none of the candidates exists
	    """
	    assert 'dictionary' in hparams or 'g2p_dictionary' in hparams, \
	        'Please specify a dictionary file in your config.'
	    # The assertion above accepts a config that only provides 'g2p_dictionary',
	    # so fall back to it instead of failing with a KeyError on 'dictionary'.
	    if 'dictionary' in hparams:
	        configured = hparams['dictionary']
	    else:
	        configured = hparams['g2p_dictionary']
	    config_dict_path = pathlib.Path(configured)
	    if config_dict_path.exists():
	        return config_dict_path
	    # The configured path may no longer exist on this machine; look for a file
	    # of the same name, then for the default name, inside the work directory.
	    work_dir = pathlib.Path(hparams['work_dir'])
	    for ckpt_dict_path in (work_dir / config_dict_path.name, work_dir / 'dictionary.txt'):
	        if ckpt_dict_path.exists():
	            return ckpt_dict_path
	    raise FileNotFoundError('Unable to locate the dictionary file. '
	                            'Please specify the right dictionary in your config.')


	def _build_dict_and_list():
	    """
	    Load the dictionary file into _dictionary and rebuild _phoneme_list.

	    The file returned by locate_dictionary() is read as UTF-8. Every non-blank
	    line must consist of exactly two tab-separated fields: the entry key and a
	    whitespace-separated phoneme sequence. Entries are added to (or overwrite
	    entries of) the module-level _dictionary; _phoneme_list is then set to the
	    sorted list of all phonemes in _dictionary, and that list is logged.

	    :raises ValueError: if a non-blank line does not have exactly two fields
	    """
	    global _dictionary, _phoneme_list

	    with open(locate_dictionary(), 'r', encoding='utf8') as _df:
	        for _line_no, _line in enumerate(_df, start=1):
	            _line = _line.strip()
	            if not _line:
	                # A blank line carries no entry; skip it rather than fail on unpacking.
	                continue
	            _fields = _line.split('\t')
	            if len(_fields) != 2:
	                raise ValueError(
	                    f'Invalid dictionary entry at line {_line_no}: {_line!r} '
	                    f'(expected two tab-separated fields)'
	                )
	            _pinyin, _ph_str = _fields
	            _dictionary[_pinyin] = _ph_str.split()

	    _set = set()
	    for _list in _dictionary.values():
	        _set.update(_list)
	    _phoneme_list = sorted(_set)
	    # The backslash is part of the existing log text; it is escaped explicitly
	    # because '\|' is not a valid escape sequence.
	    rank_zero_info('\\| load phoneme set: ' + str(_phoneme_list))


	def _initialize_consonants_and_vowels():
	    """
	    Fill _ALL_CONSONANTS_SET and _ALL_VOWELS_SET from the entries of _dictionary.

	    Currently only two-part consonant-vowel phoneme systems are supported: a
	    single-phoneme entry contributes a vowel, and any longer entry contributes
	    its first phoneme as a consonant and its second as a vowel. Empty entries
	    and entries starting with 'AP' or 'SP' are ignored.
	    """
	    for phonemes in _dictionary.values():
	        if len(phonemes) == 0 or phonemes[0] in ['AP', 'SP']:
	            continue
	        if len(phonemes) == 1:
	            _ALL_VOWELS_SET.add(phonemes[0])
	            continue
	        # Anything beyond the first two phonemes is not looked at.
	        consonant, vowel = phonemes[0], phonemes[1]
	        _ALL_CONSONANTS_SET.add(consonant)
	        _ALL_VOWELS_SET.add(vowel)


	def _initialize():
	    """
	    Load the dictionary and derive the consonant/vowel sets on the first call.

	    Later calls return immediately because _initialized is already set.
	    """
	    global _initialized
	    if _initialized:
	        return
	    _build_dict_and_list()
	    _initialize_consonants_and_vowels()
	    # Only flagged after both steps succeeded, so a failed load is retried next time.
	    _initialized = True


	def get_all_consonants():
	    """Return a new sorted list of all consonants, initializing the module if needed."""
	    _initialize()
	    consonants = list(_ALL_CONSONANTS_SET)
	    consonants.sort()
	    return consonants


	def get_all_vowels():
	    """Return a new sorted list of all vowels, initializing the module if needed."""
	    _initialize()
	    vowels = list(_ALL_VOWELS_SET)
	    vowels.sort()
	    return vowels


	def build_dictionary() -> dict:
	    """
	    Return the phoneme dictionary, initializing the module if needed.

	    The returned object is the module-level _dictionary itself, not a copy.
	    """
	    _initialize()
	    dictionary = _dictionary
	    return dictionary


	def build_phoneme_list() -> list:
	    """
	    Return the sorted phoneme list, initializing the module if needed.

	    The returned object is the module-level _phoneme_list itself, not a copy.
	    """
	    _initialize()
	    phoneme_list = _phoneme_list
	    return phoneme_list