File size: 2,779 Bytes
c42fe7e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
import pathlib

try:
    from lightning.pytorch.utilities.rank_zero import rank_zero_info
except ModuleNotFoundError:
    rank_zero_info = print

from utils.hparams import hparams

# Set to True by _initialize() once the dictionary has been loaded, so the
# loading work happens at most once per process.
_initialized = False
# Phonemes found in first position of entries with two or more phonemes;
# filled by _initialize_consonants_and_vowels().
_ALL_CONSONANTS_SET = set()
# Phonemes of single-phoneme entries plus the second phoneme of longer
# entries; filled by _initialize_consonants_and_vowels().
_ALL_VOWELS_SET = set()
# Maps each dictionary entry to its list of phonemes. Pre-seeded with the
# special 'AP' and 'SP' entries, which map to themselves and are skipped when
# consonants and vowels are classified.
_dictionary = {
    'AP': ['AP'],
    'SP': ['SP']
}
# Sorted list of all distinct phonemes. Only declared here; it stays unbound
# until _build_dict_and_list() assigns it.
_phoneme_list: list


def locate_dictionary():
    """
    Search and locate the dictionary file.

    The configured path is hparams['dictionary'], or hparams['g2p_dictionary']
    when 'dictionary' is not present in the config. Candidates are tried in
    this order and the first existing one wins:
    1. the configured path itself
    2. file with the same name as the configured path in hparams['work_dir']
    3. 'dictionary.txt' in hparams['work_dir']
    :return: pathlib.Path of the dictionary file
    :raises AssertionError: if neither 'dictionary' nor 'g2p_dictionary' is configured
    :raises FileNotFoundError: if none of the candidates exists
    """
    assert 'dictionary' in hparams or 'g2p_dictionary' in hparams, \
        'Please specify a dictionary file in your config.'
    # The check above accepts a config that only sets 'g2p_dictionary', so
    # fall back to it instead of failing with a KeyError on 'dictionary'.
    config_key = 'dictionary' if 'dictionary' in hparams else 'g2p_dictionary'
    config_dict_path = pathlib.Path(hparams[config_key])
    if config_dict_path.exists():
        return config_dict_path
    # 'work_dir' is only read once the configured path turned out to be
    # missing, so configs without a work_dir keep working in the common case.
    work_dir = pathlib.Path(hparams['work_dir'])
    for ckpt_dict_path in (work_dir / config_dict_path.name, work_dir / 'dictionary.txt'):
        if ckpt_dict_path.exists():
            return ckpt_dict_path
    raise FileNotFoundError('Unable to locate the dictionary file. '
                            'Please specify the right dictionary in your config.')


def _build_dict_and_list():
    """
    Load the dictionary file into _dictionary and derive _phoneme_list.

    Every non-blank line of the file returned by locate_dictionary() must hold
    an entry name and its whitespace-separated phonemes, joined by a single
    tab character. Entries are added to the module-level _dictionary (on top
    of the pre-seeded 'AP' and 'SP'), and _phoneme_list is rebound to the
    sorted list of all distinct phonemes, which is also reported through
    rank_zero_info.
    :raises ValueError: if a non-blank line does not split into exactly two tab-separated fields
    """
    global _dictionary, _phoneme_list

    with open(locate_dictionary(), 'r', encoding='utf8') as _df:
        for _line_no, _raw_line in enumerate(_df, start=1):
            _line = _raw_line.strip()
            if not _line:
                # Blank lines (typically a trailing one at the end of the
                # file) carry no entry; skip them instead of failing to unpack.
                continue
            _fields = _line.split('\t')
            if len(_fields) != 2:
                raise ValueError(
                    f'Malformed dictionary entry at line {_line_no}: {_raw_line!r} '
                    f'(expected 2 tab-separated fields, got {len(_fields)})'
                )
            _pinyin, _ph_str = _fields
            _dictionary[_pinyin] = _ph_str.split()
    _phoneme_list = sorted({_ph for _ph_seq in _dictionary.values() for _ph in _ph_seq})
    rank_zero_info('| load phoneme set: ' + str(_phoneme_list))


def _initialize_consonants_and_vowels():
    """
    Fill _ALL_CONSONANTS_SET and _ALL_VOWELS_SET from the entries of _dictionary.

    Empty entries and entries starting with 'AP' or 'SP' are ignored. The only
    phoneme of a single-phoneme entry counts as a vowel. For longer entries
    the first phoneme counts as a consonant and the second as a vowel; any
    further phonemes are not looked at.
    """
    # Currently we only support two-part consonant-vowel phoneme systems.
    for phonemes in _dictionary.values():
        if not phonemes:
            continue
        head = phonemes[0]
        if head in ('AP', 'SP'):
            continue
        if len(phonemes) == 1:
            _ALL_VOWELS_SET.add(head)
            continue
        _ALL_CONSONANTS_SET.add(head)
        _ALL_VOWELS_SET.add(phonemes[1])


def _initialize():
    """
    Load the dictionary and classify its phonemes, at most once.

    The first call builds the dictionary and phoneme list, then fills the
    consonant and vowel sets; the _initialized flag is set only after both
    steps finished, and later calls return immediately.
    """
    global _initialized
    if _initialized:
        return
    _build_dict_and_list()
    _initialize_consonants_and_vowels()
    _initialized = True


def get_all_consonants():
    """
    Return all known consonants as a new sorted list.

    Triggers the one-time dictionary load if it has not happened yet.
    """
    _initialize()
    consonants = list(_ALL_CONSONANTS_SET)
    consonants.sort()
    return consonants


def get_all_vowels():
    """
    Return all known vowels as a new sorted list.

    Triggers the one-time dictionary load if it has not happened yet.
    """
    _initialize()
    vowels = list(_ALL_VOWELS_SET)
    vowels.sort()
    return vowels


def build_dictionary() -> dict:
    """
    Return the loaded dictionary, loading it on first use.
    :return: the module-level mapping from entry name to its list of phonemes;
        this is the shared object itself, not a copy
    """
    _initialize()
    return _dictionary


def build_phoneme_list() -> list:
    """
    Return the sorted list of all distinct phonemes, loading the dictionary on first use.
    :return: the module-level phoneme list; this is the shared object itself, not a copy
    """
    _initialize()
    return _phoneme_list