Spaces:
Sleeping
Sleeping
import re | |
import librosa | |
import numpy as np | |
from scipy.io import wavfile | |
def trans_f0_seq(feature_pit, transform):
    """Shift a fundamental-frequency value by a number of semitones.

    :param feature_pit: f0 value in Hz
    :param transform: semitone offset (may be negative)
    :return: shifted f0 in Hz, rounded to one decimal place
    """
    shifted = feature_pit * 2.0 ** (transform / 12)
    return round(shifted, 1)
def trans_key(raw_data, key):
    """Transpose every segment of `raw_data` by `key` semitones, in place.

    Each segment dict must hold a space-separated "note_seq"; segments that
    also carry an "f0_seq" get their pitch curve shifted to match.

    :param raw_data: list of segment dicts (mutated in place)
    :param key: semitone offset (may be negative)
    :return: the same list, for convenience
    """
    missing_f0 = False
    for segment in raw_data:
        shifted_notes = []
        for note in segment["note_seq"].split(" "):
            if note == "rest":
                # Rests carry no pitch — keep them untouched.
                shifted_notes.append(note)
            else:
                shifted_notes.append(
                    librosa.midi_to_note(librosa.note_to_midi(note) + key, unicode=False)
                )
        segment["note_seq"] = " ".join(shifted_notes)
        if segment.get("f0_seq"):
            shifted_f0 = [
                str(trans_f0_seq(float(value), key))
                for value in segment["f0_seq"].split(" ")
            ]
            segment["f0_seq"] = " ".join(shifted_f0)
        else:
            # Remember that at least one segment lacked a pitch curve.
            missing_f0 = True
    if missing_f0:
        print("Warning: parts of f0_seq do not exist, please freeze the pitch line in the editor.\r\n")
    return raw_data
def resample_align_curve(points: np.ndarray, original_timestep: float, target_timestep: float, align_length: int):
    """Resample a uniformly-sampled curve onto a new timestep and force a fixed length.

    :param points: 1-D array of samples taken every `original_timestep` seconds
    :param original_timestep: spacing of the input samples, in seconds
    :param target_timestep: spacing of the output samples, in seconds
    :param align_length: required output length; the result is truncated or
        padded (repeating the last value) to exactly this many samples
    :return: resampled curve of length `align_length`, same dtype as `points`
    """
    src_times = original_timestep * np.arange(len(points))
    dst_times = np.arange(0, (len(points) - 1) * original_timestep, target_timestep)
    resampled = np.interp(dst_times, src_times, points).astype(points.dtype)
    length_gap = align_length - len(resampled)
    if length_gap > 0:
        # Too short: extend with copies of the final value.
        tail = np.full(length_gap, fill_value=resampled[-1])
        resampled = np.concatenate((resampled, tail), axis=0)
    elif length_gap < 0:
        # Too long: drop the excess tail.
        resampled = resampled[:align_length]
    return resampled
def parse_commandline_spk_mix(mix: str) -> dict:
    """
    Parse speaker mix info from commandline
    :param mix: Input like "opencpop" or "opencpop|qixuan" or "opencpop:0.5|qixuan:0.5"
    :return: A dict whose keys are speaker names and values are proportions
    """
    name_re = r'[0-9A-Za-z_-]+'
    value_re = r'\d+(\.\d+)?'
    entry_re = rf'{name_re}(:{value_re})?'
    assert re.fullmatch(rf'{entry_re}(\|{entry_re})*', mix) is not None, f'Invalid mix pattern: {mix}'
    unspecified = set()
    proportions = {}
    for entry in mix.split('|'):
        name, _, value = entry.partition(':')
        # Each speaker may appear at most once, with or without a proportion.
        assert name not in unspecified and name not in proportions, \
            f'Duplicate speaker name: {name}'
        if value:
            proportions[name] = float(value)
        else:
            unspecified.add(name)
    specified_total = sum(proportions.values())
    assert specified_total < 1 or not unspecified, \
        'Proportion of all speakers should be specified if the sum of all given proportions are larger than 1.'
    # Speakers without an explicit proportion share the remaining mass equally.
    for name in unspecified:
        proportions[name] = (1 - specified_total) / len(unspecified)
    total = sum(proportions.values())
    assert total > 0, 'Sum of all proportions should be positive.'
    # Normalize so the proportions sum to exactly 1.
    for name in proportions:
        proportions[name] /= total
    return proportions
def cross_fade(a: np.ndarray, b: np.ndarray, idx: int):
    """Join two waveforms with a linear cross-fade over their overlap.

    `b` starts at sample `idx` of `a`; the overlapping region (from `idx`
    to the end of `a`) ramps linearly from `a` into `b`.

    :param a: leading waveform
    :param b: trailing waveform, positioned at sample `idx`
    :param idx: offset of `b` within `a` (must satisfy idx <= len(a))
    :return: combined waveform of length idx + len(b)
    """
    overlap = a.shape[0] - idx
    out = np.zeros(idx + b.shape[0])
    # Before the overlap: pure `a`.
    out[:idx] = a[:idx]
    # Inside the overlap: linear ramp from `a` (weight 1 -> 0) to `b` (0 -> 1).
    ramp = np.linspace(0.0, 1.0, num=overlap, endpoint=True)
    out[idx:a.shape[0]] = (1.0 - ramp) * a[idx:] + ramp * b[:overlap]
    # After the overlap: pure `b`.
    out[a.shape[0]:] = b[overlap:]
    return out
def save_wav(wav, path, sr, norm=False):
    """Write a float waveform to a 16-bit PCM WAV file.

    :param wav: 1-D float array of samples, expected in [-1, 1]
    :param path: output file path
    :param sr: sample rate in Hz
    :param norm: if True, peak-normalize the signal before writing
    """
    if norm:
        peak = np.abs(wav).max()
        if peak > 0:  # guard: silent input would otherwise divide by zero
            wav = wav / peak
    # Out-of-place multiply: the original `wav *= 32767` mutated the
    # caller's array in place when norm=False.
    wav = wav * 32767
    # proposed by @dsmiller
    wavfile.write(path, sr, wav.astype(np.int16))