Spaces:

liampond
/

CantusSVS-hf

Sleeping

CantusSVS-hf / utils /infer_utils.py

liampond

Clean deploy snapshot

c42fe7e about 1 month ago

4.18 kB

	import re

	import librosa
	import numpy as np
	from scipy.io import wavfile


	def trans_f0_seq(feature_pit, transform):
	    """
	    Transpose a pitch value by a number of semitones
	    :param feature_pit: Pitch value to shift (presumably an f0 in Hz; confirm against callers)
	    :param transform: Shift in semitones; every 12 semitones doubles (or halves) the value
	    :return: The shifted value rounded to one decimal place
	    """
	    # Equal-temperament ratio for the requested number of semitones.
	    semitone_ratio = 2 ** (transform / 12)
	    shifted = feature_pit * semitone_ratio
	    return round(shifted, 1)


	def trans_key(raw_data, key):
	    """
	    Transpose the notes and pitch curves of every segment by ``key`` semitones
	    :param raw_data: Iterable of dict-like segments. Each has a "note_seq" string of
	        space-separated note names (or "rest") and optionally an "f0_seq" string of
	        space-separated numbers. Segments are modified in place.
	    :param key: Number of semitones to shift by
	    :return: The same ``raw_data`` object, after modification
	    """
	    missing_f0 = False
	    for segment in raw_data:
	        # Shift every real note through its MIDI number; rests pass through untouched.
	        shifted_notes = [
	            name if name == "rest"
	            else librosa.midi_to_note(librosa.note_to_midi(name) + key, unicode=False)
	            for name in segment["note_seq"].split(" ")
	        ]
	        segment["note_seq"] = " ".join(shifted_notes)

	        if not segment.get("f0_seq"):
	            # Remember that at least one segment had no pitch curve to shift.
	            missing_f0 = True
	            continue

	        f0_values = [float(token) for token in segment["f0_seq"].split(" ")]
	        segment["f0_seq"] = " ".join(str(trans_f0_seq(value, key)) for value in f0_values)

	    if missing_f0:
	        print("Warning: parts of f0_seq do not exist, please freeze the pitch line in the editor.\r\n")
	    return raw_data


	def resample_align_curve(points: np.ndarray, original_timestep: float, target_timestep: float, align_length: int):
	    """
	    Resample a uniformly sampled curve to a new timestep and force it to a fixed length
	    :param points: Curve samples spaced ``original_timestep`` apart, starting at t = 0 (must not be empty)
	    :param original_timestep: Spacing between consecutive input samples
	    :param target_timestep: Spacing between consecutive output samples
	    :param align_length: Exact number of samples in the returned curve
	    :return: Linearly interpolated curve with the dtype of ``points``, truncated or padded
	        with its last value so that its length equals ``align_length``
	    """
	    t_max = (len(points) - 1) * original_timestep
	    curve_interp = np.interp(
	        np.arange(0, t_max, target_timestep),
	        original_timestep * np.arange(len(points)),
	        points
	    ).astype(points.dtype)
	    delta_l = align_length - len(curve_interp)
	    if delta_l < 0:
	        curve_interp = curve_interp[:align_length]
	    elif delta_l > 0:
	        # np.arange excludes t_max, so a single-sample input produces an empty
	        # curve; pad from the last input sample instead of indexing an empty array.
	        pad_value = curve_interp[-1] if len(curve_interp) > 0 else points[-1]
	        padding = np.full(delta_l, fill_value=pad_value, dtype=curve_interp.dtype)
	        curve_interp = np.concatenate((curve_interp, padding), axis=0)
	    return curve_interp


	def parse_commandline_spk_mix(mix: str) -> dict:
	    """
	    Parse speaker mix info from commandline
	    :param mix: Input like "opencpop" or "opencpop|qixuan" or "opencpop:0.5|qixuan:0.5"
	    :return: A dict whose keys are speaker names and values are proportions, normalized to sum to 1
	    :raises AssertionError: If the input is malformed, a speaker name is repeated, some speakers
	        lack a proportion while the given ones already sum to 1 or more, or the total is not positive
	    """
	    name_pattern = r'[0-9A-Za-z_-]+'
	    proportion_pattern = r'\d+(\.\d+)?'
	    single_pattern = rf'{name_pattern}(:{proportion_pattern})?'
	    # Components are separated by a literal '|', which must be escaped in the regex
	    # (an unescaped '|' would be alternation) but not in str.split.
	    assert re.fullmatch(rf'{single_pattern}(\|{single_pattern})*', mix) is not None, f'Invalid mix pattern: {mix}'
	    without_proportion = set()
	    proportion_map = {}
	    for component in mix.split('|'):
	        # The pattern check above guarantees at most one ':' and digits after it.
	        name, _, proportion = component.partition(':')
	        assert name not in without_proportion and name not in proportion_map, \
	            f'Duplicate speaker name: {name}'
	        if proportion:
	            proportion_map[name] = float(proportion)
	        else:
	            without_proportion.add(name)
	    sum_given_proportions = sum(proportion_map.values())
	    assert sum_given_proportions < 1 or len(without_proportion) == 0, \
	        'Proportion of all speakers should be specified if the sum of all given proportions are larger than 1.'
	    # Speakers without an explicit proportion share the remainder equally.
	    for name in without_proportion:
	        proportion_map[name] = (1 - sum_given_proportions) / len(without_proportion)
	    sum_all_proportions = sum(proportion_map.values())
	    assert sum_all_proportions > 0, 'Sum of all proportions should be positive.'
	    return {name: value / sum_all_proportions for name, value in proportion_map.items()}


	def cross_fade(a: np.ndarray, b: np.ndarray, idx: int):
	    """
	    Join two 1-D signals with a linear cross-fade over their overlap
	    :param a: First signal; kept unchanged before ``idx`` and faded out from ``idx`` to its end
	    :param b: Second signal; placed so that its first sample lands at position ``idx``
	    :param idx: Position in ``a`` where the overlap with ``b`` begins
	    :return: New float array of length ``idx + len(b)``
	    """
	    a_len = a.shape[0]
	    overlap = a_len - idx
	    mixed = np.zeros(idx + b.shape[0])

	    # Untouched head of `a`.
	    np.copyto(dst=mixed[:idx], src=a[:idx])

	    # Linear ramp: weight of `b` rises from 0 to 1 while `a` falls from 1 to 0.
	    ramp = np.linspace(0, 1.0, num=overlap, endpoint=True)
	    mixed[idx: a_len] = (1 - ramp) * a[idx:] + ramp * b[: overlap]

	    # Remainder of `b` after the overlap.
	    np.copyto(dst=mixed[a_len:], src=b[overlap:])
	    return mixed


	def save_wav(wav, path, sr, norm=False):
	    """
	    Write a waveform to a 16-bit PCM WAV file
	    :param wav: Audio samples, expected in the range [-1, 1]; the input is not modified
	    :param path: Destination passed to ``scipy.io.wavfile.write`` (file name or open file handle)
	    :param sr: Sample rate in Hz
	    :param norm: If True, scale the signal so that its peak magnitude becomes 1 before conversion
	    """
	    samples = np.asarray(wav)
	    if norm and samples.size:
	        peak = np.abs(samples).max()
	        # An all-zero signal has nothing to normalize; dividing would produce NaNs.
	        if peak > 0:
	            samples = samples / peak
	    # Scale out of place so the caller's array is left untouched.
	    scaled = samples * 32767
	    # Clip before the cast: out-of-range floats would otherwise wrap around in int16.
	    int16_range = np.iinfo(np.int16)
	    pcm = np.clip(scaled, int16_range.min, int16_range.max).astype(np.int16)
	    # proposed by @dsmiller
	    wavfile.write(path, sr, pcm)