Spaces:

liampond
/

CantusSVS-hf

Sleeping

File size: 4,184 Bytes

c42fe7e

import re

import librosa
import numpy as np
from scipy.io import wavfile


def trans_f0_seq(feature_pit, transform):
    """
    Shift a pitch value by a number of equal-tempered semitones.
    :param feature_pit: Pitch value to shift (a frequency; presumably in Hz - confirm with caller)
    :param transform: Number of semitones to shift by; negative values shift downwards
    :return: The shifted value, rounded to one decimal place
    """
    # One semitone corresponds to a frequency ratio of 2 ** (1 / 12).
    semitone_ratio = 2 ** (transform / 12)
    shifted = feature_pit * semitone_ratio
    return round(shifted, 1)


def trans_key(raw_data, key):
    """
    Transpose every segment in ``raw_data`` by ``key`` semitones, modifying the segments in place.
    :param raw_data: Iterable of dict-like segments; each has a space-separated "note_seq" string
                     and may have a space-separated "f0_seq" string of numbers
    :param key: Number of semitones to shift notes and f0 values by
    :return: The same ``raw_data`` object that was passed in
    """
    missing_f0 = False
    for segment in raw_data:
        # Named notes are shifted through their MIDI numbers; "rest" tokens pass through untouched.
        segment["note_seq"] = " ".join(
            token if token == "rest"
            else librosa.midi_to_note(librosa.note_to_midi(token) + key, unicode=False)
            for token in segment["note_seq"].split(" ")
        )
        # A missing or empty f0_seq cannot be shifted; remember it so we warn once at the end.
        if not segment.get("f0_seq"):
            missing_f0 = True
            continue
        f0_values = [float(token) for token in segment["f0_seq"].split(" ")]
        segment["f0_seq"] = " ".join(str(trans_f0_seq(value, key)) for value in f0_values)
    if missing_f0:
        print("Warning: parts of f0_seq do not exist, please freeze the pitch line in the editor.\r\n")
    return raw_data


def resample_align_curve(points: np.ndarray, original_timestep: float, target_timestep: float, align_length: int):
    """
    Resample a uniformly sampled curve to a new timestep and fit it to a fixed length.
    :param points: 1-D array of curve values; sample ``k`` is located at time ``k * original_timestep``
    :param original_timestep: Time between consecutive samples of ``points``
    :param target_timestep: Time between consecutive samples of the returned curve
    :param align_length: Number of samples the returned curve must have
    :return: Array with the dtype of ``points`` and ``align_length`` samples; the resampled curve is
             truncated if it is too long, or padded with its last value if it is too short
    :raises ValueError: If ``points`` is empty
    """
    if len(points) == 0:
        raise ValueError('Cannot resample an empty curve.')
    t_max = (len(points) - 1) * original_timestep
    # Linear interpolation at 0, target_timestep, 2 * target_timestep, ... (t_max itself is excluded).
    curve_interp = np.interp(
        np.arange(0, t_max, target_timestep),
        original_timestep * np.arange(len(points)),
        points
    ).astype(points.dtype)
    delta_l = align_length - len(curve_interp)
    if delta_l < 0:
        curve_interp = curve_interp[:align_length]
    elif delta_l > 0:
        # A single-point curve has t_max == 0 and therefore yields no resampled values at all;
        # in that case pad with the last source point instead of indexing into an empty array.
        pad_value = curve_interp[-1] if len(curve_interp) > 0 else points[-1]
        curve_interp = np.concatenate((curve_interp, np.full(delta_l, fill_value=pad_value)), axis=0)
    return curve_interp


def parse_commandline_spk_mix(mix: str) -> dict:
    """
    Turn a commandline speaker mix expression into normalized speaker proportions.
    Speakers listed without a proportion share whatever remains of 1 after the given
    proportions are summed; the final proportions are then scaled to sum to 1.
    :param mix: Input like "opencpop" or "opencpop|qixuan" or "opencpop:0.5|qixuan:0.5"
    :return: A dict whose keys are speaker names and values are proportions
    """
    name_re = r'[0-9A-Za-z_-]+'
    weight_re = r'\d+(\.\d+)?'
    entry_re = rf'{name_re}(:{weight_re})?'
    assert re.fullmatch(rf'{entry_re}(\|{entry_re})*', mix) is not None, f'Invalid mix pattern: {mix}'

    unweighted = set()
    weights = {}
    for entry in mix.split('|'):
        fields = entry.split(':')
        speaker = fields[0]
        # Each speaker may appear only once, with or without a proportion.
        assert speaker not in unweighted and speaker not in weights, \
            f'Duplicate speaker name: {speaker}'
        if len(fields) > 1:
            weights[speaker] = float(fields[1])
        else:
            unweighted.add(speaker)

    given_total = sum(weights.values())
    assert given_total < 1 or len(unweighted) == 0, \
        'Proportion of all speakers should be specified if the sum of all given proportions are larger than 1.'
    # Split the remainder evenly between the speakers that came without a proportion.
    for speaker in unweighted:
        weights[speaker] = (1 - given_total) / len(unweighted)

    overall_total = sum(weights.values())
    assert overall_total > 0, 'Sum of all proportions should be positive.'
    return {speaker: weight / overall_total for speaker, weight in weights.items()}


def cross_fade(a: np.ndarray, b: np.ndarray, idx: int):
    """
    Join two 1-D signals, blending the tail of ``a`` linearly into the head of ``b``.
    :param a: First signal; its samples from ``idx`` onwards take part in the fade
    :param b: Second signal; its first ``len(a) - idx`` samples take part in the fade
    :param idx: Position in ``a`` (and in the result) where ``b`` starts
    :return: A new array of length ``idx + len(b)`` as created by ``np.zeros``
    """
    a_len = a.shape[0]
    mixed = np.zeros(idx + b.shape[0])
    overlap = a_len - idx
    # Untouched head of ``a`` before the fade begins.
    np.copyto(mixed[:idx], a[:idx])
    # Weight of ``b`` rises linearly from 0 to 1 across the overlap while ``a`` falls from 1 to 0.
    ramp = np.linspace(0, 1.0, num=overlap, endpoint=True)
    mixed[idx:a_len] = (1 - ramp) * a[idx:] + ramp * b[:overlap]
    # Remainder of ``b`` after the fade ends.
    np.copyto(mixed[a_len:], b[overlap:])
    return mixed


def save_wav(wav, path, sr, norm=False):
    """
    Write a waveform to ``path`` as 16-bit PCM.
    The input array is never modified; scaling happens on a copy.
    :param wav: Array-like waveform; samples are multiplied by 32767 before conversion to int16
    :param path: Destination passed to ``scipy.io.wavfile.write``
    :param sr: Sample rate passed to ``scipy.io.wavfile.write``
    :param norm: If True, divide the waveform by its peak absolute value before scaling
    """
    samples = np.asarray(wav)
    if norm:
        peak = np.abs(samples).max()
        # All-zero audio has no peak to normalize by; dividing would fill the output with NaNs.
        if peak > 0:
            samples = samples / peak
    # Multiply into a new array so the caller's buffer is left untouched.
    samples = samples * 32767
    # Clamp to the int16 range so out-of-range samples saturate instead of wrapping around.
    int16_range = np.iinfo(np.int16)
    samples = np.clip(samples, int16_range.min, int16_range.max)
    # proposed by @dsmiller
    wavfile.write(path, sr, samples.astype(np.int16))