# Spaces:
#
# liampond
# /
#
# CantusSVS-hf
#
# Sleeping
#
# File size: 10,676 Bytes
#
# c42fe7e

import json
import os
import pathlib
import sys
from collections import OrderedDict
from pathlib import Path

import click
from typing import Tuple

# Repository root: two directory levels above this file's resolved path,
# i.e. the parent of the folder that contains this script.
root_dir = Path(__file__).resolve().parent.parent
# Publish the root through the PYTHONPATH environment variable as well.
# NOTE(review): presumably for child processes spawned later — confirm.
os.environ['PYTHONPATH'] = str(root_dir)
# Put the root first on sys.path; presumably this is what lets the
# function-level `utils.*` / `inference.*` imports in the commands resolve.
sys.path.insert(0, str(root_dir))


def find_exp(exp):
    """Resolve ``exp`` to the name of an existing entry under ``checkpoints``.

    An entry named exactly ``exp`` is preferred. Otherwise the first
    sub-directory (in sorted name order) whose name starts with ``exp`` is
    selected. The outcome is reported on stdout.

    Args:
        exp: Experiment folder name, or a prefix of one.

    Returns:
        ``exp`` itself on an exact match, else the name of the matching
        sub-directory of ``root_dir / 'checkpoints'``.

    Raises:
        click.BadParameter: If nothing matches, including the case where the
            ``checkpoints`` folder itself does not exist.
    """
    ckpt_root = root_dir / 'checkpoints'
    if (ckpt_root / exp).exists():
        print(f'| found ckpt by name: {exp}')
        return exp

    # A missing 'checkpoints' folder simply means "no candidates": report it
    # through the same BadParameter message instead of letting iterdir() raise
    # a raw FileNotFoundError. Sorting makes the prefix match deterministic,
    # since iterdir() yields entries in arbitrary, filesystem-dependent order.
    if ckpt_root.is_dir():
        candidates = sorted(
            entry.name for entry in ckpt_root.iterdir() if entry.is_dir()
        )
    else:
        candidates = []

    for candidate in candidates:
        if candidate.startswith(exp):
            print(f'| match ckpt by prefix: {candidate}')
            return candidate

    raise click.BadParameter(
        f'There are no matching exp starting with \'{exp}\' in \'checkpoints\' folder. '
        'Please specify \'--exp\' as the folder name or prefix.'
    )


# Root click command group; the sub-commands in this file attach themselves
# to it with `@main.command(...)`.
# NOTE(review): documented with `#` comments rather than a docstring on
# purpose — click falls back to a command's docstring for its help text when
# `help=` is not passed, so adding a docstring here would change `--help`.
@click.group()
def main():
    pass


@main.command(help='Run DiffSinger acoustic model inference')
@click.argument(
    'proj', type=click.Path(
        exists=True, file_okay=True, dir_okay=False, readable=True,
        path_type=pathlib.Path, resolve_path=True
    ),
    metavar='DS_FILE'
)
@click.option(
    '--exp', type=str,
    required=True, metavar='EXP',
    callback=lambda ctx, param, value: find_exp(value),
    help='Selection of model'
)
@click.option(
    '--ckpt', type=click.IntRange(min=0),
    required=False, metavar='STEPS',
    help='Selection of checkpoint training steps'
)
@click.option(
    '--spk', type=click.STRING,
    required=False,
    help='Speaker name or mixture of speakers'
)
@click.option(
    '--out', type=click.Path(
        file_okay=False, dir_okay=True, path_type=pathlib.Path
    ),
    required=False,
    help='Path of the output folder'
)
@click.option(
    '--title', type=click.STRING,
    required=False,
    help='Title of output file'
)
@click.option(
    '--num', type=click.IntRange(min=1),
    required=False, default=1,
    help='Number of runs'
)
@click.option(
    '--key', type=click.INT,
    required=False, default=0,
    help='Key transition of pitch'
)
@click.option(
    '--gender', type=click.FloatRange(min=-1, max=1),
    required=False,
    help='Formant shifting (gender control)'
)
@click.option(
    '--seed', type=click.INT,
    required=False, default=-1,
    help='Random seed of the inference'
)
@click.option(
    '--depth', type=click.FloatRange(min=0, max=1),
    required=False,
    help='Shallow diffusion depth'
)
@click.option(
    '--steps', type=click.IntRange(min=1),
    required=False,
    help='Diffusion sampling steps'
)
@click.option(
    '--mel', is_flag=True,
    help='Save intermediate mel format instead of waveform'
)
def acoustic(
        proj: pathlib.Path,
        exp: str,
        ckpt: int,
        spk: str,
        out: pathlib.Path,
        title: str,
        num: int,
        key: int,
        gender: float,
        seed: int,
        depth: float,
        steps: int,
        mel: bool
):
    """Run acoustic inference on a DS project file.

    Loads the JSON project, optionally transposes it, loads the experiment's
    hyper-parameters, applies the command-line overrides to ``hparams`` and to
    every segment, then hands everything to ``DiffSingerAcousticInfer``.

    Args:
        proj: Path of the input JSON file (a single object or a list of them).
        exp: Experiment folder name, already resolved by ``find_exp``.
        ckpt: Checkpoint training steps to select, or ``None``.
        spk: Speaker name or mixture string, or ``None``.
        out: Output folder; defaults to the folder containing ``proj``.
        title: Output title; defaults to the stem of ``proj``.
        num: Number of runs.
        key: Key transition of pitch; ``0`` disables it.
        gender: Formant shifting value, or ``None``.
        seed: Random seed of the inference.
        depth: Shallow diffusion depth, or ``None``.
        steps: Diffusion sampling steps, or ``None``.
        mel: Save the intermediate mel instead of a waveform; the vocoder is
            then neither checked for nor loaded.

    Raises:
        click.ClickException: If the vocoder checkpoint is required but absent.
        click.BadParameter: If ``depth`` exceeds ``1 - T_start``.
        SystemExit: On an empty input file (status 0) or Ctrl-C (status -1).
    """
    name = title if title else proj.stem
    if out is None:
        out = proj.parent

    with open(proj, 'r', encoding='utf-8') as f:
        params = json.load(f)

    # A project file may hold a single segment object or a list of segments.
    if not isinstance(params, list):
        params = [params]

    if not params:
        print('The input file is empty.')
        # sys.exit() rather than the exit() builtin: the latter is injected by
        # the `site` module and is not guaranteed to exist (e.g. `python -S`).
        sys.exit()

    # Project modules are imported inside the function, as in the rest of
    # this file, rather than at module import time.
    from utils.infer_utils import trans_key, parse_commandline_spk_mix

    if key != 0:
        params = trans_key(params, key)
        if not title:
            # Only auto-generated names get the key suffix (e.g. '+2key').
            name += f'{key:+d}key'
        print(f'| key transition: {key:+d}')

    # set_hparams() is called without arguments, so it presumably parses
    # sys.argv; replace click's arguments with the ones it expects.
    sys.argv = [
        sys.argv[0],
        '--exp_name',
        exp,
        '--infer'
    ]
    from utils.hparams import set_hparams, hparams
    set_hparams()

    # Check for vocoder path (explicit raise: `assert` vanishes under -O).
    if not mel and not (root_dir / hparams['vocoder_ckpt']).exists():
        raise click.ClickException(
            f'Vocoder ckpt \'{hparams["vocoder_ckpt"]}\' not found. '
            f'Please put it to the checkpoints directory to run inference.'
        )

    # For compatibility:
    # migrate timesteps, K_step, K_step_infer, diff_speedup to time_scale_factor, T_start, T_start_infer, sampling_steps
    if 'diff_speedup' not in hparams and 'pndm_speedup' in hparams:
        hparams['diff_speedup'] = hparams['pndm_speedup']
    if 'T_start' not in hparams:
        hparams['T_start'] = 1 - hparams['K_step'] / hparams['timesteps']
    if 'T_start_infer' not in hparams:
        hparams['T_start_infer'] = 1 - hparams['K_step_infer'] / hparams['timesteps']
    if 'sampling_steps' not in hparams:
        if hparams['use_shallow_diffusion']:
            hparams['sampling_steps'] = hparams['K_step_infer'] // hparams['diff_speedup']
        else:
            hparams['sampling_steps'] = hparams['timesteps'] // hparams['diff_speedup']
    if 'time_scale_factor' not in hparams:
        hparams['time_scale_factor'] = hparams['timesteps']

    # Command-line overrides of the diffusion depth and sampling steps.
    if depth is not None:
        max_depth = 1 - hparams['T_start']
        if not depth <= max_depth:
            raise click.BadParameter(
                f'Depth should not be larger than 1 - T_start ({max_depth})',
                param_hint='--depth'
            )
        hparams['K_step_infer'] = round(hparams['timesteps'] * depth)
        hparams['T_start_infer'] = 1 - depth
    if steps is not None:
        # Keep the legacy 'diff_speedup' key in step with the new step count
        # whenever the legacy keys it is derived from are present.
        if hparams['use_shallow_diffusion']:
            step_size = (1 - hparams['T_start_infer']) / steps
            if 'K_step_infer' in hparams:
                hparams['diff_speedup'] = round(step_size * hparams['K_step_infer'])
        else:
            if 'timesteps' in hparams:
                hparams['diff_speedup'] = round(hparams['timesteps'] / steps)
        hparams['sampling_steps'] = steps

    # --spk is only honoured when the model was configured with speaker ids.
    spk_mix = parse_commandline_spk_mix(spk) if hparams['use_spk_id'] and spk is not None else None
    for param in params:
        if gender is not None and hparams['use_key_shift_embed']:
            param['gender'] = gender

        if spk_mix is not None:
            param['spk_mix'] = spk_mix

    from inference.ds_acoustic import DiffSingerAcousticInfer
    infer_ins = DiffSingerAcousticInfer(load_vocoder=not mel, ckpt_steps=ckpt)
    print(f'| Model: {type(infer_ins.model)}')

    try:
        infer_ins.run_inference(
            params, out_dir=out, title=name, num_runs=num,
            spk_mix=spk_mix, seed=seed, save_mel=mel
        )
    except KeyboardInterrupt:
        # Quit with a non-zero status on Ctrl-C instead of dumping a traceback.
        sys.exit(-1)


@main.command(help='Run DiffSinger variance model inference')
@click.argument(
    'proj', type=click.Path(
        exists=True, file_okay=True, dir_okay=False, readable=True,
        path_type=pathlib.Path, resolve_path=True
    ),
    metavar='DS_FILE'
)
@click.option(
    '--exp', type=str,
    required=True, metavar='EXP',
    callback=lambda ctx, param, value: find_exp(value),
    help='Selection of model'
)
@click.option(
    '--ckpt', type=click.IntRange(min=0),
    required=False, metavar='STEPS',
    help='Selection of checkpoint training steps'
)
@click.option(
    '--predict', type=click.STRING,
    multiple=True, metavar='TAGS',
    help='Parameters to predict'
)
@click.option(
    '--spk', type=click.STRING,
    required=False,
    help='Speaker name or mixture of speakers'
)
@click.option(
    '--out', type=click.Path(
        file_okay=False, dir_okay=True, path_type=pathlib.Path
    ),
    required=False,
    help='Path of the output folder'
)
@click.option(
    '--title', type=click.STRING,
    required=False,
    help='Title of output file'
)
@click.option(
    '--num', type=click.IntRange(min=1),
    required=False, default=1,
    help='Number of runs'
)
@click.option(
    '--key', type=click.INT,
    required=False, default=0,
    help='Key transition of pitch'
)
@click.option(
    '--expr', type=click.FloatRange(min=0, max=1),
    required=False, help='Static expressiveness control'
)
@click.option(
    '--seed', type=click.INT,
    required=False, default=-1,
    help='Random seed of the inference'
)
@click.option(
    '--steps', type=click.IntRange(min=1),
    required=False,
    help='Diffusion sampling steps'
)
def variance(
        proj: pathlib.Path,
        exp: str,
        ckpt: int,
        spk: str,
        predict: Tuple[str, ...],
        out: pathlib.Path,
        title: str,
        num: int,
        key: int,
        expr: float,
        seed: int,
        steps: int
):
    """Run variance inference on a DS project file.

    Loads the JSON project, optionally transposes it, loads the experiment's
    hyper-parameters, applies the command-line overrides to ``hparams`` and to
    every segment, then hands everything to ``DiffSingerVarianceInfer``.

    Args:
        proj: Path of the input JSON file (a single object or a list of them).
        exp: Experiment folder name, already resolved by ``find_exp``.
        ckpt: Checkpoint training steps to select, or ``None``.
        spk: Speaker name or mixture string, or ``None``.
        predict: Tags of the parameters to predict (option may be repeated).
        out: Output folder; defaults to the folder containing ``proj``.
        title: Output title; defaults to the stem of ``proj``.
        num: Number of runs.
        key: Key transition of pitch; ``0`` disables it.
        expr: Static expressiveness value, or ``None``.
        seed: Random seed of the inference.
        steps: Diffusion sampling steps, or ``None``.

    Raises:
        SystemExit: On an empty input file (status 0) or Ctrl-C (status -1).
    """
    name = title if title else proj.stem
    if out is None:
        out = proj.parent
    # When the result lands next to the input under an auto-generated name,
    # add a suffix so the name differs from the input's stem. (`out` is always
    # a Path here, and Path objects are always truthy, so no emptiness check
    # is needed.)
    if not title and out.resolve() == proj.parent.resolve():
        name += '_variance'

    with open(proj, 'r', encoding='utf-8') as f:
        params = json.load(f)

    # A project file may hold a single segment object or a list of segments.
    if not isinstance(params, list):
        params = [params]
    params = [OrderedDict(p) for p in params]

    if not params:
        print('The input file is empty.')
        # sys.exit() rather than the exit() builtin: the latter is injected by
        # the `site` module and is not guaranteed to exist (e.g. `python -S`).
        sys.exit()

    # Project modules are imported inside the function, as in the rest of
    # this file, rather than at module import time.
    from utils.infer_utils import trans_key, parse_commandline_spk_mix

    if key != 0:
        params = trans_key(params, key)
        if not title:
            # Only auto-generated names get the key suffix (e.g. '+2key').
            name += f'{key:+d}key'
        print(f'| key transition: {key:+d}')

    # set_hparams() is called without arguments, so it presumably parses
    # sys.argv; replace click's arguments with the ones it expects.
    sys.argv = [
        sys.argv[0],
        '--exp_name',
        exp,
        '--infer'
    ]
    from utils.hparams import set_hparams, hparams
    set_hparams()

    # For compatibility:
    # migrate timesteps, K_step, K_step_infer, diff_speedup to time_scale_factor, T_start, T_start_infer, sampling_steps
    if 'diff_speedup' not in hparams and 'pndm_speedup' in hparams:
        hparams['diff_speedup'] = hparams['pndm_speedup']
    if 'sampling_steps' not in hparams:
        hparams['sampling_steps'] = hparams['timesteps'] // hparams['diff_speedup']
    if 'time_scale_factor' not in hparams:
        hparams['time_scale_factor'] = hparams['timesteps']

    if steps is not None:
        # Keep the legacy 'diff_speedup' key in step with the new step count
        # when the legacy 'timesteps' key is present.
        if 'timesteps' in hparams:
            hparams['diff_speedup'] = round(hparams['timesteps'] / steps)
        hparams['sampling_steps'] = steps

    # --spk is only honoured when the model was configured with speaker ids.
    spk_mix = parse_commandline_spk_mix(spk) if hparams['use_spk_id'] and spk is not None else None
    for param in params:
        if expr is not None:
            param['expr'] = expr

        if spk_mix is not None:
            # Keep the segment's own mixes under *_backup keys before
            # overriding both with the command-line mix.
            param['ph_spk_mix_backup'] = param.get('ph_spk_mix')
            param['spk_mix_backup'] = param.get('spk_mix')
            param['ph_spk_mix'] = param['spk_mix'] = spk_mix

    from inference.ds_variance import DiffSingerVarianceInfer
    infer_ins = DiffSingerVarianceInfer(ckpt_steps=ckpt, predictions=set(predict))
    print(f'| Model: {type(infer_ins.model)}')

    try:
        infer_ins.run_inference(
            params, out_dir=out, title=name,
            num_runs=num, seed=seed
        )
    except KeyboardInterrupt:
        # Quit with a non-zero status on Ctrl-C instead of dumping a traceback.
        sys.exit(-1)


# Script entry point: invoke the click group only when this file is executed
# directly, not when it is imported.
if __name__ == '__main__':
    main()