Spaces:
Sleeping
Sleeping
# coding=utf8
import argparse
import os
import pathlib
import sys

# Directory two levels above this script, as an absolute resolved path.
root_dir = pathlib.Path(__file__).parent.parent.resolve()
# Overwrite PYTHONPATH with that directory and put it first on sys.path.
# This has to run before the `inference` / `utils` imports below, which are
# presumably packages located under root_dir.
os.environ['PYTHONPATH'] = str(root_dir)
sys.path.insert(0, str(root_dir))

import numpy as np
import torch
import tqdm

from inference.ds_acoustic import DiffSingerAcousticInfer
from utils.infer_utils import cross_fade, save_wav
from utils.hparams import set_hparams, hparams
# Command-line interface: one positional mel file followed by optional string
# flags. Omitted flags are left as None on the parsed namespace.
parser = argparse.ArgumentParser(description='Run DiffSinger vocoder')
parser.add_argument('mel', type=str, help='Path to the input file')
for _flag, _help in (
    ('--exp', 'Read vocoder class and path from chosen experiment'),
    ('--config', 'Read vocoder class and path from config file'),
    ('--class', 'Specify vocoder class'),
    ('--ckpt', 'Specify vocoder checkpoint path'),
    ('--out', 'Path of the output folder'),
    ('--title', 'Title of output file'),
):
    parser.add_argument(_flag, type=str, required=False, help=_help)
args = parser.parse_args()
# Input mel file, and the base name of the output file: --title when given,
# otherwise the mel file name without its suffix.
mel = pathlib.Path(args.mel)
name = args.title if args.title else mel.stem

# Locate the config file; --exp takes precedence over --config when both are given.
if args.exp:
    config = root_dir / 'checkpoints' / args.exp / 'config.yaml'
elif args.config:
    config = pathlib.Path(args.config)
else:
    # Report through argparse (usage message, exit status 2) instead of
    # `assert False`: assertions are stripped under `python -O`, which would
    # let the script carry on with no config at all.
    parser.error('Either argument \'--exp\' or \'--config\' should be specified.')

# Replace the real command line with just the '--config' option before calling
# set_hparams(); presumably set_hparams() parses sys.argv itself - confirm in
# utils.hparams.
sys.argv = [
    sys.argv[0],
    '--config',
    str(config)
]
set_hparams(print_hparams=False)
# Command-line overrides replace the corresponding entries in hparams.
# 'class' is a Python keyword, so the option has to be read with getattr().
cls = getattr(args, 'class')
for _key, _override in (('vocoder', cls), ('vocoder_ckpt', args.ckpt)):
    if _override:
        hparams[_key] = _override

# Output folder: --out when given, otherwise the folder containing the mel file.
out = pathlib.Path(args.out) if args.out else mel.parent
# NOTE(security): torch.load unpickles the file, so only open mel files that
# come from a trusted source.
# map_location='cpu' lets a file saved from a GPU session load on a CPU-only
# machine; run_vocoder moves each tensor to the inference device before use.
mel_seq = torch.load(mel, map_location='cpu')

# Validate with explicit raises: `assert` statements are removed under
# `python -O`, which would let a malformed file through.
if not isinstance(mel_seq, list):
    raise TypeError('Not a valid mel sequence.')
if not mel_seq:
    raise ValueError('Mel sequence is empty.')

sample_rate = hparams['audio_sample_rate']
# load_model=False presumably skips loading the acoustic model because only the
# vocoder is used by this script - confirm in inference.ds_acoustic.
infer_ins = DiffSingerAcousticInfer(load_model=False)
def run_vocoder(path: pathlib.Path):
    """
    Vocode every segment of the module-level ``mel_seq`` and save the result.

    Each segment is rendered with ``infer_ins.run_vocoder`` and placed at sample
    position ``round(offset * sample_rate)``. A gap before that position is
    filled with zeros; a segment that starts before the end of the audio
    assembled so far is merged into it with ``cross_fade``.

    :param path: destination handed to ``save_wav`` together with the audio
        and ``sample_rate``
    """
    mixed = np.zeros(0)
    end_pos = 0  # running end position, in samples, of the audio placed so far
    for segment in tqdm.tqdm(mel_seq, desc='mel segment', total=len(mel_seq)):
        device = infer_ins.device
        wave = infer_ins.run_vocoder(segment['mel'].to(device), f0=segment['f0'].to(device))
        wave = wave.squeeze(0).cpu().numpy()

        start_pos = round(segment['offset'] * sample_rate)
        if start_pos < end_pos:
            # The segment overlaps what is already there: blend from start_pos on.
            mixed = cross_fade(mixed, wave, start_pos)
        else:
            # Pad the gap with silence, then append the segment. axis=None
            # flattens the inputs, exactly as np.append does.
            mixed = np.concatenate((mixed, np.zeros(start_pos - end_pos), wave), axis=None)
        end_pos = start_pos + wave.shape[0]

    print(f'| save audio: {path}')
    save_wav(mixed, path, sample_rate)
# Create the output folder (and any missing parents) before vocoding.
os.makedirs(out, exist_ok=True)
try:
    run_vocoder(out / (name + '.wav'))
except KeyboardInterrupt:
    # Ctrl+C: stop with a failure status instead of printing a traceback.
    # sys.exit is used because the bare `exit` helper is installed by the
    # `site` module and is not guaranteed to exist (e.g. under `python -S`).
    sys.exit(-1)