# hengjie yang
# Initial commit: Voice Clone App with Gradio interface
# 9580089
from dataclasses import dataclass, field
from typing import Optional, Tuple
@dataclass
class AudioConfig:
    """Default audio / spectrogram parameters.

    A plain value container: every field has a default, so ``AudioConfig()``
    is usable as-is. Field order is part of the positional constructor
    signature and must not be rearranged.
    """

    # Sampling rate in Hz (presumably the rate audio is loaded/resampled
    # to -- confirm against the loader that consumes this config).
    sample_rate: int = 22_050
    # FFT size, hop and window length; names follow the usual STFT
    # parameters (units presumably samples -- confirm with the consumer).
    n_fft: int = 2_048
    hop_length: int = 512
    win_length: int = 2_048
    # Number of mel bands and the mel filter frequency bounds.
    n_mels: int = 80
    mel_fmin: float = 0.0
    # None means "no explicit upper bound given"; how the consumer resolves
    # it (e.g. to sample_rate / 2) is not visible in this file.
    mel_fmax: Optional[float] = None
@dataclass
class DataConfig:
    """Default dataset-location and filtering parameters.

    A plain value container; all fields have defaults. Field order is part
    of the positional constructor signature.
    """

    # Directory holding the raw data (relative path by default).
    root_dir: str = "data/raw"
    # File suffixes treated as audio; each entry includes the leading dot.
    valid_audio_extensions: Tuple[str, ...] = (
        ".wav",
        ".mp3",
        ".flac",
        ".m4a",
    )
    # Presumably speakers with fewer samples than this are skipped --
    # the filtering itself happens outside this file; confirm there.
    min_samples_per_speaker: int = 3
    # Cache capacity; the unit (items vs. bytes) is defined by the consumer.
    cache_size: int = 100
@dataclass
class MetaLearningConfig:
    """Default episodic / few-shot task parameters.

    A plain value container; all fields have defaults. Field order is part
    of the positional constructor signature.
    """

    # Names follow the usual N-way / K-shot convention: presumably classes
    # per task, support samples per class and query samples per class --
    # confirm against the task sampler that reads this config.
    n_way: int = 4
    k_shot: int = 4
    k_query: int = 2
    # Number of tasks (whether per epoch or in total is decided by the
    # consumer, which is not visible here).
    n_tasks: int = 200
    # Loader-style settings; presumably forwarded to a data loader.
    batch_size: int = 1
    num_workers: int = 0
    # Augmentation is off by default.
    use_augmentation: bool = False
@dataclass
class TrainingConfig:
    """Default training-loop parameters.

    A plain value container; all fields have defaults. Field order is part
    of the positional constructor signature.
    """

    # Optimizer step size and number of passes over the data.
    learning_rate: float = 1e-3
    num_epochs: int = 50
    # Output locations (relative paths by default).
    checkpoint_dir: str = "checkpoints"
    log_dir: str = "logs"
    # Device identifier string; CPU unless overridden by the caller.
    device: str = "cpu"
@dataclass
class Config:
    """Top-level configuration bundling the four section configs.

    ``Config()`` builds a fresh default instance of every section. Any
    section can be overridden by passing it positionally or by keyword,
    e.g. ``Config(training=TrainingConfig(device="cuda"))``.

    Each section uses ``default_factory`` rather than a pre-built instance
    as its default. The section classes are mutable dataclasses, so:

    * on Python 3.11+ an instance default is rejected with ``ValueError``
      ("mutable default ... use default_factory") when the class is created;
    * on older versions one shared instance would back every ``Config()``,
      so mutating ``cfg.audio`` on one config would change all the others.
    """

    audio: AudioConfig = field(default_factory=AudioConfig)
    data: DataConfig = field(default_factory=DataConfig)
    meta_learning: MetaLearningConfig = field(default_factory=MetaLearningConfig)
    training: TrainingConfig = field(default_factory=TrainingConfig)