MeanAudio / meanaudio /model /sequence_config.py
junxiliu's picture
add needed model with proper LFS tracking
3a1da90
import dataclasses
import math
@dataclasses.dataclass
class SequenceConfig:
    """Derive sequence lengths for a clip from its duration and audio rates.

    The latent length is rounded up to a whole number, and the audio length
    is then recomputed from that rounded latent length, so it can be larger
    than ``duration * sampling_rate``.
    """

    # general
    duration: float  # clip length; multiplied by ``sampling_rate`` below

    # audio
    sampling_rate: int
    spectrogram_frame_rate: int  # divisor applied to the raw sample count
    latent_downsample_rate: int = 2  # further divisor from spectrogram to latent

    @property
    def latent_seq_len(self) -> int:
        """Return the number of latents needed to cover the clip (rounded up)."""
        # Keep the evaluation order (multiply, divide, divide) so the floating
        # point result that feeds ``ceil`` stays the same.
        raw_samples = self.duration * self.sampling_rate
        spectrogram_frames = raw_samples / self.spectrogram_frame_rate
        latents = spectrogram_frames / self.latent_downsample_rate
        return int(math.ceil(latents))

    @property
    def num_audio_frames(self) -> int:
        """Return the sample count spanned by a whole number of latents."""
        # we need an integer number of latents, so expand the rounded-up
        # latent length back through both rates
        spectrogram_frames = self.latent_seq_len * self.spectrogram_frame_rate
        return spectrogram_frames * self.latent_downsample_rate
# TODO: fix the sequence config here -> latent length = 312
CONFIG_16K = SequenceConfig(
    duration=9.975,
    sampling_rate=16000,
    spectrogram_frame_rate=256,
)
CONFIG_44K = SequenceConfig(
    duration=9.975,
    sampling_rate=44100,
    spectrogram_frame_rate=512,
)
if __name__ == '__main__':
    # Self-check of the module-level configs when run as a script.
    # Only attributes that SequenceConfig actually defines are checked;
    # the expected values follow from duration=9.975 and the default
    # latent_downsample_rate of 2.
    assert CONFIG_16K.latent_seq_len == 312
    assert CONFIG_16K.num_audio_frames == 159744  # 312 * 256 * 2
    assert CONFIG_44K.latent_seq_len == 430
    assert CONFIG_44K.num_audio_frames == 440320  # 430 * 512 * 2
    print('Passed')