File size: 1,111 Bytes
3a1da90 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 |
import dataclasses
import math
@dataclasses.dataclass
class SequenceConfig:
    """Sequence-length settings for a fixed-duration audio clip.

    Holds the clip duration and the audio rates, and derives from them the
    number of latents (``latent_seq_len``) and the number of audio frames
    that corresponds to exactly that many latents (``num_audio_frames``).
    """

    # general
    duration: float  # clip length (presumably seconds -- confirm with callers)

    # audio
    sampling_rate: int  # multiplied by ``duration`` to get the sample count
    spectrogram_frame_rate: int  # divisor applied to the sample count
    latent_downsample_rate: int = 2  # further divisor from spectrogram frames to latents

    @property
    def num_audio_frames(self) -> int:
        """Return the audio frame count covered by ``latent_seq_len`` latents.

        Computed back from the (rounded-up) latent count so that the result
        always maps to an integer number of latents.
        """
        # we need an integer number of latents
        frames = self.latent_seq_len * self.spectrogram_frame_rate
        return frames * self.latent_downsample_rate

    @property
    def latent_seq_len(self) -> int:
        """Return the number of latents needed to cover ``duration``.

        The value is ``duration * sampling_rate / spectrogram_frame_rate /
        latent_downsample_rate`` rounded up to the next integer.
        """
        # Keep the left-to-right evaluation order so the floating-point
        # result (and therefore the ceiling) is unchanged.
        total_samples = self.duration * self.sampling_rate
        spectrogram_frames = total_samples / self.spectrogram_frame_rate
        latents = spectrogram_frames / self.latent_downsample_rate
        return int(math.ceil(latents))
# Preset sequence configurations, one per supported sampling rate.
# TODO: fix sequence config here -> latent length = 312
CONFIG_16K = SequenceConfig(
    duration=9.975,
    sampling_rate=16000,
    spectrogram_frame_rate=256,
)
CONFIG_44K = SequenceConfig(
    duration=9.975,
    sampling_rate=44100,
    spectrogram_frame_rate=512,
)
if __name__ == '__main__':
    # Self-check of the derived lengths for the module-level presets.
    # The former clip_seq_len / sync_seq_len checks were dropped because
    # SequenceConfig defines no such attributes, so they raised
    # AttributeError before any later check could run.

    # ceil(9.975 * 16000 / 256 / 2) = ceil(311.71875) = 312
    assert CONFIG_16K.latent_seq_len == 312
    assert CONFIG_16K.num_audio_frames == 159744  # 312 * 256 * 2

    # ceil(9.975 * 44100 / 512 / 2) = ceil(429.587...) = 430
    assert CONFIG_44K.latent_seq_len == 430
    assert CONFIG_44K.num_audio_frames == 440320  # 430 * 512 * 2

    print('Passed')
|