File size: 1,111 Bytes
3a1da90
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
import dataclasses
import math


@dataclasses.dataclass
class SequenceConfig:
    """Sequence-length settings for one audio configuration.

    The stored fields are plain numbers; the two properties derive the
    latent sequence length and the matching audio length from them on
    every access.
    """

    # general
    duration: float  # multiplied by sampling_rate below (presumably seconds -- confirm)

    # audio
    sampling_rate: int
    spectrogram_frame_rate: int
    latent_downsample_rate: int = 2

    @property
    def latent_seq_len(self) -> int:
        """Return the number of latents, rounded up to a whole latent."""
        # Keep the divisions sequential: float results depend on the order.
        num_samples = self.duration * self.sampling_rate
        num_frames = num_samples / self.spectrogram_frame_rate
        num_latents = num_frames / self.latent_downsample_rate
        return int(math.ceil(num_latents))

    @property
    def num_audio_frames(self) -> int:
        """Return the audio length that corresponds to a whole number of latents.

        Because ``latent_seq_len`` is rounded up, this value can be larger
        than ``duration * sampling_rate``.
        """
        whole_latents = self.latent_seq_len
        return whole_latents * self.spectrogram_frame_rate * self.latent_downsample_rate

# Presets for 16 kHz and 44.1 kHz audio.
# TODO: fix the sequence config here -> latent length = 312
# (with the values below, CONFIG_16K.latent_seq_len evaluates to 312
# and CONFIG_44K.latent_seq_len to 430).
CONFIG_16K = SequenceConfig(duration=9.975, sampling_rate=16000, spectrogram_frame_rate=256)
CONFIG_44K = SequenceConfig(duration=9.975, sampling_rate=44100, spectrogram_frame_rate=512)


if __name__ == '__main__':
    # Self-check of the derived lengths for the module-level presets.
    # Only properties that SequenceConfig actually defines are checked;
    # the earlier clip_seq_len / sync_seq_len asserts raised AttributeError.

    # ceil(9.975 * 16000 / 256 / 2) = ceil(311.71875) = 312
    assert CONFIG_16K.latent_seq_len == 312
    assert CONFIG_16K.num_audio_frames == 159744  # 312 * 256 * 2

    # ceil(9.975 * 44100 / 512 / 2) = ceil(429.587...) = 430
    assert CONFIG_44K.latent_seq_len == 430
    assert CONFIG_44K.num_audio_frames == 440320  # 430 * 512 * 2

    print('Passed')