AndreasXi committed on
Commit
dd97a96
·
1 Parent(s): 0fe93da
Files changed (1) hide show
  1. app.py +13 -2
app.py CHANGED
@@ -1,6 +1,6 @@
1
  import warnings
2
  import spaces
3
- warnings.filterwarnings("ignore", category=FutureWarning)
4
  import logging
5
  from argparse import ArgumentParser
6
  from pathlib import Path
@@ -16,7 +16,6 @@ from meanaudio.eval_utils import (
16
  generate_fm,
17
  setup_eval_logging,
18
  )
19
-
20
  from meanaudio.model.flow_matching import FlowMatching
21
  from meanaudio.model.mean_flow import MeanFlow
22
  from meanaudio.model.networks import MeanAudio, get_mean_audio
@@ -26,6 +25,7 @@ torch.backends.cudnn.allow_tf32 = True
26
  import gc
27
  from datetime import datetime
28
  from huggingface_hub import snapshot_download
 
29
 
30
  log = logging.getLogger()
31
  device = "cpu"
@@ -137,6 +137,17 @@ def generate_audio_gradio(
137
  **{sampler_arg_name: sampler},
138
  )
139
  audio = audios[0].float().cpu()
 
 
 
 
 
 
 
 
 
 
 
140
  # text_embed = laion_clap_model.get_text_embedding(prompt, use_tensor=True).squeeze()
141
  # audio_embed = laion_clap_model.get_audio_embedding_from_data(audios[:,0,:].float().cpu(), use_tensor=True).squeeze()
142
  # scores = torch.cosine_similarity(text_embed.expand(audio_embed.shape[0], -1),
 
1
  import warnings
2
  import spaces
3
+ warnings.filterwarnings("ignore")
4
  import logging
5
  from argparse import ArgumentParser
6
  from pathlib import Path
 
16
  generate_fm,
17
  setup_eval_logging,
18
  )
 
19
  from meanaudio.model.flow_matching import FlowMatching
20
  from meanaudio.model.mean_flow import MeanFlow
21
  from meanaudio.model.networks import MeanAudio, get_mean_audio
 
25
  import gc
26
  from datetime import datetime
27
  from huggingface_hub import snapshot_download
28
+ import numpy as np
29
 
30
  log = logging.getLogger()
31
  device = "cpu"
 
137
  **{sampler_arg_name: sampler},
138
  )
139
  audio = audios[0].float().cpu()
140
+
141
def fade_out(x, sr, fade_ms=30):
    """Apply a linear fade-out to the tail of an audio buffer, in place.

    The last ``int(sr * fade_ms / 1000)`` samples are scaled by a ramp that
    goes linearly from 1.0 down to 0.0.

    Args:
        x: Audio samples. A ``torch.Tensor`` or ``numpy.ndarray`` is faded
            along its *last* axis, so both ``(samples,)`` and
            ``(channels, samples)`` layouts work. Any other mutable sequence
            is faded along its only axis, as before.
        sr: Sampling rate in samples per second.
        fade_ms: Fade duration in milliseconds.

    Returns:
        The same object ``x`` (modified in place). It is returned untouched
        when the fade would be empty or would cover the whole signal.
    """
    k = int(sr * fade_ms / 1000)

    is_tensor = torch.is_tensor(x)
    if is_tensor or isinstance(x, np.ndarray):
        # Measure and fade the last axis: len(x) would be the channel count
        # for a (channels, samples) buffer and the fade would be skipped.
        n = x.shape[-1] if x.ndim else 0
        tail = (Ellipsis, slice(-k, None))
    else:
        n = len(x)
        tail = slice(-k, None)

    if k <= 0 or k >= n:
        return x

    if is_tensor:
        # Build the ramp with torch so tensor and weights share a library,
        # device and (for floating inputs) dtype; mixing in a float64 NumPy
        # array relies on implicit cross-library coercion.
        ramp_dtype = x.dtype if x.is_floating_point() else torch.float32
        w = torch.linspace(1.0, 0.0, k, dtype=ramp_dtype, device=x.device)
    else:
        w = np.linspace(1.0, 0.0, k)

    x[tail] = x[tail] * w
    return x
149
+ audio = fade_out(audio, seq_cfg.sampling_rate)
150
+
151
  # text_embed = laion_clap_model.get_text_embedding(prompt, use_tensor=True).squeeze()
152
  # audio_embed = laion_clap_model.get_audio_embedding_from_data(audios[:,0,:].float().cpu(), use_tensor=True).squeeze()
153
  # scores = torch.cosine_similarity(text_embed.expand(audio_embed.shape[0], -1),