farbverlauf committed
Commit 92da7ef · 1 Parent(s): 6e0def0
app.py CHANGED
@@ -1,4 +1,4 @@
-import spaces
+# import spaces
 import gradio as gr
 import torchaudio
 import pandas as pd
@@ -16,8 +16,8 @@ import torch
 from models.models import BiFormer
 
 
-DEVICE = torch.device('cuda')
-# DEVICE = torch.device('cpu')
+# DEVICE = torch.device('cuda')
+DEVICE = torch.device('cpu')
 
 # Configure logging
 logging.basicConfig(level=logging.INFO)
@@ -150,7 +150,7 @@ def get_top_emotion(probabilities):
     return f"{LABEL_TO_EMOTION[max_idx]} ({max(probabilities)*100:.1f}%)"
 
 
-@spaces.GPU
+# @spaces.GPU
 def process_audio(audio_path):
     """Main processing pipeline."""
     try:
@@ -312,6 +312,5 @@ def create_demo():
 
 
 if __name__ == "__main__":
-    print('START DEMO')
     demo = create_demo()
     demo.launch()
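Note: the commit switches off the ZeroGPU path by hand-editing the `import spaces`, `@spaces.GPU`, and `DEVICE` lines. A minimal sketch of keeping that toggle automatic instead of edited by hand, assuming `spaces` is only installed on Spaces GPU hardware (the try/except fallback and the `gpu_maybe` name are illustrative, not part of this repo):

# Sketch only: fall back to a no-op decorator when `spaces` is unavailable,
# and pick the device from what the runtime actually offers.
import torch

try:
    import spaces                # present on ZeroGPU Spaces
    gpu_maybe = spaces.GPU       # real decorator that requests a GPU slice
except ImportError:
    def gpu_maybe(fn):           # plain pass-through on CPU-only machines
        return fn

DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

@gpu_maybe
def process_audio(audio_path):
    ...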
config.toml CHANGED
@@ -54,7 +54,7 @@ text_column = "text"
 whisper_model = "base"
 
 # Where to run Whisper: "cuda" (GPU) or "cpu"
-whisper_device = "cuda"
+whisper_device = "cpu"
 
 # If dev/test rows in the CSV have no text, should Whisper still be called?
 use_whisper_for_nontrain_if_no_text = true
@@ -120,7 +120,7 @@ audio_embedding_dim = 256 # audio embedding dimension
 text_embedding_dim = 1024 # text embedding dimension
 emb_normalize = false # whether to L2-normalize the vector
 max_tokens = 95 # maximum text length (in tokens) at tokenization
-device = "cuda" # "cuda" or "cpu", where to load the model
+device = "cpu" # "cuda" or "cpu", where to load the model
 
 # audio_pooling = "mean" # "mean", "cls", "max", "min", "last", "attention"
 # text_pooling = "cls" # "mean", "cls", "max", "min", "last", "sum", "attention"
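Since both `whisper_device` and `device` now live in config.toml, a short sketch of reading them at startup (Python 3.11+ ships `tomllib`; the flat key paths below are assumptions based on the snippet above, the real table names in this repo may differ):

# Sketch: load the device settings from config.toml instead of hard-coding them.
import tomllib  # stdlib since Python 3.11; use the `tomli` package on older versions

with open("config.toml", "rb") as f:
    cfg = tomllib.load(f)

# The diff does not show the enclosing table names, so flat keys are assumed
# here purely for illustration.
whisper_device = cfg.get("whisper_device", "cpu")
model_device = cfg.get("device", "cpu")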
data_loading/feature_extractor.py CHANGED
@@ -14,8 +14,8 @@ from transformers import (
 from data_loading.pretrained_extractors import EmotionModel, get_model_mamba, Mamba
 
 
-DEVICE = torch.device('cuda')
-# DEVICE = torch.device('cpu')
+# DEVICE = torch.device('cuda')
+DEVICE = torch.device('cpu')
 
 
 class PretrainedAudioEmbeddingExtractor:
data_loading/pretrained_extractors.py CHANGED
@@ -14,8 +14,8 @@ from einops import rearrange, einsum
 from torch import Tensor
 from einops import rearrange
 
-DEVICE = torch.device('cuda')
-# DEVICE = torch.device('cpu')
+# DEVICE = torch.device('cuda')
+DEVICE = torch.device('cpu')
 
 ## Audio models
 
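The same two-line DEVICE toggle is repeated in app.py, data_loading/feature_extractor.py, and data_loading/pretrained_extractors.py, so every CPU/GPU switch means editing three files. A possible follow-up, not part of this commit, is a single shared helper that each module imports; the module name `device_config` below is hypothetical:

# device_config.py (hypothetical shared module, not in this commit)
import torch

def get_device() -> torch.device:
    """Prefer CUDA when it is available, otherwise fall back to CPU."""
    return torch.device('cuda' if torch.cuda.is_available() else 'cpu')

DEVICE = get_device()

# In app.py / feature_extractor.py / pretrained_extractors.py:
# from device_config import DEVICE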