MaxGab committed on
Commit
78e4e51
·
verified ·
1 Parent(s): 0be895a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +0 -41
app.py CHANGED
@@ -1,41 +0,0 @@
1
- import gradio as gr
2
- from transformers import AutoConfig, AutoModelForCausalLM, AutoTokenizer
3
- import torch
4
- import soundfile as sf
5
-
6
- model_name = "facebook/musicgen-small"
7
- config = AutoConfig.from_pretrained(model_name)
8
-
9
- if not hasattr(config, 'dropout'):
10
- config.dropout = 0.1
11
- if not hasattr(config, 'layerdrop'):
12
- config.layerdrop = 0.1
13
- if not hasattr(config, 'max_position_embeddings'):
14
- config.max_position_embeddings = 2048
15
- if not hasattr(config, 'num_attention_heads'):
16
- config.num_attention_heads = 16
17
- if not hasattr(config, 'num_hidden_layers'):
18
- config.num_hidden_layers = 24
19
- if not hasattr(config, 'scale_embedding'):
20
- config.scale_embedding = False
21
- if not hasattr(config, 'hidden_size'):
22
- config.hidden_size = 1024
23
- if not hasattr(config, 'num_codebooks'):
24
- config.num_codebooks = 4
25
-
26
- model = AutoModelForCausalLM.from_pretrained(model_name, config=config)
27
- tokenizer = AutoTokenizer.from_pretrained(model_name)
28
-
29
- def text_to_audio(prompt):
30
- input_ids = tokenizer(prompt, return_tensors="pt").input_ids
31
-
32
- with torch.no_grad():
33
- output = model.generate(input_ids)
34
-
35
- audio_data = output[0].cpu().numpy()
36
- audio_file = "generated_audio.wav"
37
- sf.write(audio_file, audio_data, 22050)
38
-
39
- return audio_file
40
-
41
- gr.Interface(fn=text_to_audio, inputs="text", outputs="audio").launch()