okewunmi committed on
Commit b6fd3a8 · verified · 1 Parent(s): a87e7e2

create app.py

Files changed (1)
  app.py +72 -0
app.py ADDED
@@ -0,0 +1,72 @@
+ import gradio as gr
+ import torch
+ import torchaudio
+ from transformers import AutoModelForCausalLM
+ from outetts.wav_tokenizer.decoder import WavTokenizer
+ from yarngpt.audiotokenizer import AudioTokenizer
+
+ # Initialize the model (this runs when the app starts)
+ def initialize_model():
+     # Model repo on the Hub and local WavTokenizer config/checkpoint paths
+     hf_path = "saheedniyi/YarnGPT"
+     wav_tokenizer_config_path = "wavtokenizer_config.yaml"
+     wav_tokenizer_model_path = "wavtokenizer_model.ckpt"
+
+     # Create AudioTokenizer
+     audio_tokenizer = AudioTokenizer(
+         hf_path, wav_tokenizer_model_path, wav_tokenizer_config_path
+     )
+
+     # Load model
+     model = AutoModelForCausalLM.from_pretrained(hf_path, torch_dtype="auto").to(audio_tokenizer.device)
+
+     return model, audio_tokenizer
+
+ # Generate audio from text
+ def generate_speech(text, speaker_name):
+     # Create prompt
+     prompt = audio_tokenizer.create_prompt(text, speaker_name)
+
+     # Tokenize prompt
+     input_ids = audio_tokenizer.tokenize_prompt(prompt)
+
+     # Generate output
+     output = model.generate(
+         input_ids=input_ids,
+         temperature=0.1,
+         repetition_penalty=1.1,
+         max_length=4000,
+     )
+
+     # Convert to audio codes
+     codes = audio_tokenizer.get_codes(output)
+
+     # Convert codes to audio
+     audio = audio_tokenizer.get_audio(codes)
+
+     # Save audio temporarily
+     temp_path = "output.wav"
+     torchaudio.save(temp_path, audio, sample_rate=24000)
+
+     return temp_path
+
+ # Load model globally
+ print("Loading model...")
+ model, audio_tokenizer = initialize_model()
+ print("Model loaded!")
+
+ # Create Gradio interface
+ speakers = ["idera", "emma", "jude", "osagie", "tayo", "zainab", "joke", "regina", "remi", "umar", "chinenye"]
+
+ demo = gr.Interface(
+     fn=generate_speech,
+     inputs=[
+         gr.Textbox(lines=5, placeholder="Enter text here..."),
+         gr.Dropdown(choices=speakers, label="Speaker", value="idera")
+     ],
+     outputs=gr.Audio(type="filepath"),
+     title="YarnGPT: Nigerian Accented Text-to-Speech",
+     description="Generate natural-sounding Nigerian accented speech from text."
+ )
+
+ demo.launch()
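
Once the Space built from this file is running, the Interface above can also be called programmatically. The following is a minimal sketch using the gradio_client package; the Space id "okewunmi/YarnGPT-demo" is a placeholder assumption (replace it with the actual deployed Space), and "/predict" is the default endpoint name that gr.Interface exposes.

from gradio_client import Client

# Placeholder Space id -- replace with the actual deployed Space.
client = Client("okewunmi/YarnGPT-demo")

# Positional inputs mirror the Interface: text first, then speaker name.
result = client.predict(
    "Good morning, how are you today?",
    "idera",
    api_name="/predict",
)
print(result)  # local path to the generated .wav file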