miya3333 committed
Commit 62564a6 · verified · 1 Parent(s): 200e5f9

Upload app.py

Files changed (1)
  1. app.py +4 -43
app.py CHANGED
@@ -3,6 +3,7 @@ import torch
  import soundfile as sf
  from speechbrain.inference.TTS import Tacotron2
  from speechbrain.inference.vocoders import HIFIGAN
+ from speechbrain.dataio.dataio import read_audio

  # Load the models
  hifi_gan = HIFIGAN.from_hparams(source="speechbrain/tts-hifigan-ljspeech", savedir="tmpdir_vocoder")
@@ -11,7 +12,8 @@ tacotron2 = Tacotron2.from_hparams(source="speechbrain/tts-tacotron2-ljspeech",
  # Define the inference function
  def synthesize_speech(text):
      # Convert the text into token IDs
-     tokenized = tacotron2.tokenizer(text, phonemize=False)
+     text = text.lower()
+     tokenized = tacotron2.hparams.tokenize(text, phonemize=False)
      # Convert the token IDs into a Long tensor
      tokens = torch.LongTensor(tokenized)

@@ -34,45 +36,4 @@ iface = gr.Interface(
      description="Enter text to synthesize speech."
  )

- iface.launch()
-
- # import gradio as gr
- # import torch
- # from speechbrain.inference.TTS import Tacotron2
- # from speechbrain.inference.vocoders import HIFIGAN
-
- # # Load the models
- # hifi_gan = HIFIGAN.from_hparams(source="speechbrain/tts-hifigan-ljspeech", savedir="tmpdir_vocoder")
- # tacotron2 = Tacotron2.from_hparams(source="speechbrain/tts-tacotron2-ljspeech", savedir="tmpdir_tts")
-
- # # Define the inference function
- # def synthesize_speech(text):
- #     # Generate a mel spectrogram with Tacotron2
- #     # Take the text directly as input and wrap it with LongTensor
- #     mel_output, mel_length, alignment = tacotron2.encode_batch([text])
-
- #     # Generate the waveform from the mel spectrogram with HiFi-GAN
- #     waveforms = hifi_gan.decode_batch(mel_output)
-
- #     # Save the torch tensor as a wav file
- #     torch.save(waveforms, "speech.pt")
- #     return "speech.pt"
-
- # # Create the Gradio interface
- # iface = gr.Interface(
- #     fn=synthesize_speech,
- #     inputs=gr.Textbox(lines=5, label="Input Text"),
- #     outputs=gr.Audio(label="Output Audio", type="filepath"),
- #     title="TTS Demo",
- #     description="Enter text to synthesize speech."
- # )
-
- # iface.launch()
-
-
-
-
-
-
-
-
+ iface.launch()
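
For context, here is a minimal sketch of how a SpeechBrain + Gradio TTS demo like this one is typically wired together. It is not the exact app.py from this commit: the hunk covering the Tacotron2 decoding step is not shown in the diff, so the sketch uses the documented encode_batch/decode_batch inference calls (also visible in the removed commented-out block) rather than the manual hparams.tokenize path introduced above, and it writes the waveform with soundfile so Gradio can serve it via type="filepath".

# Minimal sketch, not the exact app.py from this commit; the middle of the
# file is not shown in the diff, so this wires the two models together with
# the standard SpeechBrain inference calls end to end.
import gradio as gr
import soundfile as sf
from speechbrain.inference.TTS import Tacotron2
from speechbrain.inference.vocoders import HIFIGAN

# Load the pretrained LJSpeech models (same sources as in the diff)
hifi_gan = HIFIGAN.from_hparams(source="speechbrain/tts-hifigan-ljspeech", savedir="tmpdir_vocoder")
tacotron2 = Tacotron2.from_hparams(source="speechbrain/tts-tacotron2-ljspeech", savedir="tmpdir_tts")

def synthesize_speech(text):
    # Tacotron2 turns the input text into a mel spectrogram
    mel_output, mel_length, alignment = tacotron2.encode_batch([text])
    # HiFi-GAN turns the mel spectrogram into a waveform
    waveforms = hifi_gan.decode_batch(mel_output)
    # Write a wav file that Gradio can serve as a file path
    sf.write("speech.wav", waveforms.squeeze().cpu().numpy(), 22050)
    return "speech.wav"

# Gradio interface matching the inputs/outputs shown in the diff context
iface = gr.Interface(
    fn=synthesize_speech,
    inputs=gr.Textbox(lines=5, label="Input Text"),
    outputs=gr.Audio(label="Output Audio", type="filepath"),
    title="TTS Demo",
    description="Enter text to synthesize speech.",
)

iface.launch()

The LJSpeech checkpoints generate audio at 22.05 kHz, which is why that sample rate is passed to sf.write.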