miya3333 committed on
Commit
dfbe110
·
verified ·
1 Parent(s): 9fae8dd

Upload app.py

Browse files
Files changed (1) hide show
  1. app.py +10 -6
app.py CHANGED
@@ -3,7 +3,7 @@ import torch
3
  import soundfile as sf
4
  from speechbrain.inference.TTS import Tacotron2
5
  from speechbrain.inference.vocoders import HIFIGAN
6
- from speechbrain.dataio.dataio import read_audio
7
 
8
  # モデルのロード
9
  hifi_gan = HIFIGAN.from_hparams(source="speechbrain/tts-hifigan-ljspeech", savedir="tmpdir_vocoder")
@@ -12,13 +12,17 @@ tacotron2 = Tacotron2.from_hparams(source="speechbrain/tts-tacotron2-ljspeech",
12
  # 推論関数の定義
13
  def synthesize_speech(text):
14
  # テキストをトークンIDに変換
15
- text = text.lower()
16
- tokenized = tacotron2.hparams.tokenize(text, phonemize=False)
17
- # トークンIDをLong型のテンソルに変換
18
- tokens = torch.LongTensor(tokenized)
 
 
 
 
19
 
20
  # Tacotron2でmel spectrogramを生成
21
- mel_output, mel_length, alignment = tacotron2.encode_batch(tokens)
22
 
23
  # HiFi-GANでmel spectrogramから音声を生成
24
  waveforms = hifi_gan.decode_batch(mel_output)
 
3
  import soundfile as sf
4
  from speechbrain.inference.TTS import Tacotron2
5
  from speechbrain.inference.vocoders import HIFIGAN
6
+ from speechbrain.utils.text_to_sequence import text_to_sequence
7
 
8
  # モデルのロード
9
  hifi_gan = HIFIGAN.from_hparams(source="speechbrain/tts-hifigan-ljspeech", savedir="tmpdir_vocoder")
 
12
  # 推論関数の定義
13
  def synthesize_speech(text):
14
  # テキストをトークンIDに変換
15
+ sequence = text_to_sequence(
16
+ text,
17
+ hifi_gan.hparams.text_cleaners,
18
+ add_bos_eos=hifi_gan.hparams.add_bos_eos,
19
+ symbol_set=hifi_gan.hparams.symbol_set
20
+ )
21
+ # 系列をパディング
22
+ batch = tacotron2.mods.encoder.pad_sequence_pre([torch.tensor(sequence)])
23
 
24
  # Tacotron2でmel spectrogramを生成
25
+ mel_output, mel_length, alignment = tacotron2.encode_batch(batch)
26
 
27
  # HiFi-GANでmel spectrogramから音声を生成
28
  waveforms = hifi_gan.decode_batch(mel_output)