Spaces:

miya3333
/

TTSDemo

Running

miya3333 committed on Jan 3

Commit

dfbe110

verified ·

1 Parent(s): 9fae8dd

Upload app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -3,7 +3,7 @@ import torch
 import soundfile as sf
 from speechbrain.inference.TTS import Tacotron2
 from speechbrain.inference.vocoders import HIFIGAN
-from speechbrain.dataio.dataio import read_audio
 # モデルのロード
 hifi_gan = HIFIGAN.from_hparams(source="speechbrain/tts-hifigan-ljspeech", savedir="tmpdir_vocoder")
@@ -12,13 +12,17 @@ tacotron2 = Tacotron2.from_hparams(source="speechbrain/tts-tacotron2-ljspeech",
 # 推論関数の定義
 def synthesize_speech(text):
     # テキストをトークンIDに変換
-    text = text.lower()
-    tokenized = tacotron2.hparams.tokenize(text, phonemize=False)
-    # トークンIDをLong型のテンソルに変換
-    tokens = torch.LongTensor(tokenized)
     # Tacotron2でmel spectrogramを生成
-    mel_output, mel_length, alignment = tacotron2.encode_batch(tokens)
     # HiFi-GANでmel spectrogramから音声を生成
     waveforms = hifi_gan.decode_batch(mel_output)

 import soundfile as sf
 from speechbrain.inference.TTS import Tacotron2
 from speechbrain.inference.vocoders import HIFIGAN
+from speechbrain.utils.text_to_sequence import text_to_sequence
 # モデルのロード
 hifi_gan = HIFIGAN.from_hparams(source="speechbrain/tts-hifigan-ljspeech", savedir="tmpdir_vocoder")
 # 推論関数の定義
 def synthesize_speech(text):
     # テキストをトークンIDに変換
+    sequence = text_to_sequence(
+      text,
+      hifi_gan.hparams.text_cleaners,
+      add_bos_eos=hifi_gan.hparams.add_bos_eos,
+      symbol_set=hifi_gan.hparams.symbol_set
+    )
+    # 系列をパディング
+    batch = tacotron2.mods.encoder.pad_sequence_pre([torch.tensor(sequence)])
     # Tacotron2でmel spectrogramを生成
+    mel_output, mel_length, alignment = tacotron2.encode_batch(batch)
     # HiFi-GANでmel spectrogramから音声を生成
     waveforms = hifi_gan.decode_batch(mel_output)