Spaces:

miya3333
/

TTSDemo

Sleeping

App Files Community

miya3333 committed on Jan 3

Commit

62564a6

verified ·

1 Parent(s): 200e5f9

Upload app.py

Browse files

Files changed (1) hide show

app.py +4 -43

app.py CHANGED Viewed

@@ -3,6 +3,7 @@ import torch
 import soundfile as sf
 from speechbrain.inference.TTS import Tacotron2
 from speechbrain.inference.vocoders import HIFIGAN
 # モデルのロード
 hifi_gan = HIFIGAN.from_hparams(source="speechbrain/tts-hifigan-ljspeech", savedir="tmpdir_vocoder")
@@ -11,7 +12,8 @@ tacotron2 = Tacotron2.from_hparams(source="speechbrain/tts-tacotron2-ljspeech",
 # 推論関数の定義
 def synthesize_speech(text):
     # テキストをトークンIDに変換
-    tokenized = tacotron2.tokenizer(text, phonemize=False)
     # トークンIDをLong型のテンソルに変換
     tokens = torch.LongTensor(tokenized)
@@ -34,45 +36,4 @@ iface = gr.Interface(
     description="Enter text to synthesize speech."
 )
-iface.launch()
-# import gradio as gr
-# import torch
-# from speechbrain.inference.TTS import Tacotron2
-# from speechbrain.inference.vocoders import HIFIGAN
-# # モデルのロード
-# hifi_gan = HIFIGAN.from_hparams(source="speechbrain/tts-hifigan-ljspeech", savedir="tmpdir_vocoder")
-# tacotron2 = Tacotron2.from_hparams(source="speechbrain/tts-tacotron2-ljspeech", savedir="tmpdir_tts")
-# # 推論関数の定義
-# def synthesize_speech(text):
-#     # Tacotron2でmel spectrogramを生成
-#     # テキストを直接入力として、LongTensorでラップする
-#     mel_output, mel_length, alignment = tacotron2.encode_batch([text])
-#     # HiFi-GANでmel spectrogramから音声を生成
-#     waveforms = hifi_gan.decode_batch(mel_output)
-#     # torch tensorをwavfileとして保存
-#     torch.save(waveforms, "speech.pt")
-#     return "speech.pt"
-# # Gradioインターフェースの作成
-# iface = gr.Interface(
-#     fn=synthesize_speech,
-#     inputs=gr.Textbox(lines=5, label="Input Text"),
-#     outputs=gr.Audio(label="Output Audio", type="filepath"),
-#     title="TTS Demo",
-#     description="Enter text to synthesize speech."
-# )
-# iface.launch()

 import soundfile as sf
 from speechbrain.inference.TTS import Tacotron2
 from speechbrain.inference.vocoders import HIFIGAN
+from speechbrain.dataio.dataio import read_audio
 # モデルのロード
 hifi_gan = HIFIGAN.from_hparams(source="speechbrain/tts-hifigan-ljspeech", savedir="tmpdir_vocoder")
 # 推論関数の定義
 def synthesize_speech(text):
     # テキストをトークンIDに変換
+    text = text.lower()
+    tokenized = tacotron2.hparams.tokenize(text, phonemize=False)
     # トークンIDをLong型のテンソルに変換
     tokens = torch.LongTensor(tokenized)
     description="Enter text to synthesize speech."
 )
+iface.launch()