Spaces:

DD0101
/

VITS

Sleeping

DD0101 committed on Oct 27, 2024

Commit

a98d2b9

verified ·

1 Parent(s): 84fd7d5

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -59,7 +59,27 @@ _ = utils.load_checkpoint("pretrained_ljs.pth", net_g, None)
 st.title("VITS Text-to-Speech Demo")
 # Input text box for user to enter text
-text_input = st.text_input("Enter text to convert to speech", value="Hello world")
 if st.button("Generate Speech"):
     # Convert the text to the appropriate format (e.g., phoneme or character representation)

 st.title("VITS Text-to-Speech Demo")
 # Input text box for user to enter text
+text_input = st.text_input("Enter text to convert to speech", value="Chào mừng các bạn đến với môn Xử lí tiếng nói")
+##### A demo for the input text #####
+# Convert the text to the appropriate format (e.g., phoneme or character representation)
+stn_tst = get_text(text_input, hps)
+with torch.no_grad():
+    x_tst = stn_tst.unsqueeze(0)
+    x_tst_lengths = torch.LongTensor([stn_tst.size(0)])
+    audio = net_g.infer(x_tst, x_tst_lengths, noise_scale=.667, noise_scale_w=0.8, length_scale=1)[0][0,0].data.float().numpy()
+# Use hps.data.sampling_rate for playing the audio
+st.text("Before Fine-tuned:")
+st.audio(audio, format="audio/wav", sample_rate=hps.data.sampling_rate)
+get_vi_audio(text_input)
+st.text("After Fine-tuned:")
+st.audio("vi_output.wav", format="audio/wav")
+##### User's Inference #####
 if st.button("Generate Speech"):
     # Convert the text to the appropriate format (e.g., phoneme or character representation)