DD0101 committed on
Commit
a98d2b9
·
verified ·
1 Parent(s): 84fd7d5

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +21 -1
app.py CHANGED
@@ -59,7 +59,27 @@ _ = utils.load_checkpoint("pretrained_ljs.pth", net_g, None)
59
  st.title("VITS Text-to-Speech Demo")
60
 
61
  # Input text box for user to enter text
62
- text_input = st.text_input("Enter text to convert to speech", value="Hello world")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
63
 
64
  if st.button("Generate Speech"):
65
  # Convert the text to the appropriate format (e.g., phoneme or character representation)
 
59
  st.title("VITS Text-to-Speech Demo")
60
 
61
  # Input text box for user to enter text
62
+ text_input = st.text_input("Enter text to convert to speech", value="Chào mừng các bạn đến với môn Xử lí tiếng nói")
63
+
64
+ ##### A demo for the input text #####
65
+ # Convert the text to the appropriate format (e.g., phoneme or character representation)
66
+ stn_tst = get_text(text_input, hps)
67
+
68
+ with torch.no_grad():
69
+ x_tst = stn_tst.unsqueeze(0)
70
+ x_tst_lengths = torch.LongTensor([stn_tst.size(0)])
71
+ audio = net_g.infer(x_tst, x_tst_lengths, noise_scale=.667, noise_scale_w=0.8, length_scale=1)[0][0,0].data.float().numpy()
72
+
73
+ # Use hps.data.sampling_rate for playing the audio
74
+ st.text("Before Fine-tuned:")
75
+ st.audio(audio, format="audio/wav", sample_rate=hps.data.sampling_rate)
76
+
77
+ get_vi_audio(text_input)
78
+
79
+ st.text("After Fine-tuned:")
80
+ st.audio("vi_output.wav", format="audio/wav")
81
+
82
+ ##### User's Inference #####
83
 
84
  if st.button("Generate Speech"):
85
  # Convert the text to the appropriate format (e.g., phoneme or character representation)