Update app.py
Browse files
app.py
CHANGED
@@ -3,8 +3,10 @@ import time
|
|
3 |
import requests
|
4 |
from streamlit.components.v1 import html
|
5 |
import os
|
|
|
|
|
|
|
6 |
import speech_recognition as sr
|
7 |
-
from audio_recorder_streamlit import audio_recorder
|
8 |
|
9 |
# Import transformers and cache the help agent for performance
|
10 |
@st.cache_resource
|
@@ -212,14 +214,16 @@ def transcribe_audio(audio_bytes):
|
|
212 |
"""Convert audio bytes to text using SpeechRecognition"""
|
213 |
recognizer = sr.Recognizer()
|
214 |
try:
|
215 |
-
#
|
216 |
-
|
217 |
-
|
|
|
|
|
|
|
218 |
|
219 |
-
with sr.AudioFile(
|
220 |
audio_data = recognizer.record(source)
|
221 |
text = recognizer.recognize_google(audio_data)
|
222 |
-
os.remove("temp_audio.wav")
|
223 |
return text.lower()
|
224 |
except sr.UnknownValueError:
|
225 |
st.error("Could not understand audio")
|
@@ -227,22 +231,11 @@ def transcribe_audio(audio_bytes):
|
|
227 |
st.error(f"Speech recognition error: {e}")
|
228 |
except Exception as e:
|
229 |
st.error(f"Error processing audio: {e}")
|
230 |
-
finally:
|
231 |
-
if os.path.exists("temp_audio.wav"):
|
232 |
-
os.remove("temp_audio.wav")
|
233 |
return ""
|
234 |
|
235 |
-
def
|
236 |
-
"""
|
237 |
-
audio_bytes = audio_recorder(
|
238 |
-
pause_threshold=2.0,
|
239 |
-
text="",
|
240 |
-
recording_color="#6C63FF",
|
241 |
-
neutral_color="#6C63FF",
|
242 |
-
icon_name="microphone",
|
243 |
-
icon_size="2x",
|
244 |
-
key=key
|
245 |
-
)
|
246 |
|
247 |
if audio_bytes:
|
248 |
with st.spinner("Processing audio..."):
|
@@ -272,7 +265,7 @@ def main():
|
|
272 |
if st.session_state.game_state == "start":
|
273 |
st.markdown("""
|
274 |
<div class="question-box">
|
275 |
-
<h3>Welcome to <span style='color:#6C63FF;'>KASOTI
|
276 |
<p>Think of something and I'll try to guess it in 20 questions or less!</p>
|
277 |
<p>Choose a category:</p>
|
278 |
<ul>
|
@@ -292,7 +285,7 @@ def main():
|
|
292 |
st.write("")
|
293 |
st.write("")
|
294 |
if st.form_submit_button("🎤", key="start_mic"):
|
295 |
-
audio_text =
|
296 |
if audio_text:
|
297 |
st.session_state.category_input = audio_text
|
298 |
st.experimental_rerun()
|
@@ -338,7 +331,7 @@ def main():
|
|
338 |
st.write("")
|
339 |
st.write("")
|
340 |
if st.form_submit_button("🎤", key=f"mic_{st.session_state.current_q}"):
|
341 |
-
audio_text =
|
342 |
if audio_text:
|
343 |
st.session_state[f"answer_{st.session_state.current_q}"] = audio_text
|
344 |
st.experimental_rerun()
|
@@ -385,7 +378,7 @@ def main():
|
|
385 |
st.write("")
|
386 |
st.write("")
|
387 |
if st.button("🎤", key="help_mic"):
|
388 |
-
audio_text =
|
389 |
if audio_text:
|
390 |
st.session_state.help_query = audio_text
|
391 |
st.experimental_rerun()
|
@@ -419,7 +412,7 @@ def main():
|
|
419 |
st.write("")
|
420 |
st.write("")
|
421 |
if st.form_submit_button("🎤", key="confirm_mic"):
|
422 |
-
audio_text =
|
423 |
if audio_text:
|
424 |
st.session_state.confirm_input = audio_text
|
425 |
st.experimental_rerun()
|
|
|
3 |
import requests
|
4 |
from streamlit.components.v1 import html
|
5 |
import os
|
6 |
+
import base64
|
7 |
+
import io
|
8 |
+
from pydub import AudioSegment
|
9 |
import speech_recognition as sr
|
|
|
10 |
|
11 |
# Import transformers and cache the help agent for performance
|
12 |
@st.cache_resource
|
|
|
214 |
"""Convert audio bytes to text using SpeechRecognition"""
|
215 |
recognizer = sr.Recognizer()
|
216 |
try:
|
217 |
+
# Convert bytes to audio file
|
218 |
+
audio = AudioSegment.from_file(io.BytesIO(audio_bytes))
|
219 |
+
# Export as WAV
|
220 |
+
wav_io = io.BytesIO()
|
221 |
+
audio.export(wav_io, format="wav")
|
222 |
+
wav_io.seek(0)
|
223 |
|
224 |
+
with sr.AudioFile(wav_io) as source:
|
225 |
audio_data = recognizer.record(source)
|
226 |
text = recognizer.recognize_google(audio_data)
|
|
|
227 |
return text.lower()
|
228 |
except sr.UnknownValueError:
|
229 |
st.error("Could not understand audio")
|
|
|
231 |
st.error(f"Speech recognition error: {e}")
|
232 |
except Exception as e:
|
233 |
st.error(f"Error processing audio: {e}")
|
|
|
|
|
|
|
234 |
return ""
|
235 |
|
236 |
+
def microphone_input(key):
|
237 |
+
"""Create microphone widget and return transcribed text"""
|
238 |
+
audio_bytes = st.audio_recorder("Speak your answer", key=key)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
239 |
|
240 |
if audio_bytes:
|
241 |
with st.spinner("Processing audio..."):
|
|
|
265 |
if st.session_state.game_state == "start":
|
266 |
st.markdown("""
|
267 |
<div class="question-box">
|
268 |
+
<h3>Welcome to <span style='color:#6C63FF;'>KASOTI �</span></h3>
|
269 |
<p>Think of something and I'll try to guess it in 20 questions or less!</p>
|
270 |
<p>Choose a category:</p>
|
271 |
<ul>
|
|
|
285 |
st.write("")
|
286 |
st.write("")
|
287 |
if st.form_submit_button("🎤", key="start_mic"):
|
288 |
+
audio_text = microphone_input("start_mic")
|
289 |
if audio_text:
|
290 |
st.session_state.category_input = audio_text
|
291 |
st.experimental_rerun()
|
|
|
331 |
st.write("")
|
332 |
st.write("")
|
333 |
if st.form_submit_button("🎤", key=f"mic_{st.session_state.current_q}"):
|
334 |
+
audio_text = microphone_input(f"mic_{st.session_state.current_q}")
|
335 |
if audio_text:
|
336 |
st.session_state[f"answer_{st.session_state.current_q}"] = audio_text
|
337 |
st.experimental_rerun()
|
|
|
378 |
st.write("")
|
379 |
st.write("")
|
380 |
if st.button("🎤", key="help_mic"):
|
381 |
+
audio_text = microphone_input("help_mic")
|
382 |
if audio_text:
|
383 |
st.session_state.help_query = audio_text
|
384 |
st.experimental_rerun()
|
|
|
412 |
st.write("")
|
413 |
st.write("")
|
414 |
if st.form_submit_button("🎤", key="confirm_mic"):
|
415 |
+
audio_text = microphone_input("confirm_mic")
|
416 |
if audio_text:
|
417 |
st.session_state.confirm_input = audio_text
|
418 |
st.experimental_rerun()
|