iisadia committed on
Commit
cf15d73
·
verified ·
1 Parent(s): f7d62cd

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +18 -25
app.py CHANGED
@@ -3,8 +3,10 @@ import time
3
  import requests
4
  from streamlit.components.v1 import html
5
  import os
 
 
 
6
  import speech_recognition as sr
7
- from audio_recorder_streamlit import audio_recorder
8
 
9
  # Import transformers and cache the help agent for performance
10
  @st.cache_resource
@@ -212,14 +214,16 @@ def transcribe_audio(audio_bytes):
212
  """Convert audio bytes to text using SpeechRecognition"""
213
  recognizer = sr.Recognizer()
214
  try:
215
- # Create a temporary WAV file
216
- with open("temp_audio.wav", "wb") as f:
217
- f.write(audio_bytes)
 
 
 
218
 
219
- with sr.AudioFile("temp_audio.wav") as source:
220
  audio_data = recognizer.record(source)
221
  text = recognizer.recognize_google(audio_data)
222
- os.remove("temp_audio.wav")
223
  return text.lower()
224
  except sr.UnknownValueError:
225
  st.error("Could not understand audio")
@@ -227,22 +231,11 @@ def transcribe_audio(audio_bytes):
227
  st.error(f"Speech recognition error: {e}")
228
  except Exception as e:
229
  st.error(f"Error processing audio: {e}")
230
- finally:
231
- if os.path.exists("temp_audio.wav"):
232
- os.remove("temp_audio.wav")
233
  return ""
234
 
235
- def record_audio(key):
236
- """Record audio and return transcribed text"""
237
- audio_bytes = audio_recorder(
238
- pause_threshold=2.0,
239
- text="",
240
- recording_color="#6C63FF",
241
- neutral_color="#6C63FF",
242
- icon_name="microphone",
243
- icon_size="2x",
244
- key=key
245
- )
246
 
247
  if audio_bytes:
248
  with st.spinner("Processing audio..."):
@@ -272,7 +265,7 @@ def main():
272
  if st.session_state.game_state == "start":
273
  st.markdown("""
274
  <div class="question-box">
275
- <h3>Welcome to <span style='color:#6C63FF;'>KASOTI 🎯</span></h3>
276
  <p>Think of something and I'll try to guess it in 20 questions or less!</p>
277
  <p>Choose a category:</p>
278
  <ul>
@@ -292,7 +285,7 @@ def main():
292
  st.write("")
293
  st.write("")
294
  if st.form_submit_button("🎤", key="start_mic"):
295
- audio_text = record_audio("start_mic")
296
  if audio_text:
297
  st.session_state.category_input = audio_text
298
  st.experimental_rerun()
@@ -338,7 +331,7 @@ def main():
338
  st.write("")
339
  st.write("")
340
  if st.form_submit_button("🎤", key=f"mic_{st.session_state.current_q}"):
341
- audio_text = record_audio(f"mic_{st.session_state.current_q}")
342
  if audio_text:
343
  st.session_state[f"answer_{st.session_state.current_q}"] = audio_text
344
  st.experimental_rerun()
@@ -385,7 +378,7 @@ def main():
385
  st.write("")
386
  st.write("")
387
  if st.button("🎤", key="help_mic"):
388
- audio_text = record_audio("help_mic")
389
  if audio_text:
390
  st.session_state.help_query = audio_text
391
  st.experimental_rerun()
@@ -419,7 +412,7 @@ def main():
419
  st.write("")
420
  st.write("")
421
  if st.form_submit_button("🎤", key="confirm_mic"):
422
- audio_text = record_audio("confirm_mic")
423
  if audio_text:
424
  st.session_state.confirm_input = audio_text
425
  st.experimental_rerun()
 
3
  import requests
4
  from streamlit.components.v1 import html
5
  import os
6
+ import base64
7
+ import io
8
+ from pydub import AudioSegment
9
  import speech_recognition as sr
 
10
 
11
  # Import transformers and cache the help agent for performance
12
  @st.cache_resource
 
214
  """Convert audio bytes to text using SpeechRecognition"""
215
  recognizer = sr.Recognizer()
216
  try:
217
+ # Convert bytes to audio file
218
+ audio = AudioSegment.from_file(io.BytesIO(audio_bytes))
219
+ # Export as WAV
220
+ wav_io = io.BytesIO()
221
+ audio.export(wav_io, format="wav")
222
+ wav_io.seek(0)
223
 
224
+ with sr.AudioFile(wav_io) as source:
225
  audio_data = recognizer.record(source)
226
  text = recognizer.recognize_google(audio_data)
 
227
  return text.lower()
228
  except sr.UnknownValueError:
229
  st.error("Could not understand audio")
 
231
  st.error(f"Speech recognition error: {e}")
232
  except Exception as e:
233
  st.error(f"Error processing audio: {e}")
 
 
 
234
  return ""
235
 
236
+ def microphone_input(key):
237
+ """Create microphone widget and return transcribed text"""
238
+ audio_bytes = st.audio_recorder("Speak your answer", key=key)
 
 
 
 
 
 
 
 
239
 
240
  if audio_bytes:
241
  with st.spinner("Processing audio..."):
 
265
  if st.session_state.game_state == "start":
266
  st.markdown("""
267
  <div class="question-box">
268
+ <h3>Welcome to <span style='color:#6C63FF;'>KASOTI �</span></h3>
269
  <p>Think of something and I'll try to guess it in 20 questions or less!</p>
270
  <p>Choose a category:</p>
271
  <ul>
 
285
  st.write("")
286
  st.write("")
287
  if st.form_submit_button("🎤", key="start_mic"):
288
+ audio_text = microphone_input("start_mic")
289
  if audio_text:
290
  st.session_state.category_input = audio_text
291
  st.experimental_rerun()
 
331
  st.write("")
332
  st.write("")
333
  if st.form_submit_button("🎤", key=f"mic_{st.session_state.current_q}"):
334
+ audio_text = microphone_input(f"mic_{st.session_state.current_q}")
335
  if audio_text:
336
  st.session_state[f"answer_{st.session_state.current_q}"] = audio_text
337
  st.experimental_rerun()
 
378
  st.write("")
379
  st.write("")
380
  if st.button("🎤", key="help_mic"):
381
+ audio_text = microphone_input("help_mic")
382
  if audio_text:
383
  st.session_state.help_query = audio_text
384
  st.experimental_rerun()
 
412
  st.write("")
413
  st.write("")
414
  if st.form_submit_button("🎤", key="confirm_mic"):
415
+ audio_text = microphone_input("confirm_mic")
416
  if audio_text:
417
  st.session_state.confirm_input = audio_text
418
  st.experimental_rerun()