Spaces:

iisadia
/

KASOTI

Sleeping

App Files Community

iisadia committed on Apr 12

Commit

cf15d73

verified ·

1 Parent(s): f7d62cd

Update app.py

Browse files

Files changed (1) hide show

app.py +18 -25

app.py CHANGED Viewed

@@ -3,8 +3,10 @@ import time
 import requests
 from streamlit.components.v1 import html
 import os
 import speech_recognition as sr
-from audio_recorder_streamlit import audio_recorder
 # Import transformers and cache the help agent for performance
 @st.cache_resource
@@ -212,14 +214,16 @@ def transcribe_audio(audio_bytes):
     """Convert audio bytes to text using SpeechRecognition"""
     recognizer = sr.Recognizer()
     try:
-        # Create a temporary WAV file
-        with open("temp_audio.wav", "wb") as f:
-            f.write(audio_bytes)
-        with sr.AudioFile("temp_audio.wav") as source:
             audio_data = recognizer.record(source)
             text = recognizer.recognize_google(audio_data)
-            os.remove("temp_audio.wav")
             return text.lower()
     except sr.UnknownValueError:
         st.error("Could not understand audio")
@@ -227,22 +231,11 @@ def transcribe_audio(audio_bytes):
         st.error(f"Speech recognition error: {e}")
     except Exception as e:
         st.error(f"Error processing audio: {e}")
-    finally:
-        if os.path.exists("temp_audio.wav"):
-            os.remove("temp_audio.wav")
     return ""
-def record_audio(key):
-    """Record audio and return transcribed text"""
-    audio_bytes = audio_recorder(
-        pause_threshold=2.0,
-        text="",
-        recording_color="#6C63FF",
-        neutral_color="#6C63FF",
-        icon_name="microphone",
-        icon_size="2x",
-        key=key
-    )
     if audio_bytes:
         with st.spinner("Processing audio..."):
@@ -272,7 +265,7 @@ def main():
     if st.session_state.game_state == "start":
         st.markdown("""
         <div class="question-box">
-            <h3>Welcome to <span style='color:#6C63FF;'>KASOTI 🎯</span></h3>
             <p>Think of something and I'll try to guess it in 20 questions or less!</p>
             <p>Choose a category:</p>
             <ul>
@@ -292,7 +285,7 @@ def main():
                 st.write("")
                 st.write("")
                 if st.form_submit_button("🎤", key="start_mic"):
-                    audio_text = record_audio("start_mic")
                     if audio_text:
                         st.session_state.category_input = audio_text
                         st.experimental_rerun()
@@ -338,7 +331,7 @@ def main():
                 st.write("")
                 st.write("")
                 if st.form_submit_button("🎤", key=f"mic_{st.session_state.current_q}"):
-                    audio_text = record_audio(f"mic_{st.session_state.current_q}")
                     if audio_text:
                         st.session_state[f"answer_{st.session_state.current_q}"] = audio_text
                         st.experimental_rerun()
@@ -385,7 +378,7 @@ def main():
                 st.write("")
                 st.write("")
                 if st.button("🎤", key="help_mic"):
-                    audio_text = record_audio("help_mic")
                     if audio_text:
                         st.session_state.help_query = audio_text
                         st.experimental_rerun()
@@ -419,7 +412,7 @@ def main():
                 st.write("")
                 st.write("")
                 if st.form_submit_button("🎤", key="confirm_mic"):
-                    audio_text = record_audio("confirm_mic")
                     if audio_text:
                         st.session_state.confirm_input = audio_text
                         st.experimental_rerun()

 import requests
 from streamlit.components.v1 import html
 import os
+import base64
+import io
+from pydub import AudioSegment
 import speech_recognition as sr
 # Import transformers and cache the help agent for performance
 @st.cache_resource
     """Convert audio bytes to text using SpeechRecognition"""
     recognizer = sr.Recognizer()
     try:
+        # Convert bytes to audio file
+        audio = AudioSegment.from_file(io.BytesIO(audio_bytes))
+        # Export as WAV
+        wav_io = io.BytesIO()
+        audio.export(wav_io, format="wav")
+        wav_io.seek(0)
+        with sr.AudioFile(wav_io) as source:
             audio_data = recognizer.record(source)
             text = recognizer.recognize_google(audio_data)
             return text.lower()
     except sr.UnknownValueError:
         st.error("Could not understand audio")
         st.error(f"Speech recognition error: {e}")
     except Exception as e:
         st.error(f"Error processing audio: {e}")
     return ""
+def microphone_input(key):
+    """Create microphone widget and return transcribed text"""
+    audio_bytes = st.audio_recorder("Speak your answer", key=key)
     if audio_bytes:
         with st.spinner("Processing audio..."):
     if st.session_state.game_state == "start":
         st.markdown("""
         <div class="question-box">
+            <h3>Welcome to <span style='color:#6C63FF;'>KASOTI �</span></h3>
             <p>Think of something and I'll try to guess it in 20 questions or less!</p>
             <p>Choose a category:</p>
             <ul>
                 st.write("")
                 st.write("")
                 if st.form_submit_button("🎤", key="start_mic"):
+                    audio_text = microphone_input("start_mic")
                     if audio_text:
                         st.session_state.category_input = audio_text
                         st.experimental_rerun()
                 st.write("")
                 st.write("")
                 if st.form_submit_button("🎤", key=f"mic_{st.session_state.current_q}"):
+                    audio_text = microphone_input(f"mic_{st.session_state.current_q}")
                     if audio_text:
                         st.session_state[f"answer_{st.session_state.current_q}"] = audio_text
                         st.experimental_rerun()
                 st.write("")
                 st.write("")
                 if st.button("🎤", key="help_mic"):
+                    audio_text = microphone_input("help_mic")
                     if audio_text:
                         st.session_state.help_query = audio_text
                         st.experimental_rerun()
                 st.write("")
                 st.write("")
                 if st.form_submit_button("🎤", key="confirm_mic"):
+                    audio_text = microphone_input("confirm_mic")
                     if audio_text:
                         st.session_state.confirm_input = audio_text
                         st.experimental_rerun()