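"""Gradio speech-to-text demo.

Captures audio from the local microphone with SpeechRecognition, transcribes
it with Google's web recognizer, and streams a live countdown to the UI while
recording.
"""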
import gradio as gr
import speech_recognition as sr
from time import time, sleep
import threading
from pydub import AudioSegment
from pydub.generators import Sine
from pydub.playback import play

# Global recording state
is_recording = False

# Short generated beeps for recording start/stop. The original tried to load a
# WAV from empty bytes (io.BytesIO(b'')), which cannot be decoded, so sine
# tones are generated instead.
start_beep = AudioSegment.silent(duration=50) + Sine(880).to_audio_segment(duration=150)
end_beep = AudioSegment.silent(duration=50) + Sine(440).to_audio_segment(duration=150)


def play_start_sound():
    """Play the start-of-recording beep; ignore playback failures (e.g. no audio device)."""
    try:
        play(start_beep)
    except Exception:
        pass


def play_end_sound():
    """Play the end-of-recording beep; ignore playback failures."""
    try:
        play(end_beep)
    except Exception:
        pass


def start_recording(audio_time_limit):
    global is_recording
    is_recording = True
    recognizer = sr.Recognizer()
    microphone = sr.Microphone()
    play_start_sound()
    with microphone as source:
        recognizer.adjust_for_ambient_noise(source, duration=0.5)
        try:
            audio = recognizer.listen(source, timeout=3, phrase_time_limit=audio_time_limit)
            text = recognizer.recognize_google(audio)
            return text
        except sr.WaitTimeoutError:
            return ""
        except sr.UnknownValueError:
            return ""
        except Exception as e:
            print(f"Error: {str(e)}")
            return ""
        finally:
            play_end_sound()
            is_recording = False
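

# transcribe_audio is a generator: the recording runs in a background thread
# while the generator yields Gradio component updates, so the status box can
# show a live countdown during capture.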
def transcribe_audio(audio_time_limit=10):
    result = ""

    def execute_recording():
        nonlocal result
        result = start_recording(audio_time_limit)

    recording_thread = threading.Thread(target=execute_recording)
    recording_thread.start()
    start_time = time()
    # Checking is_alive() avoids a race where the loop exits before the
    # background thread has set the is_recording flag.
    while recording_thread.is_alive() and (time() - start_time) < audio_time_limit:
        time_elapsed = time() - start_time
        time_left = max(0, audio_time_limit - time_elapsed)
        yield gr.update(value=f"🎤 Recording... {time_left:.1f}s left", visible=True), gr.update(value="", visible=True)
        sleep(0.1)  # time.sleep; gr.sleep does not exist
    recording_thread.join()
    yield gr.update(value="✅ Done!", visible=True), gr.update(value=result, visible=True)


def create_ui():
    css = """
    .mic-button {
        background: linear-gradient(45deg, #FF3366, #BA265D) !important;
        border: none !important;
        color: white !important;
        padding: 12px !important;
        border-radius: 50% !important;
        height: 50px !important;
        width: 50px !important;
        margin-left: 10px !important;
    }
    .mic-button:hover {
        transform: scale(1.05) !important;
    }
    .input-with-mic {
        display: flex !important;
        align-items: center !important;
        gap: 10px !important;
    }
    .status-message {
        font-style: italic;
        color: #666;
        margin-top: 5px;
    }
    """
    with gr.Blocks(css=css) as demo:
        gr.Markdown("## 🎤 Speech to Text Converter")
        with gr.Group():
            with gr.Row():
                text_input = gr.Textbox(
                    label="Your Input",
                    placeholder="Click the mic button and speak...",
                    elem_classes=["input-box"],
                    scale=9
                )
                mic_button = gr.Button(
                    "🎤",
                    elem_classes=["mic-button"],
                    scale=1
                )
            # Defined in the layout (rather than inline in the click() call)
            # so its position on the page is explicit.
            time_limit = gr.Slider(5, 30, value=10, label="Recording time limit (seconds)")
            status_display = gr.Textbox(
                label="Status",
                visible=False,
                interactive=False,
                elem_classes=["status-message"]
            )
        mic_button.click(
            fn=transcribe_audio,
            inputs=[time_limit],
            outputs=[status_display, text_input],
            show_progress="hidden"
        )
        gr.Examples(
            examples=["Hello world", "How are you today?", "Please convert my speech to text"],
            inputs=text_input,
            label="Try these examples:"
        )
    return demo


if __name__ == "__main__":
    demo = create_ui()
    demo.launch(debug=True)
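
# Likely runtime dependencies (an assumption based on the imports, not listed
# in the original file): gradio, SpeechRecognition, pydub, plus PyAudio for
# sr.Microphone and simpleaudio (or ffmpeg/ffplay) for pydub playback.
# Note that sr.Microphone records on the machine running this script, so the
# app only captures speech when run locally.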