File size: 4,228 Bytes
98d16e6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e547834
98d16e6
 
 
 
 
 
 
 
 
 
 
96c3bdf
98d16e6
96c3bdf
98d16e6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e547834
98d16e6
 
e547834
98d16e6
 
 
 
e547834
98d16e6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
96c3bdf
98d16e6
 
 
 
 
e547834
98d16e6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e547834
 
98d16e6
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
import io
import threading
from time import sleep
from time import time

import gradio as gr
import speech_recognition as sr
from pydub import AudioSegment
from pydub.generators import Sine
from pydub.playback import play

# Global variables

# Set to True by start_recording() while a capture is in progress and reset to
# False when it finishes; read by transcribe_audio() to drive the countdown.
is_recording = False


def _make_beep(frequency_hz):
    """Return a short cue tone preceded by 200 ms of silence.

    The tone is synthesised instead of being decoded from embedded WAV data,
    so building it cannot fail at import time. The 100 ms crossfade blends the
    leading silence into the tone.
    """
    tone = Sine(frequency_hz).to_audio_segment(duration=300, volume=-12.0)
    return AudioSegment.silent(duration=200).append(tone, crossfade=100)


# Higher pitch marks the start of a recording, lower pitch marks the end.
start_beep = _make_beep(880)
end_beep = _make_beep(440)

def play_start_sound():
    """Play the start-of-recording cue on a best-effort basis.

    A playback failure must not prevent recording, so ordinary exceptions are
    reported on stdout and swallowed. KeyboardInterrupt and SystemExit are not
    caught and propagate to the caller.
    """
    try:
        play(start_beep)
    except Exception as exc:
        print(f"Could not play start sound: {exc}")

def play_end_sound():
    """Play the end-of-recording cue on a best-effort basis.

    A playback failure must not affect the transcription result, so ordinary
    exceptions are reported on stdout and swallowed. KeyboardInterrupt and
    SystemExit are not caught and propagate to the caller.
    """
    try:
        play(end_beep)
    except Exception as exc:
        print(f"Could not play end sound: {exc}")

def start_recording(audio_time_limit):
    """Capture one phrase from the default microphone and transcribe it.

    Sets the module-level ``is_recording`` flag for the duration of the call
    and plays the start/end cue sounds around the capture.

    Args:
        audio_time_limit: Passed to ``Recognizer.listen`` as
            ``phrase_time_limit`` (seconds).

    Returns:
        The text returned by ``recognize_google``, or ``""`` when no speech
        started within the 3 second timeout, the audio could not be
        understood, or any other error occurred (the error is printed).
    """
    global is_recording
    is_recording = True
    # Everything that can fail lives inside the try block so the flag is
    # always reset, even when the microphone cannot be opened.
    try:
        recognizer = sr.Recognizer()
        play_start_sound()

        with sr.Microphone() as source:
            recognizer.adjust_for_ambient_noise(source, duration=0.5)
            audio = recognizer.listen(
                source, timeout=3, phrase_time_limit=audio_time_limit
            )

        # The microphone is released before the recognition request is made.
        return recognizer.recognize_google(audio)
    except (sr.WaitTimeoutError, sr.UnknownValueError):
        # Nothing was said in time, or the speech was unintelligible.
        return ""
    except Exception as e:
        print(f"Error: {str(e)}")
        return ""
    finally:
        play_end_sound()
        is_recording = False

def transcribe_audio(audio_time_limit=10):
    """Record in a background thread and stream status updates to the UI.

    This is a generator. While the recording thread is running and the time
    limit has not elapsed, it yields a ``(status_update, text_update)`` pair
    roughly every 0.1 s showing the remaining time. When the thread has
    finished it yields a final pair carrying the transcription.

    Args:
        audio_time_limit: Countdown length in seconds; also forwarded to
            ``start_recording``.

    Yields:
        Tuples of two Gradio update dictionaries: one for the status box and
        one for the text box.
    """
    result = ""

    def execute_recording():
        nonlocal result
        result = start_recording(audio_time_limit)

    recording_thread = threading.Thread(target=execute_recording)
    recording_thread.start()

    start_time = time()
    # Poll the thread itself rather than the is_recording flag: the flag is
    # set inside the worker and may still be False right after start().
    while recording_thread.is_alive():
        time_left = audio_time_limit - (time() - start_time)
        if time_left <= 0:
            break
        yield (
            {
                "__type__": "update",
                "value": f"🎤 Recording... {time_left:.1f}s left",
                "visible": True,
            },
            {"__type__": "update", "value": "", "visible": True},
        )
        sleep(0.1)

    # The worker may outlive the countdown (calibration, listen timeout,
    # recognition), so wait for it before reading the result.
    recording_thread.join()
    yield (
        {"__type__": "update", "value": "✅ Done!", "visible": True},
        {"__type__": "update", "value": result, "visible": True},
    )

def create_ui():
    """Build the speech-to-text Gradio interface.

    The layout is a text box with a round microphone button beside it, a
    hidden status box, a slider for the recording time limit, and a few
    example inputs. Clicking the microphone button runs ``transcribe_audio``
    with the slider value and streams its output into the status and text
    boxes.

    Returns:
        The assembled ``gr.Blocks`` app, not yet launched.
    """
    css = """
    .mic-button {
        background: linear-gradient(45deg, #FF3366, #BA265D) !important;
        border: none !important;
        color: white !important;
        padding: 12px !important;
        border-radius: 50% !important;
        height: 50px !important;
        width: 50px !important;
        margin-left: 10px !important;
    }
    .mic-button:hover {
        transform: scale(1.05) !important;
    }
    .input-with-mic {
        display: flex !important;
        align-items: center !important;
        gap: 10px !important;
    }
    .status-message {
        font-style: italic;
        color: #666;
        margin-top: 5px;
    }
    """

    with gr.Blocks(css=css) as demo:
        gr.Markdown("## 🎤 Speech to Text Converter")

        with gr.Group():
            with gr.Row():
                text_input = gr.Textbox(
                    label="Your Input",
                    placeholder="Click the mic button and speak...",
                    elem_classes=["input-box"],
                    scale=9,
                )
                mic_button = gr.Button(
                    "🎤",
                    elem_classes=["mic-button"],
                    scale=1,
                )

            # Starts hidden; transcribe_audio's updates make it visible.
            status_display = gr.Textbox(
                label="Status",
                visible=False,
                interactive=False,
                elem_classes=["status-message"],
            )

        # Declared as a named component (instead of inline in the inputs
        # list) so its place in the layout is explicit.
        time_limit_slider = gr.Slider(
            5, 30, value=10, label="Recording time limit (seconds)"
        )

        mic_button.click(
            fn=transcribe_audio,
            inputs=[time_limit_slider],
            outputs=[status_display, text_input],
            show_progress="hidden",
        )

        gr.Examples(
            examples=["Hello world", "How are you today?", "Please convert my speech to text"],
            inputs=text_input,
            label="Try these examples:",
        )

    return demo

if __name__ == "__main__":
    # Script entry point: build the interface and serve it in debug mode.
    create_ui().launch(debug=True)