# Hugging Face Space: iisadia/Testing_LLM_Project (status: Sleeping)
#
# File size: 4,228 bytes

import gradio as gr
import speech_recognition as sr
from time import time
import threading
from pydub import AudioSegment
from pydub.playback import play
import io

# Global variables
# Flag set by start_recording() for the duration of a capture.
is_recording = False


def _build_beep(wav_bytes=b'', lead_in_ms=200, crossfade_ms=100):
    """Return a short silent lead-in, followed by the WAV cue when one is given.

    Args:
        wav_bytes: Raw WAV file contents for the cue. Empty bytes (the current
            placeholder) yield the silent lead-in only, because empty data
            cannot be decoded as WAV.
        lead_in_ms: Length of the leading silence in milliseconds.
        crossfade_ms: Requested crossfade between the silence and the cue.

    Returns:
        An ``AudioSegment`` that can be handed to ``play``.
    """
    lead_in = AudioSegment.silent(duration=lead_in_ms)
    if not wav_bytes:
        return lead_in
    tone = AudioSegment.from_wav(io.BytesIO(wav_bytes))
    # append() rejects a crossfade longer than either segment, so clamp it.
    fade = min(crossfade_ms, len(lead_in), len(tone))
    return lead_in.append(tone, crossfade=fade)


# Placeholder cues: supply real WAV bytes here to get audible beeps.
start_beep = _build_beep(b'')
end_beep = _build_beep(b'')

def play_start_sound():
    """Play the start-of-recording cue on a best-effort basis.

    Playback problems are reported on stdout and otherwise ignored, so a
    missing or broken audio output never prevents a recording.

    Returns:
        None.
    """
    try:
        play(start_beep)
    except Exception as e:
        # Catch Exception rather than everything so that KeyboardInterrupt
        # and SystemExit still propagate to the caller.
        print(f"Error playing start sound: {str(e)}")

def play_end_sound():
    """Play the end-of-recording cue on a best-effort basis.

    Playback problems are reported on stdout and otherwise ignored, so a
    missing or broken audio output never affects the transcription result.

    Returns:
        None.
    """
    try:
        play(end_beep)
    except Exception as e:
        # Catch Exception rather than everything so that KeyboardInterrupt
        # and SystemExit still propagate to the caller.
        print(f"Error playing end sound: {str(e)}")

def start_recording(audio_time_limit):
    """Record one phrase from the microphone and return its transcription.

    The module-level ``is_recording`` flag is True for the whole call and is
    always reset to False on exit, including when the microphone cannot be
    opened. The start cue is played before listening; once it has been
    played, the end cue is always played as well.

    Args:
        audio_time_limit: Maximum phrase length in seconds, forwarded to
            ``Recognizer.listen`` as ``phrase_time_limit``.

    Returns:
        The text returned by ``Recognizer.recognize_google``, or ``""`` when
        no speech started within the 3 second timeout, the speech could not
        be understood, or any other error occurred (the error is printed).
    """
    global is_recording
    is_recording = True
    try:
        recognizer = sr.Recognizer()
        microphone = sr.Microphone()

        play_start_sound()
        try:
            with microphone as source:
                recognizer.adjust_for_ambient_noise(source, duration=0.5)
                audio = recognizer.listen(
                    source, timeout=3, phrase_time_limit=audio_time_limit
                )
                return recognizer.recognize_google(audio)
        finally:
            # Pair every start cue with an end cue, whatever happened between.
            play_end_sound()
    except (sr.WaitTimeoutError, sr.UnknownValueError):
        # Nothing was said in time, or it was unintelligible: not an error.
        return ""
    except Exception as e:
        print(f"Error: {str(e)}")
        return ""
    finally:
        # Must run even if device setup failed, otherwise the flag would stay
        # True forever.
        is_recording = False

def transcribe_audio(audio_time_limit=10):
    """Run a recording in the background and stream status updates to the UI.

    ``start_recording`` runs in a worker thread. While it is running and the
    time limit has not elapsed, a countdown is yielded roughly every 0.1 s.
    After the worker finishes, a final update carries the transcription.

    Args:
        audio_time_limit: Recording limit in seconds; also bounds how long
            the countdown is shown.

    Yields:
        Tuples ``(status_update, text_update)`` of component-update dicts for
        the status box and the text input respectively.
    """
    # The module only imports `time` from `time`, and Gradio has no sleep
    # helper, so bring `sleep` in locally.
    from time import sleep

    result = ""

    def execute_recording():
        nonlocal result
        result = start_recording(audio_time_limit)

    recording_thread = threading.Thread(target=execute_recording)
    recording_thread.start()

    start_time = time()
    # Poll the thread itself rather than the shared flag: the flag is only set
    # once the worker has started running, so checking it here could end the
    # countdown before it begins.
    while recording_thread.is_alive():
        time_left = audio_time_limit - (time() - start_time)
        if time_left <= 0:
            break
        yield {"__type__": "update", "value": f"🎤 Recording... {time_left:.1f}s left", "visible": True}, {"__type__": "update", "value": "", "visible": True}
        sleep(0.1)

    # Recognition may still be in progress after the countdown ends.
    recording_thread.join()
    yield {"__type__": "update", "value": "✅ Done!", "visible": True}, {"__type__": "update", "value": result, "visible": True}

def create_ui():
    """Assemble the speech-to-text Gradio Blocks interface.

    The layout is a heading, a row holding the text box and the microphone
    button, an initially hidden status box, a slider for the recording time
    limit, and a set of example phrases for the text box. Clicking the
    microphone button runs ``transcribe_audio`` with the slider value and
    routes its output to the status box and the text box.

    Returns:
        The constructed ``gr.Blocks`` application (not yet launched).
    """
    stylesheet = """
    .mic-button {
        background: linear-gradient(45deg, #FF3366, #BA265D) !important;
        border: none !important;
        color: white !important;
        padding: 12px !important;
        border-radius: 50% !important;
        height: 50px !important;
        width: 50px !important;
        margin-left: 10px !important;
    }
    .mic-button:hover {
        transform: scale(1.05) !important;
    }
    .input-with-mic {
        display: flex !important;
        align-items: center !important;
        gap: 10px !important;
    }
    .status-message {
        font-style: italic;
        color: #666;
        margin-top: 5px;
    }
    """

    with gr.Blocks(css=stylesheet) as app:
        gr.Markdown("## 🎤 Speech to Text Converter")

        with gr.Group():
            with gr.Row():
                # Text box and mic button share the row at a 9:1 scale.
                text_input = gr.Textbox(
                    scale=9,
                    label="Your Input",
                    placeholder="Click the mic button and speak...",
                    elem_classes=["input-box"],
                )
                mic_button = gr.Button("🎤", scale=1, elem_classes=["mic-button"])

            # Hidden until the first status update makes it visible.
            status_display = gr.Textbox(
                label="Status",
                interactive=False,
                visible=False,
                elem_classes=["status-message"],
            )

        # Supplies the recording time limit handed to transcribe_audio.
        time_limit_slider = gr.Slider(
            5, 30, value=10, label="Recording time limit (seconds)"
        )
        mic_button.click(
            fn=transcribe_audio,
            inputs=[time_limit_slider],
            outputs=[status_display, text_input],
            show_progress="hidden",
        )

        example_phrases = [
            "Hello world",
            "How are you today?",
            "Please convert my speech to text",
        ]
        gr.Examples(
            examples=example_phrases,
            inputs=text_input,
            label="Try these examples:",
        )

    return app

if __name__ == "__main__":
    # Running as a script: build the interface, then serve it in debug mode.
    launch_options = {"debug": True}
    demo = create_ui()
    demo.launch(**launch_options)