"""Gradio front end for the real-time speaker diarization demo.

Builds the Blocks UI, polls the model server for a status string, and
exposes a helper to mount the UI on an existing FastAPI application.
"""

import gradio as gr
from fastapi import FastAPI

from shared import (
    ABSOLUTE_MAX_SPEAKERS,
    DEFAULT_CHANGE_THRESHOLD,
    DEFAULT_MAX_SPEAKERS,
    FINAL_TRANSCRIPTION_MODEL,
    REALTIME_TRANSCRIPTION_MODEL,
)

print(gr.__version__)

# Connection configuration (separate signaling server from model server)
# These will be replaced at deployment time with the correct URLs
RENDER_SIGNALING_URL = "wss://render-signal-audio.onrender.com/stream"
HF_SPACE_URL = "https://androidguy-speaker-diarization.hf.space"

# How often (seconds) the UI polls the model server for its status.
STATUS_POLL_INTERVAL_SECONDS = 5
# Upper bound (seconds) on a single status request. Kept below the poll
# interval so a stalled backend cannot block the timer callback forever.
STATUS_REQUEST_TIMEOUT_SECONDS = 3


def build_ui() -> gr.Blocks:
    """Build the Gradio UI for speaker diarization.

    Returns:
        The assembled ``gr.Blocks`` application. A timer inside it calls
        ``{HF_SPACE_URL}/status`` periodically and writes the result into
        the status Markdown component.
    """
    with gr.Blocks(
        title="Real-time Speaker Diarization",
        theme=gr.themes.Soft(),
    ) as demo:
        # Add configuration variables to page using custom component
        # NOTE(review): the markup inside this literal is empty in the
        # reviewed source; presumably it carried the connection URLs -
        # confirm against the deployed file before relying on it.
        gr.HTML(
            f""" """
        )

        # Header and description
        gr.Markdown("# 🎤 Live Speaker Diarization")
        gr.Markdown(
            "Real-time speech recognition with automatic speaker identification"
        )

        # Add transcription model info
        gr.Markdown(
            f"**Using Models:** Final: {FINAL_TRANSCRIPTION_MODEL}, "
            f"Realtime: {REALTIME_TRANSCRIPTION_MODEL}"
        )

        # Status indicator
        connection_status = gr.HTML(
            """
Waiting to connect...
"""
        )

        with gr.Row():
            with gr.Column(scale=2):
                # Conversation display with embedded JavaScript for WebRTC
                # and audio handling
                conversation_display = gr.HTML(
                    """
Click 'Start Listening' to begin...
""",
                    label="Live Conversation",
                )

                # Control buttons
                with gr.Row():
                    start_btn = gr.Button(
                        "▶️ Start Listening", variant="primary", size="lg"
                    )
                    stop_btn = gr.Button("⏹️ Stop", variant="stop", size="lg")
                    clear_btn = gr.Button(
                        "🗑️ Clear", variant="secondary", size="lg"
                    )

                # Status display
                status_output = gr.Markdown(
                    """
                    ## System Status
                    Waiting to connect...

                    *Click Start Listening to begin*
                    """,
                    label="Status Information",
                )

            with gr.Column(scale=1):
                # Settings
                gr.Markdown("## ⚙️ Settings")

                threshold_slider = gr.Slider(
                    minimum=0.3,
                    maximum=0.9,
                    step=0.05,
                    value=DEFAULT_CHANGE_THRESHOLD,
                    label="Speaker Change Sensitivity",
                    info="Lower = more sensitive (more speaker changes)",
                )

                max_speakers_slider = gr.Slider(
                    minimum=2,
                    maximum=ABSOLUTE_MAX_SPEAKERS,
                    step=1,
                    value=DEFAULT_MAX_SPEAKERS,
                    label="Maximum Speakers",
                )

                update_btn = gr.Button("Update Settings", variant="secondary")

                # Instructions
                gr.Markdown(
                    """
                    ## 📋 Instructions
                    1. **Start Listening** - allows browser to access microphone
                    2. **Speak** - system will transcribe and identify speakers
                    3. **Stop** when finished
                    4. **Clear** to reset conversation

                    ## 🎨 Speaker Colors
                    - 🔴 Speaker 1 (Red)
                    - 🟢 Speaker 2 (Teal)
                    - 🔵 Speaker 3 (Blue)
                    - 🟡 Speaker 4 (Green)
                    - ⭐ Speaker 5 (Yellow)
                    - 🟣 Speaker 6 (Plum)
                    - 🟤 Speaker 7 (Mint)
                    - 🟠 Speaker 8 (Gold)
                    """
                )

        # JavaScript to connect buttons to the script functions
        # NOTE(review): this literal is also empty in the reviewed source.
        gr.HTML(""" """)

        # Set up periodic status updates
        def get_status() -> str:
            """Fetch the system status from the model server.

            Called periodically by the timer below.

            Returns:
                The ``status`` field of the JSON response, a fallback
                message when the response is unusable, or a
                ``"Connection error: ..."`` message when the request or
                JSON decoding fails. Never raises for network problems,
                so a flaky backend cannot break the UI refresh loop.
            """
            # Imported lazily, as in the original, so the module can be
            # imported without `requests` until the first status poll.
            import requests

            try:
                resp = requests.get(
                    f"{HF_SPACE_URL}/status",
                    timeout=STATUS_REQUEST_TIMEOUT_SECONDS,
                )
                if resp.status_code != 200:
                    return "Error getting status"
                payload = resp.json()
            except (requests.RequestException, ValueError) as e:
                # ValueError covers JSON decoding failures on requests
                # versions whose decode error is not a RequestException.
                return f"Connection error: {e}"

            # Guard against a JSON body that is not an object.
            if not isinstance(payload, dict):
                return "Error getting status"
            return payload.get("status", "No status information")

        status_timer = gr.Timer(STATUS_POLL_INTERVAL_SECONDS)
        status_timer.tick(fn=get_status, outputs=status_output)

    return demo


# Create Gradio interface
demo = build_ui()


def mount_ui(app: FastAPI) -> None:
    """Mount the Gradio app on *app* under the ``/ui`` path.

    Args:
        app: The FastAPI application that should serve the UI.
    """
    # NOTE(review): relies on `demo.app` being available at mount time;
    # confirm for the installed Gradio version (the documented helper is
    # `gr.mount_gradio_app`).
    app.mount("/ui", demo.app)


# For standalone testing
if __name__ == "__main__":
    demo.launch()