"""Gradio front end for the real-time speaker diarization service."""

import logging
import os

import gradio as gr
from fastapi import FastAPI

from shared import DEFAULT_CHANGE_THRESHOLD, DEFAULT_MAX_SPEAKERS, ABSOLUTE_MAX_SPEAKERS, FINAL_TRANSCRIPTION_MODEL, REALTIME_TRANSCRIPTION_MODEL

logger = logging.getLogger(__name__)

# Connection configuration (separate signaling server from model server).
# Each value can be overridden through the environment variable of the same name.
RENDER_SIGNALING_URL = os.environ.get("RENDER_SIGNALING_URL", "wss://render-signal-audio.onrender.com/stream")
HF_SPACE_URL = os.environ.get("HF_SPACE_URL", "https://androidguy-speaker-diarization.hf.space")


def build_ui() -> gr.Blocks:
    """Build the Gradio Blocks UI for live speaker diarization.

    The layout has a connection-status banner, a conversation panel with
    Start/Stop/Clear buttons and a status area on the left, and the settings
    sliders plus usage instructions on the right. A timer polls the backend
    ``/status`` endpoint every 10 seconds and writes the result into the
    status area.

    Returns:
        The assembled, not yet launched, ``gr.Blocks`` instance.
    """
    with gr.Blocks(title="Real-time Speaker Diarization", theme=gr.themes.Soft()) as demo:
        # Add configuration variables to page using custom component.
        # NOTE(review): this literal is blank in the visible source; it
        # presumably carried markup exposing the connection URLs to the
        # browser -- confirm against the original file.
        gr.HTML(
            f""" """
        )

        # Header and description
        gr.Markdown("# 🎤 Live Speaker Diarization")
        gr.Markdown("Real-time speech recognition with automatic speaker identification")

        # Add transcription model info
        gr.Markdown(f"**Using Models:** Final: {FINAL_TRANSCRIPTION_MODEL}, Realtime: {REALTIME_TRANSCRIPTION_MODEL}")

        # Status indicator.
        # NOTE(review): only the text content is visible here; any wrapping
        # markup appears to have been lost -- confirm.
        connection_status = gr.HTML(
            """
            Waiting to connect...
            """,
            elem_id="connection-status",
        )

        with gr.Row():
            with gr.Column(scale=2):
                # Conversation display.
                # NOTE(review): the original comment mentions embedded
                # JavaScript for WebRTC/audio handling, none of which is
                # visible in this literal -- confirm.
                conversation_display = gr.HTML(
                    """
                    Click 'Start Listening' to begin...
                    """,
                    label="Live Conversation",
                )

                # Control buttons with elem_id for reliable selection
                with gr.Row():
                    start_btn = gr.Button("▶️ Start Listening", variant="primary", size="lg", elem_id="btn-start")
                    stop_btn = gr.Button("⏹️ Stop", variant="stop", size="lg", elem_id="btn-stop")
                    clear_btn = gr.Button("🗑️ Clear", variant="secondary", size="lg", elem_id="btn-clear")

                # Status display with elem_id for reliable selection
                status_output = gr.Markdown(
                    """
                    ## System Status
                    Waiting to connect...

                    *Click Start Listening to begin*
                    """,
                    label="Status Information",
                    elem_id="status-output",
                )

            with gr.Column(scale=1):
                # Settings
                gr.Markdown("## ⚙️ Settings")

                threshold_slider = gr.Slider(
                    minimum=0.3,
                    maximum=0.9,
                    step=0.05,
                    value=DEFAULT_CHANGE_THRESHOLD,
                    label="Speaker Change Sensitivity",
                    info="Lower = more sensitive (more speaker changes)",
                    elem_id="threshold-slider",
                )

                max_speakers_slider = gr.Slider(
                    minimum=2,
                    maximum=ABSOLUTE_MAX_SPEAKERS,
                    step=1,
                    value=DEFAULT_MAX_SPEAKERS,
                    label="Maximum Speakers",
                    elem_id="speakers-slider",
                )

                update_btn = gr.Button("Update Settings", variant="secondary", elem_id="btn-update")

                # Instructions
                gr.Markdown("""
                ## 📋 Instructions
                1. **Start Listening** - allows browser to access microphone
                2. **Speak** - system will transcribe and identify speakers
                3. **Stop** when finished
                4. **Clear** to reset conversation

                ## 🎨 Speaker Colors
                - 🔴 Speaker 1 (Red)
                - 🟢 Speaker 2 (Teal)
                - 🔵 Speaker 3 (Blue)
                - 🟡 Speaker 4 (Green)
                - ⭐ Speaker 5 (Yellow)
                - 🟣 Speaker 6 (Plum)
                - 🟤 Speaker 7 (Mint)
                - 🟠 Speaker 8 (Gold)
                """)

        def get_status() -> str:
            """Fetch the backend status text; called periodically by the timer.

            Returns:
                The ``formatted_text`` field of the ``/status`` JSON response
                (or a placeholder if the field is absent), a generic error
                string for any non-200 response, or an "unavailable" message
                if the request or the decoding fails.
            """
            import requests

            try:
                # Use a short timeout to prevent the UI from hanging.
                resp = requests.get(f"{HF_SPACE_URL}/status", timeout=2)
                if resp.status_code == 200:
                    return resp.json().get('formatted_text', 'No status information')
                return "Error getting status"
            except Exception:
                # Deliberately best-effort: a failed poll must never break the
                # UI, but record the cause so it can be diagnosed.
                logger.debug("Status poll against %s failed", HF_SPACE_URL, exc_info=True)
                return "Status update unavailable: Backend may be offline"

        # Periodic status updates: one poll every 10 seconds.
        status_timer = gr.Timer(10)
        status_timer.tick(fn=get_status, outputs=status_output)

    return demo


# Create Gradio interface
demo = build_ui()


def mount_ui(app: FastAPI) -> None:
    """Mount the Gradio UI on *app* under the ``/ui`` path.

    Uses Gradio's documented mounting helper instead of reaching for
    ``demo.app`` directly, which is not guaranteed to exist before the
    Blocks has been launched.

    Args:
        app: The FastAPI application that should serve the UI.
    """
    gr.mount_gradio_app(app, demo, path="/ui")


# For standalone testing
if __name__ == "__main__":
    demo.launch()