Saiyaswanth007 committed on
Commit
3e9ecf3
1 Parent(s): 1dd5469
Files changed (1)
  1. ui.py +472 -496
ui.py CHANGED
@@ -1,530 +1,506 @@
1
  import gradio as gr
2
- import asyncio
3
- import websockets
4
- import json
5
- import logging
6
- import time
7
- from typing import Dict, Any, Optional
8
- import threading
9
- from queue import Queue
10
- import base64
11
- import numpy as np
12
- import os
13
-
14
- # Configure logging
15
- logging.basicConfig(level=logging.INFO)
16
- logger = logging.getLogger(__name__)
17
-
18
- # Environment-configurable HF Space URL (matching backend.py)
19
- HF_SPACE_URL = os.getenv("HF_SPACE_URL", "https://androidguy-speaker-diarization.hf.space")
20
- API_WS = f"wss://{HF_SPACE_URL}/ws_inference"
21
-
22
- class TranscriptionWebSocketServer:
23
- """WebSocket server that receives audio from backend and returns transcription results"""
24
-
25
- def __init__(self):
26
- self.connected_clients = set()
27
- self.is_running = False
28
- self.websocket_server = None
29
- self.conversation_history = []
30
- self.processing_stats = {
31
- "total_audio_chunks": 0,
32
- "total_transcriptions": 0,
33
- "last_audio_received": None,
34
- "server_start_time": time.time(),
35
- "backend_url": HF_SPACE_URL
36
- }
37
 
38
- async def handle_client_connection(self, websocket, path):
39
- """Handle incoming WebSocket connections from the backend"""
40
- client_addr = websocket.remote_address
41
- logger.info(f"Backend client connected from {client_addr}")
42
 
43
- self.connected_clients.add(websocket)
 
44
 
45
- try:
46
- # Send initial connection acknowledgment
47
- await websocket.send(json.dumps({
48
- "type": "connection_ack",
49
- "status": "connected",
50
- "timestamp": time.time(),
51
- "message": "HuggingFace transcription service ready"
52
- }))
53
-
54
- # Handle incoming messages/audio data
55
- async for message in websocket:
56
- try:
57
- if isinstance(message, bytes):
58
- # Handle binary audio data
59
- await self.process_audio_data(message, websocket)
60
- else:
61
- # Handle text messages (JSON)
62
- await self.handle_text_message(message, websocket)
63
-
64
- except Exception as e:
65
- logger.error(f"Error processing message: {e}")
66
- await self.send_error(websocket, f"Processing error: {str(e)}")
67
-
68
- except websockets.exceptions.ConnectionClosed:
69
- logger.info("Backend client disconnected")
70
- except Exception as e:
71
- logger.error(f"Client connection error: {e}")
72
- finally:
73
- self.connected_clients.discard(websocket)
74
- logger.info(f"Client removed. Active connections: {len(self.connected_clients)}")
75
-
76
- async def process_audio_data(self, audio_data: bytes, websocket):
77
- """Process incoming audio data and return transcription results"""
78
- try:
79
- self.processing_stats["total_audio_chunks"] += 1
80
- self.processing_stats["last_audio_received"] = time.time()
81
-
82
- logger.debug(f"Received {len(audio_data)} bytes of audio data")
83
-
84
- # Try to import and use your inference functions
85
- try:
86
- from inference import transcribe_audio, identify_speakers
87
-
88
- # Process the audio for transcription
89
- transcription_result = await transcribe_audio(audio_data)
90
-
91
- if transcription_result:
92
- # Process for speaker diarization if available
93
- try:
94
- speaker_info = await identify_speakers(audio_data)
95
- transcription_result.update(speaker_info)
96
- except Exception as e:
97
- logger.warning(f"Speaker diarization failed: {e}")
98
- transcription_result["speaker"] = "Unknown"
99
-
100
- # Update conversation history
101
- self.update_conversation_history(transcription_result)
102
 
103
- # Send result back to backend
104
- response = {
105
- "type": "processing_result",
106
- "timestamp": time.time(),
107
- "data": transcription_result
 
 
 
108
  }
109
 
110
- await websocket.send(json.dumps(response))
111
- self.processing_stats["total_transcriptions"] += 1
112
-
113
- logger.info(f"Sent transcription result: {transcription_result.get('text', '')[:50]}...")
114
-
115
- except ImportError:
116
- # Fallback if inference module is not available
117
- logger.warning("Inference module not found, using mock transcription")
118
-
119
- # Try to use shared.py for processing if available
120
- try:
121
- from shared import RealtimeSpeakerDiarization
122
-
123
- # Initialize if not already initialized
124
- if not hasattr(self, 'diarization_system'):
125
- self.diarization_system = RealtimeSpeakerDiarization()
126
- await asyncio.to_thread(self.diarization_system.initialize_models)
127
- await asyncio.to_thread(self.diarization_system.start_recording)
128
-
129
- # Process the audio chunk
130
- result = await asyncio.to_thread(self.diarization_system.process_audio_chunk, audio_data)
131
-
132
- # Format result for response
133
- if result and result["status"] != "error":
134
- mock_result = {
135
- "text": result.get("text", f"[Processing {len(audio_data)} bytes]"),
136
- "speaker": f"Speaker_{result.get('speaker_id', 0) + 1}",
137
- "confidence": result.get("similarity", 0.85),
138
- "timestamp": time.time()
139
  }
140
- else:
141
- # Fallback mock result
142
- mock_result = {
143
- "text": f"[Mock transcription - {len(audio_data)} bytes processed]",
144
- "speaker": "Speaker_1",
145
- "confidence": 0.85,
146
- "timestamp": time.time()
147
  }
148
-
149
- # Update conversation history
150
- self.update_conversation_history(mock_result)
151
-
152
- response = {
153
- "type": "processing_result",
154
- "timestamp": time.time(),
155
- "data": mock_result
156
  }
157
 
158
- await websocket.send(json.dumps(response))
159
- self.processing_stats["total_transcriptions"] += 1
160
-
161
- except Exception as e:
162
- logger.warning(f"Failed to use shared module: {e}")
163
-
164
- # Basic mock transcription as last resort
165
- mock_result = {
166
- "text": f"[Mock transcription - {len(audio_data)} bytes processed]",
167
- "speaker": "Speaker_1",
168
- "confidence": 0.85,
169
- "timestamp": time.time()
170
  }
171
 
172
- self.update_conversation_history(mock_result)
173
-
174
- response = {
175
- "type": "processing_result",
176
- "timestamp": time.time(),
177
- "data": mock_result
178
  }
179
 
180
- await websocket.send(json.dumps(response))
181
-
182
- except Exception as e:
183
- logger.error(f"Audio processing error: {e}")
184
- await self.send_error(websocket, f"Audio processing failed: {str(e)}")
185
 
186
- async def handle_text_message(self, message: str, websocket):
187
- """Handle text-based messages from backend"""
188
- try:
189
- data = json.loads(message)
190
- message_type = data.get("type", "unknown")
191
-
192
- logger.info(f"Received message type: {message_type}")
193
-
194
- if message_type == "ping":
195
- # Respond to ping with pong
196
- await websocket.send(json.dumps({
197
- "type": "pong",
198
- "timestamp": time.time()
199
- }))
200
 
201
- elif message_type == "config":
202
- # Handle configuration updates
203
- logger.info(f"Configuration update: {data}")
 
 
204
 
205
- # Apply configuration settings if available
206
- settings = data.get("settings", {})
207
- if "max_speakers" in settings:
208
- max_speakers = settings.get("max_speakers")
209
- logger.info(f"Setting max_speakers to {max_speakers}")
210
 
211
- if "threshold" in settings:
212
- threshold = settings.get("threshold")
213
- logger.info(f"Setting speaker change threshold to {threshold}")
214
 
215
- # Send acknowledgment
216
- await websocket.send(json.dumps({
217
- "type": "config_ack",
218
- "message": "Configuration received",
219
- "timestamp": time.time()
220
- }))
 
221
 
222
- elif message_type == "status_request":
223
- # Send status information
224
- await websocket.send(json.dumps({
225
- "type": "status_response",
226
- "data": self.get_processing_stats(),
227
- "timestamp": time.time()
228
- }))
229
 
230
- else:
231
- logger.warning(f"Unknown message type: {message_type}")
232
 
233
- except json.JSONDecodeError:
234
- logger.error(f"Invalid JSON received: {message}")
235
- await self.send_error(websocket, "Invalid JSON format")
236
-
237
- async def send_error(self, websocket, error_message: str):
238
- """Send error message to client"""
239
- try:
240
- await websocket.send(json.dumps({
241
- "type": "error",
242
- "message": error_message,
243
- "timestamp": time.time()
244
- }))
245
- except Exception as e:
246
- logger.error(f"Failed to send error message: {e}")
247
-
248
- def update_conversation_history(self, transcription_result: Dict[str, Any]):
249
- """Update conversation history with new transcription"""
250
- history_entry = {
251
- "timestamp": time.time(),
252
- "text": transcription_result.get("text", ""),
253
- "speaker": transcription_result.get("speaker", "Unknown"),
254
- "confidence": transcription_result.get("confidence", 0.0)
255
- }
256
-
257
- self.conversation_history.append(history_entry)
258
-
259
- # Keep only last 50 entries to prevent memory issues
260
- if len(self.conversation_history) > 50:
261
- self.conversation_history = self.conversation_history[-50:]
262
-
263
- def get_processing_stats(self):
264
- """Get processing statistics"""
265
- return {
266
- "connected_clients": len(self.connected_clients),
267
- "total_audio_chunks": self.processing_stats["total_audio_chunks"],
268
- "total_transcriptions": self.processing_stats["total_transcriptions"],
269
- "last_audio_received": self.processing_stats["last_audio_received"],
270
- "server_uptime": time.time() - self.processing_stats["server_start_time"],
271
- "conversation_entries": len(self.conversation_history),
272
- "backend_url": self.processing_stats.get("backend_url", HF_SPACE_URL)
273
- }
274
-
275
- async def start_server(self, host="0.0.0.0", port=7860):
276
- """Start the WebSocket server"""
277
- try:
278
- # Start WebSocket server on /ws_inference endpoint
279
- self.websocket_server = await websockets.serve(
280
- self.handle_client_connection,
281
- host,
282
- port,
283
- subprotocols=[],
284
- path="/ws_inference"
285
- )
286
-
287
- self.is_running = True
288
- logger.info(f"WebSocket server started on ws://{host}:{port}/ws_inference")
289
-
290
- # Keep the server running
291
- await self.websocket_server.wait_closed()
292
-
293
- except Exception as e:
294
- logger.error(f"Failed to start WebSocket server: {e}")
295
- self.is_running = False
296
-
297
- # Initialize the WebSocket server
298
- ws_server = TranscriptionWebSocketServer()
299
-
300
- def create_gradio_interface():
301
- """Create Gradio interface for monitoring and testing"""
302
-
303
- def get_server_status():
304
- """Get current server status"""
305
- stats = ws_server.get_processing_stats()
306
 
307
- status_text = f"""
308
- ### Server Status
309
- - **WebSocket Server**: {'🟢 Running' if ws_server.is_running else '🔴 Stopped'}
310
- - **Connected Clients**: {stats['connected_clients']}
311
- - **Server Uptime**: {stats['server_uptime']:.1f} seconds
312
-
313
- ### Processing Statistics
314
- - **Audio Chunks Processed**: {stats['total_audio_chunks']}
315
- - **Transcriptions Generated**: {stats['total_transcriptions']}
316
- - **Last Audio Received**: {time.ctime(stats['last_audio_received']) if stats['last_audio_received'] else 'Never'}
317
-
318
- ### Conversation
319
- - **History Entries**: {stats['conversation_entries']}
320
- """
321
-
322
- return status_text
323
-
324
- def get_recent_transcriptions():
325
- """Get recent transcription results"""
326
- if not ws_server.conversation_history:
327
- return "No transcriptions yet. Waiting for audio data from backend..."
328
-
329
- recent_entries = ws_server.conversation_history[-10:] # Last 10 entries
330
 
331
- formatted_text = "### Recent Transcriptions\n\n"
332
- for entry in recent_entries:
333
- timestamp = time.strftime("%H:%M:%S", time.localtime(entry['timestamp']))
334
- speaker = entry['speaker']
335
- text = entry['text']
336
- confidence = entry['confidence']
337
-
338
- # Extract speaker number for color matching with shared.py
339
- speaker_num = 0
340
- if speaker.startswith("Speaker_"):
341
- try:
342
- speaker_num = int(speaker.split("_")[1]) - 1
343
- except (ValueError, IndexError):
344
- speaker_num = 0
345
-
346
- # Use colors from shared.py if possible
347
  try:
348
- from shared import SPEAKER_COLORS
349
- color = SPEAKER_COLORS[speaker_num % len(SPEAKER_COLORS)]
350
- except (ImportError, IndexError):
351
- # Fallback colors
352
- colors = ["#FF6B6B", "#4ECDC4", "#45B7D1", "#96CEB4", "#FFEAA7", "#DDA0DD", "#98D8C8", "#F7DC6F"]
353
- color = colors[speaker_num % len(colors)]
354
-
355
- formatted_text += f"<span style='color:{color};font-weight:bold;'>[{timestamp}] {speaker}</span> (confidence: {confidence:.2f})\n"
356
- formatted_text += f"{text}\n\n"
357
 
358
- return formatted_text
359
-
360
- def clear_conversation_history():
361
- """Clear conversation history"""
362
- ws_server.conversation_history.clear()
363
- return "Conversation history cleared!"
364
-
365
- # Create Gradio interface
366
- with gr.Blocks(
367
- title="Real-time Audio Transcription Service",
368
- theme=gr.themes.Soft()
369
- ) as demo:
370
-
371
- gr.Markdown("# 🎤 Real-time Audio Transcription Service")
372
- gr.Markdown("This HuggingFace Space receives audio from your backend and returns transcription results with speaker diarization.")
373
-
374
- with gr.Tab("📊 Server Status"):
375
- status_display = gr.Markdown(get_server_status())
376
-
377
- with gr.Row():
378
- refresh_status_btn = gr.Button("🔄 Refresh Status", variant="primary")
379
-
380
- refresh_status_btn.click(
381
- fn=get_server_status,
382
- outputs=status_display,
383
- every=None
384
- )
385
-
386
- with gr.Tab("📝 Live Transcription"):
387
- transcription_display = gr.Markdown(get_recent_transcriptions())
388
-
389
- with gr.Row():
390
- refresh_transcription_btn = gr.Button("🔄 Refresh Transcriptions", variant="primary")
391
- clear_history_btn = gr.Button("🗑️ Clear History", variant="secondary")
392
-
393
- refresh_transcription_btn.click(
394
- fn=get_recent_transcriptions,
395
- outputs=transcription_display
396
- )
397
-
398
- clear_history_btn.click(
399
- fn=clear_conversation_history,
400
- outputs=gr.Markdown()
401
- )
402
-
403
- with gr.Tab("🔧 Connection Info"):
404
- gr.Markdown(f"""
405
- ### WebSocket Connection Details
406
-
407
- **WebSocket Endpoint**: `wss://{HF_SPACE_URL}/ws_inference`
408
-
409
- ### Backend Connection
410
- Your backend should connect to this WebSocket endpoint and:
411
-
412
- 1. **Send Audio Data**: Stream raw audio bytes to this endpoint
413
- 2. **Receive Results**: Get JSON responses with transcription results
414
 
415
- ### Expected Message Flow
416
-
417
- **Backend → HuggingFace**:
418
- - Raw audio bytes (binary data)
419
- - Configuration messages (JSON)
420
-
421
- **HuggingFace → Backend**:
422
- ```json
423
- {{
424
- "type": "processing_result",
425
- "timestamp": 1234567890.123,
426
- "data": {{
427
- "text": "transcribed text here",
428
- "speaker": "Speaker_1",
429
- "confidence": 0.95
430
- }}
431
- }}
432
- ```
433
-
434
- ### Test Connection
435
- Your backend is configured to connect to: `{ws_server.processing_stats.get('backend_url', HF_SPACE_URL)}`
436
- """)
437
 
438
- with gr.Tab("🚀 API Documentation"):
439
- gr.Markdown("""
440
- ### WebSocket API Reference
441
-
442
- #### Endpoint
443
- - **URL**: `/ws_inference`
444
- - **Protocol**: WebSocket
445
- - **Accepts**: Binary audio data + JSON messages
446
-
447
- #### Message Types
448
-
449
- ##### 1. Audio Processing
450
- - **Input**: Raw audio bytes (binary)
451
- - **Output**: Processing result (JSON)
452
-
453
- ##### 2. Configuration
454
- - **Input**:
455
- ```json
456
- {
457
- "type": "config",
458
- "settings": {
459
- "language": "en",
460
- "enable_diarization": true,
461
- "max_speakers": 4,
462
- "threshold": 0.65
463
- }
464
- }
465
- ```
466
-
467
- ##### 3. Status Check
468
- - **Input**: `{"type": "status_request"}`
469
- - **Output**: Server statistics
470
-
471
- ##### 4. Ping/Pong
472
- - **Input**: `{"type": "ping"}`
473
- - **Output**: `{"type": "pong", "timestamp": 1234567890}`
474
-
475
- #### Error Handling
476
- All errors are returned as:
477
- ```json
478
- {
479
- "type": "error",
480
- "message": "Error description",
481
- "timestamp": 1234567890.123
482
- }
483
- ```
484
- """)
485
-
486
  return demo
487
 
488
- def run_websocket_server():
489
- """Run WebSocket server in background thread"""
490
- loop = asyncio.new_event_loop()
491
- asyncio.set_event_loop(loop)
492
-
493
- try:
494
- logger.info("Starting WebSocket server thread...")
495
- loop.run_until_complete(ws_server.start_server())
496
- except Exception as e:
497
- logger.error(f"WebSocket server error: {e}")
498
- finally:
499
- loop.close()
500
-
501
- # Mount UI to inference.py
502
- def mount_ui(app):
503
- """Mount Gradio interface to FastAPI app"""
504
- try:
505
- demo = create_gradio_interface()
506
- # Mount without starting server (FastAPI will handle it)
507
- demo.mount_to_app(app)
508
- logger.info("Gradio UI mounted to FastAPI app")
509
- return True
510
- except Exception as e:
511
- logger.error(f"Error mounting UI: {e}")
512
- return False
513
-
514
- # Start WebSocket server in background
515
- logger.info("Initializing WebSocket server...")
516
- websocket_thread = threading.Thread(target=run_websocket_server, daemon=True)
517
- websocket_thread.start()
518
 
519
- # Give server time to start
520
- time.sleep(2)
 
521
 
522
- # Create and launch Gradio interface
523
  if __name__ == "__main__":
524
- demo = create_gradio_interface()
525
- demo.launch(
526
- server_name="0.0.0.0",
527
- server_port=7860,
528
- share=True,
529
- show_error=True
530
- )
 
1
  import gradio as gr
2
+ from fastapi import FastAPI
3
+ from shared import DEFAULT_CHANGE_THRESHOLD, DEFAULT_MAX_SPEAKERS, ABSOLUTE_MAX_SPEAKERS, FINAL_TRANSCRIPTION_MODEL, REALTIME_TRANSCRIPTION_MODEL
4
+ print(gr.__version__)
5
+ # Connection configuration (separate signaling server from model server)
6
+ # These will be replaced at deployment time with the correct URLs
7
+ RENDER_SIGNALING_URL = "wss://render-signal-audio.onrender.com/stream"
8
+ HF_SPACE_URL = "https://androidguy-speaker-diarization.hf.space"
9
+
10
+ def build_ui():
11
+ """Build Gradio UI for speaker diarization"""
12
+ with gr.Blocks(title="Real-time Speaker Diarization", theme=gr.themes.Soft()) as demo:
13
+ # Add configuration variables to page using custom component
14
+ gr.HTML(
15
+ f"""
16
+ <!-- Configuration parameters -->
17
+ <script>
18
+ window.RENDER_SIGNALING_URL = "{RENDER_SIGNALING_URL}";
19
+ window.HF_SPACE_URL = "{HF_SPACE_URL}";
20
+ </script>
21
+ """
22
+ )
23
 
24
+ # Header and description
25
+ gr.Markdown("# 🎤 Live Speaker Diarization")
26
+ gr.Markdown(f"Real-time speech recognition with automatic speaker identification")
 
27
 
28
+ # Add transcription model info
29
+ gr.Markdown(f"**Using Models:** Final: {FINAL_TRANSCRIPTION_MODEL}, Realtime: {REALTIME_TRANSCRIPTION_MODEL}")
30
 
31
+ # Status indicator
32
+ connection_status = gr.HTML(
33
+ """<div class="status-indicator">
34
+ <span id="status-text" style="color:#888;">Waiting to connect...</span>
35
+ <span id="status-icon" style="width:10px; height:10px; display:inline-block;
36
+ background-color:#888; border-radius:50%; margin-left:5px;"></span>
37
+ </div>"""
38
+ )
39
+
40
+ with gr.Row():
41
+ with gr.Column(scale=2):
42
+ # Conversation display with embedded JavaScript for WebRTC and audio handling
43
+ conversation_display = gr.HTML(
44
+ """
45
+ <div class='output' id="conversation" style='padding:20px; background:#111; border-radius:10px;
46
+ min-height:400px; font-family:Arial; font-size:16px; line-height:1.5; overflow-y:auto;'>
47
+ <i>Click 'Start Listening' to begin...</i>
48
+ </div>
49
+
50
+ <script>
51
+ // Global variables
52
+ let rtcConnection;
53
+ let mediaStream;
54
+ let wsConnection;
55
+ let statusUpdateInterval;
56
 
57
+ // Check connection to HF space
58
+ async function checkHfConnection() {
59
+ try {
60
+ let response = await fetch(`${window.HF_SPACE_URL}/health`);
61
+ return response.ok;
62
+ } catch (err) {
63
+ return false;
64
+ }
65
  }
66
 
67
+ // Start the connection and audio streaming
68
+ async function startStreaming() {
69
+ try {
70
+ // Update status
71
+ updateStatus('connecting');
72
+
73
+ // Request microphone access
74
+ mediaStream = await navigator.mediaDevices.getUserMedia({audio: {
75
+ echoCancellation: true,
76
+ noiseSuppression: true,
77
+ autoGainControl: true
78
+ }});
79
+
80
+ // Set up WebRTC connection to Render signaling server
81
+ await setupWebRTC();
82
+
83
+ // Also connect WebSocket directly to HF Space for conversation updates
84
+ setupWebSocket();
85
+
86
+ // Start status update interval
87
+ statusUpdateInterval = setInterval(updateConnectionInfo, 5000);
88
+
89
+ // Update status
90
+ updateStatus('connected');
91
+
92
+ document.getElementById("conversation").innerHTML = "<i>Connected! Start speaking...</i>";
93
+ } catch (err) {
94
+ console.error('Error starting stream:', err);
95
+ updateStatus('error', err.message);
96
  }
97
+ }
98
+
99
+ // Set up WebRTC connection to Render signaling server
100
+ async function setupWebRTC() {
101
+ try {
102
+ if (rtcConnection) {
103
+ rtcConnection.close();
104
+ }
105
+
106
+ // Use FastRTC's connection approach
107
+ const pc = new RTCPeerConnection({
108
+ iceServers: [{ urls: 'stun:stun.l.google.com:19302' }]
109
+ });
110
+
111
+ // Add audio track
112
+ mediaStream.getAudioTracks().forEach(track => {
113
+ pc.addTrack(track, mediaStream);
114
+ });
115
+
116
+ // Connect to FastRTC signaling via WebSocket
117
+ const signalWs = new WebSocket(window.RENDER_SIGNALING_URL);
118
+
119
+ // Handle signaling messages
120
+ signalWs.onmessage = async (event) => {
121
+ const message = JSON.parse(event.data);
122
+
123
+ if (message.type === 'offer') {
124
+ await pc.setRemoteDescription(new RTCSessionDescription(message));
125
+ const answer = await pc.createAnswer();
126
+ await pc.setLocalDescription(answer);
127
+ signalWs.send(JSON.stringify(pc.localDescription));
128
+ } else if (message.type === 'candidate') {
129
+ if (message.candidate) {
130
+ await pc.addIceCandidate(new RTCIceCandidate(message));
131
+ }
132
+ }
133
+ };
134
+
135
+ // Send ICE candidates
136
+ pc.onicecandidate = (event) => {
137
+ if (event.candidate) {
138
+ signalWs.send(JSON.stringify({
139
+ type: 'candidate',
140
+ candidate: event.candidate
141
+ }));
142
+ }
143
+ };
144
+
145
+ // Keep connection reference
146
+ rtcConnection = pc;
147
+
148
+ // Wait for connection to be established
149
+ await new Promise((resolve, reject) => {
150
+ const timeout = setTimeout(() => reject(new Error("WebRTC connection timeout")), 10000);
151
+ pc.onconnectionstatechange = () => {
152
+ if (pc.connectionState === 'connected') {
153
+ clearTimeout(timeout);
154
+ resolve();
155
+ } else if (pc.connectionState === 'failed' || pc.connectionState === 'disconnected') {
156
+ clearTimeout(timeout);
157
+ reject(new Error("WebRTC connection failed"));
158
+ }
159
+ };
160
+ });
161
+
162
+ updateStatus('connected');
163
+ } catch (err) {
164
+ console.error('WebRTC setup error:', err);
165
+ updateStatus('error', 'WebRTC setup failed: ' + err.message);
166
  }
167
  }
168
 
169
+ // Set up WebSocket connection to HF Space for conversation updates
170
+ function setupWebSocket() {
171
+ const wsUrl = window.RENDER_SIGNALING_URL.replace('stream', 'ws_relay');
172
+ wsConnection = new WebSocket(wsUrl);
173
+
174
+ wsConnection.onopen = () => {
175
+ console.log('WebSocket connection established');
176
+ };
177
+
178
+ wsConnection.onmessage = (event) => {
179
+ try {
180
+ // Parse the JSON message
181
+ const message = JSON.parse(event.data);
182
+
183
+ // Process different message types
184
+ switch(message.type) {
185
+ case 'transcription':
186
+ // Handle transcription data
187
+ if (message && message.data && typeof message.data === 'object') {
188
+ document.getElementById("conversation").innerHTML = message.data.conversation_html ||
189
+ JSON.stringify(message.data);
190
+ }
191
+ break;
192
+
193
+ case 'processing_result':
194
+ // Handle individual audio chunk processing result
195
+ console.log('Processing result:', message.data);
196
+
197
+ // Update status info if needed
198
+ if (message.data && message.data.status === "processed") {
199
+ const statusElem = document.getElementById('status-text');
200
+ if (statusElem) {
201
+ const speakerId = message.data.speaker_id !== undefined ?
202
+ `Speaker ${message.data.speaker_id + 1}` : '';
203
+
204
+ if (speakerId) {
205
+ statusElem.textContent = `Connected - ${speakerId} active`;
206
+ }
207
+ }
208
+ } else if (message.data && message.data.status === "error") {
209
+ updateStatus('error', message.data.message || 'Processing error');
210
+ }
211
+ break;
212
+
213
+ case 'connection':
214
+ console.log('Connection status:', message.status);
215
+ updateStatus(message.status === 'connected' ? 'connected' : 'warning');
216
+ break;
217
+
218
+ case 'connection_established':
219
+ console.log('Connection established:', message);
220
+ updateStatus('connected');
221
+
222
+ // If initial conversation is provided, display it
223
+ if (message.conversation) {
224
+ document.getElementById("conversation").innerHTML = message.conversation;
225
+ }
226
+ break;
227
+
228
+ case 'conversation_update':
229
+ if (message.conversation_html) {
230
+ document.getElementById("conversation").innerHTML = message.conversation_html;
231
+ }
232
+ break;
233
+
234
+ case 'conversation_cleared':
235
+ document.getElementById("conversation").innerHTML =
236
+ "<i>Conversation cleared. Start speaking again...</i>";
237
+ break;
238
+
239
+ case 'error':
240
+ console.error('Error message from server:', message.message);
241
+ updateStatus('warning', message.message);
242
+ break;
243
+
244
+ default:
245
+ // If it's just HTML content without proper JSON structure (legacy format)
246
+ document.getElementById("conversation").innerHTML = event.data;
247
+ }
248
+
249
+ // Auto-scroll to bottom
250
+ const container = document.getElementById("conversation");
251
+ container.scrollTop = container.scrollHeight;
252
+ } catch (e) {
253
+ // Fallback for non-JSON messages (legacy format)
254
+ document.getElementById("conversation").innerHTML = event.data;
255
+
256
+ // Auto-scroll to bottom
257
+ const container = document.getElementById("conversation");
258
+ container.scrollTop = container.scrollHeight;
259
+ }
260
+ };
261
+
262
+ wsConnection.onerror = (error) => {
263
+ console.error('WebSocket error:', error);
264
+ updateStatus('warning', 'WebSocket error');
265
+ };
266
+
267
+ wsConnection.onclose = () => {
268
+ console.log('WebSocket connection closed');
269
+ // Try to reconnect after a delay
270
+ setTimeout(setupWebSocket, 3000);
271
+ };
272
  }
273
 
274
+ // Update connection info in the UI
275
+ async function updateConnectionInfo() {
276
+ try {
277
+ const hfConnected = await checkHfConnection();
278
+ if (!hfConnected) {
279
+ updateStatus('warning', 'HF Space connection issue');
280
+ } else if (rtcConnection?.connectionState === 'connected' ||
281
+ rtcConnection?.iceConnectionState === 'connected') {
282
+ updateStatus('connected');
283
+ } else {
284
+ updateStatus('warning', 'Connection unstable');
285
+ }
286
+ } catch (err) {
287
+ console.error('Error updating connection info:', err);
288
+ }
289
  }
290
 
291
+ // Update status indicator
292
+ function updateStatus(status, message = '') {
293
+ const statusText = document.getElementById('status-text');
294
+ const statusIcon = document.getElementById('status-icon');
295
+
296
+ switch(status) {
297
+ case 'connected':
298
+ statusText.textContent = 'Connected';
299
+ statusIcon.style.backgroundColor = '#4CAF50';
300
+ break;
301
+ case 'connecting':
302
+ statusText.textContent = 'Connecting...';
303
+ statusIcon.style.backgroundColor = '#FFC107';
304
+ break;
305
+ case 'disconnected':
306
+ statusText.textContent = 'Disconnected';
307
+ statusIcon.style.backgroundColor = '#9E9E9E';
308
+ break;
309
+ case 'error':
310
+ statusText.textContent = 'Error: ' + message;
311
+ statusIcon.style.backgroundColor = '#F44336';
312
+ break;
313
+ case 'warning':
314
+ statusText.textContent = 'Warning: ' + message;
315
+ statusIcon.style.backgroundColor = '#FF9800';
316
+ break;
317
+ default:
318
+ statusText.textContent = 'Unknown';
319
+ statusIcon.style.backgroundColor = '#9E9E9E';
320
+ }
321
+ }
322
 
323
+ // Stop streaming and clean up
324
+ function stopStreaming() {
325
+ // Close WebRTC connection
326
+ if (rtcConnection) {
327
+ rtcConnection.close();
328
+ rtcConnection = null;
329
+ }
330
+
331
+ // Close WebSocket
332
+ if (wsConnection) {
333
+ wsConnection.close();
334
+ wsConnection = null;
335
+ }
336
+
337
+ // Stop all tracks in media stream
338
+ if (mediaStream) {
339
+ mediaStream.getTracks().forEach(track => track.stop());
340
+ mediaStream = null;
341
+ }
342
+
343
+ // Clear interval
344
+ if (statusUpdateInterval) {
345
+ clearInterval(statusUpdateInterval);
346
+ statusUpdateInterval = null;
347
+ }
348
+
349
+ // Update status
350
+ updateStatus('disconnected');
351
+ }
352
+
353
+ // Set up event listeners when the DOM is loaded
354
+ document.addEventListener('DOMContentLoaded', () => {
355
+ updateStatus('disconnected');
356
+ });
357
+ </script>
358
+ """,
359
+ label="Live Conversation"
360
+ )
361
 
362
+ # Control buttons
363
+ with gr.Row():
364
+ start_btn = gr.Button("▶️ Start Listening", variant="primary", size="lg")
365
+ stop_btn = gr.Button("⏹️ Stop", variant="stop", size="lg")
366
+ clear_btn = gr.Button("🗑️ Clear", variant="secondary", size="lg")
367
 
368
+ # Status display
369
+ status_output = gr.Markdown(
370
+ """
371
+ ## System Status
372
+ Waiting to connect...
373
+
374
+ *Click Start Listening to begin*
375
+ """,
376
+ label="Status Information"
377
+ )
378
+
379
+ with gr.Column(scale=1):
380
+ # Settings
381
+ gr.Markdown("## ⚙️ Settings")
382
 
383
+ threshold_slider = gr.Slider(
384
+ minimum=0.3,
385
+ maximum=0.9,
386
+ step=0.05,
387
+ value=DEFAULT_CHANGE_THRESHOLD,
388
+ label="Speaker Change Sensitivity",
389
+ info="Lower = more sensitive (more speaker changes)"
390
+ )
391
 
392
+ max_speakers_slider = gr.Slider(
393
+ minimum=2,
394
+ maximum=ABSOLUTE_MAX_SPEAKERS,
395
+ step=1,
396
+ value=DEFAULT_MAX_SPEAKERS,
397
+ label="Maximum Speakers"
398
+ )
399
 
400
+ update_btn = gr.Button("Update Settings", variant="secondary")
401
 
402
+ # Instructions
403
+ gr.Markdown("""
404
+ ## 📋 Instructions
405
+ 1. **Start Listening** - grant the browser access to your microphone
406
+ 2. **Speak** - the system will transcribe and identify speakers
407
+ 3. **Stop** when finished
408
+ 4. **Clear** to reset conversation
409
 
410
+ ## 🎨 Speaker Colors
411
+ - 🔴 Speaker 1 (Red)
412
+ - 🟢 Speaker 2 (Teal)
413
+ - 🔵 Speaker 3 (Blue)
414
+ - 🟡 Speaker 4 (Green)
415
+ - Speaker 5 (Yellow)
416
+ - 🟣 Speaker 6 (Plum)
417
+ - 🟤 Speaker 7 (Mint)
418
+ - 🟠 Speaker 8 (Gold)
419
+ """)
420
 
421
+ # JavaScript to connect buttons to the script functions
422
+ gr.HTML("""
423
+ <script>
424
+ // Wait for Gradio to fully load
425
+ document.addEventListener('DOMContentLoaded', () => {
426
+ // Wait a bit for Gradio buttons to be created
427
+ setTimeout(() => {
428
+ // Get the buttons
429
+ const startBtn = document.querySelector('button[aria-label="Start Listening"]');
430
+ const stopBtn = document.querySelector('button[aria-label="Stop"]');
431
+ const clearBtn = document.querySelector('button[aria-label="Clear"]');
432
+
433
+ if (startBtn) startBtn.onclick = () => startStreaming();
434
+ if (stopBtn) stopBtn.onclick = () => stopStreaming();
435
+ if (clearBtn) clearBtn.onclick = () => {
436
+ // Make API call to clear conversation
437
+ fetch(`${window.HF_SPACE_URL}/clear`, {
438
+ method: 'POST'
439
+ }).then(resp => resp.json())
440
+ .then(data => {
441
+ document.getElementById("conversation").innerHTML =
442
+ "<i>Conversation cleared. Start speaking again...</i>";
443
+ });
444
+ }
445
+
446
+ // Set up settings update
447
+ const updateBtn = document.querySelector('button[aria-label="Update Settings"]');
448
+ if (updateBtn) updateBtn.onclick = () => {
449
+ const threshold = document.querySelector('input[aria-label="Speaker Change Sensitivity"]').value;
450
+ const maxSpeakers = document.querySelector('input[aria-label="Maximum Speakers"]').value;
451
+
452
+ fetch(`${window.HF_SPACE_URL}/settings?threshold=${threshold}&max_speakers=${maxSpeakers}`, {
453
+ method: 'POST'
454
+ }).then(resp => resp.json())
455
+ .then(data => {
456
+ const statusOutput = document.querySelector('.prose');
457
+ if (statusOutput) {
458
+ statusOutput.innerHTML = `
459
+ <h2>System Status</h2>
460
+ <p>Settings updated:</p>
461
+ <ul>
462
+ <li>Threshold: ${threshold}</li>
463
+ <li>Max Speakers: ${maxSpeakers}</li>
464
+ </ul>
465
+ <p>Transcription Models:</p>
466
+ <ul>
467
+ <li>Final: ${window.FINAL_TRANSCRIPTION_MODEL || "distil-large-v3"}</li>
468
+ <li>Realtime: ${window.REALTIME_TRANSCRIPTION_MODEL || "distil-small.en"}</li>
469
+ </ul>
470
+ `;
471
+ }
472
+ });
473
+ }
474
+ }, 1000);
475
+ });
476
+ </script>
477
+ """)
478
 
479
+ # Set up periodic status updates
480
+ def get_status():
481
+ """API call to get system status - called periodically"""
482
+ import requests
483
  try:
484
+ resp = requests.get(f"{HF_SPACE_URL}/status")
485
+ if resp.status_code == 200:
486
+ return resp.json().get('status', 'No status information')
487
+ return "Error getting status"
488
+ except Exception as e:
489
+ return f"Connection error: {str(e)}"
 
 
 
490
 
491
+ status_timer = gr.Timer(5)
492
+ status_timer.tick(fn=get_status, outputs=status_output)
493
494
495
  return demo
496
 
497
+ # Create Gradio interface
498
+ demo = build_ui()
499
 
500
+ def mount_ui(app: FastAPI):
501
+ """Mount Gradio app to FastAPI"""
502
+ return gr.mount_gradio_app(app, demo, path="/ui")
503
 
504
+ # For standalone testing
505
  if __name__ == "__main__":
506
+ demo.launch()
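
For reference, here is a minimal sketch of how the new `mount_ui` helper could be wired into the Space's FastAPI entry point. The file name `app.py`, the FastAPI instance, and the `/health` and `/status` handlers below are illustrative assumptions, not part of this commit; they simply mirror the endpoints the UI's JavaScript and `get_status` poller already call.

```python
# app.py - illustrative sketch only; not part of this commit.
# Assumes ui.py sits next to this file and exposes mount_ui(app).
from fastapi import FastAPI

from ui import mount_ui

app = FastAPI()

@app.get("/health")
def health():
    # The UI's checkHfConnection() fetches this endpoint.
    return {"status": "ok"}

@app.get("/status")
def status():
    # The UI's gr.Timer polls this every 5 seconds and displays the string.
    return {"status": "Diarization service running"}

# Serve the Gradio Blocks UI under /ui on the same server.
mount_ui(app)

if __name__ == "__main__":
    import uvicorn
    uvicorn.run(app, host="0.0.0.0", port=7860)
```

With `mount_ui` mounting via `gr.mount_gradio_app`, the interface would be reachable under `/ui` on the same port as the REST endpoints the page polls.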