Saiyaswanth007 committed on
Commit 3a4cb0f · 1 Parent(s): b45f016
Files changed (1)
  1. ui.py +297 -518
ui.py CHANGED
@@ -1,562 +1,341 @@
  import gradio as gr
- from fastapi import FastAPI, WebSocket, WebSocketDisconnect
- from fastapi.responses import JSONResponse, RedirectResponse
  import asyncio
  import json
  import logging
- from typing import Dict, List, Optional
- import os
- from datetime import datetime
- import httpx
- import websockets
- from fastrtc import RTCComponent

  # Configure logging
  logging.basicConfig(level=logging.INFO)
  logger = logging.getLogger(__name__)

- class Config:
-     def __init__(self):
-         # URLs should not include http/https prefix as we add it contextually
-         self.hf_space_url = os.getenv("HF_SPACE_URL", "androidguy-speaker-diarization.hf.space")
-         self.render_url = os.getenv("RENDER_URL", "render-signal-audio.onrender.com")
-         self.default_threshold = float(os.getenv("DEFAULT_THRESHOLD", "0.7"))
-         self.default_max_speakers = int(os.getenv("DEFAULT_MAX_SPEAKERS", "4"))
-         self.max_speakers_limit = int(os.getenv("MAX_SPEAKERS_LIMIT", "8"))
-
- config = Config()
-
- class ConnectionManager:
-     """Manage WebSocket connections"""
-     def __init__(self):
-         self.active_connections: List[WebSocket] = []
-         self.conversation_history: List[Dict] = []
-
-     async def connect(self, websocket: WebSocket):
-         await websocket.accept()
-         self.active_connections.append(websocket)
-         logger.info(f"Client connected. Total connections: {len(self.active_connections)}")

-     def disconnect(self, websocket: WebSocket):
-         if websocket in self.active_connections:
-             self.active_connections.remove(websocket)
-             logger.info(f"Client disconnected. Total connections: {len(self.active_connections)}")
-
-     async def send_personal_message(self, message: str, websocket: WebSocket):
          try:
-             await websocket.send_text(message)
          except Exception as e:
-             logger.error(f"Error sending message: {e}")
-             self.disconnect(websocket)
-
-     async def broadcast(self, message: str):
-         """Send message to all connected clients"""
-         disconnected = []
-         for connection in self.active_connections:
-             try:
-                 await connection.send_text(message)
-             except Exception as e:
-                 logger.error(f"Error broadcasting to connection: {e}")
-                 disconnected.append(connection)
-
-         # Clean up disconnected clients
-         for conn in disconnected:
-             self.disconnect(conn)
-
- manager = ConnectionManager()
-
- def create_gradio_app():
-     """Create the Gradio interface"""

-     def get_client_js():
-         """Return the client-side JavaScript"""
-         return f"""
-         <script>
-         class SpeakerDiarizationClient {{
-             constructor() {{
-                 this.ws = null;
-                 this.mediaStream = null;
-                 this.mediaRecorder = null;
-                 this.isRecording = false;
-                 this.baseUrl = 'https://{config.hf_space_url}';
-                 this.wsUrl = 'wss://{config.hf_space_url}/ws';
-                 this.renderUrl = 'wss://{config.render_url}/stream';
-                 this.rtcComponentSynced = false;
-             }}

-             async startRecording() {{
-                 try {{
-                     this.isRecording = true;
-                     this.updateStatus('connecting', 'Connecting to server...');
-
-                     // Connect to WebSocket for transcription updates
-                     await this.connectWebSocket();
-
-                     // Let the RTCComponent handle the audio streaming
-                     // This will be handled by Gradio's WebRTC component
-
-                     this.updateStatus('connected', 'Connected and listening');
-
-                 }} catch (error) {{
-                     console.error('Error starting recording:', error);
-                     this.updateStatus('error', `Failed to start: ${{error.message}}`);
-                 }}
-             }}

-             async connectWebSocket() {{
-                 return new Promise((resolve, reject) => {{
-                     // Only connect to the conversation updates WebSocket
-                     this.ws = new WebSocket('wss://{config.hf_space_url}/ws_transcription');
-
-                     this.ws.onopen = () => {{
-                         console.log('WebSocket connected for transcription updates');
-                         resolve();
-                     }};
-
-                     this.ws.onmessage = (event) => {{
-                         try {{
-                             const data = JSON.parse(event.data);
-                             this.handleServerMessage(data);
-                         }} catch (e) {{
-                             console.error('Error parsing message:', e);
-                         }}
-                     }};
-
-                     this.ws.onerror = (error) => {{
-                         console.error('WebSocket error:', error);
-                         reject(error);
-                     }};

-                     this.ws.onclose = () => {{
-                         console.log('WebSocket closed');
-                         if (this.isRecording) {{
-                             // Try to reconnect after a delay
-                             setTimeout(() => this.connectWebSocket(), 3000);
-                         }}
-                     }};
-                 }});
-             }}
-
-             handleServerMessage(data) {{
-                 switch(data.type) {{
-                     case 'transcription':
-                         this.updateConversation(data.conversation_html);
-                         break;
-                     case 'speaker_update':
-                         this.updateStatus('connected', `Active: ${{data.speaker}}`);
-                         break;
-                     case 'error':
-                         this.updateStatus('error', data.message);
-                         break;
-                     case 'status':
-                         this.updateStatus(data.status, data.message);
-                         break;
-                 }}
-             }}

-             stopRecording() {{
-                 this.isRecording = false;
-
-                 if (this.mediaRecorder && this.mediaRecorder.state !== 'inactive') {{
-                     this.mediaRecorder.stop();
-                 }}
-
-                 if (this.mediaStream) {{
-                     this.mediaStream.getTracks().forEach(track => track.stop());
-                     this.mediaStream = null;
-                 }}
-
-                 if (this.ws) {{
-                     this.ws.close();
-                     this.ws = null;
-                 }}
-
-                 this.updateStatus('disconnected', 'Recording stopped');
-             }}

-             async clearConversation() {{
-                 try {{
-                     const response = await fetch(`${{this.baseUrl}}/clear`, {{
-                         method: 'POST'
-                     }});
-
-                     if (response.ok) {{
-                         this.updateConversation('<i>Conversation cleared. Start speaking...</i>');
-                     }}
-                 }} catch (error) {{
-                     console.error('Error clearing conversation:', error);
-                 }}
-             }}

-             updateConversation(html) {{
-                 const elem = document.getElementById('conversation');
-                 if (elem) {{
-                     elem.innerHTML = html;
-                     elem.scrollTop = elem.scrollHeight;
-                 }}
-             }}

-             updateStatus(status, message = '') {{
-                 const statusText = document.getElementById('status-text');
-                 const statusIcon = document.getElementById('status-icon');
-
-                 if (!statusText || !statusIcon) return;
-
-                 const colors = {{
-                     'connected': '#4CAF50',
-                     'connecting': '#FFC107',
-                     'disconnected': '#9E9E9E',
-                     'error': '#F44336',
-                     'warning': '#FF9800'
-                 }};
-
-                 const labels = {{
-                     'connected': 'Connected',
-                     'connecting': 'Connecting...',
-                     'disconnected': 'Disconnected',
-                     'error': 'Error',
-                     'warning': 'Warning'
-                 }};
-
-                 statusText.textContent = message ? `${{labels[status]}}: ${{message}}` : labels[status];
-                 statusIcon.style.backgroundColor = colors[status] || '#9E9E9E';
-             }}
-         }}
-
-         // Global client instance
-         window.diarizationClient = new SpeakerDiarizationClient();

-         // Button event handlers
-         function startListening() {{
-             window.diarizationClient.startRecording();
-         }}
-
-         function stopListening() {{
-             window.diarizationClient.stopRecording();
-         }}
-
-         function clearConversation() {{
-             window.diarizationClient.clearConversation();
-         }}

-         // Initialize on page load
-         document.addEventListener('DOMContentLoaded', () => {{
-             window.diarizationClient.updateStatus('disconnected');
-         }});
-         </script>
-         """

-     with gr.Blocks(
-         title="Real-time Speaker Diarization",
-         theme=gr.themes.Soft(),
-         css="""
-         .status-indicator { margin: 10px 0; }
-         .conversation-display {
-             background: #f8f9fa;
-             border: 1px solid #dee2e6;
-             border-radius: 8px;
-             padding: 20px;
-             min-height: 400px;
-             font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif;
-             overflow-y: auto;
-         }
-         """
-     ) as demo:
-
-         # Inject client-side JavaScript
-         gr.HTML(get_client_js())
-
-         # Header
-         gr.Markdown("# 🎤 Real-time Speaker Diarization")
-         gr.Markdown("Advanced speech recognition with automatic speaker identification")
-
-         # Status indicator
-         gr.HTML(f"""
-         <div class="status-indicator">
-             <span id="status-text" style="color:#666;">Ready to connect</span>
-             <span id="status-icon" style="width:12px; height:12px; display:inline-block;
-                   background-color:#9E9E9E; border-radius:50%; margin-left:8px;"></span>
-         </div>
-         """)
-
-         with gr.Row():
-             with gr.Column(scale=2):
-                 # Conversation display
-                 gr.HTML(f"""
-                 <div id="conversation" class="conversation-display">
-                     <i>Click 'Start Listening' to begin real-time transcription...</i>
-                 </div>
-                 """)
-
-                 # WebRTC component (hidden, but functional)
-                 webrtc = RTCComponent(
-                     url=f"wss://{config.render_url}/stream",
-                     streaming=False,
-                     modality="audio",
-                     mode="send-receive",
-                     audio_html_attrs="style='display:none;'",  # Hide the audio element
-                     visible=True,  # Make component visible but hide audio element
-                     elements=["video", "start", "stop"]  # Don't include audio element
-                 )
-
-                 # Control buttons
-                 with gr.Row():
-                     start_btn = gr.Button(
-                         "▶️ Start Listening",
-                         variant="primary",
-                         size="lg",
-                         elem_id="start-btn"
-                     )
-
-                     stop_btn = gr.Button(
-                         "⏹️ Stop",
-                         variant="stop",
-                         size="lg",
-                         elem_id="stop-btn"
-                     )
-
-                     clear_btn = gr.Button(
-                         "🗑️ Clear",
-                         variant="secondary",
-                         size="lg",
-                         elem_id="clear-btn"
-                     )
-
-                 # WebRTC control functions
-                 def start_webrtc():
-                     return {
-                         webrtc: gr.update(streaming=True)
-                     }
-
-                 def stop_webrtc():
-                     return {
-                         webrtc: gr.update(streaming=False)
-                     }
-
-                 # Connect buttons to both WebRTC and JavaScript functions
-                 start_btn.click(fn=start_webrtc, outputs=[webrtc], js="startListening()")
-                 stop_btn.click(fn=stop_webrtc, outputs=[webrtc], js="stopListening()")
-                 clear_btn.click(fn=None, js="clearConversation()")

-             with gr.Column(scale=1):
-                 gr.Markdown("## ⚙️ Settings")
-
-                 threshold_slider = gr.Slider(
-                     minimum=0.3,
-                     maximum=0.9,
-                     step=0.05,
-                     value=config.default_threshold,
-                     label="Speaker Change Sensitivity",
-                     info="Lower = more sensitive to speaker changes"
-                 )
-
-                 max_speakers_slider = gr.Slider(
-                     minimum=2,
-                     maximum=config.max_speakers_limit,
-                     step=1,
-                     value=config.default_max_speakers,
-                     label="Maximum Speakers"
-                 )
-
-                 # Instructions
-                 gr.Markdown("""
-                 ## 📋 How to Use
-                 1. **Start Listening** - Grant microphone access
-                 2. **Speak** - System transcribes and identifies speakers
-                 3. **Stop** when finished
-                 4. **Clear** to reset conversation
-
-                 ## 🎨 Speaker Colors
-                 - 🔴 Speaker 1 - 🟢 Speaker 2 - 🔵 Speaker 3 - 🟡 Speaker 4
-                 - ⭐ Speaker 5 - 🟣 Speaker 6 - 🟤 Speaker 7 - 🟠 Speaker 8
-                 """)
-
-     return demo
-
- def create_fastapi_app():
-     """Create the FastAPI backend"""
-     app = FastAPI(title="Speaker Diarization API")

-     @app.websocket("/ws")
-     async def websocket_endpoint(websocket: WebSocket):
-         await manager.connect(websocket)
          try:
-             while True:
-                 # Receive audio data
-                 data = await websocket.receive_bytes()
-
-                 # Process audio data here
-                 # This is where you'd integrate your actual speaker diarization model
-                 result = await process_audio_chunk(data)
-
-                 # Send result back to client
-                 await manager.send_personal_message(
-                     json.dumps(result),
-                     websocket
-                 )
-
-         except WebSocketDisconnect:
-             manager.disconnect(websocket)
          except Exception as e:
-             logger.error(f"WebSocket error: {e}")
-             manager.disconnect(websocket)
-
-     @app.post("/clear")
-     async def clear_conversation():
-         """Clear the conversation history"""
-         manager.conversation_history.clear()
-         await manager.broadcast(json.dumps({
-             "type": "conversation_cleared"
-         }))
-         return {"status": "cleared"}

-     @app.get("/health")
-     async def health_check():
-         """Health check endpoint"""
          return {
-             "status": "healthy",
-             "timestamp": datetime.now().isoformat(),
-             "active_connections": len(manager.active_connections)
          }

-     @app.get("/status")
-     async def get_status():
-         """Get system status"""
-         return {
-             "status": "online",
-             "connections": len(manager.active_connections),
-             "conversation_length": len(manager.conversation_history)
-         }

-     return app
-
- async def process_audio_chunk(audio_data: bytes) -> dict:
-     """
-     Process audio chunk by forwarding to the backend.
-     This function is only used for the direct WebSocket API, not for the WebRTC component.

-     Note: In production, you should primarily use the WebRTC component which has its own
-     audio processing flow through the Render backend.
-     """
-     try:
-         # Connect to the Speaker Diarization backend via WebSocket
-         websocket_url = f"wss://{config.hf_space_url}/ws_inference"
-         logger.info(f"Forwarding audio to diarization backend at {websocket_url}")

-         async with websockets.connect(websocket_url) as websocket:
-             # Send audio data
-             await websocket.send(audio_data)

-             # Receive the response
-             response = await websocket.recv()

-             # Parse the response
-             try:
-                 result = json.loads(response)
-
-                 # Add to conversation history if it's a transcription
-                 if result.get("type") == "transcription" or result.get("type") == "conversation_update":
-                     if "conversation_html" in result:
-                         manager.conversation_history.append({
-                             "timestamp": datetime.now().isoformat(),
-                             "html": result["conversation_html"]
-                         })
-
-                 return result
-             except json.JSONDecodeError:
-                 logger.error(f"Invalid JSON response: {response}")
-                 return {
-                     "type": "error",
-                     "error": "Invalid response from backend",
-                     "timestamp": datetime.now().isoformat()
-                 }
-     except Exception as e:
-         logger.exception(f"Error processing audio chunk: {e}")
-         return {
-             "type": "error",
-             "error": str(e),
-             "timestamp": datetime.now().isoformat()
-         }
-
- # Create both apps
- fastapi_app = create_fastapi_app()
- gradio_app = create_gradio_app()
-
- # Root redirect - keep this simple
- @fastapi_app.get("/")
- def root():
-     """Redirect root to the Gradio UI"""
-     return RedirectResponse(url="/ui/")  # Note the trailing slash is important
-
- # Mount Gradio app to FastAPI - use correct mounting method for Gradio
- try:
-     # For newer Gradio versions
-     fastapi_app.mount("/ui", gradio_app)
- except Exception as e:
-     # Try alternative mounting method
-     try:
-         from gradio.routes import mount_gradio_app
-         app = mount_gradio_app(fastapi_app, gradio_app, path="/ui")
-         logger.info("Mounted Gradio app using mount_gradio_app")
-     except Exception as e2:
-         logger.error(f"Failed to mount Gradio app: {e2}")
-         # As a last resort, try the simplest mounting
-         fastapi_app.mount("/ui", gradio_app.app)
-
- # Add diagnostic endpoints to check connections
- @fastapi_app.get("/check-backend")
- async def check_backend():
-     """Check connection to the Render backend"""
-     try:
-         # Check if we can connect to the WebSocket endpoint on Render
-         websocket_url = f"wss://{config.render_url}/stream"
-         logger.info(f"Checking connection to Render backend at {websocket_url}")

-         # Don't actually connect, just return status
-         return {
-             "status": "configured",
-             "render_backend_url": websocket_url,
-             "hf_space_url": f"wss://{config.hf_space_url}/ws_inference",
-             "rtc_component_config": {
-                 "url": f"wss://{config.render_url}/stream",
-                 "modality": "audio",
-                 "mode": "send-receive"
              }
-         }
-     except Exception as e:
-         logger.error(f"Error checking backend: {e}")
-         return {
-             "status": "error",
-             "error": str(e)
-         }

- # Log configuration on startup
- @fastapi_app.on_event("startup")
- async def log_configuration():
-     logger.info(f"Starting UI with configuration:")
-     logger.info(f"- HF Space URL: {config.hf_space_url}")
-     logger.info(f"- Render URL: {config.render_url}")
-     logger.info(f"- WebRTC URL: wss://{config.render_url}/stream")
-     logger.info(f"- WebSocket URL: wss://{config.hf_space_url}/ws_inference")
-     logger.info("Note: Audio will be streamed through the Render backend using WebRTC")

-     # Test connection to Render backend
      try:
-         async with websockets.connect(f"wss://{config.render_url}/stream", ping_interval=None, ping_timeout=None) as ws:
-             logger.info("Successfully connected to Render backend WebSocket")
      except Exception as e:
-         logger.error(f"Failed to connect to Render backend: {e}")
-
-     # Test connection to HF Space backend
-     try:
-         async with websockets.connect(f"wss://{config.hf_space_url}/ws_inference", ping_interval=None, ping_timeout=None) as ws:
-             logger.info("Successfully connected to HF Space WebSocket")
-     except Exception as e:
-         logger.error(f"Failed to connect to HF Space: {e}")

  if __name__ == "__main__":
-     import uvicorn
-     # Use the correct port for Hugging Face Spaces (7860)
-     port = int(os.environ.get("PORT", 7860))
-     logger.info(f"Starting server on port {port}")
-     uvicorn.run(fastapi_app, host="0.0.0.0", port=port)

  import gradio as gr
  import asyncio
+ import websockets
  import json
  import logging
+ import time
+ from typing import Dict, Any, Optional
+ import threading
+ from queue import Queue
+ import base64

  # Configure logging
  logging.basicConfig(level=logging.INFO)
  logger = logging.getLogger(__name__)

+ class TranscriptionInterface:
+     """Interface for real-time transcription and speaker diarization"""

+     def __init__(self):
+         self.connected_clients = set()
+         self.message_queue = Queue()
+         self.is_running = False
+         self.websocket_server = None
+         self.current_transcript = ""
+         self.conversation_history = []
+
+     async def handle_client(self, websocket, path):
+         """Handle WebSocket client connections"""
+         client_id = f"client_{int(time.time())}"
+         self.connected_clients.add(websocket)
+
+         logger.info(f"Client connected: {client_id}. Total clients: {len(self.connected_clients)}")
+
          try:
+             # Send connection confirmation
+             await websocket.send(json.dumps({
+                 "type": "connection",
+                 "status": "connected",
+                 "timestamp": time.time(),
+                 "client_id": client_id
+             }))
+
+             async for message in websocket:
+                 try:
+                     if isinstance(message, bytes):
+                         # Handle binary audio data
+                         await self.process_audio_chunk(message, websocket)
+                     else:
+                         # Handle text messages
+                         data = json.loads(message)
+                         await self.handle_message(data, websocket)
+
+                 except json.JSONDecodeError:
+                     logger.warning(f"Invalid JSON received from client: {message}")
+                 except Exception as e:
+                     logger.error(f"Error processing message: {e}")
+
+         except websockets.exceptions.ConnectionClosed:
+             logger.info(f"Client {client_id} disconnected")
          except Exception as e:
+             logger.error(f"Client handler error: {e}")
+         finally:
+             self.connected_clients.discard(websocket)
+             logger.info(f"Client removed. Remaining clients: {len(self.connected_clients)}")

+     async def process_audio_chunk(self, audio_data: bytes, websocket):
+         """Process incoming audio data"""
+         try:
+             # Import inference functions (assuming they exist in your setup)
+             from inference import process_audio_for_transcription

+             # Process the audio chunk
+             result = await process_audio_for_transcription(audio_data)

+             if result:
+                 # Broadcast result to all clients
+                 await self.broadcast_result({
+                     "type": "processing_result",
+                     "timestamp": time.time(),
+                     "data": result
+                 })
+
+                 # Update conversation history
+                 if "transcription" in result:
+                     self.update_conversation(result)

+         except ImportError:
+             logger.warning("Inference module not found - audio processing disabled")
+         except Exception as e:
+             logger.error(f"Error processing audio chunk: {e}")
+             await websocket.send(json.dumps({
+                 "type": "error",
+                 "message": f"Audio processing error: {str(e)}",
+                 "timestamp": time.time()
+             }))
+
+     async def handle_message(self, data: Dict[str, Any], websocket):
+         """Handle non-audio messages from clients"""
+         message_type = data.get("type", "unknown")
+
+         if message_type == "config":
+             # Handle configuration updates
+             logger.info(f"Configuration update: {data}")

+         elif message_type == "request_history":
+             # Send conversation history to client
+             await websocket.send(json.dumps({
+                 "type": "conversation_history",
+                 "data": self.conversation_history,
+                 "timestamp": time.time()
+             }))

+         elif message_type == "clear_history":
+             # Clear conversation history
+             self.conversation_history = []
+             self.current_transcript = ""
+             await self.broadcast_result({
+                 "type": "conversation_update",
+                 "action": "cleared",
+                 "timestamp": time.time()
+             })

+         else:
+             logger.warning(f"Unknown message type: {message_type}")
+
+     async def broadcast_result(self, result: Dict[str, Any]):
+         """Broadcast results to all connected clients"""
+         if not self.connected_clients:
+             return

+         message = json.dumps(result)
+         disconnected = set()

+         for client in self.connected_clients.copy():
+             try:
+                 await client.send(message)
+             except Exception as e:
+                 logger.warning(f"Failed to send to client: {e}")
+                 disconnected.add(client)

+         # Clean up disconnected clients
+         for client in disconnected:
+             self.connected_clients.discard(client)

+     def update_conversation(self, result: Dict[str, Any]):
+         """Update conversation history with new transcription results"""
+         if "transcription" in result:
+             transcript_data = {
+                 "timestamp": time.time(),
+                 "text": result["transcription"],
+                 "speaker": result.get("speaker", "Unknown"),
+                 "confidence": result.get("confidence", 0.0)
+             }

+             self.conversation_history.append(transcript_data)
+
+             # Keep only last 100 entries to prevent memory issues
+             if len(self.conversation_history) > 100:
+                 self.conversation_history = self.conversation_history[-100:]

+     async def start_websocket_server(self, host="0.0.0.0", port=7860):
+         """Start the WebSocket server"""
          try:
+             self.websocket_server = await websockets.serve(
+                 self.handle_client,
+                 host,
+                 port,
+                 path="/ws_inference"
+             )
+             self.is_running = True
+             logger.info(f"WebSocket server started on {host}:{port}")
+
+             # Keep server running
+             await self.websocket_server.wait_closed()
+
          except Exception as e:
+             logger.error(f"WebSocket server error: {e}")
+             self.is_running = False

+     def get_status(self):
+         """Get current status information"""
          return {
+             "connected_clients": len(self.connected_clients),
+             "is_running": self.is_running,
+             "conversation_entries": len(self.conversation_history),
+             "last_activity": time.time()
          }
+
+ # Initialize the transcription interface
+ transcription_interface = TranscriptionInterface()
+
+ def create_gradio_interface():
+     """Create the Gradio interface"""

+     def get_server_status():
+         """Get server status for display"""
+         status = transcription_interface.get_status()
+         return f"""
+         **Server Status:**
+         - WebSocket Server: {'Running' if status['is_running'] else 'Stopped'}
+         - Connected Clients: {status['connected_clients']}
+         - Conversation Entries: {status['conversation_entries']}
+         - Last Activity: {time.ctime(status['last_activity'])}
+         """

+     def get_conversation_history():
+         """Get formatted conversation history"""
+         if not transcription_interface.conversation_history:
+             return "No conversation history available."
+
+         formatted_history = []
+         for entry in transcription_interface.conversation_history[-10:]:  # Show last 10 entries
+             timestamp = time.ctime(entry['timestamp'])
+             speaker = entry.get('speaker', 'Unknown')
+             text = entry.get('text', '')
+             confidence = entry.get('confidence', 0.0)
+
+             formatted_history.append(f"**[{timestamp}] {speaker}** (confidence: {confidence:.2f})\n{text}\n")
+
+         return "\n".join(formatted_history)

+     def clear_conversation():
+         """Clear conversation history"""
+         transcription_interface.conversation_history = []
+         transcription_interface.current_transcript = ""
+         return "Conversation history cleared."
+
+     # Create Gradio interface
+     with gr.Blocks(title="Real-time Audio Transcription & Speaker Diarization") as demo:
+         gr.Markdown("# Real-time Audio Transcription & Speaker Diarization")
+         gr.Markdown("This Hugging Face Space provides WebSocket endpoints for real-time audio processing.")

+         with gr.Tab("Server Status"):
+             status_display = gr.Markdown(get_server_status())
+             refresh_btn = gr.Button("Refresh Status")
+             refresh_btn.click(get_server_status, outputs=status_display)
+
+         with gr.Tab("Live Transcription"):
+             gr.Markdown("### Live Conversation")
+             conversation_display = gr.Markdown(get_conversation_history())

+             with gr.Row():
+                 refresh_conv_btn = gr.Button("Refresh Conversation")
+                 clear_conv_btn = gr.Button("Clear History", variant="secondary")

+             refresh_conv_btn.click(get_conversation_history, outputs=conversation_display)
+             clear_conv_btn.click(clear_conversation, outputs=conversation_display)

+ with gr.Tab("WebSocket Info"):
250
+ gr.Markdown("""
251
+ ### WebSocket Endpoint
252
+ Connect to this Space's WebSocket endpoint for real-time audio processing:
253
+
254
+ **WebSocket URL:** `wss://your-space-name.hf.space/ws_inference`
255
+
256
+ ### Message Format
257
+
258
+ **Audio Data:** Send raw audio bytes directly to the WebSocket
259
+
260
+ **Text Messages:** JSON format
261
+ ```json
262
+ {
263
+ "type": "config",
264
+ "settings": {
265
+ "language": "en",
266
+ "enable_diarization": true
267
+ }
268
  }
269
+ ```
270
+
271
+ ### Response Format
272
+ ```json
273
+ {
274
+ "type": "processing_result",
275
+ "timestamp": 1234567890.123,
276
+ "data": {
277
+ "transcription": "Hello world",
278
+ "speaker": "Speaker_1",
279
+ "confidence": 0.95
280
+ }
281
+ }
282
+ ```
283
+ """)
284
+
285
+ with gr.Tab("API Documentation"):
286
+ gr.Markdown("""
287
+ ### Available Endpoints
288
+
289
+ - **WebSocket:** `/ws_inference` - Main endpoint for real-time audio processing
290
+ - **HTTP:** `/health` - Check server health status
291
+ - **HTTP:** `/stats` - Get detailed statistics
292
+
293
+ ### Integration Example
294
+
295
+ ```javascript
296
+ const ws = new WebSocket('wss://your-space-name.hf.space/ws_inference');
297
+
298
+ ws.onopen = function() {
299
+ console.log('Connected to transcription service');
300
+ };
301
+
302
+ ws.onmessage = function(event) {
303
+ const data = JSON.parse(event.data);
304
+ if (data.type === 'processing_result') {
305
+ console.log('Transcription:', data.data.transcription);
306
+ console.log('Speaker:', data.data.speaker);
307
+ }
308
+ };
309
+
310
+ // Send audio data
311
+ ws.send(audioBuffer);
312
+ ```
313
+ """)
314
+
315
+ return demo
316
 
317
+ def run_websocket_server():
318
+ """Run the WebSocket server in a separate thread"""
319
+ loop = asyncio.new_event_loop()
320
+ asyncio.set_event_loop(loop)
 
 
 
 
 
321
 
 
322
  try:
323
+ loop.run_until_complete(transcription_interface.start_websocket_server())
 
324
  except Exception as e:
325
+ logger.error(f"WebSocket server thread error: {e}")
326
+ finally:
327
+ loop.close()
328
+
329
+ # Start WebSocket server in background thread
330
+ websocket_thread = threading.Thread(target=run_websocket_server, daemon=True)
331
+ websocket_thread.start()
 
332
 
333
+ # Create and launch Gradio interface
334
  if __name__ == "__main__":
335
+ demo = create_gradio_interface()
336
+ demo.launch(
337
+ server_name="0.0.0.0",
338
+ server_port=7860,
339
+ share=False,
340
+ show_error=True
341
+ )