Speaker-Diarization / inference.py
Saiyaswanth007's picture
Continuous logs of faster-whisper
99ecc54
raw
history blame
16.4 kB
from fastapi import FastAPI, WebSocket, WebSocketDisconnect, HTTPException
from fastapi.middleware.cors import CORSMiddleware
from shared import RealtimeSpeakerDiarization
import numpy as np
import uvicorn
import logging
import asyncio
import json
import time
from typing import Set, Dict, Any
import traceback
# Make sure RealtimeSTT is importable, installing it on the fly when it is missing.
# NOTE(review): this runs an unpinned `pip install` at import time with the
# service's privileges -- consider declaring the dependency in requirements instead.
try:
    from RealtimeSTT import AudioToTextRecorder
except ImportError:
    import importlib
    import subprocess
    import sys

    print("Installing RealtimeSTT dependency...")
    subprocess.check_call([sys.executable, "-m", "pip", "install", "RealtimeSTT"])
    # The import system caches directory listings; refresh them so the package
    # pip just wrote to disk is visible to the retry below.
    importlib.invalidate_caches()
    from RealtimeSTT import AudioToTextRecorder
# Set up logging
# Configure the root logger once at import time: INFO level, with timestamp,
# logger name and severity prefixed to every message.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
)
# Module-level logger used throughout this file.
logger = logging.getLogger(__name__)
# Initialize FastAPI app
app = FastAPI(title="Real-time Speaker Diarization API", version="1.0.0")
# Add CORS middleware for browser compatibility
# NOTE(review): every origin, method and header is allowed while credentials are
# enabled at the same time -- confirm this permissive policy is intended.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)
# Global state management
# Diarization engine instance; stays None until initialize_diarization_system()
# assigns it.
diart = None
# NOTE(review): not referenced anywhere else in the visible code (ConnectionManager
# keeps its own set) -- confirm whether another module relies on this name.
active_connections: Set[WebSocket] = set()
# Process-wide counters. They are mutated in place by ConnectionManager and
# ws_inference and returned as-is by the /health, /status and /stats endpoints.
# A "system_start_time" entry is added near the end of this module.
connection_stats: Dict[str, Any] = {
    "total_connections": 0,       # incremented on every accepted connection
    "current_connections": 0,     # number of currently registered sockets
    "last_audio_received": None,  # time.time() of the latest processed chunk
    "total_audio_chunks": 0       # incremented for every processed chunk
}
class ConnectionManager:
    """Registry of live WebSocket clients with a best-effort broadcast helper.

    Besides tracking sockets and their per-socket metadata, the manager mirrors
    the number of open sockets into the module-level ``connection_stats`` dict
    and starts/stops recording on the module-level ``diart`` engine when the
    first client arrives or the last one leaves.
    """

    def __init__(self):
        # Sockets that have been accepted and not yet removed.
        self.active_connections: Set[WebSocket] = set()
        # Per-socket bookkeeping: client_id, connected_at, messages_sent.
        self.connection_metadata: Dict[WebSocket, Dict] = {}

    async def connect(self, websocket: WebSocket, client_id: str = None):
        """Accept ``websocket`` and register it with the manager.

        Args:
            websocket: Incoming connection; it is accepted here.
            client_id: Label used in logs. When falsy, one is derived from the
                current Unix time in whole seconds.
        """
        await websocket.accept()
        self.active_connections.add(websocket)

        label = client_id or f"client_{int(time.time())}"
        self.connection_metadata[websocket] = {
            "client_id": label,
            "connected_at": time.time(),
            "messages_sent": 0
        }

        open_sockets = len(self.active_connections)
        connection_stats["current_connections"] = open_sockets
        connection_stats["total_connections"] += 1

        # The first client wakes the recorder, provided the engine exists and is idle.
        if open_sockets == 1 and diart and not diart.is_running:
            logger.info("First connection established, starting recording")
            diart.start_recording()

        logger.info(f"WebSocket connected: {label}. "
                    f"Total connections: {open_sockets}")

    def disconnect(self, websocket: WebSocket):
        """Forget ``websocket``; a socket that is not registered is ignored."""
        if websocket not in self.active_connections:
            return

        meta = self.connection_metadata.pop(websocket, {})
        label = meta.get("client_id", "unknown")
        self.active_connections.discard(websocket)

        remaining = len(self.active_connections)
        connection_stats["current_connections"] = remaining

        # Nobody is listening any more: stop recording to save resources.
        if remaining == 0 and diart and diart.is_running:
            logger.info("No active connections, stopping recording")
            diart.stop_recording()

        logger.info(f"WebSocket disconnected: {label}. "
                    f"Remaining connections: {remaining}")

    async def broadcast(self, message: str):
        """Send ``message`` to every registered socket.

        A socket whose send raises is logged and unregistered once the whole
        round has finished.
        """
        if not self.active_connections:
            return

        failed = []
        # Iterate over a snapshot so the live set can change while awaiting.
        for socket in list(self.active_connections):
            try:
                await socket.send_text(message)
                meta = self.connection_metadata.get(socket)
                if meta is not None:
                    meta["messages_sent"] += 1
            except Exception as exc:
                logger.warning(f"Failed to send message to client: {exc}")
                failed.append(socket)

        # Clean up the sockets that could not be reached.
        for socket in failed:
            self.disconnect(socket)

    def get_stats(self):
        """Return the open-socket count and the metadata dicts keyed by position."""
        return {
            "active_connections": len(self.active_connections),
            "connection_metadata": dict(enumerate(self.connection_metadata.values())),
        }
# Initialize connection manager
# Single shared instance used by the WebSocket endpoint, the background
# broadcaster and the HTTP endpoints below.
manager = ConnectionManager()
async def initialize_diarization_system():
    """Create the global diarization engine and load its models.

    Recording is not started here; ConnectionManager.connect starts it when the
    first client connects.

    Returns:
        True when ``initialize_models()`` reports success, False when it reports
        failure or when anything raises (the traceback is logged).
    """
    global diart
    try:
        logger.info("Initializing diarization system...")
        diart = RealtimeSpeakerDiarization()
        if not diart.initialize_models():
            logger.error("Failed to initialize models")
            return False
        logger.info("Models initialized successfully")
        logger.info("System ready for connections")
        return True
    except Exception as exc:
        logger.error(f"Error initializing diarization system: {exc}")
        logger.error(traceback.format_exc())
        return False
async def send_conversation_updates():
    """Broadcast the formatted conversation to all clients whenever it changes.

    Runs forever, waking every 0.5 seconds. A ``conversation_update`` JSON
    message is broadcast only while the engine is running, at least one client
    is connected, and the hash of the conversation HTML differs from the one
    last sent. Errors are logged and the loop carries on.
    """
    poll_seconds = 0.5  # 500ms between checks
    previous_digest = None

    while True:
        try:
            if diart and diart.is_running and manager.active_connections:
                html = diart.get_formatted_conversation()
                digest = hash(html)
                # Skip the broadcast when nothing changed, to save bandwidth.
                if digest != previous_digest:
                    payload = {
                        "type": "conversation_update",
                        "timestamp": time.time(),
                        "conversation_html": html,
                        "status": diart.get_status_info() if hasattr(diart, 'get_status_info') else {}
                    }
                    await manager.broadcast(json.dumps(payload))
                    previous_digest = digest
        except Exception as exc:
            logger.error(f"Error in conversation update: {exc}")

        await asyncio.sleep(poll_seconds)
@app.on_event("startup")
async def startup_event():
    """Initialize the diarization system and launch the background broadcaster.

    A failed initialization is logged but does not abort startup; the endpoints
    then report the system as not initialized or not running.
    """
    logger.info("Starting Real-time Speaker Diarization Service")

    success = await initialize_diarization_system()
    if not success:
        logger.error("Failed to initialize diarization system!")

    # The event loop only holds a weak reference to tasks, so keep a strong one
    # on the application state; otherwise the broadcaster could be
    # garbage-collected while it is still running.
    app.state.conversation_update_task = asyncio.create_task(send_conversation_updates())
    logger.info("Background tasks started")
@app.on_event("shutdown")
async def shutdown_event():
    """Stop recording and shut down the transcription recorder on shutdown.

    The two cleanup steps are independent: a failure while stopping the
    recording is logged and does not prevent the recorder from being shut down.
    """
    logger.info("Shutting down...")
    if not diart:
        return

    try:
        diart.stop_recording()
        logger.info("Recording stopped")
    except Exception as e:
        logger.error(f"Error stopping recording: {e}")

    # Shut down RealtimeSTT properly if the engine exposes a recorder.
    try:
        recorder = getattr(diart, 'recorder', None)
        if recorder:
            recorder.shutdown()
            logger.info("Transcription model shut down")
    except Exception as e:
        logger.error(f"Error shutting down transcription model: {e}")
@app.get("/")
async def root():
    """Describe the service: name, version, coarse status and endpoint paths."""
    # "running" only while the engine exists and is recording.
    if diart and diart.is_running:
        service_state = "running"
    else:
        service_state = "initializing"

    endpoint_paths = {
        "websocket": "/ws_inference",
        "health": "/health",
        "conversation": "/conversation",
        "status": "/status"
    }
    return {
        "service": "Real-time Speaker Diarization API",
        "version": "1.0.0",
        "status": service_state,
        "endpoints": endpoint_paths
    }
@app.get("/health")
@app.head("/health")
async def health_check():
    """Report whether the diarization engine is running, plus connection counters.

    Returns a dict with ``status`` ("healthy"/"unhealthy"), a boolean
    ``system_running``, the number of active WebSocket connections, the shared
    ``connection_stats`` and the engine's own status info when available.
    """
    # Coerce to bool so "system_running" is always true/false; the bare
    # `diart and ...` expression evaluates to None while the engine is unset.
    system_healthy = bool(diart and diart.is_running)
    return {
        "status": "healthy" if system_healthy else "unhealthy",
        "system_running": system_healthy,
        "active_connections": len(manager.active_connections),
        "connection_stats": connection_stats,
        "diarization_status": diart.get_status_info() if diart and hasattr(diart, 'get_status_info') else {}
    }
def _json_default(value):
    """Fallback for ``json.dumps``: turn NumPy scalars/arrays into plain Python values."""
    if isinstance(value, np.generic):
        return value.item()
    if isinstance(value, np.ndarray):
        return value.tolist()
    raise TypeError(f"Object of type {type(value).__name__} is not JSON serializable")


@app.websocket("/ws_inference")
async def ws_inference(websocket: WebSocket):
    """WebSocket endpoint for real-time audio processing.

    After registering the client and sending a ``connection_established``
    message, every binary frame is passed to ``diart.process_audio_chunk`` while
    the engine is running; a truthy result is returned to the client as a
    ``processing_result`` message. Per-chunk failures are reported to the client
    as ``error`` messages without closing the connection. The socket is always
    unregistered when the handler exits.
    """
    client_id = f"client_{int(time.time())}"

    try:
        await manager.connect(websocket, client_id)

        # Send initial connection confirmation
        initial_message = json.dumps({
            "type": "connection_established",
            "client_id": client_id,
            "system_status": "ready" if diart and diart.is_running else "initializing",
            "conversation": diart.get_formatted_conversation() if diart else ""
        })
        await websocket.send_text(initial_message)

        # Process incoming audio data
        async for data in websocket.iter_bytes():
            try:
                if data and diart and diart.is_running:
                    # Update statistics
                    connection_stats["last_audio_received"] = time.time()
                    connection_stats["total_audio_chunks"] += 1

                    # NOTE(review): this call is synchronous and runs on the
                    # event loop -- confirm it is cheap enough per chunk.
                    result = diart.process_audio_chunk(data)

                    # Send processing result back to client
                    if result:
                        # The default hook converts NumPy scalars and arrays at
                        # any nesting depth, not just top-level numeric values.
                        result_message = json.dumps({
                            "type": "processing_result",
                            "timestamp": time.time(),
                            "data": result
                        }, default=_json_default)
                        await websocket.send_text(result_message)

                    # Log processing result (optional)
                    if connection_stats["total_audio_chunks"] % 100 == 0:  # Log every 100 chunks
                        logger.debug(f"Processed {connection_stats['total_audio_chunks']} audio chunks")

                elif not diart:
                    logger.warning("Received audio data but diarization system not initialized")
                    error_message = json.dumps({
                        "type": "error",
                        "message": "Diarization system not initialized",
                        "timestamp": time.time()
                    })
                    await websocket.send_text(error_message)

            except WebSocketDisconnect:
                # A disconnect is not a processing failure; let the outer
                # handler record it as a normal close.
                raise
            except Exception as e:
                logger.error(f"Error processing audio chunk: {e}")
                # Send error message to client
                error_message = json.dumps({
                    "type": "error",
                    "message": "Error processing audio",
                    "details": str(e),
                    "timestamp": time.time()
                })
                await websocket.send_text(error_message)

    except WebSocketDisconnect:
        logger.info(f"WebSocket {client_id} disconnected normally")
    except Exception as e:
        logger.error(f"WebSocket {client_id} error: {e}")
    finally:
        manager.disconnect(websocket)
@app.get("/conversation")
@app.head("/conversation")
async def get_conversation():
    """Return the current conversation HTML with a timestamp and engine status.

    Raises:
        HTTPException: 503 when the engine has not been created, 500 when
            reading the conversation or status fails.
    """
    if not diart:
        raise HTTPException(status_code=503, detail="Diarization system not initialized")

    try:
        html = diart.get_formatted_conversation()
        payload = {
            "conversation": html,
            "timestamp": time.time(),
            "system_status": diart.get_status_info() if hasattr(diart, 'get_status_info') else {}
        }
    except Exception as exc:
        logger.error(f"Error getting conversation: {exc}")
        raise HTTPException(status_code=500, detail="Error retrieving conversation")
    return payload
@app.get("/status")
@app.head("/status")
async def get_status():
    """Return the engine's status info merged with connection counters and uptime.

    Never raises: an uninitialized engine or any failure is reported in the
    response body instead.
    """
    if not diart:
        return {"status": "system_not_initialized"}

    try:
        engine_status = {}
        if hasattr(diart, 'get_status_info'):
            engine_status = diart.get_status_info()
        return {
            **engine_status,
            "connection_stats": connection_stats,
            "active_connections": len(manager.active_connections),
            "system_uptime": time.time() - connection_stats.get("system_start_time", time.time())
        }
    except Exception as exc:
        logger.error(f"Error getting status: {exc}")
        return {"status": "error", "message": str(exc)}
@app.post("/settings")
async def update_settings(threshold: float = None, max_speakers: int = None):
    """Update speaker detection settings.

    Args:
        threshold: Optional value in [0, 1], passed through to the engine.
        max_speakers: Optional speaker limit in [1, 20], passed through to the engine.

    Returns:
        The engine's result together with the values that were submitted.

    Raises:
        HTTPException: 503 when the engine has not been created, 400 for an
            out-of-range parameter, 500 when the engine rejects the update.
    """
    if not diart:
        raise HTTPException(status_code=503, detail="Diarization system not initialized")

    # Validate outside the try block: HTTPException derives from Exception, so
    # raising the 400s inside it would turn them into 500 responses.
    if threshold is not None and (threshold < 0 or threshold > 1):
        raise HTTPException(status_code=400, detail="Threshold must be between 0 and 1")
    if max_speakers is not None and (max_speakers < 1 or max_speakers > 20):
        raise HTTPException(status_code=400, detail="Max speakers must be between 1 and 20")

    try:
        result = diart.update_settings(threshold, max_speakers)
    except Exception as e:
        logger.error(f"Error updating settings: {e}")
        raise HTTPException(status_code=500, detail="Error updating settings") from e

    return {
        "result": result,
        "updated_settings": {
            "threshold": threshold,
            "max_speakers": max_speakers
        }
    }
@app.post("/clear")
async def clear_conversation():
    """Clear the conversation history and tell every connected client about it.

    Raises:
        HTTPException: 503 when the engine has not been created, 500 when
            clearing or broadcasting fails.
    """
    if not diart:
        raise HTTPException(status_code=503, detail="Diarization system not initialized")

    try:
        outcome = diart.clear_conversation()
        # Let connected clients know the history was cleared.
        notice = {
            "type": "conversation_cleared",
            "timestamp": time.time()
        }
        await manager.broadcast(json.dumps(notice))
        return {"result": outcome, "message": "Conversation cleared successfully"}
    except Exception as exc:
        logger.error(f"Error clearing conversation: {exc}")
        raise HTTPException(status_code=500, detail="Error clearing conversation")
@app.get("/stats")
async def get_connection_stats():
    """Return the shared counters, the manager's per-connection data and engine state."""
    manager_snapshot = manager.get_stats()
    # An engine that was never created counts as not running.
    engine_running = diart.is_running if diart else False
    system_info = {
        "diarization_running": engine_running,
        "total_active_connections": len(manager.active_connections)
    }
    return {
        "connection_stats": connection_stats,
        "manager_stats": manager_snapshot,
        "system_info": system_info
    }
# Mount UI if available
# The optional local `ui` module is expected to expose mount_ui(app); the
# service keeps running as a plain API when it cannot be imported or mounted.
try:
    import ui
    ui.mount_ui(app)
    logger.info("Gradio UI mounted successfully")
except ImportError:
    # Also reached when mount_ui itself raises ImportError.
    logger.warning("UI module not found, running in API-only mode")
except Exception as e:
    # Any other failure while importing or mounting is logged and ignored.
    logger.error(f"Error mounting UI: {e}")
# Initialize system start time
# Recorded at import time; the /status endpoint subtracts it from the current
# time to report "system_uptime".
connection_stats["system_start_time"] = time.time()
if __name__ == "__main__":
    # Run the API directly with uvicorn when executed as a script: listen on
    # all interfaces, port 7860, with info-level and access logging enabled.
    server_options = {
        "host": "0.0.0.0",
        "port": 7860,
        "log_level": "info",
        "access_log": True,
    }
    uvicorn.run(app, **server_options)