Spaces:
Running
Running
Commit
·
99ecc54
1
Parent(s):
5c73715
Continuous logs of faster-whisper
Browse files
- inference.py +13 -2
- shared.py +32 -3
- test_websocket.py +0 -1
inference.py
CHANGED
@@ -69,6 +69,11 @@ class ConnectionManager:
|
|
69 |
connection_stats["current_connections"] = len(self.active_connections)
|
70 |
connection_stats["total_connections"] += 1
|
71 |
|
|
|
|
|
|
|
|
|
|
|
72 |
logger.info(f"WebSocket connected: {self.connection_metadata[websocket]['client_id']}. "
|
73 |
f"Total connections: {len(self.active_connections)}")
|
74 |
|
@@ -83,6 +88,11 @@ class ConnectionManager:
|
|
83 |
|
84 |
connection_stats["current_connections"] = len(self.active_connections)
|
85 |
|
|
|
|
|
|
|
|
|
|
|
86 |
logger.info(f"WebSocket disconnected: {client_id}. "
|
87 |
f"Remaining connections: {len(self.active_connections)}")
|
88 |
|
@@ -130,8 +140,9 @@ async def initialize_diarization_system():
|
|
130 |
|
131 |
if success:
|
132 |
logger.info("Models initialized successfully")
|
133 |
-
|
134 |
-
|
|
|
135 |
return True
|
136 |
else:
|
137 |
logger.error("Failed to initialize models")
|
|
|
69 |
connection_stats["current_connections"] = len(self.active_connections)
|
70 |
connection_stats["total_connections"] += 1
|
71 |
|
72 |
+
# Start recording if this is the first connection and system is ready
|
73 |
+
if len(self.active_connections) == 1 and diart and not diart.is_running:
|
74 |
+
logger.info("First connection established, starting recording")
|
75 |
+
diart.start_recording()
|
76 |
+
|
77 |
logger.info(f"WebSocket connected: {self.connection_metadata[websocket]['client_id']}. "
|
78 |
f"Total connections: {len(self.active_connections)}")
|
79 |
|
|
|
88 |
|
89 |
connection_stats["current_connections"] = len(self.active_connections)
|
90 |
|
91 |
+
# If no more connections, stop recording to save resources
|
92 |
+
if len(self.active_connections) == 0 and diart and diart.is_running:
|
93 |
+
logger.info("No active connections, stopping recording")
|
94 |
+
diart.stop_recording()
|
95 |
+
|
96 |
logger.info(f"WebSocket disconnected: {client_id}. "
|
97 |
f"Remaining connections: {len(self.active_connections)}")
|
98 |
|
|
|
140 |
|
141 |
if success:
|
142 |
logger.info("Models initialized successfully")
|
143 |
+
# Don't start recording yet - wait for an actual connection
|
144 |
+
# diart.start_recording()
|
145 |
+
logger.info("System ready for connections")
|
146 |
return True
|
147 |
else:
|
148 |
logger.error("Failed to initialize models")
|
shared.py
CHANGED
@@ -347,7 +347,11 @@ class RealtimeSpeakerDiarization:
|
|
347 |
realtime_processing_pause=0,
|
348 |
realtime_model_type=REALTIME_TRANSCRIPTION_MODEL,
|
349 |
on_realtime_transcription_update=self.live_text_detected,
|
350 |
-
on_recording_stop=self.process_final_text
|
|
|
|
|
|
|
|
|
351 |
)
|
352 |
|
353 |
logger.info("Models initialized successfully!")
|
@@ -452,8 +456,16 @@ class RealtimeSpeakerDiarization:
|
|
452 |
self.sentence_thread = threading.Thread(target=self.process_sentence_queue, daemon=True)
|
453 |
self.sentence_thread.start()
|
454 |
|
455 |
-
# Start the RealtimeSTT recorder
|
456 |
-
if self.recorder
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
457 |
self.recorder.start()
|
458 |
logger.info("RealtimeSTT recorder started")
|
459 |
|
@@ -472,6 +484,10 @@ class RealtimeSpeakerDiarization:
|
|
472 |
try:
|
473 |
self.recorder.stop()
|
474 |
logger.info("RealtimeSTT recorder stopped")
|
|
|
|
|
|
|
|
|
475 |
except Exception as e:
|
476 |
logger.error(f"Error stopping recorder: {e}")
|
477 |
|
@@ -616,6 +632,19 @@ class RealtimeSpeakerDiarization:
|
|
616 |
if len(audio_data.shape) > 1:
|
617 |
audio_data = np.mean(audio_data, axis=1) if audio_data.shape[1] > 1 else audio_data.flatten()
|
618 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
619 |
# Normalize if needed
|
620 |
if np.max(np.abs(audio_data)) > 1.0:
|
621 |
audio_data = audio_data / np.max(np.abs(audio_data))
|
|
|
347 |
realtime_processing_pause=0,
|
348 |
realtime_model_type=REALTIME_TRANSCRIPTION_MODEL,
|
349 |
on_realtime_transcription_update=self.live_text_detected,
|
350 |
+
on_recording_stop=self.process_final_text,
|
351 |
+
# Add setting to reduce log spam
|
352 |
+
verbose=False,
|
353 |
+
# Don't start processing immediately
|
354 |
+
start_on_init=False
|
355 |
)
|
356 |
|
357 |
logger.info("Models initialized successfully!")
|
|
|
456 |
self.sentence_thread = threading.Thread(target=self.process_sentence_queue, daemon=True)
|
457 |
self.sentence_thread.start()
|
458 |
|
459 |
+
# Start the RealtimeSTT recorder explicitly
|
460 |
+
if self.recorder:
|
461 |
+
# First make sure it's stopped if it was running
|
462 |
+
try:
|
463 |
+
if getattr(self.recorder, '_is_running', False):
|
464 |
+
self.recorder.stop()
|
465 |
+
except Exception:
|
466 |
+
pass
|
467 |
+
|
468 |
+
# Then start it fresh
|
469 |
self.recorder.start()
|
470 |
logger.info("RealtimeSTT recorder started")
|
471 |
|
|
|
484 |
try:
|
485 |
self.recorder.stop()
|
486 |
logger.info("RealtimeSTT recorder stopped")
|
487 |
+
|
488 |
+
# Reset the last transcription
|
489 |
+
with self.transcription_lock:
|
490 |
+
self.last_transcription = ""
|
491 |
except Exception as e:
|
492 |
logger.error(f"Error stopping recorder: {e}")
|
493 |
|
|
|
632 |
if len(audio_data.shape) > 1:
|
633 |
audio_data = np.mean(audio_data, axis=1) if audio_data.shape[1] > 1 else audio_data.flatten()
|
634 |
|
635 |
+
# Check if audio has meaningful content (not just silence)
|
636 |
+
audio_level = np.abs(audio_data).mean()
|
637 |
+
is_silence = audio_level < 0.01 # Threshold for silence
|
638 |
+
|
639 |
+
# Skip processing for silent audio
|
640 |
+
if is_silence:
|
641 |
+
return {
|
642 |
+
"status": "silent",
|
643 |
+
"buffer_size": len(self.audio_processor.audio_buffer),
|
644 |
+
"speaker_id": self.speaker_detector.current_speaker,
|
645 |
+
"conversation_html": self.current_conversation
|
646 |
+
}
|
647 |
+
|
648 |
# Normalize if needed
|
649 |
if np.max(np.abs(audio_data)) > 1.0:
|
650 |
audio_data = audio_data / np.max(np.abs(audio_data))
|
test_websocket.py
CHANGED
@@ -15,7 +15,6 @@ async def test_ws():
|
|
15 |
audio = (np.random.randn(3200) * 3000).astype(np.int16)
|
16 |
await websocket.send(audio.tobytes())
|
17 |
print(f"Sent audio chunk {i+1}/20")
|
18 |
-
await asyncio.sleep(0.05)
|
19 |
|
20 |
try:
|
21 |
while True:
|
|
|
15 |
audio = (np.random.randn(3200) * 3000).astype(np.int16)
|
16 |
await websocket.send(audio.tobytes())
|
17 |
print(f"Sent audio chunk {i+1}/20")
|
|
|
18 |
|
19 |
try:
|
20 |
while True:
|