Spaces:

AndroidGuy
/

Speaker-Diarization

Running

App Files Community

Saiyaswanth007 committed on May 28

Commit

99ecc54

1 Parent(s): 5c73715

Continuous logs of faster-whisper

Browse files

Files changed (3) hide show

inference.py +13 -2
shared.py +32 -3
test_websocket.py +0 -1

inference.py CHANGED Viewed

@@ -69,6 +69,11 @@ class ConnectionManager:
         connection_stats["current_connections"] = len(self.active_connections)
         connection_stats["total_connections"] += 1
         logger.info(f"WebSocket connected: {self.connection_metadata[websocket]['client_id']}. "
                    f"Total connections: {len(self.active_connections)}")
@@ -83,6 +88,11 @@ class ConnectionManager:
             connection_stats["current_connections"] = len(self.active_connections)
             logger.info(f"WebSocket disconnected: {client_id}. "
                        f"Remaining connections: {len(self.active_connections)}")
@@ -130,8 +140,9 @@ async def initialize_diarization_system():
         if success:
             logger.info("Models initialized successfully")
-            diart.start_recording()
-            logger.info("Recording started")
             return True
         else:
             logger.error("Failed to initialize models")

         connection_stats["current_connections"] = len(self.active_connections)
         connection_stats["total_connections"] += 1
+        # Start recording if this is the first connection and system is ready
+        if len(self.active_connections) == 1 and diart and not diart.is_running:
+            logger.info("First connection established, starting recording")
+            diart.start_recording()
         logger.info(f"WebSocket connected: {self.connection_metadata[websocket]['client_id']}. "
                    f"Total connections: {len(self.active_connections)}")
             connection_stats["current_connections"] = len(self.active_connections)
+            # If no more connections, stop recording to save resources
+            if len(self.active_connections) == 0 and diart and diart.is_running:
+                logger.info("No active connections, stopping recording")
+                diart.stop_recording()
             logger.info(f"WebSocket disconnected: {client_id}. "
                        f"Remaining connections: {len(self.active_connections)}")
         if success:
             logger.info("Models initialized successfully")
+            # Don't start recording yet - wait for an actual connection
+            # diart.start_recording()
+            logger.info("System ready for connections")
             return True
         else:
             logger.error("Failed to initialize models")

shared.py CHANGED Viewed

@@ -347,7 +347,11 @@ class RealtimeSpeakerDiarization:
                     realtime_processing_pause=0,
                     realtime_model_type=REALTIME_TRANSCRIPTION_MODEL,
                     on_realtime_transcription_update=self.live_text_detected,
-                    on_recording_stop=self.process_final_text
                 )
                 logger.info("Models initialized successfully!")
@@ -452,8 +456,16 @@ class RealtimeSpeakerDiarization:
             self.sentence_thread = threading.Thread(target=self.process_sentence_queue, daemon=True)
             self.sentence_thread.start()
-            # Start the RealtimeSTT recorder if not already started
-            if self.recorder and not getattr(self.recorder, '_is_running', False):
                 self.recorder.start()
                 logger.info("RealtimeSTT recorder started")
@@ -472,6 +484,10 @@ class RealtimeSpeakerDiarization:
             try:
                 self.recorder.stop()
                 logger.info("RealtimeSTT recorder stopped")
             except Exception as e:
                 logger.error(f"Error stopping recorder: {e}")
@@ -616,6 +632,19 @@ class RealtimeSpeakerDiarization:
             if len(audio_data.shape) > 1:
                 audio_data = np.mean(audio_data, axis=1) if audio_data.shape[1] > 1 else audio_data.flatten()
             # Normalize if needed
             if np.max(np.abs(audio_data)) > 1.0:
                 audio_data = audio_data / np.max(np.abs(audio_data))

                     realtime_processing_pause=0,
                     realtime_model_type=REALTIME_TRANSCRIPTION_MODEL,
                     on_realtime_transcription_update=self.live_text_detected,
+                    on_recording_stop=self.process_final_text,
+                    # Add setting to reduce log spam
+                    verbose=False,
+                    # Don't start processing immediately
+                    start_on_init=False
                 )
                 logger.info("Models initialized successfully!")
             self.sentence_thread = threading.Thread(target=self.process_sentence_queue, daemon=True)
             self.sentence_thread.start()
+            # Start the RealtimeSTT recorder explicitly
+            if self.recorder:
+                # First make sure it's stopped if it was running
+                try:
+                    if getattr(self.recorder, '_is_running', False):
+                        self.recorder.stop()
+                except Exception:
+                    pass
+                # Then start it fresh
                 self.recorder.start()
                 logger.info("RealtimeSTT recorder started")
             try:
                 self.recorder.stop()
                 logger.info("RealtimeSTT recorder stopped")
+                # Reset the last transcription
+                with self.transcription_lock:
+                    self.last_transcription = ""
             except Exception as e:
                 logger.error(f"Error stopping recorder: {e}")
             if len(audio_data.shape) > 1:
                 audio_data = np.mean(audio_data, axis=1) if audio_data.shape[1] > 1 else audio_data.flatten()
+            # Check if audio has meaningful content (not just silence)
+            audio_level = np.abs(audio_data).mean()
+            is_silence = audio_level < 0.01  # Threshold for silence
+            # Skip processing for silent audio
+            if is_silence:
+                return {
+                    "status": "silent",
+                    "buffer_size": len(self.audio_processor.audio_buffer),
+                    "speaker_id": self.speaker_detector.current_speaker,
+                    "conversation_html": self.current_conversation
+                }
             # Normalize if needed
             if np.max(np.abs(audio_data)) > 1.0:
                 audio_data = audio_data / np.max(np.abs(audio_data))

test_websocket.py CHANGED Viewed

@@ -15,7 +15,6 @@ async def test_ws():
             audio = (np.random.randn(3200) * 3000).astype(np.int16)
             await websocket.send(audio.tobytes())
             print(f"Sent audio chunk {i+1}/20")
-            await asyncio.sleep(0.05)
         try:
             while True:

             audio = (np.random.randn(3200) * 3000).astype(np.int16)
             await websocket.send(audio.tobytes())
             print(f"Sent audio chunk {i+1}/20")
         try:
             while True: