Spaces:
Sleeping
Sleeping
Commit
·
f541218
1
Parent(s):
1310d41
Check point 4
Browse files
app.py
CHANGED
@@ -419,7 +419,7 @@ class RealtimeSpeakerDiarization:
|
|
419 |
# Setup recorder configuration
|
420 |
recorder_config = {
|
421 |
'spinner': False,
|
422 |
-
'use_microphone': False, #
|
423 |
'model': FINAL_TRANSCRIPTION_MODEL,
|
424 |
'language': TRANSCRIPTION_LANGUAGE,
|
425 |
'silero_sensitivity': SILERO_SENSITIVITY,
|
@@ -558,6 +558,12 @@ class RealtimeSpeakerDiarization:
|
|
558 |
embedding = self.audio_processor.extract_embedding_from_buffer()
|
559 |
if embedding is not None:
|
560 |
self.speaker_detector.add_embedding(embedding)
|
|
|
|
|
|
|
|
|
|
|
|
|
561 |
|
562 |
except Exception as e:
|
563 |
logger.error(f"Error processing audio chunk: {e}")
|
@@ -630,7 +636,18 @@ class DiarizationHandler(AsyncStreamHandler):
|
|
630 |
|
631 |
# Global instances
|
632 |
diarization_system = RealtimeSpeakerDiarization()
|
633 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
634 |
|
635 |
def initialize_system():
|
636 |
"""Initialize the diarization system"""
|
@@ -639,6 +656,8 @@ def initialize_system():
|
|
639 |
success = diarization_system.initialize_models()
|
640 |
if success:
|
641 |
audio_handler = DiarizationHandler(diarization_system)
|
|
|
|
|
642 |
return "✅ System initialized successfully!"
|
643 |
else:
|
644 |
return "❌ Failed to initialize system. Check logs for details."
|
@@ -646,6 +665,13 @@ def initialize_system():
|
|
646 |
logger.error(f"Initialization error: {e}")
|
647 |
return f"❌ Initialization error: {str(e)}"
|
648 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
649 |
def start_recording():
|
650 |
"""Start recording and transcription"""
|
651 |
try:
|
@@ -831,9 +857,6 @@ def create_interface():
|
|
831 |
return interface
|
832 |
|
833 |
|
834 |
-
# FastAPI setup for FastRTC integration
|
835 |
-
app = FastAPI()
|
836 |
-
|
837 |
@app.get("/")
|
838 |
async def root():
|
839 |
return {"message": "Real-time Speaker Diarization API"}
|
@@ -875,12 +898,6 @@ async def api_update_settings(threshold: float, max_speakers: int):
|
|
875 |
result = update_settings(threshold, max_speakers)
|
876 |
return {"result": result}
|
877 |
|
878 |
-
# FastRTC Stream setup
|
879 |
-
if audio_handler:
|
880 |
-
stream = Stream(handler=audio_handler)
|
881 |
-
app.include_router(stream.router, prefix="/stream")
|
882 |
-
|
883 |
-
|
884 |
# Main execution
|
885 |
if __name__ == "__main__":
|
886 |
import argparse
|
|
|
419 |
# Setup recorder configuration
|
420 |
recorder_config = {
|
421 |
'spinner': False,
|
422 |
+
'use_microphone': False, # Explicitly set to False - we'll feed audio via FastRTC
|
423 |
'model': FINAL_TRANSCRIPTION_MODEL,
|
424 |
'language': TRANSCRIPTION_LANGUAGE,
|
425 |
'silero_sensitivity': SILERO_SENSITIVITY,
|
|
|
558 |
embedding = self.audio_processor.extract_embedding_from_buffer()
|
559 |
if embedding is not None:
|
560 |
self.speaker_detector.add_embedding(embedding)
|
561 |
+
|
562 |
+
# Feed audio to the RealtimeSTT recorder
|
563 |
+
if self.recorder:
|
564 |
+
# Convert float32 audio to int16 bytes format for RealtimeSTT
|
565 |
+
audio_bytes = (audio_data * 32768.0).astype(np.int16).tobytes()
|
566 |
+
self.recorder.feed_audio(audio_bytes)
|
567 |
|
568 |
except Exception as e:
|
569 |
logger.error(f"Error processing audio chunk: {e}")
|
|
|
636 |
|
637 |
# Global instances
|
638 |
diarization_system = RealtimeSpeakerDiarization()
|
639 |
+
|
640 |
+
# FastAPI setup for FastRTC integration
|
641 |
+
app = FastAPI()
|
642 |
+
|
643 |
+
# Create a placeholder handler at import time (initialize_system() builds a fresh one once the models have loaded)
|
644 |
+
audio_handler = DiarizationHandler(diarization_system)
|
645 |
+
|
646 |
+
# Create FastRTC stream
|
647 |
+
stream = Stream(handler=audio_handler)
|
648 |
+
|
649 |
+
# Include FastRTC router in FastAPI app
|
650 |
+
app.include_router(stream.router, prefix="/stream")
|
651 |
|
652 |
def initialize_system():
|
653 |
"""Initialize the diarization system"""
|
|
|
656 |
success = diarization_system.initialize_models()
|
657 |
if success:
|
658 |
audio_handler = DiarizationHandler(diarization_system)
|
659 |
+
# Point the existing stream at the new handler, replacing the one it was created with
|
660 |
+
stream.handler = audio_handler
|
661 |
return "✅ System initialized successfully!"
|
662 |
else:
|
663 |
return "❌ Failed to initialize system. Check logs for details."
|
|
|
665 |
logger.error(f"Initialization error: {e}")
|
666 |
return f"❌ Initialization error: {str(e)}"
|
667 |
|
668 |
+
# Add startup event to initialize the system
|
669 |
+
@app.on_event("startup")
|
670 |
+
async def startup_event():
|
671 |
+
logger.info("Initializing diarization system on startup...")
|
672 |
+
result = initialize_system()
|
673 |
+
logger.info(f"Initialization result: {result}")
|
674 |
+
|
675 |
def start_recording():
|
676 |
"""Start recording and transcription"""
|
677 |
try:
|
|
|
857 |
return interface
|
858 |
|
859 |
|
|
|
|
|
|
|
860 |
@app.get("/")
|
861 |
async def root():
|
862 |
return {"message": "Real-time Speaker Diarization API"}
|
|
|
898 |
result = update_settings(threshold, max_speakers)
|
899 |
return {"result": result}
|
900 |
|
|
|
|
|
|
|
|
|
|
|
|
|
901 |
# Main execution
|
902 |
if __name__ == "__main__":
|
903 |
import argparse
|