Saiyaswanth007 committed on
Commit
f541218
·
1 Parent(s): 1310d41

Checkpoint 4

Browse files
Files changed (1) hide show
  1. app.py +28 -11
app.py CHANGED
@@ -419,7 +419,7 @@ class RealtimeSpeakerDiarization:
419
  # Setup recorder configuration
420
  recorder_config = {
421
  'spinner': False,
422
- 'use_microphone': False, # Changed to True for direct microphone input
423
  'model': FINAL_TRANSCRIPTION_MODEL,
424
  'language': TRANSCRIPTION_LANGUAGE,
425
  'silero_sensitivity': SILERO_SENSITIVITY,
@@ -558,6 +558,12 @@ class RealtimeSpeakerDiarization:
558
  embedding = self.audio_processor.extract_embedding_from_buffer()
559
  if embedding is not None:
560
  self.speaker_detector.add_embedding(embedding)
 
 
 
 
 
 
561
 
562
  except Exception as e:
563
  logger.error(f"Error processing audio chunk: {e}")
@@ -630,7 +636,18 @@ class DiarizationHandler(AsyncStreamHandler):
630
 
631
  # Global instances
632
  diarization_system = RealtimeSpeakerDiarization()
633
- audio_handler = None
 
 
 
 
 
 
 
 
 
 
 
634
 
635
  def initialize_system():
636
  """Initialize the diarization system"""
@@ -639,6 +656,8 @@ def initialize_system():
639
  success = diarization_system.initialize_models()
640
  if success:
641
  audio_handler = DiarizationHandler(diarization_system)
 
 
642
  return "✅ System initialized successfully!"
643
  else:
644
  return "❌ Failed to initialize system. Check logs for details."
@@ -646,6 +665,13 @@ def initialize_system():
646
  logger.error(f"Initialization error: {e}")
647
  return f"❌ Initialization error: {str(e)}"
648
 
 
 
 
 
 
 
 
649
  def start_recording():
650
  """Start recording and transcription"""
651
  try:
@@ -831,9 +857,6 @@ def create_interface():
831
  return interface
832
 
833
 
834
- # FastAPI setup for FastRTC integration
835
- app = FastAPI()
836
-
837
  @app.get("/")
838
  async def root():
839
  return {"message": "Real-time Speaker Diarization API"}
@@ -875,12 +898,6 @@ async def api_update_settings(threshold: float, max_speakers: int):
875
  result = update_settings(threshold, max_speakers)
876
  return {"result": result}
877
 
878
- # FastRTC Stream setup
879
- if audio_handler:
880
- stream = Stream(handler=audio_handler)
881
- app.include_router(stream.router, prefix="/stream")
882
-
883
-
884
  # Main execution
885
  if __name__ == "__main__":
886
  import argparse
 
419
  # Setup recorder configuration
420
  recorder_config = {
421
  'spinner': False,
422
+ 'use_microphone': False, # Explicitly set to False - we'll feed audio via FastRTC
423
  'model': FINAL_TRANSCRIPTION_MODEL,
424
  'language': TRANSCRIPTION_LANGUAGE,
425
  'silero_sensitivity': SILERO_SENSITIVITY,
 
558
  embedding = self.audio_processor.extract_embedding_from_buffer()
559
  if embedding is not None:
560
  self.speaker_detector.add_embedding(embedding)
561
+
562
+ # Feed audio to the RealtimeSTT recorder
563
+ if self.recorder:
564
+ # Convert float32 audio to int16 bytes format for RealtimeSTT
565
+ audio_bytes = (audio_data * 32768.0).astype(np.int16).tobytes()
566
+ self.recorder.feed_audio(audio_bytes)
567
 
568
  except Exception as e:
569
  logger.error(f"Error processing audio chunk: {e}")
 
636
 
637
  # Global instances
638
  diarization_system = RealtimeSpeakerDiarization()
639
+
640
+ # FastAPI setup for FastRTC integration
641
+ app = FastAPI()
642
+
643
+ # Create a placeholder handler so the stream can be built now (initialize_system() replaces it once models load)
644
+ audio_handler = DiarizationHandler(diarization_system)
645
+
646
+ # Create FastRTC stream
647
+ stream = Stream(handler=audio_handler)
648
+
649
+ # Include FastRTC router in FastAPI app
650
+ app.include_router(stream.router, prefix="/stream")
651
 
652
  def initialize_system():
653
  """Initialize the diarization system"""
 
656
  success = diarization_system.initialize_models()
657
  if success:
658
  audio_handler = DiarizationHandler(diarization_system)
659
+ # Update the stream's handler
660
+ stream.handler = audio_handler
661
  return "✅ System initialized successfully!"
662
  else:
663
  return "❌ Failed to initialize system. Check logs for details."
 
665
  logger.error(f"Initialization error: {e}")
666
  return f"❌ Initialization error: {str(e)}"
667
 
668
+ # Add startup event to initialize the system
669
+ @app.on_event("startup")
670
+ async def startup_event():
671
+ logger.info("Initializing diarization system on startup...")
672
+ result = initialize_system()
673
+ logger.info(f"Initialization result: {result}")
674
+
675
  def start_recording():
676
  """Start recording and transcription"""
677
  try:
 
857
  return interface
858
 
859
 
 
 
 
860
  @app.get("/")
861
  async def root():
862
  return {"message": "Real-time Speaker Diarization API"}
 
898
  result = update_settings(threshold, max_speakers)
899
  return {"result": result}
900
 
 
 
 
 
 
 
901
  # Main execution
902
  if __name__ == "__main__":
903
  import argparse