Saiyaswanth007 commited on
Commit
35b21b4
·
1 Parent(s): 29eb5aa

Revert portg

Browse files
Files changed (1) hide show
  1. app.py +200 -108
app.py CHANGED
@@ -562,53 +562,89 @@ class RealtimeSpeakerDiarization:
562
 
563
  # FastRTC Audio Handler
564
  # FastRTC Audio Handler for Real-time Diarization
 
 
 
 
 
 
 
 
 
 
 
565
 
566
-
567
- class DiarizationHandler:
568
  def __init__(self, diarization_system):
 
569
  self.diarization_system = diarization_system
570
  self.audio_queue = Queue()
571
  self.is_processing = False
 
572
 
573
  def copy(self):
574
- # Return a fresh handler for each new stream connection
575
  return DiarizationHandler(self.diarization_system)
576
 
577
- async def on_audio_frame(self, frame: AudioFrame):
578
- """Handle incoming audio frames from FastRTC"""
 
 
 
 
579
  try:
580
- if self.diarization_system.is_running and frame.data is not None:
581
- # Convert audio frame to numpy array
582
- if isinstance(frame.data, bytes):
583
- # Convert bytes to numpy array (assuming 16-bit PCM)
584
- audio_data = np.frombuffer(frame.data, dtype=np.int16)
585
- elif hasattr(frame, 'to_ndarray'):
586
- audio_data = frame.to_ndarray()
587
- else:
588
- audio_data = np.array(frame.data, dtype=np.float32)
589
-
590
- # Ensure audio is in the right format (mono, float32, -1 to 1 range)
591
- if audio_data.dtype == np.int16:
592
- audio_data = audio_data.astype(np.float32) / 32768.0
593
 
594
- # If stereo, convert to mono
595
- if len(audio_data.shape) > 1:
596
- audio_data = np.mean(audio_data, axis=1)
597
-
598
- # Feed to diarization system
599
- await self.process_audio_async(audio_data, frame.sample_rate)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
600
 
601
  except Exception as e:
602
- print(f"Error processing audio frame: {e}")
 
 
603
 
604
  async def process_audio_async(self, audio_data, sample_rate=16000):
605
  """Process audio data asynchronously"""
606
  try:
607
- # Run in thread pool to avoid blocking
608
  loop = asyncio.get_event_loop()
609
  await loop.run_in_executor(
610
  None,
611
- self.diarization_system.feed_audio_data,
612
  audio_data,
613
  sample_rate
614
  )
@@ -616,15 +652,19 @@ class DiarizationHandler:
616
  print(f"Error in async audio processing: {e}")
617
 
618
 
619
- # Global instance
620
- diarization_system = RealtimeSpeakerDiarization()
621
  audio_handler = None
622
 
623
 
624
  def initialize_system():
625
  """Initialize the diarization system"""
626
- global audio_handler
627
  try:
 
 
 
 
628
  success = diarization_system.initialize_models()
629
  if success:
630
  audio_handler = DiarizationHandler(diarization_system)
@@ -632,12 +672,15 @@ def initialize_system():
632
  else:
633
  return "❌ Failed to initialize system. Please check the logs."
634
  except Exception as e:
 
635
  return f"❌ Initialization error: {str(e)}"
636
 
637
 
638
  def start_recording():
639
  """Start recording and transcription"""
640
  try:
 
 
641
  result = diarization_system.start_recording()
642
  return f"🎙️ {result} - FastRTC audio streaming is active."
643
  except Exception as e:
@@ -647,6 +690,8 @@ def start_recording():
647
  def stop_recording():
648
  """Stop recording and transcription"""
649
  try:
 
 
650
  result = diarization_system.stop_recording()
651
  return f"⏹️ {result}"
652
  except Exception as e:
@@ -656,6 +701,8 @@ def stop_recording():
656
  def clear_conversation():
657
  """Clear the conversation"""
658
  try:
 
 
659
  result = diarization_system.clear_conversation()
660
  return f"🗑️ {result}"
661
  except Exception as e:
@@ -665,6 +712,8 @@ def clear_conversation():
665
  def update_settings(threshold, max_speakers):
666
  """Update system settings"""
667
  try:
 
 
668
  result = diarization_system.update_settings(threshold, max_speakers)
669
  return f"⚙️ {result}"
670
  except Exception as e:
@@ -674,6 +723,8 @@ def update_settings(threshold, max_speakers):
674
  def get_conversation():
675
  """Get the current conversation"""
676
  try:
 
 
677
  return diarization_system.get_formatted_conversation()
678
  except Exception as e:
679
  return f"<i>Error getting conversation: {str(e)}</i>"
@@ -682,6 +733,8 @@ def get_conversation():
682
  def get_status():
683
  """Get system status"""
684
  try:
 
 
685
  return diarization_system.get_status_info()
686
  except Exception as e:
687
  return f"Error getting status: {str(e)}"
@@ -709,12 +762,17 @@ def create_interface():
709
  stop_btn = gr.Button("⏹️ Stop Recording", variant="stop", size="lg", interactive=False)
710
  clear_btn = gr.Button("🗑️ Clear", variant="secondary", size="lg", interactive=False)
711
 
712
- # Audio connection status
713
  with gr.Row():
714
- connection_status = gr.HTML(
715
- value="<div style='padding: 10px; background: #fff3cd; border-radius: 5px;'>🔌 FastRTC: Not connected</div>",
716
- label="Connection Status"
717
- )
 
 
 
 
 
718
 
719
  # Status display
720
  status_output = gr.Textbox(
@@ -749,26 +807,39 @@ def create_interface():
749
  update_settings_btn = gr.Button("Update Settings", variant="secondary")
750
 
751
  # Audio settings
752
- gr.Markdown("## 🔊 Audio Settings")
753
- gr.Markdown("""
754
- **Recommended settings:**
755
- - Use a good quality microphone
756
- - Ensure stable internet connection
757
- - Speak clearly and avoid background noise
758
- - Position microphone 6-12 inches from mouth
759
- """)
 
 
760
 
761
  # Instructions
762
  gr.Markdown("## 📝 How to Use")
763
  gr.Markdown("""
764
  1. **Initialize**: Click "Initialize System" to load AI models
765
- 2. **Connect**: Allow microphone access when prompted
766
- 3. **Start**: Click "Start Recording" to begin processing
767
- 4. **Speak**: Talk into your microphone naturally
768
- 5. **Monitor**: Watch real-time transcription with speaker labels
769
- 6. **Adjust**: Fine-tune settings as needed
770
  """)
771
 
 
 
 
 
 
 
 
 
 
 
 
772
  # Speaker color legend
773
  gr.Markdown("## 🎨 Speaker Colors")
774
  speaker_colors = [
@@ -784,28 +855,19 @@ def create_interface():
784
 
785
  color_html = ""
786
  for i, (color, name) in enumerate(speaker_colors[:4]):
787
- color_html += f'<div style="display: inline-block; margin: 5px;"><span style="color:{color}; font-size: 20px;">●</span> Speaker {i+1} ({name})</div><br>'
788
 
789
- gr.HTML(color_html)
790
 
791
  # Auto-refresh conversation and status
792
  def refresh_display():
793
  try:
794
  conversation = get_conversation()
795
  status = get_status()
796
-
797
- # Update connection status based on system state
798
- if diarization_system.is_running:
799
- conn_status = "<div style='padding: 10px; background: #d4edda; border-radius: 5px;'>🟢 FastRTC: Connected & Recording</div>"
800
- elif hasattr(diarization_system, 'encoder') and diarization_system.encoder is not None:
801
- conn_status = "<div style='padding: 10px; background: #d1ecf1; border-radius: 5px;'>🔵 FastRTC: Ready to connect</div>"
802
- else:
803
- conn_status = "<div style='padding: 10px; background: #f8d7da; border-radius: 5px;'>🔴 FastRTC: System not initialized</div>"
804
-
805
- return conversation, status, conn_status
806
  except Exception as e:
807
  error_msg = f"Error refreshing display: {str(e)}"
808
- return f"<i>{error_msg}</i>", error_msg, "<div style='padding: 10px; background: #f8d7da; border-radius: 5px;'>❌ FastRTC: Error</div>"
809
 
810
  # Event handlers
811
  def on_initialize():
@@ -813,14 +875,13 @@ def create_interface():
813
  result = initialize_system()
814
  success = "successfully" in result.lower()
815
 
816
- conversation, status, conn_status = refresh_display()
817
 
818
  return (
819
  result, # status_output
820
  gr.update(interactive=success), # start_btn
821
  gr.update(interactive=success), # clear_btn
822
  conversation, # conversation_output
823
- conn_status # connection_status
824
  )
825
  except Exception as e:
826
  error_msg = f"❌ Initialization failed: {str(e)}"
@@ -829,19 +890,15 @@ def create_interface():
829
  gr.update(interactive=False),
830
  gr.update(interactive=False),
831
  "<i>System not ready</i>",
832
- "<div style='padding: 10px; background: #f8d7da; border-radius: 5px;'>❌ FastRTC: Initialization failed</div>"
833
  )
834
 
835
  def on_start():
836
  try:
837
  result = start_recording()
838
- conversation, status, conn_status = refresh_display()
839
-
840
  return (
841
  result, # status_output
842
  gr.update(interactive=False), # start_btn
843
  gr.update(interactive=True), # stop_btn
844
- conn_status # connection_status
845
  )
846
  except Exception as e:
847
  error_msg = f"❌ Failed to start: {str(e)}"
@@ -849,19 +906,15 @@ def create_interface():
849
  error_msg,
850
  gr.update(interactive=True),
851
  gr.update(interactive=False),
852
- "<div style='padding: 10px; background: #f8d7da; border-radius: 5px;'>❌ FastRTC: Start failed</div>"
853
  )
854
 
855
  def on_stop():
856
  try:
857
  result = stop_recording()
858
- conversation, status, conn_status = refresh_display()
859
-
860
  return (
861
  result, # status_output
862
  gr.update(interactive=True), # start_btn
863
  gr.update(interactive=False), # stop_btn
864
- conn_status # connection_status
865
  )
866
  except Exception as e:
867
  error_msg = f"❌ Failed to stop: {str(e)}"
@@ -869,13 +922,12 @@ def create_interface():
869
  error_msg,
870
  gr.update(interactive=False),
871
  gr.update(interactive=True),
872
- "<div style='padding: 10px; background: #f8d7da; border-radius: 5px;'>❌ FastRTC: Stop failed</div>"
873
  )
874
 
875
  def on_clear():
876
  try:
877
  result = clear_conversation()
878
- conversation, status, conn_status = refresh_display()
879
  return result, conversation
880
  except Exception as e:
881
  error_msg = f"❌ Failed to clear: {str(e)}"
@@ -891,17 +943,17 @@ def create_interface():
891
  # Connect event handlers
892
  init_btn.click(
893
  on_initialize,
894
- outputs=[status_output, start_btn, clear_btn, conversation_output, connection_status]
895
  )
896
 
897
  start_btn.click(
898
  on_start,
899
- outputs=[status_output, start_btn, stop_btn, connection_status]
900
  )
901
 
902
  stop_btn.click(
903
  on_stop,
904
- outputs=[status_output, start_btn, stop_btn, connection_status]
905
  )
906
 
907
  clear_btn.click(
@@ -919,15 +971,15 @@ def create_interface():
919
  refresh_timer = gr.Timer(2.0)
920
  refresh_timer.tick(
921
  refresh_display,
922
- outputs=[conversation_output, status_output, connection_status]
923
  )
924
 
925
  return interface
926
 
927
 
928
- # FastAPI setup for HuggingFace Spaces
929
  def create_fastapi_app():
930
- """Create FastAPI app with proper FastRTC integration"""
931
  app = FastAPI(
932
  title="Real-time Speaker Diarization",
933
  description="Real-time speech recognition with speaker diarization using FastRTC",
@@ -943,8 +995,8 @@ def create_fastapi_app():
943
  return {
944
  "status": "healthy",
945
  "timestamp": time.time(),
946
- "system_initialized": hasattr(diarization_system, 'encoder') and diarization_system.encoder is not None,
947
- "recording_active": diarization_system.is_running if hasattr(diarization_system, 'is_running') else False
948
  }
949
 
950
  @router.get("/api/conversation")
@@ -954,7 +1006,7 @@ def create_fastapi_app():
954
  return {
955
  "conversation": get_conversation(),
956
  "status": get_status(),
957
- "is_recording": diarization_system.is_running if hasattr(diarization_system, 'is_running') else False,
958
  "timestamp": time.time()
959
  }
960
  except Exception as e:
@@ -977,40 +1029,62 @@ def create_fastapi_app():
977
 
978
  return {
979
  "result": result,
980
- "is_recording": diarization_system.is_running if hasattr(diarization_system, 'is_running') else False,
981
  "timestamp": time.time()
982
  }
983
  except Exception as e:
984
  return {"error": str(e), "timestamp": time.time()}
985
 
986
- # FastRTC WebSocket endpoint for audio streaming
987
- @router.websocket("/ws/audio")
988
- async def websocket_audio_endpoint(websocket):
989
- """WebSocket endpoint for FastRTC audio streaming"""
990
- await websocket.accept()
991
-
992
- try:
993
- while True:
994
- # Receive audio data from FastRTC client
995
- data = await websocket.receive_bytes()
996
-
997
- if audio_handler and diarization_system.is_running:
998
- # Create audio frame and process
999
- frame = AudioFrame(data=data, sample_rate=16000)
1000
- await audio_handler.on_audio_frame(frame)
1001
-
1002
- except Exception as e:
1003
- print(f"WebSocket error: {e}")
1004
- finally:
1005
- await websocket.close()
1006
-
1007
  app.include_router(router)
1008
  return app
1009
 
1010
 
1011
- # Main application entry point
1012
- def create_app():
1013
- """Create the complete application for HuggingFace Spaces"""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1014
 
1015
  # Create FastAPI app
1016
  fastapi_app = create_fastapi_app()
@@ -1021,12 +1095,19 @@ def create_app():
1021
  # Mount Gradio on FastAPI
1022
  app = gr.mount_gradio_app(fastapi_app, gradio_interface, path="/")
1023
 
 
 
 
1024
  return app, gradio_interface
1025
 
1026
 
1027
  # Entry point for HuggingFace Spaces
1028
  if __name__ == "__main__":
1029
  try:
 
 
 
 
1030
  # Create the application
1031
  app, interface = create_app()
1032
 
@@ -1041,6 +1122,9 @@ if __name__ == "__main__":
1041
 
1042
  except Exception as e:
1043
  print(f"Failed to launch application: {e}")
 
 
 
1044
  # Fallback - launch just Gradio interface
1045
  try:
1046
  interface = create_interface()
@@ -1050,4 +1134,12 @@ if __name__ == "__main__":
1050
  share=False
1051
  )
1052
  except Exception as fallback_error:
1053
- print(f"Fallback launch also failed: {fallback_error}")
 
 
 
 
 
 
 
 
 
562
 
563
  # FastRTC Audio Handler
564
  # FastRTC Audio Handler for Real-time Diarization
565
+ # FastRTC Audio Handler for Real-time Diarization
566
+ import asyncio
567
+ import numpy as np
568
+ from fastrtc import AsyncStreamHandler, Stream
569
+ from fastapi import FastAPI, APIRouter
570
+ import gradio as gr
571
+ import time
572
+ import os
573
+ import threading
574
+ from queue import Queue
575
+ import json
576
 
577
class DiarizationHandler(AsyncStreamHandler):
    """FastRTC stream handler that forwards incoming audio to the diarization system.

    Each received frame is converted to a mono, 1-D float32 array and handed to
    ``diarization_system.process_audio_chunk`` on the default executor, so the
    event loop is not blocked by audio processing. Nothing is sent back to the
    client (``emit`` always returns ``None``).
    """

    def __init__(self, diarization_system):
        """Store the shared diarization system and set per-connection defaults."""
        super().__init__()
        self.diarization_system = diarization_system
        self.audio_queue = Queue()
        self.is_processing = False
        self.sample_rate = 16000  # Fallback when a frame carries no rate of its own

    def copy(self):
        """Return a fresh handler for each new stream connection."""
        return DiarizationHandler(self.diarization_system)

    async def emit(self):
        """Not used in this implementation - we only receive audio."""
        return None

    async def receive(self, frame):
        """Convert one incoming frame to mono float32 and queue it for processing.

        Frames are ignored while the diarization system is not running. Any
        error is printed (with traceback) and swallowed so a single bad frame
        does not tear down the stream.
        """
        try:
            if not self.diarization_system.is_running:
                return

            audio_data, sample_rate = self._unpack_frame(frame)
            audio_array = self._to_mono_float32(audio_data)
            if audio_array is None:
                print(f"Unknown audio data type: {type(audio_data)}")
                return

            # Process audio asynchronously to avoid blocking
            await self.process_audio_async(audio_array, sample_rate)

        except Exception as e:
            print(f"Error in FastRTC audio receive: {e}")
            import traceback
            traceback.print_exc()

    def _unpack_frame(self, frame):
        """Split ``frame`` into ``(audio_payload, sample_rate)``."""
        # (sample_rate, samples) pair: the frame shape documented for FastRTC
        # stream handlers -- NOTE(review): confirm against the installed version.
        if (
            isinstance(frame, tuple)
            and len(frame) == 2
            and isinstance(frame[0], (int, np.integer))
            and isinstance(frame[1], np.ndarray)
        ):
            return frame[1], int(frame[0])

        # Raw payloads are recognised before probing attributes because an
        # ndarray exposes its own ``.data`` buffer attribute, which would
        # otherwise be mistaken for a frame object's payload.
        if isinstance(frame, (bytes, bytearray, memoryview, np.ndarray, list, tuple)):
            return frame, self.sample_rate

        # Frame-like object: prefer ``.data``, then ``.audio``, else the frame itself.
        sample_rate = getattr(frame, 'sample_rate', self.sample_rate)
        payload = getattr(frame, 'data', None)
        if payload is None:
            payload = getattr(frame, 'audio', None)
        if payload is None:
            payload = frame
        return payload, sample_rate

    @staticmethod
    def _to_mono_float32(audio_data):
        """Return ``audio_data`` as a 1-D float32 array, or ``None`` if the type is unsupported."""
        if isinstance(audio_data, (bytes, bytearray, memoryview)):
            # Raw buffers are assumed to be 16-bit PCM; scale to [-1, 1).
            samples = np.frombuffer(audio_data, dtype=np.int16).astype(np.float32) / 32768.0
        elif isinstance(audio_data, np.ndarray):
            if np.issubdtype(audio_data.dtype, np.signedinteger):
                # Signed integer PCM: scale by the dtype's full range (32768 for int16).
                scale = float(np.iinfo(audio_data.dtype).max) + 1.0
                samples = audio_data.astype(np.float32) / scale
            else:
                samples = audio_data.astype(np.float32)
        elif isinstance(audio_data, (list, tuple)):
            samples = np.asarray(audio_data, dtype=np.float32)
        else:
            return None

        if samples.ndim == 2:
            # Heuristic: the shorter axis is the channel axis, so both
            # (channels, samples) and (samples, channels) layouts downmix
            # to one value per sample instead of one value per channel.
            channel_axis = int(np.argmin(samples.shape))
            samples = samples.mean(axis=channel_axis)

        # Ensure 1D array
        return samples.reshape(-1)

    async def process_audio_async(self, audio_data, sample_rate=16000):
        """Run ``process_audio_chunk`` on the default executor; errors are printed, not raised."""
        try:
            # Run the audio processing in a thread pool to avoid blocking
            loop = asyncio.get_running_loop()
            await loop.run_in_executor(
                None,
                self.diarization_system.process_audio_chunk,
                audio_data,
                sample_rate,
            )
        except Exception as e:
            print(f"Error in async audio processing: {e}")
653
 
654
 
655
+ # Global instances
656
+ diarization_system = None # Will be initialized when RealtimeSpeakerDiarization is available
657
  audio_handler = None
658
 
659
 
660
  def initialize_system():
661
  """Initialize the diarization system"""
662
+ global audio_handler, diarization_system
663
  try:
664
+ if diarization_system is None:
665
+ print("Error: RealtimeSpeakerDiarization not initialized")
666
+ return "❌ Diarization system not available. Please ensure RealtimeSpeakerDiarization is properly imported."
667
+
668
  success = diarization_system.initialize_models()
669
  if success:
670
  audio_handler = DiarizationHandler(diarization_system)
 
672
  else:
673
  return "❌ Failed to initialize system. Please check the logs."
674
  except Exception as e:
675
+ print(f"Initialization error: {e}")
676
  return f"❌ Initialization error: {str(e)}"
677
 
678
 
679
  def start_recording():
680
  """Start recording and transcription"""
681
  try:
682
+ if diarization_system is None:
683
+ return "❌ System not initialized"
684
  result = diarization_system.start_recording()
685
  return f"🎙️ {result} - FastRTC audio streaming is active."
686
  except Exception as e:
 
690
  def stop_recording():
691
  """Stop recording and transcription"""
692
  try:
693
+ if diarization_system is None:
694
+ return "❌ System not initialized"
695
  result = diarization_system.stop_recording()
696
  return f"⏹️ {result}"
697
  except Exception as e:
 
701
  def clear_conversation():
702
  """Clear the conversation"""
703
  try:
704
+ if diarization_system is None:
705
+ return "❌ System not initialized"
706
  result = diarization_system.clear_conversation()
707
  return f"🗑️ {result}"
708
  except Exception as e:
 
712
  def update_settings(threshold, max_speakers):
713
  """Update system settings"""
714
  try:
715
+ if diarization_system is None:
716
+ return "❌ System not initialized"
717
  result = diarization_system.update_settings(threshold, max_speakers)
718
  return f"⚙️ {result}"
719
  except Exception as e:
 
723
def get_conversation():
    """Return the conversation as HTML, or an italic placeholder/error message.

    Never raises: a missing system yields a "not initialized" placeholder and
    any exception is rendered into the returned HTML string.
    """
    try:
        system = diarization_system
        if system is not None:
            html = system.get_formatted_conversation()
        else:
            html = "<i>System not initialized</i>"
    except Exception as exc:
        html = f"<i>Error getting conversation: {exc!s}</i>"
    return html
 
733
def get_status():
    """Return the system status text, or a plain-text placeholder/error message.

    Never raises: a missing system yields "System not initialized" and any
    exception is rendered into the returned string.
    """
    try:
        system = diarization_system
        if system is not None:
            text = system.get_status_info()
        else:
            text = "System not initialized"
    except Exception as exc:
        text = f"Error getting status: {exc!s}"
    return text
 
762
  stop_btn = gr.Button("⏹️ Stop Recording", variant="stop", size="lg", interactive=False)
763
  clear_btn = gr.Button("🗑️ Clear", variant="secondary", size="lg", interactive=False)
764
 
765
+ # FastRTC Stream Interface
766
  with gr.Row():
767
+ gr.HTML("""
768
+ <div id="fastrtc-container" style="border: 2px solid #ddd; border-radius: 10px; padding: 20px; margin: 10px 0;">
769
+ <h3>🎵 Audio Stream</h3>
770
+ <p>FastRTC audio stream will appear here when recording starts.</p>
771
+ <div id="stream-status" style="padding: 10px; background: #f8f9fa; border-radius: 5px; margin-top: 10px;">
772
+ Status: Waiting for initialization...
773
+ </div>
774
+ </div>
775
+ """)
776
 
777
  # Status display
778
  status_output = gr.Textbox(
 
807
  update_settings_btn = gr.Button("Update Settings", variant="secondary")
808
 
809
  # Audio settings
810
+ gr.Markdown("## 🔊 Audio Configuration")
811
+ with gr.Accordion("Advanced Audio Settings", open=False):
812
+ gr.Markdown("""
813
+ **Current Configuration:**
814
+ - Sample Rate: 16kHz
815
+ - Audio Format: 16-bit PCM Float32 (via AudioProcessor)
816
+ - Channels: Mono (stereo converted automatically)
817
+ - Buffer Size: 1024 samples for real-time processing
818
+ - Processing: Uses existing AudioProcessor.extract_embedding()
819
+ """)
820
 
821
  # Instructions
822
  gr.Markdown("## 📝 How to Use")
823
  gr.Markdown("""
824
  1. **Initialize**: Click "Initialize System" to load AI models
825
+ 2. **Start**: Click "Start Recording" to begin processing
826
+ 3. **Connect**: The FastRTC stream will activate automatically
827
+ 4. **Allow Access**: Grant microphone permissions when prompted
828
+ 5. **Speak**: Talk naturally into your microphone
829
+ 6. **Monitor**: Watch real-time transcription with speaker colors
830
  """)
831
 
832
+ # Performance tips
833
+ with gr.Accordion("💡 Performance Tips", open=False):
834
+ gr.Markdown("""
835
+ - Use Chrome/Edge for best FastRTC performance
836
+ - Ensure stable internet connection
837
+ - Use headphones to prevent echo
838
+ - Position microphone 6-12 inches away
839
+ - Minimize background noise
840
+ - Allow browser microphone access
841
+ """)
842
+
843
  # Speaker color legend
844
  gr.Markdown("## 🎨 Speaker Colors")
845
  speaker_colors = [
 
855
 
856
  color_html = ""
857
  for i, (color, name) in enumerate(speaker_colors[:4]):
858
+ color_html += f'<div style="margin: 3px 0;"><span style="color:{color}; font-size: 16px; font-weight: bold;">●</span> Speaker {i+1} ({name})</div>'
859
 
860
+ gr.HTML(f"<div style='font-size: 14px;'>{color_html}</div>")
861
 
862
  # Auto-refresh conversation and status
863
  def refresh_display():
864
  try:
865
  conversation = get_conversation()
866
  status = get_status()
867
+ return conversation, status
 
 
 
 
 
 
 
 
 
868
  except Exception as e:
869
  error_msg = f"Error refreshing display: {str(e)}"
870
+ return f"<i>{error_msg}</i>", error_msg
871
 
872
  # Event handlers
873
  def on_initialize():
 
875
  result = initialize_system()
876
  success = "successfully" in result.lower()
877
 
878
+ conversation, status = refresh_display()
879
 
880
  return (
881
  result, # status_output
882
  gr.update(interactive=success), # start_btn
883
  gr.update(interactive=success), # clear_btn
884
  conversation, # conversation_output
 
885
  )
886
  except Exception as e:
887
  error_msg = f"❌ Initialization failed: {str(e)}"
 
890
  gr.update(interactive=False),
891
  gr.update(interactive=False),
892
  "<i>System not ready</i>",
 
893
  )
894
 
895
  def on_start():
896
  try:
897
  result = start_recording()
 
 
898
  return (
899
  result, # status_output
900
  gr.update(interactive=False), # start_btn
901
  gr.update(interactive=True), # stop_btn
 
902
  )
903
  except Exception as e:
904
  error_msg = f"❌ Failed to start: {str(e)}"
 
906
  error_msg,
907
  gr.update(interactive=True),
908
  gr.update(interactive=False),
 
909
  )
910
 
911
  def on_stop():
912
  try:
913
  result = stop_recording()
 
 
914
  return (
915
  result, # status_output
916
  gr.update(interactive=True), # start_btn
917
  gr.update(interactive=False), # stop_btn
 
918
  )
919
  except Exception as e:
920
  error_msg = f"❌ Failed to stop: {str(e)}"
 
922
  error_msg,
923
  gr.update(interactive=False),
924
  gr.update(interactive=True),
 
925
  )
926
 
927
  def on_clear():
928
  try:
929
  result = clear_conversation()
930
+ conversation, status = refresh_display()
931
  return result, conversation
932
  except Exception as e:
933
  error_msg = f"❌ Failed to clear: {str(e)}"
 
943
  # Connect event handlers
944
  init_btn.click(
945
  on_initialize,
946
+ outputs=[status_output, start_btn, clear_btn, conversation_output]
947
  )
948
 
949
  start_btn.click(
950
  on_start,
951
+ outputs=[status_output, start_btn, stop_btn]
952
  )
953
 
954
  stop_btn.click(
955
  on_stop,
956
+ outputs=[status_output, start_btn, stop_btn]
957
  )
958
 
959
  clear_btn.click(
 
971
  refresh_timer = gr.Timer(2.0)
972
  refresh_timer.tick(
973
  refresh_display,
974
+ outputs=[conversation_output, status_output]
975
  )
976
 
977
  return interface
978
 
979
 
980
+ # FastAPI setup for API endpoints
981
  def create_fastapi_app():
982
+ """Create FastAPI app with API endpoints"""
983
  app = FastAPI(
984
  title="Real-time Speaker Diarization",
985
  description="Real-time speech recognition with speaker diarization using FastRTC",
 
995
  return {
996
  "status": "healthy",
997
  "timestamp": time.time(),
998
+ "system_initialized": diarization_system is not None and hasattr(diarization_system, 'encoder') and diarization_system.encoder is not None,
999
+ "recording_active": diarization_system.is_running if diarization_system and hasattr(diarization_system, 'is_running') else False
1000
  }
1001
 
1002
  @router.get("/api/conversation")
 
1006
  return {
1007
  "conversation": get_conversation(),
1008
  "status": get_status(),
1009
+ "is_recording": diarization_system.is_running if diarization_system and hasattr(diarization_system, 'is_running') else False,
1010
  "timestamp": time.time()
1011
  }
1012
  except Exception as e:
 
1029
 
1030
  return {
1031
  "result": result,
1032
+ "is_recording": diarization_system.is_running if diarization_system and hasattr(diarization_system, 'is_running') else False,
1033
  "timestamp": time.time()
1034
  }
1035
  except Exception as e:
1036
  return {"error": str(e), "timestamp": time.time()}
1037
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1038
  app.include_router(router)
1039
  return app
1040
 
1041
 
1042
+ # Function to setup FastRTC stream
1043
def setup_fastrtc_stream(app):
    """Create the FastRTC audio stream and register its routes on ``app``.

    Args:
        app: The FastAPI application the stream endpoints are attached to.

    Returns:
        The configured ``Stream``, or ``None`` when the global audio handler
        has not been created yet or when creating/mounting the stream fails.
        Failures are printed as warnings rather than raised.
    """
    try:
        if audio_handler is None:
            print("Warning: Audio handler not initialized. Initialize system first.")
            return None

        # Configure RTC settings (public STUN servers only; no TURN relay).
        rtc_config = {
            "iceServers": [
                {"urls": "stun:stun.l.google.com:19302"},
                {"urls": "stun:stun1.l.google.com:19302"},
            ]
        }

        # Create FastRTC stream
        stream = Stream(
            handler=audio_handler,
            rtc_configuration=rtc_config,
            modality="audio",
            # FastRTC names modes from the client's side: "send" is
            # client-to-server, which is what a handler that only consumes
            # microphone audio needs ("receive" is server-to-client).
            mode="send",
        )

        # Stream is not an ASGI application, so FastAPI's app.mount() cannot
        # serve it; Stream.mount() registers the WebRTC routes on the app.
        stream.mount(app, path="/stream")
        print("✅ FastRTC stream configured successfully!")
        return stream

    except Exception as e:
        print(f"⚠️ Warning: Failed to setup FastRTC stream: {e}")
        print("Audio streaming may not work properly.")
        return None
1078
+
1079
+
1080
+ # Main application setup
1081
+ def create_app(diarization_sys=None):
1082
+ """Create the complete application"""
1083
+ global diarization_system
1084
+
1085
+ # Set the diarization system
1086
+ if diarization_sys is not None:
1087
+ diarization_system = diarization_sys
1088
 
1089
  # Create FastAPI app
1090
  fastapi_app = create_fastapi_app()
 
1095
  # Mount Gradio on FastAPI
1096
  app = gr.mount_gradio_app(fastapi_app, gradio_interface, path="/")
1097
 
1098
+ # Setup FastRTC stream (will be called after initialization)
1099
+ # Note: The stream setup happens when the system is initialized
1100
+
1101
  return app, gradio_interface
1102
 
1103
 
1104
  # Entry point for HuggingFace Spaces
1105
  if __name__ == "__main__":
1106
  try:
1107
+ # Import your diarization system here
1108
+ # from your_module import RealtimeSpeakerDiarization
1109
+ # diarization_system = RealtimeSpeakerDiarization()
1110
+
1111
  # Create the application
1112
  app, interface = create_app()
1113
 
 
1122
 
1123
  except Exception as e:
1124
  print(f"Failed to launch application: {e}")
1125
+ import traceback
1126
+ traceback.print_exc()
1127
+
1128
  # Fallback - launch just Gradio interface
1129
  try:
1130
  interface = create_interface()
 
1134
  share=False
1135
  )
1136
  except Exception as fallback_error:
1137
+ print(f"Fallback launch also failed: {fallback_error}")
1138
+
1139
+
1140
+ # Helper function to initialize with your diarization system
1141
def initialize_with_diarization_system(diarization_sys):
    """Install ``diarization_sys`` as the module-wide system and build the app.

    The global is assigned unconditionally (even when ``diarization_sys`` is
    ``None``) before delegating to ``create_app``, whose result is returned
    unchanged.
    """
    global diarization_system

    diarization_system = diarization_sys
    built = create_app(diarization_sys)
    return built