Real_Time_diarization

Sleeping

App Files Files Community

Saiyaswanth007 committed 18 days ago

Commit

691302d

1 Parent(s): 7177b58

Revert portg

Browse files

Files changed (1) hide show

app.py +347 -231

app.py CHANGED Viewed

@@ -560,106 +560,177 @@ class RealtimeSpeakerDiarization:
 # FastRTC Audio Handler
-class DiarizationHandler(AsyncStreamHandler):
     def __init__(self, diarization_system):
-        super().__init__()
         self.diarization_system = diarization_system
     def copy(self):
         # Return a fresh handler for each new stream connection
         return DiarizationHandler(self.diarization_system)
-    async def emit(self):
-        """Not used in this implementation"""
-        return None
-    async def receive(self, frame):
-        """Receive audio data from FastRTC and process it"""
         try:
-            if self.diarization_system.is_running:
-                # Frame should be a numpy array of audio data
-                if hasattr(frame, 'data'):
-                    audio_data = frame.data
                 else:
-                    audio_data = frame
-                # Feed audio data to the diarization system
-                self.diarization_system.feed_audio_data(audio_data)
         except Exception as e:
-            print(f"Error in FastRTC handler: {e}")
 # Global instance
 diarization_system = RealtimeSpeakerDiarization()
 def initialize_system():
     """Initialize the diarization system"""
-    success = diarization_system.initialize_models()
-    if success:
-        return "✅ System initialized successfully! Models loaded."
-    else:
-        return "❌ Failed to initialize system. Please check the logs."
 def start_recording():
     """Start recording and transcription"""
-    return diarization_system.start_recording()
 def stop_recording():
     """Stop recording and transcription"""
-    return diarization_system.stop_recording()
 def clear_conversation():
     """Clear the conversation"""
-    return diarization_system.clear_conversation()
 def update_settings(threshold, max_speakers):
     """Update system settings"""
-    return diarization_system.update_settings(threshold, max_speakers)
 def get_conversation():
     """Get the current conversation"""
-    return diarization_system.get_formatted_conversation()
 def get_status():
     """Get system status"""
-    return diarization_system.get_status_info()
 # Create Gradio interface
 def create_interface():
-    with gr.Blocks(title="Real-time Speaker Diarization", theme=gr.themes.Monochrome()) as interface:
         gr.Markdown("# 🎤 Real-time Speech Recognition with Speaker Diarization")
-        gr.Markdown("This app performs real-time speech recognition with automatic speaker identification and color-coding.")
         with gr.Row():
             with gr.Column(scale=2):
                 # Main conversation display
                 conversation_output = gr.HTML(
-                    value="<i>Click 'Initialize System' to start...</i>",
-                    label="Live Conversation"
                 )
                 # Control buttons
                 with gr.Row():
-                    init_btn = gr.Button("🔧 Initialize System", variant="secondary")
-                    start_btn = gr.Button("🎙️ Start Recording", variant="primary", interactive=False)
-                    stop_btn = gr.Button("⏹️ Stop Recording", variant="stop", interactive=False)
-                    clear_btn = gr.Button("🗑️ Clear Conversation", interactive=False)
                 # Status display
                 status_output = gr.Textbox(
                     label="System Status",
-                    value="System not initialized",
-                    lines=8,
-                    interactive=False
                 )
             with gr.Column(scale=1):
@@ -670,276 +741,321 @@ def create_interface():
                     minimum=0.1,
                     maximum=0.95,
                     step=0.05,
-                    value=DEFAULT_CHANGE_THRESHOLD,
                     label="Speaker Change Sensitivity",
-                    info="Lower values = more sensitive to speaker changes"
                 )
                 max_speakers_slider = gr.Slider(
                     minimum=2,
-                    maximum=ABSOLUTE_MAX_SPEAKERS,
                     step=1,
-                    value=DEFAULT_MAX_SPEAKERS,
                     label="Maximum Number of Speakers"
                 )
-                update_settings_btn = gr.Button("Update Settings")
                 # Instructions
-                gr.Markdown("## 📝 Instructions")
                 gr.Markdown("""
-                1. Click **Initialize System** to load models
-                2. Click **Start Recording** to begin processing
-                3. Use the FastRTC interface below to connect your microphone
-                4. Allow microphone access when prompted
-                5. Speak into your microphone
-                6. Watch real-time transcription with speaker labels
-                7. Adjust settings as needed
                 """)
                 # Speaker color legend
                 gr.Markdown("## 🎨 Speaker Colors")
-                color_info = []
-                for i, (color, name) in enumerate(zip(SPEAKER_COLORS, SPEAKER_COLOR_NAMES)):
-                    color_info.append(f'<span style="color:{color};">■</span> Speaker {i+1} ({name})')
-                gr.HTML("<br>".join(color_info[:DEFAULT_MAX_SPEAKERS]))
-                # FastRTC Integration Notice
-                gr.Markdown("""
-                ## ℹ️ About FastRTC
-                This app uses FastRTC for low-latency audio streaming.
-                For optimal performance, use a modern browser and allow microphone access when prompted.
-                """)
         # Auto-refresh conversation and status
         def refresh_display():
-            return diarization_system.get_formatted_conversation(), diarization_system.get_status_info()
         # Event handlers
         def on_initialize():
-            result = initialize_system()
-            if "successfully" in result:
                 return (
-                    result,
-                    gr.update(interactive=True),   # start_btn
-                    gr.update(interactive=True),   # clear_btn
-                    get_conversation(),
-                    get_status()
                 )
-            else:
                 return (
-                    result,
-                    gr.update(interactive=False),  # start_btn
-                    gr.update(interactive=False),  # clear_btn
-                    get_conversation(),
-                    get_status()
                 )
         def on_start():
-            result = start_recording()
-            return (
-                result,
-                gr.update(interactive=False),  # start_btn
-                gr.update(interactive=True),   # stop_btn
-            )
         def on_stop():
-            result = stop_recording()
-            return (
-                result,
-                gr.update(interactive=True),   # start_btn
-                gr.update(interactive=False),  # stop_btn
-            )
         # Connect event handlers
         init_btn.click(
             on_initialize,
-            outputs=[status_output, start_btn, clear_btn, conversation_output, status_output]
         )
         start_btn.click(
             on_start,
-            outputs=[status_output, start_btn, stop_btn]
         )
         stop_btn.click(
             on_stop,
-            outputs=[status_output, start_btn, stop_btn]
         )
         clear_btn.click(
-            clear_conversation,
-            outputs=[status_output]
         )
         update_settings_btn.click(
-            update_settings,
             inputs=[threshold_slider, max_speakers_slider],
             outputs=[status_output]
         )
-        # Auto-refresh every 2 seconds when recording
         refresh_timer = gr.Timer(2.0)
         refresh_timer.tick(
             refresh_display,
-            outputs=[conversation_output, status_output]
         )
     return interface
-# Create API router for endpoints
-router = APIRouter()
-# Health check endpoint
-@router.get("/health")
-async def health_check():
-    """Health check endpoint"""
-    return {
-        "status": "healthy",
-        "timestamp": time.time(),
-        "system_initialized": diarization_system.encoder is not None,
-        "recording_active": diarization_system.is_running
-    }
-# API endpoint to get conversation
-@router.get("/api/conversation")
-async def get_conversation_api():
-    """API endpoint to get current conversation"""
-    return {
-        "conversation": diarization_system.get_formatted_conversation(),
-        "status": diarization_system.get_status_info(),
-        "is_recording": diarization_system.is_running
-    }
-# API endpoint to control recording
-@router.post("/api/control/{action}")
-async def control_recording(action: str):
-    """API endpoint to control recording (start/stop/clear/initialize)"""
-    if action == "start":
-        result = diarization_system.start_recording()
-    elif action == "stop":
-        result = diarization_system.stop_recording()
-    elif action == "clear":
-        result = diarization_system.clear_conversation()
-    elif action == "initialize":
-        result = initialize_system()
-    else:
-        return {"error": "Invalid action. Use: start, stop, clear, or initialize"}
-    return {"result": result, "is_recording": diarization_system.is_running}
-# Main application setup
-def create_app():
-    """Create and configure the FastAPI app with Gradio and FastRTC"""
-    # Create FastAPI app
     app = FastAPI(
         title="Real-time Speaker Diarization",
         description="Real-time speech recognition with speaker diarization using FastRTC",
         version="1.0.0"
     )
-    # Include API routes
-    app.include_router(router)
-    # Create Gradio interface
-    gradio_interface = create_interface()
-    # Mount Gradio interface
-    app = gr.mount_gradio_app(app, gradio_interface, path="/")
-    # Setup FastRTC stream
-    try:
-        # Create the handler
-        handler = DiarizationHandler(diarization_system)
-        # Get TURN credentials
-        hf_token = os.environ.get("HF_TOKEN")
-        if not hf_token:
-            print("Warning: HF_TOKEN not set. Audio streaming may not work properly.")
-            # Use basic STUN server as fallback
-            rtc_config = {
-                "iceServers": [{"urls": "stun:stun.l.google.com:19302"}]
             }
-        else:
-            # Get Cloudflare TURN credentials
-            try:
-                turn_credentials = get_cloudflare_turn_credentials(hf_token)
-                # Safely extract credentials from the response
-                ice_servers = []
-                # Always add STUN server
-                ice_servers.append({"urls": "stun:stun.l.google.com:19302"})
-                # Check for and add TURN server if available
-                if turn_credentials and isinstance(turn_credentials, dict):
-                    # Handle different possible structures
-                    if 'iceServers' in turn_credentials:
-                        # If credentials already have iceServers, use them directly
-                        rtc_config = turn_credentials
-                    elif 'urls' in turn_credentials and isinstance(turn_credentials['urls'], list) and turn_credentials['urls']:
-                        # Structure: {urls: [...], username: "...", credential: "..."}
-                        ice_servers.append({
-                            "urls": [f"turn:{url}" for url in turn_credentials["urls"]],
-                            "username": turn_credentials.get("username", ""),
-                            "credential": turn_credentials.get("credential", "")
-                        })
-                        rtc_config = {"iceServers": ice_servers}
-                    elif 'url' in turn_credentials:
-                        # Structure with single URL
-                        ice_servers.append({
-                            "urls": f"turn:{turn_credentials['url']}",
-                            "username": turn_credentials.get("username", ""),
-                            "credential": turn_credentials.get("credential", "")
-                        })
-                        rtc_config = {"iceServers": ice_servers}
-                    else:
-                        print("Warning: Unexpected TURN credentials format. Using STUN only.")
-                        rtc_config = {"iceServers": ice_servers}
-                else:
-                    print("Warning: Could not get TURN credentials. Using STUN only.")
-                    rtc_config = {"iceServers": ice_servers}
-            except Exception as e:
-                print(f"Warning: Error getting TURN credentials: {e}. Using STUN only.")
-                rtc_config = {
-                    "iceServers": [{"urls": "stun:stun.l.google.com:19302"}]
-                }
-        # Create FastRTC stream
-        stream = Stream(
-            handler=handler,
-            rtc_configuration=rtc_config,
-            modality="audio",
-            mode="send-receive"
-        )
-        # Add FastRTC endpoints
-        app.mount("/stream", stream)
-        print("FastRTC stream configured successfully!")
-    except Exception as e:
-        print(f"Warning: Failed to setup FastRTC stream: {e}")
-        print("Audio streaming will not be available.")
     return app
-# Main entry point
 if __name__ == "__main__":
-    # Create the app
-    app = create_app()
-    interface = create_interface()
-        # Simple launch - HF Spaces will handle host/port automatically
-    interface.launch(
-        share=False,  # Not needed in HF Spaces
-        server_name="0.0.0.0",  # Required for HF Spaces
-        # Don't specify server_port - let HF Spaces handle it
-    )

 # FastRTC Audio Handler
+# FastRTC Audio Handler for Real-time Diarization
+import asyncio
+import numpy as np
+from fastrtc import FastRTCClient, AudioFrame
+from fastapi import FastAPI, APIRouter
+import gradio as gr
+import time
+import os
+import threading
+from queue import Queue
+import json
class DiarizationHandler:
    """Normalize incoming audio frames and forward them to the diarization system.

    Each accepted frame is converted to a mono NumPy array (int16 PCM is
    rescaled to float32 in [-1, 1)) and passed to
    ``diarization_system.feed_audio_data`` on an executor thread so the event
    loop is not blocked while the frame is consumed.
    """

    def __init__(self, diarization_system):
        """Keep a reference to the shared diarization system."""
        self.diarization_system = diarization_system
        # Not read anywhere inside this class; kept so that code outside this
        # view which may rely on these attributes keeps working.
        self.audio_queue = Queue()
        self.is_processing = False

    def copy(self):
        """Return a fresh handler for a new stream connection (same system)."""
        return DiarizationHandler(self.diarization_system)

    async def on_audio_frame(self, frame: "AudioFrame"):
        """Convert one incoming frame to mono samples and feed it onward.

        Frames are ignored while the system is not running or when
        ``frame.data`` is ``None``. Any error is printed and swallowed so a
        single bad frame does not terminate the stream.
        """
        try:
            if not self.diarization_system.is_running or frame.data is None:
                return

            if isinstance(frame.data, bytes):
                # Raw bytes are interpreted as 16-bit PCM.
                audio_data = np.frombuffer(frame.data, dtype=np.int16)
            elif hasattr(frame, 'to_ndarray'):
                audio_data = frame.to_ndarray()
            else:
                audio_data = np.array(frame.data, dtype=np.float32)

            # Rescale 16-bit PCM to float32 in the -1..1 range.
            if audio_data.dtype == np.int16:
                audio_data = audio_data.astype(np.float32) / 32768.0

            # Downmix to mono. The layout is not fixed by the visible code:
            # it may be (samples, channels) or (channels, samples). Averaging
            # axis 1 unconditionally would reduce a (channels, samples) frame
            # to one value per channel, so average over the shorter axis
            # (assumes fewer channels than samples); ties keep axis 1.
            if audio_data.ndim > 1:
                channel_axis = 0 if audio_data.shape[0] < audio_data.shape[1] else 1
                audio_data = np.mean(audio_data, axis=channel_axis)

            await self.process_audio_async(audio_data, frame.sample_rate)
        except Exception as e:
            print(f"Error processing audio frame: {e}")

    async def process_audio_async(self, audio_data, sample_rate=16000):
        """Run ``feed_audio_data(audio_data, sample_rate)`` on the default executor.

        NOTE(review): ``feed_audio_data`` is defined outside this view;
        confirm it accepts the sample rate as a second positional argument.
        """
        try:
            # We are inside a coroutine, so a running loop always exists.
            loop = asyncio.get_running_loop()
            await loop.run_in_executor(
                None,
                self.diarization_system.feed_audio_data,
                audio_data,
                sample_rate,
            )
        except Exception as e:
            print(f"Error in async audio processing: {e}")
 # Global instance
 diarization_system = RealtimeSpeakerDiarization()
+audio_handler = None
 def initialize_system():
     """Initialize the diarization system"""
+    global audio_handler
+    try:
+        success = diarization_system.initialize_models()
+        if success:
+            audio_handler = DiarizationHandler(diarization_system)
+            return "✅ System initialized successfully! Models loaded and FastRTC handler ready."
+        else:
+            return "❌ Failed to initialize system. Please check the logs."
+    except Exception as e:
+        return f"❌ Initialization error: {str(e)}"
 def start_recording():
     """Start recording and transcription"""
+    try:
+        result = diarization_system.start_recording()
+        return f"🎙️ {result} - FastRTC audio streaming is active."
+    except Exception as e:
+        return f"❌ Failed to start recording: {str(e)}"
 def stop_recording():
     """Stop recording and transcription"""
+    try:
+        result = diarization_system.stop_recording()
+        return f"⏹️ {result}"
+    except Exception as e:
+        return f"❌ Failed to stop recording: {str(e)}"
 def clear_conversation():
     """Clear the conversation"""
+    try:
+        result = diarization_system.clear_conversation()
+        return f"🗑️ {result}"
+    except Exception as e:
+        return f"❌ Failed to clear conversation: {str(e)}"
 def update_settings(threshold, max_speakers):
     """Update system settings"""
+    try:
+        result = diarization_system.update_settings(threshold, max_speakers)
+        return f"⚙️ {result}"
+    except Exception as e:
+        return f"❌ Failed to update settings: {str(e)}"
 def get_conversation():
     """Get the current conversation"""
+    try:
+        return diarization_system.get_formatted_conversation()
+    except Exception as e:
+        return f"<i>Error getting conversation: {str(e)}</i>"
 def get_status():
     """Get system status"""
+    try:
+        return diarization_system.get_status_info()
+    except Exception as e:
+        return f"Error getting status: {str(e)}"
 # Create Gradio interface
 def create_interface():
+    with gr.Blocks(title="Real-time Speaker Diarization", theme=gr.themes.Soft()) as interface:
         gr.Markdown("# 🎤 Real-time Speech Recognition with Speaker Diarization")
+        gr.Markdown("This app performs real-time speech recognition with automatic speaker identification using FastRTC for low-latency audio streaming.")
         with gr.Row():
             with gr.Column(scale=2):
                 # Main conversation display
                 conversation_output = gr.HTML(
+                    value="<div style='padding: 20px; background: #f5f5f5; border-radius: 10px;'><i>Click 'Initialize System' to start...</i></div>",
+                    label="Live Conversation",
+                    elem_id="conversation_display"
                 )
                 # Control buttons
                 with gr.Row():
+                    init_btn = gr.Button("🔧 Initialize System", variant="secondary", size="lg")
+                    start_btn = gr.Button("🎙️ Start Recording", variant="primary", size="lg", interactive=False)
+                    stop_btn = gr.Button("⏹️ Stop Recording", variant="stop", size="lg", interactive=False)
+                    clear_btn = gr.Button("🗑️ Clear", variant="secondary", size="lg", interactive=False)
+                # Audio connection status
+                with gr.Row():
+                    connection_status = gr.HTML(
+                        value="<div style='padding: 10px; background: #fff3cd; border-radius: 5px;'>🔌 FastRTC: Not connected</div>",
+                        label="Connection Status"
+                    )
                 # Status display
                 status_output = gr.Textbox(
                     label="System Status",
+                    value="System not initialized. Please click 'Initialize System' to begin.",
+                    lines=6,
+                    interactive=False,
+                    show_copy_button=True
                 )
             with gr.Column(scale=1):
                     minimum=0.1,
                     maximum=0.95,
                     step=0.05,
+                    value=0.5,  # DEFAULT_CHANGE_THRESHOLD
                     label="Speaker Change Sensitivity",
+                    info="Lower = more sensitive to speaker changes"
                 )
                 max_speakers_slider = gr.Slider(
                     minimum=2,
+                    maximum=10,  # ABSOLUTE_MAX_SPEAKERS
                     step=1,
+                    value=4,  # DEFAULT_MAX_SPEAKERS
                     label="Maximum Number of Speakers"
                 )
+                update_settings_btn = gr.Button("Update Settings", variant="secondary")
+                # Audio settings
+                gr.Markdown("## 🔊 Audio Settings")
+                gr.Markdown("""
+                **Recommended settings:**
+                - Use a good quality microphone
+                - Ensure stable internet connection
+                - Speak clearly and avoid background noise
+                - Position microphone 6-12 inches from mouth
+                """)
                 # Instructions
+                gr.Markdown("## 📝 How to Use")
                 gr.Markdown("""
+                1. **Initialize**: Click "Initialize System" to load AI models
+                2. **Connect**: Allow microphone access when prompted
+                3. **Start**: Click "Start Recording" to begin processing
+                4. **Speak**: Talk into your microphone naturally
+                5. **Monitor**: Watch real-time transcription with speaker labels
+                6. **Adjust**: Fine-tune settings as needed
                 """)
                 # Speaker color legend
                 gr.Markdown("## 🎨 Speaker Colors")
+                speaker_colors = [
+                    ("#FF6B6B", "Red"),
+                    ("#4ECDC4", "Teal"),
+                    ("#45B7D1", "Blue"),
+                    ("#96CEB4", "Green"),
+                    ("#FFEAA7", "Yellow"),
+                    ("#DDA0DD", "Plum"),
+                    ("#98D8C8", "Mint"),
+                    ("#F7DC6F", "Gold")
+                ]
+                color_html = ""
+                for i, (color, name) in enumerate(speaker_colors[:4]):
+                    color_html += f'<div style="display: inline-block; margin: 5px;"><span style="color:{color}; font-size: 20px;">●</span> Speaker {i+1} ({name})</div><br>'
+                gr.HTML(color_html)
         # Auto-refresh conversation and status
         def refresh_display():
+            try:
+                conversation = get_conversation()
+                status = get_status()
+                # Update connection status based on system state
+                if diarization_system.is_running:
+                    conn_status = "<div style='padding: 10px; background: #d4edda; border-radius: 5px;'>🟢 FastRTC: Connected & Recording</div>"
+                elif hasattr(diarization_system, 'encoder') and diarization_system.encoder is not None:
+                    conn_status = "<div style='padding: 10px; background: #d1ecf1; border-radius: 5px;'>🔵 FastRTC: Ready to connect</div>"
+                else:
+                    conn_status = "<div style='padding: 10px; background: #f8d7da; border-radius: 5px;'>🔴 FastRTC: System not initialized</div>"
+                return conversation, status, conn_status
+            except Exception as e:
+                error_msg = f"Error refreshing display: {str(e)}"
+                return f"<i>{error_msg}</i>", error_msg, "<div style='padding: 10px; background: #f8d7da; border-radius: 5px;'>❌ FastRTC: Error</div>"
         # Event handlers
         def on_initialize():
+            try:
+                result = initialize_system()
+                success = "successfully" in result.lower()
+                conversation, status, conn_status = refresh_display()
                 return (
+                    result,  # status_output
+                    gr.update(interactive=success),   # start_btn
+                    gr.update(interactive=success),   # clear_btn
+                    conversation,  # conversation_output
+                    conn_status   # connection_status
                 )
+            except Exception as e:
+                error_msg = f"❌ Initialization failed: {str(e)}"
                 return (
+                    error_msg,
+                    gr.update(interactive=False),
+                    gr.update(interactive=False),
+                    "<i>System not ready</i>",
+                    "<div style='padding: 10px; background: #f8d7da; border-radius: 5px;'>❌ FastRTC: Initialization failed</div>"
                 )
         def on_start():
+            try:
+                result = start_recording()
+                conversation, status, conn_status = refresh_display()
+                return (
+                    result,  # status_output
+                    gr.update(interactive=False),  # start_btn
+                    gr.update(interactive=True),   # stop_btn
+                    conn_status  # connection_status
+                )
+            except Exception as e:
+                error_msg = f"❌ Failed to start: {str(e)}"
+                return (
+                    error_msg,
+                    gr.update(interactive=True),
+                    gr.update(interactive=False),
+                    "<div style='padding: 10px; background: #f8d7da; border-radius: 5px;'>❌ FastRTC: Start failed</div>"
+                )
         def on_stop():
+            try:
+                result = stop_recording()
+                conversation, status, conn_status = refresh_display()
+                return (
+                    result,  # status_output
+                    gr.update(interactive=True),   # start_btn
+                    gr.update(interactive=False),  # stop_btn
+                    conn_status  # connection_status
+                )
+            except Exception as e:
+                error_msg = f"❌ Failed to stop: {str(e)}"
+                return (
+                    error_msg,
+                    gr.update(interactive=False),
+                    gr.update(interactive=True),
+                    "<div style='padding: 10px; background: #f8d7da; border-radius: 5px;'>❌ FastRTC: Stop failed</div>"
+                )
+        def on_clear():
+            try:
+                result = clear_conversation()
+                conversation, status, conn_status = refresh_display()
+                return result, conversation
+            except Exception as e:
+                error_msg = f"❌ Failed to clear: {str(e)}"
+                return error_msg, "<i>Error clearing conversation</i>"
+        def on_update_settings(threshold, max_speakers):
+            try:
+                result = update_settings(threshold, max_speakers)
+                return result
+            except Exception as e:
+                return f"❌ Failed to update settings: {str(e)}"
         # Connect event handlers
         init_btn.click(
             on_initialize,
+            outputs=[status_output, start_btn, clear_btn, conversation_output, connection_status]
         )
         start_btn.click(
             on_start,
+            outputs=[status_output, start_btn, stop_btn, connection_status]
         )
         stop_btn.click(
             on_stop,
+            outputs=[status_output, start_btn, stop_btn, connection_status]
         )
         clear_btn.click(
+            on_clear,
+            outputs=[status_output, conversation_output]
         )
         update_settings_btn.click(
+            on_update_settings,
             inputs=[threshold_slider, max_speakers_slider],
             outputs=[status_output]
         )
+        # Auto-refresh every 2 seconds when active
         refresh_timer = gr.Timer(2.0)
         refresh_timer.tick(
             refresh_display,
+            outputs=[conversation_output, status_output, connection_status]
         )
     return interface
+# FastAPI setup for HuggingFace Spaces
def create_fastapi_app():
    """Create the FastAPI app with health, conversation, control and audio routes.

    Routes registered:
      * ``GET  /health``               - liveness plus initialization/recording flags
      * ``GET  /api/conversation``     - current conversation and status
      * ``POST /api/control/{action}`` - start, stop, clear or initialize
      * ``WS   /ws/audio``             - binary audio chunks fed to the global handler

    Returns:
        The configured ``FastAPI`` instance.
    """
    # Imported here so the fix does not depend on the module-level import
    # list. FastAPI only injects the connection when the endpoint parameter
    # is annotated with ``WebSocket``.
    from fastapi import WebSocket, WebSocketDisconnect

    app = FastAPI(
        title="Real-time Speaker Diarization",
        description="Real-time speech recognition with speaker diarization using FastRTC",
        version="1.0.0"
    )

    router = APIRouter()

    def _is_recording():
        """Return the system's ``is_running`` flag, or False if it has none."""
        return getattr(diarization_system, 'is_running', False)

    @router.get("/health")
    async def health_check():
        """Health check endpoint"""
        return {
            "status": "healthy",
            "timestamp": time.time(),
            "system_initialized": getattr(diarization_system, 'encoder', None) is not None,
            "recording_active": _is_recording()
        }

    @router.get("/api/conversation")
    async def get_conversation_api():
        """Get current conversation"""
        try:
            return {
                "conversation": get_conversation(),
                "status": get_status(),
                "is_recording": _is_recording(),
                "timestamp": time.time()
            }
        except Exception as e:
            return {"error": str(e), "timestamp": time.time()}

    @router.post("/api/control/{action}")
    async def control_recording(action: str):
        """Control recording actions"""
        try:
            actions = {
                "start": start_recording,
                "stop": stop_recording,
                "clear": clear_conversation,
                "initialize": initialize_system,
            }
            run = actions.get(action)
            if run is None:
                return {"error": "Invalid action. Use: start, stop, clear, or initialize"}
            return {
                "result": run(),
                "is_recording": _is_recording(),
                "timestamp": time.time()
            }
        except Exception as e:
            return {"error": str(e), "timestamp": time.time()}

    @router.websocket("/ws/audio")
    async def websocket_audio_endpoint(websocket: WebSocket):
        """Receive binary audio chunks and pass them to the global audio handler.

        Chunks are dropped while no handler exists or the system is not
        running. Each chunk is wrapped as a 16 kHz ``AudioFrame``.
        """
        await websocket.accept()
        try:
            while True:
                data = await websocket.receive_bytes()
                if audio_handler and diarization_system.is_running:
                    frame = AudioFrame(data=data, sample_rate=16000)
                    await audio_handler.on_audio_frame(frame)
        except WebSocketDisconnect:
            # The client closed the connection; calling close() on an
            # already-closed socket would raise, so just stop.
            return
        except Exception as e:
            print(f"WebSocket error: {e}")
            try:
                await websocket.close()
            except RuntimeError:
                # The connection is already gone; nothing left to close.
                pass

    app.include_router(router)
    return app
+# Main application entry point
def create_app():
    """Build the FastAPI app and the Gradio UI, and mount the UI at "/".

    Returns:
        A ``(app, gradio_interface)`` tuple: the result of
        ``gr.mount_gradio_app`` and the Gradio interface that was mounted.
    """
    api = create_fastapi_app()
    ui = create_interface()
    return gr.mount_gradio_app(api, ui, path="/"), ui
+# Entry point for HuggingFace Spaces
 if __name__ == "__main__":
+    try:
+        # Create the application
+        app, interface = create_app()
+        # Launch for HuggingFace Spaces
+        interface.launch(
+            server_name="0.0.0.0",
+            server_port=int(os.environ.get("PORT", 7860)),
+            share=False,
+            show_error=True,
+            quiet=False
+        )
+    except Exception as e:
+        print(f"Failed to launch application: {e}")
+        # Fallback - launch just Gradio interface
+        try:
+            interface = create_interface()
+            interface.launch(
+                server_name="0.0.0.0",
+                server_port=int(os.environ.get("PORT", 7860)),
+                share=False
+            )
+        except Exception as fallback_error:
+            print(f"Fallback launch also failed: {fallback_error}")