import gradio as gr
from fastapi import FastAPI
from shared import DEFAULT_CHANGE_THRESHOLD, DEFAULT_MAX_SPEAKERS, ABSOLUTE_MAX_SPEAKERS, FINAL_TRANSCRIPTION_MODEL, REALTIME_TRANSCRIPTION_MODEL
# Print the installed Gradio version when this module is imported.
print(gr.__version__)

# Connection configuration: the signaling server and the model server are
# separate services, so each has its own URL.
# These values are placeholders that are replaced at deployment time.
# WebSocket endpoint of the signaling server (not referenced by the Python
# code visible in this module).
RENDER_SIGNALING_URL = "wss://render-signal-audio.onrender.com/stream"
# Base URL of the model server; build_ui() polls "<HF_SPACE_URL>/status".
HF_SPACE_URL = "https://androidguy-speaker-diarization.hf.space"
def build_ui():
    """Build the Gradio UI for real-time speaker diarization.

    The page consists of a header, a connection-status banner, a wide column
    holding the live conversation, the Start/Stop/Clear buttons and a status
    panel, and a narrow column holding the settings sliders and instructions.
    A ``gr.Timer`` polls ``{HF_SPACE_URL}/status`` every 5 seconds and writes
    the result into the status panel.

    Returns:
        The constructed ``gr.Blocks`` instance (not launched).
    """
    # Shorter than the 5-second polling interval below, so an unresponsive
    # status endpoint fails fast instead of blocking the callback forever
    # (requests applies no timeout unless one is passed explicitly).
    status_timeout_seconds = 3

    def get_status():
        """Fetch the system status text from the model server.

        Returns:
            The ``status`` field of the JSON response on HTTP 200 (or a
            fallback message if the field is missing), ``"Error getting
            status"`` for any other HTTP status, or a ``"Connection error:
            ..."`` message if the request or JSON decoding raises.
        """
        import requests

        try:
            resp = requests.get(
                f"{HF_SPACE_URL}/status", timeout=status_timeout_seconds
            )
            if resp.status_code == 200:
                return resp.json().get('status', 'No status information')
            return "Error getting status"
        except Exception as e:
            # UI boundary: always return a displayable string rather than
            # letting the periodic callback raise.
            return f"Connection error: {str(e)}"

    with gr.Blocks(title="Real-time Speaker Diarization", theme=gr.themes.Soft()) as demo:
        # Add configuration variables to page using custom component
        gr.HTML(
            f"""
"""
        )

        # Header and description
        gr.Markdown("# đ¤ Live Speaker Diarization")
        gr.Markdown("Real-time speech recognition with automatic speaker identification")

        # Add transcription model info
        gr.Markdown(f"**Using Models:** Final: {FINAL_TRANSCRIPTION_MODEL}, Realtime: {REALTIME_TRANSCRIPTION_MODEL}")

        # Status indicator
        connection_status = gr.HTML(
            """
Waiting to connect...
"""
        )

        with gr.Row():
            with gr.Column(scale=2):
                # Conversation display with embedded JavaScript for WebRTC and audio handling
                conversation_display = gr.HTML(
                    """
Click 'Start Listening' to begin...
""",
                    label="Live Conversation"
                )

                # Control buttons
                with gr.Row():
                    start_btn = gr.Button("âļī¸ Start Listening", variant="primary", size="lg")
                    stop_btn = gr.Button("âšī¸ Stop", variant="stop", size="lg")
                    clear_btn = gr.Button("đī¸ Clear", variant="secondary", size="lg")

                # Status display (refreshed by the timer below)
                status_output = gr.Markdown(
                    """
## System Status
Waiting to connect...
*Click Start Listening to begin*
""",
                    label="Status Information"
                )

            with gr.Column(scale=1):
                # Settings
                gr.Markdown("## âī¸ Settings")
                threshold_slider = gr.Slider(
                    minimum=0.3,
                    maximum=0.9,
                    step=0.05,
                    value=DEFAULT_CHANGE_THRESHOLD,
                    label="Speaker Change Sensitivity",
                    info="Lower = more sensitive (more speaker changes)"
                )
                max_speakers_slider = gr.Slider(
                    minimum=2,
                    maximum=ABSOLUTE_MAX_SPEAKERS,
                    step=1,
                    value=DEFAULT_MAX_SPEAKERS,
                    label="Maximum Speakers"
                )
                update_btn = gr.Button("Update Settings", variant="secondary")

                # Instructions
                gr.Markdown("""
## đ Instructions
1. **Start Listening** - allows browser to access microphone
2. **Speak** - system will transcribe and identify speakers
3. **Stop** when finished
4. **Clear** to reset conversation
## đ¨ Speaker Colors
- đ´ Speaker 1 (Red)
- đĸ Speaker 2 (Teal)
- đĩ Speaker 3 (Blue)
- đĄ Speaker 4 (Green)
- â Speaker 5 (Yellow)
- đŖ Speaker 6 (Plum)
- đ¤ Speaker 7 (Mint)
- đ Speaker 8 (Gold)
""")

        # JavaScript to connect buttons to the script functions
        gr.HTML("""
""")

        # Set up periodic status updates: every 5 seconds, replace the
        # status panel's content with the latest get_status() result.
        status_timer = gr.Timer(5)
        status_timer.tick(fn=get_status, outputs=status_output)

    return demo
# Create the Gradio interface once at import time; the same instance is used
# by mount_ui() and by the standalone entry point at the bottom of the file.
demo = build_ui()
def mount_ui(app: FastAPI):
    """Mount the module-level Gradio ``demo`` onto a FastAPI app at ``/ui``.

    Uses Gradio's documented ``gr.mount_gradio_app`` helper rather than
    mounting ``demo.app`` directly: the helper builds the ASGI sub-application
    from the Blocks instance itself, so it does not depend on ``demo.app``
    having been populated by an earlier ``launch()`` call.

    Args:
        app: The FastAPI application to attach the UI to. It is modified in
            place.

    Returns:
        None.
    """
    gr.mount_gradio_app(app, demo, path="/ui")
# For standalone testing: when this file is run as a script, launch the
# Gradio server directly instead of mounting the UI on a FastAPI app.
if __name__ == "__main__":
    demo.launch()