Spaces:
Runtime error
Runtime error
File size: 1,387 Bytes
99da818 017628c 99da818 5ef62cb 99da818 5ef62cb 017628c 99da818 017628c 99da818 5ef62cb 017628c |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 |
import gradio as gr
import torch
from pyannote.audio import Inference
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
import os
# Use the HF token from the Hugging Face Space secrets.
# os.getenv returns None when the HF_TOKEN secret is not configured.
hf_token = os.getenv("HF_TOKEN")

# Load the embedding model with authentication. window="whole" asks the
# model for a single embedding covering the entire audio file rather than
# one embedding per sliding window.
model = Inference("pyannote/embedding", use_auth_token=hf_token, window="whole")
# Load known speaker embeddings: one reference embedding per
# known_speakers/<name>.wav file, keyed by <name>.
speaker_embeddings = {}
# Iterate in sorted order so the enrollment order (and therefore which
# speaker wins when two reference embeddings score equally) does not
# depend on the filesystem's arbitrary listing order.
for speaker in sorted(os.listdir("known_speakers")):
    if speaker.endswith(".wav"):
        # splitext removes only the trailing extension; str.replace would
        # also strip a ".wav" that appears in the middle of a file name.
        speaker_name = os.path.splitext(speaker)[0]
        emb = model(os.path.join("known_speakers", speaker))
        speaker_embeddings[speaker_name] = emb
def _as_row_vector(embedding):
    """Return *embedding* as a 2-D NumPy array of shape (1, D)."""
    # Accept both plain NumPy arrays and torch tensors: a tensor must be
    # detached and moved to the CPU before NumPy can view it.
    if hasattr(embedding, "detach"):
        embedding = embedding.detach().cpu().numpy()
    return np.asarray(embedding).reshape(1, -1)


def identify_speaker(audio):
    """Identify which known speaker best matches an audio clip.

    Args:
        audio: Path to the recorded/uploaded audio file, or ``None`` (or an
            empty string) when no audio was provided.

    Returns:
        A human-readable message. When audio is given it names the entry of
        ``speaker_embeddings`` with the highest cosine similarity to the
        clip's embedding, together with that score. If there are no known
        speakers the result is "Unknown" with a score of -1.00.
    """
    # The UI hands over None when the user submits without recording;
    # passing that to the model would fail with an obscure error.
    if not audio:
        return "No audio received. Please record a voice clip and try again."

    # Convert the query once, outside the loop: it is the same for every
    # comparison.
    query = _as_row_vector(model(audio))

    best_score = -1
    best_speaker = "Unknown"
    for name, emb in speaker_embeddings.items():
        # cosine_similarity returns a 1x1 matrix for two row vectors.
        score = cosine_similarity(query, _as_row_vector(emb))[0][0]
        if score > best_score:
            best_score = score
            best_speaker = name
    return f"🧠 Identified Speaker: {best_speaker}\n🧪 Similarity Score: {best_score:.2f}"
# Launch the Gradio UI.
# Keyword arguments shared by both spellings of the audio input below.
_AUDIO_OPTIONS = {"type": "filepath", "label": "🎙️ Upload or record voice"}
try:
    # Gradio 3.x spelling: a single `source` string.
    audio_input = gr.Audio(source="microphone", **_AUDIO_OPTIONS)
except TypeError:
    # Gradio 4+ renamed the keyword to `sources` (a list) and raises
    # TypeError for the old `source` keyword.
    audio_input = gr.Audio(sources=["microphone"], **_AUDIO_OPTIONS)

# NOTE(review): the label and description mention uploading, but only the
# microphone source is enabled; confirm which behaviour is intended.
gr.Interface(
    fn=identify_speaker,
    inputs=audio_input,
    outputs="text",
    title="🎤 Speaker Identification App",
    description="Upload a voice clip to identify the speaker.",
).launch()
|