|
import gradio as gr |
|
from transformers import pipeline |
|
import numpy as np |
|
import os |
|
from huggingface_hub import login |
|
|
|
|
|
# Optional Hugging Face access token, read from the environment.
HF_TOKEN = os.getenv("HF_TOKEN")

# Authenticate with the Hub only when a non-empty token was provided.
if HF_TOKEN:
    login(token=HF_TOKEN)
|
|
|
|
|
# Hub identifier of the speech-recognition checkpoint served by this app.
MODEL_ID = "badrex/JASR"

# Built once at import time and reused by every transcription request.
transcriber = pipeline(
    "automatic-speech-recognition",
    model=MODEL_ID,
    token=HF_TOKEN,
)
|
|
|
def transcribe(audio):
    """Transcribe one recording to text.

    Args:
        audio: A ``(sampling_rate, samples)`` pair, or ``None`` when no
            recording is available. ``samples`` is a NumPy array; arrays
            with more than one dimension are averaged over axis 1
            (assumed to be the channel axis -- TODO confirm against the
            Gradio version in use).

    Returns:
        The ``"text"`` field of the ASR pipeline result, or an empty
        string when there is nothing to transcribe (no recording, or a
        recording with zero samples).
    """
    # Nothing was recorded (or the input was cleared): do not unpack None.
    if audio is None:
        return ""

    sr, y = audio

    # np.max raises ValueError on an empty array, so bail out early.
    if y.size == 0:
        return ""

    # Collapse multi-channel audio to a single channel.
    if y.ndim > 1:
        y = y.mean(axis=1)

    y = y.astype(np.float32)

    # Peak-normalise; skip all-zero audio, where dividing by the zero peak
    # would fill the buffer with NaNs.
    peak = np.max(np.abs(y))
    if peak > 0:
        y /= peak

    return transcriber({"sampling_rate": sr, "raw": y})["text"]
|
|
|
# Web UI: microphone audio in, transcribed text out.
demo = gr.Interface(
    fn=transcribe,
    inputs=gr.Audio(sources="microphone"),
    outputs="text",
)
|
|
|
# Start the web server only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()