"""Gradio demo app for JASR, a dialectal Arabic speech recognition model."""

import os

import gradio as gr
import numpy as np
import spaces
from huggingface_hub import login
from transformers import pipeline

# Get token from Space secrets; log in only when one is configured.
HF_TOKEN = os.environ.get("HF_TOKEN")
if HF_TOKEN:
    login(token=HF_TOKEN)

# Load model from your private repo
MODEL_ID = "badrex/JASR"  # Change this to match your repo!
transcriber = pipeline("automatic-speech-recognition", model=MODEL_ID)

# NOTE(review): `examples` was referenced by the interface below without ever
# being defined, which raised NameError at start-up. Fill this list with
# sample audio paths to show them in the UI; an empty list shows none.
examples = []

DESCRIPTION = """

By Badr al-Absi with ❤️🤍💚


This is JASR, pronounced Jasir, an automatic speech recognition system optimized for the regional dialects of Jazirat al-Arab, or the Arabian Peninsula. The model is a fine-tune of the speech foundation model w2v-BERT 2.0, a 580M pre-trained speech encoder.

Simply upload an audio file 📤 or record yourself speaking 🎙️⏺️ to try out the model!

"""


@spaces.GPU
def transcribe(audio):
    """Transcribe one recording coming from the Gradio audio input.

    Args:
        audio: ``(sampling_rate, samples)`` pair where ``samples`` is a NumPy
            array, or ``None`` when the user submitted without any audio.

    Returns:
        The text produced by the ASR pipeline, or an empty string when there
        is nothing to transcribe.
    """
    # Submitting the form without a recording passes None; unpacking it
    # would raise TypeError.
    if audio is None:
        return ""

    sr, y = audio
    # np.max() raises ValueError on an empty array.
    if y.size == 0:
        return ""

    # Convert to mono if stereo
    if y.ndim > 1:
        y = y.mean(axis=1)

    y = y.astype(np.float32)

    # Peak-normalise, but skip all-zero (silent) input: dividing by a zero
    # peak would fill the signal with NaNs.
    peak = np.max(np.abs(y))
    if peak > 0:
        y /= peak

    return transcriber({"sampling_rate": sr, "raw": y})["text"]


demo = gr.Interface(
    fn=transcribe,
    inputs=gr.Audio(),
    outputs="text",
    title="JASR 🐐 Dialectal Arabic ASR",
    description=DESCRIPTION,
    examples=examples or None,
    cache_examples=False,  # Disable caching to avoid issues
    # NOTE(review): None defers to Gradio's default flagging behaviour; if the
    # intent is to hide the Flag button, "never" is the explicit value — confirm.
    flagging_mode=None,
)

if __name__ == "__main__":
    demo.launch()