"""Gradio demo app for JASR, an ASR model for dialectal Arabic.

Loads the ``badrex/JASRv1.1`` speech-recognition pipeline once at import
time and serves a simple upload/record-and-transcribe interface.
"""

import os

import gradio as gr
import numpy as np
import spaces
from huggingface_hub import login
from transformers import pipeline

# Authenticate against the Hugging Face Hub only when a token is provided.
HF_TOKEN = os.environ.get("HF_TOKEN")
if HF_TOKEN:
    login(token=HF_TOKEN)

MODEL_ID = "badrex/JASRv1.1"
transcriber = pipeline("automatic-speech-recognition", model=MODEL_ID)


@spaces.GPU
def transcribe(audio):
    """Transcribe one audio clip and return the recognized text.

    Args:
        audio: ``(sampling_rate, samples)`` pair as unpacked below, or
            ``None`` when nothing was recorded or uploaded. ``samples`` is
            a NumPy array; arrays with more than one dimension are averaged
            over axis 1 (assumed to be the channel axis -- this matches the
            Gradio ``Audio`` numpy format, verify if the input type changes).

    Returns:
        The ``"text"`` field of the pipeline output, or an empty string
        when there is no audio to transcribe.
    """
    # Gradio passes None when the user submits without providing audio.
    if audio is None:
        return ""

    sr, y = audio

    # Convert to mono if stereo.
    if y.ndim > 1:
        y = y.mean(axis=1)

    y = y.astype(np.float32)
    if y.size == 0:
        # np.max would raise on an empty array; nothing to transcribe anyway.
        return ""

    # Peak-normalize. An all-zero (silent) clip is left untouched, since
    # dividing by a zero peak would fill the signal with NaN.
    peak = np.max(np.abs(y))
    if peak > 0:
        y /= peak

    return transcriber({"sampling_rate": sr, "raw": y})["text"]


examples = []
examples_dir = "examples"
if os.path.isdir(examples_dir):
    # os.listdir returns entries in arbitrary order; sort so that the
    # positional labels below attach to the same files on every run.
    examples = [
        [os.path.join(examples_dir, filename)]
        for filename in sorted(os.listdir(examples_dir))
        if filename.endswith((".wav", ".mp3", ".ogg"))
    ]
    print(f"Found {len(examples)} example files")
else:
    print("Examples directory not found")

# Display names for the example clips, matched to `examples` by position.
# NOTE(review): confirm the sorted file order corresponds to this label order.
example_labels = [
    "Kuwait Theatre",
    "Saudi Radio Poetry",
    "News Report (MSA)",
    "San3ani Arabic male",
    "San3ani Arabic female",
    "Khaleeji Theatre",
    "TEDx KSA",
    "Yousif Saif Football Commentary",
    "Khaleeji Theatre 2",
    "TV Drama",
    "KSA Theatre",
    "TV Drama 2",
    "Radio Jeddah (KSA)",
    "Omani Theatre",
    "Khaleeji Drama",
    "Radio News",
    "TEDx KSA 2",
    "Radio Jeddah (KSA) 2",
]

# Labels are positional, so they are only meaningful when there is exactly
# one per example file; otherwise let Gradio fall back to its default display.
labels_match = bool(examples) and len(examples) == len(example_labels)
if examples and not labels_match:
    print(
        f"Have {len(example_labels)} example labels for {len(examples)} "
        "example files; showing examples without custom labels"
    )

# NOTE(review): the surrounding blank lines are part of the title text as it
# appears in the source; if HTML wrappers were intended here, restore them.
TITLE = """

JASR v1.1 🎙️
Speech Recognition for Dialectal Arabic

"""

DESCRIPTION = """

Developed with ❤ by Badr al-Absi

Marhaban 👋🏼

This is a demo for JASR, pronounced Jāsir [جاسِر], a Transformer-based automatic speech recognition (ASR) system for dialectal Arabic. The current running instance is optimized for the regional dialects of Jazirat al-Arab, or the Arabian Peninsula. JASR is still under active development.

Simply upload an audio file 📤 or record yourself speaking 🎙️⏺️ to try out the model!

"""

demo = gr.Interface(
    fn=transcribe,
    inputs=gr.Audio(),
    outputs="text",
    title=TITLE,
    description=DESCRIPTION,
    examples=examples if examples else None,
    example_labels=example_labels if labels_match else None,
    cache_examples=False,
    examples_per_page=18,
    # NOTE(review): None defers to Gradio's default flagging behavior; if the
    # intent is to hide the Flag button, this likely needs "never" -- confirm.
    flagging_mode=None,
)

if __name__ == "__main__":
    demo.launch()