"""Gradio demo app for JASR, a speech recognizer for dialectal Arabic.

Loads the ASR pipeline once at import time, exposes a single ``transcribe``
function through a ``gr.Interface``, and offers any audio files found in the
local ``examples`` directory as clickable examples.
"""

import os

import gradio as gr
from transformers import pipeline
import numpy as np
from huggingface_hub import login
import spaces

# Authenticate against the Hugging Face Hub only when a token is provided
# through the environment; without one the model is fetched anonymously.
HF_TOKEN = os.environ.get("HF_TOKEN")
if HF_TOKEN:
    login(token=HF_TOKEN)

MODEL_ID = "badrex/JASRv1.1"
transcriber = pipeline("automatic-speech-recognition", model=MODEL_ID)


@spaces.GPU
def transcribe(audio):
    """Transcribe one audio clip and return the recognized text.

    Args:
        audio: A ``(sampling_rate, samples)`` pair where ``samples`` is a
            NumPy array, or ``None`` when nothing was recorded or uploaded.

    Returns:
        The transcription string produced by the ASR pipeline, or an empty
        string when there is no audio to transcribe.
    """
    # The callback receives None when the user submits without any audio;
    # unpacking it would raise a TypeError.
    if audio is None:
        return ""

    sr, y = audio
    # A zero-length clip has no peak to normalize by (np.max would raise).
    if y.size == 0:
        return ""

    # convert to mono if stereo
    if y.ndim > 1:
        y = y.mean(axis=1)

    y = y.astype(np.float32)

    # Peak-normalize, but skip it for pure silence: dividing by a zero peak
    # would fill the signal with NaNs.
    peak = np.max(np.abs(y))
    if peak > 0:
        y /= peak

    return transcriber({"sampling_rate": sr, "raw": y})["text"]


# Collect example clips shipped next to the app. isdir() (rather than
# exists()) keeps a stray regular file named "examples" from crashing
# listdir(); sorting makes the example order stable across runs.
examples = []
examples_dir = "examples"
if os.path.isdir(examples_dir):
    examples = [
        [os.path.join(examples_dir, filename)]
        for filename in sorted(os.listdir(examples_dir))
        if filename.endswith((".wav", ".mp3", ".ogg"))
    ]
    print(f"Found {len(examples)} example files")
else:
    print("Examples directory not found")


demo = gr.Interface(
    fn=transcribe,
    inputs=gr.Audio(),
    outputs="text",
    title="""

JASR 🎙️
Speech Recognition for Dialectal Arabic

""",
    description="""

Developed with ❤ by Badr al-Absi

Marhaban 👋🏼

This is a demo for JASR, pronounced Jāsir [جاسِر], a Transformer-based automatic speech recognition (ASR) system for dialectal Arabic. The current running instance is optimized for the regional dialects of Jazirat al-Arab, or the Arabian Peninsula. JASR is still under active development.

Simply upload an audio file 📤 or record yourself speaking 🎙️⏺️ to try out the model!

""",
    # An empty list is replaced by None so no examples are passed.
    examples=examples if examples else None,
    cache_examples=False,
    # NOTE(review): None leaves the flagging behaviour to Gradio's default;
    # if the intent was to hide the Flag button, "never" may be what is
    # wanted — confirm against the installed Gradio version.
    flagging_mode=None,
)

if __name__ == "__main__":
    demo.launch()