|
import gradio as gr |
|
from transformers import pipeline |
|
import numpy as np |
|
import os |
|
from huggingface_hub import login |
|
import spaces |
|
|
|
# Log in to the Hugging Face Hub only when a token is present in the
# environment; without one the login call is skipped entirely.
HF_TOKEN = os.getenv("HF_TOKEN")
if HF_TOKEN:
    login(token=HF_TOKEN)

# The ASR pipeline is built once at import time and reused by every request.
MODEL_ID = "badrex/JASR"
transcriber = pipeline(task="automatic-speech-recognition", model=MODEL_ID)
|
|
|
@spaces.GPU
def transcribe(audio):
    """Transcribe one audio clip with the module-level ``transcriber``.

    Args:
        audio: A ``(sampling_rate, samples)`` pair, or ``None`` when no
            clip was supplied. ``samples`` is used as a NumPy array
            (``.ndim``, ``.mean``, ``.astype``).

    Returns:
        The ``"text"`` entry of the pipeline result, or an empty string
        when there is no audio to transcribe.
    """
    # Nothing was supplied: unpacking ``None`` below would raise TypeError.
    if audio is None:
        return ""

    sr, y = audio

    # A zero-length clip has no peak; np.max would raise ValueError on it.
    if y.size == 0:
        return ""

    # Collapse multi-dimensional input to one dimension by averaging over
    # axis 1 (presumably the channel axis -- confirm against the Gradio
    # Audio docs).
    if y.ndim > 1:
        y = y.mean(axis=1)

    # astype returns a new float32 array, so the in-place division below
    # never touches the caller's buffer.
    y = y.astype(np.float32)

    # Peak-normalise. Silent clips are left as-is: dividing by a zero peak
    # would fill the signal with NaNs.
    peak = np.max(np.abs(y))
    if peak > 0:
        y /= peak

    return transcriber({"sampling_rate": sr, "raw": y})["text"]
|
|
|
|
|
# Build the list of example clips: one single-element row per audio file
# found directly inside ``examples_dir``.
examples_dir = "examples"
_AUDIO_SUFFIXES = (".wav", ".mp3", ".ogg")

# isdir rather than exists: a regular file that happens to be called
# "examples" must not reach os.listdir (it would raise NotADirectoryError).
if os.path.isdir(examples_dir):
    # os.listdir returns entries in arbitrary order; sorting keeps the
    # example order stable between runs. Suffixes are matched
    # case-insensitively so files such as "clip.WAV" are picked up too.
    examples = [
        [os.path.join(examples_dir, filename)]
        for filename in sorted(os.listdir(examples_dir))
        if filename.lower().endswith(_AUDIO_SUFFIXES)
    ]
    print(f"Found {len(examples)} example files")
else:
    examples = []
    print("Examples directory not found")
|
|
|
|
|
# Page copy for the demo, reproduced verbatim.
# NOTE(review): the non-ASCII runs in these strings look like mis-decoded
# emoji -- confirm against the original file's encoding before editing.
_TITLE = "JASR π Dialectal Arabic Speech Recognition"
_DESCRIPTION = """
<div class="centered-content">
<div>
<p>
By <a href="https://badrex.github.io/" style="color: #2563eb;">Badr al-Absi</a> with β€οΈπ€π
</p>
<br>
<p style="font-size: 15px; line-height: 1.8;">
Marhaba ππΌ
<br>
<br>
This is a demo for JASR, pronounced <i>Jasir</i>, a Transformer-based automatic speech recognition (ASR) system for dialectal Arabic.
The current running instance is optimized for the regional dialects of <i>Jazirat al-Arab</i>, or the Arabian Peninsula.
<br>
<p style="font-size: 15px; line-height: 1.8;">
Simply <strong>upload an audio file</strong> π€ or <strong>record yourself speaking</strong> ποΈβΊοΈ to try out the model!
</p>
</div>
</div>
"""

# Single-function UI: one audio input wired to ``transcribe``, text output.
demo = gr.Interface(
    fn=transcribe,
    inputs=gr.Audio(),
    outputs="text",
    title=_TITLE,
    description=_DESCRIPTION,
    # An empty example list is passed as None instead.
    examples=examples or None,
    cache_examples=False,
    # NOTE(review): None presumably defers to Gradio's default flagging
    # behaviour; if the intent was to hide the Flag button, confirm whether
    # "never" should be used here.
    flagging_mode=None,
)


# Start the app only when this file is run as a script.
if __name__ == "__main__":
    demo.launch()