JASRv1.1

Running on Zero

File size: 2,276 Bytes

8e73cee
 
 
 
 
f9a373a
8e73cee
 
 
 
 
e96836c
f9a373a
8e73cee
f9a373a
8e73cee
 
e96836c
8e73cee
 
 
 
 
 
3acdbb1
 
 
 
 
 
 
 
 
 
 
 
 
8e73cee
11ad5cc
 
 
e7bdca2
11ad5cc
 
 
 
 
 
 
 
e7bdca2
3b558b0
4b9672f
e412337
e96836c
e7bdca2
11ad5cc
 
 
 
 
 
 
 
e96836c
11ad5cc
8e73cee

import gradio as gr
from transformers import pipeline
import numpy as np
import os
from huggingface_hub import login
import spaces

# Log in to the Hugging Face Hub only when a token is present in the
# environment; without one the app proceeds unauthenticated.
HF_TOKEN = os.getenv("HF_TOKEN")
if HF_TOKEN:
    login(token=HF_TOKEN)

# Build the ASR pipeline once at import time so every request reuses it.
MODEL_ID = "badrex/JASR"
transcriber = pipeline(task="automatic-speech-recognition", model=MODEL_ID)

@spaces.GPU
def transcribe(audio):
    """Transcribe one audio clip with the module-level ASR pipeline.

    Args:
        audio: A ``(sampling_rate, samples)`` pair, or ``None`` when nothing
            was uploaded or recorded. ``samples`` is a NumPy array; when it
            has more than one dimension, axis 1 is averaged away
            (presumably the channel axis -- confirm against ``gr.Audio``).

    Returns:
        The recognized text, or an empty string when there is no audio to
        transcribe (no input, zero samples, or a completely silent clip).
    """
    # The UI can be submitted with no clip, in which case there is nothing
    # to unpack.
    if audio is None:
        return ""

    sr, y = audio
    # np.max raises on a zero-size array, so bail out before normalizing.
    if y.size == 0:
        return ""

    # convert to mono if stereo
    if y.ndim > 1:
        y = y.mean(axis=1)
    y = y.astype(np.float32)

    # Peak-normalize to [-1, 1]. A zero peak means pure silence; dividing by
    # it would turn the whole waveform into NaN, so skip the model call.
    peak = np.max(np.abs(y))
    if peak == 0:
        return ""
    y /= peak

    return transcriber({"sampling_rate": sr, "raw": y})["text"]


# Collect bundled sample clips for the UI. Each entry is a one-element list
# holding the path of one audio file inside `examples_dir`.
examples = []
examples_dir = "examples"
# isdir (not exists): a regular file with this name must not reach listdir.
if os.path.isdir(examples_dir):
    examples = [
        [os.path.join(examples_dir, filename)]
        # listdir order is arbitrary; sort so the examples appear in a
        # stable order across runs.
        for filename in sorted(os.listdir(examples_dir))
        # Match extensions case-insensitively so e.g. "clip.WAV" is kept.
        if filename.lower().endswith((".wav", ".mp3", ".ogg"))
    ]
    print(f"Found {len(examples)} example files")
else:
    print("Examples directory not found")

    
# Gradio UI: one audio input (upload or microphone) wired to `transcribe`,
# with the recognized text shown as plain text output.
demo = gr.Interface(
    fn=transcribe,
    inputs=gr.Audio(),
    outputs="text",
    title="JASR 🎙️ Dialectal Arabic Speech Recognition",
    # The description is HTML. Every <p> is closed explicitly so the markup
    # is well-formed rather than relying on the browser to auto-close it.
    description="""
        <div class="centered-content">
            <div>
                <p>
                By <a href="https://badrex.github.io/" style="color: #2563eb;">Badr al-Absi</a> with ❤️🤍💚
                </p>
                <br>
                <p style="font-size: 15px; line-height: 1.8;">
                Marhaban 👋🏼
                <br>
                <br>
                 This is a demo for JASR, pronounced <i>Jasir</i> [ جاسِر ], a Transformer-based automatic speech recognition (ASR) system for dialectal Arabic.
                 The current running instance is optimized for the regional dialects of <i>Jazirat al-Arab</i>, or the Arabian Peninsula.
                 JASR is still under active development.
                <br>
                </p>
                <p style="font-size: 15px; line-height: 1.8;">
                Simply <strong>upload an audio file</strong> 📤 or <strong>record yourself speaking</strong> 🎙️⏺️ to try out the model!
                </p>
            </div>
        </div>
        """,
    # Pass None (not an empty list) when no example clips were found.
    examples=examples or None,
    cache_examples=False,
    # NOTE(review): None appears to leave Gradio's default flagging
    # behavior in place; if the intent is to hide the Flag button, "never"
    # looks like the documented value -- confirm before changing.
    flagging_mode=None,
)

# Start the web server only when this file is run as a script.
if __name__ == "__main__":
    demo.launch()