Spaces:
badrex
/
Running on Zero

File size: 2,276 Bytes
8e73cee
 
 
 
 
f9a373a
8e73cee
 
 
 
 
e96836c
f9a373a
8e73cee
f9a373a
8e73cee
 
e96836c
8e73cee
 
 
 
 
 
3acdbb1
 
 
 
 
 
 
 
 
 
 
 
 
8e73cee
11ad5cc
 
 
e7bdca2
11ad5cc
 
 
 
 
 
 
 
e7bdca2
3b558b0
4b9672f
e412337
e96836c
e7bdca2
11ad5cc
 
 
 
 
 
 
 
e96836c
11ad5cc
8e73cee
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
import gradio as gr
from transformers import pipeline
import numpy as np
import os
from huggingface_hub import login
import spaces

# Log in to the Hugging Face Hub only when a token is supplied through the
# environment; without one the login step is skipped entirely.
HF_TOKEN = os.getenv("HF_TOKEN")
if HF_TOKEN:
    login(token=HF_TOKEN)

# Build the speech-recognition pipeline once at import time so that every
# call to transcribe() reuses the same loaded model.
# NOTE(review): no device is passed here; confirm the pipeline ends up on the
# GPU when running under the @spaces.GPU decorator.
MODEL_ID = "badrex/JASR"
transcriber = pipeline(task="automatic-speech-recognition", model=MODEL_ID)

@spaces.GPU
def transcribe(audio):
    """Transcribe one audio clip with the module-level ASR pipeline.

    Args:
        audio: A ``(sampling_rate, samples)`` pair, or ``None`` when no audio
            was provided (e.g. the form was submitted without a recording).
            ``samples`` is a NumPy array; arrays with more than one dimension
            are averaged over axis 1 to obtain a mono signal.

    Returns:
        The recognized text, or an empty string when there is nothing to
        transcribe (no audio or zero samples).
    """
    # Nothing was recorded or uploaded: avoid a TypeError on unpacking.
    if audio is None:
        return ""

    sr, y = audio
    y = np.asarray(y)

    # np.max() raises ValueError on an empty array, so bail out early.
    if y.size == 0:
        return ""

    # convert to mono if stereo
    if y.ndim > 1:
        y = y.mean(axis=1)

    # astype() returns a fresh array, so the in-place scaling below never
    # modifies the caller's buffer.
    y = y.astype(np.float32)

    # Peak-normalize to [-1, 1]. A completely silent clip has a peak of 0;
    # dividing by it would fill the signal with NaNs, so leave it untouched.
    peak = np.max(np.abs(y))
    if peak > 0:
        y /= peak

    return transcriber({"sampling_rate": sr, "raw": y})["text"]


def collect_examples(directory, extensions=(".wav", ".mp3", ".ogg")):
    """Return Gradio example rows for the audio files found in *directory*.

    Args:
        directory: Path of an existing directory to scan (not recursive).
        extensions: Tuple of lowercase filename suffixes to accept. Matching
            is case-insensitive, so ``clip.WAV`` is picked up as well.

    Returns:
        A list of single-element lists, each holding the path of one audio
        file, ordered by filename so the result is deterministic
        (``os.listdir`` itself returns entries in arbitrary order).
    """
    return [
        [os.path.join(directory, filename)]
        for filename in sorted(os.listdir(directory))
        if filename.lower().endswith(extensions)
    ]


# Example clips offered in the UI; stays empty when the directory is missing.
examples = []
examples_dir = "examples"
# isdir() rather than exists(): a plain file named "examples" must not be
# handed to os.listdir(), which would raise NotADirectoryError.
if os.path.isdir(examples_dir):
    examples = collect_examples(examples_dir)
    print(f"Found {len(examples)} example files")
else:
    print("Examples directory not found")

    
# Web UI: a single audio input (upload or microphone) wired to transcribe(),
# with the transcript shown as plain text.
demo = gr.Interface(
    fn=transcribe,
    inputs=gr.Audio(),
    outputs="text",
    title="JASR 🎙️ Dialectal Arabic Speech Recognition",
    description="""
        <div class="centered-content">
            <div>
                <p>
                By <a href="https://badrex.github.io/" style="color: #2563eb;">Badr al-Absi</a> with ❤️🤍💚 
                </p>
                <br>
                <p style="font-size: 15px; line-height: 1.8;">
                Marhaban 👋🏼
                <br>
                <br>
                 This is a demo for JASR, pronounced <i>Jasir</i> [ جاسِر ], a Transformer-based automatic speech recognition (ASR) system for dialectal Arabic.
                 The current running instance is optimized for the regional dialects of <i>Jazirat al-Arab</i>, or the Arabian Peninsula. 
                 JASR is still under active development.
                <br>                   
                <p style="font-size: 15px; line-height: 1.8;">
                Simply <strong>upload an audio file</strong> 📤 or <strong>record yourself speaking</strong> 🎙️⏺️ to try out the model!
                </p>
            </div>
        </div>
        """,
    # Pass None instead of an empty list when no example clips were found.
    examples=examples if examples else None,
    # Examples are run on demand rather than pre-computed at startup.
    cache_examples=False,
    # "never" hides the Flag button. Passing None is the same as omitting the
    # argument, in which case Gradio falls back to its default ("manual",
    # unless overridden via environment) and the button is shown.
    flagging_mode="never",
)

# Start the server only when executed as a script, not when imported.
if __name__ == "__main__":
    demo.launch()