Spaces:
badrex
/
Running on Zero

File size: 2,860 Bytes
8e73cee
 
 
 
 
f9a373a
8e73cee
 
 
 
 
21b9cd3
f9a373a
8e73cee
f9a373a
8e73cee
 
e96836c
8e73cee
 
 
 
 
 
3acdbb1
 
 
 
 
 
 
 
 
 
 
 
 
8e73cee
11ad5cc
 
 
e854c6e
11ad5cc
 
 
 
51c6475
11ad5cc
 
 
e7bdca2
3b558b0
4b9672f
32bd56a
e96836c
e7bdca2
11ad5cc
 
 
 
 
 
 
 
89c776b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
f55b341
e96836c
2b0c602
11ad5cc
8e73cee
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
import gradio as gr
from transformers import pipeline
import numpy as np
import os
from huggingface_hub import login
import spaces

# Log in to the Hugging Face Hub only when a token is supplied through the
# environment; without one the login step is skipped entirely.
HF_TOKEN = os.getenv("HF_TOKEN")
if HF_TOKEN:
    login(token=HF_TOKEN)

# The speech-recognition pipeline is constructed once, at import time.
MODEL_ID = "badrex/JASRv1.1"
transcriber = pipeline(task="automatic-speech-recognition", model=MODEL_ID)

@spaces.GPU
def transcribe(audio):
    """Transcribe one audio clip with the module-level ASR pipeline.

    Args:
        audio: A ``(sampling_rate, samples)`` pair, where ``samples`` is a
            NumPy array, or ``None`` when no clip was uploaded or recorded.
            Arrays with more than one dimension are averaged over axis 1
            (assumed to be the channel axis -- confirm against the Gradio
            ``Audio`` component in use).

    Returns:
        The ``"text"`` entry of the pipeline's result.

    Raises:
        gr.Error: If no audio was provided or the clip has no samples.
    """
    # Submitting without a clip yields None; report it instead of crashing
    # on the tuple unpacking below.
    if audio is None:
        raise gr.Error("Please upload or record an audio clip first.")

    sr, y = audio
    # np.max raises on a zero-length array, so reject empty clips up front.
    if y.size == 0:
        raise gr.Error("The provided audio clip is empty.")

    # convert to mono if stereo
    if y.ndim > 1:
        y = y.mean(axis=1)
    y = y.astype(np.float32)

    # Peak-normalise, but leave all-zero (silent) audio untouched: dividing
    # by a zero peak would fill the signal with NaNs.
    peak = np.max(np.abs(y))
    if peak > 0:
        y /= peak
    return transcriber({"sampling_rate": sr, "raw": y})["text"]


# Collect bundled sample clips. Each entry is a one-element row holding the
# file path; entries keep the order returned by os.listdir (no sorting).
examples_dir = "examples"
examples = []
if not os.path.exists(examples_dir):
    print("Examples directory not found")
else:
    _audio_suffixes = (".wav", ".mp3", ".ogg")
    examples.extend(
        [os.path.join(examples_dir, entry)]
        for entry in os.listdir(examples_dir)
        if entry.endswith(_audio_suffixes)
    )
    print(f"Found {len(examples)} example files")

    
# Static UI copy, kept apart from the constructor call so the call itself
# stays short. The strings are HTML fragments passed to the interface as-is.
_TITLE = "<div>JASR v1.1 🎙️ <br>Speech Recognition for Dialectal Arabic</div>"

_DESCRIPTION = """
        <div class="centered-content">
            <div>
                <p>
                Developed with ❤ by <a href="https://badrex.github.io/" style="color: #2563eb;">Badr al-Absi</a>  
                </p>
                <br>
                <p style="font-size: 15px; line-height: 1.8;">
                Marhaban 👋🏼
                <br>
                <br>
                 This is a demo for JASR, pronounced <i>Jāsir</i> [جاسِر], a Transformer-based automatic speech recognition (ASR) system for dialectal Arabic.
                 The current running instance is optimized for the regional dialects of <i>Jazirat al-Arab</i>, or the Arabian Peninsula. 
                 JASR is still under active development.
                <br>                   
                <p style="font-size: 15px; line-height: 1.8;">
                Simply <strong>upload an audio file</strong> 📤 or <strong>record yourself speaking</strong> 🎙️⏺️ to try out the model!
                </p>
            </div>
        </div>
        """

# Display names for the example clips.
# NOTE(review): presumably matched to `examples` by position -- verify that
# the directory listing order lines up with this list.
_EXAMPLE_LABELS = [
    "Kuwait Theatre",
    "Saudi Radio Poetry",
    "News Report (MSA)",
    "San3ani Arabic male",
    "San3ani Arabic female",
    "Khaleeji Theatre",
    "TEDx KSA",
    "Yousif Saif Football Commentary",
    "Khaleeji Theatre 2",
    "TV Drama",
    "KSA Theatre",
    "TV Drama 2",
    "Radio Jeddah (KSA)",
    "Omani Theatre",
    "Khaleeji Drama",
    "Radio News",
    "TEDx KSA 2",
    "Radio Jeddah (KSA) 2",
]

# Audio-in / text-out interface around `transcribe`.
# NOTE(review): flagging_mode=None defers to Gradio's default flagging
# behaviour -- confirm that is the intent.
demo = gr.Interface(
    fn=transcribe,
    inputs=gr.Audio(),
    outputs="text",
    title=_TITLE,
    description=_DESCRIPTION,
    # An empty example list is passed as None rather than [].
    examples=examples or None,
    example_labels=_EXAMPLE_LABELS,
    examples_per_page=18,
    cache_examples=False,
    flagging_mode=None,
)

# Start the app only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()