File size: 2,860 Bytes
8e73cee f9a373a 8e73cee 21b9cd3 f9a373a 8e73cee f9a373a 8e73cee e96836c 8e73cee 3acdbb1 8e73cee 11ad5cc e854c6e 11ad5cc 51c6475 11ad5cc e7bdca2 3b558b0 4b9672f 32bd56a e96836c e7bdca2 11ad5cc 89c776b f55b341 e96836c 2b0c602 11ad5cc 8e73cee |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 |
import gradio as gr
from transformers import pipeline
import numpy as np
import os
from huggingface_hub import login
import spaces
# Hub repository of the ASR checkpoint served by this demo.
MODEL_ID = "badrex/JASRv1.1"

# Authenticate against the Hugging Face Hub only when a token is present in
# the environment; without one the login step is skipped entirely.
HF_TOKEN = os.getenv("HF_TOKEN")
if HF_TOKEN:
    login(token=HF_TOKEN)

# Build the speech-recognition pipeline once at import time so that every
# request reuses the same object.
transcriber = pipeline("automatic-speech-recognition", model=MODEL_ID)
@spaces.GPU
def transcribe(audio):
    """Transcribe one audio clip with the module-level ``transcriber``.

    Args:
        audio: A ``(sampling_rate, samples)`` pair where ``samples`` is a
            NumPy array, or ``None``. ``None`` is treated as "no audio was
            submitted" (presumably what the UI sends for an empty input;
            verify against the Gradio Audio component docs).

    Returns:
        The ``"text"`` entry of the pipeline result, or an empty string when
        there is nothing to transcribe.
    """
    if audio is None:
        return ""

    sr, y = audio

    # np.max raises on an empty array, so bail out before touching it.
    if y.size == 0:
        return ""

    # convert to mono if stereo
    if y.ndim > 1:
        y = y.mean(axis=1)

    y = y.astype(np.float32)

    # Peak-normalise, but leave an all-zero (silent) clip untouched:
    # dividing by a zero peak would turn every sample into NaN.
    peak = np.max(np.abs(y))
    if peak > 0:
        y /= peak

    return transcriber({"sampling_rate": sr, "raw": y})["text"]
# Collect the bundled demo clips: every .wav/.mp3/.ogg file directly inside
# `examples_dir` becomes a one-element row (the file path).
# NOTE(review): os.listdir returns entries in arbitrary order, while the
# labels handed to the interface are matched by position; confirm that the
# pairing is what you expect on the deployment filesystem.
examples_dir = "examples"
examples = []

if not os.path.exists(examples_dir):
    print("Examples directory not found")
else:
    for filename in os.listdir(examples_dir):
        if not filename.endswith((".wav", ".mp3", ".ogg")):
            continue
        examples += [[os.path.join(examples_dir, filename)]]
    print(f"Found {len(examples)} example files")
# Captions shown instead of the raw file paths for the example clips.
# They are matched to `examples` purely by position.
example_labels = [
    "Kuwait Theatre",
    "Saudi Radio Poetry",
    "News Report (MSA)",
    "San3ani Arabic male",
    "San3ani Arabic female",
    "Khaleeji Theatre",
    "TEDx KSA",
    "Yousif Saif Football Commentary",
    "Khaleeji Theatre 2",
    "TV Drama",
    "KSA Theatre",
    "TV Drama 2",
    "Radio Jeddah (KSA)",
    "Omani Theatre",
    "Khaleeji Drama",
    "Radio News",
    "TEDx KSA 2",
    "Radio Jeddah (KSA) 2",
]

# Gradio expects exactly one label per example. When the number of files
# found on disk differs from the hard-coded captions (including the case of
# no examples at all), drop the captions and let Gradio render its default.
if len(examples) != len(example_labels):
    if examples:
        print(
            f"Ignoring example labels: {len(example_labels)} labels "
            f"for {len(examples)} example files"
        )
    example_labels = None

# Single-function UI: audio in, transcript text out.
demo = gr.Interface(
    fn=transcribe,
    inputs=gr.Audio(),
    outputs="text",
    title="<div>JASR v1.1 🎙️ <br>Speech Recognition for Dialectal Arabic</div>",
    description="""
<div class="centered-content">
<div>
<p>
Developed with ❤ by <a href="https://badrex.github.io/" style="color: #2563eb;">Badr al-Absi</a>
</p>
<br>
<p style="font-size: 15px; line-height: 1.8;">
Marhaban 👋🏼
<br>
<br>
This is a demo for JASR, pronounced <i>Jāsir</i> [جاسِر], a Transformer-based automatic speech recognition (ASR) system for dialectal Arabic.
The current running instance is optimized for the regional dialects of <i>Jazirat al-Arab</i>, or the Arabian Peninsula.
JASR is still under active development.
<br>
<p style="font-size: 15px; line-height: 1.8;">
Simply <strong>upload an audio file</strong> 📤 or <strong>record yourself speaking</strong> 🎙️⏺️ to try out the model!
</p>
</div>
</div>
""",
    # An empty list is passed as None so that no examples panel is built.
    examples=examples if examples else None,
    example_labels=example_labels,
    cache_examples=False,
    examples_per_page=18,
    # NOTE(review): None leaves the flagging behaviour to Gradio's default.
    # If the intent is to hide the flag button, "never" is the documented
    # value; confirm against the Gradio Interface docs.
    flagging_mode=None,
)
# Start the Gradio server only when this file is executed as a script,
# not when it is imported.
if __name__ == "__main__":
    demo.launch()