File size: 2,819 Bytes
07a50af
 
23545c8
07a50af
 
23545c8
 
87966ec
 
 
23545c8
07a50af
 
 
 
 
 
 
 
b04a244
87966ec
b04a244
87966ec
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
23545c8
87966ec
 
23545c8
 
 
 
87966ec
 
 
23545c8
 
 
07a50af
 
 
23545c8
07a50af
3f395fc
 
 
9a76962
3f395fc
9a76962
3f395fc
d8edf3c
3f395fc
d8edf3c
3f395fc
d8edf3c
3f395fc
d8edf3c
3f395fc
23545c8
87966ec
 
07a50af
 
 
b04a244
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
import gradio as gr
from transformers import pipeline
import os

# Build the audio-classification pipeline once, at import time; the progress
# messages bracket the (potentially slow) model load.
model_id = "badrex/mms-300m-arabic-dialect-identifier"
print("Loading model...")
classifier = pipeline(task="audio-classification", model=model_id)
print("Model loaded successfully")

# Display names for the short dialect labels. Lookups elsewhere in this file
# fall back to the raw label when it is missing from this table.
dialect_mapping = dict(
    MSA="Modern Standard Arabic",
    Egyptian="Egyptian Arabic",
    Gulf="Gulf Arabic",
    Levantine="Levantine Arabic",
    Maghrebi="Maghrebi Arabic",
)

def predict_dialect(audio):
    """Classify the Arabic dialect spoken in a Gradio audio input.

    Args:
        audio: ``(sample_rate, samples)`` tuple as passed by the Gradio audio
            component, or ``None`` when no audio was supplied. ``samples`` is
            a NumPy array, either 1-D (mono) or multi-dimensional with the
            channels on axis 1.

    Returns:
        A dict mapping a dialect display name to its score as a ``float``,
        one entry per prediction returned by the classifier. When there is
        no audio, or the audio contains no samples, ``{"Error": 1.0}`` is
        returned instead so the output widget still has something to show.
    """
    if audio is None:
        return {"Error": 1.0}

    # The audio input from Gradio is a tuple of (sample_rate, audio_array)
    sr, audio_array = audio

    # A zero-length recording gives the model nothing to classify; report it
    # the same way as missing audio rather than failing inside the pipeline.
    if audio_array.size == 0:
        return {"Error": 1.0}

    # Remember the incoming dtype: once the samples are floats we can no
    # longer tell which integer range they originally came from.
    source_dtype = audio_array.dtype
    audio_array = audio_array.astype("float32")

    if audio_array.ndim > 1:
        audio_array = audio_array.mean(axis=1)  # Convert stereo to mono

    # Gradio typically delivers integer PCM (int16), while the pipeline works
    # on floating-point waveforms. Scale integer samples into [-1.0, 1.0);
    # unsigned PCM is first re-centred around zero.
    if source_dtype.kind in "iu":
        half_range = float(2 ** (8 * source_dtype.itemsize - 1))
        if source_dtype.kind == "u":
            audio_array -= half_range
        audio_array /= half_range

    print(f"Processing audio: sample rate={sr}, shape={audio_array.shape}")

    # Classify the dialect
    predictions = classifier({"sampling_rate": sr, "raw": audio_array})

    # Format results for display; labels without a display name are kept as-is
    return {
        dialect_mapping.get(pred["label"], pred["label"]): float(pred["score"])
        for pred in predictions
    }

# Manually prepare example file paths without metadata: each example is a
# one-element row holding the path of a single audio clip.
examples = []
examples_dir = "examples"
# isdir (rather than exists) so that a regular file named "examples" is
# reported as missing instead of making os.listdir raise.
if os.path.isdir(examples_dir):
    # os.listdir returns entries in arbitrary order, so sort for a stable
    # example list; extensions are compared case-insensitively so files such
    # as "clip.WAV" are picked up as well.
    examples = [
        [os.path.join(examples_dir, filename)]
        for filename in sorted(os.listdir(examples_dir))
        if filename.lower().endswith((".wav", ".mp3", ".ogg"))
    ]

    print(f"Found {len(examples)} example files")
else:
    print("Examples directory not found")

# Assemble the Gradio UI: a single audio input feeding predict_dialect, with
# the returned scores rendered by a label widget (top five classes).
audio_input = gr.Audio()
dialect_output = gr.Label(num_top_classes=5, label="Predicted Dialect")

# Introductory text shown with the interface.
app_description = """
        Use this AI-powered tool to identify five major Arabic varieties from just a short audio clip:

        ✦ Modern Standard Arabic (MSA) - The formal language of media and education

        ✦ Egyptian Arabic - The dialect of Cairo, Alexandria, and popular Arabic cinema 

        ✦ Gulf Arabic - Spoken across Saudi Arabia, UAE, Kuwait, Qatar, Bahrain, and Oman

        ✦ Levantine Arabic - The dialect of Syria, Lebanon, Jordan, and Palestine

        ✦ Maghrebi Arabic - The distinctive varieties of Morocco, Algeria, Tunisia, and Libya

        Simply **upload an audio file** or **record yourself speaking** to see which dialect you match! Perfect for language learners, linguistics enthusiasts, or anyone curious about Arabic language variation."""

demo = gr.Interface(
    title="🎙️ **Arabic Dialect Identification in Speech!**",
    description=app_description,
    fn=predict_dialect,
    inputs=audio_input,
    outputs=dialect_output,
    # An empty example list is passed as None, i.e. no examples at all.
    examples=examples or None,
    cache_examples=False,  # Disable caching to avoid issues
    # NOTE(review): None appears to select Gradio's default flagging
    # behaviour rather than turning flagging off; if the intent was to hide
    # the Flag button this probably needs to be "never" -- confirm.
    flagging_mode=None,
)

# Start serving the app
demo.launch()