import gradio as gr from transformers import pipeline import os import numpy as np import torch # Load the model print("Loading model...") model_id = "badrex/mms-300m-arabic-dialect-identifier" classifier = pipeline("audio-classification", model=model_id) print("Model loaded successfully") # Define dialect mapping dialect_mapping = { "MSA": "Modern Standard Arabic", "Egyptian": "Egyptian Arabic", "Gulf": "Gulf Arabic", "Levantine": "Levantine Arabic", "Maghrebi": "Maghrebi Arabic" } def predict_dialect(audio): if audio is None: return {"Error": 1.0} # The audio input from Gradio is a tuple of (sample_rate, audio_array) sr, audio_array = audio # Process the audio input if len(audio_array.shape) > 1: audio_array = audio_array.mean(axis=1) # Convert stereo to mono # Convert audio to float32 if it's not already (fix for Chrome recording issue) if audio_array.dtype != np.float32: # Normalize to [-1, 1] range as expected by the model if audio_array.dtype == np.int16: audio_array = audio_array.astype(np.float32) / 32768.0 else: audio_array = audio_array.astype(np.float32) print(f"Processing audio: sample rate={sr}, shape={audio_array.shape}") # Classify the dialect predictions = classifier({"sampling_rate": sr, "raw": audio_array}) # Format results for display results = {} for pred in predictions: dialect_name = dialect_mapping.get(pred['label'], pred['label']) results[dialect_name] = float(pred['score']) return results # Manually prepare example file paths without metadata examples = [] examples_dir = "examples" if os.path.exists(examples_dir): for filename in os.listdir(examples_dir): if filename.endswith((".wav", ".mp3", ".ogg")): examples.append([os.path.join(examples_dir, filename)]) print(f"Found {len(examples)} example files") else: print("Examples directory not found") # Create the Gradio interface demo = gr.Interface( fn=predict_dialect, inputs=gr.Audio(), outputs=gr.Label(num_top_classes=5, label="Predicted Dialect"), title="🎙️ Arabic Dialect Identification in Speech!", description="""
Use this AI speech model to identify five major Arabic varieties from just a short audio clip.
The following Arabic language varieties are supported:
✦ Modern Standard Arabic (MSA) - The formal language of media and education
✦ Egyptian Arabic - The dialect of Cairo, Alexandria, and popular Arabic cinema
✦ Gulf Arabic - Spoken across Saudi Arabia, UAE, Kuwait, Qatar, Bahrain, and Oman
✦ Levantine Arabic - The dialect of Syria, Lebanon, Jordan, and Palestine
✦ Maghrebi Arabic - The distinctive varieties of Morocco, Algeria, Tunisia, and Libya
Simply upload an audio file or record yourself speaking to see which dialect you match!
Perfect for language learners, linguistics enthusiasts, or anyone curious about Arabic language variation.
The demo is based on a Transformer model adapted for the ADI task badrex/mms-300m-arabic-dialect-identifier.
Developed with ❤️🤍💚 by Badr Alabsi