import gradio as gr
import os
import tempfile
import requests
from moviepy.editor import VideoFileClip
import random
import json

# --- Lightweight AccentAnalyzer class ---
class AccentAnalyzer:
    def __init__(self):
        self.accent_profiles = {
            "American": {
                "features": ["rhotic", "flapped_t", "cot_caught_merger"],
                "description": "American English accent with rhotic pronunciation and typical North American features."
            },
            "British": {
                "features": ["non_rhotic", "t_glottalization", "trap_bath_split"],
                "description": "British English accent with non-rhotic pronunciation and typical UK features."
            },
            "Australian": {
                "features": ["non_rhotic", "flat_a", "high_rising_terminal"],
                "description": "Australian English accent with distinctive vowel sounds and intonation patterns."
            },
            "Canadian": {
                "features": ["rhotic", "canadian_raising", "eh_tag"],
                "description": "Canadian English accent with features of both American and British English."
            },
            "Indian": {
                "features": ["retroflex_consonants", "monophthongization", "syllable_timing"],
                "description": "Indian English accent influenced by native Indian languages."
            },
            "Irish": {
                "features": ["dental_fricatives", "alveolar_l", "soft_consonants"],
                "description": "Irish English accent with distinctive rhythm and consonant patterns."
            },
            "Scottish": {
                "features": ["rolled_r", "monophthongs", "glottal_stops"],
                "description": "Scottish English accent with strong consonants and distinctive vowel patterns."
            },
            "South African": {
                "features": ["non_rhotic", "kit_split", "kw_hw_distinction"],
                "description": "South African English accent with influences from Afrikaans and other local languages."
            }
        }
        self._load_or_create_accent_data()

    def _load_or_create_accent_data(self):
        # For demo: just create simulated data in-memory
        self.accent_data = self._create_simulated_accent_data()

    def _create_simulated_accent_data(self):
        accent_data = {}
        for accent, profile in self.accent_profiles.items():
            accent_data[accent] = {
                "primary_features": profile["features"],
                "feature_probabilities": {}
            }
            for feature in profile["features"]:
                accent_data[accent]["feature_probabilities"][feature] = random.uniform(0.7, 0.9)
            all_features = set()
            for a, p in self.accent_profiles.items():
                all_features.update(p["features"])
            for feature in all_features:
                if feature not in profile["features"]:
                    accent_data[accent]["feature_probabilities"][feature] = random.uniform(0.1, 0.4)
        return accent_data

    def _extract_features(self, audio_path):
        # This is a simulated feature extraction for the demo.
        # In a real application, this would use SpeechBrain or similar ML models
        # to extract actual phonetic features from the audio.
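        # A rough sketch of what a real extractor might look like (untested here;
        # the model source below is an assumption, not part of this demo):
        #
        #     from speechbrain.pretrained import EncoderClassifier
        #     classifier = EncoderClassifier.from_hparams(
        #         source="Jzuluaga/accent-id-commonaccent_ecapa",  # assumed accent-ID model
        #         savedir="pretrained_models/accent-id"
        #     )
        #     out_prob, score, index, text_lab = classifier.classify_file(audio_path)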
        all_features = set()
        for accent, profile in self.accent_profiles.items():
            all_features.update(profile["features"])
        detected_features = {}
        for feature in all_features:
            # Simulate detection of features with varying probabilities
            detected_features[feature] = random.uniform(0.1, 0.9)
        return detected_features

    def _calculate_accent_scores(self, detected_features):
        accent_scores = {}
        for accent, data in self.accent_data.items():
            score = 0
            total_weight = 0
            for feature, probability in detected_features.items():
                expected_prob = data["feature_probabilities"].get(feature, 0.1)
                weight = 3.0 if feature in data["primary_features"] else 1.0  # Give more weight to primary features
                feature_score = probability * expected_prob * weight
                score += feature_score
                total_weight += weight
            if total_weight > 0:
                accent_scores[accent] = (score / total_weight) * 100
            else:
                accent_scores[accent] = 0
        return accent_scores

    def _generate_explanation(self, accent_type, confidence):
        if confidence >= 70:
            confidence_level = "high confidence"
            certainty = "is very clear"
        elif confidence >= 50:
            confidence_level = "moderate confidence"
            certainty = "is present"
        else:
            confidence_level = "low confidence"
            certainty = "may be present"
        description = self.accent_profiles[accent_type]["description"]
        second_accent = self._get_second_most_likely_accent(accent_type)
        explanation = f"The speaker has a {confidence_level} {accent_type} English accent. The {accent_type} accent {certainty}, with features of both {accent_type} and {second_accent} English present."
        return explanation

    def _get_second_most_likely_accent(self, primary_accent):
        # Simple rule-based selection for demo purposes
        accent_similarities = {
            "American": ["Canadian", "British"],
            "British": ["Australian", "Irish"],
            "Australian": ["British", "South African"],
            "Canadian": ["American", "British"],
            "Indian": ["British", "South African"],
            "Irish": ["Scottish", "British"],
            "Scottish": ["Irish", "British"],
            "South African": ["Australian", "British"]
        }
        # Pick a random similar accent from the predefined list
        return random.choice(accent_similarities[primary_accent])

    def analyze_accent(self, audio_path):
        """
        Analyzes the accent from an audio file.
        In this demo, it simulates feature extraction and accent scoring.
        """
        detected_features = self._extract_features(audio_path)
        accent_scores = self._calculate_accent_scores(detected_features)
        # Find the accent with the highest score
        accent_type = max(accent_scores, key=accent_scores.get)
        confidence = accent_scores[accent_type]
        explanation = self._generate_explanation(accent_type, confidence)
        return {
            "accent_type": accent_type,
            "confidence": confidence,
            "explanation": explanation,
            "all_scores": accent_scores  # Useful for debugging or more detailed display
        }
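
# A quick sanity-check of the analyzer, independent of Gradio. "speech.wav" is a
# placeholder path; because feature extraction is simulated, the file is never
# actually read, so any path yields a (randomised) result.
#
#     analyzer = AccentAnalyzer()
#     result = analyzer.analyze_accent("speech.wav")
#     print(result["accent_type"], f"{result['confidence']:.1f}%")
#     print(result["explanation"])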
""" temp_dir = tempfile.mkdtemp() video_path = os.path.join(temp_dir, "video.mp4") audio_path = os.path.join(temp_dir, "audio.wav") try: # Download video # Check for YouTube URL patterns (simplified for demo) if "youtube.com/" in url or "youtu.be/" in url: try: from pytubefix import YouTube yt = YouTube(url) # Try to get a progressive stream (video + audio) stream = yt.streams.filter(progressive=True, file_extension='mp4').order_by('resolution').desc().first() if not stream: # Fallback to separate audio stream if progressive not found stream = yt.streams.filter(only_audio=True).first() if not stream: raise RuntimeError("No suitable video or audio stream found for YouTube URL.") # Download the stream stream.download(output_path=temp_dir, filename="video.mp4") except ImportError: raise ImportError("pytubefix is not installed. Please install it with 'pip install pytubefix'.") except Exception as e: # Catch specific YouTube errors, e.g., age restriction, unavailable raise RuntimeError(f"Error downloading YouTube video: {e}. Try running locally or use a direct MP4 link.") else: # Direct MP4 download response = requests.get(url, stream=True) response.raise_for_status() # Raise an exception for bad status codes (4xx or 5xx) with open(video_path, "wb") as f: for chunk in response.iter_content(chunk_size=8192): f.write(chunk) # Extract audio using moviepy clip = VideoFileClip(video_path) clip.audio.write_audiofile(audio_path, logger=None) # logger=None suppresses moviepy output clip.close() return audio_path finally: # Clean up the video file immediately after audio extraction if os.path.exists(video_path): os.remove(video_path) # The temp_dir itself will be handled by Gradio's internal tempfile management, # or you can add os.rmdir(temp_dir) if you manage temp_dir manually. # --- Gradio interface --- def analyze_from_url(url): """ Gradio interface function to analyze accent from a given video URL. """ if not url: return "Please enter a video URL.", "N/A", "No URL provided." try: audio_path = download_and_extract_audio(url) analyzer = AccentAnalyzer() results = analyzer.analyze_accent(audio_path) # Clean up the temporary audio file after analysis if os.path.exists(audio_path): os.remove(audio_path) return ( results["accent_type"], f"{results['confidence']:.1f}%", results["explanation"] ) except Exception as e: # Catch and display any errors during the process return ( "Error", "0%", f"Error processing video/audio: {e}. Please ensure the URL is valid and publicly accessible." ) # Create the Gradio interface iface = gr.Interface( fn=analyze_from_url, inputs=gr.Textbox( label="Enter Public Video URL (YouTube or direct MP4)", placeholder="e.g., https://www.youtube.com/watch?v=dQw4w9WgXcQ or https://samplelib.com/lib/preview/mp4/sample-5s.mp4" ), outputs=[ gr.Textbox(label="Detected Accent"), gr.Textbox(label="Confidence Score"), gr.Textbox(label="Explanation") ], title="English Accent Analyzer (Rule-Based Demo)", description=""" Paste a public video URL (YouTube or direct MP4) to detect the English accent and confidence score. **Important Notes:** * This is a **DEMO** using a simulated accent analysis model, not a real machine learning model. * It uses `pytubefix` for YouTube links and `requests`/`moviepy` for direct MP4s. * YouTube video extraction can sometimes be temperamental due to YouTube's changing policies or region restrictions. Direct MP4 links are generally more reliable. 
    * **Sample MP4 URL for testing:** `https://samplelib.com/lib/preview/mp4/sample-5s.mp4`
    """
)

# Launch the Gradio interface
# `share=False` for local deployment (no public link generated)
# For Hugging Face Spaces, you typically don't need `iface.launch()` as the platform handles it.
# However, if you're running it locally to test before deployment, keep this block.
if __name__ == "__main__":
    iface.launch(debug=True, share=False)
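
# Dependencies implied by the imports above -- a minimal requirements.txt
# sketch (unpinned; exact versions are an assumption):
#
#     gradio
#     requests
#     moviepy     # note: `from moviepy.editor import ...` is the moviepy 1.x import style
#     pytubefix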