# Hugging Face Space: fahadqazi/accent-classifier (status: Sleeping). File size: 4,256 bytes.

import gradio as gr
import torch
import tempfile
import os
import requests
from moviepy import VideoFileClip
from transformers import pipeline, WhisperProcessor, WhisperForConditionalGeneration, Wav2Vec2Processor, Wav2Vec2Model
import torchaudio

# Build the Whisper speech-recognition pipeline once, at import time, using the
# "openai/whisper-tiny" checkpoint on CPU. transcribe() runs audio through it,
# and analyze_accent() uses the resulting text only to check that some speech
# was recognised.
# NOTE(review): nothing in this file verifies that the detected language is
# actually English -- confirm whether that check is still planned.
whisper_pipe = pipeline("automatic-speech-recognition", model="openai/whisper-tiny", device="cpu")

# Stub accent classifier: swap in a fine-tuned model (or wav2vec2 embeddings
# plus your own logic) when a real one is available.
def classify_accent(audio_tensor, sample_rate):
    """Return a fixed, demonstration-only accent prediction.

    Args:
        audio_tensor: Audio samples; accepted for interface compatibility but
            not inspected by this stub.
        sample_rate: Sample rate of ``audio_tensor``; also unused here.

    Returns:
        A new dict with the keys ``"accent"``, ``"confidence"`` (a percentage)
        and ``"summary"``. The values are hard-coded.
    """
    stub_prediction = dict(
        accent="American",
        confidence=87.2,
        summary="The speaker uses rhotic pronunciation and North American intonation.",
    )
    return stub_prediction

def download_video(url, timeout=30):
    """Stream the resource at *url* into a temporary ``.mp4`` file.

    Args:
        url: HTTP(S) address of the video to fetch.
        timeout: Seconds to wait for the server (passed straight to
            ``requests.get``). Without it a stalled server would block the
            request handler indefinitely.

    Returns:
        Path of the downloaded file. The caller owns the file and is
        responsible for deleting it.

    Raises:
        requests.RequestException: On connection problems, timeouts, or a
            non-2xx HTTP status (so an HTML error page is never saved as a
            "video").
    """
    # delete=False: the file must outlive this function; we remove it
    # ourselves if the download fails.
    tmp = tempfile.NamedTemporaryFile(suffix=".mp4", delete=False)
    try:
        # Both the temp-file handle and the streamed response are closed on
        # exit, whether or not the download succeeds.
        with tmp, requests.get(url, stream=True, timeout=timeout) as response:
            response.raise_for_status()
            for chunk in response.iter_content(chunk_size=1024 * 1024):
                if chunk:  # skip keep-alive chunks
                    tmp.write(chunk)
    except BaseException:
        # Do not leave a partial or empty download behind.
        os.remove(tmp.name)
        raise
    return tmp.name

def extract_audio(video_path):
    """Write the audio track of *video_path* to a temporary WAV file.

    Args:
        video_path: Path of a video file readable by MoviePy.

    Returns:
        Path of the 16-bit PCM ``.wav`` file. The caller owns the file and is
        responsible for deleting it.

    Raises:
        ValueError: If the video contains no audio track.
        Exception: Whatever MoviePy raises when the video cannot be opened or
            the audio cannot be written.
    """
    clip = VideoFileClip(video_path)
    try:
        if clip.audio is None:
            # Fail with a readable message instead of an AttributeError on None.
            raise ValueError("The video has no audio track.")

        # Only reserve the output path once we know there is audio to write;
        # the handle is closed immediately so the writer can reopen the path.
        with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
            audio_path = tmp.name
        try:
            clip.audio.write_audiofile(audio_path, codec='pcm_s16le')
        except BaseException:
            # Do not leave a partial WAV file behind.
            os.remove(audio_path)
            raise
    finally:
        # Release the reader resources held by the clip.
        clip.close()
    return audio_path

def transcribe(audio_path):
    """Run the module-level Whisper pipeline on *audio_path* and return the recognised text."""
    return whisper_pipe(audio_path)['text']

def _remove_temp_file(path):
    """Best-effort removal of a temporary file; a failure is reported, not raised."""
    try:
        os.remove(path)
    except OSError as e:
        print("Could not remove temp file:", path, e)


def analyze_accent(url_or_file):
    """Analyse the spoken accent in a video given by URL or local path.

    The video is downloaded when the input is an HTTP(S) URL, its audio track
    is extracted and transcribed, and the result of ``classify_accent`` is
    formatted as Markdown.

    Args:
        url_or_file: An ``http://``/``https://`` URL, or a local file path.
            An object exposing the path as ``.name`` is also accepted
            (presumably what older Gradio versions pass for ``gr.File``
            uploads -- TODO confirm against the installed version).

    Returns:
        A Markdown string with accent, confidence, explanation and the start
        of the transcript; a "could not understand" notice when almost nothing
        was transcribed; or a string starting with ``"❌ Error:"`` on failure.
        This function does not raise ``Exception`` subclasses; they are
        reported in the returned string.
    """
    source = getattr(url_or_file, "name", url_or_file)
    if not isinstance(source, str) or not source.strip():
        return "❌ Error: No video URL or file was provided."

    # Only files created here are deleted afterwards. Local inputs (uploads,
    # bundled examples) belong to the caller and must be left in place.
    downloaded_path = None
    audio_path = None
    try:
        candidate_url = source.strip()
        if candidate_url.lower().startswith(("http://", "https://")):
            downloaded_path = download_video(candidate_url)
            video_path = downloaded_path
        else:
            video_path = source

        print("Video path:", video_path)

        audio_path = extract_audio(video_path)

        print("Audio path:", audio_path)

        # Load audio with torchaudio
        waveform, sample_rate = torchaudio.load(audio_path)

        # Transcription: make sure some speech was actually recognised
        transcript = transcribe(audio_path)
        if len(transcript.strip()) < 3:
            return "Could not understand speech. Please try another video."

        # Accent classification
        result = classify_accent(waveform, sample_rate)

        output = f"**Accent**: {result['accent']}\n\n"
        output += f"**Confidence**: {result['confidence']}%\n\n"
        output += f"**Explanation**: {result['summary']}\n\n"
        output += f"**Transcript** (first 200 chars): {transcript[:200]}..."

        return output
    except Exception as e:
        return f"❌ Error: {str(e)}"
    finally:
        # Runs on success, early return and failure alike, so temp files are
        # never leaked and each one is removed exactly once.
        if audio_path is not None:
            _remove_temp_file(audio_path)
        if downloaded_path is not None:
            _remove_temp_file(downloaded_path)
    

# gr.Interface(
#     fn=analyze_accent,
#     inputs=gr.Textbox(label="Public Video URL (e.g. MP4)", placeholder="https://..."),
#     outputs=gr.Markdown(label="Accent Analysis Result"),
#     title="English Accent Classifier",
#     description="Paste a video URL (MP4) to extract audio, transcribe speech, and classify the English accent (e.g., American, British, etc.).",

#     examples=[
#         ["https://example.com/sample.mp4"],  # example URL
#         [open("cleo-abram.mp4", "rb")]  # local file example
#     ],
#     live=True
# ).launch()



# Build the Gradio UI: two tabs that both route their input through
# analyze_accent() and render the returned Markdown string.
with gr.Blocks() as demo:
    gr.Markdown("# English Accent Classifier")

    # Tab 1: the user pastes a video URL; the textbox value is passed to
    # analyze_accent() as a string.
    with gr.Tab("From URL"):
        url_input = gr.Textbox(label="Video URL (MP4)")
        url_output = gr.Markdown()
        gr.Button("Analyze").click(fn=analyze_accent, inputs=url_input, outputs=url_output)

    # Tab 2: the user uploads an .mp4 file.
    # NOTE(review): the exact value gr.File passes to the callback (path string
    # vs. file wrapper) depends on the Gradio version -- confirm that it matches
    # what analyze_accent() expects.
    with gr.Tab("From File"):
        file_input = gr.File(label="Upload MP4 Video", file_types=[".mp4"])
        file_output = gr.Markdown()
        gr.Button("Analyze").click(fn=analyze_accent, inputs=file_input, outputs=file_output)


        # Example video offered inside the "From File" tab. The path is built
        # from the current working directory, so the app has to be started
        # from the directory that contains "examples/".
        gr.Examples(
            examples=[
                [os.getcwd() + "/examples/cleo-abram.mp4"],
            ],
            inputs=file_input,
            outputs=file_output,
            fn=analyze_accent,
            label="Example MP4 Videos"
        )

# Start the app as soon as this module is executed (there is no __main__ guard).
demo.launch()