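"""Streamlit app: Cantonese audio transcription with a sentiment-based quality rating.

Flow: upload an audio file -> transcribe it with a Cantonese Whisper model ->
strip punctuation -> classify the transcript with a multilingual sentiment model
and display the mapped rating.

Run locally with the Streamlit CLI, e.g. `streamlit run app.py`
(adjust the filename if this file is saved under a different name).
"""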
import torch
import os
import re
import streamlit as st
from transformers import pipeline

# Device setup
device = "cuda" if torch.cuda.is_available() else "cpu"

# Load the Whisper model for transcription.
# Wrapped in st.cache_resource so Streamlit reruns do not reload the model on every interaction.
MODEL_NAME = "alvanlii/whisper-small-cantonese"
language = "zh"

@st.cache_resource
def load_asr_pipeline():
    asr_pipe = pipeline(
        task="automatic-speech-recognition",
        model=MODEL_NAME,
        chunk_length_s=60,
        device=device
    )
    # Force the decoder to transcribe in Chinese rather than auto-detect or translate
    asr_pipe.model.config.forced_decoder_ids = asr_pipe.tokenizer.get_decoder_prompt_ids(language=language, task="transcribe")
    return asr_pipe

pipe = load_asr_pipeline()

# Load the sentiment model used to rate the transcript (also cached)
@st.cache_resource
def load_rating_pipeline():
    return pipeline("text-classification", model="tabularisai/multilingual-sentiment-analysis")

rating_pipe = load_rating_pipeline()

# Map sentiment labels to quality ratings; unmapped labels fall back to "Unknown".
# (Assumes the tabularisai model's 5-point label set; the extreme labels are folded into the same ratings.)
label_map = {
    "Very Negative": "Very Poor",
    "Negative": "Very Poor",
    "Neutral": "Neutral",
    "Positive": "Very Good",
    "Very Positive": "Very Good",
}

def remove_punctuation(text):
    # \w is Unicode-aware in Python, so Chinese characters are kept while ASCII and CJK punctuation are stripped
    return re.sub(r'[^\w\s]', '', text)

def transcribe_audio(audio_path):
    transcript = pipe(audio_path)["text"]
    return remove_punctuation(transcript)

def rate_quality(text):
    result = rating_pipe(text)[0]
    return label_map.get(result["label"], "Unknown")
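# Example (outside the Streamlit flow): transcribe a local file and rate it.
# "sample.wav" is a placeholder path used only for illustration.
# transcript = transcribe_audio("sample.wav")
# print(transcript, rate_quality(transcript))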

# Streamlit UI
st.set_page_config(page_title="Cantonese Audio Transcription & Analysis", layout="centered")
st.title("🗣️ Cantonese Audio Transcriber & Sentiment Analyzer")
st.markdown("Upload your Cantonese audio file, and we will transcribe and analyze its sentiment.")

uploaded_file = st.file_uploader("Upload an audio file (WAV, MP3, etc.)", type=["wav", "mp3", "m4a"])
if uploaded_file is not None:
    with st.spinner("Processing audio..."):
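        # Save the upload to a temporary file; the ASR pipeline reads audio from a file path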
        temp_audio_path = "temp_audio.wav"
        with open(temp_audio_path, "wb") as f:
            f.write(uploaded_file.getbuffer())
        transcript = transcribe_audio(temp_audio_path)
        sentiment = rate_quality(transcript)
        os.remove(temp_audio_path)
    
    st.subheader("Transcription")
    st.text_area("Transcript", transcript, height=150, label_visibility="collapsed")
    
    st.subheader("Sentiment Analysis")
    st.markdown(f"### 🎭 Sentiment: **{sentiment}**")
    
    st.success("Processing complete! 🎉")