File size: 2,107 Bytes
6e645b6
 
 
0eb093e
3521f10
5b9cbca
3521f10
6e645b6
0eb093e
 
c74678d
3521f10
0eb093e
 
5b9cbca
3521f10
 
 
 
5b9cbca
0eb093e
6e645b6
3521f10
 
5b9cbca
3521f10
19f3fad
5b9cbca
2ba44e2
18d5ab3
2ba44e2
6e645b6
3521f10
 
18d5ab3
 
3521f10
 
18d5ab3
0eb093e
3521f10
d974db8
3521f10
0eb093e
3521f10
0eb093e
5b9cbca
3521f10
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
import os
import re
import tempfile
from difflib import SequenceMatcher

import streamlit as st
import torch
import torchaudio
from transformers import pipeline

# Device setup: run on the GPU when PyTorch can see one, otherwise on the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"

# Whisper checkpoint used for transcription and the language it is forced to decode.
MODEL_NAME = "alvanlii/whisper-small-cantonese"
language = "zh"


# Streamlit re-executes this script on every user interaction; caching the
# loaders keeps a single copy of each model per server process instead of
# rebuilding them on each rerun. show_spinner=False so that no UI element is
# emitted before st.set_page_config() runs further down the script.
@st.cache_resource(show_spinner=False)
def _load_transcription_pipeline():
    """Build the speech-recognition pipeline for ``MODEL_NAME`` on ``device``.

    The decoder prompt is pinned to ``language`` with the "transcribe" task.
    """
    # NOTE(review): Whisper models are usually run on 30 s windows; confirm
    # that chunk_length_s=60 does not truncate audio with this checkpoint.
    asr = pipeline(
        task="automatic-speech-recognition",
        model=MODEL_NAME,
        chunk_length_s=60,
        device=device,
    )
    asr.model.config.forced_decoder_ids = asr.tokenizer.get_decoder_prompt_ids(
        language=language, task="transcribe"
    )
    return asr


@st.cache_resource(show_spinner=False)
def _load_rating_pipeline():
    """Build the text-classification pipeline used to rate a transcript."""
    return pipeline("text-classification", model="tabularisai/multilingual-sentiment-analysis")


# Load Whisper model for transcription
pipe = _load_transcription_pipeline()

# Load quality rating model
rating_pipe = _load_rating_pipeline()

# Sentiment label mapping: classifier label -> quality rating shown to the user.
label_map = {
    "Very Negative": "Very Poor",
    "Negative": "Poor",
    "Neutral": "Neutral",
    "Positive": "Good",
    "Very Positive": "Very Good",
}

# Anything that is neither a word character nor whitespace, plus the
# underscore: "_" counts as a word character for ``\w`` and would otherwise
# survive the clean-up.
_PUNCTUATION_RE = re.compile(r'[^\w\s]|_')


def remove_punctuation(text):
    """Return *text* with punctuation and symbols removed.

    Letters, digits and CJK characters are kept (``\\w`` is Unicode-aware for
    ``str`` patterns), and whitespace is left exactly as it was.

    Args:
        text: The string to clean.

    Returns:
        A copy of *text* without punctuation, symbols or underscores.
    """
    return _PUNCTUATION_RE.sub('', text)

def transcribe_audio(audio_path):
    """Transcribe the audio file at *audio_path* and strip its punctuation.

    The path is handed to the module-level ``pipe``; the ``"text"`` entry of
    its result is returned after passing through ``remove_punctuation``.
    """
    asr_output = pipe(audio_path)
    raw_text = asr_output["text"]
    return remove_punctuation(raw_text)

def rate_quality(text):
    """Classify *text* and translate the result into a quality rating.

    Args:
        text: Transcript to rate.

    Returns:
        The ``label_map`` entry for the top label returned by ``rating_pipe``,
        or ``"Unknown"`` when *text* is empty / whitespace-only or the label
        is not present in ``label_map``.
    """
    # Nothing to classify (e.g. silent audio): don't report a sentiment for
    # an empty transcript.
    if not text or not text.strip():
        return "Unknown"

    # Transcripts of long calls can exceed the classifier's maximum sequence
    # length; truncate instead of letting the model call fail.
    top_prediction = rating_pipe(text, truncation=True)[0]
    return label_map.get(top_prediction["label"], "Unknown")

# Streamlit UI: upload an audio file, transcribe it, rate it, show both results.
st.set_page_config(page_title="Cantonese Audio Transcription & Analysis", layout="centered")
st.title("🗣️ Customer Service Conversation Quality Analyzer")
st.markdown("Upload your Cantonese audio file, and we will transcribe and analyze its sentiment.")

uploaded_file = st.file_uploader("Upload an audio file (WAV, MP3, etc.)", type=["wav", "mp3", "m4a"])
if uploaded_file is not None:
    with st.spinner("Processing audio..."):
        # Write the upload to a uniquely named temporary file: a fixed path in
        # the working directory would be shared (and overwritten) by
        # concurrent sessions. Keep the upload's own extension so the file
        # name matches its actual format.
        suffix = os.path.splitext(uploaded_file.name)[1] or ".wav"
        with tempfile.NamedTemporaryFile(suffix=suffix, delete=False) as tmp_audio:
            tmp_audio.write(uploaded_file.getbuffer())
            temp_audio_path = tmp_audio.name
        try:
            transcript = transcribe_audio(temp_audio_path)
            sentiment = rate_quality(transcript)
        finally:
            # Always clean up, even when transcription or rating raises.
            os.remove(temp_audio_path)

    st.subheader("Transcription")
    st.text_area("", transcript, height=150)

    st.subheader("Sentiment Analysis")
    st.markdown(f"### 🎭 Sentiment: **{sentiment}**")

    st.success("Processing complete! 🎉")