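"""Streamlit app that transcribes Cantonese audio with a Whisper model and
rates the conversation quality with a multilingual sentiment classifier."""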
import torch
import torchaudio
import os
import re
import streamlit as st
from difflib import SequenceMatcher
from transformers import pipeline
# Device setup
device = "cuda" if torch.cuda.is_available() else "cpu"
# Load Whisper model for transcription
MODEL_NAME = "alvanlii/whisper-small-cantonese"
language = "zh"
pipe = pipeline(
    task="automatic-speech-recognition",
    model=MODEL_NAME,
    chunk_length_s=60,
    device=device,
)
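# Pin the decoder prompt to Chinese transcription so Whisper does not auto-detect the language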
pipe.model.config.forced_decoder_ids = pipe.tokenizer.get_decoder_prompt_ids(language=language, task="transcribe")
# Load multilingual sentiment model used to rate conversation quality
rating_pipe = pipeline("text-classification", model="tabularisai/multilingual-sentiment-analysis")
# Map sentiment labels to quality ratings
label_map = {"Very Negative": "Very Poor", "Negative": "Poor", "Neutral": "Neutral", "Positive": "Good", "Very Positive": "Very Good"}
def remove_punctuation(text):
    """Remove all punctuation characters, keeping word characters and whitespace."""
    return re.sub(r'[^\w\s]', '', text)

def transcribe_audio(audio_path):
    """Transcribe an audio file with the Whisper pipeline and strip punctuation."""
    transcript = pipe(audio_path)["text"]
    return remove_punctuation(transcript)

def rate_quality(text):
    """Classify the sentiment of the transcript and map it to a quality label."""
    result = rating_pipe(text)[0]
    return label_map.get(result["label"], "Unknown")
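# Standalone usage sketch (assumes a local Cantonese recording saved as "sample.wav"):
#   text = transcribe_audio("sample.wav")
#   print(text, "->", rate_quality(text))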
# Streamlit UI
st.set_page_config(page_title="Cantonese Audio Transcription & Analysis", layout="centered")
st.title("🗣️ Customer Service Conversation Quality Analyzer")
st.markdown("Upload your Cantonese audio file, and we will transcribe and analyze its sentiment.")
uploaded_file = st.file_uploader("Upload an audio file (WAV, MP3, etc.)", type=["wav", "mp3", "m4a"])
if uploaded_file is not None:
    with st.spinner("Processing audio..."):
        # Persist the upload to a temporary file so the ASR pipeline can read it from disk
        temp_audio_path = "temp_audio.wav"
        with open(temp_audio_path, "wb") as f:
            f.write(uploaded_file.getbuffer())
        transcript = transcribe_audio(temp_audio_path)
        sentiment = rate_quality(transcript)
        os.remove(temp_audio_path)

    st.subheader("Transcription")
    st.text_area("", transcript, height=150)

    st.subheader("Sentiment Analysis")
    st.markdown(f"### 🎭 Sentiment: **{sentiment}**")

    st.success("Processing complete! 🎉")
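# To launch the app locally (assuming this file is saved as app.py):
#   streamlit run app.py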