import os
import re

import streamlit as st
import torch
from transformers import pipeline

# Device setup: use the GPU when one is available.
device = "cuda" if torch.cuda.is_available() else "cpu"

# Load the Whisper model fine-tuned for Cantonese transcription.
MODEL_NAME = "alvanlii/whisper-small-cantonese"
language = "zh"

pipe = pipeline(
    task="automatic-speech-recognition",
    model=MODEL_NAME,
    chunk_length_s=60,  # split long recordings into 60-second chunks
    device=device,
)
# Force Chinese transcription (not translation) at decode time.
pipe.model.config.forced_decoder_ids = pipe.tokenizer.get_decoder_prompt_ids(
    language=language, task="transcribe"
)

# Load the sentiment model used to rate the transcript.
rating_pipe = pipeline(
    "text-classification", model="tabularisai/multilingual-sentiment-analysis"
)

# Map the model's sentiment labels to display labels. The extreme classes
# are included as well (assumption: this model emits five classes,
# "Very Negative" through "Very Positive"), so they are not shown as
# "Unknown".
label_map = {
    "Very Negative": "Very Poor",
    "Negative": "Very Poor",
    "Neutral": "Neutral",
    "Positive": "Very Good",
    "Very Positive": "Very Good",
}


def remove_punctuation(text):
    # Strip everything except word characters and whitespace.
    return re.sub(r"[^\w\s]", "", text)


def transcribe_audio(audio_path):
    transcript = pipe(audio_path)["text"]
    return remove_punctuation(transcript)


def rate_quality(text):
    result = rating_pipe(text)[0]
    return label_map.get(result["label"], "Unknown")


# Streamlit UI
st.set_page_config(page_title="Cantonese Audio Transcription & Analysis", layout="centered")
st.title("🗣️ Cantonese Audio Transcriber & Sentiment Analyzer")
st.markdown("Upload a Cantonese audio file, and we will transcribe it and analyze its sentiment.")

uploaded_file = st.file_uploader("Upload an audio file (WAV, MP3, etc.)", type=["wav", "mp3", "m4a"])

if uploaded_file is not None:
    with st.spinner("Processing audio..."):
        # Persist the upload to disk, keeping its original extension so the
        # decoder is not misled by a mismatched ".wav" suffix on MP3/M4A input.
        suffix = os.path.splitext(uploaded_file.name)[1] or ".wav"
        temp_audio_path = f"temp_audio{suffix}"
        with open(temp_audio_path, "wb") as f:
            f.write(uploaded_file.getbuffer())

        transcript = transcribe_audio(temp_audio_path)
        sentiment = rate_quality(transcript)
        os.remove(temp_audio_path)

    st.subheader("Transcription")
    st.text_area("Transcript", transcript, height=150, label_visibility="collapsed")

    st.subheader("Sentiment Analysis")
    st.markdown(f"### 🎭 Sentiment: **{sentiment}**")
    st.success("Processing complete! 🎉")
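
# --- Usage note (a minimal sketch; the filename app.py and the sample clip
# path below are assumptions for illustration, not part of this script) ---
#
# Run the app locally with Streamlit:
#   streamlit run app.py
#
# The helpers can also be exercised outside the UI, e.g. from a REPL:
#   from app import transcribe_audio, rate_quality
#   text = transcribe_audio("sample_cantonese.wav")  # hypothetical test clip
#   print(text, rate_quality(text))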