"""Streamlit app: transcribe, summarize, and rate an uploaded Cantonese WAV file."""

import os
import tempfile

import streamlit as st
import torch
import torchaudio
from transformers import pipeline

# Streamlit UI setup. Page configuration is issued before the cached model
# loaders below so that no other Streamlit element (such as a cache spinner)
# is rendered ahead of it.
st.set_page_config(page_title="Cantonese Audio Analysis", layout="centered")

# Device setup
device = "cuda" if torch.cuda.is_available() else "cpu"

# Whisper model used for Cantonese transcription
MODEL_NAME = "alvanlii/whisper-small-cantonese"
language = "zh"


@st.cache_resource(show_spinner="Loading transcription model...")
def _load_transcriber():
    """Build the speech-recognition pipeline once per server process."""
    asr = pipeline(
        task="automatic-speech-recognition",
        model=MODEL_NAME,
        chunk_length_s=60,
        device=device,
    )
    # Pin decoding to the configured language and the "transcribe" task.
    asr.model.config.forced_decoder_ids = asr.tokenizer.get_decoder_prompt_ids(
        language=language, task="transcribe"
    )
    return asr


@st.cache_resource(show_spinner="Loading summarization model...")
def _load_summarizer():
    """Build the summarization pipeline once per server process."""
    return pipeline("summarization", model="Ayaka/bart-base-cantonese")


@st.cache_resource(show_spinner="Loading rating model...")
def _load_rating_pipe():
    """Build the text-classification pipeline used for the quality rating."""
    return pipeline(
        "text-classification",
        model="tabularisai/multilingual-sentiment-analysis",
    )


# Streamlit re-executes this script on every interaction; the cached loaders
# keep the models from being re-created on each rerun.
transcriber = _load_transcriber()
summarizer = _load_summarizer()
rating_pipe = _load_rating_pipe()

st.title("🌟 Cantonese Audio Analysis")
st.write("Upload a Cantonese audio file to transcribe, summarize, and evaluate its quality.")

# File uploader
audio_file = st.file_uploader("Upload your audio file (WAV format)", type=["wav"])


def transcribe_audio(audio_path: str) -> str:
    """Return the text the transcription pipeline produces for *audio_path*."""
    return transcriber(audio_path)["text"]


def summarize_text(text: str) -> str:
    """Return the summarizer's summary of *text* (deterministic decoding)."""
    return summarizer(text, max_length=150, min_length=50, do_sample=False)[0]["summary_text"]


def rate_quality(text: str) -> str:
    """Classify *text* and translate the model's label into a quality rating.

    Only the first 512 characters of *text* are sent to the classifier.
    Labels outside the known set map to "Unknown".
    """
    result = rating_pipe(text[:512])[0]
    label_map = {
        "Very Negative": "Very Poor",
        "Negative": "Poor",
        "Neutral": "Neutral",
        "Positive": "Good",
        "Very Positive": "Very Good",
    }
    return label_map.get(result["label"], "Unknown")


if audio_file:
    st.audio(audio_file, format="audio/wav")

    # A unique temporary file avoids collisions between concurrent sessions.
    # getvalue() returns the whole upload regardless of the stream position.
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
        tmp.write(audio_file.getvalue())
        temp_audio_path = tmp.name

    try:
        with st.spinner("Transcribing audio..."):
            transcript = transcribe_audio(temp_audio_path)
    finally:
        # The file is only needed for transcription; remove it even on failure.
        os.remove(temp_audio_path)

    st.subheader("📝 Transcript")
    st.write(transcript)

    if not transcript.strip():
        # Nothing to summarize or rate; skip the downstream models.
        st.warning("No speech could be transcribed from this audio file.")
    else:
        with st.spinner("Summarizing transcript..."):
            summary = summarize_text(transcript)
        st.subheader("📖 Summary")
        st.write(summary)

        with st.spinner("Evaluating conversation quality..."):
            quality_rating = rate_quality(summary)
        st.subheader("🏆 Quality Rating")
        st.write(f"**{quality_rating}**")