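"""Streamlit app: transcribe Cantonese audio with a Whisper model and rate the sentiment of the transcript."""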
import torch
import torchaudio
import os
import re
import streamlit as st
from difflib import SequenceMatcher
from transformers import pipeline
# Device setup
device = "cuda" if torch.cuda.is_available() else "cpu"
# Load Whisper model for transcription
MODEL_NAME = "alvanlii/whisper-small-cantonese"
language = "zh"
pipe = pipeline(
    task="automatic-speech-recognition",
    model=MODEL_NAME,
    chunk_length_s=60,
    device=device
)
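# Pin the decoder prompt so the model transcribes in Chinese rather than auto-detecting the language or translating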
pipe.model.config.forced_decoder_ids = pipe.tokenizer.get_decoder_prompt_ids(language=language, task="transcribe")
# Load multilingual sentiment model used to rate the transcript
rating_pipe = pipeline("text-classification", model="tabularisai/multilingual-sentiment-analysis")
# Map the model's sentiment labels to the rating shown in the UI
label_map = {"Negative": "Very Poor", "Neutral": "Neutral", "Positive": "Very Good"}
def remove_punctuation(text):
    """Strip punctuation so only words and whitespace remain."""
    return re.sub(r'[^\w\s]', '', text)

def transcribe_audio(audio_path):
    """Run the Whisper pipeline on an audio file and return a cleaned transcript."""
    transcript = pipe(audio_path)["text"]
    return remove_punctuation(transcript)

def rate_quality(text):
    """Classify the transcript's sentiment and map it to a readable rating."""
    result = rating_pipe(text)[0]
    return label_map.get(result["label"], "Unknown")
# Streamlit UI
st.set_page_config(page_title="Cantonese Audio Transcription & Analysis", layout="centered")
st.title("🗣️ Cantonese Audio Transcriber & Sentiment Analyzer")
st.markdown("Upload your Cantonese audio file, and we will transcribe and analyze its sentiment.")
uploaded_file = st.file_uploader("Upload an audio file (WAV, MP3, etc.)", type=["wav", "mp3", "m4a"])
if uploaded_file is not None:
    with st.spinner("Processing audio..."):
        # Save the upload to a temporary file so the ASR pipeline can read it from disk
        temp_audio_path = "temp_audio.wav"
        with open(temp_audio_path, "wb") as f:
            f.write(uploaded_file.getbuffer())
        transcript = transcribe_audio(temp_audio_path)
        sentiment = rate_quality(transcript)
        os.remove(temp_audio_path)
    st.subheader("Transcription")
    st.text_area("Transcript", transcript, height=150)
    st.subheader("Sentiment Analysis")
    st.markdown(f"### 🎭 Sentiment: **{sentiment}**")
    st.success("Processing complete! 🎉")