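"""Streamlit app that transcribes Cantonese customer-service calls with a Whisper
model, cleans repeated phrases from the transcript, and rates the overall sentiment."""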
import streamlit as st
import torch
from transformers import pipeline, AutoTokenizer, AutoModelForSequenceClassification
import torchaudio
import os
import re
from difflib import SequenceMatcher
import numpy as np
# Device setup
device = "cuda" if torch.cuda.is_available() else "cpu"
# Load the Cantonese Whisper model with chunked inference and decoding parameters that discourage repetition
MODEL_NAME = "alvanlii/whisper-small-cantonese"
language = "zh"
pipe = pipeline(
    task="automatic-speech-recognition",
    model=MODEL_NAME,
    chunk_length_s=30,
    device=device,
    generate_kwargs={
        "no_repeat_ngram_size": 3,
        "repetition_penalty": 1.3,
        "temperature": 0.7,
        "top_p": 0.9,
        "top_k": 50
    }
)
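# Force Chinese transcription output instead of letting Whisper auto-detect the language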
pipe.model.config.forced_decoder_ids = pipe.tokenizer.get_decoder_prompt_ids(language=language, task="transcribe")
# Similarity check to remove repeated phrases
def is_similar(a, b, threshold=0.8):
    return SequenceMatcher(None, a, b).ratio() > threshold
def remove_repeated_phrases(text):
    # Split on Chinese sentence-ending punctuation and drop near-duplicate neighbouring sentences
    sentences = re.split(r'(?<=[。！？])', text)
    cleaned_sentences = []
    for sentence in sentences:
        if not cleaned_sentences or not is_similar(sentence.strip(), cleaned_sentences[-1].strip()):
            cleaned_sentences.append(sentence.strip())
    return " ".join(cleaned_sentences)
# Remove punctuation
def remove_punctuation(text):
    return re.sub(r'[^\w\s]', '', text)
# Transcription function (adjusted for punctuation and repetition removal)
def transcribe_audio(audio_path):
    waveform, sample_rate = torchaudio.load(audio_path)
    # Downmix multi-channel audio to mono
    if waveform.shape[0] > 1:
        waveform = torch.mean(waveform, dim=0, keepdim=True)
    waveform = waveform.squeeze(0).numpy()
    duration = waveform.shape[0] / sample_rate
    if duration > 60:
        # For recordings longer than a minute, transcribe 55-second windows with 5 seconds of overlap
        chunk_size = sample_rate * 55
        step_size = sample_rate * 50
        results = []
        for start in range(0, waveform.shape[0], step_size):
            chunk = waveform[start:start + chunk_size]
            if chunk.shape[0] == 0:
                break
            transcript = pipe({"sampling_rate": sample_rate, "raw": chunk})["text"]
            results.append(remove_punctuation(transcript))
        return remove_punctuation(remove_repeated_phrases(" ".join(results)))
    return remove_punctuation(remove_repeated_phrases(pipe({"sampling_rate": sample_rate, "raw": waveform})["text"]))
# Sentiment analysis model
sentiment_pipe = pipeline("text-classification", model="Leo0129/CustomModel-multilingual-sentiment-analysis", device=device)
# Rate sentiment with batch processing
def rate_quality(text):
    # Split the transcript into 512-character chunks so each fits the classifier's input limit
    chunks = [text[i:i+512] for i in range(0, len(text), 512)]
    results = sentiment_pipe(chunks, batch_size=4)
    label_map = {"Very Negative": "Very Poor", "Negative": "Poor", "Neutral": "Neutral", "Positive": "Good", "Very Positive": "Very Good"}
    processed_results = [label_map.get(res["label"], "Unknown") for res in results]
    # Majority vote across chunks
    return max(set(processed_results), key=processed_results.count)
# Streamlit main interface
def main():
    st.set_page_config(page_title="Customer Service Quality Analyzer", page_icon="🎙️")
    # Custom CSS styling
    st.markdown("""
        <style>
        .header {
            background: linear-gradient(45deg, #FF9A6C, #FF6B6B);
            border-radius: 15px;
            padding: 2rem;
            text-align: center;
            box-shadow: 0 4px 6px rgba(0, 0, 0, 0.1);
            margin-bottom: 2rem;
        }
        </style>
    """, unsafe_allow_html=True)
    st.markdown("""
        <div class="header">
            <h1 style='margin:0;'>🎙️ Customer Service Quality Analyzer</h1>
            <p style='color: white;'>Evaluate service quality with a simple upload!</p>
        </div>
    """, unsafe_allow_html=True)
    uploaded_file = st.file_uploader("🎤 Please upload your Cantonese customer service audio file", type=["wav", "mp3", "flac"])
    if uploaded_file is not None:
        # Persist the upload to a temporary file so torchaudio can read it
        temp_audio_path = "uploaded_audio.wav"
        with open(temp_audio_path, "wb") as f:
            f.write(uploaded_file.getbuffer())
        progress_bar = st.progress(0)
        # Step 1: Audio transcription
        with st.spinner('🔄 Step 1: Transcribing audio, please wait...'):
            transcript = transcribe_audio(temp_audio_path)
        progress_bar.progress(50)
        st.write("**Transcript:**", transcript)
        # Step 2: Sentiment analysis
        with st.spinner('🧘‍♀️ Step 2: Analyzing sentiment, please wait...'):
            quality_rating = rate_quality(transcript)
        progress_bar.progress(100)
        st.write("**Sentiment Analysis Result:**", quality_rating)
        result_text = f"Transcript:\n{transcript}\n\nSentiment Analysis Result: {quality_rating}"
        st.download_button(label="📥 Download Analysis Report", data=result_text, file_name="analysis_report.txt")
        st.markdown("❓ If you encounter any issues, please contact customer support: 📧 **abc@hellotoby.com**")
        os.remove(temp_audio_path)
if __name__ == "__main__":
    main()