Spaces:

xujinheng666
/

CS_Quality_Analysis_FinalProject

Running

xujinheng666 committed on Mar 21

Commit

18d5ab3

verified ·

1 Parent(s): 2ba44e2

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -42,7 +42,7 @@ def remove_repeated_phrases(text):
     return " ".join(cleaned_sentences)
 def remove_punctuation(text):
-    return re.sub(r'[^\w\s]', '', text)  # Remove all non-word and non-space characters
 def transcribe_audio(audio_path):
     waveform, sample_rate = torchaudio.load(audio_path)
@@ -78,9 +78,21 @@ def translate(text):
         translations.append(tokenizer.decode(outputs[0], skip_special_tokens=True))
     return " ".join(translations)
 # Streamlit UI
 st.set_page_config(page_title="Cantonese Speech Processing", layout="wide")
-st.title("🎤 Cantonese Audio Transcription & Translation")
 st.write("Upload an audio file to transcribe, translate, and analyze quality.")
 uploaded_file = st.file_uploader("Upload your audio file (WAV format)", type=["wav"])
@@ -98,4 +110,8 @@ if uploaded_file is not None:
         st.subheader("🌍 Translation")
         st.text_area("Translated Text", translated_text, height=150)
         st.success("Processing complete!")

     return " ".join(cleaned_sentences)
 def remove_punctuation(text):
+    return re.sub(r'[^\w\s]', '', text)  # Delete every character that is neither a word character (\w, which includes "_") nor whitespace
 def transcribe_audio(audio_path):
     waveform, sample_rate = torchaudio.load(audio_path)
         translations.append(tokenizer.decode(outputs[0], skip_special_tokens=True))
     return " ".join(translations)
+# Load quality rating model
+rating_pipe = pipeline("text-classification", model="Leo0129/CustomModel_dianping-chinese")
+def rate_quality(text):
+    chunks = [text[i:i+512] for i in range(0, len(text), 512)]
+    results = []
+    for chunk in chunks:
+        result = rating_pipe(chunk)[0]
+        label_map = {"LABEL_0": "Poor", "LABEL_1": "Neutral", "LABEL_2": "Good"}
+        results.append(label_map.get(result["label"], "Unknown"))
+    return max(set(results), key=results.count)
 # Streamlit UI
 st.set_page_config(page_title="Cantonese Speech Processing", layout="wide")
+st.title("🎤 Cantonese Audio Transcription, Translation & Quality Rating")
 st.write("Upload an audio file to transcribe, translate, and analyze quality.")
 uploaded_file = st.file_uploader("Upload your audio file (WAV format)", type=["wav"])
         st.subheader("🌍 Translation")
         st.text_area("Translated Text", translated_text, height=150)
+        quality_rating = rate_quality(translated_text)
+        st.subheader("⭐ Quality Rating")
+        st.write(f"**Rating:** {quality_rating}")
         st.success("Processing complete!")