xujinheng666 committed on
Commit
18d5ab3
·
verified ·
1 Parent(s): 2ba44e2

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +18 -2
app.py CHANGED
@@ -42,7 +42,7 @@ def remove_repeated_phrases(text):
42
  return " ".join(cleaned_sentences)
43
 
44
  def remove_punctuation(text):
45
- return re.sub(r'[^\w\s]', '', text) # Remove all non-word and non-space characters
46
 
47
  def transcribe_audio(audio_path):
48
  waveform, sample_rate = torchaudio.load(audio_path)
@@ -78,9 +78,21 @@ def translate(text):
78
  translations.append(tokenizer.decode(outputs[0], skip_special_tokens=True))
79
  return " ".join(translations)
80
 
 
 
 
 
 
 
 
 
 
 
 
 
81
  # Streamlit UI
82
  st.set_page_config(page_title="Cantonese Speech Processing", layout="wide")
83
- st.title("🎀 Cantonese Audio Transcription & Translation")
84
  st.write("Upload an audio file to transcribe, translate, and analyze quality.")
85
 
86
  uploaded_file = st.file_uploader("Upload your audio file (WAV format)", type=["wav"])
@@ -98,4 +110,8 @@ if uploaded_file is not None:
98
  st.subheader("🌍 Translation")
99
  st.text_area("Translated Text", translated_text, height=150)
100
 
 
 
 
 
101
  st.success("Processing complete!")
 
42
  return " ".join(cleaned_sentences)
43
 
44
  def remove_punctuation(text):
45
+ return re.sub(r'[^\w\s]', '', text)
46
 
47
  def transcribe_audio(audio_path):
48
  waveform, sample_rate = torchaudio.load(audio_path)
 
78
  translations.append(tokenizer.decode(outputs[0], skip_special_tokens=True))
79
  return " ".join(translations)
80
 
81
+ # Load quality rating model
82
+ rating_pipe = pipeline("text-classification", model="Leo0129/CustomModel_dianping-chinese")
83
+
84
+ def rate_quality(text):
85
+ chunks = [text[i:i+512] for i in range(0, len(text), 512)]
86
+ results = []
87
+ for chunk in chunks:
88
+ result = rating_pipe(chunk)[0]
89
+ label_map = {"LABEL_0": "Poor", "LABEL_1": "Neutral", "LABEL_2": "Good"}
90
+ results.append(label_map.get(result["label"], "Unknown"))
91
+ return max(set(results), key=results.count)
92
+
93
  # Streamlit UI
94
  st.set_page_config(page_title="Cantonese Speech Processing", layout="wide")
95
+ st.title("🎀 Cantonese Audio Transcription, Translation & Quality Rating")
96
  st.write("Upload an audio file to transcribe, translate, and analyze quality.")
97
 
98
  uploaded_file = st.file_uploader("Upload your audio file (WAV format)", type=["wav"])
 
110
  st.subheader("🌍 Translation")
111
  st.text_area("Translated Text", translated_text, height=150)
112
 
113
+ quality_rating = rate_quality(translated_text)
114
+ st.subheader("⭐ Quality Rating")
115
+ st.write(f"**Rating:** {quality_rating}")
116
+
117
  st.success("Processing complete!")