xujinheng666 committed on
Commit
22b7073
·
verified ·
1 Parent(s): 32cfaa6

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +4 -13
app.py CHANGED
@@ -13,12 +13,10 @@ def load_models():
13
  )
14
  st.session_state.transcription_pipe.model.config.forced_decoder_ids = st.session_state.transcription_pipe.tokenizer.get_decoder_prompt_ids(language="zh", task="transcribe")
15
 
16
- st.session_state.translation_tokenizer = AutoTokenizer.from_pretrained("Helsinki-NLP/opus-mt-zh-en")
17
- st.session_state.translation_model = AutoModelForSeq2SeqLM.from_pretrained("Helsinki-NLP/opus-mt-zh-en")
18
 
19
- st.session_state.summary_pipe = pipeline("summarization", model="Falconsai/text_summarization")
20
-
21
- st.session_state.rating_pipe = pipeline("text-classification", model="distilbert/distilbert-base-uncased-finetuned-sst-2-english")
22
 
23
  def transcribe_audio(audio_path):
24
  pipe = st.session_state.transcription_pipe
@@ -31,9 +29,6 @@ def translate_text(text):
31
  outputs = model.generate(inputs["input_ids"], max_length=1000, num_beams=5)
32
  return tokenizer.decode(outputs[0], skip_special_tokens=True)
33
 
34
- def summarize_text(text):
35
- return st.session_state.summary_pipe(text)[0]['summary_text']
36
-
37
  def rate_quality(text):
38
  result = st.session_state.rating_pipe(text)[0]
39
  label_map = {"LABEL_0": "Poor", "LABEL_1": "Average", "LABEL_2": "Good"}
@@ -56,7 +51,6 @@ def main():
56
 
57
  transcript = transcribe_audio(file_path)
58
  translation = translate_text(transcript)
59
- summary = summarize_text(translation)
60
  rating = rate_quality(translation)
61
 
62
  os.remove(file_path)
@@ -64,12 +58,9 @@ def main():
64
  st.subheader("Transcription")
65
  st.write(transcript)
66
 
67
- st.subheader("Translation (English)")
68
  st.write(translation)
69
 
70
- st.subheader("Summary")
71
- st.write(summary)
72
-
73
  st.subheader("Conversation Quality Rating")
74
  st.write(rating)
75
 
 
13
  )
14
  st.session_state.transcription_pipe.model.config.forced_decoder_ids = st.session_state.transcription_pipe.tokenizer.get_decoder_prompt_ids(language="zh", task="transcribe")
15
 
16
+ st.session_state.translation_tokenizer = AutoTokenizer.from_pretrained("botisan-ai/mt5-translate-yue-zh")
17
+ st.session_state.translation_model = AutoModelForSeq2SeqLM.from_pretrained("botisan-ai/mt5-translate-yue-zh")
18
 
19
+ st.session_state.rating_pipe = pipeline("text-classification", model="uer/roberta-base-finetuned-dianping-chinese")
 
 
20
 
21
  def transcribe_audio(audio_path):
22
  pipe = st.session_state.transcription_pipe
 
29
  outputs = model.generate(inputs["input_ids"], max_length=1000, num_beams=5)
30
  return tokenizer.decode(outputs[0], skip_special_tokens=True)
31
 
 
 
 
32
  def rate_quality(text):
33
  result = st.session_state.rating_pipe(text)[0]
34
  label_map = {"LABEL_0": "Poor", "LABEL_1": "Average", "LABEL_2": "Good"}
 
51
 
52
  transcript = transcribe_audio(file_path)
53
  translation = translate_text(transcript)
 
54
  rating = rate_quality(translation)
55
 
56
  os.remove(file_path)
 
58
  st.subheader("Transcription")
59
  st.write(transcript)
60
 
61
+ st.subheader("Translation (Chinese)")
62
  st.write(translation)
63
 
 
 
 
64
  st.subheader("Conversation Quality Rating")
65
  st.write(rating)
66