Update app.py
Browse files
app.py
CHANGED
@@ -13,12 +13,10 @@ def load_models():
|
|
13 |
)
|
14 |
st.session_state.transcription_pipe.model.config.forced_decoder_ids = st.session_state.transcription_pipe.tokenizer.get_decoder_prompt_ids(language="zh", task="transcribe")
|
15 |
|
16 |
-
st.session_state.translation_tokenizer = AutoTokenizer.from_pretrained("
|
17 |
-
st.session_state.translation_model = AutoModelForSeq2SeqLM.from_pretrained("
|
18 |
|
19 |
-
st.session_state.
|
20 |
-
|
21 |
-
st.session_state.rating_pipe = pipeline("text-classification", model="distilbert/distilbert-base-uncased-finetuned-sst-2-english")
|
22 |
|
23 |
def transcribe_audio(audio_path):
|
24 |
pipe = st.session_state.transcription_pipe
|
@@ -31,9 +29,6 @@ def translate_text(text):
|
|
31 |
outputs = model.generate(inputs["input_ids"], max_length=1000, num_beams=5)
|
32 |
return tokenizer.decode(outputs[0], skip_special_tokens=True)
|
33 |
|
34 |
-
def summarize_text(text):
|
35 |
-
return st.session_state.summary_pipe(text)[0]['summary_text']
|
36 |
-
|
37 |
def rate_quality(text):
|
38 |
result = st.session_state.rating_pipe(text)[0]
|
39 |
label_map = {"LABEL_0": "Poor", "LABEL_1": "Average", "LABEL_2": "Good"}
|
@@ -56,7 +51,6 @@ def main():
|
|
56 |
|
57 |
transcript = transcribe_audio(file_path)
|
58 |
translation = translate_text(transcript)
|
59 |
-
summary = summarize_text(translation)
|
60 |
rating = rate_quality(translation)
|
61 |
|
62 |
os.remove(file_path)
|
@@ -64,12 +58,9 @@ def main():
|
|
64 |
st.subheader("Transcription")
|
65 |
st.write(transcript)
|
66 |
|
67 |
-
st.subheader("Translation (
|
68 |
st.write(translation)
|
69 |
|
70 |
-
st.subheader("Summary")
|
71 |
-
st.write(summary)
|
72 |
-
|
73 |
st.subheader("Conversation Quality Rating")
|
74 |
st.write(rating)
|
75 |
|
|
|
13 |
)
|
14 |
st.session_state.transcription_pipe.model.config.forced_decoder_ids = st.session_state.transcription_pipe.tokenizer.get_decoder_prompt_ids(language="zh", task="transcribe")
|
15 |
|
16 |
+
st.session_state.translation_tokenizer = AutoTokenizer.from_pretrained("botisan-ai/mt5-translate-yue-zh")
|
17 |
+
st.session_state.translation_model = AutoModelForSeq2SeqLM.from_pretrained("botisan-ai/mt5-translate-yue-zh")
|
18 |
|
19 |
+
st.session_state.rating_pipe = pipeline("text-classification", model="uer/roberta-base-finetuned-dianping-chinese")
|
|
|
|
|
20 |
|
21 |
def transcribe_audio(audio_path):
|
22 |
pipe = st.session_state.transcription_pipe
|
|
|
29 |
outputs = model.generate(inputs["input_ids"], max_length=1000, num_beams=5)
|
30 |
return tokenizer.decode(outputs[0], skip_special_tokens=True)
|
31 |
|
|
|
|
|
|
|
32 |
def rate_quality(text):
|
33 |
result = st.session_state.rating_pipe(text)[0]
|
34 |
label_map = {"LABEL_0": "Poor", "LABEL_1": "Average", "LABEL_2": "Good"}
|
|
|
51 |
|
52 |
transcript = transcribe_audio(file_path)
|
53 |
translation = translate_text(transcript)
|
|
|
54 |
rating = rate_quality(translation)
|
55 |
|
56 |
os.remove(file_path)
|
|
|
58 |
st.subheader("Transcription")
|
59 |
st.write(transcript)
|
60 |
|
61 |
+
st.subheader("Translation (Chinese)")
|
62 |
st.write(translation)
|
63 |
|
|
|
|
|
|
|
64 |
st.subheader("Conversation Quality Rating")
|
65 |
st.write(rating)
|
66 |
|