aeresd committed on
Commit
98b3199
·
verified ·
1 Parent(s): 8635ea4

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +77 -30
app.py CHANGED
@@ -1,6 +1,10 @@
1
  from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM
2
  import torch
3
  import streamlit as st
 
 
 
 
4
 
5
  # ✅ Step 1: Emoji 翻译模型(你自己训练的模型)
6
  emoji_model_id = "JenniferHJF/qwen1.5-emoji-finetuned"
@@ -22,26 +26,20 @@ model_options = {
22
  # ✅ 页面配置
23
  st.set_page_config(page_title="Emoji Offensive Text Detector", page_icon="🚨", layout="wide")
24
 
25
- # ✅ 页面标题
26
- st.title("🧠 Emoji-based Offensive Language Classifier")
 
 
 
 
 
27
 
28
- st.markdown("""
29
- This application translates emojis in a sentence and classifies whether the final sentence is offensive or not using two AI models.
30
- - The **first model** translates emoji or symbolic phrases into standard Chinese text.
31
- - The **second model** performs offensive language detection.
32
- """)
33
 
34
- # Streamlit 侧边栏模型选择
35
- selected_model = st.sidebar.selectbox("Choose classification model", list(model_options.keys()))
36
- selected_model_id = model_options[selected_model]
37
- classifier = pipeline("text-classification", model=selected_model_id, device=0 if torch.cuda.is_available() else -1)
38
 
39
- # ✅ 输入区域
40
- st.markdown("### ✍️ Input your sentence:")
41
- default_text = "你是🐷"
42
- text = st.text_area("Enter sentence with emojis:", value=default_text, height=150)
43
-
44
- # ✅ 主逻辑封装函数
45
  def classify_emoji_text(text: str):
46
  prompt = f"输入:{text}\n输出:"
47
  input_ids = emoji_tokenizer(prompt, return_tensors="pt").to(emoji_model.device)
@@ -53,21 +51,70 @@ def classify_emoji_text(text: str):
53
  result = classifier(translated_text)[0]
54
  label = result["label"]
55
  score = result["score"]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
56
 
57
- return translated_text, label, score
 
 
 
 
 
 
 
 
 
 
58
 
59
- # ✅ 触发按钮
60
- if st.button("🚦 Analyze"):
61
- with st.spinner("🔍 Processing..."):
62
- try:
63
- translated, label, score = classify_emoji_text(text)
64
- st.markdown("### 🔄 Translated sentence:")
65
- st.code(translated, language="text")
 
 
 
66
 
67
- st.markdown(f"### 🎯 Prediction: `{label}`")
68
- st.markdown(f"### 📊 Confidence Score: `{score:.2%}`")
 
 
 
 
69
 
70
- except Exception as e:
71
- st.error(f"❌ An error occurred during processing:\n\n{e}")
 
 
 
 
 
72
  else:
73
- st.info("👈 Please input text and click the button to classify.")
 
1
  from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM
2
  import torch
3
  import streamlit as st
4
+ from PIL import Image
5
+ import pytesseract
6
+ import pandas as pd
7
+ import plotly.express as px
8
 
9
  # ✅ Step 1: Emoji 翻译模型(你自己训练的模型)
10
  emoji_model_id = "JenniferHJF/qwen1.5-emoji-finetuned"
 
26
  # ✅ 页面配置
27
  st.set_page_config(page_title="Emoji Offensive Text Detector", page_icon="🚨", layout="wide")
28
 
29
+ # ✅ 侧边栏: 选择模型
30
+ with st.sidebar:
31
+ st.header("🧠 Settings")
32
+ moderation_type = st.selectbox("Select Task Type", ["Normal Text", "Bullet Screen Text"])
33
+ selected_model = st.selectbox("Choose classification model", list(model_options.keys()))
34
+ selected_model_id = model_options[selected_model]
35
+ classifier = pipeline("text-classification", model=selected_model_id, device=0 if torch.cuda.is_available() else -1)
36
 
37
+ # 初始化历史记录
38
+ if "history" not in st.session_state:
39
+ st.session_state.history = []
 
 
40
 
41
+ # 核心函数: 翻译并分类
 
 
 
42
 
 
 
 
 
 
 
43
  def classify_emoji_text(text: str):
44
  prompt = f"输入:{text}\n输出:"
45
  input_ids = emoji_tokenizer(prompt, return_tensors="pt").to(emoji_model.device)
 
51
  result = classifier(translated_text)[0]
52
  label = result["label"]
53
  score = result["score"]
54
+ reasoning = f"The sentence was flagged as '{label}' due to potentially offensive phrases. Consider replacing emotionally charged, ambiguous, or abusive terms."
55
+
56
+ st.session_state.history.append({"text": text, "translated": translated_text, "label": label, "score": score, "reason": reasoning})
57
+ return translated_text, label, score, reasoning
58
+
59
+ # 页面主体
60
+ st.title("🚨 Emoji Offensive Text Detector & Analysis")
61
+
62
+ # 输入区域
63
+ st.markdown("### ✍️ Input your sentence or upload screenshot:")
64
+ col1, col2 = st.columns(2)
65
+ with col1:
66
+ default_text = "你是🐷"
67
+ text = st.text_area("Enter sentence with emojis:", value=default_text, height=150)
68
+ if st.button("🚦 Analyze Text"):
69
+ with st.spinner("🔍 Processing..."):
70
+ try:
71
+ translated, label, score, reason = classify_emoji_text(text)
72
+ st.markdown("#### 🔄 Translated sentence:")
73
+ st.code(translated, language="text")
74
+
75
+ st.markdown(f"#### 🎯 Prediction: {label}")
76
+ st.markdown(f"#### 📊 Confidence Score: {score:.2%}")
77
+ st.markdown(f"#### 🧠 Model Explanation:")
78
+ st.info(reason)
79
+ except Exception as e:
80
+ st.error(f"❌ An error occurred during processing:\n\n{e}")
81
 
82
+ with col2:
83
+ uploaded_file = st.file_uploader("Upload an image (JPG/PNG)", type=["jpg", "jpeg", "png"])
84
+ if uploaded_file is not None:
85
+ image = Image.open(uploaded_file)
86
+ st.image(image, caption="Uploaded Screenshot", use_column_width=True)
87
+ if st.button("🛠️ OCR & Analyze Image"):
88
+ with st.spinner("🧠 Extracting text via OCR..."):
89
+ ocr_text = pytesseract.image_to_string(image, lang="chi_sim+eng").strip()
90
+ st.markdown("#### 📋 Extracted Text:")
91
+ st.code(ocr_text)
92
+ classify_emoji_text(ocr_text)
93
 
94
+ # 分析仪表盘
95
+ st.markdown("---")
96
+ st.title("📊 Violation Analysis Dashboard")
97
+ if st.session_state.history:
98
+ df = pd.DataFrame(st.session_state.history)
99
+ # 饼图
100
+ label_counts = df["label"].value_counts().reset_index()
101
+ label_counts.columns = ["Category", "Count"]
102
+ fig = px.pie(label_counts, names="Category", values="Count", title="Offensive Category Distribution")
103
+ st.plotly_chart(fig)
104
 
105
+ st.markdown("### 🧾 Offensive Terms & Suggestions")
106
+ for item in st.session_state.history:
107
+ st.markdown(f"- 🔹 **Input:** {item['text']}")
108
+ st.markdown(f" - ✨ **Translated:** {item['translated']}")
109
+ st.markdown(f" - ❗ **Label:** {item['label']} with **{item['score']:.2%}** confidence")
110
+ st.markdown(f" - 🔧 **Suggestion:** {item['reason']}")
111
 
112
+ # 雷达图演示示例(可替换为动态数据)
113
+ radar_df = pd.DataFrame({
114
+ "Category": ["Insult", "Abuse", "Discrimination", "Hate Speech", "Vulgarity"],
115
+ "Score": [0.7, 0.4, 0.3, 0.5, 0.6]
116
+ })
117
+ radar_fig = px.line_polar(radar_df, r='Score', theta='Category', line_close=True, title="⚠️ Risk Radar by Category")
118
+ st.plotly_chart(radar_fig)
119
  else:
120
+ st.info("⚠️ No classification data available yet.")