aeresd committed on
Commit dc1bdc8 · verified · 1 Parent(s): a77ff54

Update app.py

Files changed (1)
  1. app.py +26 -80
app.py CHANGED
@@ -29,52 +29,31 @@ st.set_page_config(page_title="Emoji Offensive Text Detector", page_icon="🚨",
 # ✅ Sidebar: choose model
 with st.sidebar:
     st.header("🧠 Settings")
-    selected_model = st.selectbox(
-        "Choose classification model", list(model_options.keys())
-    )
+    moderation_type = st.selectbox("Select Task Type", ["Normal Text", "Bullet Screen Text"])
+    selected_model = st.selectbox("Choose classification model", list(model_options.keys()))
     selected_model_id = model_options[selected_model]
-    classifier = pipeline(
-        "text-classification",
-        model=selected_model_id,
-        device=0 if torch.cuda.is_available() else -1
-    )
+    classifier = pipeline("text-classification", model=selected_model_id, device=0 if torch.cuda.is_available() else -1)
 
 # Initialize history
 if "history" not in st.session_state:
     st.session_state.history = []
 
 # Core function: translate and classify
+
 def classify_emoji_text(text: str):
     prompt = f"输入:{text}\n输出:"
     input_ids = emoji_tokenizer(prompt, return_tensors="pt").to(emoji_model.device)
     with torch.no_grad():
-        output_ids = emoji_model.generate(
-            **input_ids, max_new_tokens=64, do_sample=False
-        )
-        decoded = emoji_tokenizer.decode(
-            output_ids[0], skip_special_tokens=True
-        )
-        translated_text = (
-            decoded.split("输出:")[-1].strip()
-            if "输出:" in decoded
-            else decoded.strip()
-        )
+        output_ids = emoji_model.generate(**input_ids, max_new_tokens=64, do_sample=False)
+        decoded = emoji_tokenizer.decode(output_ids[0], skip_special_tokens=True)
+        translated_text = decoded.split("输出:")[-1].strip() if "输出:" in decoded else decoded.strip()
 
     result = classifier(translated_text)[0]
     label = result["label"]
     score = result["score"]
-    reasoning = (
-        f"The sentence was flagged as '{label}' due to potentially offensive phrases. "
-        "Consider replacing emotionally charged, ambiguous, or abusive terms."
-    )
-
-    st.session_state.history.append({
-        "text": text,
-        "translated": translated_text,
-        "label": label,
-        "score": score,
-        "reason": reasoning
-    })
+    reasoning = f"The sentence was flagged as '{label}' due to potentially offensive phrases. Consider replacing emotionally charged, ambiguous, or abusive terms."
+
+    st.session_state.history.append({"text": text, "translated": translated_text, "label": label, "score": score, "reason": reasoning})
     return translated_text, label, score, reasoning
 
 # Page body
@@ -85,9 +64,7 @@ st.markdown("### ✍️ Input your sentence or upload screenshot:")
 col1, col2 = st.columns(2)
 with col1:
     default_text = "你是🐷"
-    text = st.text_area(
-        "Enter sentence with emojis:", value=default_text, height=150
-    )
+    text = st.text_area("Enter sentence with emojis:", value=default_text, height=150)
     if st.button("🚦 Analyze Text"):
         with st.spinner("🔍 Processing..."):
             try:
@@ -97,23 +74,19 @@ with col1:
 
                 st.markdown(f"#### 🎯 Prediction: {label}")
                 st.markdown(f"#### 📊 Confidence Score: {score:.2%}")
-                st.markdown("#### 🧠 Model Explanation:")
+                st.markdown(f"#### 🧠 Model Explanation:")
                 st.info(reason)
             except Exception as e:
                 st.error(f"❌ An error occurred during processing:\n\n{e}")
 
 with col2:
-    uploaded_file = st.file_uploader(
-        "Upload an image (JPG/PNG)", type=["jpg", "jpeg", "png"]
-    )
+    uploaded_file = st.file_uploader("Upload an image (JPG/PNG)", type=["jpg", "jpeg", "png"])
     if uploaded_file is not None:
         image = Image.open(uploaded_file)
         st.image(image, caption="Uploaded Screenshot", use_column_width=True)
         if st.button("🛠️ OCR & Analyze Image"):
             with st.spinner("🧠 Extracting text via OCR..."):
-                ocr_text = pytesseract.image_to_string(
-                    image, lang="chi_sim+eng"
-                ).strip()
+                ocr_text = pytesseract.image_to_string(image, lang="chi_sim+eng").strip()
                 st.markdown("#### 📋 Extracted Text:")
                 st.code(ocr_text)
                 classify_emoji_text(ocr_text)
@@ -122,53 +95,26 @@ with col2:
 st.markdown("---")
 st.title("📊 Violation Analysis Dashboard")
 if st.session_state.history:
+    df = pd.DataFrame(st.session_state.history)
+    # Pie chart
+    label_counts = df["label"].value_counts().reset_index()
+    label_counts.columns = ["Category", "Count"]
+    fig = px.pie(label_counts, names="Category", values="Count", title="Offensive Category Distribution")
+    st.plotly_chart(fig)
+
     st.markdown("### 🧾 Offensive Terms & Suggestions")
     for item in st.session_state.history:
         st.markdown(f"- 🔹 **Input:** {item['text']}")
         st.markdown(f" - ✨ **Translated:** {item['translated']}")
-        st.markdown(
-            f" - **Label:** {item['label']} with **{item['score']:.2%}** confidence"
-        )
-        st.markdown(f" - 🔧 **Suggestion:** {item['reason']} ")
+        st.markdown(f" - ❗ **Label:** {item['label']} with **{item['score']:.2%}** confidence")
+        st.markdown(f" - 🔧 **Suggestion:** {item['reason']}")
 
-    # Radar chart
+    # Radar chart demo example (can be replaced with dynamic data)
     radar_df = pd.DataFrame({
         "Category": ["Insult", "Abuse", "Discrimination", "Hate Speech", "Vulgarity"],
         "Score": [0.7, 0.4, 0.3, 0.5, 0.6]
     })
-    radar_fig = px.line_polar(
-        radar_df,
-        r='Score',
-        theta='Category',
-        line_close=True,
-        title="⚠️ Risk Radar by Category",
-        color_discrete_sequence=['black'],
-        template='simple_white'
-    )
-    radar_fig.update_layout(
-        polar=dict(
-            gridshape='circular',
-            bgcolor='white',
-            radialaxis=dict(
-                showticklabels=False,
-                ticks='',
-                showgrid=True,
-                gridcolor='lightgrey',
-                gridwidth=1,
-                linecolor='black',
-                linewidth=2
-            ),
-            angularaxis=dict(
-                showticklabels=False,
-                ticks='',
-                showline=True,
-                linecolor='black',
-                linewidth=2
-            )
-        ),
-        paper_bgcolor='white',
-        plot_bgcolor='white'
-    )
+    radar_fig = px.line_polar(radar_df, r='Score', theta='Category', line_close=True, title="⚠️ Risk Radar by Category")
     st.plotly_chart(radar_fig)
 else:
-    st.info("⚠️ No classification data available yet.")
+    st.info("⚠️ No classification data available yet.")
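
For reference, a minimal sketch of the pie-chart aggregation this commit adds to the dashboard, runnable outside Streamlit. The two history entries below are hypothetical examples shaped like the dictionaries classify_emoji_text appends to st.session_state.history; their texts, labels, and scores are made up for illustration.

import pandas as pd
import plotly.express as px

# Hypothetical history entries (same keys as those written by classify_emoji_text)
history = [
    {"text": "你是🐷", "translated": "你是猪", "label": "offensive", "score": 0.97,
     "reason": "The sentence was flagged as 'offensive' due to potentially offensive phrases."},
    {"text": "hello there 🙂", "translated": "hello there", "label": "non-offensive", "score": 0.91,
     "reason": "No offensive phrases detected."},
]

df = pd.DataFrame(history)

# Same aggregation as the added dashboard code: one pie slice per predicted label
label_counts = df["label"].value_counts().reset_index()
label_counts.columns = ["Category", "Count"]

fig = px.pie(label_counts, names="Category", values="Count",
             title="Offensive Category Distribution")
fig.show()  # the app renders this with st.plotly_chart(fig) instead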