Spaces:

aeresd
/

test_1

Sleeping

App Files Community

aeresd committed on May 20

Commit

6e7a57d

verified ·

1 Parent(s): cd7f587

Update app.py

Browse files

Files changed (1) hide show

app.py +33 -13

app.py CHANGED Viewed

@@ -29,10 +29,15 @@ st.set_page_config(page_title="Emoji Offensive Text Detector", page_icon="🚨",
 # ✅ 侧边栏: 选择模型
 with st.sidebar:
     st.header("🧠 Settings")
-    selected_model = st.selectbox("Choose classification model", list(model_options.keys()))
     selected_model_id = model_options[selected_model]
-    classifier = pipeline("text-classification", model=selected_model_id,
-                          device=0 if torch.cuda.is_available() else -1)
 # 初始化历史记录
 if "history" not in st.session_state:
@@ -43,9 +48,17 @@ def classify_emoji_text(text: str):
     prompt = f"输入：{text}\n输出："
     input_ids = emoji_tokenizer(prompt, return_tensors="pt").to(emoji_model.device)
     with torch.no_grad():
-        output_ids = emoji_model.generate(**input_ids, max_new_tokens=64, do_sample=False)
-    decoded = emoji_tokenizer.decode(output_ids[0], skip_special_tokens=True)
-    translated_text = decoded.split("输出：")[-1].strip() if "输出：" in decoded else decoded.strip()
     result = classifier(translated_text)[0]
     label = result["label"]
@@ -72,7 +85,9 @@ st.markdown("### ✍️ Input your sentence or upload screenshot:")
 col1, col2 = st.columns(2)
 with col1:
     default_text = "你是🐷"
-    text = st.text_area("Enter sentence with emojis:", value=default_text, height=150)
     if st.button("🚦 Analyze Text"):
         with st.spinner("🔍 Processing..."):
             try:
@@ -88,13 +103,17 @@ with col1:
                 st.error(f"❌ An error occurred during processing:\n\n{e}")
 with col2:
-    uploaded_file = st.file_uploader("Upload an image (JPG/PNG)", type=["jpg", "jpeg", "png"])
     if uploaded_file is not None:
         image = Image.open(uploaded_file)
         st.image(image, caption="Uploaded Screenshot", use_column_width=True)
         if st.button("🛠️ OCR & Analyze Image"):
             with st.spinner("🧠 Extracting text via OCR..."):
-                ocr_text = pytesseract.image_to_string(image, lang="chi_sim+eng").strip()
                 st.markdown("#### 📋 Extracted Text:")
                 st.code(ocr_text)
                 classify_emoji_text(ocr_text)
@@ -102,16 +121,17 @@ with col2:
 # 分析仪表盘
 st.markdown("---")
 st.title("📊 Violation Analysis Dashboard")
 if st.session_state.history:
     st.markdown("### 🧾 Offensive Terms & Suggestions")
     for item in st.session_state.history:
         st.markdown(f"- 🔹 **Input:** {item['text']}")
         st.markdown(f"   - ✨ **Translated:** {item['translated']}")
-        st.markdown(f"   - ❗ **Label:** {item['label']} with **{item['score']:.2%}** confidence")
-        st.markdown(f"   - 🔧 **Suggestion:** {item['reason']}")
-    # 雷达图演示示例（可替换为动态数据）
     radar_df = pd.DataFrame({
         "Category": ["Insult", "Abuse", "Discrimination", "Hate Speech", "Vulgarity"],
         "Score": [0.7, 0.4, 0.3, 0.5, 0.6]

 # ✅ 侧边栏: 选择模型
 with st.sidebar:
     st.header("🧠 Settings")
+    selected_model = st.selectbox(
+        "Choose classification model", list(model_options.keys())
+    )
     selected_model_id = model_options[selected_model]
+    classifier = pipeline(
+        "text-classification",
+        model=selected_model_id,
+        device=0 if torch.cuda.is_available() else -1
+    )
 # 初始化历史记录
 if "history" not in st.session_state:
     prompt = f"输入：{text}\n输出："
     input_ids = emoji_tokenizer(prompt, return_tensors="pt").to(emoji_model.device)
     with torch.no_grad():
+        output_ids = emoji_model.generate(
+            **input_ids, max_new_tokens=64, do_sample=False
+        )
+    decoded = emoji_tokenizer.decode(
+        output_ids[0], skip_special_tokens=True
+    )
+    translated_text = (
+        decoded.split("输出：")[-1].strip()
+        if "输出：" in decoded
+        else decoded.strip()
+    )
     result = classifier(translated_text)[0]
     label = result["label"]
 col1, col2 = st.columns(2)
 with col1:
     default_text = "你是🐷"
+    text = st.text_area(
+        "Enter sentence with emojis:", value=default_text, height=150
+    )
     if st.button("🚦 Analyze Text"):
         with st.spinner("🔍 Processing..."):
             try:
                 st.error(f"❌ An error occurred during processing:\n\n{e}")
 with col2:
+    uploaded_file = st.file_uploader(
+        "Upload an image (JPG/PNG)", type=["jpg", "jpeg", "png"]
+    )
     if uploaded_file is not None:
         image = Image.open(uploaded_file)
         st.image(image, caption="Uploaded Screenshot", use_column_width=True)
         if st.button("🛠️ OCR & Analyze Image"):
             with st.spinner("🧠 Extracting text via OCR..."):
+                ocr_text = pytesseract.image_to_string(
+                    image, lang="chi_sim+eng"
+                ).strip()
                 st.markdown("#### 📋 Extracted Text:")
                 st.code(ocr_text)
                 classify_emoji_text(ocr_text)
 # 分析仪表盘
 st.markdown("---")
 st.title("📊 Violation Analysis Dashboard")
 if st.session_state.history:
     st.markdown("### 🧾 Offensive Terms & Suggestions")
     for item in st.session_state.history:
         st.markdown(f"- 🔹 **Input:** {item['text']}")
         st.markdown(f"   - ✨ **Translated:** {item['translated']}")
+        st.markdown(
+            f"   - ❗ **Label:** {item['label']} with **{item['score']:.2%}** confidence"
+        )
+        st.markdown(f"   - 🔧 **Suggestion:** {item['reason']}  ")
+    # 雷达图
     radar_df = pd.DataFrame({
         "Category": ["Insult", "Abuse", "Discrimination", "Hate Speech", "Vulgarity"],
         "Score": [0.7, 0.4, 0.3, 0.5, 0.6]