Spaces:

aeresd
/

test_1

Sleeping

App Files Community

aeresd committed on May 20

Commit

68c3cca

verified ·

1 Parent(s): dc1bdc8

Update app.py

Browse files

Files changed (1) hide show

app.py +63 -58

app.py CHANGED Viewed

@@ -6,7 +6,7 @@ import pytesseract
 import pandas as pd
 import plotly.express as px
-# ✅ Step 1: Emoji 翻译模型（你自己训练的模型）
 emoji_model_id = "JenniferHJF/qwen1.5-emoji-finetuned"
 emoji_tokenizer = AutoTokenizer.from_pretrained(emoji_model_id, trust_remote_code=True)
 emoji_model = AutoModelForCausalLM.from_pretrained(
@@ -16,30 +16,34 @@ emoji_model = AutoModelForCausalLM.from_pretrained(
 ).to("cuda" if torch.cuda.is_available() else "cpu")
 emoji_model.eval()
-# ✅ Step 2: 可选择的冒犯性文本识别模型
 model_options = {
     "Toxic-BERT": "unitary/toxic-bert",
     "Roberta Offensive": "cardiffnlp/twitter-roberta-base-offensive",
     "BERT Emotion": "bhadresh-savani/bert-base-go-emotion"
 }
-# ✅ 页面配置
 st.set_page_config(page_title="Emoji Offensive Text Detector", page_icon="🚨", layout="wide")
-# ✅ 侧边栏: 选择模型
 with st.sidebar:
-    st.header("🧠 Settings")
-    moderation_type = st.selectbox("Select Task Type", ["Normal Text", "Bullet Screen Text"])
-    selected_model = st.selectbox("Choose classification model", list(model_options.keys()))
-    selected_model_id = model_options[selected_model]
-    classifier = pipeline("text-classification", model=selected_model_id, device=0 if torch.cuda.is_available() else -1)
 # 初始化历史记录
 if "history" not in st.session_state:
     st.session_state.history = []
-# 核心函数: 翻译并分类
 def classify_emoji_text(text: str):
     prompt = f"输入：{text}\n输出："
     input_ids = emoji_tokenizer(prompt, return_tensors="pt").to(emoji_model.device)
@@ -56,65 +60,66 @@ def classify_emoji_text(text: str):
     st.session_state.history.append({"text": text, "translated": translated_text, "label": label, "score": score, "reason": reasoning})
     return translated_text, label, score, reasoning
-# 页面主体
-st.title("🚨 Emoji Offensive Text Detector & Analysis")
-# 输入区域
-st.markdown("### ✍️ Input your sentence or upload screenshot:")
-col1, col2 = st.columns(2)
-with col1:
     default_text = "你是🐷"
     text = st.text_area("Enter sentence with emojis:", value=default_text, height=150)
-    if st.button("🚦 Analyze Text"):
         with st.spinner("🔍 Processing..."):
             try:
                 translated, label, score, reason = classify_emoji_text(text)
-                st.markdown("#### 🔄 Translated sentence:")
                 st.code(translated, language="text")
-                st.markdown(f"#### 🎯 Prediction: {label}")
-                st.markdown(f"#### 📊 Confidence Score: {score:.2%}")
-                st.markdown(f"#### 🧠 Model Explanation:")
                 st.info(reason)
             except Exception as e:
                 st.error(f"❌ An error occurred during processing:\n\n{e}")
-with col2:
     uploaded_file = st.file_uploader("Upload an image (JPG/PNG)", type=["jpg", "jpeg", "png"])
     if uploaded_file is not None:
         image = Image.open(uploaded_file)
         st.image(image, caption="Uploaded Screenshot", use_column_width=True)
-        if st.button("🛠️ OCR & Analyze Image"):
-            with st.spinner("🧠 Extracting text via OCR..."):
-                ocr_text = pytesseract.image_to_string(image, lang="chi_sim+eng").strip()
-                st.markdown("#### 📋 Extracted Text:")
-                st.code(ocr_text)
-                classify_emoji_text(ocr_text)
-# 分析仪表盘
-st.markdown("---")
-st.title("📊 Violation Analysis Dashboard")
-if st.session_state.history:
-    df = pd.DataFrame(st.session_state.history)
-    # 饼图
-    label_counts = df["label"].value_counts().reset_index()
-    label_counts.columns = ["Category", "Count"]
-    fig = px.pie(label_counts, names="Category", values="Count", title="Offensive Category Distribution")
-    st.plotly_chart(fig)
-    st.markdown("### 🧾 Offensive Terms & Suggestions")
-    for item in st.session_state.history:
-        st.markdown(f"- 🔹 **Input:** {item['text']}")
-        st.markdown(f"   - ✨ **Translated:** {item['translated']}")
-        st.markdown(f"   - ❗ **Label:** {item['label']} with **{item['score']:.2%}** confidence")
-        st.markdown(f"   - 🔧 **Suggestion:** {item['reason']}")
-    # 雷达图演示示例（可替换为动态数据）
-    radar_df = pd.DataFrame({
-        "Category": ["Insult", "Abuse", "Discrimination", "Hate Speech", "Vulgarity"],
-        "Score": [0.7, 0.4, 0.3, 0.5, 0.6]
-    })
-    radar_fig = px.line_polar(radar_df, r='Score', theta='Category', line_close=True, title="⚠️ Risk Radar by Category")
-    st.plotly_chart(radar_fig)
-else:
-    st.info("⚠️ No classification data available yet.")

 import pandas as pd
 import plotly.express as px
+# Step 1: Emoji 翻译模型（你自己训练的模型）
 emoji_model_id = "JenniferHJF/qwen1.5-emoji-finetuned"
 emoji_tokenizer = AutoTokenizer.from_pretrained(emoji_model_id, trust_remote_code=True)
 emoji_model = AutoModelForCausalLM.from_pretrained(
 ).to("cuda" if torch.cuda.is_available() else "cpu")
 emoji_model.eval()
+# Step 2: 可选择的冒犯性文本识别模型
 model_options = {
     "Toxic-BERT": "unitary/toxic-bert",
     "Roberta Offensive": "cardiffnlp/twitter-roberta-base-offensive",
     "BERT Emotion": "bhadresh-savani/bert-base-go-emotion"
 }
+# 页面配置
 st.set_page_config(page_title="Emoji Offensive Text Detector", page_icon="🚨", layout="wide")
+# 页面布局
 with st.sidebar:
+    st.header("🧠 Navigation")
+    section = st.radio("Select Mode:", ["📍 Text Moderation", "📊 Text Analysis"])
+    if section == "📍 Text Moderation":
+        selected_model = st.selectbox("Choose classification model", list(model_options.keys()))
+        selected_model_id = model_options[selected_model]
+        classifier = pipeline("text-classification", model=selected_model_id, device=0 if torch.cuda.is_available() else -1)
+    elif section == "📊 Text Analysis":
+        st.markdown("You can view editing suggestions based on past analyses.")
 # 初始化历史记录
 if "history" not in st.session_state:
     st.session_state.history = []
+# Emoji 文本翻译与分类函数
 def classify_emoji_text(text: str):
     prompt = f"输入：{text}\n输出："
     input_ids = emoji_tokenizer(prompt, return_tensors="pt").to(emoji_model.device)
     st.session_state.history.append({"text": text, "translated": translated_text, "label": label, "score": score, "reason": reasoning})
     return translated_text, label, score, reasoning
+# 功能逻辑
+if section == "📍 Text Moderation":
+    st.title("📍 Offensive Text Classification")
+    st.markdown("### ✍️ Input your sentence:")
     default_text = "你是🐷"
     text = st.text_area("Enter sentence with emojis:", value=default_text, height=150)
+    if st.button("🚦 Analyze"):
         with st.spinner("🔍 Processing..."):
             try:
                 translated, label, score, reason = classify_emoji_text(text)
+                st.markdown("### 🔄 Translated sentence:")
                 st.code(translated, language="text")
+                st.markdown(f"### 🎯 Prediction: `{label}`")
+                st.markdown(f"### 📊 Confidence Score: `{score:.2%}`")
+                st.markdown("### 🧠 Model Explanation:")
                 st.info(reason)
             except Exception as e:
                 st.error(f"❌ An error occurred during processing:\n\n{e}")
+    st.markdown("---")
+    st.markdown("### 🖼️ Or upload a screenshot of bullet comments:")
     uploaded_file = st.file_uploader("Upload an image (JPG/PNG)", type=["jpg", "jpeg", "png"])
     if uploaded_file is not None:
         image = Image.open(uploaded_file)
         st.image(image, caption="Uploaded Screenshot", use_column_width=True)
+        with st.spinner("🧠 Extracting text via OCR..."):
+            ocr_text = pytesseract.image_to_string(image, lang="chi_sim+eng")
+            st.markdown("#### 📋 Extracted Text:")
+            st.code(ocr_text.strip())
+            translated, label, score, reason = classify_emoji_text(ocr_text.strip())
+            st.markdown("### 🔄 Translated sentence:")
+            st.code(translated, language="text")
+            st.markdown(f"### 🎯 Prediction: `{label}`")
+            st.markdown(f"### 📊 Confidence Score: `{score:.2%}`")
+            st.markdown("### 🧠 Model Explanation:")
+            st.info(reason)
+elif section == "📊 Text Analysis":
+    st.title("📊 Violation Analysis Dashboard")
+    if st.session_state.history:
+        st.markdown("### 🧾 Offensive Terms & Suggestions")
+        for item in st.session_state.history:
+            st.markdown(f"- 🔹 **Input:** `{item['text']}`")
+            st.markdown(f"   - ✨ **Translated:** `{item['translated']}`")
+            st.markdown(f"   - ❗ **Label:** `{item['label']}` with **{item['score']:.2%}** confidence")
+            st.markdown(f"   - 🔧 **Suggestion:** {item['reason']}")
+        radar_df = pd.DataFrame({
+            "Category": ["Insult", "Abuse", "Discrimination", "Hate Speech", "Vulgarity"],
+            "Score": [0.7, 0.4, 0.3, 0.5, 0.6]
+        })
+        radar_fig = px.line_polar(radar_df, r='Score', theta='Category', line_close=True, title="⚠️ Risk Radar by Category")
+        st.plotly_chart(radar_fig)
+    else:
+        st.info("⚠️ No classification data available yet.")