File size: 3,016 Bytes
bd9feeb 9284803 691ee4d bd9feeb 9284803 bd9feeb 88f8bd0 b64c976 a8b7aaa b64c976 88f8bd0 b64c976 e375d90 b64c976 bd9feeb a8b7aaa 9284803 88f8bd0 b64c976 bd9feeb b64c976 bd9feeb b64c976 bd9feeb 88f8bd0 b64c976 bd9feeb b64c976 bd9feeb b64c976 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 |
from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM
import torch
import streamlit as st
# Step 1: emoji translation model (a custom-trained checkpoint, per the original note).
emoji_model_id = "jenniferhk008/roberta-hfl-emoji-aug3epoch"


# show_spinner=False keeps this loader from rendering any element, so it can
# safely run before st.set_page_config() further down the script.
@st.cache_resource(show_spinner=False)
def _load_emoji_translator(model_id: str):
    """Load the emoji-translation tokenizer and model, cached across reruns.

    Streamlit re-executes the whole script on every widget interaction;
    without caching, the checkpoint would be re-instantiated each time.

    Args:
        model_id: Hugging Face Hub repository id of the causal LM.

    Returns:
        A ``(tokenizer, model)`` tuple. The model is moved to CUDA (float16)
        when a GPU is available, otherwise kept on CPU (float32), and is
        switched to eval mode.
    """
    use_cuda = torch.cuda.is_available()
    # NOTE(review): trust_remote_code=True lets code shipped in the Hub repo
    # run locally -- only keep it for repositories you control or trust.
    tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)
    model = AutoModelForCausalLM.from_pretrained(
        model_id,
        trust_remote_code=True,
        torch_dtype=torch.float16 if use_cuda else torch.float32,
    ).to("cuda" if use_cuda else "cpu")
    model.eval()
    return tokenizer, model


emoji_tokenizer, emoji_model = _load_emoji_translator(emoji_model_id)
# Step 2: selectable offensive-text classification models.
# Maps the label shown in the UI to the Hugging Face Hub model id.
model_options = {
    "Toxic-BERT": "unitary/toxic-bert",
    "Roberta Offensive": "cardiffnlp/twitter-roberta-base-offensive",
    "BERT Emotion": "bhadresh-savani/bert-base-go-emotion",
}
# Page configuration.
st.set_page_config(
    page_title="Emoji Offensive Text Detector",
    page_icon="🚨",
    layout="wide",
)

# Page title, followed by a short description of the two-model pipeline.
st.title("🧠 Emoji-based Offensive Language Classifier")
_INTRO_MARKDOWN = """
This application translates emojis in a sentence and classifies whether the final sentence is offensive or not using two AI models.
- The **first model** translates emoji or symbolic phrases into standard Chinese text.
- The **second model** performs offensive language detection.
"""
st.markdown(_INTRO_MARKDOWN)
# Sidebar selector for the classification model.
selected_model = st.sidebar.selectbox("Choose classification model", list(model_options))
selected_model_id = model_options[selected_model]


@st.cache_resource
def _load_classifier(model_id: str):
    """Build the text-classification pipeline for ``model_id``, cached per id.

    Streamlit re-executes the script on every interaction; caching avoids
    rebuilding the pipeline on each rerun, and switching back to a model
    that was already selected reuses the existing instance.

    Args:
        model_id: Hugging Face Hub repository id of the classifier.

    Returns:
        A ``transformers`` text-classification pipeline placed on GPU 0 when
        CUDA is available, otherwise on CPU.
    """
    return pipeline(
        "text-classification",
        model=model_id,
        device=0 if torch.cuda.is_available() else -1,
    )


classifier = _load_classifier(selected_model_id)
# Input area: a heading plus a text box pre-filled with an example sentence.
st.markdown("### ✍️ Input your sentence:")
default_text = "你是🐷"
text = st.text_area(
    label="Enter sentence with emojis:",
    value=default_text,
    height=150,
)
# Core logic: translate the emojis, then classify the translated sentence.
def classify_emoji_text(text: str):
    """Translate emojis in ``text`` and classify the resulting sentence.

    The sentence is wrapped in the prompt template, passed to the
    module-level emoji model for greedy generation, and the generated
    continuation is then scored by the module-level ``classifier``.

    Args:
        text: Sentence to analyze; may contain emojis.

    Returns:
        A ``(translated_text, label, score)`` tuple, where ``label`` and
        ``score`` come from the first result returned by ``classifier``.

    Raises:
        ValueError: If ``text`` is empty or contains only whitespace.
    """
    if not text or not text.strip():
        raise ValueError("Input text is empty; please enter a sentence to analyze.")

    prompt = f"输入:{text}\n输出:"
    encoded = emoji_tokenizer(prompt, return_tensors="pt").to(emoji_model.device)
    with torch.no_grad():
        output_ids = emoji_model.generate(**encoded, max_new_tokens=64, do_sample=False)

    # For a causal LM, generate() returns the prompt tokens followed by the
    # continuation. Drop the prompt by position rather than by string match,
    # so the user's own text can never leak into the classified sentence if
    # the marker does not survive the tokenizer round trip.
    prompt_length = encoded["input_ids"].shape[-1]
    decoded = emoji_tokenizer.decode(output_ids[0][prompt_length:], skip_special_tokens=True)

    # If the model echoes the output marker again, keep only what follows
    # its last occurrence; otherwise rsplit leaves the text unchanged.
    translated_text = decoded.rsplit("输出:", 1)[-1].strip()

    # truncation=True guards against inputs longer than the classifier's
    # maximum sequence length.
    result = classifier(translated_text, truncation=True)[0]
    return translated_text, result["label"], result["score"]
# Trigger button: show a usage hint until it is clicked, then run the
# translate-and-classify pipeline and render the results.
analyze_clicked = st.button("🚦 Analyze")
if not analyze_clicked:
    st.info("👈 Please input text and click the button to classify.")
else:
    with st.spinner("🔍 Processing..."):
        try:
            translated, label, score = classify_emoji_text(text)
            st.markdown("### 🔄 Translated sentence:")
            st.code(translated, language="text")
            st.markdown(f"### 🎯 Prediction: `{label}`")
            st.markdown(f"### 📊 Confidence Score: `{score:.2%}`")
        except Exception as err:
            # Top-level UI boundary: surface any failure to the user
            # instead of letting the script crash.
            st.error(f"❌ An error occurred during processing:\n\n{err}")
|