import gradio as gr
import torch
import numpy as np
from transformers import AutoModelForSequenceClassification, AutoTokenizer, RobertaForSequenceClassification, RobertaTokenizer
from motif_tagging import detect_motifs
# Load models
# Sentiment model: binary classifier (supportive vs. undermining)
sentiment_model = AutoModelForSequenceClassification.from_pretrained("SamanthaStorm/tether-sentiment")
sentiment_tokenizer = AutoTokenizer.from_pretrained("SamanthaStorm/tether-sentiment")

# Multi-label abuse-pattern model
model_name = "SamanthaStorm/autotrain-c1un8-p8vzo"
model = RobertaForSequenceClassification.from_pretrained(model_name, trust_remote_code=True)
tokenizer = RobertaTokenizer.from_pretrained(model_name, trust_remote_code=True)
LABELS = [...]
THRESHOLDS = {...}
PATTERN_LABELS = LABELS[:15]
DANGER_LABELS = LABELS[15:18]
EXPLANATIONS = {...}
PATTERN_WEIGHTS = {...}
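# The concrete values above are elided. Their expected shapes, inferred only from
# how they are used further down (no real label names are shown here):
#   LABELS          - list of 18 label strings; LABELS[:15] are pattern labels,
#                     LABELS[15:18] are danger labels
#   THRESHOLDS      - {label: per-label probability cut-off}
#   EXPLANATIONS    - {label: human-readable description} (not referenced in the code shown)
#   PATTERN_WEIGHTS - {label: weight applied in calculate_abuse_level}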
def custom_sentiment(text):
    inputs = sentiment_tokenizer(text, return_tensors="pt", truncation=True, padding=True)
    with torch.no_grad():
        outputs = sentiment_model(**inputs)
    probs = torch.nn.functional.softmax(outputs.logits, dim=1)
    label_idx = torch.argmax(probs).item()
    label_map = {0: "supportive", 1: "undermining"}
    label = label_map[label_idx]
    score = probs[0][label_idx].item()
    return {"label": label, "score": score}
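# Example call (output is illustrative only; the actual score depends on the
# fine-tuned sentiment model loaded above):
#   custom_sentiment("You never listen and it's exhausting.")
#   -> {"label": "undermining", "score": 0.93}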
def calculate_abuse_level(scores, thresholds, motif_hits=None):
    weighted_scores = []
    for label, score in zip(LABELS, scores):
        if score > thresholds[label]:
            weight = PATTERN_WEIGHTS.get(label, 1.0)
            weighted_scores.append(score * weight)
    base_score = round(np.mean(weighted_scores) * 100, 2) if weighted_scores else 0.0
    motif_hits = motif_hits or []
    if any(label in motif_hits for label in DANGER_LABELS):
        base_score = max(base_score, 75.0)
    return base_score
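# Worked example (assuming PATTERN_WEIGHTS gives every triggered label a weight of 1.0):
#   two labels cross their thresholds with scores 0.60 and 0.80
#   -> mean(0.60, 0.80) * 100 = 70.0
#   If a matched motif phrase corresponds to one of the DANGER_LABELS, the result
#   is floored at 75.0, so the function returns 75.0 instead of 70.0.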
def interpret_abuse_level(score):
    if score > 80:
        return "Extreme / High Risk"
    elif score > 60:
        return "Severe / Harmful Pattern Present"
    elif score > 40:
        return "Likely Abuse"
    elif score > 20:
        return "Mild Concern"
    return "Very Low / Likely Safe"
def analyze_single_message(text):
    if not text.strip():
        return "No input provided."
    sentiment = custom_sentiment(text)
    # Lower every threshold by 20% when the sentiment model flags the message as undermining
    thresholds = {k: v * 0.8 for k, v in THRESHOLDS.items()} if sentiment['label'] == "undermining" else THRESHOLDS.copy()
    motif_flags, matched_phrases = detect_motifs(text)
    inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True)
    with torch.no_grad():
        outputs = model(**inputs)
    scores = torch.sigmoid(outputs.logits.squeeze(0)).numpy()
    abuse_score = calculate_abuse_level(scores, thresholds, [label for label, _ in matched_phrases])
    summary = interpret_abuse_level(abuse_score)
    return f"Abuse Risk Score: {abuse_score}% → {summary}\nSentiment: {sentiment['label']} ({sentiment['score']*100:.2f}%)"
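# Example of the string returned for one message (illustrative values; the format
# matters because analyze_composite below re-parses the number that precedes '%'):
#   "Abuse Risk Score: 70.0% → Severe / Harmful Pattern Present\nSentiment: undermining (91.24%)"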
def analyze_composite(msg1, msg2, msg3):
    results = [analyze_single_message(t) for t in [msg1, msg2, msg3]]
    # Parse each numeric score back out of the formatted result string; empty messages count as 0
    composite_score = np.mean([
        float(line.split('%')[0].split()[-1]) if 'Abuse Risk Score:' in line else 0
        for line in results
    ])
    final_summary = interpret_abuse_level(composite_score)
    composite_result = f"\n\nComposite Abuse Risk Score: {composite_score:.2f}% → {final_summary}"
    return results[0], results[1], results[2], composite_result
iface = gr.Interface(
    fn=analyze_composite,
    inputs=[
        gr.Textbox(label="Message 1"),
        gr.Textbox(label="Message 2"),
        gr.Textbox(label="Message 3")
    ],
    outputs=[
        gr.Textbox(label="Message 1 Result"),
        gr.Textbox(label="Message 2 Result"),
        gr.Textbox(label="Message 3 Result"),
        gr.Textbox(label="Composite Score Summary")
    ],
    title="Abuse Pattern Detector (Multi-Message)",
    live=False,
    allow_flagging="manual"
)
if __name__ == "__main__":
    iface.launch()