# Tether / app.py
import gradio as gr
import torch
import numpy as np
from transformers import AutoModelForSequenceClassification, AutoTokenizer, RobertaForSequenceClassification, RobertaTokenizer
from motif_tagging import detect_motifs
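# detect_motifs comes from the local motif_tagging module; as used below it is
# expected to return (motif_flags, matched_phrases), where matched_phrases is a
# list of (label, phrase) pairs.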
# Load models
sentiment_model = AutoModelForSequenceClassification.from_pretrained("SamanthaStorm/tether-sentiment")
sentiment_tokenizer = AutoTokenizer.from_pretrained("SamanthaStorm/tether-sentiment")
model_name = "SamanthaStorm/autotrain-c1un8-p8vzo"
model = RobertaForSequenceClassification.from_pretrained(model_name, trust_remote_code=True)
tokenizer = RobertaTokenizer.from_pretrained(model_name, trust_remote_code=True)
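# Both models load on CPU; transformers puts them in eval mode by default, and
# all inference below runs under torch.no_grad().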
LABELS = [...]  # 18 label names (elided here): indices 0-14 are pattern labels, 15-17 are danger labels
THRESHOLDS = {...}  # per-label sigmoid cutoffs (elided here)
PATTERN_LABELS = LABELS[:15]
DANGER_LABELS = LABELS[15:18]
EXPLANATIONS = {...}  # per-label explanation strings (elided here); not referenced in this file
PATTERN_WEIGHTS = {...}  # per-label weights (elided here); labels missing from it default to 1.0 below
def custom_sentiment(text):
    """Classify a message as supportive or undermining with the fine-tuned sentiment model."""
    inputs = sentiment_tokenizer(text, return_tensors="pt", truncation=True, padding=True)
    with torch.no_grad():
        outputs = sentiment_model(**inputs)
    probs = torch.nn.functional.softmax(outputs.logits, dim=1)
    label_idx = torch.argmax(probs).item()
    label_map = {0: "supportive", 1: "undermining"}
    label = label_map[label_idx]
    score = probs[0][label_idx].item()
    return {"label": label, "score": score}
def calculate_abuse_level(scores, thresholds, motif_hits=None):
    """Convert per-label sigmoid scores into a 0-100 abuse score."""
    weighted_scores = []
    for label, score in zip(LABELS, scores):
        if score > thresholds[label]:
            weight = PATTERN_WEIGHTS.get(label, 1.0)
            weighted_scores.append(score * weight)
    base_score = round(np.mean(weighted_scores) * 100, 2) if weighted_scores else 0.0
    # Any matched danger motif floors the score at 75.
    motif_hits = motif_hits or []
    if any(label in motif_hits for label in DANGER_LABELS):
        base_score = max(base_score, 75.0)
    return base_score
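# Illustrative arithmetic (hypothetical labels and weights): if two labels clear
# their thresholds with scores 0.6 (weight 1.0) and 0.8 (weight 1.5), the weighted
# values are [0.6, 1.2], the mean is 0.9, and the base score is 90.0. Note that
# weights above 1.0 can push the mean past 1.0, so base scores can exceed 100.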
def interpret_abuse_level(score):
    if score > 80:
        return "Extreme / High Risk"
    elif score > 60:
        return "Severe / Harmful Pattern Present"
    elif score > 40:
        return "Likely Abuse"
    elif score > 20:
        return "Mild Concern"
    return "Very Low / Likely Safe"
def analyze_single_message(text):
    if not text.strip():
        return "No input provided."
    sentiment = custom_sentiment(text)
    # Undermining sentiment lowers every threshold by 20%, making pattern detection more sensitive.
    thresholds = {k: v * 0.8 for k, v in THRESHOLDS.items()} if sentiment['label'] == "undermining" else THRESHOLDS.copy()
    motif_flags, matched_phrases = detect_motifs(text)  # motif_flags is currently unused
    inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True)
    with torch.no_grad():
        outputs = model(**inputs)
    # Multi-label classification: an independent sigmoid score per label.
    scores = torch.sigmoid(outputs.logits.squeeze(0)).numpy()
    abuse_score = calculate_abuse_level(scores, thresholds, [label for label, _ in matched_phrases])
    summary = interpret_abuse_level(abuse_score)
    return f"Abuse Risk Score: {abuse_score}% — {summary}\nSentiment: {sentiment['label']} ({sentiment['score']*100:.2f}%)"
def analyze_composite(msg1, msg2, msg3):
    results = [analyze_single_message(t) for t in [msg1, msg2, msg3]]
    # Parse the numeric score back out of each result string; empty messages
    # ("No input provided.") count as 0 and pull the composite mean down.
    composite_score = np.mean([
        float(result.split('%')[0].split()[-1]) if 'Abuse Risk Score:' in result else 0.0
        for result in results
    ])
    final_summary = interpret_abuse_level(composite_score)
    composite_result = f"\n\nComposite Abuse Risk Score: {composite_score:.2f}% — {final_summary}"
    return results[0], results[1], results[2], composite_result
iface = gr.Interface(
    fn=analyze_composite,
    inputs=[
        gr.Textbox(label="Message 1"),
        gr.Textbox(label="Message 2"),
        gr.Textbox(label="Message 3")
    ],
    outputs=[
        gr.Textbox(label="Message 1 Result"),
        gr.Textbox(label="Message 2 Result"),
        gr.Textbox(label="Message 3 Result"),
        gr.Textbox(label="Composite Score Summary")
    ],
    title="Abuse Pattern Detector (Multi-Message)",
    live=False,
    allow_flagging="manual"
)
if __name__ == "__main__":
    iface.launch()