import gradio as gr
import torch
import numpy as np
from transformers import AutoModelForSequenceClassification, AutoTokenizer, RobertaForSequenceClassification, RobertaTokenizer
from motif_tagging import detect_motifs

# Load models
sentiment_model = AutoModelForSequenceClassification.from_pretrained("SamanthaStorm/tether-sentiment")
sentiment_tokenizer = AutoTokenizer.from_pretrained("SamanthaStorm/tether-sentiment")

model_name = "SamanthaStorm/autotrain-c1un8-p8vzo"
model = RobertaForSequenceClassification.from_pretrained(model_name, trust_remote_code=True)
tokenizer = RobertaTokenizer.from_pretrained(model_name, trust_remote_code=True)

LABELS = [...]
THRESHOLDS = {...}
PATTERN_LABELS = LABELS[:15]
DANGER_LABELS = LABELS[15:18]
EXPLANATIONS = {...}
PATTERN_WEIGHTS = {...}


def custom_sentiment(text):
    """Classify a message as supportive or undermining with the sentiment model."""
    inputs = sentiment_tokenizer(text, return_tensors="pt", truncation=True, padding=True)
    with torch.no_grad():
        outputs = sentiment_model(**inputs)
    probs = torch.nn.functional.softmax(outputs.logits, dim=1)
    label_idx = torch.argmax(probs).item()
    label_map = {0: "supportive", 1: "undermining"}
    label = label_map[label_idx]
    score = probs[0][label_idx].item()
    return {"label": label, "score": score}


def calculate_abuse_level(scores, thresholds, motif_hits=None):
    """Average the weighted scores of labels that cross their thresholds; floor at 75 if a danger label was matched by a motif."""
    weighted_scores = []
    for label, score in zip(LABELS, scores):
        if score > thresholds[label]:
            weight = PATTERN_WEIGHTS.get(label, 1.0)
            weighted_scores.append(score * weight)
    base_score = round(np.mean(weighted_scores) * 100, 2) if weighted_scores else 0.0
    motif_hits = motif_hits or []
    if any(label in motif_hits for label in DANGER_LABELS):
        base_score = max(base_score, 75.0)
    return base_score


def interpret_abuse_level(score):
    if score > 80:
        return "Extreme / High Risk"
    elif score > 60:
        return "Severe / Harmful Pattern Present"
    elif score > 40:
        return "Likely Abuse"
    elif score > 20:
        return "Mild Concern"
    return "Very Low / Likely Safe"


def analyze_single_message(text):
    if not text.strip():
        return "No input provided."
    sentiment = custom_sentiment(text)
    # Lower the per-label thresholds by 20% when the sentiment model flags the message as undermining
    thresholds = {k: v * 0.8 for k, v in THRESHOLDS.items()} if sentiment['label'] == "undermining" else THRESHOLDS.copy()
    motif_flags, matched_phrases = detect_motifs(text)
    inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True)
    with torch.no_grad():
        outputs = model(**inputs)
    scores = torch.sigmoid(outputs.logits.squeeze(0)).numpy()
    abuse_score = calculate_abuse_level(scores, thresholds, [label for label, _ in matched_phrases])
    summary = interpret_abuse_level(abuse_score)
    return f"Abuse Risk Score: {abuse_score}% — {summary}\nSentiment: {sentiment['label']} ({sentiment['score']*100:.2f}%)"


def analyze_composite(msg1, msg2, msg3):
    results = [analyze_single_message(t) for t in [msg1, msg2, msg3]]
    # Parse the numeric score back out of each result string; messages with no score count as 0
    composite_score = np.mean([
        float(line.split('%')[0].split()[-1]) if 'Abuse Risk Score:' in line else 0
        for line in results
    ])
    final_summary = interpret_abuse_level(composite_score)
    composite_result = f"\n\nComposite Abuse Risk Score: {composite_score:.2f}% — {final_summary}"
    return results[0], results[1], results[2], composite_result


iface = gr.Interface(
    fn=analyze_composite,
    inputs=[
        gr.Textbox(label="Message 1"),
        gr.Textbox(label="Message 2"),
        gr.Textbox(label="Message 3")
    ],
    outputs=[
        gr.Textbox(label="Message 1 Result"),
        gr.Textbox(label="Message 2 Result"),
        gr.Textbox(label="Message 3 Result"),
        gr.Textbox(label="Composite Score Summary")
    ],
    title="Abuse Pattern Detector (Multi-Message)",
    live=False,
    allow_flagging="manual"
)

if __name__ == "__main__":
    iface.launch()