Sleepyriizi committed on
Commit
3477806
Β·
1 Parent(s): ceb10cd
Files changed (2) hide show
  1. app.py +142 -0
  2. requirements.txt +3 -0
app.py ADDED
@@ -0,0 +1,142 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Orify Text Detector – Space edition (Zero-GPU ready)
3
+
4
+ β€’ Three ModernBERT-base checkpoints (soft-vote)
5
+ β€’ Per-line colour coding, probability tool-tips, top-3 AI model hints
6
+ β€’ Everything fetched automatically from the weight repo and cached
7
+ """
8
+
9
# ── Imports ──────────────────────────────────────────────────────────────
import html
import re
from pathlib import Path

import gradio as gr
import spaces
import torch
from huggingface_hub import hf_hub_download
from transformers import AutoModelForSequenceClassification, AutoTokenizer
15
+
16
# ── Configuration ────────────────────────────────────────────────────────
# Run on GPU when one is visible (Zero-GPU Spaces attach one on demand).
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Weight repository holding the three fine-tuned ensemble checkpoints.
WEIGHT_REPO = "Sleepyriizi/Orify-Text-Detection-Weights"

# Local alias -> file name inside WEIGHT_REPO.
FILE_MAP = {
    "ensamble_1": "ensamble_1",
    "ensamble_2.bin": "ensamble_2.bin",
    "ensamble_3": "ensamble_3",
}

# Shared backbone each checkpoint was fine-tuned from.
BASE_MODEL_NAME = "answerdotai/ModernBERT-base"
NUM_LABELS = 41

# Class id -> friendly label. Index 24 ("human") marks non-AI text; every
# other id names the generating model family.
LABELS = {
    0: "13B",
    1: "30B",
    2: "65B",
    3: "7B",
    4: "GLM130B",
    5: "bloom_7b",
    6: "bloomz",
    7: "cohere",
    8: "davinci",
    9: "dolly",
    10: "dolly-v2-12b",
    11: "flan_t5_base",
    12: "flan_t5_large",
    13: "flan_t5_small",
    14: "flan_t5_xl",
    15: "flan_t5_xxl",
    16: "gemma-7b-it",
    17: "gemma2-9b-it",
    18: "gpt-3.5-turbo",
    19: "gpt-35",
    20: "gpt-4",
    21: "gpt-4o",
    22: "gpt-j",
    23: "gpt-neox",
    24: "human",
    25: "llama3-70b",
    26: "llama3-8b",
    27: "mixtral-8x7b",
    28: "opt-1.3b",
    29: "opt-125m",
    30: "opt-13b",
    31: "opt-2.7b",
    32: "opt-30b",
    33: "opt-350m",
    34: "opt-6.7b",
    35: "opt-iml-30b",
    36: "opt-iml-max-1.3b",
    37: "t0-11b",
    38: "t0-3b",
    39: "text-davinci-002",
    40: "text-davinci-003",
}
43
+
44
# ── CSS (kept identical) ────────────────────────────────────────────────
# Prefer an adjacent style.css when the Space ships one; otherwise fall
# back to the inline stylesheet below.
_css_path = Path(__file__).with_name("style.css")
CSS = _css_path.read_text() if _css_path.exists() else """
:root{--clr-ai:#ff4d4f;--clr-human:#52c41a;--border:2px solid var(--clr-ai);--radius:10px}
body{font-family:'Roboto Mono',monospace;margin:0 auto;max-width:900px;padding:32px}
textarea,.output-box{width:100%;box-sizing:border-box;padding:16px;font-size:1rem;border:var(--border);border-radius:var(--radius)}
.output-box{min-height:160px}.ai-line{background:rgba(255,77,79,.12);padding:2px 4px;border-radius:4px}
.human-line{background:rgba(82,196,26,.12);padding:2px 4px;border-radius:4px}
.prob-tooltip{cursor:help;border-bottom:1px dotted currentColor}
"""
53
+
54
# ── Model loading (download once, then cached) ───────────────────────────
# hf_hub_download hits the local HF cache after the first run, so Space
# restarts are fast. `resume_download=True` was removed: it is deprecated
# in recent huggingface_hub releases (downloads always resume).
print("🔄 Downloading weights …")
local_paths = {alias: hf_hub_download(WEIGHT_REPO, fname)
               for alias, fname in FILE_MAP.items()}

print("🧩 Loading tokenizer & models …")
tokeniser = AutoTokenizer.from_pretrained(BASE_MODEL_NAME)

# Build one classifier per checkpoint: same ModernBERT backbone, each with
# its own fine-tuned state dict. All are kept in eval mode for inference.
models = []
for alias, path in local_paths.items():
    net = AutoModelForSequenceClassification.from_pretrained(
        BASE_MODEL_NAME, num_labels=NUM_LABELS)
    # NOTE(review): torch.load un-pickles arbitrary objects. The weights
    # come from a fixed first-party repo, but consider weights_only=True
    # (torch >= 2.0) to harden this.
    net.load_state_dict(torch.load(path, map_location=DEVICE))
    net.to(DEVICE).eval()
    models.append(net)
69
+
70
# ── Helpers ──────────────────────────────────────────────────────────────
def tidy(txt: str) -> str:
    """Normalise whitespace in *txt* for line-by-line classification.

    Unifies newline styles, collapses runs of blank lines to a single
    paragraph break, squeezes spaces/tabs, re-joins words hyphenated at a
    line break, and turns lone newlines into spaces (paragraph breaks
    survive). Returns the stripped result.
    """
    cleaned = txt.replace("\r\n", "\n").replace("\r", "\n")
    for pattern, repl in (
        (r"\n\s*\n+", "\n\n"),        # blank-line runs -> one blank line
        (r"[ \t]+", " "),             # collapse horizontal whitespace
        (r"(\w+)-\n(\w+)", r"\1\2"),  # re-join hyphenated line breaks
        (r"(?<!\n)\n(?!\n)", " "),    # lone newline -> space
    ):
        cleaned = re.sub(pattern, repl, cleaned)
    return cleaned.strip()
78
+
79
def infer(segment: str):
    """Soft-vote the ensemble on one text segment.

    Returns ``(human_pct, ai_pct, top3_ai_names)`` where the two
    percentages sum to 100 and ``top3_ai_names`` lists the three most
    probable AI-model labels (the human class is excluded from the
    ranking).
    """
    enc = tokeniser(segment, return_tensors="pt", truncation=True,
                    padding=True).to(DEVICE)
    with torch.no_grad():
        per_model = [torch.softmax(net(**enc).logits, dim=1)
                     for net in models]
        mean_probs = torch.stack(per_model).mean(dim=0)[0]

    ai_only = mean_probs.clone()
    ai_only[24] = 0  # zero out the "human" class before ranking AI models
    ai_pct = ai_only.sum().item() * 100
    human_pct = 100 - ai_pct
    best = torch.topk(ai_only, 3).indices.tolist()
    return human_pct, ai_pct, [LABELS[i] for i in best]
94
+
95
# ── Inference + explanation ──────────────────────────────────────────────
@spaces.GPU
def analyse(text: str):
    """Classify *text* line-by-line and build an HTML report.

    Returns an HTML string: an overall verdict paragraph followed by each
    line highlighted green (human) or red (AI), with a hover tooltip
    showing the confidence and, for AI lines, the top-3 candidate models.
    """
    if not text.strip():
        return "✏️ Please paste or type some text to analyse…"

    lines = tidy(text).split("\n")
    highlighted, h_tot, ai_tot, n = [], 0.0, 0.0, 0

    for ln in lines:
        if not ln.strip():
            highlighted.append("<br>")
            continue
        n += 1
        h, ai, top3 = infer(ln)
        h_tot += h
        ai_tot += ai
        tooltip = (f"AI {ai:.2f}% • Top-3: {', '.join(top3)}"
                   if ai > h else f"Human {h:.2f}%")
        cls = "ai-line" if ai > h else "human-line"
        # BUG FIX: gr.utils.sanitize_html is not a public Gradio API and
        # raises AttributeError at runtime. Escape the user text (and the
        # tooltip, so a stray quote cannot break out of the title
        # attribute) with the stdlib instead.
        span = (f"<span class='{cls} prob-tooltip' "
                f"title='{html.escape(tooltip, quote=True)}'>"
                f"{html.escape(ln)}</span>")
        highlighted.append(span)

    # n >= 1 here: tidy() of a non-blank input yields at least one
    # non-empty line, so the division is safe.
    if h_tot >= ai_tot:
        verdict = (f"<p><strong>Overall verdict:</strong> "
                   f"<span class='human-line' style='padding:4px 8px;'>"
                   f"Human-written {h_tot/n:.2f}%</span></p>")
    else:
        verdict = (f"<p><strong>Overall verdict:</strong> "
                   f"<span class='ai-line' style='padding:4px 8px;'>"
                   f"AI-generated {ai_tot/n:.2f}%</span></p>")
    return verdict + "<hr>" + "<br>".join(highlighted)
126
+
127
# ── Interface ────────────────────────────────────────────────────────────
# Assemble the Gradio UI: instructions, input box, analyse button, and the
# HTML output pane that receives the highlighted report.
with gr.Blocks(css=CSS, title="Orify Text Detector") as demo:
    gr.Markdown("""
    ### Orify Text Detector
    Paste any English text and press **Analyse**.
    <span class='human-line'>Green</span> = human | <span class='ai-line'>Red</span> = AI.
    Hover a line to see confidence and the top-3 AI models it resembles.
    """)
    text_in = gr.Textbox(lines=8, placeholder="Paste text here …",
                         elem_classes=["input-area"])
    report_out = gr.HTML("", elem_classes=["output-box"])
    run_btn = gr.Button("Analyse")
    run_btn.click(analyse, text_in, report_out)
    gr.Markdown("<sub>Powered by ModernBERT + Orify Ensemble © 2025</sub>")

if __name__ == "__main__":
    demo.launch()
requirements.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
gradio
torch
git+https://github.com/huggingface/transformers
huggingface_hub
spaces