Spaces: Running on Zero
Commit · 3477806
Parent(s): ceb10cd
init
- app.py +142 -0
- requirements.txt +3 -0
app.py
ADDED
@@ -0,0 +1,142 @@
"""
Orify Text Detector – Space edition (Zero-GPU ready)

• Three ModernBERT-base checkpoints (soft-vote)
• Per-line colour coding, probability tool-tips, top-3 AI model hints
• Everything fetched automatically from the weight repo and cached
"""

# ── Imports ──────────────────────────────────────────────────────────────
from pathlib import Path
import html, re, torch, gradio as gr
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from huggingface_hub import hf_hub_download
import spaces

# ── Configuration ────────────────────────────────────────────────────────
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
WEIGHT_REPO = "Sleepyriizi/Orify-Text-Detection-Weights"

FILE_MAP = {  # local alias -> file in repo
    "ensamble_1"    : "ensamble_1",
    "ensamble_2.bin": "ensamble_2.bin",
    "ensamble_3"    : "ensamble_3"
}
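# NOTE: the "ensamble" spelling matches the filenames in the weight repo,
# so these aliases must keep the misspelling for hf_hub_download to resolve.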

BASE_MODEL_NAME = "answerdotai/ModernBERT-base"
NUM_LABELS = 41

LABELS = {  # id → friendly label (unchanged)
    0: "13B", 1: "30B", 2: "65B", 3: "7B", 4: "GLM130B",
    5: "bloom_7b", 6: "bloomz", 7: "cohere", 8: "davinci",
    9: "dolly", 10: "dolly-v2-12b", 11: "flan_t5_base",
    12: "flan_t5_large", 13: "flan_t5_small", 14: "flan_t5_xl",
    15: "flan_t5_xxl", 16: "gemma-7b-it", 17: "gemma2-9b-it",
    18: "gpt-3.5-turbo", 19: "gpt-35", 20: "gpt-4",
    21: "gpt-4o", 22: "gpt-j", 23: "gpt-neox", 24: "human",
    25: "llama3-70b", 26: "llama3-8b", 27: "mixtral-8x7b",
    28: "opt-1.3b", 29: "opt-125m", 30: "opt-13b",
    31: "opt-2.7b", 32: "opt-30b", 33: "opt-350m",
    34: "opt-6.7b", 35: "opt-iml-30b", 36: "opt-iml-max-1.3b",
    37: "t0-11b", 38: "t0-3b", 39: "text-davinci-002", 40: "text-davinci-003"
}

# ── CSS (kept identical) ─────────────────────────────────────────────────
CSS = Path(__file__).with_name("style.css").read_text() if Path(__file__).with_name("style.css").exists() else """
:root{--clr-ai:#ff4d4f;--clr-human:#52c41a;--border:2px solid var(--clr-ai);--radius:10px}
body{font-family:'Roboto Mono',monospace;margin:0 auto;max-width:900px;padding:32px}
textarea,.output-box{width:100%;box-sizing:border-box;padding:16px;font-size:1rem;border:var(--border);border-radius:var(--radius)}
.output-box{min-height:160px}.ai-line{background:rgba(255,77,79,.12);padding:2px 4px;border-radius:4px}
.human-line{background:rgba(82,196,26,.12);padding:2px 4px;border-radius:4px}
.prob-tooltip{cursor:help;border-bottom:1px dotted currentColor}
"""

# ── Model loading (download once, then cached) ───────────────────────────
print("Downloading weights …")
local_paths = {alias: hf_hub_download(WEIGHT_REPO, fname, resume_download=True)
               for alias, fname in FILE_MAP.items()}

print("🧩 Loading tokenizer & models …")
tokeniser = AutoTokenizer.from_pretrained(BASE_MODEL_NAME)

models = []
for alias, path in local_paths.items():
    net = AutoModelForSequenceClassification.from_pretrained(
        BASE_MODEL_NAME, num_labels=NUM_LABELS)
    net.load_state_dict(torch.load(path, map_location=DEVICE))
    net.to(DEVICE).eval()
    models.append(net)
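# All three checkpoints share the ModernBERT-base architecture; only the
# fine-tuned weights differ, so each state dict is loaded into a fresh copy
# of the same backbone. hf_hub_download caches locally, so later restarts
# skip the network fetch.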

# ── Helpers ──────────────────────────────────────────────────────────────
def tidy(txt: str) -> str:
    txt = txt.replace("\r\n", "\n").replace("\r", "\n")   # normalise line endings
    txt = re.sub(r"\n\s*\n+", "\n\n", txt)                # collapse blank-line runs
    txt = re.sub(r"[ \t]+", " ", txt)                     # collapse spaces/tabs
    txt = re.sub(r"(\w+)-\n(\w+)", r"\1\2", txt)          # re-join hyphenated line breaks
    txt = re.sub(r"(?<!\n)\n(?!\n)", " ", txt)            # lone newline -> space
    return txt.strip()

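# Example: tidy("super-\ncalifragilistic\n\n\nnext para") returns
# "supercalifragilistic\n\nnext para" - hard-wrapped lines are merged and
# only blank-line paragraph breaks survive.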
def infer(segment: str):
    """Return (human%, ai%, [top-3 ai model names])."""
    inputs = tokeniser(segment, return_tensors="pt", truncation=True,
                       padding=True).to(DEVICE)
    with torch.no_grad():
        probs = torch.stack([
            torch.softmax(m(**inputs).logits, dim=1) for m in models
        ]).mean(dim=0)[0]
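    # Soft vote: the per-model softmax distributions are averaged, giving the
    # ensemble posterior p = (1/3) * sum_m softmax(logits_m); [0] drops the
    # batch dimension.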

    ai_probs = probs.clone(); ai_probs[24] = 0   # null out human idx
    ai_score = ai_probs.sum().item() * 100
    human_score = 100 - ai_score
    top3 = torch.topk(ai_probs, 3).indices.tolist()
    top3_names = [LABELS[i] for i in top3]
    return human_score, ai_score, top3_names

# ── Inference + explanation ──────────────────────────────────────────────
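# ZeroGPU note: @spaces.GPU below requests a GPU slice only for the duration
# of each analyse() call and releases it when the call returns.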
@spaces.GPU
def analyse(text: str):
    if not text.strip():
        return "⚠️ Please paste or type some text to analyse…"

    lines = tidy(text).split("\n")
    highlighted, h_tot, ai_tot, n = [], 0.0, 0.0, 0

    for ln in lines:
        if not ln.strip():
            highlighted.append("<br>")
            continue
        n += 1
        h, ai, top3 = infer(ln)
        h_tot += h; ai_tot += ai
        tooltip = (f"AI {ai:.2f}% • Top-3: {', '.join(top3)}"
                   if ai > h else f"Human {h:.2f}%")
        cls = "ai-line" if ai > h else "human-line"
        span = (f"<span class='{cls} prob-tooltip' title='{tooltip}'>"
                f"{html.escape(ln)}</span>")  # stdlib escape; gr.utils has no public sanitizer
        highlighted.append(span)

    verdict = (f"<p><strong>Overall verdict:</strong> "
               f"<span class='human-line' style='padding:4px 8px;'>"
               f"Human-written {h_tot/n:.2f}%</span></p>"
               if h_tot >= ai_tot else
               f"<p><strong>Overall verdict:</strong> "
               f"<span class='ai-line' style='padding:4px 8px;'>"
               f"AI-generated {ai_tot/n:.2f}%</span></p>")
    return verdict + "<hr>" + "<br>".join(highlighted)

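# The verdict uses per-line averages (h_tot/n, ai_tot/n), so it reflects the
# mean line-level score rather than a single whole-document prediction.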
# ── Interface ────────────────────────────────────────────────────────────
with gr.Blocks(css=CSS, title="Orify Text Detector") as demo:
    gr.Markdown("""
    ### Orify Text Detector
    Paste any English text and press **Analyse**.
    <span class='human-line'>Green</span> = human | <span class='ai-line'>Red</span> = AI.
    Hover a line to see confidence and the top-3 AI models it resembles.
    """)
    inp = gr.Textbox(lines=8, placeholder="Paste text here …",
                     elem_classes=["input-area"])
    out = gr.HTML("", elem_classes=["output-box"])
    gr.Button("Analyse").click(analyse, inp, out)
    gr.Markdown("<sub>Powered by ModernBERT + Orify Ensemble © 2025</sub>")

if __name__ == "__main__":
    demo.launch()
requirements.txt
ADDED
@@ -0,0 +1,3 @@
gradio
torch
git+https://github.com/huggingface/transformers
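# transformers is pinned to the git main branch, presumably because
# ModernBERT support had not yet shipped in a stable release at commit time.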