File size: 6,487 Bytes
3477806
f1ccd02
3477806
f1ccd02
 
 
3477806
 
f1ccd02
3477806
919951a
 
3477806
 
 
 
f1ccd02
decced4
919951a
f1ccd02
decced4
f1ccd02
919951a
f1ccd02
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3477806
 
 
 
f1ccd02
3477806
f1ccd02
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3477806
f1ccd02
 
 
 
 
3477806
f1ccd02
3477806
f1ccd02
 
3477806
f1ccd02
 
3477806
f1ccd02
 
 
 
 
 
 
 
 
 
 
 
 
3477806
919951a
f1ccd02
 
 
 
 
 
 
 
 
 
 
 
3477806
 
f1ccd02
3477806
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
"""
  Orify Text Detector  – full-width UI + explicit verdict (Zero-GPU ready)

  • Three ModernBERT-base checkpoints (soft-vote)
  • Per-line highlights, hover tool-tips, and a big verdict banner
  • Weights auto-downloaded & cached
"""

# ── Imports ─────────────────────────────────────────────────────────────
from pathlib import Path
import os, re, html, typing
import torch, gradio as gr
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from huggingface_hub import hf_hub_download
import spaces

# ── Robust torch.compile shim (same as before) ──────────────────────────
if hasattr(torch, "compile"):
    def _no_compile(model: typing.Any = None, *args, **kwargs):
        """Stand-in for ``torch.compile`` that never compiles anything.

        When the first argument is callable it is handed back untouched
        (direct-call usage).  Otherwise an identity decorator is returned,
        which covers usage with only options supplied.  Any extra
        positional or keyword arguments are accepted and ignored.
        """
        def _identity(fn):
            return fn

        return model if callable(model) else _identity

    # Replace the real entry point with the pass-through stub.
    torch.compile = _no_compile
    # Presumably keeps TorchInductor switched off as well -- TODO confirm
    # that the installed torch version honours this variable.
    os.environ["TORCHINDUCTOR_DISABLED"] = "1"

# ── Config ──────────────────────────────────────────────────────────────
# Use the GPU when PyTorch reports one is available, otherwise the CPU.
DEVICE = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# Hub repository the checkpoint files are downloaded from.
WEIGHT_REPO = "Sleepyriizi/Orify-Text-Detection-Weights"

# Alias -> file name inside WEIGHT_REPO (each alias equals its file name).
FILE_MAP = {
    fname: fname
    for fname in ("ensamble_1", "ensamble_2.bin", "ensamble_3")
}

# Backbone used for the tokenizer and for every ensemble member.
BASE_MODEL = "answerdotai/ModernBERT-base"

# Size of the classification head; matches the number of LABELS entries.
NUM_LABELS = 41

# Class index -> source name.  Position in this list is the class index,
# so the order must not change; index 24 is the "human" class.
LABELS = dict(enumerate([
    "13B", "30B", "65B", "7B", "GLM130B",
    "bloom_7b", "bloomz", "cohere", "davinci",
    "dolly", "dolly-v2-12b",
    "flan_t5_base", "flan_t5_large", "flan_t5_small",
    "flan_t5_xl", "flan_t5_xxl",
    "gemma-7b-it", "gemma2-9b-it",
    "gpt-3.5-turbo", "gpt-35", "gpt-4", "gpt-4o", "gpt-j", "gpt-neox",
    "human",
    "llama3-70b", "llama3-8b", "mixtral-8x7b",
    "opt-1.3b", "opt-125m", "opt-13b", "opt-2.7b",
    "opt-30b", "opt-350m", "opt-6.7b",
    "opt-iml-30b", "opt-iml-max-1.3b",
    "t0-11b", "t0-3b",
    "text-davinci-002", "text-davinci-003",
]))

# ── CSS (full-width layout) ─────────────────────────────────────────────
# Stylesheet handed to gr.Blocks(css=...).
#   --ai / --human   : accent colours (red / green) declared on :root
#   .output-box      : bordered panel class applied to the results HTML
#   .ai-line         : red-tinted background for lines judged AI
#   .human-line      : green-tinted background for lines judged human
#   .prob-tooltip    : help cursor + dotted underline on hoverable lines
CSS = """
:root{--ai:#ff4d4f;--human:#52c41a;--border:2px solid var(--ai);--radius:10px}
body{font-family:'Roboto Mono',monospace;margin:0;padding:32px;box-sizing:border-box}
input,textarea,.output-box{width:100%;box-sizing:border-box}
textarea{padding:16px;font-size:1rem;border:var(--border);border-radius:var(--radius)}
.output-box{min-height:200px;border:var(--border);border-radius:var(--radius);padding:16px}
.ai-line{background:rgba(255,77,79,.12);padding:2px 4px;border-radius:4px}
.human-line{background:rgba(82,196,26,.12);padding:2px 4px;border-radius:4px}
.prob-tooltip{cursor:help;border-bottom:1px dotted currentColor}
"""

# ── Load weights & models - one time ─────────────────────────────────────
# Download every checkpoint listed in FILE_MAP from WEIGHT_REPO.  The
# result maps each alias to the local path returned by hf_hub_download.
# `resume_download` is no longer passed: it is deprecated in
# huggingface_hub, which resumes interrupted downloads on its own.
print("🔄 Downloading weights …")
local_paths = {
    alias: hf_hub_download(WEIGHT_REPO, filename)
    for alias, filename in FILE_MAP.items()
}

print("🧩 Initialising models …")
# One tokenizer is shared by all ensemble members (same backbone).
tok = AutoTokenizer.from_pretrained(BASE_MODEL)

# Build one classifier per checkpoint: instantiate the backbone with a
# NUM_LABELS-way head, overwrite its parameters with the downloaded state
# dict, move it to DEVICE and switch it to eval mode.
models = []
for weight_path in local_paths.values():
    model = AutoModelForSequenceClassification.from_pretrained(
        BASE_MODEL, num_labels=NUM_LABELS
    )
    # weights_only=True limits unpickling to tensors and plain containers,
    # so a tampered checkpoint file cannot execute arbitrary code on load.
    state_dict = torch.load(weight_path, map_location=DEVICE, weights_only=True)
    model.load_state_dict(state_dict)
    model.to(DEVICE).eval()
    models.append(model)

# ── Helpers ─────────────────────────────────────────────────────────────
def tidy(txt: str) -> str:
    """Normalise pasted text before it is split into lines for scoring.

    Steps, applied in this order:
      1. convert CRLF / CR line endings to LF;
      2. collapse any run of blank lines into exactly one blank line;
      3. squeeze runs of spaces/tabs into a single space;
      4. re-join words hyphenated across a line break;
      5. turn every remaining single newline into a space, so only
         paragraph breaks (double newlines) survive;
      6. strip leading and trailing whitespace.
    """
    # (pattern, replacement) pairs; the order is significant.
    rewrites = (
        (r"\n\s*\n+", "\n\n"),
        (r"[ \t]+", " "),
        (r"(\w+)-\n(\w+)", r"\1\2"),
        (r"(?<!\n)\n(?!\n)", " "),
    )
    cleaned = txt.replace("\r\n", "\n").replace("\r", "\n")
    for pattern, replacement in rewrites:
        cleaned = re.sub(pattern, replacement, cleaned)
    return cleaned.strip()

def infer(seg: str):
    """Score one text segment with the soft-voting ensemble.

    Every model in ``models`` produces a softmax distribution over the
    classes; the distributions are averaged.  The "human" class (index 24
    in ``LABELS``) is zeroed out of a copy, and what remains is summed to
    give the AI share.

    Returns:
        ``(human, ai, top3)`` where ``ai`` is the summed non-human
        probability as a percentage, ``human`` is ``100 - ai``, and
        ``top3`` holds the names of the three highest-probability
        non-human classes.
    """
    encoded = tok(seg, return_tensors="pt", truncation=True, padding=True).to(DEVICE)
    with torch.no_grad():
        per_model = [
            torch.softmax(model(**encoded).logits, dim=1) for model in models
        ]
        # Average across models, then take row 0 (one segment per call).
        probs = torch.stack(per_model).mean(0)[0]

    # Work on a copy so the averaged distribution itself is left intact.
    ai_probs = probs.clone()
    ai_probs[24] = 0

    ai = ai_probs.sum().item() * 100
    human = 100 - ai

    best = torch.topk(ai_probs, 3).indices.tolist()
    top3 = [LABELS[idx] for idx in best]
    return human, ai, top3

# ── Main analyse fn ─────────────────────────────────────────────────────
@spaces.GPU
def analyse(txt: str):
    """Classify ``txt`` line by line and render the result as HTML.

    The text is normalised with ``tidy`` and split on newlines.  Every
    non-blank line is scored by ``infer`` and wrapped in a ``<span>``
    tinted as AI or human, with a hover tooltip giving the percentage
    (plus the top-3 model guesses for AI lines).  Blank lines become
    ``<br>``.  The banner at the top reports whichever of the mean human
    and mean AI percentages is larger (ties go to human).

    Args:
        txt: Raw user input; ``None``, empty or whitespace-only input
            yields a prompt message instead of a verdict.

    Returns:
        An HTML string: the verdict heading, a rule, then the highlighted
        lines joined by ``<br>``.
    """
    prompt = "✏️ Please paste or type some text to analyse…"
    # `not txt` also covers None, which would otherwise crash on .strip().
    if not txt or not txt.strip():
        return prompt

    rendered = []
    human_total = ai_total = 0.0
    scored = 0
    for line in tidy(txt).split("\n"):
        if not line.strip():
            rendered.append("<br>")
            continue
        human, ai, top3 = infer(line)
        scored += 1
        human_total += human
        ai_total += ai
        if ai > human:
            css_class = "ai-line"
            tip = f"AI {ai:.2f}% • Top-3: {', '.join(top3)}"
        else:
            css_class = "human-line"
            tip = f"Human {human:.2f}%"
        # Escape both the tooltip (it lands inside a quoted attribute) and
        # the user's own text before embedding them in markup.
        rendered.append(
            f"<span class='{css_class} prob-tooltip' title='{html.escape(tip)}'>"
            f"{html.escape(line)}</span>"
        )

    # Defensive: never divide by zero if no line ended up being scored.
    if not scored:
        return prompt

    human_avg, ai_avg = human_total / scored, ai_total / scored
    if human_avg >= ai_avg:
        verdict = (
            "<span class='human-line' style='padding:6px 10px;font-weight:bold'>"
            f"Human-written {human_avg:.2f}%</span>"
        )
    else:
        verdict = (
            "<span class='ai-line' style='padding:6px 10px;font-weight:bold'>"
            f"AI-generated {ai_avg:.2f}%</span>"
        )
    return f"<h3>{verdict}</h3><hr>" + "<br>".join(rendered)

# ── Gradio UI ───────────────────────────────────────────────────────────
# Build the page: heading, usage legend, input box, trigger button, HTML
# results panel and footer.  `demo` is the Blocks app launched below.
with gr.Blocks(css=CSS,title="Orify Text Detector") as demo:
    gr.Markdown("## Orify Text Detector")
    # Legend reuses the same CSS classes that analyse() puts on each line.
    gr.Markdown(
        "Paste text, click **Analyse**.<br>"
        "<span class='human-line'>Green</span>=human &nbsp;|&nbsp; "
        "<span class='ai-line'>Red</span>=AI.<br>"
        "Hover a line to see confidence & top-3 AI models."
    )
    inp=gr.Textbox(lines=8,placeholder="Paste text here …")
    btn=gr.Button("Analyse")
    out=gr.HTML(elem_classes=["output-box"])
    # Clicking the button sends the textbox value to analyse() and shows
    # the returned HTML in the output panel.
    btn.click(analyse,inp,out)
    # Footer: the copyright sign was previously stored as mojibake.
    gr.Markdown("<sub>Powered by ModernBERT + Orify Ensemble © 2025</sub>")

# Start the Gradio server only when this file is executed as a script,
# not when it is imported as a module.
if __name__=="__main__":
    demo.launch()