"""
Orify Text Detector β full-width UI + explicit verdict (Zero-GPU ready)
β’ Three ModernBERT-base checkpoints (soft-vote)
β’ Per-line highlights, hover tool-tips, and a big verdict banner
β’ Weights auto-downloaded & cached
"""
# ── Imports ─────────────────────────────────────────────────────────────
from pathlib import Path
import os, re, html, typing
import torch, gradio as gr
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from huggingface_hub import hf_hub_download
import spaces
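# NOTE: the imports above are assumed to be satisfied by the Space's
# requirements.txt / runtime image; `spaces` supplies the @spaces.GPU
# decorator used for Zero-GPU scheduling further down.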
# ── Robust torch.compile shim (same as before) ──────────────────────────
if hasattr(torch, "compile"):
    def _no_compile(model: typing.Any = None, *args, **kwargs):
        """No-op replacement for torch.compile."""
        if callable(model):
            return model              # torch.compile(model, ...) call style
        return lambda fn: fn          # torch.compile(**kwargs) decorator style
    torch.compile = _no_compile
    os.environ["TORCHINDUCTOR_DISABLED"] = "1"
# ── Config ──────────────────────────────────────────────────────────────
# NB: on Zero-GPU hardware, CUDA is typically only attached inside
# @spaces.GPU-decorated calls, so this import-time probe may resolve to CPU.
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
WEIGHT_REPO = "Sleepyriizi/Orify-Text-Detection-Weights"
FILE_MAP = {"ensamble_1": "ensamble_1",          # "ensamble" (sic): these keys
            "ensamble_2.bin": "ensamble_2.bin",  # mirror the filenames stored
            "ensamble_3": "ensamble_3"}          # in the weight repository
BASE_MODEL = "answerdotai/ModernBERT-base"
NUM_LABELS = 41
LABELS = {i:name for i, name in enumerate([
"13B","30B","65B","7B","GLM130B","bloom_7b","bloomz","cohere","davinci",
"dolly","dolly-v2-12b","flan_t5_base","flan_t5_large","flan_t5_small",
"flan_t5_xl","flan_t5_xxl","gemma-7b-it","gemma2-9b-it","gpt-3.5-turbo",
"gpt-35","gpt-4","gpt-4o","gpt-j","gpt-neox","human","llama3-70b",
"llama3-8b","mixtral-8x7b","opt-1.3b","opt-125m","opt-13b","opt-2.7b",
"opt-30b","opt-350m","opt-6.7b","opt-iml-30b","opt-iml-max-1.3b",
"t0-11b","t0-3b","text-davinci-002","text-davinci-003"
])}
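# The "human" class sits at index 24 of LABELS above; keep that as a named
# constant so the human/AI split in infer() below avoids a magic number.
HUMAN_IDX = 24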
# ── CSS (full-width layout) ─────────────────────────────────────────────
CSS = """
:root{--ai:#ff4d4f;--human:#52c41a;--border:2px solid var(--ai);--radius:10px}
body{font-family:'Roboto Mono',monospace;margin:0;padding:32px;box-sizing:border-box}
input,textarea,.output-box{width:100%;box-sizing:border-box}
textarea{padding:16px;font-size:1rem;border:var(--border);border-radius:var(--radius)}
.output-box{min-height:200px;border:var(--border);border-radius:var(--radius);padding:16px}
.ai-line{background:rgba(255,77,79,.12);padding:2px 4px;border-radius:4px}
.human-line{background:rgba(82,196,26,.12);padding:2px 4px;border-radius:4px}
.prob-tooltip{cursor:help;border-bottom:1px dotted currentColor}
"""
# ── Load weights & models (one-time) ────────────────────────────────────
print("Downloading weights …")
local_paths = {a: hf_hub_download(WEIGHT_REPO, f, resume_download=True)
               for a, f in FILE_MAP.items()}

print("🧩 Initialising models …")
tok = AutoTokenizer.from_pretrained(BASE_MODEL)
models = []
for p in local_paths.values():
    m = AutoModelForSequenceClassification.from_pretrained(BASE_MODEL,
                                                           num_labels=NUM_LABELS)
    m.load_state_dict(torch.load(p, map_location=DEVICE))
    m.to(DEVICE).eval()
    models.append(m)
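# Cheap sanity check: the soft-vote in infer() only makes sense if every
# checkpoint exposes the same 41-way classification head.
assert all(m.config.num_labels == NUM_LABELS for m in models)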
# ── Helpers ─────────────────────────────────────────────────────────────
def tidy(txt: str) -> str:
    """Normalise whitespace and re-join soft-wrapped lines."""
    txt = txt.replace("\r\n", "\n").replace("\r", "\n")
    txt = re.sub(r"\n\s*\n+", "\n\n", txt)         # collapse runs of blank lines
    txt = re.sub(r"[ \t]+", " ", txt)              # collapse spaces / tabs
    txt = re.sub(r"(\w+)-\n(\w+)", r"\1\2", txt)   # re-join hyphenated breaks
    txt = re.sub(r"(?<!\n)\n(?!\n)", " ", txt)     # unwrap single newlines
    return txt.strip()
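# Worked example (illustrative input):
#   tidy("word-\nwrap\n\n\nnext  line") == "wordwrap\n\nnext line"
# Hyphenated breaks are re-joined, blank-line runs collapse to one paragraph
# break, and lone newlines inside a paragraph become spaces.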
def infer(seg: str):
    """Soft-vote: average the softmax outputs of all ensemble members."""
    inp = tok(seg, return_tensors="pt", truncation=True, padding=True).to(DEVICE)
    with torch.no_grad():
        probs = torch.stack([torch.softmax(m(**inp).logits, dim=1)
                             for m in models]).mean(0)[0]
    ai_probs = probs.clone()
    ai_probs[HUMAN_IDX] = 0                        # drop the human class
    ai = ai_probs.sum().item() * 100               # remaining mass = AI share
    human = 100 - ai
    top3 = [LABELS[i] for i in torch.topk(ai_probs, 3).indices.tolist()]
    return human, ai, top3
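# Arithmetic sketch (made-up numbers): if the averaged softmax puts 0.70 on
# "human" and 0.30 across the 40 AI labels, zeroing HUMAN_IDX leaves exactly
# 0.30, so infer() returns human == 70.0 and ai == 30.0; the two always sum
# to 100 because the full distribution sums to 1.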
# ── Main analyse fn ─────────────────────────────────────────────────────
@spaces.GPU
def analyse(txt: str):
    if not txt.strip():
        return "✏️ Please paste or type some text to analyse…"

    lines = tidy(txt).split("\n")
    out, h_sum, ai_sum, n = [], 0.0, 0.0, 0

    for ln in lines:
        if not ln.strip():            # blank line = paragraph break
            out.append("<br>")
            continue
        n += 1
        h, ai, top3 = infer(ln)
        h_sum += h
        ai_sum += ai
        cls = "ai-line" if ai > h else "human-line"
        tip = (f"AI {ai:.2f}% • Top-3: {', '.join(top3)}"
               if ai > h else f"Human {h:.2f}%")
        out.append(f"<span class='{cls} prob-tooltip' title='{tip}'>"
                   f"{html.escape(ln)}</span>")

    human_avg, ai_avg = h_sum / n, ai_sum / n
    verdict = (f"<span class='human-line' style='padding:6px 10px;font-weight:bold'>"
               f"Human-written {human_avg:.2f}%</span>"
               if human_avg >= ai_avg else
               f"<span class='ai-line' style='padding:6px 10px;font-weight:bold'>"
               f"AI-generated {ai_avg:.2f}%</span>")
    return f"<h3>{verdict}</h3><hr>" + "<br>".join(out)
# ── Gradio UI ───────────────────────────────────────────────────────────
with gr.Blocks(css=CSS, title="Orify Text Detector") as demo:
    gr.Markdown("## Orify Text Detector")
    gr.Markdown(
        "Paste text, click **Analyse**.<br>"
        "<span class='human-line'>Green</span> = human | "
        "<span class='ai-line'>Red</span> = AI.<br>"
        "Hover a line to see confidence & top-3 AI models."
    )
    inp = gr.Textbox(lines=8, placeholder="Paste text here …")
    btn = gr.Button("Analyse")
    out = gr.HTML(elem_classes=["output-box"])
    btn.click(analyse, inp, out)
    gr.Markdown("<sub>Powered by ModernBERT + Orify Ensemble © 2025</sub>")

if __name__ == "__main__":
    demo.launch()