# Sleepyriizi's picture
# Update app.py
# f1ccd02 verified
"""
Orify Text Detector – full-width UI + explicit verdict (Zero-GPU ready)
• Three ModernBERT-base checkpoints (soft-vote)
• Per-line highlights, hover tool-tips, and a big verdict banner
β€’ Weights auto-downloaded & cached
"""
# ── Imports ─────────────────────────────────────────────────────────────
from pathlib import Path
import os, re, html, typing
import torch, gradio as gr
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from huggingface_hub import hf_hub_download
import spaces
# ── Robust torch.compile shim (same as before) ──────────────────────────
if hasattr(torch, "compile"):

    def _no_compile(model: typing.Any = None, *args, **kwargs):
        """Stand-in for ``torch.compile`` that performs no compilation.

        Called directly on a callable (``torch.compile(fn)``) it hands the
        callable back untouched.  Called with no callable (the
        ``@torch.compile(...)`` decorator-factory form) it returns an
        identity decorator.  Extra positional/keyword options are accepted
        and ignored.
        """
        if not callable(model):
            return lambda fn: fn
        return model

    # Replace the real entry point so every later torch.compile call is a no-op.
    torch.compile = _no_compile

# Companion environment flag set together with the shim.
# NOTE(review): presumably read by TorchInductor to keep it switched off — confirm.
os.environ["TORCHINDUCTOR_DISABLED"] = "1"
# ── Config ──────────────────────────────────────────────────────────────
# Use CUDA when torch reports it as available, otherwise fall back to the CPU.
DEVICE = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# Hub repository holding the fine-tuned ensemble checkpoints.
WEIGHT_REPO = "Sleepyriizi/Orify-Text-Detection-Weights"

# alias -> file name inside WEIGHT_REPO (alias and file name are identical).
FILE_MAP = {
    checkpoint: checkpoint
    for checkpoint in ("ensamble_1", "ensamble_2.bin", "ensamble_3")
}

# Backbone used for the tokenizer and for every ensemble member.
BASE_MODEL = "answerdotai/ModernBERT-base"

# Size of the classification head; matches the number of entries in LABELS.
NUM_LABELS = 41

# class index -> source name.  Index 24 is the only non-AI class ("human").
LABELS = dict(enumerate((
    "13B", "30B", "65B", "7B", "GLM130B", "bloom_7b", "bloomz", "cohere",
    "davinci", "dolly", "dolly-v2-12b", "flan_t5_base", "flan_t5_large",
    "flan_t5_small", "flan_t5_xl", "flan_t5_xxl", "gemma-7b-it",
    "gemma2-9b-it", "gpt-3.5-turbo", "gpt-35", "gpt-4", "gpt-4o", "gpt-j",
    "gpt-neox", "human", "llama3-70b", "llama3-8b", "mixtral-8x7b",
    "opt-1.3b", "opt-125m", "opt-13b", "opt-2.7b", "opt-30b", "opt-350m",
    "opt-6.7b", "opt-iml-30b", "opt-iml-max-1.3b", "t0-11b", "t0-3b",
    "text-davinci-002", "text-davinci-003",
)))
# ── CSS (full-width layout) ─────────────────────────────────────────────
# Stylesheet handed to gr.Blocks(css=...) when the UI is built.
#   --ai / --human      : accent colours (red / green) used for borders and highlights
#   .output-box         : class attached to the gr.HTML result component
#   .ai-line/.human-line: tinted backgrounds for lines judged AI / human
#   .prob-tooltip       : dotted underline + help cursor on spans carrying a title tooltip
CSS = """
:root{--ai:#ff4d4f;--human:#52c41a;--border:2px solid var(--ai);--radius:10px}
body{font-family:'Roboto Mono',monospace;margin:0;padding:32px;box-sizing:border-box}
input,textarea,.output-box{width:100%;box-sizing:border-box}
textarea{padding:16px;font-size:1rem;border:var(--border);border-radius:var(--radius)}
.output-box{min-height:200px;border:var(--border);border-radius:var(--radius);padding:16px}
.ai-line{background:rgba(255,77,79,.12);padding:2px 4px;border-radius:4px}
.human-line{background:rgba(82,196,26,.12);padding:2px 4px;border-radius:4px}
.prob-tooltip{cursor:help;border-bottom:1px dotted currentColor}
"""
# ── Load weights & models - one time ─────────────────────────────────────
# Fetch every checkpoint named in FILE_MAP from WEIGHT_REPO and remember the
# local path hf_hub_download returns for each alias.
print("🔄 Downloading weights …")
# ``resume_download`` is deprecated in huggingface_hub (downloads always
# resume when possible), so it is no longer passed.
local_paths = {
    alias: hf_hub_download(WEIGHT_REPO, filename)
    for alias, filename in FILE_MAP.items()
}

print("🧩 Initialising models …")
# One tokenizer is shared by all ensemble members (same backbone).
tok = AutoTokenizer.from_pretrained(BASE_MODEL)

# Build one classifier per checkpoint: backbone + NUM_LABELS-way head, then
# overwrite its parameters with the downloaded state dict.
models = []
for p in local_paths.values():
    m = AutoModelForSequenceClassification.from_pretrained(
        BASE_MODEL, num_labels=NUM_LABELS
    )
    # NOTE(review): torch.load unpickles the file; on torch versions where
    # ``weights_only`` does not default to True, consider passing
    # ``weights_only=True`` since the checkpoint comes from a remote repo.
    m.load_state_dict(torch.load(p, map_location=DEVICE))
    # Move to the target device and switch to inference mode.
    m.to(DEVICE).eval()
    models.append(m)
# ── Helpers ─────────────────────────────────────────────────────────────
def tidy(txt: str) -> str:
    """Normalise pasted text into paragraphs separated by one blank line.

    Steps, in order:
      1. Convert CRLF / CR line endings to LF.
      2. Drop spaces and tabs that sit directly next to a line break, so a
         wrapped line with trailing or leading blanks does not turn into a
         double space (and does not block the hyphenation join below).
      3. Collapse any run of blank / whitespace-only lines to exactly one
         empty line.
      4. Collapse runs of spaces and tabs to a single space.
      5. Re-join words hyphenated across a line break ("exam-\\nple" ->
         "example").
      6. Turn every remaining single line break into a space, leaving the
         double line breaks that separate paragraphs.

    Args:
        txt: Raw text.

    Returns:
        The cleaned text with leading and trailing whitespace removed.
    """
    txt = txt.replace("\r\n", "\n").replace("\r", "\n")
    # Must run before the joins below; otherwise "a \nb" becomes "a  b".
    txt = re.sub(r"[ \t]*\n[ \t]*", "\n", txt)
    txt = re.sub(r"\n\s*\n+", "\n\n", txt)
    txt = re.sub(r"[ \t]+", " ", txt)
    txt = re.sub(r"(\w+)-\n(\w+)", r"\1\2", txt)
    # A newline with no newline on either side is a soft wrap, not a paragraph break.
    txt = re.sub(r"(?<!\n)\n(?!\n)", " ", txt)
    return txt.strip()
def infer(seg: str):
    """Score one text segment with the soft-voting ensemble.

    The segment is tokenised (truncated to the tokenizer's limit), each model
    in ``models`` produces a softmax distribution over the classes, and the
    distributions are averaged.

    Args:
        seg: Text to classify.

    Returns:
        A tuple ``(human, ai, top3)`` where ``ai`` is the summed probability
        of every class except index 24 ("human" in ``LABELS``) as a
        percentage, ``human`` is ``100 - ai``, and ``top3`` holds the label
        names of the three largest entries once the human class is zeroed.
    """
    encoded = tok(seg, return_tensors="pt", truncation=True, padding=True).to(DEVICE)

    with torch.no_grad():
        votes = []
        for member in models:
            votes.append(torch.softmax(member(**encoded).logits, dim=1))
        # Average over models, then take the only row of the batch.
        probs = torch.stack(votes).mean(0)[0]

    # Work on a copy so the human class can be masked out.
    ai_probs = probs.clone()
    ai_probs[24] = 0

    ai = ai_probs.sum().item() * 100
    human = 100 - ai

    best_indices = torch.topk(ai_probs, 3).indices.tolist()
    top3 = [LABELS[idx] for idx in best_indices]
    return human, ai, top3
# ── Main analyse fn ─────────────────────────────────────────────────────
@spaces.GPU
def analyse(txt: str):
    """Classify ``txt`` line by line and render the result as HTML.

    The text is cleaned with ``tidy`` and split on line breaks.  Every
    non-blank line is scored with ``infer`` and wrapped in a coloured
    ``<span>`` whose ``title`` tooltip shows the winning percentage (plus the
    top-3 model guesses for AI lines).  Blank lines become ``<br>``.  The
    banner at the top reports the unweighted mean of the per-line scores.

    Args:
        txt: Raw text from the input box.

    Returns:
        An HTML string: an ``<h3>`` verdict, an ``<hr>``, then the highlighted
        lines joined by ``<br>``.  When there is nothing to score, a short
        prompt asking for input is returned instead.
    """
    prompt = "✏️ Please paste or type some text to analyse…"
    if not txt.strip():
        return prompt

    out, h_sum, ai_sum, n = [], 0.0, 0.0, 0
    for ln in tidy(txt).split("\n"):
        if not ln.strip():
            out.append("<br>")
            continue
        n += 1
        h, ai, top3 = infer(ln)
        h_sum += h
        ai_sum += ai
        if ai > h:
            cls = "ai-line"
            tip = f"AI {ai:.2f}% • Top-3: {', '.join(top3)}"
        else:
            cls = "human-line"
            tip = f"Human {h:.2f}%"
        # Both the tooltip and the line text are escaped: the tooltip lives
        # inside a quoted attribute, the line is user-supplied content.
        out.append(
            f"<span class='{cls} prob-tooltip' title='{html.escape(tip)}'>"
            f"{html.escape(ln)}</span>"
        )

    # Defensive: never divide by zero if cleaning left no scorable line.
    if n == 0:
        return prompt

    human_avg, ai_avg = h_sum / n, ai_sum / n
    if human_avg >= ai_avg:
        verdict = (
            "<span class='human-line' style='padding:6px 10px;font-weight:bold'>"
            f"Human-written {human_avg:.2f}%</span>"
        )
    else:
        verdict = (
            "<span class='ai-line' style='padding:6px 10px;font-weight:bold'>"
            f"AI-generated {ai_avg:.2f}%</span>"
        )
    return f"<h3>{verdict}</h3><hr>" + "<br>".join(out)
# ── Gradio UI ───────────────────────────────────────────────────────────
# Build the page: heading, short instructions, input box, button, result pane
# and footer.  Everything created inside the ``with`` belongs to ``demo``.
with gr.Blocks(css=CSS, title="Orify Text Detector") as demo:
    gr.Markdown("## Orify Text Detector")
    gr.Markdown(
        "Paste text, click **Analyse**.<br>"
        "<span class='human-line'>Green</span>=human &nbsp;|&nbsp; "
        "<span class='ai-line'>Red</span>=AI.<br>"
        "Hover a line to see confidence & top-3 AI models."
    )
    inp = gr.Textbox(lines=8, placeholder="Paste text here …")
    btn = gr.Button("Analyse")
    # The "output-box" class is styled in CSS.
    out = gr.HTML(elem_classes=["output-box"])
    # Clicking the button feeds the textbox content to analyse() and shows
    # the returned HTML in the result pane.
    btn.click(analyse, inp, out)
    gr.Markdown("<sub>Powered by ModernBERT + Orify Ensemble © 2025</sub>")

# Start the app only when the file is executed as a script.
if __name__ == "__main__":
    demo.launch()