""" Orify Text Detector – full-width UI + explicit verdict (Zero-GPU ready) • Three ModernBERT-base checkpoints (soft-vote) • Per-line highlights, hover tool-tips, and a big verdict banner • Weights auto-downloaded & cached """ # ── Imports ───────────────────────────────────────────────────────────── from pathlib import Path import os, re, html, typing import torch, gradio as gr from transformers import AutoTokenizer, AutoModelForSequenceClassification from huggingface_hub import hf_hub_download import spaces # ── Robust torch.compile shim (same as before) ────────────────────────── if hasattr(torch, "compile"): def _no_compile(model: typing.Any = None, *args, **kwargs): if callable(model): return model return lambda fn: fn torch.compile = _no_compile os.environ["TORCHINDUCTOR_DISABLED"] = "1" # ── Config ────────────────────────────────────────────────────────────── DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu") WEIGHT_REPO = "Sleepyriizi/Orify-Text-Detection-Weights" FILE_MAP = {"ensamble_1":"ensamble_1", "ensamble_2.bin":"ensamble_2.bin", "ensamble_3":"ensamble_3"} BASE_MODEL = "answerdotai/ModernBERT-base" NUM_LABELS = 41 LABELS = {i:name for i, name in enumerate([ "13B","30B","65B","7B","GLM130B","bloom_7b","bloomz","cohere","davinci", "dolly","dolly-v2-12b","flan_t5_base","flan_t5_large","flan_t5_small", "flan_t5_xl","flan_t5_xxl","gemma-7b-it","gemma2-9b-it","gpt-3.5-turbo", "gpt-35","gpt-4","gpt-4o","gpt-j","gpt-neox","human","llama3-70b", "llama3-8b","mixtral-8x7b","opt-1.3b","opt-125m","opt-13b","opt-2.7b", "opt-30b","opt-350m","opt-6.7b","opt-iml-30b","opt-iml-max-1.3b", "t0-11b","t0-3b","text-davinci-002","text-davinci-003" ])} # ── CSS (full-width layout) ───────────────────────────────────────────── CSS = """ :root{--ai:#ff4d4f;--human:#52c41a;--border:2px solid var(--ai);--radius:10px} body{font-family:'Roboto Mono',monospace;margin:0;padding:32px;box-sizing:border-box} input,textarea,.output-box{width:100%;box-sizing:border-box} textarea{padding:16px;font-size:1rem;border:var(--border);border-radius:var(--radius)} .output-box{min-height:200px;border:var(--border);border-radius:var(--radius);padding:16px} .ai-line{background:rgba(255,77,79,.12);padding:2px 4px;border-radius:4px} .human-line{background:rgba(82,196,26,.12);padding:2px 4px;border-radius:4px} .prob-tooltip{cursor:help;border-bottom:1px dotted currentColor} """ # ── Load weights & models - one time ───────────────────────────────────── print("🔄 Downloading weights …") local_paths = {a:hf_hub_download(WEIGHT_REPO,f,resume_download=True) for a,f in FILE_MAP.items()} print("🧩 Initialising models …") tok = AutoTokenizer.from_pretrained(BASE_MODEL) models=[] for p in local_paths.values(): m = AutoModelForSequenceClassification.from_pretrained(BASE_MODEL, num_labels=NUM_LABELS) m.load_state_dict(torch.load(p,map_location=DEVICE)) m.to(DEVICE).eval() models.append(m) # ── Helpers ───────────────────────────────────────────────────────────── def tidy(txt:str)->str: txt=txt.replace("\r\n","\n").replace("\r","\n") txt=re.sub(r"\n\s*\n+","\n\n",txt) txt=re.sub(r"[ \t]+"," ",txt) txt=re.sub(r"(\w+)-\n(\w+)",r"\1\2",txt) txt=re.sub(r"(?"); continue n+=1 h,ai,top3=infer(ln); h_sum+=h; ai_sum+=ai cls="ai-line" if ai>h else "human-line" tip=f"AI {ai:.2f}% • Top-3: {', '.join(top3)}" if ai>h else f"Human {h:.2f}%" out.append(f"{html.escape(ln)}") human_avg,ai_avg=h_sum/n,ai_sum/n verdict=(f"" f"Human-written {human_avg:.2f}%" if human_avg>=ai_avg else f"" f"AI-generated {ai_avg:.2f}%") return f"