Spaces:

Sleepyriizi
/

Orify-Text-Detection

Running on Zero

File size: 6,487 Bytes

"""
  Orify Text Detector  – full-width UI + explicit verdict (Zero-GPU ready)

  • Three ModernBERT-base checkpoints (soft-vote)
  • Per-line highlights, hover tool-tips, and a big verdict banner
  • Weights auto-downloaded & cached
"""

# ── Imports ─────────────────────────────────────────────────────────────
from pathlib import Path
import os, re, html, typing
import torch, gradio as gr
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from huggingface_hub import hf_hub_download
import spaces

# ── Robust torch.compile shim (same as before) ──────────────────────────
if hasattr(torch, "compile"):
    def _no_compile(model: typing.Any = None, *args, **kwargs):
        """Stand-in for ``torch.compile`` that never compiles anything.

        When the first argument is callable it is handed back untouched.
        For any other call shape (no positional argument, or only options)
        an identity decorator is returned, so ``@torch.compile(...)`` keeps
        working as a no-op. Extra positional/keyword options are ignored.
        """
        if not callable(model):
            return lambda fn: fn
        return model

    # Both statements are module-level side effects: the environment flag
    # is set and torch.compile is replaced by the no-op shim above.
    os.environ["TORCHINDUCTOR_DISABLED"] = "1"
    torch.compile = _no_compile

# ── Config ──────────────────────────────────────────────────────────────
# Prefer CUDA when torch reports it as available, otherwise run on CPU.
DEVICE = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# Hub repository holding the fine-tuned checkpoints, and the files to fetch.
# Every alias maps to a file of the same name inside WEIGHT_REPO.
WEIGHT_REPO = "Sleepyriizi/Orify-Text-Detection-Weights"
FILE_MAP = {
    name: name
    for name in ("ensamble_1", "ensamble_2.bin", "ensamble_3")
}

# Backbone architecture/tokenizer and the size of the classification head.
BASE_MODEL = "answerdotai/ModernBERT-base"
NUM_LABELS = 41

# Class index -> label name; indices follow the order of the tuple below.
LABELS = dict(enumerate((
    "13B", "30B", "65B", "7B", "GLM130B", "bloom_7b", "bloomz", "cohere",
    "davinci", "dolly", "dolly-v2-12b", "flan_t5_base", "flan_t5_large",
    "flan_t5_small", "flan_t5_xl", "flan_t5_xxl", "gemma-7b-it",
    "gemma2-9b-it", "gpt-3.5-turbo", "gpt-35", "gpt-4", "gpt-4o", "gpt-j",
    "gpt-neox", "human", "llama3-70b", "llama3-8b", "mixtral-8x7b",
    "opt-1.3b", "opt-125m", "opt-13b", "opt-2.7b", "opt-30b", "opt-350m",
    "opt-6.7b", "opt-iml-30b", "opt-iml-max-1.3b", "t0-11b", "t0-3b",
    "text-davinci-002", "text-davinci-003",
)))

# ── CSS (full-width layout) ─────────────────────────────────────────────
# Stylesheet handed to gr.Blocks(css=...) when the UI is built.
#   :root        – colour variables (--ai red, --human green) plus the shared
#                  border/radius; only --ai is referenced via --border here.
#   .output-box  – class attached to the gr.HTML result component.
#   .ai-line / .human-line – background tint for lines judged AI / human.
#   .prob-tooltip – dotted underline + help cursor on spans carrying a title.
CSS = """
:root{--ai:#ff4d4f;--human:#52c41a;--border:2px solid var(--ai);--radius:10px}
body{font-family:'Roboto Mono',monospace;margin:0;padding:32px;box-sizing:border-box}
input,textarea,.output-box{width:100%;box-sizing:border-box}
textarea{padding:16px;font-size:1rem;border:var(--border);border-radius:var(--radius)}
.output-box{min-height:200px;border:var(--border);border-radius:var(--radius);padding:16px}
.ai-line{background:rgba(255,77,79,.12);padding:2px 4px;border-radius:4px}
.human-line{background:rgba(82,196,26,.12);padding:2px 4px;border-radius:4px}
.prob-tooltip{cursor:help;border-bottom:1px dotted currentColor}
"""

# ── Load weights & models - one time ─────────────────────────────────────
print("🔄 Downloading weights …")
# Fetch every checkpoint listed in FILE_MAP and remember its local path.
# `resume_download` is no longer passed: huggingface_hub deprecated the
# argument (downloads always resume when possible) and plans to remove it.
local_paths = {alias: hf_hub_download(WEIGHT_REPO, filename)
               for alias, filename in FILE_MAP.items()}

print("🧩 Initialising models …")
tok = AutoTokenizer.from_pretrained(BASE_MODEL)

# One classifier per checkpoint; `infer` averages their softmax outputs.
models = []
for p in local_paths.values():
    # Build the base architecture with a NUM_LABELS-way head, then overwrite
    # its parameters with the fine-tuned state dict.
    m = AutoModelForSequenceClassification.from_pretrained(
        BASE_MODEL, num_labels=NUM_LABELS)
    # NOTE(review): torch.load unpickles the file. The checkpoints come from
    # WEIGHT_REPO; consider passing weights_only=True once it is confirmed
    # that they are plain tensor state dicts.
    m.load_state_dict(torch.load(p, map_location=DEVICE))
    m.to(DEVICE).eval()
    models.append(m)

# ── Helpers ─────────────────────────────────────────────────────────────
def tidy(txt: str) -> str:
    """Normalise pasted text into paragraphs separated by one blank line.

    Line endings are unified to ``\\n`` first; afterwards a fixed sequence
    of regex rewrites is applied in order, and the result is stripped of
    leading/trailing whitespace.
    """
    cleaned = txt.replace("\r\n", "\n").replace("\r", "\n")

    # Order matters: each rewrite operates on the output of the previous one.
    rewrites = (
        (r"\n\s*\n+", "\n\n"),         # any blank-line run -> exactly one blank line
        (r"[ \t]+", " "),              # runs of spaces/tabs -> single space
        (r"(\w+)-\n(\w+)", r"\1\2"),   # re-join words hyphenated across a line break
        (r"(?<!\n)\n(?!\n)", " "),     # lone newline inside a paragraph -> space
    )
    for pattern, replacement in rewrites:
        cleaned = re.sub(pattern, replacement, cleaned)

    return cleaned.strip()

def infer(seg:str):
    """Score one text segment with the soft-voting ensemble.

    The segment is tokenised (truncated if needed), each model in ``models``
    produces class probabilities via softmax, and those are averaged.

    Returns:
        A ``(human, ai, top3)`` tuple where ``ai`` is the summed probability
        of every non-"human" class as a percentage, ``human`` is
        ``100 - ai``, and ``top3`` lists the names of the three most
        probable non-"human" classes, most likely first.

    Raises:
        KeyError: if ``LABELS`` contains no "human" entry.
    """
    # Look the human class up by name instead of hard-coding its position,
    # so a reordered LABELS table cannot silently zero out the wrong class.
    human_idx = next((i for i, name in LABELS.items() if name == "human"), None)
    if human_idx is None:
        raise KeyError("LABELS has no 'human' class")

    inp = tok(seg, return_tensors="pt", truncation=True, padding=True).to(DEVICE)
    with torch.no_grad():
        per_model = [torch.softmax(m(**inp).logits, dim=1) for m in models]
        # Average across models, then take the only row (a single segment).
        probs = torch.stack(per_model).mean(0)[0]

    # Work on a copy so the averaged distribution itself is left intact.
    ai_probs = probs.clone()
    ai_probs[human_idx] = 0
    ai = ai_probs.sum().item() * 100
    human = 100 - ai
    top3 = [LABELS[i] for i in torch.topk(ai_probs, 3).indices.tolist()]
    return human, ai, top3

# ── Main analyse fn ─────────────────────────────────────────────────────
@spaces.GPU
def analyse(txt:str):
    """Classify text line by line and return an HTML report.

    The input is normalised with ``tidy`` and split on newlines; every
    non-blank line is scored with ``infer`` and wrapped in a coloured span
    whose tooltip shows the confidence (plus the top-3 model guesses for
    AI-flagged lines). A verdict banner built from the per-line averages is
    placed above the lines.

    Returns:
        An HTML string, or a short prompt message when there is nothing to
        analyse (``None``, empty, or whitespace-only input).
    """
    placeholder = "✏️ Please paste or type some text to analyse…"
    # `not txt` also covers None, which would otherwise crash on .strip().
    if not txt or not txt.strip():
        return placeholder

    rendered = []
    h_sum = ai_sum = 0.0
    n = 0
    for ln in tidy(txt).split("\n"):
        if not ln.strip():
            # Blank separator line between paragraphs.
            rendered.append("<br>")
            continue
        n += 1
        h, ai, top3 = infer(ln)
        h_sum += h
        ai_sum += ai
        if ai > h:
            cls = "ai-line"
            tip = f"AI {ai:.2f}% • Top-3: {', '.join(top3)}"
        else:
            cls = "human-line"
            tip = f"Human {h:.2f}%"
        # Escape both the attribute value and the visible text; the tooltip
        # sits inside a single-quoted attribute, so quotes must be escaped.
        rendered.append(
            f"<span class='{cls} prob-tooltip' title='{html.escape(tip, quote=True)}'>"
            f"{html.escape(ln)}</span>"
        )

    # Defensive: never divide by zero if no line survived normalisation.
    if n == 0:
        return placeholder

    human_avg, ai_avg = h_sum / n, ai_sum / n
    if human_avg >= ai_avg:
        verdict = (f"<span class='human-line' style='padding:6px 10px;font-weight:bold'>"
                   f"Human-written {human_avg:.2f}%</span>")
    else:
        verdict = (f"<span class='ai-line' style='padding:6px 10px;font-weight:bold'>"
                   f"AI-generated {ai_avg:.2f}%</span>")
    return f"<h3>{verdict}</h3><hr>" + "<br>".join(rendered)

# ── Gradio UI ───────────────────────────────────────────────────────────
with gr.Blocks(css=CSS, title="Orify Text Detector") as demo:
    # Heading followed by a short usage legend.
    gr.Markdown("## Orify Text Detector")
    _legend_md = "".join((
        "Paste text, click **Analyse**.<br>",
        "<span class='human-line'>Green</span>=human &nbsp;|&nbsp; ",
        "<span class='ai-line'>Red</span>=AI.<br>",
        "Hover a line to see confidence & top-3 AI models.",
    ))
    gr.Markdown(_legend_md)

    # Input box, trigger button and the HTML area that shows the report.
    inp = gr.Textbox(lines=8, placeholder="Paste text here …")
    btn = gr.Button("Analyse")
    out = gr.HTML(elem_classes=["output-box"])

    # Clicking the button runs `analyse` on the textbox content and renders
    # the returned HTML in the output component.
    btn.click(fn=analyse, inputs=inp, outputs=out)

    gr.Markdown("<sub>Powered by ModernBERT + Orify Ensemble © 2025</sub>")

# Start the app only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()