Spaces:
Running
on
Zero
Running
on
Zero
File size: 7,586 Bytes
3477806 decced4 3477806 decced4 65cb2f2 3477806 65cb2f2 3477806 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 |
"""
Orify Text Detector — Space edition (Zero-GPU ready)
• Three ModernBERT-base checkpoints (soft-vote)
• Per-line colour coding, probability tool-tips, top-3 AI model hints
• Everything fetched automatically from the weight repo and cached
"""
# ── Imports ──────────────────────────────────────────────────────────────
from pathlib import Path
import re, torch, gradio as gr
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from huggingface_hub import hf_hub_download
import spaces
import os, types # add `types`
# ────────────────── robust torch.compile shim ─────────────────────────
if hasattr(torch, "compile"):
    # NOTE: the stdlib `types` module has no `Any`; annotating the parameter
    # with `types.Any` raised AttributeError as soon as this `def` executed.
    # `object` needs no import and expresses "anything" just as well.
    def _no_compile(model: object = None, *args, **kwargs):
        """Stand-in for ``torch.compile`` that never compiles anything.

        Two call patterns are supported:

        1. ``torch.compile(model, ...)`` -> returns ``model`` unchanged.
        2. ``torch.compile(**kw)`` -> returns a decorator that immediately
           gives back the class / function it decorates.

        Args:
            model: The callable to "compile", or ``None`` (or any
                non-callable) when used as a decorator factory.
            *args: Accepted for signature compatibility and ignored.
            **kwargs: Accepted for signature compatibility and ignored.

        Returns:
            ``model`` itself when it is callable, otherwise an identity
            decorator.
        """
        if callable(model):  # pattern 1
            return model

        # pattern 2 (used by ModernBERT via @torch.compile(...))
        def decorator(fn):
            return fn

        return decorator

    torch.compile = _no_compile  # monkey-patch
# NOTE(review): presumably meant to switch off TorchInductor as well; confirm
# that the installed PyTorch actually honours this variable.
os.environ["TORCHINDUCTOR_DISABLED"] = "1"
# (everything below is unchanged)
# Run on the GPU when one is visible to torch, otherwise fall back to CPU.
_device_kind = "cuda" if torch.cuda.is_available() else "cpu"
DEVICE = torch.device(_device_kind)

# Hub repository holding the fine-tuned ensemble checkpoints.
WEIGHT_REPO = "Sleepyriizi/Orify-Text-Detection-Weights"

# alias -> file name inside WEIGHT_REPO (each alias equals its file name).
FILE_MAP = {
    fname: fname
    for fname in ("ensamble_1", "ensamble_2.bin", "ensamble_3")
}

# Architecture / tokenizer every ensemble member is instantiated from.
BASE_MODEL_NAME = "answerdotai/ModernBERT-base"
NUM_LABELS = 41

# class id -> friendly label; the position in the tuple is the class id.
LABELS = dict(enumerate((
    "13B", "30B", "65B", "7B", "GLM130B",
    "bloom_7b", "bloomz", "cohere", "davinci",
    "dolly", "dolly-v2-12b", "flan_t5_base",
    "flan_t5_large", "flan_t5_small", "flan_t5_xl",
    "flan_t5_xxl", "gemma-7b-it", "gemma2-9b-it",
    "gpt-3.5-turbo", "gpt-35", "gpt-4",
    "gpt-4o", "gpt-j", "gpt-neox", "human",
    "llama3-70b", "llama3-8b", "mixtral-8x7b",
    "opt-1.3b", "opt-125m", "opt-13b",
    "opt-2.7b", "opt-30b", "opt-350m",
    "opt-6.7b", "opt-iml-30b", "opt-iml-max-1.3b",
    "t0-11b", "t0-3b", "text-davinci-002", "text-davinci-003",
)))
# ── CSS (kept identical) ────────────────────────────────────────────────
# Stylesheet for the UI: prefer a `style.css` file sitting next to this
# script and fall back to the built-in rules when that file is absent.
# The file is read as UTF-8 explicitly so the result does not depend on the
# host locale, and the read is attempted directly (EAFP) rather than
# checking for existence first and building the path twice.
try:
    CSS = Path(__file__).with_name("style.css").read_text(encoding="utf-8")
except FileNotFoundError:
    CSS = """
:root{--clr-ai:#ff4d4f;--clr-human:#52c41a;--border:2px solid var(--clr-ai);--radius:10px}
body{font-family:'Roboto Mono',monospace;margin:0 auto;max-width:900px;padding:32px}
textarea,.output-box{width:100%;box-sizing:border-box;padding:16px;font-size:1rem;border:var(--border);border-radius:var(--radius)}
.output-box{min-height:160px}.ai-line{background:rgba(255,77,79,.12);padding:2px 4px;border-radius:4px}
.human-line{background:rgba(82,196,26,.12);padding:2px 4px;border-radius:4px}
.prob-tooltip{cursor:help;border-bottom:1px dotted currentColor}
"""
# ── Model loading (download once, then cached) ───────────────────────────
print("π Downloading weights β¦")
# `resume_download` is deprecated in current huggingface_hub releases
# (interrupted downloads are resumed automatically), so it is not passed.
local_paths = {
    alias: hf_hub_download(WEIGHT_REPO, fname)
    for alias, fname in FILE_MAP.items()
}

print("π§© Loading tokenizer & models β¦")
tokeniser = AutoTokenizer.from_pretrained(BASE_MODEL_NAME)

# One classifier per downloaded checkpoint; together they form the ensemble
# that `infer` averages over.
models = []
for path in local_paths.values():
    # Start from the base architecture with a NUM_LABELS-way head, then
    # overwrite its parameters with the fine-tuned checkpoint.
    net = AutoModelForSequenceClassification.from_pretrained(
        BASE_MODEL_NAME, num_labels=NUM_LABELS)
    # SECURITY: torch.load unpickles the file. The checkpoint is downloaded
    # content, so restrict unpickling to tensors / plain containers.
    state_dict = torch.load(path, map_location=DEVICE, weights_only=True)
    net.load_state_dict(state_dict)
    net.to(DEVICE).eval()  # inference only
    models.append(net)
# ── Helpers ──────────────────────────────────────────────────────────────
def tidy(txt: str) -> str:
    """Normalise pasted text before it is split into lines for scoring.

    The passes run in this order:

    1. Convert ``\\r\\n`` and bare ``\\r`` line endings to ``\\n``.
    2. Collapse any run of blank lines into exactly one blank line.
    3. Squeeze runs of spaces / tabs into a single space.
    4. Re-join words hyphenated across a line break.
    5. Turn every remaining single newline into a space.

    Leading and trailing whitespace is stripped from the result.
    """
    # (pattern, replacement) pairs, applied strictly in sequence.
    passes = (
        (r"\n\s*\n+", "\n\n"),
        (r"[ \t]+", " "),
        (r"(\w+)-\n(\w+)", r"\1\2"),
        (r"(?<!\n)\n(?!\n)", " "),
    )

    cleaned = txt.replace("\r\n", "\n")
    cleaned = cleaned.replace("\r", "\n")
    for pattern, replacement in passes:
        cleaned = re.sub(pattern, replacement, cleaned)
    return cleaned.strip()
def infer(segment: str):
    """Score one text segment with the whole ensemble.

    Each model's logits are turned into probabilities with a softmax and
    the per-model distributions are averaged (soft vote).

    Returns:
        A ``(human_score, ai_score, top3_names)`` tuple. ``ai_score`` is the
        summed probability of every class except index 24 ("human" in
        ``LABELS``), expressed in percent; ``human_score`` is
        ``100 - ai_score``; ``top3_names`` are the labels of the three most
        probable non-human classes.
    """
    encoded = tokeniser(
        segment,
        return_tensors="pt",
        truncation=True,
        padding=True,
    ).to(DEVICE)

    with torch.no_grad():
        per_model = [
            torch.softmax(member(**encoded).logits, dim=1)
            for member in models
        ]
        # Average across models, then keep the first (only) row of the batch.
        probs = torch.stack(per_model).mean(dim=0)[0]

    # Work on a copy with the human class zeroed so it can neither add to
    # the AI score nor appear among the top-3 hints.
    machine_probs = probs.clone()
    machine_probs[24] = 0

    ai_score = machine_probs.sum().item() * 100
    human_score = 100 - ai_score

    best_ids = torch.topk(machine_probs, 3).indices.tolist()
    top3_names = [LABELS[idx] for idx in best_ids]
    return human_score, ai_score, top3_names
# ── Inference + explanation ──────────────────────────────────────────────
@spaces.GPU
def analyse(text: str):
    """Classify pasted text line by line and render the result as HTML.

    The text is cleaned with ``tidy`` and split on newlines. Every non-empty
    line is scored with ``infer`` and wrapped in a ``<span>`` whose CSS class
    marks it as AI or human and whose ``title`` carries the score (plus the
    top-3 model hints for AI lines). Empty lines become ``<br>``.

    Args:
        text: Raw user input from the textbox.

    Returns:
        An HTML string: an overall verdict (mean human or AI percentage over
        the scored lines), a horizontal rule, then the highlighted lines.
        Whitespace-only input yields a short prompt message instead.
    """
    # Local import so this function brings its own dependency into scope.
    import html

    if not text.strip():
        return "βοΈ Please paste or type some text to analyseβ¦"

    lines = tidy(text).split("\n")
    highlighted, h_tot, ai_tot, n = [], 0.0, 0.0, 0
    for ln in lines:
        if not ln.strip():
            highlighted.append("<br>")
            continue
        n += 1
        h, ai, top3 = infer(ln)
        h_tot += h
        ai_tot += ai

        is_ai = ai > h
        tooltip = (f"AI {ai:.2f}% β’ Top-3: {', '.join(top3)}"
                   if is_ai else f"Human {h:.2f}%")
        cls = "ai-line" if is_ai else "human-line"
        # The line is untrusted user input that ends up inside gr.HTML, so it
        # is escaped with the standard library. The tooltip is escaped too
        # because it sits inside a single-quoted attribute.
        # NOTE(review): the previous `gr.utils.sanitize_html` helper does not
        # appear in Gradio's documented API — confirm before reinstating it.
        span = (f"<span class='{cls} prob-tooltip' "
                f"title='{html.escape(tooltip)}'>"
                f"{html.escape(ln)}</span>")
        highlighted.append(span)

    # n >= 1 here: non-blank input always leaves at least one non-empty line.
    if h_tot >= ai_tot:
        verdict = (f"<p><strong>Overall verdict:</strong> "
                   f"<span class='human-line' style='padding:4px 8px;'>"
                   f"Human-written {h_tot/n:.2f}%</span></p>")
    else:
        verdict = (f"<p><strong>Overall verdict:</strong> "
                   f"<span class='ai-line' style='padding:4px 8px;'>"
                   f"AI-generated {ai_tot/n:.2f}%</span></p>")
    return verdict + "<hr>" + "<br>".join(highlighted)
# ── Interface ────────────────────────────────────────────────────────────
# Static Markdown shown above and below the widgets.
_INTRO_MD = """
### Orify Text Detector
Paste any English text and press **Analyse**.
<span class='human-line'>Green</span> = humanβ|β<span class='ai-line'>Red</span> = AI.
Hover a line to see confidence and the top-3 AI models it resembles.
"""
_FOOTER_MD = "<sub>Powered by ModernBERT + Orify Ensemble Β© 2025</sub>"

# Page layout, top to bottom: intro, input box, result pane, button, footer.
with gr.Blocks(css=CSS, title="Orify Text Detector") as demo:
    gr.Markdown(_INTRO_MD)
    inp = gr.Textbox(
        lines=8,
        placeholder="Paste text here β¦",
        elem_classes=["input-area"],
    )
    out = gr.HTML("", elem_classes=["output-box"])
    # Clicking the button sends the textbox content through `analyse` and
    # shows the returned HTML in the result pane.
    analyse_btn = gr.Button("Analyse")
    analyse_btn.click(fn=analyse, inputs=inp, outputs=out)
    gr.Markdown(_FOOTER_MD)

# Start the app only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()
|