import torch

from .model_loader import get_model_tokenizer

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")


def perplexity_to_ai_likelihood(ppl: float) -> float:
    # You can tune these parameters
    min_ppl = 10   # very confident it's AI
    max_ppl = 100  # very confident it's human
    # Clamp to bounds
    ppl = max(min_ppl, min(ppl, max_ppl))
    # Invert and scale: lower perplexity -> higher AI-likelihood
    likelihood = 1 - ((ppl - min_ppl) / (max_ppl - min_ppl))
    return round(likelihood * 100, 2)
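
# Worked example (illustrative values): perplexity_to_ai_likelihood(55.0)
# clamps nothing, computes 1 - (55 - 10) / (100 - 10) = 0.5,
# and returns 50.0, i.e. a 50% AI-likelihood.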

def classify_text(text: str):
    model, tokenizer = get_model_tokenizer()
    # Ensure the model sits on the same device as the inputs
    # (a no-op if the loader already placed it there).
    model.to(device)

    inputs = tokenizer(text, return_tensors="pt",
                       truncation=True, padding=True)
    input_ids = inputs["input_ids"].to(device)
    attention_mask = inputs["attention_mask"].to(device)

    # With labels == input_ids the model returns the causal LM loss,
    # whose exponential is the perplexity of the input text.
    with torch.no_grad():
        outputs = model(
            input_ids, attention_mask=attention_mask, labels=input_ids)
        loss = outputs.loss
    perplexity = torch.exp(loss).item()

    # Heuristic thresholds: lower perplexity -> more likely AI-generated.
    if perplexity < 55:
        result = "AI-generated"
    elif perplexity < 80:
        result = "Probably AI-generated"
    else:
        result = "Human-written"

    likelihood_result = perplexity_to_ai_likelihood(perplexity)
    return result, perplexity, likelihood_result
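

# Minimal usage sketch (assumptions: this file lives in a package next to
# model_loader.py, so it must be run as a module, e.g. `python -m <package>.<module>`,
# for the relative import to resolve; the sample text is arbitrary):
if __name__ == "__main__":
    sample = "Large language models assign probabilities to token sequences."
    label, ppl, likelihood = classify_text(sample)
    print(f"{label} (perplexity={ppl:.2f}, AI-likelihood={likelihood}%)")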