Spaces:
Running
Running
import torch
from .model_loader import get_model_tokenizer

# Run inference on GPU when available, otherwise fall back to CPU.
# NOTE(review): tensors are moved to this device in classify_text; confirm
# the loader returns the model on the same device.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
def perplexity_to_ai_likelihood(
    ppl: float, min_ppl: float = 10, max_ppl: float = 100
) -> float:
    """Map a language-model perplexity to an AI-likelihood percentage.

    Lower perplexity means the scoring model found the text highly
    predictable, which is taken as evidence of AI generation, so the
    scale is inverted: ``min_ppl`` maps to 100.0 and ``max_ppl`` to 0.0.

    Args:
        ppl: Perplexity of the text under the scoring model.
        min_ppl: Perplexity at (or below) which the text is treated as
            certainly AI-generated. Tunable; defaults to 10.
        max_ppl: Perplexity at (or above) which the text is treated as
            certainly human-written. Tunable; defaults to 100.

    Returns:
        AI-likelihood in [0.0, 100.0], rounded to two decimal places.

    Raises:
        ValueError: If ``min_ppl`` is not strictly less than ``max_ppl``
            (the linear mapping would divide by zero or invert).
    """
    if min_ppl >= max_ppl:
        raise ValueError("min_ppl must be strictly less than max_ppl")
    # Clamp so out-of-range perplexities saturate at the bounds.
    ppl = max(min_ppl, min(ppl, max_ppl))
    # Linear inversion: lower perplexity -> higher AI-likelihood.
    likelihood = 1 - ((ppl - min_ppl) / (max_ppl - min_ppl))
    return round(likelihood * 100, 2)
def classify_text(text: str):
    """Classify *text* as AI- or human-written via language-model perplexity.

    Tokenizes the text, computes the model's causal-LM loss on it, and
    converts the resulting perplexity into a label and an AI-likelihood
    percentage.

    Args:
        text: The passage to classify. Truncated to the model's max length.

    Returns:
        A tuple ``(result, perplexity, likelihood)`` where ``result`` is one
        of "AI-generated", "Probably AI-generated", or "Human-written",
        ``perplexity`` is the float perplexity, and ``likelihood`` is the
        percentage from ``perplexity_to_ai_likelihood``.
    """
    model, tokenizer = get_model_tokenizer()
    # The inputs below are moved to `device`; move the model too so they
    # match (no-op if the loader already placed it there).
    model = model.to(device)

    inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True)
    input_ids = inputs["input_ids"].to(device)
    attention_mask = inputs["attention_mask"].to(device)

    # Mask padding positions out of the loss: -100 labels are ignored by
    # the HF causal-LM loss, so perplexity reflects only real tokens.
    # (For a single text no pads are added, but this keeps batched reuse
    # correct.)
    labels = input_ids.clone()
    labels[attention_mask == 0] = -100

    with torch.no_grad():
        outputs = model(input_ids, attention_mask=attention_mask, labels=labels)
    perplexity = torch.exp(outputs.loss).item()

    # Threshold bands for the label.
    # NOTE(review): these cutoffs (55/80) do not align with the 10/100
    # bounds used by perplexity_to_ai_likelihood — confirm intended.
    if perplexity < 55:
        result = "AI-generated"
    elif perplexity < 80:
        result = "Probably AI-generated"
    else:
        result = "Human-written"

    likelihood_result = perplexity_to_ai_likelihood(perplexity)
    return result, perplexity, likelihood_result