import math

import torch

from .model_loader import get_model_tokenizer

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")


def perplexity_to_ai_likelihood(ppl: float) -> float:
    """Convert a perplexity value into an AI-likelihood percentage.

    The perplexity is clamped to ``[min_ppl, max_ppl]`` and mapped linearly
    onto ``[100, 0]``: lower perplexity yields a higher AI-likelihood.

    Args:
        ppl: Perplexity of the text under the language model.

    Returns:
        A percentage between 0.0 and 100.0, rounded to two decimals.
        A NaN perplexity yields 0.0.
    """
    # You can tune these parameters
    min_ppl = 10   # very confident it's AI
    max_ppl = 100  # very confident it's human

    # Every comparison against NaN is false, so without this guard the clamp
    # below would turn NaN into min_ppl and report 100% AI-likelihood, while
    # classify_text's threshold checks fall through to "Human-written" for
    # the same value. Report 0.0 so the two outputs agree.
    if math.isnan(ppl):
        return 0.0

    # Clamp to bounds
    ppl = max(min_ppl, min(ppl, max_ppl))

    # Invert and scale: lower perplexity -> higher AI-likelihood
    likelihood = 1 - ((ppl - min_ppl) / (max_ppl - min_ppl))
    return round(likelihood * 100, 2)


def classify_text(text: str):
    """Classify ``text`` as AI-generated or human-written via perplexity.

    The text is tokenized and passed through the model returned by
    ``get_model_tokenizer()`` with the input ids as labels; the perplexity is
    ``exp(loss)``.

    Args:
        text: The text to score.

    Returns:
        A tuple ``(result, perplexity, likelihood_result)`` where ``result``
        is one of ``"AI-generated"`` (perplexity < 55),
        ``"Probably AI-generated"`` (perplexity < 80) or ``"Human-written"``
        (anything else, including NaN), ``perplexity`` is a float, and
        ``likelihood_result`` is the percentage returned by
        ``perplexity_to_ai_likelihood``.
    """
    # NOTE(review): assumes get_model_tokenizer() returns a model already
    # placed on `device` and a tokenizer that supports padding=True (i.e. has
    # a pad token configured) -- confirm in model_loader.
    model, tokenizer = get_model_tokenizer()

    inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True)
    input_ids = inputs["input_ids"].to(device)
    attention_mask = inputs["attention_mask"].to(device)

    # Inference only: no gradients are needed to read the loss.
    with torch.no_grad():
        outputs = model(
            input_ids,
            attention_mask=attention_mask,
            labels=input_ids,
        )
        loss = outputs.loss

    # NOTE(review): the loss is presumably NaN when the text tokenizes to too
    # few tokens for a next-token loss -- TODO confirm for the loaded model.
    # In that case the label is "Human-written" and the likelihood is 0.0.
    perplexity = torch.exp(loss).item()

    if perplexity < 55:
        result = "AI-generated"
    elif perplexity < 80:
        result = "Probably AI-generated"
    else:
        result = "Human-written"

    likelihood_result = perplexity_to_ai_likelihood(perplexity)
    return result, perplexity, likelihood_result