Commit
·
9e3739b
1
Parent(s):
c8685b3
Further changed the model
Browse files
app.py
CHANGED
@@ -1,53 +1,53 @@
|
|
1 |
# app.py: AI Detection and Plagiarism Check API
|
2 |
|
3 |
-
import
|
4 |
-
import
|
5 |
-
import tempfile
|
6 |
-
from fastapi import FastAPI, UploadFile, File, HTTPException
|
7 |
-
from fastapi.responses import JSONResponse
|
8 |
-
from PyPDF2 import PdfReader
|
9 |
from transformers import AutoTokenizer, AutoModelForSequenceClassification
|
|
|
10 |
|
11 |
-
app = FastAPI(
|
12 |
|
|
|
13 |
MODEL_NAME = "roberta-base-openai-detector"
|
14 |
-
MAX_LENGTH = 512
|
15 |
-
|
16 |
-
# Load model and tokenizer at startup
|
17 |
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
|
18 |
-
model = AutoModelForSequenceClassification.from_pretrained(MODEL_NAME)
|
19 |
-
|
20 |
-
|
21 |
-
|
22 |
-
|
23 |
-
|
24 |
-
|
25 |
-
|
26 |
-
|
27 |
-
|
28 |
-
|
29 |
-
|
30 |
-
|
31 |
-
|
32 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
33 |
with torch.no_grad():
|
34 |
outputs = model(**inputs)
|
35 |
-
probs = torch.nn.functional.softmax(outputs.logits, dim=-1)
|
36 |
-
ai_prob = probs[0][1].item() # Index 1 is usually the AI class
|
37 |
-
return round(ai_prob * 100, 2)
|
38 |
-
|
39 |
-
@app.post("/analyze")
|
40 |
-
async def analyze(file: UploadFile = File(...)):
|
41 |
-
if not file.filename.lower().endswith(".pdf"):
|
42 |
-
raise HTTPException(400, "Only PDF files are supported.")
|
43 |
-
|
44 |
-
text = extract_text_from_pdf(file)
|
45 |
-
if len(text) < 200:
|
46 |
-
raise HTTPException(400, "Text too short for reliable detection.")
|
47 |
|
48 |
-
|
49 |
-
|
|
|
|
|
|
|
|
|
|
|
50 |
|
|
|
|
|
|
|
|
|
51 |
|
52 |
|
53 |
# from fastapi import FastAPI, UploadFile, File, HTTPException, BackgroundTasks
|
|
|
1 |
# app.py: AI Detection and Plagiarism Check API
|
2 |
|
3 |
+
from fastapi import FastAPI, UploadFile, File
|
4 |
+
from pydantic import BaseModel
|
|
|
|
|
|
|
|
|
5 |
from transformers import AutoTokenizer, AutoModelForSequenceClassification
|
6 |
+
import torch
|
7 |
|
8 |
+
# Single FastAPI application instance; the route decorators in this module
# register their handlers on it.
app = FastAPI()

# Load model and tokenizer
# Both objects are created once, when the module is imported, and are then
# shared by the request handlers instead of being rebuilt per request.
MODEL_NAME = "roberta-base-openai-detector"
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model = AutoModelForSequenceClassification.from_pretrained(MODEL_NAME)
|
14 |
+
|
15 |
+
# Root route to avoid 404 error on Hugging Face
|
16 |
+
@app.get("/")
def read_root():
    """Return a static greeting and a usage hint for the service root."""
    payload = dict(
        message="Welcome to the Essay AI Detection API!",
        usage="POST your essay text file to /detect to check if it's AI-generated.",
    )
    return payload
|
22 |
+
|
23 |
+
# Define response schema
|
24 |
+
class DetectionResult(BaseModel):
    # Response schema declared as the response_model of the POST /detect route.

    # Name of the predicted class.
    label: str
    # Score attached to the predicted class.
    confidence: float
|
27 |
+
|
28 |
+
# POST route for AI content detection
|
29 |
+
@app.post("/detect", response_model=DetectionResult)
async def detect_ai(file: UploadFile = File(...)):
    """Classify an uploaded text file as human-written or AI-generated.

    Args:
        file: Uploaded file whose raw bytes are the essay text, UTF-8 encoded.

    Returns:
        A dict with ``label`` ("Human" or "AI-generated") and ``confidence``,
        the softmax probability of the predicted class rounded to 4 places.

    Raises:
        HTTPException: 400 when the upload is not valid UTF-8 or holds no text.
    """
    # Imported here because the module-level fastapi import does not include it.
    from fastapi import HTTPException

    content = await file.read()
    try:
        text = content.decode("utf-8")
    except UnicodeDecodeError as err:
        # A binary or differently encoded upload is a client error, not a crash.
        raise HTTPException(400, "File must be UTF-8 encoded text.") from err
    if not text.strip():
        # Without any text the classifier would only score special tokens.
        raise HTTPException(400, "Uploaded file contains no text.")

    # Tokenize and predict. truncation=True clips the input to the tokenizer's
    # maximum length, so only the beginning of a long essay is scored.
    inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True)
    with torch.no_grad():
        outputs = model(**inputs)

    # Exactly one text is submitted, so reduce to that single row explicitly
    # instead of relying on argmax over the flattened batch.
    logits = outputs.logits[0]
    probs = torch.softmax(logits, dim=-1).tolist()
    pred_index = int(torch.argmax(logits))

    # Resolve the class name from the checkpoint's own label map rather than a
    # hard-coded list, so the index order cannot be inverted.
    # NOTE(review): the original OpenAI detector orders its outputs as
    # (fake, real); the positional fallback follows that - confirm against the
    # model card for MODEL_NAME.
    display_names = {"fake": "AI-generated", "real": "Human"}
    id2label = getattr(model.config, "id2label", None) or {}
    raw_name = str(id2label.get(pred_index, "")).lower()
    pred_label = display_names.get(raw_name) or ("AI-generated", "Human")[pred_index]

    return {
        "label": pred_label,
        "confidence": round(probs[pred_index], 4),
    }
|
51 |
|
52 |
|
53 |
# from fastapi import FastAPI, UploadFile, File, HTTPException, BackgroundTasks
|