Essay-Grader committed on
Commit
9e3739b
·
1 Parent(s): c8685b3

Further changed the model

Browse files
Files changed (1) hide show
  1. app.py +39 -39
app.py CHANGED
@@ -1,53 +1,53 @@
1
  # app.py: AI Detection and Plagiarism Check API
2
 
3
- import os
4
- import torch
5
- import tempfile
6
- from fastapi import FastAPI, UploadFile, File, HTTPException
7
- from fastapi.responses import JSONResponse
8
- from PyPDF2 import PdfReader
9
  from transformers import AutoTokenizer, AutoModelForSequenceClassification
 
10
 
11
# Checkpoint identifier and the token limit handed to the tokenizer as max_length.
MODEL_NAME = "roberta-base-openai-detector"
MAX_LENGTH = 512

# ASGI application object that the route decorators in this module attach to.
app = FastAPI(version="1.0.0", title="AI Detection Only")

# Loaded once at import time so every request reuses the same tokenizer/model.
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model = AutoModelForSequenceClassification.from_pretrained(MODEL_NAME)
model.eval()  # inference mode (disables dropout and similar training-only layers)
19
-
20
def extract_text_from_pdf(file: UploadFile) -> str:
    """Return the concatenated text of every page in an uploaded PDF.

    The upload is parsed from an in-memory buffer. The previous implementation
    wrote it to ``NamedTemporaryFile(delete=False)`` and never removed the
    file, leaking one temp file per request.

    Args:
        file: The uploaded PDF; its underlying ``file`` object is read in full.

    Returns:
        Page texts joined with single spaces, with surrounding whitespace
        stripped. Pages that yield no text contribute an empty string.

    Raises:
        HTTPException: Status 500 if the upload cannot be read or parsed.
    """
    # Local import: this block cannot rely on `io` being imported at file level.
    import io

    try:
        # PdfReader accepts any seekable binary stream, so no disk file is needed.
        pdf_stream = io.BytesIO(file.file.read())
        reader = PdfReader(pdf_stream)
        text = " ".join(page.extract_text() or "" for page in reader.pages)
        return text.strip()
    except Exception as e:
        # Boundary handler: surface any parsing failure as an HTTP error while
        # keeping the original exception chained for server-side logs.
        raise HTTPException(500, f"PDF processing failed: {str(e)}") from e
30
-
31
def detect_ai_percentage(text: str) -> float:
    """Return the classifier's AI-generated probability for *text* as a percentage.

    The text is tokenized (truncated to ``MAX_LENGTH`` tokens), run through the
    module-level model with gradient tracking disabled, and the softmax
    probability of the AI class is scaled to 0-100 and rounded to two decimals.

    The AI class index is looked up in ``model.config.id2label`` rather than
    assumed to be 1, because class order is a property of the checkpoint.
    NOTE(review): the published roberta-base-openai-detector config is
    understood to name index 0 "Fake" and index 1 "Real" -- confirm against
    the model card. If so, the old hard-coded index reported the *human*
    probability.

    Args:
        text: The document text to score.

    Returns:
        The AI-class probability in percent, rounded to 2 decimal places.
    """
    inputs = tokenizer(text, truncation=True, max_length=MAX_LENGTH, return_tensors="pt")
    with torch.no_grad():
        outputs = model(**inputs)
    probs = torch.nn.functional.softmax(outputs.logits, dim=-1)

    # Names a checkpoint might use for the machine-written class.
    ai_names = {"fake", "ai", "ai-generated", "machine", "generated"}
    id2label = getattr(model.config, "id2label", None) or {}
    ai_index = next(
        (int(idx) for idx, name in id2label.items() if str(name).lower() in ai_names),
        1,  # fall back to the original positional assumption when names are generic
    )

    ai_prob = probs[0][ai_index].item()
    return round(ai_prob * 100, 2)
38
-
39
@app.post("/analyze")
async def analyze(file: UploadFile = File(...)):
    """Score an uploaded PDF for AI-generated content.

    Args:
        file: The uploaded document; its filename must end in ``.pdf``.

    Returns:
        A JSON response of the form ``{"ai_generated": <percentage>}``.

    Raises:
        HTTPException: Status 400 if the upload is not named like a PDF or the
            extracted text is shorter than 200 characters. Errors raised by
            the PDF extraction helper propagate unchanged.
    """
    # An upload may arrive without a filename; treat that as "not a PDF"
    # instead of failing with AttributeError on None.
    filename = file.filename or ""
    if not filename.lower().endswith(".pdf"):
        raise HTTPException(400, "Only PDF files are supported.")

    text = extract_text_from_pdf(file)
    if len(text) < 200:
        raise HTTPException(400, "Text too short for reliable detection.")

    ai_score = detect_ai_percentage(text)
    return JSONResponse({"ai_generated": ai_score})
 
 
 
 
 
50
 
 
 
 
 
51
 
52
 
53
  # from fastapi import FastAPI, UploadFile, File, HTTPException, BackgroundTasks
 
1
  # app.py: AI Detection and Plagiarism Check API
2
 
3
+ from fastapi import FastAPI, UploadFile, File
4
+ from pydantic import BaseModel
 
 
 
 
5
  from transformers import AutoTokenizer, AutoModelForSequenceClassification
6
+ import torch
7
 
8
# Identifier of the sequence-classification checkpoint used for detection.
MODEL_NAME = "roberta-base-openai-detector"

# ASGI application object that the route decorators in this module attach to.
app = FastAPI()

# Both artifacts are loaded once at import time and shared by all requests.
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model = AutoModelForSequenceClassification.from_pretrained(MODEL_NAME)
14
+
15
+ # Root route to avoid 404 error on Hugging Face
16
@app.get("/")
def read_root():
    # Static landing payload for GET "/": a greeting plus a pointer to /detect.
    welcome = "Welcome to the Essay AI Detection API!"
    instructions = "POST your essay text file to /detect to check if it's AI-generated."
    return dict(message=welcome, usage=instructions)
22
+
23
+ # Define response schema
24
class DetectionResult(BaseModel):
    # Response body of POST /detect.

    # Predicted class name for the submitted text.
    label: str

    # Softmax probability of the predicted class (the endpoint rounds it to 4 places).
    confidence: float
27
+
28
+ # POST route for AI content detection
29
@app.post("/detect", response_model=DetectionResult)
async def detect_ai(file: UploadFile = File(...)):
    """Classify an uploaded UTF-8 text file as human-written or AI-generated.

    Args:
        file: The uploaded essay; its bytes must decode as UTF-8.

    Returns:
        A mapping with ``label`` ("Human" or "AI-generated") and ``confidence``
        (softmax probability of the predicted class, rounded to 4 places).

    Raises:
        HTTPException: Status 400 if the upload is not valid UTF-8 or contains
            only whitespace.
    """
    # Local import: the module-level fastapi import does not bring this name in.
    from fastapi import HTTPException

    content = await file.read()
    try:
        text = content.decode("utf-8")
    except UnicodeDecodeError as exc:
        # A binary or differently-encoded upload is a client error, not a 500.
        raise HTTPException(
            status_code=400, detail="File must be UTF-8 encoded text."
        ) from exc
    if not text.strip():
        raise HTTPException(status_code=400, detail="Uploaded file contains no text.")

    # Tokenize and predict
    inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True)
    with torch.no_grad():
        outputs = model(**inputs)

    # A single document is scored, so take row 0 of the (batch, classes) output.
    probs = torch.softmax(outputs.logits, dim=-1)[0]
    pred_index = int(torch.argmax(probs))

    # Resolve the class name from the checkpoint config instead of trusting a
    # positional list. NOTE(review): the published roberta-base-openai-detector
    # config is understood to name index 0 "Fake" and index 1 "Real" -- confirm
    # against the model card. If so, the old ["Human", "AI-generated"] order
    # was inverted.
    id2label = getattr(model.config, "id2label", None) or {}
    raw_label = str(id2label.get(pred_index, "")).lower()
    if raw_label in ("fake", "ai", "ai-generated", "machine", "generated"):
        pred_label = "AI-generated"
    elif raw_label in ("real", "human"):
        pred_label = "Human"
    else:
        # Generic names such as LABEL_0: keep the original positional mapping.
        pred_label = ["Human", "AI-generated"][pred_index]

    return {
        "label": pred_label,
        "confidence": round(probs[pred_index].item(), 4),
    }
51
 
52
 
53
  # from fastapi import FastAPI, UploadFile, File, HTTPException, BackgroundTasks