Spaces:

Essay-Grader
/

Detection_and_Plagiarism_Check

Running

App Files Files Community

Essay-Grader committed on May 2

Commit

9e3739b

1 Parent(s): c8685b3

Further changed the model

Browse files

Files changed (1) hide show

app.py +39 -39

app.py CHANGED Viewed

@@ -1,53 +1,53 @@
 # app.py: AI Detection and Plagiarism Check API
-import os
-import torch
-import tempfile
-from fastapi import FastAPI, UploadFile, File, HTTPException
-from fastapi.responses import JSONResponse
-from PyPDF2 import PdfReader
 from transformers import AutoTokenizer, AutoModelForSequenceClassification
-app = FastAPI(title="AI Detection Only", version="1.0.0")
 MODEL_NAME = "roberta-base-openai-detector"
-MAX_LENGTH = 512
-# Load model and tokenizer at startup
 tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
-model = AutoModelForSequenceClassification.from_pretrained(MODEL_NAME).eval()
-def extract_text_from_pdf(file: UploadFile) -> str:
-    try:
-        with tempfile.NamedTemporaryFile(delete=False) as tmp:
-            tmp.write(file.file.read())
-            tmp_path = tmp.name
-        reader = PdfReader(tmp_path)
-        text = " ".join(page.extract_text() or "" for page in reader.pages)
-        return text.strip()
-    except Exception as e:
-        raise HTTPException(500, f"PDF processing failed: {str(e)}")
-def detect_ai_percentage(text: str) -> float:
-    inputs = tokenizer(text, truncation=True, max_length=MAX_LENGTH, return_tensors="pt")
     with torch.no_grad():
         outputs = model(**inputs)
-        probs = torch.nn.functional.softmax(outputs.logits, dim=-1)
-        ai_prob = probs[0][1].item()  # Index 1 is usually the AI class
-        return round(ai_prob * 100, 2)
-@app.post("/analyze")
-async def analyze(file: UploadFile = File(...)):
-    if not file.filename.lower().endswith(".pdf"):
-        raise HTTPException(400, "Only PDF files are supported.")
-    text = extract_text_from_pdf(file)
-    if len(text) < 200:
-        raise HTTPException(400, "Text too short for reliable detection.")
-    ai_score = detect_ai_percentage(text)
-    return JSONResponse({"ai_generated": ai_score})
 # from fastapi import FastAPI, UploadFile, File, HTTPException, BackgroundTasks

 # app.py: AI Detection and Plagiarism Check API
+from fastapi import FastAPI, UploadFile, File
+from pydantic import BaseModel
 from transformers import AutoTokenizer, AutoModelForSequenceClassification
+import torch
+app = FastAPI()  # application object the route decorators below register on
+# Load the detector checkpoint and its tokenizer once, at import time, so every request reuses them
 MODEL_NAME = "roberta-base-openai-detector"
 tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
+model = AutoModelForSequenceClassification.from_pretrained(MODEL_NAME)
+# Root route to avoid 404 error on Hugging Face
+@app.get("/")
+def read_root():
+    """Return a short welcome payload explaining how to call the API."""
+    welcome = "Welcome to the Essay AI Detection API!"
+    usage = "POST your essay text file to /detect to check if it's AI-generated."
+    return {"message": welcome, "usage": usage}
+# Define response schema
+class DetectionResult(BaseModel):  # response body returned by POST /detect
+    label: str  # name of the predicted class, e.g. "Human" or "AI-generated"
+    confidence: float  # softmax probability of the predicted class, rounded to 4 decimals
+# POST route for AI content detection
+@app.post("/detect", response_model=DetectionResult)
+async def detect_ai(file: UploadFile = File(...)):
+    """Classify an uploaded UTF-8 text file as human- or AI-written.
+
+    Args:
+        file: Uploaded essay; its bytes are decoded as UTF-8.
+
+    Returns:
+        A dict matching ``DetectionResult``: the predicted ``label`` and the
+        softmax probability of that class as ``confidence`` (4 decimals).
+
+    Raises:
+        HTTPException: 400 when the upload is not valid UTF-8 or contains
+            no text.
+    """
+    # Imported locally: the module-level fastapi import does not include it.
+    from fastapi import HTTPException
+
+    content = await file.read()
+    try:
+        text = content.decode("utf-8")
+    except UnicodeDecodeError as exc:
+        # A binary upload (PDF, DOCX, ...) is a client error, not a server crash.
+        raise HTTPException(400, "File must be UTF-8 encoded text.") from exc
+    if not text.strip():
+        raise HTTPException(400, "Uploaded file contains no text.")
+    # Tokenize and predict; truncation=True clips input that exceeds the
+    # tokenizer's maximum length instead of failing inside the model.
+    inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True)
     with torch.no_grad():
         outputs = model(**inputs)
+    # Single-sequence batch: drop the batch axis to get per-class probabilities.
+    probs = torch.softmax(outputs.logits, dim=1).squeeze(0)
+    pred_index = int(torch.argmax(probs))
+    # Take the class name from the checkpoint's own config instead of assuming
+    # an index order.
+    # NOTE(review): this checkpoint appears to be published with Fake/Real
+    # class names where index 0 is Fake, so the previous hard-coded
+    # ["Human", "AI-generated"] order looks inverted - confirm against the
+    # model's config.json.
+    raw_label = str(model.config.id2label[pred_index])
+    display_names = {"fake": "AI-generated", "real": "Human"}
+    return {
+        # Unknown class names are passed through unchanged rather than guessed.
+        "label": display_names.get(raw_label.lower(), raw_label),
+        "confidence": round(probs[pred_index].item(), 4),
+    }
 # from fastapi import FastAPI, UploadFile, File, HTTPException, BackgroundTasks