import asyncio
import logging
import os
from io import BytesIO

from fastapi import Depends, HTTPException, UploadFile, status
from fastapi.security import HTTPAuthorizationCredentials, HTTPBearer
from nltk.tokenize import sent_tokenize

from .inferencer import classify_text
from .preprocess import parse_docx, parse_pdf, parse_txt

security = HTTPBearer()


# Verify the Bearer token from the Authorization header
async def verify_token(credentials: HTTPAuthorizationCredentials = Depends(security)):
    token = credentials.credentials
    expected_token = os.getenv("MY_SECRET_TOKEN")
    if token != expected_token:
        raise HTTPException(
            status_code=status.HTTP_403_FORBIDDEN,
            detail="Invalid or expired token",
        )
    return token


# Classify plain text input
async def handle_text_analysis(text: str):
    text = text.strip()
    if not text or len(text.split()) < 10:
        raise HTTPException(status_code=400, detail="Text must contain at least 10 words")
    if len(text) > 10000:
        raise HTTPException(status_code=413, detail="Text must be less than 10,000 characters")
    # classify_text is CPU-bound, so run it in a worker thread to avoid
    # blocking the event loop
    label, perplexity, ai_likelihood = await asyncio.to_thread(classify_text, text)
    return {
        "result": label,
        "perplexity": round(perplexity, 2),
        "ai_likelihood": ai_likelihood,
    }


# Extract text from uploaded files (.docx, .pdf, .txt)
async def extract_file_contents(file: UploadFile) -> str:
    content = await file.read()
    file_stream = BytesIO(content)
    if file.content_type == "application/vnd.openxmlformats-officedocument.wordprocessingml.document":
        return parse_docx(file_stream)
    elif file.content_type == "application/pdf":
        return parse_pdf(file_stream)
    elif file.content_type == "text/plain":
        return parse_txt(file_stream)
    raise HTTPException(
        status_code=415,
        detail="Invalid file type. Only .docx, .pdf, and .txt are allowed.",
    )


# Classify text from an uploaded file
async def handle_file_upload(file: UploadFile):
    try:
        file_contents = await extract_file_contents(file)
        if len(file_contents) > 10000:
            return {"message": "File contains more than 10,000 characters."}
        cleaned_text = file_contents.replace("\n", " ").replace("\t", " ").strip()
        if not cleaned_text:
            raise HTTPException(status_code=400, detail="The file is empty or only contains whitespace.")
        label, perplexity, ai_likelihood = await asyncio.to_thread(classify_text, cleaned_text)
        return {
            "content": file_contents,
            "result": label,
            "perplexity": round(perplexity, 2),
            "ai_likelihood": ai_likelihood,
        }
    except HTTPException:
        # Re-raise deliberate HTTP errors (400, 413, 415, ...) rather than
        # letting the handler below mask them as 500s
        raise
    except Exception as e:
        logging.error(f"Error processing file: {e}")
        raise HTTPException(status_code=500, detail="Error processing the file")
# Analyze each sentence in plain text input
async def handle_sentence_level_analysis(text: str):
    text = text.strip()
    if not text:
        raise HTTPException(status_code=400, detail="Text must not be empty")
    # Ensure the text ends with a period so the tokenizer sees a complete
    # final sentence (text.endswith avoids an IndexError on short input)
    if not text.endswith("."):
        text += "."
    if len(text) > 10000:
        raise HTTPException(status_code=413, detail="Text must be less than 10,000 characters")
    # sent_tokenize requires NLTK's "punkt" models (nltk.download("punkt"))
    sentences = sent_tokenize(text, language="english")
    results = []
    for sentence in sentences:
        if not sentence.strip():
            continue
        label, perplexity, ai_likelihood = await asyncio.to_thread(classify_text, sentence)
        results.append({
            "sentence": sentence,
            "label": label,
            "perplexity": round(perplexity, 2),
            "ai_likelihood": ai_likelihood,
        })
    return {"analysis": results}


# Analyze each sentence from an uploaded file
async def handle_file_sentence(file: UploadFile):
    try:
        file_contents = await extract_file_contents(file)
        if len(file_contents) > 10000:
            return {"message": "File contains more than 10,000 characters."}
        cleaned_text = file_contents.replace("\n", " ").replace("\t", " ").strip()
        if not cleaned_text:
            raise HTTPException(status_code=400, detail="The file is empty or only contains whitespace.")
        result = await handle_sentence_level_analysis(cleaned_text)
        return {"content": file_contents, **result}
    except HTTPException:
        raise
    except Exception as e:
        logging.error(f"Error processing file: {e}")
        raise HTTPException(status_code=500, detail="Error processing the file")


# Optional synchronous helper function
def classify(text: str):
    return classify_text(text)
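
# --- Illustrative route wiring (a sketch, not part of the original module) ---
# The handlers above are shaped like FastAPI route bodies guarded by
# verify_token. The router, paths, and request shapes below are assumptions
# for demonstration only; adapt them to the application's actual routes.
from fastapi import APIRouter, Body, File

router = APIRouter(dependencies=[Depends(verify_token)])


@router.post("/analyze/text")
async def analyze_text(text: str = Body(..., embed=True)):
    # Expects a JSON body like {"text": "..."}
    return await handle_text_analysis(text)


@router.post("/analyze/file")
async def analyze_file(file: UploadFile = File(...)):
    return await handle_file_upload(file)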