Spaces:

can-org
/

AI-Checker

Running

File size: 4,647 Bytes

import asyncio
import hmac
import logging
import os
import re
from io import BytesIO

from fastapi import HTTPException, UploadFile, status, Depends
from fastapi.security import HTTPBearer, HTTPAuthorizationCredentials

from features.nepali_text_classifier.inferencer import classify_text
from features.nepali_text_classifier.preprocess import *

# HTTP Bearer scheme instance; verify_token receives its credentials via Depends(security).
security = HTTPBearer()

def contains_english(text: str) -> bool:
    """Return True when *text* holds at least one ASCII letter (a-z or A-Z).

    Newline and tab characters are dropped before the search is run.
    """
    without_breaks = text.replace("\n", "").replace("\t", "")
    ascii_letter = re.search(r'[a-zA-Z]', without_breaks)
    return ascii_letter is not None


async def verify_token(credentials: HTTPAuthorizationCredentials = Depends(security)):
    """Validate the bearer token presented with the request.

    The token is compared against the ``MY_SECRET_TOKEN`` environment
    variable.

    Args:
        credentials: Bearer credentials resolved by the ``security`` scheme.

    Returns:
        The presented token when it matches the expected one.

    Raises:
        HTTPException: 403 when the token does not match, or when no
            expected token is configured in the environment.
    """
    token = credentials.credentials
    expected_token = os.getenv("MY_SECRET_TOKEN")

    # Fail closed when the secret is unset/empty, and compare in constant
    # time so response timing does not leak how much of the token matched.
    # Both sides are encoded because compare_digest rejects non-ASCII str.
    is_valid = bool(expected_token) and hmac.compare_digest(
        token.encode("utf-8"), expected_token.encode("utf-8")
    )
    if not is_valid:
        raise HTTPException(
            status_code=status.HTTP_403_FORBIDDEN,
            detail="Invalid or expired token"
        )
    return token

async def nepali_text_analysis(text: str):
    """Validate the size of *text* and classify it in a worker thread.

    Returns:
        Whatever ``classify_text`` returns for the text.

    Raises:
        HTTPException: 400 when the text has fewer than 10
            whitespace-separated words; 413 when it is longer than
            10,000 characters.
    """
    # NOTE(review): the return value is discarded; since str is immutable this
    # call cannot alter `text` — confirm whether the result was meant to be used.
    end_symbol_for_NP_text(text)

    word_count = len(text.split())
    if word_count < 10:
        raise HTTPException(status_code=400, detail="Text must contain at least 10 words")

    char_count = len(text)
    if char_count > 10000:
        raise HTTPException(status_code=413, detail="Text must be less than 10,000 characters")

    # classify_text is synchronous; run it off the event loop.
    return await asyncio.to_thread(classify_text, text)


# Extract text from uploaded files (.docx, .pdf, .txt)
async def extract_file_contents(file:UploadFile)-> str:
    """Read an uploaded file and return its text via the matching parser.

    The parser is chosen from the upload's declared content type:
    ``parse_docx`` for Word documents, ``parse_pdf`` for PDFs and
    ``parse_txt`` for plain text.

    Args:
        file: The uploaded file.

    Returns:
        The value returned by the selected parser.

    Raises:
        HTTPException: 415 when the content type is missing or is not one
            of the three supported types.
    """
    # Media types are case-insensitive and may carry parameters such as
    # "; charset=utf-8"; content_type may also be None. Normalise before
    # matching so valid uploads are not rejected.
    media_type = (file.content_type or "").split(";", 1)[0].strip().lower()

    if media_type == "application/vnd.openxmlformats-officedocument.wordprocessingml.document":
        parser = parse_docx
    elif media_type == "application/pdf":
        parser = parse_pdf
    elif media_type == "text/plain":
        parser = parse_txt
    else:
        raise HTTPException(status_code=415,detail="Invalid file type. Only .docx,.pdf and .txt are allowed")

    # Read the body only after the type is accepted, so unsupported uploads
    # are rejected without being loaded into memory.
    content = await file.read()
    return parser(BytesIO(content))

async def handle_file_upload(file: UploadFile):
    """Extract text from an uploaded file and classify it as a whole.

    Args:
        file: The uploaded file, passed on to ``extract_file_contents``.

    Returns:
        Whatever ``classify_text`` returns for the cleaned text.

    Raises:
        HTTPException: 413 when the extracted text exceeds 10,000
            characters; 404 when it is empty or whitespace only; any
            HTTPException raised by ``extract_file_contents``; 500 for
            any other failure.
    """
    try:
        file_contents = await extract_file_contents(file)
        # NOTE(review): the return value is discarded; since str is immutable
        # this call cannot alter `file_contents` — confirm the intent.
        end_symbol_for_NP_text(file_contents)
        if len(file_contents) > 10000:
            raise HTTPException(status_code=413, detail="Text must be less than 10,000 characters")

        cleaned_text = file_contents.replace("\n", " ").replace("\t", " ").strip()
        if not cleaned_text:
            raise HTTPException(status_code=404, detail="The file is empty or only contains whitespace.")

        # classify_text is synchronous; run it off the event loop.
        result = await asyncio.to_thread(classify_text, cleaned_text)
        return result
    except HTTPException:
        # HTTPException is a subclass of Exception: without this clause the
        # deliberate 413/404/415 responses above were rewritten to a 500.
        raise
    except Exception as e:
        logging.exception("Error processing file: %s", e)
        raise HTTPException(status_code=500, detail="Error processing the file") from e



async def handle_sentence_level_analysis(text: str):
    """Classify *text* one sentence at a time.

    The stripped text is split on the danda ("।"); each non-empty piece is
    trimmed, given a trailing danda and classified in a worker thread.

    Returns:
        ``{"analysis": [...]}`` with one entry per sentence holding the
        sentence text, the classifier's ``label`` (as ``result``) and its
        ``confidence`` (as ``likelihood``).

    Raises:
        HTTPException: 413 when the stripped text exceeds 10,000 characters.
    """
    stripped = text.strip()
    if len(stripped) > 10000:
        raise HTTPException(status_code=413, detail="Text must be less than 10,000 characters")

    # NOTE(review): return value discarded here and in the loop below; str is
    # immutable, so these calls cannot alter their argument — confirm intent.
    end_symbol_for_NP_text(stripped)

    # Trim each danda-separated piece, drop the empty ones, restore the danda.
    pieces = (chunk.strip() for chunk in stripped.split("।"))
    sentences = [piece + "।" for piece in pieces if piece]

    analysis = []
    for sentence in sentences:
        end_symbol_for_NP_text(sentence)
        outcome = await asyncio.to_thread(classify_text, sentence)
        entry = {
            "text": sentence,
            "result": outcome["label"],
            "likelihood": outcome["confidence"],
        }
        analysis.append(entry)

    return {"analysis": analysis}


async def handle_file_sentence(file:UploadFile):
    """Extract text from an uploaded file and classify it sentence by sentence.

    Args:
        file: The uploaded file, passed on to ``extract_file_contents``.

    Returns:
        ``{"analysis": [...]}`` with one entry per sentence holding the
        sentence text, the classifier's ``label`` (as ``result``) and its
        ``confidence`` (as ``likelihood``).

    Raises:
        HTTPException: 413 when the extracted text exceeds 10,000
            characters; 404 when it is empty or whitespace only; any
            HTTPException raised by ``extract_file_contents``; 500 for
            any other failure.
    """
    try:
        file_contents = await extract_file_contents(file)
        if len(file_contents) > 10000:
            raise HTTPException(status_code=413, detail="Text must be less than 10,000 characters")

        cleaned_text = file_contents.replace("\n", " ").replace("\t", " ").strip()
        if not cleaned_text:
            raise HTTPException(status_code=404, detail="The file is empty or only contains whitespace.")

        # Split on the danda and re-append it to every non-empty piece, so a
        # final sentence without a trailing danda is still included.
        sentences = [s.strip() + "।" for s in cleaned_text.split("।") if s.strip()]

        results = []
        for sentence in sentences:
            # NOTE(review): return value discarded; str is immutable, so this
            # call cannot alter `sentence` — confirm the intent.
            end_symbol_for_NP_text(sentence)

            # classify_text is synchronous; run it off the event loop.
            result = await asyncio.to_thread(classify_text, sentence)
            results.append({
                "text": sentence,
                "result": result["label"],
                "likelihood": result["confidence"]
            })

        return {"analysis": results}

    except HTTPException:
        # HTTPException is a subclass of Exception: without this clause the
        # deliberate 413/404/415 responses above were rewritten to a 500.
        raise
    except Exception as e:
        logging.exception("Error processing file: %s", e)
        raise HTTPException(status_code=500, detail="Error processing the file") from e


def classify(text: str):
    """Synchronously classify *text* by delegating to ``classify_text``."""
    prediction = classify_text(text)
    return prediction