# Captured from a hosted "Spaces" file viewer (Space status at capture: Sleeping).
# File size: 7,764 bytes.
# Revisions shown in the viewer's blame gutter: d6bab4e, 1b5f903, a1c0d1f, de7714f, 76e8a07.
# The viewer's line-number gutter (1-209) was page residue and is omitted here.
# # ! pip uninstall -y tensorflow
# # ! pip install "python-doctr[torch,viz]"
# from fastapi import FastAPI, UploadFile, File
# from fastapi.responses import JSONResponse
# from utils import dev_number, roman_number, dev_letter, roman_letter
# import tempfile
# app = FastAPI()
# @app.post("/ocr_dev_number/")
# async def extract_dev_number(image: UploadFile = File(...)):
# # Save uploaded image temporarily
# with tempfile.NamedTemporaryFile(delete=False, suffix=".png") as tmp:
# content = await image.read()
# tmp.write(content)
# tmp_path = tmp.name
# # predict the image
# predicted_str = dev_number(tmp_path)
# # Return result as JSON
# return JSONResponse(content={"predicted_str": predicted_str})
# @app.post("/ocr_roman_number/")
# async def extract_roman_number(image: UploadFile = File(...)):
# # Save uploaded image temporarily
# with tempfile.NamedTemporaryFile(delete=False, suffix=".png") as tmp:
# content = await image.read()
# tmp.write(content)
# tmp_path = tmp.name
# # predict the image
# predicted_str = roman_number(tmp_path)
# # Return result as JSON
# return JSONResponse(content={"predicted_str": predicted_str})
# @app.post("/ocr_dev_letter/")
# async def extract_dev_letter(image: UploadFile = File(...)):
# # Save uploaded image temporarily
# with tempfile.NamedTemporaryFile(delete=False, suffix=".png") as tmp:
# content = await image.read()
# tmp.write(content)
# tmp_path = tmp.name
# # predict the image
# predicted_str = dev_letter(tmp_path)
# # Return result as JSON
# return JSONResponse(content={"predicted_str": predicted_str})
# @app.post("/ocr_roman_letter/")
# async def extract_roman_letter(image: UploadFile = File(...)):
# # Save uploaded image temporarily
# with tempfile.NamedTemporaryFile(delete=False, suffix=".png") as tmp:
# content = await image.read()
# tmp.write(content)
# tmp_path = tmp.name
# # predict the image
# predicted_str = roman_letter(tmp_path)
# # Return result as JSON
# return JSONResponse(content={"predicted_str": predicted_str})
import os
import shutil
import tempfile
from typing import Literal, Optional

from fastapi import FastAPI, UploadFile, File, HTTPException, Depends
from fastapi.responses import JSONResponse
from pydantic import BaseModel

# Import from optimized utils
from utils import dev_number, roman_number, dev_letter, roman_letter, predict_ne, perform_citizenship_ocr
# Application object on which every route in this module is registered.
app = FastAPI(
    title="OCR API",
    description="API for optical character recognition of Roman and Devanagari text",
    version="1.0.0",
)
class OCRResponse(BaseModel):
    """Response model for OCR endpoints"""

    # NOTE(review): no endpoint visible in this file declares this model as
    # its response_model; the handlers build JSONResponse objects directly.

    # Recognized text, matching the "predicted_str" key the endpoints return.
    predicted_str: str
    # Optional confidence value. The default is None, so the annotation must
    # allow None; a bare ``float`` annotation contradicted the default.
    confidence: Optional[float] = None
# Helper function to handle file uploads consistently
async def save_upload_file_tmp(upload_file: UploadFile) -> str:
    """Save an upload file to a temporary file and return the path.

    The temporary file is created with ``delete=False`` so it still exists
    after this coroutine returns; the caller is responsible for removing it.

    Args:
        upload_file: Uploaded file. Its ``filename`` supplies the suffix of
            the temporary file and its awaited ``read()`` supplies the bytes.

    Returns:
        Path of the temporary file holding the uploaded content.

    Raises:
        HTTPException: With status 500 if the upload cannot be read or
            written. Any partially written temporary file is removed first.
    """
    tmp_path = None
    try:
        # Keep the client's extension; fall back to ".png" when the filename
        # is missing or has no extension at all.
        suffix = os.path.splitext(upload_file.filename or "")[1] or ".png"
        with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as tmp:
            # Record the path before any I/O so a failure can be cleaned up.
            tmp_path = tmp.name
            tmp.write(await upload_file.read())
        return tmp_path
    except Exception as e:
        # delete=False means nothing else removes the file after a failure.
        if tmp_path is not None and os.path.exists(tmp_path):
            os.unlink(tmp_path)
        raise HTTPException(status_code=500, detail=f"Error processing file: {str(e)}") from e
# Generic OCR function that can be reused across endpoints
def _extract_doctr_text(result) -> str:
    """Join the recognized text of a doctr-style result into one string.

    doctr documents its output as document -> pages -> blocks -> lines ->
    words, with the text stored on each word's ``value``. A block that
    exposes ``value`` directly (the shape the previous code assumed) is
    still accepted.
    """
    pieces = []
    for page in result.pages:
        for block in page.blocks:
            if hasattr(block, "value"):
                pieces.append(block.value)
                continue
            for line in block.lines:
                pieces.extend(word.value for word in line.words)
    return " ".join(pieces)


async def process_ocr_request(
    image: UploadFile = File(...),
    ocr_function=None
):
    """Process an OCR request using the specified OCR function.

    Args:
        image: Uploaded image; it is written to a temporary file first.
        ocr_function: Callable that receives the temporary file path and
            returns either a string or a doctr-style result object.

    Returns:
        JSONResponse whose body is ``{"predicted_str": <text>}``.

    Raises:
        HTTPException: Status 500 when no OCR function is given or when
            processing fails. An HTTPException raised while saving the upload
            is passed through unchanged instead of being re-wrapped.
    """
    if not ocr_function:
        raise HTTPException(status_code=500, detail="OCR function not specified")

    tmp_path = None
    try:
        # Save uploaded image temporarily
        tmp_path = await save_upload_file_tmp(image)
        # Process the image with the specified OCR function
        result = ocr_function(tmp_path)
        # Handle different types of results (string vs doctr output)
        if isinstance(result, str):
            predicted = result
        else:
            predicted = _extract_doctr_text(result)
        return JSONResponse(content={"predicted_str": predicted})
    except HTTPException:
        # Already carries a status code and detail; do not wrap it again.
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"OCR processing error: {str(e)}") from e
    finally:
        # Single cleanup point for both the success and the failure path.
        if tmp_path is not None and os.path.exists(tmp_path):
            os.unlink(tmp_path)
# Endpoints with minimal duplication
@app.post("/ocr/", summary="Generic OCR endpoint")
async def extract_text(
    image: UploadFile = File(...),
    model_type: Literal["dev_number", "roman_number", "dev_letter", "roman_letter"] = "roman_letter"
):
    """
    Generic OCR endpoint that can handle any supported recognition type.
    - **image**: Image file to process
    - **model_type**: Type of OCR to perform
    """
    # Dispatch table: accepted model_type value -> recognizer imported from utils.
    recognizers = {
        "dev_number": dev_number,
        "roman_number": roman_number,
        "dev_letter": dev_letter,
        "roman_letter": roman_letter,
    }
    recognizer = recognizers.get(model_type)
    # Reject anything that is not a key of the table with a 400.
    if recognizer is None:
        raise HTTPException(status_code=400, detail=f"Invalid model type: {model_type}")
    response = await process_ocr_request(image, recognizer)
    return response
# For backward compatibility, keep the original endpoints
@app.post("/ocr_dev_number/")
async def extract_dev_number(image: UploadFile = File(...)):
    """Extract Devanagari numbers from an image"""
    # Legacy route: hand the upload to the shared OCR helper with dev_number.
    response = await process_ocr_request(image, ocr_function=dev_number)
    return response
@app.post("/ocr_roman_number/")
async def extract_roman_number(image: UploadFile = File(...)):
    """Extract Roman numbers from an image"""
    # Legacy route: hand the upload to the shared OCR helper with roman_number.
    response = await process_ocr_request(image, ocr_function=roman_number)
    return response
@app.post("/ocr_dev_letter/")
async def extract_dev_letter(image: UploadFile = File(...)):
    """Extract Devanagari letters from an image"""
    # Legacy route: hand the upload to the shared OCR helper with dev_letter.
    response = await process_ocr_request(image, ocr_function=dev_letter)
    return response
@app.post("/ocr_roman_letter/")
async def extract_roman_letter(image: UploadFile = File(...)):
    """Extract Roman letters from an image"""
    # Legacy route: hand the upload to the shared OCR helper with roman_letter.
    response = await process_ocr_request(image, ocr_function=roman_letter)
    return response
@app.post("/predict_ne")
async def classify_ne(image: UploadFile = File(...)):
    """Predict Named Entities from an image"""
    # NOTE(review): the commented-out model path below mentions
    # "nepali_english_classifier", so "ne" may mean Nepali/English rather
    # than named entities — confirm and adjust the docstring if so.
    image_path = await save_upload_file_tmp(image)
    try:
        prediction = predict_ne(
            image_path=image_path,
            # model="models/nepali_english_classifier.pth",  # Update with actual model path
            device="cpu"  # inference is pinned to the CPU here
        )
    finally:
        # The upload helper creates the file with delete=False, so it must be
        # removed here whether or not the prediction succeeded.
        if os.path.exists(image_path):
            os.unlink(image_path)
    return JSONResponse(content={"predicted": prediction})
@app.post("/ocr_citizenship/")
async def ocr_citizenship(image: UploadFile = File(...)):
    """OCR the provided Nepali Citizenship card"""
    image_path = await save_upload_file_tmp(image)
    try:
        prediction = perform_citizenship_ocr(
            image_path=image_path,
        )
    finally:
        # The upload helper creates the file with delete=False, so it must be
        # removed here whether or not the OCR call succeeded.
        if os.path.exists(image_path):
            os.unlink(image_path)
    # The OCR result is returned as the JSON body as-is.
    return JSONResponse(content=prediction)
# Health check endpoint
@app.get("/health")
async def health_check():
    """Health check endpoint to verify the API is running"""
    # Constant payload; no model or file system access is involved.
    return {"status": "healthy"}