""" api_backend.py FastAPI backend for flowchart-to-English processing. This API supports receiving an image file, running YOLO-based detection to identify boxes and arrows, performing OCR, and generating structured JSON + English summary of the flowchart. Endpoints: - POST /process-image: Accepts image input and returns structured flowchart data. """ from fastapi import FastAPI, UploadFile, File, Form from fastapi.middleware.cors import CORSMiddleware from fastapi.responses import JSONResponse import uvicorn from PIL import Image import io import json import base64 import os # ๐Ÿ”ฅ Initialize FastAPI app app = FastAPI() # ๐Ÿ”“ Enable CORS with more specific configuration for Hugging Face Spaces app.add_middleware( CORSMiddleware, allow_origins=["*", "https://venkatviswa-flowchart-to-text.hf.space"], # Include your specific domain allow_credentials=True, allow_methods=["GET", "POST", "OPTIONS"], # Explicitly allow methods allow_headers=["*"], expose_headers=["*"], ) # Add a health check endpoint @app.get("/") async def health_check(): """Health check endpoint to verify API is running.""" return {"status": "ok", "message": "API is running"} @app.options("/process-image") async def options_process_image(): """Handle OPTIONS requests for the process-image endpoint.""" return {} # Lazy-loading for ML modules to avoid startup issues SKIP_MODEL_LOADING = os.getenv("SKIP_MODEL_LOADING", "0") == "1" yolo_module = None ocr_module = None graph_module = None summarizer_module = None def load_modules(): global yolo_module, ocr_module, graph_module, summarizer_module if yolo_module is None: # Only import these when needed, not during startup from yolo_module import run_yolo as yolo_run from ocr_module import extract_text as ocr_extract, count_elements, validate_structure from graph_module import map_arrows, build_flowchart_json from summarizer_module import summarize_flowchart yolo_module = {"run_yolo": yolo_run} ocr_module = { "extract_text": ocr_extract, "count_elements": count_elements, "validate_structure": validate_structure } graph_module = { "map_arrows": map_arrows, "build_flowchart_json": build_flowchart_json } summarizer_module = {"summarize_flowchart": summarize_flowchart} @app.post("/process-image") async def process_image( file: UploadFile = File(...), debug: str = Form("false") ): """ Receives an uploaded flowchart image, performs object detection and OCR, constructs a structured flowchart JSON, and generates a plain-English summary. Args: file (UploadFile): Flowchart image file (.png, .jpg, .jpeg). debug (str): "true" to enable debug mode (includes OCR logs and YOLO preview). Returns: JSONResponse: Contains flowchart structure, summary, debug output, and optional YOLO overlay. """ # Lazy load modules when first request comes in load_modules() debug_mode = debug.lower() == "true" debug_log = [] if debug_mode: debug_log.append("๐Ÿ“ฅ Received file upload") print(f"๐Ÿ“ฅ File received: {file.filename}") # ๐Ÿ–ผ๏ธ Convert file bytes to RGB image contents = await file.read() image = Image.open(io.BytesIO(contents)).convert("RGB") if debug_mode: debug_log.append("โœ… Image converted to RGB") print("โœ… Image converted to RGB") # ๐Ÿ“ฆ YOLO Detection for boxes and arrows boxes, arrows, vis_debug = yolo_module["run_yolo"](image) if debug_mode: debug_log.append(f"๐Ÿ“ฆ Detected {len(boxes)} boxes, {len(arrows)} arrows") # ๐Ÿ” Run OCR on each detected box for box in boxes: box["text"] = ocr_module["extract_text"](image, box["bbox"], debug=debug_mode) print(f"๐Ÿ” OCR for {box['id']}: {box['text']}") if debug_mode: debug_log.append(f"๐Ÿ” {box['id']}: {box['text']}") # ๐Ÿง  Build structured JSON from nodes and edges flowchart_json = graph_module["build_flowchart_json"](boxes, arrows) print("๐Ÿง  Flowchart JSON:", json.dumps(flowchart_json, indent=2)) # โœ… Validate structure structure_info = ocr_module["count_elements"](boxes, arrows, debug=debug_mode) validation = ocr_module["validate_structure"]( flowchart_json, expected_boxes=structure_info["box_count"], expected_arrows=len(arrows), debug=debug_mode ) if debug_mode: debug_log.append(f"๐Ÿงพ Validation: {validation}") # โœ๏ธ Generate plain-English summary summary = summarizer_module["summarize_flowchart"](flowchart_json) print("๐Ÿ“ Summary:", summary) # ๐Ÿ–ผ๏ธ Encode YOLO debug image (if debug enabled) yolo_vis = None if debug_mode and vis_debug: vis_io = io.BytesIO() vis_debug.save(vis_io, format="PNG") yolo_vis = base64.b64encode(vis_io.getvalue()).decode("utf-8") # ๐Ÿ“ค Return full response return JSONResponse({ "flowchart": flowchart_json, "summary": summary, "yolo_vis": yolo_vis, "debug": "\n".join(debug_log) if debug_mode else "" }) if __name__ == "__main__": # Run the FastAPI app using Uvicorn # Get port from environment variable or use default 7860 port = int(os.getenv("API_PORT", 7860)) uvicorn.run(app, host="0.0.0.0", port=port)