"""
api_backend.py
FastAPI backend for flowchart-to-English processing. This API supports receiving
an image file, running YOLO-based detection to identify boxes and arrows, performing
OCR, and generating structured JSON + English summary of the flowchart.
Endpoints:
- POST /process-image: Accepts image input and returns structured flowchart data.
"""
from fastapi import FastAPI, UploadFile, File, Form
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import JSONResponse
import uvicorn
from PIL import Image
import io
import json
import base64
import os
# πŸ”§ Import local processing modules
from yolo_module import run_yolo
from ocr_module import extract_text, count_elements, validate_structure
from graph_module import map_arrows, build_flowchart_json
from summarizer_module import summarize_flowchart
# πŸ”₯ Initialize FastAPI app
app = FastAPI()
# πŸ”“ Enable CORS to allow frontend (e.g., Streamlit on localhost) to connect
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],  # In production, replace with the allowed frontend domain
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)
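# A locked-down deployment would list the frontend explicitly instead of "*",
# e.g. (hypothetical domain):
#   allow_origins=["https://my-frontend.example.com"]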
@app.post("/process-image")
async def process_image(
    file: UploadFile = File(...),
    debug: str = Form("false")
):
    """
    Receives an uploaded flowchart image, performs object detection and OCR,
    constructs a structured flowchart JSON, and generates a plain-English summary.

    Args:
        file (UploadFile): Flowchart image file (.png, .jpg, .jpeg).
        debug (str): "true" to enable debug mode (includes OCR logs and YOLO preview).

    Returns:
        JSONResponse: Contains flowchart structure, summary, debug output, and optional YOLO overlay.
    """
    debug_mode = debug.lower() == "true"
    debug_log = []
    if debug_mode:
        debug_log.append("πŸ“₯ Received file upload")
        print(f"πŸ“₯ File received: {file.filename}")

    # πŸ–ΌοΈ Convert file bytes to RGB image
    contents = await file.read()
    image = Image.open(io.BytesIO(contents)).convert("RGB")
    if debug_mode:
        debug_log.append("βœ… Image converted to RGB")
        print("βœ… Image converted to RGB")

    # πŸ“¦ YOLO Detection for boxes and arrows
    boxes, arrows, vis_debug = run_yolo(image)
    if debug_mode:
        debug_log.append(f"πŸ“¦ Detected {len(boxes)} boxes, {len(arrows)} arrows")

    # πŸ” Run OCR on each detected box
    for box in boxes:
        box["text"] = extract_text(image, box["bbox"], debug=debug_mode)
        print(f"πŸ” OCR for {box['id']}: {box['text']}")
        if debug_mode:
            debug_log.append(f"πŸ” {box['id']}: {box['text']}")

    # 🧠 Build structured JSON from nodes and edges
    flowchart_json = build_flowchart_json(boxes, arrows)
    print("🧠 Flowchart JSON:", json.dumps(flowchart_json, indent=2))

    # βœ… Validate structure
    structure_info = count_elements(boxes, arrows, debug=debug_mode)
    validation = validate_structure(
        flowchart_json,
        expected_boxes=structure_info["box_count"],
        expected_arrows=len(arrows),
        debug=debug_mode
    )
    if debug_mode:
        debug_log.append(f"🧾 Validation: {validation}")

    # ✍️ Generate plain-English summary
    summary = summarize_flowchart(flowchart_json)
    print("πŸ“ Summary:", summary)

    # πŸ–ΌοΈ Encode YOLO debug image (if debug enabled)
    yolo_vis = None
    if debug_mode and vis_debug:
        vis_io = io.BytesIO()
        vis_debug.save(vis_io, format="PNG")
        yolo_vis = base64.b64encode(vis_io.getvalue()).decode("utf-8")

    # πŸ“€ Return full response
    return JSONResponse({
        "flowchart": flowchart_json,
        "summary": summary,
        "yolo_vis": yolo_vis,
        "debug": "\n".join(debug_log) if debug_mode else ""
    })
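# --- Example client (a minimal sketch, not part of this backend) ---
# Assumes the `requests` package is installed and the API is reachable on
# localhost:7860; "flowchart.png" is a hypothetical input file.
#
#   import base64
#   import requests
#
#   with open("flowchart.png", "rb") as f:
#       resp = requests.post(
#           "http://localhost:7860/process-image",
#           files={"file": f},
#           data={"debug": "true"},
#       )
#   data = resp.json()
#   print(data["summary"])  # plain-English summary of the flowchart
#   if data["yolo_vis"]:    # base64-encoded PNG overlay when debug is enabled
#       with open("overlay.png", "wb") as out:
#           out.write(base64.b64decode(data["yolo_vis"]))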
if __name__ == "__main__":
    # Run the FastAPI app using Uvicorn.
    # Get the port from the API_PORT environment variable, defaulting to 7860.
    port = int(os.getenv("API_PORT", 7860))
    uvicorn.run(app, host="0.0.0.0", port=port)
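# Alternatively (assuming this file is on the import path), the same app can
# be served via the uvicorn CLI:
#   uvicorn api_backend:app --host 0.0.0.0 --port 7860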