Spaces:

ugolefoo
/

bookscanner_app

Runtime error

App Files Community

bookscanner_app / app.py

ugolefoo

Create app.py

430a6c7 verified 2 months ago

raw

history blame

3.58 kB

	import cv2
	import numpy as np
	import pytesseract
	import requests
	import pandas as pd
	import gradio as gr
	from io import BytesIO

	# ──────────────────────────────────────────────────────────────
	# 1. Utility: Detect rectangular contours (approximate book covers)
	# ──────────────────────────────────────────────────────────────
	def detect_book_regions(image: np.ndarray, min_area=10000, eps_coef=0.02):
	    """
	    Detect rectangular regions in an image that likely correspond to book covers.

	    Pipeline: grayscale -> Gaussian blur -> Canny edges -> morphological close
	    -> external contours -> keep 4-vertex polygons with a book-like aspect ratio.

	    Args:
	        image: BGR image (H x W x 3) or an already-grayscale image (H x W).
	            ``None`` or an empty array yields an empty result instead of a
	            cv2 error.
	        min_area: Contours whose area is below this value are discarded.
	        eps_coef: Fraction of the contour perimeter used as the tolerance
	            for the polygon approximation.

	    Returns:
	        A list of bounding boxes ``(x, y, w, h)``, sorted by ``y`` and then
	        by ``x`` (top-to-bottom, ties broken left-to-right).
	    """
	    # Nothing to scan (e.g. no image was supplied): report no regions.
	    if image is None or image.size == 0:
	        return []

	    # A 2-D array is already single-channel; BGR2GRAY would reject it.
	    gray = image if image.ndim == 2 else cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
	    blurred = cv2.GaussianBlur(gray, (5, 5), 0)
	    edges = cv2.Canny(blurred, 50, 150)

	    # Morphological close (dilate, then erode) bridges small gaps in the edges.
	    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (5, 5))
	    closed = cv2.morphologyEx(edges, cv2.MORPH_CLOSE, kernel)

	    # OpenCV 4.x returns (contours, hierarchy) while 3.x returns
	    # (image, contours, hierarchy); pick the contour list in either case.
	    found = cv2.findContours(closed, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
	    contours = found[0] if len(found) == 2 else found[1]

	    boxes = []
	    for cnt in contours:
	        if cv2.contourArea(cnt) < min_area:
	            continue

	        peri = cv2.arcLength(cnt, True)
	        approx = cv2.approxPolyDP(cnt, eps_coef * peri, True)

	        # Keep only quadrilaterals
	        if len(approx) != 4:
	            continue

	        x, y, w, h = cv2.boundingRect(approx)
	        ar = w / float(h)
	        # Filter by typical book-cover aspect ratios.
	        # NOTE: ratios in [0.9, 1.0] fall between the two ranges and are
	        # rejected as written.
	        if 0.4 < ar < 0.9 or 1.0 < ar < 1.6:
	            boxes.append((x, y, w, h))

	    # Sort top→bottom first, then left→right for boxes sharing the same y.
	    boxes.sort(key=lambda b: (b[1], b[0]))
	    return boxes

	# ──────────────────────────────────────────────────────────────
	# 2. OCR on a cropped region
	# ──────────────────────────────────────────────────────────────
	def ocr_on_region(image: np.ndarray, box: tuple):
	    """
	    Crop the image to the given box and run Tesseract OCR.

	    The box is clamped to the image bounds before cropping. A box that lies
	    entirely outside the image, or has a non-positive width/height, yields
	    an empty string instead of a cv2 error on an empty crop.

	    Args:
	        image: BGR image (H x W x 3) or grayscale image (H x W).
	        box: Bounding box as ``(x, y, w, h)``.

	    Returns:
	        The OCR text with leading/trailing whitespace stripped, or ``""``
	        when the box does not overlap the image.
	    """
	    x, y, w, h = box
	    img_h, img_w = image.shape[:2]

	    # Clamp to the image: a negative start index would otherwise wrap around
	    # in NumPy slicing and silently crop the wrong area.
	    x0, y0 = max(x, 0), max(y, 0)
	    x1, y1 = min(x + w, img_w), min(y + h, img_h)
	    if x1 <= x0 or y1 <= y0:
	        return ""

	    cropped = image[y0:y1, x0:x1]
	    # A 2-D crop is already single-channel; BGR2GRAY would reject it.
	    gray_crop = cropped if cropped.ndim == 2 else cv2.cvtColor(cropped, cv2.COLOR_BGR2GRAY)
	    # Otsu picks the binarisation threshold automatically (the 0 is ignored).
	    _, thresh_crop = cv2.threshold(gray_crop, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
	    # --oem 3: default OCR engine; --psm 6: treat the crop as one uniform text block.
	    custom_config = r'--oem 3 --psm 6'
	    text = pytesseract.image_to_string(thresh_crop, config=custom_config)
	    return text.strip()

	# ──────────────────────────────────────────────────────────────
	# 3. Query OpenLibrary API
	# ──────────────────────────────────────────────────────────────
	def query_openlibrary(title_text: str, author_text: str = None):
	"""
	Search OpenLibrary by title (and optional author).
	Return a dict with title, author_name, publisher, first_publish_year, or None.
	"""
	base_url = "https://openlibrary.org/search.json"
	params = {"title": title_text}
	if author_text:
	params["author"] = author_text

	try:
	resp = requests.get(base_url, params=params, timeout=5)