ugolefoo committed
Commit c82a662 · verified · 1 Parent(s): cece48d

Update app.py

Files changed (1)
  1. app.py +47 -138
app.py CHANGED
@@ -8,84 +8,22 @@ import uuid
 import os
 
 # ──────────────────────────────────────────────────────────────
-# 1. Utility: Detect rectangular contours (approximate book covers)
+# 1. OCR on the full image (always)
 # ──────────────────────────────────────────────────────────────
-def detect_book_regions(image: np.ndarray, min_area=5000, eps_coef=0.02):
+def ocr_full_image(image: np.ndarray) -> str:
     """
-    Detect rectangular regions in an image that likely correspond to book covers.
-    Returns a list of bounding boxes: (x, y, w, h).
-    """
-    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
-    blurred = cv2.GaussianBlur(gray, (5, 5), 0)
-    edges = cv2.Canny(blurred, 50, 150)
-
-    # Dilate + erode to close gaps
-    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (5, 5))
-    closed = cv2.morphologyEx(edges, cv2.MORPH_CLOSE, kernel)
-
-    contours, _ = cv2.findContours(
-        closed.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE
-    )
-    boxes = []
-
-    for cnt in contours:
-        area = cv2.contourArea(cnt)
-        if area < min_area:
-            continue
-
-        peri = cv2.arcLength(cnt, True)
-        approx = cv2.approxPolyDP(cnt, eps_coef * peri, True)
-
-        # Keep only quadrilaterals
-        if len(approx) == 4:
-            x, y, w, h = cv2.boundingRect(approx)
-            ar = w / float(h)
-            # Filter by typical book-cover aspect ratios
-            # (you can loosen/tighten these ranges if needed)
-            if 0.4 < ar < 0.9 or 1.0 < ar < 1.6:
-                boxes.append((x, y, w, h))
-
-    # Sort left→right, then top→bottom
-    boxes = sorted(boxes, key=lambda b: (b[1], b[0]))
-    return boxes
-
-# ──────────────────────────────────────────────────────────────
-# 2. OCR on a cropped region
-# ──────────────────────────────────────────────────────────────
-def ocr_on_region(image: np.ndarray, box: tuple):
-    """
-    Crop the image to the given box and run Tesseract OCR.
+    Run Tesseract OCR on the entire image (no thresholding).
     Return the raw OCR text.
     """
-    x, y, w, h = box
-    cropped = image[y : y + h, x : x + w]
-    gray_crop = cv2.cvtColor(cropped, cv2.COLOR_BGR2GRAY)
-    _, thresh_crop = cv2.threshold(
-        gray_crop, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU
-    )
-    custom_config = r"--oem 3 --psm 6"
-    text = pytesseract.image_to_string(thresh_crop, config=custom_config)
-    return text.strip()
-
-# ──────────────────────────────────────────────────────────────
-# 3. OCR on the full image (fallback)
-# ──────────────────────────────────────────────────────────────
-def ocr_full_image(image: np.ndarray):
-    """
-    Run OCR on the entire image if no covers were detected.
-    Return the full OCR text (string).
-    """
     gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
-    # Optionally threshold entire image as well
-    _, thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
-    custom_config = r"--oem 3 --psm 6"
-    text = pytesseract.image_to_string(thresh, config=custom_config)
+    # Note: we're NOT thresholding here; sometimes stylized covers lose detail under THRESH_OTSU.
+    text = pytesseract.image_to_string(gray, config="--oem 3 --psm 6")
     return text.strip()
 
 # ──────────────────────────────────────────────────────────────
-# 4. Query OpenLibrary API
+# 2. Query OpenLibrary API
 # ──────────────────────────────────────────────────────────────
-def query_openlibrary(title_text: str, author_text: str = None):
+def query_openlibrary(title_text: str, author_text: str = None) -> dict | None:
     """
     Search OpenLibrary by title (and optional author).
     Return a dict with title, author_name, publisher, first_publish_year, or None.
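
The body of query_openlibrary lies in the unchanged region between this hunk and the next, so the diff does not show it. For orientation, here is a minimal sketch of such a lookup against OpenLibrary's public search endpoint (https://openlibrary.org/search.json); the endpoint and these result fields exist in the public API, but the helper below is a reconstruction rather than the file's actual code, and it assumes the app calls the API with requests:

import requests

def query_openlibrary_sketch(title_text: str, author_text: str | None = None) -> dict | None:
    # Hypothetical reconstruction of the elided lookup: search by title (+ optional author),
    # take the first match, and normalize it to the four columns used by the app.
    params = {"title": title_text, "limit": 1}
    if author_text:
        params["author"] = author_text
    resp = requests.get("https://openlibrary.org/search.json", params=params, timeout=10)
    resp.raise_for_status()
    docs = resp.json().get("docs", [])
    if not docs:
        return None
    doc = docs[0]
    return {
        "title": doc.get("title", ""),
        "author_name": ", ".join(doc.get("author_name", [])),
        "publisher": ", ".join(doc.get("publisher", [])),
        "first_publish_year": doc.get("first_publish_year", ""),
    }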
@@ -113,66 +51,38 @@ def query_openlibrary(title_text: str, author_text: str = None):
     return None
 
 # ──────────────────────────────────────────────────────────────
-# 5. Process one uploaded image
+# 3. Process one uploaded image (single OCR pass)
 # ──────────────────────────────────────────────────────────────
 def process_image(image_file):
     """
-    Gradio passes a PIL image or numpy array. Convert to OpenCV BGR,
-    detect covers → OCR → OpenLibrary.
-    If no covers are found, fall back to OCR on the full image once.
-    Write CSV to a temp file and return (DataFrame, filepath).
+    Gradio passes a PIL image or numpy array. Convert to OpenCV BGR,
+    OCR the entire image, parse the first two lines for title/author,
+    query OpenLibrary once, and return a DataFrame + CSV file path.
     """
     # Convert PIL to OpenCV BGR
     img = np.array(image_file)[:, :, ::-1].copy()
 
-    # 1) Try to detect individual covers
-    boxes = detect_book_regions(img)
-    records = []
+    # 1) Run OCR on full image
+    full_text = ocr_full_image(img)
+    lines = [line.strip() for line in full_text.splitlines() if line.strip()]
 
-    if boxes:
-        # If we found boxes, run OCR + lookup for each
-        for box in boxes:
-            ocr_text = ocr_on_region(img, box)
-            lines = [l.strip() for l in ocr_text.splitlines() if l.strip()]
-            if not lines:
-                continue
-
-            title_guess = lines[0]
-            author_guess = lines[1] if len(lines) > 1 else None
-            meta = query_openlibrary(title_guess, author_guess)
-            if meta:
-                records.append(meta)
-            else:
-                # No OpenLibrary match → still include OCR result
-                records.append(
-                    {
-                        "title": title_guess,
-                        "author_name": author_guess or "",
-                        "publisher": "",
-                        "first_publish_year": "",
-                    }
-                )
-    else:
-        # 2) FALLBACK: no boxes detected → OCR on full image once
-        full_text = ocr_full_image(img)
-        lines = [l.strip() for l in full_text.splitlines() if l.strip()]
-        if lines:
-            # Use first line as title guess, second (if any) as author guess
-            title_guess = lines[0]
-            author_guess = lines[1] if len(lines) > 1 else None
-            meta = query_openlibrary(title_guess, author_guess)
-            if meta:
-                records.append(meta)
-            else:
-                records.append(
-                    {
-                        "title": title_guess,
-                        "author_name": author_guess or "",
-                        "publisher": "",
-                        "first_publish_year": "",
-                    }
-                )
-    # If lines is empty, records remains empty
+    records = []
+    if lines:
+        # Use first line as title, second (if any) as author
+        title_guess = lines[0]
+        author_guess = lines[1] if len(lines) > 1 else None
+        meta = query_openlibrary(title_guess, author_guess)
+
+        if meta:
+            records.append(meta)
+        else:
+            # No match → still include OCR guesses
+            records.append({
+                "title": title_guess,
+                "author_name": author_guess or "",
+                "publisher": "",
+                "first_publish_year": "",
+            })
 
     # Build DataFrame (even if empty)
     df = pd.DataFrame(records, columns=["title", "author_name", "publisher", "first_publish_year"])
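
The temp-file CSV step between this hunk and the next (old lines 179-186 / new lines 89-96) is likewise unchanged and not shown. Given the import uuid and import os visible in the first hunk's context, a plausible sketch of that step, with hypothetical names, would be:

import os
import tempfile
import uuid
import pandas as pd

def write_results_csv(df: pd.DataFrame) -> str:
    # Hypothetical helper mirroring the elided step: write the results DataFrame to a
    # uniquely named CSV in the system temp directory and return its path for gr.File.
    temp_path = os.path.join(tempfile.gettempdir(), f"book_results_{uuid.uuid4().hex}.csv")
    df.to_csv(temp_path, index=False)
    return temp_path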
@@ -187,35 +97,34 @@ def process_image(image_file):
     return df, temp_path
 
 # ──────────────────────────────────────────────────────────────
-# 6. Build the Gradio Interface
+# 4. Build the Gradio Interface
 # ──────────────────────────────────────────────────────────────
 def build_interface():
-    with gr.Blocks(title="Book Cover Scanner") as demo:
+    with gr.Blocks(title="Book Cover OCR + Lookup (Single-Cover Mode)") as demo:
         gr.Markdown(
             """
-            ## Book Cover Scanner + Metadata Lookup
-
-            1. Upload a photo containing one or multiple book covers
-            2. The app will:
-               - Detect individual covers (rectangles).
-               - If any are found, OCR each one and query OpenLibrary for metadata.
-               - If **no** rectangles are detected, OCR the **entire image** once.
-            3. Display all detected/guessed books in a table.
-            4. Download a CSV of the results.
-
-            **Tips:**
-            - For best cover detection: use a flat, well-lit photo with minimal glare/obstructions.
-            - You can also place each cover on a plain background (e.g., a white tabletop).
+            ## Book Cover OCR + OpenLibrary Lookup
+
+            1. Upload a photo of a single book cover (or any cover-style image).
+            2. The app will run OCR on the full image, take:
+               - the **first line** as a "title" guess, and
+               - the **second line** (if any) as an "author" guess, then
+               - query OpenLibrary once for metadata.
+            3. You'll see the result in a table and can download a CSV.
+
+            > **Note:**
+            > • Because we skip rectangle detection, any visible text on your cover (large, legible fonts) should be picked up.
+            > • If you have multiple covers in one photo, only the first "title/author" will be used.
             """
         )
 
         with gr.Row():
-            img_in = gr.Image(type="pil", label="Upload Image of Book Covers")
+            img_in = gr.Image(type="pil", label="Upload Single Book Cover")
             run_button = gr.Button("Scan & Lookup")
 
         output_table = gr.Dataframe(
             headers=["title", "author_name", "publisher", "first_publish_year"],
-            label="Detected Books + Metadata",
+            label="Detected Book Metadata",
             datatype="pandas",
         )
         download_file = gr.File(label="Download CSV")
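
The diff ends before the button wiring and app launch. A self-contained sketch of how these components are typically connected in Gradio follows; the stub process_image and the exact wiring are assumptions, and only the component definitions mirror the hunk above:

import gradio as gr
import pandas as pd

def process_image(image):
    # Stub standing in for the app's real process_image: returns an empty results table and no file.
    cols = ["title", "author_name", "publisher", "first_publish_year"]
    return pd.DataFrame(columns=cols), None

def build_interface():
    with gr.Blocks(title="Book Cover OCR + Lookup") as demo:
        img_in = gr.Image(type="pil", label="Upload Single Book Cover")
        run_button = gr.Button("Scan & Lookup")
        output_table = gr.Dataframe(headers=["title", "author_name", "publisher", "first_publish_year"])
        download_file = gr.File(label="Download CSV")
        # Presumed wiring: process_image returns (DataFrame, csv_path), matching the two outputs.
        run_button.click(fn=process_image, inputs=img_in, outputs=[output_table, download_file])
    return demo

if __name__ == "__main__":
    build_interface().launch()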