Spaces:

ugolefoo
/

bookscanner_app

Runtime error

App Files Files Community

ugolefoo committed on Jun 4

Commit

b97942e

verified ·

1 Parent(s): 972f9e7

Update app.py

Browse files

Files changed (1) hide show

app.py +111 -2

app.py CHANGED Viewed

@@ -54,6 +54,115 @@ def ocr_on_region(image: np.ndarray, box: tuple):
     Return the raw OCR text.
     """
     x, y, w, h = box
-    cropped = image[y:y+h, x:x+w]
     gray_crop = cv2.cvtColor(cropped, cv2.COLOR_BGR2GRAY)
-    _, thresh_crop = cv2.threshold(gray_crop, 0, 255, cv2._

     Return the raw OCR text.
     """
     x, y, w, h = box
+    cropped = image[y:y + h, x:x + w]
     gray_crop = cv2.cvtColor(cropped, cv2.COLOR_BGR2GRAY)
+    _, thresh_crop = cv2.threshold(gray_crop, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
+    custom_config = r'--oem 3 --psm 6'
+    text = pytesseract.image_to_string(thresh_crop, config=custom_config)
+    return text.strip()
+# ──────────────────────────────────────────────────────────────
+# 3. Query OpenLibrary API
+# ──────────────────────────────────────────────────────────────
+def query_openlibrary(title_text: str, author_text: str = None):
+    """
+    Search OpenLibrary by title (and optional author).
+    Return a dict with title, author_name, publisher, first_publish_year, or None.
+    """
+    base_url = "https://openlibrary.org/search.json"
+    params = {"title": title_text}
+    if author_text:
+        params["author"] = author_text
+    try:
+        resp = requests.get(base_url, params=params, timeout=5)
+        resp.raise_for_status()
+        data = resp.json()
+        if data.get("docs"):
+            doc = data["docs"][0]
+            return {
+                "title": doc.get("title", ""),
+                "author_name": ", ".join(doc.get("author_name", [])),
+                "publisher": ", ".join(doc.get("publisher", [])),
+                "first_publish_year": doc.get("first_publish_year", "")
+            }
+    except Exception as e:
+        print(f"OpenLibrary query failed: {e}")
+    return None
+# ──────────────────────────────────────────────────────────────
+# 4. Process one uploaded image
+# ──────────────────────────────────────────────────────────────
+def process_image(image_file):
+    """
+    Gradio passes a PIL image or numpy array. Convert to OpenCV BGR, detect covers → OCR → OpenLibrary.
+    Return a DataFrame and CSV bytes.
+    """
+    img = np.array(image_file)[:, :, ::-1].copy()  # PIL to OpenCV BGR
+    boxes = detect_book_regions(img)
+    records = []
+    for box in boxes:
+        ocr_text = ocr_on_region(img, box)
+        lines = [l.strip() for l in ocr_text.splitlines() if l.strip()]
+        if not lines:
+            continue
+        title_guess = lines[0]
+        author_guess = lines[1] if len(lines) > 1 else None
+        meta = query_openlibrary(title_guess, author_guess)
+        if meta:
+            records.append(meta)
+        else:
+            records.append({
+                "title": title_guess,
+                "author_name": author_guess or "",
+                "publisher": "",
+                "first_publish_year": "",
+            })
+    if not records:
+        df_empty = pd.DataFrame(columns=["title", "author_name", "publisher", "first_publish_year"])
+        return df_empty, df_empty.to_csv(index=False).encode()
+    df = pd.DataFrame(records)
+    csv_bytes = df.to_csv(index=False).encode()
+    return df, csv_bytes
+# ──────────────────────────────────────────────────────────────
+# 5. Build the Gradio Interface
+# ──────────────────────────────────────────────────────────────
+def build_interface():
+    with gr.Blocks(title="Book Cover Scanner") as demo:
+        gr.Markdown(
+            """
+            ## Book Cover Scanner + Metadata Lookup
+            1. Upload a photo containing one or multiple book covers
+            2. The app will detect each cover, run OCR, then query OpenLibrary for metadata
+            3. Results appear in a table below, and you can download a CSV
+            """
+        )
+        with gr.Row():
+            img_in = gr.Image(type="pil", label="Upload Image of Book Covers")
+            run_button = gr.Button("Scan & Lookup")
+        output_table = gr.Dataframe(
+            headers=["title", "author_name", "publisher", "first_publish_year"],
+            label="Detected Books with Metadata"
+        )
+        download_btn = gr.Download(label="Download CSV")
+        def on_run(image):
+            df, csv_bytes = process_image(image)
+            return df, csv_bytes
+        run_button.click(fn=on_run, inputs=[img_in], outputs=[output_table, download_btn])
+    return demo
+if __name__ == "__main__":
+    # Script entry point: build the Blocks UI and start serving it with
+    # Gradio's default launch settings.
+    demo_app = build_interface()
+    demo_app.launch()