Spaces:

ugolefoo
/

bookscanner_app

Runtime error

App Files Community

ugolefoo committed 30 days ago

Commit

fde34e3

verified ·

1 Parent(s): c82a662

Update app.py

Browse files

Files changed (1) hide show

app.py +35 -12

app.py CHANGED Viewed

@@ -16,7 +16,7 @@ def ocr_full_image(image: np.ndarray) -> str:
     Return the raw OCR text.
     """
     gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
-    # Note: we’re NOT thresholding here—sometimes stylized covers lose detail under THRESH_OTSU.
     text = pytesseract.image_to_string(gray, config="--oem 3 --psm 6")
     return text.strip()
@@ -55,15 +55,38 @@ def query_openlibrary(title_text: str, author_text: str = None) -> dict | None:
 # ──────────────────────────────────────────────────────────────
 def process_image(image_file):
     """
-    Gradio passes a PIL image or numpy array. Convert to OpenCV BGR,
-    OCR the entire image, parse first two lines for title/author,
     query OpenLibrary once, and return a DataFrame + CSV file path.
     """
     # Convert PIL to OpenCV BGR
     img = np.array(image_file)[:, :, ::-1].copy()
     # 1) Run OCR on full image
-    full_text = ocr_full_image(img)
     lines = [line.strip() for line in full_text.splitlines() if line.strip()]
     records = []
@@ -76,7 +99,7 @@ def process_image(image_file):
         if meta:
             records.append(meta)
         else:
-            # No match → still include OCR guesses
             records.append({
                 "title": title_guess,
                 "author_name": author_guess or "",
@@ -100,21 +123,21 @@ def process_image(image_file):
 # 4. Build the Gradio Interface
 # ──────────────────────────────────────────────────────────────
 def build_interface():
-    with gr.Blocks(title="Book Cover OCR + Lookup (Single‐Cover Mode)") as demo:
         gr.Markdown(
             """
             ## Book Cover OCR + OpenLibrary Lookup
-            1. Upload a photo of a single book cover (or any cover‐style image).
             2. The app will run OCR on the full image, take:
                - the **first line** as a “title” guess, and
-               - the **second line** (if any) as an “author” guess, then
-               - query OpenLibrary once for metadata.
-            3. You’ll see the result in a table and can download a CSV.
             > **Note:**
-            > • Because we skip rectangle detection, any visible text on your cover (large, legible fonts) should be picked up.
-            > • If you have multiple covers in one photo, only the first “title/author” will be used.
             """
         )

     Return the raw OCR text.
     """
     gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
+    # We skip explicit thresholding—sometimes stylized covers lose detail under THRESH_OTSU.
     text = pytesseract.image_to_string(gray, config="--oem 3 --psm 6")
     return text.strip()
 # ──────────────────────────────────────────────────────────────
 def process_image(image_file):
     """
+    Gradio passes either a PIL image or None.
+    If image_file is None, return an empty DataFrame and empty CSV.
+    Otherwise, convert to OpenCV BGR, OCR the entire image, parse first two lines for title/author,
     query OpenLibrary once, and return a DataFrame + CSV file path.
     """
+    if image_file is None:
+        # No image provided → return empty table + an empty CSV file
+        df_empty = pd.DataFrame(columns=["title", "author_name", "publisher", "first_publish_year"])
+        empty_bytes = df_empty.to_csv(index=False).encode()
+        unique_name = f"books_{uuid.uuid4().hex}.csv"
+        temp_path = os.path.join("/tmp", unique_name)
+        with open(temp_path, "wb") as f:
+            f.write(empty_bytes)
+        return df_empty, temp_path
     # Convert PIL to OpenCV BGR
     img = np.array(image_file)[:, :, ::-1].copy()
     # 1) Run OCR on full image
+    try:
+        full_text = ocr_full_image(img)
+    except pytesseract.pytesseract.TesseractNotFoundError:
+        # If Tesseract isn’t installed, return empty DataFrame and log the issue
+        print("ERROR: Tesseract not found. Did you add apt.txt with 'tesseract-ocr'?")
+        df_error = pd.DataFrame(columns=["title", "author_name", "publisher", "first_publish_year"])
+        error_bytes = df_error.to_csv(index=False).encode()
+        unique_name = f"books_{uuid.uuid4().hex}.csv"
+        temp_path = os.path.join("/tmp", unique_name)
+        with open(temp_path, "wb") as f:
+            f.write(error_bytes)
+        return df_error, temp_path
     lines = [line.strip() for line in full_text.splitlines() if line.strip()]
     records = []
         if meta:
             records.append(meta)
         else:
+            # No OpenLibrary match → still include OCR guesses
             records.append({
                 "title": title_guess,
                 "author_name": author_guess or "",
 # 4. Build the Gradio Interface
 # ──────────────────────────────────────────────────────────────
 def build_interface():
+    with gr.Blocks(title="Single‐Cover OCR + OpenLibrary Lookup") as demo:
         gr.Markdown(
             """
             ## Book Cover OCR + OpenLibrary Lookup
+            1. Upload a photo of a single book cover.
             2. The app will run OCR on the full image, take:
                - the **first line** as a “title” guess, and
+               - the **second line** as an “author” guess (if present), then
+               - query OpenLibrary for metadata.
+            3. Results display in a table and can be downloaded as CSV.
             > **Note:**
+            > • Ensure Tesseract OCR is installed (see `apt.txt`).
+            > • If no image is uploaded, the table and CSV will be empty.
             """
         )