ugolefoo committed
Commit c82a662 · verified · 1 Parent(s): cece48d

Update app.py

Files changed (1)
  1. app.py +47 -138
app.py CHANGED
@@ -8,84 +8,22 @@ import uuid
 import os
 
 # ──────────────────────────────────────────────────────────────
-# 1. Utility: Detect rectangular contours (approximate book covers)
+# 1. OCR on the full image (always)
 # ──────────────────────────────────────────────────────────────
-def detect_book_regions(image: np.ndarray, min_area=5000, eps_coef=0.02):
+def ocr_full_image(image: np.ndarray) -> str:
     """
-    Detect rectangular regions in an image that likely correspond to book covers.
-    Returns a list of bounding boxes: (x, y, w, h).
-    """
-    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
-    blurred = cv2.GaussianBlur(gray, (5, 5), 0)
-    edges = cv2.Canny(blurred, 50, 150)
-
-    # Dilate + erode to close gaps
-    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (5, 5))
-    closed = cv2.morphologyEx(edges, cv2.MORPH_CLOSE, kernel)
-
-    contours, _ = cv2.findContours(
-        closed.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE
-    )
-    boxes = []
-
-    for cnt in contours:
-        area = cv2.contourArea(cnt)
-        if area < min_area:
-            continue
-
-        peri = cv2.arcLength(cnt, True)
-        approx = cv2.approxPolyDP(cnt, eps_coef * peri, True)
-
-        # Keep only quadrilaterals
-        if len(approx) == 4:
-            x, y, w, h = cv2.boundingRect(approx)
-            ar = w / float(h)
-            # Filter by typical book-cover aspect ratios
-            # (you can loosen/tighten these ranges if needed)
-            if 0.4 < ar < 0.9 or 1.0 < ar < 1.6:
-                boxes.append((x, y, w, h))
-
-    # Sort left→right, then top→bottom
-    boxes = sorted(boxes, key=lambda b: (b[1], b[0]))
-    return boxes
-
-# ──────────────────────────────────────────────────────────────
-# 2. OCR on a cropped region
-# ──────────────────────────────────────────────────────────────
-def ocr_on_region(image: np.ndarray, box: tuple):
-    """
-    Crop the image to the given box and run Tesseract OCR.
+    Run Tesseract OCR on the entire image (no thresholding).
     Return the raw OCR text.
     """
-    x, y, w, h = box
-    cropped = image[y : y + h, x : x + w]
-    gray_crop = cv2.cvtColor(cropped, cv2.COLOR_BGR2GRAY)
-    _, thresh_crop = cv2.threshold(
-        gray_crop, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU
-    )
-    custom_config = r"--oem 3 --psm 6"
-    text = pytesseract.image_to_string(thresh_crop, config=custom_config)
-    return text.strip()
-
-# ──────────────────────────────────────────────────────────────
-# 3. OCR on the full image (fallback)
-# ──────────────────────────────────────────────────────────────
-def ocr_full_image(image: np.ndarray):
-    """
-    Run OCR on the entire image if no covers were detected.
-    Return the full OCR text (string).
-    """
     gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
-    # Optionally threshold entire image as well
-    _, thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
-    custom_config = r"--oem 3 --psm 6"
-    text = pytesseract.image_to_string(thresh, config=custom_config)
+    # Note: we're NOT thresholding here; sometimes stylized covers lose detail under THRESH_OTSU.
+    text = pytesseract.image_to_string(gray, config="--oem 3 --psm 6")
     return text.strip()
 
 # ──────────────────────────────────────────────────────────────
-# 4. Query OpenLibrary API
+# 2. Query OpenLibrary API
 # ──────────────────────────────────────────────────────────────
-def query_openlibrary(title_text: str, author_text: str = None):
+def query_openlibrary(title_text: str, author_text: str = None) -> dict | None:
     """
     Search OpenLibrary by title (and optional author).
     Return a dict with title, author_name, publisher, first_publish_year, or None.
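
The body of query_openlibrary lies in the unchanged region between this hunk and the next, so the diff does not show it. For orientation, here is a minimal sketch of such a lookup against OpenLibrary's public search endpoint (https://openlibrary.org/search.json); the endpoint and these result fields exist in the public API, but the helper below is a reconstruction rather than the file's actual code, and it assumes the app calls the API with requests:

import requests

def query_openlibrary_sketch(title_text: str, author_text: str | None = None) -> dict | None:
    # Hypothetical reconstruction of the elided lookup: search by title (+ optional author),
    # take the first match, and normalize it to the four columns used by the app.
    params = {"title": title_text, "limit": 1}
    if author_text:
        params["author"] = author_text
    resp = requests.get("https://openlibrary.org/search.json", params=params, timeout=10)
    resp.raise_for_status()
    docs = resp.json().get("docs", [])
    if not docs:
        return None
    doc = docs[0]
    return {
        "title": doc.get("title", ""),
        "author_name": ", ".join(doc.get("author_name", [])),
        "publisher": ", ".join(doc.get("publisher", [])),
        "first_publish_year": doc.get("first_publish_year", ""),
    }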
@@ -113,66 +51,38 @@ def query_openlibrary(title_text: str, author_text: str = None):
     return None
 
 # ──────────────────────────────────────────────────────────────
-# 5. Process one uploaded image
+# 3. Process one uploaded image (single OCR pass)
 # ──────────────────────────────────────────────────────────────
 def process_image(image_file):
     """
-    Gradio passes a PIL image or numpy array. Convert to OpenCV BGR,
-    detect covers → OCR → OpenLibrary.
-    If no covers are found, fall back to OCR on the full image once.
-    Write CSV to a temp file and return (DataFrame, filepath).
+    Gradio passes a PIL image or numpy array. Convert to OpenCV BGR,
+    OCR the entire image, parse the first two lines for title/author,
+    query OpenLibrary once, and return a DataFrame + CSV file path.
     """
     # Convert PIL to OpenCV BGR
     img = np.array(image_file)[:, :, ::-1].copy()
 
-    # 1) Try to detect individual covers
-    boxes = detect_book_regions(img)
-    records = []
+    # 1) Run OCR on full image
+    full_text = ocr_full_image(img)
+    lines = [line.strip() for line in full_text.splitlines() if line.strip()]
 
-    if boxes:
-        # If we found boxes, run OCR + lookup for each
-        for box in boxes:
-            ocr_text = ocr_on_region(img, box)
-            lines = [l.strip() for l in ocr_text.splitlines() if l.strip()]
-            if not lines:
-                continue
-
-            title_guess = lines[0]
-            author_guess = lines[1] if len(lines) > 1 else None
-            meta = query_openlibrary(title_guess, author_guess)
-            if meta:
-                records.append(meta)
-            else:
-                # No OpenLibrary match → still include OCR result
-                records.append(
-                    {
-                        "title": title_guess,
-                        "author_name": author_guess or "",
-                        "publisher": "",
-                        "first_publish_year": "",
-                    }
-                )
-    else:
-        # 2) FALLBACK: no boxes detected → OCR on full image once
-        full_text = ocr_full_image(img)
-        lines = [l.strip() for l in full_text.splitlines() if l.strip()]
-        if lines:
-            # Use first line as title guess, second (if any) as author guess
-            title_guess = lines[0]
-            author_guess = lines[1] if len(lines) > 1 else None
-            meta = query_openlibrary(title_guess, author_guess)
-            if meta:
-                records.append(meta)
-            else:
-                records.append(
-                    {
-                        "title": title_guess,
-                        "author_name": author_guess or "",
-                        "publisher": "",
-                        "first_publish_year": "",
-                    }
-                )
-    # If lines is empty, records remains empty
+    records = []
+    if lines:
+        # Use first line as title, second (if any) as author
+        title_guess = lines[0]
+        author_guess = lines[1] if len(lines) > 1 else None
+        meta = query_openlibrary(title_guess, author_guess)
+
+        if meta:
+            records.append(meta)
+        else:
+            # No match → still include OCR guesses
+            records.append({
+                "title": title_guess,
+                "author_name": author_guess or "",
+                "publisher": "",
+                "first_publish_year": "",
+            })
 
     # Build DataFrame (even if empty)
     df = pd.DataFrame(records, columns=["title", "author_name", "publisher", "first_publish_year"])
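
The temp-file CSV step between this hunk and the next (old lines 179-186 / new lines 89-96) is likewise unchanged and not shown. Given the import uuid and import os visible in the first hunk's context, a plausible sketch of that step, with hypothetical names, would be:

import os
import tempfile
import uuid
import pandas as pd

def write_results_csv(df: pd.DataFrame) -> str:
    # Hypothetical helper mirroring the elided step: write the results DataFrame to a
    # uniquely named CSV in the system temp directory and return its path for gr.File.
    temp_path = os.path.join(tempfile.gettempdir(), f"book_results_{uuid.uuid4().hex}.csv")
    df.to_csv(temp_path, index=False)
    return temp_path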
@@ -187,35 +97,34 @@ def process_image(image_file):
     return df, temp_path
 
 # ──────────────────────────────────────────────────────────────
-# 6. Build the Gradio Interface
+# 4. Build the Gradio Interface
 # ──────────────────────────────────────────────────────────────
 def build_interface():
-    with gr.Blocks(title="Book Cover Scanner") as demo:
+    with gr.Blocks(title="Book Cover OCR + Lookup (Single-Cover Mode)") as demo:
         gr.Markdown(
             """
-            ## Book Cover Scanner + Metadata Lookup
-
-            1. Upload a photo containing one or multiple book covers
-            2. The app will:
-               - Detect individual covers (rectangles).
-               - If any are found, OCR each one and query OpenLibrary for metadata.
-               - If **no** rectangles are detected, OCR the **entire image** once.
-            3. Display all detected/guessed books in a table.
-            4. Download a CSV of the results.
-
-            **Tips:**
-            - For best cover detection: use a flat, well-lit photo with minimal glare/obstructions.
-            - You can also place each cover on a plain background (e.g., a white tabletop).
+            ## Book Cover OCR + OpenLibrary Lookup
+
+            1. Upload a photo of a single book cover (or any cover-style image).
+            2. The app will run OCR on the full image, take:
+               - the **first line** as a "title" guess, and
+               - the **second line** (if any) as an "author" guess, then
+               - query OpenLibrary once for metadata.
+            3. You'll see the result in a table and can download a CSV.
+
+            > **Note:**
+            > • Because we skip rectangle detection, any visible text on your cover (large, legible fonts) should be picked up.
+            > • If you have multiple covers in one photo, only the first "title/author" will be used.
             """
         )
 
         with gr.Row():
-            img_in = gr.Image(type="pil", label="Upload Image of Book Covers")
+            img_in = gr.Image(type="pil", label="Upload Single Book Cover")
             run_button = gr.Button("Scan & Lookup")
 
         output_table = gr.Dataframe(
             headers=["title", "author_name", "publisher", "first_publish_year"],
-            label="Detected Books + Metadata",
+            label="Detected Book Metadata",
             datatype="pandas",
         )
         download_file = gr.File(label="Download CSV")
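
The diff ends before the button wiring and app launch. A self-contained sketch of how these components are typically connected in Gradio follows; the stub process_image and the exact wiring are assumptions, and only the component definitions mirror the hunk above:

import gradio as gr
import pandas as pd

def process_image(image):
    # Stub standing in for the app's real process_image: returns an empty results table and no file.
    cols = ["title", "author_name", "publisher", "first_publish_year"]
    return pd.DataFrame(columns=cols), None

def build_interface():
    with gr.Blocks(title="Book Cover OCR + Lookup") as demo:
        img_in = gr.Image(type="pil", label="Upload Single Book Cover")
        run_button = gr.Button("Scan & Lookup")
        output_table = gr.Dataframe(headers=["title", "author_name", "publisher", "first_publish_year"])
        download_file = gr.File(label="Download CSV")
        # Presumed wiring: process_image returns (DataFrame, csv_path), matching the two outputs.
        run_button.click(fn=process_image, inputs=img_in, outputs=[output_table, download_file])
    return demo

if __name__ == "__main__":
    build_interface().launch()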