ugolefoo committed on
Commit
cece48d
·
verified ·
1 Parent(s): d668e84

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +138 -47
app.py CHANGED
@@ -8,22 +8,84 @@ import uuid
8
  import os
9
 
10
  # ──────────────────────────────────────────────────────────────
11
- # 1. OCR on the full image (always)
12
  # ──────────────────────────────────────────────────────────────
13
- def ocr_full_image(image: np.ndarray) -> str:
14
  """
15
- Run Tesseract OCR on the entire image (no thresholding).
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
16
  Return the raw OCR text.
17
  """
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
18
  gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
19
- # Note: we’re NOT thresholding here—sometimes stylized covers lose detail under THRESH_OTSU.
20
- text = pytesseract.image_to_string(gray, config="--oem 3 --psm 6")
 
 
21
  return text.strip()
22
 
23
  # ──────────────────────────────────────────────────────────────
24
- # 2. Query OpenLibrary API
25
  # ──────────────────────────────────────────────────────────────
26
- def query_openlibrary(title_text: str, author_text: str = None) -> dict | None:
27
  """
28
  Search OpenLibrary by title (and optional author).
29
  Return a dict with title, author_name, publisher, first_publish_year, or None.
@@ -51,38 +113,66 @@ def query_openlibrary(title_text: str, author_text: str = None) -> dict | None:
51
  return None
52
 
53
  # ──────────────────────────────────────────────────────────────
54
- # 3. Process one uploaded image (single OCR pass)
55
  # ──────────────────────────────────────────────────────────────
56
  def process_image(image_file):
57
  """
58
- Gradio passes a PIL image or numpy array. Convert to OpenCV BGR,
59
- OCR the entire image, parse first two lines for title/author,
60
- query OpenLibrary once, and return a DataFrame + CSV file path.
 
61
  """
62
  # Convert PIL to OpenCV BGR
63
  img = np.array(image_file)[:, :, ::-1].copy()
64
 
65
- # 1) Run OCR on full image
66
- full_text = ocr_full_image(img)
67
- lines = [line.strip() for line in full_text.splitlines() if line.strip()]
68
-
69
  records = []
70
- if lines:
71
- # Use first line as title, second (if exists) as author
72
- title_guess = lines[0]
73
- author_guess = lines[1] if len(lines) > 1 else None
74
- meta = query_openlibrary(title_guess, author_guess)
75
-
76
- if meta:
77
- records.append(meta)
78
- else:
79
- # No match → still include OCR guesses
80
- records.append({
81
- "title": title_guess,
82
- "author_name": author_guess or "",
83
- "publisher": "",
84
- "first_publish_year": "",
85
- })
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
86
 
87
  # Build DataFrame (even if empty)
88
  df = pd.DataFrame(records, columns=["title", "author_name", "publisher", "first_publish_year"])
@@ -97,34 +187,35 @@ def process_image(image_file):
97
  return df, temp_path
98
 
99
  # ──────────────────────────────────────────────────────────────
100
- # 4. Build the Gradio Interface
101
  # ──────────────────────────────────────────────────────────────
102
  def build_interface():
103
- with gr.Blocks(title="Book Cover OCR + Lookup (Single‐Cover Mode)") as demo:
104
  gr.Markdown(
105
  """
106
- ## Book Cover OCR + OpenLibrary Lookup
107
-
108
- 1. Upload a photo of a single book cover (or any cover‐style image).
109
- 2. The app will run OCR on the full image, take:
110
- - the **first line** as a “title” guess, and
111
- - the **second line** (if any) as an “author” guess, then
112
- - query OpenLibrary once for metadata.
113
- 3. You’ll see the result in a table and can download a CSV.
114
-
115
- > **Note:**
116
- > β€’ Because we skip rectangle detection, any visible text on your cover (large, legible fonts) should be picked up.
117
- > β€’ If you have multiple covers in one photo, only the first β€œtitle/author” will be used.
 
118
  """
119
  )
120
 
121
  with gr.Row():
122
- img_in = gr.Image(type="pil", label="Upload Single Book Cover")
123
  run_button = gr.Button("Scan & Lookup")
124
 
125
  output_table = gr.Dataframe(
126
  headers=["title", "author_name", "publisher", "first_publish_year"],
127
- label="Detected Book Metadata",
128
  datatype="pandas",
129
  )
130
  download_file = gr.File(label="Download CSV")
 
8
  import os
9
 
10
  # ──────────────────────────────────────────────────────────────
11
+ # 1. Utility: Detect rectangular contours (approximate book covers)
12
  # ──────────────────────────────────────────────────────────────
13
def detect_book_regions(
    image: np.ndarray,
    min_area=5000,
    eps_coef=0.02,
    aspect_ranges=((0.4, 0.9), (1.0, 1.6)),
):
    """
    Detect rectangular regions in an image that likely correspond to book covers.

    Pipeline: grayscale -> 5x5 Gaussian blur -> Canny edges (50/150) ->
    morphological close -> external contours. A contour is kept when its area
    is at least ``min_area``, it simplifies to exactly four vertices, and the
    aspect ratio of its bounding box falls inside one of ``aspect_ranges``.

    Args:
        image: Image array; it is converted with ``cv2.COLOR_BGR2GRAY``, so it
            is treated as a BGR colour image.
        min_area: Contours whose ``cv2.contourArea`` is below this value are
            discarded.
        eps_coef: Fraction of the contour perimeter used as the tolerance for
            ``cv2.approxPolyDP``.
        aspect_ranges: Iterable of ``(low, high)`` pairs. A box is accepted
            when ``low < w / h < high`` (both bounds exclusive) for any pair.
            The default reproduces the previously hard-coded test, which
            leaves ratios from 0.9 to 1.0 unmatched.

    Returns:
        A list of bounding boxes ``(x, y, w, h)`` sorted top-to-bottom, with
        ties broken left-to-right.
    """
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    blurred = cv2.GaussianBlur(gray, (5, 5), 0)
    edges = cv2.Canny(blurred, 50, 150)

    # Dilate + erode (morphological close) to bridge small gaps in the edges.
    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (5, 5))
    closed = cv2.morphologyEx(edges, cv2.MORPH_CLOSE, kernel)

    # findContours returns (contours, hierarchy) on OpenCV 2.x/4.x but
    # (image, contours, hierarchy) on 3.x; the contour list is always the
    # second-to-last element, so index from the end instead of unpacking.
    contours = cv2.findContours(
        closed.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE
    )[-2]

    boxes = []
    for cnt in contours:
        if cv2.contourArea(cnt) < min_area:
            continue

        peri = cv2.arcLength(cnt, True)
        approx = cv2.approxPolyDP(cnt, eps_coef * peri, True)

        # Keep only quadrilaterals.
        if len(approx) != 4:
            continue

        x, y, w, h = cv2.boundingRect(approx)
        ar = w / float(h)
        if any(low < ar < high for low, high in aspect_ranges):
            boxes.append((x, y, w, h))

    # Sort top→bottom (y), then left→right (x).
    boxes.sort(key=lambda b: (b[1], b[0]))
    return boxes
51
+
52
+ # ──────────────────────────────────────────────────────────────
53
+ # 2. OCR on a cropped region
54
+ # ──────────────────────────────────────────────────────────────
55
def ocr_on_region(image: np.ndarray, box: tuple):
    """
    Run Tesseract OCR on one rectangular sub-region of an image.

    Args:
        image: Image array; the crop is converted with ``cv2.COLOR_BGR2GRAY``,
            so it is treated as a BGR colour image.
        box: ``(x, y, w, h)`` rectangle to crop, in pixel coordinates.

    Returns:
        The recognised text with leading and trailing whitespace removed.
    """
    left, top, width, height = box
    region = image[top : top + height, left : left + width]

    # With THRESH_OTSU the threshold value is computed by OpenCV, so the 0
    # passed here is only a placeholder; [1] selects the binarised image.
    binary = cv2.threshold(
        cv2.cvtColor(region, cv2.COLOR_BGR2GRAY),
        0,
        255,
        cv2.THRESH_BINARY + cv2.THRESH_OTSU,
    )[1]

    recognised = pytesseract.image_to_string(binary, config=r"--oem 3 --psm 6")
    return recognised.strip()
69
+
70
+ # ──────────────────────────────────────────────────────────────
71
+ # 3. OCR on the full image (fallback)
72
+ # ──────────────────────────────────────────────────────────────
73
def ocr_full_image(image: np.ndarray):
    """
    Run Tesseract OCR over the entire image in a single pass.

    The image is converted from BGR to grayscale and always Otsu-binarised
    before recognition.

    Args:
        image: Image array; it is converted with ``cv2.COLOR_BGR2GRAY``, so it
            is treated as a BGR colour image.

    Returns:
        The recognised text with leading and trailing whitespace removed.
    """
    grayscale = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    # threshold() returns (computed_threshold, image); only the image is needed.
    binarised = cv2.threshold(
        grayscale, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU
    )[1]
    raw_text = pytesseract.image_to_string(binarised, config=r"--oem 3 --psm 6")
    return raw_text.strip()
84
 
85
  # ──────────────────────────────────────────────────────────────
86
+ # 4. Query OpenLibrary API
87
  # ──────────────────────────────────────────────────────────────
88
+ def query_openlibrary(title_text: str, author_text: str = None):
89
  """
90
  Search OpenLibrary by title (and optional author).
91
  Return a dict with title, author_name, publisher, first_publish_year, or None.
 
113
  return None
114
 
115
  # ──────────────────────────────────────────────────────────────
116
+ # 5. Process one uploaded image
117
  # ──────────────────────────────────────────────────────────────
118
  def process_image(image_file):
119
  """
120
+ Gradio passes a PIL image or numpy array. Convert to OpenCV BGR,
121
+ detect covers → OCR → OpenLibrary.
122
+ If no covers are found, fall back to OCR on the full image once.
123
+ Write CSV to a temp file and return (DataFrame, filepath).
124
  """
125
  # Convert PIL to OpenCV BGR
126
  img = np.array(image_file)[:, :, ::-1].copy()
127
 
128
+ # 1) Try to detect individual covers
129
+ boxes = detect_book_regions(img)
 
 
130
  records = []
131
+
132
+ if boxes:
133
+ # If we found boxes, run OCR + lookup for each
134
+ for box in boxes:
135
+ ocr_text = ocr_on_region(img, box)
136
+ lines = [l.strip() for l in ocr_text.splitlines() if l.strip()]
137
+ if not lines:
138
+ continue
139
+
140
+ title_guess = lines[0]
141
+ author_guess = lines[1] if len(lines) > 1 else None
142
+ meta = query_openlibrary(title_guess, author_guess)
143
+ if meta:
144
+ records.append(meta)
145
+ else:
146
+ # No OpenLibrary match → still include OCR result
147
+ records.append(
148
+ {
149
+ "title": title_guess,
150
+ "author_name": author_guess or "",
151
+ "publisher": "",
152
+ "first_publish_year": "",
153
+ }
154
+ )
155
+ else:
156
+ # 2) FALLBACK: no boxes detected → OCR on full image once
157
+ full_text = ocr_full_image(img)
158
+ lines = [l.strip() for l in full_text.splitlines() if l.strip()]
159
+ if lines:
160
+ # Use first line as title guess, second (if any) as author guess
161
+ title_guess = lines[0]
162
+ author_guess = lines[1] if len(lines) > 1 else None
163
+ meta = query_openlibrary(title_guess, author_guess)
164
+ if meta:
165
+ records.append(meta)
166
+ else:
167
+ records.append(
168
+ {
169
+ "title": title_guess,
170
+ "author_name": author_guess or "",
171
+ "publisher": "",
172
+ "first_publish_year": "",
173
+ }
174
+ )
175
+ # If lines is empty, records remains empty
176
 
177
  # Build DataFrame (even if empty)
178
  df = pd.DataFrame(records, columns=["title", "author_name", "publisher", "first_publish_year"])
 
187
  return df, temp_path
188
 
189
  # ──────────────────────────────────────────────────────────────
190
+ # 6. Build the Gradio Interface
191
  # ──────────────────────────────────────────────────────────────
192
  def build_interface():
193
+ with gr.Blocks(title="Book Cover Scanner") as demo:
194
  gr.Markdown(
195
  """
196
+ ## Book Cover Scanner + Metadata Lookup
197
+
198
+ 1. Upload a photo containing one or multiple book covers
199
+ 2. The app will:
200
+ - Detect individual covers (rectangles).
201
+ - If any are found, OCR each one and query OpenLibrary for metadata.
202
+ - If **no** rectangles are detected, OCR the **entire image** once.
203
+ 3. Display all detected/guessed books in a table.
204
+ 4. Download a CSV of the results.
205
+
206
+ **Tips:**
207
+ - For best cover detection: use a flat, well-lit photo with minimal glare/obstructions.
208
+ - You can also place each cover on a plain background (e.g., a white tabletop).
209
  """
210
  )
211
 
212
  with gr.Row():
213
+ img_in = gr.Image(type="pil", label="Upload Image of Book Covers")
214
  run_button = gr.Button("Scan & Lookup")
215
 
216
  output_table = gr.Dataframe(
217
  headers=["title", "author_name", "publisher", "first_publish_year"],
218
+ label="Detected Books + Metadata",
219
  datatype="pandas",
220
  )
221
  download_file = gr.File(label="Download CSV")