ugolefoo committed on
Commit
d668e84
·
verified ·
1 Parent(s): bbc77e0

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +44 -82
app.py CHANGED
@@ -8,68 +8,22 @@ import uuid
8
  import os
9
 
10
  # ──────────────────────────────────────────────────────────────
11
- # 1. Utility: Detect rectangular contours (approximate book covers)
12
  # ──────────────────────────────────────────────────────────────
13
- def detect_book_regions(image: np.ndarray, min_area=10000, eps_coef=0.02):
14
  """
15
- Detect rectangular regions in an image that likely correspond to book covers.
16
- Returns a list of bounding boxes: (x, y, w, h).
17
- """
18
- gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
19
- blurred = cv2.GaussianBlur(gray, (5, 5), 0)
20
- edges = cv2.Canny(blurred, 50, 150)
21
-
22
- # Dilate + erode to close gaps
23
- kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (5, 5))
24
- closed = cv2.morphologyEx(edges, cv2.MORPH_CLOSE, kernel)
25
-
26
- contours, _ = cv2.findContours(
27
- closed.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE
28
- )
29
- boxes = []
30
-
31
- for cnt in contours:
32
- area = cv2.contourArea(cnt)
33
- if area < min_area:
34
- continue
35
-
36
- peri = cv2.arcLength(cnt, True)
37
- approx = cv2.approxPolyDP(cnt, eps_coef * peri, True)
38
-
39
- # Keep only quadrilaterals
40
- if len(approx) == 4:
41
- x, y, w, h = cv2.boundingRect(approx)
42
- ar = w / float(h)
43
- # Filter by typical book-cover aspect ratios
44
- if 0.4 < ar < 0.9 or 1.0 < ar < 1.6:
45
- boxes.append((x, y, w, h))
46
-
47
- # Sort left→right, top→bottom
48
- boxes = sorted(boxes, key=lambda b: (b[1], b[0]))
49
- return boxes
50
-
51
- # ──────────────────────────────────────────────────────────────
52
- # 2. OCR on a cropped region
53
- # ──────────────────────────────────────────────────────────────
54
- def ocr_on_region(image: np.ndarray, box: tuple):
55
- """
56
- Crop the image to the given box and run Tesseract OCR.
57
  Return the raw OCR text.
58
  """
59
- x, y, w, h = box
60
- cropped = image[y : y + h, x : x + w]
61
- gray_crop = cv2.cvtColor(cropped, cv2.COLOR_BGR2GRAY)
62
- _, thresh_crop = cv2.threshold(
63
- gray_crop, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU
64
- )
65
- custom_config = r"--oem 3 --psm 6"
66
- text = pytesseract.image_to_string(thresh_crop, config=custom_config)
67
  return text.strip()
68
 
69
  # ──────────────────────────────────────────────────────────────
70
- # 3. Query OpenLibrary API
71
  # ──────────────────────────────────────────────────────────────
72
- def query_openlibrary(title_text: str, author_text: str = None):
73
  """
74
  Search OpenLibrary by title (and optional author).
75
  Return a dict with title, author_name, publisher, first_publish_year, or None.
@@ -97,23 +51,24 @@ def query_openlibrary(title_text: str, author_text: str = None):
97
  return None
98
 
99
  # ──────────────────────────────────────────────────────────────
100
- # 4. Process one uploaded image
101
  # ──────────────────────────────────────────────────────────────
102
  def process_image(image_file):
103
  """
104
- Gradio passes a PIL image or numpy array. Convert to OpenCV BGR, detect covers → OCR → OpenLibrary.
105
- Write CSV to a temp file and return (DataFrame, filepath).
 
106
  """
107
- img = np.array(image_file)[:, :, ::-1].copy() # PIL to OpenCV BGR
108
- boxes = detect_book_regions(img)
109
- records = []
110
 
111
- for box in boxes:
112
- ocr_text = ocr_on_region(img, box)
113
- lines = [l.strip() for l in ocr_text.splitlines() if l.strip()]
114
- if not lines:
115
- continue
116
 
 
 
 
117
  title_guess = lines[0]
118
  author_guess = lines[1] if len(lines) > 1 else None
119
  meta = query_openlibrary(title_guess, author_guess)
@@ -121,20 +76,19 @@ def process_image(image_file):
121
  if meta:
122
  records.append(meta)
123
  else:
124
- records.append(
125
- {
126
- "title": title_guess,
127
- "author_name": author_guess or "",
128
- "publisher": "",
129
- "first_publish_year": "",
130
- }
131
- )
132
 
133
  # Build DataFrame (even if empty)
134
  df = pd.DataFrame(records, columns=["title", "author_name", "publisher", "first_publish_year"])
135
  csv_bytes = df.to_csv(index=False).encode()
136
 
137
- # Write to a unique temporary file
138
  unique_name = f"books_{uuid.uuid4().hex}.csv"
139
  temp_path = os.path.join("/tmp", unique_name)
140
  with open(temp_path, "wb") as f:
@@ -143,26 +97,34 @@ def process_image(image_file):
143
  return df, temp_path
144
 
145
  # ──────────────────────────────────────────────────────────────
146
- # 5. Build the Gradio Interface
147
  # ──────────────────────────────────────────────────────────────
148
  def build_interface():
149
- with gr.Blocks(title="Book Cover Scanner") as demo:
150
  gr.Markdown(
151
  """
152
- ## Book Cover Scanner + Metadata Lookup
153
- 1. Upload a photo containing one or multiple book covers
154
- 2. The app will detect each cover, run OCR, then query OpenLibrary for metadata
155
- 3. Results appear in a table below, and you can download a CSV
 
 
 
 
 
 
 
 
156
  """
157
  )
158
 
159
  with gr.Row():
160
- img_in = gr.Image(type="pil", label="Upload Image of Book Covers")
161
  run_button = gr.Button("Scan & Lookup")
162
 
163
  output_table = gr.Dataframe(
164
  headers=["title", "author_name", "publisher", "first_publish_year"],
165
- label="Detected Books with Metadata",
166
  datatype="pandas",
167
  )
168
  download_file = gr.File(label="Download CSV")
 
8
  import os
9
 
10
  # ──────────────────────────────────────────────────────────────
11
+ # 1. OCR on the full image (always)
12
  # ──────────────────────────────────────────────────────────────
13
+ def ocr_full_image(image: np.ndarray) -> str:
14
  """
15
+ Run Tesseract OCR on the entire image (no thresholding).
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
16
  Return the raw OCR text.
17
  """
18
+ gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
19
+ # Note: we’re NOT thresholding here—sometimes stylized covers lose detail under THRESH_OTSU.
20
+ text = pytesseract.image_to_string(gray, config="--oem 3 --psm 6")
 
 
 
 
 
21
  return text.strip()
22
 
23
  # ──────────────────────────────────────────────────────────────
24
+ # 2. Query OpenLibrary API
25
  # ──────────────────────────────────────────────────────────────
26
+ def query_openlibrary(title_text: str, author_text: str = None) -> dict | None:
27
  """
28
  Search OpenLibrary by title (and optional author).
29
  Return a dict with title, author_name, publisher, first_publish_year, or None.
 
51
  return None
52
 
53
  # ──────────────────────────────────────────────────────────────
54
+ # 3. Process one uploaded image (single OCR pass)
55
  # ──────────────────────────────────────────────────────────────
56
  def process_image(image_file):
57
  """
58
+ Gradio passes a PIL image or numpy array. Convert to OpenCV BGR,
59
+ OCR the entire image, parse first two lines for title/author,
60
+ query OpenLibrary once, and return a DataFrame + CSV file path.
61
  """
62
+ # Convert PIL to OpenCV BGR
63
+ img = np.array(image_file)[:, :, ::-1].copy()
 
64
 
65
+ # 1) Run OCR on full image
66
+ full_text = ocr_full_image(img)
67
+ lines = [line.strip() for line in full_text.splitlines() if line.strip()]
 
 
68
 
69
+ records = []
70
+ if lines:
71
+ # Use first line as title, second (if exists) as author
72
  title_guess = lines[0]
73
  author_guess = lines[1] if len(lines) > 1 else None
74
  meta = query_openlibrary(title_guess, author_guess)
 
76
  if meta:
77
  records.append(meta)
78
  else:
79
+ # No match → still include OCR guesses
80
+ records.append({
81
+ "title": title_guess,
82
+ "author_name": author_guess or "",
83
+ "publisher": "",
84
+ "first_publish_year": "",
85
+ })
 
86
 
87
  # Build DataFrame (even if empty)
88
  df = pd.DataFrame(records, columns=["title", "author_name", "publisher", "first_publish_year"])
89
  csv_bytes = df.to_csv(index=False).encode()
90
 
91
+ # Write CSV to a unique temporary file
92
  unique_name = f"books_{uuid.uuid4().hex}.csv"
93
  temp_path = os.path.join("/tmp", unique_name)
94
  with open(temp_path, "wb") as f:
 
97
  return df, temp_path
98
 
99
  # ──────────────────────────────────────────────────────────────
100
+ # 4. Build the Gradio Interface
101
  # ──────────────────────────────────────────────────────────────
102
  def build_interface():
103
+ with gr.Blocks(title="Book Cover OCR + Lookup (Single‐Cover Mode)") as demo:
104
  gr.Markdown(
105
  """
106
+ ## Book Cover OCR + OpenLibrary Lookup
107
+
108
+ 1. Upload a photo of a single book cover (or any cover‐style image).
109
+ 2. The app will run OCR on the full image, take:
110
+ - the **first line** as a “title” guess, and
111
+ - the **second line** (if any) as an “author” guess, then
112
+ - query OpenLibrary once for metadata.
113
+ 3. You’ll see the result in a table and can download a CSV.
114
+
115
+ > **Note:**
116
+ > • Because we skip rectangle detection, any visible text on your cover (large, legible fonts) should be picked up.
117
+ > • If you have multiple covers in one photo, only the first “title/author” will be used.
118
  """
119
  )
120
 
121
  with gr.Row():
122
+ img_in = gr.Image(type="pil", label="Upload Single Book Cover")
123
  run_button = gr.Button("Scan & Lookup")
124
 
125
  output_table = gr.Dataframe(
126
  headers=["title", "author_name", "publisher", "first_publish_year"],
127
+ label="Detected Book Metadata",
128
  datatype="pandas",
129
  )
130
  download_file = gr.File(label="Download CSV")