ugolefoo committed on
Commit
b97942e
·
verified ·
1 Parent(s): 972f9e7

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +111 -2
app.py CHANGED
@@ -54,6 +54,115 @@ def ocr_on_region(image: np.ndarray, box: tuple):
54
  Return the raw OCR text.
55
  """
56
  x, y, w, h = box
57
- cropped = image[y:y+h, x:x+w]
58
  gray_crop = cv2.cvtColor(cropped, cv2.COLOR_BGR2GRAY)
59
- _, thresh_crop = cv2.threshold(gray_crop, 0, 255, cv2._
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
54
  Return the raw OCR text.
55
  """
56
  x, y, w, h = box
57
+ cropped = image[y:y + h, x:x + w]
58
  gray_crop = cv2.cvtColor(cropped, cv2.COLOR_BGR2GRAY)
59
+ _, thresh_crop = cv2.threshold(gray_crop, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
60
+ custom_config = r'--oem 3 --psm 6'
61
+ text = pytesseract.image_to_string(thresh_crop, config=custom_config)
62
+ return text.strip()
63
+
64
+ # ──────────────────────────────────────────────────────────────
65
+ # 3. Query OpenLibrary API
66
+ # ──────────────────────────────────────────────────────────────
67
def query_openlibrary(title_text: str, author_text: str = None):
    """
    Search OpenLibrary by title (and optional author).

    Args:
        title_text: Title to search for. A blank or missing title yields
            ``None`` without issuing a request.
        author_text: Optional author name used to narrow the search.

    Returns:
        A dict with the keys ``title``, ``author_name``, ``publisher`` and
        ``first_publish_year`` describing the first search hit, or ``None``
        when the request fails or nothing matches.
    """
    # An empty title cannot produce a meaningful match; skip the round trip.
    if not title_text or not title_text.strip():
        return None

    base_url = "https://openlibrary.org/search.json"
    params = {"title": title_text}
    if author_text:
        params["author"] = author_text

    # Keep the try body limited to the calls that can fail for network or
    # decoding reasons, so unrelated programming errors are not hidden.
    try:
        resp = requests.get(base_url, params=params, timeout=5)
        resp.raise_for_status()
        data = resp.json()
    except (requests.RequestException, ValueError) as e:
        print(f"OpenLibrary query failed: {e}")
        return None

    docs = data.get("docs") if isinstance(data, dict) else None
    if not isinstance(docs, list) or not docs or not isinstance(docs[0], dict):
        return None

    doc = docs[0]
    return {
        "title": doc.get("title", ""),
        # `or []` guards against explicit nulls; map(str, ...) against
        # non-string entries that would make str.join raise.
        "author_name": ", ".join(map(str, doc.get("author_name") or [])),
        "publisher": ", ".join(map(str, doc.get("publisher") or [])),
        "first_publish_year": doc.get("first_publish_year", ""),
    }
93
+
94
+ # ──────────────────────────────────────────────────────────────
95
+ # 4. Process one uploaded image
96
+ # ──────────────────────────────────────────────────────────────
97
def process_image(image_file):
    """
    Detect book covers in an uploaded image, OCR each one and look it up.

    Gradio passes a PIL image or numpy array. It is converted to an OpenCV
    style BGR array, then: detect covers → OCR → OpenLibrary.

    Args:
        image_file: PIL image, numpy array, or ``None`` when nothing was
            uploaded.

    Returns:
        A ``(DataFrame, CSV bytes)`` tuple with the columns ``title``,
        ``author_name``, ``publisher`` and ``first_publish_year``. Both are
        empty (header only) when there is no image or nothing was recognised.
    """
    columns = ["title", "author_name", "publisher", "first_publish_year"]
    records = []

    if image_file is not None:
        # PIL images may be grayscale, palettised or RGBA; force plain RGB so
        # the channel reversal below always yields a 3-channel BGR array.
        if hasattr(image_file, "convert"):
            image_file = image_file.convert("RGB")
        rgb = np.asarray(image_file)
        if rgb.ndim == 2:
            # Grayscale numpy input: replicate the single channel.
            rgb = np.stack([rgb, rgb, rgb], axis=-1)
        # Drop any alpha channel, then flip RGB -> BGR.
        img = rgb[:, :, :3][:, :, ::-1].copy()

        for box in detect_book_regions(img):
            ocr_text = ocr_on_region(img, box)
            lines = [ln.strip() for ln in ocr_text.splitlines() if ln.strip()]
            if not lines:
                continue

            # Heuristic: first non-empty OCR line is the title, second the author.
            title_guess = lines[0]
            author_guess = lines[1] if len(lines) > 1 else None
            meta = query_openlibrary(title_guess, author_guess)

            if meta:
                records.append(meta)
            else:
                # No catalogue match: keep the raw OCR guesses.
                records.append({
                    "title": title_guess,
                    "author_name": author_guess or "",
                    "publisher": "",
                    "first_publish_year": "",
                })

    # Passing `columns` keeps the header stable even when `records` is empty.
    df = pd.DataFrame(records, columns=columns)
    csv_bytes = df.to_csv(index=False).encode()
    return df, csv_bytes
133
+
134
+ # ──────────────────────────────────────────────────────────────
135
+ # 5. Build the Gradio Interface
136
+ # ──────────────────────────────────────────────────────────────
137
def build_interface():
    """
    Build the Gradio Blocks UI for the book cover scanner.

    The layout is an image upload plus a run button, a results table, and a
    file output offering the results as a CSV download.

    Returns:
        The constructed ``gr.Blocks`` app (not yet launched).
    """
    # Local import: only this function needs it.
    import tempfile

    with gr.Blocks(title="Book Cover Scanner") as demo:
        gr.Markdown(
            """
            ## Book Cover Scanner + Metadata Lookup
            1. Upload a photo containing one or multiple book covers
            2. The app will detect each cover, run OCR, then query OpenLibrary for metadata
            3. Results appear in a table below, and you can download a CSV
            """
        )

        with gr.Row():
            img_in = gr.Image(type="pil", label="Upload Image of Book Covers")
            run_button = gr.Button("Scan & Lookup")

        output_table = gr.Dataframe(
            headers=["title", "author_name", "publisher", "first_publish_year"],
            label="Detected Books with Metadata",
        )
        # Gradio has no `gr.Download` component; `gr.File` serves a
        # downloadable file when the callback returns a path.
        download_btn = gr.File(label="Download CSV")

        def on_run(image):
            df, csv_bytes = process_image(image)
            # gr.File expects a path on disk, so persist the CSV bytes to a
            # temp file that outlives this callback (delete=False).
            with tempfile.NamedTemporaryFile(
                mode="wb", prefix="books_", suffix=".csv", delete=False
            ) as tmp:
                tmp.write(csv_bytes)
            return df, tmp.name

        run_button.click(fn=on_run, inputs=[img_in], outputs=[output_table, download_btn])

    return demo
165
+
166
if __name__ == "__main__":
    # Build and launch the UI only when executed as a script, so importing
    # this module does not start a server.
    demo_app = build_interface()
    demo_app.launch()