ugolefoo committed on
Commit
fde34e3
·
verified ·
1 Parent(s): c82a662

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +35 -12
app.py CHANGED
@@ -16,7 +16,7 @@ def ocr_full_image(image: np.ndarray) -> str:
16
  Return the raw OCR text.
17
  """
18
  gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
19
- # Note: we’re NOT thresholding here—sometimes stylized covers lose detail under THRESH_OTSU.
20
  text = pytesseract.image_to_string(gray, config="--oem 3 --psm 6")
21
  return text.strip()
22
 
@@ -55,15 +55,38 @@ def query_openlibrary(title_text: str, author_text: str = None) -> dict | None:
55
  # ──────────────────────────────────────────────────────────────
56
  def process_image(image_file):
57
  """
58
- Gradio passes a PIL image or numpy array. Convert to OpenCV BGR,
59
- OCR the entire image, parse first two lines for title/author,
 
60
  query OpenLibrary once, and return a DataFrame + CSV file path.
61
  """
 
 
 
 
 
 
 
 
 
 
62
  # Convert PIL to OpenCV BGR
63
  img = np.array(image_file)[:, :, ::-1].copy()
64
 
65
  # 1) Run OCR on full image
66
- full_text = ocr_full_image(img)
 
 
 
 
 
 
 
 
 
 
 
 
67
  lines = [line.strip() for line in full_text.splitlines() if line.strip()]
68
 
69
  records = []
@@ -76,7 +99,7 @@ def process_image(image_file):
76
  if meta:
77
  records.append(meta)
78
  else:
79
- # No match → still include OCR guesses
80
  records.append({
81
  "title": title_guess,
82
  "author_name": author_guess or "",
@@ -100,21 +123,21 @@ def process_image(image_file):
100
  # 4. Build the Gradio Interface
101
  # ──────────────────────────────────────────────────────────────
102
  def build_interface():
103
- with gr.Blocks(title="Book Cover OCR + Lookup (Single‐Cover Mode)") as demo:
104
  gr.Markdown(
105
  """
106
  ## Book Cover OCR + OpenLibrary Lookup
107
 
108
- 1. Upload a photo of a single book cover (or any cover‐style image).
109
  2. The app will run OCR on the full image, take:
110
  - the **first line** as a “title” guess, and
111
- - the **second line** (if any) as an “author” guess, then
112
- - query OpenLibrary once for metadata.
113
- 3. You’ll see the result in a table and can download a CSV.
114
 
115
  > **Note:**
116
- > • Because we skip rectangle detection, any visible text on your cover (large, legible fonts) should be picked up.
117
- > • If you have multiple covers in one photo, only the first “title/author” will be used.
118
  """
119
  )
120
 
 
16
  Return the raw OCR text.
17
  """
18
  gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
19
+ # We skip explicit thresholding—sometimes stylized covers lose detail under THRESH_OTSU.
20
  text = pytesseract.image_to_string(gray, config="--oem 3 --psm 6")
21
  return text.strip()
22
 
 
55
  # ──────────────────────────────────────────────────────────────
56
  def process_image(image_file):
57
  """
58
+ Gradio passes either a PIL image or None.
59
+ If image_file is None, return an empty DataFrame and empty CSV.
60
+ Otherwise, convert to OpenCV BGR, OCR the entire image, parse first two lines for title/author,
61
  query OpenLibrary once, and return a DataFrame + CSV file path.
62
  """
63
+ if image_file is None:
64
+ # No image provided → return empty table + an empty CSV file
65
+ df_empty = pd.DataFrame(columns=["title", "author_name", "publisher", "first_publish_year"])
66
+ empty_bytes = df_empty.to_csv(index=False).encode()
67
+ unique_name = f"books_{uuid.uuid4().hex}.csv"
68
+ temp_path = os.path.join("/tmp", unique_name)
69
+ with open(temp_path, "wb") as f:
70
+ f.write(empty_bytes)
71
+ return df_empty, temp_path
72
+
73
  # Convert PIL to OpenCV BGR
74
  img = np.array(image_file)[:, :, ::-1].copy()
75
 
76
  # 1) Run OCR on full image
77
+ try:
78
+ full_text = ocr_full_image(img)
79
+ except pytesseract.pytesseract.TesseractNotFoundError:
80
+ # If Tesseract isn’t installed, return empty DataFrame and log the issue
81
+ print("ERROR: Tesseract not found. Did you add apt.txt with 'tesseract-ocr'?")
82
+ df_error = pd.DataFrame(columns=["title", "author_name", "publisher", "first_publish_year"])
83
+ error_bytes = df_error.to_csv(index=False).encode()
84
+ unique_name = f"books_{uuid.uuid4().hex}.csv"
85
+ temp_path = os.path.join("/tmp", unique_name)
86
+ with open(temp_path, "wb") as f:
87
+ f.write(error_bytes)
88
+ return df_error, temp_path
89
+
90
  lines = [line.strip() for line in full_text.splitlines() if line.strip()]
91
 
92
  records = []
 
99
  if meta:
100
  records.append(meta)
101
  else:
102
+ # No OpenLibrary match → still include OCR guesses
103
  records.append({
104
  "title": title_guess,
105
  "author_name": author_guess or "",
 
123
  # 4. Build the Gradio Interface
124
  # ──────────────────────────────────────────────────────────────
125
  def build_interface():
126
+ with gr.Blocks(title="Single‐Cover OCR + OpenLibrary Lookup") as demo:
127
  gr.Markdown(
128
  """
129
  ## Book Cover OCR + OpenLibrary Lookup
130
 
131
+ 1. Upload a photo of a single book cover.
132
  2. The app will run OCR on the full image, take:
133
  - the **first line** as a “title” guess, and
134
+ - the **second line** as an “author” guess (if present), then
135
+ - query OpenLibrary for metadata.
136
+ 3. Results display in a table and can be downloaded as CSV.
137
 
138
  > **Note:**
139
+ > • Ensure Tesseract OCR is installed (see `apt.txt`).
140
+ > • If no image is uploaded, the table and CSV will be empty.
141
  """
142
  )
143