Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -109,9 +109,12 @@ class OCRProcessor:
|
|
109 |
ocr_logger.info("PDF document detected, enforcing page limit")
|
110 |
payload['filetype'] = 'PDF'
|
111 |
|
112 |
-
# Prepare file for OCR API - using file
|
|
|
|
|
|
|
113 |
files = {
|
114 |
-
'file': (os.path.basename(file_path),
|
115 |
}
|
116 |
|
117 |
headers = {
|
@@ -132,9 +135,18 @@ class OCRProcessor:
|
|
132 |
ocr_logger.info(f"OCR API status code: {response.status_code}")
|
133 |
|
134 |
# Log response text for debugging (first 200 chars)
|
135 |
-
|
|
|
136 |
|
137 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
138 |
|
139 |
try:
|
140 |
result = response.json()
|
@@ -177,8 +189,7 @@ class OCRProcessor:
|
|
177 |
"text": ""
|
178 |
}
|
179 |
finally:
|
180 |
-
#
|
181 |
-
files['file'][1].close()
|
182 |
|
183 |
def _extract_text_from_result(self, result: Dict) -> str:
|
184 |
"""
|
|
|
109 |
ocr_logger.info("PDF document detected, enforcing page limit")
|
110 |
payload['filetype'] = 'PDF'
|
111 |
|
112 |
+
# Prepare file for OCR API - using file data as bytes to avoid file handle issues
|
113 |
+
with open(file_path, 'rb') as f:
|
114 |
+
file_data = f.read()
|
115 |
+
|
116 |
files = {
|
117 |
+
'file': (os.path.basename(file_path), file_data, file_type)
|
118 |
}
|
119 |
|
120 |
headers = {
|
|
|
135 |
ocr_logger.info(f"OCR API status code: {response.status_code}")
|
136 |
|
137 |
# Log response text for debugging (first 200 chars)
|
138 |
+
response_preview = response.text[:200] if hasattr(response, 'text') else "No text content"
|
139 |
+
ocr_logger.info(f"OCR API response preview: {response_preview}...")
|
140 |
|
141 |
+
try:
|
142 |
+
response.raise_for_status()
|
143 |
+
except Exception as e:
|
144 |
+
ocr_logger.error(f"HTTP Error: {str(e)}")
|
145 |
+
return {
|
146 |
+
"success": False,
|
147 |
+
"error": f"OCR API HTTP Error: {str(e)}",
|
148 |
+
"text": ""
|
149 |
+
}
|
150 |
|
151 |
try:
|
152 |
result = response.json()
|
|
|
189 |
"text": ""
|
190 |
}
|
191 |
finally:
|
192 |
+
# No need to close file handle as we're using bytes directly
|
|
|
193 |
|
194 |
def _extract_text_from_result(self, result: Dict) -> str:
|
195 |
"""
|