ApsidalSolid4 committed on
Commit
d611c30
·
verified ·
1 Parent(s): 37dcbee

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +17 -6
app.py CHANGED
@@ -109,9 +109,12 @@ class OCRProcessor:
109
  ocr_logger.info("PDF document detected, enforcing page limit")
110
  payload['filetype'] = 'PDF'
111
 
112
- # Prepare file for OCR API - using file object directly as in the test script
 
 
 
113
  files = {
114
- 'file': (os.path.basename(file_path), open(file_path, 'rb'))
115
  }
116
 
117
  headers = {
@@ -132,9 +135,18 @@ class OCRProcessor:
132
  ocr_logger.info(f"OCR API status code: {response.status_code}")
133
 
134
  # Log response text for debugging (first 200 chars)
135
- ocr_logger.info(f"OCR API response preview: {response.text[:200]}...")
 
136
 
137
- response.raise_for_status()
 
 
 
 
 
 
 
 
138
 
139
  try:
140
  result = response.json()
@@ -177,8 +189,7 @@ class OCRProcessor:
177
  "text": ""
178
  }
179
  finally:
180
- # Close the file handle
181
- files['file'][1].close()
182
 
183
  def _extract_text_from_result(self, result: Dict) -> str:
184
  """
 
109
  ocr_logger.info("PDF document detected, enforcing page limit")
110
  payload['filetype'] = 'PDF'
111
 
112
+ # Prepare file for OCR API - using file data as bytes to avoid file handle issues
113
+ with open(file_path, 'rb') as f:
114
+ file_data = f.read()
115
+
116
  files = {
117
+ 'file': (os.path.basename(file_path), file_data, file_type)
118
  }
119
 
120
  headers = {
 
135
  ocr_logger.info(f"OCR API status code: {response.status_code}")
136
 
137
  # Log response text for debugging (first 200 chars)
138
+ response_preview = response.text[:200] if hasattr(response, 'text') else "No text content"
139
+ ocr_logger.info(f"OCR API response preview: {response_preview}...")
140
 
141
+ try:
142
+ response.raise_for_status()
143
+ except Exception as e:
144
+ ocr_logger.error(f"HTTP Error: {str(e)}")
145
+ return {
146
+ "success": False,
147
+ "error": f"OCR API HTTP Error: {str(e)}",
148
+ "text": ""
149
+ }
150
 
151
  try:
152
  result = response.json()
 
189
  "text": ""
190
  }
191
  finally:
192
+ # No need to close file handle as we're using bytes directly
 
193
 
194
  def _extract_text_from_result(self, result: Dict) -> str:
195
  """