rodrigomasini committed on
Commit
92d0a39
·
verified ·
1 Parent(s): af729d4

Update mdr_pdf_parser.py

Browse files
Files changed (1) hide show
  1. mdr_pdf_parser.py +31 -0
mdr_pdf_parser.py CHANGED
@@ -1125,6 +1125,17 @@ class _MDR_TextRecognizer(_MDR_PredictBase):
1125
  padding = np.zeros((imgC, imgH, imgW), dtype=np.float32)
1126
  padding[:, :, 0:tw] = resized # Place resized image into padded canvas
1127
  print(f" DEBUG RECOGNIZER: _resize_norm output padded shape: {padding.shape}")
 
 
 
 
 
 
 
 
 
 
 
1128
  return padding
1129
 
1130
  def __call__(self, img_list):
@@ -1218,6 +1229,26 @@ class _MDR_TextSystem:
1218
  boxes_for_valid_crops = [boxes[i] for i in valid_idxs]
1219
  print(f" DEBUG OCR SYS: Number of valid crops to process: {len(valid_crops)}") # DEBUG
1220
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1221
  if self.use_cls and self.classifier and classify:
1222
  print(f" DEBUG OCR SYS: Applying classifier to {len(valid_crops)} crops...") # DEBUG
1223
  try:
 
1125
  padding = np.zeros((imgC, imgH, imgW), dtype=np.float32)
1126
  padding[:, :, 0:tw] = resized # Place resized image into padded canvas
1127
  print(f" DEBUG RECOGNIZER: _resize_norm output padded shape: {padding.shape}")
1128
+ # ---- START LOGGING NORMALIZED CROP PROPERTIES ----
1129
+ print(f" DEBUG RECOGNIZER: Normalized Crop Properties (before ONNX): "
1130
+ f"dtype: {padding.dtype}, " # Should be float32
1131
+ f"MinPx: {np.min(padding):.4f}, "
1132
+ f"MaxPx: {np.max(padding):.4f}, "
1133
+ f"MeanPx: {np.mean(padding):.4f}")
1134
+ if np.all(padding == 0):
1135
+ print(" DEBUG RECOGNIZER: WARNING - Normalized image is all zeros!")
1136
+ elif np.all(padding == padding[0,0,0]): # Check if all elements are the same
1137
+ print(f" DEBUG RECOGNIZER: WARNING - Normalized image is a constant value: {padding[0,0,0]}")
1138
+ # ---- END LOGGING NORMALIZED CROP PROPERTIES ----
1139
  return padding
1140
 
1141
  def __call__(self, img_list):
 
1229
  boxes_for_valid_crops = [boxes[i] for i in valid_idxs]
1230
  print(f" DEBUG OCR SYS: Number of valid crops to process: {len(valid_crops)}") # DEBUG
1231
 
1232
+ # ---- START LOGGING CROP PROPERTIES ----
1233
+ if valid_crops:
1234
+ print(" DEBUG OCR SYS: Logging properties of first few valid crops (and Box 21 if present):")
1235
+ indices_to_log = list(range(min(3, len(valid_crops)))) # Log first 3
1236
+ # Try to find the original index of Box 21 if we can map it back; this is a bit tricky here.
1237
+ # For simplicity, let's just log the first few. If Box 21 was among them, we'd see it.
1238
+
1239
+ for i_log_idx, crop_idx in enumerate(indices_to_log):
1240
+ crop_image_np = valid_crops[crop_idx]
1241
+ if crop_image_np is not None and crop_image_np.size > 0:
1242
+ print(f" Crop for Recognizer (Index {crop_idx}): "
1243
+ f"Shape: {crop_image_np.shape}, "
1244
+ f"dtype: {crop_image_np.dtype}, "
1245
+ f"MinPx: {np.min(crop_image_np)}, "
1246
+ f"MaxPx: {np.max(crop_image_np)}, "
1247
+ f"MeanPx: {np.mean(crop_image_np):.2f}")
1248
+ else:
1249
+ print(f" Crop for Recognizer (Index {crop_idx}): Is None or empty.")
1250
+ # ---- END LOGGING CROP PROPERTIES ----
1251
+
1252
  if self.use_cls and self.classifier and classify:
1253
  print(f" DEBUG OCR SYS: Applying classifier to {len(valid_crops)} crops...") # DEBUG
1254
  try: