Update mdr_pdf_parser.py
Browse files- mdr_pdf_parser.py +31 -0
mdr_pdf_parser.py
CHANGED
@@ -1125,6 +1125,17 @@ class _MDR_TextRecognizer(_MDR_PredictBase):
|
|
1125 |
padding = np.zeros((imgC, imgH, imgW), dtype=np.float32)
|
1126 |
padding[:, :, 0:tw] = resized # Place resized image into padded canvas
|
1127 |
print(f" DEBUG RECOGNIZER: _resize_norm output padded shape: {padding.shape}")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1128 |
return padding
|
1129 |
|
1130 |
def __call__(self, img_list):
|
@@ -1218,6 +1229,26 @@ class _MDR_TextSystem:
|
|
1218 |
boxes_for_valid_crops = [boxes[i] for i in valid_idxs]
|
1219 |
print(f" DEBUG OCR SYS: Number of valid crops to process: {len(valid_crops)}") # DEBUG
|
1220 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1221 |
if self.use_cls and self.classifier and classify:
|
1222 |
print(f" DEBUG OCR SYS: Applying classifier to {len(valid_crops)} crops...") # DEBUG
|
1223 |
try:
|
|
|
1125 |
padding = np.zeros((imgC, imgH, imgW), dtype=np.float32)
|
1126 |
padding[:, :, 0:tw] = resized # Place resized image into padded canvas
|
1127 |
print(f" DEBUG RECOGNIZER: _resize_norm output padded shape: {padding.shape}")
|
1128 |
+
# ---- START LOGGING NORMALIZED CROP PROPERTIES ----
|
1129 |
+
print(f" DEBUG RECOGNIZER: Normalized Crop Properties (before ONNX): "
|
1130 |
+
f"dtype: {padding.dtype}, " # Should be float32
|
1131 |
+
f"MinPx: {np.min(padding):.4f}, "
|
1132 |
+
f"MaxPx: {np.max(padding):.4f}, "
|
1133 |
+
f"MeanPx: {np.mean(padding):.4f}")
|
1134 |
+
if np.all(padding == 0):
|
1135 |
+
print(" DEBUG RECOGNIZER: WARNING - Normalized image is all zeros!")
|
1136 |
+
elif np.all(padding == padding[0,0,0]): # Check if all elements are the same
|
1137 |
+
print(f" DEBUG RECOGNIZER: WARNING - Normalized image is a constant value: {padding[0,0,0]}")
|
1138 |
+
# ---- END LOGGING NORMALIZED CROP PROPERTIES ----
|
1139 |
return padding
|
1140 |
|
1141 |
def __call__(self, img_list):
|
|
|
1229 |
boxes_for_valid_crops = [boxes[i] for i in valid_idxs]
|
1230 |
print(f" DEBUG OCR SYS: Number of valid crops to process: {len(valid_crops)}") # DEBUG
|
1231 |
|
1232 |
+
# ---- START LOGGING CROP PROPERTIES ----
|
1233 |
+
if valid_crops:
|
1234 |
+
print(" DEBUG OCR SYS: Logging properties of first few valid crops (and Box 21 if present):")
|
1235 |
+
indices_to_log = list(range(min(3, len(valid_crops)))) # Log first 3
|
1236 |
+
# Mapping Box 21 back to its original index is not straightforward at this point,
|
1237 |
+
# so only the first few crops are logged; Box 21 shows up only if it is among them.
|
1238 |
+
|
1239 |
+
for i_log_idx, crop_idx in enumerate(indices_to_log):
|
1240 |
+
crop_image_np = valid_crops[crop_idx]
|
1241 |
+
if crop_image_np is not None and crop_image_np.size > 0:
|
1242 |
+
print(f" Crop for Recognizer (Index {crop_idx}): "
|
1243 |
+
f"Shape: {crop_image_np.shape}, "
|
1244 |
+
f"dtype: {crop_image_np.dtype}, "
|
1245 |
+
f"MinPx: {np.min(crop_image_np)}, "
|
1246 |
+
f"MaxPx: {np.max(crop_image_np)}, "
|
1247 |
+
f"MeanPx: {np.mean(crop_image_np):.2f}")
|
1248 |
+
else:
|
1249 |
+
print(f" Crop for Recognizer (Index {crop_idx}): Is None or empty.")
|
1250 |
+
# ---- END LOGGING CROP PROPERTIES ----
|
1251 |
+
|
1252 |
if self.use_cls and self.classifier and classify:
|
1253 |
print(f" DEBUG OCR SYS: Applying classifier to {len(valid_crops)} crops...") # DEBUG
|
1254 |
try:
|