Update mdr_pdf_parser.py
mdr_pdf_parser.py  +104 -44  CHANGED
@@ -708,32 +708,56 @@ class _MDR_DBPostProcess:
             scores.append(score)
         return boxes, scores
 
+    # In class _MDR_DBPostProcess:
+    def _boxes_from_bitmap(self, pred, bmp, dw, dh):  # pred is the probability map, bmp is the binarized map
         h, w = bmp.shape
+        print(f" DEBUG OCR: _boxes_from_bitmap: Processing bitmap of shape {h}x{w} for original dimensions {dw}x{dh}.")  # DEBUG
         contours, _ = cv2.findContours((bmp * 255).astype(np.uint8), cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)
+        num_contours_found = len(contours)
+        print(f" DEBUG OCR: _boxes_from_bitmap: Found {num_contours_found} raw contours.")  # DEBUG
+
+        num_contours_to_process = min(num_contours_found, self.max_cand)
+        if num_contours_found > self.max_cand:
+            print(f" DEBUG OCR: _boxes_from_bitmap: Processing limited to {self.max_cand} contours.")  # DEBUG
+
         boxes, scores = [], []
+        kept_boxes_count = 0
+        for i in range(num_contours_to_process):
+            contour = contours[i]
+            pts_mini_box, sside = self._get_mini_boxes(contour)
+            if sside < self.min_sz:
+                # print(f" DEBUG OCR: Contour {i} too small (sside {sside} < min_sz {self.min_sz}). Skipping.")  # Can be too verbose
+                continue
+
+            pts_arr = np.array(pts_mini_box)
+            # score_mode is 'fast' by default
+            current_score = self._box_score_fast(pred, pts_arr.reshape(-1, 2)) if self.score_m == "fast" else self._box_score_slow(pred, contour)
+
+            if self.box_thresh > current_score:
+                # print(f" DEBUG OCR: Contour {i} score {current_score:.4f} < box_thresh {self.box_thresh}. Skipping.")  # Can be too verbose
+                continue
+
+            try:
+                # unclip_ratio is self.unclip_r (default 1.5)
+                box_unclipped = self._unclip(pts_arr, self.unclip_r).reshape(-1, 1, 2)
+            except Exception as e_unclip:
+                # print(f" DEBUG OCR: Contour {i} unclip failed: {e_unclip}. Skipping.")  # Can be too verbose
+                continue
+
+            box_final, sside_final = self._get_mini_boxes(box_unclipped)
+            if sside_final < self.min_sz + 2:  # min_sz is 3
+                # print(f" DEBUG OCR: Contour {i} final size after unclip too small (sside_final {sside_final} < {self.min_sz + 2}). Skipping.")  # Can be too verbose
+                continue
+
+            box_final_arr = np.array(box_final)
+            # Rescale to original image dimensions
+            box_final_arr[:, 0] = np.clip(np.round(box_final_arr[:, 0] / w * dw), 0, dw)
+            box_final_arr[:, 1] = np.clip(np.round(box_final_arr[:, 1] / h * dh), 0, dh)
+
+            boxes.append(box_final_arr.astype("int32"))
+            scores.append(current_score)
+            kept_boxes_count += 1
+        print(f" DEBUG OCR: _boxes_from_bitmap: Kept {kept_boxes_count} boxes after all filtering (size, score, unclip). Configured box_thresh: {self.box_thresh}, min_sz: {self.min_sz}.")  # DEBUG
         return np.array(boxes, dtype="int32"), scores
 
     def _unclip(self, box, ratio):
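Note: the unclip step called above (self._unclip(pts_arr, self.unclip_r), default ratio 1.5 per the inline comment) is only referenced in this hunk; its body is unchanged and not shown here. For context, a minimal sketch of the conventional DB-style unclip, assuming the usual Shapely + pyclipper formulation; the standalone name unclip and the example box are illustrative, not code from this file:

import numpy as np
import pyclipper
from shapely.geometry import Polygon

def unclip(box, unclip_ratio=1.5):
    # Expand the detected quad outward; the offset distance scales with area / perimeter.
    poly = Polygon(box)
    distance = poly.area * unclip_ratio / poly.length
    offset = pyclipper.PyclipperOffset()
    offset.AddPath(box.astype(int).tolist(), pyclipper.JT_ROUND, pyclipper.ET_CLOSEDPOLYGON)
    expanded = offset.Execute(distance)  # list of expanded paths
    return np.array(expanded[0]).reshape(-1, 1, 2)

# Example: a 100x20 box grows by a few pixels on every side at ratio 1.5.
quad = np.array([[0, 0], [100, 0], [100, 20], [0, 20]])
print(unclip(quad).reshape(-1, 2))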
@@ -779,20 +803,30 @@ class _MDR_DBPostProcess:
         cv2.fillPoly(mask, contour.reshape(1, -1, 2).astype("int32"), 1)
         return cv2.mean(bmp[ymin : ymax + 1, xmin : xmax + 1], mask)[0] if np.sum(mask) > 0 else 0.0
 
+    # In class _MDR_DBPostProcess:
     def __call__(self, outs_dict, shape_list):
+        pred = outs_dict['maps'][:, 0, :, :]
+        seg = pred > self.thresh
+        print(f" DEBUG OCR: _MDR_DBPostProcess: pred map shape: {pred.shape}, seg map shape: {seg.shape}, configured thresh: {self.thresh}")  # DEBUG
+        print(f" DEBUG OCR: _MDR_DBPostProcess: Number of pixels in seg map above threshold (sum of all batches): {np.sum(seg)}")  # DEBUG
+
         boxes_batch = []
         for batch_idx in range(pred.shape[0]):
+            sh, sw, _, _ = shape_list[batch_idx]
+            current_pred_map = pred[batch_idx]
+            current_seg_map = seg[batch_idx]
+
+            mask = cv2.dilate(np.array(current_seg_map).astype(np.uint8), self.dila_k) if self.dila_k is not None else current_seg_map
+            print(f" DEBUG OCR: _MDR_DBPostProcess (batch {batch_idx}): Input shape to postproc {sh}x{sw}. Sum of mask pixels: {np.sum(mask)}")  # DEBUG
+
+            if self.box_t == 'poly':
+                boxes, scores = self._polygons_from_bitmap(current_pred_map, mask, sw, sh)
+            elif self.box_t == 'quad':
+                boxes, scores = self._boxes_from_bitmap(current_pred_map, mask, sw, sh)
+            else:
+                raise ValueError("box_type must be 'quad' or 'poly'")
+            print(f" DEBUG OCR: _MDR_DBPostProcess (batch {batch_idx}): Found {len(boxes)} boxes from bitmap processing (after score filtering within _boxes_from_bitmap).")  # DEBUG
+            boxes_batch.append({'points': boxes})
         return boxes_batch
 
 class _MDR_TextDetector(_MDR_PredictBase):
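Note: the shape bookkeeping in __call__ above can be tried in isolation with dummy data. This is a sketch only; the threshold value, dilation kernel, and map size below are assumptions, and shape_list rows are taken to be (src_h, src_w, ratio_h, ratio_w), which is what the sh, sw, _, _ unpacking suggests:

import cv2
import numpy as np

thresh = 0.3                             # assumed stand-in for self.thresh
dila_kernel = np.ones((2, 2), np.uint8)  # assumed stand-in for self.dila_k

pred = np.random.rand(1, 960, 960).astype(np.float32)              # dummy 'maps'[:, 0, :, :]
shape_list = np.array([[1080.0, 1920.0, 960 / 1080, 960 / 1920]])  # one image in the batch

seg = pred > thresh
for batch_idx in range(pred.shape[0]):
    sh, sw, _, _ = shape_list[batch_idx]
    mask = cv2.dilate(seg[batch_idx].astype(np.uint8), dila_kernel)
    # The dilated mask (not the raw seg map) is what feeds _boxes_from_bitmap / _polygons_from_bitmap.
    print(f"batch {batch_idx}: original size {int(sh)}x{int(sw)}, mask pixels set: {int(mask.sum())}")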
@@ -849,24 +883,50 @@ class _MDR_TextDetector(_MDR_PredictBase):
             new_boxes.append(box)
         return np.array(new_boxes)
 
+    # In class _MDR_TextDetector:
     def __call__(self, img):
         ori_im = img.copy()
         data = {"image": img}
+        print(f" DEBUG OCR: _MDR_TextDetector: Original image shape: {ori_im.shape}")  # DEBUG
         data = mdr_ocr_transform(data, self.pre_op)
         if data is None:
+            print(" DEBUG OCR: _MDR_TextDetector: Preprocessing (mdr_ocr_transform) returned None. No text will be detected.")  # DEBUG
             return None
+
+        processed_img, shape_list = data
+        if processed_img is None:
+            print(" DEBUG OCR: _MDR_TextDetector: Processed image after transform is None. No text will be detected.")  # DEBUG
             return None
+        print(f" DEBUG OCR: _MDR_TextDetector: Processed image shape for ONNX: {processed_img.shape}, shape_list: {shape_list}")  # DEBUG
+
+        img_for_onnx = np.expand_dims(processed_img, axis=0)
+        shape_list_for_onnx = np.expand_dims(shape_list, axis=0)
+        img_for_onnx = img_for_onnx.copy()  # Ensure it's a contiguous array if ONNX runtime is sensitive
+
+        inputs = self.get_input_feed(self.input_name, img_for_onnx)
+        print(f" DEBUG OCR: _MDR_TextDetector: Running ONNX inference for text detection...")  # DEBUG
+        try:
+            outputs = self.sess.run(self.output_name, input_feed=inputs)
+        except Exception as e:
+            print(f" DEBUG OCR: _MDR_TextDetector: ONNX inference for detection failed: {e}")  # DEBUG
+            import traceback
+            traceback.print_exc()
+            return None  # Stop if inference fails
+        print(f" DEBUG OCR: _MDR_TextDetector: ONNX inference done. Output map shape: {outputs[0].shape}")  # DEBUG
+
         preds = {"maps": outputs[0]}
+        # post_op is _MDR_DBPostProcess
+        post_res = self.post_op(preds, shape_list_for_onnx)
+
+        boxes_from_post = post_res[0]['points']
+        print(f" DEBUG OCR: _MDR_TextDetector: Boxes from DBPostProcess before final filtering: {len(boxes_from_post)}")  # DEBUG
+
+        if self.args.det_box_type == 'poly':
+            final_boxes = self._filter_poly(boxes_from_post, ori_im.shape)
+        else:  # 'quad'
+            final_boxes = self._filter_quad(boxes_from_post, ori_im.shape)
+        print(f" DEBUG OCR: _MDR_TextDetector: Boxes after final poly/quad filtering: {len(final_boxes)}")  # DEBUG
+        return final_boxes
 
 class _MDR_ClsPostProcess:
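Note: the try/except around self.sess.run(...) in the detector wraps a plain onnxruntime session call. A minimal sketch of that call in isolation, with a placeholder model path and input size (both assumptions; this diff does not show how the session or the feed dict is built):

import numpy as np
import onnxruntime as ort

sess = ort.InferenceSession("det.onnx", providers=["CPUExecutionProvider"])  # hypothetical model path
input_name = sess.get_inputs()[0].name
output_names = [o.name for o in sess.get_outputs()]

img_for_onnx = np.random.rand(1, 3, 960, 960).astype(np.float32)  # NCHW batch of one (size assumed)
outputs = sess.run(output_names, {input_name: img_for_onnx})
print("detection output map shape:", outputs[0].shape)  # DB-style models emit a probability map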