rodrigomasini committed on
Commit
d8adf3f
·
verified ·
1 Parent(s): 1181510

Update mdr_pdf_parser.py

Browse files
Files changed (1) hide show
  1. mdr_pdf_parser.py +104 -44
mdr_pdf_parser.py CHANGED
@@ -708,32 +708,56 @@ class _MDR_DBPostProcess:
708
  scores.append(score)
709
  return boxes, scores
710
 
711
- def _boxes_from_bitmap(self, pred, bmp, dw, dh):
 
712
  h, w = bmp.shape
 
713
  contours, _ = cv2.findContours((bmp * 255).astype(np.uint8), cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)
714
- num_contours = min(len(contours), self.max_cand)
 
 
 
 
 
 
715
  boxes, scores = [], []
716
- for i in range(num_contours):
717
- contour = contours[i]
718
- pts, sside = self._get_mini_boxes(contour)
719
- if sside < self.min_sz:
720
- continue
721
- pts = np.array(pts)
722
- score = self._box_score_fast(pred, pts.reshape(-1, 2)) if self.score_m == "fast" else self._box_score_slow(pred, contour)
723
- if self.box_thresh > score:
724
- continue
725
- try:
726
- box = self._unclip(pts, self.unclip_r).reshape(-1, 1, 2)
727
- except:
728
- continue
729
- box, sside = self._get_mini_boxes(box)
730
- if sside < self.min_sz + 2:
731
- continue
732
- box = np.array(box)
733
- box[:, 0] = np.clip(np.round(box[:, 0] / w * dw), 0, dw)
734
- box[:, 1] = np.clip(np.round(box[:, 1] / h * dh), 0, dh)
735
- boxes.append(box.astype("int32"))
736
- scores.append(score)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
737
  return np.array(boxes, dtype="int32"), scores
738
 
739
  def _unclip(self, box, ratio):
@@ -779,20 +803,30 @@ class _MDR_DBPostProcess:
779
  cv2.fillPoly(mask, contour.reshape(1, -1, 2).astype("int32"), 1)
780
  return cv2.mean(bmp[ymin : ymax + 1, xmin : xmax + 1], mask)[0] if np.sum(mask) > 0 else 0.0
781
 
 
782
  def __call__(self, outs_dict, shape_list):
783
- pred = outs_dict['maps'][:, 0, :, :]
784
- seg = pred > self.thresh
 
 
 
785
  boxes_batch = []
786
  for batch_idx in range(pred.shape[0]):
787
- sh, sw, _, _ = shape_list[batch_idx]
788
- mask = cv2.dilate(np.array(seg[batch_idx]).astype(np.uint8), self.dila_k) if self.dila_k is not None else seg[batch_idx]
789
- if self.box_t == 'poly':
790
- boxes, _ = self._polygons_from_bitmap(pred[batch_idx], mask, sw, sh)
791
- elif self.box_t == 'quad':
792
- boxes, _ = self._boxes_from_bitmap(pred[batch_idx], mask, sw, sh)
793
- else:
794
- raise ValueError("box_type must be 'quad' or 'poly'")
795
- boxes_batch.append({'points': boxes})
 
 
 
 
 
 
796
  return boxes_batch
797
 
798
  class _MDR_TextDetector(_MDR_PredictBase):
@@ -849,24 +883,50 @@ class _MDR_TextDetector(_MDR_PredictBase):
849
  new_boxes.append(box)
850
  return np.array(new_boxes)
851
 
 
852
  def __call__(self, img):
853
  ori_im = img.copy()
854
  data = {"image": img}
 
855
  data = mdr_ocr_transform(data, self.pre_op)
856
  if data is None:
 
857
  return None
858
- img, shape_list = data
859
- if img is None:
 
 
860
  return None
861
- img = np.expand_dims(img, axis=0)
862
- shape_list = np.expand_dims(shape_list, axis=0)
863
- img = img.copy()
864
- inputs = self.get_input_feed(self.input_name, img)
865
- outputs = self.sess.run(self.output_name, input_feed=inputs)
 
 
 
 
 
 
 
 
 
 
 
 
866
  preds = {"maps": outputs[0]}
867
- post_res = self.post_op(preds, shape_list)
868
- boxes = post_res[0]['points']
869
- return self._filter_poly(boxes, ori_im.shape) if self.args.det_box_type == 'poly' else self._filter_quad(boxes, ori_im.shape)
 
 
 
 
 
 
 
 
 
870
 
871
  class _MDR_ClsPostProcess:
872
 
 
708
  scores.append(score)
709
  return boxes, scores
710
 
711
+ # In class _MDR_DBPostProcess:
712
+ def _boxes_from_bitmap(self, pred, bmp, dw, dh): # pred is the probability map, bmp is the binarized map
713
  h, w = bmp.shape
714
+ print(f" DEBUG OCR: _boxes_from_bitmap: Processing bitmap of shape {h}x{w} for original dimensions {dw}x{dh}.") # DEBUG
715
  contours, _ = cv2.findContours((bmp * 255).astype(np.uint8), cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)
716
+ num_contours_found = len(contours)
717
+ print(f" DEBUG OCR: _boxes_from_bitmap: Found {num_contours_found} raw contours.") # DEBUG
718
+
719
+ num_contours_to_process = min(num_contours_found, self.max_cand)
720
+ if num_contours_found > self.max_cand:
721
+ print(f" DEBUG OCR: _boxes_from_bitmap: Processing limited to {self.max_cand} contours.") # DEBUG
722
+
723
  boxes, scores = [], []
724
+ kept_boxes_count = 0
725
+ for i in range(num_contours_to_process):
726
+ contour = contours[i]
727
+ pts_mini_box, sside = self._get_mini_boxes(contour)
728
+ if sside < self.min_sz:
729
+ # print(f" DEBUG OCR: Contour {i} too small (sside {sside} < min_sz {self.min_sz}). Skipping.") # Can be too verbose
730
+ continue
731
+
732
+ pts_arr = np.array(pts_mini_box)
733
+ # score_mode is 'fast' by default
734
+ current_score = self._box_score_fast(pred, pts_arr.reshape(-1, 2)) if self.score_m == "fast" else self._box_score_slow(pred, contour)
735
+
736
+ if self.box_thresh > current_score:
737
+ # print(f" DEBUG OCR: Contour {i} score {current_score:.4f} < box_thresh {self.box_thresh}. Skipping.") # Can be too verbose
738
+ continue
739
+
740
+ try:
741
+ # unclip_ratio is self.unclip_r (default 1.5)
742
+ box_unclipped = self._unclip(pts_arr, self.unclip_r).reshape(-1, 1, 2)
743
+ except Exception as e_unclip:
744
+ # print(f" DEBUG OCR: Contour {i} unclip failed: {e_unclip}. Skipping.") # Can be too verbose
745
+ continue
746
+
747
+ box_final, sside_final = self._get_mini_boxes(box_unclipped)
748
+ if sside_final < self.min_sz + 2: # min_sz is 3
749
+ # print(f" DEBUG OCR: Contour {i} final size after unclip too small (sside_final {sside_final} < {self.min_sz + 2}). Skipping.") # Can be too verbose
750
+ continue
751
+
752
+ box_final_arr = np.array(box_final)
753
+ # Rescale to original image dimensions
754
+ box_final_arr[:, 0] = np.clip(np.round(box_final_arr[:, 0] / w * dw), 0, dw)
755
+ box_final_arr[:, 1] = np.clip(np.round(box_final_arr[:, 1] / h * dh), 0, dh)
756
+
757
+ boxes.append(box_final_arr.astype("int32"))
758
+ scores.append(current_score)
759
+ kept_boxes_count +=1
760
+ print(f" DEBUG OCR: _boxes_from_bitmap: Kept {kept_boxes_count} boxes after all filtering (size, score, unclip). Configured box_thresh: {self.box_thresh}, min_sz: {self.min_sz}.") # DEBUG
761
  return np.array(boxes, dtype="int32"), scores
762
 
763
  def _unclip(self, box, ratio):
 
803
  cv2.fillPoly(mask, contour.reshape(1, -1, 2).astype("int32"), 1)
804
  return cv2.mean(bmp[ymin : ymax + 1, xmin : xmax + 1], mask)[0] if np.sum(mask) > 0 else 0.0
805
 
806
+ # In class _MDR_DBPostProcess:
807
  def __call__(self, outs_dict, shape_list):
808
+ pred = outs_dict['maps'][:, 0, :, :]
809
+ seg = pred > self.thresh
810
+ print(f" DEBUG OCR: _MDR_DBPostProcess: pred map shape: {pred.shape}, seg map shape: {seg.shape}, configured thresh: {self.thresh}") # DEBUG
811
+ print(f" DEBUG OCR: _MDR_DBPostProcess: Number of pixels in seg map above threshold (sum of all batches): {np.sum(seg)}") # DEBUG
812
+
813
  boxes_batch = []
814
  for batch_idx in range(pred.shape[0]):
815
+ sh, sw, _, _ = shape_list[batch_idx]
816
+ current_pred_map = pred[batch_idx]
817
+ current_seg_map = seg[batch_idx]
818
+
819
+ mask = cv2.dilate(np.array(current_seg_map).astype(np.uint8), self.dila_k) if self.dila_k is not None else current_seg_map
820
+ print(f" DEBUG OCR: _MDR_DBPostProcess (batch {batch_idx}): Input shape to postproc {sh}x{sw}. Sum of mask pixels: {np.sum(mask)}") # DEBUG
821
+
822
+ if self.box_t == 'poly':
823
+ boxes, scores = self._polygons_from_bitmap(current_pred_map, mask, sw, sh)
824
+ elif self.box_t == 'quad':
825
+ boxes, scores = self._boxes_from_bitmap(current_pred_map, mask, sw, sh)
826
+ else:
827
+ raise ValueError("box_type must be 'quad' or 'poly'")
828
+ print(f" DEBUG OCR: _MDR_DBPostProcess (batch {batch_idx}): Found {len(boxes)} boxes from bitmap processing (after score filtering within _boxes_from_bitmap).") # DEBUG
829
+ boxes_batch.append({'points': boxes})
830
  return boxes_batch
831
 
832
  class _MDR_TextDetector(_MDR_PredictBase):
 
883
  new_boxes.append(box)
884
  return np.array(new_boxes)
885
 
886
+ # In class _MDR_TextDetector:
887
  def __call__(self, img):
888
  ori_im = img.copy()
889
  data = {"image": img}
890
+ print(f" DEBUG OCR: _MDR_TextDetector: Original image shape: {ori_im.shape}") # DEBUG
891
  data = mdr_ocr_transform(data, self.pre_op)
892
  if data is None:
893
+ print(" DEBUG OCR: _MDR_TextDetector: Preprocessing (mdr_ocr_transform) returned None. No text will be detected.") # DEBUG
894
  return None
895
+
896
+ processed_img, shape_list = data
897
+ if processed_img is None:
898
+ print(" DEBUG OCR: _MDR_TextDetector: Processed image after transform is None. No text will be detected.") # DEBUG
899
  return None
900
+ print(f" DEBUG OCR: _MDR_TextDetector: Processed image shape for ONNX: {processed_img.shape}, shape_list: {shape_list}") # DEBUG
901
+
902
+ img_for_onnx = np.expand_dims(processed_img, axis=0)
903
+ shape_list_for_onnx = np.expand_dims(shape_list, axis=0)
904
+ img_for_onnx = img_for_onnx.copy() # Ensure it's a contiguous array if ONNX runtime is sensitive
905
+
906
+ inputs = self.get_input_feed(self.input_name, img_for_onnx)
907
+ print(f" DEBUG OCR: _MDR_TextDetector: Running ONNX inference for text detection...") # DEBUG
908
+ try:
909
+ outputs = self.sess.run(self.output_name, input_feed=inputs)
910
+ except Exception as e:
911
+ print(f" DEBUG OCR: _MDR_TextDetector: ONNX inference for detection failed: {e}") # DEBUG
912
+ import traceback
913
+ traceback.print_exc()
914
+ return None # Stop if inference fails
915
+ print(f" DEBUG OCR: _MDR_TextDetector: ONNX inference done. Output map shape: {outputs[0].shape}") # DEBUG
916
+
917
  preds = {"maps": outputs[0]}
918
+ # post_op is _MDR_DBPostProcess
919
+ post_res = self.post_op(preds, shape_list_for_onnx)
920
+
921
+ boxes_from_post = post_res[0]['points']
922
+ print(f" DEBUG OCR: _MDR_TextDetector: Boxes from DBPostProcess before final filtering: {len(boxes_from_post)}") # DEBUG
923
+
924
+ if self.args.det_box_type == 'poly':
925
+ final_boxes = self._filter_poly(boxes_from_post, ori_im.shape)
926
+ else: # 'quad'
927
+ final_boxes = self._filter_quad(boxes_from_post, ori_im.shape)
928
+ print(f" DEBUG OCR: _MDR_TextDetector: Boxes after final poly/quad filtering: {len(final_boxes)}") # DEBUG
929
+ return final_boxes
930
 
931
  class _MDR_ClsPostProcess:
932