Update mdr_pdf_parser.py

mdr_pdf_parser.py  (+295 −311)  CHANGED
@@ -712,17 +712,18 @@ class _MDR_DBPostProcess:
         scores.append(score)
         return boxes, scores
 
-
+    # In class _MDR_DBPostProcess:
     def _boxes_from_bitmap(self, pred, bmp, dw, dh): # pred is the probability map, bmp is the binarized map
         h, w = bmp.shape
-
+        # ADDED: More detailed logging
+        print(f" DEBUG OCR: _boxes_from_bitmap: Processing bitmap of shape {h}x{w} for original dimensions {dw:.1f}x{dh:.1f}.")
         contours, _ = cv2.findContours((bmp * 255).astype(np.uint8), cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)
         num_contours_found = len(contours)
-        print(f" DEBUG OCR: _boxes_from_bitmap: Found {num_contours_found} raw contours.")
+        print(f" DEBUG OCR: _boxes_from_bitmap: Found {num_contours_found} raw contours.")
 
         num_contours_to_process = min(num_contours_found, self.max_cand)
         if num_contours_found > self.max_cand:
-            print(f" DEBUG OCR: _boxes_from_bitmap: Processing limited to {self.max_cand} contours.")
+            print(f" DEBUG OCR: _boxes_from_bitmap: Processing limited to {self.max_cand} contours (max_candidates).")
 
         boxes, scores = [], []
         kept_boxes_count = 0
@@ -730,11 +731,10 @@ class _MDR_DBPostProcess:
             contour = contours[i]
             pts_mini_box, sside = self._get_mini_boxes(contour)
             if sside < self.min_sz:
-                # print(f" DEBUG OCR: Contour {i} too small (sside {sside} < min_sz {self.min_sz}). Skipping.") # Can be too verbose
+                # print(f" DEBUG OCR: Contour {i} too small (sside {sside:.2f} < min_sz {self.min_sz}). Skipping.") # Can be too verbose
                 continue
 
             pts_arr = np.array(pts_mini_box)
-            # score_mode is 'fast' by default
             current_score = self._box_score_fast(pred, pts_arr.reshape(-1, 2)) if self.score_m == "fast" else self._box_score_slow(pred, contour)
 
             if self.box_thresh > current_score:
@@ -742,7 +742,6 @@ class _MDR_DBPostProcess:
                 continue
 
             try:
-                # unclip_ratio is self.unclip_r (default 1.5)
                 box_unclipped = self._unclip(pts_arr, self.unclip_r).reshape(-1, 1, 2)
             except Exception as e_unclip:
                 # print(f" DEBUG OCR: Contour {i} unclip failed: {e_unclip}. Skipping.") # Can be too verbose
@@ -750,18 +749,17 @@ class _MDR_DBPostProcess:
 
             box_final, sside_final = self._get_mini_boxes(box_unclipped)
             if sside_final < self.min_sz + 2: # min_sz is 3
-                # print(f" DEBUG OCR: Contour {i} final size after unclip too small (sside_final {sside_final} < {self.min_sz + 2}). Skipping.") # Can be too verbose
+                # print(f" DEBUG OCR: Contour {i} final size after unclip too small (sside_final {sside_final:.2f} < {self.min_sz + 2}). Skipping.") # Can be too verbose
                 continue
 
             box_final_arr = np.array(box_final)
-            # Rescale to original image dimensions
             box_final_arr[:, 0] = np.clip(np.round(box_final_arr[:, 0] / w * dw), 0, dw)
             box_final_arr[:, 1] = np.clip(np.round(box_final_arr[:, 1] / h * dh), 0, dh)
 
             boxes.append(box_final_arr.astype("int32"))
             scores.append(current_score)
             kept_boxes_count += 1
-        print(f" DEBUG OCR: _boxes_from_bitmap: Kept {kept_boxes_count} boxes after all filtering (size, score, unclip). Configured box_thresh: {self.box_thresh}, min_sz: {self.min_sz}.")
+        print(f" DEBUG OCR: _boxes_from_bitmap: Kept {kept_boxes_count} boxes after all filtering (size, score, unclip). Configured box_thresh: {self.box_thresh}, min_sz: {self.min_sz}.")
         return np.array(boxes, dtype="int32"), scores
 
     def _unclip(self, box, ratio):
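Note on the filtering chain above (contour → mini-box → size gate → score gate → unclip → rescale): it can be exercised outside the class. A minimal sketch, assuming the `pyclipper` dependency common in DB-style post-processing; `box_thresh`, `min_size`, and `unclip` are plain stand-ins for `self.box_thresh`, `self.min_sz`, and `self._unclip`, not the project's actual methods:

```python
import cv2
import numpy as np
import pyclipper  # assumed dependency, as in most DB post-processing code

def unclip(quad: np.ndarray, ratio: float = 1.5) -> np.ndarray:
    """Expand a quad outward; offset distance = area * ratio / perimeter (DB paper)."""
    area = cv2.contourArea(quad.astype(np.float32))
    perimeter = cv2.arcLength(quad.reshape(-1, 1, 2).astype(np.float32), True)
    if perimeter < 1e-6:
        return quad
    offset = pyclipper.PyclipperOffset()
    offset.AddPath(quad.astype(np.int64).tolist(), pyclipper.JT_ROUND, pyclipper.ET_CLOSEDPOLYGON)
    expanded = offset.Execute(area * ratio / perimeter)
    return np.array(expanded[0]) if expanded else quad

def boxes_from_bitmap(prob_map: np.ndarray, bitmap: np.ndarray,
                      box_thresh: float = 0.6, min_size: int = 3) -> list:
    contours, _ = cv2.findContours((bitmap * 255).astype(np.uint8),
                                   cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)
    boxes = []
    for contour in contours:
        rect = cv2.minAreaRect(contour)      # rotated "mini box" around the contour
        if min(rect[1]) < min_size:          # shortest-side gate, like min_sz
            continue
        quad = cv2.boxPoints(rect)
        mask = np.zeros(prob_map.shape, dtype=np.uint8)
        cv2.fillPoly(mask, [quad.astype(np.int32)], 1)
        if cv2.mean(prob_map, mask)[0] < box_thresh:   # "fast" box-score gate
            continue
        boxes.append(unclip(quad))
    return boxes
```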
@@ -807,29 +805,35 @@ class _MDR_DBPostProcess:
         cv2.fillPoly(mask, contour.reshape(1, -1, 2).astype("int32"), 1)
         return cv2.mean(bmp[ymin : ymax + 1, xmin : xmax + 1], mask)[0] if np.sum(mask) > 0 else 0.0
 
-    # In class _MDR_DBPostProcess:
     def __call__(self, outs_dict, shape_list):
         pred = outs_dict['maps'][:, 0, :, :]
         seg = pred > self.thresh
-
-        print(f" DEBUG OCR: _MDR_DBPostProcess: …
+        # ADDED: More detailed logging
+        print(f" DEBUG OCR: _MDR_DBPostProcess: pred map shape: {pred.shape}, seg map shape: {seg.shape}, configured thresh: {self.thresh}")
+        print(f" DEBUG OCR: _MDR_DBPostProcess: Number of pixels in seg map above threshold (sum of all batches): {np.sum(seg)}")
 
         boxes_batch = []
         for batch_idx in range(pred.shape[0]):
-            sh, sw …
+            # MODIFIED: Ensure sh, sw are floats for division if they come from shape_list
+            sh_orig, sw_orig, rh_ratio, rw_ratio = shape_list[batch_idx]
+            # The dw, dh for _boxes_from_bitmap should be the original image dimensions before DetResizeForTest
+            # shape_list contains [src_h, src_w, ratio_h, ratio_w]
+            # So dw = src_w, dh = src_h
+            dw_orig, dh_orig = sw_orig, sh_orig
+
             current_pred_map = pred[batch_idx]
             current_seg_map = seg[batch_idx]
 
             mask = cv2.dilate(np.array(current_seg_map).astype(np.uint8), self.dila_k) if self.dila_k is not None else current_seg_map
-            print(f" DEBUG OCR: _MDR_DBPostProcess (batch {batch_idx}): Input shape to postproc {…
+            print(f" DEBUG OCR: _MDR_DBPostProcess (batch {batch_idx}): Input shape to postproc (orig) {dh_orig:.1f}x{dw_orig:.1f}. Sum of mask pixels: {np.sum(mask)}")
 
             if self.box_t == 'poly':
-                boxes, scores = self._polygons_from_bitmap(current_pred_map, mask, …
+                boxes, scores = self._polygons_from_bitmap(current_pred_map, mask, dw_orig, dh_orig)
             elif self.box_t == 'quad':
-                boxes, scores = self._boxes_from_bitmap(current_pred_map, mask, …
+                boxes, scores = self._boxes_from_bitmap(current_pred_map, mask, dw_orig, dh_orig) # Pass original dimensions
             else:
                 raise ValueError("box_type must be 'quad' or 'poly'")
-            print(f" DEBUG OCR: _MDR_DBPostProcess (batch {batch_idx}): Found {len(boxes)} boxes from bitmap processing …
+            print(f" DEBUG OCR: _MDR_DBPostProcess (batch {batch_idx}): Found {len(boxes)} boxes from bitmap processing.")
             boxes_batch.append({'points': boxes})
         return boxes_batch
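The `shape_list` handed to `__call__` follows the convention stated in the comments above: each row is `[src_h, src_w, ratio_h, ratio_w]`, and boxes are mapped from bitmap coordinates back to source-page pixels by dividing by the bitmap size and multiplying by the source size. A small illustrative sketch with synthetic numbers (no model involved):

```python
import numpy as np

# One batch entry: the source page was 1000x800 (h x w) and was resized to a
# 960x736 probability map for the detector.
shape_list = np.array([[1000.0, 800.0, 960 / 1000, 736 / 800]])  # [src_h, src_w, ratio_h, ratio_w]

src_h, src_w, ratio_h, ratio_w = shape_list[0]
dw, dh = src_w, src_h            # original dims, as _boxes_from_bitmap expects

map_h, map_w = 960, 736          # h, w = bmp.shape inside the post-processor

# A quad found on the probability map, rescaled exactly the way the method does it:
box_on_map = np.array([[100, 200], [300, 200], [300, 240], [100, 240]], dtype=np.float64)
box_on_page = box_on_map.copy()
box_on_page[:, 0] = np.clip(np.round(box_on_map[:, 0] / map_w * dw), 0, dw)
box_on_page[:, 1] = np.clip(np.round(box_on_map[:, 1] / map_h * dh), 0, dh)
print(box_on_page.astype("int32"))   # x scaled by 800/736, y scaled by 1000/960
```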
@@ -887,49 +891,71 @@ class _MDR_TextDetector(_MDR_PredictBase):
             new_boxes.append(box)
         return np.array(new_boxes)
 
-
+    # In class _MDR_TextDetector:
     def __call__(self, img):
         ori_im = img.copy()
         data = {"image": img}
-        print(f" DEBUG OCR: _MDR_TextDetector: Original image shape: {ori_im.shape}")
-        …
+        print(f" DEBUG OCR: _MDR_TextDetector: Original image shape: {ori_im.shape}")
+
+        # Preprocessing
+        try:
+            data = mdr_ocr_transform(data, self.pre_op)
+        except Exception as e_preproc:
+            print(f" DEBUG OCR: _MDR_TextDetector: Error during preprocessing (mdr_ocr_transform): {e_preproc}")
+            import traceback
+            traceback.print_exc()
+            return np.array([]) # Return empty array on failure
+
         if data is None:
-            print(" DEBUG OCR: _MDR_TextDetector: Preprocessing (mdr_ocr_transform) returned None. No text will be detected.")
-            return
+            print(" DEBUG OCR: _MDR_TextDetector: Preprocessing (mdr_ocr_transform) returned None. No text will be detected.")
+            return np.array([])
 
-        processed_img, shape_list = data
+        processed_img, shape_list = data # shape_list is [src_h, src_w, ratio_h, ratio_w]
         if processed_img is None:
-            print(" DEBUG OCR: _MDR_TextDetector: Processed image after transform is None. No text will be detected.")
-            return
-        print(f" DEBUG OCR: _MDR_TextDetector: Processed image shape for ONNX: {processed_img.shape}, shape_list: {shape_list}")
+            print(" DEBUG OCR: _MDR_TextDetector: Processed image after transform is None. No text will be detected.")
+            return np.array([])
+        print(f" DEBUG OCR: _MDR_TextDetector: Processed image shape for ONNX: {processed_img.shape}, shape_list: {shape_list}")
 
         img_for_onnx = np.expand_dims(processed_img, axis=0)
         shape_list_for_onnx = np.expand_dims(shape_list, axis=0)
-        img_for_onnx = img_for_onnx.copy()
+        img_for_onnx = img_for_onnx.copy()
 
         inputs = self.get_input_feed(self.input_name, img_for_onnx)
-        print(f" DEBUG OCR: _MDR_TextDetector: Running ONNX inference for text detection...")
+        print(f" DEBUG OCR: _MDR_TextDetector: Running ONNX inference for text detection...")
         try:
            outputs = self.sess.run(self.output_name, input_feed=inputs)
-        except Exception as …
-            print(f" DEBUG OCR: _MDR_TextDetector: ONNX inference for detection failed: {…
+        except Exception as e_infer:
+            print(f" DEBUG OCR: _MDR_TextDetector: ONNX inference for detection failed: {e_infer}")
            import traceback
            traceback.print_exc()
-            return
-        print(f" DEBUG OCR: _MDR_TextDetector: ONNX inference done. Output map shape: {outputs[0].shape}")
+            return np.array([]) # Return empty array on failure
+        print(f" DEBUG OCR: _MDR_TextDetector: ONNX inference done. Output map shape: {outputs[0].shape}")
 
         preds = {"maps": outputs[0]}
-        …
+        try:
+            post_res = self.post_op(preds, shape_list_for_onnx)
+        except Exception as e_postproc:
+            print(f" DEBUG OCR: _MDR_TextDetector: Error during DBPostProcess: {e_postproc}")
+            import traceback
+            traceback.print_exc()
+            return np.array([])
+
+        if not post_res or not post_res[0].get('points'):
+            print(" DEBUG OCR: _MDR_TextDetector: DBPostProcess returned no points.")
+            return np.array([])
+
         boxes_from_post = post_res[0]['points']
-        print(f" DEBUG OCR: _MDR_TextDetector: Boxes from DBPostProcess before final filtering: {len(boxes_from_post)}")
+        print(f" DEBUG OCR: _MDR_TextDetector: Boxes from DBPostProcess before final filtering: {len(boxes_from_post)}")
+
+        if not isinstance(boxes_from_post, (list, np.ndarray)) or len(boxes_from_post) == 0: # Check if it's empty or not list-like
+            print(" DEBUG OCR: _MDR_TextDetector: No boxes from DBPostProcess to filter.")
+            return np.array([])
 
         if self.args.det_box_type == 'poly':
             final_boxes = self._filter_poly(boxes_from_post, ori_im.shape)
         else: # 'quad'
             final_boxes = self._filter_quad(boxes_from_post, ori_im.shape)
-        print(f" DEBUG OCR: _MDR_TextDetector: Boxes after final poly/quad filtering: {len(final_boxes)}")
+        print(f" DEBUG OCR: _MDR_TextDetector: Boxes after final poly/quad filtering: {len(final_boxes)}")
         return final_boxes
 
 class _MDR_ClsPostProcess:
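A caveat on the `np.array([])` sentinel used in the failure paths above: a NumPy array with more than one element cannot be truth-tested, so downstream callers should check emptiness with `len(...)` or `.size` rather than a bare `if boxes:`. A quick illustration:

```python
import numpy as np

boxes = np.array([])            # detector found nothing
print(len(boxes) == 0)          # True  -> safe emptiness check
print(boxes.size == 0)          # True  -> also safe

quads = np.zeros((2, 4, 2))     # two quads
try:
    if quads:                   # ambiguous: more than one element -> ValueError
        pass
except ValueError as e:
    print(f"bare truth test fails: {e}")
```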
@@ -1090,56 +1116,64 @@ class _MDR_TextRecognizer(_MDR_PredictBase):
         self.input_name = self.get_input_name(self.sess)
         self.output_name = self.get_output_name(self.sess)
 
-
+    # In class _MDR_TextRecognizer
     def _resize_norm(self, img, max_r): # img is a single crop
         imgC, imgH, imgW = self.shape # e.g., (3, 48, 320)
         h_orig, w_orig = img.shape[:2]
+        # ADDED: Log input crop shape
         print(f" DEBUG RECOGNIZER: _resize_norm input crop shape: ({h_orig}, {w_orig}), target shape: {self.shape}, max_r_batch: {max_r:.2f}")
 
         if h_orig == 0 or w_orig == 0:
-            print(f" DEBUG RECOGNIZER: _resize_norm received zero-dimension crop. Returning zeros.")
+            print(f" DEBUG RECOGNIZER: _resize_norm received zero-dimension crop ({h_orig}x{w_orig}). Returning zeros.")
             return np.zeros((imgC, imgH, imgW), dtype=np.float32)
 
         r_current = w_orig / float(h_orig)
-        # tw is target width, calculated to maintain aspect ratio up to imgW, using max of current ratio and batch max ratio
         tw = min(imgW, int(ceil(imgH * r_current)))
-        tw = max(1, tw)
+        tw = max(1, tw)
         print(f" DEBUG RECOGNIZER: _resize_norm calculated target width (tw): {tw} for target height (imgH): {imgH}")
 
         try:
-            resized = cv2.resize(img, (tw, imgH))
-        except …
-            print(f" DEBUG RECOGNIZER: _resize_norm cv2.resize failed: {e_resize}. Original shape ({h_orig},{w_orig}), target ({tw},{imgH})")
-            # Fallback: return zeros or try to pad original without resize if resize fails
+            resized = cv2.resize(img, (tw, imgH))
+        except cv2.error as e_resize: # Catch specific cv2 error
+            print(f" DEBUG RECOGNIZER: _resize_norm cv2.resize failed: {e_resize}. Original shape ({h_orig},{w_orig}), target ({tw},{imgH}). Returning zeros.")
            return np.zeros((imgC, imgH, imgW), dtype=np.float32)
+        except Exception as e_resize_general: # Catch any other unexpected error
+            print(f" DEBUG RECOGNIZER: _resize_norm general error during resize: {e_resize_general}. Original shape ({h_orig},{w_orig}), target ({tw},{imgH}). Returning zeros.")
+            import traceback
+            traceback.print_exc()
+            return np.zeros((imgC, imgH, imgW), dtype=np.float32)
 
         resized = resized.astype("float32")
-
-        # (This part seems standard, but worth checking if the image becomes all black/white after this)
-        if imgC == 1 and len(resized.shape) == 3: # if model expects grayscale but crop is color
+        if imgC == 1 and len(resized.shape) == 3:
             resized = cv2.cvtColor(resized, cv2.COLOR_BGR2GRAY)
-
-            resized = resized[:, :, np.newaxis]
+        if len(resized.shape) == 2:
+            resized = resized[:, :, np.newaxis] # Add channel dim if grayscale
 
-
-
+        # Ensure resized has 3 channels if imgC is 3, even if input was grayscale
+        if imgC == 3 and resized.shape[2] == 1:
+            resized = cv2.cvtColor(resized, cv2.COLOR_GRAY2BGR)
+
+        resized = resized.transpose((2, 0, 1)) / 255.0
+        resized -= 0.5
+        resized /= 0.5
 
         padding = np.zeros((imgC, imgH, imgW), dtype=np.float32)
-        padding[:, :, 0:tw] = resized
+        padding[:, :, 0:tw] = resized
         print(f" DEBUG RECOGNIZER: _resize_norm output padded shape: {padding.shape}")
-
+
+        # ADDED: Log normalized crop properties
+        min_px, max_px, mean_px = np.min(padding), np.max(padding), np.mean(padding)
         print(f" DEBUG RECOGNIZER: Normalized Crop Properties (before ONNX): "
-              f"dtype: {padding.dtype}, "
-              f"MinPx: {…
-              f"MaxPx: {…
-              f"MeanPx: {…
+              f"dtype: {padding.dtype}, "
+              f"MinPx: {min_px:.4f}, "
+              f"MaxPx: {max_px:.4f}, "
+              f"MeanPx: {mean_px:.4f}")
         if np.all(padding == 0):
             print(" DEBUG RECOGNIZER: WARNING - Normalized image is all zeros!")
-        elif np.…
-            print(f" DEBUG RECOGNIZER: WARNING - Normalized image is a constant value: {…
-        # ---- END LOGGING NORMALIZED CROP PROPERTIES ----
+        elif np.abs(max_px - min_px) < 1e-6 : # Check if all elements are (close to) the same
+            print(f" DEBUG RECOGNIZER: WARNING - Normalized image is a constant value: {mean_px:.4f}")
         return padding
 
     def __call__(self, img_list):
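The normalize-and-pad contract in `_resize_norm` (resize to the model height preserving aspect ratio, scale to [0, 1], shift to [-1, 1], zero-pad to full width) is easy to sanity-check in isolation. A minimal sketch assuming a CRNN-style input shape of (3, 48, 320); the helper name is illustrative:

```python
from math import ceil

import cv2
import numpy as np

def resize_norm_for_rec(crop: np.ndarray, shape=(3, 48, 320)) -> np.ndarray:
    img_c, img_h, img_w = shape
    h, w = crop.shape[:2]
    tw = max(1, min(img_w, int(ceil(img_h * w / float(h)))))  # keep aspect ratio
    resized = cv2.resize(crop, (tw, img_h)).astype("float32")
    chw = resized.transpose((2, 0, 1)) / 255.0   # HWC -> CHW, [0, 1]
    chw = (chw - 0.5) / 0.5                      # now in [-1, 1]
    padded = np.zeros((img_c, img_h, img_w), dtype=np.float32)
    padded[:, :, :tw] = chw                      # zero-pad to full model width
    return padded

crop = (np.random.rand(32, 200, 3) * 255).astype(np.uint8)
out = resize_norm_for_rec(crop)
assert out.shape == (3, 48, 320)
assert -1.0 <= out.min() and out.max() <= 1.0
print(out.shape, out.min(), out.max())
```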
@@ -1193,133 +1227,65 @@ class _MDR_TextSystem:
         try: return list(sorted(boxes, key=key))
         except: return list(boxes) # Fallback
 
-        …
-            print(" DEBUG OCR SYS: Detector returned no boxes. Returning empty fragments.") # DEBUG
-            return [], [] # This is what currently leads to "0 fragments found" if detector fails
-
-        …
-                if crop_img is None:
-                    print(f" DEBUG OCR SYS: Crop {i+1}/{len(boxes)} is None.") # DEBUG
-                    crops.append(None)
-                elif crop_img.shape[0] == 0 or crop_img.shape[1] == 0:
-                    print(f" DEBUG OCR SYS: Crop {i+1}/{len(boxes)} has zero dimension: {crop_img.shape}") # DEBUG
-                    crops.append(None)
-                else:
-                    crops.append(crop_img)
-                    # Optionally save these crops for manual inspection:
-                    # if self.save_crop: cv2.imwrite(os.path.join(self.crop_dir, f"debug_crop_before_cls_{self.crop_idx + i}.png"), crop_img)
-            except Exception as e_crop:
-                print(f" DEBUG OCR SYS: Error cropping box {i+1}/{len(boxes)}: {e_crop}") # DEBUG
-                crops.append(None)
-
-        valid_idxs = [i for i, c in enumerate(crops) if c is not None and c.shape[0] > 0 and c.shape[1] > 0]
-        if not valid_idxs:
-            print(" DEBUG OCR SYS: No valid crops obtained after attempting to crop detected boxes. Returning empty fragments.") # DEBUG
-            return [], []
-
-        # Filter crops and corresponding boxes
-        valid_crops = [crops[i] for i in valid_idxs]
-        boxes_for_valid_crops = [boxes[i] for i in valid_idxs]
-        print(f" DEBUG OCR SYS: Number of valid crops to process: {len(valid_crops)}") # DEBUG
-
-        # ---- START LOGGING CROP PROPERTIES ----
-        if valid_crops:
-            print(" DEBUG OCR SYS: Logging properties of first few valid crops (and Box 21 if present):")
-            indices_to_log = list(range(min(3, len(valid_crops)))) # Log first 3
-            # Try to find original index of Box 21 if we can map it back, this is a bit tricky here
-            # For simplicity, let's just log the first few. If Box 21 was among them, we'd see it.
-
-            for i_log_idx, crop_idx in enumerate(indices_to_log):
-                crop_image_np = valid_crops[crop_idx]
-                if crop_image_np is not None and crop_image_np.size > 0:
-                    print(f" Crop for Recognizer (Index {crop_idx}): "
-                          f"Shape: {crop_image_np.shape}, "
-                          f"dtype: {crop_image_np.dtype}, "
-                          f"MinPx: {np.min(crop_image_np)}, "
-                          f"MaxPx: {np.max(crop_image_np)}, "
-                          f"MeanPx: {np.mean(crop_image_np):.2f}")
-                else:
-                    print(f" Crop for Recognizer (Index {crop_idx}): Is None or empty.")
-        # ---- END LOGGING CROP PROPERTIES ----
-
-        if self.use_cls and self.classifier and classify:
-            print(f" DEBUG OCR SYS: Applying classifier to {len(valid_crops)} crops...") # DEBUG
-            try:
-                # The classifier might modify valid_crops in-place (e.g., rotate them)
-                classified_crops, cls_results = self.classifier(valid_crops) # classifier returns list, results
-                print(f" DEBUG OCR SYS: Classifier results count: {len(cls_results)}. First few: {cls_results[:3]}") # DEBUG
-                valid_crops = classified_crops # Update with potentially rotated crops
-            except Exception as e_cls:
-                print(f" DEBUG OCR SYS: Classifier error: {e_cls}. Using unclassified crops.") # DEBUG
-                # Continue with unclassified (but valid) crops
-
-        print(f" DEBUG OCR SYS: Applying recognizer to {len(valid_crops)} crops...") # DEBUG
-        # ---- START TEMP CODE TO SAVE CROPS ----
-        save_crop_path_dir = Path("/tmp/temp_recognizer_crops") # Use /tmp
-        save_crop_path_dir.mkdir(parents=True, exist_ok=True)
-        for i_crop, crop_image_np in enumerate(valid_crops):
-            try:
-                # Ensure crop_image_np is a valid image array (e.g., uint8)
-                if crop_image_np is not None and crop_image_np.size > 0:
-                    # OpenCV expects BGR if color, or grayscale
-                    cv2.imwrite(str(save_crop_path_dir / f"crop_to_recognize_{self.crop_idx + i_crop}.png"), crop_image_np)
-                else:
-                    print(f" DEBUG OCR SYS: Crop {i_crop} is None or empty, not saving.")
-            except Exception as e_save:
-                print(f" DEBUG OCR SYS: Failed to save crop {i_crop}: {e_save}")
-        print(f" DEBUG OCR SYS: Saved {len(valid_crops)} crops for recognizer to {save_crop_path_dir}")
-        # ---- END TEMP CODE TO SAVE CROPS ----
-        try:
-            …
-            # Handle this gracefully, perhaps by taking the minimum length
-            min_len = min(len(boxes_for_valid_crops), len(rec_res))
-            boxes_to_iterate = boxes_for_valid_crops[:min_len]
-            rec_res_to_iterate = rec_res[:min_len]
-        else:
-            boxes_to_iterate = boxes_for_valid_crops
-            rec_res_to_iterate = rec_res
-
-        print(f" DEBUG OCR SYS: Filtering {len(rec_res_to_iterate)} recognition results with drop_score: {self.drop_score}") # DEBUG
-        for i, (box, res_tuple) in enumerate(zip(boxes_to_iterate, rec_res_to_iterate)):
-            txt, score = res_tuple
-            print(f" DEBUG OCR SYS: Box {i+1} - Recognized: '{txt}', Score: {score:.4f}") # DEBUG
-            if score >= self.drop_score and txt and not mdr_is_whitespace(txt): # Added check for non-empty/whitespace
-                final_boxes.append(box)
-                final_rec_tuples.append(res_tuple)
-            else:
-                reason = []
-                if score < self.drop_score: reason.append(f"score {score:.2f} < {self.drop_score}")
-                if not txt: reason.append("empty text")
-                if txt and mdr_is_whitespace(txt): reason.append("whitespace text")
-                print(f" DEBUG OCR SYS: Box {i+1} DROPPED. Reason(s): {', '.join(reason)}") # DEBUG
-
-        …
+    # In class _MDR_TextRecognizer
+    def _resize_norm(self, img, max_r): # img is a single crop
+        imgC, imgH, imgW = self.shape # e.g., (3, 48, 320)
+        h_orig, w_orig = img.shape[:2]
+        # ADDED: Log input crop shape
+        print(f" DEBUG RECOGNIZER: _resize_norm input crop shape: ({h_orig}, {w_orig}), target shape: {self.shape}, max_r_batch: {max_r:.2f}")
+
+        if h_orig == 0 or w_orig == 0:
+            print(f" DEBUG RECOGNIZER: _resize_norm received zero-dimension crop ({h_orig}x{w_orig}). Returning zeros.")
+            return np.zeros((imgC, imgH, imgW), dtype=np.float32)
+
+        r_current = w_orig / float(h_orig)
+        tw = min(imgW, int(ceil(imgH * r_current)))
+        tw = max(1, tw)
+        print(f" DEBUG RECOGNIZER: _resize_norm calculated target width (tw): {tw} for target height (imgH): {imgH}")
+
+        try:
+            resized = cv2.resize(img, (tw, imgH))
+        except cv2.error as e_resize: # Catch specific cv2 error
+            print(f" DEBUG RECOGNIZER: _resize_norm cv2.resize failed: {e_resize}. Original shape ({h_orig},{w_orig}), target ({tw},{imgH}). Returning zeros.")
+            return np.zeros((imgC, imgH, imgW), dtype=np.float32)
+        except Exception as e_resize_general: # Catch any other unexpected error
+            print(f" DEBUG RECOGNIZER: _resize_norm general error during resize: {e_resize_general}. Original shape ({h_orig},{w_orig}), target ({tw},{imgH}). Returning zeros.")
+            import traceback
+            traceback.print_exc()
+            return np.zeros((imgC, imgH, imgW), dtype=np.float32)
+
+        resized = resized.astype("float32")
+        if imgC == 1 and len(resized.shape) == 3:
+            resized = cv2.cvtColor(resized, cv2.COLOR_BGR2GRAY)
+        if len(resized.shape) == 2:
+            resized = resized[:, :, np.newaxis] # Add channel dim if grayscale
+
+        # Ensure resized has 3 channels if imgC is 3, even if input was grayscale
+        if imgC == 3 and resized.shape[2] == 1:
+            resized = cv2.cvtColor(resized, cv2.COLOR_GRAY2BGR)
+
+        resized = resized.transpose((2, 0, 1)) / 255.0
+        resized -= 0.5
+        resized /= 0.5
+
+        padding = np.zeros((imgC, imgH, imgW), dtype=np.float32)
+        padding[:, :, 0:tw] = resized
+        print(f" DEBUG RECOGNIZER: _resize_norm output padded shape: {padding.shape}")
+
+        # ADDED: Log normalized crop properties
+        min_px, max_px, mean_px = np.min(padding), np.max(padding), np.mean(padding)
+        print(f" DEBUG RECOGNIZER: Normalized Crop Properties (before ONNX): "
+              f"dtype: {padding.dtype}, "
+              f"MinPx: {min_px:.4f}, "
+              f"MaxPx: {max_px:.4f}, "
+              f"MeanPx: {mean_px:.4f}")
+        if np.all(padding == 0):
+            print(" DEBUG RECOGNIZER: WARNING - Normalized image is all zeros!")
+        elif np.abs(max_px - min_px) < 1e-6 : # Check if all elements are (close to) the same
+            print(f" DEBUG RECOGNIZER: WARNING - Normalized image is a constant value: {mean_px:.4f}")
+        return padding
 
     def _save_crops(self, crops, recs):
         mdr_ensure_directory(self.crop_dir)
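The recognition-filtering logic removed above reduces to: keep a (box, (text, score)) pair only when the score clears `drop_score` and the text is neither empty nor pure whitespace. A compact sketch of that policy; the `drop_score` default and the `str.isspace` check stand in for `self.drop_score` and `mdr_is_whitespace`:

```python
def filter_recognitions(boxes, rec_results, drop_score=0.5):
    """Keep (box, (text, score)) pairs that clear the score threshold and
    carry non-whitespace text; zip() also guards against length mismatch."""
    kept_boxes, kept_recs = [], []
    for box, (text, score) in zip(boxes, rec_results):
        if score >= drop_score and text and not text.isspace():
            kept_boxes.append(box)
            kept_recs.append((text, score))
    return kept_boxes, kept_recs

boxes = ["b1", "b2", "b3"]
recs = [("Hello", 0.93), ("   ", 0.88), ("x", 0.21)]
print(filter_recognitions(boxes, recs))   # (['b1'], [('Hello', 0.93)])
```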
@@ -1645,35 +1611,34 @@ class MDROcrEngine:
         except Exception as e: print(f"ERROR initializing MDR OCR System: {e}"); self._text_system = None
         return self._text_system
 
+    # In class MDROcrEngine:
     def find_text_fragments(self, image_np: np.ndarray) -> Generator[MDROcrFragment, None, None]:
         """Finds and recognizes text fragments in a NumPy image (BGR)."""
         system = self._get_system()
         if system is None:
-            print(" DEBUG OCR Engine: MDR OCR System unavailable. No fragments will be found.")
-            return
+            print(" DEBUG OCR Engine: MDR OCR System unavailable. No fragments will be found.")
+            return
 
-        img_for_system = self._preprocess(image_np)
-        print(f" DEBUG OCR Engine: Image preprocessed for TextSystem. Shape: {img_for_system.shape}")
+        img_for_system = self._preprocess(image_np)
+        print(f" DEBUG OCR Engine: Image preprocessed for TextSystem. Shape: {img_for_system.shape}")
 
         try:
-
-            boxes, recs = system(img_for_system) # recs should be list of (text, score)
+            boxes, recs = system(img_for_system)
         except Exception as e:
-            print(f" DEBUG OCR Engine: Error during TextSystem prediction: {e}")
+            print(f" DEBUG OCR Engine: Error during TextSystem prediction: {e}")
             import traceback
             traceback.print_exc()
-            return
+            return
 
         if not boxes or not recs:
-            print(f" DEBUG OCR Engine: TextSystem returned no boxes ({len(boxes)}) or no recs ({len(recs)}). No fragments generated.")
-            return
+            print(f" DEBUG OCR Engine: TextSystem returned no boxes ({len(boxes) if boxes is not None else 'None'}) or no recs ({len(recs) if recs is not None else 'None'}). No fragments generated.")
+            return
 
         if len(boxes) != len(recs):
-            print(f" DEBUG OCR Engine: Mismatch between boxes ({len(boxes)}) and recs ({len(recs)}) from TextSystem. This is problematic.")
-            # Potentially try to recover by taking the minimum length, or just return
+            print(f" DEBUG OCR Engine: Mismatch between boxes ({len(boxes)}) and recs ({len(recs)}) from TextSystem. This is problematic. No fragments generated.")
             return
 
-        print(f" DEBUG OCR Engine: TextSystem returned {len(boxes)} boxes and {len(recs)} recognition results. Converting to MDROcrFragment.")
+        print(f" DEBUG OCR Engine: TextSystem returned {len(boxes)} boxes and {len(recs)} recognition results. Converting to MDROcrFragment.")
         fragments_generated_count = 0
         for i, (box_pts, rec_tuple) in enumerate(zip(boxes, recs)):
             if not isinstance(rec_tuple, (list, tuple)) or len(rec_tuple) != 2:
@@ -1681,25 +1646,26 @@ class MDROcrEngine:
                 continue
 
             txt, conf = rec_tuple
-            …
-            # For MDROcrFragment, we just need valid text and geometry.
-            if not txt or mdr_is_whitespace(txt): # Basic check, though system should filter
-                # print(f" DEBUG OCR Engine: Fragment {i} has empty/whitespace text after system call. Text: '{txt}'. Skipping.")
+            if not txt or mdr_is_whitespace(txt):
+                # print(f" DEBUG OCR Engine: Fragment {i} has empty/whitespace text after system call. Text: '{txt}'. Skipping.") # Already logged in TextSystem
                 continue
 
-            …
+            try:
+                pts = [(float(p[0]), float(p[1])) for p in box_pts]
+                if len(pts) == 4:
+                    r = MDRRectangle(lt=pts[0], rt=pts[1], rb=pts[2], lb=pts[3])
+                    if r.is_valid and r.area > 1:
+                        yield MDROcrFragment(order=-1, text=txt, rank=float(conf), rect=r)
+                        fragments_generated_count += 1
+                    # else:
+                    #     print(f" DEBUG OCR Engine: Fragment {i} has invalid/small rectangle. Area: {r.area:.2f}. Valid: {r.is_valid}. Skipping.")
             # else:
-            #     print(f" DEBUG OCR Engine: Fragment {i} …
-            …
+            #     print(f" DEBUG OCR Engine: Fragment {i} box_pts not length 4: {len(pts)}. Skipping.")
+            except Exception as e_frag:
+                print(f" DEBUG OCR Engine: Error creating MDROcrFragment for item {i}: {e_frag}")
+                continue
 
-        print(f" DEBUG OCR Engine: Generated {fragments_generated_count} MDROcrFragment objects.")
+        print(f" DEBUG OCR Engine: Generated {fragments_generated_count} MDROcrFragment objects.")
 
     def _preprocess(self, img: np.ndarray) -> np.ndarray:
         if len(img.shape) == 3 and img.shape[2] == 4:
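`MDRRectangle.is_valid` and `.area` are project-internal, but the geometric gate they implement above — a four-point quad with meaningful, non-degenerate area — can be approximated with the shoelace formula. A hedged sketch, not the project's actual implementation:

```python
def quad_area(pts):
    """Shoelace formula for a polygon given as [(x, y), ...]."""
    n = len(pts)
    s = 0.0
    for i in range(n):
        x0, y0 = pts[i]
        x1, y1 = pts[(i + 1) % n]
        s += x0 * y1 - x1 * y0
    return abs(s) / 2.0

pts = [(0, 0), (10, 0), (10, 4), (0, 4)]
assert quad_area(pts) == 40.0
degenerate = [(0, 0), (5, 0), (10, 0), (2, 0)]   # collinear points
assert quad_area(degenerate) == 0.0              # would fail an `area > 1` gate
```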
@@ -1729,47 +1695,101 @@ def mdr_prepare_reader_inputs(inputs: Dict[str, torch.Tensor], model: LayoutLMv3
     return {k: v.to(model.device) if isinstance(v, torch.Tensor) else v for k, v in inputs.items()}
 
 def mdr_parse_reader_logits(logits: torch.Tensor, length: int) -> List[int]:
-    print(f"mdr_parse_reader_logits: Called with logits shape: {logits.shape}, length: {length}")
+    print(f"mdr_parse_reader_logits: Called with logits shape: {logits.shape}, length: {length}")
     if length == 0:
-        print("mdr_parse_reader_logits: length is 0, returning empty list.")
+        print("mdr_parse_reader_logits: length is 0, returning empty list.")
         return []
 
-
-    print(f"mdr_parse_reader_logits: Attempting to slice logits with [1 : {length + 1}, :{length}]") # ADDED
+    print(f"mdr_parse_reader_logits: Attempting to slice logits with [1 : {length + 1}, :{length}]")
     try:
         rel_logits = logits[1 : length + 1, :length]
-        print(f"mdr_parse_reader_logits: rel_logits shape: {rel_logits.shape}")
+        print(f"mdr_parse_reader_logits: rel_logits shape: {rel_logits.shape}")
     except IndexError as e:
-        print(f"mdr_parse_reader_logits: IndexError during rel_logits slicing! Error: {e}")
+        print(f"mdr_parse_reader_logits: IndexError during rel_logits slicing! Error: {e}")
         import traceback
         traceback.print_exc()
-        …
+        # Depending on desired behavior, either raise or return empty/fallback
+        return list(range(length)) # Fallback to sequential order if slicing fails
 
     orders = rel_logits.argmax(dim=1).tolist()
-    print(f"mdr_parse_reader_logits: Initial orders calculated. Count: {len(orders)}")
+    print(f"mdr_parse_reader_logits: Initial orders calculated. Count: {len(orders)}")
 
-    loop_count = 0 # ADDED to detect potential infinite loops
-    max_loops = length * length # A generous upper bound for loop iterations; adjust if needed
+    # ADDED: Loop safeguard
+    loop_count = 0
+    # Max loops: if N items, N^2 is a generous limit for pairwise comparisons/adjustments.
+    # For N=33, N^2 = 1089. For N=21, N^2 = 441. This matches the logs.
+    # A tighter bound might be N * (N-1) / 2 or N * some_factor.
+    # Let's use N * N as seen in logs, or a fixed large number if N is small.
+    max_loops = max(50, length * length) # Ensure at least 50 loops for small N
     while True:
         loop_count += 1
         if loop_count > max_loops:
-            print(f"mdr_parse_reader_logits: Exceeded max_loops ({max_loops}), breaking while loop to prevent infinite loop.")
-            break
+            print(f"mdr_parse_reader_logits: Exceeded max_loops ({max_loops}), breaking while loop to prevent infinite loop.")
+            break
 
-        print(f"mdr_parse_reader_logits: While loop iteration: {loop_count}")
+        # print(f"mdr_parse_reader_logits: While loop iteration: {loop_count}") # Can be too verbose
         conflicts = defaultdict(list)
         [conflicts[order].append(idx) for idx, order in enumerate(orders)]
-        …
+
+        # Filter to find actual conflicting orders (where multiple original indices map to the same target order)
+        conflicting_orders_map = {o: idxs for o, idxs in conflicts.items() if len(idxs) > 1}
 
-        if not …
-            print("mdr_parse_reader_logits: No conflicting orders, breaking while loop.")
+        if not conflicting_orders_map:
+            # print("mdr_parse_reader_logits: No conflicting orders, breaking while loop.") # Verbose
             break
-
-        …
+
+        # Log only if there are actual conflicts to resolve
+        if loop_count == 1 or loop_count % 10 == 0 : # Log first and every 10th iteration with conflicts
+            print(f"mdr_parse_reader_logits: While loop iteration: {loop_count}. Found {len(conflicting_orders_map)} conflicting orders.")
+
+        for order_val, c_idxs in conflicting_orders_map.items():
+            # This logic seems to pick the one with the highest score for that conflicting order.
+            # It might need more sophisticated tie-breaking if scores are identical or very close.
+            # The original logic was:
+            # best_idx = -1; max_score = -float('inf')
+            # for c_idx in c_idxs:
+            #     score = rel_logits[c_idx, order_val].item()
+            #     if score > max_score: max_score = score; best_idx = c_idx
+            # for c_idx in c_idxs:
+            #     if c_idx != best_idx: orders[c_idx] = -1 # Mark for re-evaluation or different assignment
+
+            # Simpler approach: keep the first one, mark others to be reassigned.
+            # This might not be optimal but could break cycles.
+            # A more robust solution might involve graph-based cycle detection or a different assignment strategy.
+            # For now, let's stick to a slight modification of the implied original logic:
+            # The one with the highest confidence for *that specific order_val* keeps it.
+            # Others get their order reset to their own index (diagonal) or -1 to be re-evaluated.
+
+            if not c_idxs: continue
+
+            best_c_idx_for_this_order = -1
+            max_confidence_for_this_order = -float('inf')
+
+            for current_c_idx in c_idxs:
+                confidence = rel_logits[current_c_idx, order_val].item()
+                if confidence > max_confidence_for_this_order:
+                    max_confidence_for_this_order = confidence
+                    best_c_idx_for_this_order = current_c_idx
+
+            # Now, for all conflicting indices for this 'order_val',
+            # if they are not the 'best_c_idx_for_this_order',
+            # they need a new order. A simple strategy is to make them point to themselves initially.
+            # Or, find their next best alternative.
+            for current_c_idx in c_idxs:
+                if current_c_idx != best_c_idx_for_this_order:
+                    # Option 1: Reset to self (might not resolve complex cycles)
+                    # orders[current_c_idx] = current_c_idx
+
+                    # Option 2: Find next best order for this current_c_idx, excluding the conflicting 'order_val'
+                    # Create a temporary copy of its logits row, set the conflicting order's logit to -inf
+                    temp_logits_row = rel_logits[current_c_idx, :].clone()
+                    temp_logits_row[order_val] = -float('inf')
+                    orders[current_c_idx] = temp_logits_row.argmax().item()
+
+    print(f"mdr_parse_reader_logits: While loop finished after {loop_count} iterations. Returning {len(orders)} orders.")
     return orders
 
 # --- MDR Layout Reading Engine ---
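The conflict-resolution loop added above can be traced on a toy tensor: two tokens both argmax to the same reading-order slot, the higher logit keeps it, and the loser is reassigned to its next-best slot. A standalone sketch of that mechanism:

```python
from collections import defaultdict

import torch

rel_logits = torch.tensor([[0.9, 0.1, 0.0],
                           [0.8, 0.7, 0.2],
                           [0.1, 0.2, 0.6]])
orders = rel_logits.argmax(dim=1).tolist()       # [0, 0, 2] -> slot 0 is contested

while True:
    conflicts = defaultdict(list)
    for idx, order in enumerate(orders):
        conflicts[order].append(idx)
    contested = {o: idxs for o, idxs in conflicts.items() if len(idxs) > 1}
    if not contested:
        break
    for order_val, idxs in contested.items():
        winner = max(idxs, key=lambda i: rel_logits[i, order_val].item())
        for i in idxs:
            if i != winner:
                row = rel_logits[i, :].clone()
                row[order_val] = -float("inf")   # forbid the contested slot
                orders[i] = row.argmax().item()  # take the next-best slot

print(orders)   # [0, 1, 2]
```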
@@ -1790,45 +1810,30 @@ class MDRLayoutReader:
             self._device = "cpu"
             print("MDRLayoutReader: CUDA not available. Setting device to cpu.")
 
+    # In class MDRLayoutReader:
     def _get_model(self) -> LayoutLMv3ForTokenClassification | None:
         if self._model is None:
-            cache = mdr_ensure_directory(self._model_path)
+            cache = mdr_ensure_directory(self._model_path) # This should be self._model_path / "layoutreader"
+            # Correct cache path for transformers
+            layoutreader_cache_dir = Path(self._model_dir) / "layoutreader" # Assuming _model_dir is the main one
+            mdr_ensure_directory(str(layoutreader_cache_dir))
+
             name = "microsoft/layoutlmv3-base"
-            …
-            # and compatible. For now, let's assume microsoft/layoutlmv3-base is the target
-            # if a more specific one isn't found or intended.
-            # The original code had a slightly confusing h_path logic.
-            # Let's simplify to prioritize a local cache of "microsoft/layoutlmv3-base"
-            # or a specific model if `self._model_path` points to a complete model directory.
-
-            model_load_path = name # Default to Hugging Face model name
-            local_files_only_flag = False
-
-            # Check if self._model_path is a directory containing a full model
-            # (e.g., config.json, pytorch_model.bin)
-            # This part of the original logic for 'h_path' was a bit specific.
-            # For LayoutLMv3, usually, you'd just use "microsoft/layoutlmv3-base"
-            # and let transformers handle caching, or provide a path to a fully saved model.
-
-            # Let's assume the primary goal is to load "microsoft/layoutlmv3-base"
-            # and allow it to be cached in `self._model_path/layoutreader`
-            # The `cache_dir` argument to `from_pretrained` handles this.
-
-            print(f"MDRLayoutReader: Attempting to load LayoutLMv3 model '{model_load_path}'. Cache dir: {cache}")
+
+            print(f"MDRLayoutReader: Attempting to load LayoutLMv3 model '{name}'. Cache dir: {layoutreader_cache_dir}")
             try:
                 self._model = LayoutLMv3ForTokenClassification.from_pretrained(
-                    …
-                    cache_dir=…
-                    local_files_only=…
-                    num_labels=_MDR_MAX_LEN+1
+                    name, # Use the HF model name
+                    cache_dir=str(layoutreader_cache_dir),
+                    local_files_only=False, # Allow download on first run
+                    num_labels=_MDR_MAX_LEN+1
                 )
                 # Explicitly move model to the determined device
-                self._model.to(torch.device(self._device)) #
+                self._model.to(torch.device(self._device)) # ENSURE THIS LINE IS PRESENT AND CORRECT
                 self._model.eval()
-                print(f"MDR LayoutReader model '{…
+                print(f"MDR LayoutReader model '{name}' loaded successfully on device: {self._model.device}.")
             except Exception as e:
-                print(f"ERROR loading MDR LayoutReader model '{…
+                print(f"ERROR loading MDR LayoutReader model '{name}': {e}")
                 import traceback
                 traceback.print_exc()
                 self._model = None
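The loading pattern above is the standard `transformers` one: pass the hub id plus a `cache_dir`, allow downloads on first run, then move the model to the chosen device. A minimal sketch with a hypothetical cache path; the `num_labels` value is illustrative since `_MDR_MAX_LEN` is defined elsewhere in the module, and `transformers` will warn that the new classification head is randomly initialized:

```python
from pathlib import Path

import torch
from transformers import LayoutLMv3ForTokenClassification

cache_dir = Path("/tmp/models/layoutreader")     # hypothetical cache location
cache_dir.mkdir(parents=True, exist_ok=True)

model = LayoutLMv3ForTokenClassification.from_pretrained(
    "microsoft/layoutlmv3-base",
    cache_dir=str(cache_dir),
    local_files_only=False,   # allow download on first run
    num_labels=510 + 1,       # illustrative; the diff uses _MDR_MAX_LEN + 1
)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)
model.eval()
print(model.device)
```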
@@ -1836,68 +1841,47 @@ class MDRLayoutReader:
 
     def determine_reading_order(self, layouts: list[MDRLayoutElement], size: tuple[int, int]) -> list[MDRLayoutElement]:
         w, h = size
-        if w <= 0 or h <= 0 …
-            print("MDRLayoutReader: Invalid size or …
+        if w <= 0 or h <= 0: # ADDED check for invalid size
+            print("MDRLayoutReader: Invalid image size (w or h <= 0), returning layouts as is.")
             return layouts
+        if not layouts:
+            print("MDRLayoutReader: No layouts to process, returning empty list.")
+            return [] # Return empty list if no layouts
 
         model = self._get_model()
-        …
-            print("MDRLayoutReader: Model is None, using fallback geometric sort.")
-            layouts.sort(key=lambda l: (l.rect.lt[1], l.rect.lt[0]))
-            nfo = 0
-            for l in layouts:
-                l.fragments.sort(key=lambda f: (f.rect.lt[1], f.rect.lt[0]))
-                [setattr(f, 'order', i + nfo) for i, f in enumerate(l.fragments)]
-                nfo += len(l.fragments)
-            return layouts
-
-        print("MDRLayoutReader: Preparing bboxes...") # ADDED
+        # ... (rest of the method, add logging as needed) ...
+        print("MDRLayoutReader: Preparing bboxes...")
         bbox_list = self._prepare_bboxes(layouts, w, h)
-        print(f"MDRLayoutReader: Prepared {len(bbox_list) if bbox_list else 'None or 0'} bboxes.")
 
-        if bbox_list is None or len(bbox_list) == 0:
-            print("MDRLayoutReader: No bboxes …
+        if bbox_list is None or len(bbox_list) == 0: # Check if bbox_list is None or empty
+            print("MDRLayoutReader: No bboxes prepared from layouts, returning layouts as is (possibly sorted geometrically).")
+            # Fallback geometric sort if no bboxes could be prepared
+            layouts.sort(key=lambda l: (l.rect.lt[1], l.rect.lt[0]))
             return layouts
-
-        xs = l_size / float(w)
-        ys = l_size / float(h)
-        scaled_bboxes = []
-        for bbox in bbox_list:
-            x0, y0, x1, y1 = bbox.value
-            sx0 = max(0, min(l_size - 1, round(x0 * xs)))
-            sy0 = max(0, min(l_size - 1, round(y0 * ys)))
-            sx1 = max(0, min(l_size - 1, round(x1 * xs)))
-            sy1 = max(0, min(l_size - 1, round(y1 * ys)))
-            scaled_bboxes.append([min(sx0, sx1), min(sy0, sy1), max(sx0, sx1), max(sy0, sy1)])
-        print("MDRLayoutReader: Scaled bboxes prepared. Count: ", len(scaled_bboxes))
-        orders = []
+        print(f"MDRLayoutReader: Prepared {len(bbox_list)} bboxes.")
+        # ... (rest of the scaling and inference logic) ...
         try:
             with torch.no_grad():
-                print("MDRLayoutReader: Creating reader inputs...")
-                inputs = mdr_boxes_to_reader_inputs(scaled_bboxes)
-                print("MDRLayoutReader: Preparing inputs for model device...")
+                print("MDRLayoutReader: Creating reader inputs...")
+                inputs = mdr_boxes_to_reader_inputs(scaled_bboxes) # scaled_bboxes comes from the loop above
+                print("MDRLayoutReader: Preparing inputs for model device...")
                 inputs = mdr_prepare_reader_inputs(inputs, model)
-                print("MDRLayoutReader: Running model inference...")
+                print("MDRLayoutReader: Running model inference...")
                 logits = model(**inputs).logits.cpu().squeeze(0)
-                print("MDRLayoutReader: Model inference complete. Parsing logits...")
+                print("MDRLayoutReader: Model inference complete. Parsing logits...")
                 orders = mdr_parse_reader_logits(logits, len(bbox_list))
-                print(f"MDRLayoutReader: Logits parsed. Orders count: {len(orders)}")
+                print(f"MDRLayoutReader: Logits parsed. Orders count: {len(orders)}")
         except Exception as e:
             print(f"MDR LayoutReader prediction error: {e}")
             import traceback
-            traceback.print_exc()
-            …
-            return layouts # Fallback
-        for i, order_idx in enumerate(orders):
-            bbox_list[i].order = order_idx
+            traceback.print_exc()
+            # Fallback geometric sort on error
+            layouts.sort(key=lambda l: (l.rect.lt[1], l.rect.lt[0]))
+            return layouts
+        # ... (rest of applying order) ...
         print("MDRLayoutReader: Applying order...")
-        result_layouts = self._apply_order(layouts, bbox_list)
-        print("MDRLayoutReader: Order applied. Returning layouts.")
+        result_layouts = self._apply_order(layouts, bbox_list) # Ensure bbox_list has 'order' attribute set
+        print("MDRLayoutReader: Order applied. Returning layouts.")
         return result_layouts
 
     def _prepare_bboxes(self, layouts: list[MDRLayoutElement], w: int, h: int) -> list[_MDR_ReaderBBox] | None: