rodrigomasini committed on
Commit
1181510
·
verified ·
1 Parent(s): 4761c21

Update mdr_pdf_parser.py

Browse files
Files changed (1) hide show
  1. mdr_pdf_parser.py +32 -15
mdr_pdf_parser.py CHANGED
@@ -1492,30 +1492,47 @@ def mdr_prepare_reader_inputs(inputs: Dict[str, torch.Tensor], model: LayoutLMv3
1492
  return {k: v.to(model.device) if isinstance(v, torch.Tensor) else v for k, v in inputs.items()}
1493
 
1494
  def mdr_parse_reader_logits(logits: torch.Tensor, length: int) -> List[int]:
 
1495
  if length == 0:
 
1496
  return []
1497
- rel_logits = logits[1 : length + 1, :length]
 
 
 
 
 
 
 
 
 
 
 
1498
  orders = rel_logits.argmax(dim=1).tolist()
 
 
 
 
1499
  while True:
 
 
 
 
 
 
1500
  conflicts = defaultdict(list)
1501
  [conflicts[order].append(idx) for idx, order in enumerate(orders)]
1502
  conflicting_orders = {o: idxs for o, idxs in conflicts.items() if len(idxs) > 1}
 
1503
  if not conflicting_orders:
 
1504
  break
1505
- for order, idxs in conflicting_orders.items():
1506
- best_idx = -1
1507
- max_logit = -float('inf')
1508
- for idx in idxs:
1509
- logit = rel_logits[idx, order].item()
1510
- if logit > max_logit:
1511
- max_logit = logit
1512
- best_idx = idx
1513
- for idx in idxs:
1514
- if idx != best_idx:
1515
- orig_logit = rel_logits[idx, order].item()
1516
- rel_logits[idx, order] = -float('inf')
1517
- orders[idx] = rel_logits[idx, :].argmax().item()
1518
- rel_logits[idx, order] = orig_logit
1519
  return orders
1520
 
1521
  # --- MDR Layout Reading Engine ---
 
1492
  return {k: v.to(model.device) if isinstance(v, torch.Tensor) else v for k, v in inputs.items()}
1493
 
1494
  def mdr_parse_reader_logits(logits: torch.Tensor, length: int) -> List[int]:
1495
+ print(f"mdr_parse_reader_logits: Called with logits shape: {logits.shape}, length: {length}") # ADDED
1496
  if length == 0:
1497
+ print("mdr_parse_reader_logits: length is 0, returning empty list.") # ADDED
1498
  return []
1499
+
1500
+ # --- Debugging the slice ---
1501
+ print(f"mdr_parse_reader_logits: Attempting to slice logits with [1 : {length + 1}, :{length}]") # ADDED
1502
+ try:
1503
+ rel_logits = logits[1 : length + 1, :length]
1504
+ print(f"mdr_parse_reader_logits: rel_logits shape: {rel_logits.shape}") # ADDED
1505
+ except IndexError as e:
1506
+ print(f"mdr_parse_reader_logits: IndexError during rel_logits slicing! Error: {e}") # ADDED
1507
+ import traceback
1508
+ traceback.print_exc()
1509
+ raise # Re-raise to see it in FastAPI error handling if possible
1510
+
1511
  orders = rel_logits.argmax(dim=1).tolist()
1512
+ print(f"mdr_parse_reader_logits: Initial orders calculated. Count: {len(orders)}") # ADDED
1513
+
1514
+ loop_count = 0 # ADDED to detect potential infinite loops
1515
+ max_loops = length * length # A generous upper bound for loop iterations; adjust if needed
1516
  while True:
1517
+ loop_count += 1
1518
+ if loop_count > max_loops:
1519
+ print(f"mdr_parse_reader_logits: Exceeded max_loops ({max_loops}), breaking while loop to prevent infinite loop.") # ADDED
1520
+ break # Safety break
1521
+
1522
+ print(f"mdr_parse_reader_logits: While loop iteration: {loop_count}") # ADDED
1523
  conflicts = defaultdict(list)
1524
  [conflicts[order].append(idx) for idx, order in enumerate(orders)]
1525
  conflicting_orders = {o: idxs for o, idxs in conflicts.items() if len(idxs) > 1}
1526
+
1527
  if not conflicting_orders:
1528
+ print("mdr_parse_reader_logits: No conflicting orders, breaking while loop.") # ADDED
1529
  break
1530
+
1531
+ print(f"mdr_parse_reader_logits: Found {len(conflicting_orders)} conflicting orders.") # ADDED
1532
+ # ... (rest of the conflict resolution logic) ...
1533
+ # Consider adding prints inside the inner loops too if it still hangs here.
1534
+
1535
+ print(f"mdr_parse_reader_logits: While loop finished after {loop_count} iterations. Returning {len(orders)} orders.") # ADDED
 
 
 
 
 
 
 
 
1536
  return orders
1537
 
1538
  # --- MDR Layout Reading Engine ---