Update mdr_pdf_parser.py
Browse files- mdr_pdf_parser.py +26 -4
mdr_pdf_parser.py
CHANGED
@@ -1542,9 +1542,13 @@ class MDRLayoutReader:
|
|
1542 |
def determine_reading_order(self, layouts: list[MDRLayoutElement], size: tuple[int, int]) -> list[MDRLayoutElement]:
|
1543 |
w, h = size
|
1544 |
if w <= 0 or h <= 0 or not layouts:
|
|
|
1545 |
return layouts
|
1546 |
-
|
1547 |
-
|
|
|
|
|
|
|
1548 |
layouts.sort(key=lambda l: (l.rect.lt[1], l.rect.lt[0]))
|
1549 |
nfo = 0
|
1550 |
for l in layouts:
|
@@ -1552,9 +1556,15 @@ class MDRLayoutReader:
|
|
1552 |
[setattr(f, 'order', i + nfo) for i, f in enumerate(l.fragments)]
|
1553 |
nfo += len(l.fragments)
|
1554 |
return layouts
|
|
|
|
|
1555 |
bbox_list = self._prepare_bboxes(layouts, w, h)
|
|
|
|
|
1556 |
if bbox_list is None or len(bbox_list) == 0:
|
|
|
1557 |
return layouts
|
|
|
1558 |
l_size = 1000.0
|
1559 |
xs = l_size / float(w)
|
1560 |
ys = l_size / float(h)
|
@@ -1566,22 +1576,34 @@ class MDRLayoutReader:
|
|
1566 |
sx1 = max(0, min(l_size - 1, round(x1 * xs)))
|
1567 |
sy1 = max(0, min(l_size - 1, round(y1 * ys)))
|
1568 |
scaled_bboxes.append([min(sx0, sx1), min(sy0, sy1), max(sx0, sx1), max(sy0, sy1)])
|
|
|
1569 |
orders = []
|
1570 |
try:
|
1571 |
with torch.no_grad():
|
|
|
1572 |
inputs = mdr_boxes_to_reader_inputs(scaled_bboxes)
|
|
|
1573 |
inputs = mdr_prepare_reader_inputs(inputs, model)
|
|
|
1574 |
logits = model(**inputs).logits.cpu().squeeze(0)
|
|
|
1575 |
orders = mdr_parse_reader_logits(logits, len(bbox_list))
|
|
|
1576 |
except Exception as e:
|
1577 |
print(f"MDR LayoutReader prediction error: {e}")
|
|
|
|
|
1578 |
return layouts # Fallback
|
|
|
1579 |
if len(orders) != len(bbox_list):
|
1580 |
-
print("MDR LayoutReader order mismatch")
|
1581 |
return layouts # Fallback
|
1582 |
for i, order_idx in enumerate(orders):
|
1583 |
bbox_list[i].order = order_idx
|
1584 |
-
|
|
|
|
|
|
|
1585 |
|
1586 |
def _prepare_bboxes(self, layouts: list[MDRLayoutElement], w: int, h: int) -> list[_MDR_ReaderBBox] | None:
|
1587 |
line_h = self._estimate_line_h(layouts)
|
|
|
1542 |
def determine_reading_order(self, layouts: list[MDRLayoutElement], size: tuple[int, int]) -> list[MDRLayoutElement]:
|
1543 |
w, h = size
|
1544 |
if w <= 0 or h <= 0 or not layouts:
|
1545 |
+
print("MDRLayoutReader: Invalid size or no layouts, returning early.")
|
1546 |
return layouts
|
1547 |
+
|
1548 |
+
model = self._get_model()
|
1549 |
+
|
1550 |
+
if model is None: # Fallback geometric sort
|
1551 |
+
print("MDRLayoutReader: Model is None, using fallback geometric sort.")
|
1552 |
layouts.sort(key=lambda l: (l.rect.lt[1], l.rect.lt[0]))
|
1553 |
nfo = 0
|
1554 |
for l in layouts:
|
|
|
1556 |
[setattr(f, 'order', i + nfo) for i, f in enumerate(l.fragments)]
|
1557 |
nfo += len(l.fragments)
|
1558 |
return layouts
|
1559 |
+
|
1560 |
+
print("MDRLayoutReader: Preparing bboxes...") # ADDED
|
1561 |
bbox_list = self._prepare_bboxes(layouts, w, h)
|
1562 |
+
print(f"MDRLayoutReader: Prepared {len(bbox_list) if bbox_list else 'None or 0'} bboxes.")
|
1563 |
+
|
1564 |
if bbox_list is None or len(bbox_list) == 0:
|
1565 |
+
print("MDRLayoutReader: No bboxes to process, returning layouts.")
|
1566 |
return layouts
|
1567 |
+
|
1568 |
l_size = 1000.0
|
1569 |
xs = l_size / float(w)
|
1570 |
ys = l_size / float(h)
|
|
|
1576 |
sx1 = max(0, min(l_size - 1, round(x1 * xs)))
|
1577 |
sy1 = max(0, min(l_size - 1, round(y1 * ys)))
|
1578 |
scaled_bboxes.append([min(sx0, sx1), min(sy0, sy1), max(sx0, sx1), max(sy0, sy1)])
|
1579 |
+
print("MDRLayoutReader: Scaled bboxes prepared. Count: ", len(scaled_bboxes))
|
1580 |
orders = []
|
1581 |
try:
|
1582 |
with torch.no_grad():
|
1583 |
+
print("MDRLayoutReader: Creating reader inputs...") # ADDED
|
1584 |
inputs = mdr_boxes_to_reader_inputs(scaled_bboxes)
|
1585 |
+
print("MDRLayoutReader: Preparing inputs for model device...") # ADDED
|
1586 |
inputs = mdr_prepare_reader_inputs(inputs, model)
|
1587 |
+
print("MDRLayoutReader: Running model inference...") # ADDED
|
1588 |
logits = model(**inputs).logits.cpu().squeeze(0)
|
1589 |
+
print("MDRLayoutReader: Model inference complete. Parsing logits...") # ADDED
|
1590 |
orders = mdr_parse_reader_logits(logits, len(bbox_list))
|
1591 |
+
print(f"MDRLayoutReader: Logits parsed. Orders count: {len(orders)}") # ADDED
|
1592 |
except Exception as e:
|
1593 |
print(f"MDR LayoutReader prediction error: {e}")
|
1594 |
+
import traceback
|
1595 |
+
traceback.print_exc() # ADDED for full traceback
|
1596 |
return layouts # Fallback
|
1597 |
+
|
1598 |
if len(orders) != len(bbox_list):
|
1599 |
+
print(f"MDR LayoutReader order mismatch. Orders: {len(orders)}, BBoxes: {len(bbox_list)}")
|
1600 |
return layouts # Fallback
|
1601 |
for i, order_idx in enumerate(orders):
|
1602 |
bbox_list[i].order = order_idx
|
1603 |
+
print("MDRLayoutReader: Applying order...")
|
1604 |
+
result_layouts = self._apply_order(layouts, bbox_list)
|
1605 |
+
print("MDRLayoutReader: Order applied. Returning layouts.") # ADDED
|
1606 |
+
return result_layouts
|
1607 |
|
1608 |
def _prepare_bboxes(self, layouts: list[MDRLayoutElement], w: int, h: int) -> list[_MDR_ReaderBBox] | None:
|
1609 |
line_h = self._estimate_line_h(layouts)
|