Update mdr_pdf_parser.py
Browse files
- mdr_pdf_parser.py +3 -3
mdr_pdf_parser.py
CHANGED
@@ -222,7 +222,7 @@ class MDRTableLayoutParsedFormat(Enum):
|
|
222 |
HTML = auto()
|
223 |
|
224 |
|
225 |
-
@dataclass
|
226 |
class MDRBaseLayoutElement:
|
227 |
"""Base class for layout elements found on a page."""
|
228 |
rect: MDRRectangle;
|
@@ -1852,7 +1852,7 @@ class MDROcrEngine:
|
|
1852 |
# much lower thresholds so we actually get some candidate masks:
|
1853 |
det_db_thresh=0.1,
|
1854 |
det_db_box_thresh=0.3,
|
1855 |
-
drop_score=0.
|
1856 |
use_angle_cls=False,
|
1857 |
)
|
1858 |
try:
|
@@ -1906,7 +1906,7 @@ class MDROcrEngine:
|
|
1906 |
try:
|
1907 |
pts = [(float(p[0]), float(p[1])) for p in box_pts]
|
1908 |
if len(pts) == 4:
|
1909 |
-
r = MDRRectangle(lt=pts[0], rt=pts[1],
|
1910 |
if r.is_valid and r.area > 1:
|
1911 |
yield MDROcrFragment(order=-1, text=txt, rank=float(conf), rect=r)
|
1912 |
fragments_generated_count += 1
|
|
|
222 |
HTML = auto()
|
223 |
|
224 |
|
225 |
+
@dataclass(eq=False)
|
226 |
class MDRBaseLayoutElement:
|
227 |
"""Base class for layout elements found on a page."""
|
228 |
rect: MDRRectangle;
|
|
|
1852 |
# much lower thresholds so we actually get some candidate masks:
|
1853 |
det_db_thresh=0.1,
|
1854 |
det_db_box_thresh=0.3,
|
1855 |
+
drop_score=0.0,
|
1856 |
use_angle_cls=False,
|
1857 |
)
|
1858 |
try:
|
|
|
1906 |
try:
|
1907 |
pts = [(float(p[0]), float(p[1])) for p in box_pts]
|
1908 |
if len(pts) == 4:
|
1909 |
+
r = MDRRectangle(lt=pts[0], rt=pts[1], lb=pts[2], rb=pts[3])
|
1910 |
if r.is_valid and r.area > 1:
|
1911 |
yield MDROcrFragment(order=-1, text=txt, rank=float(conf), rect=r)
|
1912 |
fragments_generated_count += 1
|