rodrigomasini committed on
Commit
cf01558
·
verified ·
1 Parent(s): d217802

Update mdr_pdf_parser.py

Browse files
Files changed (1) hide show
  1. mdr_pdf_parser.py +3 -3
mdr_pdf_parser.py CHANGED
@@ -222,7 +222,7 @@ class MDRTableLayoutParsedFormat(Enum):
222
  HTML = auto()
223
 
224
 
225
- @dataclass
226
  class MDRBaseLayoutElement:
227
  """Base class for layout elements found on a page."""
228
  rect: MDRRectangle;
@@ -1852,7 +1852,7 @@ class MDROcrEngine:
1852
  # much lower thresholds so we actually get some candidate masks:
1853
  det_db_thresh=0.1,
1854
  det_db_box_thresh=0.3,
1855
- drop_score=0.1,
1856
  use_angle_cls=False,
1857
  )
1858
  try:
@@ -1906,7 +1906,7 @@ class MDROcrEngine:
1906
  try:
1907
  pts = [(float(p[0]), float(p[1])) for p in box_pts]
1908
  if len(pts) == 4:
1909
- r = MDRRectangle(lt=pts[0], rt=pts[1], rb=pts[2], lb=pts[3])
1910
  if r.is_valid and r.area > 1:
1911
  yield MDROcrFragment(order=-1, text=txt, rank=float(conf), rect=r)
1912
  fragments_generated_count += 1
 
222
  HTML = auto()
223
 
224
 
225
+ @dataclass(eq=False)
226
  class MDRBaseLayoutElement:
227
  """Base class for layout elements found on a page."""
228
  rect: MDRRectangle;
 
1852
  # much lower thresholds so we actually get some candidate masks:
1853
  det_db_thresh=0.1,
1854
  det_db_box_thresh=0.3,
1855
+ drop_score=0.0,
1856
  use_angle_cls=False,
1857
  )
1858
  try:
 
1906
  try:
1907
  pts = [(float(p[0]), float(p[1])) for p in box_pts]
1908
  if len(pts) == 4:
1909
+ r = MDRRectangle(lt=pts[0], rt=pts[1], lb=pts[2], rb=pts[3])
1910
  if r.is_valid and r.area > 1:
1911
  yield MDROcrFragment(order=-1, text=txt, rank=float(conf), rect=r)
1912
  fragments_generated_count += 1