rodrigomasini committed on
Commit
39ba005
·
verified ·
1 Parent(s): 4e26825

Update mdr_pdf_parser.py

Browse files
Files changed (1) hide show
  1. mdr_pdf_parser.py +12 -1
mdr_pdf_parser.py CHANGED
@@ -1624,7 +1624,18 @@ class MDROcrEngine:
1624
  def _get_system(self) -> _MDR_TextSystem | None:
1625
  if self._text_system is None:
1626
  paths = {k: str(Path(self._model_dir)/Path(*p)) for k,p in _MDR_OCR_MODELS.items()}
1627
- self._onnx_params = _MDR_ONNXParams(use_gpu=(self._device=="cpu"), det_model_dir=paths["det"], cls_model_dir=paths["cls"], rec_model_dir=paths["rec"], rec_char_dict_path=paths["keys"])
 
 
 
 
 
 
 
 
 
 
 
1628
  try: self._text_system = _MDR_TextSystem(self._onnx_params); print(f"MDR OCR System initialized.")
1629
  except Exception as e: print(f"ERROR initializing MDR OCR System: {e}"); self._text_system = None
1630
  return self._text_system
 
def _get_system(self) -> _MDR_TextSystem | None:
    """Return the cached OCR text system, building it on first use.

    Method of ``MDROcrEngine``. On the first call (or after a failed
    attempt) the model paths are resolved under ``self._model_dir`` from
    ``_MDR_OCR_MODELS``, the ONNX parameters are stored on
    ``self._onnx_params``, and a ``_MDR_TextSystem`` is constructed.

    Returns:
        The initialized ``_MDR_TextSystem``, or ``None`` when construction
        raised. The error is printed rather than propagated, and because
        ``self._text_system`` stays ``None`` the next call tries again.
    """
    if self._text_system is None:
        # Each entry of _MDR_OCR_MODELS is a sequence of path components
        # relative to the model directory.
        paths = {
            key: str(Path(self._model_dir) / Path(*parts))
            for key, parts in _MDR_OCR_MODELS.items()
        }
        self._onnx_params = _MDR_ONNXParams(
            # Fix: the flag was previously `self._device == "cpu"`, which
            # requested the GPU only when the device was the CPU.
            use_gpu=(self._device != "cpu"),
            det_model_dir=paths["det"],
            cls_model_dir=paths["cls"],
            rec_model_dir=paths["rec"],
            rec_char_dict_path=paths["keys"],
            # --- EXPERIMENT: INCREASE THRESHOLDS ---
            det_db_thresh=0.5,  # Original was 0.3
            det_db_box_thresh=0.8,  # Original was 0.6
            # --- END EXPERIMENT ---
        )
        try:
            self._text_system = _MDR_TextSystem(self._onnx_params)
            print("MDR OCR System initialized.")
        except Exception as e:
            # Best-effort: report the failure and leave the engine
            # uninitialized so callers receive None.
            print(f"ERROR initializing MDR OCR System: {e}")
            self._text_system = None
    return self._text_system