Update mdr_pdf_parser.py
Browse files- mdr_pdf_parser.py +12 -1
mdr_pdf_parser.py
CHANGED
@@ -1624,7 +1624,18 @@ class MDROcrEngine:
|
|
1624 |
def _get_system(self) -> _MDR_TextSystem | None:
|
1625 |
if self._text_system is None:
|
1626 |
paths = {k: str(Path(self._model_dir)/Path(*p)) for k,p in _MDR_OCR_MODELS.items()}
|
1627 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1628 |
try: self._text_system = _MDR_TextSystem(self._onnx_params); print(f"MDR OCR System initialized.")
|
1629 |
except Exception as e: print(f"ERROR initializing MDR OCR System: {e}"); self._text_system = None
|
1630 |
return self._text_system
|
|
|
1624 |
def _get_system(self) -> _MDR_TextSystem | None:
    """Return the OCR text system, building it on first use.

    On the first call (or after a previous failed attempt) this resolves
    the model paths under ``self._model_dir``, stores a fresh
    ``_MDR_ONNXParams`` on ``self._onnx_params`` and tries to construct
    ``_MDR_TextSystem`` from it.

    Returns:
        The cached ``_MDR_TextSystem`` instance, or ``None`` when
        construction raised. The error is printed, not re-raised, so the
        next call will attempt initialization again.
    """
    if self._text_system is not None:
        return self._text_system

    # Join every model's relative path parts onto the model directory.
    paths = {
        key: str(Path(self._model_dir) / Path(*parts))
        for key, parts in _MDR_OCR_MODELS.items()
    }

    self._onnx_params = _MDR_ONNXParams(
        # Fix: the flag was inverted (`== "cpu"`), which requested the GPU
        # only when the CPU device was selected. Any non-CPU device now
        # enables it. NOTE(review): confirm the device strings used by
        # callers (presumably "cpu" / "cuda").
        use_gpu=(self._device != "cpu"),
        det_model_dir=paths["det"],
        cls_model_dir=paths["cls"],
        rec_model_dir=paths["rec"],
        rec_char_dict_path=paths["keys"],
        # --- EXPERIMENT: INCREASE THRESHOLDS ---
        det_db_thresh=0.5,  # Original was 0.3
        det_db_box_thresh=0.8,  # Original was 0.6
        # --- END EXPERIMENT ---
    )

    try:
        self._text_system = _MDR_TextSystem(self._onnx_params)
        print("MDR OCR System initialized.")
    except Exception as e:
        # Best-effort: report the failure and leave the system unset so
        # callers receive None instead of an exception.
        print(f"ERROR initializing MDR OCR System: {e}")
        self._text_system = None
    return self._text_system
|