rodrigomasini committed on
Commit
90c536f
·
verified ·
1 Parent(s): 6af19d4

Update mdr_pdf_parser.py

Browse files
Files changed (1) hide show
  1. mdr_pdf_parser.py +7 -5
mdr_pdf_parser.py CHANGED
@@ -1780,10 +1780,12 @@ def mdr_correct_layout_fragments(ocr_engine: 'MDROcrEngine', source_img: Image,
1780
 
1781
  # --- MDR OCR Engine ---
1782
 
1783
- _MDR_OCR_MODELS = {"det": ("ppocrv4", "det", "det.onnx"), "cls": ("ppocrv4", "cls", "cls.onnx"),
1784
- "rec": ("ppocrv4", "rec", "rec.onnx"), "keys": ("ch_ppocr_server_v2.0", "ppocr_keys_v1.txt")}
 
 
1785
 
1786
- _MDR_OCR_URL_BASE = "https://huggingface.co/moskize/OnnxOCR/resolve/main/"
1787
 
1788
 
1789
  @dataclass
@@ -1797,7 +1799,7 @@ class _MDR_ONNXParams:
1797
 
1798
  # Attributes with default values (Group 1)
1799
  use_angle_cls: bool = True
1800
- rec_image_shape: str = "3,48,320"
1801
  cls_image_shape: str = "3,48,192"
1802
  cls_batch_num: int = 6
1803
  cls_thresh: float = 0.9
@@ -1853,7 +1855,7 @@ class MDROcrEngine:
1853
  det_model_dir=paths["det"],
1854
  cls_model_dir=paths["cls"],
1855
  rec_model_dir=paths["rec"],
1856
- rec_char_dict_path=paths["keys"],
1857
  # much lower thresholds so we actually get some candidate masks:
1858
  det_db_thresh=0.1,
1859
  det_db_box_thresh=0.3,
 
1780
 
1781
  # --- MDR OCR Engine ---
1782
 
1783
+ _MDR_OCR_MODELS = {"det": ("ppocr_onnx", "model", "det_model", "en_PP-OCRv3_det_infer.onnx"),
1784
+ "cls": ("ppocr_onnx", "model", "cls_model", "ch_ppocr_mobile_v2.0_cls_infer.onnx"),
1785
+ "rec": ("ppocr_onnx", "model", "rec_model", "en_PP-OCRv3_rec_infer.onnx"),
1786
+ "keys": ("ppocr_onnx", "ppocr", "utils", "dict", "en_dict.txt")}
1787
 
1788
+ _MDR_OCR_URL_BASE = "https://raw.githubusercontent.com/Kazuhito00/PaddleOCR-ONNX-Sample/main/"
1789
 
1790
 
1791
  @dataclass
 
1799
 
1800
  # Attributes with default values (Group 1)
1801
  use_angle_cls: bool = True
1802
+ rec_image_shape: str = "3,48,640"
1803
  cls_image_shape: str = "3,48,192"
1804
  cls_batch_num: int = 6
1805
  cls_thresh: float = 0.9
 
1855
  det_model_dir=paths["det"],
1856
  cls_model_dir=paths["cls"],
1857
  rec_model_dir=paths["rec"],
1858
+ rec_char_dict_path=None,
1859
  # much lower thresholds so we actually get some candidate masks:
1860
  det_db_thresh=0.1,
1861
  det_db_box_thresh=0.3,