Spaces:

ResearchMAGIC
/

teammrag-parser-moreai

Sleeping

rodrigomasini committed 12 days ago

Commit

0112306

verified ·

1 Parent(s): 559377c

Update mdr_pdf_parser.py

Files changed (1) hide show

mdr_pdf_parser.py CHANGED Viewed

@@ -1577,7 +1577,7 @@ def mdr_get_rotated_crop(img, points):
     h = int(max(np.linalg.norm(pts[0] - pts[3]), np.linalg.norm(pts[1] - pts[2])))
     std = np.float32([[0, 0], [w, 0], [w, h], [0, h]])
     M = cv2.getPerspectiveTransform(pts, std)
-    dst = cv2.warpPerspective(img, M, (w, h), borderMode=cv2.BORDER_REPLICATE, flags=cv2.INTER_CUBIC)
     dh, dw = dst.shape[0:2]
     if dh > 0 and dw > 0 and dh * 1.0 / dw >= 1.5:
         dst = cv2.rotate(dst, cv2.ROTATE_90_CLOCKWISE)
@@ -2171,7 +2171,7 @@ class MDRLayoutReader:
             layoutreader_cache_dir = Path(self._model_path)  # self._model_path is like "./mdr_models/layoutreader"
             mdr_ensure_directory(str(layoutreader_cache_dir))  # Ensure this specific directory exists
-            name = "Cahya/layoutlmv3-base-finetuned-doclaynet"
             print(f"MDRLayoutReader: Attempting to load LayoutLMv3 model '{name}'. Cache dir: {layoutreader_cache_dir}")
             try:
@@ -2822,7 +2822,7 @@ class MDRExtractionEngine:
         ori_im_cv = cv2.cvtColor(np.array(image.convert("RGB")), cv2.COLOR_RGB2BGR)
         gray_cv = cv2.cvtColor(ori_im_cv, cv2.COLOR_BGR2GRAY)
-        clahe_obj = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
         enhanced_gray_cv = clahe_obj.apply(gray_cv)
         # Convert back to BGR for downstream components that might expect 3 channels
         # (even if they only use one, like the detector)

@@ -1577,7 +1577,7 @@ def mdr_get_rotated_crop(img, points):
     h = int(max(np.linalg.norm(pts[0] - pts[3]), np.linalg.norm(pts[1] - pts[2])))
     std = np.float32([[0, 0], [w, 0], [w, h], [0, h]])
     M = cv2.getPerspectiveTransform(pts, std)
+    dst = cv2.warpPerspective(img, M, (w, h), borderMode=cv2.BORDER_CONSTANT, borderValue=(128, 128, 128), flags=cv2.INTER_CUBIC)
     dh, dw = dst.shape[0:2]
     if dh > 0 and dw > 0 and dh * 1.0 / dw >= 1.5:
         dst = cv2.rotate(dst, cv2.ROTATE_90_CLOCKWISE)
@@ -2171,7 +2171,7 @@ class MDRLayoutReader:
             layoutreader_cache_dir = Path(self._model_path)  # self._model_path is like "./mdr_models/layoutreader"
             mdr_ensure_directory(str(layoutreader_cache_dir))  # Ensure this specific directory exists
+            name = "lakshya-rawat/document-qa-model"
             print(f"MDRLayoutReader: Attempting to load LayoutLMv3 model '{name}'. Cache dir: {layoutreader_cache_dir}")
             try:
@@ -2822,7 +2822,7 @@ class MDRExtractionEngine:
         ori_im_cv = cv2.cvtColor(np.array(image.convert("RGB")), cv2.COLOR_RGB2BGR)
         gray_cv = cv2.cvtColor(ori_im_cv, cv2.COLOR_BGR2GRAY)
+        clahe_obj = cv2.createCLAHE(clipLimit=1.0, tileGridSize=(8, 8))
         enhanced_gray_cv = clahe_obj.apply(gray_cv)
         # Convert back to BGR for downstream components that might expect 3 channels
         # (even if they only use one, like the detector)