Update app.py
Browse files
app.py
CHANGED
@@ -2,15 +2,22 @@ import gradio as gr
|
|
2 |
import cv2
|
3 |
import numpy as np
|
4 |
import os
|
5 |
-
import requests
|
6 |
import json
|
7 |
from PIL import Image
|
8 |
import io
|
9 |
import base64
|
10 |
from openai import OpenAI
|
|
|
11 |
|
12 |
-
#
|
13 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
14 |
|
15 |
# Qwen API configuration
|
16 |
QWEN_BASE_URL = "https://dashscope.aliyuncs.com/compatible-mode/v1"
|
@@ -39,7 +46,7 @@ def encode_image(image_array):
|
|
39 |
|
40 |
def detect_layout(image):
|
41 |
"""
|
42 |
-
Perform layout detection on the uploaded image using YOLO
|
43 |
|
44 |
Args:
|
45 |
image: The uploaded image as a numpy array
|
@@ -51,46 +58,44 @@ def detect_layout(image):
|
|
51 |
if image is None:
|
52 |
return None, "Error: No image uploaded."
|
53 |
|
54 |
-
# Convert numpy array to PIL Image
|
55 |
-
pil_image = Image.fromarray(image)
|
56 |
-
|
57 |
-
# Convert PIL Image to bytes for API request
|
58 |
-
img_byte_arr = io.BytesIO()
|
59 |
-
pil_image.save(img_byte_arr, format='PNG')
|
60 |
-
img_byte_arr = img_byte_arr.getvalue()
|
61 |
-
|
62 |
-
# Prepare API request
|
63 |
-
files = {'image': ('image.png', img_byte_arr, 'image/png')}
|
64 |
-
|
65 |
try:
|
66 |
-
#
|
67 |
-
|
68 |
-
|
69 |
-
detection_results = response.json()
|
70 |
|
71 |
# Create a copy of the image for visualization
|
72 |
annotated_image = image.copy()
|
|
|
73 |
|
74 |
# Draw detection results
|
75 |
-
for
|
76 |
-
|
77 |
-
|
78 |
-
conf =
|
|
|
|
|
79 |
|
80 |
# Generate a color for each class
|
81 |
color = tuple(np.random.randint(0, 255, 3).tolist())
|
82 |
|
83 |
# Draw bounding box and label
|
84 |
-
cv2.rectangle(annotated_image, (
|
85 |
label = f'{cls_name} {conf:.2f}'
|
86 |
(label_width, label_height), _ = cv2.getTextSize(label, cv2.FONT_HERSHEY_SIMPLEX, 0.5, 1)
|
87 |
-
cv2.rectangle(annotated_image, (
|
88 |
-
cv2.putText(annotated_image, label, (
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
89 |
|
90 |
# Format layout information for Qwen
|
91 |
-
|
92 |
|
93 |
-
return annotated_image,
|
94 |
|
95 |
except Exception as e:
|
96 |
return None, f"Error during layout detection: {str(e)}"
|
|
|
2 |
import cv2
|
3 |
import numpy as np
|
4 |
import os
|
|
|
5 |
import json
|
6 |
from PIL import Image
|
7 |
import io
|
8 |
import base64
|
9 |
from openai import OpenAI
|
10 |
+
from ultralytics import YOLO
|
11 |
|
12 |
+
# Load the Latex2Layout model once at module import so every request reuses it.
model_path = "latex2layout_object_detection_yolov8.pt"
if not os.path.exists(model_path):
    # Fail fast with a clear message instead of a cryptic ultralytics error.
    raise FileNotFoundError(f"Model file not found at {model_path}")

try:
    model = YOLO(model_path)
except Exception as e:
    # Chain the original exception (PEP 3134) so the real cause stays visible
    # in the traceback instead of being flattened into the message string.
    raise RuntimeError(f"Failed to load Latex2Layout model: {e}") from e
21 |
|
22 |
# Qwen API configuration
|
23 |
QWEN_BASE_URL = "https://dashscope.aliyuncs.com/compatible-mode/v1"
|
|
|
46 |
|
47 |
def detect_layout(image):
    """
    Perform layout detection on the uploaded image using the local YOLO model.

    Args:
        image: The uploaded image as an RGB numpy array (H, W, 3), or None
            when nothing was uploaded.

    Returns:
        tuple: (annotated_image, layout_info_str) where annotated_image is a
        copy of the input with labeled bounding boxes drawn on it, and
        layout_info_str is a JSON string of detections, each entry shaped as
        {'bbox': [x1, y1, x2, y2], 'class': str, 'confidence': float}.
        On any failure returns (None, error_message).
    """
    if image is None:
        return None, "Error: No image uploaded."

    # Deterministic color per class id so boxes of the same class look alike
    # across detections and runs (previously a fresh random color per box).
    class_colors = {}

    try:
        # Run detection using the local YOLO model; first result covers
        # the single input image.
        results = model(image)
        result = results[0]

        # Work on a copy so the caller's array is left untouched.
        annotated_image = image.copy()
        layout_info = []

        # Draw each detection and collect its metadata.
        for box in result.boxes:
            # Bounding box coordinates as plain ints for cv2 / JSON.
            x1, y1, x2, y2 = map(int, box.xyxy[0].cpu().numpy())
            conf = float(box.conf[0])
            cls_id = int(box.cls[0])
            cls_name = result.names[cls_id]

            # Generate (and cache) a stable color for each class.
            if cls_id not in class_colors:
                rng = np.random.default_rng(cls_id)
                class_colors[cls_id] = tuple(int(c) for c in rng.integers(0, 255, 3))
            color = class_colors[cls_id]

            # Draw the bounding box and a filled label background.
            cv2.rectangle(annotated_image, (x1, y1), (x2, y2), color, 2)
            label = f'{cls_name} {conf:.2f}'
            (label_width, label_height), _ = cv2.getTextSize(label, cv2.FONT_HERSHEY_SIMPLEX, 0.5, 1)
            # Clamp the label background to the top edge so it never uses a
            # negative y coordinate for boxes near the top of the image.
            label_top = max(y1 - label_height - 5, 0)
            cv2.rectangle(annotated_image, (x1, label_top), (x1 + label_width, y1), color, -1)
            cv2.putText(annotated_image, label, (x1, y1 - 5), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 255), 1)

            # Record the detection for the downstream Qwen prompt.
            layout_info.append({
                'bbox': [x1, y1, x2, y2],
                'class': cls_name,
                'confidence': conf
            })

        # Format layout information for Qwen.
        layout_info_str = json.dumps(layout_info, indent=2)

        return annotated_image, layout_info_str

    except Exception as e:
        # Surface the failure to the UI rather than crashing the app.
        return None, f"Error during layout detection: {str(e)}"
|