ChaseHan committed on
Commit 923f8ae · verified · 1 Parent(s): 15de4c7
Files changed (1)
  1. app.py +75 -97
app.py CHANGED
@@ -1,29 +1,30 @@
 import gradio as gr
 import cv2
 import numpy as np
-import requests
+import os
+import tempfile
 from ultralytics import YOLO

 # Load the Latex2Layout model
 model_path = "latex2layout_object_detection_yolov8.pt"
-latex2layout_model = YOLO(model_path)
+model = YOLO(model_path)

-def detect_layout(image):
+def detect_and_visualize(image):
     """
-    Perform layout detection on the uploaded image using the Latex2Layout model.
+    Perform layout detection on the uploaded image using the Latex2Layout model and visualize the results.

     Args:
-        image: The uploaded image (numpy array)
+        image: The uploaded image

     Returns:
-        annotated_image: Image with detection boxes drawn
-        layout_info: Text description of detected layout elements
+        annotated_image: Image with detection boxes
+        layout_annotations: Annotations in YOLO format
     """
     if image is None:
         return None, "Error: No image uploaded."

-    # Run detection
-    results = latex2layout_model(image)
+    # Run detection using the Latex2Layout model
+    results = model(image)
     result = results[0]

     # Create a copy of the image for visualization
@@ -33,108 +34,74 @@ def detect_layout(image):
     # Get image dimensions
     img_height, img_width = image.shape[:2]

-    # Process detection results
+    # Draw detection results
     for box in result.boxes:
         x1, y1, x2, y2 = map(int, box.xyxy[0].cpu().numpy())
         conf = float(box.conf[0])
         cls_id = int(box.cls[0])
         cls_name = result.names[cls_id]

-        # Draw bounding box and label on the image
+        # Generate a color for each class
         color = tuple(np.random.randint(0, 255, 3).tolist())
+
+        # Draw bounding box and label
         cv2.rectangle(annotated_image, (x1, y1), (x2, y2), color, 2)
         label = f'{cls_name} {conf:.2f}'
         (label_width, label_height), _ = cv2.getTextSize(label, cv2.FONT_HERSHEY_SIMPLEX, 0.5, 1)
         cv2.rectangle(annotated_image, (x1, y1-label_height-5), (x1+label_width, y1), color, -1)
         cv2.putText(annotated_image, label, (x1, y1-5), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 255), 1)

-        # Format layout info for Qwen2.5-VL
-        layout_annotations.append(f"{cls_name} at position ({x1},{y1},{x2},{y2}) with confidence {conf:.2f}")
-
-    layout_info = "Detected layout elements: " + "; ".join(layout_annotations) if layout_annotations else "No layout elements detected."
-    return annotated_image, layout_info
-
-def call_qwen_vl_api(api_url, image, layout_info, question):
-    """
-    Call the Qwen2.5-VL API with the image, layout info, and user question.
-
-    Args:
-        api_url: The URL of the Qwen2.5-VL API
-        image: The uploaded image (numpy array)
-        layout_info: Text description of detected layout elements
-        question: User's question about the image and layout
-
-    Returns:
-        answer: Response from the Qwen2.5-VL API
-    """
-    if not api_url:
-        return "Error: Please provide a valid Qwen2.5-VL API URL."
-    if not question:
-        return "Error: Please enter a question."
+        # Convert to YOLO format (normalized)
+        x_center = (x1 + x2) / (2 * img_width)
+        y_center = (y1 + y2) / (2 * img_height)
+        width = (x2 - x1) / img_width
+        height = (y2 - y1) / img_height
+        layout_annotations.append(f"{cls_id} {x_center:.6f} {y_center:.6f} {width:.6f} {height:.6f}")

-    try:
-        # Convert image to a format suitable for API (e.g., base64 or raw bytes might be needed; adjust per API spec)
-        # Here, we assume the API accepts a URL or raw data; for simplicity, we use a placeholder
-        payload = {
-            "image": image.tolist(),  # Adjust this based on API requirements (e.g., base64 encoding)
-            "prompt": f"{layout_info}\n\nQuestion: {question}",
-        }
-        response = requests.post(api_url, json=payload, timeout=30)
-        response.raise_for_status()  # Raise an error for bad status codes
-        return response.json().get("answer", "Error: No answer received from API.")
-    except requests.exceptions.RequestException as e:
-        return f"Error: API call failed - {str(e)}"
+    return annotated_image, "\n".join(layout_annotations)

-def process_image_and_question(api_url, image, question):
+def save_layout_annotations(layout_annotations_str):
     """
-    Process the image with Latex2Layout and query Qwen2.5-VL API.
+    Save layout annotations to a temporary file and return the file path.

     Args:
-        api_url: Qwen2.5-VL API URL
-        image: Uploaded image
-        question: User's question
+        layout_annotations_str: Annotations string in YOLO format

     Returns:
-        annotated_image: Image with detection boxes
-        layout_info: Detected layout description
-        answer: API response to the question
+        file_path: Path to the saved annotation file
     """
-    annotated_image, layout_info = detect_layout(image)
-    if annotated_image is None:
-        return None, layout_info, "Error: Detection failed."
+    if not layout_annotations_str:
+        return None

-    answer = call_qwen_vl_api(api_url, image, layout_info, question)
-    return annotated_image, layout_info, answer
+    temp_file = tempfile.NamedTemporaryFile(delete=False, suffix=".txt")
+    with open(temp_file.name, "w") as f:
+        f.write(layout_annotations_str)
+    return temp_file.name

 # Custom CSS for styling
 custom_css = """
 .container { max-width: 1200px; margin: auto; }
 .button-primary { background-color: #4CAF50; color: white; }
+.button-secondary { background-color: #008CBA; color: white; }
 .gr-image { border: 2px solid #ddd; border-radius: 5px; }
 .gr-textbox { font-family: monospace; }
 """

-# Create Gradio interface
+# Create Gradio interface with enhanced styling
 with gr.Blocks(
-    title="Latex2Layout Detection & QA",
+    title="Latex2Layout Detection",
     theme=gr.themes.Default(),
     css=custom_css
 ) as demo:
+    # Header with instructions
     gr.Markdown(
         """
-        # Latex2Layout Layout Detection & Q&A
-        Upload an image to detect layout elements using the **Latex2Layout** model, then ask questions about the layout and image content using the Qwen2.5-VL API.
+        # Latex2Layout Layout Detection
+        Upload an image to detect layout elements using the **Latex2Layout** model. View the annotated image and download the results in YOLO format.
         """
     )

-    # API URL input
-    api_url_input = gr.Textbox(
-        label="Qwen2.5-VL API URL",
-        placeholder="Enter the Qwen2.5-VL API URL here",
-        value=""
-    )
-
-    # Main layout
+    # Main layout with two columns
    with gr.Row():
        # Input column
        with gr.Column(scale=1):
@@ -144,49 +111,60 @@ with gr.Blocks(
                height=400,
                elem_classes="gr-image"
            )
-            question_input = gr.Textbox(
-                label="Ask a Question",
-                placeholder="e.g., What is the layout structure of this image?",
-                lines=2
-            )
-            submit_btn = gr.Button(
-                "Detect & Ask",
+            detect_btn = gr.Button(
+                "Start Detection",
                variant="primary",
                elem_classes="button-primary"
            )
-            gr.Markdown("**Tip**: Provide a clear image and specific question for best results.")
+            gr.Markdown("**Tip**: Upload a clear image for optimal detection results.")

        # Output column
        with gr.Column(scale=1):
            output_image = gr.Image(
-                label="Detected Layout",
+                label="Detection Results",
                height=400,
                elem_classes="gr-image"
            )
-            layout_output = gr.Textbox(
-                label="Layout Information",
-                lines=5,
-                max_lines=10,
+            layout_annotations = gr.Textbox(
+                label="Layout Annotations (YOLO Format)",
+                lines=10,
+                max_lines=15,
                elem_classes="gr-textbox"
            )
-            answer_output = gr.Textbox(
-                label="Answer",
-                lines=5,
-                max_lines=10,
-                elem_classes="gr-textbox"
+            download_btn = gr.Button(
+                "Download Annotations",
+                variant="secondary",
+                elem_classes="button-secondary"
            )
+            download_file = gr.File(
+                label="Download File",
+                interactive=False
+            )
+
+    # Example image button (optional)
+    with gr.Row():
+        gr.Button("Load Example Image").click(
+            fn=lambda: cv2.imread("example_image.jpg"),
+            outputs=input_image
+        )

-    # Event handler
-    submit_btn.click(
-        fn=process_image_and_question,
-        inputs=[api_url_input, input_image, question_input],
-        outputs=[output_image, layout_output, answer_output],
+    # Event handlers
+    detect_btn.click(
+        fn=detect_and_visualize,
+        inputs=input_image,
+        outputs=[output_image, layout_annotations],
        _js="() => { document.querySelector('.button-primary').innerText = 'Processing...'; }",
        show_progress=True
    ).then(
-        fn=lambda: gr.update(value="Detect & Ask"),
-        outputs=submit_btn,
-        _js="() => { document.querySelector('.button-primary').innerText = 'Detect & Ask'; }"
+        fn=lambda: gr.update(value="Start Detection"),
+        outputs=detect_btn,
+        _js="() => { document.querySelector('.button-primary').innerText = 'Start Detection'; }"
+    )
+
+    download_btn.click(
+        fn=save_layout_annotations,
+        inputs=layout_annotations,
+        outputs=download_file
    )

 # Launch the application
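
For reference, the new `detect_and_visualize` function converts each detected box from pixel coordinates into the normalized center/width/height form used by YOLO annotation files. Below is a minimal, self-contained sketch of that conversion; the helper name `to_yolo_line` and the sample numbers are illustrative and not part of the commit:

```python
# Minimal sketch of the YOLO-format conversion performed in detect_and_visualize.
# The helper name and the sample values below are illustrative only.

def to_yolo_line(cls_id: int, x1: int, y1: int, x2: int, y2: int,
                 img_width: int, img_height: int) -> str:
    """Convert a pixel-space box (x1, y1, x2, y2) into one YOLO annotation line."""
    x_center = (x1 + x2) / (2 * img_width)   # normalized box-center x
    y_center = (y1 + y2) / (2 * img_height)  # normalized box-center y
    width = (x2 - x1) / img_width            # normalized box width
    height = (y2 - y1) / img_height          # normalized box height
    return f"{cls_id} {x_center:.6f} {y_center:.6f} {width:.6f} {height:.6f}"

# Example: a 200x100 box in the top-left corner of a 1000x800 image
print(to_yolo_line(0, 0, 0, 200, 100, 1000, 800))
# -> "0 0.100000 0.062500 0.200000 0.125000"
```

The text written out by `save_layout_annotations` is simply these lines joined by newlines, one detection per line in `class x_center y_center width height` order.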