Spaces:

ChaseHan
/

Latex2Layout_PDF_Layout_Parsing

Running

File size: 5,620 Bytes

71e7eab
 
 
 
 
 
 
88158ba
 
71e7eab
 
 
 
88158ba
71e7eab
 
88158ba
71e7eab
 
88158ba
 
71e7eab
88158ba
 
71e7eab
88158ba
 
71e7eab
 
88158ba
71e7eab
88158ba
71e7eab
88158ba
71e7eab
 
88158ba
71e7eab
88158ba
71e7eab
 
 
 
88158ba
71e7eab
 
88158ba
71e7eab
 
 
 
 
 
88158ba
71e7eab
 
 
 
88158ba
71e7eab
88158ba
71e7eab
88158ba
71e7eab
88158ba
71e7eab
 
88158ba
71e7eab
 
88158ba
71e7eab
88158ba
 
71e7eab
88158ba
 
 
 
 
 
 
 
 
 
 
 
 
71e7eab
88158ba
 
 
 
 
 
 
 
 
 
 
 
 
71e7eab
88158ba
71e7eab
88158ba
 
 
 
 
 
 
 
 
 
 
 
 
 
71e7eab
88158ba
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
71e7eab
88158ba
71e7eab
 
88158ba
 
 
 
 
 
 
 
71e7eab
 
 
88158ba
 
 
71e7eab
 
88158ba
 
71e7eab
88158ba

import gradio as gr
import cv2
import numpy as np
import os
import tempfile
from ultralytics import YOLO

# Load the Latex2Layout model once, at import time, so every detection request
# reuses the same YOLO instance. The weights path is relative, so it is
# resolved against the process's current working directory.
model_path = "latex2layout_object_detection_yolov8.pt"
model = YOLO(model_path)

def _class_color(cls_id):
    """Return a fixed colour for ``cls_id`` so all boxes of one class match.

    The palette is kept fairly dark so the white label text drawn on top of it
    stays readable. Class ids beyond the palette length wrap around.
    """
    palette = (
        (220, 20, 60),
        (30, 144, 255),
        (34, 139, 34),
        (210, 105, 30),
        (148, 0, 211),
        (0, 139, 139),
        (139, 69, 19),
        (199, 21, 133),
        (70, 130, 180),
        (85, 107, 47),
    )
    return palette[cls_id % len(palette)]


def _draw_labeled_box(canvas, corners, label, color):
    """Draw a bounding box with a filled text tag onto ``canvas`` in place.

    Args:
        canvas: Image array that is modified in place.
        corners: Integer pixel coordinates ``(x1, y1, x2, y2)`` of the box.
        label: Text to show in the tag.
        color: Colour tuple used for the box outline and the tag background.
    """
    x1, y1, x2, y2 = corners
    font = cv2.FONT_HERSHEY_SIMPLEX
    cv2.rectangle(canvas, (x1, y1), (x2, y2), color, 2)
    (label_width, label_height), _ = cv2.getTextSize(label, font, 0.5, 1)

    # The tag normally sits on top of the box. When the box touches the top of
    # the image there is no room above it, so move the tag just inside the box
    # instead of drawing it off-canvas where it would be invisible.
    tag_height = label_height + 5
    tag_bottom = y1 if y1 - tag_height >= 0 else y1 + tag_height
    cv2.rectangle(
        canvas,
        (x1, tag_bottom - tag_height),
        (x1 + label_width, tag_bottom),
        color,
        -1,
    )
    cv2.putText(canvas, label, (x1, tag_bottom - 5), font, 0.5, (255, 255, 255), 1)


def detect_and_visualize(image):
    """
    Perform layout detection on the uploaded image using the Latex2Layout model and visualize the results.

    Args:
        image: The uploaded image as a numpy array (height, width[, channels]),
            or None when nothing has been uploaded.

    Returns:
        annotated_image: Copy of the input with one labelled box per detection,
            or None when no image was given.
        layout_annotations: One line per detection in YOLO format
            (``class x_center y_center width height``, normalized to the image
            size), or an error message when no image was given.
    """
    if image is None:
        return None, "Error: No image uploaded."

    # The Gradio image input hands over RGB arrays, while Ultralytics interprets
    # raw numpy input as BGR. Flip the channel order for inference only; the
    # annotated copy below keeps the caller's channel order for display.
    if image.ndim == 3 and image.shape[2] == 3:
        model_input = np.ascontiguousarray(image[:, :, ::-1])
    else:
        model_input = image

    # Run detection using the Latex2Layout model (single image -> first result)
    result = model(model_input)[0]

    # Draw on a copy so the caller's array is left untouched
    annotated_image = image.copy()
    layout_annotations = []

    img_height, img_width = image.shape[:2]

    for box in result.boxes:
        left, top, right, bottom = box.xyxy[0].cpu().numpy().tolist()
        conf = float(box.conf[0])
        cls_id = int(box.cls[0])
        cls_name = result.names[cls_id]

        # Pixel drawing needs integer corners; colour is stable per class.
        _draw_labeled_box(
            annotated_image,
            (int(left), int(top), int(right), int(bottom)),
            f'{cls_name} {conf:.2f}',
            _class_color(cls_id),
        )

        # Convert to YOLO format (normalized). Use the unrounded coordinates so
        # the annotation does not inherit the truncation applied for drawing.
        x_center = (left + right) / (2 * img_width)
        y_center = (top + bottom) / (2 * img_height)
        width = (right - left) / img_width
        height = (bottom - top) / img_height
        layout_annotations.append(
            f"{cls_id} {x_center:.6f} {y_center:.6f} {width:.6f} {height:.6f}"
        )

    return annotated_image, "\n".join(layout_annotations)

def save_layout_annotations(layout_annotations_str):
    """
    Save layout annotations to a temporary file and return the file path.

    The file is created with ``delete=False`` so it still exists after this
    function returns and can be served as a download; nothing here removes it
    afterwards.

    Args:
        layout_annotations_str: Annotations string in YOLO format

    Returns:
        file_path: Path to the saved annotation file, or None when there is
            nothing to save (empty string or None).
    """
    if not layout_annotations_str:
        return None

    # Write through the temp-file handle itself and let the context manager
    # close it, rather than leaking that handle and reopening the path.
    with tempfile.NamedTemporaryFile(
        mode="w", delete=False, suffix=".txt", encoding="utf-8"
    ) as temp_file:
        temp_file.write(layout_annotations_str)
        return temp_file.name

# Custom CSS for styling, passed to gr.Blocks(css=...) when the UI is built.
# The .button-primary, .button-secondary, .gr-image and .gr-textbox rules are
# attached to components through their elem_classes argument; .container is not
# referenced by any component in the visible part of this file.
custom_css = """
    .container { max-width: 1200px; margin: auto; }
    .button-primary { background-color: #4CAF50; color: white; }
    .button-secondary { background-color: #008CBA; color: white; }
    .gr-image { border: 2px solid #ddd; border-radius: 5px; }
    .gr-textbox { font-family: monospace; }
"""

def _load_example_image():
    """Read the bundled example image as an RGB array for the image input.

    OpenCV decodes to BGR, whereas the Gradio image component displays arrays
    as RGB, so the channels are swapped before the array is returned.

    Returns:
        The example image as an RGB numpy array, or None when the file is
        missing or unreadable (cv2.imread reports that by returning None).
    """
    bgr = cv2.imread("example_image.jpg")
    if bgr is None:
        return None
    return cv2.cvtColor(bgr, cv2.COLOR_BGR2RGB)


# Create Gradio interface with enhanced styling
with gr.Blocks(
    title="Latex2Layout Detection",
    theme=gr.themes.Default(),
    css=custom_css
) as demo:
    # Header with instructions
    gr.Markdown(
        """
        # Latex2Layout Layout Detection
        Upload an image to detect layout elements using the **Latex2Layout** model. View the annotated image and download the results in YOLO format.
        """
    )

    # Main layout with two columns
    with gr.Row():
        # Input column: image upload plus the button that triggers detection
        with gr.Column(scale=1):
            input_image = gr.Image(
                label="Upload Image",
                type="numpy",
                height=400,
                elem_classes="gr-image"
            )
            detect_btn = gr.Button(
                "Start Detection",
                variant="primary",
                elem_classes="button-primary"
            )
            gr.Markdown("**Tip**: Upload a clear image for optimal detection results.")

        # Output column: annotated image, YOLO text, and the download controls
        with gr.Column(scale=1):
            output_image = gr.Image(
                label="Detection Results",
                height=400,
                elem_classes="gr-image"
            )
            layout_annotations = gr.Textbox(
                label="Layout Annotations (YOLO Format)",
                lines=10,
                max_lines=15,
                elem_classes="gr-textbox"
            )
            download_btn = gr.Button(
                "Download Annotations",
                variant="secondary",
                elem_classes="button-secondary"
            )
            download_file = gr.File(
                label="Download File",
                interactive=False
            )

    # Example image button (optional)
    with gr.Row():
        gr.Button("Load Example Image").click(
            fn=_load_example_image,
            outputs=input_image
        )

    # Event handlers
    # The JS snippets swap the button caption while detection runs; the chained
    # .then() step restores it once detect_and_visualize has finished.
    # NOTE(review): `_js` is the keyword used by older Gradio releases; newer
    # releases appear to call it `js` - confirm against the pinned version.
    detect_btn.click(
        fn=detect_and_visualize,
        inputs=input_image,
        outputs=[output_image, layout_annotations],
        _js="() => { document.querySelector('.button-primary').innerText = 'Processing...'; }",
        show_progress=True
    ).then(
        fn=lambda: gr.update(value="Start Detection"),
        outputs=detect_btn,
        _js="() => { document.querySelector('.button-primary').innerText = 'Start Detection'; }"
    )

    # Write the current textbox contents to a temp file and expose it for download
    download_btn.click(
        fn=save_layout_annotations,
        inputs=layout_annotations,
        outputs=download_file
    )


# Launch the application only when this file is executed as a script; importing
# it as a module builds `demo` without calling launch().
if __name__ == "__main__":
    demo.launch()