File size: 5,620 Bytes
71e7eab
 
 
 
 
 
 
88158ba
 
71e7eab
 
 
 
88158ba
71e7eab
 
88158ba
71e7eab
 
88158ba
 
71e7eab
88158ba
 
71e7eab
88158ba
 
71e7eab
 
88158ba
71e7eab
88158ba
71e7eab
88158ba
71e7eab
 
88158ba
71e7eab
88158ba
71e7eab
 
 
 
88158ba
71e7eab
 
88158ba
71e7eab
 
 
 
 
 
88158ba
71e7eab
 
 
 
88158ba
71e7eab
88158ba
71e7eab
88158ba
71e7eab
88158ba
71e7eab
 
88158ba
71e7eab
 
88158ba
71e7eab
88158ba
 
71e7eab
88158ba
 
 
 
 
 
 
 
 
 
 
 
 
71e7eab
88158ba
 
 
 
 
 
 
 
 
 
 
 
 
71e7eab
88158ba
71e7eab
88158ba
 
 
 
 
 
 
 
 
 
 
 
 
 
71e7eab
88158ba
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
71e7eab
88158ba
71e7eab
 
88158ba
 
 
 
 
 
 
 
71e7eab
 
 
88158ba
 
 
71e7eab
 
88158ba
 
71e7eab
88158ba
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
import gradio as gr
import cv2
import numpy as np
import os
import tempfile
from ultralytics import YOLO

# Load the Latex2Layout YOLOv8 detection weights once, at import time, so that
# every call to detect_and_visualize() reuses the same model object.
model_path = "latex2layout_object_detection_yolov8.pt"
model = YOLO(model_path)

def _class_color(cls_id):
    """Return a stable drawing color for a class id (same class, same color)."""
    palette = (
        (230, 25, 75),
        (60, 180, 75),
        (0, 130, 200),
        (245, 130, 48),
        (145, 30, 180),
        (0, 128, 128),
        (170, 110, 40),
        (128, 0, 0),
        (0, 0, 128),
        (128, 128, 0),
    )
    return palette[cls_id % len(palette)]


def detect_and_visualize(image):
    """
    Perform layout detection on the uploaded image using the Latex2Layout model and visualize the results.

    Args:
        image: The uploaded image as a numpy array in RGB channel order
            (what the Gradio image component delivers), or None.

    Returns:
        annotated_image: Copy of the input with a box and a "<class> <conf>"
            label drawn for every detection, or None when no image was given.
        layout_annotations: One line per detection in YOLO format
            ("cls x_center y_center width height", normalized to the image
            size), joined with newlines; an error message when no image was
            given.
    """
    if image is None:
        return None, "Error: No image uploaded."

    # Ultralytics interprets numpy input as BGR, while the UI hands us RGB;
    # flip the channel axis for inference only and keep drawing on the RGB copy.
    if image.ndim == 3 and image.shape[2] == 3:
        model_input = np.ascontiguousarray(image[..., ::-1])
    else:
        model_input = image

    # Run detection using the Latex2Layout model
    result = model(model_input)[0]

    # Draw on a copy so the caller's array is left untouched
    annotated_image = image.copy()
    img_height, img_width = image.shape[:2]
    layout_annotations = []

    font = cv2.FONT_HERSHEY_SIMPLEX
    for box in result.boxes:
        # Keep the float corners for the annotations; truncate only for drawing
        fx1, fy1, fx2, fy2 = (float(v) for v in box.xyxy[0].cpu().numpy())
        x1, y1, x2, y2 = int(fx1), int(fy1), int(fx2), int(fy2)
        conf = float(box.conf[0])
        cls_id = int(box.cls[0])
        cls_name = result.names[cls_id]

        # One deterministic color per class so repeated runs look the same
        color = _class_color(cls_id)

        # Draw bounding box and label
        cv2.rectangle(annotated_image, (x1, y1), (x2, y2), color, 2)
        label = f'{cls_name} {conf:.2f}'
        (label_width, label_height), _ = cv2.getTextSize(label, font, 0.5, 1)

        # The label normally sits just above the box; when the box touches the
        # top edge there is no room there, so place it inside the box instead.
        label_top = y1 - label_height - 5
        if label_top < 0:
            label_top = y1
        cv2.rectangle(
            annotated_image,
            (x1, label_top),
            (x1 + label_width, label_top + label_height + 5),
            color,
            -1,
        )
        cv2.putText(
            annotated_image,
            label,
            (x1, label_top + label_height),
            font,
            0.5,
            (255, 255, 255),
            1,
        )

        # Convert to YOLO format (normalized), using the untruncated corners
        x_center = (fx1 + fx2) / (2 * img_width)
        y_center = (fy1 + fy2) / (2 * img_height)
        width = (fx2 - fx1) / img_width
        height = (fy2 - fy1) / img_height
        layout_annotations.append(
            f"{cls_id} {x_center:.6f} {y_center:.6f} {width:.6f} {height:.6f}"
        )

    return annotated_image, "\n".join(layout_annotations)

def save_layout_annotations(layout_annotations_str):
    """
    Save layout annotations to a temporary file and return the file path.

    Args:
        layout_annotations_str: Annotations string in YOLO format

    Returns:
        file_path: Path to the saved ".txt" annotation file, or None when the
            annotations string is empty or None (nothing is written then).
    """
    if not layout_annotations_str:
        return None

    # delete=False keeps the file on disk after the handle is closed so the
    # returned path stays valid; writing through the handle inside `with`
    # guarantees it is flushed and closed instead of leaking the descriptor.
    with tempfile.NamedTemporaryFile(
        mode="w", suffix=".txt", delete=False, encoding="utf-8"
    ) as annotation_file:
        annotation_file.write(layout_annotations_str)
    return annotation_file.name

# Custom CSS passed to gr.Blocks(css=...). The button-primary, button-secondary,
# gr-image and gr-textbox selectors match the elem_classes values assigned to
# the components below; .container is not referenced by any component here.
custom_css = """
    .container { max-width: 1200px; margin: auto; }
    .button-primary { background-color: #4CAF50; color: white; }
    .button-secondary { background-color: #008CBA; color: white; }
    .gr-image { border: 2px solid #ddd; border-radius: 5px; }
    .gr-textbox { font-family: monospace; }
"""

def _load_example_image():
    """Return the bundled example image as an RGB array, or None if unreadable."""
    bgr = cv2.imread("example_image.jpg")
    if bgr is None:
        return None
    # OpenCV decodes to BGR, but the image component displays arrays as RGB
    return cv2.cvtColor(bgr, cv2.COLOR_BGR2RGB)


# Create Gradio interface with enhanced styling
with gr.Blocks(
    title="Latex2Layout Detection",
    theme=gr.themes.Default(),
    css=custom_css
) as demo:
    # Header with instructions
    gr.Markdown(
        """
        # Latex2Layout Layout Detection
        Upload an image to detect layout elements using the **Latex2Layout** model. View the annotated image and download the results in YOLO format.
        """
    )

    # Main layout with two columns
    with gr.Row():
        # Input column: image upload plus the button that starts detection
        with gr.Column(scale=1):
            input_image = gr.Image(
                label="Upload Image",
                type="numpy",
                height=400,
                elem_classes="gr-image"
            )
            detect_btn = gr.Button(
                "Start Detection",
                variant="primary",
                elem_classes="button-primary"
            )
            gr.Markdown("**Tip**: Upload a clear image for optimal detection results.")

        # Output column: annotated image, YOLO text and the download controls
        with gr.Column(scale=1):
            output_image = gr.Image(
                label="Detection Results",
                height=400,
                elem_classes="gr-image"
            )
            layout_annotations = gr.Textbox(
                label="Layout Annotations (YOLO Format)",
                lines=10,
                max_lines=15,
                elem_classes="gr-textbox"
            )
            download_btn = gr.Button(
                "Download Annotations",
                variant="secondary",
                elem_classes="button-secondary"
            )
            download_file = gr.File(
                label="Download File",
                interactive=False
            )

    # Example image button (optional)
    with gr.Row():
        gr.Button("Load Example Image").click(
            fn=_load_example_image,
            outputs=input_image
        )

    # Event handlers
    # The button caption is switched from Python rather than through a JS hook:
    # a JS hook's return value replaces the inputs sent to the Python function,
    # so a hook that returns nothing would drop the uploaded image.
    detect_btn.click(
        fn=lambda: gr.update(value="Processing..."),
        outputs=detect_btn
    ).then(
        fn=detect_and_visualize,
        inputs=input_image,
        outputs=[output_image, layout_annotations],
        show_progress=True
    ).then(
        # Chained with .then so the caption is restored after detection finishes
        fn=lambda: gr.update(value="Start Detection"),
        outputs=detect_btn
    )

    download_btn.click(
        fn=save_layout_annotations,
        inputs=layout_annotations,
        outputs=download_file
    )


# Launch the application only when this file is run as a script; importing the
# module still builds `demo` but does not start the server.
if __name__ == "__main__":
    demo.launch()