File size: 5,620 Bytes
71e7eab 88158ba 71e7eab 88158ba 71e7eab 88158ba 71e7eab 88158ba 71e7eab 88158ba 71e7eab 88158ba 71e7eab 88158ba 71e7eab 88158ba 71e7eab 88158ba 71e7eab 88158ba 71e7eab 88158ba 71e7eab 88158ba 71e7eab 88158ba 71e7eab 88158ba 71e7eab 88158ba 71e7eab 88158ba 71e7eab 88158ba 71e7eab 88158ba 71e7eab 88158ba 71e7eab 88158ba 71e7eab 88158ba 71e7eab 88158ba 71e7eab 88158ba 71e7eab 88158ba 71e7eab 88158ba 71e7eab 88158ba 71e7eab 88158ba 71e7eab 88158ba 71e7eab 88158ba 71e7eab 88158ba 71e7eab 88158ba |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 |
import gradio as gr
import cv2
import numpy as np
import os
import tempfile
from ultralytics import YOLO
# Load the Latex2Layout model
# The weights path is relative, so it is resolved against the current working
# directory of the process. The model is created once at import time and the
# module-level `model` object is reused by detect_and_visualize on every call.
model_path = "latex2layout_object_detection_yolov8.pt"
model = YOLO(model_path)
def _class_color(cls_id):
    """Return a repeatable pseudo-random color tuple for a class id."""
    # Seeding with the class id gives every box of one class the same color,
    # and the same color on every request.
    rng = np.random.RandomState(cls_id)
    return tuple(int(channel) for channel in rng.randint(0, 255, 3))


def detect_and_visualize(image):
    """
    Perform layout detection on the uploaded image using the Latex2Layout model and visualize the results.

    Args:
        image: The uploaded image as a NumPy array (height, width[, channels]).
            The Gradio input component is configured with type="numpy", which
            delivers RGB channel order.

    Returns:
        annotated_image: Copy of the input with detection boxes and labels
            drawn on it, or None when no image was supplied.
        layout_annotations: Annotations in YOLO format, one detection per line
            ("class x_center y_center width height", normalized to [0, 1]),
            or an error message when no image was supplied.
    """
    if image is None:
        return None, "Error: No image uploaded."

    # Ultralytics interprets NumPy inputs as BGR, while the UI hands us RGB.
    # Swap the channels for inference only; drawing happens on the RGB copy.
    if image.ndim == 3 and image.shape[2] == 3:
        model_input = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
    else:
        model_input = image

    # Run detection using the Latex2Layout model
    result = model(model_input)[0]

    # Draw on a copy so the caller's array is left untouched
    annotated_image = image.copy()
    layout_annotations = []
    img_height, img_width = image.shape[:2]

    font = cv2.FONT_HERSHEY_SIMPLEX
    class_colors = {}

    for box in result.boxes:
        # Keep the float corners for the exported annotations; integer pixel
        # coordinates are only needed for drawing.
        fx1, fy1, fx2, fy2 = box.xyxy[0].cpu().numpy().tolist()
        x1, y1, x2, y2 = int(fx1), int(fy1), int(fx2), int(fy2)
        conf = float(box.conf[0])
        cls_id = int(box.cls[0])
        cls_name = result.names[cls_id]

        # One stable color per class
        if cls_id not in class_colors:
            class_colors[cls_id] = _class_color(cls_id)
        color = class_colors[cls_id]

        # Draw bounding box and label
        cv2.rectangle(annotated_image, (x1, y1), (x2, y2), color, 2)
        label = f'{cls_name} {conf:.2f}'
        (label_width, label_height), _ = cv2.getTextSize(label, font, 0.5, 1)

        # Put the label banner above the box; when the box touches the top of
        # the image there is no room, so draw the banner just inside the box.
        banner_top = y1 - label_height - 5
        if banner_top < 0:
            banner_top = y1
        banner_bottom = banner_top + label_height + 5
        cv2.rectangle(
            annotated_image,
            (x1, banner_top),
            (x1 + label_width, banner_bottom),
            color,
            -1,
        )
        cv2.putText(
            annotated_image,
            label,
            (x1, banner_bottom - 5),
            font,
            0.5,
            (255, 255, 255),
            1,
        )

        # Convert to YOLO format (normalized center x/y, width, height)
        x_center = (fx1 + fx2) / (2 * img_width)
        y_center = (fy1 + fy2) / (2 * img_height)
        width = (fx2 - fx1) / img_width
        height = (fy2 - fy1) / img_height
        layout_annotations.append(
            f"{cls_id} {x_center:.6f} {y_center:.6f} {width:.6f} {height:.6f}"
        )

    return annotated_image, "\n".join(layout_annotations)
def save_layout_annotations(layout_annotations_str):
    """
    Save layout annotations to a temporary file and return the file path.

    Args:
        layout_annotations_str: Annotations string in YOLO format

    Returns:
        file_path: Path to the saved ".txt" annotation file, or None when
            there is nothing to save (None, empty or whitespace-only input).
            The file is created with delete=False, so it stays on disk after
            this function returns.
    """
    if not layout_annotations_str or not layout_annotations_str.strip():
        return None

    # Write through the handle NamedTemporaryFile already opened instead of
    # reopening the path: the context manager guarantees the handle is closed
    # (no leaked descriptor), and reopening a still-open temporary file by
    # name is not possible on Windows.
    with tempfile.NamedTemporaryFile(
        mode="w", suffix=".txt", delete=False, encoding="utf-8"
    ) as temp_file:
        temp_file.write(layout_annotations_str)
    return temp_file.name
# Custom CSS for styling
# Passed to gr.Blocks(css=...). The .button-primary, .button-secondary,
# .gr-image and .gr-textbox selectors match the elem_classes values given to
# the components in the UI definition; .container is not attached to any
# component in this file.
custom_css = """
.container { max-width: 1200px; margin: auto; }
.button-primary { background-color: #4CAF50; color: white; }
.button-secondary { background-color: #008CBA; color: white; }
.gr-image { border: 2px solid #ddd; border-radius: 5px; }
.gr-textbox { font-family: monospace; }
"""
def _load_example_image():
    """Load the bundled example image as an RGB array, or None if it is missing."""
    bgr_image = cv2.imread("example_image.jpg")
    if bgr_image is None:
        # cv2.imread signals a missing/unreadable file by returning None;
        # returning None simply leaves the input component empty.
        return None
    # OpenCV decodes to BGR, but the Gradio image component displays RGB.
    return cv2.cvtColor(bgr_image, cv2.COLOR_BGR2RGB)


# Create Gradio interface with enhanced styling
with gr.Blocks(
    title="Latex2Layout Detection",
    theme=gr.themes.Default(),
    css=custom_css,
) as demo:
    # Header with instructions
    gr.Markdown(
        """
        # Latex2Layout Layout Detection
        Upload an image to detect layout elements using the **Latex2Layout** model. View the annotated image and download the results in YOLO format.
        """
    )

    # Main layout with two columns
    with gr.Row():
        # Input column
        with gr.Column(scale=1):
            input_image = gr.Image(
                label="Upload Image",
                type="numpy",
                height=400,
                elem_classes="gr-image",
            )
            detect_btn = gr.Button(
                "Start Detection",
                variant="primary",
                elem_classes="button-primary",
            )
            gr.Markdown("**Tip**: Upload a clear image for optimal detection results.")

        # Output column
        with gr.Column(scale=1):
            output_image = gr.Image(
                label="Detection Results",
                height=400,
                elem_classes="gr-image",
            )
            layout_annotations = gr.Textbox(
                label="Layout Annotations (YOLO Format)",
                lines=10,
                max_lines=15,
                elem_classes="gr-textbox",
            )
            download_btn = gr.Button(
                "Download Annotations",
                variant="secondary",
                elem_classes="button-secondary",
            )
            download_file = gr.File(
                label="Download File",
                interactive=False,
            )

    # Example image button (optional)
    with gr.Row():
        gr.Button("Load Example Image").click(
            fn=_load_example_image,
            outputs=input_image,
        )

    # Event handlers
    # The button label is switched from Python in a three-step chain rather
    # than with a JavaScript hook: a JS hook attached to an event that also
    # has a Python fn replaces the inputs sent to the backend with its own
    # return value, and the hook keyword differs between Gradio major
    # versions. `.then()` steps run even if the previous step raised, so the
    # label is always restored.
    detect_btn.click(
        fn=lambda: gr.update(value="Processing..."),
        outputs=detect_btn,
    ).then(
        fn=detect_and_visualize,
        inputs=input_image,
        outputs=[output_image, layout_annotations],
        show_progress=True,
    ).then(
        fn=lambda: gr.update(value="Start Detection"),
        outputs=detect_btn,
    )

    download_btn.click(
        fn=save_layout_annotations,
        inputs=layout_annotations,
        outputs=download_file,
    )
# Launch the application
# Guarded so the server starts only when this file is executed as a script,
# not when the module is imported.
if __name__ == "__main__":
    demo.launch()
|