"""Gradio demo for the Latex2Layout YOLOv8 layout-detection model.

Upload a page image, run detection, view the annotated result and download
the detections as a YOLO-format annotation file.
"""

import os
import tempfile
from typing import Optional, Tuple

import cv2
import gradio as gr
import numpy as np
from ultralytics import YOLO

# Load the Latex2Layout model
model_path = "latex2layout_object_detection_yolov8.pt"
model = YOLO(model_path)

# Message shown in the annotation box when detection is started without an image.
NO_IMAGE_MESSAGE = "Error: No image uploaded."
EXAMPLE_IMAGE_PATH = "example_image.jpg"

_FONT = cv2.FONT_HERSHEY_SIMPLEX
_FONT_SCALE = 0.5
_FONT_THICKNESS = 1
_LABEL_PADDING = 5


def _class_color(cls_id: int) -> Tuple[int, int, int]:
    """Return a colour that is stable for a given class id.

    Seeding the generator with the class id means every box of one class
    shares a colour, and the colour does not change between runs. Channels
    are capped at 200 so the white label text stays legible on the fill.
    """
    rng = np.random.default_rng(cls_id)
    return tuple(int(channel) for channel in rng.integers(0, 200, size=3))


def _to_yolo_line(
    cls_id: int,
    x1: int,
    y1: int,
    x2: int,
    y2: int,
    img_width: int,
    img_height: int,
) -> str:
    """Format one pixel-space box as a normalized YOLO annotation line.

    The line is ``<class> <x_center> <y_center> <width> <height>`` with every
    coordinate divided by the image size and printed with six decimals.
    """
    x_center = (x1 + x2) / (2 * img_width)
    y_center = (y1 + y2) / (2 * img_height)
    width = (x2 - x1) / img_width
    height = (y2 - y1) / img_height
    return f"{cls_id} {x_center:.6f} {y_center:.6f} {width:.6f} {height:.6f}"


def _draw_detection(canvas: np.ndarray, box_xyxy, label: str, color) -> None:
    """Draw a bounding box and its filled label tag onto ``canvas`` in place."""
    x1, y1, x2, y2 = box_xyxy
    cv2.rectangle(canvas, (x1, y1), (x2, y2), color, 2)

    (text_w, text_h), _ = cv2.getTextSize(label, _FONT, _FONT_SCALE, _FONT_THICKNESS)
    tag_height = text_h + _LABEL_PADDING
    # Put the tag above the box when there is room; otherwise tuck it just
    # inside the top edge so it is not clipped by the image border.
    tag_bottom = y1 if y1 - tag_height >= 0 else y1 + tag_height
    cv2.rectangle(
        canvas, (x1, tag_bottom - tag_height), (x1 + text_w, tag_bottom), color, -1
    )
    cv2.putText(
        canvas,
        label,
        (x1, tag_bottom - _LABEL_PADDING),
        _FONT,
        _FONT_SCALE,
        (255, 255, 255),
        _FONT_THICKNESS,
    )


def detect_and_visualize(image):
    """
    Perform layout detection on the uploaded image using the Latex2Layout model
    and visualize the results.

    Args:
        image: The uploaded image as a numpy array (RGB channel order, as
            delivered by the Gradio image component), or None.

    Returns:
        annotated_image: Copy of the input with detection boxes and labels
            drawn on it, or None when no image was supplied.
        layout_annotations: One YOLO-format line per detection, joined with
            newlines, or an error message when no image was supplied.
    """
    if image is None:
        return None, NO_IMAGE_MESSAGE

    # Ultralytics treats numpy inputs as BGR, while Gradio supplies RGB, so
    # swap the channels for inference only. Drawing stays on the RGB copy.
    if image.ndim == 3 and image.shape[2] == 3:
        model_input = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
    else:
        model_input = image

    # Run detection using the Latex2Layout model
    result = model(model_input)[0]

    annotated_image = image.copy()
    img_height, img_width = image.shape[:2]
    layout_annotations = []

    for box in result.boxes:
        x1, y1, x2, y2 = map(int, box.xyxy[0].cpu().numpy())
        conf = float(box.conf[0])
        cls_id = int(box.cls[0])
        cls_name = result.names[cls_id]

        _draw_detection(
            annotated_image,
            (x1, y1, x2, y2),
            f"{cls_name} {conf:.2f}",
            _class_color(cls_id),
        )
        layout_annotations.append(
            _to_yolo_line(cls_id, x1, y1, x2, y2, img_width, img_height)
        )

    return annotated_image, "\n".join(layout_annotations)


def save_layout_annotations(layout_annotations_str):
    """
    Save layout annotations to a temporary file and return the file path.

    Args:
        layout_annotations_str: Annotations string in YOLO format

    Returns:
        file_path: Path to the saved annotation file, or None when there is
            nothing to save (empty text or the "no image" error message).
    """
    if not layout_annotations_str or not layout_annotations_str.strip():
        return None
    # The error placeholder is not annotation data; do not offer it as a file.
    if layout_annotations_str == NO_IMAGE_MESSAGE:
        return None

    # Write through the handle returned by NamedTemporaryFile and close it on
    # exit; delete=False keeps the file on disk so Gradio can serve it.
    with tempfile.NamedTemporaryFile(
        mode="w", encoding="utf-8", delete=False, suffix=".txt"
    ) as annotation_file:
        annotation_file.write(layout_annotations_str)
    return annotation_file.name


def _load_example_image() -> Optional[np.ndarray]:
    """Read the bundled example image as RGB, or return None if unavailable."""
    example = cv2.imread(EXAMPLE_IMAGE_PATH)
    if example is None:
        # cv2.imread signals a missing or unreadable file by returning None.
        return None
    # OpenCV decodes to BGR; the image component displays RGB.
    return cv2.cvtColor(example, cv2.COLOR_BGR2RGB)


# Custom CSS for styling
custom_css = """
.container { max-width: 1200px; margin: auto; }
.button-primary { background-color: #4CAF50; color: white; }
.button-secondary { background-color: #008CBA; color: white; }
.gr-image { border: 2px solid #ddd; border-radius: 5px; }
.gr-textbox { font-family: monospace; }
"""

# Create Gradio interface with enhanced styling
with gr.Blocks(
    title="Latex2Layout Detection",
    theme=gr.themes.Default(),
    css=custom_css,
) as demo:
    # Header with instructions
    gr.Markdown(
        """
        # Latex2Layout Layout Detection
        Upload an image to detect layout elements using the **Latex2Layout** model. View the annotated image and download the results in YOLO format.
        """
    )

    # Main layout with two columns
    with gr.Row():
        # Input column
        with gr.Column(scale=1):
            input_image = gr.Image(
                label="Upload Image",
                type="numpy",
                height=400,
                elem_classes="gr-image",
            )
            detect_btn = gr.Button(
                "Start Detection",
                variant="primary",
                elem_classes="button-primary",
            )
            gr.Markdown("**Tip**: Upload a clear image for optimal detection results.")

        # Output column
        with gr.Column(scale=1):
            output_image = gr.Image(
                label="Detection Results",
                height=400,
                elem_classes="gr-image",
            )
            layout_annotations = gr.Textbox(
                label="Layout Annotations (YOLO Format)",
                lines=10,
                max_lines=15,
                elem_classes="gr-textbox",
            )
            download_btn = gr.Button(
                "Download Annotations",
                variant="secondary",
                elem_classes="button-secondary",
            )
            download_file = gr.File(
                label="Download File",
                interactive=False,
            )

    # Example image button (optional)
    with gr.Row():
        gr.Button("Load Example Image").click(
            fn=_load_example_image,
            outputs=input_image,
        )

    # Event handlers
    # The button label is toggled from Python with chained events instead of a
    # `_js` hook: that keyword was renamed to `js` in Gradio 4, and a JS hook
    # on an event that also has `fn` is expected to return the input values,
    # which the previous hooks did not do.
    detect_btn.click(
        fn=lambda: gr.update(value="Processing...", interactive=False),
        outputs=detect_btn,
    ).then(
        fn=detect_and_visualize,
        inputs=input_image,
        outputs=[output_image, layout_annotations],
    ).then(
        # Restores the button once the detection step has finished.
        fn=lambda: gr.update(value="Start Detection", interactive=True),
        outputs=detect_btn,
    )

    download_btn.click(
        fn=save_layout_annotations,
        inputs=layout_annotations,
        outputs=download_file,
    )

# Launch the application
if __name__ == "__main__":
    demo.launch()