Spaces:

ChaseHan
/

Latex2Layout_PDF_Layout_Parsing

Running

Latex2Layout_PDF_Layout_Parsing / app.py

Hanf Chase

88158ba 2 months ago

5.62 kB

	import gradio as gr
	import cv2
	import numpy as np
	import os
	import tempfile
	from ultralytics import YOLO

	# Load the Latex2Layout model
	# The YOLO weights are loaded once at import time from a path relative to the
	# working directory; detect_and_visualize() reuses this single `model` instance.
	model_path = "latex2layout_object_detection_yolov8.pt"
	model = YOLO(model_path)

	# Fixed drawing colors, indexed by class id, so a class keeps the same color
	# across boxes and across runs. All are dark enough for white label text.
	_CLASS_PALETTE = (
	    (220, 20, 60),
	    (0, 128, 0),
	    (30, 144, 255),
	    (255, 140, 0),
	    (148, 0, 211),
	    (0, 139, 139),
	    (199, 21, 133),
	    (112, 128, 144),
	)


	def _class_color(cls_id):
	    """Return the palette color assigned to *cls_id* (cycles when ids exceed the palette)."""
	    return _CLASS_PALETTE[cls_id % len(_CLASS_PALETTE)]


	def detect_and_visualize(image):
	    """
	    Perform layout detection on the uploaded image using the Latex2Layout model and visualize the results.

	    Args:
	        image: The uploaded image as a numpy array (the UI passes it with
	            type="numpy"), or None when nothing was uploaded.

	    Returns:
	        annotated_image: Copy of the input with one box and label per detection,
	            or None when no image was supplied.
	        layout_annotations: One "cls x_center y_center width height" line per
	            detection, normalized by the image size (YOLO format); an error
	            message when no image was supplied.
	    """
	    if image is None:
	        return None, "Error: No image uploaded."

	    # Ultralytics documents numpy input as BGR, while Gradio delivers RGB, so
	    # reverse the channel axis for 3-channel images before inference.
	    if image.ndim == 3 and image.shape[2] == 3:
	        model_input = np.ascontiguousarray(image[:, :, ::-1])
	    else:
	        model_input = image

	    # Run detection using the Latex2Layout model; a single image yields one result
	    result = model(model_input)[0]

	    # Draw on a copy so the caller's array is left untouched
	    annotated_image = image.copy()
	    layout_annotations = []

	    img_height, img_width = image.shape[:2]
	    font = cv2.FONT_HERSHEY_SIMPLEX

	    for box in result.boxes:
	        x1, y1, x2, y2 = map(int, box.xyxy[0].cpu().numpy())
	        conf = float(box.conf[0])
	        cls_id = int(box.cls[0])
	        cls_name = result.names[cls_id]

	        # One stable color per class (not a fresh random color per box)
	        color = _class_color(cls_id)

	        # Draw bounding box and label
	        cv2.rectangle(annotated_image, (x1, y1), (x2, y2), color, 2)
	        label = f'{cls_name} {conf:.2f}'
	        (label_width, label_height), _ = cv2.getTextSize(label, font, 0.5, 1)

	        # The label normally sits just above the box; when the box touches the
	        # top edge, push it down so it is not drawn outside the image.
	        label_bottom = max(y1, label_height + 5)
	        cv2.rectangle(
	            annotated_image,
	            (x1, label_bottom - label_height - 5),
	            (x1 + label_width, label_bottom),
	            color,
	            -1,
	        )
	        cv2.putText(
	            annotated_image, label, (x1, label_bottom - 5), font, 0.5, (255, 255, 255), 1
	        )

	        # Convert to YOLO format (normalized center / size)
	        x_center = (x1 + x2) / (2 * img_width)
	        y_center = (y1 + y2) / (2 * img_height)
	        width = (x2 - x1) / img_width
	        height = (y2 - y1) / img_height
	        layout_annotations.append(
	            f"{cls_id} {x_center:.6f} {y_center:.6f} {width:.6f} {height:.6f}"
	        )

	    return annotated_image, "\n".join(layout_annotations)

	def save_layout_annotations(layout_annotations_str):
	    """
	    Save layout annotations to a temporary file and return the file path.

	    Args:
	        layout_annotations_str: Annotations string in YOLO format

	    Returns:
	        file_path: Path to the saved ".txt" annotation file, or None when the
	            annotations string is empty or None.
	    """
	    if not layout_annotations_str:
	        return None

	    # Write through the handle NamedTemporaryFile already opened and close it
	    # on exit, instead of leaking it and reopening the file by name.
	    # delete=False keeps the file on disk so the returned path stays valid.
	    with tempfile.NamedTemporaryFile(
	        mode="w", suffix=".txt", delete=False, encoding="utf-8"
	    ) as annotation_file:
	        annotation_file.write(layout_annotations_str)
	    return annotation_file.name

	# Custom CSS for styling
	# Passed to gr.Blocks(css=...). The .button-primary, .button-secondary,
	# .gr-image and .gr-textbox selectors match the elem_classes given to the
	# components below; .container is not referenced by the visible code.
	custom_css = """
	.container { max-width: 1200px; margin: auto; }
	.button-primary { background-color: #4CAF50; color: white; }
	.button-secondary { background-color: #008CBA; color: white; }
	.gr-image { border: 2px solid #ddd; border-radius: 5px; }
	.gr-textbox { font-family: monospace; }
	"""

	def _load_example_image():
	    """Load example_image.jpg as an RGB array, or return None if it cannot be read."""
	    bgr_image = cv2.imread("example_image.jpg")
	    # cv2.imread returns None (it does not raise) when the file is missing or unreadable
	    if bgr_image is None:
	        return None
	    # OpenCV decodes to BGR, while the Gradio image component expects RGB
	    return cv2.cvtColor(bgr_image, cv2.COLOR_BGR2RGB)


	# Create Gradio interface with enhanced styling
	with gr.Blocks(
	    title="Latex2Layout Detection",
	    theme=gr.themes.Default(),
	    css=custom_css,
	) as demo:
	    # Header with instructions
	    gr.Markdown(
	        """
	        # Latex2Layout Layout Detection
	        Upload an image to detect layout elements using the Latex2Layout model. View the annotated image and download the results in YOLO format.
	        """
	    )

	    # Main layout with two columns
	    with gr.Row():
	        # Input column: image upload and the button that triggers detection
	        with gr.Column(scale=1):
	            input_image = gr.Image(
	                label="Upload Image",
	                type="numpy",
	                height=400,
	                elem_classes="gr-image",
	            )
	            detect_btn = gr.Button(
	                "Start Detection",
	                variant="primary",
	                elem_classes="button-primary",
	            )
	            gr.Markdown("Tip: Upload a clear image for optimal detection results.")

	        # Output column: annotated image, YOLO text and the download controls
	        with gr.Column(scale=1):
	            output_image = gr.Image(
	                label="Detection Results",
	                height=400,
	                elem_classes="gr-image",
	            )
	            layout_annotations = gr.Textbox(
	                label="Layout Annotations (YOLO Format)",
	                lines=10,
	                max_lines=15,
	                elem_classes="gr-textbox",
	            )
	            download_btn = gr.Button(
	                "Download Annotations",
	                variant="secondary",
	                elem_classes="button-secondary",
	            )
	            download_file = gr.File(
	                label="Download File",
	                interactive=False,
	            )

	    # Example image button (optional)
	    with gr.Row():
	        gr.Button("Load Example Image").click(
	            fn=_load_example_image,
	            outputs=input_image,
	        )

	    # Event handlers
	    # The button text is switched to "Processing..." in the browser while the
	    # detection runs, then restored by the chained .then() step.
	    # NOTE(review): `_js` is the Gradio 3.x spelling of this argument; newer
	    # releases call it `js` - confirm against the pinned Gradio version.
	    detect_btn.click(
	        fn=detect_and_visualize,
	        inputs=input_image,
	        outputs=[output_image, layout_annotations],
	        _js="() => { document.querySelector('.button-primary').innerText = 'Processing...'; }",
	        show_progress=True,
	    ).then(
	        fn=lambda: gr.update(value="Start Detection"),
	        outputs=detect_btn,
	        _js="() => { document.querySelector('.button-primary').innerText = 'Start Detection'; }",
	    )

	    # Write the textbox contents to a temp file and expose it for download
	    download_btn.click(
	        fn=save_layout_annotations,
	        inputs=layout_annotations,
	        outputs=download_file,
	    )


	def main():
	    """Start the Gradio server for the `demo` interface."""
	    demo.launch()


	# Launch the application only when executed as a script, not on import
	if __name__ == "__main__":
	    main()