ChaseHan committed on
Commit 923f8ae · verified · 1 Parent(s): 15de4c7
Files changed (1)
  1. app.py +75 -97
app.py CHANGED
@@ -1,29 +1,30 @@
 import gradio as gr
 import cv2
 import numpy as np
-import requests
+import os
+import tempfile
 from ultralytics import YOLO

 # Load the Latex2Layout model
 model_path = "latex2layout_object_detection_yolov8.pt"
-latex2layout_model = YOLO(model_path)
+model = YOLO(model_path)

-def detect_layout(image):
+def detect_and_visualize(image):
     """
-    Perform layout detection on the uploaded image using the Latex2Layout model.
+    Perform layout detection on the uploaded image using the Latex2Layout model and visualize the results.

     Args:
-        image: The uploaded image (numpy array)
+        image: The uploaded image

     Returns:
-        annotated_image: Image with detection boxes drawn
-        layout_info: Text description of detected layout elements
+        annotated_image: Image with detection boxes
+        layout_annotations: Annotations in YOLO format
     """
     if image is None:
         return None, "Error: No image uploaded."

-    # Run detection
-    results = latex2layout_model(image)
+    # Run detection using the Latex2Layout model
+    results = model(image)
     result = results[0]

     # Create a copy of the image for visualization
@@ -33,108 +34,74 @@ def detect_layout(image):
     # Get image dimensions
     img_height, img_width = image.shape[:2]

-    # Process detection results
+    # Draw detection results
     for box in result.boxes:
         x1, y1, x2, y2 = map(int, box.xyxy[0].cpu().numpy())
         conf = float(box.conf[0])
         cls_id = int(box.cls[0])
         cls_name = result.names[cls_id]

-        # Draw bounding box and label on the image
+        # Generate a color for each class
         color = tuple(np.random.randint(0, 255, 3).tolist())
+
+        # Draw bounding box and label
         cv2.rectangle(annotated_image, (x1, y1), (x2, y2), color, 2)
         label = f'{cls_name} {conf:.2f}'
         (label_width, label_height), _ = cv2.getTextSize(label, cv2.FONT_HERSHEY_SIMPLEX, 0.5, 1)
         cv2.rectangle(annotated_image, (x1, y1-label_height-5), (x1+label_width, y1), color, -1)
         cv2.putText(annotated_image, label, (x1, y1-5), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 255), 1)

-        # Format layout info for Qwen2.5-VL
-        layout_annotations.append(f"{cls_name} at position ({x1},{y1},{x2},{y2}) with confidence {conf:.2f}")
-
-    layout_info = "Detected layout elements: " + "; ".join(layout_annotations) if layout_annotations else "No layout elements detected."
-    return annotated_image, layout_info
-
-def call_qwen_vl_api(api_url, image, layout_info, question):
-    """
-    Call the Qwen2.5-VL API with the image, layout info, and user question.
-
-    Args:
-        api_url: The URL of the Qwen2.5-VL API
-        image: The uploaded image (numpy array)
-        layout_info: Text description of detected layout elements
-        question: User's question about the image and layout
-
-    Returns:
-        answer: Response from the Qwen2.5-VL API
-    """
-    if not api_url:
-        return "Error: Please provide a valid Qwen2.5-VL API URL."
-    if not question:
-        return "Error: Please enter a question."
+        # Convert to YOLO format (normalized)
+        x_center = (x1 + x2) / (2 * img_width)
+        y_center = (y1 + y2) / (2 * img_height)
+        width = (x2 - x1) / img_width
+        height = (y2 - y1) / img_height
+        layout_annotations.append(f"{cls_id} {x_center:.6f} {y_center:.6f} {width:.6f} {height:.6f}")

-    try:
-        # Convert image to a format suitable for API (e.g., base64 or raw bytes might be needed; adjust per API spec)
-        # Here, we assume the API accepts a URL or raw data; for simplicity, we use a placeholder
-        payload = {
-            "image": image.tolist(),  # Adjust this based on API requirements (e.g., base64 encoding)
-            "prompt": f"{layout_info}\n\nQuestion: {question}",
-        }
-        response = requests.post(api_url, json=payload, timeout=30)
-        response.raise_for_status()  # Raise an error for bad status codes
-        return response.json().get("answer", "Error: No answer received from API.")
-    except requests.exceptions.RequestException as e:
-        return f"Error: API call failed - {str(e)}"
+    return annotated_image, "\n".join(layout_annotations)

-def process_image_and_question(api_url, image, question):
+def save_layout_annotations(layout_annotations_str):
     """
-    Process the image with Latex2Layout and query Qwen2.5-VL API.
+    Save layout annotations to a temporary file and return the file path.

     Args:
-        api_url: Qwen2.5-VL API URL
-        image: Uploaded image
-        question: User's question
+        layout_annotations_str: Annotations string in YOLO format

     Returns:
-        annotated_image: Image with detection boxes
-        layout_info: Detected layout description
-        answer: API response to the question
+        file_path: Path to the saved annotation file
     """
-    annotated_image, layout_info = detect_layout(image)
-    if annotated_image is None:
-        return None, layout_info, "Error: Detection failed."
+    if not layout_annotations_str:
+        return None

-    answer = call_qwen_vl_api(api_url, image, layout_info, question)
-    return annotated_image, layout_info, answer
+    temp_file = tempfile.NamedTemporaryFile(delete=False, suffix=".txt")
+    with open(temp_file.name, "w") as f:
+        f.write(layout_annotations_str)
+    return temp_file.name

 # Custom CSS for styling
 custom_css = """
 .container { max-width: 1200px; margin: auto; }
 .button-primary { background-color: #4CAF50; color: white; }
+.button-secondary { background-color: #008CBA; color: white; }
 .gr-image { border: 2px solid #ddd; border-radius: 5px; }
 .gr-textbox { font-family: monospace; }
 """

-# Create Gradio interface
+# Create Gradio interface with enhanced styling
 with gr.Blocks(
-    title="Latex2Layout Detection & QA",
+    title="Latex2Layout Detection",
     theme=gr.themes.Default(),
     css=custom_css
 ) as demo:
+    # Header with instructions
     gr.Markdown(
         """
-        # Latex2Layout Layout Detection & Q&A
-        Upload an image to detect layout elements using the **Latex2Layout** model, then ask questions about the layout and image content using the Qwen2.5-VL API.
+        # Latex2Layout Layout Detection
+        Upload an image to detect layout elements using the **Latex2Layout** model. View the annotated image and download the results in YOLO format.
         """
     )

-    # API URL input
-    api_url_input = gr.Textbox(
-        label="Qwen2.5-VL API URL",
-        placeholder="Enter the Qwen2.5-VL API URL here",
-        value=""
-    )
-
-    # Main layout
+    # Main layout with two columns
    with gr.Row():
        # Input column
        with gr.Column(scale=1):
@@ -144,49 +111,60 @@ with gr.Blocks(
                height=400,
                elem_classes="gr-image"
            )
-            question_input = gr.Textbox(
-                label="Ask a Question",
-                placeholder="e.g., What is the layout structure of this image?",
-                lines=2
-            )
-            submit_btn = gr.Button(
-                "Detect & Ask",
+            detect_btn = gr.Button(
+                "Start Detection",
                variant="primary",
                elem_classes="button-primary"
            )
-            gr.Markdown("**Tip**: Provide a clear image and specific question for best results.")
+            gr.Markdown("**Tip**: Upload a clear image for optimal detection results.")

        # Output column
        with gr.Column(scale=1):
            output_image = gr.Image(
-                label="Detected Layout",
+                label="Detection Results",
                height=400,
                elem_classes="gr-image"
            )
-            layout_output = gr.Textbox(
-                label="Layout Information",
-                lines=5,
-                max_lines=10,
+            layout_annotations = gr.Textbox(
+                label="Layout Annotations (YOLO Format)",
+                lines=10,
+                max_lines=15,
                elem_classes="gr-textbox"
            )
-            answer_output = gr.Textbox(
-                label="Answer",
-                lines=5,
-                max_lines=10,
-                elem_classes="gr-textbox"
+            download_btn = gr.Button(
+                "Download Annotations",
+                variant="secondary",
+                elem_classes="button-secondary"
            )
+            download_file = gr.File(
+                label="Download File",
+                interactive=False
+            )
+
+    # Example image button (optional)
+    with gr.Row():
+        gr.Button("Load Example Image").click(
+            fn=lambda: cv2.imread("example_image.jpg"),
+            outputs=input_image
+        )

-    # Event handler
-    submit_btn.click(
-        fn=process_image_and_question,
-        inputs=[api_url_input, input_image, question_input],
-        outputs=[output_image, layout_output, answer_output],
+    # Event handlers
+    detect_btn.click(
+        fn=detect_and_visualize,
+        inputs=input_image,
+        outputs=[output_image, layout_annotations],
        _js="() => { document.querySelector('.button-primary').innerText = 'Processing...'; }",
        show_progress=True
    ).then(
-        fn=lambda: gr.update(value="Detect & Ask"),
-        outputs=submit_btn,
-        _js="() => { document.querySelector('.button-primary').innerText = 'Detect & Ask'; }"
+        fn=lambda: gr.update(value="Start Detection"),
+        outputs=detect_btn,
+        _js="() => { document.querySelector('.button-primary').innerText = 'Start Detection'; }"
+    )
+
+    download_btn.click(
+        fn=save_layout_annotations,
+        inputs=layout_annotations,
+        outputs=download_file
    )

 # Launch the application
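
For reference, the new `detect_and_visualize` function converts each detected box from pixel coordinates into the normalized center/width/height form used by YOLO annotation files. Below is a minimal, self-contained sketch of that conversion; the helper name `to_yolo_line` and the sample numbers are illustrative and not part of the commit:

```python
# Minimal sketch of the YOLO-format conversion performed in detect_and_visualize.
# The helper name and the sample values below are illustrative only.

def to_yolo_line(cls_id: int, x1: int, y1: int, x2: int, y2: int,
                 img_width: int, img_height: int) -> str:
    """Convert a pixel-space box (x1, y1, x2, y2) into one YOLO annotation line."""
    x_center = (x1 + x2) / (2 * img_width)   # normalized box-center x
    y_center = (y1 + y2) / (2 * img_height)  # normalized box-center y
    width = (x2 - x1) / img_width            # normalized box width
    height = (y2 - y1) / img_height          # normalized box height
    return f"{cls_id} {x_center:.6f} {y_center:.6f} {width:.6f} {height:.6f}"

# Example: a 200x100 box in the top-left corner of a 1000x800 image
print(to_yolo_line(0, 0, 0, 200, 100, 1000, 800))
# -> "0 0.100000 0.062500 0.200000 0.125000"
```

The text written out by `save_layout_annotations` is simply these lines joined by newlines, one detection per line in `class x_center y_center width height` order.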