Sompote committed on
Commit
56ec8b1
·
verified ·
1 Parent(s): 7377508

Upload app.py

Browse files
Files changed (1) hide show
  1. app.py +411 -0
app.py ADDED
@@ -0,0 +1,411 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import requests
3
+ import json
4
+ import base64
5
+ from PIL import Image, ImageDraw, ImageFont
6
+ import io
7
+
8
def process_with_openrouter(image, prompt, api_key, model="google/gemini-2.5-pro", temperature=0.5):
    """Send an image plus a text prompt to the OpenRouter chat-completions API.

    The image is PNG-encoded, embedded as a base64 ``data:`` URL and posted
    together with ``prompt`` as a single user message. When the reply wraps
    its payload in a Markdown code fence, only the fenced text is returned.

    Args:
        image: Image object exposing ``save(fp, format=...)`` (a PIL image).
        prompt: Text instruction sent alongside the image.
        api_key: OpenRouter API key, sent as a Bearer token.
        model: OpenRouter model identifier.
        temperature: Sampling temperature forwarded to the API.

    Returns:
        A ``(text, error)`` tuple. On success ``text`` is the (unfenced)
        model reply and ``error`` is ``None``. On any failure ``text`` is a
        human-readable message and ``error`` is the string ``"error"``.
    """
    if not api_key:
        return "Please enter your OpenRouter API key", "error"

    if image is None:
        return "Please upload an image", "error"

    try:
        buffered = io.BytesIO()
        image.save(buffered, format="PNG")
        img_base64 = base64.b64encode(buffered.getvalue()).decode()

        headers = {
            "Authorization": f"Bearer {api_key}",
            "Content-Type": "application/json"
        }

        data = {
            "model": model,
            "messages": [
                {
                    "role": "user",
                    "content": [
                        {"type": "text", "text": prompt},
                        {
                            "type": "image_url",
                            "image_url": {"url": f"data:image/png;base64,{img_base64}"}
                        }
                    ]
                }
            ],
            "temperature": temperature
        }

        response = requests.post(
            "https://openrouter.ai/api/v1/chat/completions",
            headers=headers,
            json=data,
            timeout=60
        )

        if response.status_code != 200:
            return f"Error: {response.status_code} - {response.text}", "error"

        result = response.json()

        # A 200 reply can still carry an error object instead of choices;
        # report it directly rather than failing with a bare KeyError.
        choices = result.get('choices') if isinstance(result, dict) else None
        if not choices:
            detail = result.get('error', result) if isinstance(result, dict) else result
            return f"Error: no completion returned - {detail}", "error"

        content = choices[0]['message']['content']
        if not isinstance(content, str) or not content.strip():
            return "Error: model returned an empty response", "error"

        # Keep only the payload of the first Markdown code fence, if any.
        if '```json' in content:
            content = content.split('```json')[1].split('```')[0].strip()
        elif '```' in content:
            content = content.split('```')[1].split('```')[0].strip()

        return content, None

    except Exception as e:
        # UI boundary: every failure (encoding, network, malformed reply) is
        # converted into an error tuple so the caller can display it.
        return f"Error processing request: {str(e)}", "error"
65
+
66
def draw_bounding_boxes(image, detections):
    """Return a copy of ``image`` with a labelled box drawn per detection.

    Each detection is a dict whose ``x``, ``y``, ``width`` and ``height``
    values are multiplied by the image width/height to obtain pixel
    coordinates (``x``/``y`` being the top-left corner). Optional keys
    ``class``, ``confidence``, ``details`` and ``criteria_match`` feed the
    multi-line label. Entries that are not dicts, lack a coordinate key, or
    hold non-numeric coordinates are skipped.

    Args:
        image: PIL image to annotate; it is not modified.
        detections: Iterable of detection dicts, or a falsy value.

    Returns:
        The original ``image`` object when ``detections`` is empty/None,
        otherwise an annotated copy.
    """
    if not detections:
        return image

    annotated_image = image.copy()
    draw = ImageDraw.Draw(annotated_image)

    try:
        label_font = ImageFont.truetype("/System/Library/Fonts/Arial.ttf", 12)
    except OSError:
        # Font file not present on this system: use Pillow's built-in font.
        label_font = ImageFont.load_default()

    colors = ["#FF0000", "#00FF00", "#0000FF", "#FFFF00", "#FF00FF", "#00FFFF", "#FFA500", "#800080"]
    required_keys = ('x', 'y', 'width', 'height')

    for i, detection in enumerate(detections):
        if not isinstance(detection, dict):
            continue
        if not all(key in detection for key in required_keys):
            continue

        try:
            x = float(detection['x']) * image.width
            y = float(detection['y']) * image.height
            width = float(detection['width']) * image.width
            height = float(detection['height']) * image.height
            x1, y1 = int(x), int(y)
            x2, y2 = int(x + width), int(y + height)
        except (TypeError, ValueError, OverflowError):
            # Non-numeric or non-finite coordinates: skip this entry only.
            continue

        # Get detection information
        class_name = detection.get('class', 'unknown')
        details = detection.get('details', '')
        criteria_match = detection.get('criteria_match', '')
        confidence = detection.get('confidence', 1.0)

        # Clamp to the image, then order the corners: a negative width or
        # height would otherwise hand Pillow a rectangle with x2 < x1.
        x1 = max(0, min(x1, image.width))
        y1 = max(0, min(y1, image.height))
        x2 = max(0, min(x2, image.width))
        y2 = max(0, min(y2, image.height))
        x1, x2 = sorted((x1, x2))
        y1, y2 = sorted((y1, y2))

        color = colors[i % len(colors)]

        # Draw bounding box with thicker line for better visibility
        draw.rectangle([x1, y1, x2, y2], outline=color, width=4)

        # Create multi-line label with detailed information
        display_lines = [f"{class_name} ({confidence:.2f})"]

        if details:
            # Truncate details if too long (coerced to text so slicing works)
            details = str(details)
            details_short = details[:40] + "..." if len(details) > 40 else details
            display_lines.append(details_short)

        if criteria_match:
            display_lines.append(f"Criteria: {criteria_match}")

        # Calculate total label size
        max_width = 0
        total_height = 0
        line_heights = []

        for line in display_lines:
            text_bbox = draw.textbbox((0, 0), line, font=label_font)
            line_width = text_bbox[2] - text_bbox[0]
            line_height = text_bbox[3] - text_bbox[1]
            max_width = max(max_width, line_width)
            total_height += line_height + 2
            line_heights.append(line_height)

        # Position label above the box, or below if no space above; when it
        # would run past the bottom edge, pull it back inside the image.
        label_y = y1 - total_height - 4
        if label_y < 0:
            label_y = min(y2 + 2, max(0, image.height - total_height - 2))

        # Ensure label stays within image bounds horizontally
        label_x = x1
        if label_x + max_width > image.width:
            label_x = max(2, image.width - max_width - 4)

        # Draw label background
        draw.rectangle(
            [label_x - 2, label_y, label_x + max_width + 4, label_y + total_height + 2],
            fill=color,
            outline=color
        )

        # Draw each line of text
        current_y = label_y + 2
        for line, line_height in zip(display_lines, line_heights):
            draw.text((label_x + 2, current_y), line, fill="white", font=label_font)
            current_y += line_height + 2

    return annotated_image
161
+
162
def create_detection_prompt(detailed_classes, confidence_threshold=0.5, detection_mode="specific"):
    """Build the text prompt that asks the model for JSON detections.

    Args:
        detailed_classes: Detection criteria. A string is split into one
            criterion per non-blank line (each stripped); any other iterable
            is used as given; ``None`` means no criteria.
        confidence_threshold: Minimum confidence quoted in the instructions.
        detection_mode: ``"specific"`` (only the listed criteria),
            ``"include"`` (listed criteria plus anything else); any other
            value is treated as ``"exclude"`` (everything but the criteria).

    Returns:
        The prompt string: a mode sentence, the numbered criteria (or a
        "No specific criteria provided" placeholder), the detection
        instructions, the JSON output schema and one example entry.
    """
    if detailed_classes is None:
        detailed_classes = []
    elif isinstance(detailed_classes, str):
        detailed_classes = [cls.strip() for cls in detailed_classes.split('\n') if cls.strip()]

    # Build detailed detection instructions
    if detection_mode == "specific":
        condition_text = "ONLY detect objects that match these specific detailed criteria. Ignore all other objects:"
    elif detection_mode == "include":
        condition_text = "Detect objects matching these detailed criteria AND any other objects you can identify:"
    else:  # "exclude"
        condition_text = "Detect all objects EXCEPT those matching these detailed criteria. Avoid detecting:"

    # Number each criterion so the model can reference it in "criteria_match"
    detailed_specs = [f"{i}. {spec}" for i, spec in enumerate(detailed_classes, 1)]
    classes_text = "\n".join(detailed_specs) if detailed_specs else "No specific criteria provided"

    prompt_lines = [
        condition_text,
        "",
        classes_text,
        "",
        "Detection Instructions:",
        "- Carefully analyze each object against the detailed specifications above",
        f"- Only include detections with confidence above {confidence_threshold}",
        "- For each detection, provide specific measurements, characteristics, or details when possible",
        "- Be precise about the criteria matching (e.g., actual crack width, size measurements, specific conditions)",
        "",
        "Output a JSON list where each entry contains:",
        '- "x": normalized x coordinate (0-1) of top-left corner',
        '- "y": normalized y coordinate (0-1) of top-left corner',
        '- "width": normalized width (0-1) of the bounding box',
        '- "height": normalized height (0-1) of the bounding box',
        '- "label": detailed description with measurements/characteristics and confidence score',
        '- "confidence": confidence score (0-1)',
        '- "class": the general category name',
        '- "details": specific measurements, characteristics, or conditions observed',
        '- "criteria_match": which detailed criteria this detection matches (reference number from list above)',
        "",
        "Example format for crack detection:",
        '[{"x": 0.1, "y": 0.2, "width": 0.3, "height": 0.4, "label": "crack width ~3mm, length ~15cm (0.92)", "confidence": 0.92, "class": "crack", "details": "width: 3mm, length: 15cm, surface: concrete", "criteria_match": 1}]',
    ]

    return "\n".join(prompt_lines)
207
+
208
def _filter_detections(detections, conf_threshold):
    """Return the dict entries of ``detections`` at or above ``conf_threshold``.

    Anything that is not a list yields an empty result; entries without a
    ``confidence`` key count as confidence 1.0.
    """
    if not isinstance(detections, list):
        return []
    return [
        det for det in detections
        if isinstance(det, dict) and det.get('confidence', 1.0) >= conf_threshold
    ]


def _summarize_detections(detections, mode_val):
    """Build the human-readable summary: a header plus per-class details."""
    mode_descriptions = {
        "specific": "Detecting only objects matching detailed specifications",
        "include": "Including specified detailed criteria + other objects",
        "exclude": "Excluding objects matching detailed specifications"
    }
    parts = [f"✅ {mode_descriptions.get(mode_val, 'Detection')} - Found {len(detections)} objects"]
    if not detections:
        return parts[0]

    # Group by class, preserving first-seen order
    by_class = {}
    for det in detections:
        by_class.setdefault(det.get('class', 'unknown'), []).append(det)

    parts.append("\n\nDetailed Results:")
    for class_name, items in by_class.items():
        parts.append(f"\n• {class_name} ({len(items)} found):")
        for det in items[:3]:  # Show first 3 items
            entry = f"\n - {det.get('details', '')} (conf: {det.get('confidence', 1.0):.2f})"
            criteria = det.get('criteria_match', '')
            if criteria:
                entry += f" [criteria: {criteria}]"
            parts.append(entry)
        if len(items) > 3:
            parts.append(f"\n ... and {len(items) - 3} more")

    return "".join(parts)


def create_interface():
    """Create the Gradio interface for object detection.

    Lays out a configuration column (API key, model, detection mode,
    specifications, confidence threshold, temperature, image upload and the
    detect button) next to a results column (annotated image, JSON details
    and a text summary), wires the button to the detection callback and
    appends usage tips.

    Returns:
        The assembled ``gr.Blocks`` app (not yet launched).
    """
    with gr.Blocks(title="Detailed Object Detection", theme=gr.themes.Soft()) as demo:
        gr.Markdown("# 🔍 Detailed Object Detection with Custom Specifications")
        gr.Markdown("Detect objects with detailed specifications (e.g., 'crack width more than 2mm', 'rust spots larger than 5cm')")

        with gr.Row():
            with gr.Column(scale=1):
                gr.Markdown("## ⚙️ Configuration")
                api_key = gr.Textbox(
                    label="OpenRouter API Key",
                    placeholder="Enter your OpenRouter API key...",
                    type="password"
                )

                model = gr.Dropdown(
                    choices=[
                        "google/gemini-2.5-pro",
                        "google/gemini-1.5-pro",
                        "google/gemini-1.5-flash",
                        "anthropic/claude-3.5-sonnet",
                        "openai/gpt-4o",
                        "openai/gpt-4o-mini"
                    ],
                    value="google/gemini-2.5-pro",
                    label="Detection Model"
                )

                detection_mode = gr.Radio(
                    choices=[
                        ("Detect Only These Specifications", "specific"),
                        ("Include These + Others", "include"),
                        ("Exclude These Specifications", "exclude")
                    ],
                    value="specific",
                    label="Detection Mode",
                    info="How to handle the specified detailed criteria"
                )

                detailed_specifications = gr.Textbox(
                    label="Detailed Detection Specifications",
                    placeholder=(
                        "Enter each specification on a new line, e.g.:\n"
                        "crack width more than 2mm\n"
                        "rust spots larger than 5cm in diameter\n"
                        "concrete spalling deeper than 1cm\n"
                        "structural damage with visible deformation\n"
                        "paint peeling areas greater than 10cm²"
                    ),
                    value=(
                        "crack width more than 2mm\n"
                        "rust spots larger than 5cm in diameter\n"
                        "concrete spalling deeper than 1cm"
                    ),
                    lines=8,
                    info="Enter detailed specifications, one per line"
                )

                confidence_threshold = gr.Slider(
                    minimum=0.1,
                    maximum=1.0,
                    value=0.5,
                    step=0.05,
                    label="Confidence Threshold",
                    info="Minimum confidence for detection"
                )

                temperature = gr.Slider(
                    minimum=0,
                    maximum=1,
                    value=0.3,
                    step=0.05,
                    label="Temperature",
                    info="Lower values for more consistent results"
                )

                image_input = gr.Image(
                    type="pil",
                    label="Upload Image for Detection"
                )

                detect_btn = gr.Button("🚀 Detect Objects", variant="primary", size="lg")

            with gr.Column(scale=1):
                gr.Markdown("## 📊 Detection Results")

                annotated_image = gr.Image(
                    label="Detected Objects",
                    type="pil"
                )

                detection_results = gr.Textbox(
                    label="Detection Details (JSON)",
                    lines=10,
                    show_copy_button=True
                )

                detection_summary = gr.Textbox(
                    label="Detection Summary",
                    lines=3
                )

        def process_detection(image, detailed_specs, conf_threshold, api_key_val, model_val, temp_val, mode_val):
            """Run one detection and return (image, JSON/details text, summary)."""
            if not api_key_val:
                return None, "❌ Please enter your OpenRouter API key", "No API key provided"

            if image is None:
                return None, "❌ Please upload an image", "No image uploaded"

            if not detailed_specs or not detailed_specs.strip():
                return None, "❌ Please enter at least one detailed specification", "No specifications provided"

            try:
                prompt = create_detection_prompt(detailed_specs, conf_threshold, mode_val)

                result, error = process_with_openrouter(image, prompt, api_key_val, model_val, temp_val)

                if error:
                    # The message already says what went wrong; don't prefix
                    # it with a second "Error:".
                    return None, f"❌ {result}", "Detection failed"

                detections = json.loads(result)

                # Filter BEFORE drawing so the annotated image, the JSON and
                # the summary all describe the same set of detections.
                filtered_detections = _filter_detections(detections, conf_threshold)

                if not filtered_detections:
                    return image, "No objects detected matching detailed specifications", "No detections matching criteria above confidence threshold"

                annotated_img = draw_bounding_boxes(image, filtered_detections)
                summary_text = _summarize_detections(filtered_detections, mode_val)

                return annotated_img, json.dumps(filtered_detections, indent=2), summary_text

            except json.JSONDecodeError:
                return None, f"❌ Invalid JSON response: {result}", "JSON parsing failed"
            except Exception as e:
                # UI boundary: show the failure instead of crashing the app.
                return None, f"❌ Error: {str(e)}", "Processing error"

        detect_btn.click(
            process_detection,
            inputs=[image_input, detailed_specifications, confidence_threshold, api_key, model, temperature, detection_mode],
            outputs=[annotated_image, detection_results, detection_summary]
        )

        gr.Markdown("""
        ## 💡 Usage Tips
        - **Specific Mode**: Only detect objects matching your detailed specifications
        - **Include Mode**: Detect your specified criteria plus any other objects found
        - **Exclude Mode**: Detect everything except objects matching your specifications

        ### Example Detailed Specifications:
        ```
        crack width more than 2mm
        rust spots larger than 5cm in diameter
        concrete spalling deeper than 1cm
        structural damage with visible deformation
        paint peeling areas greater than 10cm²
        corrosion affecting more than 20% of surface area
        missing bolts or fasteners
        water damage stains larger than 15cm
        ```

        - Enter one detailed specification per line
        - Be specific about measurements, sizes, conditions
        - Adjust confidence threshold to filter weak detections
        - Use lower temperature values for consistent results
        - Get your API key from [openrouter.ai](https://openrouter.ai/)
        """)

    return demo
407
+
408
# Script entry point: build the UI and serve it locally.
if __name__ == "__main__":
    print("🚀 Starting Object Detection App...")
    demo = create_interface()
    # share=False: no public share link; inbrowser=True: open a browser tab.
    demo.launch(share=False, inbrowser=True)