Sompote committed on
Commit
b0e0067
·
verified ·
1 Parent(s): 440751c

Upload app.py

Browse files
Files changed (1) hide show
  1. app.py +161 -136
app.py CHANGED
@@ -5,7 +5,7 @@ import base64
5
  from PIL import Image, ImageDraw, ImageFont
6
  import io
7
 
8
- def process_with_openrouter(image, prompt, api_key, model="openai/gpt-5-chat", temperature=0.5):
9
  """Process image with OpenRouter API for object detection"""
10
  if not api_key:
11
  return "Please enter your OpenRouter API key", "error"
@@ -64,7 +64,7 @@ def process_with_openrouter(image, prompt, api_key, model="openai/gpt-5-chat", t
64
  return f"Error processing request: {str(e)}", "error"
65
 
66
  def draw_bounding_boxes(image, detections):
67
- """Draw bounding boxes with detailed labels on the image"""
68
  if not detections or len(detections) == 0:
69
  return image
70
 
@@ -72,13 +72,26 @@ def draw_bounding_boxes(image, detections):
72
  draw = ImageDraw.Draw(annotated_image)
73
 
74
  try:
75
- font = ImageFont.truetype("/System/Library/Fonts/Arial.ttf", 14)
76
- small_font = ImageFont.truetype("/System/Library/Fonts/Arial.ttf", 12)
77
  except:
78
  font = ImageFont.load_default()
79
- small_font = ImageFont.load_default()
80
 
81
- colors = ["#FF0000", "#00FF00", "#0000FF", "#FFFF00", "#FF00FF", "#00FFFF", "#FFA500", "#800080"]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
82
 
83
  for i, detection in enumerate(detections):
84
  if all(key in detection for key in ['x', 'y', 'width', 'height']):
@@ -87,12 +100,8 @@ def draw_bounding_boxes(image, detections):
87
  width = detection['width'] * image.width
88
  height = detection['height'] * image.height
89
 
90
- # Get detection information
91
- label = detection.get('label', f'Detection {i+1}')
92
- class_name = detection.get('class', 'unknown')
93
- details = detection.get('details', '')
94
- criteria_match = detection.get('criteria_match', '')
95
- confidence = detection.get('confidence', 1.0)
96
 
97
  x1, y1 = int(x), int(y)
98
  x2, y2 = int(x + width), int(y + height)
@@ -102,114 +111,114 @@ def draw_bounding_boxes(image, detections):
102
  x2 = max(0, min(x2, image.width))
103
  y2 = max(0, min(y2, image.height))
104
 
105
- color = colors[i % len(colors)]
 
 
 
 
 
 
106
 
107
- # Draw bounding box with thicker line for better visibility
108
  draw.rectangle([x1, y1, x2, y2], outline=color, width=4)
109
 
110
- # Create multi-line label with detailed information
111
- display_lines = []
112
- display_lines.append(f"{class_name} ({confidence:.2f})")
113
-
114
- if details:
115
- # Truncate details if too long
116
- details_short = details[:40] + "..." if len(details) > 40 else details
117
- display_lines.append(details_short)
118
-
119
- if criteria_match:
120
- display_lines.append(f"Criteria: {criteria_match}")
121
-
122
- # Calculate total label size
123
- max_width = 0
124
- total_height = 0
125
- line_heights = []
126
-
127
- for line in display_lines:
128
- text_bbox = draw.textbbox((0, 0), line, font=small_font)
129
- line_width = text_bbox[2] - text_bbox[0]
130
- line_height = text_bbox[3] - text_bbox[1]
131
- max_width = max(max_width, line_width)
132
- total_height += line_height + 2
133
- line_heights.append(line_height)
134
 
135
  # Position label above the box, or below if no space above
136
- if y1 - total_height - 4 >= 0:
137
- label_y = y1 - total_height - 4
138
  else:
139
- label_y = y2 + 2
140
 
141
  label_x = x1
142
 
143
  # Ensure label stays within image bounds
144
- if label_x + max_width > image.width:
145
- label_x = image.width - max_width - 4
146
 
147
  # Draw label background
148
  draw.rectangle(
149
- [label_x - 2, label_y, label_x + max_width + 4, label_y + total_height + 2],
150
  fill=color,
151
  outline=color
152
  )
153
 
154
- # Draw each line of text
155
- current_y = label_y + 2
156
- for j, line in enumerate(display_lines):
157
- draw.text((label_x + 2, current_y), line, fill="white", font=small_font)
158
- current_y += line_heights[j] + 2
159
 
160
  return annotated_image
161
 
162
- def create_detection_prompt(detailed_classes, confidence_threshold=0.5, detection_mode="specific"):
163
- """Create a detection prompt for detailed class specifications with different modes"""
164
- if isinstance(detailed_classes, str):
165
- detailed_classes = [cls.strip() for cls in detailed_classes.split('\n') if cls.strip()]
166
 
167
- # Build detailed detection instructions
168
  if detection_mode == "specific":
169
- condition_text = "ONLY detect objects that match these specific detailed criteria. Ignore all other objects:"
170
  elif detection_mode == "include":
171
- condition_text = "Detect objects matching these detailed criteria AND any other objects you can identify:"
172
  else: # "exclude"
173
- condition_text = "Detect all objects EXCEPT those matching these detailed criteria. Avoid detecting:"
174
 
175
- # Format each detailed class specification
176
- detailed_specs = []
177
- for i, spec in enumerate(detailed_classes, 1):
178
- detailed_specs.append(f"{i}. {spec}")
 
 
 
 
 
 
 
179
 
180
- classes_text = "\n".join(detailed_specs) if detailed_specs else "No specific criteria provided"
181
 
182
  prompt = f"""{condition_text}
183
 
184
  {classes_text}
185
 
186
  Detection Instructions:
187
- - Carefully analyze each object against the detailed specifications above
 
188
  - Only include detections with confidence above {confidence_threshold}
189
- - For each detection, provide specific measurements, characteristics, or details when possible
190
- - Be precise about the criteria matching (e.g., actual crack width, size measurements, specific conditions)
 
 
 
 
 
 
 
191
 
192
  Output a JSON list where each entry contains:
193
  - "x": normalized x coordinate (0-1) of top-left corner
194
  - "y": normalized y coordinate (0-1) of top-left corner
195
  - "width": normalized width (0-1) of the bounding box
196
  - "height": normalized height (0-1) of the bounding box
197
- - "label": detailed description with measurements/characteristics and confidence score
198
  - "confidence": confidence score (0-1)
199
- - "class": the general category name
200
- - "details": specific measurements, characteristics, or conditions observed
201
- - "criteria_match": which detailed criteria this detection matches (reference number from list above)
 
 
202
 
203
- Example format for crack detection:
204
- [{{"x": 0.1, "y": 0.2, "width": 0.3, "height": 0.4, "label": "crack width ~3mm, length ~15cm (0.92)", "confidence": 0.92, "class": "crack", "details": "width: 3mm, length: 15cm, surface: concrete", "criteria_match": 1}}]"""
205
 
206
  return prompt
207
 
208
  def create_interface():
209
  """Create the Gradio interface for object detection"""
210
- with gr.Blocks(title="Detailed Object Detection", theme=gr.themes.Soft()) as demo:
211
- gr.Markdown("# 🔍 Detailed Object Detection with Custom Specifications")
212
- gr.Markdown("Detect objects with detailed specifications (e.g., 'crack width more than 2mm', 'rust spots larger than 5cm')")
213
 
214
  with gr.Row():
215
  with gr.Column(scale=1):
@@ -230,6 +239,8 @@ def create_interface():
230
 
231
  model_preset = gr.Dropdown(
232
  choices=[
 
 
233
  "openai/gpt-5-chat",
234
  "openai/gpt-5-mini",
235
  "anthropic/claude-opus-4.1",
@@ -241,7 +252,7 @@ def create_interface():
241
  "openai/gpt-4o",
242
  "openai/gpt-4o-mini"
243
  ],
244
- value="openai/gpt-5-chat",
245
  label="Preset Models",
246
  info="Select from popular OpenRouter models",
247
  visible=True
@@ -256,28 +267,28 @@ def create_interface():
256
 
257
  detection_mode = gr.Radio(
258
  choices=[
259
- ("Detect Only These Specifications", "specific"),
260
- ("Include These + Others", "include"),
261
- ("Exclude These Specifications", "exclude")
262
  ],
263
  value="specific",
264
  label="Detection Mode",
265
- info="How to handle the specified detailed criteria"
266
  )
267
 
268
- detailed_specifications = gr.Textbox(
269
- label="Detailed Detection Specifications",
270
- placeholder="""Enter each specification on a new line, e.g.:
271
- crack width more than 2mm
272
- rust spots larger than 5cm in diameter
273
- concrete spalling deeper than 1cm
274
- structural damage with visible deformation
275
- paint peeling areas greater than 10cm²""",
276
- value="""crack width more than 2mm
277
- rust spots larger than 5cm in diameter
278
- concrete spalling deeper than 1cm""",
279
  lines=8,
280
- info="Enter detailed specifications, one per line"
281
  )
282
 
283
  confidence_threshold = gr.Slider(
@@ -337,15 +348,15 @@ concrete spalling deeper than 1cm""",
337
  outputs=[model_preset, custom_model_input]
338
  )
339
 
340
- def process_detection(image, detailed_specs, conf_threshold, api_key_val, use_preset_val, model_preset_val, custom_model_val, temp_val, mode_val):
341
  if not api_key_val:
342
  return None, "❌ Please enter your OpenRouter API key", "No API key provided"
343
 
344
  if image is None:
345
  return None, "❌ Please upload an image", "No image uploaded"
346
 
347
- if not detailed_specs or not detailed_specs.strip():
348
- return None, "❌ Please enter at least one detailed specification", "No specifications provided"
349
 
350
  # Determine which model to use
351
  if use_preset_val == "Custom Model":
@@ -356,7 +367,7 @@ concrete spalling deeper than 1cm""",
356
  final_model = model_preset_val
357
 
358
  try:
359
- prompt = create_detection_prompt(detailed_specs, conf_threshold, mode_val)
360
 
361
  result, error = process_with_openrouter(image, prompt, api_key_val, final_model, temp_val)
362
 
@@ -371,43 +382,41 @@ concrete spalling deeper than 1cm""",
371
  filtered_detections = [d for d in detections if d.get('confidence', 1.0) >= conf_threshold]
372
 
373
  mode_descriptions = {
374
- "specific": "Detecting only objects matching detailed specifications",
375
- "include": "Including specified detailed criteria + other objects",
376
- "exclude": "Excluding objects matching detailed specifications"
377
  }
378
 
379
  summary_text = f"✅ {mode_descriptions.get(mode_val, 'Detection')} - Found {len(filtered_detections)} objects\n🤖 Model: {final_model}"
380
 
381
  if filtered_detections:
382
- # Group by class and show details
383
- class_details = {}
384
  for det in filtered_detections:
385
  class_name = det.get('class', 'unknown')
386
- details = det.get('details', '')
387
- criteria_match = det.get('criteria_match', '')
388
 
389
- if class_name not in class_details:
390
- class_details[class_name] = []
 
 
 
 
391
 
392
- class_details[class_name].append({
393
- 'details': details,
394
- 'criteria': criteria_match,
395
- 'confidence': det.get('confidence', 1.0)
396
- })
397
 
398
- summary_text += "\n\nDetailed Results:"
399
- for class_name, items in class_details.items():
400
- summary_text += f"\n• {class_name} ({len(items)} found):"
401
- for item in items[:3]: # Show first 3 items
402
- summary_text += f"\n - {item['details']} (conf: {item['confidence']:.2f})"
403
- if item['criteria']:
404
- summary_text += f" [criteria: {item['criteria']}]"
405
- if len(items) > 3:
406
- summary_text += f"\n ... and {len(items)-3} more"
407
 
408
  return annotated_img, json.dumps(filtered_detections, indent=2), summary_text
409
  else:
410
- return image, "No objects detected matching detailed specifications", "No detections matching criteria above confidence threshold"
411
 
412
  except json.JSONDecodeError:
413
  return None, f"❌ Invalid JSON response: {result}", "JSON parsing failed"
@@ -416,41 +425,57 @@ concrete spalling deeper than 1cm""",
416
 
417
  detect_btn.click(
418
  process_detection,
419
- inputs=[image_input, detailed_specifications, confidence_threshold, api_key, use_preset, model_preset, custom_model_input, temperature, detection_mode],
420
  outputs=[annotated_image, detection_results, detection_summary]
421
  )
422
 
423
  gr.Markdown("""
424
  ## 💡 Usage Tips
425
- - **Specific Mode**: Only detect objects matching your detailed specifications
426
- - **Include Mode**: Detect your specified criteria plus any other objects found
427
- - **Exclude Mode**: Detect everything except objects matching your specifications
 
 
 
 
 
 
 
 
 
 
428
 
429
  ### 🤖 Model Selection
430
  **Default Models (Recommended):**
431
- - `openai/gpt-5-chat` - Latest GPT-5 with advanced vision capabilities (Default)
 
 
432
  - `openai/gpt-5-mini` - Faster, efficient GPT-5 variant
433
  - `anthropic/claude-opus-4.1` - Next-gen Claude with superior reasoning
434
  - `x-ai/grok-4` - Advanced Grok model with detailed analysis
435
 
436
  **Custom Models**: Enter any OpenRouter model ID from [openrouter.ai/models](https://openrouter.ai/models)
437
 
438
- ### Example Detailed Specifications:
439
  ```
440
- crack width more than 2mm
441
- rust spots larger than 5cm in diameter
442
- concrete spalling deeper than 1cm
443
- structural damage with visible deformation
444
- paint peeling areas greater than 10cm²
445
- corrosion affecting more than 20% of surface area
446
- missing bolts or fasteners
447
- water damage stains larger than 15cm
448
  ```
449
 
450
- - Enter one detailed specification per line
451
- - Be specific about measurements, sizes, conditions
 
 
 
 
 
 
 
452
  - Adjust confidence threshold to filter weak detections
453
- - Use lower temperature values for consistent results
454
  - Get your API key from [openrouter.ai](https://openrouter.ai/)
455
  """)
456
 
 
5
  from PIL import Image, ImageDraw, ImageFont
6
  import io
7
 
8
+ def process_with_openrouter(image, prompt, api_key, model="qwen/qwen2.5-vl-32b-instruct", temperature=0.5):
9
  """Process image with OpenRouter API for object detection"""
10
  if not api_key:
11
  return "Please enter your OpenRouter API key", "error"
 
64
  return f"Error processing request: {str(e)}", "error"
65
 
66
  def draw_bounding_boxes(image, detections):
67
+ """Draw bounding boxes with class names only, same color per class"""
68
  if not detections or len(detections) == 0:
69
  return image
70
 
 
72
  draw = ImageDraw.Draw(annotated_image)
73
 
74
  try:
75
+ font = ImageFont.truetype("/System/Library/Fonts/Arial.ttf", 16)
 
76
  except:
77
  font = ImageFont.load_default()
 
78
 
79
+ # Predefined colors for different classes
80
+ class_colors = {
81
+ "Class I": "#FF0000", # Red
82
+ "Class II": "#00FF00", # Green
83
+ "Class III": "#0000FF", # Blue
84
+ "Class IV": "#FFFF00", # Yellow
85
+ "Class V": "#FF00FF", # Magenta
86
+ "Class VI": "#00FFFF", # Cyan
87
+ "Class VII": "#FFA500", # Orange
88
+ "Class VIII": "#800080", # Purple
89
+ "Class IX": "#008000", # Dark Green
90
+ "Class X": "#FF1493", # Deep Pink
91
+ }
92
+
93
+ # Fallback colors if more than 10 classes
94
+ fallback_colors = ["#8B4513", "#2F4F4F", "#DC143C", "#00CED1", "#FF4500", "#DA70D6", "#32CD32", "#FF6347"]
95
 
96
  for i, detection in enumerate(detections):
97
  if all(key in detection for key in ['x', 'y', 'width', 'height']):
 
100
  width = detection['width'] * image.width
101
  height = detection['height'] * image.height
102
 
103
+ # Get class name - this is what we'll display
104
+ class_name = detection.get('class', f'Class {i+1}')
 
 
 
 
105
 
106
  x1, y1 = int(x), int(y)
107
  x2, y2 = int(x + width), int(y + height)
 
111
  x2 = max(0, min(x2, image.width))
112
  y2 = max(0, min(y2, image.height))
113
 
114
+ # Get consistent color for this class
115
+ if class_name in class_colors:
116
+ color = class_colors[class_name]
117
+ else:
118
+ # Use hash of class name to get consistent color
119
+ color_index = hash(class_name) % len(fallback_colors)
120
+ color = fallback_colors[color_index]
121
 
122
+ # Draw bounding box
123
  draw.rectangle([x1, y1, x2, y2], outline=color, width=4)
124
 
125
+ # Calculate label size
126
+ text_bbox = draw.textbbox((0, 0), class_name, font=font)
127
+ text_width = text_bbox[2] - text_bbox[0]
128
+ text_height = text_bbox[3] - text_bbox[1]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
129
 
130
  # Position label above the box, or below if no space above
131
+ if y1 - text_height - 6 >= 0:
132
+ label_y = y1 - text_height - 6
133
  else:
134
+ label_y = y2 + 4
135
 
136
  label_x = x1
137
 
138
  # Ensure label stays within image bounds
139
+ if label_x + text_width + 4 > image.width:
140
+ label_x = image.width - text_width - 4
141
 
142
  # Draw label background
143
  draw.rectangle(
144
+ [label_x - 2, label_y - 2, label_x + text_width + 2, label_y + text_height + 2],
145
  fill=color,
146
  outline=color
147
  )
148
 
149
+ # Draw class name
150
+ draw.text((label_x, label_y), class_name, fill="white", font=font)
 
 
 
151
 
152
  return annotated_image
153
 
154
def create_detection_prompt(class_descriptions, confidence_threshold=0.5, detection_mode="specific"):
    """Build the text prompt that asks a vision model for class-based detections.

    Args:
        class_descriptions: Either a newline-separated string or an iterable
            of entries. Each entry is ``"Class Name: description"`` or just
            ``"description"``. Entries are stripped; blank entries (and
            ``None`` items in an iterable) are dropped. ``None`` is treated
            as "no descriptions".
        confidence_threshold: Value interpolated into the instruction telling
            the model which detections to keep.
        detection_mode: ``"specific"`` or ``"include"``; any other value
            selects the "exclude" wording.

    Returns:
        The prompt string: a mode sentence, the numbered class list (or
        ``"No class descriptions provided"`` when the list is empty), the
        detection and scale-measurement instructions, and the JSON output
        schema with one example entry.
    """
    # Normalise every accepted input shape to a clean list of non-empty
    # strings so that None no longer crashes and iterable input gets the same
    # strip/filter treatment string input always had.
    if class_descriptions is None:
        raw_entries = []
    elif isinstance(class_descriptions, str):
        raw_entries = class_descriptions.split('\n')
    else:
        raw_entries = [str(item) for item in class_descriptions if item is not None]
    class_descriptions = [entry.strip() for entry in raw_entries if entry.strip()]

    # Build detection instructions
    if detection_mode == "specific":
        condition_text = "ONLY detect objects that match these class descriptions and their conditions. Ignore all other objects:"
    elif detection_mode == "include":
        condition_text = "Detect objects matching these class descriptions AND any other objects you can identify:"
    else:  # "exclude"
        condition_text = "Detect all objects EXCEPT those matching these class descriptions. Avoid detecting:"

    # Format each class description
    class_specs = []
    for i, description in enumerate(class_descriptions, 1):
        # Only the first colon separates the optional class name from its
        # description; later colons stay inside the description text.
        class_name, separator, class_desc = description.partition(':')
        class_name = class_name.strip()
        class_desc = class_desc.strip()
        if separator and class_name:
            class_specs.append(f"Class {i} ({class_name}): {class_desc}")
        elif separator:
            # Entry such as ": description" has no usable name; avoid
            # emitting an empty "()" label.
            class_specs.append(f"Class {i}: {class_desc}")
        else:
            class_specs.append(f"Class {i}: {description}")

    classes_text = "\n".join(class_specs) if class_specs else "No class descriptions provided"

    prompt = f"""{condition_text}

{classes_text}

Detection Instructions:
- Analyze each object against the class descriptions above
- Check if objects meet the specified conditions for each class
- Only include detections with confidence above {confidence_threshold}
- Assign objects to the most appropriate class based on the descriptions

SCALE/RULER DETECTION FOR CRACK MEASUREMENT:
- First look for scales, rulers, measurement tools, or reference objects in the image
- If found, identify the scale markings and determine the measurement reference
- Use the scale to calculate actual crack widths in millimeters or appropriate units
- For crack classifications, measure crack width using the identified scale
- Include actual measurements in your analysis (e.g., "2.5mm crack width based on ruler scale")
- If no scale is visible, estimate crack width relative to common objects or provide qualitative assessment

Output a JSON list where each entry contains:
- "x": normalized x coordinate (0-1) of top-left corner
- "y": normalized y coordinate (0-1) of top-left corner
- "width": normalized width (0-1) of the bounding box
- "height": normalized height (0-1) of the bounding box
- "label": brief description with confidence score
- "confidence": confidence score (0-1)
- "class": the assigned class name (e.g., "Class I", "Class II", etc.)
- "description": why this object matches the class criteria
- "class_number": the class number from the list above (1, 2, 3, etc.)
- "measured_width": actual crack width measurement if scale is available (e.g., "2.5mm", "1.2cm")
- "measurement_method": how the measurement was obtained (e.g., "ruler scale", "coin reference", "estimated")

Example format:
[{{"x": 0.1, "y": 0.2, "width": 0.3, "height": 0.4, "label": "Structural crack (0.92)", "confidence": 0.92, "class": "Class I", "description": "Crack width exceeds 2mm threshold based on ruler measurement", "class_number": 1, "measured_width": "2.5mm", "measurement_method": "ruler scale"}}]"""

    return prompt
216
 
217
  def create_interface():
218
  """Create the Gradio interface for object detection"""
219
+ with gr.Blocks(title="Class-Based Object Detection", theme=gr.themes.Soft()) as demo:
220
+ gr.Markdown("# 🔍 Class-Based Object Detection with Descriptions")
221
+ gr.Markdown("Define classes with descriptions and conditions. Objects will be classified and annotated with class names only.")
222
 
223
  with gr.Row():
224
  with gr.Column(scale=1):
 
239
 
240
  model_preset = gr.Dropdown(
241
  choices=[
242
+ "qwen/qwen2.5-vl-32b-instruct",
243
+ "qwen/qwen-vl-max",
244
  "openai/gpt-5-chat",
245
  "openai/gpt-5-mini",
246
  "anthropic/claude-opus-4.1",
 
252
  "openai/gpt-4o",
253
  "openai/gpt-4o-mini"
254
  ],
255
+ value="qwen/qwen2.5-vl-32b-instruct",
256
  label="Preset Models",
257
  info="Select from popular OpenRouter models",
258
  visible=True
 
267
 
268
  detection_mode = gr.Radio(
269
  choices=[
270
+ ("Detect Only These Classes", "specific"),
271
+ ("Include These Classes + Others", "include"),
272
+ ("Exclude These Classes", "exclude")
273
  ],
274
  value="specific",
275
  label="Detection Mode",
276
+ info="How to handle the specified class descriptions"
277
  )
278
 
279
+ class_descriptions = gr.Textbox(
280
+ label="Class Descriptions",
281
+ placeholder="""Define each class with its description and conditions, e.g.:
282
+ Severe Cracks: Crack width more than 2mm (use ruler/scale if present for measurement)
283
+ Minor Cracks: Crack width 0.5-2mm (measure using visible scale)
284
+ Rust Damage: Rust spots larger than 5cm in diameter
285
+ Concrete Spalling: Concrete spalling deeper than 1cm
286
+ Paint Defects: Paint peeling areas greater than 10cm²""",
287
+ value="""Severe Cracks: Crack width more than 2mm (use ruler/scale if present for measurement)
288
+ Minor Cracks: Crack width 0.5-2mm (measure using visible scale)
289
+ Rust Damage: Rust spots larger than 5cm in diameter""",
290
  lines=8,
291
+ info="Enter class descriptions, one per line. Format: 'Class Name: Description' or just 'Description'"
292
  )
293
 
294
  confidence_threshold = gr.Slider(
 
348
  outputs=[model_preset, custom_model_input]
349
  )
350
 
351
+ def process_detection(image, class_desc, conf_threshold, api_key_val, use_preset_val, model_preset_val, custom_model_val, temp_val, mode_val):
352
  if not api_key_val:
353
  return None, "❌ Please enter your OpenRouter API key", "No API key provided"
354
 
355
  if image is None:
356
  return None, "❌ Please upload an image", "No image uploaded"
357
 
358
+ if not class_desc or not class_desc.strip():
359
+ return None, "❌ Please enter at least one class description", "No class descriptions provided"
360
 
361
  # Determine which model to use
362
  if use_preset_val == "Custom Model":
 
367
  final_model = model_preset_val
368
 
369
  try:
370
+ prompt = create_detection_prompt(class_desc, conf_threshold, mode_val)
371
 
372
  result, error = process_with_openrouter(image, prompt, api_key_val, final_model, temp_val)
373
 
 
382
  filtered_detections = [d for d in detections if d.get('confidence', 1.0) >= conf_threshold]
383
 
384
  mode_descriptions = {
385
+ "specific": "Detecting only objects matching class descriptions",
386
+ "include": "Including specified classes + other objects",
387
+ "exclude": "Excluding objects matching class descriptions"
388
  }
389
 
390
  summary_text = f"✅ {mode_descriptions.get(mode_val, 'Detection')} - Found {len(filtered_detections)} objects\n🤖 Model: {final_model}"
391
 
392
  if filtered_detections:
393
+ # Group by class and show counts
394
+ class_counts = {}
395
  for det in filtered_detections:
396
  class_name = det.get('class', 'unknown')
397
+ description = det.get('description', '')
398
+ confidence = det.get('confidence', 1.0)
399
 
400
+ if class_name not in class_counts:
401
+ class_counts[class_name] = {
402
+ 'count': 0,
403
+ 'avg_confidence': 0,
404
+ 'descriptions': []
405
+ }
406
 
407
+ class_counts[class_name]['count'] += 1
408
+ class_counts[class_name]['avg_confidence'] += confidence
409
+ if description and description not in class_counts[class_name]['descriptions']:
410
+ class_counts[class_name]['descriptions'].append(description)
 
411
 
412
+ summary_text += "\n\nClass Detection Results:"
413
+ for class_name, data in class_counts.items():
414
+ avg_conf = data['avg_confidence'] / data['count']
415
+ summary_text += f"\n• {class_name}: {data['count']} detected (avg conf: {avg_conf:.2f})"
 
 
 
 
 
416
 
417
  return annotated_img, json.dumps(filtered_detections, indent=2), summary_text
418
  else:
419
+ return image, "No objects detected matching class descriptions", "No detections matching criteria above confidence threshold"
420
 
421
  except json.JSONDecodeError:
422
  return None, f"❌ Invalid JSON response: {result}", "JSON parsing failed"
 
425
 
426
  detect_btn.click(
427
  process_detection,
428
+ inputs=[image_input, class_descriptions, confidence_threshold, api_key, use_preset, model_preset, custom_model_input, temperature, detection_mode],
429
  outputs=[annotated_image, detection_results, detection_summary]
430
  )
431
 
432
  gr.Markdown("""
433
  ## 💡 Usage Tips
434
+ - **Specific Mode**: Only detect objects matching your class descriptions
435
+ - **Include Mode**: Detect your specified classes plus any other objects found
436
+ - **Exclude Mode**: Detect everything except objects matching your class descriptions
437
+
438
+ ### 🏷️ Class Definition
439
+ **Format Options:**
440
+ 1. `Class Name: Description` - e.g., "Severe Cracks: Crack width more than 2mm"
441
+ 2. `Description only` - Will be automatically assigned as "Class I", "Class II", etc.
442
+
443
+ **Annotation Behavior:**
444
+ - Images show only class names (e.g., "Class I", "Class II")
445
+ - Same class = same color throughout the image
446
+ - Clean, simple visual identification
447
 
448
  ### 🤖 Model Selection
449
  **Default Models (Recommended):**
450
+ - `qwen/qwen2.5-vl-32b-instruct` - Advanced Qwen vision model optimized for detailed analysis (Default)
451
+ - `qwen/qwen-vl-max` - Premium Qwen vision model with maximum capabilities
452
+ - `openai/gpt-5-chat` - Latest GPT-5 with advanced vision capabilities
453
  - `openai/gpt-5-mini` - Faster, efficient GPT-5 variant
454
  - `anthropic/claude-opus-4.1` - Next-gen Claude with superior reasoning
455
  - `x-ai/grok-4` - Advanced Grok model with detailed analysis
456
 
457
  **Custom Models**: Enter any OpenRouter model ID from [openrouter.ai/models](https://openrouter.ai/models)
458
 
459
+ ### Example Class Descriptions:
460
  ```
461
+ Severe Cracks: Crack width more than 2mm (use ruler/scale for measurement)
462
+ Minor Cracks: Crack width 0.5-2mm (measure using visible scale)
463
+ Rust Damage: Rust spots larger than 5cm in diameter
464
+ Concrete Spalling: Concrete spalling deeper than 1cm
465
+ Paint Defects: Paint peeling areas greater than 10cm²
466
+ Water Damage: Water damage stains larger than 15cm
 
 
467
  ```
468
 
469
+ ### 📏 Scale-Based Measurement:
470
+ - **Automatic Scale Detection**: The system looks for rulers, measuring tools, or reference objects
471
+ - **Precise Measurements**: When scales are found, actual crack widths are calculated
472
+ - **Measurement Methods**: Supports rulers, crack gauges, coins, or other reference objects
473
+ - **Enhanced Classification**: More accurate class assignment based on measured dimensions
474
+
475
+ - Enter one class description per line
476
+ - Be specific about conditions and measurements
477
+ - Objects will be classified and labeled with class names only
478
  - Adjust confidence threshold to filter weak detections
 
479
  - Get your API key from [openrouter.ai](https://openrouter.ai/)
480
  """)
481