# Page residue from the hosting site ("Spaces: Running"); not part of the program.
import base64
import io
import json

import gradio as gr
import requests
from PIL import Image, ImageDraw, ImageFont
_OPENROUTER_CHAT_URL = "https://openrouter.ai/api/v1/chat/completions"
_OPENROUTER_TIMEOUT_SECONDS = 60


def _extract_json_payload(content):
    """Return the JSON text embedded in a model reply.

    Replies are often wrapped in a Markdown code fence. The text inside the
    first fence is returned, stripped; a reply without any fence is returned
    unchanged.
    """
    if "```json" in content:
        return content.split("```json")[1].split("```")[0].strip()
    if "```" in content:
        fenced = content.split("```")[1]
        first_line, newline, remainder = fenced.partition("\n")
        # A bare word on the opening-fence line (e.g. "JSON") is a language
        # tag, not payload; leaving it in would make json.loads fail later.
        if newline and first_line.strip().isalpha():
            fenced = remainder
        return fenced.strip()
    return content


def process_with_openrouter(image, prompt, api_key, model="google/gemini-2.5-pro", temperature=0.5):
    """Send an image and a prompt to the OpenRouter chat-completions API.

    Args:
        image: Object with a PIL-style ``save(fp, format=...)`` method; it is
            encoded as a base64 PNG data URL.
        prompt: Text instruction sent alongside the image.
        api_key: OpenRouter API key used as the bearer token.
        model: Model identifier passed through to the API.
        temperature: Sampling temperature passed through to the API.

    Returns:
        A ``(text, error)`` tuple. On success ``text`` is the model reply with
        any surrounding code fence removed and ``error`` is ``None``. On any
        failure ``text`` is a human-readable message and ``error`` is the
        string ``"error"``. The function never raises.
    """
    if not api_key:
        return "Please enter your OpenRouter API key", "error"
    if image is None:
        return "Please upload an image", "error"

    try:
        buffered = io.BytesIO()
        image.save(buffered, format="PNG")
        img_base64 = base64.b64encode(buffered.getvalue()).decode()

        headers = {
            "Authorization": f"Bearer {api_key}",
            "Content-Type": "application/json",
        }
        data = {
            "model": model,
            "messages": [
                {
                    "role": "user",
                    "content": [
                        {"type": "text", "text": prompt},
                        {
                            "type": "image_url",
                            "image_url": {"url": f"data:image/png;base64,{img_base64}"},
                        },
                    ],
                }
            ],
            "temperature": temperature,
        }

        response = requests.post(
            _OPENROUTER_CHAT_URL,
            headers=headers,
            json=data,
            timeout=_OPENROUTER_TIMEOUT_SECONDS,
        )
        if response.status_code != 200:
            return f"Error: {response.status_code} - {response.text}", "error"

        # A 200 reply is not guaranteed to carry the expected structure;
        # report that explicitly instead of leaking a bare KeyError message.
        try:
            content = response.json()["choices"][0]["message"]["content"]
        except (KeyError, IndexError, TypeError, ValueError):
            return f"Error: unexpected response format - {response.text}", "error"

        if not isinstance(content, str) or not content.strip():
            return "Error: the model returned an empty response", "error"

        return _extract_json_payload(content), None
    except Exception as e:
        # Boundary handler: callers rely on always receiving a tuple.
        return f"Error processing request: {e}", "error"
_BOX_COLORS = (
    "#FF0000", "#00FF00", "#0000FF", "#FFFF00",
    "#FF00FF", "#00FFFF", "#FFA500", "#800080",
)
_LABEL_FONT_SIZE = 12
# Tried in order; the first path is macOS-specific, the second is a font name
# Pillow may resolve from the system font directories on other platforms.
_LABEL_FONT_CANDIDATES = (
    "/System/Library/Fonts/Arial.ttf",
    "DejaVuSans.ttf",
)
_MAX_DETAILS_CHARS = 40


def _load_label_font(size=_LABEL_FONT_SIZE):
    """Return the first loadable TrueType label font, else Pillow's default."""
    for candidate in _LABEL_FONT_CANDIDATES:
        try:
            return ImageFont.truetype(candidate, size)
        except (OSError, ImportError):
            continue
    return ImageFont.load_default()


def _pixel_box(detection, img_width, img_height):
    """Convert a normalized detection to clamped pixel corners.

    Returns ``(x1, y1, x2, y2)`` with ``x1 <= x2`` and ``y1 <= y2``, or
    ``None`` when a coordinate is missing or cannot be read as a number.
    """
    try:
        left = float(detection["x"]) * img_width
        top = float(detection["y"]) * img_height
        right = left + float(detection["width"]) * img_width
        bottom = top + float(detection["height"]) * img_height
        # Order the corners: a negative width/height would otherwise yield an
        # inverted rectangle, which Pillow refuses to draw.
        x1, x2 = sorted((int(left), int(right)))
        y1, y2 = sorted((int(top), int(bottom)))
    except (KeyError, TypeError, ValueError, OverflowError):
        return None
    x1, x2 = (max(0, min(value, img_width)) for value in (x1, x2))
    y1, y2 = (max(0, min(value, img_height)) for value in (y1, y2))
    return x1, y1, x2, y2


def _format_confidence(value):
    """Format a confidence score with two decimals, or "n/a" if not numeric."""
    try:
        return f"{float(value):.2f}"
    except (TypeError, ValueError):
        return "n/a"


def _label_lines(detection):
    """Build the text lines shown in a detection's label."""
    class_name = detection.get("class", "unknown")
    confidence = _format_confidence(detection.get("confidence", 1.0))
    lines = [f"{class_name} ({confidence})"]

    details = detection.get("details", "")
    if details:
        details = str(details)
        if len(details) > _MAX_DETAILS_CHARS:
            details = details[:_MAX_DETAILS_CHARS] + "..."
        lines.append(details)

    criteria_match = detection.get("criteria_match", "")
    if criteria_match:
        lines.append(f"Criteria: {criteria_match}")
    return lines


def draw_bounding_boxes(image, detections):
    """Draw bounding boxes with multi-line labels on a copy of the image.

    Args:
        image: PIL image to annotate; it is not modified.
        detections: Iterable of dicts with normalized ``x``, ``y``, ``width``
            and ``height`` (fractions of the image size, top-left origin) and
            optional ``class``, ``confidence``, ``details`` and
            ``criteria_match`` keys. Entries that are not dicts or lack usable
            coordinates are skipped.

    Returns:
        The original ``image`` object when ``detections`` is empty or
        ``None``; otherwise an annotated copy.
    """
    if not detections:
        return image

    annotated_image = image.copy()
    draw = ImageDraw.Draw(annotated_image)
    label_font = _load_label_font()
    img_width, img_height = image.width, image.height

    for index, detection in enumerate(detections):
        if not isinstance(detection, dict):
            continue
        box = _pixel_box(detection, img_width, img_height)
        if box is None:
            continue
        x1, y1, x2, y2 = box
        color = _BOX_COLORS[index % len(_BOX_COLORS)]

        # Thick outline for visibility on large images.
        draw.rectangle([x1, y1, x2, y2], outline=color, width=4)

        lines = _label_lines(detection)
        line_sizes = []
        for line in lines:
            left, top, right, bottom = draw.textbbox((0, 0), line, font=label_font)
            line_sizes.append((right - left, bottom - top))
        max_width = max(width for width, _ in line_sizes)
        total_height = sum(height + 2 for _, height in line_sizes)

        # Prefer the space above the box, then below it; if neither fits,
        # tuck the label inside the top of the box so it stays on the image.
        label_y = y1 - total_height - 4
        if label_y < 0:
            label_y = y2 + 2
            if label_y + total_height + 2 > img_height:
                label_y = y1 + 2

        label_x = x1
        if label_x + max_width > img_width:
            label_x = max(0, img_width - max_width - 4)

        draw.rectangle(
            [label_x - 2, label_y, label_x + max_width + 4, label_y + total_height + 2],
            fill=color,
            outline=color,
        )

        current_y = label_y + 2
        for line, (_, line_height) in zip(lines, line_sizes):
            draw.text((label_x + 2, current_y), line, fill="white", font=label_font)
            current_y += line_height + 2

    return annotated_image
_DETECTION_MODE_INTROS = {
    "specific": "ONLY detect objects that match these specific detailed criteria. Ignore all other objects:",
    "include": "Detect objects matching these detailed criteria AND any other objects you can identify:",
}
# Any mode other than "specific" / "include" is treated as "exclude".
_EXCLUDE_MODE_INTRO = "Detect all objects EXCEPT those matching these detailed criteria. Avoid detecting:"

_PROMPT_OUTPUT_LINES = (
    "Output a JSON list where each entry contains:",
    '- "x": normalized x coordinate (0-1) of top-left corner',
    '- "y": normalized y coordinate (0-1) of top-left corner',
    '- "width": normalized width (0-1) of the bounding box',
    '- "height": normalized height (0-1) of the bounding box',
    '- "label": detailed description with measurements/characteristics and confidence score',
    '- "confidence": confidence score (0-1)',
    '- "class": the general category name',
    '- "details": specific measurements, characteristics, or conditions observed',
    '- "criteria_match": which detailed criteria this detection matches (reference number from list above)',
    "Example format for crack detection:",
    '[{"x": 0.1, "y": 0.2, "width": 0.3, "height": 0.4, '
    '"label": "crack width ~3mm, length ~15cm (0.92)", "confidence": 0.92, '
    '"class": "crack", "details": "width: 3mm, length: 15cm, surface: concrete", '
    '"criteria_match": 1}]',
)


def create_detection_prompt(detailed_classes, confidence_threshold=0.5, detection_mode="specific"):
    """Build the detection prompt for a set of detailed specifications.

    Args:
        detailed_classes: Either a newline-separated string or an iterable of
            specifications. Entries are stripped and blank ones dropped;
            ``None`` is treated as no specifications.
        confidence_threshold: Minimum confidence the model is asked to report.
        detection_mode: ``"specific"`` (only the listed criteria),
            ``"include"`` (listed criteria plus anything else); any other
            value selects the "exclude" wording.

    Returns:
        The prompt text: a mode-specific instruction, the numbered
        specifications (or a placeholder when there are none), detection
        instructions, and the expected JSON output format with an example.
    """
    if detailed_classes is None:
        detailed_classes = []
    elif isinstance(detailed_classes, str):
        detailed_classes = detailed_classes.split("\n")

    specs = [text for text in (str(item).strip() for item in detailed_classes) if text]
    classes_text = "\n".join(
        f"{number}. {spec}" for number, spec in enumerate(specs, 1)
    ) or "No specific criteria provided"

    condition_text = _DETECTION_MODE_INTROS.get(detection_mode, _EXCLUDE_MODE_INTRO)

    prompt_lines = [
        condition_text,
        classes_text,
        "Detection Instructions:",
        "- Carefully analyze each object against the detailed specifications above",
        f"- Only include detections with confidence above {confidence_threshold}",
        "- For each detection, provide specific measurements, characteristics, or details when possible",
        "- Be precise about the criteria matching (e.g., actual crack width, size measurements, specific conditions)",
        *_PROMPT_OUTPUT_LINES,
    ]
    return "\n".join(prompt_lines)
_MODEL_CHOICES = [
    "google/gemini-2.5-pro",
    "google/gemini-1.5-pro",
    "google/gemini-1.5-flash",
    "anthropic/claude-3.5-sonnet",
    "openai/gpt-4o",
    "openai/gpt-4o-mini",
]

_MODE_DESCRIPTIONS = {
    "specific": "Detecting only objects matching detailed specifications",
    "include": "Including specified detailed criteria + other objects",
    "exclude": "Excluding objects matching detailed specifications",
}

# Multi-line UI strings live at module level so source indentation never
# leaks into the text shown to the user.
_SPEC_PLACEHOLDER = """Enter each specification on a new line, e.g.:
crack width more than 2mm
rust spots larger than 5cm in diameter
concrete spalling deeper than 1cm
structural damage with visible deformation
paint peeling areas greater than 10cm²"""

_SPEC_DEFAULT = """crack width more than 2mm
rust spots larger than 5cm in diameter
concrete spalling deeper than 1cm"""

_USAGE_TIPS_MARKDOWN = """
## 💡 Usage Tips
- **Specific Mode**: Only detect objects matching your detailed specifications
- **Include Mode**: Detect your specified criteria plus any other objects found
- **Exclude Mode**: Detect everything except objects matching your specifications
### Example Detailed Specifications:
```
crack width more than 2mm
rust spots larger than 5cm in diameter
concrete spalling deeper than 1cm
structural damage with visible deformation
paint peeling areas greater than 10cm²
corrosion affecting more than 20% of surface area
missing bolts or fasteners
water damage stains larger than 15cm
```
- Enter one detailed specification per line
- Be specific about measurements, sizes, conditions
- Adjust confidence threshold to filter weak detections
- Use lower temperature values for consistent results
- Get your API key from [openrouter.ai](https://openrouter.ai/)
"""

_SUMMARY_ITEMS_PER_CLASS = 3


def _confidence_of(detection):
    """Return a detection's confidence as a float.

    A missing or non-numeric score counts as 1.0, so such detections are kept
    by the confidence filter rather than silently dropped.
    """
    try:
        return float(detection.get("confidence", 1.0))
    except (TypeError, ValueError):
        return 1.0


def _summarize_detections(detections, mode):
    """Build the human-readable summary, grouped by detection class."""
    headline = f"✅ {_MODE_DESCRIPTIONS.get(mode, 'Detection')} - Found {len(detections)} objects"
    lines = [headline, "", "Detailed Results:"]

    by_class = {}
    for detection in detections:
        by_class.setdefault(str(detection.get("class", "unknown")), []).append(detection)

    for class_name, items in by_class.items():
        lines.append(f"• {class_name} ({len(items)} found):")
        for item in items[:_SUMMARY_ITEMS_PER_CLASS]:
            entry = f" - {item.get('details', '')} (conf: {_confidence_of(item):.2f})"
            criteria = item.get("criteria_match", "")
            if criteria:
                entry += f" [criteria: {criteria}]"
            lines.append(entry)
        if len(items) > _SUMMARY_ITEMS_PER_CLASS:
            lines.append(f" ... and {len(items) - _SUMMARY_ITEMS_PER_CLASS} more")
    return "\n".join(lines)


def create_interface():
    """Create the Gradio interface for object detection.

    The left column holds the configuration (API key, model, detection mode,
    specifications, confidence threshold, temperature, image upload and the
    detect button); the right column shows the annotated image, the filtered
    detections as JSON and a text summary.

    Returns:
        The assembled ``gr.Blocks`` application, not yet launched.
    """
    with gr.Blocks(title="Detailed Object Detection", theme=gr.themes.Soft()) as demo:
        gr.Markdown("# 🔍 Detailed Object Detection with Custom Specifications")
        gr.Markdown("Detect objects with detailed specifications (e.g., 'crack width more than 2mm', 'rust spots larger than 5cm')")

        with gr.Row():
            with gr.Column(scale=1):
                gr.Markdown("## ⚙️ Configuration")
                api_key = gr.Textbox(
                    label="OpenRouter API Key",
                    placeholder="Enter your OpenRouter API key...",
                    type="password",
                )
                model = gr.Dropdown(
                    choices=_MODEL_CHOICES,
                    value="google/gemini-2.5-pro",
                    label="Detection Model",
                )
                detection_mode = gr.Radio(
                    choices=[
                        ("Detect Only These Specifications", "specific"),
                        ("Include These + Others", "include"),
                        ("Exclude These Specifications", "exclude"),
                    ],
                    value="specific",
                    label="Detection Mode",
                    info="How to handle the specified detailed criteria",
                )
                detailed_specifications = gr.Textbox(
                    label="Detailed Detection Specifications",
                    placeholder=_SPEC_PLACEHOLDER,
                    value=_SPEC_DEFAULT,
                    lines=8,
                    info="Enter detailed specifications, one per line",
                )
                confidence_threshold = gr.Slider(
                    minimum=0.1,
                    maximum=1.0,
                    value=0.5,
                    step=0.05,
                    label="Confidence Threshold",
                    info="Minimum confidence for detection",
                )
                temperature = gr.Slider(
                    minimum=0,
                    maximum=1,
                    value=0.3,
                    step=0.05,
                    label="Temperature",
                    info="Lower values for more consistent results",
                )
                image_input = gr.Image(
                    type="pil",
                    label="Upload Image for Detection",
                )
                detect_btn = gr.Button("🔍 Detect Objects", variant="primary", size="lg")

            with gr.Column(scale=1):
                gr.Markdown("## 📊 Detection Results")
                annotated_image = gr.Image(
                    label="Detected Objects",
                    type="pil",
                )
                detection_results = gr.Textbox(
                    label="Detection Details (JSON)",
                    lines=10,
                    show_copy_button=True,
                )
                detection_summary = gr.Textbox(
                    label="Detection Summary",
                    lines=3,
                )

        def process_detection(image, detailed_specs, conf_threshold, api_key_val, model_val, temp_val, mode_val):
            """Run one detection and return (image, JSON/status text, summary)."""
            if not api_key_val:
                return None, "❌ Please enter your OpenRouter API key", "No API key provided"
            if image is None:
                return None, "❌ Please upload an image", "No image uploaded"
            if not detailed_specs or not detailed_specs.strip():
                return None, "❌ Please enter at least one detailed specification", "No specifications provided"

            result = None
            try:
                prompt = create_detection_prompt(detailed_specs, conf_threshold, mode_val)
                result, error = process_with_openrouter(image, prompt, api_key_val, model_val, temp_val)
                if error:
                    return None, f"❌ Error: {result}", "Detection failed"

                detections = json.loads(result)
                if not isinstance(detections, list):
                    detections = []

                # Filter BEFORE drawing so the annotated image shows exactly
                # the detections reported in the JSON and the summary.
                kept = [
                    detection
                    for detection in detections
                    if isinstance(detection, dict) and _confidence_of(detection) >= conf_threshold
                ]
                if not kept:
                    return (
                        image,
                        "No objects detected matching detailed specifications",
                        "No detections matching criteria above confidence threshold",
                    )

                annotated_img = draw_bounding_boxes(image, kept)
                return (
                    annotated_img,
                    json.dumps(kept, indent=2, ensure_ascii=False),
                    _summarize_detections(kept, mode_val),
                )
            except json.JSONDecodeError:
                return None, f"❌ Invalid JSON response: {result}", "JSON parsing failed"
            except Exception as e:
                # UI boundary: surface the failure in the results box instead
                # of letting the callback raise.
                return None, f"❌ Error: {e}", "Processing error"

        detect_btn.click(
            process_detection,
            inputs=[image_input, detailed_specifications, confidence_threshold, api_key, model, temperature, detection_mode],
            outputs=[annotated_image, detection_results, detection_summary],
        )

        gr.Markdown(_USAGE_TIPS_MARKDOWN)

    return demo
# Script entry point: build the UI and serve it locally (no public share
# link), opening a browser tab on startup.
if __name__ == "__main__":
    print("🚀 Starting Object Detection App...")
    demo = create_interface()
    demo.launch(share=False, inbrowser=True)