import gradio as gr import requests import json import base64 from PIL import Image, ImageDraw, ImageFont import io def process_with_openrouter(image, prompt, api_key, model="qwen/qwen2.5-vl-32b-instruct", temperature=0.5): """Process image with OpenRouter API for object detection""" if not api_key: return "Please enter your OpenRouter API key", "error" if image is None: return "Please upload an image", "error" try: buffered = io.BytesIO() image.save(buffered, format="PNG") img_base64 = base64.b64encode(buffered.getvalue()).decode() headers = { "Authorization": f"Bearer {api_key}", "Content-Type": "application/json" } data = { "model": model, "messages": [ { "role": "user", "content": [ {"type": "text", "text": prompt}, { "type": "image_url", "image_url": {"url": f"data:image/png;base64,{img_base64}"} } ] } ], "temperature": temperature } response = requests.post( "https://openrouter.ai/api/v1/chat/completions", headers=headers, json=data, timeout=60 ) if response.status_code == 200: result = response.json() content = result['choices'][0]['message']['content'] if '```json' in content: content = content.split('```json')[1].split('```')[0].strip() elif '```' in content: content = content.split('```')[1].split('```')[0].strip() return content, None else: return f"Error: {response.status_code} - {response.text}", "error" except Exception as e: return f"Error processing request: {str(e)}", "error" def draw_bounding_boxes(image, detections): """Draw bounding boxes with class names only, same color per class""" if not detections or len(detections) == 0: return image annotated_image = image.copy() draw = ImageDraw.Draw(annotated_image) try: font = ImageFont.truetype("/System/Library/Fonts/Arial.ttf", 16) except: font = ImageFont.load_default() # Predefined colors for different classes class_colors = { "Class I": "#FF0000", # Red "Class II": "#00FF00", # Green "Class III": "#0000FF", # Blue "Class IV": "#FFFF00", # Yellow "Class V": "#FF00FF", # Magenta "Class VI": "#00FFFF", # Cyan "Class VII": "#FFA500", # Orange "Class VIII": "#800080", # Purple "Class IX": "#008000", # Dark Green "Class X": "#FF1493", # Deep Pink } # Fallback colors if more than 10 classes fallback_colors = ["#8B4513", "#2F4F4F", "#DC143C", "#00CED1", "#FF4500", "#DA70D6", "#32CD32", "#FF6347"] for i, detection in enumerate(detections): if all(key in detection for key in ['x', 'y', 'width', 'height']): x = detection['x'] * image.width y = detection['y'] * image.height width = detection['width'] * image.width height = detection['height'] * image.height # Get class name - this is what we'll display class_name = detection.get('class', f'Class {i+1}') x1, y1 = int(x), int(y) x2, y2 = int(x + width), int(y + height) x1 = max(0, min(x1, image.width)) y1 = max(0, min(y1, image.height)) x2 = max(0, min(x2, image.width)) y2 = max(0, min(y2, image.height)) # Get consistent color for this class if class_name in class_colors: color = class_colors[class_name] else: # Use hash of class name to get consistent color color_index = hash(class_name) % len(fallback_colors) color = fallback_colors[color_index] # Draw bounding box draw.rectangle([x1, y1, x2, y2], outline=color, width=4) # Calculate label size text_bbox = draw.textbbox((0, 0), class_name, font=font) text_width = text_bbox[2] - text_bbox[0] text_height = text_bbox[3] - text_bbox[1] # Position label above the box, or below if no space above if y1 - text_height - 6 >= 0: label_y = y1 - text_height - 6 else: label_y = y2 + 4 label_x = x1 # Ensure label stays within image bounds if label_x + text_width + 4 > image.width: label_x = image.width - text_width - 4 # Draw label background draw.rectangle( [label_x - 2, label_y - 2, label_x + text_width + 2, label_y + text_height + 2], fill=color, outline=color ) # Draw class name draw.text((label_x, label_y), class_name, fill="white", font=font) return annotated_image def create_detection_prompt(class_descriptions, confidence_threshold=0.5, detection_mode="specific"): """Create a detection prompt for class descriptions with condition checking""" if isinstance(class_descriptions, str): class_descriptions = [cls.strip() for cls in class_descriptions.split('\n') if cls.strip()] # Build detection instructions if detection_mode == "specific": condition_text = "ONLY detect objects that match these class descriptions and their conditions. Ignore all other objects:" elif detection_mode == "include": condition_text = "Detect objects matching these class descriptions AND any other objects you can identify:" else: # "exclude" condition_text = "Detect all objects EXCEPT those matching these class descriptions. Avoid detecting:" # Format each class description class_specs = [] for i, description in enumerate(class_descriptions, 1): # Parse class name and description if formatted as "Class Name: description" if ':' in description: class_name, class_desc = description.split(':', 1) class_name = class_name.strip() class_desc = class_desc.strip() class_specs.append(f"Class {i} ({class_name}): {class_desc}") else: class_specs.append(f"Class {i}: {description}") classes_text = "\n".join(class_specs) if class_specs else "No class descriptions provided" prompt = f"""{condition_text} {classes_text} Detection Instructions: - Analyze each object against the class descriptions above - Check if objects meet the specified conditions for each class - Only include detections with confidence above {confidence_threshold} - Assign objects to the most appropriate class based on the descriptions SCALE/RULER DETECTION FOR CRACK MEASUREMENT: - First look for scales, rulers, measurement tools, or reference objects in the image - If found, identify the scale markings and determine the measurement reference - Use the scale to calculate actual crack widths in millimeters or appropriate units - For crack classifications, measure crack width using the identified scale - Include actual measurements in your analysis (e.g., "2.5mm crack width based on ruler scale") - If no scale is visible, estimate crack width relative to common objects or provide qualitative assessment Output a JSON list where each entry contains: - "x": normalized x coordinate (0-1) of top-left corner - "y": normalized y coordinate (0-1) of top-left corner - "width": normalized width (0-1) of the bounding box - "height": normalized height (0-1) of the bounding box - "label": brief description with confidence score - "confidence": confidence score (0-1) - "class": the assigned class name (e.g., "Class I", "Class II", etc.) - "description": why this object matches the class criteria - "class_number": the class number from the list above (1, 2, 3, etc.) - "measured_width": actual crack width measurement if scale is available (e.g., "2.5mm", "1.2cm") - "measurement_method": how the measurement was obtained (e.g., "ruler scale", "coin reference", "estimated") Example format: [{{"x": 0.1, "y": 0.2, "width": 0.3, "height": 0.4, "label": "Structural crack (0.92)", "confidence": 0.92, "class": "Class I", "description": "Crack width exceeds 2mm threshold based on ruler measurement", "class_number": 1, "measured_width": "2.5mm", "measurement_method": "ruler scale"}}]""" return prompt def create_interface(): """Create the Gradio interface for object detection""" with gr.Blocks(title="Class-Based Object Detection", theme=gr.themes.Soft()) as demo: gr.Markdown("# 🔍 Class-Based Object Detection with Descriptions") gr.Markdown("Define classes with descriptions and conditions. Objects will be classified and annotated with class names only.") with gr.Row(): with gr.Column(scale=1): gr.Markdown("## ⚙️ Configuration") api_key = gr.Textbox( label="OpenRouter API Key", placeholder="Enter your OpenRouter API key...", type="password" ) with gr.Row(): use_preset = gr.Radio( choices=["Preset Model", "Custom Model"], value="Preset Model", label="Model Selection", info="Choose preset or enter custom OpenRouter model" ) model_preset = gr.Dropdown( choices=[ "qwen/qwen2.5-vl-32b-instruct", "qwen/qwen-vl-max", "openai/gpt-5-chat", "openai/gpt-5-mini", "anthropic/claude-opus-4.1", "x-ai/grok-4", "google/gemini-2.5-pro", "google/gemini-1.5-pro", "google/gemini-1.5-flash", "anthropic/claude-3.5-sonnet", "openai/gpt-4o", "openai/gpt-4o-mini" ], value="qwen/qwen2.5-vl-32b-instruct", label="Preset Models", info="Select from popular OpenRouter models", visible=True ) custom_model_input = gr.Textbox( label="Custom Model ID", placeholder="Enter any OpenRouter model ID (e.g., google/gemini-1.5-flash, anthropic/claude-3-haiku)", visible=False, info="Copy model IDs from openrouter.ai/models" ) detection_mode = gr.Radio( choices=[ ("Detect Only These Classes", "specific"), ("Include These Classes + Others", "include"), ("Exclude These Classes", "exclude") ], value="specific", label="Detection Mode", info="How to handle the specified class descriptions" ) class_descriptions = gr.Textbox( label="Class Descriptions", placeholder="""Define each class with its description and conditions, e.g.: Severe Cracks: Crack width more than 2mm (use ruler/scale if present for measurement) Minor Cracks: Crack width 0.5-2mm (measure using visible scale) Rust Damage: Rust spots larger than 5cm in diameter Concrete Spalling: Concrete spalling deeper than 1cm Paint Defects: Paint peeling areas greater than 10cm²""", value="""Severe Cracks: Crack width more than 2mm (use ruler/scale if present for measurement) Minor Cracks: Crack width 0.5-2mm (measure using visible scale) Rust Damage: Rust spots larger than 5cm in diameter""", lines=8, info="Enter class descriptions, one per line. Format: 'Class Name: Description' or just 'Description'" ) confidence_threshold = gr.Slider( minimum=0.1, maximum=1.0, value=0.5, step=0.05, label="Confidence Threshold", info="Minimum confidence for detection" ) temperature = gr.Slider( minimum=0, maximum=1, value=0.3, step=0.05, label="Temperature", info="Lower values for more consistent results" ) image_input = gr.Image( type="pil", label="Upload Image for Detection" ) detect_btn = gr.Button("🚀 Detect Objects", variant="primary", size="lg") with gr.Column(scale=1): gr.Markdown("## 📊 Detection Results") annotated_image = gr.Image( label="Detected Objects", type="pil" ) detection_results = gr.Textbox( label="Detection Details (JSON)", lines=10, show_copy_button=True ) detection_summary = gr.Textbox( label="Detection Summary", lines=3 ) # Show/hide model input based on selection def update_model_visibility(use_preset_val): if use_preset_val == "Custom Model": return gr.update(visible=False), gr.update(visible=True) else: return gr.update(visible=True), gr.update(visible=False) use_preset.change( update_model_visibility, inputs=[use_preset], outputs=[model_preset, custom_model_input] ) def process_detection(image, class_desc, conf_threshold, api_key_val, use_preset_val, model_preset_val, custom_model_val, temp_val, mode_val): if not api_key_val: return None, "❌ Please enter your OpenRouter API key", "No API key provided" if image is None: return None, "❌ Please upload an image", "No image uploaded" if not class_desc or not class_desc.strip(): return None, "❌ Please enter at least one class description", "No class descriptions provided" # Determine which model to use if use_preset_val == "Custom Model": if not custom_model_val or custom_model_val.strip() == "": return None, "❌ Please enter a custom model ID", "Custom model required" final_model = custom_model_val.strip() else: final_model = model_preset_val try: prompt = create_detection_prompt(class_desc, conf_threshold, mode_val) result, error = process_with_openrouter(image, prompt, api_key_val, final_model, temp_val) if error: return None, f"❌ Error: {result}", "Detection failed" detections = json.loads(result) if isinstance(detections, list) and len(detections) > 0: annotated_img = draw_bounding_boxes(image, detections) filtered_detections = [d for d in detections if d.get('confidence', 1.0) >= conf_threshold] mode_descriptions = { "specific": "Detecting only objects matching class descriptions", "include": "Including specified classes + other objects", "exclude": "Excluding objects matching class descriptions" } summary_text = f"✅ {mode_descriptions.get(mode_val, 'Detection')} - Found {len(filtered_detections)} objects\n🤖 Model: {final_model}" if filtered_detections: # Group by class and show counts class_counts = {} for det in filtered_detections: class_name = det.get('class', 'unknown') description = det.get('description', '') confidence = det.get('confidence', 1.0) if class_name not in class_counts: class_counts[class_name] = { 'count': 0, 'avg_confidence': 0, 'descriptions': [] } class_counts[class_name]['count'] += 1 class_counts[class_name]['avg_confidence'] += confidence if description and description not in class_counts[class_name]['descriptions']: class_counts[class_name]['descriptions'].append(description) summary_text += "\n\nClass Detection Results:" for class_name, data in class_counts.items(): avg_conf = data['avg_confidence'] / data['count'] summary_text += f"\n• {class_name}: {data['count']} detected (avg conf: {avg_conf:.2f})" return annotated_img, json.dumps(filtered_detections, indent=2), summary_text else: return image, "No objects detected matching class descriptions", "No detections matching criteria above confidence threshold" except json.JSONDecodeError: return None, f"❌ Invalid JSON response: {result}", "JSON parsing failed" except Exception as e: return None, f"❌ Error: {str(e)}", "Processing error" detect_btn.click( process_detection, inputs=[image_input, class_descriptions, confidence_threshold, api_key, use_preset, model_preset, custom_model_input, temperature, detection_mode], outputs=[annotated_image, detection_results, detection_summary] ) gr.Markdown(""" ## 💡 Usage Tips - **Specific Mode**: Only detect objects matching your class descriptions - **Include Mode**: Detect your specified classes plus any other objects found - **Exclude Mode**: Detect everything except objects matching your class descriptions ### 🏷️ Class Definition **Format Options:** 1. `Class Name: Description` - e.g., "Severe Cracks: Crack width more than 2mm" 2. `Description only` - Will be automatically assigned as "Class I", "Class II", etc. **Annotation Behavior:** - Images show only class names (e.g., "Class I", "Class II") - Same class = same color throughout the image - Clean, simple visual identification ### 🤖 Model Selection **Default Models (Recommended):** - `qwen/qwen2.5-vl-32b-instruct` - Advanced Qwen vision model optimized for detailed analysis (Default) - `qwen/qwen-vl-max` - Premium Qwen vision model with maximum capabilities - `openai/gpt-5-chat` - Latest GPT-5 with advanced vision capabilities - `openai/gpt-5-mini` - Faster, efficient GPT-5 variant - `anthropic/claude-opus-4.1` - Next-gen Claude with superior reasoning - `x-ai/grok-4` - Advanced Grok model with detailed analysis **Custom Models**: Enter any OpenRouter model ID from [openrouter.ai/models](https://openrouter.ai/models) ### Example Class Descriptions: ``` Severe Cracks: Crack width more than 2mm (use ruler/scale for measurement) Minor Cracks: Crack width 0.5-2mm (measure using visible scale) Rust Damage: Rust spots larger than 5cm in diameter Concrete Spalling: Concrete spalling deeper than 1cm Paint Defects: Paint peeling areas greater than 10cm² Water Damage: Water damage stains larger than 15cm ``` ### 📏 Scale-Based Measurement: - **Automatic Scale Detection**: The system looks for rulers, measuring tools, or reference objects - **Precise Measurements**: When scales are found, actual crack widths are calculated - **Measurement Methods**: Supports rulers, crack gauges, coins, or other reference objects - **Enhanced Classification**: More accurate class assignment based on measured dimensions - Enter one class description per line - Be specific about conditions and measurements - Objects will be classified and labeled with class names only - Adjust confidence threshold to filter weak detections - Get your API key from [openrouter.ai](https://openrouter.ai/) """) return demo if __name__ == "__main__": print("🚀 Starting Object Detection App...") demo = create_interface() demo.launch(share=False, inbrowser=True)