# Spaces: Sompote / crack_detection (Running)
# File size: 17,742 Bytes
# 56ec8b1

import gradio as gr
import requests
import json
import base64
from PIL import Image, ImageDraw, ImageFont
import io

def process_with_openrouter(image, prompt, api_key, model="google/gemini-2.5-pro", temperature=0.5):
    """Send an image plus a text prompt to OpenRouter's chat-completions endpoint.

    Returns a ``(text, status)`` pair. On success ``text`` is the model's reply
    (with a surrounding Markdown code fence removed when present) and
    ``status`` is ``None``. On any failure ``text`` is a human-readable message
    and ``status`` is the string ``"error"``.
    """
    # Guard clauses: nothing is sent without credentials and an image.
    if not api_key:
        return "Please enter your OpenRouter API key", "error"
    if image is None:
        return "Please upload an image", "error"

    try:
        # Serialize the image to PNG in memory and embed it as a base64 data URL.
        png_buffer = io.BytesIO()
        image.save(png_buffer, format="PNG")
        encoded_png = base64.b64encode(png_buffer.getvalue()).decode()

        user_message = {
            "role": "user",
            "content": [
                {"type": "text", "text": prompt},
                {
                    "type": "image_url",
                    "image_url": {"url": f"data:image/png;base64,{encoded_png}"},
                },
            ],
        }

        response = requests.post(
            "https://openrouter.ai/api/v1/chat/completions",
            headers={
                "Authorization": f"Bearer {api_key}",
                "Content-Type": "application/json",
            },
            json={
                "model": model,
                "messages": [user_message],
                "temperature": temperature,
            },
            timeout=60,
        )

        if response.status_code != 200:
            return f"Error: {response.status_code} - {response.text}", "error"

        reply = response.json()['choices'][0]['message']['content']

        # Keep only the body of the first fenced block; a ```json fence is
        # checked before a bare ``` fence.
        for fence in ('```json', '```'):
            if fence in reply:
                reply = reply.split(fence)[1].split('```')[0].strip()
                break

        return reply, None

    except Exception as e:
        # Any failure (encoding, network, unexpected response shape) is
        # reported to the caller as an error tuple rather than raised.
        return f"Error processing request: {str(e)}", "error"

def _to_float(value, default=None):
    """Return *value* as a finite float, or *default* when it is not one."""
    try:
        number = float(value)
    except (TypeError, ValueError):
        return default
    # Reject NaN and +/-infinity: they cannot be turned into pixel coordinates.
    if number != number or number in (float("inf"), float("-inf")):
        return default
    return number


def draw_bounding_boxes(image, detections):
    """Draw bounding boxes with detailed labels on a copy of the image.

    Args:
        image: PIL image to annotate. It is never modified in place.
        detections: Iterable of dicts. Each needs normalized (0-1) ``x``,
            ``y``, ``width`` and ``height`` values (top-left corner plus
            size); ``class``, ``confidence``, ``details`` and
            ``criteria_match`` are optional and only used for the label.
            Entries that are not dicts or lack usable numeric coordinates
            are skipped instead of aborting the whole drawing pass.

    Returns:
        ``image`` itself when ``detections`` is empty/None, otherwise an
        annotated copy.
    """
    if not detections:
        return image

    annotated_image = image.copy()
    draw = ImageDraw.Draw(annotated_image)

    try:
        label_font = ImageFont.truetype("/System/Library/Fonts/Arial.ttf", 12)
    except (OSError, ImportError):
        # The font file is missing (this path only exists on macOS) or
        # FreeType support is unavailable: use Pillow's built-in font.
        label_font = ImageFont.load_default()

    colors = ["#FF0000", "#00FF00", "#0000FF", "#FFFF00", "#FF00FF", "#00FFFF", "#FFA500", "#800080"]

    for i, detection in enumerate(detections):
        if not isinstance(detection, dict):
            continue

        # Model output is untrusted: coordinates may be missing, null or
        # strings. Coerce them and skip the entry if any is unusable.
        box = [_to_float(detection.get(key)) for key in ('x', 'y', 'width', 'height')]
        if any(value is None for value in box):
            continue
        rel_x, rel_y, rel_width, rel_height = box

        x = rel_x * image.width
        y = rel_y * image.height
        width = rel_width * image.width
        height = rel_height * image.height

        # Clamp to the image, then order the corners so a negative
        # width/height cannot produce an inverted rectangle (which recent
        # Pillow versions reject with ValueError).
        x1, x2 = sorted((
            int(max(0, min(x, image.width))),
            int(max(0, min(x + width, image.width))),
        ))
        y1, y2 = sorted((
            int(max(0, min(y, image.height))),
            int(max(0, min(y + height, image.height))),
        ))

        color = colors[i % len(colors)]

        # Thick outline for better visibility.
        draw.rectangle([x1, y1, x2, y2], outline=color, width=4)

        class_name = detection.get('class', 'unknown')
        # A missing or non-numeric confidence is shown as 1.00.
        confidence = _to_float(detection.get('confidence', 1.0), 1.0)
        details = detection.get('details', '')
        criteria_match = detection.get('criteria_match', '')

        # Multi-line label: class + confidence, then optional details/criteria.
        display_lines = [f"{class_name} ({confidence:.2f})"]
        if details:
            details = str(details)  # may arrive as a number/list/dict
            # Truncate long details so the label stays compact.
            display_lines.append(details[:40] + "..." if len(details) > 40 else details)
        if criteria_match:
            display_lines.append(f"Criteria: {criteria_match}")

        # Measure every line to size the label background.
        max_width = 0
        line_heights = []
        for line in display_lines:
            left, top, right, bottom = draw.textbbox((0, 0), line, font=label_font)
            max_width = max(max_width, right - left)
            line_heights.append(bottom - top)
        total_height = sum(line_height + 2 for line_height in line_heights)

        # Prefer the space above the box; otherwise go below it, pulled back
        # up if that would run past the bottom edge of the image.
        if y1 - total_height - 4 >= 0:
            label_y = y1 - total_height - 4
        else:
            label_y = max(0, min(y2 + 2, image.height - total_height - 2))

        # Keep the label inside the image horizontally (never negative).
        label_x = x1
        if label_x + max_width > image.width:
            label_x = max(0, image.width - max_width - 4)

        # Label background in the box colour.
        draw.rectangle(
            [label_x - 2, label_y, label_x + max_width + 4, label_y + total_height + 2],
            fill=color,
            outline=color,
        )

        # Text lines, stacked with a 2px gap.
        current_y = label_y + 2
        for line, line_height in zip(display_lines, line_heights):
            draw.text((label_x + 2, current_y), line, fill="white", font=label_font)
            current_y += line_height + 2

    return annotated_image

def create_detection_prompt(detailed_classes, confidence_threshold=0.5, detection_mode="specific"):
    """Build the instruction prompt sent to the vision model.

    ``detailed_classes`` is either a newline-separated string (each line is
    stripped and blank lines are dropped) or an iterable of specification
    strings used as given. ``detection_mode`` picks the opening sentence:
    "specific", "include", and any other value is treated as "exclude".
    The specifications are numbered from 1 so the model can reference them
    in ``criteria_match``.
    """
    specs = detailed_classes
    if isinstance(specs, str):
        specs = [line.strip() for line in specs.split('\n') if line.strip()]

    # Opening sentence: start from the "exclude" wording and override it for
    # the two explicitly named modes.
    condition_text = "Detect all objects EXCEPT those matching these detailed criteria. Avoid detecting:"
    for mode_name, opener in (
        ("specific", "ONLY detect objects that match these specific detailed criteria. Ignore all other objects:"),
        ("include", "Detect objects matching these detailed criteria AND any other objects you can identify:"),
    ):
        if detection_mode == mode_name:
            condition_text = opener
            break

    # Numbered list of specifications, or a placeholder when there are none.
    numbered_specs = "\n".join(f"{number}. {spec}" for number, spec in enumerate(specs, 1))
    classes_text = numbered_specs or "No specific criteria provided"

    return f"""{condition_text}

{classes_text}

Detection Instructions:
- Carefully analyze each object against the detailed specifications above
- Only include detections with confidence above {confidence_threshold}
- For each detection, provide specific measurements, characteristics, or details when possible
- Be precise about the criteria matching (e.g., actual crack width, size measurements, specific conditions)

Output a JSON list where each entry contains:
- "x": normalized x coordinate (0-1) of top-left corner
- "y": normalized y coordinate (0-1) of top-left corner  
- "width": normalized width (0-1) of the bounding box
- "height": normalized height (0-1) of the bounding box
- "label": detailed description with measurements/characteristics and confidence score
- "confidence": confidence score (0-1)
- "class": the general category name
- "details": specific measurements, characteristics, or conditions observed
- "criteria_match": which detailed criteria this detection matches (reference number from list above)

Example format for crack detection:
[{{"x": 0.1, "y": 0.2, "width": 0.3, "height": 0.4, "label": "crack width ~3mm, length ~15cm (0.92)", "confidence": 0.92, "class": "crack", "details": "width: 3mm, length: 15cm, surface: concrete", "criteria_match": 1}}]"""

def _detection_confidence(detection):
    """Return a detection's confidence as a float; missing or non-numeric values count as 1.0."""
    try:
        return float(detection.get('confidence', 1.0))
    except (TypeError, ValueError):
        return 1.0


def _build_detection_summary(detections, mode_val):
    """Return the human-readable summary text for the kept detections, grouped by class."""
    mode_descriptions = {
        "specific": "Detecting only objects matching detailed specifications",
        "include": "Including specified detailed criteria + other objects",
        "exclude": "Excluding objects matching detailed specifications"
    }
    lines = [f"✅ {mode_descriptions.get(mode_val, 'Detection')} - Found {len(detections)} objects"]

    if detections:
        # Group by class name (stringified so odd model output stays hashable).
        by_class = {}
        for det in detections:
            by_class.setdefault(str(det.get('class', 'unknown')), []).append(det)

        lines.append("")
        lines.append("Detailed Results:")
        for class_name, items in by_class.items():
            lines.append(f"• {class_name} ({len(items)} found):")
            for det in items[:3]:  # Show first 3 items per class
                entry = f"  - {det.get('details', '')} (conf: {_detection_confidence(det):.2f})"
                criteria = det.get('criteria_match', '')
                if criteria:
                    entry += f" [criteria: {criteria}]"
                lines.append(entry)
            if len(items) > 3:
                lines.append(f"  ... and {len(items)-3} more")

    return "\n".join(lines)


def create_interface():
    """Create the Gradio interface for object detection.

    Builds a two-column ``gr.Blocks`` layout (configuration and image upload on
    the left, annotated image / JSON / summary on the right), wires the detect
    button to the detection handler, and returns the un-launched Blocks object.
    """
    with gr.Blocks(title="Detailed Object Detection", theme=gr.themes.Soft()) as demo:
        gr.Markdown("# 🔍 Detailed Object Detection with Custom Specifications")
        gr.Markdown("Detect objects with detailed specifications (e.g., 'crack width more than 2mm', 'rust spots larger than 5cm')")

        with gr.Row():
            with gr.Column(scale=1):
                gr.Markdown("## ⚙️ Configuration")
                api_key = gr.Textbox(
                    label="OpenRouter API Key",
                    placeholder="Enter your OpenRouter API key...",
                    type="password"
                )

                model = gr.Dropdown(
                    choices=[
                        "google/gemini-2.5-pro",
                        "google/gemini-1.5-pro",
                        "google/gemini-1.5-flash",
                        "anthropic/claude-3.5-sonnet",
                        "openai/gpt-4o",
                        "openai/gpt-4o-mini"
                    ],
                    value="google/gemini-2.5-pro",
                    label="Detection Model"
                )

                detection_mode = gr.Radio(
                    choices=[
                        ("Detect Only These Specifications", "specific"),
                        ("Include These + Others", "include"),
                        ("Exclude These Specifications", "exclude")
                    ],
                    value="specific",
                    label="Detection Mode",
                    info="How to handle the specified detailed criteria"
                )

                detailed_specifications = gr.Textbox(
                    label="Detailed Detection Specifications",
                    placeholder="""Enter each specification on a new line, e.g.:
crack width more than 2mm
rust spots larger than 5cm in diameter
concrete spalling deeper than 1cm
structural damage with visible deformation
paint peeling areas greater than 10cm²""",
                    value="""crack width more than 2mm
rust spots larger than 5cm in diameter
concrete spalling deeper than 1cm""",
                    lines=8,
                    info="Enter detailed specifications, one per line"
                )

                confidence_threshold = gr.Slider(
                    minimum=0.1,
                    maximum=1.0,
                    value=0.5,
                    step=0.05,
                    label="Confidence Threshold",
                    info="Minimum confidence for detection"
                )

                temperature = gr.Slider(
                    minimum=0,
                    maximum=1,
                    value=0.3,
                    step=0.05,
                    label="Temperature",
                    info="Lower values for more consistent results"
                )

                image_input = gr.Image(
                    type="pil",
                    label="Upload Image for Detection"
                )

                detect_btn = gr.Button("🚀 Detect Objects", variant="primary", size="lg")

            with gr.Column(scale=1):
                gr.Markdown("## 📊 Detection Results")

                annotated_image = gr.Image(
                    label="Detected Objects",
                    type="pil"
                )

                detection_results = gr.Textbox(
                    label="Detection Details (JSON)",
                    lines=10,
                    show_copy_button=True
                )

                detection_summary = gr.Textbox(
                    label="Detection Summary",
                    lines=3
                )

        def process_detection(image, detailed_specs, conf_threshold, api_key_val, model_val, temp_val, mode_val):
            """Click handler: returns ``(annotated image, JSON/status text, summary text)``."""
            if not api_key_val:
                return None, "❌ Please enter your OpenRouter API key", "No API key provided"

            if image is None:
                return None, "❌ Please upload an image", "No image uploaded"

            if not detailed_specs or not detailed_specs.strip():
                return None, "❌ Please enter at least one detailed specification", "No specifications provided"

            result = None
            try:
                prompt = create_detection_prompt(detailed_specs, conf_threshold, mode_val)

                result, error = process_with_openrouter(image, prompt, api_key_val, model_val, temp_val)

                if error:
                    # The returned message already starts with "Error", so it
                    # is not prefixed a second time.
                    return None, f"❌ {result}", "Detection failed"

                detections = json.loads(result)
                if not isinstance(detections, list):
                    detections = []

                # Apply the confidence threshold BEFORE drawing so the image,
                # the JSON and the summary all describe the same detections.
                # Entries that are not JSON objects cannot be drawn or
                # summarized and are dropped.
                filtered_detections = [
                    d for d in detections
                    if isinstance(d, dict) and _detection_confidence(d) >= conf_threshold
                ]

                if not filtered_detections:
                    return image, "No objects detected matching detailed specifications", "No detections matching criteria above confidence threshold"

                annotated_img = draw_bounding_boxes(image, filtered_detections)
                summary_text = _build_detection_summary(filtered_detections, mode_val)

                return annotated_img, json.dumps(filtered_detections, indent=2), summary_text

            except json.JSONDecodeError:
                return None, f"❌ Invalid JSON response: {result}", "JSON parsing failed"
            except Exception as e:
                # UI boundary: surface unexpected failures in the result box
                # instead of raising into Gradio.
                return None, f"❌ Error: {str(e)}", "Processing error"

        detect_btn.click(
            process_detection,
            inputs=[image_input, detailed_specifications, confidence_threshold, api_key, model, temperature, detection_mode],
            outputs=[annotated_image, detection_results, detection_summary]
        )

        gr.Markdown("""
        ## 💡 Usage Tips
        - **Specific Mode**: Only detect objects matching your detailed specifications
        - **Include Mode**: Detect your specified criteria plus any other objects found
        - **Exclude Mode**: Detect everything except objects matching your specifications
        
        ### Example Detailed Specifications:
        ```
        crack width more than 2mm
        rust spots larger than 5cm in diameter
        concrete spalling deeper than 1cm
        structural damage with visible deformation
        paint peeling areas greater than 10cm²
        corrosion affecting more than 20% of surface area
        missing bolts or fasteners
        water damage stains larger than 15cm
        ```
        
        - Enter one detailed specification per line
        - Be specific about measurements, sizes, conditions
        - Adjust confidence threshold to filter weak detections
        - Use lower temperature values for consistent results
        - Get your API key from [openrouter.ai](https://openrouter.ai/)
        """)

    return demo

if __name__ == "__main__":
    # Script entry point: runs only when the file is executed directly,
    # not when it is imported.
    print("🚀 Starting Object Detection App...")
    # Build the Blocks UI; the object is kept in the module-level name `demo`.
    demo = create_interface()
    # Launch with share=False and inbrowser=True.
    # NOTE(review): per Gradio's launch() options this should mean no public
    # share link and opening the app in a browser tab — confirm against the
    # installed Gradio version.
    demo.launch(share=False, inbrowser=True)