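"""Gradio app: detect the front and back of a license card with YOLO, crop the
detected regions, and pass each crop to a (stubbed) Vision AI extraction API."""
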
import gradio as gr
import torch
import cv2
import numpy as np
from PIL import Image
from ultralytics import YOLO
import json


model_path = "best.pt" 
model = YOLO(model_path)

def preprocess_image(image):
    """Apply enhancement filters and resize the image before detection."""
    image = np.array(image)

    # Enhancement: tone down intensity, denoise, then sharpen
    image = cv2.convertScaleAbs(image, alpha=0.8, beta=0)  # Scale pixel values down (alpha < 1 darkens)
    image = cv2.GaussianBlur(image, (3, 3), 0)  # Light denoising
    kernel = np.array([[0, -1, 0], [-1, 5, -1], [0, -1, 0]])  # Sharpening kernel
    image = cv2.filter2D(image, -1, kernel)

    # Resize to a fixed width of 800 px, preserving aspect ratio
    height, width = image.shape[:2]
    new_width = 800
    new_height = int((new_width / width) * height)
    image = cv2.resize(image, (new_width, new_height))

    return image
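
# Quick sanity check for preprocess_image (illustrative only; "sample.jpg" is a
# hypothetical local path, not part of this app):
#
#     out = preprocess_image(Image.open("sample.jpg"))
#     print(out.shape)  # (new_height, 800, 3) for an RGB input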

def imageRotation(image):
    """Placeholder for rotation/deskew logic; currently returns the image unchanged."""
    return image

def vision_ai_api(image, label):
    """Dummy function simulating API call. Returns dummy JSON response."""
    return {
        "label": label,
        "extracted_data": {
            "name": "John Doe",
            "dob": "01-01-1990",
            "id_number": "1234567890"
        }
    }
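
# A minimal sketch of what a live integration could look like, assuming a
# hypothetical REST endpoint (VISION_API_URL) that accepts a base64-encoded
# image; the endpoint, payload shape, and response format are all assumptions:
#
#     import base64, io, requests
#
#     def vision_ai_api_live(image, label, url=VISION_API_URL):
#         buf = io.BytesIO()
#         image.save(buf, format="PNG")
#         payload = {
#             "label": label,
#             "image_b64": base64.b64encode(buf.getvalue()).decode("ascii"),
#         }
#         resp = requests.post(url, json=payload, timeout=30)
#         resp.raise_for_status()  # Surface HTTP errors early
#         return resp.json()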

def predict(image):
    """Run detection, crop each detected side, and query the Vision AI stub."""
    image = preprocess_image(image)  # Apply preprocessing before detection

    results = model(image, conf=0.85)  # High threshold to suppress false positives
    detected_classes = set()
    labels = []
    cropped_images = {}

    for result in results:
        for box in result.boxes:
            x1, y1, x2, y2 = map(int, box.xyxy[0])
            conf = float(box.conf[0])  # Convert tensor to float for formatting
            cls = int(box.cls[0])
            class_name = model.names[cls]

            detected_classes.add(class_name)
            labels.append(f"{class_name} {conf:.2f}")

            # Crop detected region
            cropped = image[y1:y2, x1:x2]
            cropped_pil = Image.fromarray(cropped)

            # Call Vision AI API separately for front & back
            api_response = vision_ai_api(cropped_pil, class_name)

            # Store cropped images & API response
            cropped_images[class_name] = {
                "image": cropped_pil,
                "api_response": json.dumps(api_response, indent=4)
            }

    # Identify missing classes
    possible_classes = {"front", "back"}
    missing_classes = possible_classes - detected_classes
    if missing_classes:
        labels.append(f"Missing: {', '.join(missing_classes)}")

    # Prepare Gradio outputs (separate front & back images and responses)
    front_image = cropped_images.get("front", {}).get("image", None)
    back_image = cropped_images.get("back", {}).get("image", None)

    front_response = cropped_images.get("front", {}).get("api_response", "{}")
    back_response = cropped_images.get("back", {}).get("api_response", "{}")

    return front_image, front_response, back_image, back_response, "\n".join(labels)

# Gradio interface: front/back crops, their API responses, and the detection labels
iface = gr.Interface(
    fn=predict,
    inputs=gr.Image(type="numpy", label="License Card Image"),
    outputs=[
        gr.Image(label="Front Crop"),
        gr.Textbox(label="Front API Response"),
        gr.Image(label="Back Crop"),
        gr.Textbox(label="Back API Response"),
        gr.Textbox(label="Detection Labels"),
    ],
    title="License Field Detection (Front & Back Card)",
    description="Detect the front & back of a license card, crop the images, and call the Vision AI API separately for each.",
)

if __name__ == "__main__":
    iface.launch()
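
# Note: launch() serves the app locally by default; passing share=True creates
# a temporary public Gradio link, which can be handy for quick demos:
#     iface.launch(share=True)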