File size: 3,116 Bytes
dd2ba72
 
 
 
1f353b4
 
 
 
dd2ba72
4735088
dd2ba72
78753d4
 
dd2ba72
1dd705c
d85faf4
bfa5692
1f353b4
 
e51c033
 
 
 
1f353b4
 
dd2ba72
e51c033
1f353b4
e51c033
 
 
dd2ba72
1f353b4
 
 
e51c033
 
1f353b4
e51c033
 
 
 
 
1f353b4
dd2ba72
1f353b4
 
 
dd2ba72
1f353b4
 
 
 
dd2ba72
1f353b4
 
 
 
 
 
dd2ba72
1f353b4
dd2ba72
e51c033
1f353b4
 
 
 
 
 
 
 
 
 
 
 
 
 
dd2ba72
1f353b4
 
 
dd2ba72
1f353b4
 
28eb4e5
1f353b4
 
 
 
 
 
 
 
dd2ba72
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
import gradio as gr
import torch
import cv2
import pytesseract
import numpy as np
from PIL import Image
import sys
import os

from ultralytics import YOLO

# Load the YOLO detection weights once at import time so every request
# reuses the same model instance.
model = YOLO("/home/user/app/best.pt")  # path where best.pt was placed



    
# Frame processing function
def process_frame(frame):
    """Detect license plates in a frame, annotate it, and OCR each plate.

    Args:
        frame: BGR image as a NumPy array of shape (H, W, 3), as produced by
            ``cv2.imread`` / ``cv2.VideoCapture.read``. It is annotated
            in place.

    Returns:
        A ``(frame, text, confidences)`` tuple: the annotated input frame,
        the OCR text of every accepted detection joined by newlines, and the
        matching confidence percentages joined by ``", "``. A ``None`` or
        empty frame is returned unchanged with two empty strings.
    """
    # Nothing to detect on a missing or zero-sized image.
    if frame is None or frame.size == 0:
        return frame, "", ""

    label_map = {0: "Analog", 1: "Digital", 2: "Non-LP"}
    frame_h, frame_w = frame.shape[:2]

    # OCR must read untouched pixels, so keep a copy that never gets drawn on.
    pristine = frame.copy()

    # Ultralytics performs resizing, colour conversion and normalisation
    # itself and reports boxes in the pixel coordinates of the image it was
    # given, so the original frame is passed straight in.
    results = model(frame, augment=False)
    boxes = results[0].boxes

    extracted_texts = []
    confidences = []

    detections = zip(boxes.xyxy.tolist(), boxes.conf.tolist(), boxes.cls.tolist())
    for (bx1, by1, bx2, by2), conf, cls in detections:
        if conf <= 0.5:
            continue

        # Clamp to the image so slicing never wraps around or goes empty
        # because of a box that pokes outside the frame.
        x1 = max(0, min(int(bx1), frame_w))
        y1 = max(0, min(int(by1), frame_h))
        x2 = max(0, min(int(bx2), frame_w))
        y2 = max(0, min(int(by2), frame_h))
        if x2 <= x1 or y2 <= y1:
            continue

        label = label_map.get(int(cls), "Unknown")
        percent = f"{conf * 100:.2f}%"

        # Draw box & label (keep the label inside the image for top-edge boxes)
        cv2.rectangle(frame, (x1, y1), (x2, y2), (255, 0, 0), 2)
        cv2.putText(frame, f"{label}: {percent}", (x1, max(y1 - 10, 15)),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 255, 0), 2)

        # OCR on the clean crop, not the annotated one
        lp_crop = pristine[y1:y2, x1:x2]
        gray = cv2.cvtColor(lp_crop, cv2.COLOR_BGR2GRAY)
        text = pytesseract.image_to_string(gray, config="--psm 6 -l ben")
        extracted_texts.append(text.strip())
        confidences.append(percent)

    return frame, "\n".join(extracted_texts), ", ".join(confidences)


# Input handler
def process_input(input_file):
    """Load an uploaded image or video and run plate detection on one frame.

    Args:
        input_file: The upload handed over by Gradio. May be a filesystem
            path (``str`` / ``os.PathLike``), an object exposing the path as
            ``.name``, or ``None`` when nothing was uploaded.

    Returns:
        A ``(image, text, confidence)`` tuple. On success ``image`` is a PIL
        RGB image with the detections drawn on it; on failure it is ``None``
        and ``text`` holds a short error message.
    """
    if input_file is None:
        return None, "No file uploaded", ""

    # With type="filepath" Gradio passes a plain path string; file-wrapper
    # objects carry the path in ``.name``. Accept both.
    if isinstance(input_file, (str, os.PathLike)):
        file_path = os.fspath(input_file)
    else:
        file_path = input_file.name

    # Case-insensitive so that e.g. "CLIP.MP4" is still treated as video.
    if file_path.lower().endswith(('.mp4', '.avi', '.mov')):
        cap = cv2.VideoCapture(file_path)
        try:
            # Only the first frame of a video is analysed.
            ret, frame = cap.read()
        finally:
            cap.release()
        if not ret:
            return None, "Couldn't read video", ""
    else:
        frame = cv2.imread(file_path)
        if frame is None:
            return None, "Invalid image", ""

    processed_frame, text, confidence = process_frame(frame)
    processed_pil = Image.fromarray(cv2.cvtColor(processed_frame, cv2.COLOR_BGR2RGB))
    return processed_pil, text, confidence

# UI wiring: a single file upload goes in; the annotated image, the OCR text
# and the per-detection confidences come out.
_upload_input = gr.File(type="filepath", label="Upload Image or Video")

_annotated_image = gr.Image(type="pil", label="Detected Output")
_ocr_text = gr.Textbox(label="Detected Text (Bangla)")
_confidence_text = gr.Textbox(label="Confidence (%)")

interface = gr.Interface(
    fn=process_input,
    inputs=_upload_input,
    outputs=[_annotated_image, _ocr_text, _confidence_text],
    title="YOLOv10n License Plate Detector (Bangla)",
    description="Upload an image or video. Detects plates and extracts Bangla text using OCR (CPU).",
)

# Start the Gradio server.
interface.launch()