import gradio as gr
import cv2
import torch

# Load the YOLOv7 model via torch.hub. The WongKinYiu/yolov7 hubconf exposes a
# 'custom' entry point that loads local weights, so yolov7.pt must first be
# downloaded (e.g. from the repository's releases page).
model = torch.hub.load('WongKinYiu/yolov7', 'custom', 'yolov7.pt')


def detect_objects(image):
    """Run YOLOv7 on one RGB frame and draw the detections on a copy of it."""
    # Gradio supplies RGB numpy arrays, which autoshaped hub models accept directly.
    results = model(image)  # Perform inference

    # Each detection row is [x1, y1, x2, y2, confidence, class].
    detections = results.xyxy[0].cpu().numpy()

    annotated_image = image.copy()
    for *box, conf, cls in detections:
        # Draw the bounding box and a "label: confidence" tag above it
        x1, y1, x2, y2 = map(int, box)
        cv2.rectangle(annotated_image, (x1, y1), (x2, y2), (255, 0, 0), 2)
        label = f'{model.names[int(cls)]}: {conf:.2f}'
        cv2.putText(annotated_image, label, (x1, y1 - 10),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 0, 0), 2)
    return annotated_image


# Create the Gradio interface
with gr.Blocks() as app:
    gr.Markdown("# YOLOv7 Object Detection App")

    with gr.Tab("Image Detection"):
        image_input = gr.Image(label="Upload Image", type="numpy")
        output_image = gr.Image(label="Detected Objects", type="numpy")
        detect_button = gr.Button("Detect Objects")
        detect_button.click(fn=detect_objects, inputs=image_input,
                            outputs=output_image)

    with gr.Tab("Live Detection"):
        # Stream webcam frames as numpy arrays and annotate each one with the
        # same per-frame detector (sources=/streaming=/.stream assume a recent
        # Gradio 4.x release; older 3.x versions use source="webcam" instead).
        webcam_input = gr.Image(label="Webcam Feed", sources=["webcam"],
                                streaming=True, type="numpy")
        output_frame = gr.Image(label="Live Detected Objects", type="numpy")
        webcam_input.stream(fn=detect_objects, inputs=webcam_input,
                            outputs=output_frame)

# Launch the interface
if __name__ == "__main__":
    app.launch(debug=True)