# ScouterAI / tools / bbox_drawing_tool.py
# Author avatar: stevenbucaille's picture
# Commit message: Add initial project structure with core functionality for image processing agents
# Commit: 7e327f2
# (file viewer links: raw, history, blame)
# File size: 1.74 kB
from typing import List, Union, Dict
from smolagents import Tool, AgentImage
import cv2
import numpy as np
from PIL import Image
class BBoxDrawingTool(Tool):
    """Agent tool that outlines bounding boxes on an image.

    Each entry of ``bboxes`` is a dict whose ``"box"`` value holds
    ``[xmin, ymin, xmax, ymax]``.  Only ``"box"`` is read; the ``"score"``
    and ``"label"`` keys mentioned in ``description`` are not rendered.
    """

    # Tool identifier and natural-language description exposed to the agent.
    name = "bbox_drawing"
    description = """
Given an image and a list of bounding boxes, draw the bounding boxes on the image.
The image is a PIL image.
The bounding boxes are a list of dictionaries with the following keys:
- box: a list of 4 numbers [xmin, ymin, xmax, ymax]
- score: a number between 0 and 1
- label: a string.
The output is the image with the bounding boxes drawn on it.
"""
    # Input schema; the keys must match the parameter names of ``forward``.
    inputs = {
        "image": {
            "type": "image",
            "description": "The image to draw the bounding boxes on",
        },
        "bboxes": {
            "type": "array",
            "description": "The list of bounding boxes to draw on the image",
        },
    }
    output_type = "image"

    def __init__(self):
        super().__init__()

    def forward(
        self,
        image: AgentImage,
        bboxes: List[Dict[str, Union[str, float, List]]],
    ) -> Image.Image:
        """Return a copy of ``image`` with every box in ``bboxes`` outlined.

        Args:
            image: The picture to annotate (a PIL image, possibly wrapped in
                an ``AgentImage``).
            bboxes: Detections to draw; each dict must contain a ``"box"``
                entry with four numbers ``[xmin, ymin, xmax, ymax]``.

        Returns:
            A new PIL image with a rectangle drawn for each box.

        Raises:
            KeyError: If a detection has no ``"box"`` entry.
            ValueError: If a box does not contain exactly four values.
        """
        # Unwrap the agent wrapper so we work on the underlying PIL image.
        if isinstance(image, AgentImage):
            image = image.to_raw()
        # The RGB<->BGR round trip below needs a colour image; grayscale,
        # palette and 1-bit images would otherwise make cvtColor fail.
        if isinstance(image, Image.Image) and image.mode != "RGB":
            image = image.convert("RGB")

        # np.array copies the pixels, so the caller's image is left untouched.
        canvas = cv2.cvtColor(np.array(image), cv2.COLOR_RGB2BGR)
        for detection in bboxes:
            canvas = self.draw_bbox(canvas, detection["box"])
        return Image.fromarray(cv2.cvtColor(canvas, cv2.COLOR_BGR2RGB))

    def draw_bbox(self, image: np.ndarray, bbox: List[int]) -> np.ndarray:
        """Draw one rectangle on a BGR array and return the array.

        Args:
            image: BGR pixel array, as produced in ``forward``.
            bbox: Four numbers ``[xmin, ymin, xmax, ymax]``; each is
                truncated to ``int`` before drawing.

        Returns:
            The array returned by ``cv2.rectangle``.

        Raises:
            ValueError: If ``bbox`` does not contain exactly four values.
        """
        x1, y1, x2, y2 = (int(coord) for coord in bbox)
        # (0, 0, 255) is red in BGR channel order; the outline is 2 px thick.
        return cv2.rectangle(image, (x1, y1), (x2, y2), (0, 0, 255), 2)