Spaces:
Runtime error
Runtime error
File size: 2,823 Bytes
7e327f2 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 |
import modal
from transformers import AutoModelForObjectDetection, AutoImageProcessor
import torch
from smolagents import Tool
from .app import app
from .image import image
@app.cls(gpu="T4", image=image)
class RemoteObjectDetectionModalApp:
    """Modal class that runs a Hugging Face object-detection model.

    The model and its image processor are loaded once per container (see
    ``load_model``) instead of on every call, and inference runs on the GPU
    when one is available.
    """

    # Hugging Face model identifier passed to ``from_pretrained``.
    model_name: str = modal.parameter()

    @modal.enter()
    def load_model(self):
        """Load the processor and model once when the container starts."""
        # Fall back to CPU so the class still works without CUDA.
        self.device = torch.device(
            "cuda" if torch.cuda.is_available() else "cpu"
        )
        self.processor = AutoImageProcessor.from_pretrained(self.model_name)
        self.model = AutoModelForObjectDetection.from_pretrained(
            self.model_name
        )
        self.model.to(self.device)
        self.model.eval()

    @modal.method()
    def forward(self, image, threshold: float = 0.5):
        """Detect objects in ``image``.

        Args:
            image: The image to run detection on. Its ``size`` attribute is
                read as ``(width, height)``.
            threshold: Minimum score a detection needs to be returned.

        Returns:
            A list of dicts with keys ``box`` (``[xmin, ymin, xmax, ymax]``),
            ``score`` (float) and ``label`` (string from the model config's
            ``id2label`` mapping).
        """
        # Preprocess image and put the tensors on the same device as the model.
        inputs = self.processor(images=image, return_tensors="pt").to(
            self.device
        )
        with torch.no_grad():
            outputs = self.model(**inputs)

        # Post-processing rescales boxes to the original image size, which it
        # expects as (height, width); ``image.size`` is (width, height).
        target_sizes = torch.tensor([image.size[::-1]], device=self.device)
        results = self.processor.post_process_object_detection(
            outputs, target_sizes=target_sizes, threshold=threshold
        )[0]

        id2label = self.model.config.id2label
        return [
            {
                "box": box.tolist(),  # [xmin, ymin, xmax, ymax]
                "score": score.item(),
                "label": id2label[label.item()],
            }
            for score, label, box in zip(
                results["scores"], results["labels"], results["boxes"]
            )
        ]
class RemoteObjectDetectionTool(Tool):
    """Tool that forwards object-detection requests to the Modal class."""

    name = "object_detection"
    description = """
Given an image, detect objects and return bounding boxes.
The image is a PIL image.
The output is a list of dictionaries containing the bounding boxes with the following keys:
- box: a list of 4 numbers [xmin, ymin, xmax, ymax]
- score: a number between 0 and 1
- label: a string
The bounding boxes are in the format of [xmin, ymin, xmax, ymax].
You need to provide the model name to use for object detection.
The tool returns a list of bounding boxes for all the objects in the image.
"""
    inputs = {
        "image": {
            "type": "image",
            "description": "The image to detect objects in",
        },
        "model_name": {
            "type": "string",
            "description": "The name of the model to use for object detection",
        },
    }
    output_type = "object"

    def __init__(self):
        """Initialise the tool and look up the Modal class by name."""
        super().__init__()
        app_name = app.name
        remote_cls_name = RemoteObjectDetectionModalApp.__name__
        self.tool_class = modal.Cls.from_name(app_name, remote_cls_name)

    def forward(
        self,
        image,
        model_name: str,
    ):
        """Run detection remotely and return the list of bounding boxes.

        Each detection is also printed as a one-line summary.
        """
        remote_instance = self.tool_class(model_name=model_name)
        # Kept on the instance, as callers may read ``self.tool`` afterwards.
        self.tool = remote_instance
        detections = remote_instance.forward.remote(image)
        for bbox in detections:
            summary = f"Found {bbox['label']} with score: {bbox['score']} at box: {bbox['box']}"
            print(summary)
        return detections
|