# Spaces: Running
from smolagents import AgentImage, Tool | |
class CroppingTool(Tool):
    """Agent tool that crops each image in a batch to its matching bounding box."""

    name = "cropping"
    description = """
    Given a list of images and a list of bounding boxes, crop the images to the specified regions.
    The images are PIL images.
    The bounding boxes are lists of 4 numbers [xmin, ymin, xmax, ymax] for each image.
    The output is a list of cropped PIL images.
    You can crop multiple images at once.
    You need the same number of images and bounding boxes.
    """
    inputs = {
        "images": {
            "type": "array",
            "description": "The images to crop",
        },
        "bboxes": {
            "type": "array",
            "description": "The bounding box coordinates [xmin, ymin, xmax, ymax] for each image",
        },
    }
    output_type = "array"

    def __init__(self):
        super().__init__()

    def setup(self):
        # Nothing to load for cropping; delegate to the base class so it can
        # still perform its own initialization bookkeeping.
        super().setup()

    def forward(self, images: list[AgentImage], bboxes: list[list]) -> list:
        """Crop every image to its corresponding bounding box.

        Args:
            images: Images to crop; each must expose ``size`` and ``crop``.
            bboxes: One ``[xmin, ymin, xmax, ymax]`` box per image. Values are
                converted with ``int()`` and then clamped to the image bounds,
                so a box that sticks out of the image is cut down to the part
                that overlaps it.

        Returns:
            The cropped images, in the same order as ``images``.

        Raises:
            ValueError: If the number of images and boxes differ, if a box does
                not contain exactly 4 values, or if a box has its max
                coordinate smaller than its min coordinate.
        """
        if len(images) != len(bboxes):
            raise ValueError("The number of images and bounding boxes must be the same.")

        cropped_images = []
        for index, (image, bbox) in enumerate(zip(images, bboxes)):
            coords = list(bbox)
            if len(coords) != 4:
                raise ValueError(
                    f"Bounding box at index {index} must have exactly 4 values "
                    f"[xmin, ymin, xmax, ymax], got {len(coords)}: {coords}"
                )
            xmin, ymin, xmax, ymax = map(int, coords)

            # Fail early with an actionable message instead of handing an
            # inverted box to the imaging library.
            if xmax < xmin or ymax < ymin:
                raise ValueError(
                    f"Bounding box at index {index} is inverted: expected "
                    f"xmin <= xmax and ymin <= ymax, got {coords}"
                )

            # Clamp the box so it never extends past the image edges.
            width, height = image.size
            left = max(0, min(xmin, width))
            upper = max(0, min(ymin, height))
            right = max(0, min(xmax, width))
            lower = max(0, min(ymax, height))

            cropped_images.append(image.crop((left, upper, right, lower)))
        return cropped_images