Spaces:
Running
Running
File size: 1,730 Bytes
111afa2 7e327f2 111afa2 7e327f2 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 |
from smolagents import AgentImage, Tool
class CroppingTool(Tool):
    """Tool that crops each input image to its matching bounding box.

    ``forward`` pairs the i-th image with the i-th bounding box, clamps the
    box to the image bounds and returns the cropped regions in input order.
    """

    # Metadata attributes; presumably read by the smolagents ``Tool`` base
    # class to describe this tool to an agent -- confirm against smolagents.
    name = "cropping"
    # The text below is part of the runtime value and is kept exactly as-is.
    description = """
Given a list of images and a list of bounding boxes, crop the images to the specified regions.
The images are PIL images.
The bounding boxes are lists of 4 numbers [xmin, ymin, xmax, ymax] for each image.
The output is a list of cropped PIL images.
You can crop multiple images at once.
You need the same number of images and bounding boxes.
"""
    inputs = {
        "images": {
            "type": "array",
            "description": "The images to crop",
        },
        "bboxes": {
            "type": "array",
            "description": "The bounding box coordinates [xmin, ymin, xmax, ymax] for each image",
        },
    }
    output_type = "array"

    def __init__(self):
        """Initialise the tool; only delegates to the base class."""
        super().__init__()

    def setup(self):
        """Do nothing: this tool has no state to prepare before use."""
        pass

    def forward(self, images: list[AgentImage], bboxes: list[list]):
        """Crop every image to its corresponding bounding box.

        Args:
            images: Images to crop. Each must expose ``size`` as
                ``(width, height)`` and a ``crop`` method taking a
                ``(left, upper, right, lower)`` tuple.
            bboxes: One ``[xmin, ymin, xmax, ymax]`` box per image. Values
                are converted with ``int()``, so floats are truncated
                toward zero.

        Returns:
            A list with one cropped image per input pair, in input order.
            A box lying entirely outside its image is clamped to a
            zero-area region on the image border.

        Raises:
            ValueError: If ``images`` and ``bboxes`` differ in length, if a
                box does not hold exactly four values, if a value cannot
                be converted to ``int``, or if a box is inverted
                (``xmax < xmin`` or ``ymax < ymin``).
        """
        if len(images) != len(bboxes):
            raise ValueError("The number of images and bounding boxes must be the same.")

        cropped_images = []
        for index, (image, bbox) in enumerate(zip(images, bboxes)):
            # Materialise first so any iterable of numbers is accepted and
            # its length can be checked before unpacking.
            coords = [int(value) for value in bbox]
            if len(coords) != 4:
                raise ValueError(
                    f"Bounding box at index {index} must have exactly 4 values "
                    f"[xmin, ymin, xmax, ymax], got {len(coords)}."
                )
            xmin, ymin, xmax, ymax = coords

            # Reject inverted boxes here: recent Pillow versions raise a
            # generic error from ``crop`` that does not identify the input.
            if xmax < xmin or ymax < ymin:
                raise ValueError(
                    f"Bounding box at index {index} is inverted: expected "
                    f"xmin <= xmax and ymin <= ymax, got {coords}."
                )

            # Clamp to the image bounds. Clamping is monotonic, so the
            # ordering validated above still holds afterwards.
            width, height = image.size
            left = max(0, min(xmin, width))
            upper = max(0, min(ymin, height))
            right = max(0, min(xmax, width))
            lower = max(0, min(ymax, height))

            cropped_images.append(image.crop((left, upper, right, lower)))
        return cropped_images
|