from smolagents import AgentImage, Tool


class CroppingTool(Tool):
    """Tool that crops each image in a batch to its own bounding box.

    Images and bounding boxes are paired by position, so both lists must
    have the same length. The result is a list of cropped images in the
    same order as the inputs.
    """

    name = "cropping"
    # Adjacent literals concatenate into a single string constant.
    description = (
        " Given a list of images and a list of bounding boxes, crop the images to the specified regions. "
        "The images are PIL images. "
        "The bounding boxes are lists of 4 numbers [xmin, ymin, xmax, ymax] for each image. "
        "The output is a list of cropped PIL images. "
        "You can crop multiple images at once. "
        "You need the same number of images and bounding boxes. "
    )
    inputs = {
        "images": {
            "type": "array",
            "description": "The images to crop",
        },
        "bboxes": {
            "type": "array",
            "description": "The bounding box coordinates [xmin, ymin, xmax, ymax] for each image",
        },
    }
    output_type = "array"

    def __init__(self):
        super().__init__()

    def setup(self):
        """Run the base-class setup; this tool has no resources of its own to load."""
        # Delegate instead of silently overriding, so whatever bookkeeping the
        # base class performs in setup() (e.g. marking the tool as initialized)
        # still happens.
        super().setup()

    def forward(self, images: list[AgentImage], bboxes: list[list]) -> list:
        """Crop every image to its matching bounding box.

        Args:
            images: Images to crop. Each must expose ``size`` as
                ``(width, height)`` and a ``crop((left, upper, right, lower))``
                method, as PIL images do.
            bboxes: One ``[xmin, ymin, xmax, ymax]`` box per image. Values are
                converted with ``int()``, so floats are truncated.

        Returns:
            The cropped images, in the same order as ``images``.

        Raises:
            ValueError: If ``images`` and ``bboxes`` differ in length, or a
                bounding box does not contain exactly 4 values.
        """
        if len(images) != len(bboxes):
            raise ValueError("The number of images and bounding boxes must be the same.")

        cropped_images = []
        for image, bbox in zip(images, bboxes):
            coords = [int(value) for value in bbox]
            if len(coords) != 4:
                raise ValueError(
                    "Each bounding box must contain exactly 4 values "
                    f"[xmin, ymin, xmax, ymax], got {len(coords)}."
                )
            xmin, ymin, xmax, ymax = coords

            # Tolerate boxes given with swapped corners: an inverted box
            # (right < left or lower < upper) would otherwise be passed to
            # crop() as-is, which recent Pillow versions reject.
            xmin, xmax = sorted((xmin, xmax))
            ymin, ymax = sorted((ymin, ymax))

            # Clamp to the image so the crop never reaches outside the canvas.
            width, height = image.size
            xmin = max(0, min(xmin, width))
            ymin = max(0, min(ymin, height))
            xmax = max(0, min(xmax, width))
            ymax = max(0, min(ymax, height))

            cropped_images.append(image.crop((xmin, ymin, xmax, ymax)))

        return cropped_images