ScouterAI / tools /cropping_tool.py
stevenbucaille's picture
Enhance image processing capabilities and update project structure
111afa2
raw
history blame
1.73 kB
from smolagents import AgentImage, Tool
class CroppingTool(Tool):
    """Tool that crops each input image to its matching bounding box.

    Images and bounding boxes are paired by position. Each box is converted
    to integers and clamped to the bounds of its image before cropping.
    """

    name = "cropping"
    description = """
Given a list of images and a list of bounding boxes, crop the images to the specified regions.
The images are PIL images.
The bounding boxes are lists of 4 numbers [xmin, ymin, xmax, ymax] for each image.
The output is a list of cropped PIL images.
You can crop multiple images at once.
You need the same number of images and bounding boxes.
"""
    inputs = {
        "images": {
            "type": "array",
            "description": "The images to crop",
        },
        "bboxes": {
            "type": "array",
            "description": "The bounding box coordinates [xmin, ymin, xmax, ymax] for each image",
        },
    }
    output_type = "array"

    def __init__(self):
        super().__init__()

    def setup(self):
        # This tool has nothing to prepare before it is used.
        pass

    def forward(self, images: list[AgentImage], bboxes: list[list]) -> list:
        """Crop every image to its corresponding bounding box.

        Args:
            images: Images to crop. Each must expose ``size`` as
                ``(width, height)`` and a ``crop(box)`` method.
            bboxes: One ``[xmin, ymin, xmax, ymax]`` box per image, in the
                same order as ``images``. Values are converted with ``int``.

        Returns:
            The cropped images, in input order.

        Raises:
            ValueError: If ``images`` and ``bboxes`` differ in length, if a
                box does not contain exactly four values, or if a box is
                inverted (``xmax < xmin`` or ``ymax < ymin``) once clamped
                to its image.
        """
        if len(images) != len(bboxes):
            raise ValueError("The number of images and bounding boxes must be the same.")

        cropped_images = []
        for index, (image, bbox) in enumerate(zip(images, bboxes)):
            # Convert bbox to integers (int() truncates fractional values).
            coords = [int(value) for value in bbox]
            if len(coords) != 4:
                # Report which box is malformed instead of surfacing a bare
                # tuple-unpacking error.
                raise ValueError(
                    f"Bounding box at index {index} must have exactly 4 values "
                    f"[xmin, ymin, xmax, ymax], got {len(coords)}."
                )
            xmin, ymin, xmax, ymax = coords

            # Ensure coordinates are within image bounds.
            width, height = image.size
            left = max(0, min(xmin, width))
            upper = max(0, min(ymin, height))
            right = max(0, min(xmax, width))
            lower = max(0, min(ymax, height))

            # An inverted box cannot be cropped meaningfully; fail here with
            # the offending index rather than deferring to the imaging
            # library's generic coordinate error.
            if right < left or lower < upper:
                raise ValueError(
                    f"Bounding box at index {index} is inverted: {coords} "
                    f"(clamped to {[left, upper, right, lower]} for a "
                    f"{width}x{height} image); expected xmin <= xmax and ymin <= ymax."
                )

            cropped_images.append(image.crop((left, upper, right, lower)))
        return cropped_images