File size: 1,730 Bytes
111afa2
7e327f2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
111afa2
7e327f2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
from smolagents import AgentImage, Tool


class CroppingTool(Tool):
    """Agent tool that crops each image in a batch to its own bounding box.

    Images and bounding boxes are paired by position, so both lists must have
    the same length. Boxes are clamped to the image bounds before cropping.
    """

    name = "cropping"
    description = """
        Given a list of images and a list of bounding boxes, crop the images to the specified regions.
        The images are PIL images.
        The bounding boxes are lists of 4 numbers [xmin, ymin, xmax, ymax] for each image.
        The output is a list of cropped PIL images.
        You can crop multiple images at once.
        You need the same number of images and bounding boxes.
    """

    inputs = {
        "images": {
            "type": "array",
            "description": "The images to crop",
        },
        "bboxes": {
            "type": "array",
            "description": "The bounding box coordinates [xmin, ymin, xmax, ymax] for each image",
        },
    }
    output_type = "array"

    def __init__(self):
        super().__init__()

    def setup(self):
        """Prepare the tool for use; cropping needs no resources of its own."""
        # Delegate instead of silently replacing the base hook, so any
        # bookkeeping the base class does in ``setup`` still happens.
        super().setup()

    def forward(self, images: list[AgentImage], bboxes: list[list]) -> list:
        """Crop every image to its matching bounding box.

        Args:
            images: Images to crop; each must expose ``size`` and ``crop``.
            bboxes: One ``[xmin, ymin, xmax, ymax]`` box per image. Values are
                converted with ``int`` and clamped to the image bounds. The
                two x values (and the two y values) may be given in either
                order.

        Returns:
            The cropped images, in the same order as ``images``.

        Raises:
            ValueError: If the two lists differ in length, or if a bounding
                box does not consist of exactly four integer-convertible
                values.
        """
        if len(images) != len(bboxes):
            raise ValueError("The number of images and bounding boxes must be the same.")

        cropped_images = []
        for index, (image, bbox) in enumerate(zip(images, bboxes)):
            # Convert bbox to integers, reporting which box is malformed
            # instead of surfacing a bare unpacking error.
            try:
                xmin, ymin, xmax, ymax = map(int, bbox)
            except ValueError as err:
                raise ValueError(
                    f"Bounding box at index {index} must contain exactly 4 numbers "
                    f"[xmin, ymin, xmax, ymax], got {bbox!r}."
                ) from err

            # Clamp to the image bounds, then order each axis pair so the box
            # is never inverted: a box supplied with swapped corners would
            # otherwise be rejected by the underlying crop call.
            width, height = image.size
            left, right = sorted(max(0, min(x, width)) for x in (xmin, xmax))
            upper, lower = sorted(max(0, min(y, height)) for y in (ymin, ymax))

            # Crop the image
            cropped_images.append(image.crop((left, upper, right, lower)))

        return cropped_images