Spaces:

Agents-MCP-Hackathon
/

ScouterAI

Running

File size: 1,771 Bytes

import modal
from smolagents import Tool

from modal_apps.app import app
from modal_apps.inference_pipeline import InferencePipelineModalApp


class ImageSegmentationTool(Tool):
    """Agent tool that delegates image segmentation to a remote Modal class.

    The work itself is not done in this process: ``forward`` invokes the
    ``forward`` method of the Modal class looked up in ``__init__`` and hands
    back whatever that call returns.
    """

    # Identifier and natural-language contract exposed to the agent.
    name = "image_segmentation"
    description = """
        Given an image, segment the image and return the masks.
        The image is a PIL image.
        The output is a list of dictionaries containing the masks with the following keys:
        - score: an optional number between 0 and 1, can be None.
        - label: a string
        - mask: a PIL image
        You need to provide the model name to use for image segmentation.
        The tool returns a list of masks for all the objects in the image.
        You also need to provide a score threshold to filter the masks.
    """

    # Input schema; the keys mirror the parameters of ``forward``.
    inputs = {
        "image": {"type": "image", "description": "The image to segment"},
        "model_name": {
            "type": "string",
            "description": "The name of the model to use for image segmentation",
        },
        "threshold": {
            "type": "number",
            "description": "The score threshold of the masks to return",
        },
    }
    output_type = "object"

    def __init__(self):
        """Initialise the base tool and bind a handle to the Modal class."""
        super().__init__()
        # Look the class up by name in the shared Modal app, then instantiate
        # the handle that ``forward`` uses for remote calls.
        pipeline_cls = modal.Cls.from_name(
            app.name,
            InferencePipelineModalApp.__name__,
        )
        self.modal_app = pipeline_cls()

    def forward(
        self,
        image,
        model_name: str,
        threshold: float,
    ):
        """Run the remote "image-segmentation" task and return its result.

        Args:
            image: The image to segment (described to the agent as a PIL image).
            model_name: Name of the segmentation model to run.
            threshold: Score threshold, passed through to the remote call
                unchanged; no filtering happens in this method.

        Returns:
            The value returned by the remote call, unmodified. Each element is
            expected to expose a ``"label"`` key, which is read for logging.
        """
        run_remote = self.modal_app.forward.remote
        results = run_remote(
            model_name=model_name,
            task="image-segmentation",
            image=image,
            threshold=threshold,
        )

        # Diagnostic output: the raw result first, then one line per segment.
        print("Segments: ", results)
        for entry in results:
            label = entry["label"]
            print(f"Found segment of {label}")

        return results