Spaces:

Agents-MCP-Hackathon
/

ScouterAI

Running

File size: 1,931 Bytes

import modal
from smolagents import Tool

from modal_apps.app import app
from modal_apps.inference_pipeline import InferencePipelineModalApp


class ObjectDetectionTool(Tool):
    """Agent tool that delegates object detection to a remote Modal class.

    The class-level attributes (``name``, ``description``, ``inputs``,
    ``output_type``) are the tool metadata; ``forward`` performs the actual
    call with ``task="object-detection"`` on the Modal class looked up in
    ``__init__``.
    """

    name = "object_detection"
    description = """
        Given an image, detect objects and return bounding boxes.
        The image is a PIL image.
        The output is a list of dictionaries containing the bounding boxes with the following keys:
        - box: a dictionary with the following keys:
            - xmin: a number
            - ymin: a number
            - xmax: a number
            - ymax: a number
        - score: a number between 0 and 1
        - label: a string
        You need to provide the model name to use for object detection.
        The tool returns a list of bounding boxes for all the objects in the image.
        You also need to provide a score threshold to filter the bounding boxes.
    """

    # Keys must line up with the parameter names of ``forward``.
    inputs = {
        "image": {"type": "image", "description": "The image to detect objects in"},
        "model_name": {"type": "string", "description": "The name of the model to use for object detection"},
        "threshold": {"type": "number", "description": "The score threshold of the bounding boxes to return"},
    }
    output_type = "object"

    def __init__(self):
        """Initialise the base tool, then bind an instance of the remote Modal class."""
        super().__init__()
        # Look the class up by app name + class name, then instantiate it.
        remote_cls = modal.Cls.from_name(app.name, InferencePipelineModalApp.__name__)
        self.modal_app = remote_cls()

    def forward(self, image, model_name: str, threshold: float):
        """Run remote object detection, print every result, and return them.

        Args:
            image: The image to run detection on (passed through to the remote call as-is).
            model_name: Name of the detection model the remote pipeline should use.
            threshold: Score threshold forwarded to the remote pipeline.

        Returns:
            Whatever the remote ``forward`` call returned, unmodified. Each
            item is indexed by ``"label"``, ``"score"`` and ``"box"`` below.
        """
        detections = self.modal_app.forward.remote(
            model_name=model_name,
            task="object-detection",
            image=image,
            threshold=threshold,
        )
        # NOTE(review): the per-box print is presumably how the agent sees
        # the detections in its logs - confirm before replacing with logging.
        for bbox in detections:
            print(f"Found bounding box of {bbox['label']} with score: {bbox['score']} at box: {bbox['box']}")
        return detections