import modal
from smolagents import Tool

from modal_apps.app import app
from modal_apps.inference_pipeline import InferencePipelineModalApp


class ImageClassificationTool(Tool):
    """Agent tool that delegates image classification to a Modal-hosted class.

    The tool itself holds no model: every call to ``forward`` is sent to the
    ``InferencePipelineModalApp`` class looked up by name on the Modal app,
    with the task fixed to ``"image-classification"``.
    """

    name = "image_classification"
    # Built from adjacent literals so the text handed to the agent stays a
    # single line with single-space separators.
    description = (
        " Given an image, classify it into one of the provided labels."
        " The image is a PIL image."
        " You need to provide the model name to use for image classification."
        " The tool returns a list of dictionaries containing the following keys:"
        " - label: the label of the class"
        " - score: the score of the class"
        " You also need to provide the number of top classes to return. "
    )
    inputs = {
        "image": {
            "type": "image",
            "description": "The image to classify",
        },
        "model_name": {
            "type": "string",
            "description": "The name of the model to use for image classification",
        },
        "top_k_classes": {
            "type": "integer",
            "description": "The number of top classes to return",
        },
    }
    output_type = "object"

    def __init__(self):
        """Resolve the remote inference class by name and instantiate a handle."""
        super().__init__()
        remote_cls = modal.Cls.from_name(app.name, InferencePipelineModalApp.__name__)
        self.modal_app = remote_cls()

    def forward(self, image, model_name: str, top_k_classes: int):
        """Run remote image classification and return its result unchanged.

        Args:
            image: The image to classify (described to the agent as a PIL image).
            model_name: Name of the model the remote pipeline should use.
            top_k_classes: Number of top classes requested; sent as ``top_k``.

        Returns:
            Whatever the remote ``forward`` call returns; per the tool
            description, a list of ``{"label", "score"}`` dictionaries.
        """
        request = {
            "model_name": model_name,
            "task": "image-classification",
            "image": image,
            "top_k": top_k_classes,
        }
        return self.modal_app.forward.remote(**request)