ScouterAI / tools /image_segmentation_tool.py
stevenbucaille's picture
Enhance app.py with improved user interface and instructions, update model ID in llm.py, and add image classification capabilities across various components. Introduce segment anything functionality and refine README for clarity on model capabilities.
518d841
raw
history blame
1.77 kB
import modal
from smolagents import Tool
from modal_apps.app import app
from modal_apps.inference_pipeline import InferencePipelineModalApp
class ImageSegmentationTool(Tool):
    """Tool that segments an image by delegating to a deployed Modal class.

    No inference happens locally: the arguments are handed to the remote
    ``forward`` method of the Modal class bound in ``__init__`` and the
    result is returned to the caller as-is.
    """

    name = "image_segmentation"
    # The text below is kept flush-left on purpose so that the string carries
    # no extra leading whitespace on any of its lines.
    description = """
Given an image, segment the image and return the masks.
The image is a PIL image.
The output is a list of dictionaries containing the masks with the following keys:
- score: an optional number between 0 and 1, can be None.
- label: a string
- mask: a PIL image
You need to provide the model name to use for image segmentation.
The tool returns a list of masks for all the objects in the image.
You also need to provide a score threshold to filter the masks.
"""
    # One entry per ``forward`` argument (besides ``self``), in the same order.
    inputs = {
        "image": {
            "type": "image",
            "description": "The image to segment",
        },
        "model_name": {
            "type": "string",
            "description": "The name of the model to use for image segmentation",
        },
        "threshold": {
            "type": "number",
            "description": "The score threshold of the masks to return",
        },
    }
    output_type = "object"

    def __init__(self):
        """Initialise the base tool and bind a handle to the Modal class."""
        super().__init__()
        # Resolve the class by app name and class name, then instantiate the
        # resolved handle so its methods can be called via ``.remote``.
        pipeline_cls = modal.Cls.from_name(
            app.name,
            InferencePipelineModalApp.__name__,
        )
        self.modal_app = pipeline_cls()

    def forward(
        self,
        image,
        model_name: str,
        threshold: float,
    ):
        """Run image segmentation remotely and return the result unchanged.

        Args:
            image: The image to segment.
            model_name: The name of the model to use for image segmentation.
            threshold: The score threshold of the masks to return; it is
                passed through to the remote call, no filtering is done here.

        Returns:
            Whatever the remote ``forward`` call returns. This method iterates
            over it and reads a ``"label"`` key from every item; per the tool
            description each item is expected to also carry ``score`` and
            ``mask`` (not checked here).
        """
        results = self.modal_app.forward.remote(
            model_name=model_name,
            task="image-segmentation",
            image=image,
            threshold=threshold,
        )

        # Log the raw payload first, then one line per returned item.
        print("Segments: ", results)
        for entry in results:
            print(f"Found segment of {entry['label']}")

        return results