Spaces:
Running
Running
File size: 1,931 Bytes
111afa2 518d841 111afa2 518d841 111afa2 518d841 111afa2 518d841 111afa2 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 |
import modal
from smolagents import Tool
from modal_apps.app import app
from modal_apps.inference_pipeline import InferencePipelineModalApp
class ObjectDetectionTool(Tool):
    """Tool that runs object detection on an image via a remote Modal class.

    The class attributes below (``name``, ``description``, ``inputs`` and
    ``output_type``) declare the tool's interface; ``forward`` performs the
    actual call. The work itself is delegated to the ``forward`` method of the
    Modal class looked up by name in ``__init__``.
    """

    name = "object_detection"
    # NOTE: this text is a runtime string, not a comment; keep it unchanged.
    description = """
Given an image, detect objects and return bounding boxes.
The image is a PIL image.
The output is a list of dictionaries containing the bounding boxes with the following keys:
- box: a dictionary with the following keys:
- xmin: a number
- ymin: a number
- xmax: a number
- ymax: a number
- score: a number between 0 and 1
- label: a string
You need to provide the model name to use for object detection.
The tool returns a list of bounding boxes for all the objects in the image.
You also need to provide a score threshold to filter the bounding boxes.
"""
    # One entry per parameter of ``forward`` (same names).
    inputs = {
        "image": {
            "type": "image",
            "description": "The image to detect objects in",
        },
        "model_name": {
            "type": "string",
            "description": "The name of the model to use for object detection",
        },
        "threshold": {
            "type": "number",
            "description": "The score threshold of the bounding boxes to return",
        },
    }
    output_type = "object"

    def __init__(self):
        """Look up the Modal inference class by name and instantiate it."""
        super().__init__()
        # The class is resolved from the app name and the pipeline class name,
        # then called with no arguments to obtain the handle used by forward().
        pipeline_cls = modal.Cls.from_name(
            app.name,
            InferencePipelineModalApp.__name__,
        )
        self.modal_app = pipeline_cls()

    def forward(
        self,
        image,
        model_name: str,
        threshold: float,
    ):
        """Detect objects in ``image`` by calling the remote pipeline.

        Args:
            image: The image to detect objects in.
            model_name: Name of the model to use for object detection.
            threshold: Score threshold forwarded to the remote pipeline.

        Returns:
            Whatever the remote ``forward`` call returns, unchanged. Each item
            is expected to expose ``label``, ``score`` and ``box`` keys, since
            those are read below.
        """
        run_remote = self.modal_app.forward.remote
        detections = run_remote(
            model_name=model_name,
            task="object-detection",
            image=image,
            threshold=threshold,
        )

        # Print one line per detection before handing the result back.
        # NOTE(review): presumably so the calling agent sees the detections in
        # its captured output; confirm before removing.
        for detection in detections:
            print(f"Found bounding box of {detection['label']} with score: {detection['score']} at box: {detection['box']}")

        return detections
|