Enhance app.py with an improved user interface and instructions, update the model ID in llm.py, and add image classification capabilities across various components. Introduce Segment Anything functionality and refine the README for clarity on model capabilities.
518d841
import modal
from smolagents import Tool

from modal_apps.app import app
from modal_apps.inference_pipeline import InferencePipelineModalApp


class ObjectDetectionTool(Tool):
    name = "object_detection"
    description = """
    Given an image, detect objects and return bounding boxes.

    The image is a PIL image.
    The output is a list of dictionaries containing the bounding boxes with the following keys:
        - box: a dictionary with the following keys:
            - xmin: a number
            - ymin: a number
            - xmax: a number
            - ymax: a number
        - score: a number between 0 and 1
        - label: a string

    You need to provide the model name to use for object detection.
    The tool returns a list of bounding boxes for all the objects in the image.
    You also need to provide a score threshold to filter the bounding boxes.
    """
    inputs = {
        "image": {
            "type": "image",
            "description": "The image to detect objects in",
        },
        "model_name": {
            "type": "string",
            "description": "The name of the model to use for object detection",
        },
        "threshold": {
            "type": "number",
            "description": "The score threshold of the bounding boxes to return",
        },
    }
    output_type = "object"

    def __init__(self):
        super().__init__()
        # Look up the deployed Modal inference pipeline class and create a remote handle to it.
        self.modal_app = modal.Cls.from_name(app.name, InferencePipelineModalApp.__name__)()

    def forward(
        self,
        image,
        model_name: str,
        threshold: float,
    ):
        # Run the object-detection task remotely on Modal, keeping only boxes above the threshold.
        bboxes = self.modal_app.forward.remote(
            model_name=model_name, task="object-detection", image=image, threshold=threshold
        )
        for bbox in bboxes:
            print(f"Found bounding box of {bbox['label']} with score: {bbox['score']} at box: {bbox['box']}")
        return bboxes
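
For reference, a minimal usage sketch of the tool called directly (smolagents Tool instances are callable and dispatch to forward). The module path, image path, and the facebook/detr-resnet-50 checkpoint name are illustrative assumptions, and the Modal app must already be deployed so modal.Cls.from_name can resolve it.

from PIL import Image

from object_detection_tool import ObjectDetectionTool  # hypothetical module name for this file

tool = ObjectDetectionTool()

# Calling the tool invokes forward() through smolagents' Tool.__call__.
image = Image.open("street.jpg")  # placeholder image path
bboxes = tool(
    image=image,
    model_name="facebook/detr-resnet-50",  # example checkpoint, not prescribed by this file
    threshold=0.5,
)
for bbox in bboxes:
    print(bbox["label"], bbox["score"], bbox["box"])

The same instance can also be passed to an agent, e.g. tools=[ObjectDetectionTool()] when constructing a smolagents CodeAgent, so the agent can call object_detection on its own.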