Spaces:

Agents-MCP-Hackathon
/

ScouterAI

Running

ScouterAI / tools /object_detection_tool.py

Enhance image processing capabilities and update project structure

111afa2 13 days ago

1.64 kB

	import modal
	from smolagents import Tool

	from modal_apps.app import app
	from modal_apps.inference_pipeline import InferencePipelineModalApp


	class ObjectDetectionTool(Tool):
	    """Object-detection tool backed by the remote Modal inference pipeline.

	    Detection is not run in this process: ``forward`` hands the image and the
	    model name to the ``InferencePipelineModalApp`` class looked up on Modal
	    and returns whatever that remote call produces.
	    """

	    # Tool metadata: identifier, usage text, argument schema and result type.
	    name = "object_detection"
	    description = """
	Given an image, detect objects and return bounding boxes.
	The image is a PIL image.
	The output is a list of dictionaries containing the bounding boxes with the following keys:
	- box: a dictionary with the following keys:
	- xmin: a number
	- ymin: a number
	- xmax: a number
	- ymax: a number
	- score: a number between 0 and 1
	- label: a string
	You need to provide the model name to use for object detection.
	The tool returns a list of bounding boxes for all the objects in the image.
	"""

	    inputs = {
	        "image": {"type": "image", "description": "The image to detect objects in"},
	        "model_name": {
	            "type": "string",
	            "description": "The name of the model to use for object detection",
	        },
	    }
	    output_type = "object"

	    def __init__(self):
	        """Initialise the base tool and create a handle to the Modal pipeline class."""
	        super().__init__()
	        # Look the class up by app name + class name, then instantiate the handle.
	        pipeline_cls = modal.Cls.from_name(app.name, InferencePipelineModalApp.__name__)
	        self.modal_app = pipeline_cls()

	    def forward(self, image, model_name: str):
	        """Detect objects in ``image`` with the model called ``model_name``.

	        Args:
	            image: The image to detect objects in.
	            model_name: The name of the model to use for object detection.

	        Returns:
	            The result of the remote ``object-detection`` call, unchanged. Each
	            item is indexed by ``"label"``, ``"score"`` and ``"box"`` below.
	        """
	        detections = self.modal_app.forward.remote(
	            model_name=model_name,
	            task="object-detection",
	            image=image,
	        )
	        # Print every detection before handing the results back.
	        for bbox in detections:
	            print(f"Found bounding box of {bbox['label']} with score: {bbox['score']} at box: {bbox['box']}")
	        return detections