import supervision as sv
from smolagents import AgentImage, Tool


class LabelAnnotatorTool(Tool):
    """Agent tool that draws text labels for a set of detections onto an image.

    The tool wraps ``supervision.LabelAnnotator``. The label texts are read
    from ``detections.metadata["labels"]`` when that entry is present.
    """

    name = "label_annotator"
    description = """
    Given an image and a list of detections, draw labels on the image.
    The image is a PIL image.
    The detections are an object of type supervision.Detections. You can use the task_inference_output_converter tool to obtain the proper format for the detections.
    The output is the image with the labels drawn on it.
    """
    inputs = {
        "image": {
            "type": "image",
            "description": "The image to annotate the labels on.",
        },
        "detections": {
            "type": "object",
            "description": """
            The detections to annotate on the image.
            The detections are an object of type supervision.Detections.
            You can use the task_inference_output_converter tool to obtain the proper format for the detections.
            """,
        },
        "text_position": {
            "type": "string",
            "description": """
            The position of the label relative to the bounding box. The supported positions are:
            - CENTER
            - CENTER_LEFT
            - CENTER_RIGHT
            - TOP_CENTER
            - TOP_LEFT
            - TOP_RIGHT
            - BOTTOM_LEFT
            - BOTTOM_CENTER
            - BOTTOM_RIGHT
            - CENTER_OF_MASS
            """,
        },
    }
    output_type = "image"

    def __init__(self):
        super().__init__()

    def forward(
        self,
        image: AgentImage,
        detections: sv.Detections,
        text_position: str,
    ):
        """Draw the detection labels on ``image`` and return the result.

        Args:
            image: The image to draw on; passed to the annotator as ``scene``.
            detections: The detections whose labels are drawn. Label texts
                are taken from ``detections.metadata["labels"]``; when that
                key is absent, ``None`` is passed and the annotator chooses
                its own default label text.
            text_position: Name of a ``supervision.Position`` member (for
                example ``"TOP_LEFT"``). Surrounding whitespace and letter
                case are ignored.

        Returns:
            The value returned by ``supervision.LabelAnnotator.annotate``.

        Raises:
            ValueError: If ``text_position`` does not name a supported
                position.
        """
        # The value comes from an LLM-generated call, so tolerate
        # "top_left" / " TOP_LEFT " instead of failing on exact-match lookup.
        position_name = text_position.strip().upper()
        try:
            position = sv.Position(position_name)
        except ValueError as err:
            supported = ", ".join(str(member.value) for member in sv.Position)
            raise ValueError(
                f"Unsupported text_position {text_position!r}; "
                f"expected one of: {supported}"
            ) from err

        # Fall back to the annotator's default labels rather than raising a
        # bare KeyError when the detections carry no "labels" metadata.
        labels = detections.metadata.get("labels")

        label_annotator = sv.LabelAnnotator(text_position=position)
        annotated_image = label_annotator.annotate(
            scene=image,
            detections=detections,
            labels=labels,
        )
        return annotated_image