|
import gradio as gr |
|
import numpy as np |
|
from vision_agent.tools import * |
|
from pillow_heif import register_heif_opener |
|
from typing import Dict |
|
|
|
|
|
register_heif_opener() |
|
|
|
import vision_agent as va |
|
|
|
def _summarize_detections(detections, height) -> Dict:
    """Tally detections per label and per shelf, plus the mean score per label."""
    # Maps the detector's label strings to the keys used in the summary dict.
    label_keys = {"water bottle": "water_bottles", "beverage can": "beverage_cans"}
    shelves = {
        "top_shelf": dict.fromkeys(label_keys.values(), 0),
        "bottom_shelf": dict.fromkeys(label_keys.values(), 0),
    }
    scores = {key: [] for key in label_keys.values()}

    mid_height = height / 2
    for det in detections:
        # bbox is indexed as [x_min, y_min, x_max, y_max]; the y values are
        # scaled by the image height, i.e. treated as fractions of it.
        # NOTE(review): relies on the detector returning normalized boxes — confirm.
        center_y = ((det["bbox"][1] + det["bbox"][3]) / 2) * height
        shelf = "top_shelf" if center_y < mid_height else "bottom_shelf"

        key = label_keys.get(det["label"])
        if key is None:
            # Any other label still counts toward total_items, but not per class.
            continue
        shelves[shelf][key] += 1
        scores[key].append(det["score"])

    confidence = {
        key: round(sum(values) / len(values) if values else 0, 2)
        for key, values in scores.items()
    }

    return {
        "total_items": len(detections),
        "total_water_bottles": len(scores["water_bottles"]),
        "total_beverage_cans": len(scores["beverage_cans"]),
        "top_shelf": shelves["top_shelf"],
        "bottom_shelf": shelves["bottom_shelf"],
        "confidence": confidence,
    }


def analyze_mixed_boxes(image) -> Dict:
    """
    Analyzes an image containing mixed types of beverages, specifically water bottles and beverage cans.

    1) Converts the uploaded image to a NumPy array (nothing is read from disk).
    2) Uses the 'countgd_object_detection' tool with the prompt 'water bottle, beverage can' to detect items.
    3) Splits detections into a top shelf and bottom shelf by comparing each detection's vertical
       center to the image's vertical midpoint.
    4) Calculates how many water bottles and beverage cans are on each shelf and overall, along with
       average confidence scores.
    5) Overlays bounding boxes on the image and converts the result to a PIL image
       (the annotated image is returned, not written to disk).
    6) Returns a dictionary summarizing the distribution of water bottles and beverage cans.

    Parameters:
        image (PIL.Image): The uploaded image. Anything `np.array` can turn into an
            array with at least two dimensions is accepted.

    Returns:
        dict: Summary of the analysis with keys:
            - total_items (int): total number of detected items
            - total_water_bottles (int): total count of detected water bottles
            - total_beverage_cans (int): total count of detected beverage cans
            - top_shelf (dict): counts of bottles and cans on top shelf
            - bottom_shelf (dict): counts of bottles and cans on bottom shelf
            - confidence (dict): average confidence scores for bottles and cans,
              rounded to two decimals (0 when a class has no detections)
            - annotated_image (PIL.Image): the input with bounding boxes drawn on it

    Raises:
        ValueError: If `image` is None or does not have at least two dimensions.
    """
    if image is None:
        # Fail with a readable message instead of a tuple-unpacking error further down.
        raise ValueError("No image provided: upload an image before running the analysis.")

    image = np.array(image)
    if image.ndim < 2:
        raise ValueError(
            f"Expected an image with at least 2 dimensions, got an array of shape {image.shape}."
        )
    height = image.shape[0]

    # NOTE(review): `countgd_object_detection`, `overlay_bounding_boxes` and `Image`
    # are not defined in this block; presumably they come from the star import of
    # `vision_agent.tools` — confirm that `Image` is actually exported there.
    detections = countgd_object_detection("water bottle, beverage can", image)

    result = _summarize_detections(detections, height)

    annotated_image = overlay_bounding_boxes(image, detections)
    result["annotated_image"] = Image.fromarray(annotated_image)

    return result
|
|
|
|
|
def _analyze_for_interface(image):
    """Adapt `analyze_mixed_boxes` to the two output components of the interface.

    The interface below declares two outputs (a JSON panel and an image), so the
    wrapped function must return two values. `analyze_mixed_boxes` returns one
    dict with the annotated image stored under "annotated_image"; this adapter
    pulls that entry out so the remaining summary goes to the JSON panel and the
    image goes to the image panel.

    Parameters:
        image: The uploaded image, passed through unchanged.

    Returns:
        tuple: (summary dict without the image, annotated image or None if absent).
    """
    # Copy so the dict returned by analyze_mixed_boxes is not mutated.
    summary = dict(analyze_mixed_boxes(image))
    annotated_image = summary.pop("annotated_image", None)
    return summary, annotated_image


# Web UI: one image input, JSON summary plus annotated image as outputs.
iface = gr.Interface(
    fn=_analyze_for_interface,
    inputs=gr.Image(type="pil"),
    outputs=[gr.JSON(), gr.Image(type="pil")],
    title="Beverage Detection Analysis",
    description="Upload an image containing water bottles and beverage cans, and the tool will analyze the distribution on shelves and display an annotated image.",
)

# Launched at module level, as in the original script.
iface.launch()
|
|