from typing import Dict

import gradio as gr
import numpy as np
from pillow_heif import register_heif_opener
import vision_agent as va
from vision_agent.tools import *
# Imported explicitly (and after the star import) so that ``Image`` is
# guaranteed to be the Pillow module rather than whatever the star import
# happens to re-export.
from PIL import Image

# Register HEIF opener so Pillow can decode .heic/.heif uploads.
register_heif_opener()

_WATER_BOTTLE = "water bottle"
_BEVERAGE_CAN = "beverage can"


def _with_label(detections, label):
    """Return the detections whose ``label`` equals *label*."""
    return [det for det in detections if det["label"] == label]


def _mean_score(detections):
    """Return the mean ``score`` of *detections*, or 0.0 when there are none."""
    if not detections:
        return 0.0
    return sum(det["score"] for det in detections) / len(detections)


def _shelf_counts(detections):
    """Return the per-label item counts for one shelf."""
    return {
        "water_bottles": len(_with_label(detections, _WATER_BOTTLE)),
        "beverage_cans": len(_with_label(detections, _BEVERAGE_CAN)),
    }


def analyze_mixed_boxes(image) -> Dict:
    """
    Analyze an image of water bottles and beverage cans on two shelves.

    1) Converts the supplied image to a numpy array.
    2) Runs 'countgd_object_detection' with the prompt
       'water bottle, beverage can'.
    3) Assigns each detection to the top or bottom shelf by comparing the
       vertical centre of its bounding box with the image's vertical midpoint.
    4) Counts bottles and cans per shelf and overall, and averages the
       detection scores per label.
    5) Draws the bounding boxes on the image with 'overlay_bounding_boxes'.

    Parameters:
        image (PIL.Image): The uploaded image (anything ``np.array`` can turn
            into an array with at least two dimensions).

    Returns:
        dict: Summary of the analysis with keys:
            - total_items (int): total number of detected items
            - total_water_bottles (int): count of detected water bottles
            - total_beverage_cans (int): count of detected beverage cans
            - top_shelf (dict): counts of bottles and cans on the top shelf
            - bottom_shelf (dict): counts of bottles and cans on the bottom shelf
            - confidence (dict): average scores for bottles and cans,
              rounded to two decimals (0.0 when none were detected)
            - annotated_image (PIL.Image): the image with boxes drawn on it

    Raises:
        ValueError: If no image is supplied or it has fewer than two
            dimensions.
    """
    if image is None:
        raise ValueError("No image provided; upload an image to analyze.")

    # Convert the uploaded image to a numpy array
    image = np.array(image)
    if image.ndim < 2:
        raise ValueError("Expected an image with at least two dimensions.")
    height = image.shape[0]

    # Detect water bottles and beverage cans
    detections = countgd_object_detection("water bottle, beverage can", image)

    # Separate detections into top shelf and bottom shelf.
    # NOTE(review): scaling by the image height assumes bbox is
    # [xmin, ymin, xmax, ymax] normalised to 0-1 -- confirm against the
    # detection tool's documentation.
    mid_height = height / 2
    top_shelf_dets = []
    bottom_shelf_dets = []
    for det in detections:
        bbox = det["bbox"]
        center_y = ((bbox[1] + bbox[3]) / 2) * height
        if center_y < mid_height:
            top_shelf_dets.append(det)
        else:
            bottom_shelf_dets.append(det)

    # Count items by label and calculate average confidence
    water_bottles = _with_label(detections, _WATER_BOTTLE)
    beverage_cans = _with_label(detections, _BEVERAGE_CAN)

    # Overlay bounding boxes and convert back to PIL for Gradio output
    annotated_image = overlay_bounding_boxes(image, detections)
    annotated_image_pil = Image.fromarray(annotated_image)

    return {
        "total_items": len(detections),
        "total_water_bottles": len(water_bottles),
        "total_beverage_cans": len(beverage_cans),
        "top_shelf": _shelf_counts(top_shelf_dets),
        "bottom_shelf": _shelf_counts(bottom_shelf_dets),
        "confidence": {
            "water_bottles": round(_mean_score(water_bottles), 2),
            "beverage_cans": round(_mean_score(beverage_cans), 2),
        },
        "annotated_image": annotated_image_pil,  # annotated image for display
    }


def _analyze_for_gradio(image):
    """
    Adapt ``analyze_mixed_boxes`` to the two-output Gradio interface.

    The interface declares a JSON output and an image output, so the handler
    must return two values. The annotated image is pulled out of the result
    dict because a PIL image cannot be rendered by the JSON component.
    """
    if image is None:
        # Surface a readable message in the UI instead of a stack trace.
        raise gr.Error("Please upload an image before submitting.")
    summary = analyze_mixed_boxes(image)
    annotated_image = summary.pop("annotated_image")
    return summary, annotated_image


# Gradio Interface
iface = gr.Interface(
    fn=_analyze_for_gradio,
    inputs=gr.Image(type="pil"),  # allows image upload
    outputs=[gr.JSON(), gr.Image(type="pil")],  # summary and annotated image
    title="Beverage Detection Analysis",
    description="Upload an image containing water bottles and beverage cans, and the tool will analyze the distribution on shelves and display an annotated image.",
)

iface.launch()