Ashoka74 committed (verified)
Commit 9615931 · 1 Parent(s): efd2136

Update gradio_demo.py
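
This update removes the local SAM2 segmentation stage from `process_image`. When no prompt text is supplied, detection now runs DINO-X in prompt-free mode (the `<prompt_free>` token), the class list is built from whatever categories the detector returns, and segmentation masks are decoded from the RLE-encoded masks in the API response rather than predicted by a locally loaded SAM2 checkpoint.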

Files changed (1):
  1. gradio_demo.py (+69, -85)
gradio_demo.py CHANGED
@@ -854,101 +854,85 @@ def process_image(input_image, input_text):
     image_url = client.upload_file(tmpfile.name)
     os.remove(tmpfile.name)

-    # Run DINO-X detection
-    task = DinoxTask(
-        image_url=image_url,
-        prompts=[TextPrompt(text=input_text)]
-    )
-    client.run_task(task)
-    result = task.result
-    objects = result.objects
-
     # Process detection results
     input_boxes = []
     confidences = []
     class_names = []
     class_ids = []

-    for obj in objects:
-        input_boxes.append(obj.bbox)
-        confidences.append(obj.score)
-        cls_name = obj.category.lower().strip()
-        class_names.append(cls_name)
-        class_ids.append(class_name_to_id[cls_name])
-
-    input_boxes = np.array(input_boxes)
-    class_ids = np.array(class_ids)
-
-    # Initialize SAM2
-    torch.autocast(device_type=DEVICE, dtype=torch.bfloat16).__enter__()
-    if torch.cuda.get_device_properties(0).major >= 8:
-        torch.backends.cuda.matmul.allow_tf32 = True
-        torch.backends.cudnn.allow_tf32 = True
-
-    sam2_model = build_sam2(SAM2_MODEL_CONFIG, SAM2_CHECKPOINT, device=DEVICE)
-    sam2_predictor = SAM2ImagePredictor(sam2_model)
-    sam2_predictor.set_image(input_image)
-
-    # sam2_predictor = run_sam_inference(SAM_IMAGE_MODEL, input_image, detections)
-
-    # Get masks from SAM2
-    masks, scores, logits = sam2_predictor.predict(
-        point_coords=None,
-        point_labels=None,
-        box=input_boxes,
-        multimask_output=False,
-    )
-    if masks.ndim == 4:
-        masks = masks.squeeze(1)

-    # Create visualization
-    labels = [f"{class_name} {confidence:.2f}"
-              for class_name, confidence in zip(class_names, confidences)]

-    detections = sv.Detections(
-        xyxy=input_boxes,
-        mask=masks.astype(bool),
-        class_id=class_ids
-    )

-    box_annotator = sv.BoxAnnotator()
-    label_annotator = sv.LabelAnnotator()
-    mask_annotator = sv.MaskAnnotator()
-
-    annotated_frame = input_image.copy()
-    annotated_frame = box_annotator.annotate(scene=annotated_frame, detections=detections)
-    annotated_frame = label_annotator.annotate(scene=annotated_frame, detections=detections, labels=labels)
-    annotated_frame = mask_annotator.annotate(scene=annotated_frame, detections=detections)

-    # Create transparent mask for first detected object
-    if len(detections) > 0:
-        # Get first mask
-        first_mask = detections.mask[0]
-
-        # Get original RGB image
-        img = input_image.copy()
-        H, W, C = img.shape
-
-        # Create RGBA image
-        alpha = np.zeros((H, W, 1), dtype=np.uint8)
-        alpha[first_mask] = 255
-        rgba = np.dstack((img, alpha)).astype(np.uint8)
-
-        # Crop to mask bounds to minimize image size
-        y_indices, x_indices = np.where(first_mask)
-        y_min, y_max = y_indices.min(), y_indices.max()
-        x_min, x_max = x_indices.min(), x_indices.max()
-
-        # Crop the RGBA image
-        cropped_rgba = rgba[y_min:y_max+1, x_min:x_max+1]
-
-        # Set extracted foreground for mask mover
-        mask_mover.set_extracted_fg(cropped_rgba)
-
-        return annotated_frame, cropped_rgba, gr.update(visible=True), gr.update(visible=True)
-
-    return annotated_frame, None, gr.update(visible=False), gr.update(visible=False)


 block = gr.Blocks().queue()
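
Two details of the removed branch above are worth noting: it entered mixed precision with a bare `torch.autocast(...).__enter__()` that was never exited, and it rebuilt the SAM2 model on every call. A minimal sketch of the same box-prompted SAM2 step, with the model built once and the autocast scope handled by a `with` block (the config and checkpoint paths are illustrative placeholders, not necessarily the demo's values):

```python
import numpy as np
import torch
from sam2.build_sam import build_sam2
from sam2.sam2_image_predictor import SAM2ImagePredictor

# Placeholder config/checkpoint paths; the demo supplies its own constants.
SAM2_MODEL_CONFIG = "configs/sam2.1/sam2.1_hiera_l.yaml"
SAM2_CHECKPOINT = "./checkpoints/sam2.1_hiera_large.pt"
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

# Build the model once at startup instead of on every request.
sam2_model = build_sam2(SAM2_MODEL_CONFIG, SAM2_CHECKPOINT, device=DEVICE)
sam2_predictor = SAM2ImagePredictor(sam2_model)

def masks_from_boxes(image: np.ndarray, boxes: np.ndarray) -> np.ndarray:
    """Predict one segmentation mask per (x1, y1, x2, y2) box."""
    # Scope mixed precision so the autocast context is exited cleanly.
    with torch.autocast(device_type=DEVICE, dtype=torch.bfloat16):
        sam2_predictor.set_image(image)
        masks, scores, logits = sam2_predictor.predict(
            point_coords=None,
            point_labels=None,
            box=boxes,
            multimask_output=False,
        )
    if masks.ndim == 4:  # (N, 1, H, W) -> (N, H, W)
        masks = masks.squeeze(1)
    return masks
```

The new version below drops this stage entirely and takes segmentation masks straight from the detection API.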
 
     image_url = client.upload_file(tmpfile.name)
     os.remove(tmpfile.name)

     # Process detection results
     input_boxes = []
+    masks = []
     confidences = []
     class_names = []
     class_ids = []

+    if len(input_text) == 0:
+        task = DinoxTask(
+            image_url=image_url,
+            prompts=[TextPrompt(text="<prompt_free>")],
+            # targets=[DetectionTarget.BBox, DetectionTarget.Mask]
+        )
+
+        client.run_task(task)
+        predictions = task.result.objects
+        classes = [pred.category for pred in predictions]
+        classes = list(set(classes))
+        class_name_to_id = {name: id for id, name in enumerate(classes)}
+        class_id_to_name = {id: name for name, id in class_name_to_id.items()}
+
+        for idx, obj in enumerate(predictions):
+            input_boxes.append(obj.bbox)
+            # Convert the RLE-encoded mask to an np.array using the DDS API
+            masks.append(DetectionTask.rle2mask(DetectionTask.string2rle(obj.mask.counts), obj.mask.size))
+            confidences.append(obj.score)
+            cls_name = obj.category.lower().strip()
+            class_names.append(cls_name)
+            class_ids.append(class_name_to_id[cls_name])
+
+        boxes = np.array(input_boxes)
+        masks = np.array(masks)
+        class_ids = np.array(class_ids)
+        labels = [
+            f"{class_name} {confidence:.2f}"
+            for class_name, confidence
+            in zip(class_names, confidences)
+        ]
+        detections = sv.Detections(
+            xyxy=boxes,
+            mask=masks.astype(bool),
+            class_id=class_ids
+        )

+        box_annotator = sv.BoxAnnotator()
+        label_annotator = sv.LabelAnnotator()
+        mask_annotator = sv.MaskAnnotator()

+        annotated_frame = input_image.copy()
+        annotated_frame = box_annotator.annotate(scene=annotated_frame, detections=detections)
+        annotated_frame = label_annotator.annotate(scene=annotated_frame, detections=detections, labels=labels)
+        annotated_frame = mask_annotator.annotate(scene=annotated_frame, detections=detections)

+        # Create transparent mask for first detected object
+        if len(detections) > 0:
+            # Get first mask
+            first_mask = detections.mask[0]
+
+            # Get original RGB image
+            img = input_image.copy()
+            H, W, C = img.shape
+
+            # Create RGBA image
+            alpha = np.zeros((H, W, 1), dtype=np.uint8)
+            alpha[first_mask] = 255
+            rgba = np.dstack((img, alpha)).astype(np.uint8)
+
+            # Crop to mask bounds to minimize image size
+            y_indices, x_indices = np.where(first_mask)
+            y_min, y_max = y_indices.min(), y_indices.max()
+            x_min, x_max = x_indices.min(), x_indices.max()
+
+            # Crop the RGBA image
+            cropped_rgba = rgba[y_min:y_max+1, x_min:x_max+1]
+
+            # Set extracted foreground for mask mover
+            mask_mover.set_extracted_fg(cropped_rgba)
+
+            return annotated_frame, cropped_rgba, gr.update(visible=True), gr.update(visible=True)


 block = gr.Blocks().queue()
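
In the new branch, `obj.mask` comes back run-length encoded, and the code converts it to a binary array with the DDS SDK helpers `DetectionTask.string2rle` and `DetectionTask.rle2mask`. As a rough sketch of what such a decoder does, assuming an uncompressed COCO-style RLE (column-major pixel order, alternating background/foreground run lengths; not the SDK's exact implementation):

```python
import numpy as np

def rle_to_mask(counts: list[int], size: tuple[int, int]) -> np.ndarray:
    """Decode uncompressed COCO-style RLE into a boolean (H, W) mask.

    counts alternates run lengths of background and foreground pixels,
    starting with background; size is (height, width).
    """
    height, width = size
    flat = np.zeros(height * width, dtype=bool)
    position, value = 0, False
    for run_length in counts:
        flat[position:position + run_length] = value
        position += run_length
        value = not value
    # COCO RLE counts pixels in column-major order: reshape then transpose.
    return flat.reshape(width, height).T
```

Note also that `class_name_to_id` is now rebuilt on each call from the categories the detector actually returned, so the prompt-free path does not depend on a predefined label set. If the extracted cutout needs to be saved rather than passed along, the (h, w, 4) uint8 array maps directly onto a PIL RGBA image:

```python
from PIL import Image

# cropped_rgba is the (h, w, 4) uint8 array produced in process_image.
Image.fromarray(cropped_rgba, mode="RGBA").save("foreground.png")
```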