florence-sam-kaggle

Runtime error

App Files Files Community

supersolar committed on Nov 18, 2024

Commit

369648b

verified ·

1 Parent(s): e561a91

Create f-colab.py

Browse files

Files changed (1) hide show

f-colab.py +133 -0

f-colab.py ADDED Viewed

	@@ -0,0 +1,133 @@

+#%cd /content/florence-sam
+import os
+from typing import Tuple, Optional
+import shutil
+import os
+import cv2
+import numpy as np
+import spaces
+import supervision as sv
+import torch
+from PIL import Image
+from tqdm import tqdm
+import sys
+import json
+import pickle
+os.chdir("/content/florence-sam")
+sys.path.append('/content/florence-sam')
+from utils.video import generate_unique_name, create_directory, delete_directory
+from utils.florence import load_florence_model, run_florence_inference, \
+    FLORENCE_DETAILED_CAPTION_TASK, \
+    FLORENCE_CAPTION_TO_PHRASE_GROUNDING_TASK, FLORENCE_OPEN_VOCABULARY_DETECTION_TASK
+from utils.modes import IMAGE_INFERENCE_MODES, IMAGE_OPEN_VOCABULARY_DETECTION_MODE, \
+    IMAGE_CAPTION_GROUNDING_MASKS_MODE, VIDEO_INFERENCE_MODES
+from utils.sam import load_sam_image_model, run_sam_inference, load_sam_video_model
+DEVICE = torch.device("cuda")
+DEVICE = [torch.device(f'cuda:{i}') for i in range(torch.cuda.device_count())][-1]
+DEVICE = [torch.device(f'cuda:{i}') for i in range(torch.cuda.device_count())][0]
+torch.autocast(device_type="cuda", dtype=torch.bfloat16).__enter__()
+if torch.cuda.get_device_properties(0).major >= 8:
+    torch.backends.cuda.matmul.allow_tf32 = True
+    torch.backends.cudnn.allow_tf32 = True
+FLORENCE_MODEL, FLORENCE_PROCESSOR = load_florence_model(device=DEVICE)
+SAM_IMAGE_MODEL = load_sam_image_model(device=DEVICE)
+with open('/content/texts.pkl', 'rb') as file:
+    texts = pickle.load(file)
+print(texts)
+with open('/content/output_video.pkl', 'rb') as file:
+    output_video = pickle.load(file)
+print(output_video)
+VIDEO_SCALE_FACTOR = 1
+VIDEO_TARGET_DIRECTORY = "/content/"
+create_directory(directory_path=VIDEO_TARGET_DIRECTORY)
+video_input= output_video
+texts = ['the table', 'men','ball']
+#VIDEO_TARGET_DIRECTORY = "/content/"
+if not video_input:
+    print("Please upload a video.")
+frame_generator = sv.get_video_frames_generator(video_input)
+frame = next(frame_generator)
+frame = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
+detections_list = []
+width, height = frame.size
+all_ok_bboxes = []
+half_area = width * height * 0.5
+# 存储所有 the table 的边界框和面积
+table_bboxes = []
+table_areas = []
+given_area =1000
+ok_result =[]
+for text in texts:
+    _, result = run_florence_inference(
+        model=FLORENCE_MODEL,
+        processor=FLORENCE_PROCESSOR,
+        device=DEVICE,
+        image=frame,
+        task=FLORENCE_OPEN_VOCABULARY_DETECTION_TASK,
+        text=text    )
+    #print(result)
+    for bbox, label in zip(result['<OPEN_VOCABULARY_DETECTION>']['bboxes'], result['<OPEN_VOCABULARY_DETECTION>']['bboxes_labels']):
+      print(bbox, label)
+      new_result = {'<OPEN_VOCABULARY_DETECTION>': {'bboxes': [bbox], 'bboxes_labels': [label], 'polygons': [], 'polygons_labels': []}}
+      print(new_result)
+      if label == 'ping pong ball':
+          # 计算当前 ping pong ball 的面积
+          area = (bbox[2] - bbox[0]) * (bbox[3] - bbox[1])
+          # 检查面积是否不超过给定边界框的面积
+          if area <= given_area:
+              all_ok_bboxes.append([[bbox[0], bbox[1]], [bbox[2], bbox[3]]])
+              ok_result.append(new_result)
+      elif label == 'the table':
+          # 计算当前 the table 的面积
+          print('the tablethe table!!!!')
+          area = (bbox[2] - bbox[0]) * (bbox[3] - bbox[1])
+          table_bboxes.append([[bbox[0] - 100, bbox[1]], [bbox[2] + 100, bbox[3]]])
+          table_areas.append(area)
+      elif label == 'table tennis bat':
+          all_ok_bboxes.append([[bbox[0], bbox[1]], [bbox[2], bbox[3]]])
+          ok_result.append(new_result)
+      elif label == 'men':
+          print('menmne!!!!')
+          all_ok_bboxes.append([[bbox[0], bbox[1]], [bbox[2], bbox[3]]])
+          ok_result.append(new_result)
+    # 找到面积最大的 the table
+    if table_areas:
+        max_area_index = table_areas.index(max(table_areas))
+        max_area_bbox = table_bboxes[max_area_index]
+        # 检查面积是否超过50%
+        if max(table_areas) < half_area:
+            all_ok_bboxes.append(max_area_bbox)
+            ok_result.append(new_result)
+print(ok_result)
+with open('/content/all_ok_bboxes.pkl', 'wb') as file:
+    pickle.dump(all_ok_bboxes, file)
+for xyxy in ok_result:
+    print(frame.size,xyxy)
+    detections = sv.Detections.from_lmm(
+        lmm=sv.LMM.FLORENCE_2,
+        result=xyxy,
+        resolution_wh=frame.size
+        )
+    detections = run_sam_inference(SAM_IMAGE_MODEL, frame, detections)
+    print(detections)
+    detections_list.append(detections)
+with open('/content/detections_list.pkl', 'wb') as file:
+    pickle.dump(detections_list, file)
+print(detections_list)