supersolar committed on
Commit
369648b
·
verified ·
1 Parent(s): e561a91

Create f-colab.py

Browse files
Files changed (1) hide show
  1. f-colab.py +133 -0
f-colab.py ADDED
@@ -0,0 +1,133 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
# Colab script: run Florence-2 open-vocabulary detection on the FIRST frame of
# a video, keep the detections of interest (players, small balls, bats and the
# largest table), then refine each kept box into a mask with SAM.
#
# Inputs  (pickles): /content/texts.pkl, /content/output_video.pkl
# Outputs (pickles): /content/all_ok_bboxes.pkl, /content/detections_list.pkl
#
# SECURITY NOTE: pickle.load executes arbitrary code embedded in the file.
# Only run this against pickles produced by your own notebook session.

#%cd /content/florence-sam
import os
from typing import Tuple, Optional
import shutil
import cv2
import numpy as np
import spaces
import supervision as sv
import torch
from PIL import Image
from tqdm import tqdm
import sys
import json
import pickle

# The project-local `utils` package is resolved relative to the repository
# checkout, so both the working directory and sys.path must be set before the
# imports below.
os.chdir("/content/florence-sam")
sys.path.append('/content/florence-sam')
from utils.video import generate_unique_name, create_directory, delete_directory
from utils.florence import load_florence_model, run_florence_inference, \
    FLORENCE_DETAILED_CAPTION_TASK, \
    FLORENCE_CAPTION_TO_PHRASE_GROUNDING_TASK, FLORENCE_OPEN_VOCABULARY_DETECTION_TASK
from utils.modes import IMAGE_INFERENCE_MODES, IMAGE_OPEN_VOCABULARY_DETECTION_MODE, \
    IMAGE_CAPTION_GROUNDING_MASKS_MODE, VIDEO_INFERENCE_MODES
from utils.sam import load_sam_image_model, run_sam_inference, load_sam_video_model

# The original assigned DEVICE three times; only the last value (the first
# CUDA device) was ever used.
DEVICE = torch.device("cuda:0")

# Enter autocast for the remainder of the process; it is intentionally never
# exited because the whole script runs under bfloat16 autocast.
torch.autocast(device_type="cuda", dtype=torch.bfloat16).__enter__()
if torch.cuda.get_device_properties(0).major >= 8:
    # TF32 matmul/cuDNN kernels are only enabled for compute capability >= 8.
    torch.backends.cuda.matmul.allow_tf32 = True
    torch.backends.cudnn.allow_tf32 = True

FLORENCE_MODEL, FLORENCE_PROCESSOR = load_florence_model(device=DEVICE)
SAM_IMAGE_MODEL = load_sam_image_model(device=DEVICE)

with open('/content/texts.pkl', 'rb') as file:
    texts = pickle.load(file)
    print(texts)

with open('/content/output_video.pkl', 'rb') as file:
    output_video = pickle.load(file)
    print(output_video)

VIDEO_SCALE_FACTOR = 1
VIDEO_TARGET_DIRECTORY = "/content/"
create_directory(directory_path=VIDEO_TARGET_DIRECTORY)

video_input = output_video
# NOTE(review): this hard-coded list overrides the prompts loaded from
# /content/texts.pkl above — confirm that is intended.
texts = ['the table', 'men', 'ball']
if not video_input:
    # Without a video path nothing below can work, so stop here instead of
    # failing later inside the frame generator.
    print("Please upload a video.")
    sys.exit(1)

frame_generator = sv.get_video_frames_generator(video_input)
first_frame = next(frame_generator, None)
if first_frame is None:
    print(f"No frames could be read from {video_input!r}.")
    sys.exit(1)
# The frame is converted from BGR to RGB before being handed to PIL.
frame = Image.fromarray(cv2.cvtColor(first_frame, cv2.COLOR_BGR2RGB))


def _bbox_area(bbox):
    """Return the area of an [x_min, y_min, x_max, y_max] box."""
    return (bbox[2] - bbox[0]) * (bbox[3] - bbox[1])


def _corner_pair(bbox, x_padding=0):
    """Return [[x_min, y_min], [x_max, y_max]], widened by x_padding per side."""
    return [[bbox[0] - x_padding, bbox[1]], [bbox[2] + x_padding, bbox[3]]]


detections_list = []
width, height = frame.size
all_ok_bboxes = []
half_area = width * height * 0.5

# Every "the table" candidate: padded box, area, and its Florence-style result.
table_bboxes = []
table_areas = []
table_results = []
# Largest box area (in pixels) still accepted as a ping pong ball.
given_area = 1000
ok_result = []

for text in texts:
    _, result = run_florence_inference(
        model=FLORENCE_MODEL,
        processor=FLORENCE_PROCESSOR,
        device=DEVICE,
        image=frame,
        task=FLORENCE_OPEN_VOCABULARY_DETECTION_TASK,
        text=text,
    )
    ovd = result['<OPEN_VOCABULARY_DETECTION>']
    for bbox, label in zip(ovd['bboxes'], ovd['bboxes_labels']):
        print(bbox, label)
        # Re-wrap each detection as its own single-box result so it can be
        # converted and segmented independently later on.
        new_result = {'<OPEN_VOCABULARY_DETECTION>': {'bboxes': [bbox], 'bboxes_labels': [label], 'polygons': [], 'polygons_labels': []}}
        print(new_result)
        # NOTE(review): with the prompts above, the 'ping pong ball' and
        # 'table tennis bat' branches only fire if the model returns those
        # exact labels — confirm against actual output.
        if label == 'ping pong ball':
            # Keep only balls whose box is no larger than the area limit.
            if _bbox_area(bbox) <= given_area:
                all_ok_bboxes.append(_corner_pair(bbox))
                ok_result.append(new_result)
        elif label == 'the table':
            print('the tablethe table!!!!')
            # The stored table box is widened by 100 px on each side.
            table_bboxes.append(_corner_pair(bbox, x_padding=100))
            table_areas.append(_bbox_area(bbox))
            table_results.append(new_result)
        elif label == 'table tennis bat':
            all_ok_bboxes.append(_corner_pair(bbox))
            ok_result.append(new_result)
        elif label == 'men':
            print('menmne!!!!')
            all_ok_bboxes.append(_corner_pair(bbox))
            ok_result.append(new_result)

# Pick the table candidate with the largest area.
if table_areas:
    largest_table_area = max(table_areas)
    max_area_index = table_areas.index(largest_table_area)
    max_area_bbox = table_bboxes[max_area_index]

    # Accept it only if it covers less than 50% of the frame.
    if largest_table_area < half_area:
        all_ok_bboxes.append(max_area_bbox)
        # Fix: append the result belonging to THIS table. The original
        # appended `new_result`, i.e. whichever detection was processed last.
        # NOTE(review): the result keeps the un-padded table box, as the
        # per-detection results always did.
        ok_result.append(table_results[max_area_index])

print(ok_result)
with open('/content/all_ok_bboxes.pkl', 'wb') as file:
    pickle.dump(all_ok_bboxes, file)

for single_result in ok_result:
    print(frame.size, single_result)
    detections = sv.Detections.from_lmm(
        lmm=sv.LMM.FLORENCE_2,
        result=single_result,
        resolution_wh=frame.size,
    )
    detections = run_sam_inference(SAM_IMAGE_MODEL, frame, detections)
    print(detections)
    detections_list.append(detections)

with open('/content/detections_list.pkl', 'wb') as file:
    pickle.dump(detections_list, file)
print(detections_list)