Spaces:
Runtime error
Runtime error
Create f-colab.py
Browse files- f-colab.py +133 -0
f-colab.py
ADDED
@@ -0,0 +1,133 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
#%cd /content/florence-sam
|
2 |
+
import os
|
3 |
+
from typing import Tuple, Optional
|
4 |
+
import shutil
|
5 |
+
import os
|
6 |
+
import cv2
|
7 |
+
import numpy as np
|
8 |
+
import spaces
|
9 |
+
import supervision as sv
|
10 |
+
import torch
|
11 |
+
from PIL import Image
|
12 |
+
from tqdm import tqdm
|
13 |
+
import sys
|
14 |
+
import json
|
15 |
+
import pickle
|
16 |
+
os.chdir("/content/florence-sam")
|
17 |
+
sys.path.append('/content/florence-sam')
|
18 |
+
from utils.video import generate_unique_name, create_directory, delete_directory
|
19 |
+
from utils.florence import load_florence_model, run_florence_inference, \
|
20 |
+
FLORENCE_DETAILED_CAPTION_TASK, \
|
21 |
+
FLORENCE_CAPTION_TO_PHRASE_GROUNDING_TASK, FLORENCE_OPEN_VOCABULARY_DETECTION_TASK
|
22 |
+
from utils.modes import IMAGE_INFERENCE_MODES, IMAGE_OPEN_VOCABULARY_DETECTION_MODE, \
|
23 |
+
IMAGE_CAPTION_GROUNDING_MASKS_MODE, VIDEO_INFERENCE_MODES
|
24 |
+
from utils.sam import load_sam_image_model, run_sam_inference, load_sam_video_model
|
25 |
+
# Run everything on the first CUDA device. Fail with a clear message instead
# of a bare IndexError when no GPU is visible to the process.
if torch.cuda.device_count() == 0:
    raise RuntimeError("f-colab.py requires at least one CUDA device.")
DEVICE = torch.device("cuda:0")

# Enter bfloat16 autocast for CUDA ops; the context is never exited in this
# script, so it stays active for everything that follows.
torch.autocast(device_type="cuda", dtype=torch.bfloat16).__enter__()
# Allow TF32 matmul / cuDNN kernels when the device reports major version >= 8.
if torch.cuda.get_device_properties(DEVICE).major >= 8:
    torch.backends.cuda.matmul.allow_tf32 = True
    torch.backends.cudnn.allow_tf32 = True

# Load the Florence model/processor pair and the SAM image model onto DEVICE.
FLORENCE_MODEL, FLORENCE_PROCESSOR = load_florence_model(device=DEVICE)
SAM_IMAGE_MODEL = load_sam_image_model(device=DEVICE)
|
36 |
+
|
37 |
+
def _load_pickle(path):
    """Return the object unpickled from the file at *path*."""
    # NOTE: unpickling can execute arbitrary code; only read files written by
    # a trusted earlier step of the pipeline.
    with open(path, 'rb') as handle:
        return pickle.load(handle)


# Inputs handed over from a previous step as pickle files; each is echoed
# to stdout after loading.
texts = _load_pickle('/content/texts.pkl')
print(texts)

output_video = _load_pickle('/content/output_video.pkl')
print(output_video)
|
44 |
+
|
45 |
+
VIDEO_SCALE_FACTOR = 1
VIDEO_TARGET_DIRECTORY = "/content/"
create_directory(directory_path=VIDEO_TARGET_DIRECTORY)

video_input = output_video
# NOTE(review): this hard-coded prompt list replaces whatever `texts` held
# before this point — confirm the override is still wanted.
texts = ['the table', 'men', 'ball']

# Stop here when no video is available; continuing would only fail later
# inside the frame generator with a less helpful error.
if not video_input:
    print("Please upload a video.")
    raise SystemExit(1)

# Only the first frame of the video is read by this script.
frame_generator = sv.get_video_frames_generator(video_input)
frame = next(frame_generator, None)
if frame is None:
    # An exhausted generator would otherwise surface as a bare StopIteration.
    print(f"No frames could be read from {video_input!r}.")
    raise SystemExit(1)
# Convert the frame from BGR to RGB and wrap it in a PIL image.
frame = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
|
59 |
+
|
60 |
+
|
61 |
+
|
62 |
+
|
63 |
+
# Filter the Florence open-vocabulary detections of the first frame.
#
# Outputs of this section:
#   all_ok_bboxes - kept boxes as [[x_min, y_min], [x_max, y_max]]
#   ok_result     - one single-box Florence result dict per kept detection
detections_list = []
width, height = frame.size
all_ok_bboxes = []
half_area = width * height * 0.5

# Boxes, areas and Florence results of every 'the table' detection, kept in
# parallel so the largest table can be selected after all prompts have run.
table_bboxes = []
table_areas = []
table_results = []
given_area = 1000
ok_result = []
for text in texts:
    _, result = run_florence_inference(
        model=FLORENCE_MODEL,
        processor=FLORENCE_PROCESSOR,
        device=DEVICE,
        image=frame,
        task=FLORENCE_OPEN_VOCABULARY_DETECTION_TASK,
        text=text,
    )
    detection = result['<OPEN_VOCABULARY_DETECTION>']
    for bbox, label in zip(detection['bboxes'], detection['bboxes_labels']):
        print(bbox, label)
        # Re-wrap the single detection in the same dict layout as `result`.
        new_result = {'<OPEN_VOCABULARY_DETECTION>': {'bboxes': [bbox], 'bboxes_labels': [label], 'polygons': [], 'polygons_labels': []}}
        print(new_result)
        corners = [[bbox[0], bbox[1]], [bbox[2], bbox[3]]]
        # NOTE(review): 'ping pong ball' and 'table tennis bat' only match if
        # Florence returns exactly those labels for the prompts in `texts` —
        # verify against the prompts in use.
        if label == 'ping pong ball':
            # Keep a ball only when its box area does not exceed given_area.
            area = (bbox[2] - bbox[0]) * (bbox[3] - bbox[1])
            if area <= given_area:
                all_ok_bboxes.append(corners)
                ok_result.append(new_result)
        elif label == 'the table':
            # Record every table candidate; the largest is picked below.
            print('the tablethe table!!!!')
            area = (bbox[2] - bbox[0]) * (bbox[3] - bbox[1])
            # Widen the stored box by 100 on each side horizontally, clamped
            # to the frame so the saved box never leaves the image.
            table_bboxes.append([
                [max(0, bbox[0] - 100), bbox[1]],
                [min(width, bbox[2] + 100), bbox[3]],
            ])
            table_areas.append(area)
            table_results.append(new_result)
        elif label == 'table tennis bat':
            all_ok_bboxes.append(corners)
            ok_result.append(new_result)
        elif label == 'men':
            print('menmne!!!!')
            all_ok_bboxes.append(corners)
            ok_result.append(new_result)

# Pick the table candidate with the largest area.
if table_areas:
    max_area_index = table_areas.index(max(table_areas))
    max_area_bbox = table_bboxes[max_area_index]

    # Keep it only when it covers less than half of the frame.
    if table_areas[max_area_index] < half_area:
        all_ok_bboxes.append(max_area_bbox)
        # Append the result of the selected table itself; `new_result` would
        # be whichever detection happened to be iterated last.
        ok_result.append(table_results[max_area_index])
|
116 |
+
|
117 |
+
# Echo the kept Florence results and persist the kept boxes.
print(ok_result)
with open('/content/all_ok_bboxes.pkl', 'wb') as bbox_file:
    pickle.dump(all_ok_bboxes, bbox_file)

# Turn each kept Florence result into supervision detections, pass them
# through SAM on the same frame, and collect the outputs.
for florence_result in ok_result:
    print(frame.size, florence_result)
    detections = run_sam_inference(
        SAM_IMAGE_MODEL,
        frame,
        sv.Detections.from_lmm(
            lmm=sv.LMM.FLORENCE_2,
            result=florence_result,
            resolution_wh=frame.size,
        ),
    )
    print(detections)
    detections_list.append(detections)

# Persist and echo the collected SAM detections.
with open('/content/detections_list.pkl', 'wb') as detections_file:
    pickle.dump(detections_list, detections_file)
print(detections_list)
|