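"""First-frame detection stage of the florence-sam-kaggle pipeline.

Runs Florence-2 open-vocabulary detection on the first frame of a pickled input
video, filters the detections with per-label heuristics, refines the kept boxes
into masks with the SAM image model, and pickles the results
(/kaggle/all_ok_bboxes.pkl and /kaggle/detections_list2.pkl) for later use.
"""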
import json
import os
import pickle
import shutil
import sys
from typing import Optional, Tuple

import cv2
import numpy as np
import spaces
import supervision as sv
import torch
from PIL import Image
from tqdm import tqdm
# Run from the project checkout and make its local utils importable
os.chdir("/kaggle/florence-sam-kaggle")
sys.path.append("/kaggle/florence-sam-kaggle")

from utils.video import generate_unique_name, create_directory, delete_directory
from utils.florencegpu2 import load_florence_model, run_florence_inference, \
    FLORENCE_DETAILED_CAPTION_TASK, \
    FLORENCE_CAPTION_TO_PHRASE_GROUNDING_TASK, FLORENCE_OPEN_VOCABULARY_DETECTION_TASK
from utils.modes import IMAGE_INFERENCE_MODES, IMAGE_OPEN_VOCABULARY_DETECTION_MODE, \
    IMAGE_CAPTION_GROUNDING_MASKS_MODE, VIDEO_INFERENCE_MODES
from utils.sam import load_sam_image_model, run_sam_inference, load_sam_video_model
# Use the last available GPU (alternatives left for reference)
#DEVICE = torch.device("cuda")
DEVICE = [torch.device(f'cuda:{i}') for i in range(torch.cuda.device_count())][-1]
#DEVICE = [torch.device(f'cuda:{i}') for i in range(torch.cuda.device_count())][0]

torch.autocast(device_type="cuda", dtype=torch.bfloat16).__enter__()
if torch.cuda.get_device_properties(0).major >= 8:
    torch.backends.cuda.matmul.allow_tf32 = True
    torch.backends.cudnn.allow_tf32 = True
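# Both models are loaded once up front and inherit the bfloat16 autocast context
# entered above; TF32 matmuls/convolutions are only enabled on GPUs with
# compute capability >= 8 (Ampere or newer).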
FLORENCE_MODEL, FLORENCE_PROCESSOR = load_florence_model(device=DEVICE)
SAM_IMAGE_MODEL = load_sam_image_model(device=DEVICE)
# Inputs pickled by an earlier step: the text prompts and the path of the video to process
with open('/kaggle/texts.pkl', 'rb') as file:
    texts = pickle.load(file)
print(texts)
with open('/kaggle/output_video2.pkl', 'rb') as file:
    output_video = pickle.load(file)
print(output_video)
VIDEO_SCALE_FACTOR = 1
VIDEO_TARGET_DIRECTORY = "/kaggle/"
create_directory(directory_path=VIDEO_TARGET_DIRECTORY)
video_input = output_video
#texts = ['the table', 'men', 'ball']
#VIDEO_TARGET_DIRECTORY = "/content/"
if not video_input:
    raise SystemExit("Please upload a video.")  # abort early instead of failing later in the frame generator
# Read only the first frame of the video; detection runs on this single frame
frame_generator = sv.get_video_frames_generator(video_input)
frame = next(frame_generator)
frame = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))

detections_list = []
width, height = frame.size
all_ok_bboxes = []
half_area = width * height * 0.5   # half of the frame area, used to reject oversized table boxes
# Bounding boxes, areas and raw results of every "the table" candidate
table_bboxes = []
table_areas = []
table_results = []   # kept in step with table_bboxes so the selected table's result can be reused below
given_area = 1000    # maximum box area accepted for a 'ping pong ball' detection
ok_result = []
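# For each text prompt, run Florence-2 open-vocabulary detection on the first frame
# and keep detections according to per-label heuristics:
#   - 'ping pong ball': kept only if its box area is at most given_area
#   - 'the table': candidates are collected (widened by 100 px on each side) and only
#     the largest one is kept, provided it covers less than half of the frame
#   - 'table tennis bat' and 'men': always kept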
for text in texts:
    _, result = run_florence_inference(
        model=FLORENCE_MODEL,
        processor=FLORENCE_PROCESSOR,
        device=DEVICE,
        image=frame,
        task=FLORENCE_OPEN_VOCABULARY_DETECTION_TASK,
        text=text,
    )
    #print(result)
    for bbox, label in zip(result['<OPEN_VOCABULARY_DETECTION>']['bboxes'], result['<OPEN_VOCABULARY_DETECTION>']['bboxes_labels']):
        print(bbox, label)
        # Re-wrap each single detection in the Florence result format so it can be handled on its own
        new_result = {'<OPEN_VOCABULARY_DETECTION>': {'bboxes': [bbox], 'bboxes_labels': [label], 'polygons': [], 'polygons_labels': []}}
        print(new_result)
        if label == 'ping pong ball':
            # Area of the current ping pong ball box
            area = (bbox[2] - bbox[0]) * (bbox[3] - bbox[1])
            # Keep it only if the area does not exceed the given threshold
            if area <= given_area:
                all_ok_bboxes.append([[bbox[0], bbox[1]], [bbox[2], bbox[3]]])
                ok_result.append(new_result)
        elif label == 'the table':
            # Collect every table candidate; the largest one is selected after the loop
            print('matched: the table')
            area = (bbox[2] - bbox[0]) * (bbox[3] - bbox[1])
            table_bboxes.append([[bbox[0] - 100, bbox[1]], [bbox[2] + 100, bbox[3]]])
            table_areas.append(area)
            table_results.append(new_result)
        elif label == 'table tennis bat':
            all_ok_bboxes.append([[bbox[0], bbox[1]], [bbox[2], bbox[3]]])
            ok_result.append(new_result)
        elif label == 'men':
            print('matched: men')
            all_ok_bboxes.append([[bbox[0], bbox[1]], [bbox[2], bbox[3]]])
            ok_result.append(new_result)
# Select the largest "the table" detection
if table_areas:
    max_area_index = table_areas.index(max(table_areas))
    max_area_bbox = table_bboxes[max_area_index]
    # Keep the table only if it covers less than half of the frame
    if max(table_areas) < half_area:
        all_ok_bboxes.append(max_area_bbox)
        ok_result.append(table_results[max_area_index])  # use the selected table's own result, not the last detection from the loop
print(ok_result)
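# Save the filtered corner-pair boxes so they can be reused outside this script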
with open('/kaggle/all_ok_bboxes.pkl', 'wb') as file:
    pickle.dump(all_ok_bboxes, file)
# Refine each kept Florence detection with the SAM image model and collect the masked detections
for xyxy in ok_result:
    print(frame.size, xyxy)
    detections = sv.Detections.from_lmm(
        lmm=sv.LMM.FLORENCE_2,
        result=xyxy,
        resolution_wh=frame.size
    )
    detections = run_sam_inference(SAM_IMAGE_MODEL, frame, detections)
    print(detections)
    detections_list.append(detections)

with open('/kaggle/detections_list2.pkl', 'wb') as file:
    pickle.dump(detections_list, file)
print(detections_list)
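# Illustrative only: reload the pickled outputs to confirm what a downstream step
# (for example, one built around load_sam_video_model) would receive. The variable
# names below are hypothetical and not part of the original project.
with open('/kaggle/all_ok_bboxes.pkl', 'rb') as file:
    seed_boxes = pickle.load(file)            # [[x1, y1], [x2, y2]] corner pairs
with open('/kaggle/detections_list2.pkl', 'rb') as file:
    frame_detections = pickle.load(file)      # list of sv.Detections refined by SAM
print(f"saved {len(seed_boxes)} seed boxes and {len(frame_detections)} mask sets")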