florence-sam-kaggle

Runtime error

App Files Files Community

florence-sam-kaggle / kaggle_florence_gpu_2.py

supersolar

Update kaggle_florence_gpu_2.py

3a235a7 verified 8 months ago

raw

history blame contribute delete

4.69 kB

	import os
	from typing import Tuple, Optional
	import shutil
	import os
	import cv2
	import numpy as np
	import spaces
	import supervision as sv
	import torch
	from PIL import Image
	from tqdm import tqdm
	import sys
	import json
	import pickle
	os.chdir("/kaggle/florence-sam-kaggle")
	sys.path.append("/kaggle/florence-sam-kaggle")
	from utils.video import generate_unique_name, create_directory, delete_directory
	from utils.florencegpu2 import load_florence_model, run_florence_inference, \
	FLORENCE_DETAILED_CAPTION_TASK, \
	FLORENCE_CAPTION_TO_PHRASE_GROUNDING_TASK, FLORENCE_OPEN_VOCABULARY_DETECTION_TASK
	from utils.modes import IMAGE_INFERENCE_MODES, IMAGE_OPEN_VOCABULARY_DETECTION_MODE, \
	IMAGE_CAPTION_GROUNDING_MASKS_MODE, VIDEO_INFERENCE_MODES
	from utils.sam import load_sam_image_model, run_sam_inference, load_sam_video_model
	# Run everything on the last visible CUDA device (use index 0 instead to
	# pick the first one). Fail with a clear message when no GPU is present
	# rather than with an IndexError from indexing an empty device list.
	gpu_count = torch.cuda.device_count()
	if gpu_count == 0:
	    raise RuntimeError("No CUDA device is available; this script requires a GPU.")
	DEVICE = torch.device(f'cuda:{gpu_count - 1}')

	# Enter bfloat16 autocast at module level. The context is never exited, so
	# it stays active for everything that runs after this line.
	torch.autocast(device_type="cuda", dtype=torch.bfloat16).__enter__()
	# Enable TF32 only when the GPU has compute capability major >= 8. Query the
	# device the models are placed on, not device 0, so that mixed-GPU machines
	# are handled consistently.
	if torch.cuda.get_device_properties(DEVICE).major >= 8:
	    torch.backends.cuda.matmul.allow_tf32 = True
	    torch.backends.cudnn.allow_tf32 = True

	# Both models are loaded once, on the selected device.
	FLORENCE_MODEL, FLORENCE_PROCESSOR = load_florence_model(device=DEVICE)
	SAM_IMAGE_MODEL = load_sam_image_model(device=DEVICE)

	# Inputs for this script are handed over as pickle files.
	# NOTE(review): pickle.load can execute arbitrary code; only load files that
	# were produced by a trusted earlier step of this pipeline.

	# Text prompts; each entry is later passed to Florence as `text`.
	# Presumably a list of strings — confirm against whatever writes texts.pkl.
	with open('/kaggle/texts.pkl', 'rb') as texts_file:
	    texts = pickle.load(texts_file)
	print(texts)

	# Video source; later handed to the supervision frame generator.
	with open('/kaggle/output_video2.pkl', 'rb') as video_file:
	    output_video = pickle.load(video_file)
	print(output_video)

	VIDEO_SCALE_FACTOR = 1
	VIDEO_TARGET_DIRECTORY = "/kaggle/"
	create_directory(directory_path=VIDEO_TARGET_DIRECTORY)

	video_input = output_video
	if not video_input:
	    # Nothing below can work without a video, so stop here instead of
	    # printing the hint and then crashing inside the frame generator.
	    print("Please upload a video.")
	    sys.exit(1)

	# Only the first frame of the video is used by the rest of the script.
	frame_generator = sv.get_video_frames_generator(video_input)
	frame = next(frame_generator, None)
	if frame is None:
	    # The generator yielded nothing; report it rather than leaking a bare
	    # StopIteration.
	    sys.exit(f"No frames could be read from {video_input!r}.")
	# Convert the frame from BGR to RGB and wrap it as a PIL image.
	frame = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))




	# Run Florence open-vocabulary detection on the first frame for every text
	# prompt and keep only the detections of interest:
	#   * 'ping pong ball'   - kept when its box area is at most `given_area`
	#   * 'table tennis bat' - always kept
	#   * 'men'              - always kept
	#   * 'the table'        - only the largest one is kept, and only when it
	#                          covers less than half of the frame
	# Outputs: `all_ok_bboxes` ([[x1, y1], [x2, y2]] per kept detection, dumped
	# to /kaggle/all_ok_bboxes.pkl) and `ok_result` (one single-detection
	# Florence result dict per kept detection, consumed further down).
	detections_list = []
	width, height = frame.size
	all_ok_bboxes = []
	half_area = width * height * 0.5

	# Padded bounding box, area and Florence result of every 'the table'
	# detection; the three lists are index-aligned.
	table_bboxes = []
	table_areas = []
	table_results = []
	# Largest box area a 'ping pong ball' detection may have to be kept.
	given_area = 1000
	ok_result = []
	for text in texts:
	    _, result = run_florence_inference(
	        model=FLORENCE_MODEL,
	        processor=FLORENCE_PROCESSOR,
	        device=DEVICE,
	        image=frame,
	        task=FLORENCE_OPEN_VOCABULARY_DETECTION_TASK,
	        text=text,
	    )
	    found = result['<OPEN_VOCABULARY_DETECTION>']
	    for bbox, label in zip(found['bboxes'], found['bboxes_labels']):
	        print(bbox, label)
	        # Re-wrap this single detection in the Florence result layout so it
	        # can be converted to supervision detections on its own later.
	        new_result = {'<OPEN_VOCABULARY_DETECTION>': {'bboxes': [bbox], 'bboxes_labels': [label], 'polygons': [], 'polygons_labels': []}}
	        print(new_result)
	        if label == 'ping pong ball':
	            # Keep the ball only when its box is small enough.
	            area = (bbox[2] - bbox[0]) * (bbox[3] - bbox[1])
	            if area <= given_area:
	                all_ok_bboxes.append([[bbox[0], bbox[1]], [bbox[2], bbox[3]]])
	                ok_result.append(new_result)
	        elif label == 'the table':
	            # Collect every table candidate; the winner is chosen after all
	            # prompts have been processed. The stored box is widened by
	            # 100 px on the left and on the right.
	            print('the tablethe table!!!!')
	            area = (bbox[2] - bbox[0]) * (bbox[3] - bbox[1])
	            table_bboxes.append([[bbox[0] - 100, bbox[1]], [bbox[2] + 100, bbox[3]]])
	            table_areas.append(area)
	            table_results.append(new_result)
	        elif label == 'table tennis bat':
	            all_ok_bboxes.append([[bbox[0], bbox[1]], [bbox[2], bbox[3]]])
	            ok_result.append(new_result)
	        elif label == 'men':
	            print('menmne!!!!')
	            all_ok_bboxes.append([[bbox[0], bbox[1]], [bbox[2], bbox[3]]])
	            ok_result.append(new_result)

	# Pick the table with the largest area.
	if table_areas:
	    max_area_index = table_areas.index(max(table_areas))
	    max_area_bbox = table_bboxes[max_area_index]

	    # Reject it when it covers half of the frame or more.
	    if table_areas[max_area_index] < half_area:
	        all_ok_bboxes.append(max_area_bbox)
	        # Append the result that belongs to the selected table. The loop
	        # variable `new_result` would be whichever detection happened to be
	        # iterated last, which is generally not the table.
	        ok_result.append(table_results[max_area_index])

	print(ok_result)
	with open('/kaggle/all_ok_bboxes.pkl', 'wb') as file:
	    pickle.dump(all_ok_bboxes, file)

	# Convert each kept Florence result into supervision detections, pass them
	# through run_sam_inference, and collect the outcome.
	# NOTE(review): run_sam_inference presumably attaches segmentation masks to
	# the detections — confirm in utils.sam.
	for florence_result in ok_result:
	    print(frame.size, florence_result)
	    box_detections = sv.Detections.from_lmm(
	        lmm=sv.LMM.FLORENCE_2,
	        result=florence_result,
	        resolution_wh=frame.size,
	    )
	    detections = run_sam_inference(SAM_IMAGE_MODEL, frame, box_detections)
	    print(detections)
	    detections_list.append(detections)

	# Persist the collected detections as a pickle file.
	with open('/kaggle/detections_list2.pkl', 'wb') as out_file:
	    pickle.dump(detections_list, out_file)
	print(detections_list)