supersolar committed
Commit 2c5ad52 · verified · 1 Parent(s): e73b68b

Create kaggle_gpu_1.py

Files changed (1)
  1. kaggle_gpu_1.py +255 -0
kaggle_gpu_1.py ADDED
@@ -0,0 +1,255 @@
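+ # Pipeline: extract frames from an input video, run Florence-2 open-vocabulary
+ # detection plus SAM segmentation on each frame for the configured text prompts,
+ # blur everything outside the detected masks, and re-encode the processed frames
+ # into a new video under /kaggle/working.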
+ import os
+ from typing import Tuple, Optional
+ import shutil
+ import cv2
+ import numpy as np
+ import spaces
+ import supervision as sv
+ import torch
+ from PIL import Image
+ from tqdm import tqdm
+ from utils.video import generate_unique_name, create_directory, delete_directory
+ from utils.florence import load_florence_model, run_florence_inference, \
+     FLORENCE_DETAILED_CAPTION_TASK, \
+     FLORENCE_CAPTION_TO_PHRASE_GROUNDING_TASK, FLORENCE_OPEN_VOCABULARY_DETECTION_TASK
+ from utils.modes import IMAGE_INFERENCE_MODES, IMAGE_OPEN_VOCABULARY_DETECTION_MODE, \
+     IMAGE_CAPTION_GROUNDING_MASKS_MODE, VIDEO_INFERENCE_MODES
+ from utils.sam import load_sam_image_model, run_sam_inference, load_sam_video_model
+
+ # Select the inference device: the first CUDA GPU. Alternatives kept for reference.
+ # DEVICE = torch.device("cuda")
+ # DEVICE = [torch.device(f'cuda:{i}') for i in range(torch.cuda.device_count())][-1]
+ DEVICE = [torch.device(f'cuda:{i}') for i in range(torch.cuda.device_count())][0]
+ # DEVICE = torch.device("cpu")
+
+ torch.autocast(device_type="cuda", dtype=torch.bfloat16).__enter__()
+ if torch.cuda.get_device_properties(0).major >= 8:
+     torch.backends.cuda.matmul.allow_tf32 = True
+     torch.backends.cudnn.allow_tf32 = True
+
+
+ FLORENCE_MODEL, FLORENCE_PROCESSOR = load_florence_model(device=DEVICE)
+ SAM_IMAGE_MODEL = load_sam_image_model(device=DEVICE)
+ SAM_VIDEO_MODEL = load_sam_video_model(device=DEVICE)
+
+
+ # @title # Video frame extraction
+ import supervision as sv
+ import os
+ import cv2
+ import shutil
+ def extract_video_frames(video_input):
+     # Target directory for extracted frames
+     VIDEO_TARGET_DIRECTORY = '/kaggle/working/frame'
+     # Clear any frames left over from a previous run
+     if os.path.exists(VIDEO_TARGET_DIRECTORY):
+         shutil.rmtree(VIDEO_TARGET_DIRECTORY)
+     os.makedirs(VIDEO_TARGET_DIRECTORY)
+
+     # Video scale factor (1 = keep the original resolution)
+     VIDEO_SCALE_FACTOR = 1
+
+     # Read video metadata (resolution, fps, frame count)
+     video_info = sv.VideoInfo.from_video_path(video_input)
+     print(video_info)
+
+     # Use the video file name (without extension) as a unique name
+     name = os.path.splitext(os.path.basename(video_input))[0]
+
+     # Build the per-video frame directory path
+     frame_directory_path = os.path.join(VIDEO_TARGET_DIRECTORY, name)
+
+     # Create the ImageSink that writes frames to disk
+     frames_sink = sv.ImageSink(
+         target_dir_path=frame_directory_path,
+         image_name_pattern="{:05d}.jpeg"
+     )
+
+     # Frame generator over the input video
+     frames_generator = sv.get_video_frames_generator(video_input)
+
+     # Use a with-statement so the sink is properly closed
+     with frames_sink:
+         # Iterate over every frame
+         for i, frame in enumerate(frames_generator):
+             # Resize the frame if a scale factor is set
+             if VIDEO_SCALE_FACTOR != 1:
+                 frame = cv2.resize(frame, None, fx=VIDEO_SCALE_FACTOR, fy=VIDEO_SCALE_FACTOR)
+
+             # Save the frame
+             frames_sink.save_image(frame)
+     return frame_directory_path, video_info
+
+ # Usage example
+ video_input_path = '/kaggle/input/pinnpong/VS_010.mp4'  # @param {type:"string"}
+ video_frame_dir, video_info = extract_video_frames(video_input_path)
+
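+ # Frames are written to /kaggle/working/frame/<video name>/ as 00000.jpeg, 00001.jpeg, ...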
+ # Text prompts for open-vocabulary detection
+ texts = ['the table', 'all person', 'ball']
+ from PIL import Image
+ import supervision as sv
+
+ def detect_objects_in_image(image_input_path, texts):
+     # Load the image
+     image_input = Image.open(image_input_path)
+
+     # Collect detections for each text prompt
+     detections_list = []
+
+     # Run detection for every text prompt
+     for text in texts:
+         _, result = run_florence_inference(
+             model=FLORENCE_MODEL,
+             processor=FLORENCE_PROCESSOR,
+             device=DEVICE,
+             image=image_input,
+             task=FLORENCE_OPEN_VOCABULARY_DETECTION_TASK,
+             text=text
+         )
+
+         # Build supervision Detections from the Florence-2 result
+         detections = sv.Detections.from_lmm(
+             lmm=sv.LMM.FLORENCE_2,
+             result=result,
+             resolution_wh=image_input.size
+         )
+
+         # Run SAM inference to get segmentation masks
+         detections = run_sam_inference(SAM_IMAGE_MODEL, image_input, detections)
+
+         # Add this prompt's detections to the list
+         detections_list.append(detections)
+
+     # Merge the detections from all prompts
+     detections = sv.Detections.merge(detections_list)
+
+     # Run SAM inference again on the merged detections
+     detections = run_sam_inference(SAM_IMAGE_MODEL, image_input, detections)
+
+     return detections
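+ # Example (single frame) -- illustrative only; assumes frames were already extracted
+ # by extract_video_frames above and that the first frame is named 00000.jpeg:
+ # detections = detect_objects_in_image(
+ #     image_input_path=os.path.join(video_frame_dir, '00000.jpeg'),
+ #     texts=texts
+ # )
+ # print(len(detections), 'objects detected')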
+ # @title # Merge mask and blur: merge_image_with_mask
+ import numpy as np
+ import cv2
+ import os
+ from PIL import Image, ImageFilter
+
+ def merge_image_with_mask(image_input_path, detections, output_folder):
+     # Create the output folder
+     if not os.path.exists(output_folder):
+         os.makedirs(output_folder)
+
+     # Extract the image file name and build the output path
+     image_name = os.path.basename(image_input_path)
+     output_path = os.path.join(output_folder, image_name)
+
+     # Create the mask folder
+     mask_folder = '/kaggle/working/mask'
+     if not os.path.exists(mask_folder):
+         os.makedirs(mask_folder)
+
+     # Combine all instance masks into a single uint8 mask
+     combined_mask = np.zeros_like(detections.mask[0], dtype=np.uint8)
+     for mask in detections.mask:
+         combined_mask += mask
+     combined_mask = np.clip(combined_mask, 0, 255)
+     combined_mask = combined_mask.astype(np.uint8)
+
+     # Slightly expand (dilate) the combined mask
+     kernel = np.ones((6, 6), np.uint8)
+     dilated_mask = cv2.dilate(combined_mask, kernel, iterations=1)
+
+     # Optionally save the dilated mask
+     # mask_path = os.path.join(mask_folder, 'test1.png')
+     # cv2.imwrite(mask_path, dilated_mask * 255)
+
+     # Read the original image
+     original_image = cv2.imread(image_input_path)
+
+     # Optionally read the saved mask back and check dimensions
+     # mask_image = cv2.imread(mask_path)
+     # assert original_image.shape == mask_image.shape, "The images must have the same dimensions."
+
+     # Keep the masked region of the original image sharp
+     masked_image = cv2.bitwise_and(original_image, original_image, mask=dilated_mask)
+     # Blur the whole image with a large kernel
+     blurred_image = cv2.GaussianBlur(original_image, (21, 21), 500)
+     # Composite: sharp pixels inside the dilated mask, blurred pixels everywhere else
+     result = np.where(dilated_mask[:, :, None] > 0, masked_image, blurred_image)
+
+     # Save the merged image
+     cv2.imwrite(output_path, result)
+ # @title # Batch-process a folder with a progress bar: process_images_in_folder(input_folder)
+ from tqdm import tqdm
+ import shutil
+ def process_images_in_folder(input_folder):
+     # Reset the output folder so each run starts clean
+     output_folder = '/kaggle/working/okframe'
+     if os.path.exists(output_folder):
+         shutil.rmtree(output_folder)
+     os.makedirs(output_folder)
+
+     # Collect all image files in the input folder
+     files = [f for f in os.listdir(input_folder) if f.endswith(('.jpg', '.png', '.jpeg'))]
+
+     # Iterate with a tqdm progress bar
+     for filename in tqdm(files, desc="Processing Images"):
+         image_input_path = os.path.join(input_folder, filename)
+
+         # Detect and segment the objects in this frame
+         detections = detect_objects_in_image(
+             image_input_path=image_input_path,
+             texts=texts
+         )
+
+         # Blur the background and save the merged frame
+         merge_image_with_mask(
+             image_input_path=image_input_path,
+             detections=detections,
+             output_folder=output_folder
+         )
+
+ # Usage example
+ video_name = video_input_path.split('/')[-1].split('.')[0]
+ input_folder = f'/kaggle/working/frame/{video_name}'
+ process_images_in_folder(input_folder)
+
+ # @title # Merge all frames into a new video: frames_to_video(frame_folder, video_output_path, video_info)
+ import cv2
+ import os
+ import natsort
+ import numpy as np
+
+ def frames_to_video(frame_folder, video_output_path, video_info):
+     # Collect all frame file names and sort them naturally with natsort
+     frame_files = natsort.natsorted([f for f in os.listdir(frame_folder) if f.endswith(('.jpg', '.png', '.jpeg'))])
+
+     # Create the video writer
+     fourcc = cv2.VideoWriter_fourcc(*'mp4v')  # codec
+     out = cv2.VideoWriter(video_output_path, fourcc, video_info.fps, (video_info.width, video_info.height))
+
+     # Iterate over all frame files
+     for frame_file in frame_files:
+         frame_path = os.path.join(frame_folder, frame_file)
+         frame = cv2.imread(frame_path)
+
+         # Resize the frame if its size does not match the video
+         if frame.shape[:2] != (video_info.height, video_info.width):
+             frame = cv2.resize(frame, (video_info.width, video_info.height))
+
+         # Write the frame to the video
+         out.write(frame)
+
+     # Release the writer
+     out.release()
+
+ # Usage example
+ frame_folder = '/kaggle/working/okframe'
+ video_output_path = '/kaggle/working/output_video.mp4'
+
+ frames_to_video(frame_folder, video_output_path, video_info)
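+ # Note: cv2.VideoWriter re-encodes frames only, so the output video has no audio track.
+ # Optional preview in a notebook (illustrative; assumes an IPython environment):
+ # from IPython.display import Video
+ # Video(video_output_path, embed=True)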