supersolar committed on
Commit
e93626d
·
verified ·
1 Parent(s): c730836

Update 2.py

Browse files
Files changed (1) hide show
  1. 2.py +125 -177
2.py CHANGED
@@ -1,181 +1,129 @@
1
  import os
2
- from typing import Tuple, Optional
3
  import shutil
4
- import os
5
- import cv2
6
- import numpy as np
7
- import spaces
8
- import supervision as sv
9
- import torch
10
- from PIL import Image
11
- from tqdm import tqdm
12
- from utils.video import generate_unique_name, create_directory, delete_directory
13
- from utils.florencegpu2 import load_florence_model, run_florence_inference, \
14
- FLORENCE_DETAILED_CAPTION_TASK, \
15
- FLORENCE_CAPTION_TO_PHRASE_GROUNDING_TASK, FLORENCE_OPEN_VOCABULARY_DETECTION_TASK
16
- from utils.modes import IMAGE_INFERENCE_MODES, IMAGE_OPEN_VOCABULARY_DETECTION_MODE, \
17
- IMAGE_CAPTION_GROUNDING_MASKS_MODE, VIDEO_INFERENCE_MODES
18
- from utils.sam import load_sam_image_model, run_sam_inference, load_sam_video_model
19
- DEVICE = torch.device("cuda")
20
- DEVICE = [torch.device(f'cuda:{i}') for i in range(torch.cuda.device_count())][-1]
21
-
22
- # DEVICE = torch.device("cpu")
23
-
24
- torch.autocast(device_type="cuda", dtype=torch.bfloat16).__enter__()
25
- if torch.cuda.get_device_properties(0).major >= 8:
26
- torch.backends.cuda.matmul.allow_tf32 = True
27
- torch.backends.cudnn.allow_tf32 = True
28
-
29
-
30
- FLORENCE_MODEL, FLORENCE_PROCESSOR = load_florence_model(device=DEVICE)
31
- SAM_IMAGE_MODEL = load_sam_image_model(device=DEVICE)
32
- SAM_VIDEO_MODEL = load_sam_video_model(device=DEVICE)
33
-
34
-
35
-
36
- texts = ['the table', 'all person','ball']
37
- from PIL import Image
38
- import supervision as sv
39
-
40
- def detect_objects_in_image(image_input_path, texts):
41
- # 加载图像
42
- image_input = Image.open(image_input_path)
43
-
44
- # 初始化检测列表
45
- detections_list = []
46
-
47
- # 对每个文本进行检测
48
- for text in texts:
49
- _, result = run_florence_inference(
50
- model=FLORENCE_MODEL,
51
- processor=FLORENCE_PROCESSOR,
52
- device=DEVICE,
53
- image=image_input,
54
- task=FLORENCE_OPEN_VOCABULARY_DETECTION_TASK,
55
- text=text
56
- )
57
-
58
- # 从结果中构建监督检测对象
59
- detections = sv.Detections.from_lmm(
60
- lmm=sv.LMM.FLORENCE_2,
61
- result=result,
62
- resolution_wh=image_input.size
63
- )
64
-
65
- # 运行 SAM 推理
66
- detections = run_sam_inference(SAM_IMAGE_MODEL, image_input, detections)
67
-
68
- # 将检测结果添加到列表中
69
- detections_list.append(detections)
70
-
71
- # 合并所有检测结果
72
- detections = sv.Detections.merge(detections_list)
73
-
74
- # 再次运行 SAM 推理
75
- detections = run_sam_inference(SAM_IMAGE_MODEL, image_input, detections)
76
-
77
- return detections
78
- # @title #合并遮罩加模糊merge_image_with_mask
79
- import numpy as np
80
  import cv2
81
- import os
82
- from PIL import Image, ImageFilter
83
-
84
- mask_folder = 'mask2'
85
- if not os.path.exists(mask_folder):
86
- os.makedirs(mask_folder)
87
- shutil.rmtree('mask2')
88
- mask_folder = 'mask2'
89
- if not os.path.exists(mask_folder):
90
- os.makedirs(mask_folder)
91
-
92
-
93
-
94
- def merge_image_with_mask(image_input_path, detections, output_folder):
95
- # 创建输出文件夹
96
- if not os.path.exists(output_folder):
97
- os.makedirs(output_folder)
98
-
99
- # 提取图片文件名
100
- image_name = os.path.basename(image_input_path)
101
- output_path = os.path.join(output_folder, image_name)
102
-
103
- # 创建掩码文件夹
104
- mask_folder = 'mask2'
105
-
106
-
107
- # 合并掩码
108
- combined_mask = np.zeros_like(detections.mask[0], dtype=np.uint8)
109
- for mask in detections.mask:
110
- combined_mask += mask
111
- combined_mask = np.clip(combined_mask, 0, 255)
112
- combined_mask = combined_mask.astype(np.uint8)
113
-
114
- # 膨胀掩码
115
- kernel = np.ones((6, 6), np.uint8)
116
- dilated_mask = cv2.dilate(combined_mask, kernel, iterations=1)
117
-
118
- # 保存膨胀后的掩码
119
- mask_path = os.path.join(mask_folder, image_name)
120
- cv2.imwrite(mask_path, dilated_mask * 255)
121
-
122
- # 读取原始图像
123
- original_image = cv2.imread(image_input_path)
124
-
125
- # 读取遮罩图片
126
- #mask_image = cv2.imread(mask_path)
127
-
128
- # 确保原始图片和遮罩图片尺寸一致
129
- #assert original_image.shape == mask_image.shape, "The images must have the same dimensions."
130
-
131
- # 使用掩膜从原始图片中提取部分区域
132
- masked_image = cv2.bitwise_and(original_image, original_image, mask=dilated_mask)
133
- # 将掩膜应用于原始图片
134
- #blurred_image = cv2.GaussianBlur(original_image, (21, 21), 500) # 使用较大的核大小进行模糊
135
- blurred_image = cv2.medianBlur(original_image, 21)
136
- # 将提取的部分区域叠加到模糊后的图片上
137
- blurred_image = cv2.bitwise_and(blurred_image, blurred_image, mask=~dilated_mask)
138
- # 将提取的部分区域叠加到模糊后的图片上
139
- result = np.where(dilated_mask[:, :, None] > 0, masked_image, blurred_image)
140
-
141
- # 保存合并后的图片
142
- cv2.imwrite(output_path, result)
143
- # @title #进度条批量处理文件夹process_images_in_folder(input_folder)
144
- from tqdm import tqdm
145
- import shutil
146
- def process_images_in_folder(input_folder):
147
- # 确保输出文件夹存在
148
- output_folder = 'okframe2'
149
- if not os.path.exists(output_folder):
150
- os.makedirs(output_folder)
151
- shutil.rmtree('okframe2')
152
- output_folder = 'okframe2'
153
- if not os.path.exists(output_folder):
154
- os.makedirs(output_folder)
155
-
156
- # 获取文件夹中的所有文件
157
- files = [f for f in os.listdir(input_folder) if f.endswith('.jpg') or f.endswith('.png') or f.endswith('.jpeg')]
158
-
159
- # 使用 tqdm 显示进度条
160
- for filename in tqdm(files, desc="Gpu 2 Processing Images"):
161
- image_input_path = os.path.join(input_folder, filename)
162
-
163
- # 检测对象
164
- detections = detect_objects_in_image(
165
- image_input_path=image_input_path,
166
- texts=texts
167
- )
168
-
169
- # 合并图像
170
- merge_image_with_mask(
171
- image_input_path=image_input_path,
172
- detections=detections,
173
- output_folder=output_folder
174
- )
175
-
176
- # 使用示例
177
- input_folder = 'frame2'
178
- process_images_in_folder(input_folder)
179
-
180
-
181
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import os
2
+ import subprocess
3
  import shutil
4
+ import pickle
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5
  import cv2
6
+ import re
7
+
8
+ # 设置提示词并获取坐标
9
+ def set_prompt_and_get_coordinates(output_video, texts=['men', 'the table']):
10
+ if isinstance(texts, str):
11
+ texts = texts.split(',')
12
+ texts = [text.strip() for text in texts]
13
+ print(texts)
14
+ with open('/kaggle/texts.pkl', 'wb') as file:
15
+ pickle.dump(texts, file)
16
+ with open('/kaggle/output_video2.pkl', 'wb') as file:
17
+ pickle.dump(output_video, file)
18
+ command = ['python', '/kaggle/florence-sam-kaggle/kaggle_florence_gpu_2.py']
19
+ all_ok_bboxes = subprocess.run(command, capture_output=True, text=True)
20
+ return all_ok_bboxes
21
+
22
+ # 运行 sam2 处理
23
+ def run_sam2(output_video):
24
+ script_path = '/kaggle/florence-sam-kaggle/kaggle_sam2_gpu_2.py'
25
+ command = ['python3', script_path]
26
+ sam2_output = subprocess.run(command, capture_output=True, text=True)
27
+ print(sam2_output)
28
+ return sam2_output
29
+
30
+ # 生成带有音频的视频
31
+ def create_video_with_audio(image_folder, input_video_path):
32
+ image_files = [f for f in os.listdir(image_folder) if f.endswith(('.png', '.jpg', '.jpeg'))]
33
+
34
+ def natural_sort_key(s, _nsre=re.compile('([0-9]+)')):
35
+ return [int(text) if text.isdigit() else text.lower() for text in re.split(_nsre, s)]
36
+
37
+ image_files.sort(key=natural_sort_key)
38
+
39
+ if image_files:
40
+ first_image = cv2.imread(os.path.join(image_folder, image_files[0]))
41
+ height, width, layers = first_image.shape
42
+ else:
43
+ raise ValueError("No valid images found in the folder after skipping the first one.")
44
+
45
+ cap = cv2.VideoCapture(input_video_path)
46
+ fps = cap.get(cv2.CAP_PROP_FPS)
47
+ cap.release()
48
+
49
+ if fps <= 0:
50
+ fps = 24 # 默认帧率
51
+
52
+ output_video_path = os.path.join('/kaggle/working/sam2_videos/', os.path.basename(input_video_path))
53
+ os.makedirs(os.path.dirname(output_video_path), exist_ok=True)
54
+
55
+ fourcc = cv2.VideoWriter_fourcc(*'mp4v')
56
+ video_writer = cv2.VideoWriter('/kaggle/image_sequence_video2.mp4', fourcc, fps, (width, height))
57
+
58
+ for image_file in image_files:
59
+ image_path = os.path.join(image_folder, image_file)
60
+ frame = cv2.imread(image_path)
61
+ video_writer.write(frame)
62
+
63
+ video_writer.release()
64
+
65
+ temp_video_path = '/kaggle/image_sequence_video2.mp4'
66
+ command = [
67
+ 'ffmpeg',
68
+ '-y', # 覆盖输出文件
69
+ '-i', temp_video_path,
70
+ '-i', input_video_path,
71
+ '-c:v', 'copy',
72
+ '-c:a', 'copy',
73
+ '-shortest',
74
+ output_video_path
75
+ ]
76
+
77
+ try:
78
+ subprocess.run(command, check=True)
79
+ except subprocess.CalledProcessError as e:
80
+ print(f"Error running ffmpeg: {e}")
81
+ print(f"Command: {' '.join(command)}")
82
+ raise
83
+
84
+ print(f"Video created successfully: {output_video_path}")
85
+ return output_video_path
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
86
 
87
+ import os
88
+ import re
89
+
90
+ def natural_sort_key(s):
91
+ """生成一个用于自然排序的键"""
92
+ return [int(text) if text.isdigit() else text.lower() for text in re.split('([0-9]+)', s)]
93
+
94
+
95
+ # 处理所有视频文件
96
+ def process_all_videos(source_dir, target_dir, image_folder):
97
+ os.makedirs(target_dir, exist_ok=True)
98
+ video_files = [f for f in os.listdir(source_dir) if f.endswith(('.mp4', '.avi', '.mov'))]
99
+ video_files.sort(key=natural_sort_key)
100
+ # 反转列表
101
+ video_files.reverse()
102
+ print(video_files)
103
+ for video_file in video_files:
104
+ video_path = os.path.join(source_dir, video_file)
105
+ print(f"Processing video: {video_path}")
106
+
107
+ # 设置提示词并获取坐标
108
+ result = set_prompt_and_get_coordinates(video_path, texts="men, the table")
109
+ print(result)
110
+
111
+ # 运行 sam2 处理
112
+ result = run_sam2(video_path)
113
+ print(result.stdout)
114
+
115
+ # 生成带有音频的视频
116
+ output_video_path = create_video_with_audio(image_folder, video_path)
117
+ print(f"Output video: {output_video_path}")
118
+
119
+ # 移动处理后的视频到目标目录
120
+ target_video_path = os.path.join(target_dir, os.path.basename(output_video_path))
121
+ shutil.move(video_path, target_video_path)
122
+ print(f"Moved processed video to: {target_video_path}")
123
+
124
+ # 示例调用
125
+ source_dir = '/kaggle/o_videos'
126
+ target_dir = '/kaggle'
127
+ image_folder = '/kaggle/output2'
128
+
129
+ process_all_videos(source_dir, target_dir, image_folder)