supersolar committed
Commit 2c5ad52 · verified · 1 Parent(s): e73b68b

Create kaggle_gpu_1.py

Files changed (1)
  1. kaggle_gpu_1.py +255 -0
kaggle_gpu_1.py ADDED
@@ -0,0 +1,255 @@
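+ # Pipeline: extract frames from an input video, run Florence-2 open-vocabulary
+ # detection plus SAM segmentation on each frame for the configured text prompts,
+ # blur everything outside the detected masks, and re-encode the processed frames
+ # into a new video under /kaggle/working.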
+ import os
+ from typing import Tuple, Optional
+ import shutil
+ import cv2
+ import numpy as np
+ import spaces
+ import supervision as sv
+ import torch
+ from PIL import Image
+ from tqdm import tqdm
+ from utils.video import generate_unique_name, create_directory, delete_directory
+ from utils.florence import load_florence_model, run_florence_inference, \
+     FLORENCE_DETAILED_CAPTION_TASK, \
+     FLORENCE_CAPTION_TO_PHRASE_GROUNDING_TASK, FLORENCE_OPEN_VOCABULARY_DETECTION_TASK
+ from utils.modes import IMAGE_INFERENCE_MODES, IMAGE_OPEN_VOCABULARY_DETECTION_MODE, \
+     IMAGE_CAPTION_GROUNDING_MASKS_MODE, VIDEO_INFERENCE_MODES
+ from utils.sam import load_sam_image_model, run_sam_inference, load_sam_video_model
+
+ # Select the inference device: the first CUDA GPU. Alternatives kept for reference.
+ # DEVICE = torch.device("cuda")
+ # DEVICE = [torch.device(f'cuda:{i}') for i in range(torch.cuda.device_count())][-1]
+ DEVICE = [torch.device(f'cuda:{i}') for i in range(torch.cuda.device_count())][0]
+ # DEVICE = torch.device("cpu")
+
+ torch.autocast(device_type="cuda", dtype=torch.bfloat16).__enter__()
+ if torch.cuda.get_device_properties(0).major >= 8:
+     torch.backends.cuda.matmul.allow_tf32 = True
+     torch.backends.cudnn.allow_tf32 = True
+
+
+ FLORENCE_MODEL, FLORENCE_PROCESSOR = load_florence_model(device=DEVICE)
+ SAM_IMAGE_MODEL = load_sam_image_model(device=DEVICE)
+ SAM_VIDEO_MODEL = load_sam_video_model(device=DEVICE)
+
+
+ # @title # Video frame extraction
+ import supervision as sv
+ import os
+ import cv2
+ import shutil
+ def extract_video_frames(video_input):
+     # Target directory for extracted frames
+     VIDEO_TARGET_DIRECTORY = '/kaggle/working/frame'
+     # Clear any frames left over from a previous run
+     if os.path.exists(VIDEO_TARGET_DIRECTORY):
+         shutil.rmtree(VIDEO_TARGET_DIRECTORY)
+     os.makedirs(VIDEO_TARGET_DIRECTORY)
+
+     # Video scale factor (1 = keep the original resolution)
+     VIDEO_SCALE_FACTOR = 1
+
+     # Read video metadata (resolution, fps, frame count)
+     video_info = sv.VideoInfo.from_video_path(video_input)
+     print(video_info)
+
+     # Use the video file name (without extension) as a unique name
+     name = os.path.splitext(os.path.basename(video_input))[0]
+
+     # Build the per-video frame directory path
+     frame_directory_path = os.path.join(VIDEO_TARGET_DIRECTORY, name)
+
+     # Create the ImageSink that writes frames to disk
+     frames_sink = sv.ImageSink(
+         target_dir_path=frame_directory_path,
+         image_name_pattern="{:05d}.jpeg"
+     )
+
+     # Frame generator over the input video
+     frames_generator = sv.get_video_frames_generator(video_input)
+
+     # Use a with-statement so the sink is properly closed
+     with frames_sink:
+         # Iterate over every frame
+         for i, frame in enumerate(frames_generator):
+             # Resize the frame if a scale factor is set
+             if VIDEO_SCALE_FACTOR != 1:
+                 frame = cv2.resize(frame, None, fx=VIDEO_SCALE_FACTOR, fy=VIDEO_SCALE_FACTOR)
+
+             # Save the frame
+             frames_sink.save_image(frame)
+     return frame_directory_path, video_info
+
+ # Usage example
+ video_input_path = '/kaggle/input/pinnpong/VS_010.mp4'  # @param {type:"string"}
+ video_frame_dir, video_info = extract_video_frames(video_input_path)
+
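+ # Frames are written to /kaggle/working/frame/<video name>/ as 00000.jpeg, 00001.jpeg, ...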
+ # Text prompts for open-vocabulary detection
+ texts = ['the table', 'all person', 'ball']
+ from PIL import Image
+ import supervision as sv
+
+ def detect_objects_in_image(image_input_path, texts):
+     # Load the image
+     image_input = Image.open(image_input_path)
+
+     # Collect detections for each text prompt
+     detections_list = []
+
+     # Run detection for every text prompt
+     for text in texts:
+         _, result = run_florence_inference(
+             model=FLORENCE_MODEL,
+             processor=FLORENCE_PROCESSOR,
+             device=DEVICE,
+             image=image_input,
+             task=FLORENCE_OPEN_VOCABULARY_DETECTION_TASK,
+             text=text
+         )
+
+         # Build supervision Detections from the Florence-2 result
+         detections = sv.Detections.from_lmm(
+             lmm=sv.LMM.FLORENCE_2,
+             result=result,
+             resolution_wh=image_input.size
+         )
+
+         # Run SAM inference to get segmentation masks
+         detections = run_sam_inference(SAM_IMAGE_MODEL, image_input, detections)
+
+         # Add this prompt's detections to the list
+         detections_list.append(detections)
+
+     # Merge the detections from all prompts
+     detections = sv.Detections.merge(detections_list)
+
+     # Run SAM inference again on the merged detections
+     detections = run_sam_inference(SAM_IMAGE_MODEL, image_input, detections)
+
+     return detections
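+ # Example (single frame) -- illustrative only; assumes frames were already extracted
+ # by extract_video_frames above and that the first frame is named 00000.jpeg:
+ # detections = detect_objects_in_image(
+ #     image_input_path=os.path.join(video_frame_dir, '00000.jpeg'),
+ #     texts=texts
+ # )
+ # print(len(detections), 'objects detected')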
+ # @title # Merge mask and blur: merge_image_with_mask
+ import numpy as np
+ import cv2
+ import os
+ from PIL import Image, ImageFilter
+
+ def merge_image_with_mask(image_input_path, detections, output_folder):
+     # Create the output folder
+     if not os.path.exists(output_folder):
+         os.makedirs(output_folder)
+
+     # Extract the image file name and build the output path
+     image_name = os.path.basename(image_input_path)
+     output_path = os.path.join(output_folder, image_name)
+
+     # Create the mask folder
+     mask_folder = '/kaggle/working/mask'
+     if not os.path.exists(mask_folder):
+         os.makedirs(mask_folder)
+
+     # Combine all instance masks into a single uint8 mask
+     combined_mask = np.zeros_like(detections.mask[0], dtype=np.uint8)
+     for mask in detections.mask:
+         combined_mask += mask
+     combined_mask = np.clip(combined_mask, 0, 255)
+     combined_mask = combined_mask.astype(np.uint8)
+
+     # Slightly expand (dilate) the combined mask
+     kernel = np.ones((6, 6), np.uint8)
+     dilated_mask = cv2.dilate(combined_mask, kernel, iterations=1)
+
+     # Optionally save the dilated mask
+     # mask_path = os.path.join(mask_folder, 'test1.png')
+     # cv2.imwrite(mask_path, dilated_mask * 255)
+
+     # Read the original image
+     original_image = cv2.imread(image_input_path)
+
+     # Optionally read the saved mask back and check dimensions
+     # mask_image = cv2.imread(mask_path)
+     # assert original_image.shape == mask_image.shape, "The images must have the same dimensions."
+
+     # Keep the masked region of the original image sharp
+     masked_image = cv2.bitwise_and(original_image, original_image, mask=dilated_mask)
+     # Blur the whole image with a large kernel
+     blurred_image = cv2.GaussianBlur(original_image, (21, 21), 500)
+     # Composite: sharp pixels inside the dilated mask, blurred pixels everywhere else
+     result = np.where(dilated_mask[:, :, None] > 0, masked_image, blurred_image)
+
+     # Save the merged image
+     cv2.imwrite(output_path, result)
+ # @title # Batch-process a folder with a progress bar: process_images_in_folder(input_folder)
+ from tqdm import tqdm
+ import shutil
+ def process_images_in_folder(input_folder):
+     # Reset the output folder so each run starts clean
+     output_folder = '/kaggle/working/okframe'
+     if os.path.exists(output_folder):
+         shutil.rmtree(output_folder)
+     os.makedirs(output_folder)
+
+     # Collect all image files in the input folder
+     files = [f for f in os.listdir(input_folder) if f.endswith(('.jpg', '.png', '.jpeg'))]
+
+     # Iterate with a tqdm progress bar
+     for filename in tqdm(files, desc="Processing Images"):
+         image_input_path = os.path.join(input_folder, filename)
+
+         # Detect and segment the objects in this frame
+         detections = detect_objects_in_image(
+             image_input_path=image_input_path,
+             texts=texts
+         )
+
+         # Blur the background and save the merged frame
+         merge_image_with_mask(
+             image_input_path=image_input_path,
+             detections=detections,
+             output_folder=output_folder
+         )
+
+ # Usage example
+ video_name = video_input_path.split('/')[-1].split('.')[0]
+ input_folder = f'/kaggle/working/frame/{video_name}'
+ process_images_in_folder(input_folder)
+
+ # @title # Merge all frames into a new video: frames_to_video(frame_folder, video_output_path, video_info)
+ import cv2
+ import os
+ import natsort
+ import numpy as np
+
+ def frames_to_video(frame_folder, video_output_path, video_info):
+     # Collect all frame file names and sort them naturally with natsort
+     frame_files = natsort.natsorted([f for f in os.listdir(frame_folder) if f.endswith(('.jpg', '.png', '.jpeg'))])
+
+     # Create the video writer
+     fourcc = cv2.VideoWriter_fourcc(*'mp4v')  # codec
+     out = cv2.VideoWriter(video_output_path, fourcc, video_info.fps, (video_info.width, video_info.height))
+
+     # Iterate over all frame files
+     for frame_file in frame_files:
+         frame_path = os.path.join(frame_folder, frame_file)
+         frame = cv2.imread(frame_path)
+
+         # Resize the frame if its size does not match the video
+         if frame.shape[:2] != (video_info.height, video_info.width):
+             frame = cv2.resize(frame, (video_info.width, video_info.height))
+
+         # Write the frame to the video
+         out.write(frame)
+
+     # Release the writer
+     out.release()
+
+ # Usage example
+ frame_folder = '/kaggle/working/okframe'
+ video_output_path = '/kaggle/working/output_video.mp4'
+
+ frames_to_video(frame_folder, video_output_path, video_info)
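+ # Note: cv2.VideoWriter re-encodes frames only, so the output video has no audio track.
+ # Optional preview in a notebook (illustrative; assumes an IPython environment):
+ # from IPython.display import Video
+ # Video(video_output_path, embed=True)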