florence-sam-kaggle

Runtime error

App Files Files Community

supersolar committed on Nov 19, 2024

Commit

e93626d

verified ·

1 Parent(s): c730836

Update 2.py

Browse files

Files changed (1) hide show

2.py +125 -177

2.py CHANGED Viewed

@@ -1,181 +1,129 @@
 import os
-from typing import Tuple, Optional
 import shutil
-import os
-import cv2
-import numpy as np
-import spaces
-import supervision as sv
-import torch
-from PIL import Image
-from tqdm import tqdm
-from utils.video import generate_unique_name, create_directory, delete_directory
-from utils.florencegpu2 import load_florence_model, run_florence_inference, \
-    FLORENCE_DETAILED_CAPTION_TASK, \
-    FLORENCE_CAPTION_TO_PHRASE_GROUNDING_TASK, FLORENCE_OPEN_VOCABULARY_DETECTION_TASK
-from utils.modes import IMAGE_INFERENCE_MODES, IMAGE_OPEN_VOCABULARY_DETECTION_MODE, \
-    IMAGE_CAPTION_GROUNDING_MASKS_MODE, VIDEO_INFERENCE_MODES
-from utils.sam import load_sam_image_model, run_sam_inference, load_sam_video_model
-DEVICE = torch.device("cuda")
-DEVICE = [torch.device(f'cuda:{i}') for i in range(torch.cuda.device_count())][-1]
-# DEVICE = torch.device("cpu")
-torch.autocast(device_type="cuda", dtype=torch.bfloat16).__enter__()
-if torch.cuda.get_device_properties(0).major >= 8:
-    torch.backends.cuda.matmul.allow_tf32 = True
-    torch.backends.cudnn.allow_tf32 = True
-FLORENCE_MODEL, FLORENCE_PROCESSOR = load_florence_model(device=DEVICE)
-SAM_IMAGE_MODEL = load_sam_image_model(device=DEVICE)
-SAM_VIDEO_MODEL = load_sam_video_model(device=DEVICE)
-texts = ['the table', 'all person','ball']
-from PIL import Image
-import supervision as sv
-def detect_objects_in_image(image_input_path, texts):
-    # 加载图像
-    image_input = Image.open(image_input_path)
-    # 初始化检测列表
-    detections_list = []
-    # 对每个文本进行检测
-    for text in texts:
-        _, result = run_florence_inference(
-          model=FLORENCE_MODEL,
-          processor=FLORENCE_PROCESSOR,
-          device=DEVICE,
-          image=image_input,
-          task=FLORENCE_OPEN_VOCABULARY_DETECTION_TASK,
-          text=text
-        )
-        # 从结果中构建监督检测对象
-        detections = sv.Detections.from_lmm(
-            lmm=sv.LMM.FLORENCE_2,
-            result=result,
-            resolution_wh=image_input.size
-        )
-        # 运行 SAM 推理
-        detections = run_sam_inference(SAM_IMAGE_MODEL, image_input, detections)
-        # 将检测结果添加到列表中
-        detections_list.append(detections)
-    # 合并所有检测结果
-    detections = sv.Detections.merge(detections_list)
-    # 再次运行 SAM 推理
-    detections = run_sam_inference(SAM_IMAGE_MODEL, image_input, detections)
-    return detections
-# @title #合并遮罩加模糊merge_image_with_mask
-import numpy as np
 import cv2
-import os
-from PIL import Image, ImageFilter
-mask_folder = 'mask2'
-if not os.path.exists(mask_folder):
-    os.makedirs(mask_folder)
-shutil.rmtree('mask2')
-mask_folder = 'mask2'
-if not os.path.exists(mask_folder):
-    os.makedirs(mask_folder)
-def merge_image_with_mask(image_input_path, detections, output_folder):
-    # 创建输出文件夹
-    if not os.path.exists(output_folder):
-        os.makedirs(output_folder)
-    # 提取图片文件名
-    image_name = os.path.basename(image_input_path)
-    output_path = os.path.join(output_folder, image_name)
-    # 创建掩码文件夹
-    mask_folder = 'mask2'
-    # 合并掩码
-    combined_mask = np.zeros_like(detections.mask[0], dtype=np.uint8)
-    for mask in detections.mask:
-        combined_mask += mask
-    combined_mask = np.clip(combined_mask, 0, 255)
-    combined_mask = combined_mask.astype(np.uint8)
-    # 膨胀掩码
-    kernel = np.ones((6, 6), np.uint8)
-    dilated_mask = cv2.dilate(combined_mask, kernel, iterations=1)
-    # 保存膨胀后的掩码
-    mask_path = os.path.join(mask_folder, image_name)
-    cv2.imwrite(mask_path, dilated_mask * 255)
-    # 读取原始图像
-    original_image = cv2.imread(image_input_path)
-    # 读取遮罩图片
-    #mask_image = cv2.imread(mask_path)
-    # 确保原始图片和遮罩图片尺寸一致
-    #assert original_image.shape == mask_image.shape, "The images must have the same dimensions."
-    # 使用掩膜从原始图片中提取部分区域
-    masked_image = cv2.bitwise_and(original_image, original_image, mask=dilated_mask)
-    # 将掩膜应用于原始图片
-    #blurred_image = cv2.GaussianBlur(original_image, (21, 21), 500)  # 使用较大的核大小进行模糊
-    blurred_image = cv2.medianBlur(original_image, 21)
-    # 将提取的部分区域叠加到模糊后的图片上
-    blurred_image = cv2.bitwise_and(blurred_image, blurred_image, mask=~dilated_mask)
-        # 将提取的部分区域叠加到模糊后的图片上
-    result = np.where(dilated_mask[:, :, None] > 0, masked_image, blurred_image)
-    # 保存合并后的图片
-    cv2.imwrite(output_path, result)
-# @title #进度条批量处理文件夹process_images_in_folder(input_folder)
-from tqdm import tqdm
-import shutil
-def process_images_in_folder(input_folder):
-    # 确保输出文件夹存在
-    output_folder = 'okframe2'
-    if not os.path.exists(output_folder):
-        os.makedirs(output_folder)
-    shutil.rmtree('okframe2')
-    output_folder = 'okframe2'
-    if not os.path.exists(output_folder):
-        os.makedirs(output_folder)
-    # 获取文件夹中的所有文件
-    files = [f for f in os.listdir(input_folder) if f.endswith('.jpg') or f.endswith('.png') or f.endswith('.jpeg')]
-    # 使用 tqdm 显示进度条
-    for filename in tqdm(files, desc="Gpu 2 Processing Images"):
-        image_input_path = os.path.join(input_folder, filename)
-        # 检测对象
-        detections = detect_objects_in_image(
-            image_input_path=image_input_path,
-            texts=texts
-        )
-        # 合并图像
-        merge_image_with_mask(
-            image_input_path=image_input_path,
-            detections=detections,
-            output_folder=output_folder
-        )
-# 使用示例
-input_folder = 'frame2'
-process_images_in_folder(input_folder)

 import os
+import subprocess
 import shutil
+import pickle
 import cv2
+import re
+# Set the detection prompts and run the Florence stage.
+def set_prompt_and_get_coordinates(output_video, texts=None):
+    """Pickle the prompts and video path, then run the Florence script.
+
+    The prompts are written to ``/kaggle/texts.pkl`` and the video path to
+    ``/kaggle/output_video2.pkl``; ``kaggle_florence_gpu_2.py`` is then
+    launched as a subprocess (presumably it reads those two pickles --
+    confirm in that script).
+
+    Args:
+        output_video: Path of the video to process; pickled unchanged.
+        texts: Detection prompts, either a list of strings or a single
+            comma-separated string such as ``"men, the table"``. ``None``
+            selects the default ``['men', 'the table']``.
+
+    Returns:
+        The ``subprocess.CompletedProcess`` of the Florence run with
+        ``stdout``/``stderr`` captured as text. The exit status is not
+        checked here; callers should inspect ``returncode``.
+    """
+    if texts is None:
+        # Build the default per call rather than sharing one mutable list
+        # object between every invocation.
+        texts = ['men', 'the table']
+    elif isinstance(texts, str):
+        # "a, b," -> ['a', 'b']; blank entries from stray commas are dropped
+        # so no empty prompt is handed to the detector.
+        texts = [part.strip() for part in texts.split(',') if part.strip()]
+        print(texts)
+    with open('/kaggle/texts.pkl', 'wb') as file:
+        pickle.dump(texts, file)
+    with open('/kaggle/output_video2.pkl', 'wb') as file:
+        pickle.dump(output_video, file)
+    command = ['python', '/kaggle/florence-sam-kaggle/kaggle_florence_gpu_2.py']
+    # NOTE(review): despite the name, this is the CompletedProcess, not a
+    # list of bounding boxes.
+    all_ok_bboxes = subprocess.run(command, capture_output=True, text=True)
+    return all_ok_bboxes
+# Run the SAM2 stage.
+def run_sam2(output_video):
+    """Launch the SAM2 script with ``python3`` and return its result.
+
+    ``output_video`` is accepted but not used by this function. The
+    ``subprocess.CompletedProcess`` (stdout/stderr captured as text) is
+    printed and then returned; the exit status is not checked.
+    """
+    sam2_output = subprocess.run(
+        ['python3', '/kaggle/florence-sam-kaggle/kaggle_sam2_gpu_2.py'],
+        capture_output=True,
+        text=True,
+    )
+    print(sam2_output)
+    return sam2_output
+# Build a video from the frame images and add the source video's audio.
+def create_video_with_audio(image_folder, input_video_path):
+    """Encode the images in ``image_folder`` and mux in the source audio.
+
+    The ``.png``/``.jpg``/``.jpeg`` files are written in natural filename
+    order to a temporary ``mp4v`` video at the frame rate of
+    ``input_video_path`` (24 fps if it cannot be read). ffmpeg then copies
+    that video stream together with the audio of ``input_video_path`` into
+    ``/kaggle/working/sam2_videos/<basename of input_video_path>``.
+
+    Args:
+        image_folder: Directory holding the frame images.
+        input_video_path: Source video providing frame rate and audio.
+
+    Returns:
+        Path of the muxed output video.
+
+    Raises:
+        ValueError: If the folder has no matching images or an image
+            cannot be decoded.
+        subprocess.CalledProcessError: If ffmpeg exits with an error.
+    """
+    def natural_sort_key(s, _nsre=re.compile(r'([0-9]+)')):
+        # With a capturing group, re.split puts the digit runs at the odd
+        # indices, so parity (not str.isdigit) decides what becomes an int.
+        return [int(part) if idx % 2 else part.lower()
+                for idx, part in enumerate(_nsre.split(s))]
+
+    image_files = [f for f in os.listdir(image_folder) if f.endswith(('.png', '.jpg', '.jpeg'))]
+    image_files.sort(key=natural_sort_key)
+    if not image_files:
+        raise ValueError(f"No valid images found in folder: {image_folder}")
+    first_image_path = os.path.join(image_folder, image_files[0])
+    first_image = cv2.imread(first_image_path)
+    if first_image is None:
+        # cv2.imread signals failure by returning None instead of raising.
+        raise ValueError(f"Could not read image: {first_image_path}")
+    height, width = first_image.shape[:2]
+    cap = cv2.VideoCapture(input_video_path)
+    fps = cap.get(cv2.CAP_PROP_FPS)
+    cap.release()
+    if fps <= 0:
+        fps = 24  # default frame rate
+    output_video_path = os.path.join('/kaggle/working/sam2_videos/', os.path.basename(input_video_path))
+    os.makedirs(os.path.dirname(output_video_path), exist_ok=True)
+    temp_video_path = '/kaggle/image_sequence_video2.mp4'
+    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
+    video_writer = cv2.VideoWriter(temp_video_path, fourcc, fps, (width, height))
+    try:
+        for image_file in image_files:
+            image_path = os.path.join(image_folder, image_file)
+            frame = cv2.imread(image_path)
+            if frame is None:
+                raise ValueError(f"Could not read image: {image_path}")
+            video_writer.write(frame)
+    finally:
+        # Always finalise the container, even when a frame fails.
+        video_writer.release()
+    command = [
+        'ffmpeg',
+        '-y',  # overwrite the output file
+        '-i', temp_video_path,
+        '-i', input_video_path,
+        # Explicit mapping: picture from the rendered frames (input 0) and
+        # audio from the source (input 1). Without it ffmpeg's automatic
+        # selection may choose the source's own video stream. The trailing
+        # '?' keeps sources without audio working.
+        '-map', '0:v:0',
+        '-map', '1:a:0?',
+        '-c:v', 'copy',
+        '-c:a', 'copy',
+        '-shortest',
+        output_video_path
+    ]
+    try:
+        subprocess.run(command, check=True)
+    except subprocess.CalledProcessError as e:
+        print(f"Error running ffmpeg: {e}")
+        print(f"Command: {' '.join(command)}")
+        raise
+    print(f"Video created successfully: {output_video_path}")
+    return output_video_path
+import os
+import re
+def natural_sort_key(s):
+    """Return a key that sorts strings in natural (human) order.
+
+    ``s`` is split into alternating text and ASCII digit runs; digit runs
+    become ``int`` and text is lower-cased, so ``"v2"`` sorts before
+    ``"v10"``.
+    """
+    # With a capturing group, re.split places the digit runs at the odd
+    # indices. Using that parity instead of str.isdigit() avoids int()
+    # failing on characters such as '²' that isdigit() accepts but [0-9]
+    # does not match, and keeps int/str positions aligned between keys.
+    return [int(part) if idx % 2 else part.lower()
+            for idx, part in enumerate(re.split(r'([0-9]+)', s))]
+# Process every video file in the source directory.
+def process_all_videos(source_dir, target_dir, image_folder, texts="men, the table"):
+    """Run the Florence -> SAM2 -> mux pipeline over each video in a folder.
+
+    Videos (``.mp4``/``.avi``/``.mov``) are handled in reverse natural
+    filename order. For each one the prompts are set, SAM2 is run, the
+    frames in ``image_folder`` are turned into a video with audio, and the
+    source file is moved into ``target_dir``.
+
+    Args:
+        source_dir: Directory containing the input videos.
+        target_dir: Directory the source video is moved to afterwards;
+            created if missing.
+        image_folder: Directory the frames are read from when the output
+            video is built.
+        texts: Detection prompts passed to
+            ``set_prompt_and_get_coordinates``; defaults to the prompt
+            previously hard-coded here.
+    """
+    os.makedirs(target_dir, exist_ok=True)
+    video_files = [f for f in os.listdir(source_dir) if f.endswith(('.mp4', '.avi', '.mov'))]
+    video_files.sort(key=natural_sort_key)
+    # Reverse the list.
+    video_files.reverse()
+    print(video_files)
+    for video_file in video_files:
+        video_path = os.path.join(source_dir, video_file)
+        print(f"Processing video: {video_path}")
+        # Set the prompts and run the Florence stage.
+        result = set_prompt_and_get_coordinates(video_path, texts=texts)
+        print(result)
+        if result.returncode != 0:
+            # Do not build a video from stale frames or move the source
+            # when an upstream stage failed; leave it in place for a retry.
+            print(f"Florence stage failed for {video_path}; skipping.")
+            continue
+        # Run the SAM2 stage.
+        result = run_sam2(video_path)
+        print(result.stdout)
+        if result.returncode != 0:
+            print(f"SAM2 stage failed for {video_path}; skipping.")
+            continue
+        # Build the output video with audio.
+        output_video_path = create_video_with_audio(image_folder, video_path)
+        print(f"Output video: {output_video_path}")
+        # NOTE(review): this moves the SOURCE video (video_path), not the
+        # generated output_video_path, although the message below says
+        # "processed". Behaviour kept as-is -- confirm which was intended.
+        target_video_path = os.path.join(target_dir, os.path.basename(output_video_path))
+        shutil.move(video_path, target_video_path)
+        print(f"Moved processed video to: {target_video_path}")
+# Example invocation: process every video under /kaggle/o_videos.
+source_dir, target_dir, image_folder = '/kaggle/o_videos', '/kaggle', '/kaggle/output2'
+process_all_videos(
+    source_dir=source_dir,
+    target_dir=target_dir,
+    image_folder=image_folder,
+)