File size: 4,632 Bytes
d2ebea5 2ab65e0 d2ebea5 2ab65e0 d2ebea5 27dc37c 2ab65e0 27dc37c 2ab65e0 27dc37c 2ab65e0 d2ebea5 2ab65e0 d2ebea5 2ab65e0 d2ebea5 2ab65e0 d2ebea5 2ab65e0 d2ebea5 2ab65e0 d2ebea5 27dc37c 2ab65e0 27dc37c 2ab65e0 27dc37c 2ab65e0 d2ebea5 2ab65e0 d2ebea5 2ab65e0 d2ebea5 2ab65e0 d2ebea5 2ab65e0 d2ebea5 2ab65e0 27dc37c 2ab65e0 d2ebea5 27dc37c 2ab65e0 27dc37c 2ab65e0 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 |
import os
import cv2
import chromadb
from chromadb.utils.embedding_functions import OpenCLIPEmbeddingFunction
from chromadb.utils.data_loaders import ImageLoader
# Initialize ChromaDB client and collection.
# NOTE: these statements run at import time, so importing this module
# instantiates the persistent client, the image loader and the OpenCLIP
# embedding function as a side effect.
path = "mm_vdb2"  # relative path handed to the persistent client
client = chromadb.PersistentClient(path=path)
image_loader = ImageLoader()
CLIP = OpenCLIPEmbeddingFunction()
# The collection receives both the embedding function and the image loader;
# add_frames_to_chromadb() later adds entries by `uris=` only, so presumably
# the loader is what reads the image files behind those URIs — confirm
# against the ChromaDB multimodal documentation.
video_collection = client.get_or_create_collection(
    name='video_collection',
    embedding_function=CLIP,
    data_loader=image_loader
)
def _save_frame(image, frame_number, fps, output_subfolder):
    """Write one frame to output_subfolder as ``frame_<whole seconds>.jpg``."""
    frame_time = frame_number / fps
    output_frame_filename = os.path.join(output_subfolder, f'frame_{int(frame_time)}.jpg')
    cv2.imwrite(output_frame_filename, image)


def extract_frames(video_folder, output_folder, interval_seconds=5):
    """
    Extracts frames from all videos in the video_folder and saves them in the output_folder.

    For every ``.mp4`` file a subfolder named after the video (without its
    extension) is created inside output_folder. The first frame, one frame
    every ``interval_seconds`` seconds, and the last frame that could be read
    are written there as ``frame_<whole seconds>.jpg``.

    Videos that cannot be opened, or that report no usable frame rate, are
    skipped with a message instead of aborting the whole run.

    Args:
        video_folder (str): Path to the folder containing video files.
        output_folder (str): Path to the folder where extracted frames will be saved.
        interval_seconds (int | float): Seconds between two sampled frames.
            Defaults to 5.

    Raises:
        ValueError: If interval_seconds is not a positive number.
    """
    if interval_seconds <= 0:
        raise ValueError("interval_seconds must be a positive number")

    os.makedirs(output_folder, exist_ok=True)

    for video_filename in os.listdir(video_folder):
        if not video_filename.endswith('.mp4'):
            continue

        video_path = os.path.join(video_folder, video_filename)
        video_capture = cv2.VideoCapture(video_path)
        try:
            fps = video_capture.get(cv2.CAP_PROP_FPS)
            # `not fps > 0` also rejects NaN; without this guard the timestamp
            # division and the sampling modulo below would divide by zero.
            if not video_capture.isOpened() or not fps > 0:
                print(f"Skipping {video_path}: cannot open video or read its frame rate.")
                continue

            # At least 1 so that very low frame rates never yield a zero modulus.
            frame_step = max(1, int(fps * interval_seconds))

            output_subfolder = os.path.join(output_folder, os.path.splitext(video_filename)[0])
            os.makedirs(output_subfolder, exist_ok=True)

            frame_number = 0
            last_saved = -1
            last_image = None
            success, image = video_capture.read()
            while success:
                # Frame 0 always matches, so the first frame is always saved.
                if frame_number % frame_step == 0:
                    _save_frame(image, frame_number, fps, output_subfolder)
                    last_saved = frame_number
                last_image = image
                success, image = video_capture.read()
                frame_number += 1

            # Save the last frame that was actually decoded rather than
            # trusting the container's reported frame count.
            last_index = frame_number - 1
            if last_image is not None and last_saved != last_index:
                _save_frame(last_image, last_index, fps, output_subfolder)
        finally:
            # Release the capture even if reading or writing a frame failed.
            video_capture.release()
def add_frames_to_chromadb(video_dir, frames_dir):
    """
    Adds extracted frames from videos to the ChromaDB collection.

    For every ``.mp4`` file in video_dir, the ``.jpg`` files found in
    ``frames_dir/<video name without extension>`` are added to
    ``video_collection``. Each frame gets the id ``<frame name>_<video name>``,
    its file path as URI, and the path of its source video under the
    ``video_uri`` metadata key.

    If no frame is found at all, the function returns without touching the
    collection instead of submitting an empty batch.

    Args:
        video_dir (str): Path to the folder containing video files.
        frames_dir (str): Path to the folder containing the extracted frames.
    """
    ids = []
    uris = []
    metadatas = []

    for video_file in os.listdir(video_dir):
        if not video_file.endswith('.mp4'):
            continue

        video_title = video_file[:-4]
        frame_folder = os.path.join(frames_dir, video_title)
        # isdir (not exists): a regular file with the video's name is not a frame folder.
        if not os.path.isdir(frame_folder):
            continue

        video_path = os.path.join(video_dir, video_file)
        # Sorted so that ids/uris are submitted in a reproducible order.
        for frame in sorted(os.listdir(frame_folder)):
            if not frame.endswith('.jpg'):
                continue
            ids.append(f"{frame[:-4]}_{video_title}")
            uris.append(os.path.join(frame_folder, frame))
            metadatas.append({'video_uri': video_path})

    # Nothing to add: skip the call rather than pass empty lists to the collection.
    if not ids:
        return

    # Add frames to the ChromaDB collection
    video_collection.add(ids=ids, uris=uris, metadatas=metadatas)
def initiate_video(video_folder_path):
    """
    Run the full video ingestion pipeline for one folder.

    Frames are first extracted into an ``extracted_frames`` subfolder of
    video_folder_path and then registered in the ChromaDB collection.
    Progress is reported with print statements.

    Args:
        video_folder_path (str): Path to the folder containing video files.

    Returns:
        The ChromaDB collection with the added frames, or None if any step
        raised an exception (the error is printed, not re-raised).
    """
    try:
        print("Starting video processing pipeline...")

        # Frames are stored next to the videos, in a dedicated subfolder.
        frames_root = os.path.join(video_folder_path, 'extracted_frames')

        # Step 1: dump the sampled frames to disk.
        print("Extracting frames...")
        extract_frames(video_folder_path, frames_root)
        print("Frames extracted successfully.")

        # Step 2: index those frames in the collection.
        print("Adding frames to ChromaDB...")
        add_frames_to_chromadb(video_folder_path, frames_root)
        print("Frames added to ChromaDB successfully.")

        return video_collection
    except Exception as exc:
        # Top-level boundary: report the failure and signal it with None.
        print(f"An error occurred during video processing: {exc}")
        return None
|