File size: 4,994 Bytes
a51682c d2ebea5 a51682c d2ebea5 a51682c d2ebea5 2ab65e0 d2ebea5 2ab65e0 d2ebea5 2ab65e0 d2ebea5 2ab65e0 d2ebea5 2ab65e0 d2ebea5 2ab65e0 d2ebea5 2ab65e0 d2ebea5 2ab65e0 d2ebea5 2ab65e0 a51682c 2ab65e0 d2ebea5 2ab65e0 a51682c d2ebea5 a51682c 27dc37c a51682c 27dc37c 2ab65e0 a51682c 2ab65e0 a51682c af27942 a51682c 2ab65e0 a51682c 2ab65e0 a51682c 2ab65e0 a51682c 2ab65e0 a51682c |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 |
import os
import shutil
import zipfile

import gdown
import chromadb
from chromadb.utils.embedding_functions import OpenCLIPEmbeddingFunction
from chromadb.utils.data_loaders import ImageLoader
import cv2
# Module-level setup: these statements run at import time and the resulting
# objects are shared by the functions defined in this file.

# Directory passed to the persistent ChromaDB client as its storage path.
path = "mm_vdb2"
client = chromadb.PersistentClient(path=path)
# Data loader and embedding function attached to the collection below, so
# that entries can be added by image URI (see the `uris=` argument used in
# `add_frames_to_chromadb`).
image_loader = ImageLoader()
CLIP = OpenCLIPEmbeddingFunction()
# Collection holding the extracted video frames; `get_or_create_collection`
# reuses the collection named 'video_collection' when it already exists.
video_collection = client.get_or_create_collection(
name='video_collection',
embedding_function=CLIP,
data_loader=image_loader
)
def extract_frames(video_folder, output_folder, interval_seconds=5):
    """Save periodic still frames from every ``.mp4`` file in a folder.

    For each video, a sub-folder named after the video (file name without
    extension) is created under ``output_folder``. The first frame, one frame
    every ``interval_seconds`` seconds, and the last frame (according to the
    frame count reported by the container) are written there as
    ``frame_<second>.jpg``, where ``<second>`` is the frame's timestamp
    truncated to whole seconds.

    Videos that cannot be opened, or that report a non-positive frame rate,
    are skipped with a printed message instead of raising
    ``ZeroDivisionError``.

    Args:
        video_folder (str): Directory scanned (non-recursively) for files
            whose name ends in ``.mp4``.
        output_folder (str): Directory that receives one sub-folder of JPEG
            frames per video. Created if it does not exist.
        interval_seconds (float): Sampling interval in seconds. Defaults to
            5, the value that used to be hard-coded. Intervals shorter than
            one frame fall back to saving every frame.
    """
    os.makedirs(output_folder, exist_ok=True)

    for video_filename in os.listdir(video_folder):
        if not video_filename.endswith('.mp4'):
            continue

        video_path = os.path.join(video_folder, video_filename)
        video_capture = cv2.VideoCapture(video_path)
        try:
            fps = video_capture.get(cv2.CAP_PROP_FPS)
            # `not fps > 0` also rejects NaN; without this guard the frame
            # step and timestamp computations below would divide by zero.
            if not video_capture.isOpened() or not fps > 0:
                print(f"Skipping {video_path}: could not open video or read its frame rate")
                continue

            frame_count = int(video_capture.get(cv2.CAP_PROP_FRAME_COUNT))
            # Number of frames between two saved stills; at least 1 so the
            # modulo below is always defined, even for very low frame rates.
            frame_step = max(1, int(fps * interval_seconds))
            last_frame_index = frame_count - 1

            output_subfolder = os.path.join(output_folder, os.path.splitext(video_filename)[0])
            os.makedirs(output_subfolder, exist_ok=True)

            success, image = video_capture.read()
            frame_number = 0
            while success:
                # Frame 0 always matches the modulo test, so the first frame
                # is saved without needing a separate condition.
                if frame_number % frame_step == 0 or frame_number == last_frame_index:
                    frame_time = frame_number / fps
                    output_frame_filename = os.path.join(output_subfolder, f'frame_{int(frame_time)}.jpg')
                    cv2.imwrite(output_frame_filename, image)
                success, image = video_capture.read()
                frame_number += 1
        finally:
            # Release the decoder handle even if reading or writing fails.
            video_capture.release()
def add_frames_to_chromadb(video_dir, frames_dir):
    """Register every extracted frame in the module-level ``video_collection``.

    For each ``.mp4`` file in ``video_dir`` that has a matching sub-folder in
    ``frames_dir`` (named after the video without its extension), every
    ``.jpg`` file in that sub-folder is added to the collection with:

    * id: ``<frame file name without .jpg>_<video title>``
    * uri: path to the frame image
    * metadata: ``{'video_uri': <path to the source video>}``

    Videos and frames are processed in sorted order so the batch is
    deterministic. If no frames are found, nothing is sent to the collection
    and a message is printed instead.

    Args:
        video_dir (str): Directory containing the source ``.mp4`` files.
        frames_dir (str): Directory containing one sub-folder of frames per
            video.
    """
    ids = []
    uris = []
    metadatas = []

    for video_file in sorted(os.listdir(video_dir)):
        if not video_file.endswith('.mp4'):
            continue

        video_title = video_file[:-4]
        frame_folder = os.path.join(frames_dir, video_title)
        # A video without a frame directory simply has nothing to index;
        # `isdir` also protects against a regular file with the same name.
        if not os.path.isdir(frame_folder):
            continue

        video_path = os.path.join(video_dir, video_file)
        for frame in sorted(os.listdir(frame_folder)):
            if not frame.endswith('.jpg'):
                continue
            ids.append(f"{frame[:-4]}_{video_title}")
            uris.append(os.path.join(frame_folder, frame))
            metadatas.append({'video_uri': video_path})

    # Avoid calling `add` with empty lists (chromadb is expected to reject an
    # empty batch, which would abort the whole pipeline with an opaque error).
    if not ids:
        print(f"No frames found under {frames_dir}; nothing added to the collection")
        return

    video_collection.add(ids=ids, uris=uris, metadatas=metadatas)
# Archive extraction helper and pipeline entry point
def unzip_file(zip_path, extract_to):
    """
    Unzips a zip file to the specified directory and flattens the folder structure.

    Every file in the archive is written directly into ``extract_to`` under
    its base name; directory entries are skipped. Because the structure is
    flattened, two archive members with the same base name end up at the same
    path: the later one overwrites the earlier one and a warning is printed.

    This function is best-effort: any error is reported with ``print`` and
    not re-raised, so callers never see an exception from it.

    Args:
        zip_path (str): Path to the zip file.
        extract_to (str): Directory where the contents should be extracted.
    """
    try:
        # Ensure the destination directory exists
        os.makedirs(extract_to, exist_ok=True)

        written_names = set()
        with zipfile.ZipFile(zip_path, 'r') as zip_ref:
            for member in zip_ref.infolist():
                # Extract only files (not directories)
                if member.is_dir():
                    continue

                # Flatten by extracting all files to the root of extract_to.
                # Using only the base name also keeps every output path
                # inside extract_to, whatever directories the archive names.
                file_name = os.path.basename(member.filename)
                if not file_name:
                    continue

                if file_name in written_names:
                    print(f"Warning: {member.filename} overwrites an earlier file named {file_name}")
                written_names.add(file_name)

                extracted_path = os.path.join(extract_to, file_name)
                with zip_ref.open(member) as source, open(extracted_path, 'wb') as target:
                    # Stream in chunks: archive members can be large video
                    # files that should not be held in memory all at once.
                    shutil.copyfileobj(source, target)
        print(f"Successfully extracted and flattened {zip_path} to {extract_to}")
    except Exception as e:
        # Deliberately broad: extraction failures are reported, not raised.
        print(f"An error occurred during extraction: {e}")
def initiate_video():
    """Download the stock-video archive and index its frames in ChromaDB.

    Steps, in order:

    1. Download the zip archive from Google Drive to ``StockVideos-CC01.zip``.
    2. Extract it, flattened, into ``videos_flattened``.
    3. Extract periodic frames from every video into ``extracted_frames``.
    4. Add the frames to the module-level ``video_collection``.

    Returns:
        The module-level ``video_collection`` after the frames were added.

    Raises:
        RuntimeError: If the download did not produce the archive file.
    """
    file_id = "1PhbpRBtTG0Cp9URebZ7-CLRKelAYRZqb"
    output_file = r"StockVideos-CC01.zip"

    # Download the ZIP file
    downloaded = gdown.download(f"https://drive.google.com/uc?id={file_id}", output_file, quiet=False)
    # Some gdown versions signal failure by returning None rather than
    # raising, so verify the result before reporting success.
    if downloaded is None or not os.path.isfile(output_file):
        raise RuntimeError(f"Download failed, archive not found: {output_file}")
    print(f"File downloaded successfully: {output_file}")

    # Define paths
    zip_file_path = output_file
    flattened_video_folder = r"videos_flattened"
    frames_output_folder = r"extracted_frames"

    # Ensure directories exist
    os.makedirs(flattened_video_folder, exist_ok=True)
    os.makedirs(frames_output_folder, exist_ok=True)

    # Unzip and flatten the videos
    unzip_file(zip_file_path, flattened_video_folder)

    # Process the videos and extract frames
    extract_frames(flattened_video_folder, frames_output_folder)

    # Add frames to ChromaDB
    add_frames_to_chromadb(flattened_video_folder, frames_output_folder)

    return video_collection