NEXAS committed on
Commit
a51682c
·
verified ·
1 Parent(s): ff25b71

Update utils/ingest_video.py

Browse files
Files changed (1) hide show
  1. utils/ingest_video.py +52 -43
utils/ingest_video.py CHANGED
@@ -1,10 +1,12 @@
 
 
1
  import os
2
- import cv2
3
  import chromadb
4
  from chromadb.utils.embedding_functions import OpenCLIPEmbeddingFunction
5
  from chromadb.utils.data_loaders import ImageLoader
6
 
7
- # Initialize ChromaDB client and collection
 
8
  path = "mm_vdb2"
9
  client = chromadb.PersistentClient(path=path)
10
 
@@ -16,14 +18,8 @@ video_collection = client.get_or_create_collection(
16
  data_loader=image_loader
17
  )
18
 
 
19
  def extract_frames(video_folder, output_folder):
20
- """
21
- Extracts frames from all videos in the video_folder and saves them in the output_folder.
22
-
23
- Args:
24
- video_folder (str): Path to the folder containing video files.
25
- output_folder (str): Path to the folder where extracted frames will be saved.
26
- """
27
  if not os.path.exists(output_folder):
28
  os.makedirs(output_folder)
29
 
@@ -42,7 +38,6 @@ def extract_frames(video_folder, output_folder):
42
  success, image = video_capture.read()
43
  frame_number = 0
44
  while success:
45
- # Save frames at 0 seconds, every 5 seconds, and the last frame
46
  if frame_number == 0 or frame_number % int(fps * 5) == 0 or frame_number == frame_count - 1:
47
  frame_time = frame_number / fps
48
  output_frame_filename = os.path.join(output_subfolder, f'frame_{int(frame_time)}.jpg')
@@ -53,15 +48,7 @@ def extract_frames(video_folder, output_folder):
53
 
54
  video_capture.release()
55
 
56
-
57
  def add_frames_to_chromadb(video_dir, frames_dir):
58
- """
59
- Adds extracted frames from videos to the ChromaDB collection.
60
-
61
- Args:
62
- video_dir (str): Path to the folder containing video files.
63
- frames_dir (str): Path to the folder containing the extracted frames.
64
- """
65
  # Dictionary to hold video titles and their corresponding frames
66
  video_frames = {}
67
 
@@ -74,7 +61,7 @@ def add_frames_to_chromadb(video_dir, frames_dir):
74
  # List all jpg files in the folder
75
  video_frames[video_title] = [f for f in os.listdir(frame_folder) if f.endswith('.jpg')]
76
 
77
- # Prepare ids, uris, and metadatas for ChromaDB
78
  ids = []
79
  uris = []
80
  metadatas = []
@@ -88,39 +75,61 @@ def add_frames_to_chromadb(video_dir, frames_dir):
88
  uris.append(frame_path)
89
  metadatas.append({'video_uri': video_path})
90
 
91
- # Add frames to the ChromaDB collection
92
  video_collection.add(ids=ids, uris=uris, metadatas=metadatas)
93
 
 
94
 
95
- def initiate_video(video_folder_path):
96
- """
97
- Initiates the video processing pipeline: extracts frames from videos
98
- and adds them to the ChromaDB collection.
99
 
 
 
 
 
100
  Args:
101
- video_folder_path (str): Path to the folder containing video files.
102
-
103
- Returns:
104
- The ChromaDB collection with the added frames.
105
  """
106
  try:
107
- print("Starting video processing pipeline...")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
108
 
109
- # Define output folder for extracted frames
110
- output_folder_path = os.path.join(video_folder_path, 'extracted_frames')
 
 
 
 
111
 
112
- # Extract frames from videos
113
- print("Extracting frames...")
114
- extract_frames(video_folder_path, output_folder_path)
115
- print("Frames extracted successfully.")
116
 
117
- # Add frames to ChromaDB collection
118
- print("Adding frames to ChromaDB...")
119
- add_frames_to_chromadb(video_folder_path, output_folder_path)
120
- print("Frames added to ChromaDB successfully.")
121
 
122
- return video_collection
 
123
 
124
- except Exception as e:
125
- print(f"An error occurred during video processing: {e}")
126
- return None
 
 
 
 
 
1
+ import gdown
2
+ import zipfile
3
  import os
 
4
  import chromadb
5
  from chromadb.utils.embedding_functions import OpenCLIPEmbeddingFunction
6
  from chromadb.utils.data_loaders import ImageLoader
7
 
8
+ import cv2
9
+
10
  path = "mm_vdb2"
11
  client = chromadb.PersistentClient(path=path)
12
 
 
18
  data_loader=image_loader
19
  )
20
 
21
+
22
  def extract_frames(video_folder, output_folder):
 
 
 
 
 
 
 
23
  if not os.path.exists(output_folder):
24
  os.makedirs(output_folder)
25
 
 
38
  success, image = video_capture.read()
39
  frame_number = 0
40
  while success:
 
41
  if frame_number == 0 or frame_number % int(fps * 5) == 0 or frame_number == frame_count - 1:
42
  frame_time = frame_number / fps
43
  output_frame_filename = os.path.join(output_subfolder, f'frame_{int(frame_time)}.jpg')
 
48
 
49
  video_capture.release()
50
 
 
51
  def add_frames_to_chromadb(video_dir, frames_dir):
 
 
 
 
 
 
 
52
  # Dictionary to hold video titles and their corresponding frames
53
  video_frames = {}
54
 
 
61
  # List all jpg files in the folder
62
  video_frames[video_title] = [f for f in os.listdir(frame_folder) if f.endswith('.jpg')]
63
 
64
+ # Prepare ids, uris and metadatas
65
  ids = []
66
  uris = []
67
  metadatas = []
 
75
  uris.append(frame_path)
76
  metadatas.append({'video_uri': video_path})
77
 
 
78
  video_collection.add(ids=ids, uris=uris, metadatas=metadatas)
79
 
80
+ # Running it
81
 
 
 
 
 
82
 
83
+ def unzip_file(zip_path, extract_to):
84
+ """
85
+ Unzips a zip file to the specified directory and flattens the folder structure.
86
+
87
  Args:
88
+ zip_path (str): Path to the zip file.
89
+ extract_to (str): Directory where the contents should be extracted.
 
 
90
  """
91
  try:
92
+ # Ensure the destination directory exists
93
+ os.makedirs(extract_to, exist_ok=True)
94
+
95
+ with zipfile.ZipFile(zip_path, 'r') as zip_ref:
96
+ for file in zip_ref.namelist():
97
+ # Extract only files (not directories)
98
+ if not file.endswith('/'):
99
+ # Flatten by extracting all files to the root of extract_to
100
+ file_name = os.path.basename(file)
101
+ if file_name: # Ensure it is not an empty string
102
+ extracted_path = os.path.join(extract_to, file_name)
103
+ with zip_ref.open(file) as source, open(extracted_path, 'wb') as target:
104
+ target.write(source.read())
105
+
106
+ print(f"Successfully extracted and flattened {zip_path} to {extract_to}")
107
+ except Exception as e:
108
+ print(f"An error occurred during extraction: {e}")
109
 
110
+ def initiate_video():
111
+ file_id = "1Nzy-ep9Zn15_mLAq8rUi-iKPGOpN8g9Q"
112
+ output_file = r"StockVideos-CC01.zip"
113
+ # Download the ZIP file
114
+ gdown.download(f"https://drive.google.com/uc?id={file_id}", output_file, quiet=False)
115
+ print(f"File downloaded successfully: {output_file}")
116
 
117
+ # Define paths
118
+ zip_file_path = output_file
119
+ flattened_video_folder = r"videos_flattened"
120
+ frames_output_folder = r"extracted_frames"
121
 
122
+ # Ensure directories exist
123
+ os.makedirs(flattened_video_folder, exist_ok=True)
124
+ os.makedirs(frames_output_folder, exist_ok=True)
 
125
 
126
+ # Unzip and flatten the videos
127
+ unzip_file(zip_file_path, flattened_video_folder)
128
 
129
+ # Process the videos and extract frames
130
+ extract_frames(flattened_video_folder, frames_output_folder)
131
+
132
+ # Add frames to ChromaDB
133
+ add_frames_to_chromadb(flattened_video_folder, frames_output_folder)
134
+
135
+ return video_collection