File size: 4,632 Bytes
d2ebea5
2ab65e0
d2ebea5
 
 
 
2ab65e0
d2ebea5
 
 
 
 
 
 
 
 
 
 
 
27dc37c
2ab65e0
 
27dc37c
2ab65e0
 
27dc37c
2ab65e0
 
d2ebea5
 
2ab65e0
d2ebea5
 
 
 
2ab65e0
d2ebea5
 
2ab65e0
 
d2ebea5
 
 
 
2ab65e0
d2ebea5
 
 
 
 
 
 
 
 
2ab65e0
d2ebea5
 
27dc37c
2ab65e0
 
27dc37c
2ab65e0
 
27dc37c
2ab65e0
d2ebea5
 
2ab65e0
d2ebea5
2ab65e0
 
d2ebea5
 
2ab65e0
 
 
 
 
 
 
d2ebea5
 
 
 
 
 
 
 
 
 
2ab65e0
 
 
d2ebea5
2ab65e0
27dc37c
2ab65e0
 
d2ebea5
27dc37c
2ab65e0
 
 
 
27dc37c
2ab65e0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
import os
import cv2
import chromadb
from chromadb.utils.embedding_functions import OpenCLIPEmbeddingFunction
from chromadb.utils.data_loaders import ImageLoader

# Initialize ChromaDB client and collection
# NOTE: everything below runs at import time, so importing this module
# creates the client, the embedding function, and the collection.
# On-disk location handed to the persistent ChromaDB client.
path = "mm_vdb2"
client = chromadb.PersistentClient(path=path)

# The loader and the OpenCLIP embedding function are attached to the
# collection; presumably this is what lets frames be added by file URI
# (see add_frames_to_chromadb) -- confirm against the ChromaDB docs.
image_loader = ImageLoader()
CLIP = OpenCLIPEmbeddingFunction()
# Shared collection: written by add_frames_to_chromadb and returned by
# initiate_video. get_or_create_collection reuses an existing
# 'video_collection' if one is already present.
video_collection = client.get_or_create_collection(
    name='video_collection',
    embedding_function=CLIP,
    data_loader=image_loader
)

def extract_frames(video_folder, output_folder):
    """
    Extracts frames from all videos in the video_folder and saves them in the output_folder.

    For every ``.mp4`` file directly inside ``video_folder`` a subfolder named
    after the video (file name without extension) is created in
    ``output_folder``. The first frame, one frame every 5 seconds, and the
    frame at index ``frame_count - 1`` (as reported by OpenCV) are written as
    ``frame_<second>.jpg``, where ``<second>`` is the frame timestamp
    truncated to whole seconds.

    Videos that cannot be opened, or that report a non-positive frame rate,
    are skipped with a printed message instead of aborting the whole run.

    Args:
        video_folder (str): Path to the folder containing video files.
        output_folder (str): Path to the folder where extracted frames will be saved.
    """
    os.makedirs(output_folder, exist_ok=True)

    for video_filename in os.listdir(video_folder):
        if not video_filename.endswith('.mp4'):
            continue

        video_path = os.path.join(video_folder, video_filename)
        video_capture = cv2.VideoCapture(video_path)
        try:
            fps = video_capture.get(cv2.CAP_PROP_FPS)
            # `not fps > 0` rejects 0, negative values and NaN alike; without
            # this guard the timestamp division below would raise.
            if not video_capture.isOpened() or not fps > 0:
                print(f"Skipping unreadable video: {video_path}")
                continue

            frame_count = int(video_capture.get(cv2.CAP_PROP_FRAME_COUNT))
            # Number of frames between two saved samples (5 seconds apart).
            # Clamped to 1 so a very low frame rate cannot cause a modulo by zero.
            frames_per_sample = max(1, int(fps * 5))

            output_subfolder = os.path.join(output_folder, os.path.splitext(video_filename)[0])
            os.makedirs(output_subfolder, exist_ok=True)

            frame_number = 0
            success, image = video_capture.read()
            while success:
                # Save frames at 0 seconds, every 5 seconds, and the last frame.
                # Frame 0 is covered by the modulo test.
                if frame_number % frames_per_sample == 0 or frame_number == frame_count - 1:
                    frame_time = frame_number / fps
                    # The timestamp is truncated to whole seconds, so the last
                    # frame may reuse (and overwrite) the name of the preceding
                    # 5-second sample.
                    output_frame_filename = os.path.join(output_subfolder, f'frame_{int(frame_time)}.jpg')
                    cv2.imwrite(output_frame_filename, image)

                success, image = video_capture.read()
                frame_number += 1
        finally:
            # Always free the decoder handle, even if reading or writing fails.
            video_capture.release()


def add_frames_to_chromadb(video_dir, frames_dir):
    """
    Adds extracted frames from videos to the ChromaDB collection.

    For every ``.mp4`` file in ``video_dir`` the matching subfolder of
    ``frames_dir`` (named after the video without its extension) is scanned
    for ``.jpg`` files. Each frame is added to the module-level
    ``video_collection`` with:

    * id: ``<frame name without .jpg>_<video title>``
    * uri: path of the frame image
    * metadata: ``{'video_uri': <path of the source video>}``

    If no frames are found, nothing is sent to the collection.

    Args:
        video_dir (str): Path to the folder containing video files.
        frames_dir (str): Path to the folder containing the extracted frames.
    """
    # Prepare ids, uris, and metadatas for ChromaDB
    ids = []
    uris = []
    metadatas = []

    for video_file in os.listdir(video_dir):
        if not video_file.endswith('.mp4'):
            continue

        video_title = video_file[:-4]
        frame_folder = os.path.join(frames_dir, video_title)
        # Videos without an extracted-frames folder are simply skipped.
        if not os.path.isdir(frame_folder):
            continue

        video_path = os.path.join(video_dir, video_file)
        for frame in os.listdir(frame_folder):
            if not frame.endswith('.jpg'):
                continue
            ids.append(f"{frame[:-4]}_{video_title}")
            uris.append(os.path.join(frame_folder, frame))
            metadatas.append({'video_uri': video_path})

    # The collection rejects an empty batch, so return early when there is
    # nothing to add.
    if not ids:
        print("No extracted frames found; nothing to add to ChromaDB.")
        return

    # Add frames to the ChromaDB collection
    video_collection.add(ids=ids, uris=uris, metadatas=metadatas)


def initiate_video(video_folder_path):
    """
    Runs the complete video pipeline for one folder: frame extraction
    followed by indexing of the extracted frames in ChromaDB.

    Frames are written to an ``extracted_frames`` subfolder inside
    ``video_folder_path``. Progress and errors are reported with ``print``.

    Args:
        video_folder_path (str): Path to the folder containing video files.

    Returns:
        The ChromaDB collection with the added frames, or None if any step
        raised an exception (the error message is printed).
    """
    try:
        print("Starting video processing pipeline...")

        # Extracted frames live next to the videos, in a dedicated subfolder.
        frames_root = os.path.join(video_folder_path, 'extracted_frames')

        # Step 1: dump the sampled frames to disk.
        print("Extracting frames...")
        extract_frames(video_folder_path, frames_root)
        print("Frames extracted successfully.")

        # Step 2: register the frames in the collection.
        print("Adding frames to ChromaDB...")
        add_frames_to_chromadb(video_folder_path, frames_root)
        print("Frames added to ChromaDB successfully.")

        return video_collection
    except Exception as exc:
        # Pipeline boundary: report the failure and signal it with None.
        print(f"An error occurred during video processing: {exc}")
        return None