donghuna's picture
Update preprocessing.py
9a97756 verified
raw
history blame
1.45 kB
import numpy as np
import torch
from torchvision import transforms
import av
import logging
import base64
import io
# Configure the root logger at import time: records at INFO level and above
# are written to the hard-coded log file below.
# NOTE(review): this runs as a module-level side effect; presumably the
# /mnt/data/uploads directory must already exist when the module is imported
# -- confirm against the deployment environment.
logging.basicConfig(filename='/mnt/data/uploads/logfile-video.log', level=logging.INFO)
def read_video(video_base64, num_frames=24, target_size=(224, 224)):
    """Decode a base64-encoded video and return its sampled, resized frames.

    The payload is base64-decoded, opened in memory with PyAV, and every
    frame of the first video stream is decoded to an RGB ``uint8`` array.
    The decoded frames are then reduced/padded to ``num_frames`` by
    ``sample_frames`` and resized/converted by ``preprocess_frames``.

    Args:
        video_base64: Base64 representation of an encoded video file.
        num_frames: Number of frames to keep; forwarded to ``sample_frames``.
        target_size: Resize target; forwarded to ``preprocess_frames``.

    Returns:
        The value returned by ``preprocess_frames`` for the sampled frames.
    """
    video_data = base64.b64decode(video_base64)

    # Open the container as a context manager so the underlying demuxer and
    # decoder are released even when decoding raises part-way through.
    with av.open(io.BytesIO(video_data)) as container:
        frames = [
            frame.to_ndarray(format="rgb24").astype(np.uint8)
            for frame in container.decode(video=0)
        ]

    sampled_frames = sample_frames(frames, num_frames)
    return preprocess_frames(sampled_frames, target_size)
def sample_frames(frames, num_frames):
    """Return exactly ``num_frames`` frames stacked into a single array.

    * More frames than requested: frames are picked at evenly spaced indices
      running from the first frame to the last one.
    * Fewer frames than requested: all-zero frames with the shape and dtype
      of the first frame are appended at the end.
    * Exactly ``num_frames``: the frames are returned unchanged.

    Args:
        frames: Iterable of equally shaped frame arrays. It is never
            modified; the function works on a private copy.
        num_frames: Desired number of frames in the result.

    Returns:
        ``np.ndarray`` built from the selected (and possibly padded) frames.

    Raises:
        IndexError: If ``frames`` is empty while ``num_frames`` is positive,
            because there is no frame to take the padding shape from.
    """
    # Copy first: padding must not leak into the caller's list, and copying
    # also lets tuples, generators and arrays be accepted.
    selected = list(frames)
    total_frames = len(selected)

    if total_frames > num_frames:
        indices = np.linspace(0, total_frames - 1, num=num_frames, dtype=int)
        selected = [selected[i] for i in indices]
    elif total_frames < num_frames:
        if not selected:
            raise IndexError(
                "cannot pad an empty frame sequence: no frame to take the shape from"
            )
        blank = np.zeros_like(selected[0])
        # Re-using one blank frame is safe: np.array() below copies the data.
        selected.extend([blank] * (num_frames - total_frames))

    return np.array(selected)
def preprocess_frames(frames, target_size):
    """Resize every frame and return the clip as a channels-first array.

    Each frame goes through a torchvision pipeline (to PIL image, resize to
    ``target_size``, to tensor). The per-frame tensors are stacked along a
    new leading time axis and the first two axes are then swapped.

    Args:
        frames: Iterable of frames accepted by ``transforms.ToPILImage``.
            # assumes H x W x C uint8 arrays as produced upstream -- TODO confirm
        target_size: Size argument handed to ``transforms.Resize``.

    Returns:
        NumPy array laid out as (C, T, H, W).
    """
    pipeline = transforms.Compose(
        [
            transforms.ToPILImage(),
            transforms.Resize(target_size),
            transforms.ToTensor(),
        ]
    )

    frame_tensors = []
    for frame in frames:
        frame_tensors.append(pipeline(frame))

    clip = torch.stack(frame_tensors)  # (T, C, H, W)
    channels_first = clip.permute(1, 0, 2, 3)  # (T, C, H, W) -> (C, T, H, W)
    return channels_first.numpy()