import base64
import io
import logging

import av
import numpy as np
import torch
from torchvision import transforms

logging.basicConfig(filename='/mnt/data/uploads/logfile-video.log', level=logging.INFO)


def make_stack(video_base64):
    """Decode a base64-encoded video into a (T, H, W, 3) uint8 frame stack."""
    video_data = base64.b64decode(video_base64)

    # PyAV reads from any seekable file-like object, so wrap the raw bytes.
    with av.open(io.BytesIO(video_data)) as container:
        # to_ndarray(format="rgb24") already yields uint8 HWC arrays.
        frames = [frame.to_ndarray(format="rgb24") for frame in container.decode(video=0)]

    if not frames:
        raise ValueError("no decodable video frames in input")
    logging.info("decoded %d frames", len(frames))
    return np.stack(frames, axis=0)


def read_video(video_base64, num_frames=24, target_size=(224, 224)):
    """Decode a base64-encoded video and prepare it for a model.

    The clip is decoded, uniformly sampled (or zero-padded) to exactly
    `num_frames` frames, and each frame is resized to `target_size`.
    Returns a float tensor of shape (1, num_frames, 3, *target_size)
    with values in [0, 1].
    """
    frames = make_stack(video_base64)
    frames = sample_frames(frames, num_frames)
    processed_frames = pad_and_resize(frames, target_size)
    return processed_frames


def sample_frames(frames, num_frames):
    """Uniformly subsample or zero-pad `frames` to exactly `num_frames`."""
    total_frames = len(frames)
    if total_frames >= num_frames:
        # Pick evenly spaced frame indices across the whole clip.
        indices = np.linspace(0, total_frames - 1, num=num_frames, dtype=int)
        sampled_frames = [frames[i] for i in indices]
    else:
        # Pad short clips with black frames so the output length is fixed.
        padding = [np.zeros_like(frames[0]) for _ in range(num_frames - total_frames)]
        sampled_frames = list(frames) + padding
    return np.array(sampled_frames)


def pad_and_resize(frames, target_size):
    """Resize each frame to `target_size` and stack into a batched tensor.

    Note: despite the name, no padding is applied here; `Resize` with an
    (H, W) tuple rescales directly, so the aspect ratio is not preserved.
    """
    transform = transforms.Compose([
        transforms.ToPILImage(),         # uint8 HWC array -> PIL image
        transforms.Resize(target_size),  # rescale to (H, W)
        transforms.ToTensor(),           # PIL image -> float CHW in [0, 1]
    ])
    processed_frames = torch.stack([transform(frame) for frame in frames])
    # Add a batch dimension: (num_frames, C, H, W) -> (1, num_frames, C, H, W).
    return processed_frames.unsqueeze(0)
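

# Minimal usage sketch. The file name below is hypothetical and only
# illustrates the expected call pattern; any base64-encoded video works.
if __name__ == "__main__":
    with open("sample_video.mp4", "rb") as f:  # hypothetical input path
        encoded = base64.b64encode(f.read()).decode("ascii")
    clip = read_video(encoded, num_frames=24, target_size=(224, 224))
    print(clip.shape)  # expected: torch.Size([1, 24, 3, 224, 224])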