File size: 2,239 Bytes
9a900e5 7092ebc a095c9c fd22feb 9a900e5 401448a 168ae63 fd22feb 168ae63 6255548 3a6db82 401448a 6255548 9a900e5 69fee66 9a900e5 69fee66 9a900e5 69fee66 9a900e5 69fee66 9a900e5 69fee66 9a900e5 8aa6ace 3a541c8 69fee66 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 |
import numpy as np
import torch
from torchvision import transforms
import av
import logging
import base64
import io
# Configure logging as soon as this module is imported: records at INFO level
# and above are written to the file below.
# NOTE(review): the path is hard-coded and its directory presumably has to
# exist already -- confirm for every environment that imports this module.
logging.basicConfig(filename='/mnt/data/uploads/logfile-video.log', level=logging.INFO)
def makeStack(video_base64):
    """Decode a base64-encoded video into a single array of RGB frames.

    Args:
        video_base64: Base64-encoded contents of a video file that PyAV is
            able to open.

    Returns:
        ``np.ndarray`` built by stacking every frame of the first video
        stream (converted to ``rgb24`` / ``uint8``) along a new leading axis,
        so index 0 selects the frame.

    Raises:
        ValueError: If the first video stream yields no frames.  Errors from
            ``base64.b64decode`` or from PyAV while opening/decoding the
            payload propagate unchanged.
    """
    video_data = base64.b64decode(video_base64)

    # Use the container as a context manager so the underlying demuxer and
    # decoder are released even when decoding fails part-way through.
    with av.open(io.BytesIO(video_data)) as container:
        frames = [
            frame.to_ndarray(format="rgb24").astype(np.uint8)
            for frame in container.decode(video=0)
        ]

    if not frames:
        # np.stack([]) would raise a ValueError anyway; say why instead.
        raise ValueError("video contains no decodable frames")

    return np.stack(frames, axis=0)
def read_video(video_base64, num_frames=24, target_size=(224, 224)):
    """Turn a base64-encoded video into a fixed-length batch of resized frames.

    The work is delegated to three helpers of this module, in order:
    ``makeStack`` decodes the payload into frames, ``sample_frames`` brings
    the frame count to ``num_frames``, and ``pad_and_resize`` resizes each
    frame to ``target_size`` and converts the result to a tensor.

    Args:
        video_base64: Base64-encoded video payload, handed to ``makeStack``.
        num_frames: Number of frames requested from ``sample_frames``.
        target_size: Size forwarded to ``pad_and_resize``.

    Returns:
        Whatever ``pad_and_resize`` returns for the sampled frames.
    """
    decoded = makeStack(video_base64)
    selected = sample_frames(decoded, num_frames)
    return pad_and_resize(selected, target_size)
def sample_frames(frames, num_frames):
    """Return exactly ``num_frames`` frames taken from ``frames``.

    * More frames than requested: pick ``num_frames`` of them at evenly
      spaced indices between the first and the last frame (inclusive).
    * Fewer frames than requested: keep them all and append all-zero frames
      shaped like the first frame until the count is reached.
    * Exactly the requested number: keep them unchanged.

    Args:
        frames: Sized, indexable sequence of frames (a list or an array).
        num_frames: Desired number of frames in the result.

    Returns:
        ``np.ndarray`` built from the selected (and possibly zero-padded)
        frames.

    Raises:
        IndexError: If ``frames`` is empty while padding is required, because
            the first frame is used as the template for the zero frames.
    """
    count = len(frames)

    # Too many frames: thin them out at evenly spaced positions.
    if count > num_frames:
        picks = np.linspace(0, count - 1, num=num_frames, dtype=int)
        return np.array([frames[idx] for idx in picks])

    # Enough or too few frames: keep everything, then top up with blanks.
    chosen = list(frames)
    chosen.extend(np.zeros_like(frames[0]) for _ in range(num_frames - count))
    return np.array(chosen)
def pad_and_resize(frames, target_size):
    """Resize every frame and pack the results into one batched tensor.

    Each frame is passed through ``ToPILImage`` -> ``Resize(target_size)`` ->
    ``ToTensor``.  The per-frame tensors are stacked along a new first axis
    and a leading batch axis of size 1 is added.  Despite the function name,
    no padding is performed here.

    Args:
        frames: Iterable of frames accepted by ``transforms.ToPILImage``
            (presumably H x W x 3 ``uint8`` arrays as produced by
            ``makeStack`` -- confirm against callers).
        target_size: Size forwarded to ``transforms.Resize``.

    Returns:
        ``torch.Tensor`` laid out as ``[1, num_frames, C, H, W]``.
    """
    pipeline = transforms.Compose([
        transforms.ToPILImage(),
        transforms.Resize(target_size),
        transforms.ToTensor(),
    ])
    stacked = torch.stack([pipeline(image) for image in frames])
    # Frame axis stays ahead of the channel axis; only a batch axis is added.
    return stacked.unsqueeze(0)
|