import logging
from ftplib import FTP

import av
import numpy as np
import torch
from torchvision import transforms

logging.basicConfig(filename='/mnt/data/uploads/logfile-video.log', level=logging.INFO)


def get_video_file(file_path, ftp_password):
    """Download *file_path* from the project FTP server to the local file ``test.mp4``.

    Parameters
    ----------
    file_path : str
        Path of the video relative to the remote folder below.
    ftp_password : str
        Password for the FTP account.

    Returns
    -------
    str
        The local path the video was written to (``"test.mp4"``).
    """
    # FTP server information.
    ftp_server = "121.136.96.223"
    ftp_port = 21
    ftp_user = "donghuna_ftp"
    # folder_path = "homes/donghuna/database/Diving48_rgb/rgb/"
    folder_path = "web/donghuna.com/inference-endpoints/dive-sequence-classification/"

    local_path = "test.mp4"
    # Use FTP as a context manager so the control connection is closed
    # even if login or the transfer raises (the original leaked it).
    with FTP() as ftp:
        ftp.connect(ftp_server, ftp_port)
        ftp.login(user=ftp_user, passwd=ftp_password)
        ftp.set_pasv(True)
        with open(local_path, 'wb') as local_file:
            ftp.retrbinary(f'RETR {folder_path}{file_path}', local_file.write)
    return local_path


def read_video(file_path, num_frames=24, target_size=(224, 224), ftp_password=None):
    """Fetch a video over FTP, decode it, and return preprocessed frames.

    Fixes two defects in the original: it referenced an undefined global
    ``ftp_password`` (NameError on every call — now an explicit keyword
    parameter, backward-compatible default ``None``), and the whole decode /
    sample / preprocess pipeline was commented out, so it returned ``None``.
    The restored pipeline also decodes the *downloaded* local file rather
    than the remote path (the commented-out code opened ``file_path``).

    Parameters
    ----------
    file_path : str
        Remote video path, passed through to :func:`get_video_file`.
    num_frames : int
        Number of frames to sample uniformly from the video.
    target_size : tuple[int, int]
        (height, width) each frame is resized to.
    ftp_password : str | None
        FTP password forwarded to :func:`get_video_file`.

    Returns
    -------
    numpy.ndarray
        Array of shape (C, T, H, W) with values in [0, 1].
    """
    local_path = get_video_file(file_path, ftp_password)
    logging.info(f"Reading video from: {file_path}")
    container = av.open(local_path)
    frames = [
        frame.to_ndarray(format="rgb24").astype(np.uint8)
        for frame in container.decode(video=0)
    ]
    container.close()
    sampled_frames = sample_frames(frames, num_frames)
    return preprocess_frames(sampled_frames, target_size)


def sample_frames(frames, num_frames):
    """Return exactly *num_frames* frames as a numpy array.

    Videos longer than *num_frames* are sampled uniformly across their
    length; shorter ones are padded with all-zero frames of the same shape.
    Unlike the original, the caller's list is NOT mutated (the old code
    ``extend``-ed padding onto the input in place).

    Parameters
    ----------
    frames : list[numpy.ndarray]
        Decoded frames; must be non-empty.
    num_frames : int
        Target frame count.

    Raises
    ------
    ValueError
        If *frames* is empty (the original raised a bare IndexError).
    """
    if not frames:
        raise ValueError("frames must contain at least one frame")
    total_frames = len(frames)
    if total_frames < num_frames:
        # Pad short clips with zero frames matching the first frame's shape/dtype.
        padding = [np.zeros_like(frames[0]) for _ in range(num_frames - total_frames)]
        frames = list(frames) + padding
    elif total_frames > num_frames:
        # Uniformly spaced indices over the whole clip, endpoints included.
        indices = np.linspace(0, total_frames - 1, num=num_frames, dtype=int)
        frames = [frames[i] for i in indices]
    return np.array(frames)


def preprocess_frames(frames, target_size):
    """Resize frames and convert to a float tensor in channels-first video layout.

    Parameters
    ----------
    frames : iterable of numpy.ndarray
        HWC uint8 RGB frames.
    target_size : tuple[int, int]
        (height, width) for the resize.

    Returns
    -------
    numpy.ndarray
        Array of shape (C, T, H, W); ToTensor scales values to [0, 1].
    """
    transform = transforms.Compose([
        transforms.ToPILImage(),
        transforms.Resize(target_size),
        transforms.ToTensor()
    ])
    processed_frames = [transform(frame) for frame in frames]
    # (T, C, H, W) -> (C, T, H, W)
    return torch.stack(processed_frames).permute(1, 0, 2, 3).numpy()