import json
import logging

import numpy as np
import torch
from transformers import TimesformerForVideoClassification

from preprocessing import read_video

# Logging setup
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)


class EndpointHandler:
    def __init__(self, model_dir):
        self.model = TimesformerForVideoClassification.from_pretrained(
            'donghuna/timesformer-base-finetuned-k400-diving48',
            ignore_mismatched_sizes=True,
        )
        # Replace the classification head with a 48-way head for Diving48
        self.model.classifier = torch.nn.Linear(self.model.classifier.in_features, 48)
        self.model.eval()

    def __call__(self, data):
        # Debug: inspect the raw request payload
        logger.info(f"Received data: {data}")

        # The payload may arrive as a JSON string; parse it only in that case
        inputs = json.loads(data) if isinstance(data, str) else data
        video_path = inputs.get("inputs", {}).get("video_path", "")
        ftp_password = inputs.get("inputs", {}).get("ftp_password", "")

        # Fetch and decode the video into an array of frames
        processed_frames = read_video(video_path, ftp_password)

        # Use the decoded frames (not the raw payload) as model input
        frames = np.array(processed_frames)
        frames = torch.tensor(frames).float()  # ensure the data is in the correct format

        # Debug: inspect the frame tensor shape
        logger.info(f"Frames shape: {frames.shape}")

        # Perform inference
        with torch.no_grad():
            outputs = self.model(frames.unsqueeze(0))  # add batch dimension
            predictions = torch.softmax(outputs.logits, dim=-1)

        # Debug: inspect the prediction distribution
        logger.info(f"Predictions: {predictions}")

        predicted_class = torch.argmax(predictions, dim=-1).item()

        # Debug: inspect the predicted class index
        logger.info(f"Predicted class: {predicted_class}")

        return {"predicted_class": predicted_class, "predictions": predictions.tolist()}
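
# --- Usage sketch (local test only). This is a minimal, hedged example, not
# part of the deployed handler: the video path and FTP password below are
# placeholders, and it assumes preprocessing.read_video can resolve them and
# return frames shaped (num_frames, channels, height, width) as the model
# expects after the batch dimension is added.
if __name__ == "__main__":
    handler = EndpointHandler(model_dir=".")
    payload = json.dumps({
        "inputs": {
            "video_path": "/path/to/dive_clip.mp4",  # placeholder path
            "ftp_password": "changeme",              # placeholder credential
        }
    })
    result = handler(payload)
    print(result["predicted_class"])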