import json
import logging
import os
import traceback
from typing import Any, Dict, List

import numpy as np
import torch
from transformers import TimesformerForVideoClassification

from preprocessing import read_video

# Logging configuration
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)


class EndpointHandler:
    """Inference handler that classifies videos with a TimeSformer model.

    An instance is callable: it takes a request payload, decodes the video
    through ``read_video``, runs the model, and returns the highest-scoring
    class indices with their softmax probabilities.
    """

    def __init__(self, model_dir):
        """Load the pretrained model and switch it to evaluation mode.

        Args:
            model_dir: Accepted for interface compatibility but not used
                here; the weights are always loaded from the hard-coded
                checkpoint identifier below.
        """
        self.model = TimesformerForVideoClassification.from_pretrained(
            'donghuna/timesformer-base-finetuned-k400-diving48',
            ignore_mismatched_sizes=True,
        )
        # NOTE(review): an earlier revision replaced ``self.model.classifier``
        # with a 48-way ``torch.nn.Linear`` at this point. That override is
        # disabled, so the classification head is whatever the checkpoint
        # provides - confirm this is intended.
        self.model.eval()

    def __call__(self, data: Dict[str, Any]) -> List[Dict[str, Any]]:
        """Classify the video(s) carried by a request payload.

        Args:
            data: Request payload. ``data["inputs"]`` is handed unchanged to
                ``read_video`` (per the original docstring: base64-encoded
                video data as a ``str``).

        Returns:
            One dictionary per row of the model's logits, each with:
                ``"class_indices"``: the top class indices, best first.
                ``"probabilities"``: the matching softmax probabilities.
            Up to 3 entries are returned per row (fewer only if the model
            exposes fewer than 3 classes).

        Raises:
            ValueError: If ``data`` has no ``"inputs"`` entry (or it is
                ``None``).
        """
        inputs = data.get("inputs")
        if inputs is None:
            # Fail early with a clear message instead of passing None on
            # to the video decoder.
            raise ValueError('Request payload is missing the "inputs" field.')

        # presumably returns a tensor batch the model accepts as its first
        # positional argument - verify against preprocessing.read_video
        videos = read_video(inputs)

        with torch.no_grad():
            logits = self.model(videos).logits
            probabilities = torch.softmax(logits, dim=1)

            # Clamp k so topk cannot fail on a model with < 3 classes.
            k = min(3, probabilities.shape[1])
            top_probs, top_indices = torch.topk(probabilities, k, dim=1)

        return [
            {"class_indices": indices, "probabilities": probs}
            for indices, probs in zip(top_indices.tolist(), top_probs.tolist())
        ]