import json
import logging
import os
import traceback
from typing import Any, Dict, List

import numpy as np
import torch
from transformers import TimesformerForVideoClassification

from preprocessing import read_video

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)


class EndpointHandler:
    def __init__(self, model_dir: str):
        # model_dir is accepted to match the custom handler interface; the
        # checkpoint itself is loaded directly from the Hugging Face Hub.
        self.model = TimesformerForVideoClassification.from_pretrained(
            "donghuna/timesformer-base-finetuned-k400-diving48",
            ignore_mismatched_sizes=True,
        )
        self.model.eval()

    def __call__(self, data: Dict[str, Any]) -> List[Dict[str, Any]]:
        """
        Args:
            data (:obj:`dict`):
                inputs (:obj:`str`): base64-encoded video data.
        Return:
            A :obj:`list` of dictionaries, one per input video, each containing
            the top-3 class indices and their probabilities.
        """
        inputs = data.get("inputs")

        # read_video is expected to decode the base64 payload into a tensor of
        # shape (batch, num_frames, channels, height, width) for TimeSformer.
        videos = read_video(inputs)

        # Run inference without tracking gradients.
        with torch.no_grad():
            outputs = self.model(videos)

        logits = outputs.logits
        probabilities = torch.softmax(logits, dim=1)

        # Keep the three most likely classes per video.
        top_probs, top_indices = torch.topk(probabilities, 3, dim=1)

        top_results = [
            {"class_indices": indices, "probabilities": probs}
            for indices, probs in zip(top_indices.tolist(), top_probs.tolist())
        ]

        return top_results
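

# Minimal local usage sketch (not part of the endpoint runtime): it assumes a
# video file at the hypothetical path "sample.mp4" and that read_video accepts
# the base64-encoded string described in the docstring above.
if __name__ == "__main__":
    import base64

    handler = EndpointHandler(model_dir=".")
    with open("sample.mp4", "rb") as f:
        encoded = base64.b64encode(f.read()).decode("utf-8")
    predictions = handler({"inputs": encoded})
    print(json.dumps(predictions, indent=2))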