import json
import logging

import numpy as np
import torch
from transformers import TimesformerForVideoClassification

from preprocessing import read_video

# Logging setup
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)


class EndpointHandler:
    def __init__(self, model_dir):
        self.model = TimesformerForVideoClassification.from_pretrained(
            'donghuna/timesformer-base-finetuned-k400-diving48',
            ignore_mismatched_sizes=True,
        )
        # Replace the classification head with a 48-way head for Diving48
        self.model.classifier = torch.nn.Linear(self.model.classifier.in_features, 48)
        self.model.eval()

    def __call__(self, data):
        # Debug: inspect the raw request payload
        logger.info(f"Received data: {data}")

        # The payload may arrive as a JSON string; parse it only in that case
        inputs = json.loads(data) if isinstance(data, str) else data
        video_path = inputs.get("inputs", {}).get("video_path", "")
        ftp_password = inputs.get("inputs", {}).get("ftp_password", "")

        # Fetch and decode the video into an array of frames
        processed_frames = read_video(video_path, ftp_password)

        # Use the decoded frames (not the raw payload) as model input
        frames = np.array(processed_frames)
        frames = torch.tensor(frames).float()  # ensure the data is in the correct format

        # Debug: inspect the frame tensor shape
        logger.info(f"Frames shape: {frames.shape}")

        # Perform inference
        with torch.no_grad():
            outputs = self.model(frames.unsqueeze(0))  # add batch dimension
            predictions = torch.softmax(outputs.logits, dim=-1)

        # Debug: inspect the prediction distribution
        logger.info(f"Predictions: {predictions}")

        predicted_class = torch.argmax(predictions, dim=-1).item()

        # Debug: inspect the predicted class index
        logger.info(f"Predicted class: {predicted_class}")

        return {"predicted_class": predicted_class, "predictions": predictions.tolist()}
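
# --- Usage sketch (local test only). This is a minimal, hedged example, not
# part of the deployed handler: the video path and FTP password below are
# placeholders, and it assumes preprocessing.read_video can resolve them and
# return frames shaped (num_frames, channels, height, width) as the model
# expects after the batch dimension is added.
if __name__ == "__main__":
    handler = EndpointHandler(model_dir=".")
    payload = json.dumps({
        "inputs": {
            "video_path": "/path/to/dive_clip.mp4",  # placeholder path
            "ftp_password": "changeme",              # placeholder credential
        }
    })
    result = handler(payload)
    print(result["predicted_class"])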