import gradio as gr
from huggingface_hub import login
from transformers import VideoClassificationPipeline, AutoModelForVideoClassification, AutoProcessor
import torch

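# Optional: log in to the Hugging Face Hub (needed only for gated or private
# models). Prefer loading the token from an environment variable over
# hard-coding it here; uncomment and adapt the lines below to enable.
# HUGGINGFACEHUB_API_TOKEN = "your_huggingface_api_token"
# login(HUGGINGFACEHUB_API_TOKEN)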

# Define the model and processor from Hugging Face.
# NOTE: the checkpoint must ship a video-classification head. X-CLIP checkpoints
# such as "microsoft/xclip-base-patch32" are video-text models that
# AutoModelForVideoClassification has no mapping for, so they fail to load here.
model_name = "MCG-NJU/videomae-base-finetuned-kinetics"
model = AutoModelForVideoClassification.from_pretrained(model_name)
# For this checkpoint AutoProcessor resolves to the model's image processor.
processor = AutoProcessor.from_pretrained(model_name)

# Create a video classification pipeline
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# The pipeline moves the model onto `device` itself, so no separate
# `model.to(device)` is needed. Frames are preprocessed through the
# `image_processor` slot rather than the legacy `feature_extractor` one.
pipeline = VideoClassificationPipeline(
    model=model,
    image_processor=processor,
    device=device,
)

# Define the function for video classification
def classify_video(video_path):
    """Classify a video and return scores in the shape ``gr.Label`` expects.

    Args:
        video_path: Path of the video to classify, as handed over by the
            Gradio video input. May be ``None`` or empty when the user
            submits the form without uploading anything.

    Returns:
        A ``{label: score}`` dict built from the pipeline predictions, or an
        empty dict when no video was provided.
    """
    # Submitting without an upload yields no path; short-circuit instead of
    # letting the pipeline fail on it.
    if not video_path:
        return {}

    predictions = pipeline(video_path)
    # The pipeline yields a list of {"label": ..., "score": ...} dicts, whereas
    # gr.Label needs a single mapping of label -> confidence.
    return {item["label"]: float(item["score"]) for item in predictions}

# Build the Gradio UI: a video upload as input, a label widget as output
video_input = gr.Video(label="Upload a video for classification")
label_output = gr.Label(num_top_classes=3, label="Top 3 Predicted Classes")

interface = gr.Interface(
    fn=classify_video,
    inputs=video_input,
    outputs=label_output,
    title="Video Classification using Hugging Face",
    description="Upload a video file and get the top 3 predicted classes using a Hugging Face video classification model.",
)

# Start the app only when this file is executed as a script, not on import
if __name__ == "__main__":
    interface.launch()