import torch
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline
from components import pexels, utils
import os, gc
import gradio as gr

# Load the seq2seq model and its tokenizer once at import time so every
# request served by the app reuses the same instances.
model_name = "google/flan-t5-xxl"
# Weights are loaded in float16; device_map="auto" leaves device placement
# to the library instead of pinning the model to one device.
model = AutoModelForSeq2SeqLM.from_pretrained(model_name, device_map="auto", torch_dtype=torch.float16)
tokenizer = AutoTokenizer.from_pretrained(model_name)
# Shared text2text-generation pipeline used by generate_captions() and
# generate_tags(); max_length=256 is passed as the pipeline's generation limit.
# NOTE(review): the model object above was already created with
# device_map="auto", so the device_map argument here is probably redundant
# -- confirm against the installed transformers version.
pipe = pipeline(
    'text2text-generation',
    model=model,
    tokenizer=tokenizer,
    max_length=256,
    device_map="auto"  # Use the GPU if one is available
)

# Function to generate captions
def generate_captions(product_name):
    """Ask the shared text2text pipeline for advertisement captions.

    Args:
        product_name: Product name interpolated into the prompt.

    Returns:
        The ``generated_text`` value of the first pipeline result.
    """
    prompt = "Make 5 different advertisement captions about this product: {}".format(product_name)
    outputs = pipe(prompt)
    first_output = outputs[0]
    return first_output['generated_text']

# Function to generate tags
def generate_tags(sentence):
    """Ask the shared text2text pipeline for a short list of key terms.

    Args:
        sentence: Text interpolated into the prompt (``pred`` passes the
            generated captions here).

    Returns:
        The ``generated_text`` value of the first pipeline result.
    """
    prompt = "What is the most significant actions or places or things, say it in at most 5 words: {}".format(sentence)
    outputs = pipe(prompt)
    first_output = outputs[0]
    return first_output['generated_text']

# Main prediction function
def pred(product_name, orientation):
    """Generate captions and an advertisement video for a product.

    Args:
        product_name: Name of the product to advertise.
        orientation: Label selected in the "Video Dimension" dropdown. Any
            unrecognised value (including ``None`` when nothing is selected)
            falls back to a square 1080 x 1080 video.

    Returns:
        A two-item list: the generated caption text and the path to
        ``Final_Ad_Video.mp4`` inside the folder returned by
        ``pexels.generate_videos``.
    """
    # Translate the UI label into the orientation keyword plus pixel size
    # handed to the Pexels helper.
    if orientation == "Shorts/Reels/TikTok (1080 x 1920)":
        orientation, height, width = "portrait", 1920, 1080
    elif orientation in (
        # Label actually emitted by the dropdown; previously unmatched, so
        # landscape requests silently produced square videos.
        "Facebook/Youtube Videos (1920 x 1080)",
        # Older label kept so existing callers keep getting landscape output.
        "Youtube Videos (1920 x 1080)",
    ):
        orientation, height, width = "landscape", 1080, 1920
    else:
        orientation, height, width = "square", 1080, 1080

    # Generate captions
    sentences = generate_captions(product_name)

    # Generate tags from the captions
    tags = generate_tags(sentences)

    # Generate videos using Pexels API. The key is read from the
    # `pexels_api_key` environment variable; os.getenv yields None when unset.
    folder_name = pexels.generate_videos(
        product_name,
        os.getenv('pexels_api_key'),
        orientation,
        height,
        width,
        sentences,
        tags,
    )
    # Run a garbage-collection pass before combining clips (presumably to
    # release memory held by the generation step -- confirm).
    gc.collect()

    # Combine videos; the final file is expected at
    # <folder_name>/Final_Ad_Video.mp4.
    utils.combine_videos(folder_name)
    return [sentences, os.path.join(folder_name, "Final_Ad_Video.mp4")]

# Gradio interface: components are created in display order inside the
# Blocks context, then the button is wired to pred().
with gr.Blocks() as demo:
    # User-facing notice (Vietnamese): "Generating one video takes about
    # 3-4 minutes".
    gr.Markdown(
        """
        ### Note: Thời gian tạo 1 video là khoảng 3-4 phút  
        """
    )
    # The selected label is passed verbatim to pred() as `orientation`, so
    # these strings must stay in sync with the values pred() compares against.
    dimension = gr.Dropdown(
        ["Shorts/Reels/TikTok (1080 x 1920)", "Facebook/Youtube Videos (1920 x 1080)", "Square (1080 x 1080)"], 
        label="Video Dimension", info="Choose dimension"
    )
    # Input label (Vietnamese): "Product Name".
    product_name = gr.Textbox(label="Tên Sản Phẩm")
    # Output label (Vietnamese): "Caption"; receives the first item pred() returns.
    captions = gr.Textbox(label="Chú Thích")
    # Receives the second item pred() returns (path to the final video file).
    video = gr.Video()
    # Button text (Vietnamese): "Start Creating Video".
    btn = gr.Button("Bắt Đầu Tạo Video")
    btn.click(pred, inputs=[product_name, dimension], outputs=[captions, video])

# Start the Gradio app as soon as this module is executed.
demo.launch()