import gc
import os

import gradio as gr
import torch
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer, pipeline

from components import pexels, utils

# Load model and tokenizer
model_name = "google/flan-t5-xxl"
model = AutoModelForSeq2SeqLM.from_pretrained(
    model_name, device_map="auto", torch_dtype=torch.float16
)
tokenizer = AutoTokenizer.from_pretrained(model_name)

pipe = pipeline(
    'text2text-generation',
    model=model,
    tokenizer=tokenizer,
    max_length=256,
    device_map="auto",  # Use the GPU if available
)

# Labels shown in the "Video Dimension" dropdown. They are defined once so the
# UI and pred() cannot drift apart (previously the landscape label offered by
# the dropdown did not match the string pred() compared against, so landscape
# requests were silently rendered as square videos).
PORTRAIT_LABEL = "Shorts/Reels/TikTok (1080 x 1920)"
LANDSCAPE_LABEL = "Facebook/Youtube Videos (1920 x 1080)"
SQUARE_LABEL = "Square (1080 x 1080)"

# Dropdown label -> (orientation keyword, height, width).
_DIMENSIONS = {
    PORTRAIT_LABEL: ("portrait", 1920, 1080),
    LANDSCAPE_LABEL: ("landscape", 1080, 1920),
    # Label that pred() originally checked for; still accepted so existing
    # callers passing it keep getting landscape output.
    "Youtube Videos (1920 x 1080)": ("landscape", 1080, 1920),
    SQUARE_LABEL: ("square", 1080, 1080),
}

# Used for any label not listed above (including None when nothing is
# selected), matching the original fall-through branch.
_DEFAULT_DIMENSION = ("square", 1080, 1080)


def generate_captions(product_name):
    """Return the model's generated advertisement captions for a product.

    Args:
        product_name: Product name interpolated into the caption prompt.

    Returns:
        The ``generated_text`` of the first pipeline result.
    """
    template = f"Make 5 different advertisement captions about this product: {product_name}"
    result = pipe(template)
    return result[0]['generated_text']


def generate_tags(sentence):
    """Return the model's short keyword summary of ``sentence``.

    Args:
        sentence: Text interpolated into the tag-extraction prompt.

    Returns:
        The ``generated_text`` of the first pipeline result.
    """
    template = f"What is the most significant actions or places or things, say it in at most 5 words: {sentence}"
    result = pipe(template)
    return result[0]['generated_text']


def pred(product_name, orientation):
    """Generate captions and an advertisement video for a product.

    Args:
        product_name: Product name entered by the user.
        orientation: Dropdown label selecting the video dimensions. Labels
            not present in ``_DIMENSIONS`` fall back to square 1080 x 1080.

    Returns:
        A two-item list: the generated captions and the path
        ``<folder_name>/Final_Ad_Video.mp4``.
    """
    orientation, height, width = _DIMENSIONS.get(orientation, _DEFAULT_DIMENSION)

    # Generate captions
    sentences = generate_captions(product_name)

    # Generate tags
    tags = generate_tags(sentences)

    # Generate videos using Pexels API
    folder_name = pexels.generate_videos(
        product_name,
        os.getenv('pexels_api_key'),
        orientation,
        height,
        width,
        sentences,
        tags,
    )
    gc.collect()

    # Combine videos
    # NOTE(review): presumably this writes Final_Ad_Video.mp4 into
    # folder_name - confirm against components.utils.
    utils.combine_videos(folder_name)
    return [sentences, os.path.join(folder_name, "Final_Ad_Video.mp4")]


# Gradio interface
with gr.Blocks() as demo:
    gr.Markdown(
        """
        ### Note: Thời gian tạo 1 video là khoảng 3-4 phút
        """
    )
    dimension = gr.Dropdown(
        [PORTRAIT_LABEL, LANDSCAPE_LABEL, SQUARE_LABEL],
        label="Video Dimension",
        info="Choose dimension",
    )
    product_name = gr.Textbox(label="Tên Sản Phẩm")
    captions = gr.Textbox(label="Chú Thích")
    video = gr.Video()
    btn = gr.Button("Bắt Đầu Tạo Video")
    btn.click(pred, inputs=[product_name, dimension], outputs=[captions, video])

demo.launch()