# TDN-M's picture
# Update app.py
# 34e4015 verified
import torch
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline
from components import pexels, utils
import os, gc
import gradio as gr
# Load the tokenizer and model once at import time, then wrap both in a
# text2text-generation pipeline shared by the helper functions below.
model_name = "google/flan-t5-xxl"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSeq2SeqLM.from_pretrained(
    model_name,
    torch_dtype=torch.float16,
    device_map="auto",
)
pipe = pipeline(
    task="text2text-generation",
    model=model,
    tokenizer=tokenizer,
    max_length=256,
    device_map="auto",  # Use the GPU if one is available
)
# Caption generation
def generate_captions(product_name):
    """Ask the shared pipeline for advertisement captions about a product.

    Args:
        product_name: Product name interpolated into the prompt.

    Returns:
        The ``'generated_text'`` field of the first pipeline result.
    """
    prompt = "Make 5 different advertisement captions about this product: {}".format(product_name)
    outputs = pipe(prompt)
    first_output = outputs[0]
    return first_output["generated_text"]
# Tag generation
def generate_tags(sentence):
    """Ask the shared pipeline to condense a text into a few key words.

    Args:
        sentence: Text interpolated into the prompt (``pred`` passes the
            generated captions here).

    Returns:
        The ``'generated_text'`` field of the first pipeline result.
    """
    prompt = (
        "What is the most significant actions or places or things, "
        f"say it in at most 5 words: {sentence}"
    )
    return pipe(prompt)[0]["generated_text"]
# Main prediction function
def pred(product_name, orientation):
    """Generate captions and an advertisement video for a product.

    Args:
        product_name: Name of the product to advertise.
        orientation: Label selected in the "Video Dimension" dropdown. Any
            value that is not a known portrait or landscape label
            (including ``None`` when nothing is selected) falls back to a
            1080 x 1080 square video.

    Returns:
        A two-item list: the generated caption text and the path
        ``<folder_name>/Final_Ad_Video.mp4``.
    """
    # The dropdown offers "Facebook/Youtube Videos (1920 x 1080)"; the bare
    # "Youtube Videos (1920 x 1080)" label is still accepted so older callers
    # keep working. Previously only the bare label was matched, so picking
    # the landscape option in the UI silently produced a square video.
    landscape_labels = (
        "Facebook/Youtube Videos (1920 x 1080)",
        "Youtube Videos (1920 x 1080)",
    )

    if orientation == "Shorts/Reels/TikTok (1080 x 1920)":
        orientation = "portrait"
        height = 1920
        width = 1080
    elif orientation in landscape_labels:
        orientation = "landscape"
        height = 1080
        width = 1920
    else:
        orientation = "square"
        height = 1080
        width = 1080

    # Generate captions
    sentences = generate_captions(product_name)

    # Generate tags from the captions
    tags = generate_tags(sentences)

    # Generate videos using the Pexels API; the key is read from the
    # 'pexels_api_key' environment variable (None if it is not set).
    folder_name = pexels.generate_videos(
        product_name,
        os.getenv('pexels_api_key'),
        orientation,
        height,
        width,
        sentences,
        tags,
    )
    gc.collect()

    # Combine videos
    # NOTE(review): presumably this writes Final_Ad_Video.mp4 into
    # folder_name -- confirm in components.utils.
    utils.combine_videos(folder_name)

    return [sentences, os.path.join(folder_name, "Final_Ad_Video.mp4")]
# Gradio interface: one dropdown and one textbox as inputs, a caption textbox
# and a video player as outputs, and a button that runs `pred`.
with gr.Blocks() as demo:
    # Notice shown at the top of the page (Vietnamese UI text, kept as-is).
    gr.Markdown("\n### Note: Thời gian tạo 1 video là khoảng 3-4 phút\n")

    dimension = gr.Dropdown(
        choices=[
            "Shorts/Reels/TikTok (1080 x 1920)",
            "Facebook/Youtube Videos (1920 x 1080)",
            "Square (1080 x 1080)",
        ],
        label="Video Dimension",
        info="Choose dimension",
    )
    product_name = gr.Textbox(label="Tên Sản Phẩm")
    captions = gr.Textbox(label="Chú Thích")
    video = gr.Video()
    btn = gr.Button("Bắt Đầu Tạo Video")

    # Wire the button: (product name, dimension) -> (captions, video).
    btn.click(
        fn=pred,
        inputs=[product_name, dimension],
        outputs=[captions, video],
    )

demo.launch()