File size: 2,507 Bytes
3e2f324
34e4015
b110be1
b080a94
19599cb
3e2f324
34e4015
 
 
 
3e2f324
 
 
34e4015
 
 
3e2f324
 
34e4015
 
 
 
 
3e2f324
34e4015
 
 
 
 
3e2f324
34e4015
ec24a25
1fe1ea5
34e4015
1fe1ea5
 
bcbe59e
 
 
 
34e4015
1fe1ea5
 
 
34e4015
 
 
 
 
 
 
 
 
b080a94
34e4015
 
b110be1
34e4015
3e2f324
34e4015
3e2f324
8dfb7a8
d6615c6
34e4015
d6615c6
 
2341d95
34e4015
 
 
2f2db51
 
3e2f324
2f2db51
34e4015
3e2f324
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
import torch
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline
from components import pexels, utils
import os, gc
import gradio as gr

# Load the FLAN-T5 XXL tokenizer and half-precision model once at import time.
model_name = "google/flan-t5-xxl"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSeq2SeqLM.from_pretrained(
    model_name,
    torch_dtype=torch.float16,
    device_map="auto",
)

# Shared text2text pipeline used by generate_captions() and generate_tags().
pipe = pipeline(
    task="text2text-generation",
    model=model,
    tokenizer=tokenizer,
    device_map="auto",  # use a GPU if one is available
    max_length=256,
)

def generate_captions(product_name):
    """Prompt the shared pipeline for advertisement captions about a product.

    Args:
        product_name: Product name interpolated into the caption prompt.

    Returns:
        The ``generated_text`` value of the first result the pipeline returns.
    """
    prompt = f"Make 5 different advertisement captions about this product: {product_name}"
    first_output = pipe(prompt)[0]
    return first_output["generated_text"]

def generate_tags(sentence):
    """Prompt the shared pipeline for a short list of key actions/places/things.

    Args:
        sentence: Text interpolated into the tag-extraction prompt.

    Returns:
        The ``generated_text`` value of the first result the pipeline returns.
    """
    prompt = f"What is the most significant actions or places or things, say it in at most 5 words: {sentence}"
    first_output = pipe(prompt)[0]
    return first_output["generated_text"]

# Maps each dimension label to (orientation, height, width).
# The UI dropdown offers "Facebook/Youtube Videos (1920 x 1080)"; the shorter
# "Youtube Videos (1920 x 1080)" label is kept so existing callers still work.
_DIMENSION_PRESETS = {
    "Shorts/Reels/TikTok (1080 x 1920)": ("portrait", 1920, 1080),
    "Facebook/Youtube Videos (1920 x 1080)": ("landscape", 1080, 1920),
    "Youtube Videos (1920 x 1080)": ("landscape", 1080, 1920),
}
# Any other value (including no selection) falls back to a square video.
_DEFAULT_PRESET = ("square", 1080, 1080)


def pred(product_name, orientation):
    """Generate captions and a combined advertisement video for a product.

    Args:
        product_name: Product name used for the caption prompt and passed on
            to ``pexels.generate_videos``.
        orientation: Dimension label chosen in the UI. Labels not present in
            ``_DIMENSION_PRESETS`` are treated as the square 1080 x 1080 preset.

    Returns:
        A two-item list: the generated caption text and the path
        ``<folder_name>/Final_Ad_Video.mp4``, where ``folder_name`` is the
        value returned by ``pexels.generate_videos``.
    """
    # Previously only the short "Youtube Videos" label was recognised, so the
    # dropdown's "Facebook/Youtube Videos" choice silently became square.
    orientation, height, width = _DIMENSION_PRESETS.get(orientation, _DEFAULT_PRESET)

    # Captions come from the product name; tags are derived from the captions.
    sentences = generate_captions(product_name)
    tags = generate_tags(sentences)

    # The Pexels API key is read from the ``pexels_api_key`` environment
    # variable; os.getenv yields None when it is unset.
    folder_name = pexels.generate_videos(
        product_name,
        os.getenv('pexels_api_key'),
        orientation,
        height,
        width,
        sentences,
        tags,
    )
    gc.collect()

    # NOTE(review): presumably combine_videos writes Final_Ad_Video.mp4 into
    # folder_name -- confirm against components.utils.
    utils.combine_videos(folder_name)
    return [sentences, os.path.join(folder_name, "Final_Ad_Video.mp4")]

# Gradio interface: a single-page Blocks app that collects a product name and
# a video dimension, then shows the generated captions and the final video.
# Components appear in the order they are created inside the `with` block.
with gr.Blocks() as demo:
    # User-facing note (Vietnamese): creating one video takes about 3-4 minutes.
    gr.Markdown(
        """
        ### Note: Thời gian tạo 1 video là khoảng 3-4 phút  
        """
    )
    # NOTE(review): these labels are passed verbatim to pred() as `orientation`,
    # so they need to match the strings pred() recognises.
    # NOTE(review): no default value is set; presumably pred() receives None
    # when nothing is selected -- confirm.
    dimension = gr.Dropdown(
        ["Shorts/Reels/TikTok (1080 x 1920)", "Facebook/Youtube Videos (1920 x 1080)", "Square (1080 x 1080)"], 
        label="Video Dimension", info="Choose dimension"
    )
    # Input: product name (label is Vietnamese for "Product Name").
    product_name = gr.Textbox(label="Tên Sản Phẩm")
    # Output: generated caption text (label is Vietnamese for "Caption").
    captions = gr.Textbox(label="Chú Thích")
    # Output: the combined advertisement video.
    video = gr.Video()
    # Button label is Vietnamese for "Start Creating Video".
    btn = gr.Button("Bắt Đầu Tạo Video")
    # Clicking runs pred(product_name, dimension) and routes its two return
    # values to the captions textbox and the video player, in that order.
    btn.click(pred, inputs=[product_name, dimension], outputs=[captions, video])

# Start the Gradio server as soon as the module is executed.
demo.launch()