File size: 5,378 Bytes
d90acf0
 
 
 
 
 
 
 
 
 
 
 
 
 
74e2f0b
d90acf0
 
 
 
 
 
 
d842101
d90acf0
 
e8662a1
d90acf0
 
ce09997
c547ca7
d90acf0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e8662a1
d90acf0
54b2cb6
d90acf0
e8662a1
d90acf0
 
 
e8662a1
54b2cb6
d90acf0
54b2cb6
d90acf0
 
 
 
54b2cb6
d90acf0
 
 
 
 
 
 
 
 
 
 
 
 
b8b6401
d90acf0
 
 
 
 
 
 
 
 
ce09997
 
d90acf0
 
 
 
 
 
 
 
 
 
 
 
c547ca7
b8b6401
d90acf0
 
 
 
 
 
 
 
 
 
ce09997
 
d90acf0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
from pptx import Presentation
from pptx.util import Inches
from pptx.oxml.xmlchemy import OxmlElement
from pptx.dml.color import RGBColor
from pptx.enum.text import PP_ALIGN, MSO_ANCHOR, MSO_AUTO_SIZE

import random
import os
from PIL import Image
from typing import List, Callable

from .llm_utils import llm_generate_titles, llm_generate_text, llm_generate_image_prompt, llm_generate_background_prompt
from .prompt_configs import PromptConfig
from .slides import generate_slide
from .kandinsky import generate_image
from .font import Font

import tqdm


def _render_image_to_file(
    generate_image: Callable[..., Image.Image],
    prompt: str,
    width: int,
    height: int,
    num_inference_steps: int,
    path: str,
) -> str:
    """Generate one image for ``prompt``, save it at ``path`` and return ``path``."""
    image = generate_image(
        prompt=prompt,
        width=width,
        height=height,
        num_inference_steps=num_inference_steps,
    )
    image.save(path)
    return path


def generate_presentation(
    llm_generate: Callable[[str], str],
    generate_image: Callable[..., Image.Image],
    prompt_config: PromptConfig,
    description: str,
    slides_num: int,
    font: Font,
    output_dir: str,
    num_inference_steps: int,
    image_size_coef: float,
) -> Presentation:
    """
    Generate a PowerPoint presentation based on a description using language and image models.

    The work is done in four stages, each reported on a progress bar:
    titles, slide texts, images, and packing the slides into the deck.
    The first slide never gets body text (its text is ``None``), so it always
    receives a full background image instead of a side picture.

    Side effects: creates ``<output_dir>/backgrounds`` and
    ``<output_dir>/pictures``, writes one ``NNNNNN.png`` per slide into one of
    them, and saves the deck as ``<output_dir>/presentation.pptx``.

    Args:
        llm_generate (Callable[[str], str]): Function to generate text using a language model.
        generate_image (Callable[..., Image.Image]): Function to generate images. It is
            called with the keyword arguments ``prompt``, ``width``, ``height`` and
            ``num_inference_steps``. Note that this parameter shadows the module-level
            ``generate_image`` import inside this function.
        prompt_config (PromptConfig): Configuration for prompts. Its
            ``background_styles`` attribute is sampled once per presentation.
        description (str): Description of the presentation.
        slides_num (int): Maximum number of slides. It is appended to the description
            sent to the language model and used to truncate the returned titles and texts.
        font (Font): Font object to manage font styles and paths.
        output_dir (str): Directory to save generated images and presentation.
        num_inference_steps (int): Passed through unchanged to ``generate_image``.
        image_size_coef (float): Scale factor applied (with rounding) to the base image
            sizes: 768x1344 or 1024x1024 for side pictures, 1344x768 for backgrounds.

    Returns:
        Presentation: The generated PowerPoint presentation.
    """
    pictures_dir = os.path.join(output_dir, 'pictures')
    backgrounds_dir = os.path.join(output_dir, 'backgrounds')
    os.makedirs(backgrounds_dir, exist_ok=True)
    os.makedirs(pictures_dir, exist_ok=True)

    presentation = Presentation()
    presentation.slide_height = Inches(9)
    presentation.slide_width = Inches(16)

    # The same augmented description is sent for both titles and texts.
    sized_description = description + f", {slides_num} slides"

    # The context manager guarantees the bar is closed even if a stage raises.
    with tqdm.tqdm(total=4, desc="Presentation goes brrr...") as pbar:
        pbar.set_description("Generating titles for presentation")
        titles = llm_generate_titles(
            llm_generate,
            sized_description,
            prompt_config,
        )[:slides_num]
        pbar.update(1)

        pbar.set_description("Generating text for slides")
        # The leading None is the (absent) body text of the first slide.
        texts = [None] + llm_generate_text(
            llm_generate,
            sized_description,
            titles,
            prompt_config,
        )[:slides_num]
        pbar.update(1)

        # postfix added to keywords describing presentation
        background_style = random.choice(prompt_config.background_styles)

        # One (title, text, picture_path, background_path) entry per slide.
        # Both the image stage and the packing stage are driven by this list,
        # so a short LLM answer can no longer cause an IndexError while packing.
        slides = []
        pbar.set_description("Generating images for slides")
        for t_index, (title, text) in enumerate(zip(titles, texts)):
            file_name = f'{t_index:06}.png'

            # Decide randomly presence of image on current slide:
            # side-image (weight 4) vs. plain-text with background image (weight 1).
            # The random draw happens before the text check on purpose, so the
            # sequence of random numbers does not depend on the slide content.
            wants_picture = random.choices(
                [True, False],
                weights=[4, 1],
                k=1,
            )[0]

            picture_path = None
            if wants_picture and text:
                base_width, base_height = random.choice(
                    [(768, 1344), (1024, 1024)]
                )
                caption_prompt = llm_generate_image_prompt(
                    llm_generate,
                    description,
                    title,
                    prompt_config,
                )
                picture_path = _render_image_to_file(
                    generate_image,
                    caption_prompt,
                    round(image_size_coef * base_width),
                    round(image_size_coef * base_height),
                    num_inference_steps,
                    os.path.join(pictures_dir, file_name),
                )

            background_path = None
            if picture_path is None:
                # Slides without a side picture get a full background instead.
                background_prompt = llm_generate_background_prompt(
                    llm_generate,
                    description,
                    title,
                    prompt_config,
                    background_style,
                )
                background_path = _render_image_to_file(
                    generate_image,
                    background_prompt,
                    round(image_size_coef * 1344),
                    round(image_size_coef * 768),
                    num_inference_steps,
                    os.path.join(backgrounds_dir, file_name),
                )

            slides.append((title, text, picture_path, background_path))
        pbar.update(1)

        pbar.set_description("Packing presentation")
        for title, text, picture_path, background_path in slides:
            generate_slide(
                presentation=presentation,
                title=title,
                text=text,
                picture_path=picture_path,
                background_path=background_path,
                font=font,
            )
        pbar.update(1)

        output_path = os.path.join(output_dir, 'presentation.pptx')
        presentation.save(output_path)
        # Only report completion once the file is actually on disk.
        pbar.set_description("Done")

    return presentation