File size: 14,358 Bytes
287c9ca
990e23e
8583908
990e23e
 
 
287c9ca
8583908
 
 
5470dfc
287c9ca
41b47a8
 
287c9ca
50c620f
09d5c67
41b47a8
990e23e
 
8583908
990e23e
 
 
8583908
41b47a8
 
 
 
09d5c67
b97795f
03bb9f6
5470dfc
09d5c67
8583908
09d5c67
990e23e
 
 
 
09d5c67
 
 
 
 
 
 
 
 
b97795f
41b47a8
 
 
50c620f
990e23e
50c620f
41b47a8
 
 
990e23e
41b47a8
 
 
 
 
 
50c620f
41b47a8
 
 
 
 
990e23e
41b47a8
 
09d5c67
41b47a8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
09d5c67
41b47a8
 
 
 
 
 
 
 
5470dfc
41b47a8
5470dfc
41b47a8
5470dfc
41b47a8
b97795f
41b47a8
 
09d5c67
 
 
8583908
09d5c67
8583908
990e23e
 
09d5c67
 
990e23e
8583908
990e23e
 
8583908
09d5c67
990e23e
 
8583908
09d5c67
990e23e
8583908
09d5c67
 
8583908
 
09d5c67
8583908
09d5c67
8583908
990e23e
09d5c67
990e23e
09d5c67
8583908
990e23e
8583908
990e23e
 
 
8583908
 
09d5c67
41b47a8
8583908
 
 
287c9ca
8583908
 
 
 
 
 
 
 
 
 
 
 
 
990e23e
8583908
990e23e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8583908
 
990e23e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8583908
990e23e
8583908
 
 
 
 
 
 
 
 
287c9ca
8583908
990e23e
 
 
 
 
8583908
 
 
b97795f
8583908
41b47a8
 
 
b97795f
41b47a8
b97795f
 
8583908
 
990e23e
 
 
8583908
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
# core/visual_engine.py
from PIL import Image, ImageDraw, ImageFont # Pillow should be >= 10.0.0
from moviepy.editor import (ImageClip, concatenate_videoclips, TextClip, 
                            CompositeVideoClip)
import moviepy.video.fx.all as vfx # For effects like resize, fadein, fadeout
import numpy as np
import os
import openai
import requests
import io

class VisualEngine:
    """Generates per-scene images (DALL-E or text placeholders) and assembles
    them into an H.264 MP4.

    Pipeline: ``generate_image_visual`` produces one image per scene (AI image
    via the OpenAI Images API, or a word-wrapped placeholder card on failure /
    when AI is disabled); ``create_video_from_images`` letterboxes each image
    onto a fixed 16:9 frame, applies a Ken Burns zoom and a caption overlay,
    then concatenates the clips with crossfades.
    """

    def __init__(self, output_dir="temp_generated_media"):
        """Initialize the engine and ensure ``output_dir`` exists.

        Args:
            output_dir: Directory where generated images and videos are saved.
        """
        self.output_dir = output_dir
        os.makedirs(self.output_dir, exist_ok=True)

        # Font used by Pillow when rendering placeholder cards.
        self.font_filename = "arial.ttf"
        self.font_path_in_container = f"/usr/local/share/fonts/truetype/mycustomfonts/{self.font_filename}"
        self.font_size_pil = 24
        # Caption overlay styling (rendered by MoviePy TextClip / ImageMagick).
        self.video_overlay_font_size = 36
        self.video_overlay_font_color = 'white'
        # For video overlays, TextClip will use ImageMagick. 'Arial' is a common system font name.
        # If issues, use self.font_path_in_container (if ImageMagick can access it via moviepy)
        self.video_overlay_font = 'Arial'

        try:
            self.font = ImageFont.truetype(self.font_path_in_container, self.font_size_pil)
            print(f"Successfully loaded font: {self.font_path_in_container} for placeholders.")
        except IOError:
            print(f"Warning: Could not load font from '{self.font_path_in_container}'. Placeholders will use default font.")
            self.font = ImageFont.load_default()
            # Pillow's built-in bitmap font is ~11px; keep size estimates consistent.
            self.font_size_pil = 11

        self.openai_api_key = None
        self.USE_AI_IMAGE_GENERATION = False
        self.dalle_model = "dall-e-3"
        self.image_size = "1024x1024"  # DALL-E 3 output size
        # Target video frame size (16:9 aspect ratio).
        # DALL-E 3 images (1024x1024) will be letter/pillar-boxed to fit this.
        self.video_frame_size = (1280, 720)

    def set_openai_api_key(self, api_key):
        """Store the OpenAI key and toggle AI image generation accordingly.

        A falsy ``api_key`` disables AI generation (placeholders are used);
        a previously stored key is intentionally left in place in that case.
        """
        if api_key:
            self.openai_api_key = api_key
            self.USE_AI_IMAGE_GENERATION = True
            print("OpenAI API key set. AI Image Generation Enabled with DALL-E.")
        else:
            self.USE_AI_IMAGE_GENERATION = False
            print("OpenAI API key not provided. AI Image Generation Disabled. Using placeholders.")

    def _get_text_dimensions(self, text_content, font_obj):
        """Measure rendered text size in pixels.

        Args:
            text_content: Text to measure; empty/None yields (0, font_size_pil).
            font_obj: A Pillow font (or anything exposing getbbox/getsize).

        Returns:
            (width, height) tuple; a zero measured height is replaced by
            ``self.font_size_pil`` so callers always get a usable line height.
        """
        if text_content == "" or text_content is None:
            return 0, self.font_size_pil

        def _estimate():
            # Rough fallback: avg glyph ~60% of font size, line ~120%.
            width = int(len(text_content) * self.font_size_pil * 0.6)
            height = self.font_size_pil * 1.2
            return width, int(height if height > 0 else self.font_size_pil)

        try:
            if hasattr(font_obj, 'getbbox'):  # Pillow >= 8.0.0
                left, top, right, bottom = font_obj.getbbox(text_content)
                width, height = right - left, bottom - top
            elif hasattr(font_obj, 'getsize'):  # Older Pillow
                width, height = font_obj.getsize(text_content)
            else:
                return _estimate()
            return width, height if height > 0 else self.font_size_pil
        except Exception as e:
            print(f"Warning: Error getting text dimensions for '{text_content}': {e}. Using estimates.")
            return _estimate()

    def _create_placeholder_image_content(self, text_description, filename, size=(1024, 576)):
        """Render ``text_description`` word-wrapped onto a dark card and save it.

        Args:
            text_description: Text to draw (a default message if falsy).
            filename: File name within ``self.output_dir``.
            size: (width, height) of the card in pixels.

        Returns:
            Saved file path, or None if the image could not be written.
        """
        img = Image.new('RGB', size, color=(30, 30, 60))
        draw = ImageDraw.Draw(img)
        padding = 30
        max_text_width = size[0] - (2 * padding)

        if not text_description:
            text_description = "(No description provided for placeholder)"

        # Greedy word wrap against the measured pixel width.
        lines = []
        current_line = ""
        for word in text_description.split():
            candidate = current_line + word + " "
            if self._get_text_dimensions(candidate.strip(), self.font)[0] <= max_text_width:
                current_line = candidate
            elif current_line:
                # Word doesn't fit on this line: flush and start a new line.
                lines.append(current_line.strip())
                current_line = word + " "
            else:
                # Single word wider than a whole line: hard-truncate it.
                # The clipped tail is dropped — acceptable for a placeholder.
                clipped = word
                while clipped and self._get_text_dimensions(clipped, self.font)[0] > max_text_width:
                    clipped = clipped[:-1]
                lines.append(clipped)
                current_line = ""
        if current_line.strip():
            lines.append(current_line.strip())
        if not lines:
            lines.append("(Text error in placeholder)")

        # Vertically center the text block; never start above the padding.
        _, line_height = self._get_text_dimensions("Tg", self.font)
        if line_height == 0:
            line_height = self.font_size_pil
        spacing = 1.3  # line spacing factor
        y_text = max(float(padding), (size[1] - len(lines) * line_height * spacing) / 2.0)

        for idx, line in enumerate(lines):
            # Cap the card at 8 lines: replace line 8+ with an ellipsis.
            if idx >= 7 and len(lines) > 8:
                draw.text(xy=(float(padding), y_text), text="...", fill=(200, 200, 130), font=self.font)
                break
            line_width, _ = self._get_text_dimensions(line, self.font)
            x_text = max(float(padding), (size[0] - line_width) / 2.0)
            draw.text(xy=(x_text, y_text), text=line, fill=(220, 220, 150), font=self.font)
            y_text += line_height * spacing

        filepath = os.path.join(self.output_dir, filename)
        try:
            img.save(filepath)
        except Exception as e:
            print(f"Error saving placeholder image {filepath}: {e}")
            return None
        return filepath

    def generate_image_visual(self, image_prompt_text, scene_identifier_filename):
        """Produce one scene image: DALL-E when enabled, placeholder otherwise.

        Any DALL-E/API/network failure falls back to a placeholder card sized
        to ``self.video_frame_size`` so the downstream video pipeline is
        unaffected.

        Returns:
            Path to the saved image, or None if even the placeholder failed.
        """
        filepath = os.path.join(self.output_dir, scene_identifier_filename)
        if self.USE_AI_IMAGE_GENERATION and self.openai_api_key:
            try:
                print(f"Generating DALL-E ({self.dalle_model}) image for: {image_prompt_text[:100]}...")
                client = openai.OpenAI(api_key=self.openai_api_key)
                response = client.images.generate(
                    model=self.dalle_model, prompt=image_prompt_text, n=1,
                    size=self.image_size, quality="standard", response_format="url"
                    # style="vivid" # or "natural" for DALL-E 3, optional
                )
                image_url = response.data[0].url
                # DALL-E 3 may rewrite the prompt; surface it for debugging.
                revised_prompt = getattr(response.data[0], 'revised_prompt', None)
                if revised_prompt:
                    print(f"DALL-E 3 revised prompt: {revised_prompt[:150]}...")

                image_response = requests.get(image_url, timeout=60)
                image_response.raise_for_status()

                img_data = Image.open(io.BytesIO(image_response.content))
                if img_data.mode == 'RGBA':  # PNG may carry alpha; video wants RGB
                    img_data = img_data.convert('RGB')

                # Save the AI generated image (typically 1024x1024 from DALL-E).
                img_data.save(filepath)
                print(f"AI Image (DALL-E) saved: {filepath}")
                return filepath
            except openai.APIError as e:
                print(f"OpenAI API Error: {e}")
            except requests.exceptions.RequestException as e:
                print(f"Requests Error downloading DALL-E image: {e}")
            except Exception as e:
                print(f"Generic error during DALL-E image generation: {e}")

            print("Falling back to placeholder image due to DALL-E error.")
            return self._create_placeholder_image_content(
                f"[DALL-E Failed] Prompt: {image_prompt_text[:150]}...",
                scene_identifier_filename, size=self.video_frame_size
            )
        else:
            # AI disabled or no key: placeholder at video frame size for consistency.
            return self._create_placeholder_image_content(
                image_prompt_text, scene_identifier_filename, size=self.video_frame_size
            )

    def _compose_scene_clip(self, img_path, scene_num, key_action, duration_per_image):
        """Build one captioned, Ken-Burns-zoomed CompositeVideoClip from a file.

        The image is letter/pillar-boxed onto a black canvas of
        ``self.video_frame_size``; a caption is overlaid near the bottom.
        Raises on any Pillow/MoviePy error (caller decides how to handle).
        """
        # Use a context manager so the source file handle is closed promptly
        # (the original left Image.open() handles dangling).
        with Image.open(img_path) as src:
            frame = src.convert('RGB') if src.mode != 'RGB' else src.copy()
        # Fit within the frame while keeping aspect ratio (in-place resize).
        frame.thumbnail(self.video_frame_size, Image.Resampling.LANCZOS)

        # Letterbox/pillarbox onto a black canvas of the exact frame size.
        canvas = Image.new('RGB', self.video_frame_size, (0, 0, 0))
        paste_x = (self.video_frame_size[0] - frame.width) // 2
        paste_y = (self.video_frame_size[1] - frame.height) // 2
        canvas.paste(frame, (paste_x, paste_y))
        frame_np = np.array(canvas)

        img_clip = ImageClip(frame_np).set_duration(duration_per_image)
        # Ken Burns effect: linear zoom from 100% to 108% over the clip.
        end_scale = 1.08
        img_clip = img_clip.fx(vfx.resize, lambda t: 1 + (end_scale - 1) * (t / duration_per_image))
        img_clip = img_clip.set_position('center')

        # Caption overlay. TextClip renders via ImageMagick, which has its own
        # font discovery; if 'Arial' is missing (no mscorefonts), point
        # ``font=`` at self.font_path_in_container instead. ImageMagick must be
        # installed (e.g. `apt-get install imagemagick` in the Dockerfile).
        overlay_text = f"Scene {scene_num}: {key_action}"
        txt_clip = TextClip(
            overlay_text,
            fontsize=self.video_overlay_font_size,
            color=self.video_overlay_font_color,
            font=self.video_overlay_font,
            bg_color='rgba(0,0,0,0.6)',
            size=(self.video_frame_size[0] * 0.9, None),  # width 90%, height auto
            method='caption',
            align='West',
            kerning=-1
        ).set_duration(duration_per_image - 0.5).set_start(0.25)  # 0.25s margin at both ends
        txt_clip = txt_clip.set_position(('center', 0.88), relative=True)  # near bottom

        return CompositeVideoClip([img_clip, txt_clip], size=self.video_frame_size)

    def create_video_from_images(self, image_data_list, output_filename="final_video.mp4", fps=24, duration_per_image=3):
        """Assemble captioned scene clips into a crossfaded MP4.

        Args:
            image_data_list: List of dicts with 'path' (required) and optional
                'scene_num' / 'key_action' keys.
            output_filename: File name within ``self.output_dir``.
            fps: Output frame rate.
            duration_per_image: Seconds each image is shown.

        Returns:
            Path to the written video, or None on failure / no usable images.
        """
        if not image_data_list:
            print("No image data provided to create video.")
            return None

        print(f"Attempting to create video from {len(image_data_list)} images.")
        processed_clips = []
        for i, data in enumerate(image_data_list):
            img_path = data.get('path')
            if not (img_path and os.path.exists(img_path)):
                print(f"Image path invalid or not found: {img_path}. Skipping for video.")
                continue
            try:
                clip = self._compose_scene_clip(
                    img_path, data.get('scene_num', i + 1),
                    data.get('key_action', ''), duration_per_image)
                processed_clips.append(clip)
            except Exception as e_clip:
                print(f"Error processing image/creating clip for {img_path}: {e_clip}. Skipping.")

        if not processed_clips:
            print("No clips could be processed for the video.")
            return None

        final_video_clip = None
        output_path = os.path.join(self.output_dir, output_filename)
        try:
            # Concatenate with a 0.5s crossfade between consecutive clips.
            final_video_clip = concatenate_videoclips(processed_clips, padding=-0.5, method="compose")
            # Whole-video fade in/out, only if the video is long enough.
            if final_video_clip.duration > 1:
                final_video_clip = final_video_clip.fx(vfx.fadein, 0.5).fx(vfx.fadeout, 0.5)

            print(f"Writing final video to: {output_path}")
            final_video_clip.write_videofile(
                output_path, fps=fps, codec='libx264', audio_codec='aac',
                temp_audiofile=os.path.join(self.output_dir, f'temp-audio-{os.urandom(4).hex()}.m4a'),
                remove_temp=True, threads=os.cpu_count() or 2, logger='bar'
            )
            print(f"Video successfully created: {output_path}")
            return output_path
        except Exception as e:
            print(f"Error writing final video file: {e}")
            return None
        finally:
            # BUGFIX: the original only reached cleanup when write_videofile
            # ran; a failure in concatenate/fades leaked every open clip.
            for clip_item in processed_clips:
                if hasattr(clip_item, 'close'):
                    clip_item.close()
            if final_video_clip is not None and hasattr(final_video_clip, 'close'):
                final_video_clip.close()