File size: 13,918 Bytes
287c9ca
 
8583908
 
 
 
287c9ca
8583908
 
 
5470dfc
287c9ca
41b47a8
 
287c9ca
50c620f
09d5c67
41b47a8
8583908
 
 
 
 
 
 
41b47a8
 
 
 
09d5c67
b97795f
03bb9f6
5470dfc
09d5c67
8583908
09d5c67
8583908
 
 
 
09d5c67
 
8583908
09d5c67
 
8583908
09d5c67
 
 
 
 
b97795f
41b47a8
8583908
41b47a8
 
50c620f
5470dfc
50c620f
41b47a8
 
 
5470dfc
41b47a8
 
 
 
 
 
50c620f
41b47a8
 
 
 
 
09d5c67
8583908
41b47a8
 
09d5c67
41b47a8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
09d5c67
41b47a8
 
 
 
 
 
 
 
5470dfc
41b47a8
5470dfc
41b47a8
5470dfc
41b47a8
b97795f
41b47a8
8583908
41b47a8
09d5c67
 
 
8583908
09d5c67
8583908
 
09d5c67
 
8583908
 
 
 
09d5c67
 
8583908
 
 
09d5c67
8583908
09d5c67
 
8583908
 
09d5c67
8583908
09d5c67
8583908
09d5c67
 
8583908
 
 
 
 
 
09d5c67
41b47a8
b97795f
8583908
 
 
 
 
 
 
 
287c9ca
8583908
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
287c9ca
8583908
 
 
 
 
 
 
b97795f
8583908
41b47a8
 
 
b97795f
41b47a8
b97795f
 
8583908
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
# core/visual_engine.py
from PIL import Image, ImageDraw, ImageFont
from moviepy.editor import (ImageClip, concatenate_videoclips, TextClip, 
                            CompositeVideoClip, vfx) # Added vfx for effects
import moviepy.video.fx.all as vfx # More explicit import for resize
import numpy as np # For converting PIL images to numpy arrays for moviepy
import os
import openai
import requests
import io

class VisualEngine:
    """Generates per-scene images (DALL-E 3 or PIL placeholders) and assembles
    them into a slideshow-style video with text overlays using moviepy.

    Typical flow:
        engine = VisualEngine()
        engine.set_openai_api_key(key)            # optional; else placeholders
        path = engine.generate_image_visual(prompt, "scene1.png")
        engine.create_video_from_images([{'path': path, ...}])
    """

    def __init__(self, output_dir="temp_generated_media"):
        """Initialize fonts, DALL-E defaults, and ensure *output_dir* exists.

        output_dir: directory where all generated images/videos are written.
        """
        self.output_dir = output_dir
        os.makedirs(self.output_dir, exist_ok=True)

        # Font used by PIL when drawing text onto placeholder images.
        self.font_filename = "arial.ttf"
        self.font_path_in_container = f"/usr/local/share/fonts/truetype/mycustomfonts/{self.font_filename}"
        self.font_size_pil = 24  # For placeholder images
        self.video_overlay_font_size = 36  # For text overlays on video
        self.video_overlay_font_color = 'white'
        # TextClip resolves fonts through ImageMagick; a generic family name
        # usually works, or point this at a .ttf path (e.g. self.font_path_in_container).
        self.video_overlay_font = 'Arial'

        try:
            self.font = ImageFont.truetype(self.font_path_in_container, self.font_size_pil)
            print(f"Successfully loaded font: {self.font_path_in_container} for placeholders.")
        except IOError:
            print(f"Warning: Could not load font from '{self.font_path_in_container}'. Placeholders will use default font.")
            self.font = ImageFont.load_default()
            # load_default() is a small bitmap font; shrink the size used in
            # layout estimates so placeholder text wrapping stays plausible.
            self.font_size_pil = 11

        self.openai_api_key = None
        self.USE_AI_IMAGE_GENERATION = False
        self.dalle_model = "dall-e-3"
        self.image_size = "1024x1024"
        # 16:9 video canvas; DALL-E's square output is letterboxed onto it.
        # Use (1024, 1024) instead if square video frames are wanted.
        self.video_frame_size = (1024, 576)

    def set_openai_api_key(self, api_key):
        """Store the OpenAI key and toggle AI image generation accordingly.

        A falsy *api_key* disables DALL-E and falls back to placeholders.
        The key is passed per-call to openai.OpenAI(), not set globally.
        """
        if api_key:
            self.openai_api_key = api_key
            self.USE_AI_IMAGE_GENERATION = True
            print("OpenAI API key set. AI Image Generation Enabled with DALL-E.")
        else:
            self.USE_AI_IMAGE_GENERATION = False
            print("OpenAI API key not provided. AI Image Generation Disabled. Using placeholders.")

    def _get_text_dimensions(self, text_content, font_obj):
        """Return (width, height) in pixels of *text_content* under *font_obj*.

        Handles both modern Pillow (getbbox) and legacy (getsize) font APIs,
        falling back to a rough character-count estimate if neither works.
        Empty/None text yields (0, self.font_size_pil) so line-height math
        never sees a zero height.
        """
        if text_content == "" or text_content is None:
            return 0, self.font_size_pil
        try:
            if hasattr(font_obj, 'getbbox'):  # Pillow >= 8.0
                bbox = font_obj.getbbox(text_content)
                width = bbox[2] - bbox[0]
                height = bbox[3] - bbox[1]
                return width, height if height > 0 else self.font_size_pil
            elif hasattr(font_obj, 'getsize'):  # legacy Pillow
                width, height = font_obj.getsize(text_content)
                return width, height if height > 0 else self.font_size_pil
            else:
                # No measuring API at all: estimate from average glyph size.
                avg_char_width = self.font_size_pil * 0.6
                height_estimate = self.font_size_pil * 1.2
                return int(len(text_content) * avg_char_width), int(height_estimate if height_estimate > 0 else self.font_size_pil)
        except Exception as e:
            print(f"Warning: Error getting text dimensions for '{text_content}': {e}. Using estimates.")
            avg_char_width = self.font_size_pil * 0.6
            height_estimate = self.font_size_pil * 1.2
            return int(len(text_content) * avg_char_width), int(height_estimate if height_estimate > 0 else self.font_size_pil)

    def _create_placeholder_image_content(self, text_description, filename, size=(1024, 576)):
        """Render *text_description* word-wrapped and centered on a dark canvas.

        Used when DALL-E is disabled or fails. Saves to output_dir/filename
        and returns the file path, or None if saving fails.
        """
        img = Image.new('RGB', size, color=(30, 30, 60))
        draw = ImageDraw.Draw(img)
        padding = 30
        max_text_width = size[0] - (2 * padding)
        lines = []
        if not text_description: text_description = "(No description provided for placeholder)"

        # Greedy word-wrap against the measured pixel width of each line.
        words = text_description.split()
        current_line = ""
        for word in words:
            test_line_candidate = current_line + word + " "
            line_width, _ = self._get_text_dimensions(test_line_candidate.strip(), self.font)
            if line_width <= max_text_width:
                current_line = test_line_candidate
            elif current_line != "":
                lines.append(current_line.strip())
                current_line = word + " "
            else:
                # Single word wider than the canvas: truncate until it fits.
                temp_word = word
                while self._get_text_dimensions(temp_word, self.font)[0] > max_text_width and len(temp_word) > 0:
                    temp_word = temp_word[:-1]
                lines.append(temp_word)
                current_line = ""
        if current_line.strip(): lines.append(current_line.strip())
        if not lines: lines.append("(Text error in placeholder)")

        # Vertically center the block of lines; "Tg" spans ascender+descender.
        _, single_line_height = self._get_text_dimensions("Tg", self.font)
        if single_line_height == 0: single_line_height = self.font_size_pil
        line_spacing_factor = 1.3
        estimated_line_block_height = len(lines) * single_line_height * line_spacing_factor
        y_text = (size[1] - estimated_line_block_height) / 2.0
        if y_text < padding: y_text = float(padding)

        for line_idx, line in enumerate(lines):
            # Cap very long descriptions at 7 lines plus an ellipsis marker.
            if line_idx >= 7 and len(lines) > 8:
                draw.text(xy=(float(padding), y_text), text="...", fill=(200, 200, 130), font=self.font)
                break
            line_width, _ = self._get_text_dimensions(line, self.font)
            x_text = (size[0] - line_width) / 2.0
            if x_text < padding: x_text = float(padding)
            draw.text(xy=(x_text, y_text), text=line, fill=(220, 220, 150), font=self.font)
            y_text += single_line_height * line_spacing_factor

        filepath = os.path.join(self.output_dir, filename)
        try:
            img.save(filepath)
        except Exception as e:
            print(f"Error saving placeholder image {filepath}: {e}")
            return None
        return filepath

    def generate_image_visual(self, image_prompt_text, scene_identifier_filename):
        """Generate one scene image, preferring DALL-E, falling back to a placeholder.

        image_prompt_text: prompt sent to DALL-E (or drawn on the placeholder).
        scene_identifier_filename: bare filename; saved under self.output_dir.
        Returns the saved file path, or None if even the placeholder fails.
        """
        filepath = os.path.join(self.output_dir, scene_identifier_filename)
        if self.USE_AI_IMAGE_GENERATION and self.openai_api_key:
            try:
                print(f"Generating DALL-E ({self.dalle_model}) image for: {image_prompt_text[:100]}...")
                client = openai.OpenAI(api_key=self.openai_api_key)
                response = client.images.generate(
                    model=self.dalle_model, prompt=image_prompt_text, n=1,
                    size=self.image_size, quality="standard", response_format="url"
                )
                image_url = response.data[0].url
                revised_prompt_dalle3 = response.data[0].revised_prompt
                if revised_prompt_dalle3: print(f"DALL-E 3 revised prompt: {revised_prompt_dalle3[:150]}...")
                image_response = requests.get(image_url, timeout=60)  # generous timeout for image download
                image_response.raise_for_status()
                img_data = Image.open(io.BytesIO(image_response.content))

                # Normalize any non-RGB mode (RGBA, P, LA, CMYK, ...) before
                # saving — some modes cannot be written to common formats.
                if img_data.mode != 'RGB':
                    img_data = img_data.convert('RGB')

                img_data.save(filepath)
                print(f"AI Image (DALL-E) saved: {filepath}")
                return filepath
            except openai.APIError as e:
                print(f"OpenAI API Error: {e}")
            except requests.exceptions.RequestException as e:
                print(f"Requests Error downloading DALL-E image: {e}")
            except Exception as e:
                print(f"Generic error during DALL-E image generation: {e}")
            print("Falling back to placeholder image due to DALL-E error.")
            return self._create_placeholder_image_content(
                f"[DALL-E Failed] Prompt: {image_prompt_text[:150]}...",
                scene_identifier_filename, size=self.video_frame_size  # match video frame size
            )
        else:
            return self._create_placeholder_image_content(
                image_prompt_text, scene_identifier_filename, size=self.video_frame_size
            )

    def create_video_from_images(self, image_data_list, output_filename="final_video.mp4", fps=24, duration_per_image=3):
        """Assemble still images into an mp4 with Ken Burns zoom and captions.

        image_data_list: list of dicts like
            {'path': 'path/to/image.png', 'scene_num': 1, 'key_action': 'Some action'}
        output_filename: name of the mp4 written under self.output_dir.
        fps: output frame rate.
        duration_per_image: seconds each still is on screen.
        Returns the output path, or None if nothing could be rendered.
        """
        if not image_data_list:
            print("No image data provided to create video.")
            return None

        print(f"Attempting to create video from {len(image_data_list)} images.")
        processed_clips = []

        for i, data in enumerate(image_data_list):
            img_path = data.get('path')
            scene_num = data.get('scene_num', i + 1)
            key_action = data.get('key_action', '')

            if not (img_path and os.path.exists(img_path)):
                print(f"Image path invalid or not found: {img_path}. Skipping for video.")
                continue
            try:
                # Letterbox/pillarbox: shrink to fit the frame (aspect kept),
                # then paste centered on a black canvas of video_frame_size.
                pil_image = Image.open(img_path)
                pil_image.thumbnail(self.video_frame_size, Image.Resampling.LANCZOS)  # resizes in place

                background = Image.new('RGB', self.video_frame_size, (0, 0, 0))
                paste_x = (self.video_frame_size[0] - pil_image.width) // 2
                paste_y = (self.video_frame_size[1] - pil_image.height) // 2
                background.paste(pil_image, (paste_x, paste_y))

                # MoviePy consumes numpy frames, not PIL images.
                frame_np = np.array(background)
                img_clip = ImageClip(frame_np).set_duration(duration_per_image)

                # Subtle Ken Burns effect: linear zoom from 1.0 to end_scale.
                end_scale = 1.05
                img_clip = img_clip.fx(vfx.resize, lambda t: 1 + (end_scale - 1) * (t / duration_per_image))
                img_clip = img_clip.set_position('center')  # keep zoom centered

                # Caption overlay: scene number plus key action, auto-wrapped.
                # Clamp the duration so short slides (<= 0.5s) don't produce a
                # non-positive TextClip duration.
                overlay_text = f"Scene {scene_num}\n{key_action}"
                overlay_duration = max(0.1, duration_per_image - 0.5)
                txt_clip = TextClip(overlay_text, fontsize=self.video_overlay_font_size,
                                    color=self.video_overlay_font_color,
                                    font=self.video_overlay_font,  # must be resolvable by ImageMagick
                                    bg_color='rgba(0,0,0,0.5)',  # semi-transparent backing strip
                                    size=(img_clip.w * 0.9, None),  # 90% of frame width, auto height
                                    method='caption',  # auto-wrap text
                                    align='West',  # left align
                                    kerning=-1
                                    ).set_duration(overlay_duration).set_start(0.25)

                txt_clip = txt_clip.set_position(('center', 0.85), relative=True)  # near the bottom, centered

                video_with_text_overlay = CompositeVideoClip([img_clip, txt_clip], size=self.video_frame_size)
                processed_clips.append(video_with_text_overlay)

            except Exception as e_clip:
                print(f"Error processing image/creating clip for {img_path}: {e_clip}. Skipping.")

        if not processed_clips:
            print("No clips could be processed for the video.")
            return None

        # padding=-0.5 with method="compose" overlaps clips by 0.5s (crossfade);
        # fade the whole video in/out at the edges.
        final_video_clip = concatenate_videoclips(processed_clips, padding=-0.5, method="compose").fx(vfx.fadein, 0.5).fx(vfx.fadeout, 0.5)

        output_path = os.path.join(self.output_dir, output_filename)
        print(f"Writing final video to: {output_path}")
        try:
            final_video_clip.write_videofile(
                output_path, fps=fps, codec='libx264', audio_codec='aac',
                # Unique temp audio name so concurrent renders don't collide.
                temp_audiofile=os.path.join(self.output_dir, f'temp-audio-{os.urandom(4).hex()}.m4a'),
                remove_temp=True, threads=os.cpu_count() or 2, logger='bar'
            )
            print(f"Video successfully created: {output_path}")
            return output_path
        except Exception as e:
            print(f"Error writing final video file: {e}")
            return None
        finally:
            # Release moviepy/ffmpeg resources even on failure.
            for clip in processed_clips: clip.close()
            if hasattr(final_video_clip, 'close'): final_video_clip.close()