File size: 15,811 Bytes
287c9ca
29c2122
9d84ba9
8583908
9840152
9d84ba9
990e23e
287c9ca
8583908
 
 
9840152
29c2122
9840152
5470dfc
287c9ca
9d84ba9
29c2122
 
 
 
 
 
9840152
29c2122
9840152
29c2122
9840152
09d5c67
29c2122
 
 
 
 
 
 
 
 
 
 
 
 
b97795f
29c2122
 
 
50c620f
29c2122
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9d84ba9
29c2122
9840152
 
 
29c2122
 
 
9840152
 
29c2122
9840152
29c2122
9840152
29c2122
9840152
29c2122
9840152
 
29c2122
 
 
9840152
b97795f
29c2122
 
 
 
 
 
 
 
 
 
41b47a8
09d5c67
9d84ba9
 
 
 
29c2122
 
 
9d84ba9
29c2122
9d84ba9
 
9840152
29c2122
 
 
9d84ba9
9840152
 
990e23e
9840152
29c2122
 
 
9840152
 
 
29c2122
9840152
29c2122
41b47a8
29c2122
 
9840152
29c2122
9840152
 
 
29c2122
 
 
 
 
 
 
9840152
 
29c2122
 
 
9d84ba9
8583908
29c2122
8583908
 
9d84ba9
29c2122
8583908
29c2122
 
 
 
 
 
 
 
 
9d84ba9
29c2122
8583908
29c2122
 
 
 
 
 
 
 
 
 
 
 
 
9d84ba9
29c2122
9d84ba9
29c2122
 
 
9d84ba9
9840152
9d84ba9
29c2122
9d84ba9
29c2122
9d84ba9
29c2122
 
 
 
9840152
 
 
 
 
29c2122
9840152
 
29c2122
 
8583908
 
b97795f
29c2122
9d84ba9
29c2122
9d84ba9
29c2122
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
# core/visual_engine.py
# Imports: Pillow, MoviePy, numpy, openai, requests and elevenlabs (third-party); os, io, time and random (stdlib).
from PIL import Image, ImageDraw, ImageFont
from moviepy.editor import (ImageClip, concatenate_videoclips, TextClip, 
                            CompositeVideoClip, AudioFileClip)
import moviepy.video.fx.all as vfx 
import numpy as np
import os
import openai
import requests
import io
import time
import random # For slight Ken Burns variations
from elevenlabs import generate as elevenlabs_generate_audio, set_api_key as elevenlabs_set_api_key_func

class VisualEngine:
    """Produce per-scene images, narration audio and a slideshow video.

    Scene images come from DALL-E when an OpenAI key has been set, fall back
    to a Pexels stock-photo search, and finally to a locally drawn text
    placeholder.  Narration is generated with ElevenLabs and the video is
    assembled with MoviePy.  Every generated file is written under
    ``output_dir``.  Most failures are reported with ``print`` and signalled
    by a ``None`` return value instead of an exception.
    """

    def __init__(self, output_dir="temp_cinegen_media"):
        """Create ``output_dir`` and set defaults; all remote services start disabled."""
        self.output_dir = output_dir
        os.makedirs(self.output_dir, exist_ok=True)

        # Font used by PIL when drawing placeholder images.
        self.font_filename = "arial.ttf"
        self.font_path_in_container = f"/usr/local/share/fonts/truetype/mycustomfonts/{self.font_filename}"
        self.font_size_pil = 20
        # Styling of the MoviePy text overlay drawn on each scene.
        self.video_overlay_font_size = 30
        self.video_overlay_font_color = 'white'
        self.video_overlay_font = 'Arial-Bold'
        try:
            self.font = ImageFont.truetype(self.font_path_in_container, self.font_size_pil)
            print(f"Placeholder font: {self.font_path_in_container}.")
        except IOError:
            print(f"Warn: Placeholder font '{self.font_path_in_container}' fail. Default.")
            self.font = ImageFont.load_default()
            self.font_size_pil = 10

        # DALL-E image generation (enabled by set_openai_api_key).
        self.openai_api_key = None
        self.USE_AI_IMAGE_GENERATION = False
        self.dalle_model = "dall-e-3"
        self.image_size_dalle3 = "1792x1024"  # Landscape
        self.video_frame_size = (1280, 720)

        # ElevenLabs narration (enabled by set_elevenlabs_api_key).
        self.elevenlabs_api_key = None
        self.USE_ELEVENLABS = False
        self.elevenlabs_voice_id = "Rachel"

        # Pexels stock-photo fallback (enabled by set_pexels_api_key).
        self.pexels_api_key = None
        self.USE_PEXELS = False

    def set_openai_api_key(self, k):
        """Store the OpenAI key ``k``; DALL-E is enabled only when ``k`` is truthy."""
        self.openai_api_key = k
        self.USE_AI_IMAGE_GENERATION = bool(k)
        print(f"DALL-E ({self.dalle_model}) {'Ready' if k else 'Disabled'}.")

    def set_elevenlabs_api_key(self, k):
        """Store the ElevenLabs key ``k`` and register it with the library.

        Narration is enabled only when ``k`` is truthy and registering the key
        does not raise.
        """
        self.elevenlabs_api_key = k
        self.USE_ELEVENLABS = False
        if not k:
            return
        try:
            elevenlabs_set_api_key_func(k)
            self.USE_ELEVENLABS = True
            print("ElevenLabs Ready.")
        except Exception as e:
            print(f"ElevenLabs key set error: {e}. Disabled.")

    def set_pexels_api_key(self, k):
        """Store the Pexels key ``k``; the Pexels fallback is enabled only when ``k`` is truthy."""
        self.pexels_api_key = k
        self.USE_PEXELS = bool(k)
        print(f"Pexels {'Ready' if k else 'Disabled'}.")

    def _get_text_dimensions(self, t, f):
        """Return ``(width, height)`` of text ``t`` rendered with font ``f``.

        Uses ``f.getbbox`` when available, else ``f.getsize``, else an
        estimate derived from ``font_size_pil``.  Empty text yields
        ``(0, font_size_pil)`` and a non-positive measured height is replaced
        by ``font_size_pil``.
        """
        if not t:
            return 0, self.font_size_pil
        # Rough size used when the font cannot measure the text.
        estimate = (int(len(t) * self.font_size_pil * .6), int(self.font_size_pil * 1.2))
        try:
            if hasattr(f, 'getbbox'):
                bb = f.getbbox(t)
                w, h = bb[2] - bb[0], bb[3] - bb[1]
            elif hasattr(f, 'getsize'):
                w, h = f.getsize(t)
            else:
                return estimate
        except Exception:  # was a bare ``except:``, which also trapped KeyboardInterrupt
            return estimate
        return w, (h if h > 0 else self.font_size_pil)

    def _create_placeholder_image_content(self, td, fn, s=(1280, 720), size=None):
        """Draw ``td`` word-wrapped and centred on a dark canvas and save it.

        Args:
            td: Text to render; a falsy value becomes "(Placeholder)".
            fn: File name, joined onto ``output_dir``, to save the image as.
            s: ``(width, height)`` of the canvas in pixels.
            size: Keyword alias for ``s`` that takes precedence when given.
                Code in this class called the method with ``size=...``, which
                raised ``TypeError`` while only ``s`` existed.

        Returns:
            The saved file path, or ``None`` if saving failed.
        """
        if size is not None:
            s = size
        pad = 25
        line_gap = 2
        text_fill = (200, 200, 180)
        img = Image.new('RGB', s, color=(20, 20, 40))
        draw = ImageDraw.Draw(img)
        max_w = s[0] - 2 * pad
        if not td:
            td = "(Placeholder)"

        # Greedy word wrap: extend the current line until the next word would overflow.
        lines = []
        current = ""
        for word in td.split():
            candidate = current + word + " "
            if self._get_text_dimensions(candidate, self.font)[0] <= max_w:
                current = candidate
            else:
                if current:
                    lines.append(current.strip())
                current = word + " "
        if current:
            lines.append(current.strip())
        if not lines:
            lines.append("(Text err)")

        _, line_h = self._get_text_dimensions("Ay", self.font)
        if line_h <= 0:
            line_h = self.font_size_pil + 2
        step = line_h + line_gap
        max_lines = min(len(lines), (s[1] - 2 * pad) // step)
        y = pad + (s[1] - 2 * pad - max_lines * step) / 2.0
        for i, line in enumerate(lines[:max(max_lines, 0)]):
            line_w, _ = self._get_text_dimensions(line, self.font)
            x = (s[0] - line_w) / 2.0
            draw.text((x, y), line, font=self.font, fill=text_fill)
            y += step
            # Show at most seven lines, then an ellipsis under the last one.
            if i == 6 and max_lines > 7:
                draw.text((x, y), "...", font=self.font, fill=text_fill)
                break

        fp = os.path.join(self.output_dir, fn)
        try:
            img.save(fp)
            return fp
        except Exception as e:
            print(f"Err placeholder save: {e}")
            return None

    def _download_image_as_rgb(self, url, filepath, timeout):
        """Download ``url``, convert the image to RGB and save it at ``filepath``.

        Raises whatever ``requests`` or PIL raise; callers handle the errors.
        """
        image_response = requests.get(url, timeout=timeout)
        image_response.raise_for_status()
        with Image.open(io.BytesIO(image_response.content)) as downloaded:
            rgb = downloaded if downloaded.mode == 'RGB' else downloaded.convert('RGB')
            rgb.save(filepath)

    def _search_pexels_image(self, query, output_filename):
        """Save the first landscape Pexels result for ``query`` as a JPEG.

        Args:
            query: Search text sent to the Pexels search endpoint.
            output_filename: Scene file name; its extension is replaced by
                ``_pexels_<3 random digits>.jpg``.

        Returns:
            The saved file path, or ``None`` when Pexels is disabled, the
            query is blank, nothing was found, or any error occurred.
        """
        if not self.USE_PEXELS or not self.pexels_api_key:
            return None
        if not query or not query.strip():
            print("Pexels query empty. Skipping search.")
            return None
        headers = {"Authorization": self.pexels_api_key}
        params = {"query": query, "per_page": 3, "orientation": "landscape", "size": "large"}
        # splitext handles any extension; the previous ``.replace(".png", ...)``
        # left non-PNG names unchanged, so the photo overwrote the scene file name.
        stem, _ = os.path.splitext(output_filename)
        pexels_filename = f"{stem}_pexels_{random.randint(100,999)}.jpg"
        filepath = os.path.join(self.output_dir, pexels_filename)
        try:
            print(f"Searching Pexels for: '{query}'")
            response = requests.get("https://api.pexels.com/v1/search", headers=headers, params=params, timeout=15)
            response.raise_for_status()
            data = response.json()
            if not data.get("photos"):
                print(f"No photos on Pexels for: '{query}'")
                return None
            # Only the first hit is used, although three are requested.
            photo_url = data["photos"][0]["src"]["large2x"]
            self._download_image_as_rgb(photo_url, filepath, timeout=45)
            print(f"Pexels image saved: {filepath}")
            return filepath
        except Exception as e:
            print(f"Pexels error for '{query}': {e}")
        return None

    def _generate_dalle_image(self, image_prompt_text, filepath):
        """Try to generate and save a DALL-E image; return ``filepath`` or ``None``.

        Only rate-limit errors are retried (two attempts in total, with a
        growing pause between them); any other error ends the attempt loop.
        """
        max_retries = 2
        for attempt in range(max_retries):
            try:
                print(f"Attempt {attempt+1}: DALL-E ({self.dalle_model}) for: {image_prompt_text[:120]}...")
                client = openai.OpenAI(api_key=self.openai_api_key, timeout=90.0)
                response = client.images.generate(model=self.dalle_model, prompt=image_prompt_text, n=1,
                                                  size=self.image_size_dalle3, quality="hd",
                                                  response_format="url", style="vivid")
                image_url = response.data[0].url
                revised_prompt = getattr(response.data[0], 'revised_prompt', None)
                if revised_prompt:
                    print(f"DALL-E 3 revised_prompt: {revised_prompt[:100]}...")
                self._download_image_as_rgb(image_url, filepath, timeout=120)
                print(f"AI Image (DALL-E) saved: {filepath}")
                return filepath
            # RateLimitError must be listed before the more general APIError.
            except openai.RateLimitError as e:
                print(f"OpenAI Rate Limit: {e}. Retrying...")
                if attempt == max_retries - 1:
                    print("Max retries for RateLimitError.")
                    break
                time.sleep(5 * (attempt + 1))
            except openai.APIError as e:
                print(f"OpenAI API Error: {e}")
                break
            except requests.exceptions.RequestException as e:
                print(f"Requests Error (DALL-E download): {e}")
                break
            except Exception as e:
                print(f"Generic error (DALL-E gen): {e}")
                break
        return None

    def generate_image_visual(self, image_prompt_text, scene_data, scene_identifier_filename):
        """Create the image for one scene and return its file path.

        With AI generation enabled the order is DALL-E, then Pexels, then a
        text placeholder.  With AI generation disabled a placeholder showing
        the prompt is drawn directly.

        Args:
            image_prompt_text: Prompt sent to DALL-E (also shown on placeholders).
            scene_data: Mapping read for the Pexels query: key
                ``'pexels_search_query_감독'`` first, otherwise
                ``'emotional_beat'`` plus ``'setting_description'``.
            scene_identifier_filename: File name for the image inside ``output_dir``.

        Returns:
            Path of the saved image, or ``None`` if even the placeholder
            could not be saved.
        """
        if not (self.USE_AI_IMAGE_GENERATION and self.openai_api_key):
            return self._create_placeholder_image_content(
                image_prompt_text, scene_identifier_filename, self.video_frame_size)

        filepath = os.path.join(self.output_dir, scene_identifier_filename)
        dalle_path = self._generate_dalle_image(image_prompt_text, filepath)
        if dalle_path:
            return dalle_path

        print("DALL-E generation failed. Trying Pexels fallback...")
        scene_data = scene_data or {}
        derived_query = f"{scene_data.get('emotional_beat','')} {scene_data.get('setting_description','')}"
        pexels_query_text = scene_data.get('pexels_search_query_감독') or derived_query
        # Both scene fields may be missing, which leaves only a blank query.
        pexels_query_text = pexels_query_text.strip() or "abstract background"
        pexels_path = self._search_pexels_image(pexels_query_text, scene_identifier_filename)
        if pexels_path:
            return pexels_path

        print("Pexels also failed/disabled. Using placeholder.")
        return self._create_placeholder_image_content(
            f"[AI/Pexels Failed] Prompt: {image_prompt_text[:100]}...",
            scene_identifier_filename, self.video_frame_size)

    def generate_narration_audio(self, text_to_narrate, output_filename="narration_overall.mp3"):
        """Synthesize ``text_to_narrate`` with ElevenLabs and save it.

        Returns:
            Path of the written audio file, or ``None`` when ElevenLabs is
            disabled, the text is empty, or generation/saving failed.
        """
        if not self.USE_ELEVENLABS or not self.elevenlabs_api_key or not text_to_narrate:
            print("ElevenLabs disabled/no text. Skipping audio.")
            return None
        audio_filepath = os.path.join(self.output_dir, output_filename)
        try:
            print(f"Generating ElevenLabs audio (Voice: {self.elevenlabs_voice_id}) for: {text_to_narrate[:70]}...")
            audio_data = elevenlabs_generate_audio(text=text_to_narrate, voice=self.elevenlabs_voice_id,
                                                   model="eleven_multilingual_v2")
            with open(audio_filepath, "wb") as f:
                f.write(audio_data)
            print(f"ElevenLabs audio saved: {audio_filepath}")
            return audio_filepath
        except ImportError:
            print("ElevenLabs library not found. Install it.")
        except Exception as e:
            print(f"Error ElevenLabs audio: {e}")
        return None

    def _build_scene_clip(self, img_path, scene_num, key_action, duration_per_image):
        """Return the MoviePy clip for one scene image.

        The image is letter/pillar-boxed onto a near-black canvas of
        ``video_frame_size``, given a slow zoom-in, and overlaid with a
        caption when ``key_action`` is non-empty.  Building each clip in its
        own call also gives the zoom lambda its own ``end_scale``; when the
        lambda lived in the caller's loop, every clip used the last value.
        """
        frame_w, frame_h = self.video_frame_size
        # ``Image.Resampling`` only exists on Pillow >= 9.1; older versions
        # expose LANCZOS on the module itself.
        resample = getattr(Image, "Resampling", Image).LANCZOS
        with Image.open(img_path) as pil_img:
            fitted = pil_img.convert('RGB') if pil_img.mode != 'RGB' else pil_img.copy()
        fitted.thumbnail(self.video_frame_size, resample)
        canvas = Image.new('RGB', self.video_frame_size,
                           (random.randint(0, 15), random.randint(0, 15), random.randint(0, 15)))
        canvas.paste(fitted, ((frame_w - fitted.width) // 2, (frame_h - fitted.height) // 2))
        img_clip = ImageClip(np.array(canvas)).set_duration(duration_per_image)

        # Zoom linearly from 100% to a random 105-115% over the clip.
        end_scale = random.uniform(1.05, 1.15)
        img_clip = img_clip.fx(vfx.resize, lambda t: 1 + (end_scale - 1) * (t / duration_per_image))
        img_clip = img_clip.set_position('center')

        # The caption is shown from 0.5s and lasts one second less than the
        # scene, so it is skipped when the scene is too short for that.
        text_duration = duration_per_image - 1.0
        if not key_action or text_duration <= 0:
            return img_clip
        txt_clip = TextClip(f"Scene {scene_num}\n{key_action}", fontsize=self.video_overlay_font_size,
                            color=self.video_overlay_font_color, font=self.video_overlay_font,
                            bg_color='rgba(10,10,20,0.75)', method='caption', align='West',
                            size=(int(frame_w * 0.9), None), kerning=-1,
                            stroke_color='black', stroke_width=1)
        txt_clip = txt_clip.set_duration(text_duration).set_start(0.5)
        txt_clip = txt_clip.set_position(('center', 0.9), relative=True)
        return CompositeVideoClip([img_clip, txt_clip], size=self.video_frame_size)

    def create_video_from_images(self, image_data_list, overall_narration_path=None, output_filename="final_video.mp4", fps=24, duration_per_image=4.5):
        """Stitch scene images into an H.264 video, optionally with narration.

        Args:
            image_data_list: Dicts with ``'path'`` and optional ``'scene_num'``
                (defaults to the 1-based position) and ``'key_action'``.
                Entries whose file is missing or fails to load are skipped.
            overall_narration_path: Optional audio file used as the soundtrack;
                the video is cut to the audio length when the audio is shorter.
            output_filename: File name of the video inside ``output_dir``.
            fps: Frames per second of the written video.
            duration_per_image: Seconds each image is shown.

        Returns:
            Path of the written video, or ``None`` when there was nothing to
            render or writing failed.
        """
        if not image_data_list:
            return None
        processed_clips = []
        narration_audio_clip = None
        final_video_clip_obj = None
        # One try/finally around everything so clips are released even when
        # concatenation or narration setup raises.
        try:
            for i, data in enumerate(image_data_list):
                img_path = data.get('path')
                scene_num = data.get('scene_num', i + 1)
                key_action = data.get('key_action', '')
                if not (img_path and os.path.exists(img_path)):
                    print(f"Img not found: {img_path}")
                    continue
                try:
                    processed_clips.append(
                        self._build_scene_clip(img_path, scene_num, key_action, duration_per_image))
                except Exception as e:
                    print(f"Error clip for {img_path}: {e}.")

            if not processed_clips:
                print("No clips for video.")
                return None

            transition = 0.8  # seconds by which consecutive clips overlap
            final_video_clip_obj = concatenate_videoclips(processed_clips, padding=-transition, method="compose")
            if final_video_clip_obj.duration > transition * 2:
                final_video_clip_obj = final_video_clip_obj.fx(vfx.fadein, transition).fx(vfx.fadeout, transition)

            if overall_narration_path and os.path.exists(overall_narration_path):
                try:
                    narration_audio_clip = AudioFileClip(overall_narration_path)
                    final_video_clip_obj = final_video_clip_obj.set_audio(narration_audio_clip)
                    if narration_audio_clip.duration < final_video_clip_obj.duration:
                        final_video_clip_obj = final_video_clip_obj.subclip(0, narration_audio_clip.duration)
                    print("Overall narration added.")
                except Exception as e:
                    print(f"Error adding narration: {e}.")

            output_path = os.path.join(self.output_dir, output_filename)
            try:
                final_video_clip_obj.write_videofile(
                    output_path, fps=fps, codec='libx264', preset='slow', audio_codec='aac',
                    temp_audiofile=os.path.join(self.output_dir, f'temp-audio-{os.urandom(4).hex()}.m4a'),
                    remove_temp=True, threads=os.cpu_count() or 2, logger='bar', bitrate="5000k")
                print(f"Video created: {output_path}")
                return output_path
            except Exception as e:
                print(f"Error writing video: {e}")
                return None
        finally:
            for c in processed_clips:
                c.close()
            if narration_audio_clip:
                narration_audio_clip.close()
            if final_video_clip_obj:
                final_video_clip_obj.close()