File size: 12,042 Bytes
287c9ca 9d84ba9 8583908 990e23e 9d84ba9 990e23e 287c9ca 8583908 9d84ba9 5470dfc 287c9ca 9d84ba9 41b47a8 287c9ca 50c620f 09d5c67 41b47a8 9d84ba9 8583908 9d84ba9 8583908 41b47a8 09d5c67 b97795f 9d84ba9 5470dfc 09d5c67 8583908 09d5c67 9d84ba9 09d5c67 9d84ba9 09d5c67 b97795f 9d84ba9 50c620f 9d84ba9 41b47a8 9d84ba9 41b47a8 9d84ba9 41b47a8 9d84ba9 41b47a8 9d84ba9 41b47a8 9d84ba9 41b47a8 9d84ba9 41b47a8 9d84ba9 41b47a8 9d84ba9 41b47a8 9d84ba9 b97795f 41b47a8 09d5c67 9d84ba9 990e23e 9d84ba9 09d5c67 9d84ba9 990e23e 8583908 9d84ba9 8583908 09d5c67 41b47a8 9d84ba9 8583908 9d84ba9 8583908 9d84ba9 8583908 9d84ba9 8583908 9d84ba9 8583908 9d84ba9 990e23e 9d84ba9 8583908 b97795f 9d84ba9 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 |
# core/visual_engine.py
from PIL import Image, ImageDraw, ImageFont
from moviepy.editor import (ImageClip, concatenate_videoclips, TextClip,
CompositeVideoClip)
import moviepy.video.fx.all as vfx
import numpy as np
import os
import openai
import requests
import io
import time # For adding slight delay if API rate limits are hit
class VisualEngine:
    """Generate per-scene images (DALL-E 3 or PIL placeholders) and cut them into a video.

    Typical flow:
        engine = VisualEngine()
        engine.set_openai_api_key(key)           # optional; placeholders otherwise
        path = engine.generate_image_visual(prompt, "scene_1.png")
        engine.create_video_from_images([{'path': path, 'scene_num': 1, 'key_action': '...'}])
    """

    def __init__(self, output_dir="temp_cinegen_media"):
        """Prepare the media output directory and load the placeholder font.

        Args:
            output_dir: Directory where images/videos are written (created if absent).
        """
        self.output_dir = output_dir
        os.makedirs(self.output_dir, exist_ok=True)

        # PIL font used when drawing text onto placeholder frames.
        self.font_filename = "arial.ttf"
        self.font_path_in_container = f"/usr/local/share/fonts/truetype/mycustomfonts/{self.font_filename}"
        self.font_size_pil = 20  # Slightly smaller for placeholder text to fit more

        # MoviePy/ImageMagick settings for the per-scene text overlay.
        self.video_overlay_font_size = 32
        self.video_overlay_font_color = 'white'
        self.video_overlay_font = 'Arial-Bold'  # ImageMagick must be able to resolve this name

        try:
            self.font = ImageFont.truetype(self.font_path_in_container, self.font_size_pil)
            print(f"Successfully loaded font: {self.font_path_in_container} for placeholders.")
        except IOError:
            print(f"Warning: Could not load font from '{self.font_path_in_container}'. Placeholders will use default font.")
            self.font = ImageFont.load_default()
            self.font_size_pil = 10  # Rough size estimate for PIL's default bitmap font

        # DALL-E generation stays disabled until set_openai_api_key() supplies a key.
        self.openai_api_key = None
        self.USE_AI_IMAGE_GENERATION = False
        self.dalle_model = "dall-e-3"
        # DALL-E 3 landscape size, good for cinematic frames.
        # Other options: "1792x1024", "1024x1792".
        self.image_size_dalle3 = "1792x1024"
        self.video_frame_size = (1280, 720)  # 16:9 aspect ratio for video output

    def set_openai_api_key(self, api_key):
        """Enable DALL-E generation when *api_key* is truthy; disable it otherwise."""
        if api_key:
            self.openai_api_key = api_key
            self.USE_AI_IMAGE_GENERATION = True
            print(f"OpenAI API key set. AI Image Generation Enabled with {self.dalle_model}.")
        else:
            self.USE_AI_IMAGE_GENERATION = False
            print("OpenAI API key not provided. AI Image Generation Disabled. Using placeholders.")

    def _get_text_dimensions(self, text_content, font_obj):
        """Return (width, height) in pixels of *text_content* rendered with *font_obj*.

        Falls back to a crude character-count estimate when the font object
        exposes neither getbbox() (modern Pillow) nor the legacy getsize().
        Height is never reported as 0; self.font_size_pil is substituted.
        """
        if not text_content:
            return 0, self.font_size_pil
        try:
            if hasattr(font_obj, 'getbbox'):  # Pillow >= 8 measuring API
                bbox = font_obj.getbbox(text_content)
                width = bbox[2] - bbox[0]
                height = bbox[3] - bbox[1]
                return width, height if height > 0 else self.font_size_pil
            elif hasattr(font_obj, 'getsize'):  # legacy Pillow measuring API
                width, height = font_obj.getsize(text_content)
                return width, height if height > 0 else self.font_size_pil
            else:
                # No measuring API: estimate ~0.6em per character, 1.2em line height.
                return int(len(text_content) * self.font_size_pil * 0.6), int(self.font_size_pil * 1.2 if self.font_size_pil * 1.2 > 0 else self.font_size_pil)
        except Exception:
            return int(len(text_content) * self.font_size_pil * 0.6), int(self.font_size_pil * 1.2)

    def _create_placeholder_image_content(self, text_description, filename, size=(1280, 720)):
        """Render *text_description* word-wrapped and centered on a dark canvas.

        Args:
            text_description: Text to draw (a stand-in message is used when empty).
            filename: File name to save under self.output_dir.
            size: Canvas (width, height); defaults to the video frame size.

        Returns:
            The saved file path, or None when saving failed.
        """
        img = Image.new('RGB', size, color=(20, 20, 40))  # dark blue-grey background
        draw = ImageDraw.Draw(img)
        padding = 25
        max_text_width = size[0] - (2 * padding)

        if not text_description:
            text_description = "(Placeholder: No prompt provided)"

        # Greedy word-wrap against the measured pixel width.
        lines = []
        current_line = ""
        for word in text_description.split():
            test_line = current_line + word + " "
            if self._get_text_dimensions(test_line, self.font)[0] <= max_text_width:
                current_line = test_line
            else:
                if current_line:
                    lines.append(current_line.strip())
                current_line = word + " "
        if current_line:
            lines.append(current_line.strip())
        if not lines:
            lines.append("(Text too long or unrenderable for placeholder)")

        _, single_line_height = self._get_text_dimensions("Ay", self.font)
        if single_line_height == 0:
            single_line_height = self.font_size_pil + 2

        # Cap lines to what fits vertically, then center the stack on the canvas.
        num_lines_to_display = min(len(lines), (size[1] - 2 * padding) // (single_line_height + 2))
        y_text = padding + (size[1] - 2 * padding - num_lines_to_display * (single_line_height + 2)) / 2.0
        for i in range(num_lines_to_display):
            line = lines[i]
            line_width, _ = self._get_text_dimensions(line, self.font)
            x_text = (size[0] - line_width) / 2.0  # center each line horizontally
            draw.text((x_text, y_text), line, font=self.font, fill=(200, 200, 180))
            y_text += single_line_height + 2  # line spacing
            if i == 6 and num_lines_to_display > 7:  # hard cap at 7 lines + ellipsis
                draw.text((x_text, y_text), "...", font=self.font, fill=(200, 200, 180))
                break

        filepath = os.path.join(self.output_dir, filename)
        try:
            img.save(filepath)
            return filepath
        except Exception as e:
            print(f"Error saving placeholder: {e}")
            return None

    def generate_image_visual(self, image_prompt_text, scene_identifier_filename):
        """Produce an image for one scene and return its file path.

        Uses DALL-E when enabled (retrying on rate limits); otherwise — or on
        any failure — falls back to a locally rendered placeholder image.
        """
        filepath = os.path.join(self.output_dir, scene_identifier_filename)
        if self.USE_AI_IMAGE_GENERATION and self.openai_api_key:
            max_retries = 2
            for attempt in range(max_retries):
                try:
                    print(f"Attempt {attempt+1}: DALL-E ({self.dalle_model}) for: {image_prompt_text[:120]}...")
                    client = openai.OpenAI(api_key=self.openai_api_key, timeout=60.0)  # client-level timeout
                    response = client.images.generate(
                        model=self.dalle_model,
                        prompt=image_prompt_text,
                        n=1,
                        size=self.image_size_dalle3,
                        quality="hd",  # "hd" for better detail; "standard" is faster/cheaper
                        response_format="url",
                        style="vivid"  # "vivid" or "natural" for DALL-E 3
                    )
                    image_url = response.data[0].url
                    # DALL-E 3 may rewrite the prompt; surface it for debugging.
                    revised_prompt = getattr(response.data[0], 'revised_prompt', None)
                    if revised_prompt:
                        print(f"DALL-E 3 revised_prompt: {revised_prompt[:100]}...")
                    image_response = requests.get(image_url, timeout=90)  # generous download timeout
                    image_response.raise_for_status()
                    img_data = Image.open(io.BytesIO(image_response.content))
                    if img_data.mode != 'RGB':
                        img_data = img_data.convert('RGB')
                    img_data.save(filepath)
                    print(f"AI Image (DALL-E) saved: {filepath}")
                    return filepath
                except openai.RateLimitError as e:
                    print(f"OpenAI Rate Limit Error: {e}. Retrying after delay...")
                    if attempt < max_retries - 1:
                        time.sleep(5 * (attempt + 1))  # simple linear backoff
                        continue
                    else:
                        print("Max retries reached for RateLimitError.")
                        break
                except openai.APIError as e:
                    print(f"OpenAI API Error: {e}")
                    break
                except requests.exceptions.RequestException as e:
                    print(f"Requests Error (DALL-E image download): {e}")
                    break
                except Exception as e:
                    print(f"Generic error (DALL-E image gen): {e}")
                    break
            print("DALL-E generation failed after retries. Falling back to placeholder.")
            return self._create_placeholder_image_content(
                f"[AI Gen Failed] Prompt: {image_prompt_text[:100]}...",
                scene_identifier_filename, size=self.video_frame_size
            )
        else:
            return self._create_placeholder_image_content(
                image_prompt_text, scene_identifier_filename, size=self.video_frame_size
            )

    def _letterbox_frame(self, pil_img):
        """Fit *pil_img* inside self.video_frame_size on a black canvas; return an RGB numpy array."""
        if pil_img.mode != 'RGB':
            pil_img = pil_img.convert('RGB')
        fitted = pil_img.copy()
        fitted.thumbnail(self.video_frame_size, Image.Resampling.LANCZOS)
        canvas = Image.new('RGB', self.video_frame_size, (0, 0, 0))
        x_offset = (self.video_frame_size[0] - fitted.width) // 2
        y_offset = (self.video_frame_size[1] - fitted.height) // 2
        canvas.paste(fitted, (x_offset, y_offset))
        return np.array(canvas)

    def create_video_from_images(self, image_data_list, output_filename="final_video.mp4", fps=24, duration_per_image=4):
        """Assemble the listed images into an MP4 with Ken Burns zoom, captions and crossfades.

        Args:
            image_data_list: List of dicts with key 'path' and optional 'scene_num', 'key_action'.
            output_filename: Name of the MP4 written into self.output_dir.
            fps: Output frame rate.
            duration_per_image: Seconds each still is shown.

        Returns:
            Path to the written video, or None when nothing could be processed/written.
        """
        if not image_data_list:
            return None
        print(f"Creating video from {len(image_data_list)} image sets.")
        processed_clips = []
        for i, data in enumerate(image_data_list):
            img_path = data.get('path')
            scene_num = data.get('scene_num', i + 1)
            key_action = data.get('key_action', '')
            if not (img_path and os.path.exists(img_path)):
                print(f"Image not found: {img_path}. Skipping.")
                continue
            try:
                # FIX: use a context manager so the source image file handle
                # is closed promptly instead of leaking per processed image.
                with Image.open(img_path) as pil_img_orig:
                    frame_np = self._letterbox_frame(pil_img_orig)
                img_clip = ImageClip(frame_np).set_duration(duration_per_image)
                # Ken Burns effect: zoom from 100% to 110% across the clip.
                img_clip = img_clip.fx(vfx.resize, lambda t: 1 + 0.1 * (t / duration_per_image))
                img_clip = img_clip.set_position('center')
                if key_action:
                    overlay_text = f"Scene {scene_num}\n{key_action}"
                    # FIX: keep the overlay duration positive; the original
                    # duration_per_image - 1.0 goes <= 0 when durations are <= 1s.
                    overlay_duration = max(duration_per_image - 1.0, 0.1)
                    txt_clip = TextClip(overlay_text, fontsize=self.video_overlay_font_size,
                                        color=self.video_overlay_font_color, font=self.video_overlay_font,
                                        bg_color='rgba(0,0,0,0.7)', method='caption', align='West',
                                        # FIX: ImageMagick expects an integer pixel width here.
                                        size=(int(self.video_frame_size[0] * 0.85), None),
                                        kerning=-1, stroke_color='black', stroke_width=0.5
                                        ).set_duration(overlay_duration).set_start(0.5)  # slight delay, ends ~1s early
                    txt_clip = txt_clip.set_position(('center', 0.88), relative=True)
                    final_scene_clip = CompositeVideoClip([img_clip, txt_clip], size=self.video_frame_size)
                else:
                    final_scene_clip = img_clip
                processed_clips.append(final_scene_clip)
            except Exception as e:
                print(f"Error processing clip for {img_path}: {e}. Skipping.")
        if not processed_clips:
            print("No clips processed for video.")
            return None
        transition_duration = 0.75  # crossfade length between scenes
        # Negative padding overlaps consecutive clips to produce the crossfade.
        final_video = concatenate_videoclips(processed_clips, padding=-transition_duration, method="compose")
        if final_video.duration > transition_duration * 2:  # only fade when there is room
            final_video = final_video.fx(vfx.fadein, transition_duration).fx(vfx.fadeout, transition_duration)
        output_path = os.path.join(self.output_dir, output_filename)
        try:
            final_video.write_videofile(output_path, fps=fps, codec='libx264', preset='medium', audio_codec='aac',
                                        temp_audiofile=os.path.join(self.output_dir, f'temp-audio-{os.urandom(4).hex()}.m4a'),
                                        remove_temp=True, threads=os.cpu_count() or 2, logger='bar')
            print(f"Video created: {output_path}")
            return output_path
        except Exception as e:
            print(f"Error writing video file: {e}")
            return None
        finally:
            # Release MoviePy resources whether or not the write succeeded.
            for clip in processed_clips:
                clip.close()
            if hasattr(final_video, 'close'):
                final_video.close()