# core/visual_engine.py
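"""VisualEngine: turns scene descriptions into still images (DALL-E 3, or PIL
placeholder cards when no API key is set) and assembles them into an MP4
slideshow with MoviePy: Ken Burns zoom, caption overlays, and crossfades."""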
from PIL import Image, ImageDraw, ImageFont  # Pillow >= 9.1 needed for Image.Resampling.LANCZOS
from moviepy.editor import (ImageClip, concatenate_videoclips, TextClip,
                            CompositeVideoClip)
import moviepy.video.fx.all as vfx  # For effects like resize, fadein, fadeout
import numpy as np
import os
import openai
import requests
import io
class VisualEngine:
    def __init__(self, output_dir="temp_generated_media"):
        self.output_dir = output_dir
        os.makedirs(self.output_dir, exist_ok=True)

        # Font settings for PIL placeholder images.
        self.font_filename = "arial.ttf"
        self.font_path_in_container = f"/usr/local/share/fonts/truetype/mycustomfonts/{self.font_filename}"
        self.font_size_pil = 24

        # Font settings for video text overlays. TextClip renders via ImageMagick,
        # which does its own font discovery; 'Arial' is a common system font name.
        # If it cannot be found, fall back to self.font_path_in_container
        # (provided ImageMagick can access that path via MoviePy).
        self.video_overlay_font_size = 36
        self.video_overlay_font_color = 'white'
        self.video_overlay_font = 'Arial'

        try:
            self.font = ImageFont.truetype(self.font_path_in_container, self.font_size_pil)
            print(f"Successfully loaded font: {self.font_path_in_container} for placeholders.")
        except IOError:
            print(f"Warning: Could not load font from '{self.font_path_in_container}'. Placeholders will use the default font.")
            self.font = ImageFont.load_default()
            self.font_size_pil = 11  # Approximate size of Pillow's default bitmap font

        self.openai_api_key = None
        self.USE_AI_IMAGE_GENERATION = False
        self.dalle_model = "dall-e-3"
        self.image_size = "1024x1024"  # DALL-E 3 output size
        # Target video frame size (16:9). DALL-E 3 images (1024x1024) will be
        # letterboxed/pillarboxed to fit this frame.
        self.video_frame_size = (1280, 720)
    def set_openai_api_key(self, api_key):
        if api_key:
            self.openai_api_key = api_key
            self.USE_AI_IMAGE_GENERATION = True
            print("OpenAI API key set. AI Image Generation Enabled with DALL-E.")
        else:
            self.USE_AI_IMAGE_GENERATION = False
            print("OpenAI API key not provided. AI Image Generation Disabled. Using placeholders.")
    def _get_text_dimensions(self, text_content, font_obj):
        """Return (width, height) in pixels of text_content rendered with font_obj."""
        if not text_content:
            return 0, self.font_size_pil
        try:
            if hasattr(font_obj, 'getbbox'):  # Pillow >= 8.0.0
                bbox = font_obj.getbbox(text_content)
                width = bbox[2] - bbox[0]
                height = bbox[3] - bbox[1]
                return width, (height if height > 0 else self.font_size_pil)
            elif hasattr(font_obj, 'getsize'):  # Older Pillow (getsize removed in 10.0)
                width, height = font_obj.getsize(text_content)
                return width, (height if height > 0 else self.font_size_pil)
            else:
                # No measuring API available; fall back to rough estimates.
                avg_char_width = self.font_size_pil * 0.6
                height_estimate = self.font_size_pil * 1.2
                return int(len(text_content) * avg_char_width), int(max(height_estimate, self.font_size_pil))
        except Exception as e:
            print(f"Warning: Error getting text dimensions for '{text_content}': {e}. Using estimates.")
            avg_char_width = self.font_size_pil * 0.6
            height_estimate = self.font_size_pil * 1.2
            return int(len(text_content) * avg_char_width), int(max(height_estimate, self.font_size_pil))
    def _create_placeholder_image_content(self, text_description, filename, size=(1024, 576)):
        """Render a simple text card as a stand-in when AI generation is unavailable."""
        img = Image.new('RGB', size, color=(30, 30, 60))
        draw = ImageDraw.Draw(img)
        padding = 30
        max_text_width = size[0] - (2 * padding)

        if not text_description:
            text_description = "(No description provided for placeholder)"

        # Greedy word-wrap against the measured pixel width.
        lines = []
        current_line = ""
        for word in text_description.split():
            test_line_candidate = current_line + word + " "
            line_width, _ = self._get_text_dimensions(test_line_candidate.strip(), self.font)
            if line_width <= max_text_width:
                current_line = test_line_candidate
            elif current_line:
                lines.append(current_line.strip())
                current_line = word + " "
            else:
                # A single word wider than the line: truncate it to fit.
                temp_word = word
                while self._get_text_dimensions(temp_word, self.font)[0] > max_text_width and temp_word:
                    temp_word = temp_word[:-1]
                lines.append(temp_word)
                current_line = ""
        if current_line.strip():
            lines.append(current_line.strip())
        if not lines:
            lines.append("(Text error in placeholder)")

        # Vertically center the text block, clamped to the top padding.
        _, single_line_height = self._get_text_dimensions("Tg", self.font)
        if single_line_height == 0:
            single_line_height = self.font_size_pil
        line_spacing_factor = 1.3
        estimated_line_block_height = len(lines) * single_line_height * line_spacing_factor
        y_text = max((size[1] - estimated_line_block_height) / 2.0, float(padding))

        for line_idx, line in enumerate(lines):
            if line_idx >= 7 and len(lines) > 8:
                # Too many lines to fit comfortably; elide the rest.
                draw.text(xy=(float(padding), y_text), text="...", fill=(200, 200, 130), font=self.font)
                break
            line_width, _ = self._get_text_dimensions(line, self.font)
            x_text = max((size[0] - line_width) / 2.0, float(padding))
            draw.text(xy=(x_text, y_text), text=line, fill=(220, 220, 150), font=self.font)
            y_text += single_line_height * line_spacing_factor

        filepath = os.path.join(self.output_dir, filename)
        try:
            img.save(filepath)
        except Exception as e:
            print(f"Error saving placeholder image {filepath}: {e}")
            return None
        return filepath
    def generate_image_visual(self, image_prompt_text, scene_identifier_filename):
        filepath = os.path.join(self.output_dir, scene_identifier_filename)
        if self.USE_AI_IMAGE_GENERATION and self.openai_api_key:
            try:
                print(f"Generating DALL-E ({self.dalle_model}) image for: {image_prompt_text[:100]}...")
                client = openai.OpenAI(api_key=self.openai_api_key)
                response = client.images.generate(
                    model=self.dalle_model, prompt=image_prompt_text, n=1,
                    size=self.image_size, quality="standard", response_format="url"
                    # style="vivid"  # or "natural"; optional for DALL-E 3
                )
                image_url = response.data[0].url
                revised_prompt_dalle3 = getattr(response.data[0], 'revised_prompt', None)  # Safely access
                if revised_prompt_dalle3:
                    print(f"DALL-E 3 revised prompt: {revised_prompt_dalle3[:150]}...")
                image_response = requests.get(image_url, timeout=60)
                image_response.raise_for_status()
                img_data = Image.open(io.BytesIO(image_response.content))
                if img_data.mode == 'RGBA':  # DALL-E may return PNG with alpha; ensure RGB for consistency
                    img_data = img_data.convert('RGB')
                # Save the AI-generated image (typically 1024x1024 from DALL-E).
                img_data.save(filepath)
                print(f"AI Image (DALL-E) saved: {filepath}")
                return filepath
            except openai.APIError as e:
                print(f"OpenAI API Error: {e}")
            except requests.exceptions.RequestException as e:
                print(f"Requests Error downloading DALL-E image: {e}")
            except Exception as e:
                print(f"Generic error during DALL-E image generation: {e}")
            print("Falling back to placeholder image due to DALL-E error.")
            # The fallback uses video_frame_size so the video pipeline gets the size it expects.
            return self._create_placeholder_image_content(
                f"[DALL-E Failed] Prompt: {image_prompt_text[:150]}...",
                scene_identifier_filename, size=self.video_frame_size
            )
        else:
            # AI not enabled or key missing; the placeholder also uses video_frame_size
            # for consistency in the video pipeline.
            return self._create_placeholder_image_content(
                image_prompt_text, scene_identifier_filename, size=self.video_frame_size
            )
    def create_video_from_images(self, image_data_list, output_filename="final_video.mp4", fps=24, duration_per_image=3):
        if not image_data_list:
            print("No image data provided to create video.")
            return None
        print(f"Attempting to create video from {len(image_data_list)} images.")

        processed_clips = []
        for i, data in enumerate(image_data_list):
            img_path = data.get('path')
            scene_num = data.get('scene_num', i + 1)
            key_action = data.get('key_action', '')
            if not (img_path and os.path.exists(img_path)):
                print(f"Image path invalid or not found: {img_path}. Skipping for video.")
                continue
            try:
                pil_image_original = Image.open(img_path)
                if pil_image_original.mode != 'RGB':  # Ensure RGB for video
                    pil_image_original = pil_image_original.convert('RGB')

                # Resize a copy to fit within video_frame_size, keeping aspect ratio
                # (thumbnail modifies in place, hence the copy).
                pil_image_for_frame = pil_image_original.copy()
                pil_image_for_frame.thumbnail(self.video_frame_size, Image.Resampling.LANCZOS)

                # Paste onto a black canvas of the exact frame size (e.g. 1280x720); this
                # letterboxes/pillarboxes images whose aspect ratio differs from the frame.
                background_canvas = Image.new('RGB', self.video_frame_size, (0, 0, 0))
                paste_x = (self.video_frame_size[0] - pil_image_for_frame.width) // 2
                paste_y = (self.video_frame_size[1] - pil_image_for_frame.height) // 2
                background_canvas.paste(pil_image_for_frame, (paste_x, paste_y))
                frame_np = np.array(background_canvas)  # MoviePy consumes numpy frames

                # Base image clip with a simple Ken Burns zoom-in (to 108% by clip end).
                img_clip = ImageClip(frame_np).set_duration(duration_per_image)
                end_scale = 1.08
                img_clip = img_clip.fx(vfx.resize, lambda t: 1 + (end_scale - 1) * (t / duration_per_image))
                img_clip = img_clip.set_position('center')  # Keep centered during zoom

                # Text overlay. TextClip renders via ImageMagick, which has its own font
                # discovery; 'Arial' usually resolves if mscorefonts are installed. If not,
                # pass self.font_path_in_container as the font, and make sure ImageMagick
                # itself is present (e.g. `apt-get install imagemagick` in the Dockerfile).
                overlay_text = f"Scene {scene_num}: {key_action}"
                txt_clip = TextClip(
                    overlay_text,
                    fontsize=self.video_overlay_font_size,
                    color=self.video_overlay_font_color,
                    font=self.video_overlay_font,  # Or self.font_path_in_container
                    bg_color='rgba(0,0,0,0.6)',
                    size=(int(self.video_frame_size[0] * 0.9), None),  # 90% of video width, auto height
                    method='caption',
                    align='West',
                    kerning=-1
                ).set_duration(duration_per_image - 0.5).set_start(0.25)  # Inset 0.25s from both ends
                txt_clip = txt_clip.set_position(('center', 0.88), relative=True)  # Near the bottom

                video_with_text_overlay = CompositeVideoClip([img_clip, txt_clip], size=self.video_frame_size)
                processed_clips.append(video_with_text_overlay)
            except Exception as e_clip:
                print(f"Error processing image/creating clip for {img_path}: {e_clip}. Skipping.")

        if not processed_clips:
            print("No clips could be processed for the video.")
            return None

        # Crossfade between scenes (0.5s): give each clip after the first a fade-in,
        # then concatenate with a matching negative padding so the clips overlap.
        faded_clips = [clip if idx == 0 else clip.crossfadein(0.5)
                       for idx, clip in enumerate(processed_clips)]
        final_video_clip = concatenate_videoclips(faded_clips, padding=-0.5, method="compose")

        # Fade the whole video in and out, if it is long enough for the fades.
        if final_video_clip.duration > 1:
            final_video_clip = final_video_clip.fx(vfx.fadein, 0.5).fx(vfx.fadeout, 0.5)

        output_path = os.path.join(self.output_dir, output_filename)
        print(f"Writing final video to: {output_path}")
        try:
            final_video_clip.write_videofile(
                output_path, fps=fps, codec='libx264', audio_codec='aac',
                temp_audiofile=os.path.join(self.output_dir, f'temp-audio-{os.urandom(4).hex()}.m4a'),
                remove_temp=True, threads=os.cpu_count() or 2, logger='bar'
            )
            print(f"Video successfully created: {output_path}")
            return output_path
        except Exception as e:
            print(f"Error writing final video file: {e}")
            return None
        finally:
            for clip_item in processed_clips:
                if hasattr(clip_item, 'close'):
                    clip_item.close()
            if hasattr(final_video_clip, 'close'):
                final_video_clip.close()
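
# --- Usage sketch (hypothetical) ---
# A minimal end-to-end example, assuming an OPENAI_API_KEY environment variable;
# the scene prompts and filenames below are illustrative, not part of the API.
if __name__ == "__main__":
    engine = VisualEngine(output_dir="temp_generated_media")
    engine.set_openai_api_key(os.environ.get("OPENAI_API_KEY"))  # None -> placeholder mode

    scenes = [  # Hypothetical scene data
        {"prompt": "A lighthouse at dawn, cinematic wide shot", "key_action": "Dawn at the lighthouse"},
        {"prompt": "A storm rolling in over a dark sea", "key_action": "The storm arrives"},
    ]
    image_data = []
    for idx, scene in enumerate(scenes, start=1):
        path = engine.generate_image_visual(scene["prompt"], f"scene_{idx}.png")
        if path:
            image_data.append({"path": path, "scene_num": idx, "key_action": scene["key_action"]})

    engine.create_video_from_images(image_data, output_filename="demo_video.mp4", duration_per_image=3)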