# core/visual_engine.py
from PIL import Image, ImageDraw, ImageFont
from moviepy.editor import (ImageClip, concatenate_videoclips, TextClip,
                            CompositeVideoClip)
import moviepy.video.fx.all as vfx  # Explicit import so vfx.resize/fadein/fadeout resolve consistently
import numpy as np # For converting PIL images to numpy arrays for moviepy
import os
import openai
import requests
import io
class VisualEngine:
def __init__(self, output_dir="temp_generated_media"):
self.output_dir = output_dir
os.makedirs(self.output_dir, exist_ok=True)
self.font_filename = "arial.ttf"
self.font_path_in_container = f"/usr/local/share/fonts/truetype/mycustomfonts/{self.font_filename}"
self.font_size_pil = 24 # For placeholder images
self.video_overlay_font_size = 36 # For text overlays on video
self.video_overlay_font_color = 'white'
        # For video overlays, TextClip needs a font that ImageMagick can resolve:
        # either a generic family name (e.g. 'Arial') or a path to a .ttf file.
        self.video_overlay_font = 'Arial'  # Swap in self.font_path_in_container if 'Arial' cannot be found.
try:
self.font = ImageFont.truetype(self.font_path_in_container, self.font_size_pil)
print(f"Successfully loaded font: {self.font_path_in_container} for placeholders.")
except IOError:
print(f"Warning: Could not load font from '{self.font_path_in_container}'. Placeholders will use default font.")
self.font = ImageFont.load_default()
self.font_size_pil = 11
self.openai_api_key = None
self.USE_AI_IMAGE_GENERATION = False
self.dalle_model = "dall-e-3"
self.image_size = "1024x1024"
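        # Note: DALL-E 3 also accepts "1792x1024" and "1024x1792" if wide or portrait output is preferred.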
        # 16:9 video frame; square DALL-E images are letterboxed/pillarboxed to fit.
        # Use (1024, 1024) instead if square video frames are preferred.
        self.video_frame_size = (1024, 576)
def set_openai_api_key(self, api_key):
if api_key:
self.openai_api_key = api_key
            # The modern OpenAI client takes the API key per call, so no global openai.api_key is set here.
self.USE_AI_IMAGE_GENERATION = True
print("OpenAI API key set. AI Image Generation Enabled with DALL-E.")
else:
self.USE_AI_IMAGE_GENERATION = False
print("OpenAI API key not provided. AI Image Generation Disabled. Using placeholders.")
def _get_text_dimensions(self, text_content, font_obj):
        if not text_content:
return 0, self.font_size_pil
try:
if hasattr(font_obj, 'getbbox'):
bbox = font_obj.getbbox(text_content)
width = bbox[2] - bbox[0]
height = bbox[3] - bbox[1]
                return width, (height if height > 0 else self.font_size_pil)
elif hasattr(font_obj, 'getsize'):
width, height = font_obj.getsize(text_content)
                return width, (height if height > 0 else self.font_size_pil)
else:
avg_char_width = self.font_size_pil * 0.6
height_estimate = self.font_size_pil * 1.2
return int(len(text_content) * avg_char_width), int(height_estimate if height_estimate > 0 else self.font_size_pil)
except Exception as e:
print(f"Warning: Error getting text dimensions for '{text_content}': {e}. Using estimates.")
avg_char_width = self.font_size_pil * 0.6
height_estimate = self.font_size_pil * 1.2
return int(len(text_content) * avg_char_width), int(height_estimate if height_estimate > 0 else self.font_size_pil)
def _create_placeholder_image_content(self, text_description, filename, size=(1024, 576)):
img = Image.new('RGB', size, color=(30, 30, 60))
draw = ImageDraw.Draw(img)
padding = 30
max_text_width = size[0] - (2 * padding)
lines = []
if not text_description: text_description = "(No description provided for placeholder)"
words = text_description.split()
current_line = ""
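        # Greedy word-wrap: append words to the current line until adding one more
        # would exceed max_text_width; a single word wider than the line is
        # truncated character by character until it fits.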
for word in words:
test_line_candidate = current_line + word + " "
line_width, _ = self._get_text_dimensions(test_line_candidate.strip(), self.font)
            if line_width <= max_text_width: current_line = test_line_candidate
elif current_line != "":
lines.append(current_line.strip())
current_line = word + " "
else:
temp_word = word
while self._get_text_dimensions(temp_word, self.font)[0] > max_text_width and len(temp_word) > 0: temp_word = temp_word[:-1]
lines.append(temp_word)
current_line = ""
if current_line.strip(): lines.append(current_line.strip())
if not lines: lines.append("(Text error in placeholder)")
_, single_line_height = self._get_text_dimensions("Tg", self.font)
if single_line_height == 0: single_line_height = self.font_size_pil
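        # Estimate the wrapped block's total height (including inter-line leading)
        # so the text can be vertically centered within the image.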
line_spacing_factor = 1.3
estimated_line_block_height = len(lines) * single_line_height * line_spacing_factor
y_text = (size[1] - estimated_line_block_height) / 2.0
if y_text < padding: y_text = float(padding)
for line_idx, line in enumerate(lines):
if line_idx >= 7 and len(lines) > 8:
draw.text(xy=(float(padding), y_text), text="...", fill=(200, 200, 130), font=self.font)
break
line_width, _ = self._get_text_dimensions(line, self.font)
x_text = (size[0] - line_width) / 2.0
if x_text < padding: x_text = float(padding)
draw.text(xy=(x_text, y_text), text=line, fill=(220, 220, 150), font=self.font)
y_text += single_line_height * line_spacing_factor
filepath = os.path.join(self.output_dir, filename)
try:
img.save(filepath)
except Exception as e:
print(f"Error saving placeholder image {filepath}: {e}")
return None
return filepath
def generate_image_visual(self, image_prompt_text, scene_identifier_filename):
filepath = os.path.join(self.output_dir, scene_identifier_filename)
if self.USE_AI_IMAGE_GENERATION and self.openai_api_key:
try:
print(f"Generating DALL-E ({self.dalle_model}) image for: {image_prompt_text[:100]}...")
client = openai.OpenAI(api_key=self.openai_api_key)
response = client.images.generate(
model=self.dalle_model, prompt=image_prompt_text, n=1,
size=self.image_size, quality="standard", response_format="url"
)
image_url = response.data[0].url
revised_prompt_dalle3 = response.data[0].revised_prompt
if revised_prompt_dalle3: print(f"DALL-E 3 revised prompt: {revised_prompt_dalle3[:150]}...")
image_response = requests.get(image_url, timeout=60) # Increased timeout for image download
image_response.raise_for_status()
img_data = Image.open(io.BytesIO(image_response.content))
# Ensure image is RGB before saving as PNG (some APIs might return RGBA)
if img_data.mode == 'RGBA':
img_data = img_data.convert('RGB')
img_data.save(filepath)
print(f"AI Image (DALL-E) saved: {filepath}")
return filepath
except openai.APIError as e:
print(f"OpenAI API Error: {e}")
except requests.exceptions.RequestException as e:
print(f"Requests Error downloading DALL-E image: {e}")
except Exception as e:
print(f"Generic error during DALL-E image generation: {e}")
print("Falling back to placeholder image due to DALL-E error.")
return self._create_placeholder_image_content(
f"[DALL-E Failed] Prompt: {image_prompt_text[:150]}...",
scene_identifier_filename, size=self.video_frame_size # Use video frame size for placeholder
)
else:
return self._create_placeholder_image_content(
image_prompt_text, scene_identifier_filename, size=self.video_frame_size
)
def create_video_from_images(self, image_data_list, output_filename="final_video.mp4", fps=24, duration_per_image=3):
"""
Creates a video from a list of image file paths and associated text.
image_data_list: List of dictionaries, each like:
{'path': 'path/to/image.png', 'scene_num': 1, 'key_action': 'Some action'}
"""
if not image_data_list:
print("No image data provided to create video.")
return None
print(f"Attempting to create video from {len(image_data_list)} images.")
processed_clips = []
for i, data in enumerate(image_data_list):
img_path = data.get('path')
scene_num = data.get('scene_num', i + 1)
key_action = data.get('key_action', '')
if not (img_path and os.path.exists(img_path)):
print(f"Image path invalid or not found: {img_path}. Skipping for video.")
continue
try:
# Load image and resize to fit video_frame_size, maintaining aspect ratio (letterbox/pillarbox)
pil_image = Image.open(img_path)
pil_image.thumbnail(self.video_frame_size, Image.Resampling.LANCZOS) # Resize in place
# Create a background matching video_frame_size
background = Image.new('RGB', self.video_frame_size, (0,0,0)) # Black background
# Paste the thumbnail onto the center of the background
paste_x = (self.video_frame_size[0] - pil_image.width) // 2
paste_y = (self.video_frame_size[1] - pil_image.height) // 2
background.paste(pil_image, (paste_x, paste_y))
# Convert PIL image to numpy array for MoviePy
frame_np = np.array(background)
img_clip = ImageClip(frame_np).set_duration(duration_per_image)
# Simple Ken Burns effect (zoom in slightly)
# End scale (e.g., 1.1 = 10% zoom in). Adjust for desired effect.
end_scale = 1.05
img_clip = img_clip.fx(vfx.resize, lambda t: 1 + (end_scale-1) * (t / duration_per_image) )
# To keep it centered while zooming:
img_clip = img_clip.set_position('center')
# Add Text Overlay for Scene Number and Key Action
overlay_text = f"Scene {scene_num}\n{key_action}"
                txt_clip = TextClip(overlay_text, fontsize=self.video_overlay_font_size,
                                    color=self.video_overlay_font_color,
                                    font=self.video_overlay_font,  # Must be resolvable by ImageMagick
                                    bg_color='rgba(0,0,0,0.5)',  # Semi-transparent black background
                                    size=(int(img_clip.w * 0.9), None),  # 90% of frame width (int for ImageMagick); height auto
                                    method='caption',  # Auto-wrap text
                                    align='West',  # Left-align text
                                    kerning=-1
                                    ).set_duration(duration_per_image - 0.5).set_start(0.25)  # Visible for most of the clip
txt_clip = txt_clip.set_position(('center', 0.85), relative=True) # Position at 85% from top, centered
# Composite the image and text
video_with_text_overlay = CompositeVideoClip([img_clip, txt_clip], size=self.video_frame_size)
processed_clips.append(video_with_text_overlay)
except Exception as e_clip:
print(f"Error processing image/creating clip for {img_path}: {e_clip}. Skipping.")
if not processed_clips:
print("No clips could be processed for the video.")
return None
# Concatenate with crossfade transitions
        # padding=-0.5 overlaps consecutive clips by 0.5s; crossfadein dissolves each
        # incoming clip over that overlap (both require method="compose").
        faded_clips = [clip if i == 0 else clip.crossfadein(0.5)
                       for i, clip in enumerate(processed_clips)]
        final_video_clip = concatenate_videoclips(faded_clips, padding=-0.5, method="compose")
        final_video_clip = final_video_clip.fx(vfx.fadein, 0.5).fx(vfx.fadeout, 0.5)
output_path = os.path.join(self.output_dir, output_filename)
print(f"Writing final video to: {output_path}")
try:
final_video_clip.write_videofile(
output_path, fps=fps, codec='libx264', audio_codec='aac',
temp_audiofile=os.path.join(self.output_dir, f'temp-audio-{os.urandom(4).hex()}.m4a'),
remove_temp=True, threads=os.cpu_count() or 2, logger='bar'
)
print(f"Video successfully created: {output_path}")
return output_path
except Exception as e:
print(f"Error writing final video file: {e}")
return None
finally: # Ensure clips are closed
for clip in processed_clips: clip.close()
            if hasattr(final_video_clip, 'close'): final_video_clip.close()
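
if __name__ == "__main__":
    # Illustrative smoke test (a minimal sketch): exercises the placeholder path
    # end to end, so no OpenAI API key is required. Scene prompts, filenames, and
    # key_action strings below are invented for the demo; the video step assumes
    # moviepy and ImageMagick are installed for the text overlays.
    engine = VisualEngine(output_dir="temp_generated_media")
    demo_scenes = [
        {"num": 1, "prompt": "A lighthouse at dawn, waves crashing below", "action": "Establishing shot"},
        {"num": 2, "prompt": "A small fishing boat leaving the harbor", "action": "The journey begins"},
    ]
    image_data = []
    for scene in demo_scenes:
        path = engine.generate_image_visual(scene["prompt"], f"scene_{scene['num']}.png")
        if path:
            image_data.append({"path": path, "scene_num": scene["num"], "key_action": scene["action"]})
    video_path = engine.create_video_from_images(image_data, output_filename="demo_video.mp4")
    print(f"Demo video: {video_path}")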