Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -14,14 +14,19 @@ import time
|
|
14 |
import wave
|
15 |
import contextlib
|
16 |
import asyncio
|
17 |
-
import uuid # For unique
|
18 |
-
import shutil # For
|
|
|
19 |
|
20 |
# Image handling
|
21 |
from PIL import Image
|
|
|
|
|
|
|
22 |
|
23 |
# Video and audio processing
|
24 |
-
from moviepy.editor import ImageClip, AudioFileClip,
|
|
|
25 |
|
26 |
# Type hints
|
27 |
import typing_extensions as typing
|
@@ -30,464 +35,731 @@ import typing_extensions as typing
|
|
30 |
import nest_asyncio
|
31 |
nest_asyncio.apply() # Apply patch for asyncio in environments like Streamlit/Jupyter
|
32 |
|
|
|
|
|
|
|
|
|
33 |
# --- Configuration ---
|
34 |
-
st.set_page_config(page_title="ChronoWeave", layout="wide")
|
35 |
-
st.title("π ChronoWeave: Branching Narrative Generator")
|
36 |
st.markdown("""
|
37 |
-
Generate multiple, branching story timelines from a single theme using AI.
|
38 |
-
Based on the work of Yousif Ahmed. Copyright 2025 Google LLC
|
39 |
""")
|
40 |
|
41 |
# --- Constants ---
|
42 |
-
|
43 |
-
|
44 |
-
|
45 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
46 |
|
47 |
# --- API Key Handling ---
|
|
|
|
|
|
|
48 |
try:
|
49 |
-
# Preferred way
|
50 |
GOOGLE_API_KEY = st.secrets["GOOGLE_API_KEY"]
|
51 |
-
|
52 |
except KeyError:
|
53 |
-
|
54 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
55 |
|
56 |
-
# --- Initialize Google
|
57 |
try:
|
58 |
-
# Initialize the client with the API key
|
59 |
genai.configure(api_key=GOOGLE_API_KEY)
|
60 |
|
61 |
-
#
|
62 |
-
|
63 |
-
|
64 |
-
|
|
|
|
|
|
|
|
|
65 |
client_live = genai.Client(
|
66 |
-
client_options={'api_endpoint': f'{
|
67 |
)
|
68 |
-
|
69 |
-
|
70 |
-
# Adjust based on the library version and observed behavior.
|
71 |
-
|
72 |
|
73 |
except Exception as e:
|
74 |
-
|
|
|
75 |
st.stop()
|
76 |
|
77 |
|
78 |
-
# --- Define
|
79 |
-
class StorySegment(
|
80 |
-
scene_id: int
|
81 |
-
image_prompt: str
|
82 |
-
audio_text: str
|
83 |
-
character_description: str
|
84 |
-
timeline_visual_modifier:
|
85 |
-
|
86 |
-
|
87 |
-
|
88 |
-
|
89 |
-
|
90 |
-
|
91 |
-
|
92 |
-
|
93 |
-
|
94 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
95 |
|
96 |
# --- Helper Functions ---
|
97 |
|
98 |
@contextlib.contextmanager
|
99 |
-
def
|
100 |
-
"""Context manager to write WAV files."""
|
101 |
-
|
|
|
|
|
102 |
wf.setnchannels(channels)
|
103 |
-
wf.setsampwidth(sample_width)
|
104 |
wf.setframerate(rate)
|
105 |
yield wf
|
106 |
-
|
107 |
-
|
108 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
109 |
collected_audio = bytearray()
|
110 |
-
|
|
|
111 |
|
112 |
try:
|
113 |
-
# Use the '
|
114 |
-
live_model = client_live.get_model(f"models/gemini-1.5-flash") # Specify model within the live client context
|
115 |
-
|
116 |
config = {
|
117 |
-
"response_modalities": ["AUDIO"]
|
|
|
|
|
|
|
|
|
|
|
118 |
}
|
119 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
120 |
async with live_model.connect(config=config) as session:
|
121 |
-
|
|
|
|
|
122 |
async for response in session.stream_content():
|
123 |
-
|
124 |
collected_audio.extend(response.audio_chunk.data)
|
|
|
|
|
|
|
|
|
|
|
125 |
|
126 |
if not collected_audio:
|
127 |
-
|
128 |
-
|
129 |
-
|
130 |
-
|
131 |
-
# Write the collected audio bytes into a WAV file.
|
132 |
-
with
|
133 |
-
wf.writeframes(
|
134 |
-
|
135 |
return output_filename
|
|
|
|
|
|
|
|
|
|
|
136 |
except Exception as e:
|
137 |
-
|
|
|
138 |
return None
|
139 |
|
140 |
|
141 |
-
def generate_story_sequence_chrono(
|
142 |
-
|
143 |
-
|
144 |
-
|
145 |
-
|
146 |
-
|
147 |
-
|
148 |
-
|
149 |
-
|
150 |
-
|
151 |
-
{
|
152 |
-
|
153 |
-
|
154 |
-
|
155 |
-
|
156 |
-
|
157 |
-
|
158 |
-
|
159 |
-
|
160 |
-
|
161 |
-
|
162 |
-
|
163 |
-
|
164 |
-
|
165 |
-
|
166 |
-
|
167 |
-
|
168 |
-
|
169 |
-
|
170 |
-
|
171 |
-
|
172 |
-
|
173 |
-
|
174 |
-
|
175 |
-
|
176 |
-
|
177 |
-
|
178 |
-
|
179 |
-
|
180 |
-
|
181 |
-
|
182 |
-
"character_description": {{"type": "string"}},
|
183 |
-
"timeline_visual_modifier": {{"type": ["string", "null"]}}
|
184 |
-
}},
|
185 |
-
"required": ["scene_id", "image_prompt", "audio_text", "character_description", "timeline_visual_modifier"]
|
186 |
-
}}
|
187 |
-
}}
|
188 |
-
}},
|
189 |
-
"required": ["timeline_id", "divergence_reason", "segments"]
|
190 |
-
}}
|
191 |
-
}},
|
192 |
-
"total_scenes_per_timeline": {{"type": "integer"}}
|
193 |
-
}},
|
194 |
-
"required": ["core_theme", "timelines", "total_scenes_per_timeline"]
|
195 |
-
}}
|
196 |
-
'''
|
197 |
|
198 |
try:
|
199 |
response = client_standard.generate_content(
|
200 |
contents=prompt,
|
201 |
generation_config=genai.types.GenerationConfig(
|
202 |
-
|
203 |
-
|
204 |
)
|
205 |
-
# The schema can also be passed via generation_config in some versions/models
|
206 |
-
# config={
|
207 |
-
# 'response_mime_type': 'application/json',
|
208 |
-
# 'response_schema': ChronoWeaveResponse # Pass the TypedDict directly
|
209 |
-
# }
|
210 |
)
|
211 |
|
212 |
-
# Debugging:
|
213 |
-
#
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
214 |
|
215 |
-
|
216 |
-
|
217 |
-
|
218 |
-
|
219 |
-
|
220 |
-
|
221 |
-
|
222 |
-
|
223 |
-
|
|
|
|
|
|
|
224 |
|
225 |
-
except
|
226 |
-
|
227 |
-
|
228 |
-
|
229 |
except Exception as e:
|
230 |
-
|
231 |
-
|
232 |
-
#
|
|
|
233 |
return None
|
234 |
|
235 |
|
236 |
-
def generate_image_imagen(prompt: str, aspect_ratio: str = "1:1") -> Image.Image
|
237 |
-
"""
|
238 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
239 |
try:
|
240 |
-
# Use the standard client's
|
|
|
|
|
|
|
241 |
response = client_standard.generate_content(
|
242 |
-
|
243 |
generation_config=genai.types.GenerationConfig(
|
244 |
-
candidate_count=1,
|
245 |
-
#
|
246 |
-
# Check
|
|
|
|
|
|
|
247 |
),
|
248 |
-
#
|
249 |
-
# tools=[genai.ImageParams(model=IMAGE_MODEL_ID, number_of_images=1, aspect_ratio=aspect_ratio, person_generation="DONT_ALLOW")]
|
250 |
)
|
251 |
|
252 |
-
#
|
253 |
-
|
254 |
-
if response.parts and response.parts[0].inline_data:
|
255 |
image_bytes = response.parts[0].inline_data.data
|
256 |
-
|
257 |
-
|
258 |
-
|
|
|
|
|
|
|
|
|
|
|
259 |
else:
|
260 |
-
# Check for
|
261 |
-
|
262 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
263 |
else:
|
264 |
-
|
265 |
-
|
|
|
|
|
266 |
return None
|
267 |
|
|
|
|
|
|
|
|
|
268 |
except Exception as e:
|
269 |
-
|
|
|
270 |
return None
|
271 |
|
272 |
|
273 |
# --- Streamlit UI Elements ---
|
274 |
-
st.sidebar.header("Configuration")
|
275 |
|
276 |
-
# API Key
|
277 |
if GOOGLE_API_KEY:
|
278 |
-
st.sidebar.success("Google API Key Loaded
|
279 |
else:
|
|
|
280 |
st.sidebar.error("Google API Key Missing!", icon="π¨")
|
281 |
|
282 |
-
|
283 |
-
|
284 |
-
|
285 |
-
|
286 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
287 |
|
288 |
-
generate_button = st.sidebar.button("β¨ Generate ChronoWeave β¨", type="primary", disabled=(not GOOGLE_API_KEY))
|
289 |
|
290 |
st.sidebar.markdown("---")
|
291 |
-
st.sidebar.info("
|
|
|
|
|
292 |
|
293 |
# --- Main Logic ---
|
294 |
if generate_button:
|
295 |
if not theme:
|
296 |
-
st.error("Please enter a story theme.", icon="π")
|
297 |
else:
|
298 |
# Create a unique temporary directory for this run
|
299 |
-
run_id = str(uuid.uuid4())
|
300 |
-
temp_dir = os.path.join(
|
301 |
-
|
302 |
-
|
303 |
-
|
304 |
-
|
305 |
-
|
306 |
-
|
307 |
-
|
308 |
-
|
309 |
-
|
310 |
-
|
311 |
-
|
312 |
-
|
313 |
-
|
314 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
315 |
with st.status("Generating assets and composing videos...", expanded=True) as status:
|
316 |
|
317 |
-
for timeline in
|
318 |
-
timeline_id = timeline
|
319 |
-
divergence = timeline
|
320 |
-
segments = timeline
|
321 |
-
|
322 |
-
|
323 |
-
|
324 |
-
|
325 |
-
|
326 |
-
|
327 |
-
|
328 |
-
|
329 |
-
|
330 |
-
|
331 |
-
|
332 |
-
|
333 |
-
|
334 |
-
|
335 |
-
|
336 |
-
|
337 |
-
|
338 |
-
|
339 |
-
|
340 |
-
|
341 |
-
|
342 |
-
|
343 |
-
|
344 |
-
|
345 |
-
|
346 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
347 |
|
348 |
if generated_image:
|
349 |
-
image_path = os.path.join(temp_dir, f"
|
350 |
-
|
351 |
-
|
352 |
-
|
353 |
-
|
354 |
-
|
355 |
-
|
356 |
-
|
357 |
-
|
358 |
-
|
359 |
-
# Add negative prompt to prevent conversational filler
|
360 |
-
audio_negative_prompt = "Narrate the following sentence directly, with expression, without any introduction or closing remarks like 'Okay' or 'Here is the narration'. Just read the sentence:"
|
361 |
-
full_audio_prompt = f"{audio_negative_prompt}\n{audio_text}"
|
362 |
-
audio_path = os.path.join(temp_dir, f"t{timeline_id}_s{i}_audio.wav")
|
363 |
-
|
364 |
-
# Run the async audio generation function
|
365 |
-
try:
|
366 |
-
generated_audio_path = asyncio.run(generate_audio_live_async(full_audio_prompt, audio_path))
|
367 |
-
except Exception as e:
|
368 |
-
st.error(f"Asyncio error during audio gen: {e}")
|
369 |
-
generated_audio_path = None
|
370 |
-
|
371 |
-
|
372 |
-
if generated_audio_path:
|
373 |
-
temp_audio_files.append(generated_audio_path)
|
374 |
-
# st.audio(generated_audio_path) # Optional: Preview audio
|
375 |
else:
|
376 |
-
st.warning(f"
|
377 |
-
|
378 |
-
|
379 |
-
|
380 |
-
|
381 |
-
|
382 |
-
|
383 |
-
|
384 |
-
|
385 |
-
|
386 |
-
|
387 |
-
|
388 |
-
|
389 |
-
|
390 |
-
|
391 |
-
|
392 |
-
|
393 |
-
|
394 |
-
|
395 |
-
|
396 |
-
|
397 |
-
|
398 |
-
|
399 |
-
|
400 |
-
|
401 |
-
|
402 |
-
|
403 |
-
|
404 |
-
|
405 |
-
|
406 |
-
|
407 |
-
|
408 |
-
|
409 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
410 |
try:
|
411 |
final_timeline_video = concatenate_videoclips(video_clips, method="compose")
|
412 |
-
|
413 |
-
|
414 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
415 |
final_video_paths[timeline_id] = output_filename
|
416 |
-
|
417 |
-
|
418 |
-
# Close clips to release resources
|
419 |
-
for clip in video_clips:
|
420 |
-
if hasattr(clip, 'close'): clip.close()
|
421 |
-
if hasattr(clip, 'audio') and hasattr(clip.audio, 'close'): clip.audio.close()
|
422 |
-
if hasattr(final_timeline_video, 'close'): final_timeline_video.close()
|
423 |
-
|
424 |
|
425 |
except Exception as e:
|
426 |
-
|
|
|
427 |
all_timelines_successful = False
|
428 |
-
|
429 |
-
|
430 |
-
|
431 |
-
|
432 |
-
|
433 |
-
|
434 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
435 |
|
436 |
-
|
437 |
-
|
438 |
-
|
439 |
-
|
440 |
-
#
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
441 |
|
442 |
# Final status update
|
|
|
443 |
if all_timelines_successful and final_video_paths:
|
444 |
-
|
|
|
|
|
445 |
elif final_video_paths:
|
446 |
-
|
|
|
|
|
447 |
else:
|
448 |
-
|
449 |
-
|
|
|
450 |
|
451 |
-
# --- Display Results ---
|
452 |
-
st.header("Generated Timelines")
|
453 |
if final_video_paths:
|
454 |
sorted_timeline_ids = sorted(final_video_paths.keys())
|
455 |
-
for
|
|
|
|
|
456 |
video_path = final_video_paths[timeline_id]
|
457 |
-
# Find matching timeline
|
458 |
-
|
459 |
-
|
460 |
-
|
461 |
-
|
462 |
-
|
463 |
-
|
464 |
-
|
465 |
-
|
466 |
-
|
467 |
-
|
468 |
-
|
469 |
-
|
470 |
-
|
471 |
-
|
472 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
473 |
else:
|
474 |
-
st.warning("No final videos were successfully generated.")
|
475 |
-
|
476 |
-
|
477 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
478 |
try:
|
479 |
shutil.rmtree(temp_dir)
|
480 |
-
|
|
|
481 |
except Exception as e:
|
482 |
-
|
483 |
-
|
484 |
|
485 |
-
elif not
|
486 |
-
|
|
|
487 |
else:
|
488 |
-
# This case
|
489 |
-
st.error("
|
490 |
-
|
491 |
|
492 |
else:
|
493 |
-
st.info("Configure settings in the sidebar and click 'Generate ChronoWeave'")
|
|
|
14 |
import wave
|
15 |
import contextlib
|
16 |
import asyncio
|
17 |
+
import uuid # For unique identifiers
|
18 |
+
import shutil # For directory operations
|
19 |
+
import logging # For better logging
|
20 |
|
21 |
# Image handling
|
22 |
from PIL import Image
|
23 |
+
# Pydantic for data validation
|
24 |
+
from pydantic import BaseModel, Field, ValidationError, validator
|
25 |
+
from typing import List, Optional, Literal
|
26 |
|
27 |
# Video and audio processing
|
28 |
+
from moviepy.editor import ImageClip, AudioFileClip, concatenate_videoclips
|
29 |
+
from moviepy.config import change_settings # Potential for setting imagemagick path if needed
|
30 |
|
31 |
# Type hints
|
32 |
import typing_extensions as typing
|
|
|
35 |
import nest_asyncio
|
36 |
nest_asyncio.apply() # Apply patch for asyncio in environments like Streamlit/Jupyter
|
37 |
|
38 |
+
# --- Logging Setup ---
# Root logging at INFO with "<timestamp> - <level> - <message>" records; the
# module-level `logger` is what the rest of this file logs through.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s',
)
logger = logging.getLogger(__name__)

# --- Configuration ---
# Page chrome: wide layout, sidebar open on first load, then title and blurb.
# NOTE(review): the leading character of the title looks like a mis-encoded
# emoji in this copy of the file - restore it from the upstream source.
st.set_page_config(
    page_title="ChronoWeave",
    layout="wide",
    initial_sidebar_state="expanded",
)
st.title("π ChronoWeave: Advanced Branching Narrative Generator")
st.markdown("""
Generate multiple, branching story timelines from a single theme using AI, complete with images and narration.
*Based on the work of Yousif Ahmed. Copyright 2025 Google LLC.*
""")
|
49 |
|
50 |
# --- Constants ---
# Text/JSON Model
TEXT_MODEL_ID = "models/gemini-1.5-flash"  # Or "gemini-1.5-pro" for potentially higher quality/cost
# Audio Model Config
AUDIO_API_VERSION = 'v1alpha'  # Required for audio modality
# Plain literal: the previous f-string had no placeholders.
AUDIO_MODEL_ID = "models/gemini-1.5-flash"  # Model used via the v1alpha endpoint
AUDIO_SAMPLING_RATE = 24000  # Sample rate in Hz used for requested audio and the WAV header
# Image Model Config
IMAGE_MODEL_ID = "imagen-3"  # Or specific version like "imagen-3.0-generate-002"
DEFAULT_ASPECT_RATIO = "1:1"
# Video Config
VIDEO_FPS = 24
VIDEO_CODEC = "libx264"  # Widely compatible H.264
AUDIO_CODEC = "aac"  # Common audio codec for MP4
# File Management
TEMP_DIR_BASE = ".chrono_temp"  # Base name for temporary directories
|
66 |
|
67 |
# --- API Key Handling ---
# Resolution order:
#   1. Streamlit secrets (`GOOGLE_API_KEY`) - the deployed configuration.
#   2. The `GOOGLE_API_KEY` environment variable - local development.
# If neither yields a key, an error is rendered and the script run is halted.
# NOTE(review): the emoji in the error literals below look mis-encoded in this
# copy of the file; restore them from the upstream source (confirm that
# `icon=` receives a valid emoji).
GOOGLE_API_KEY = None
try:
    # Preferred way: Use Streamlit secrets when deployed
    GOOGLE_API_KEY = st.secrets["GOOGLE_API_KEY"]
    logger.info("Google API Key loaded from Streamlit secrets.")
except (KeyError, FileNotFoundError):
    # KeyError: secrets are configured but contain no GOOGLE_API_KEY entry.
    # FileNotFoundError: no secrets file exists at all (the usual local-dev
    # situation); without catching it the environment fallback below would
    # never be reached.
    GOOGLE_API_KEY = os.environ.get('GOOGLE_API_KEY')
    if GOOGLE_API_KEY:
        logger.info("Google API Key loaded from environment variable.")
    else:
        # Error if neither is found
        st.error(
            "π¨ **Google API Key Not Found!**\n"
            "Please configure your Google API Key:\n"
            "1. **Streamlit Cloud/Hugging Face Spaces:** Add it as a Secret named `GOOGLE_API_KEY` in your app's settings.\n"
            "2. **Local Development:** Set the `GOOGLE_API_KEY` environment variable.",
            icon="π¨"
        )
        st.stop()  # Halt execution
|
90 |
|
91 |
+
# --- Initialize Google Clients ---
# Builds three module-level handles used further down the file:
#   client_standard - GenerativeModel bound to TEXT_MODEL_ID
#   client_live     - client pointed at the AUDIO_API_VERSION endpoint
#   live_model      - model handle fetched through client_live
# Any failure is logged with its traceback, shown in the UI, and stops the run.
# NOTE(review): `configure`/`GenerativeModel` and `Client(client_options=...)`/
# `get_model` are all called on the same `genai` alias; which SDK that alias
# refers to is not visible here - confirm every one of these exists in the
# installed package version.
# NOTE(review): the emoji in the error literal look mis-encoded in this copy of
# the file; restore them from the upstream source.
try:
    genai.configure(api_key=GOOGLE_API_KEY)

    client_standard = genai.GenerativeModel(TEXT_MODEL_ID)
    logger.info(f"Initialized standard GenerativeModel for {TEXT_MODEL_ID}.")

    live_endpoint = f'{AUDIO_API_VERSION}.generativelanguage.googleapis.com'
    client_live = genai.Client(client_options={'api_endpoint': live_endpoint})
    live_model = client_live.get_model(AUDIO_MODEL_ID)
    logger.info(f"Initialized live client for audio generation ({AUDIO_MODEL_ID} via {AUDIO_API_VERSION}).")

except Exception as init_error:
    logger.exception("Failed to initialize Google AI Clients.")
    st.error(f"π¨ Failed to initialize Google AI Clients: {init_error}", icon="π¨")
    st.stop()
|
113 |
|
114 |
|
115 |
+
# --- Define Pydantic Schemas for Robust Validation ---
|
116 |
+
class StorySegment(BaseModel):
    # One scene of a timeline: its index, the image prompt, the narration line
    # and the consistency hints that accompany them. Documented with comments
    # rather than a class docstring so the generated JSON schema is unchanged.
    # NOTE(review): the descriptions ask for up to 35 / 30 words while
    # max_length caps both texts at 150 characters - confirm the caps are not
    # tighter than intended.
    scene_id: int = Field(..., ge=0, description="Scene number within the timeline, starting from 0.")
    image_prompt: str = Field(..., min_length=10, max_length=150, description="Concise visual description for image generation (15-35 words). Focus on non-human characters, setting, action, style.")
    audio_text: str = Field(..., min_length=5, max_length=150, description="Single sentence of narration/dialogue for the scene (max 30 words).")
    character_description: str = Field(..., max_length=100, description="Brief description of key non-human characters/objects in *this* scene's prompt for consistency.")
    timeline_visual_modifier: Optional[str] = Field(None, max_length=50, description="Optional subtle visual style hint (e.g., 'slightly darker', 'more vibrant colors').")

    @validator('image_prompt')
    def image_prompt_no_humans(cls, v):
        """Log a warning when the image prompt appears to mention humans.

        The value is never rejected or altered: the story prompt and the image
        request are relied on to keep humans out, so this check is advisory.

        Args:
            v: The candidate ``image_prompt`` text.

        Returns:
            ``v`` unchanged.
        """
        import re  # local import: keeps this block independent of the file's import list

        # Whole-word match so that e.g. "many" or "salamander" no longer
        # trigger the warning; plural forms are listed explicitly because the
        # word boundaries would otherwise exclude them.
        human_words = (
            r"\b(?:persons?|people|humans?|humanoids?|man|men|woman|women|"
            r"boys?|girls?|child|children)\b"
        )
        if re.search(human_words, v, flags=re.IGNORECASE):
            logger.warning(f"Image prompt '{v[:50]}...' may contain human descriptions. Relying on API-level controls.")
        return v
|
130 |
+
|
131 |
+
class Timeline(BaseModel):
    # One branch of the story: a non-negative id, the reason it diverged, and
    # at least one StorySegment. Documented with comments rather than a class
    # docstring so the generated JSON schema is unchanged.
    timeline_id: int = Field(
        ...,
        ge=0,
        description="Unique identifier for this timeline.",
    )
    divergence_reason: str = Field(
        ...,
        min_length=5,
        description="Clear reason why this timeline branched off.",
    )
    segments: List[StorySegment] = Field(
        ...,
        min_items=1,
        description="List of scenes composing this timeline.",
    )
|
135 |
+
|
136 |
+
class ChronoWeaveResponse(BaseModel):
    # Top-level payload: the theme, every generated timeline, and the scene
    # count each timeline must contain. Documented with comments rather than a
    # class docstring so the generated JSON schema is unchanged.
    core_theme: str = Field(..., min_length=5, description="The central theme provided by the user.")
    timelines: List[Timeline] = Field(..., min_items=1, description="List of generated timelines.")
    total_scenes_per_timeline: int = Field(..., gt=0, description="The requested number of scenes per timeline.")

    @validator('total_scenes_per_timeline')
    def check_timeline_segment_count(cls, total_scenes, values):
        """Require every timeline to contain exactly ``total_scenes`` segments.

        The check is attached to ``total_scenes_per_timeline`` because
        ``values`` only holds fields validated earlier, in declaration order.
        Attached to ``timelines`` (declared first), the scene count was never
        available and the comparison never ran.

        Args:
            total_scenes: The validated ``total_scenes_per_timeline`` value.
            values: Previously validated fields; ``timelines`` is absent when
                its own validation failed, in which case nothing is compared.

        Returns:
            ``total_scenes`` unchanged.

        Raises:
            ValueError: If any timeline's segment count differs from
                ``total_scenes``.
        """
        for i, timeline in enumerate(values.get('timelines') or []):
            if len(timeline.segments) != total_scenes:
                raise ValueError(f"Timeline {i} (ID: {timeline.timeline_id}) has {len(timeline.segments)} segments, but expected {total_scenes}.")
        return total_scenes
|
149 |
|
150 |
# --- Helper Functions ---
|
151 |
|
152 |
@contextlib.contextmanager
def wave_file_writer(filename: str, channels: int = 1, rate: int = AUDIO_SAMPLING_RATE, sample_width: int = 2):
    """Yield a configured ``wave`` writer for *filename* and close it on exit.

    Args:
        filename: Path of the WAV file to create.
        channels: Number of audio channels written to the header.
        rate: Frame rate in Hz written to the header.
        sample_width: Bytes per sample (2 means 16-bit audio).

    Yields:
        The open ``wave`` writer object.

    Raises:
        Exception: Whatever is raised while opening or configuring the file,
            or inside the caller's ``with`` body, is logged and re-raised.
    """
    wav_out = None
    try:
        wav_out = wave.open(filename, "wb")
        wav_out.setnchannels(channels)
        wav_out.setsampwidth(sample_width)
        wav_out.setframerate(rate)
        yield wav_out
    except Exception as err:
        logger.error(f"Error opening/configuring wave file {filename}: {err}")
        raise
    finally:
        # Only close when the open itself succeeded.
        if wav_out is not None:
            wav_out.close()
|
169 |
+
|
170 |
+
|
171 |
+
async def generate_audio_live_async(api_text: str, output_filename: str, voice: Optional[str] = None) -> Optional[str]:
    """
    Request narration audio for *api_text* and save it as a WAV file.

    Opens a session on the module-level ``live_model``, sends the text wrapped
    in a "narrate only this sentence" directive, collects the streamed audio
    chunks and writes them to *output_filename* through ``wave_file_writer``.

    Args:
        api_text: The sentence to narrate.
        output_filename: Destination WAV path; its base name (up to the first
            dot) is used as the task id in log and UI messages.
        voice: Currently unused - accepted for interface compatibility only.

    Returns:
        *output_filename* on success, or ``None`` when the stream reports an
        error, no audio is received, the prompt is blocked, or any other
        exception occurs (each case is logged and surfaced in the UI).

    NOTE(review): ``connect`` / ``send_request`` / ``stream_content`` and the
    ``audio_chunk`` / ``error`` response attributes are used as-is; confirm
    them against the installed SDK.
    NOTE(review): several emoji in the literals below look mis-encoded in this
    copy of the file; restore them from the upstream source (confirm that
    ``icon=`` receives a valid emoji).
    """
    collected_audio = bytearray()
    task_id = os.path.basename(output_filename).split('.')[0]  # Extract T#_S# for logging
    logger.info(f"ποΈ [{task_id}] Requesting audio for: '{api_text[:60]}...'")

    try:
        config = {
            "response_modalities": ["AUDIO"],
            "audio_config": {
                "audio_encoding": "LINEAR16",  # Raw 16-bit PCM, matching the WAV header written below
                "sample_rate_hertz": AUDIO_SAMPLING_RATE,
            }
        }

        # Directive wrapper so the model speaks only the sentence, without
        # conversational filler before or after it.
        directive_prompt = (
            "Narrate the following sentence directly and engagingly. "
            "Do not add any introductory or concluding remarks like 'Okay', 'Sure', or 'Here is the narration'. "
            "Speak only the sentence itself:\n\n"
            f'"{api_text}"'
        )

        async with live_model.connect(config=config) as session:
            await session.send_request([directive_prompt])

            async for response in session.stream_content():
                if response.audio_chunk and response.audio_chunk.data:
                    collected_audio.extend(response.audio_chunk.data)
                # A stream-level error aborts this request.
                if response.error:
                    logger.error(f" β [{task_id}] Error during audio stream: {response.error}")
                    st.error(f"Audio stream error for scene {task_id}: {response.error}", icon="π")
                    return None

        if not collected_audio:
            logger.warning(f"β οΈ [{task_id}] No audio data received for: '{api_text[:60]}...'")
            st.warning(f"No audio data generated for scene {task_id}.", icon="π")
            return None

        # writeframes accepts any bytes-like object, so the buffer is passed
        # directly instead of being copied into a bytes object first.
        with wave_file_writer(output_filename, rate=AUDIO_SAMPLING_RATE) as wf:
            wf.writeframes(collected_audio)
        logger.info(f" ✅ [{task_id}] Audio saved: {os.path.basename(output_filename)} ({len(collected_audio)} bytes)")
        return output_filename

    except genai.types.generation_types.BlockedPromptException as bpe:
        logger.error(f" β [{task_id}] Audio generation blocked for prompt '{api_text[:60]}...': {bpe}")
        st.error(f"Audio generation blocked for scene {task_id} due to safety settings.", icon="π")
        return None
    except Exception as e:
        logger.exception(f" β [{task_id}] Audio generation failed unexpectedly for '{api_text[:60]}...': {e}")
        st.error(f"Audio generation failed for scene {task_id}: {e}", icon="π")
        return None
|
233 |
|
234 |
|
235 |
+
def generate_story_sequence_chrono(
    theme: str,
    num_scenes: int,
    num_timelines: int,
    divergence_prompt: str = ""
) -> Optional[ChronoWeaveResponse]:
    """
    Generate the branching story structure and validate it with Pydantic.

    Builds a prompt containing the instructions and the JSON schema of
    ``ChronoWeaveResponse``, asks ``client_standard`` for a JSON response,
    parses it and validates it.

    Args:
        theme: Core theme of the story.
        num_scenes: Number of scenes requested per timeline.
        num_timelines: Number of timelines requested.
        divergence_prompt: Optional hint describing how timelines diverge.

    Returns:
        The validated ``ChronoWeaveResponse``, or ``None`` when the response is
        not valid JSON, fails validation, the prompt is blocked, or any other
        exception occurs (each case is logged and surfaced in the UI).

    NOTE(review): several emoji in the literals below look mis-encoded in this
    copy of the file; restore them from the upstream source (confirm that
    ``icon=`` receives a valid emoji).
    """
    st.info(f"π Generating {num_timelines} timeline(s) x {num_scenes} scenes for theme: '{theme}'...")
    logger.info(f"Requesting story structure: Theme='{theme}', Timelines={num_timelines}, Scenes={num_scenes}")

    divergence_instruction = (
        f"Introduce clear points of divergence between timelines, starting potentially after the first scene. "
        f"If provided, use this hint for divergence: '{divergence_prompt}'. "
        f"Clearly state the divergence reason for each timeline (except potentially the first)."
    )

    # schema_json() already returns JSON text; wrapping it in json.dumps()
    # would embed it as a single quoted, escaped string instead of the schema.
    schema_text = ChronoWeaveResponse.schema_json(indent=2)

    prompt = f"""
    Act as an expert narrative designer specializing in short, visual, branching stories for children.
    Create a story based on the core theme: "{theme}".

    **Instructions:**
    1. Generate exactly **{num_timelines}** distinct timelines.
    2. Each timeline must contain exactly **{num_scenes}** sequential scenes.
    3. **Crucially, DO NOT include any humans, people, or humanoid figures** in the descriptions or actions. Focus strictly on animals, fantasy creatures, animated objects, or natural elements.
    4. {divergence_instruction}
    5. Maintain a consistent visual style across all scenes and timelines: **'Simple, friendly kids animation style with bright colors and rounded shapes'**, unless a `timeline_visual_modifier` subtly alters it.
    6. Each scene's narration (`audio_text`) should be a single, concise sentence (approx. 5-10 seconds spoken length, max 30 words).
    7. Image prompts (`image_prompt`) should be descriptive (15-35 words), focusing on the non-human character(s), setting, action, and visual style. Explicitly mention the main character(s) for consistency.
    8. `character_description` should briefly describe recurring non-human characters mentioned *in the specific scene's image prompt* (name, key visual features). Keep consistent within a timeline.

    **Output Format:**
    Respond ONLY with a valid JSON object adhering strictly to the provided schema. Do not include any text before or after the JSON object.

    **JSON Schema:**
    ```json
    {schema_text}
    ```
    """

    try:
        response = client_standard.generate_content(
            contents=prompt,
            generation_config=genai.types.GenerationConfig(
                response_mime_type="application/json",
                temperature=0.7  # Add some creativity
            )
        )

        # Step 1: the response text must be valid JSON.
        try:
            raw_data = json.loads(response.text)
        except json.JSONDecodeError as json_err:
            logger.error(f"Failed to decode JSON response: {json_err}")
            logger.error(f"Problematic Response Text:\n{response.text}")
            st.error(f"π¨ Failed to parse the story structure from the AI. Error: {json_err}", icon="π")
            st.text_area("Problematic AI Response:", response.text, height=200)
            return None

        # Step 2: the parsed JSON must satisfy the Pydantic schema.
        try:
            validated_data = ChronoWeaveResponse.parse_obj(raw_data)
            logger.info("✅ Story structure generated and validated successfully!")
            st.success("✅ Story structure generated and validated!")
            return validated_data
        except ValidationError as val_err:
            logger.error(f"JSON structure validation failed: {val_err}")
            logger.error(f"Received Data:\n{json.dumps(raw_data, indent=2)}")
            st.error(f"π¨ The generated story structure is invalid: {val_err}", icon="π§¬")
            st.json(raw_data)  # Show the invalid structure
            return None

    except genai.types.generation_types.BlockedPromptException as bpe:
        logger.error(f"Story generation prompt blocked: {bpe}")
        st.error("π¨ The story generation prompt was blocked, likely due to safety filters. Try rephrasing the theme.", icon="π«")
        return None
    except Exception as e:
        logger.exception("Error during story sequence generation:")
        st.error(f"π¨ An unexpected error occurred during story generation: {e}", icon="π₯")
        return None
|
322 |
|
323 |
|
324 |
+
def generate_image_imagen(prompt: str, aspect_ratio: str = "1:1", task_id: str = "IMG") -> Optional[Image.Image]:
    """
    Generates an image using Imagen via the standard client.

    The scene prompt is wrapped in fixed style guidance (child-friendly
    animation style, no humans) and sent through
    ``client_standard.generate_content``. Every failure is reported through
    the logger and the Streamlit UI.

    Args:
        prompt: Scene description to illustrate.
        aspect_ratio: Requested aspect ratio. NOTE(review): this value is
            currently only written to the log; it is not part of the request.
        task_id: Identifier used in log lines and user-facing messages.

    Returns:
        A PIL Image object, or None when no image could be produced
        (blocked prompt, safety-flagged response, missing or undecodable
        image data, or an exception raised by the client call).
    """
    logger.info(f"🖼️ [{task_id}] Requesting image for: '{prompt[:70]}...' (Aspect: {aspect_ratio})")

    # Style guidance and negative constraints are embedded in the text prompt.
    full_prompt = (
        "Generate an image in a child-friendly, simple animation style with bright colors and rounded shapes. "
        "Ensure absolutely NO humans or human-like figures are present. Focus on animals or objects. "
        f"Prompt: {prompt}"
    )

    try:
        response = client_standard.generate_content(
            full_prompt,
            generation_config=genai.types.GenerationConfig(
                candidate_count=1,
            ),
        )

        image_bytes = _first_inline_image_bytes(response)
        if image_bytes:
            try:
                image = Image.open(BytesIO(image_bytes))
            except Exception as img_err:
                logger.error(f" ❌ [{task_id}] Failed to decode generated image data: {img_err}")
                st.warning(f"Failed to decode image data for scene {task_id}.", icon="🖼️")
                return None
            logger.info(f" ✅ [{task_id}] Image generated successfully.")
            return image

        # No image data came back: explain why as precisely as the response allows.
        feedback = getattr(response, "prompt_feedback", None)
        block_reason = getattr(feedback, "block_reason", None)
        flagged = _flagged_safety_ratings(getattr(feedback, "safety_ratings", None))

        if block_reason:
            logger.warning(f" ⚠️ [{task_id}] Image generation blocked. Reason: {block_reason}. Prompt: '{prompt[:70]}...'")
            st.warning(f"Image generation blocked for scene {task_id}. Reason: {block_reason}", icon="🚫")
        elif flagged:
            logger.warning(f" ⚠️ [{task_id}] Image flagged by safety filters ({', '.join(flagged)}) and no image data found. Prompt: '{prompt[:70]}...'")
            st.warning(f"No image data received for scene {task_id} (safety flagged: {', '.join(flagged)}).", icon="⚠️")
        else:
            logger.warning(f" ⚠️ [{task_id}] No image data received, unknown reason. Prompt: '{prompt[:70]}...'")
            st.warning(f"No image data received for scene {task_id}, reason unclear.", icon="🖼️")
        return None

    except genai.types.generation_types.BlockedPromptException as bpe:
        logger.error(f" ❌ [{task_id}] Image generation blocked for prompt '{prompt[:70]}...': {bpe}")
        st.error(f"Image generation blocked for scene {task_id} due to safety settings.", icon="🚫")
        return None
    except Exception as e:
        logger.exception(f" ❌ [{task_id}] Image generation failed unexpectedly for '{prompt[:70]}...': {e}")
        st.error(f"Image generation failed for scene {task_id}: {e}", icon="🖼️")
        return None


def _first_inline_image_bytes(response):
    """Return the payload of the first response part carrying inline data, or None."""
    try:
        # NOTE(review): the `parts` quick accessor is expected to raise when the
        # response has no usable candidate (e.g. a blocked prompt); treat that
        # as "no image data" so the caller can report the block reason.
        parts = response.parts
    except (ValueError, IndexError, AttributeError):
        return None
    for part in parts or []:
        data = getattr(getattr(part, "inline_data", None), "data", None)
        if data:
            return data
    return None


def _flagged_safety_ratings(safety_ratings):
    """Format every safety rating whose probability is not NEGLIGIBLE."""
    flagged = []
    for rating in safety_ratings or []:
        probability = rating.probability
        # Compare by enum name: comparing the enum member itself to the string
        # 'NEGLIGIBLE' is never equal, which made the original filter a no-op.
        if getattr(probability, "name", str(probability)) != "NEGLIGIBLE":
            flagged.append(f"{rating.category}: {probability}")
    return flagged
|
412 |
|
413 |
|
414 |
# --- Streamlit UI Elements ---
# Builds the sidebar: API key status, story parameters, visual/audio settings
# and the generate button. The values read here (theme, num_scenes,
# num_timelines, divergence_prompt, aspect_ratio, audio_voice, generate_button)
# are consumed by the main logic below.
st.sidebar.header("⚙️ Configuration")

# API Key Status (already handled, just display status)
if GOOGLE_API_KEY:
    st.sidebar.success("Google API Key Loaded", icon="✅")
else:
    # This part should technically not be reached due to st.stop() earlier
    st.sidebar.error("Google API Key Missing!", icon="🚨")

# Story Parameters
theme = st.sidebar.text_input("📖 Story Theme:", "A curious squirrel finds a mysterious, glowing acorn")
num_scenes = st.sidebar.slider(
    "🎬 Scenes per Timeline:",
    min_value=2,
    max_value=7,
    value=3,
    help="Number of scenes (image+narration) in each timeline.",
)
num_timelines = st.sidebar.slider(
    "🌿 Number of Timelines:",
    min_value=1,
    max_value=4,
    value=2,
    help="Number of parallel storylines to generate.",
)
divergence_prompt = st.sidebar.text_input(
    "↔️ Divergence Hint (Optional):",
    placeholder="e.g., What if a bird tried to steal it?",
    help="A suggestion for how the timelines might differ.",
)

# Generation Settings
st.sidebar.subheader("🎨 Visual & Audio Settings")
aspect_ratio = st.sidebar.selectbox(
    "🖼️ Image Aspect Ratio:",
    ["1:1", "16:9", "9:16"],
    index=0,
    help="Aspect ratio for generated images.",
)
# Add audio voice selection if API supports it and voices are known
# available_voices = ["aura-asteria-en", "aura-luna-en", "aura-stella-en"] # Example
# audio_voice = st.sidebar.selectbox("🗣️ Narration Voice:", available_voices, index=0)
audio_voice = None  # Placeholder if voice selection isn't implemented/stable

# The button is disabled without an API key so a run cannot start unauthenticated.
generate_button = st.sidebar.button(
    "✨ Generate ChronoWeave ✨",
    type="primary",
    disabled=(not GOOGLE_API_KEY),
    use_container_width=True,
)

st.sidebar.markdown("---")
st.sidebar.info("⏳ Generation can take several minutes, especially with more scenes or timelines.", icon="⏳")
st.sidebar.markdown(
    f"<small>Models: Text={TEXT_MODEL_ID}, Image={IMAGE_MODEL_ID}, Audio={AUDIO_MODEL_ID} ({AUDIO_API_VERSION})</small>",
    unsafe_allow_html=True,
)
|
443 |
+
|
444 |
|
445 |
# --- Main Logic ---
# On button press: generate the narrative structure, then for every timeline
# generate one image + one narration per scene, turn each pair into a video
# clip, concatenate the clips into one MP4 per timeline, display the results
# and finally remove the run's temporary directory.

def _remove_file_quietly(path, context):
    """Delete *path* if it exists; log instead of raising. Returns True if removed."""
    if not path or not os.path.exists(path):
        return False
    try:
        os.remove(path)
        return True
    except OSError as e:
        logger.warning(f" ⚠️ [{context}] Could not remove file {path}: {e}")
        return False


def _close_clips(clips, final_clip, label):
    """Close every scene clip (and its audio) plus the concatenated clip to release file handles."""
    logger.debug(f"[{label}] Closing {len(clips)} video clips...")
    for clip in clips:
        try:
            clip.close()
            if clip.audio:
                clip.audio.close()
        except Exception as e_close:
            logger.warning(f" ⚠️ [{label}] Error closing clip: {e_close}")
    if final_clip:
        try:
            final_clip.close()
        except Exception as e_close_final:
            logger.warning(f" ⚠️ [{label}] Error closing final video object: {e_close_final}")
    logger.debug(f"[{label}] Clips closed.")


def _cleanup_temp_dir(path):
    """Remove the run's temporary directory and report the outcome in the UI."""
    st.info(f"Cleaning up temporary directory: {path}")
    try:
        shutil.rmtree(path)
        logger.info(f"✅ Temporary directory removed: {path}")
        st.success("✅ Temporary files cleaned up.")
    except Exception as e:
        logger.error(f"⚠️ Could not remove temporary directory {path}: {e}")
        st.warning(f"Could not automatically remove temporary files: {path}. Please remove it manually if needed.", icon="⚠️")


if generate_button:
    if not theme:
        st.error("Please enter a story theme in the sidebar.", icon="👈")
    else:
        # Create a unique temporary directory for this run
        run_id = str(uuid.uuid4()).split('-')[0]  # Short unique ID
        temp_dir = os.path.join(TEMP_DIR_BASE, f"run_{run_id}")
        try:
            os.makedirs(temp_dir, exist_ok=True)
            logger.info(f"Created temporary directory: {temp_dir}")
        except OSError as e:
            st.error(f"🚨 Failed to create temporary directory {temp_dir}: {e}", icon="📁")
            st.stop()

        final_video_paths = {}  # Stores {timeline_id: video_path}
        generation_errors = {}  # Stores {timeline_id: [error_messages]}

        # --- 1. Generate Narrative Structure ---
        chrono_response: Optional[ChronoWeaveResponse] = None
        with st.spinner("Generating narrative structure... 🤔"):
            chrono_response = generate_story_sequence_chrono(theme, num_scenes, num_timelines, divergence_prompt)

        if chrono_response:
            st.success(f"Narrative structure received for {len(chrono_response.timelines)} timelines.")
            logger.info(f"Successfully generated structure for {len(chrono_response.timelines)} timelines.")

            # --- 2. Process Each Timeline ---
            overall_start_time = time.time()
            all_timelines_successful = True  # Assume success initially

            # Use st.status for collapsible progress updates
            with st.status("Generating assets and composing videos...", expanded=True) as status:

                for timeline_index, timeline in enumerate(chrono_response.timelines):
                    timeline_id = timeline.timeline_id
                    divergence = timeline.divergence_reason
                    segments = timeline.segments
                    timeline_label = f"Timeline {timeline_id}"  # Consistent label
                    st.subheader(f"Processing {timeline_label}: {divergence}")
                    logger.info(f"--- Processing {timeline_label} (Index: {timeline_index}) ---")
                    generation_errors[timeline_id] = []  # Initialize error list for this timeline

                    # Store paths for this timeline's assets
                    temp_image_files = {}  # {scene_id: path}
                    temp_audio_files = {}  # {scene_id: path}
                    video_clips = []  # List of moviepy clips for concatenation
                    timeline_start_time = time.time()
                    scene_success_count = 0

                    for segment in segments:
                        scene_id = segment.scene_id
                        task_id = f"T{timeline_id}_S{scene_id}"  # Unique ID for logging/filenames
                        # Scene labels show scene_id + 1 (assumes scene_id is zero-based — TODO confirm).
                        status_message = f"Processing {timeline_label}, Scene {scene_id + 1}/{len(segments)}..."
                        status.update(label=status_message)
                        st.markdown(f"--- **Scene {scene_id + 1} ({task_id})** ---")
                        logger.info(status_message)

                        # Log scene details
                        st.write(f" *Image Prompt:* {segment.image_prompt}" + (f" *(Modifier: {segment.timeline_visual_modifier})*" if segment.timeline_visual_modifier else ""))
                        st.write(f" *Audio Text:* {segment.audio_text}")

                        # --- 2a. Image Generation ---
                        with st.spinner(f"[{task_id}] Generating image... 🎨"):
                            combined_prompt = f"{segment.image_prompt}. {segment.character_description}"
                            if segment.timeline_visual_modifier:
                                combined_prompt += f" Visual style hint: {segment.timeline_visual_modifier}."
                            generated_image: Optional[Image.Image] = generate_image_imagen(combined_prompt, aspect_ratio, task_id)

                        if not generated_image:
                            st.warning(f"Image generation failed for scene {task_id}. Skipping scene.", icon="🖼️")
                            generation_errors[timeline_id].append(f"Scene {scene_id+1}: Image generation failed.")
                            # No image, so skip audio and video for this scene
                            continue

                        image_path = os.path.join(temp_dir, f"{task_id}_image.png")
                        try:
                            generated_image.save(image_path)
                            temp_image_files[scene_id] = image_path
                            st.image(generated_image, width=180, caption=f"Scene {scene_id+1} Image")
                        except Exception as e:
                            logger.error(f" ❌ [{task_id}] Failed to save image {image_path}: {e}")
                            st.error(f"Failed to save image for scene {task_id}.", icon="💾")
                            generation_errors[timeline_id].append(f"Scene {scene_id+1}: Image save failed.")
                            # Without a usable image there is nothing to pair audio with.
                            temp_image_files.pop(scene_id, None)
                            _remove_file_quietly(image_path, task_id)
                            continue

                        # --- 2b. Audio Generation ---
                        generated_audio_path: Optional[str] = None
                        with st.spinner(f"[{task_id}] Generating audio... 🔊"):
                            audio_path_temp = os.path.join(temp_dir, f"{task_id}_audio.wav")
                            try:
                                # Run the async function using asyncio.run() which works with nest_asyncio
                                generated_audio_path = asyncio.run(
                                    generate_audio_live_async(segment.audio_text, audio_path_temp, audio_voice)
                                )
                            except RuntimeError as e:
                                # Catch potential issues if asyncio loop is misconfigured despite nest_asyncio
                                logger.error(f" ❌ [{task_id}] Asyncio runtime error during audio gen: {e}")
                                st.error(f"Asyncio error during audio generation for {task_id}: {e}", icon="⚡")
                                generation_errors[timeline_id].append(f"Scene {scene_id+1}: Audio async error.")
                            except Exception as e:
                                logger.exception(f" ❌ [{task_id}] Unexpected error during audio generation call for {task_id}: {e}")
                                st.error(f"Unexpected error in audio generation for {task_id}: {e}", icon="💥")
                                generation_errors[timeline_id].append(f"Scene {scene_id+1}: Audio generation error.")

                        if not generated_audio_path:
                            st.warning(f"Audio generation failed for {task_id}. Skipping video clip.", icon="🔊")
                            generation_errors[timeline_id].append(f"Scene {scene_id+1}: Audio generation failed.")
                            # Clean up the image for this failed segment if audio fails
                            if _remove_file_quietly(temp_image_files.pop(scene_id, None), task_id):
                                logger.info(f" 🗑️ [{task_id}] Removed image file due to audio failure.")
                            continue  # Skip video clip creation

                        temp_audio_files[scene_id] = generated_audio_path
                        # Optional: Preview audio
                        try:
                            with open(generated_audio_path, 'rb') as ap:
                                st.audio(ap.read(), format='audio/wav')
                        except Exception as e:
                            logger.warning(f" ⚠️ [{task_id}] Could not display audio preview: {e}")

                        # --- 2c. Create Video Clip ---
                        st.write(f" 🎬 Creating video clip for Scene {scene_id+1}...")
                        audio_clip = None
                        try:
                            # Ensure files exist before creating clips
                            img_path = temp_image_files[scene_id]
                            aud_path = temp_audio_files[scene_id]
                            if not os.path.exists(img_path):
                                raise FileNotFoundError(f"Image file not found: {img_path}")
                            if not os.path.exists(aud_path):
                                raise FileNotFoundError(f"Audio file not found: {aud_path}")

                            audio_clip = AudioFileClip(aud_path)
                            # Use numpy array for ImageClip to avoid potential PIL issues with moviepy versions.
                            # The context manager releases the image file handle once the pixels are copied.
                            with Image.open(img_path) as scene_image:
                                np_image = np.array(scene_image)
                            image_clip = ImageClip(np_image).set_duration(audio_clip.duration)

                            # Composite the clip
                            composite_clip = image_clip.set_audio(audio_clip)
                        except Exception as e:
                            logger.exception(f" ❌ [{task_id}] Failed to create video clip for scene {scene_id+1}: {e}")
                            st.error(f"Failed to create video clip for {task_id}: {e}", icon="🎬")
                            generation_errors[timeline_id].append(f"Scene {scene_id+1}: Video clip creation failed.")
                            # Release the audio reader before deleting its file, then
                            # clean up without letting a failed delete abort the run.
                            if audio_clip is not None:
                                try:
                                    audio_clip.close()
                                except Exception as e_close:
                                    logger.warning(f" ⚠️ [{task_id}] Error closing audio clip: {e_close}")
                            _remove_file_quietly(temp_image_files.pop(scene_id, None), task_id)
                            _remove_file_quietly(temp_audio_files.pop(scene_id, None), task_id)
                        else:
                            video_clips.append(composite_clip)
                            logger.info(f" ✅ [{task_id}] Video clip created (Duration: {audio_clip.duration:.2f}s).")
                            st.write(f" ✅ Clip created (Duration: {audio_clip.duration:.2f}s).")
                            scene_success_count += 1

                    # --- End of Scene Loop ---

                    # --- 2d. Assemble Timeline Video ---
                    failed_scene_count = len(segments) - scene_success_count
                    if video_clips and failed_scene_count == 0:  # Only assemble if all scenes were successful
                        status.update(label=f"Composing final video for {timeline_label}...")
                        st.write(f"🎞️ Assembling final video for {timeline_label}...")
                        logger.info(f"🎞️ Assembling final video for {timeline_label} ({len(video_clips)} clips)...")
                        output_filename = os.path.join(temp_dir, f"timeline_{timeline_id}_final.mp4")
                        final_timeline_video = None  # Define before try block
                        try:
                            final_timeline_video = concatenate_videoclips(video_clips, method="compose")
                            # Write video file with specified codecs and fps
                            final_timeline_video.write_videofile(
                                output_filename,
                                fps=VIDEO_FPS,
                                codec=VIDEO_CODEC,
                                audio_codec=AUDIO_CODEC,
                                logger=None,  # Suppress moviepy console spam
                            )
                            final_video_paths[timeline_id] = output_filename
                            # Measured after writing so the reported time includes assembly.
                            timeline_duration = time.time() - timeline_start_time
                            logger.info(f" ✅ [{timeline_label}] Final video saved: {os.path.basename(output_filename)}")
                            st.success(f"✅ Video for {timeline_label} completed in {timeline_duration:.2f}s.")
                        except Exception as e:
                            logger.exception(f" ❌ [{timeline_label}] Failed to write final video: {e}")
                            st.error(f"Failed to assemble video for {timeline_label}: {e}", icon="📼")
                            all_timelines_successful = False
                            generation_errors[timeline_id].append(f"Timeline {timeline_id}: Final video assembly failed.")
                        finally:
                            # Crucially, close all clips to release file handles
                            _close_clips(video_clips, final_timeline_video, timeline_label)

                    elif not video_clips:
                        logger.warning(f"[{timeline_label}] No video clips successfully generated. Skipping final assembly.")
                        st.warning(f"No scenes were successfully processed for {timeline_label}. Video cannot be created.", icon="🚫")
                        all_timelines_successful = False
                    else:  # Some scenes failed
                        logger.warning(f"[{timeline_label}] Encountered errors in {failed_scene_count} scene(s). Skipping final video assembly.")
                        st.warning(f"{timeline_label} had errors in {failed_scene_count} scene(s). Final video not assembled.", icon="⚠️")
                        all_timelines_successful = False
                        # The clips that did succeed still hold open file handles.
                        _close_clips(video_clips, None, timeline_label)

                    # Log errors for the timeline if any occurred
                    if generation_errors[timeline_id]:
                        logger.error(f"Errors occurred in {timeline_label}: {generation_errors[timeline_id]}")

                # --- End of Timelines Loop ---

                # Final status update
                overall_duration = time.time() - overall_start_time
                if all_timelines_successful and final_video_paths:
                    status_msg = f"ChronoWeave Generation Complete! ({len(final_video_paths)} videos in {overall_duration:.2f}s)"
                    status.update(label=status_msg, state="complete", expanded=False)
                    logger.info(status_msg)
                elif final_video_paths:
                    status_msg = f"ChronoWeave Partially Complete ({len(final_video_paths)} videos, some errors occurred). Total time: {overall_duration:.2f}s"
                    # st.status only accepts "running", "complete" or "error";
                    # use "error" and keep the panel expanded so the issues stay visible.
                    status.update(label=status_msg, state="error", expanded=True)
                    logger.warning(status_msg)
                else:
                    status_msg = f"ChronoWeave Generation Failed. No videos produced. Total time: {overall_duration:.2f}s"
                    status.update(label=status_msg, state="error", expanded=True)
                    logger.error(status_msg)

            # --- 3. Display Results ---
            st.header("🎬 Generated Timelines")
            if final_video_paths:
                sorted_timeline_ids = sorted(final_video_paths)
                cols = st.columns(len(sorted_timeline_ids))  # Create columns for side-by-side display

                for idx, timeline_id in enumerate(sorted_timeline_ids):
                    video_path = final_video_paths[timeline_id]
                    # Find matching timeline data for context
                    timeline_data = next((t for t in chrono_response.timelines if t.timeline_id == timeline_id), None)
                    reason = timeline_data.divergence_reason if timeline_data else "Unknown Divergence"
                    with cols[idx]:
                        st.subheader(f"Timeline {timeline_id}")
                        st.caption(f"Divergence: {reason}")
                        try:
                            # Read video bytes for display; the bytes stay in memory,
                            # so the later temp-dir cleanup does not affect them.
                            with open(video_path, 'rb') as video_file:
                                video_bytes = video_file.read()
                            st.video(video_bytes)
                            logger.info(f"Displaying video for Timeline {timeline_id}")
                            # Add download button
                            st.download_button(
                                label=f"Download T{timeline_id} Video",
                                data=video_bytes,
                                file_name=f"chronoweave_timeline_{timeline_id}.mp4",
                                mime="video/mp4",
                            )
                            # Display errors for this timeline if any occurred
                            if generation_errors.get(timeline_id):
                                with st.expander(f"⚠️ View {len(generation_errors[timeline_id])} Generation Issues"):
                                    for error_msg in generation_errors[timeline_id]:
                                        st.warning(f"- {error_msg}")
                        except FileNotFoundError:
                            logger.error(f"Could not find video file for display: {video_path}")
                            st.error(f"Error: Video file not found for Timeline {timeline_id}.", icon="🚨")
                        except Exception as e:
                            logger.exception(f"Could not display video {video_path}: {e}")
                            st.error(f"Error displaying video for Timeline {timeline_id}: {e}", icon="🚨")
            else:
                st.warning("No final videos were successfully generated in this run.")
                # Display global errors if no videos were made
                all_errors = [msg for err_list in generation_errors.values() for msg in err_list]
                if all_errors:
                    st.subheader("Summary of Generation Issues")
                    for error_msg in all_errors:
                        st.error(f"- {error_msg}")
        else:
            # Error message already shown by generate_story_sequence_chrono
            logger.error("Story generation failed, cannot proceed.")

        # --- 4. Cleanup ---
        # Runs whether or not story generation succeeded, so the run directory
        # created above is not left behind after a failed generation.
        _cleanup_temp_dir(temp_dir)

else:
    st.info("Configure settings in the sidebar and click '✨ Generate ChronoWeave ✨' to start.")
|