mgbam committed on
Commit
3c37f6f
·
verified ·
1 Parent(s): 8559672

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +620 -348
app.py CHANGED
@@ -14,14 +14,19 @@ import time
14
  import wave
15
  import contextlib
16
  import asyncio
17
- import uuid # For unique filenames
18
- import shutil # For cleaning up temp dirs
 
19
 
20
  # Image handling
21
  from PIL import Image
 
 
 
22
 
23
  # Video and audio processing
24
- from moviepy.editor import ImageClip, AudioFileClip, CompositeVideoClip, concatenate_videoclips
 
25
 
26
  # Type hints
27
  import typing_extensions as typing
@@ -30,464 +35,731 @@ import typing_extensions as typing
30
  import nest_asyncio
31
  nest_asyncio.apply() # Apply patch for asyncio in environments like Streamlit/Jupyter
32
 
 
 
 
 
33
  # --- Configuration ---
34
- st.set_page_config(page_title="ChronoWeave", layout="wide")
35
- st.title("πŸŒ€ ChronoWeave: Branching Narrative Generator")
36
  st.markdown("""
37
- Generate multiple, branching story timelines from a single theme using AI.
38
- Based on the work of Yousif Ahmed. Copyright 2025 Google LLC.
39
  """)
40
 
41
  # --- Constants ---
42
- MODEL = "models/gemini-1.5-flash" # Or other suitable text model supporting JSON
43
- # Using v1alpha for the Live API for audio output.
44
- AUDIO_MODEL_VERSION = 'v1alpha' # Must be alpha for audio modality
45
- IMAGE_MODEL_ID = "imagen-3" # Or your preferred Imagen model "imagen-3.0-generate-002"
 
 
 
 
 
 
 
 
 
 
 
46
 
47
  # --- API Key Handling ---
 
 
 
48
  try:
49
- # Preferred way to handle secrets in Streamlit sharing/HF Spaces
50
  GOOGLE_API_KEY = st.secrets["GOOGLE_API_KEY"]
51
- os.environ['GOOGLE_API_KEY'] = GOOGLE_API_KEY
52
  except KeyError:
53
- st.error("🚨 Google API Key not found! Please add it as a Secret named 'GOOGLE_API_KEY' in your Hugging Face Space settings.", icon="🚨")
54
- st.stop() # Halt execution if no key
 
 
 
 
 
 
 
 
 
 
 
 
55
 
56
- # --- Initialize Google Client ---
57
  try:
58
- # Initialize the client with the API key
59
  genai.configure(api_key=GOOGLE_API_KEY)
60
 
61
- # Create separate clients or configure one for different API versions if needed
62
- # Client for Text/Imagen (standard API)
63
- client_standard = genai.GenerativeModel(MODEL)
64
- # Client for Live Audio (v1alpha) - requires different client init
 
 
 
 
65
  client_live = genai.Client(
66
- client_options={'api_endpoint': f'{AUDIO_MODEL_VERSION}.generativelanguage.googleapis.com'}
67
  )
68
- # Note: As of recent updates, genai.configure might handle this better,
69
- # but separating clients or explicitly setting endpoints can be more robust.
70
- # Adjust based on the library version and observed behavior.
71
-
72
 
73
  except Exception as e:
74
- st.error(f"🚨 Failed to initialize Google AI Client: {e}", icon="🚨")
 
75
  st.stop()
76
 
77
 
78
- # --- Define Structured Output Schemas ---
79
- class StorySegment(typing.TypedDict):
80
- scene_id: int
81
- image_prompt: str
82
- audio_text: str
83
- character_description: str
84
- timeline_visual_modifier: typing.Optional[str]
85
-
86
- class Timeline(typing.TypedDict):
87
- timeline_id: int
88
- divergence_reason: str
89
- segments: list[StorySegment]
90
-
91
- class ChronoWeaveResponse(typing.TypedDict):
92
- core_theme: str
93
- timelines: list[Timeline]
94
- total_scenes_per_timeline: int
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
95
 
96
  # --- Helper Functions ---
97
 
98
  @contextlib.contextmanager
99
- def wave_file(filename, channels=1, rate=24000, sample_width=2):
100
- """Context manager to write WAV files."""
101
- with wave.open(filename, "wb") as wf:
 
 
102
  wf.setnchannels(channels)
103
- wf.setsampwidth(sample_width)
104
  wf.setframerate(rate)
105
  yield wf
106
-
107
- async def generate_audio_live_async(api_text, output_filename):
108
- """Generates audio using Gemini Live API (async version)."""
 
 
 
 
 
 
 
 
 
 
 
109
  collected_audio = bytearray()
110
- st.write(f"πŸŽ™οΈ Generating audio for: '{api_text[:50]}...'") # Log start
 
111
 
112
  try:
113
- # Use the 'client_live' specifically configured for v1alpha
114
- live_model = client_live.get_model(f"models/gemini-1.5-flash") # Specify model within the live client context
115
-
116
  config = {
117
- "response_modalities": ["AUDIO"]
 
 
 
 
 
118
  }
119
- # Connect to the Live API using the live client.
 
 
 
 
 
 
 
 
 
 
120
  async with live_model.connect(config=config) as session:
121
- await session.send_request([api_text]) # Simpler send for single prompt
 
 
122
  async for response in session.stream_content():
123
- if response.audio_chunk:
124
  collected_audio.extend(response.audio_chunk.data)
 
 
 
 
 
125
 
126
  if not collected_audio:
127
- st.warning(f"⚠️ No audio data received for: '{api_text[:50]}...'")
128
- return None # Indicate failure
129
-
130
- audio_bytes = bytes(collected_audio)
131
- # Write the collected audio bytes into a WAV file.
132
- with wave_file(output_filename) as wf:
133
- wf.writeframes(audio_bytes)
134
- st.write(f" βœ… Audio saved: {os.path.basename(output_filename)}")
135
  return output_filename
 
 
 
 
 
136
  except Exception as e:
137
- st.error(f" ❌ Audio generation failed for '{api_text[:50]}...': {e}", icon="🚨")
 
138
  return None
139
 
140
 
141
- def generate_story_sequence_chrono(theme: str, num_scenes: int, num_timelines: int, divergence_prompt: str = "") -> ChronoWeaveResponse | None:
142
- """Generates branching story sequences using Gemini structured output."""
143
- st.write(f"πŸ“š Generating {num_timelines} timeline(s) for theme: '{theme}'...")
144
- divergence_instruction = f"Introduce divergence between timelines. {divergence_prompt}" if divergence_prompt else "Introduce natural points of divergence between timelines after the first scene or two."
145
-
146
- prompt = f'''
147
- As an expert narrative designer, create a branching story based on the theme: "{theme}".
148
- Generate exactly {num_timelines} distinct timelines, each containing exactly {num_scenes} scenes.
149
- Each scene should be approximately 5-10 seconds long when narrated.
150
-
151
- {divergence_instruction} Clearly state the reason for divergence for each timeline after the first.
152
-
153
- For each scene in each timeline, provide:
154
- - scene_id: An integer starting from 0 for the scene number within its timeline.
155
- - image_prompt: A concise (15-25 words) description for an image generation model. Focus on visual details, characters (animals/objects only, NO PEOPLE), background, and action. Maintain a consistent 'kids animation style' (e.g., simple, rounded shapes, bright colors) across all scenes and timelines unless specified by a timeline_visual_modifier.
156
- - audio_text: A single, engaging sentence of narration or dialogue for the scene (max 25 words).
157
- - character_description: Brief description of recurring characters (names, key features) mentioned in *this specific scene's image prompt*. Keep consistent within a timeline. (Max 30 words).
158
- - timeline_visual_modifier: (Optional, string or null) A *brief* hint if this timeline should have a slightly different visual feel from this scene onwards (e.g., "slightly darker lighting", "more cluttered background", "character looks worried"). Keep it subtle. Use null if no specific modifier.
159
-
160
- Constraint: Ensure the output strictly adheres to the following JSON schema. Do not include preamble or explanations outside the JSON structure. Respond ONLY with the JSON object.
161
-
162
- JSON Schema:
163
- {{
164
- "type": "object",
165
- "properties": {{
166
- "core_theme": {{"type": "string"}},
167
- "timelines": {{
168
- "type": "array",
169
- "items": {{
170
- "type": "object",
171
- "properties": {{
172
- "timeline_id": {{"type": "integer"}},
173
- "divergence_reason": {{"type": "string"}},
174
- "segments": {{
175
- "type": "array",
176
- "items": {{
177
- "type": "object",
178
- "properties": {{
179
- "scene_id": {{"type": "integer"}},
180
- "image_prompt": {{"type": "string"}},
181
- "audio_text": {{"type": "string"}},
182
- "character_description": {{"type": "string"}},
183
- "timeline_visual_modifier": {{"type": ["string", "null"]}}
184
- }},
185
- "required": ["scene_id", "image_prompt", "audio_text", "character_description", "timeline_visual_modifier"]
186
- }}
187
- }}
188
- }},
189
- "required": ["timeline_id", "divergence_reason", "segments"]
190
- }}
191
- }},
192
- "total_scenes_per_timeline": {{"type": "integer"}}
193
- }},
194
- "required": ["core_theme", "timelines", "total_scenes_per_timeline"]
195
- }}
196
- '''
197
 
198
  try:
199
  response = client_standard.generate_content(
200
  contents=prompt,
201
  generation_config=genai.types.GenerationConfig(
202
- response_mime_type="application/json",
203
- # Optional: Add temperature, etc. if needed
204
  )
205
- # The schema can also be passed via generation_config in some versions/models
206
- # config={
207
- # 'response_mime_type': 'application/json',
208
- # 'response_schema': ChronoWeaveResponse # Pass the TypedDict directly
209
- # }
210
  )
211
 
212
- # Debugging: Print raw response text
213
- # st.text_area("Raw Gemini Response:", response.text, height=200)
 
 
 
 
 
 
 
 
 
 
214
 
215
- story_data = json.loads(response.text) # Assuming response.text contains the JSON string
216
- st.success("βœ… Story structure generated successfully!")
217
- # Basic validation (can be more thorough)
218
- if 'timelines' in story_data and isinstance(story_data['timelines'], list):
219
- # Further validation could check segment structure, etc.
220
- return story_data # Return the parsed dictionary
221
- else:
222
- st.error("🚨 Generated story data is missing the 'timelines' list.", icon="🚨")
223
- return None
 
 
 
224
 
225
- except json.JSONDecodeError as e:
226
- st.error(f"🚨 Failed to decode JSON response from Gemini: {e}", icon="🚨")
227
- st.text_area("Problematic Response Text:", response.text if 'response' in locals() else "No response object.", height=150)
228
- return None
229
  except Exception as e:
230
- st.error(f"🚨 Error generating story sequence: {e}", icon="🚨")
231
- # Log the prompt potentially? Be careful with sensitive data if applicable.
232
- # st.text_area("Failed Prompt:", prompt, height=200)
 
233
  return None
234
 
235
 
236
- def generate_image_imagen(prompt: str, aspect_ratio: str = "1:1") -> Image.Image | None:
237
- """Generates an image using Imagen."""
238
- st.write(f"πŸ–ΌοΈ Generating image for: '{prompt[:60]}...'")
 
 
 
 
 
 
 
 
 
 
 
239
  try:
240
- # Use the standard client's dedicated image generation method
 
 
 
241
  response = client_standard.generate_content(
242
- f"Generate an image with the following prompt, ensuring a child-friendly animation style and NO human figures: {prompt}",
243
  generation_config=genai.types.GenerationConfig(
244
- candidate_count=1, # Generate one image
245
- # Imagen specific parameters are often passed differently or rely on model defaults
246
- # Check documentation for precise Imagen control via the unified API
 
 
 
247
  ),
248
- # If the model/API version requires specific image parameters:
249
- # tools=[genai.ImageParams(model=IMAGE_MODEL_ID, number_of_images=1, aspect_ratio=aspect_ratio, person_generation="DONT_ALLOW")]
250
  )
251
 
252
- # Accessing image data might vary slightly depending on API response structure
253
- # This assumes response.parts contains the image data if successful
254
- if response.parts and response.parts[0].inline_data:
255
  image_bytes = response.parts[0].inline_data.data
256
- image = Image.open(BytesIO(image_bytes))
257
- st.write(" βœ… Image generated.")
258
- return image
 
 
 
 
 
259
  else:
260
- # Check for safety blocks or other reasons for failure
261
- if response.prompt_feedback.block_reason:
262
- st.warning(f" ⚠️ Image generation blocked for prompt '{prompt[:60]}...'. Reason: {response.prompt_feedback.block_reason}", icon="⚠️")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
263
  else:
264
- st.warning(f" ⚠️ No image data received for prompt '{prompt[:60]}...'.", icon="⚠️")
265
- # Debugging: st.write(response)
 
 
266
  return None
267
 
 
 
 
 
268
  except Exception as e:
269
- st.error(f" ❌ Image generation failed for '{prompt[:60]}...': {e}", icon="🚨")
 
270
  return None
271
 
272
 
273
  # --- Streamlit UI Elements ---
274
- st.sidebar.header("Configuration")
275
 
276
- # API Key display/check (already handled above, but sidebar is a good place)
277
  if GOOGLE_API_KEY:
278
- st.sidebar.success("Google API Key Loaded!", icon="βœ…")
279
  else:
 
280
  st.sidebar.error("Google API Key Missing!", icon="🚨")
281
 
282
- theme = st.sidebar.text_input("Story Theme:", "A curious squirrel finds a shiny object")
283
- num_scenes = st.sidebar.slider("Scenes per Timeline:", min_value=2, max_value=7, value=3)
284
- num_timelines = st.sidebar.slider("Number of Timelines:", min_value=1, max_value=4, value=2)
285
- divergence_prompt = st.sidebar.text_input("Divergence Hint (Optional):", placeholder="e.g., What if it started raining?")
286
- aspect_ratio = st.sidebar.selectbox("Image Aspect Ratio:", ["1:1", "16:9", "9:16"], index=0)
 
 
 
 
 
 
 
 
287
 
288
- generate_button = st.sidebar.button("✨ Generate ChronoWeave ✨", type="primary", disabled=(not GOOGLE_API_KEY))
289
 
290
  st.sidebar.markdown("---")
291
- st.sidebar.info("Note: Generation can take several minutes depending on settings.")
 
 
292
 
293
  # --- Main Logic ---
294
  if generate_button:
295
  if not theme:
296
- st.error("Please enter a story theme.", icon="πŸ‘ˆ")
297
  else:
298
  # Create a unique temporary directory for this run
299
- run_id = str(uuid.uuid4())
300
- temp_dir = os.path.join(".", f"chrono_temp_{run_id}") # Create in current dir
301
- os.makedirs(temp_dir, exist_ok=True)
302
- st.write(f"Working directory: {temp_dir}")
303
-
304
- final_video_paths = {} # To store {timeline_id: video_path}
305
-
306
- with st.spinner("Generating narrative structure..."):
307
- chrono_data = generate_story_sequence_chrono(theme, num_scenes, num_timelines, divergence_prompt)
308
-
309
- if chrono_data and 'timelines' in chrono_data:
310
- st.success(f"Found {len(chrono_data['timelines'])} timelines. Processing each...")
311
-
312
- all_timelines_successful = True # Flag to track if all timelines worked
313
-
314
- # Use st.status for detailed progress
 
 
 
 
 
 
 
 
 
 
315
  with st.status("Generating assets and composing videos...", expanded=True) as status:
316
 
317
- for timeline in chrono_data['timelines']:
318
- timeline_id = timeline['timeline_id']
319
- divergence = timeline['divergence_reason']
320
- segments = timeline['segments']
321
- st.subheader(f"Timeline {timeline_id}: {divergence}")
322
-
323
- temp_image_files = []
324
- temp_audio_files = []
325
- video_clips = []
326
- timeline_successful = True # Flag for this specific timeline
327
-
328
- for i, segment in enumerate(segments):
329
- status.update(label=f"Processing Timeline {timeline_id}, Scene {i+1}/{num_scenes}...")
330
- scene_id = segment['scene_id']
331
- image_prompt = segment['image_prompt']
332
- audio_text = segment['audio_text']
333
- char_desc = segment['character_description']
334
- vis_mod = segment['timeline_visual_modifier']
335
-
336
- st.write(f"--- Scene {i+1} (T{timeline_id}) ---")
337
- st.write(f"* **Image Prompt:** {image_prompt}" + (f" (Modifier: {vis_mod})" if vis_mod else ""))
338
- st.write(f"* **Audio Text:** {audio_text}")
339
- # st.write(f"* Character Desc: {char_desc}") # Can be verbose
340
-
341
- # --- Image Generation ---
342
- combined_prompt = f"{image_prompt} {char_desc}"
343
- if vis_mod:
344
- combined_prompt += f" Style hint: {vis_mod}"
345
-
346
- generated_image = generate_image_imagen(combined_prompt, aspect_ratio)
 
 
 
 
 
 
 
 
 
347
 
348
  if generated_image:
349
- image_path = os.path.join(temp_dir, f"t{timeline_id}_s{i}_image.png")
350
- generated_image.save(image_path)
351
- temp_image_files.append(image_path)
352
- st.image(generated_image, width=200) # Show thumbnail
353
- else:
354
- st.warning(f"Skipping scene {i+1} in timeline {timeline_id} due to image generation failure.")
355
- timeline_successful = False
356
- continue # Skip to next segment if image fails
357
-
358
- # --- Audio Generation ---
359
- # Add negative prompt to prevent conversational filler
360
- audio_negative_prompt = "Narrate the following sentence directly, with expression, without any introduction or closing remarks like 'Okay' or 'Here is the narration'. Just read the sentence:"
361
- full_audio_prompt = f"{audio_negative_prompt}\n{audio_text}"
362
- audio_path = os.path.join(temp_dir, f"t{timeline_id}_s{i}_audio.wav")
363
-
364
- # Run the async audio generation function
365
- try:
366
- generated_audio_path = asyncio.run(generate_audio_live_async(full_audio_prompt, audio_path))
367
- except Exception as e:
368
- st.error(f"Asyncio error during audio gen: {e}")
369
- generated_audio_path = None
370
-
371
-
372
- if generated_audio_path:
373
- temp_audio_files.append(generated_audio_path)
374
- # st.audio(generated_audio_path) # Optional: Preview audio
375
  else:
376
- st.warning(f"Skipping video clip for scene {i+1} in timeline {timeline_id} due to audio generation failure.")
377
- # Clean up the image file for this failed scene segment
378
- if os.path.exists(image_path):
379
- os.remove(image_path)
380
- temp_image_files.remove(image_path)
381
- timeline_successful = False
382
- continue # Skip making video clip if audio fails
383
-
384
-
385
- # --- Create Video Clip ---
386
- try:
387
- st.write(" 🎬 Creating video clip...")
388
- audio_clip = AudioFileClip(generated_audio_path)
389
- # Ensure PIL Image is used if needed, or numpy array directly
390
- np_image = np.array(Image.open(image_path))
391
- # Create ImageClip, ensure duration matches audio
392
- image_clip = ImageClip(np_image).set_duration(audio_clip.duration)
393
-
394
- # Handle potential size mismatch if needed (resize image_clip or set size explicitly)
395
- # image_clip = image_clip.resize(width=...)
396
-
397
- composite_clip = image_clip.set_audio(audio_clip) # Simpler composition
398
- video_clips.append(composite_clip)
399
- st.write(" βœ… Clip created.")
400
- except Exception as e:
401
- st.error(f" ❌ Failed to create video clip for scene {i+1} (T{timeline_id}): {e}", icon="🚨")
402
- timeline_successful = False
403
- # Don't break the whole timeline, just skip this clip maybe? Or mark timeline as failed.
404
-
405
-
406
- # --- Assemble Timeline Video ---
407
- if video_clips and timeline_successful: # Only assemble if clips were made and no major errors
408
- status.update(label=f"Composing final video for Timeline {timeline_id}...")
409
- st.write(f"🎞️ Assembling final video for Timeline {timeline_id}...")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
410
  try:
411
  final_timeline_video = concatenate_videoclips(video_clips, method="compose")
412
- output_filename = os.path.join(temp_dir, f"timeline_{timeline_id}_final_video.mp4")
413
- # Use 'libx264' for broader compatibility, specify audio codec
414
- final_timeline_video.write_videofile(output_filename, fps=24, codec='libx264', audio_codec='aac')
 
 
 
 
 
 
415
  final_video_paths[timeline_id] = output_filename
416
- st.success(f" βœ… Video for Timeline {timeline_id} saved: {os.path.basename(output_filename)}")
417
-
418
- # Close clips to release resources
419
- for clip in video_clips:
420
- if hasattr(clip, 'close'): clip.close()
421
- if hasattr(clip, 'audio') and hasattr(clip.audio, 'close'): clip.audio.close()
422
- if hasattr(final_timeline_video, 'close'): final_timeline_video.close()
423
-
424
 
425
  except Exception as e:
426
- st.error(f" ❌ Failed to write final video for Timeline {timeline_id}: {e}", icon="🚨")
 
427
  all_timelines_successful = False
428
- elif not video_clips:
429
- st.warning(f"No video clips were successfully generated for Timeline {timeline_id}. Skipping final video assembly.")
430
- all_timelines_successful = False
431
- else:
432
- st.warning(f"Timeline {timeline_id} encountered errors. Skipping final video assembly.")
433
- all_timelines_successful = False
434
-
 
 
 
 
 
 
 
 
 
 
435
 
436
- # Intermediate cleanup for the timeline (optional, helps manage files)
437
- # for file in temp_audio_files:
438
- # if os.path.exists(file): os.remove(file)
439
- # for file in temp_image_files:
440
- # if os.path.exists(file): os.remove(file)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
441
 
442
  # Final status update
 
443
  if all_timelines_successful and final_video_paths:
444
- status.update(label="ChronoWeave Generation Complete!", state="complete", expanded=False)
 
 
445
  elif final_video_paths:
446
- status.update(label="ChronoWeave Generation Partially Complete (some errors occurred).", state="warning", expanded=False)
 
 
447
  else:
448
- status.update(label="ChronoWeave Generation Failed.", state="error", expanded=False)
449
-
 
450
 
451
- # --- Display Results ---
452
- st.header("Generated Timelines")
453
  if final_video_paths:
454
  sorted_timeline_ids = sorted(final_video_paths.keys())
455
- for timeline_id in sorted_timeline_ids:
 
 
456
  video_path = final_video_paths[timeline_id]
457
- # Find matching timeline divergence reason
458
- reason = "Unknown"
459
- for t in chrono_data.get('timelines', []):
460
- if t.get('timeline_id') == timeline_id:
461
- reason = t.get('divergence_reason', 'N/A')
462
- break
463
- st.subheader(f"Timeline {timeline_id}: {reason}")
464
- try:
465
- video_file = open(video_path, 'rb')
466
- video_bytes = video_file.read()
467
- st.video(video_bytes)
468
- video_file.close()
469
- except FileNotFoundError:
470
- st.error(f"Could not find video file: {video_path}", icon="🚨")
471
- except Exception as e:
472
- st.error(f"Could not display video {video_path}: {e}", icon="🚨")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
473
  else:
474
- st.warning("No final videos were successfully generated.")
475
-
476
- # --- Cleanup ---
477
- st.write("Cleaning up temporary files...")
 
 
 
 
 
 
478
  try:
479
  shutil.rmtree(temp_dir)
480
- st.write(" βœ… Temporary files removed.")
 
481
  except Exception as e:
482
- st.warning(f" ⚠️ Could not remove temporary directory {temp_dir}: {e}", icon="⚠️")
483
-
484
 
485
- elif not chrono_data:
486
- st.error("Story generation failed. Cannot proceed.", icon="πŸ›‘")
 
487
  else:
488
- # This case might happen if chrono_data is returned but is malformed (e.g., no 'timelines' key)
489
- st.error("Story data seems malformed. Cannot proceed.", icon="πŸ›‘")
490
- # st.json(chrono_data) # Display the problematic data
491
 
492
  else:
493
- st.info("Configure settings in the sidebar and click 'Generate ChronoWeave'")
 
14
  import wave
15
  import contextlib
16
  import asyncio
17
+ import uuid # For unique identifiers
18
+ import shutil # For directory operations
19
+ import logging # For better logging
20
 
21
  # Image handling
22
  from PIL import Image
23
+ # Pydantic for data validation
24
+ from pydantic import BaseModel, Field, ValidationError, validator
25
+ from typing import List, Optional, Literal
26
 
27
  # Video and audio processing
28
+ from moviepy.editor import ImageClip, AudioFileClip, concatenate_videoclips
29
+ from moviepy.config import change_settings # Potential for setting imagemagick path if needed
30
 
31
  # Type hints
32
  import typing_extensions as typing
 
35
  import nest_asyncio
36
  nest_asyncio.apply() # Apply patch for asyncio in environments like Streamlit/Jupyter
37
 
38
+ # --- Logging Setup ---
39
+ logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
40
+ logger = logging.getLogger(__name__)
41
+
42
  # --- Configuration ---
43
+ st.set_page_config(page_title="ChronoWeave", layout="wide", initial_sidebar_state="expanded")
44
+ st.title("πŸŒ€ ChronoWeave: Advanced Branching Narrative Generator")
45
  st.markdown("""
46
+ Generate multiple, branching story timelines from a single theme using AI, complete with images and narration.
47
+ *Based on the work of Yousif Ahmed. Copyright 2025 Google LLC.*
48
  """)
49
 
50
  # --- Constants ---
51
+ # Text/JSON Model
52
+ TEXT_MODEL_ID = "models/gemini-1.5-flash" # Or "gemini-1.5-pro" for potentially higher quality/cost
53
+ # Audio Model Config
54
+ AUDIO_API_VERSION = 'v1alpha' # Required for audio modality
55
+ AUDIO_MODEL_ID = f"models/gemini-1.5-flash" # Model used via the v1alpha endpoint
56
+ AUDIO_SAMPLING_RATE = 24000 # Standard for TTS models like Google's
57
+ # Image Model Config
58
+ IMAGE_MODEL_ID = "imagen-3" # Or specific version like "imagen-3.0-generate-002"
59
+ DEFAULT_ASPECT_RATIO = "1:1"
60
+ # Video Config
61
+ VIDEO_FPS = 24
62
+ VIDEO_CODEC = "libx264" # Widely compatible H.264
63
+ AUDIO_CODEC = "aac" # Common audio codec for MP4
64
+ # File Management
65
+ TEMP_DIR_BASE = ".chrono_temp" # Base name for temporary directories
66
 
67
  # --- API Key Handling ---
68
+ # This section correctly handles the missing secret. The error in the traceback means
69
+ # the secret wasn't set in the *deployment environment*.
70
+ GOOGLE_API_KEY = None
71
  try:
72
+ # Preferred way: Use Streamlit secrets when deployed
73
  GOOGLE_API_KEY = st.secrets["GOOGLE_API_KEY"]
74
+ logger.info("Google API Key loaded from Streamlit secrets.")
75
  except KeyError:
76
+ # Fallback: Check environment variable (useful for local development)
77
+ GOOGLE_API_KEY = os.environ.get('GOOGLE_API_KEY')
78
+ if GOOGLE_API_KEY:
79
+ logger.info("Google API Key loaded from environment variable.")
80
+ else:
81
+ # Error if neither is found
82
+ st.error(
83
+ "🚨 **Google API Key Not Found!**\n"
84
+ "Please configure your Google API Key:\n"
85
+ "1. **Streamlit Cloud/Hugging Face Spaces:** Add it as a Secret named `GOOGLE_API_KEY` in your app's settings.\n"
86
+ "2. **Local Development:** Set the `GOOGLE_API_KEY` environment variable.",
87
+ icon="🚨"
88
+ )
89
+ st.stop() # Halt execution
90
 
91
+ # --- Initialize Google Clients ---
92
  try:
 
93
  genai.configure(api_key=GOOGLE_API_KEY)
94
 
95
+ # Client for Text/Imagen Generation (using standard API endpoint)
96
+ client_standard = genai.GenerativeModel(TEXT_MODEL_ID)
97
+ logger.info(f"Initialized standard GenerativeModel for {TEXT_MODEL_ID}.")
98
+
99
+ # Client specifically for the Live API (Audio) using the v1alpha endpoint
100
+ # Note: Ensure the 'audiomodality' package or relevant parts of google-cloud-aiplatform are available if needed,
101
+ # depending on the exact library version and API stability.
102
+ # As of late 2023/early 2024, using a separate client instance targeting the specific endpoint is often necessary.
103
  client_live = genai.Client(
104
+ client_options={'api_endpoint': f'{AUDIO_API_VERSION}.generativelanguage.googleapis.com'}
105
  )
106
+ live_model = client_live.get_model(AUDIO_MODEL_ID) # Get the model handle via the live client
107
+ logger.info(f"Initialized live client for audio generation ({AUDIO_MODEL_ID} via {AUDIO_API_VERSION}).")
 
 
108
 
109
  except Exception as e:
110
+ logger.exception("Failed to initialize Google AI Clients.")
111
+ st.error(f"🚨 Failed to initialize Google AI Clients: {e}", icon="🚨")
112
  st.stop()
113
 
114
 
115
+ # --- Define Pydantic Schemas for Robust Validation ---
116
+ class StorySegment(BaseModel):
117
+ scene_id: int = Field(..., ge=0, description="Scene number within the timeline, starting from 0.")
118
+ image_prompt: str = Field(..., min_length=10, max_length=150, description="Concise visual description for image generation (15-35 words). Focus on non-human characters, setting, action, style.")
119
+ audio_text: str = Field(..., min_length=5, max_length=150, description="Single sentence of narration/dialogue for the scene (max 30 words).")
120
+ character_description: str = Field(..., max_length=100, description="Brief description of key non-human characters/objects in *this* scene's prompt for consistency.")
121
+ timeline_visual_modifier: Optional[str] = Field(None, max_length=50, description="Optional subtle visual style hint (e.g., 'slightly darker', 'more vibrant colors').")
122
+
123
+ @validator('image_prompt')
124
+ def image_prompt_no_humans(cls, v):
125
+ if any(word in v.lower() for word in ["person", "people", "human", "man", "woman", "boy", "girl", "child"]):
126
+ # Instead of raising error, we'll try to guide the LLM better in the main prompt
127
+ # raise ValueError("Image prompt must not contain descriptions of humans.")
128
+ logger.warning(f"Image prompt '{v[:50]}...' may contain human descriptions. Relying on API-level controls.")
129
+ return v
130
+
131
+ class Timeline(BaseModel):
132
+ timeline_id: int = Field(..., ge=0, description="Unique identifier for this timeline.")
133
+ divergence_reason: str = Field(..., min_length=5, description="Clear reason why this timeline branched off.")
134
+ segments: List[StorySegment] = Field(..., min_items=1, description="List of scenes composing this timeline.")
135
+
136
+ class ChronoWeaveResponse(BaseModel):
137
+ core_theme: str = Field(..., min_length=5, description="The central theme provided by the user.")
138
+ timelines: List[Timeline] = Field(..., min_items=1, description="List of generated timelines.")
139
+ total_scenes_per_timeline: int = Field(..., gt=0, description="The requested number of scenes per timeline.")
140
+
141
+ @validator('timelines')
142
+ def check_timeline_segment_count(cls, timelines, values):
143
+ if 'total_scenes_per_timeline' in values:
144
+ expected_scenes = values['total_scenes_per_timeline']
145
+ for i, timeline in enumerate(timelines):
146
+ if len(timeline.segments) != expected_scenes:
147
+ raise ValueError(f"Timeline {i} (ID: {timeline.timeline_id}) has {len(timeline.segments)} segments, but expected {expected_scenes}.")
148
+ return timelines
149
 
150
  # --- Helper Functions ---
151
 
152
  @contextlib.contextmanager
153
+ def wave_file_writer(filename: str, channels: int = 1, rate: int = AUDIO_SAMPLING_RATE, sample_width: int = 2):
154
+ """Context manager to safely write WAV files."""
155
+ wf = None
156
+ try:
157
+ wf = wave.open(filename, "wb")
158
  wf.setnchannels(channels)
159
+ wf.setsampwidth(sample_width) # 2 bytes for 16-bit audio
160
  wf.setframerate(rate)
161
  yield wf
162
+ except Exception as e:
163
+ logger.error(f"Error opening/configuring wave file {filename}: {e}")
164
+ raise # Re-raise the exception
165
+ finally:
166
+ if wf:
167
+ wf.close()
168
+ # logger.debug(f"Closed wave file: {filename}")
169
+
170
+
171
async def generate_audio_live_async(api_text: str, output_filename: str, voice: Optional[str] = None) -> Optional[str]:
    """Narrate *api_text* through the live audio session and save it as a WAV file.

    The text is wrapped in a directive that asks the model to speak only the
    sentence itself, streamed audio chunks are accumulated in memory, and the
    result is written to *output_filename* via ``wave_file_writer``.

    Args:
        api_text: The sentence to narrate.
        output_filename: Destination WAV path; its basename (up to the first
            dot) doubles as the task id used in log/UI messages.
        voice: Currently unused - accepted for interface compatibility only.

    Returns:
        *output_filename* on success, or ``None`` when the stream reports an
        error, no audio arrives, the prompt is blocked, or any other
        exception occurs (all failures are logged and surfaced in the UI).
    """
    audio_buffer = bytearray()
    task_id = os.path.basename(output_filename).split('.')[0]  # Extract T#_S# for logging
    text_preview = api_text[:60]
    logger.info(f"πŸŽ™οΈ [{task_id}] Requesting audio for: '{text_preview}...'")

    try:
        # Audio settings requested from the session opened on 'live_model'.
        audio_settings = {
            "audio_encoding": "LINEAR16",  # Required format for WAV output
            "sample_rate_hertz": AUDIO_SAMPLING_RATE,
        }
        config = {
            "response_modalities": ["AUDIO"],
            "audio_config": audio_settings,
        }

        # Prepend a strong directive so the model does not add conversational
        # filler around the narration.
        directive_prompt = (
            "Narrate the following sentence directly and engagingly. "
            "Do not add any introductory or concluding remarks like 'Okay', 'Sure', or 'Here is the narration'. "
            "Speak only the sentence itself:\n\n"
            f'"{api_text}"'
        )

        async with live_model.connect(config=config) as session:
            await session.send_request([directive_prompt])

            async for response in session.stream_content():
                if response.audio_chunk and response.audio_chunk.data:
                    audio_buffer.extend(response.audio_chunk.data)
                # An error reported inside the stream aborts this request.
                if response.error:
                    logger.error(f" ❌ [{task_id}] Error during audio stream: {response.error}")
                    st.error(f"Audio stream error for scene {task_id}: {response.error}", icon="πŸ”Š")
                    return None

        if len(audio_buffer) == 0:
            logger.warning(f"⚠️ [{task_id}] No audio data received for: '{text_preview}...'")
            st.warning(f"No audio data generated for scene {task_id}.", icon="πŸ”Š")
            return None

        # Persist the collected PCM bytes as a WAV file.
        with wave_file_writer(output_filename, rate=AUDIO_SAMPLING_RATE) as wav_out:
            wav_out.writeframes(bytes(audio_buffer))
        saved_name = os.path.basename(output_filename)
        logger.info(f" βœ… [{task_id}] Audio saved: {saved_name} ({len(audio_buffer)} bytes)")
        return output_filename

    except genai.types.generation_types.BlockedPromptException as bpe:
        logger.error(f" ❌ [{task_id}] Audio generation blocked for prompt '{text_preview}...': {bpe}")
        st.error(f"Audio generation blocked for scene {task_id} due to safety settings.", icon="πŸ”‡")
        return None
    except Exception as e:
        logger.exception(f" ❌ [{task_id}] Audio generation failed unexpectedly for '{text_preview}...': {e}")
        st.error(f"Audio generation failed for scene {task_id}: {e}", icon="πŸ”Š")
        return None
233
 
234
 
235
def generate_story_sequence_chrono(
    theme: str,
    num_scenes: int,
    num_timelines: int,
    divergence_prompt: str = ""
) -> Optional[ChronoWeaveResponse]:
    """Request a branching story from the text model and validate it.

    Builds a prompt that embeds the JSON schema of ``ChronoWeaveResponse``,
    asks the model for a JSON response, parses it and validates the result
    with Pydantic.

    Args:
        theme: Core theme of the story.
        num_scenes: Number of scenes requested per timeline.
        num_timelines: Number of timelines requested.
        divergence_prompt: Optional hint describing how timelines may differ.

    Returns:
        The validated ``ChronoWeaveResponse``, or ``None`` when the response
        is not valid JSON, fails schema validation, the prompt is blocked, or
        any other error occurs. Every failure is logged and shown in the UI.
    """
    st.info(f"πŸ“š Generating {num_timelines} timeline(s) x {num_scenes} scenes for theme: '{theme}'...")
    logger.info(f"Requesting story structure: Theme='{theme}', Timelines={num_timelines}, Scenes={num_scenes}")

    divergence_instruction = (
        f"Introduce clear points of divergence between timelines, starting potentially after the first scene. "
        f"If provided, use this hint for divergence: '{divergence_prompt}'. "
        f"Clearly state the divergence reason for each timeline (except potentially the first)."
    )

    # schema_json() already returns a JSON *string*. Passing it through
    # json.dumps() again would embed a single quoted, backslash-escaped string
    # literal in the prompt instead of the readable schema.
    schema_text = ChronoWeaveResponse.schema_json(indent=2)

    prompt = f"""
    Act as an expert narrative designer specializing in short, visual, branching stories for children.
    Create a story based on the core theme: "{theme}".

    **Instructions:**
    1. Generate exactly **{num_timelines}** distinct timelines.
    2. Each timeline must contain exactly **{num_scenes}** sequential scenes.
    3. **Crucially, DO NOT include any humans, people, or humanoid figures** in the descriptions or actions. Focus strictly on animals, fantasy creatures, animated objects, or natural elements.
    4. {divergence_instruction}
    5. Maintain a consistent visual style across all scenes and timelines: **'Simple, friendly kids animation style with bright colors and rounded shapes'**, unless a `timeline_visual_modifier` subtly alters it.
    6. Each scene's narration (`audio_text`) should be a single, concise sentence (approx. 5-10 seconds spoken length, max 30 words).
    7. Image prompts (`image_prompt`) should be descriptive (15-35 words), focusing on the non-human character(s), setting, action, and visual style. Explicitly mention the main character(s) for consistency.
    8. `character_description` should briefly describe recurring non-human characters mentioned *in the specific scene's image prompt* (name, key visual features). Keep consistent within a timeline.

    **Output Format:**
    Respond ONLY with a valid JSON object adhering strictly to the provided schema. Do not include any text before or after the JSON object.

    **JSON Schema:**
    ```json
    {schema_text}
    ```
    """

    try:
        response = client_standard.generate_content(
            contents=prompt,
            generation_config=genai.types.GenerationConfig(
                response_mime_type="application/json",
                temperature=0.7  # Add some creativity
            )
        )

        # Step 1: the response must be parseable JSON.
        try:
            raw_data = json.loads(response.text)
        except json.JSONDecodeError as json_err:
            logger.error(f"Failed to decode JSON response: {json_err}")
            logger.error(f"Problematic Response Text:\n{response.text}")
            st.error(f"🚨 Failed to parse the story structure from the AI. Error: {json_err}", icon="πŸ“„")
            st.text_area("Problematic AI Response:", response.text, height=200)
            return None

        # Step 2: the parsed data must match the Pydantic model.
        try:
            validated_data = ChronoWeaveResponse.parse_obj(raw_data)
        except ValidationError as val_err:
            logger.error(f"JSON structure validation failed: {val_err}")
            logger.error(f"Received Data:\n{json.dumps(raw_data, indent=2)}")
            st.error(f"🚨 The generated story structure is invalid: {val_err}", icon="🧬")
            st.json(raw_data)  # Show the invalid structure
            return None

        logger.info("βœ… Story structure generated and validated successfully!")
        st.success("βœ… Story structure generated and validated!")
        return validated_data

    except genai.types.generation_types.BlockedPromptException as bpe:
        logger.error(f"Story generation prompt blocked: {bpe}")
        st.error("🚨 The story generation prompt was blocked, likely due to safety filters. Try rephrasing the theme.", icon="🚫")
        return None
    except Exception as e:
        logger.exception("Error during story sequence generation:")
        st.error(f"🚨 An unexpected error occurred during story generation: {e}", icon="πŸ’₯")
        return None
322
 
323
 
324
def generate_image_imagen(prompt: str, aspect_ratio: str = "1:1", task_id: str = "IMG") -> Optional[Image.Image]:
    """Generate one image for *prompt* through the standard client.

    The prompt is prefixed with style guidance and a "no humans" constraint
    before being sent. The first response part is expected to carry the image
    bytes as inline data.

    Args:
        prompt: Scene description to illustrate.
        aspect_ratio: Requested aspect ratio. NOTE: it is currently only
            logged; it is not sent to the model in any form.
        task_id: Identifier used in log and UI messages.

    Returns:
        A PIL image, or ``None`` when no image data is returned, the data
        cannot be decoded, the prompt is blocked, or any other error occurs.
        Every ``None`` path logs a reason and shows a message in the UI.
    """
    logger.info(f"πŸ–ΌοΈ [{task_id}] Requesting image for: '{prompt[:70]}...' (Aspect: {aspect_ratio})")

    # Style guidance and negative constraints are embedded in the text prompt.
    full_prompt = (
        f"Generate an image in a child-friendly, simple animation style with bright colors and rounded shapes. "
        f"Ensure absolutely NO humans or human-like figures are present. Focus on animals or objects. "
        f"Prompt: {prompt}"
    )

    try:
        response = client_standard.generate_content(
            full_prompt,
            generation_config=genai.types.GenerationConfig(
                candidate_count=1,
            ),
        )

        # --- Happy path: inline image bytes in the first part ---
        parts = response.parts
        image_bytes = parts[0].inline_data.data if parts and parts[0].inline_data else None
        if image_bytes:
            try:
                image = Image.open(BytesIO(image_bytes))
            except Exception as img_err:
                logger.error(f" ❌ [{task_id}] Failed to decode generated image data: {img_err}")
                st.warning(f"Failed to decode image data for scene {task_id}.", icon="πŸ–ΌοΈ")
                return None
            logger.info(f" βœ… [{task_id}] Image generated successfully.")
            return image

        # --- No image data: report the most specific reason available ---
        # (The original re-checked for image data here, which could never
        # succeed inside this branch; that dead code has been removed.)
        block_reason = getattr(response.prompt_feedback, 'block_reason', None)
        safety_ratings = getattr(response.prompt_feedback, 'safety_ratings', [])
        # NOTE(review): probability may be an enum rather than a plain string
        # depending on the SDK; comparing by ``.name`` when present covers both.
        flagged = [
            f"{r.category}: {r.probability}"
            for r in safety_ratings
            if getattr(r.probability, "name", r.probability) != "NEGLIGIBLE"
        ]

        if block_reason:
            logger.warning(f" ⚠️ [{task_id}] Image generation blocked. Reason: {block_reason}. Prompt: '{prompt[:70]}...'")
            st.warning(f"Image generation blocked for scene {task_id}. Reason: {block_reason}", icon="🚫")
        elif flagged:
            flag_summary = ', '.join(flagged)
            logger.warning(f" ⚠️ [{task_id}] Image generation flagged by safety filters: {flag_summary}. Prompt: '{prompt[:70]}...'")
            st.warning(f"Image generation for scene {task_id} flagged by safety filters: {flag_summary}", icon="⚠️")
            logger.warning(f" ⚠️ [{task_id}] Image flagged but no image data found. Prompt: '{prompt[:70]}...'")
            st.warning(f"No image data received for scene {task_id} (safety flagged).", icon="πŸ–ΌοΈ")
        else:
            # Also reached when ratings exist but none are flagged; previously
            # that case returned None without any message.
            logger.warning(f" ⚠️ [{task_id}] No image data received, unknown reason. Prompt: '{prompt[:70]}...'")
            st.warning(f"No image data received for scene {task_id}, reason unclear.", icon="πŸ–ΌοΈ")
        return None

    except genai.types.generation_types.BlockedPromptException as bpe:
        logger.error(f" ❌ [{task_id}] Image generation blocked for prompt '{prompt[:70]}...': {bpe}")
        st.error(f"Image generation blocked for scene {task_id} due to safety settings.", icon="🚫")
        return None
    except Exception as e:
        logger.exception(f" ❌ [{task_id}] Image generation failed unexpectedly for '{prompt[:70]}...': {e}")
        st.error(f"Image generation failed for scene {task_id}: {e}", icon="πŸ–ΌοΈ")
        return None
412
 
413
 
414
# --- Streamlit UI Elements ---
# All configuration widgets live in the sidebar; keep a short alias for it.
sidebar = st.sidebar
sidebar.header("βš™οΈ Configuration")

# API key status. The missing-key branch is a defensive fallback only.
if GOOGLE_API_KEY:
    sidebar.success("Google API Key Loaded", icon="βœ…")
else:
    sidebar.error("Google API Key Missing!", icon="🚨")

# Story parameters
theme = sidebar.text_input(
    "πŸ“– Story Theme:",
    "A curious squirrel finds a mysterious, glowing acorn",
)
num_scenes = sidebar.slider(
    "🎬 Scenes per Timeline:",
    min_value=2,
    max_value=7,
    value=3,
    help="Number of scenes (image+narration) in each timeline.",
)
num_timelines = sidebar.slider(
    "🌿 Number of Timelines:",
    min_value=1,
    max_value=4,
    value=2,
    help="Number of parallel storylines to generate.",
)
divergence_prompt = sidebar.text_input(
    "↔️ Divergence Hint (Optional):",
    placeholder="e.g., What if a bird tried to steal it?",
    help="A suggestion for how the timelines might differ.",
)

# Generation settings
sidebar.subheader("🎨 Visual & Audio Settings")
aspect_ratio = sidebar.selectbox(
    "πŸ–ΌοΈ Image Aspect Ratio:",
    ["1:1", "16:9", "9:16"],
    index=0,
    help="Aspect ratio for generated images.",
)
# Narration voice selection is not implemented yet; None is passed through.
audio_voice = None

generate_button = sidebar.button(
    "✨ Generate ChronoWeave ✨",
    type="primary",
    disabled=(not GOOGLE_API_KEY),
    use_container_width=True,
)

sidebar.markdown("---")
sidebar.info("⏳ Generation can take several minutes, especially with more scenes or timelines.", icon="⏳")
sidebar.markdown(
    f"<small>Models: Text={TEXT_MODEL_ID}, Image={IMAGE_MODEL_ID}, Audio={AUDIO_MODEL_ID} ({AUDIO_API_VERSION})</small>",
    unsafe_allow_html=True,
)
443
+
444
 
445
# --- Main Logic ---
def _close_clip_quietly(clip, label: str) -> None:
    """Close a moviepy clip and its audio track; failures are logged, not raised."""
    try:
        clip.close()
        if clip.audio:
            clip.audio.close()
    except Exception as e_close:
        logger.warning(f" ⚠️ [{label}] Error closing clip: {e_close}")


def _remove_file_quietly(path: str, label: str) -> None:
    """Delete *path* if it exists; a failure is logged instead of aborting the run."""
    try:
        if os.path.exists(path):
            os.remove(path)
    except OSError as e_rm:
        logger.warning(f" ⚠️ [{label}] Could not remove file {path}: {e_rm}")


if not generate_button:
    st.info("Configure settings in the sidebar and click '✨ Generate ChronoWeave ✨' to start.")
elif not theme:
    st.error("Please enter a story theme in the sidebar.", icon="πŸ‘ˆ")
else:
    # Create a unique temporary directory for this run.
    run_id = str(uuid.uuid4()).split('-')[0]  # Short unique ID
    temp_dir = os.path.join(TEMP_DIR_BASE, f"run_{run_id}")
    try:
        os.makedirs(temp_dir, exist_ok=True)
        logger.info(f"Created temporary directory: {temp_dir}")
    except OSError as e:
        st.error(f"🚨 Failed to create temporary directory {temp_dir}: {e}", icon="πŸ“‚")
        st.stop()

    final_video_paths = {}  # {timeline_id: video_path}
    generation_errors = {}  # {timeline_id: [error_messages]}

    # --- 1. Generate Narrative Structure ---
    chrono_response: Optional[ChronoWeaveResponse] = None
    with st.spinner("Generating narrative structure... πŸ€”"):
        chrono_response = generate_story_sequence_chrono(theme, num_scenes, num_timelines, divergence_prompt)

    if not chrono_response:
        # The user-facing error was already shown by generate_story_sequence_chrono.
        logger.error("Story generation failed, cannot proceed.")
    else:
        st.success(f"Narrative structure received for {len(chrono_response.timelines)} timelines.")
        logger.info(f"Successfully generated structure for {len(chrono_response.timelines)} timelines.")

        # --- 2. Process Each Timeline ---
        overall_start_time = time.time()
        all_timelines_successful = True  # Flipped as soon as any timeline fails

        with st.status("Generating assets and composing videos...", expanded=True) as status:

            for timeline_index, timeline in enumerate(chrono_response.timelines):
                timeline_id = timeline.timeline_id
                divergence = timeline.divergence_reason
                segments = timeline.segments
                timeline_label = f"Timeline {timeline_id}"
                st.subheader(f"Processing {timeline_label}: {divergence}")
                logger.info(f"--- Processing {timeline_label} (Index: {timeline_index}) ---")
                generation_errors[timeline_id] = []
                timeline_errors = generation_errors[timeline_id]

                temp_image_files = {}  # {scene_id: path}
                temp_audio_files = {}  # {scene_id: path}
                video_clips = []       # moviepy clips, in scene order
                timeline_start_time = time.time()
                scene_success_count = 0

                for segment in segments:
                    scene_id = segment.scene_id
                    task_id = f"T{timeline_id}_S{scene_id}"  # Unique ID for logging/filenames
                    status_message = f"Processing {timeline_label}, Scene {scene_id + 1}/{len(segments)}..."
                    status.update(label=status_message)
                    st.markdown(f"--- **Scene {scene_id + 1} ({task_id})** ---")
                    logger.info(status_message)

                    # Show the scene details being processed.
                    st.write(f" *Image Prompt:* {segment.image_prompt}" + (f" *(Modifier: {segment.timeline_visual_modifier})*" if segment.timeline_visual_modifier else ""))
                    st.write(f" *Audio Text:* {segment.audio_text}")

                    # --- 2a. Image Generation ---
                    with st.spinner(f"[{task_id}] Generating image... 🎨"):
                        combined_prompt = f"{segment.image_prompt}. {segment.character_description}"
                        if segment.timeline_visual_modifier:
                            combined_prompt += f" Visual style hint: {segment.timeline_visual_modifier}."
                        generated_image: Optional[Image.Image] = generate_image_imagen(combined_prompt, aspect_ratio, task_id)

                    if not generated_image:
                        st.warning(f"Image generation failed for scene {task_id}. Skipping scene.", icon="πŸ–ΌοΈ")
                        timeline_errors.append(f"Scene {scene_id+1}: Image generation failed.")
                        continue  # No image, so no audio or clip for this scene

                    image_path = os.path.join(temp_dir, f"{task_id}_image.png")
                    try:
                        generated_image.save(image_path)
                        temp_image_files[scene_id] = image_path
                        st.image(generated_image, width=180, caption=f"Scene {scene_id+1} Image")
                    except Exception as e:
                        logger.error(f" ❌ [{task_id}] Failed to save image {image_path}: {e}")
                        st.error(f"Failed to save image for scene {task_id}.", icon="πŸ’Ύ")
                        timeline_errors.append(f"Scene {scene_id+1}: Image save failed.")
                        continue  # Without a saved image the scene cannot be rendered

                    # --- 2b. Audio Generation ---
                    generated_audio_path: Optional[str] = None
                    audio_error_recorded = False  # Avoid recording one failure twice
                    with st.spinner(f"[{task_id}] Generating audio... πŸ”Š"):
                        audio_path_temp = os.path.join(temp_dir, f"{task_id}_audio.wav")
                        try:
                            # asyncio.run() relies on the nest_asyncio patch applied at import time.
                            generated_audio_path = asyncio.run(
                                generate_audio_live_async(segment.audio_text, audio_path_temp, audio_voice)
                            )
                        except RuntimeError as e:
                            # Event-loop misconfiguration despite nest_asyncio.
                            logger.error(f" ❌ [{task_id}] Asyncio runtime error during audio gen: {e}")
                            st.error(f"Asyncio error during audio generation for {task_id}: {e}", icon="⚑")
                            timeline_errors.append(f"Scene {scene_id+1}: Audio async error.")
                            audio_error_recorded = True
                        except Exception as e:
                            logger.exception(f" ❌ [{task_id}] Unexpected error during audio generation call for {task_id}: {e}")
                            st.error(f"Unexpected error in audio generation for {task_id}: {e}", icon="πŸ’₯")
                            timeline_errors.append(f"Scene {scene_id+1}: Audio generation error.")
                            audio_error_recorded = True

                    if not generated_audio_path:
                        st.warning(f"Audio generation failed for {task_id}. Skipping video clip.", icon="πŸ”Š")
                        if not audio_error_recorded:
                            timeline_errors.append(f"Scene {scene_id+1}: Audio generation failed.")
                        # The image is useless without narration; drop it.
                        orphan_image = temp_image_files.pop(scene_id, None)
                        if orphan_image and os.path.exists(orphan_image):
                            try:
                                os.remove(orphan_image)
                                logger.info(f" πŸ—‘οΈ [{task_id}] Removed image file due to audio failure.")
                            except OSError as e:
                                logger.warning(f" ⚠️ [{task_id}] Could not remove image file {orphan_image} after audio failure: {e}")
                        continue  # Skip video clip creation

                    temp_audio_files[scene_id] = generated_audio_path
                    # Best-effort audio preview; a failure here is not a scene failure.
                    try:
                        with open(generated_audio_path, 'rb') as ap:
                            st.audio(ap.read(), format='audio/wav')
                    except Exception as e:
                        logger.warning(f" ⚠️ [{task_id}] Could not display audio preview: {e}")

                    # --- 2c. Create Video Clip ---
                    st.write(f" 🎬 Creating video clip for Scene {scene_id+1}...")
                    img_path = temp_image_files[scene_id]
                    aud_path = temp_audio_files[scene_id]
                    audio_clip = None
                    try:
                        if not os.path.exists(img_path):
                            raise FileNotFoundError(f"Image file not found: {img_path}")
                        if not os.path.exists(aud_path):
                            raise FileNotFoundError(f"Audio file not found: {aud_path}")

                        audio_clip = AudioFileClip(aud_path)
                        # Load pixels into a numpy array and release the image file right away.
                        with Image.open(img_path) as scene_image:
                            np_image = np.array(scene_image)
                        image_clip = ImageClip(np_image).set_duration(audio_clip.duration)
                        composite_clip = image_clip.set_audio(audio_clip)

                        logger.info(f" βœ… [{task_id}] Video clip created (Duration: {audio_clip.duration:.2f}s).")
                        st.write(f" βœ… Clip created (Duration: {audio_clip.duration:.2f}s).")
                        # Register the clip last so a failure above never leaves
                        # a half-initialised clip in the list.
                        video_clips.append(composite_clip)
                        scene_success_count += 1
                    except Exception as e:
                        logger.exception(f" ❌ [{task_id}] Failed to create video clip for scene {scene_id+1}: {e}")
                        st.error(f"Failed to create video clip for {task_id}: {e}", icon="🎬")
                        timeline_errors.append(f"Scene {scene_id+1}: Video clip creation failed.")
                        # Release the audio reader before deleting its file.
                        if audio_clip is not None:
                            try:
                                audio_clip.close()
                            except Exception as e_close:
                                logger.warning(f" ⚠️ [{task_id}] Error closing clip: {e_close}")
                        _remove_file_quietly(img_path, task_id)
                        _remove_file_quietly(aud_path, task_id)

                # --- End of Scene Loop ---

                # --- 2d. Assemble Timeline Video ---
                failed_scene_count = len(segments) - scene_success_count
                if video_clips and failed_scene_count == 0:  # Only assemble if all scenes succeeded
                    status.update(label=f"Composing final video for {timeline_label}...")
                    st.write(f"🎞️ Assembling final video for {timeline_label}...")
                    logger.info(f"🎞️ Assembling final video for {timeline_label} ({len(video_clips)} clips)...")
                    output_filename = os.path.join(temp_dir, f"timeline_{timeline_id}_final.mp4")
                    final_timeline_video = None
                    try:
                        final_timeline_video = concatenate_videoclips(video_clips, method="compose")
                        final_timeline_video.write_videofile(
                            output_filename,
                            fps=VIDEO_FPS,
                            codec=VIDEO_CODEC,
                            audio_codec=AUDIO_CODEC,
                            logger=None  # Suppress moviepy console spam
                        )
                        final_video_paths[timeline_id] = output_filename
                        # Measured after encoding so the figure covers the whole timeline.
                        timeline_duration = time.time() - timeline_start_time
                        logger.info(f" βœ… [{timeline_label}] Final video saved: {os.path.basename(output_filename)}")
                        st.success(f"βœ… Video for {timeline_label} completed in {timeline_duration:.2f}s.")
                    except Exception as e:
                        logger.exception(f" ❌ [{timeline_label}] Failed to write final video: {e}")
                        st.error(f"Failed to assemble video for {timeline_label}: {e}", icon="πŸ“Ό")
                        all_timelines_successful = False
                        timeline_errors.append(f"Timeline {timeline_id}: Final video assembly failed.")
                    finally:
                        if final_timeline_video:
                            try:
                                final_timeline_video.close()
                            except Exception as e_close_final:
                                logger.warning(f" ⚠️ [{timeline_label}] Error closing final video object: {e_close_final}")
                elif not video_clips:
                    logger.warning(f"[{timeline_label}] No video clips successfully generated. Skipping final assembly.")
                    st.warning(f"No scenes were successfully processed for {timeline_label}. Video cannot be created.", icon="🚫")
                    all_timelines_successful = False
                else:  # Some scenes failed
                    logger.warning(f"[{timeline_label}] Encountered errors in {failed_scene_count} scene(s). Skipping final video assembly.")
                    st.warning(f"{timeline_label} had errors in {failed_scene_count} scene(s). Final video not assembled.", icon="⚠️")
                    all_timelines_successful = False

                # Close every clip whether or not a video was assembled, so
                # file handles are released before the temp dir is removed.
                logger.debug(f"[{timeline_label}] Closing {len(video_clips)} video clips...")
                for clip in video_clips:
                    _close_clip_quietly(clip, timeline_label)
                logger.debug(f"[{timeline_label}] Clips closed.")

                if timeline_errors:
                    logger.error(f"Errors occurred in {timeline_label}: {timeline_errors}")

            # --- End of Timelines Loop ---

            # Final status update
            overall_duration = time.time() - overall_start_time
            if all_timelines_successful and final_video_paths:
                status_msg = f"ChronoWeave Generation Complete! ({len(final_video_paths)} videos in {overall_duration:.2f}s)"
                status.update(label=status_msg, state="complete", expanded=False)
                logger.info(status_msg)
            elif final_video_paths:
                status_msg = f"ChronoWeave Partially Complete ({len(final_video_paths)} videos, some errors occurred). Total time: {overall_duration:.2f}s"
                # st.status accepts only "running", "complete" or "error";
                # "warning" is rejected, so partial success is shown as "error".
                status.update(label=status_msg, state="error", expanded=True)
                logger.warning(status_msg)
            else:
                status_msg = f"ChronoWeave Generation Failed. No videos produced. Total time: {overall_duration:.2f}s"
                status.update(label=status_msg, state="error", expanded=True)
                logger.error(status_msg)

        # --- 3. Display Results ---
        st.header("🎬 Generated Timelines")
        if final_video_paths:
            sorted_timeline_ids = sorted(final_video_paths.keys())
            cols = st.columns(len(sorted_timeline_ids))  # Side-by-side display

            for col, timeline_id in zip(cols, sorted_timeline_ids):
                video_path = final_video_paths[timeline_id]
                # Find matching timeline data for context
                timeline_data = next((t for t in chrono_response.timelines if t.timeline_id == timeline_id), None)
                reason = timeline_data.divergence_reason if timeline_data else "Unknown Divergence"
                with col:
                    st.subheader(f"Timeline {timeline_id}")
                    st.caption(f"Divergence: {reason}")
                    try:
                        # Pass bytes, not the path, so the video still plays
                        # after the temporary directory has been deleted.
                        with open(video_path, 'rb') as video_file:
                            video_bytes = video_file.read()
                        st.video(video_bytes)
                        logger.info(f"Displaying video for Timeline {timeline_id}")
                        st.download_button(
                            label=f"Download T{timeline_id} Video",
                            data=video_bytes,
                            file_name=f"chronoweave_timeline_{timeline_id}.mp4",
                            mime="video/mp4"
                        )
                        if generation_errors.get(timeline_id):
                            with st.expander(f"⚠️ View {len(generation_errors[timeline_id])} Generation Issues"):
                                for error_msg in generation_errors[timeline_id]:
                                    st.warning(f"- {error_msg}")
                    except FileNotFoundError:
                        logger.error(f"Could not find video file for display: {video_path}")
                        st.error(f"Error: Video file not found for Timeline {timeline_id}.", icon="🚨")
                    except Exception as e:
                        logger.exception(f"Could not display video {video_path}: {e}")
                        st.error(f"Error displaying video for Timeline {timeline_id}: {e}", icon="🚨")
        else:
            st.warning("No final videos were successfully generated in this run.")
            all_errors = [msg for err_list in generation_errors.values() for msg in err_list]
            if all_errors:
                st.subheader("Summary of Generation Issues")
                for error_msg in all_errors:
                    st.error(f"- {error_msg}")

    # --- 4. Cleanup ---
    # Runs for failed story generation too, so no run directory is left behind.
    st.info(f"Cleaning up temporary directory: {temp_dir}")
    try:
        shutil.rmtree(temp_dir)
        logger.info(f"βœ… Temporary directory removed: {temp_dir}")
        st.success("βœ… Temporary files cleaned up.")
    except Exception as e:
        logger.error(f"⚠️ Could not remove temporary directory {temp_dir}: {e}")
        st.warning(f"Could not automatically remove temporary files: {temp_dir}. Please remove it manually if needed.", icon="⚠️")