# Copyright 2025 Google LLC. Based on work by Yousif Ahmed.
# Concept: ChronoWeave - Branching Narrative Generation
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0

import streamlit as st
import google.generativeai as genai
import os
import json
import numpy as np
from io import BytesIO
import time
import wave
import contextlib
import asyncio
import uuid  # For unique filenames
import shutil  # For cleaning up temp dirs

# Image handling
from PIL import Image
# Video and audio processing
from moviepy.editor import ImageClip, AudioFileClip, CompositeVideoClip, concatenate_videoclips
# Type hints
import typing_extensions as typing
# Async support for Streamlit/Google API
import nest_asyncio
nest_asyncio.apply()  # Apply patch for asyncio in environments like Streamlit/Jupyter
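# Why nest_asyncio: Streamlit (like Jupyter) already runs its own event loop,
# so a bare asyncio.run() inside a script rerun can raise
# "RuntimeError: This event loop is already running". The patch above allows
# the nested asyncio.run() call used in the audio step below.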
# --- Configuration ---
st.set_page_config(page_title="ChronoWeave", layout="wide")
st.title("📖 ChronoWeave: Branching Narrative Generator")
st.markdown("""
Generate multiple, branching story timelines from a single theme using AI.
Based on the work of Yousif Ahmed. Copyright 2025 Google LLC.
""")
# --- Constants ---
MODEL = "models/gemini-1.5-flash"  # Or another suitable text model supporting JSON output
# Using v1alpha for the Live API for audio output.
AUDIO_MODEL_VERSION = 'v1alpha'  # Must be alpha for the audio modality
IMAGE_MODEL_ID = "imagen-3"  # Or your preferred Imagen model, e.g. "imagen-3.0-generate-002"
# --- API Key Handling ---
try:
    # Preferred way to handle secrets in Streamlit sharing / HF Spaces
    GOOGLE_API_KEY = st.secrets["GOOGLE_API_KEY"]
    os.environ['GOOGLE_API_KEY'] = GOOGLE_API_KEY
except KeyError:
    st.error("🚨 Google API Key not found! Please add it as a Secret named 'GOOGLE_API_KEY' in your Hugging Face Space settings.", icon="🚨")
    st.stop()  # Halt execution if no key
# --- Initialize Google Client ---
try:
    # Initialize the library with the API key.
    genai.configure(api_key=GOOGLE_API_KEY)
    # Separate clients (or endpoint overrides) are used for different API versions.
    # Client for Text/Imagen (standard API):
    client_standard = genai.GenerativeModel(MODEL)
    # Client for Live Audio (v1alpha) - requires a different endpoint:
    client_live = genai.Client(
        client_options={'api_endpoint': f'{AUDIO_MODEL_VERSION}.generativelanguage.googleapis.com'}
    )
    # Note: depending on the installed SDK version, genai.configure may handle
    # this on its own, but separating clients or explicitly setting endpoints
    # can be more robust. Adjust based on the library version and observed behavior.
except Exception as e:
    st.error(f"🚨 Failed to initialize Google AI Client: {e}", icon="🚨")
    st.stop()
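# Optional helper (a sketch, not part of the original flow): generation calls
# can fail transiently, so call sites may wrap them in simple exponential
# backoff. `with_retries` is a hypothetical local helper, not an SDK function.
def with_retries(fn, attempts=3, base_delay=2.0):
    """Calls `fn()`, retrying on any exception with exponential backoff."""
    for attempt in range(attempts):
        try:
            return fn()
        except Exception:
            if attempt == attempts - 1:
                raise  # Out of retries; surface the original error.
            time.sleep(base_delay * (2 ** attempt))

# Example (hypothetical usage):
#   response = with_retries(lambda: client_standard.generate_content(prompt))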
# --- Define Structured Output Schemas ---
class StorySegment(typing.TypedDict):
    scene_id: int
    image_prompt: str
    audio_text: str
    character_description: str
    timeline_visual_modifier: typing.Optional[str]

class Timeline(typing.TypedDict):
    timeline_id: int
    divergence_reason: str
    segments: list[StorySegment]

class ChronoWeaveResponse(typing.TypedDict):
    core_theme: str
    timelines: list[Timeline]
    total_scenes_per_timeline: int
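# For reference, the schemas above describe responses shaped like this
# (illustrative values only):
# {
#   "core_theme": "A curious squirrel finds a shiny object",
#   "timelines": [
#     {
#       "timeline_id": 0,
#       "divergence_reason": "Original path.",
#       "segments": [
#         {"scene_id": 0, "image_prompt": "...", "audio_text": "...",
#          "character_description": "...", "timeline_visual_modifier": null}
#       ]
#     }
#   ],
#   "total_scenes_per_timeline": 3
# }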
# --- Helper Functions ---
@contextlib.contextmanager
def wave_file(filename, channels=1, rate=24000, sample_width=2):
    """Context manager to write WAV files."""
    with wave.open(filename, "wb") as wf:
        wf.setnchannels(channels)
        wf.setsampwidth(sample_width)
        wf.setframerate(rate)
        yield wf
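# Usage sketch: the Live API's audio output is raw PCM, which matches the
# defaults chosen above (16-bit mono at 24 kHz), so saving is just:
#   with wave_file("scene.wav") as wf:
#       wf.writeframes(pcm_bytes)
# If your model returns a different sample rate or width, pass them explicitly.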
async def generate_audio_live_async(api_text, output_filename):
    """Generates audio using the Gemini Live API (async version)."""
    collected_audio = bytearray()
    st.write(f"🎙️ Generating audio for: '{api_text[:50]}...'")  # Log start
    try:
        # Use 'client_live', configured for v1alpha. The exact session surface
        # (connect / send_request / stream_content) varies across SDK versions;
        # adjust these calls to match the installed library.
        live_model = client_live.get_model(MODEL)
        config = {
            "response_modalities": ["AUDIO"]
        }
        # Connect to the Live API using the live client.
        async with live_model.connect(config=config) as session:
            await session.send_request([api_text])  # Simpler send for a single prompt
            async for response in session.stream_content():
                if response.audio_chunk:
                    collected_audio.extend(response.audio_chunk.data)
        if not collected_audio:
            st.warning(f"⚠️ No audio data received for: '{api_text[:50]}...'")
            return None  # Indicate failure
        audio_bytes = bytes(collected_audio)
        # Write the collected audio bytes into a WAV file.
        with wave_file(output_filename) as wf:
            wf.writeframes(audio_bytes)
        st.write(f"  ✅ Audio saved: {os.path.basename(output_filename)}")
        return output_filename
    except Exception as e:
        st.error(f"  ❌ Audio generation failed for '{api_text[:50]}...': {e}", icon="🚨")
        return None
def generate_story_sequence_chrono(theme: str, num_scenes: int, num_timelines: int, divergence_prompt: str = "") -> ChronoWeaveResponse | None:
    """Generates branching story sequences using Gemini structured output."""
    st.write(f"📖 Generating {num_timelines} timeline(s) for theme: '{theme}'...")
    divergence_instruction = (
        f"Introduce divergence between timelines. {divergence_prompt}"
        if divergence_prompt
        else "Introduce natural points of divergence between timelines after the first scene or two."
    )
    prompt = f'''
    As an expert narrative designer, create a branching story based on the theme: "{theme}".
    Generate exactly {num_timelines} distinct timelines, each containing exactly {num_scenes} scenes.
    Each scene should be approximately 5-10 seconds long when narrated.
    {divergence_instruction} Clearly state the reason for divergence for each timeline after the first.

    For each scene in each timeline, provide:
    - scene_id: An integer starting from 0 for the scene number within its timeline.
    - image_prompt: A concise (15-25 words) description for an image generation model. Focus on visual details, characters (animals/objects only, NO PEOPLE), background, and action. Maintain a consistent 'kids animation style' (e.g., simple, rounded shapes, bright colors) across all scenes and timelines unless specified by a timeline_visual_modifier.
    - audio_text: A single, engaging sentence of narration or dialogue for the scene (max 25 words).
    - character_description: Brief description of recurring characters (names, key features) mentioned in *this specific scene's image prompt*. Keep consistent within a timeline. (Max 30 words).
    - timeline_visual_modifier: (Optional, string or null) A *brief* hint if this timeline should have a slightly different visual feel from this scene onwards (e.g., "slightly darker lighting", "more cluttered background", "character looks worried"). Keep it subtle. Use null if no specific modifier.

    Constraint: Ensure the output strictly adheres to the following JSON schema. Do not include preamble or explanations outside the JSON structure. Respond ONLY with the JSON object.

    JSON Schema:
    {{
      "type": "object",
      "properties": {{
        "core_theme": {{"type": "string"}},
        "timelines": {{
          "type": "array",
          "items": {{
            "type": "object",
            "properties": {{
              "timeline_id": {{"type": "integer"}},
              "divergence_reason": {{"type": "string"}},
              "segments": {{
                "type": "array",
                "items": {{
                  "type": "object",
                  "properties": {{
                    "scene_id": {{"type": "integer"}},
                    "image_prompt": {{"type": "string"}},
                    "audio_text": {{"type": "string"}},
                    "character_description": {{"type": "string"}},
                    "timeline_visual_modifier": {{"type": ["string", "null"]}}
                  }},
                  "required": ["scene_id", "image_prompt", "audio_text", "character_description", "timeline_visual_modifier"]
                }}
              }}
            }},
            "required": ["timeline_id", "divergence_reason", "segments"]
          }}
        }},
        "total_scenes_per_timeline": {{"type": "integer"}}
      }},
      "required": ["core_theme", "timelines", "total_scenes_per_timeline"]
    }}
    '''
    try:
        response = client_standard.generate_content(
            contents=prompt,
            generation_config=genai.types.GenerationConfig(
                response_mime_type="application/json",
                # Optional: add temperature, etc. if needed
            )
            # The schema can also be passed via generation_config in some versions/models:
            # generation_config=genai.types.GenerationConfig(
            #     response_mime_type='application/json',
            #     response_schema=ChronoWeaveResponse,  # Pass the TypedDict directly
            # )
        )
        # Debugging: st.text_area("Raw Gemini Response:", response.text, height=200)
        story_data = json.loads(response.text)  # response.text should contain the JSON string
        st.success("✅ Story structure generated successfully!")
        # Basic validation (see validate_story_data below for a stricter sketch).
        if 'timelines' in story_data and isinstance(story_data['timelines'], list):
            return story_data  # Return the parsed dictionary
        else:
            st.error("🚨 Generated story data is missing the 'timelines' list.", icon="🚨")
            return None
    except json.JSONDecodeError as e:
        st.error(f"🚨 Failed to decode JSON response from Gemini: {e}", icon="🚨")
        st.text_area("Problematic Response Text:", response.text if 'response' in locals() else "No response object.", height=150)
        return None
    except Exception as e:
        st.error(f"🚨 Error generating story sequence: {e}", icon="🚨")
        # Logging the prompt may help debugging; be careful with sensitive data.
        # st.text_area("Failed Prompt:", prompt, height=200)
        return None
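# A stricter validation sketch (optional; `validate_story_data` is a local
# helper introduced here, not wired into the flow by default). It checks
# segment counts and required keys beyond the basic 'timelines' check above.
def validate_story_data(data: dict, expected_scenes: int) -> bool:
    """Returns True if every timeline has the expected, well-formed segments."""
    required = {"scene_id", "image_prompt", "audio_text",
                "character_description", "timeline_visual_modifier"}
    timelines = data.get("timelines", [])
    if not timelines:
        return False
    for timeline in timelines:
        segments = timeline.get("segments", [])
        if len(segments) != expected_scenes:
            return False
        # Every segment must carry all required keys.
        if any(not required.issubset(seg) for seg in segments):
            return False
    return True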
def generate_image_imagen(prompt: str, aspect_ratio: str = "1:1") -> Image.Image | None:
    """Generates an image using Imagen."""
    st.write(f"🖼️ Generating image for: '{prompt[:60]}...'")
    try:
        # Note: this routes the request through the standard client; a dedicated
        # Imagen endpoint may be required depending on the SDK version. Check the
        # documentation for precise Imagen control via the unified API.
        response = client_standard.generate_content(
            f"Generate an image with the following prompt, ensuring a child-friendly animation style and NO human figures: {prompt}",
            generation_config=genai.types.GenerationConfig(
                candidate_count=1,  # Generate one image
                # Imagen-specific parameters are often passed differently or rely on model defaults.
            ),
            # If the model/API version requires specific image parameters (surface varies by version):
            # tools=[genai.ImageParams(model=IMAGE_MODEL_ID, number_of_images=1,
            #        aspect_ratio=aspect_ratio, person_generation="DONT_ALLOW")]
        )
        # Accessing image data may vary slightly with the API response structure;
        # this assumes response.parts contains inline image data on success.
        if response.parts and response.parts[0].inline_data:
            image_bytes = response.parts[0].inline_data.data
            image = Image.open(BytesIO(image_bytes))
            st.write("  ✅ Image generated.")
            return image
        else:
            # Check for safety blocks or other reasons for failure.
            if response.prompt_feedback and response.prompt_feedback.block_reason:
                st.warning(f"  ⚠️ Image generation blocked for prompt '{prompt[:60]}...'. Reason: {response.prompt_feedback.block_reason}", icon="⚠️")
            else:
                st.warning(f"  ⚠️ No image data received for prompt '{prompt[:60]}...'.", icon="⚠️")
            # Debugging: st.write(response)
            return None
    except Exception as e:
        st.error(f"  ❌ Image generation failed for '{prompt[:60]}...': {e}", icon="🚨")
        return None
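# Optional fallback (a sketch; not wired into the main loop): when image
# generation fails, a plain placeholder frame can keep the timeline video
# intact instead of dropping the scene. Pure local PIL code; the 640x640
# size is an arbitrary assumption.
def placeholder_image(text: str, size=(640, 640)) -> Image.Image:
    """Returns a flat-color frame with the prompt text, for failed scenes."""
    from PIL import ImageDraw
    img = Image.new("RGB", size, color=(240, 240, 240))
    draw = ImageDraw.Draw(img)
    # Default bitmap font; truncate the text so it fits on one line.
    draw.text((20, size[1] // 2), text[:60], fill=(80, 80, 80))
    return img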
# --- Streamlit UI Elements ---
st.sidebar.header("Configuration")

# API key status (key handling happens above; the sidebar is a good place to surface it)
if GOOGLE_API_KEY:
    st.sidebar.success("Google API Key Loaded!", icon="✅")
else:
    st.sidebar.error("Google API Key Missing!", icon="🚨")

theme = st.sidebar.text_input("Story Theme:", "A curious squirrel finds a shiny object")
num_scenes = st.sidebar.slider("Scenes per Timeline:", min_value=2, max_value=7, value=3)
num_timelines = st.sidebar.slider("Number of Timelines:", min_value=1, max_value=4, value=2)
divergence_prompt = st.sidebar.text_input("Divergence Hint (Optional):", placeholder="e.g., What if it started raining?")
aspect_ratio = st.sidebar.selectbox("Image Aspect Ratio:", ["1:1", "16:9", "9:16"], index=0)
generate_button = st.sidebar.button("✨ Generate ChronoWeave ✨", type="primary", disabled=(not GOOGLE_API_KEY))
st.sidebar.markdown("---")
st.sidebar.info("Note: Generation can take several minutes depending on settings.")
# --- Main Logic ---
if generate_button:
    if not theme:
        st.error("Please enter a story theme.", icon="📖")
    else:
        # Create a unique temporary directory for this run.
        run_id = str(uuid.uuid4())
        temp_dir = os.path.join(".", f"chrono_temp_{run_id}")  # Create in current dir
        os.makedirs(temp_dir, exist_ok=True)
        st.write(f"Working directory: {temp_dir}")
        final_video_paths = {}  # {timeline_id: video_path}

        with st.spinner("Generating narrative structure..."):
            chrono_data = generate_story_sequence_chrono(theme, num_scenes, num_timelines, divergence_prompt)
        if chrono_data and 'timelines' in chrono_data:
            st.success(f"Found {len(chrono_data['timelines'])} timelines. Processing each...")
            all_timelines_successful = True  # Flag to track if all timelines worked

            # Use st.status for detailed progress.
            with st.status("Generating assets and composing videos...", expanded=True) as status:
                for timeline in chrono_data['timelines']:
                    timeline_id = timeline['timeline_id']
                    divergence = timeline['divergence_reason']
                    segments = timeline['segments']
                    st.subheader(f"Timeline {timeline_id}: {divergence}")
                    temp_image_files = []
                    temp_audio_files = []
                    video_clips = []
                    timeline_successful = True  # Flag for this specific timeline

                    for i, segment in enumerate(segments):
                        status.update(label=f"Processing Timeline {timeline_id}, Scene {i+1}/{num_scenes}...")
                        scene_id = segment['scene_id']
                        image_prompt = segment['image_prompt']
                        audio_text = segment['audio_text']
                        char_desc = segment['character_description']
                        vis_mod = segment['timeline_visual_modifier']
                        st.write(f"--- Scene {i+1} (T{timeline_id}) ---")
                        st.write(f"* **Image Prompt:** {image_prompt}" + (f" (Modifier: {vis_mod})" if vis_mod else ""))
                        st.write(f"* **Audio Text:** {audio_text}")
                        # st.write(f"* Character Desc: {char_desc}")  # Can be verbose

                        # --- Image Generation ---
                        combined_prompt = f"{image_prompt} {char_desc}"
                        if vis_mod:
                            combined_prompt += f" Style hint: {vis_mod}"
                        generated_image = generate_image_imagen(combined_prompt, aspect_ratio)
                        if generated_image:
                            image_path = os.path.join(temp_dir, f"t{timeline_id}_s{i}_image.png")
                            generated_image.save(image_path)
                            temp_image_files.append(image_path)
                            st.image(generated_image, width=200)  # Show thumbnail
                        else:
                            st.warning(f"Skipping scene {i+1} in timeline {timeline_id} due to image generation failure.")
                            timeline_successful = False
                            continue  # Skip to the next segment if the image fails
                        # --- Audio Generation ---
                        # Prepend an instruction so the model reads the sentence
                        # directly, without conversational filler.
                        narration_instruction = "Narrate the following sentence directly, with expression, without any introduction or closing remarks like 'Okay' or 'Here is the narration'. Just read the sentence:"
                        full_audio_prompt = f"{narration_instruction}\n{audio_text}"
                        audio_path = os.path.join(temp_dir, f"t{timeline_id}_s{i}_audio.wav")
                        # Run the async audio generation function.
                        try:
                            generated_audio_path = asyncio.run(generate_audio_live_async(full_audio_prompt, audio_path))
                        except Exception as e:
                            st.error(f"Asyncio error during audio gen: {e}")
                            generated_audio_path = None

                        if generated_audio_path:
                            temp_audio_files.append(generated_audio_path)
                            # st.audio(generated_audio_path)  # Optional: preview audio
                        else:
                            st.warning(f"Skipping video clip for scene {i+1} in timeline {timeline_id} due to audio generation failure.")
                            # Clean up the image file for this failed scene segment.
                            if os.path.exists(image_path):
                                os.remove(image_path)
                                temp_image_files.remove(image_path)
                            timeline_successful = False
                            continue  # Skip making the video clip if audio fails
                        # --- Create Video Clip ---
                        try:
                            st.write("  🎬 Creating video clip...")
                            audio_clip = AudioFileClip(generated_audio_path)
                            # Load the saved image as a numpy array for moviepy.
                            np_image = np.array(Image.open(image_path))
                            # Create the ImageClip and match its duration to the audio.
                            image_clip = ImageClip(np_image).set_duration(audio_clip.duration)
                            # Resize here if clip dimensions need to match across scenes:
                            # image_clip = image_clip.resize(width=...)
                            composite_clip = image_clip.set_audio(audio_clip)  # Simple composition
                            video_clips.append(composite_clip)
                            st.write("  ✅ Clip created.")
                        except Exception as e:
                            st.error(f"  ❌ Failed to create video clip for scene {i+1} (T{timeline_id}): {e}", icon="🚨")
                            timeline_successful = False
                            # Don't abort the whole timeline; the failure flag
                            # blocks final assembly below.
                    # --- Assemble Timeline Video ---
                    if video_clips and timeline_successful:  # Only assemble if clips were made without major errors
                        status.update(label=f"Composing final video for Timeline {timeline_id}...")
                        st.write(f"🎞️ Assembling final video for Timeline {timeline_id}...")
                        try:
                            final_timeline_video = concatenate_videoclips(video_clips, method="compose")
                            output_filename = os.path.join(temp_dir, f"timeline_{timeline_id}_final_video.mp4")
                            # Use 'libx264' for broader compatibility; specify the audio codec.
                            final_timeline_video.write_videofile(output_filename, fps=24, codec='libx264', audio_codec='aac')
                            final_video_paths[timeline_id] = output_filename
                            st.success(f"  ✅ Video for Timeline {timeline_id} saved: {os.path.basename(output_filename)}")
                            # Close clips to release resources.
                            for clip in video_clips:
                                if hasattr(clip, 'close'):
                                    clip.close()
                                if hasattr(clip, 'audio') and hasattr(clip.audio, 'close'):
                                    clip.audio.close()
                            if hasattr(final_timeline_video, 'close'):
                                final_timeline_video.close()
                        except Exception as e:
                            st.error(f"  ❌ Failed to write final video for Timeline {timeline_id}: {e}", icon="🚨")
                            all_timelines_successful = False
                    elif not video_clips:
                        st.warning(f"No video clips were successfully generated for Timeline {timeline_id}. Skipping final video assembly.")
                        all_timelines_successful = False
                    else:
                        st.warning(f"Timeline {timeline_id} encountered errors. Skipping final video assembly.")
                        all_timelines_successful = False

                    # Intermediate per-timeline cleanup (optional; helps manage files):
                    # for file in temp_audio_files:
                    #     if os.path.exists(file): os.remove(file)
                    # for file in temp_image_files:
                    #     if os.path.exists(file): os.remove(file)

                # Final status update
                if all_timelines_successful and final_video_paths:
                    status.update(label="ChronoWeave Generation Complete!", state="complete", expanded=False)
                elif final_video_paths:
                    # st.status supports only 'running', 'complete', and 'error';
                    # report partial success as 'complete' with an explanatory label.
                    status.update(label="ChronoWeave Generation Partially Complete (some errors occurred).", state="complete", expanded=False)
                else:
                    status.update(label="ChronoWeave Generation Failed.", state="error", expanded=False)
            # --- Display Results ---
            st.header("Generated Timelines")
            if final_video_paths:
                sorted_timeline_ids = sorted(final_video_paths.keys())
                for timeline_id in sorted_timeline_ids:
                    video_path = final_video_paths[timeline_id]
                    # Find the matching timeline's divergence reason.
                    reason = "Unknown"
                    for t in chrono_data.get('timelines', []):
                        if t.get('timeline_id') == timeline_id:
                            reason = t.get('divergence_reason', 'N/A')
                            break
                    st.subheader(f"Timeline {timeline_id}: {reason}")
                    try:
                        with open(video_path, 'rb') as video_file:
                            st.video(video_file.read())
                    except FileNotFoundError:
                        st.error(f"Could not find video file: {video_path}", icon="🚨")
                    except Exception as e:
                        st.error(f"Could not display video {video_path}: {e}", icon="🚨")
            else:
                st.warning("No final videos were successfully generated.")

            # --- Cleanup ---
            st.write("Cleaning up temporary files...")
            try:
                shutil.rmtree(temp_dir)
                st.write("  ✅ Temporary files removed.")
            except Exception as e:
                st.warning(f"  ⚠️ Could not remove temporary directory {temp_dir}: {e}", icon="⚠️")
        elif not chrono_data:
            st.error("Story generation failed. Cannot proceed.", icon="📖")
        else:
            # chrono_data was returned but is malformed (e.g., missing the 'timelines' key).
            st.error("Story data seems malformed. Cannot proceed.", icon="📖")
            # st.json(chrono_data)  # Display the problematic data
else:
    st.info("Configure settings in the sidebar and click '✨ Generate ChronoWeave ✨'.")