Update app.py

app.py CHANGED
@@ -10,9 +10,6 @@ from functools import partial
 import torch
 import imageio
 import cv2
-from diffusers import AnimateDiffPipeline, MotionAdapter, EulerDiscreteScheduler
-from huggingface_hub import hf_hub_download
-from safetensors.torch import load_file
 from PIL import Image
 import edge_tts
 from transformers import AutoTokenizer, pipeline
@@ -29,6 +26,11 @@ text_pipe = pipeline(
 # Initialize the sentiment analyzer
 sentiment_analyzer = pipeline("sentiment-analysis")
 
+# Load diffusers libraries after tokenizer to avoid GPU memory conflicts
+from diffusers import AnimateDiffPipeline, MotionAdapter, EulerDiscreteScheduler
+from huggingface_hub import hf_hub_download
+from safetensors.torch import load_file
+
 # Initialize video generation components
 device = "cuda" if torch.cuda.is_available() else "cpu"
 dtype = torch.float16 if torch.cuda.is_available() else torch.float32
@@ -37,13 +39,27 @@ repo = "ByteDance/AnimateDiff-Lightning"
 ckpt = f"animatediff_lightning_{step}step_diffusers.safetensors"
 base = "emilianJR/epiCRealism"
 
-# …
-adapter = MotionAdapter().to(device, dtype)
-adapter.load_state_dict(load_file(hf_hub_download(repo, ckpt), device=device))
+print(f"Using device: {device} with dtype: {dtype}")
+
+# Load motion adapter and pipeline in a function to handle errors gracefully
+def load_models():
+    try:
+        print("Loading motion adapter...")
+        adapter = MotionAdapter().to(device, dtype)
+        adapter.load_state_dict(load_file(hf_hub_download(repo, ckpt), device=device))
+
+        print("Loading diffusion pipeline...")
+        pipe = AnimateDiffPipeline.from_pretrained(base, motion_adapter=adapter, torch_dtype=dtype).to(device)
+        pipe.scheduler = EulerDiscreteScheduler.from_config(pipe.scheduler.config, timestep_spacing="trailing", beta_schedule="linear")
+
+        return adapter, pipe
+    except Exception as e:
+        print(f"Error loading models: {str(e)}")
+        traceback.print_exc()
+        return None, None
 
-# …
-pipe = AnimateDiffPipeline.from_pretrained(base, motion_adapter=adapter, torch_dtype=dtype).to(device)
-pipe.scheduler = EulerDiscreteScheduler.from_config(pipe.scheduler.config, timestep_spacing="trailing", beta_schedule="linear")
+# We'll load the models on first use to avoid startup errors
+adapter, pipe = None, None
 
 # Define all required functions
 def summarize(text):
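A note on the new load_models(): it calls traceback.print_exc(), so the change assumes `import traceback` already appears near the top of app.py; this diff does not add it. The deferred-loading contract the rest of the commit builds on can be sketched in isolation (ensure_models is a hypothetical name; generate_video below inlines the same guard):

    import traceback  # assumed to be imported elsewhere in app.py; load_models() needs it

    adapter, pipe = None, None  # models start unloaded

    def ensure_models():
        # Hypothetical helper mirroring the first-use guard in generate_video()
        global adapter, pipe
        if adapter is None or pipe is None:
            adapter, pipe = load_models()
        if adapter is None or pipe is None:
            raise RuntimeError("Model loading failed; check the logs above.")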
@@ -141,7 +157,7 @@ def generate_story(prompt):
 
     full_output = generated[0]['generated_text']
     story = full_output.split("assistant\n")[-1].strip()
-
+
     # Process sentences and check constraints
     sentences = []
     for s in story.split('.'):
@@ -219,6 +235,15 @@ def generate_story(prompt):
     return final_story
 
 def generate_video(summary):
+    global adapter, pipe
+
+    # Load models if not already loaded
+    if adapter is None or pipe is None:
+        adapter, pipe = load_models()
+
+    if adapter is None or pipe is None:
+        raise Exception("Failed to load models. Please check the logs for errors.")
+
     def crossfade_transition(frames1, frames2, transition_length=10):
         blended_frames = []
         frames1_np = [np.array(frame) for frame in frames1[-transition_length:]]
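The hunk above shows only the opening lines of crossfade_transition, which is unchanged context. A minimal sketch of how such a linear cross-fade is typically completed, assuming PIL frames and a symmetric blend (this body is an illustration consistent with the visible lines, not code from the commit):

    import numpy as np
    from PIL import Image

    def crossfade_transition(frames1, frames2, transition_length=10):
        # Blend the tail of the previous clip into the head of the next one
        blended_frames = []
        frames1_np = [np.array(frame) for frame in frames1[-transition_length:]]
        frames2_np = [np.array(frame) for frame in frames2[:transition_length]]
        for i, (f1, f2) in enumerate(zip(frames1_np, frames2_np)):
            alpha = (i + 1) / (transition_length + 1)  # weight ramps toward the new clip
            blended = ((1 - alpha) * f1 + alpha * f2).astype(np.uint8)
            blended_frames.append(Image.fromarray(blended))
        return blended_frames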
@@ -241,6 +266,12 @@ def generate_video(summary):
     sentences = [s.strip() for s in sentences if s.strip()]
     print(f"Total scenes: {len(sentences)}")
 
+    # For development/testing purposes, limit the number of sentences
+    max_sentences = 5
+    if len(sentences) > max_sentences:
+        print(f"Limiting to first {max_sentences} sentences for faster testing")
+        sentences = sentences[:max_sentences]
+
     # Output config
     output_dir = "generated_frames"
     video_path = "generated_video.mp4"
@@ -256,23 +287,32 @@ def generate_video(summary):
         batch_prompts = sentences[i : i + batch_size]
         for idx, prompt in enumerate(batch_prompts):
             print(f"Generating animation for prompt {i+idx+1}/{len(sentences)}: {prompt}")
-            output = pipe(
-                prompt=prompt,
-                guidance_scale=1.0,
-                num_inference_steps=step,
-                width=256,
-                height=256,
-            )
-            frames = output.frames[0]
-
-            if previous_frames is not None:
-                transition = crossfade_transition(previous_frames, frames, transition_frames)
-                all_frames.extend(transition)
-
-            all_frames.extend(frames)
-            previous_frames = frames
+            try:
+                output = pipe(
+                    prompt=prompt,
+                    guidance_scale=1.0,
+                    num_inference_steps=step,
+                    width=256,
+                    height=256,
+                )
+                frames = output.frames[0]
+
+                if previous_frames is not None:
+                    transition = crossfade_transition(previous_frames, frames, transition_frames)
+                    all_frames.extend(transition)
+
+                all_frames.extend(frames)
+                previous_frames = frames
+            except Exception as e:
+                print(f"Error generating frames for prompt: {prompt}")
+                print(f"Error details: {str(e)}")
+                # Continue with next prompt if one fails
 
     # Save video
+    if not all_frames:
+        raise Exception("No frames were generated. Video creation failed.")
+
+    print(f"Saving video with {len(all_frames)} frames")
     imageio.mimsave(video_path, all_frames, fps=8)
     print(f"Video saved at {video_path}")
     return video_path
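For context, the pipe(...) call now wrapped in try/except follows the published AnimateDiff-Lightning usage: the Lightning checkpoints are distilled for guidance_scale=1.0 and a matching num_inference_steps, with trailing timestep spacing and a linear beta schedule on the scheduler. A self-contained sketch of that baseline (the prompt and output filename are illustrative):

    import torch
    from diffusers import AnimateDiffPipeline, MotionAdapter, EulerDiscreteScheduler
    from diffusers.utils import export_to_gif
    from huggingface_hub import hf_hub_download
    from safetensors.torch import load_file

    device = "cuda" if torch.cuda.is_available() else "cpu"
    dtype = torch.float16 if device == "cuda" else torch.float32
    step = 4  # Lightning ships 1-, 2-, 4-, and 8-step distilled checkpoints
    repo = "ByteDance/AnimateDiff-Lightning"
    ckpt = f"animatediff_lightning_{step}step_diffusers.safetensors"
    base = "emilianJR/epiCRealism"

    adapter = MotionAdapter().to(device, dtype)
    adapter.load_state_dict(load_file(hf_hub_download(repo, ckpt), device=device))
    pipe = AnimateDiffPipeline.from_pretrained(base, motion_adapter=adapter, torch_dtype=dtype).to(device)
    # Lightning expects trailing timestep spacing and a linear beta schedule
    pipe.scheduler = EulerDiscreteScheduler.from_config(
        pipe.scheduler.config, timestep_spacing="trailing", beta_schedule="linear"
    )

    output = pipe(prompt="a corgi running on a beach", guidance_scale=1.0, num_inference_steps=step)
    export_to_gif(output.frames[0], "animation.gif")  # 16 frames ≈ 2 s at the app's fps=8

At 16 frames per scene and fps=8, each scene contributes roughly two seconds of footage, so the five-sentence test cap below yields about ten seconds of video plus transitions.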
@@ -434,15 +474,10 @@ EXAMPLE_PROMPTS = [
     "A struggling local restaurant owner finds an innovative way to save their business during an economic downturn.",
     "An environmental scientist tracks mysterious wildlife behavior that reveals concerning climate changes.",
     "A community comes together to rebuild after a devastating natural disaster.",
-    "A teacher develops a unique method that transforms learning for students with special needs.",
-    "An elderly person reconnects with a childhood friend through social media after sixty years apart.",
-    "A food delivery driver forms an unexpected friendship with an isolated elderly customer during the pandemic.",
-    "A first-generation college student overcomes significant obstacles to achieve academic success.",
-    "A wildlife photographer documents the surprising recovery of an endangered species."
 ]
 
 # Create the Gradio interface
-with gr.Blocks(title="Animind AI Story Video Generator", theme=gr.themes.Soft()) as demo:
+with gr.Blocks(title="AI Story Video Generator", theme=gr.themes.Soft()) as demo:
     gr.Markdown("# 🎬 AI Story Video Generator")
     gr.Markdown("Enter a one-sentence prompt to generate a complete story with video and narration.")
 
@@ -503,6 +538,11 @@ with gr.Blocks(title="Animind AI Story Video Generator", theme=gr.themes.Soft())
     - Include interesting characters, settings, or situations
     - Make your prompt realistic but with potential for development
     - Try to suggest a potential conflict or discovery
+
+    ## Note on Processing Time
+
+    For faster testing, the app currently processes only the first 5 sentences of the story.
+    In a production environment, this limit would be removed.
 
     ## Troubleshooting
 