GeminiAi committed on
Commit
af6bbb8
·
verified ·
1 Parent(s): 1e1c13b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +15 -23
app.py CHANGED
@@ -1,23 +1,12 @@
 
1
  import gradio as gr
2
  import torch
3
  from diffusers import StableDiffusionPipeline
4
- from moviepy.editor import ImageSequenceClip
5
  import numpy as np
6
  from transformers.utils import move_cache
7
  from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
8
- import os
9
- import subprocess
10
 
11
- # Ensure moviepy is installed
12
- try:
13
- from moviepy.editor import ImageSequenceClip
14
- except ModuleNotFoundError:
15
- print("MoviePy not found. Installing...")
16
- subprocess.check_call(["pip", "install", "moviepy"])
17
- from moviepy.editor import ImageSequenceClip
18
-
19
-
20
- # Handle Transformers cache migration (one-time operation)
21
  move_cache()
22
 
23
  # Initialize the Stable Diffusion pipeline
@@ -25,19 +14,19 @@ model_id = "CompVis/stable-diffusion-v1-4"
25
  pipe = StableDiffusionPipeline.from_pretrained(model_id, torch_dtype=torch.float16)
26
  pipe = pipe.to("cuda")
27
 
28
- # Load a text summarization model for better prompts
29
  summarizer_model = "facebook/bart-large-cnn"
30
  tokenizer = AutoTokenizer.from_pretrained(summarizer_model)
31
  summarizer = AutoModelForSeq2SeqLM.from_pretrained(summarizer_model)
32
 
33
- # Function to create video from text
34
  def text_to_video(input_text, num_frames=10, fps=2):
35
- # Summarize the input text for better image prompts
36
  inputs = tokenizer(input_text, return_tensors="pt", truncation=True)
37
  summary_ids = summarizer.generate(inputs["input_ids"], max_length=30, min_length=5, length_penalty=2.0)
38
  prompt = tokenizer.decode(summary_ids[0], skip_special_tokens=True)
39
 
40
- # Generate frames using the Stable Diffusion pipeline
41
  frames = []
42
  for i in range(num_frames):
43
  prompt_with_frame = f"{prompt}, frame {i+1}"
@@ -45,17 +34,21 @@ def text_to_video(input_text, num_frames=10, fps=2):
45
  frames.append(np.array(image))
46
 
47
  # Save frames as a video
48
- video_path = "output.mp4"
49
- clip = ImageSequenceClip(frames, fps=fps)
50
- clip.write_videofile(video_path, codec="libx264")
 
 
 
 
 
51
  return video_path
52
 
53
- # Gradio interface to handle user input and output
54
  def generate_video(text, frames, fps):
55
  video_file = text_to_video(text, num_frames=frames, fps=fps)
56
  return video_file
57
 
58
- # Define the Gradio interface
59
  interface = gr.Interface(
60
  fn=generate_video,
61
  inputs=[
@@ -68,6 +61,5 @@ interface = gr.Interface(
68
  description="Enter a text prompt to generate a short video."
69
  )
70
 
71
- # Launch the app
72
  if __name__ == "__main__":
73
  interface.launch()
 
1
+ import cv2
2
  import gradio as gr
3
  import torch
4
  from diffusers import StableDiffusionPipeline
 
5
  import numpy as np
6
  from transformers.utils import move_cache
7
  from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
 
 
8
 
9
+ # Handle Transformers cache migration
 
 
 
 
 
 
 
 
 
10
  move_cache()
11
 
12
  # Initialize the Stable Diffusion pipeline
 
14
  pipe = StableDiffusionPipeline.from_pretrained(model_id, torch_dtype=torch.float16)
15
  pipe = pipe.to("cuda")
16
 
17
+ # Load text summarizer
18
  summarizer_model = "facebook/bart-large-cnn"
19
  tokenizer = AutoTokenizer.from_pretrained(summarizer_model)
20
  summarizer = AutoModelForSeq2SeqLM.from_pretrained(summarizer_model)
21
 
22
+ # Create video from images using `OpenCV`
23
  def text_to_video(input_text, num_frames=10, fps=2):
24
+ # Summarize the input text
25
  inputs = tokenizer(input_text, return_tensors="pt", truncation=True)
26
  summary_ids = summarizer.generate(inputs["input_ids"], max_length=30, min_length=5, length_penalty=2.0)
27
  prompt = tokenizer.decode(summary_ids[0], skip_special_tokens=True)
28
 
29
+ # Generate frames
30
  frames = []
31
  for i in range(num_frames):
32
  prompt_with_frame = f"{prompt}, frame {i+1}"
 
34
  frames.append(np.array(image))
35
 
36
  # Save frames as a video
37
+ height, width, layers = frames[0].shape
38
+ video_path = "output.avi"
39
+ out = cv2.VideoWriter(video_path, cv2.VideoWriter_fourcc(*'XVID'), fps, (width, height))
40
+
41
+ for frame in frames:
42
+ out.write(cv2.cvtColor(frame, cv2.COLOR_RGB2BGR))
43
+ out.release()
44
+
45
  return video_path
46
 
47
+ # Gradio interface
48
  def generate_video(text, frames, fps):
49
  video_file = text_to_video(text, num_frames=frames, fps=fps)
50
  return video_file
51
 
 
52
  interface = gr.Interface(
53
  fn=generate_video,
54
  inputs=[
 
61
  description="Enter a text prompt to generate a short video."
62
  )
63
 
 
64
  if __name__ == "__main__":
65
  interface.launch()