Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -1,23 +1,12 @@
|
|
|
|
1 |
import gradio as gr
|
2 |
import torch
|
3 |
from diffusers import StableDiffusionPipeline
|
4 |
-
from moviepy.editor import ImageSequenceClip
|
5 |
import numpy as np
|
6 |
from transformers.utils import move_cache
|
7 |
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
|
8 |
-
import os
|
9 |
-
import subprocess
|
10 |
|
11 |
-
# Ensure MoviePy is importable, installing it with pip if it is missing
|
12 |
-
try:
|
13 |
-
from moviepy.editor import ImageSequenceClip
|
14 |
-
except ModuleNotFoundError:
|
15 |
-
print("MoviePy not found. Installing...")
|
16 |
-
subprocess.check_call(["pip", "install", "moviepy"])
|
17 |
-
from moviepy.editor import ImageSequenceClip
|
18 |
-
|
19 |
-
|
20 |
-
# Handle Transformers cache migration (one-time operation)
|
21 |
move_cache()
|
22 |
|
23 |
# Initialize the Stable Diffusion pipeline
|
@@ -25,19 +14,19 @@ model_id = "CompVis/stable-diffusion-v1-4"
|
|
25 |
pipe = StableDiffusionPipeline.from_pretrained(model_id, torch_dtype=torch.float16)
|
26 |
pipe = pipe.to("cuda")
|
27 |
|
28 |
-
# Load the summarization tokenizer and model
|
29 |
summarizer_model = "facebook/bart-large-cnn"
|
30 |
tokenizer = AutoTokenizer.from_pretrained(summarizer_model)
|
31 |
summarizer = AutoModelForSeq2SeqLM.from_pretrained(summarizer_model)
|
32 |
|
33 |
-
# Summarize the input text, generate frames from the summary, and save them as a video
|
34 |
def text_to_video(input_text, num_frames=10, fps=2):
|
35 |
-
# Summarize the input text
|
36 |
inputs = tokenizer(input_text, return_tensors="pt", truncation=True)
|
37 |
summary_ids = summarizer.generate(inputs["input_ids"], max_length=30, min_length=5, length_penalty=2.0)
|
38 |
prompt = tokenizer.decode(summary_ids[0], skip_special_tokens=True)
|
39 |
|
40 |
-
# Generate frames
|
41 |
frames = []
|
42 |
for i in range(num_frames):
|
43 |
prompt_with_frame = f"{prompt}, frame {i+1}"
|
@@ -45,17 +34,21 @@ def text_to_video(input_text, num_frames=10, fps=2):
|
|
45 |
frames.append(np.array(image))
|
46 |
|
47 |
# Save frames as a video
|
48 |
-
|
49 |
-
|
50 |
-
|
|
|
|
|
|
|
|
|
|
|
51 |
return video_path
|
52 |
|
53 |
-
# Gradio interface
|
54 |
def generate_video(text, frames, fps):
|
55 |
video_file = text_to_video(text, num_frames=frames, fps=fps)
|
56 |
return video_file
|
57 |
|
58 |
-
# Define the Gradio interface
|
59 |
interface = gr.Interface(
|
60 |
fn=generate_video,
|
61 |
inputs=[
|
@@ -68,6 +61,5 @@ interface = gr.Interface(
|
|
68 |
description="Enter a text prompt to generate a short video."
|
69 |
)
|
70 |
|
71 |
-
# Launch the app
|
72 |
if __name__ == "__main__":
|
73 |
interface.launch()
|
|
|
1 |
+
import cv2
|
2 |
import gradio as gr
|
3 |
import torch
|
4 |
from diffusers import StableDiffusionPipeline
|
|
|
5 |
import numpy as np
|
6 |
from transformers.utils import move_cache
|
7 |
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
|
|
|
|
|
8 |
|
9 |
+
# Handle Transformers cache migration
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
10 |
move_cache()
|
11 |
|
12 |
# Initialize the Stable Diffusion pipeline
|
|
|
14 |
pipe = StableDiffusionPipeline.from_pretrained(model_id, torch_dtype=torch.float16)
|
15 |
pipe = pipe.to("cuda")
|
16 |
|
17 |
+
# Load text summarizer
|
18 |
summarizer_model = "facebook/bart-large-cnn"
|
19 |
tokenizer = AutoTokenizer.from_pretrained(summarizer_model)
|
20 |
summarizer = AutoModelForSeq2SeqLM.from_pretrained(summarizer_model)
|
21 |
|
22 |
+
# Create video from images using `OpenCV`
|
23 |
def text_to_video(input_text, num_frames=10, fps=2):
|
24 |
+
# Summarize the input text
|
25 |
inputs = tokenizer(input_text, return_tensors="pt", truncation=True)
|
26 |
summary_ids = summarizer.generate(inputs["input_ids"], max_length=30, min_length=5, length_penalty=2.0)
|
27 |
prompt = tokenizer.decode(summary_ids[0], skip_special_tokens=True)
|
28 |
|
29 |
+
# Generate frames
|
30 |
frames = []
|
31 |
for i in range(num_frames):
|
32 |
prompt_with_frame = f"{prompt}, frame {i+1}"
|
|
|
34 |
frames.append(np.array(image))
|
35 |
|
36 |
# Save frames as a video
|
37 |
+
height, width, layers = frames[0].shape
|
38 |
+
video_path = "output.avi"
|
39 |
+
out = cv2.VideoWriter(video_path, cv2.VideoWriter_fourcc(*'XVID'), fps, (width, height))
|
40 |
+
|
41 |
+
for frame in frames:
|
42 |
+
out.write(cv2.cvtColor(frame, cv2.COLOR_RGB2BGR))
|
43 |
+
out.release()
|
44 |
+
|
45 |
return video_path
|
46 |
|
47 |
+
# Gradio interface
|
48 |
def generate_video(text, frames, fps):
|
49 |
video_file = text_to_video(text, num_frames=frames, fps=fps)
|
50 |
return video_file
|
51 |
|
|
|
52 |
interface = gr.Interface(
|
53 |
fn=generate_video,
|
54 |
inputs=[
|
|
|
61 |
description="Enter a text prompt to generate a short video."
|
62 |
)
|
63 |
|
|
|
64 |
if __name__ == "__main__":
|
65 |
interface.launch()
|