# Spaces: Running
import tempfile
from pathlib import Path

import cv2
import gradio as gr
import numpy as np
import torch
from diffusers import StableDiffusionPipeline
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
from transformers.utils import move_cache
# Handle the Transformers cache migration before any model is loaded.
move_cache()

# Initialize the Stable Diffusion pipeline.
# Half precision is only requested when a CUDA device is available; on a
# CPU-only host the pipeline is loaded in float32 and kept on the CPU instead
# of failing at startup on the unconditional move to "cuda".
model_id = "CompVis/stable-diffusion-v1-4"
device = "cuda" if torch.cuda.is_available() else "cpu"
torch_dtype = torch.float16 if device == "cuda" else torch.float32
pipe = StableDiffusionPipeline.from_pretrained(model_id, torch_dtype=torch_dtype)
pipe = pipe.to(device)

# Load the text summarizer (tokenizer + seq2seq model). It is never moved to
# another device here, so it runs wherever from_pretrained places it.
summarizer_model = "facebook/bart-large-cnn"
tokenizer = AutoTokenizer.from_pretrained(summarizer_model)
summarizer = AutoModelForSeq2SeqLM.from_pretrained(summarizer_model)
# Create a video from generated images using OpenCV
def _summarize_prompt(input_text):
    """Condense *input_text* into a short prompt with the module-level summarizer."""
    inputs = tokenizer(input_text, return_tensors="pt", truncation=True)
    summary_ids = summarizer.generate(
        inputs["input_ids"],
        # Pass the mask explicitly so generation does not have to infer it.
        attention_mask=inputs["attention_mask"],
        max_length=30,
        min_length=5,
        length_penalty=2.0,
    )
    return tokenizer.decode(summary_ids[0], skip_special_tokens=True)


def _render_frames(prompt, num_frames):
    """Run the diffusion pipeline once per frame and return the images as arrays."""
    return [
        np.array(pipe(f"{prompt}, frame {index}").images[0])
        for index in range(1, num_frames + 1)
    ]


def _write_video(frames, video_path, fps):
    """Encode *frames* (a non-empty list of RGB arrays) to *video_path* as XVID."""
    height, width = frames[0].shape[:2]
    writer = cv2.VideoWriter(
        video_path, cv2.VideoWriter_fourcc(*"XVID"), fps, (width, height)
    )
    if not writer.isOpened():
        # Without this check a missing codec silently yields an empty file.
        writer.release()
        raise RuntimeError(f"Could not open video writer for {video_path!r}")
    try:
        for frame in frames:
            # OpenCV expects BGR channel order.
            writer.write(cv2.cvtColor(frame, cv2.COLOR_RGB2BGR))
    finally:
        # Release the writer even if a frame fails to convert or write.
        writer.release()


def text_to_video(input_text, num_frames=10, fps=2):
    """Generate a short video from a text description.

    The text is summarized into a prompt, one image is generated per frame
    from that prompt (suffixed with the frame number), and the images are
    encoded into an AVI file.

    Args:
        input_text: Free-form text describing the desired video.
        num_frames: Number of frames to generate; must be at least 1.
            Whole-number floats are accepted and converted to int.
        fps: Playback rate of the encoded video; must be positive.

    Returns:
        Path of the written ``.avi`` file. Each call writes to its own
        temporary file, so concurrent calls do not overwrite each other;
        the caller owns the file and may delete it when done.

    Raises:
        ValueError: If ``input_text`` is empty or blank, ``num_frames`` is
            less than 1, or ``fps`` is not positive.
        RuntimeError: If the video writer cannot be opened.
    """
    # Validate before touching any model so bad input fails fast and clearly.
    if input_text is None or not input_text.strip():
        raise ValueError("input_text must be a non-empty string")
    num_frames = int(num_frames)  # UI widgets may deliver whole numbers as floats
    if num_frames < 1:
        raise ValueError("num_frames must be at least 1")
    if fps <= 0:
        raise ValueError("fps must be positive")

    prompt = _summarize_prompt(input_text)
    frames = _render_frames(prompt, num_frames)

    # Reserve a unique output path only after the expensive generation step
    # has succeeded; the handle is closed so OpenCV can reopen the file.
    with tempfile.NamedTemporaryFile(suffix=".avi", delete=False) as handle:
        video_path = handle.name
    try:
        _write_video(frames, video_path, fps)
    except Exception:
        # Do not leave a truncated or empty file behind on failure.
        Path(video_path).unlink(missing_ok=True)
        raise
    return video_path
# Gradio interface
def generate_video(text, frames, fps):
    """Gradio callback: forward the UI values to text_to_video.

    Args:
        text: Prompt text entered by the user.
        frames: Requested number of frames (passed on as ``num_frames``).
        fps: Requested frames per second.

    Returns:
        Whatever text_to_video returns (the path of the generated video).
    """
    return text_to_video(text, num_frames=frames, fps=fps)
# Input widgets, in the order generate_video expects its arguments:
# prompt text, number of frames, frames per second.
_prompt_box = gr.Textbox(label="Enter your text prompt")
_frame_slider = gr.Slider(5, 30, value=10, step=1, label="Number of Frames")
_fps_slider = gr.Slider(1, 10, value=2, step=1, label="Frames per Second (FPS)")

# Output widget that plays the file whose path generate_video returns.
_video_player = gr.Video(label="Generated Video")

interface = gr.Interface(
    fn=generate_video,
    inputs=[_prompt_box, _frame_slider, _fps_slider],
    outputs=_video_player,
    title="Text-to-Video Generator",
    description="Enter a text prompt to generate a short video.",
)

if __name__ == "__main__":
    # Launch the app only when this file is run as a script.
    interface.launch()