Spaces:
Runtime error
Runtime error
File size: 3,104 Bytes
f21ef4e 474e88e f21ef4e 474e88e f21ef4e 605a78f f21ef4e 605a78f f21ef4e 605a78f f21ef4e 77e44b8 f21ef4e 77e44b8 ac409c4 f21ef4e 474e88e f21ef4e 77e44b8 f21ef4e ac409c4 f21ef4e 474e88e f21ef4e |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 |
import torch
import gradio as gr
from diffusers import CogVideoXPipeline, CogVideoXDPMScheduler, CogVideoXTransformer3DModel
from huggingface_hub import hf_hub_download, snapshot_download
# Select GPU when available; fall back to CPU (CogVideoX-5b on CPU will be
# extremely slow — a CUDA device is effectively required).
device = "cuda" if torch.cuda.is_available() else "cpu"
# Pre-download auxiliary weights for optional post-processing: Real-ESRGAN
# (4x upscaling) and RIFE (frame interpolation). They are only downloaded
# here — nothing in this script loads or uses them yet.
# NOTE(review): local_dir "model_real_esran" looks like a typo for
# "model_real_esrgan" — harmless on its own, but confirm no other code
# expects the corrected path before renaming.
hf_hub_download(repo_id="ai-forever/Real-ESRGAN", filename="RealESRGAN_x4.pth", local_dir="model_real_esran")
snapshot_download(repo_id="AlexWortega/RIFE", local_dir="model_rife")
# Load the text-to-video pipeline (CogVideoX-5b) in bfloat16 and move it to
# the selected device, then swap in the DPM scheduler with "trailing"
# timestep spacing.
pipe = CogVideoXPipeline.from_pretrained("THUDM/CogVideoX-5b", torch_dtype=torch.bfloat16).to(device)
pipe.scheduler = CogVideoXDPMScheduler.from_config(pipe.scheduler.config, timestep_spacing="trailing")
# Image-to-video transformer weights. These are loaded but never wired into
# `pipe`, so the image uploaded in the UI is currently not used for
# conditioning.
i2v_transformer = CogVideoXTransformer3DModel.from_pretrained(
    "THUDM/CogVideoX-5b-I2V", subfolder="transformer", torch_dtype=torch.bfloat16
)
def generate_video(prompt, style, duration, image):
    """
    Generate a Pokémon-themed video clip from a text prompt.

    The user prompt is "flavored" with the chosen style and iconic Pokémon
    elements (Ash, Pikachu, Team Rocket) before being passed to the
    CogVideoX pipeline.

    Parameters
    ----------
    prompt : str
        User-supplied scene description.
    style : str
        Animation style name, embedded into the prompt text.
    duration : int
        Requested duration in seconds. Only embedded into the prompt text —
        the pipeline call does not take a duration parameter.
    image : str | None
        Optional image file path from the Gradio UI. Currently ignored:
        the loaded pipeline is text-to-video only.

    Returns
    -------
    str
        Path to an MP4 file, suitable for a gr.Video output component.
    """
    # Function-scope import keeps this fix self-contained; `diffusers` is
    # already a file-level dependency.
    from diffusers.utils import export_to_video

    # Combine user input with style and Pokémon-specific flavor.
    full_prompt = (
        f"{prompt}, in {style} style, lasting {duration} seconds. "
        "Include iconic Pokémon elements like Ash, Pikachu, and Team Rocket."
    )
    result = pipe(full_prompt, num_inference_steps=50, guidance_scale=7.5)
    # BUG FIX: CogVideoXPipeline returns a CogVideoXPipelineOutput whose
    # attribute is `frames` (a list of per-prompt frame sequences), not
    # `videos` — the original `result.videos[0]` raised AttributeError.
    frames = result.frames[0]
    # gr.Video expects a file path, not raw frames: encode the frames to an
    # MP4 file and return its path. 8 fps matches CogVideoX's native rate.
    return export_to_video(frames, fps=8)
# Build the Gradio UI: prompt/style/duration/image inputs, a generate
# button, and a video output, wired to generate_video.
with gr.Blocks() as demo:
    gr.Markdown("# 🎥 PokeVidGen AI")
    gr.Markdown("Generate Pokémon anime shorts using CogVideoX-5b! Enter your scene prompt, choose an animation style, set the duration, and optionally upload an image.")
    with gr.Row():
        # Inputs: free-text scene prompt, style preset, duration slider
        # (1–10 s), and an optional image (type="filepath" → the handler
        # receives a path string, or None when no image is uploaded).
        prompt_input = gr.Textbox(label="Enter Pokémon Scene", placeholder="Ash battles Team Rocket with Pikachu's Thunderbolt")
        style_input = gr.Dropdown(choices=["Anime Classic", "Modern 3D", "Cartoon"], label="Animation Style", value="Anime Classic")
        duration_input = gr.Slider(minimum=1, maximum=10, step=1, label="Duration (seconds)", value=5)
        image_input = gr.Image(label="Optional Image", type="filepath")
    generate_button = gr.Button("Generate Video")
    video_output = gr.Video(label="Generated Video")
    # The inputs list maps positionally onto generate_video's parameters
    # (prompt, style, duration, image).
    generate_button.click(fn=generate_video, inputs=[prompt_input, style_input, duration_input, image_input], outputs=video_output)
# Start the Gradio server (blocking call).
demo.launch()
|