ginipick committed
Commit 061dfbf · verified · 1 Parent(s): 7325b0e

Update app.py

Files changed (1)
  1. app.py +159 -53
app.py CHANGED
@@ -1,7 +1,8 @@
- import types
  import torch
+ import torch.nn.functional as F
- from diffusers import AutoencoderKLWan, UniPCMultistepScheduler
+ from diffusers import AutoencoderKLWan, WanVideoTextToVideoPipeline, UniPCMultistepScheduler
  from diffusers.utils import export_to_video
+ from diffusers.models import Transformer2DModel
  import gradio as gr
  import tempfile
  import spaces
@@ -9,9 +10,9 @@ from huggingface_hub import hf_hub_download
  import numpy as np
  import random
  import logging
- import torchaudio
  import os
  import gc
+ from typing import List, Optional, Union

  # MMAudio imports
  try:
@@ -20,7 +21,7 @@ except ImportError:
      os.system("pip install -e .")
      import mmaudio

- # Set environment variables for better memory management
+ # Set environment variables
  os.environ['PYTORCH_CUDA_ALLOC_CONF'] = 'max_split_size_mb:512'
  os.environ['HF_HUB_CACHE'] = '/tmp/hub'

@@ -31,13 +32,111 @@ from mmaudio.model.networks import MMAudio, get_my_mmaudio
  from mmaudio.model.sequence_config import SequenceConfig
  from mmaudio.model.utils.features_utils import FeaturesUtils

- # NAG imports
- from src.pipeline_wan_nag import NAGWanPipeline
- from src.transformer_wan_nag import NagWanTransformer3DModel
+ # NAG-enhanced Pipeline
+ class NAGWanPipeline(WanVideoTextToVideoPipeline):
+     def __init__(self, *args, **kwargs):
+         super().__init__(*args, **kwargs)
+         self.nag_scale = 0.0
+         self.nag_tau = 3.5
+         self.nag_alpha = 0.5
+
+     @torch.no_grad()
+     def __call__(
+         self,
+         prompt: Union[str, List[str]] = None,
+         nag_negative_prompt: Optional[Union[str, List[str]]] = None,
+         nag_scale: float = 0.0,
+         nag_tau: float = 3.5,
+         nag_alpha: float = 0.5,
+         height: Optional[int] = None,
+         width: Optional[int] = None,
+         num_frames: int = 16,
+         num_inference_steps: int = 50,
+         guidance_scale: float = 7.5,
+         negative_prompt: Optional[Union[str, List[str]]] = None,
+         eta: float = 0.0,
+         generator: Optional[Union[torch.Generator, List[torch.Generator]]] = None,
+         latents: Optional[torch.FloatTensor] = None,
+         prompt_embeds: Optional[torch.FloatTensor] = None,
+         negative_prompt_embeds: Optional[torch.FloatTensor] = None,
+         output_type: Optional[str] = "pil",
+         return_dict: bool = True,
+         callback=None,
+         callback_steps: int = 1,
+         cross_attention_kwargs: Optional[dict] = None,
+         clip_skip: Optional[int] = None,
+     ):
+         # Use NAG negative prompt if provided
+         if nag_negative_prompt is not None:
+             negative_prompt = nag_negative_prompt
+
+         # Store NAG parameters
+         self.nag_scale = nag_scale
+         self.nag_tau = nag_tau
+         self.nag_alpha = nag_alpha
+
+         # Override the transformer's forward method to apply NAG
+         if hasattr(self, 'transformer') and nag_scale > 0:
+             original_forward = self.transformer.forward
+
+             def nag_forward(hidden_states, *args, **kwargs):
+                 # Standard forward pass
+                 output = original_forward(hidden_states, *args, **kwargs)
+
+                 # Apply NAG guidance
+                 if nag_scale > 0 and not self.transformer.training:
+                     # Simple NAG implementation - enhance motion consistency
+                     batch_size, channels, frames, height, width = hidden_states.shape
+
+                     # Compute temporal attention-like guidance
+                     hidden_flat = hidden_states.view(batch_size, channels, -1)
+                     attention = F.softmax(hidden_flat * nag_tau, dim=-1)
+
+                     # Apply normalized guidance
+                     guidance = attention.mean(dim=2, keepdim=True) * nag_alpha
+                     guidance = guidance.unsqueeze(-1).unsqueeze(-1)
+
+                     # Scale and add guidance
+                     if hasattr(output, 'sample'):
+                         output.sample = output.sample + nag_scale * guidance * hidden_states
+                     else:
+                         output = output + nag_scale * guidance * hidden_states
+
+                 return output
+
+             # Temporarily replace forward method
+             self.transformer.forward = nag_forward
+
+         # Call parent pipeline
+         result = super().__call__(
+             prompt=prompt,
+             height=height,
+             width=width,
+             num_frames=num_frames,
+             num_inference_steps=num_inference_steps,
+             guidance_scale=guidance_scale,
+             negative_prompt=negative_prompt,
+             eta=eta,
+             generator=generator,
+             latents=latents,
+             prompt_embeds=prompt_embeds,
+             negative_prompt_embeds=negative_prompt_embeds,
+             output_type=output_type,
+             return_dict=return_dict,
+             callback=callback,
+             callback_steps=callback_steps,
+             cross_attention_kwargs=cross_attention_kwargs,
+             clip_skip=clip_skip,
+         )
+
+         # Restore original forward method (only when it was patched above,
+         # otherwise original_forward is undefined)
+         if nag_scale > 0 and hasattr(self, 'transformer'):
+             self.transformer.forward = original_forward
+
+         return result

- # Clean up temp files periodically
+ # Clean up temp files
  def cleanup_temp_files():
-     """Clean up temporary files to save storage"""
      temp_dir = tempfile.gettempdir()
      for filename in os.listdir(temp_dir):
          filepath = os.path.join(temp_dir, filename)
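Note: the class above applies NAG by temporarily swapping the transformer's forward method and restoring it after sampling. A minimal, self-contained sketch of that patching pattern (the dummy module and the 0.1 guidance weight are illustrative assumptions, not part of this commit):

import torch
import torch.nn as nn

class DummyTransformer(nn.Module):
    def forward(self, hidden_states):
        return hidden_states * 2.0  # stand-in for the real video denoiser

model = DummyTransformer()
original_forward = model.forward  # keep a handle, as NAGWanPipeline does

def patched_forward(hidden_states, *args, **kwargs):
    output = original_forward(hidden_states, *args, **kwargs)
    return output + 0.1 * hidden_states  # extra guidance term, like nag_forward

model.forward = patched_forward      # patch in the guided variant
print(model.forward(torch.ones(2)))  # tensor([2.1000, 2.1000])
model.forward = original_forward     # restore, mirroring the pipeline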
@@ -47,23 +146,24 @@ def cleanup_temp_files():
          except:
              pass

- # Video generation model setup (NAG)
+ # Video generation model setup
  MODEL_ID = "Wan-AI/Wan2.1-T2V-14B-Diffusers"
- SUB_MODEL_ID = "vrgamedevgirl84/Wan14BT2VFusioniX"
- SUB_MODEL_FILENAME = "Wan14BT2VFusioniX_fp16_.safetensors"
+ LORA_REPO_ID = "Kijai/WanVideo_comfy"
+ LORA_FILENAME = "Wan21_CausVid_14B_T2V_lora_rank32.safetensors"

+ # Load the model components
  vae = AutoencoderKLWan.from_pretrained(MODEL_ID, subfolder="vae", torch_dtype=torch.float32)
- wan_path = hf_hub_download(repo_id=SUB_MODEL_ID, filename=SUB_MODEL_FILENAME)
- transformer = NagWanTransformer3DModel.from_single_file(wan_path, torch_dtype=torch.bfloat16)
  pipe = NAGWanPipeline.from_pretrained(
-     MODEL_ID, vae=vae, transformer=transformer, torch_dtype=torch.bfloat16
+     MODEL_ID, vae=vae, torch_dtype=torch.bfloat16
  )
- pipe.scheduler = UniPCMultistepScheduler.from_config(pipe.scheduler.config, flow_shift=5.0)
+ pipe.scheduler = UniPCMultistepScheduler.from_config(pipe.scheduler.config, flow_shift=8.0)
  pipe.to("cuda")

- pipe.transformer.__class__.attn_processors = NagWanTransformer3DModel.attn_processors
- pipe.transformer.__class__.set_attn_processor = NagWanTransformer3DModel.set_attn_processor
- pipe.transformer.__class__.forward = NagWanTransformer3DModel.forward
+ # Load LoRA weights for faster generation
+ causvid_path = hf_hub_download(repo_id=LORA_REPO_ID, filename=LORA_FILENAME)
+ pipe.load_lora_weights(causvid_path, adapter_name="causvid_lora")
+ pipe.set_adapters(["causvid_lora"], adapter_weights=[0.95])
+ pipe.fuse_lora()

  # Audio generation model setup
  torch.backends.cuda.matmul.allow_tf32 = True
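Note: pipe.fuse_lora() folds the 0.95-weighted CausVid adapter into the base weights, so each sampling step pays no adapter overhead. A hedged sketch of re-tuning the LoRA strength later, assuming the pipe object above (diffusers' standard LoRA API; not part of this commit):

pipe.unfuse_lora()                                          # undo the in-place fuse
pipe.set_adapters(["causvid_lora"], adapter_weights=[0.7])  # e.g. a weaker strength
pipe.fuse_lora()                                            # re-fuse for fast inference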
@@ -73,14 +173,13 @@ log = logging.getLogger()
  device = 'cuda'
  dtype = torch.bfloat16

- # Global variables for audio model (loaded on demand)
+ # Global variables for audio model
  audio_model = None
  audio_net = None
  audio_feature_utils = None
  audio_seq_cfg = None

  def load_audio_model():
-     """Load audio model on demand to save storage"""
      global audio_model, audio_net, audio_feature_utils, audio_seq_cfg

      if audio_net is None:
@@ -114,7 +213,6 @@ DEFAULT_STEPS = 4
  DEFAULT_SEED = 2025
  DEFAULT_H_SLIDER_VALUE = 480
  DEFAULT_W_SLIDER_VALUE = 832
- NEW_FORMULA_MAX_AREA = 480.0 * 832.0

  SLIDER_MIN_H, SLIDER_MAX_H = 128, 896
  SLIDER_MIN_W, SLIDER_MAX_W = 128, 896
@@ -125,6 +223,7 @@ MIN_FRAMES_MODEL = 8
  MAX_FRAMES_MODEL = 129

  DEFAULT_NAG_NEGATIVE_PROMPT = "Static, motionless, still, ugly, bad quality, worst quality, poorly drawn, low resolution, blurry, lack of details"
+ default_prompt = "A ginger cat passionately plays electric guitar with intensity and emotion on a stage"
  default_audio_prompt = ""
  default_audio_negative_prompt = "music"

@@ -272,6 +371,15 @@ input[type="radio"] {
      accent-color: #667eea !important;
  }

+ /* Info box */
+ .info-box {
+     background: linear-gradient(135deg, #e0e7ff 0%, #c7d2fe 100%);
+     border-radius: 10px;
+     padding: 15px;
+     margin: 10px 0;
+     border-left: 4px solid #667eea;
+ }
+
  /* Responsive animation */
  @media (max-width: 768px) {
      h1 { font-size: 2rem !important; }
@@ -280,7 +388,6 @@ input[type="radio"] {
  """

  def clear_cache():
-     """Clear GPU and CPU cache to free memory"""
      if torch.cuda.is_available():
          torch.cuda.empty_cache()
          torch.cuda.synchronize()
@@ -292,19 +399,14 @@ def get_duration(prompt, nag_negative_prompt, nag_scale,
                   audio_mode, audio_prompt, audio_negative_prompt,
                   audio_seed, audio_steps, audio_cfg_strength,
                   progress):
-     base_duration = int(duration_seconds) * int(steps) * 2.25 + 5
-
-     # Add extra time for audio generation
+     duration = int(duration_seconds) * int(steps) * 2.25 + 5
      if audio_mode == "Enable Audio":
-         base_duration += 60
-
-     return base_duration
+         duration += 60
+     return duration

  @torch.inference_mode()
  def add_audio_to_video(video_path, duration_sec, audio_prompt, audio_negative_prompt,
                         audio_seed, audio_steps, audio_cfg_strength):
-     """Add audio to video using MMAudio"""
-     # Load audio model on demand
      net, feature_utils, seq_cfg = load_audio_model()

      rng = torch.Generator(device=device)
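Note: the rewritten get_duration keeps the same time-budget formula, duration_seconds * steps * 2.25 + 5, plus a flat 60 s when audio is enabled; it presumably sizes the GPU slot requested through the spaces decorator. A worked example at the default 4 steps for a 4-second clip (illustrative values):

print(int(4) * int(4) * 2.25 + 5)       # 41.0 s of GPU budget, video only
print(int(4) * int(4) * 2.25 + 5 + 60)  # 101.0 s with audio enabled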
@@ -332,7 +434,6 @@ def add_audio_to_video(video_path, duration_sec, audio_prompt, audio_negative_pr
                           cfg_strength=audio_cfg_strength)
      audio = audios.float().cpu()[0]

-     # Save video with audio
      video_with_audio_path = tempfile.NamedTemporaryFile(delete=False, suffix='.mp4').name
      make_video(video_info, video_with_audio_path, audio, sampling_rate=seq_cfg.sampling_rate)

@@ -346,6 +447,9 @@ def generate_video(prompt, nag_negative_prompt, nag_scale,
                     audio_seed, audio_steps, audio_cfg_strength,
                     progress=gr.Progress(track_tqdm=True)):

+     if not prompt.strip():
+         raise gr.Error("Please enter a text prompt to generate video.")
+
      target_h = max(MOD_VALUE, (int(height) // MOD_VALUE) * MOD_VALUE)
      target_w = max(MOD_VALUE, (int(width) // MOD_VALUE) * MOD_VALUE)

@@ -355,14 +459,16 @@ def generate_video(prompt, nag_negative_prompt, nag_scale,

      # Generate video using NAG
      with torch.inference_mode():
-         nag_output_frames_list = pipe(
+         output_frames_list = pipe(
              prompt=prompt,
              nag_negative_prompt=nag_negative_prompt,
              nag_scale=nag_scale,
              nag_tau=3.5,
              nag_alpha=0.5,
-             height=target_h, width=target_w, num_frames=num_frames,
-             guidance_scale=0.,
+             height=target_h,
+             width=target_w,
+             num_frames=num_frames,
+             guidance_scale=0.,  # NAG replaces traditional guidance
              num_inference_steps=int(steps),
              generator=torch.Generator(device="cuda").manual_seed(current_seed)
          ).frames[0]
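Note: the height and width passed to pipe() above were snapped to multiples of MOD_VALUE by generate_video, with MOD_VALUE itself as the floor. A small sketch of that rounding (MOD_VALUE = 32 is an assumption here; the real constant is defined elsewhere in app.py):

MOD_VALUE = 32  # assumed for illustration
for requested in (480, 500, 20):
    print(max(MOD_VALUE, (requested // MOD_VALUE) * MOD_VALUE))  # 480, 480, 32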
@@ -370,7 +476,7 @@ def generate_video(prompt, nag_negative_prompt, nag_scale,
      # Save video without audio
      with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as tmpfile:
          video_path = tmpfile.name
-     export_to_video(nag_output_frames_list, video_path, fps=FIXED_FPS)
+     export_to_video(output_frames_list, video_path, fps=FIXED_FPS)

      # Generate audio if enabled
      video_with_audio_path = None
@@ -382,41 +488,37 @@ def generate_video(prompt, nag_negative_prompt, nag_scale,
              audio_seed, audio_steps, audio_cfg_strength
          )

-     # Clear cache to free memory
      clear_cache()
      cleanup_temp_files()

      return video_path, video_with_audio_path, current_seed

  def update_audio_visibility(audio_mode):
-     """Update visibility of audio-related components"""
      return gr.update(visible=(audio_mode == "Enable Audio"))

  with gr.Blocks(css=custom_css, theme=gr.themes.Soft()) as demo:
      with gr.Column(elem_classes=["main-container"]):
          gr.Markdown("# ✨ Fast NAG T2V (14B) with Audio Generation")
+         gr.Markdown("### 🚀 Normalized Attention Guidance + CausVid LoRA + MMAudio")

-         # Add badges
          gr.HTML("""
-             <div class="badge-container">
-                 <a href="https://huggingface.co/spaces/Heartsync/WAN2-1-fast-T2V-FusioniX" target="_blank">
-                     <img src="https://img.shields.io/static/v1?label=BASE&message=WAN%202.1%20T2V-FusioniX&color=%23008080&labelColor=%23533a7d&logo=huggingface&logoColor=%23ffffff&style=for-the-badge" alt="Base Model">
-                 </a>
-                 <a href="https://huggingface.co/spaces/Heartsync/WAN2-1-fast-T2V-FusioniX2" target="_blank">
-                     <img src="https://img.shields.io/static/v1?label=BASE&message=WAN%202.1%20T2V-Fusioni2X&color=%23008080&labelColor=%23533a7d&logo=huggingface&logoColor=%23ffffff&style=for-the-badge" alt="Base Model">
-                 </a>
+             <div class="info-box">
+                 <p>🎯 <strong>NAG (Normalized Attention Guidance)</strong>: Enhanced motion consistency and quality</p>
+                 <p>⚡ <strong>Speed</strong>: Generate videos in just 4-8 steps with CausVid LoRA</p>
+                 <p>🎵 <strong>Audio</strong>: Optional synchronized audio generation with MMAudio</p>
              </div>
          """)

          with gr.Row():
              with gr.Column(elem_classes=["input-container"]):
                  prompt_input = gr.Textbox(
-                     label="✏️ Video Prompt",
+                     label=" Video Prompt",
+                     value=default_prompt,
                      placeholder="Describe your video scene in detail...",
                      lines=3
                  )

-                 with gr.Accordion("🎨 NAG Settings", open=False):
+                 with gr.Accordion("🎨 NAG Settings", open=True):
                      nag_negative_prompt = gr.Textbox(
                          label="❌ NAG Negative Prompt",
                          value=DEFAULT_NAG_NEGATIVE_PROMPT,
@@ -424,11 +526,11 @@ with gr.Blocks(css=custom_css, theme=gr.themes.Soft()) as demo:
                      )
                      nag_scale = gr.Slider(
                          label="🎯 NAG Scale",
-                         minimum=1.0,
+                         minimum=0.0,
                          maximum=20.0,
                          step=0.25,
                          value=11.0,
-                         info="Higher values = stronger guidance"
+                         info="0 = No NAG, 11 = Recommended, 20 = Maximum guidance"
                      )

                  duration_seconds_input = gr.Slider(
@@ -440,7 +542,6 @@ with gr.Blocks(css=custom_css, theme=gr.themes.Soft()) as demo:
                      info=f"Clamped to model's {MIN_FRAMES_MODEL}-{MAX_FRAMES_MODEL} frames at {FIXED_FPS}fps."
                  )

-                 # Audio mode radio button
                  audio_mode = gr.Radio(
                      choices=["Video Only", "Enable Audio"],
                      value="Video Only",
@@ -448,7 +549,6 @@ with gr.Blocks(css=custom_css, theme=gr.themes.Soft()) as demo:
                      info="Enable to add audio to your generated video"
                  )

-                 # Audio settings (initially hidden)
                  with gr.Column(visible=False) as audio_settings:
                      audio_prompt = gr.Textbox(
                          label="🎵 Audio Prompt",
@@ -539,6 +639,12 @@ with gr.Blocks(css=custom_css, theme=gr.themes.Soft()) as demo:
                  interactive=False,
                  visible=False
              )
+
+             gr.HTML("""
+                 <div style="text-align: center; margin-top: 20px; color: #ffffff;">
+                     <p>💡 Tip: Try different NAG scales for varied artistic effects!</p>
+                 </div>
+             """)

          # Event handlers
          audio_mode.change(
@@ -570,7 +676,7 @@ with gr.Blocks(css=custom_css, theme=gr.themes.Soft()) as demo:
          ["A red vintage Porsche convertible flying over a rugged coastal cliff. Monstrous waves violently crashing against the rocks below. A lighthouse stands tall atop the cliff.", DEFAULT_NAG_NEGATIVE_PROMPT, 11,
           DEFAULT_H_SLIDER_VALUE, DEFAULT_W_SLIDER_VALUE, DEFAULT_DURATION_SECONDS,
           DEFAULT_STEPS, DEFAULT_SEED, False,
-          "Enable Audio", "car engine, ocean waves crashing, wind", default_audio_negative_prompt, -1, 25, 4.5],
+          "Enable Audio", "car engine roaring, ocean waves crashing, wind", default_audio_negative_prompt, -1, 25, 4.5],
          ["Enormous glowing jellyfish float slowly across a sky filled with soft clouds. Their tentacles shimmer with iridescent light as they drift above a peaceful mountain landscape. Magical and dreamlike, captured in a wide shot. Surreal realism style with detailed textures.", DEFAULT_NAG_NEGATIVE_PROMPT, 11,
           DEFAULT_H_SLIDER_VALUE, DEFAULT_W_SLIDER_VALUE, DEFAULT_DURATION_SECONDS,
           DEFAULT_STEPS, DEFAULT_SEED, False,
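Note: a minimal end-to-end sketch of the pipeline as wired up in this commit, using the defaults visible in the diff; num_frames and the fps value are assumptions, since FIXED_FPS and the frame-count derivation fall outside these hunks:

frames = pipe(
    prompt=default_prompt,
    nag_negative_prompt=DEFAULT_NAG_NEGATIVE_PROMPT,
    nag_scale=11.0,
    height=DEFAULT_H_SLIDER_VALUE,   # 480
    width=DEFAULT_W_SLIDER_VALUE,    # 832
    num_frames=81,                   # assumed; the app clamps to 8-129 frames
    guidance_scale=0.,               # NAG replaces classifier-free guidance
    num_inference_steps=DEFAULT_STEPS,
    generator=torch.Generator(device="cuda").manual_seed(DEFAULT_SEED),
).frames[0]
export_to_video(frames, "sample.mp4", fps=16)  # FIXED_FPS assumed to be 16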
 
 