jadechoghari
committed on
Commit
•
59e9a43
1
Parent(s):
a9b9304
Update pipeline.py
Browse files- pipeline.py +53 -28
pipeline.py
CHANGED
@@ -95,44 +95,69 @@ class VidToMePipeline(DiffusionPipeline):
|
|
95 |
from omegaconf import OmegaConf
|
96 |
|
97 |
def _build_config(self, video_path, video_prompt, edit_prompt, control_type,
|
98 |
-
|
99 |
-
|
100 |
-
#
|
101 |
config = OmegaConf.create({
|
102 |
-
'sd_version':
|
103 |
-
'
|
104 |
-
'
|
105 |
-
'work_dir': "
|
106 |
-
'height':
|
107 |
-
'width':
|
108 |
'inversion': {
|
|
|
109 |
'prompt': video_prompt or "Default video prompt.",
|
110 |
-
'
|
111 |
-
'steps': 50,
|
112 |
-
'
|
113 |
-
'
|
|
|
|
|
|
|
|
|
|
|
|
|
114 |
},
|
115 |
'generation': {
|
116 |
-
'control':
|
117 |
-
'
|
118 |
-
'
|
119 |
-
'
|
120 |
-
'
|
121 |
-
'
|
122 |
-
'
|
123 |
-
'
|
124 |
-
'
|
125 |
-
'
|
126 |
-
'
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
127 |
},
|
128 |
-
'seed': seed,
|
129 |
-
'device': "cuda",
|
130 |
-
'float_precision': "fp16",
|
131 |
-
'enable_xformers_memory_efficient_attention': True
|
132 |
})
|
133 |
|
134 |
return config
|
135 |
|
|
|
136 |
# # Sample usage
|
137 |
# pipeline = VidToMePipeline(device="cuda", sd_version="2.1", float_precision="fp16")
|
138 |
# pipeline(video_path="path/to/video.mp4", video_prompt="A beautiful scene of a sunset",
|
|
|
95 |
from omegaconf import OmegaConf
|
96 |
|
97 |
def _build_config(self, video_path, video_prompt, edit_prompt, control_type,
                  n_timesteps, guidance_scale, negative_prompt, frame_range,
                  use_lora, seed, local_merge_ratio, global_merge_ratio):
    """Assemble the OmegaConf configuration driving inversion and generation.

    Pipeline-level state (``sd_version``, ``model_key``, ``height``,
    ``width``) is read from ``self``; per-call options arrive as arguments.
    Returns the composed OmegaConf config object.
    """
    # Inversion settings: latents are cached under work_dir so the
    # generation pass can reuse them ("${work_dir}" is OmegaConf
    # interpolation, resolved against the top-level 'work_dir' key).
    inversion_cfg = {
        'save_path': "${work_dir}/latents",  # where inversion latents are saved
        'prompt': video_prompt or "Default video prompt.",
        'n_frames': None,                    # None => invert all frames
        'steps': 50,                         # inversion steps
        'save_intermediate': False,
        'save_steps': 50,
        'use_blip': False,                   # BLIP prompt creation disabled
        'recon': False,                      # don't reconstruct input from latents
        'control': control_type or "none",   # e.g. 'tile', 'softedge', or 'none'
        'control_scale': 1.0,
        'batch_size': 8,                     # inversion batch size
        'force': False,                      # reuse existing latents when present
    }

    # Generation/editing settings, including Plug-and-Play (PnP) injection
    # thresholds and the local/global token-merging knobs.
    generation_cfg = {
        'control': "pnp",                    # Plug-and-Play generation control
        'pnp_attn_t': 0.5,                   # PnP attention-injection threshold
        'pnp_f_t': 0.8,                      # PnP feature-injection threshold
        'control_scale': 1.0,                # scale for ControlNet-like controls
        'guidance_scale': guidance_scale,    # CFG scale
        'n_timesteps': n_timesteps,          # diffusion timesteps
        'negative_prompt': negative_prompt or "ugly, blurry, low res",
        'prompt': edit_prompt or None,       # edit prompt during generation
        'latents_path': "${work_dir}/latents",  # latents produced by inversion
        'output_path': "${work_dir}",        # final-output directory
        'chunk_size': 4,                     # frames processed per chunk
        'chunk_ord': "mix-4",                # chunk processing order
        'local_merge_ratio': local_merge_ratio,
        'merge_global': True,
        'global_merge_ratio': global_merge_ratio,
        'global_rand': 0.5,                  # randomness in global merge
        'align_batch': True,
        'frame_range': frame_range or [0, 32, 1],  # [start, stop, step] default
        'frame_ids': None,                   # optionally restrict to given frames
        'save_frame': True,                  # save individual frames
        'use_lora': use_lora,
        # Additional LoRA configuration (all unset by default).
        'lora': {
            'pretrained_model_name_or_path_or_dict': None,
            'lora_weight_name': None,
            'lora_adapter': None,
            'lora_weight': 1.0,
        },
    }

    return OmegaConf.create({
        'sd_version': self.sd_version,
        'model_key': self.model_key or None,  # falsy model_key normalized to None
        'input_path': video_path,
        'work_dir': "workdir",
        'height': self.height,
        'width': self.width,
        'inversion': inversion_cfg,
        'generation': generation_cfg,
        'seed': seed,                         # reproducibility seed
        'device': "cuda",
        'float_precision': "fp16",            # mixed-precision inference
        'enable_xformers_memory_efficient_attention': True,
    })
|
159 |
|
160 |
+
|
161 |
# # Sample usage
|
162 |
# pipeline = VidToMePipeline(device="cuda", sd_version="2.1", float_precision="fp16")
|
163 |
# pipeline(video_path="path/to/video.mp4", video_prompt="A beautiful scene of a sunset",
|