"""Command-line argument definitions for the CogVideoX training script."""
import argparse | |
def _get_model_args(parser: argparse.ArgumentParser) -> None: | |
parser.add_argument( | |
"--pretrained_model_name_or_path", | |
type=str, | |
default=None, | |
required=True, | |
help="Path to pretrained model or model identifier from huggingface.co/models.", | |
) | |
parser.add_argument( | |
"--revision", | |
type=str, | |
default=None, | |
required=False, | |
help="Revision of pretrained model identifier from huggingface.co/models.", | |
) | |
parser.add_argument( | |
"--variant", | |
type=str, | |
default=None, | |
help="Variant of the model files of the pretrained model identifier from huggingface.co/models, 'e.g.' fp16", | |
) | |
parser.add_argument( | |
"--cache_dir", | |
type=str, | |
default=None, | |
help="The directory where the downloaded models and datasets will be stored.", | |
) | |
def _get_dataset_args(parser: argparse.ArgumentParser) -> None: | |
parser.add_argument( | |
"--data_root", | |
type=str, | |
default=None, | |
help=("A folder containing the training data."), | |
) | |
parser.add_argument( | |
"--dataset_file", | |
type=str, | |
default=None, | |
help=("Path to a CSV file if loading prompts/video paths using this format."), | |
) | |
parser.add_argument( | |
"--video_column", | |
type=str, | |
default="video", | |
help="The column of the dataset containing videos. Or, the name of the file in `--data_root` folder containing the line-separated path to video data.", | |
) | |
parser.add_argument( | |
"--caption_column", | |
type=str, | |
default="text", | |
help="The column of the dataset containing the instance prompt for each video. Or, the name of the file in `--data_root` folder containing the line-separated instance prompts.", | |
) | |
parser.add_argument( | |
"--id_token", | |
type=str, | |
default=None, | |
help="Identifier token appended to the start of each prompt if provided.", | |
) | |
parser.add_argument( | |
"--height_buckets", | |
nargs="+", | |
type=int, | |
default=[256, 320, 384, 480, 512, 576, 720, 768, 960, 1024, 1280, 1536], | |
) | |
parser.add_argument( | |
"--width_buckets", | |
nargs="+", | |
type=int, | |
default=[256, 320, 384, 480, 512, 576, 720, 768, 960, 1024, 1280, 1536], | |
) | |
parser.add_argument( | |
"--frame_buckets", | |
nargs="+", | |
type=int, | |
default=[49], | |
help="CogVideoX1.5 need to guarantee that ((num_frames - 1) // self.vae_scale_factor_temporal + 1) % patch_size_t == 0, such as 53" | |
) | |
parser.add_argument( | |
"--load_tensors", | |
action="store_true", | |
help="Whether to use a pre-encoded tensor dataset of latents and prompt embeddings instead of videos and text prompts. The expected format is that saved by running the `prepare_dataset.py` script.", | |
) | |
parser.add_argument( | |
"--random_flip", | |
type=float, | |
default=None, | |
help="If random horizontal flip augmentation is to be used, this should be the flip probability.", | |
) | |
parser.add_argument( | |
"--dataloader_num_workers", | |
type=int, | |
default=0, | |
help="Number of subprocesses to use for data loading. 0 means that the data will be loaded in the main process.", | |
) | |
parser.add_argument( | |
"--pin_memory", | |
action="store_true", | |
help="Whether or not to use the pinned memory setting in pytorch dataloader.", | |
) | |
def _get_validation_args(parser: argparse.ArgumentParser) -> None: | |
parser.add_argument( | |
"--validation_prompt", | |
type=str, | |
default=None, | |
help="One or more prompt(s) that is used during validation to verify that the model is learning. Multiple validation prompts should be separated by the '--validation_prompt_seperator' string.", | |
) | |
parser.add_argument( | |
"--validation_images", | |
type=str, | |
default=None, | |
help="One or more image path(s)/URLs that is used during validation to verify that the model is learning. Multiple validation paths should be separated by the '--validation_prompt_seperator' string. These should correspond to the order of the validation prompts.", | |
) | |
parser.add_argument( | |
"--validation_prompt_separator", | |
type=str, | |
default=":::", | |
help="String that separates multiple validation prompts", | |
) | |
parser.add_argument( | |
"--num_validation_videos", | |
type=int, | |
default=1, | |
help="Number of videos that should be generated during validation per `validation_prompt`.", | |
) | |
parser.add_argument( | |
"--validation_epochs", | |
type=int, | |
default=None, | |
help="Run validation every X training epochs. Validation consists of running the validation prompt `args.num_validation_videos` times.", | |
) | |
parser.add_argument( | |
"--validation_steps", | |
type=int, | |
default=None, | |
help="Run validation every X training steps. Validation consists of running the validation prompt `args.num_validation_videos` times.", | |
) | |
parser.add_argument( | |
"--guidance_scale", | |
type=float, | |
default=6, | |
help="The guidance scale to use while sampling validation videos.", | |
) | |
parser.add_argument( | |
"--use_dynamic_cfg", | |
action="store_true", | |
default=False, | |
help="Whether or not to use the default cosine dynamic guidance schedule when sampling validation videos.", | |
) | |
parser.add_argument( | |
"--enable_model_cpu_offload", | |
action="store_true", | |
default=False, | |
help="Whether or not to enable model-wise CPU offloading when performing validation/testing to save memory.", | |
) | |
def _get_training_args(parser: argparse.ArgumentParser) -> None: | |
parser.add_argument("--seed", type=int, default=None, help="A seed for reproducible training.") | |
parser.add_argument("--rank", type=int, default=64, help="The rank for LoRA matrices.") | |
parser.add_argument( | |
"--lora_alpha", | |
type=int, | |
default=64, | |
help="The lora_alpha to compute scaling factor (lora_alpha / rank) for LoRA matrices.", | |
) | |
parser.add_argument( | |
"--mixed_precision", | |
type=str, | |
default=None, | |
choices=["no", "fp16", "bf16"], | |
help=( | |
"Whether to use mixed precision. Choose between fp16 and bf16 (bfloat16). Bf16 requires PyTorch >= 1.10.and an Nvidia Ampere GPU. " | |
"Default to the value of accelerate config of the current system or the flag passed with the `accelerate.launch` command. Use this " | |
"argument to override the accelerate config." | |
), | |
) | |
parser.add_argument( | |
"--output_dir", | |
type=str, | |
default="cogvideox-sft", | |
help="The output directory where the model predictions and checkpoints will be written.", | |
) | |
parser.add_argument( | |
"--height", | |
type=int, | |
default=480, | |
help="All input videos are resized to this height.", | |
) | |
parser.add_argument( | |
"--width", | |
type=int, | |
default=720, | |
help="All input videos are resized to this width.", | |
) | |
parser.add_argument( | |
"--video_reshape_mode", | |
type=str, | |
default=None, | |
help="All input videos are reshaped to this mode. Choose between ['center', 'random', 'none']", | |
) | |
parser.add_argument("--fps", type=int, default=8, help="All input videos will be used at this FPS.") | |
parser.add_argument( | |
"--max_num_frames", | |
type=int, | |
default=49, | |
help="All input videos will be truncated to these many frames.", | |
) | |
parser.add_argument( | |
"--skip_frames_start", | |
type=int, | |
default=0, | |
help="Number of frames to skip from the beginning of each input video. Useful if training data contains intro sequences.", | |
) | |
parser.add_argument( | |
"--skip_frames_end", | |
type=int, | |
default=0, | |
help="Number of frames to skip from the end of each input video. Useful if training data contains outro sequences.", | |
) | |
parser.add_argument( | |
"--train_batch_size", | |
type=int, | |
default=4, | |
help="Batch size (per device) for the training dataloader.", | |
) | |
parser.add_argument("--num_train_epochs", type=int, default=1) | |
parser.add_argument( | |
"--max_train_steps", | |
type=int, | |
default=None, | |
help="Total number of training steps to perform. If provided, overrides `--num_train_epochs`.", | |
) | |
parser.add_argument( | |
"--checkpointing_steps", | |
type=int, | |
default=500, | |
help=( | |
"Save a checkpoint of the training state every X updates. These checkpoints can be used both as final" | |
" checkpoints in case they are better than the last checkpoint, and are also suitable for resuming" | |
" training using `--resume_from_checkpoint`." | |
), | |
) | |
parser.add_argument( | |
"--checkpoints_total_limit", | |
type=int, | |
default=None, | |
help=("Max number of checkpoints to store."), | |
) | |
parser.add_argument( | |
"--resume_from_checkpoint", | |
type=str, | |
default=None, | |
help=( | |
"Whether training should be resumed from a previous checkpoint. Use a path saved by" | |
' `--checkpointing_steps`, or `"latest"` to automatically select the last available checkpoint.' | |
), | |
) | |
parser.add_argument( | |
"--gradient_accumulation_steps", | |
type=int, | |
default=1, | |
help="Number of updates steps to accumulate before performing a backward/update pass.", | |
) | |
parser.add_argument( | |
"--gradient_checkpointing", | |
action="store_true", | |
help="Whether or not to use gradient checkpointing to save memory at the expense of slower backward pass.", | |
) | |
parser.add_argument( | |
"--learning_rate", | |
type=float, | |
default=1e-4, | |
help="Initial learning rate (after the potential warmup period) to use.", | |
) | |
parser.add_argument( | |
"--scale_lr", | |
action="store_true", | |
default=False, | |
help="Scale the learning rate by the number of GPUs, gradient accumulation steps, and batch size.", | |
) | |
parser.add_argument( | |
"--lr_scheduler", | |
type=str, | |
default="constant", | |
help=( | |
'The scheduler type to use. Choose between ["linear", "cosine", "cosine_with_restarts", "polynomial",' | |
' "constant", "constant_with_warmup"]' | |
), | |
) | |
parser.add_argument( | |
"--lr_warmup_steps", | |
type=int, | |
default=500, | |
help="Number of steps for the warmup in the lr scheduler.", | |
) | |
parser.add_argument( | |
"--lr_num_cycles", | |
type=int, | |
default=1, | |
help="Number of hard resets of the lr in cosine_with_restarts scheduler.", | |
) | |
parser.add_argument( | |
"--lr_power", | |
type=float, | |
default=1.0, | |
help="Power factor of the polynomial scheduler.", | |
) | |
parser.add_argument( | |
"--enable_slicing", | |
action="store_true", | |
default=False, | |
help="Whether or not to use VAE slicing for saving memory.", | |
) | |
parser.add_argument( | |
"--enable_tiling", | |
action="store_true", | |
default=False, | |
help="Whether or not to use VAE tiling for saving memory.", | |
) | |
parser.add_argument( | |
"--noised_image_dropout", | |
type=float, | |
default=0.05, | |
help="Image condition dropout probability when finetuning image-to-video.", | |
) | |
parser.add_argument( | |
"--ignore_learned_positional_embeddings", | |
action="store_true", | |
default=False, | |
help=( | |
"Whether to ignore the learned positional embeddings when training CogVideoX Image-to-Video. This setting " | |
"should be used when performing multi-resolution training, because CogVideoX-I2V does not support it " | |
"otherwise. Please read the comments in https://github.com/a-r-r-o-w/cogvideox-factory/issues/26 to understand why." | |
), | |
) | |
def _get_optimizer_args(parser: argparse.ArgumentParser) -> None: | |
parser.add_argument( | |
"--optimizer", | |
type=lambda s: s.lower(), | |
default="adam", | |
choices=["adam", "adamw", "prodigy", "came"], | |
help=("The optimizer type to use."), | |
) | |
parser.add_argument( | |
"--use_8bit", | |
action="store_true", | |
help="Whether or not to use 8-bit optimizers from `bitsandbytes` or `bitsandbytes`.", | |
) | |
parser.add_argument( | |
"--use_4bit", | |
action="store_true", | |
help="Whether or not to use 4-bit optimizers from `torchao`.", | |
) | |
parser.add_argument( | |
"--use_torchao", action="store_true", help="Whether or not to use the `torchao` backend for optimizers." | |
) | |
parser.add_argument( | |
"--beta1", | |
type=float, | |
default=0.9, | |
help="The beta1 parameter for the Adam and Prodigy optimizers.", | |
) | |
parser.add_argument( | |
"--beta2", | |
type=float, | |
default=0.95, | |
help="The beta2 parameter for the Adam and Prodigy optimizers.", | |
) | |
parser.add_argument( | |
"--beta3", | |
type=float, | |
default=None, | |
help="Coefficients for computing the Prodigy optimizer's stepsize using running averages. If set to None, uses the value of square root of beta2.", | |
) | |
parser.add_argument( | |
"--prodigy_decouple", | |
action="store_true", | |
help="Use AdamW style decoupled weight decay.", | |
) | |
parser.add_argument( | |
"--weight_decay", | |
type=float, | |
default=1e-04, | |
help="Weight decay to use for optimizer.", | |
) | |
parser.add_argument( | |
"--epsilon", | |
type=float, | |
default=1e-8, | |
help="Epsilon value for the Adam optimizer and Prodigy optimizers.", | |
) | |
parser.add_argument("--max_grad_norm", default=1.0, type=float, help="Max gradient norm.") | |
parser.add_argument( | |
"--prodigy_use_bias_correction", | |
action="store_true", | |
help="Turn on Adam's bias correction.", | |
) | |
parser.add_argument( | |
"--prodigy_safeguard_warmup", | |
action="store_true", | |
help="Remove lr from the denominator of D estimate to avoid issues during warm-up stage.", | |
) | |
parser.add_argument( | |
"--use_cpu_offload_optimizer", | |
action="store_true", | |
help="Whether or not to use the CPUOffloadOptimizer from TorchAO to perform optimization step and maintain parameters on the CPU.", | |
) | |
parser.add_argument( | |
"--offload_gradients", | |
action="store_true", | |
help="Whether or not to offload the gradients to CPU when using the CPUOffloadOptimizer from TorchAO.", | |
) | |
def _get_configuration_args(parser: argparse.ArgumentParser) -> None: | |
parser.add_argument("--tracker_name", type=str, default=None, help="Project tracker name") | |
parser.add_argument( | |
"--push_to_hub", | |
action="store_true", | |
help="Whether or not to push the model to the Hub.", | |
) | |
parser.add_argument( | |
"--hub_token", | |
type=str, | |
default=None, | |
help="The token to use to push to the Model Hub.", | |
) | |
parser.add_argument( | |
"--hub_model_id", | |
type=str, | |
default=None, | |
help="The name of the repository to keep in sync with the local `output_dir`.", | |
) | |
parser.add_argument( | |
"--logging_dir", | |
type=str, | |
default="logs", | |
help="Directory where logs are stored.", | |
) | |
parser.add_argument( | |
"--allow_tf32", | |
action="store_true", | |
help=( | |
"Whether or not to allow TF32 on Ampere GPUs. Can be used to speed up training. For more information, see" | |
" https://pytorch.org/docs/stable/notes/cuda.html#tensorfloat-32-tf32-on-ampere-devices" | |
), | |
) | |
parser.add_argument( | |
"--nccl_timeout", | |
type=int, | |
default=600, | |
help="Maximum timeout duration before which allgather, or related, operations fail in multi-GPU/multi-node training settings.", | |
) | |
parser.add_argument( | |
"--report_to", | |
type=str, | |
default=None, | |
help=( | |
'The integration to report the results and logs to. Supported platforms are `"tensorboard"`' | |
' (default), `"wandb"` and `"comet_ml"`. Use `"all"` to report to all integrations.' | |
), | |
) | |
def get_args():
    """Build the full CogVideoX training CLI and parse ``sys.argv``.

    Returns an ``argparse.Namespace`` containing every flag registered by the
    private ``_get_*_args`` helpers.
    """
    parser = argparse.ArgumentParser(description="Simple example of a training script for CogVideoX.")
    # Each helper registers one thematic group of flags on the shared parser.
    for register in (
        _get_model_args,
        _get_dataset_args,
        _get_training_args,
        _get_validation_args,
        _get_optimizer_args,
        _get_configuration_args,
    ):
        register(parser)
    return parser.parse_args()