---
# Fine-tuning run configuration (base model, training clip, validation
# prompts, optimizer and runtime switches).
#
# NOTE(review): this file had been collapsed onto a single line, which is not
# parseable YAML. The nesting below is reconstructed from key order: `width`
# and `height` occur twice, so they must live in two different mappings
# (`train_data` and `validation_data`). Confirm the grouping against the
# script that consumes this file.

pretrained_model_path: checkpoints/CompVis/stable-diffusion-v1-4
output_dir: /home/user/app/experiments/talking-man

train_data:
  # Quoted because the file name contains a space.
  # NOTE(review): this points into /tmp; verify the clip still exists before
  # re-running.
  video_path: '/tmp/gradio/f79f070ba10b1b7872faa157ada00e09644ef2f2/Untitled Video.mp4'
  # Folded scalar: the lines below are joined with single spaces into one
  # string with no trailing newline.
  prompt: >-
    A person of East Asian ethnicity standing in a well-lit office environment
    with large windows, speaking directly to the camera, wearing a white shirt,
    and conveying a calm and professional demeanor. The background is slightly
    blurred, showcasing greenery and modern office decor, with minimal
    distractions
  n_sample_frames: 8
  width: 512
  height: 512
  sample_start_idx: 0
  sample_frame_rate: 1

validation_data:
  prompts:
    - >-
      A person of East Asian descent is standing in a bright office setting
      with large windows, addressing the camera directly. They are dressed in
      a white shirt and project a calm, professional demeanor. The background
      is slightly out of focus, highlighting greenery and modern office
      furnishings, with minimal visual distractions
  # Same value as train_data.n_sample_frames (8).
  video_length: 8
  width: 512
  height: 512
  num_inference_steps: 50
  guidance_scale: 7.5

validation_steps: 100

trainable_modules:
  - attn1.to_q
  - attn2.to_q
  - attn_temp

train_batch_size: 1
max_train_steps: 300
learning_rate: 3.5e-05
scale_lr: false
lr_scheduler: constant
lr_warmup_steps: 0

adam_beta1: 0.9
adam_beta2: 0.999
adam_weight_decay: 0.01
adam_epsilon: 1.0e-08
max_grad_norm: 1.0

gradient_accumulation_steps: 1
gradient_checkpointing: true
# NOTE(review): checkpointing_steps (1000) is larger than max_train_steps
# (300); if checkpoints are written every N steps, none would be written
# mid-run. Confirm this is intended.
checkpointing_steps: 1000
resume_from_checkpoint: null

mixed_precision: fp16
use_8bit_adam: false
enable_xformers_memory_efficient_attention: true
seed: 31118