---
# Training configuration (exp_name: 'stage2').
#
# NOTE(review): the nesting below was reconstructed from a whitespace-collapsed
# original; section membership is inferred from key names. Confirm against the
# keys the consuming training script actually reads.

# Dataset / sampling settings.
data:
  train_bs: 1
  train_width: 512
  train_height: 512
  meta_paths:
    - "./data/fashion_meta.json"
  sample_rate: 4
  n_sample_frames: 24

# Optimisation settings.
solver:
  gradient_accumulation_steps: 1
  mixed_precision: 'fp16'
  enable_xformers_memory_efficient_attention: true
  gradient_checkpointing: true
  max_train_steps: 10000
  max_grad_norm: 1.0

  # Learning rate.
  # Written with an explicit decimal point: YAML 1.1 loaders (e.g. PyYAML)
  # resolve a bare `1e-5` as a string rather than a float.
  learning_rate: 1.0e-5
  scale_lr: false
  lr_warmup_steps: 1
  lr_scheduler: 'constant'

  # Optimizer.
  use_8bit_adam: true
  adam_beta1: 0.9
  adam_beta2: 0.999
  adam_weight_decay: 1.0e-2
  adam_epsilon: 1.0e-8

# Validation settings.
val:
  validation_steps: 20

# Presumably forwarded as keyword arguments to the noise scheduler's
# constructor (inferred from the key name) -- TODO confirm.
noise_scheduler_kwargs:
  num_train_timesteps: 1000
  beta_start: 0.00085
  beta_end: 0.012
  beta_schedule: "linear"
  steps_offset: 1
  clip_sample: false

# Pretrained weight locations (paths are relative; presumably resolved against
# the working directory of the training run -- verify).
base_model_path: './pretrained_weights/stable-diffusion-v1-5'
vae_model_path: './pretrained_weights/sd-vae-ft-mse'
image_encoder_path: './pretrained_weights/sd-image-variations-diffusers/image_encoder'
mm_path: './pretrained_weights/mm_sd_v15_v2.ckpt'

weight_dtype: 'fp16'  # [fp16, fp32]
uncond_ratio: 0.1
noise_offset: 0.05
snr_gamma: 5.0
enable_zero_snr: true

# Checkpoint produced by the earlier 'stage1' run that this run builds on.
stage1_ckpt_dir: './exp_output/stage1'
stage1_ckpt_step: 980

# Run bookkeeping.
seed: 12580
# Empty string as in the original; presumably means "do not resume" -- confirm.
resume_from_checkpoint: ''
checkpointing_steps: 2000
exp_name: 'stage2'
output_dir: './exp_output'