---
# Hydra training config: reward-FL fine-tuning of the Wan2.2 diffusion model
# with DeepSpeed ZeRO-2 + CPU offload.
#
# NOTE(review): this file was recovered from a whitespace-mangled (single-line)
# copy; key nesting below follows the standard ROLL/Hydra config layout —
# confirm against a sibling example config.

# Make all DeepSpeed strategy presets available for ${...} interpolation;
# only deepspeed_zero2_cpuoffload is actually selected (see strategy_args).
defaults:
  - ../config/deepspeed_zero@_here_
  - ../config/deepspeed_zero2@_here_
  - ../config/deepspeed_zero2_cpuoffload@_here_
  - ../config/deepspeed_zero3@_here_
  - ../config/deepspeed_zero3_cpuoffload@_here_

hydra:
  run:
    dir: .
  # Don't write Hydra's .hydra/ config snapshot directory.
  output_subdir: null

exp_name: "reward_fl_zero2_cpuoffload"
seed: 42
logging_dir: ./output/logs
output_dir: ./output

# Checkpointing backend and destination.
checkpoint_config:
  type: file_system
  output_dir: /data/models/reward_fl/

save_steps: 25
logging_steps: 1
resume_from_checkpoint: false

sequence_length: 1024
train_batch_size: 8
max_grad_norm: 1.0

# Trainable actor: the diffusion module with a LoRA adapter on dit2.
actor_train:
  model_args:
    model_type: diffusion_module
    dtype: bf16
    model_config_kwargs:
      model_name: wan2_2
      model_paths: ./examples/wan2.2-14B-reward_fl_ds/wan22_paths.json
      reward_model_path: /data/models/antelopev2/
      tokenizer_path: /data/models/Wan-AI/Wan2.1-T2V-1.3B/google/umt5-xxl/
      model_id_with_origin_paths: null
      trainable_models: dit2
      use_gradient_checkpointing_offload: true
      extra_inputs: input_image
      # Timestep sampling window and inference schedule for reward feedback.
      max_timestep_boundary: 1.0
      min_timestep_boundary: 0.9
      num_inference_steps: 8
      mid_timestep: 4
      final_timestep: 7
      # LoRA adapter configuration (applied to dit2 attention + FFN layers).
      lora_base_model: dit2
      lora_target_modules: q,k,v,o,ffn.0,ffn.2
      lora_rank: 32
  training_args:
    learning_rate: 2.5e-6
    lr_scheduler_type: constant
    per_device_train_batch_size: 1
    gradient_accumulation_steps: 1
    warmup_steps: 10
    num_train_epochs: 1
  data_args:
    file_name: ./data/example_video_dataset/metadata.csv
    preprocessing_num_workers: 2
  strategy_args:
    strategy_name: diffusion_deepspeed_train
    # Selects the ZeRO-2 + CPU-offload preset pulled in via `defaults`.
    strategy_config: ${deepspeed_zero2_cpuoffload}
  # Evaluated by the framework to a GPU index list — quoted so YAML keeps it
  # as a plain string.
  device_mapping: "list(range(0,8))"

# Environment variables exported to Ray workers; values must stay strings.
system_envs:
  RAY_PROFILING: "0"