---
# GRPO math recipe: Qwen3.5-9B-Base on 1 node x 8 GPUs, Megatron backend.
# Inherits all unspecified settings from the base 1B recipe below.
defaults: ../../grpo_math_1B.yaml

checkpointing:
  checkpoint_dir: results/grpo-qwen3.5-9b-1n8g-megatron

policy:
  model_name: Qwen/Qwen3.5-9B-Base
  train_micro_batch_size: 1
  logprob_batch_size: 1
  max_total_sequence_length: 4096

  # DTensor path disabled — this recipe uses the Megatron backend instead.
  dtensor_cfg:
    enabled: false

  sequence_packing:
    enabled: false

  megatron_cfg:
    enabled: true
    # TP=4 must match generation.vllm_cfg.tensor_parallel_size below.
    tensor_model_parallel_size: 4
    sequence_parallel: true
    apply_rope_fusion: false
    activation_checkpointing: true
    defer_fp32_logits: true

  generation:
    vllm_cfg:
      tensor_parallel_size: 4
      # Leave headroom for training-side memory on shared GPUs.
      gpu_memory_utilization: 0.7

logger:
  wandb_enabled: true
  tensorboard_enabled: true
  wandb:
    project: nemo-rl
    name: grpo-qwen3.5-9b-1n8g-megatron

cluster:
  gpus_per_node: 8