# GRPO LoRA fine-tuning config for NVIDIA Nemotron-3 Nano 30B-A3B (BF16)
# on 2 nodes x 8 GPUs with the Megatron backend.
# Inherits all unspecified settings from the base math config.
defaults: ../../grpo_math_1B.yaml

grpo:
  num_prompts_per_step: 2
  num_generations_per_prompt: 8

checkpointing:
  checkpoint_dir: results/grpo-nanov3-30BA3B-2n8g-megatron-lora

policy:
  model_name: nvidia/NVIDIA-Nemotron-3-Nano-30B-A3B-Base-BF16
  tokenizer:
    # NOTE(review): tokenizer comes from the instruct variant while the model
    # is the Base variant — confirm this pairing is intentional.
    name: nvidia/NVIDIA-Nemotron-3-Nano-30B-A3B-BF16
  train_global_batch_size: 16
  train_micro_batch_size: 1
  logprob_batch_size: 1
  max_total_sequence_length: 2048

  # DTensor path disabled; training runs through the Megatron backend below.
  dtensor_cfg:
    enabled: false

  megatron_cfg:
    enabled: true
    bias_activation_fusion: false
    # TP=2 x EP=8 = 16 model-parallel ranks, matching 2 nodes x 8 GPUs.
    tensor_model_parallel_size: 2
    expert_model_parallel_size: 8
    sequence_parallel: true
    # LoRA (assumed to live under megatron_cfg per the Megatron-specific
    # comment below — verify against the base config schema).
    peft:
      enabled: true
      dim: 128
      alpha: 512
      # Exclude all out_proj modules. When NemotronHMamba2Mixer uses
      # cuda_kernels_forward, out_proj LoRA has no gradient.
      exclude_modules: ['*out_proj*']

  sequence_packing:
    enabled: false

  generation:
    vllm_cfg:
      tensor_parallel_size: 4
      gpu_memory_utilization: 0.7

logger:
  wandb_enabled: true
  tensorboard_enabled: true
  wandb:
    project: nemo-rl
    name: grpo-nanov3-30BA3B-2n8g-megatron-lora

cluster:
  gpus_per_node: 8
  num_nodes: 2