defaults: ../../grpo_math_1B.yaml

grpo:
  num_prompts_per_step: 2
  num_generations_per_prompt: 8

checkpointing:
  checkpoint_dir: results/grpo-nanov3-30BA3B-2n8g-fsdp2-lora

policy:
  model_name: nvidia/NVIDIA-Nemotron-3-Nano-30B-A3B-Base-BF16
  tokenizer:
    name: nvidia/NVIDIA-Nemotron-3-Nano-30B-A3B-BF16
  train_global_batch_size: 16
  train_micro_batch_size: 1
  logprob_batch_size: 1
  max_total_sequence_length: 2048

  # TODO(automodel-issue): NemotronH needs force_hf=true with transformers v5
  # to bypass custom backbone→model.model attribute mismatch in parallelizer.
  dtensor_cfg:
    automodel_kwargs:
      force_hf: true
    lora_cfg:
      enabled: true
      dim: 128
      alpha: 512
      # Exclude all out_proj modules. When NemotronHMamba2Mixer uses
      # cuda_kernels_forward, out_proj LoRA has no gradient.
      exclude_modules: ['*out_proj*']
      match_all_linear: false
      use_triton: false

  sequence_packing:
    enabled: false

  generation:
    vllm_cfg:
      tensor_parallel_size: 4
      gpu_memory_utilization: 0.7

logger:
  wandb_enabled: true
  tensorboard_enabled: true
  wandb:
    project: nemo-rl
    name: grpo-nanov3-30BA3B-2n8g-fsdp2-lora

cluster:
  gpus_per_node: 8
  num_nodes: 2
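
# Rough consistency sketch for this recipe (informational comment only, derived
# from the values above; not a setting and not a verified sizing guide):
#   cluster: 2 nodes x 8 GPUs = 16 GPUs total, and
#   generation.vllm_cfg.tensor_parallel_size: 4 shards each vLLM engine
#   across 4 of them.
#   rollouts per step = num_prompts_per_step * num_generations_per_prompt
#                     = 2 * 8 = 16, which lines up with
#   train_global_batch_size: 16.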