set -x

HF_MODEL_PATH=Qwen/Qwen3-30B-A3B
DIST_CKPT_PATH=${DIST_CKPT_PATH}
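# DIST_CKPT_PATH must be set by the caller: it is where the Megatron
# dist-checkpoint produced by the converter below is written, and where the
# trainer later loads it from, e.g. (illustrative path, not from the
# original script):
#   export DIST_CKPT_PATH=$HOME/ckpts/qwen3-30b-a3b-mcore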

python scripts/converter_hf_to_mcore.py --hf_model_path $HF_MODEL_PATH --output_path $DIST_CKPT_PATH
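# Optional guard (not in the original script): converting a 30B MoE model is
# slow, so one might wrap the call above to skip it when the target directory
# is already populated:
#   [ -n "$(ls -A "$DIST_CKPT_PATH" 2>/dev/null)" ] && echo "reusing $DIST_CKPT_PATH"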

# If you are using vllm<=0.6.3, you might need to set the following environment variable to avoid bugs:
# export VLLM_ATTENTION_BACKEND=XFORMERS
export CUDA_DEVICE_MAX_CONNECTIONS=1 # For Megatron communication/computation overlapping

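# Parallelism sanity check (derived from the flags below, not part of the
# original script): 4 nodes x 8 GPUs = 32 GPUs. TP=4 and PP=2 form an 8-way
# model-parallel group, leaving 32 / 8 = 4-way data parallelism for the
# non-expert parameters; EP=4 additionally shards the MoE expert weights
# across 4 ranks. With data.train_batch_size=64 and rollout.n=5, each step
# samples and scores 64 * 5 = 320 responses before the GRPO update.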
python3 -m verl.trainer.main_ppo --config-path=config \
    --config-name='ppo_megatron_trainer.yaml' \
    algorithm.adv_estimator=grpo \
    data.train_files=$HOME/data/gsm8k/train.parquet \
    data.val_files=$HOME/data/gsm8k/test.parquet \
    data.train_batch_size=64 \
    data.max_prompt_length=1024 \
    data.max_response_length=2048 \
    data.filter_overlong_prompts=True \
    data.truncation='error' \
    actor_rollout_ref.model.path=$HF_MODEL_PATH \
    actor_rollout_ref.actor.optim.lr=1e-6 \
    actor_rollout_ref.actor.ppo_mini_batch_size=64 \
    actor_rollout_ref.actor.ppo_micro_batch_size_per_gpu=4 \
    actor_rollout_ref.actor.megatron.pipeline_model_parallel_size=2 \
    actor_rollout_ref.actor.megatron.tensor_model_parallel_size=4 \
    actor_rollout_ref.actor.megatron.expert_model_parallel_size=4 \
    actor_rollout_ref.actor.megatron.use_dist_checkpointing=True \
    actor_rollout_ref.actor.megatron.dist_checkpointing_path=$DIST_CKPT_PATH \
    actor_rollout_ref.actor.use_kl_loss=True \
    actor_rollout_ref.actor.kl_loss_coef=0.001 \
    actor_rollout_ref.actor.kl_loss_type=low_var_kl \
    actor_rollout_ref.actor.entropy_coeff=0 \
    actor_rollout_ref.model.enable_gradient_checkpointing=True \
    actor_rollout_ref.rollout.log_prob_micro_batch_size_per_gpu=4 \
    actor_rollout_ref.rollout.tensor_model_parallel_size=4 \
    actor_rollout_ref.rollout.name=vllm \
    actor_rollout_ref.rollout.gpu_memory_utilization=0.6 \
    actor_rollout_ref.rollout.n=5 \
    actor_rollout_ref.ref.log_prob_micro_batch_size_per_gpu=4 \
    actor_rollout_ref.ref.megatron.pipeline_model_parallel_size=2 \
    actor_rollout_ref.ref.megatron.tensor_model_parallel_size=4 \
    actor_rollout_ref.ref.megatron.expert_model_parallel_size=4 \
    actor_rollout_ref.ref.megatron.use_dist_checkpointing=True \
    actor_rollout_ref.ref.megatron.dist_checkpointing_path=$DIST_CKPT_PATH \
    algorithm.use_kl_in_reward=False \
    trainer.critic_warmup=0 \
    trainer.logger=['console','wandb'] \
    trainer.project_name='verl_grpo_example_gsm8k_math' \
    trainer.experiment_name='qwen3_30b_moe_megatron' \
    trainer.n_gpus_per_node=8 \
    trainer.nnodes=4 \
    trainer.save_freq=20 \
    trainer.test_freq=5 \
    trainer.total_epochs=15 "$@"
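
# Example invocation (script name and path are illustrative; any extra
# arguments are forwarded to the trainer as Hydra overrides via "$@"):
#   DIST_CKPT_PATH=$HOME/ckpts/qwen3-30b-a3b-mcore \
#       bash run_qwen3moe_30b_megatron.sh trainer.total_epochs=1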