Commit 9617d09

Merge branch 'main' into ds_nokl
2 parents: 8346efb + f50e5c2

File tree: 189 files changed (+8257 −2401 lines)


.github/CODEOWNERS

Lines changed: 8 additions & 1 deletion
@@ -1,22 +1,29 @@
 /docs @eric-haibin-lin @zhaochenyang20 @hongpeng-guo
 /docs/amd_tutorial @yushengsu-thu
 /docs/slang_multiturn @zhaochenyang20 @SwordFaith
+/docs/ascend_tutorial @FightingZhen
 
 /recipe/dapo @tongyx361 @PeterSH6 @vermouth1992 @tardis-key @FightingZhen @ji-huazhong
 /recipe/spin @zhaochenyang20
 /recipe/sppo @zhaochenyang20
 
 /third_party/sglang @zhaochenyang20 @SwordFaith
 /third_party/vllm @PeterSH6 @wuxibin89
+
 /examples/grpo_trainer @vermouth1992 @PeterSH6 @tardis-key @FightingZhen @ji-huazhong
+
 /verl/single_controller @zw0610 @wuxibin89 @hongpeng-guo
 /verl/trainer @eric-haibin-lin @vermouth1992 @tongyx361 @PeterSH6
+/verl/models/mcore @ISEEKYAN @vermouth1992
+/verl/models/transformers @vermouth1992 @PeterSH6 @tardis-key @FightingZhen @ji-huazhong
 /verl/workers/engine @eric-haibin-lin @vermouth1992 @ZihengJiang
 /verl/workers/roles @eric-haibin-lin @vermouth1992 @ZihengJiang
 /verl/workers/engine/fsdp @eric-haibin-lin @vermouth1992 @ZihengJiang
 /verl/workers/rollout/vllm_rollout @wuxibin89 @PeterSH6 @chenhaiq
 /verl/workers/rollout/sglang_rollout @zhaochenyang20 @SwordFaith @chenhaiq
-/verl/models/transformers @vermouth1992 @PeterSH6 @tardis-key @FightingZhen @ji-huazhong
+/verl/workers/actor/megatron_actor.py @ISEEKYAN @vermouth1992
+/verl/workers/critic/megatron_critic.py @ISEEKYAN @vermouth1992
+/verl/workers/megatron_workers.py @ISEEKYAN @vermouth1992
 
 /tests/single_controller @zw0610 @wuxibin89
 /tests/trainer @eric-haibin-lin @vermouth1992 @tongyx361 @PeterSH6

.github/workflows/e2e_eval_aime24.yml renamed to .github/workflows/.deprecate/e2e_eval_aime24.yml

Lines changed: 1 addition & 1 deletion
@@ -124,7 +124,7 @@ jobs:
       - name: Install the current repository
         run: |
           pip3 install --no-deps -e .[test,gpu,math]
-          pip3 install math-verify
+          pip3 install math-verify transformers==4.56.2
       - name: Prepare aime24 dataset
         run: |
           ray stop --force

.github/workflows/.deprecate/e2e_ppo_trainer.yml

Lines changed: 2 additions & 2 deletions
@@ -77,7 +77,7 @@ jobs:
       HF_ENDPOINT: "https://hf-mirror.com"
       HF_HUB_ENABLE_HF_TRANSFER: "0" # This is more stable
     container:
-      image: verlai/verl:app-verl0.5-transformers4.55.4-sglang0.4.10.post2-mcore0.13.0-te2.2
+      image: verlai/verl:app-verl0.6-transformers4.56.1-sglang0.5.2-mcore0.13.0-te2.2
       options: --gpus all --shm-size=10g
     steps:
       - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
@@ -110,7 +110,7 @@ jobs:
       HF_ENDPOINT: "https://hf-mirror.com"
       HF_HUB_ENABLE_HF_TRANSFER: "0" # This is more stable
     container:
-      image: verlai/verl:app-verl0.5-transformers4.55.4-sglang0.4.10.post2-mcore0.13.0-te2.2
+      image: verlai/verl:app-verl0.6-transformers4.56.1-sglang0.5.2-mcore0.13.0-te2.2
       options: --gpus all --shm-size=10g
     steps:
       - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2

.github/workflows/.deprecate/e2e_ppo_trainer_megatron_sglang.yml

Lines changed: 1 addition & 1 deletion
@@ -75,7 +75,7 @@ permissions:
   contents: read
 
 env:
-  IMAGE: "verl-ci-cn-beijing.cr.volces.com/verlai/verl:app-verl0.5-transformers4.55.4-sglang0.4.10.post2-mcore0.13.0-te2.2"
+  IMAGE: "verl-ci-cn-beijing.cr.volces.com/verlai/verl:app-verl0.6-transformers4.56.1-sglang0.5.2-mcore0.13.0-te2.2"
   DYNAMIC_RUNNER_ENDPOINT: "https://sd10g3clalm04ug7alq90.apigateway-cn-beijing.volceapi.com/runner"
 
 jobs:

.github/workflows/e2e_spin.yml renamed to .github/workflows/.deprecate/e2e_spin.yml

Lines changed: 38 additions & 5 deletions
@@ -52,24 +52,41 @@ on:
 permissions:
   contents: read
 
+env:
+  IMAGE: "verl-ci-cn-beijing.cr.volces.com/verlai/verl:app-verl0.6-transformers4.56.1-sglang0.5.2-mcore0.13.0-te2.2"
+  DYNAMIC_RUNNER_ENDPOINT: "https://sd10g3clalm04ug7alq90.apigateway-cn-beijing.volceapi.com/runner"
+
 # Cancel jobs on the same ref if a new one is triggered
 concurrency:
   group: ${{ github.workflow }}-${{ github.ref }}
   cancel-in-progress: ${{ github.ref != 'refs/heads/main' }}
 
 jobs:
+  setup:
+    if: github.repository_owner == 'volcengine'
+    runs-on: ubuntu-latest
+    outputs:
+      runner-label: ${{ steps.create-runner.outputs.runner-label }}
+      mlp-task-id: ${{ steps.create-runner.outputs.mlp-task-id }}
+    steps:
+      - uses: actions/checkout@v4
+      - id: create-runner
+        uses: volcengine/vemlp-github-runner@v1
+        with:
+          mode: "create"
+          faas-url: "${{ env.DYNAMIC_RUNNER_ENDPOINT }}"
+          mlp-image: "${{ env.IMAGE }}"
+
   e2e_spin:
-    runs-on: [L20x8]
+    needs: setup
+    runs-on: [ "${{ needs.setup.outputs.runner-label || 'L20x8' }}" ]
     timeout-minutes: 40 # Increase this timeout value as needed
     env:
       HTTP_PROXY: ${{ secrets.PROXY_HTTP }}
       HTTPS_PROXY: ${{ secrets.PROXY_HTTPS }}
       NO_PROXY: "localhost,127.0.0.1,hf-mirror.com"
       HF_ENDPOINT: "https://hf-mirror.com"
       HF_HUB_ENABLE_HF_TRANSFER: "0" # This is more stable
-    container:
-      image: verlai/verl:app-verl0.5-transformers4.55.4-sglang0.4.10.post2-mcore0.13.0-te2.2
-      options: --gpus all --shm-size=10g
     steps:
       - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
         with:
@@ -79,8 +96,24 @@ jobs:
           pip3 install -e .[test,gpu,sglang]
       - name: Prepare GSM8K dataset
         run: |
-          python3 examples/data_preprocess/gsm8k.py
+          python3 examples/data_preprocess/gsm8k.py --local_dataset_path ${HOME}/models/hf_data/gsm8k
       - name: Running the E2E test with the spin algorithm
         run: |
           ray stop --force
           bash tests/special_e2e/run_spin.sh
+
+  cleanup:
+    runs-on: ubuntu-latest
+    needs:
+      [
+        setup,
+        e2e_spin
+      ]
+    if: always()
+    steps:
+      - id: destroy-runner
+        uses: volcengine/vemlp-github-runner@v1
+        with:
+          mode: "destroy"
+          faas-url: "${{ env.DYNAMIC_RUNNER_ENDPOINT }}"
+          mlp-task-id: "${{ needs.setup.outputs.mlp-task-id }}"
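The spin workflow above (and the sppo workflow below) now follows the dynamic-runner lifecycle described in .github/workflows/README.md: a setup job creates a short-lived runner, the GPU job runs on it with the static L20x8 label as a fallback, and a cleanup job destroys it afterwards. A minimal sketch of that lifecycle, assuming a placeholder test job (the job name `test` and its echo step are illustrative; the env values mirror the diff above):

```yaml
# Sketch of the create -> use -> destroy runner lifecycle adopted by these workflows.
# The `test` job is a placeholder; IMAGE and DYNAMIC_RUNNER_ENDPOINT mirror the env block above.
env:
  IMAGE: "verl-ci-cn-beijing.cr.volces.com/verlai/verl:app-verl0.6-transformers4.56.1-sglang0.5.2-mcore0.13.0-te2.2"
  DYNAMIC_RUNNER_ENDPOINT: "https://sd10g3clalm04ug7alq90.apigateway-cn-beijing.volceapi.com/runner"

jobs:
  setup:
    runs-on: ubuntu-latest
    outputs:
      runner-label: ${{ steps.create-runner.outputs.runner-label }}
      mlp-task-id: ${{ steps.create-runner.outputs.mlp-task-id }}
    steps:
      - id: create-runner            # create a dynamic runner backed by the CI image
        uses: volcengine/vemlp-github-runner@v1
        with:
          mode: "create"
          faas-url: "${{ env.DYNAMIC_RUNNER_ENDPOINT }}"
          mlp-image: "${{ env.IMAGE }}"

  test:                              # placeholder for the actual e2e job
    needs: setup
    runs-on: [ "${{ needs.setup.outputs.runner-label || 'L20x8' }}" ]  # fall back to the static runner
    steps:
      - run: echo "run the e2e test here"

  cleanup:
    needs: [setup, test]
    if: always()                     # release the runner even if the test job failed
    runs-on: ubuntu-latest
    steps:
      - uses: volcengine/vemlp-github-runner@v1
        with:
          mode: "destroy"
          faas-url: "${{ env.DYNAMIC_RUNNER_ENDPOINT }}"
          mlp-task-id: "${{ needs.setup.outputs.mlp-task-id }}"
```

Keeping `cleanup` behind `if: always()` is what ensures the dynamic runner is destroyed even when the test job fails.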

.github/workflows/e2e_sppo.yml renamed to .github/workflows/.deprecate/e2e_sppo.yml

Lines changed: 39 additions & 5 deletions
@@ -55,19 +55,37 @@ concurrency:
   group: ${{ github.workflow }}-${{ github.ref }}
   cancel-in-progress: ${{ github.ref != 'refs/heads/main' }}
 
+env:
+  IMAGE: "verl-ci-cn-beijing.cr.volces.com/verlai/verl:app-verl0.6-transformers4.56.1-sglang0.5.2-mcore0.13.0-te2.2"
+  DYNAMIC_RUNNER_ENDPOINT: "https://sd10g3clalm04ug7alq90.apigateway-cn-beijing.volceapi.com/runner"
+  TRANSFORMERS_VERSION: "4.56.2"
+
 jobs:
+  setup:
+    if: github.repository_owner == 'volcengine'
+    runs-on: ubuntu-latest
+    outputs:
+      runner-label: ${{ steps.create-runner.outputs.runner-label }}
+      mlp-task-id: ${{ steps.create-runner.outputs.mlp-task-id }}
+    steps:
+      - uses: actions/checkout@v4
+      - id: create-runner
+        uses: volcengine/vemlp-github-runner@v1
+        with:
+          mode: "create"
+          faas-url: "${{ env.DYNAMIC_RUNNER_ENDPOINT }}"
+          mlp-image: "${{ env.IMAGE }}"
+
   e2e_sppo:
-    runs-on: [L20x8]
+    needs: setup
+    runs-on: [ "${{ needs.setup.outputs.runner-label || 'L20x8' }}" ]
     timeout-minutes: 40 # Increase this timeout value as needed
     env:
       HTTP_PROXY: ${{ secrets.PROXY_HTTP }}
       HTTPS_PROXY: ${{ secrets.PROXY_HTTPS }}
       NO_PROXY: "localhost,127.0.0.1,hf-mirror.com"
       HF_ENDPOINT: "https://hf-mirror.com"
       HF_HUB_ENABLE_HF_TRANSFER: "0" # This is more stable
-    container:
-      image: verlai/verl:app-verl0.5-transformers4.55.4-sglang0.4.10.post2-mcore0.13.0-te2.2
-      options: --gpus all --shm-size=10g
     steps:
       - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
         with:
@@ -77,8 +95,24 @@ jobs:
           pip3 install -e .[test,gpu,sglang]
       - name: Prepare MATH dataset
         run: |
-          python3 examples/data_preprocess/math_dataset.py
+          python3 examples/data_preprocess/math_dataset.py --local_dataset_path $HOME/models/hf_data/DigitalLearningGmbH/MATH-lighteval
       - name: Running the E2E test with the SPPO algorithm
         run: |
           ray stop --force
           bash tests/special_e2e/run_sppo.sh
+
+  cleanup:
+    runs-on: ubuntu-latest
+    needs:
+      [
+        setup,
+        e2e_sppo
+      ]
+    if: always()
+    steps:
+      - id: destroy-runner
+        uses: volcengine/vemlp-github-runner@v1
+        with:
+          mode: "destroy"
+          faas-url: "${{ env.DYNAMIC_RUNNER_ENDPOINT }}"
+          mlp-task-id: "${{ needs.setup.outputs.mlp-task-id }}"

.github/workflows/README.md

Lines changed: 5 additions & 1 deletion
@@ -66,4 +66,8 @@ jobs:
         with:
           mode: "destroy"
           faas-url: "${{ env.DYNAMIC_RUNNER_URL }}"
-          task-id: "${{ needs.setup.outputs.task-id }}"
+          task-id: "${{ needs.setup.outputs.task-id }}"
+```
+
+### Model and Dataset
+To avoid CI relies on network, we pre-download dataset on a NFS on the CI machine. The path for models are \${HOME}/models and the path for dataset is \${HOME}/models/hf_data.
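The same offline convention runs through the workflow diffs in this commit: preprocessing scripts are pointed at the NFS copy via `--local_dataset_path`, and model downloads are replaced by `HF_HUB_OFFLINE=1` with the `huggingface-cli download` lines commented out. A hedged sketch of what such steps look like (the step names are invented for illustration; the commands and paths follow the diffs in this commit):

```yaml
# Sketch only: step names are illustrative, commands mirror the pattern used in this commit.
steps:
  - name: Prepare GSM8K dataset from the pre-downloaded NFS copy
    run: |
      python3 examples/data_preprocess/gsm8k.py --local_dataset_path ${HOME}/models/hf_data/gsm8k
  - name: Use pre-downloaded models instead of fetching them
    run: |
      # huggingface-cli download Qwen/Qwen2.5-0.5B --local-dir ${HOME}/models/Qwen/Qwen2.5-0.5B
      export HF_HUB_OFFLINE=1  # fail fast if a step still tries to reach the Hugging Face Hub
```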

.github/workflows/checkpoint_converter.yml

Lines changed: 5 additions & 5 deletions
@@ -81,7 +81,7 @@ jobs:
       NO_PROXY: "localhost,127.0.0.1"
       HF_HUB_ENABLE_HF_TRANSFER: "0" # This is more stable
     container:
-      image: verlai/verl:app-verl0.5-transformers4.55.4-sglang0.4.10.post2-mcore0.13.0-te2.2
+      image: verlai/verl:app-verl0.6-transformers4.56.1-sglang0.5.2-mcore0.13.0-te2.2
       options: --gpus all --shm-size=10g
     steps:
       - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
@@ -92,8 +92,8 @@ jobs:
           pip3 install -e .[test]
       - name: Download Model to Use
         run: |
-          huggingface-cli download Qwen/Qwen2.5-0.5B --local-dir ${HOME}/models/Qwen/Qwen2.5-0.5B
-          huggingface-cli download deepseek-ai/deepseek-coder-1.3b-instruct --local-dir ${HOME}/models/deepseek-ai/deepseek-coder-1.3b-instruct
+          # huggingface-cli download Qwen/Qwen2.5-0.5B --local-dir ${HOME}/models/Qwen/Qwen2.5-0.5B
+          # huggingface-cli download deepseek-ai/deepseek-coder-1.3b-instruct --local-dir ${HOME}/models/deepseek-ai/deepseek-coder-1.3b-instruct
           export HF_HUB_OFFLINE=1
       - name: Running Huggingface to Megatron dist_ckpt converter (Qwen/Qwen2.5-0.5B)
         run: |
@@ -116,7 +116,7 @@ jobs:
       HF_HUB_ENABLE_HF_TRANSFER: "0" # This is more stable
       HF_ENDPOINT: "https://hf-mirror.com"
     container:
-      image: verlai/verl:app-verl0.5-transformers4.55.4-sglang0.4.10.post2-mcore0.13.0-te2.2
+      image: verlai/verl:app-verl0.6-transformers4.56.1-sglang0.5.2-mcore0.13.0-te2.2
       options: --gpus all --shm-size=10g
     steps:
       - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
@@ -127,7 +127,7 @@ jobs:
           pip3 install -e .[test]
       - name: Download Model to Use
         run: |
-          huggingface-cli download Qwen/Qwen1.5-MoE-A2.7B-Chat --local-dir ${HOME}/models/Qwen/Qwen1.5-MoE-A2.7B-Chat
+          # huggingface-cli download Qwen/Qwen1.5-MoE-A2.7B-Chat --local-dir ${HOME}/models/Qwen/Qwen1.5-MoE-A2.7B-Chat
           export HF_HUB_OFFLINE=1
       - name: Running Huggingface to Megatron dist_ckpt CPU converter (Qwen/Qwen1.5-MoE-A2.7B-Chat)
         run: |

.github/workflows/e2e_ascend.yml

Lines changed: 21 additions & 26 deletions
@@ -43,22 +43,17 @@ on:
     branches:
       - main
     paths:
+      - ".github/workflows/e2e_ascend.yml"
       - "**/*.py"
+      - "docs/ascend_tutorial/**"
+      - "examples/**"
+      - "recipe/**"
+      - "tests/special_npu/**"
+      - "tests/special_sanity/**"
+      - "verl/**"
+      - "pyproject.toml"
       - "requirements-npu.txt"
-      # Other entrypoints
-      - "!examples/**"
-      - "!tests/**"
-      - "!verl/trainer/main_*.py"
-      - "!verl/trainer/fsdp_sft_trainer.py"
-      # Recipes
-      - "!recipe/**"
-      # Entrypoints
-      - ".github/workflows/e2e_ascend.yml"
-      - "examples/data_preprocess/gsm8k.py"
-      - "examples/data_preprocess/geo3k.py"
-      - "tests/special_e2e/ppo_trainer"
-      - "verl/trainer/main_ppo.py"
-      - "verl/trainer/config/ppo_trainer.yaml"
+      - "setup.py"
 
 # Cancel jobs on the same ref if a new one is triggered
 concurrency:
@@ -81,6 +76,8 @@ jobs:
         - /usr/local/Ascend/driver/lib64/:/usr/local/Ascend/driver/lib64/
         - /usr/local/Ascend/driver/version.info:/usr/local/Ascend/driver/version.info
         - /etc/ascend_install.info:/etc/ascend_install.info
+        - /data00/dataset:/github/home/dataset
+        - /data00/models:/github/home/models
         # Use self-host cache speed up pip and model download
         # - /home/action/actions-runner/_work/cache:/github/home/.cache/
       options: >-
@@ -109,20 +106,23 @@ jobs:
           pip3 install hf_transfer peft
           pip3 install -r requirements-npu.txt
           pip install -e .
-      - name: Install torchviison
+      - name: Install torchvision
         run: |
           pip install torchvision==0.20.1+cpu --index-url https://download.pytorch.org/whl/cpu
       - name: Uninstall Triton
         run: |
           pip uninstall -y triton
-      - name: Prepare gsm8k dataset
+      - name: Preprocess gsm8k dataset
         run: |
-          ray stop --force
-          python3 examples/data_preprocess/gsm8k.py
-      - name: Prepare geo3k dataset
+          python examples/data_preprocess/gsm8k.py --local_dataset_path ${HOME}/dataset/openai/gsm8k
+      - name: Preprocess geo3k dataset
+        run: |
+          python examples/data_preprocess/geo3k.py --local_dataset_path ${HOME}/dataset/hiyouga/geometry3k
+      - name: Running gsm8k e2e qwen3 training tests with PPO on ASCEND NPU
         run: |
           ray stop --force
-          python3 examples/data_preprocess/geo3k.py
+          bash tests/special_npu/run_qwen3_06b_ppo.sh
+          rm -rf $HOME/ckpts
       - name: Running gsm8k e2e training tests with peft sft on ASCEND NPU
         run: |
           ray stop --force
@@ -143,11 +143,6 @@ jobs:
           ray stop --force
           bash tests/special_npu/run_qwen2_5_05b_dapo.sh
           rm -rf $HOME/ckpts
-      - name: Running gsm8k e2e qwen3 training tests with GRPO on ASCEND NPU
-        run: |
-          ray stop --force
-          bash tests/special_npu/run_qwen3_06b_grpo.sh
-          rm -rf $HOME/ckpts
       - name: Running gsm8k e2e training tests with GRPO MindSpeed on ASCEND NPU
         run: |
           ray stop --force
@@ -157,4 +152,4 @@
       - name: Running NPU profiling unit tests
         run: |
           ray stop --force
-          pytest -s -x tests/utils/test_special_mstx_profile.py
+          pytest -s -x tests/utils/test_special_mstx_profile.py
