Changes from all commits (38 commits)
162aaca  BUMP FA aligned with SGL-KERNEL (johnnynunez, Oct 7, 2025)
4f281c7  BUILD CUDA 13 (johnnynunez, Oct 7, 2025)
0b34d78  BUILD CUDA 13 (johnnynunez, Oct 7, 2025)
28bbd2a  decord2 (johnnynunez, Oct 7, 2025)
3890637  fix typo (johnnynunez, Oct 7, 2025)
ba0294e  Merge branch 'main' into main (johnnynunez, Oct 7, 2025)
771a614  fix typo (johnnynunez, Oct 7, 2025)
4793f03  Merge remote-tracking branch 'origin/main' (johnnynunez, Oct 7, 2025)
6054344  Merge branch 'main' into main (johnnynunez, Oct 7, 2025)
b7a4f36  Merge branch 'main' into main (johnnynunez, Oct 8, 2025)
9a2c0b2  Merge branch 'main' into main (johnnynunez, Oct 8, 2025)
1d5cb44  build cu129 ok, now test pr test cu130 (johnnynunez, Oct 8, 2025)
1be9fa5  Merge branch 'main' into main (johnnynunez, Oct 8, 2025)
95d1a3e  Merge branch 'main' into main (johnnynunez, Oct 8, 2025)
c4f9a20  test bump fa (johnnynunez, Oct 8, 2025)
20c5a82  Update CMakeLists.txt (johnnynunez, Oct 8, 2025)
5419b85  Merge branch 'main' into main (johnnynunez, Oct 9, 2025)
800c6e3  fix (johnnynunez, Oct 9, 2025)
126acc6  Merge remote-tracking branch 'origin/main' (johnnynunez, Oct 9, 2025)
74d1416  fix (johnnynunez, Oct 9, 2025)
4048eab  fix tests (johnnynunez, Oct 9, 2025)
9ee6bac  bump docker (johnnynunez, Oct 9, 2025)
ed65fb1  bump torch (johnnynunez, Oct 9, 2025)
7b6d88f  Merge branch 'main' into main (johnnynunez, Oct 9, 2025)
6a53e65  bump gdrcopy compatible with Blackwell (johnnynunez, Oct 9, 2025)
1860c3f  bump gdrcopy compatible with Blackwell (johnnynunez, Oct 9, 2025)
11ca260  bump gdrcopy compatible with Blackwell (johnnynunez, Oct 9, 2025)
072add5  add support gdrcopy for GB200,GB300,Thor,Spark (johnnynunez, Oct 9, 2025)
0f94fc2  Merge branch 'main' into main (johnnynunez, Oct 9, 2025)
49acd56  upgrade cuda 13 (johnnynunez, Oct 9, 2025)
abfeb61  Update ci_install_deepep.sh (johnnynunez, Oct 9, 2025)
98682db  Merge branch 'main' into main (johnnynunez, Oct 9, 2025)
1982c4f  revert (johnnynunez, Oct 10, 2025)
41b0c19  Merge remote-tracking branch 'origin/main' (johnnynunez, Oct 10, 2025)
fb2b87c  Merge branch 'main' into main (johnnynunez, Oct 10, 2025)
f065618  remove thor in cu12 (johnnynunez, Oct 10, 2025)
de4d952  revert (johnnynunez, Oct 10, 2025)
e67a841  revert (johnnynunez, Oct 10, 2025)
168 changes: 168 additions & 0 deletions .github/workflows/release-whl-kernel.yml
@@ -184,3 +184,171 @@ jobs:
          git add -A
          git commit -m "update whl index"
          git push

  build-cu130:

Review comment (Contributor): can you make it a matrix to unify cu130 and cu130-aarch64?

Reply (Contributor Author): Yes, I can. I'm testing sgl-flash-attention at the moment; see the matrix sketch after this diff.

    if: github.repository == 'sgl-project/sglang'
    runs-on: x64-kernel-build-node
    strategy:
      matrix:
        python-version: [ "3.10" ]
        cuda-version: [ "13.0" ]
    steps:
      - uses: actions/checkout@v4
        with:
          submodules: "recursive"

      - name: Set up Python ${{ matrix.python-version }}
        uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python-version }}

      - name: Build wheels
        run: |
          cd sgl-kernel
          chmod +x ./build.sh
          ./build.sh "${{ matrix.python-version }}" "${{ matrix.cuda-version }}"

      - name: Upload to PyPI
        working-directory: sgl-kernel
        run: |
          pip install twine
          python3 -m twine upload --skip-existing dist/* -u __token__ -p ${{ secrets.PYPI_TOKEN }}

      - name: Upload artifacts
        uses: actions/upload-artifact@v4
        with:
          name: wheel-python${{ matrix.python-version }}-cuda${{ matrix.cuda-version }}
          path: sgl-kernel/dist/*

  release-cu130:
    needs: build-cu130
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4

      - name: Download artifacts
        uses: actions/download-artifact@v4
        with:
          path: sgl-kernel/dist/
          merge-multiple: true
          pattern: wheel-*

      - name: Set tag name
        id: set_tag_name
        run: |
          if [ -z "${{ inputs.tag_name }}" ]; then
            TAG_NAME="v$(cat sgl-kernel/python/sgl_kernel/version.py | cut -d'"' -f2)"
            echo "tag_name=$TAG_NAME" >> $GITHUB_OUTPUT
          else
            echo "tag_name=${{ inputs.tag_name }}" >> $GITHUB_OUTPUT
          fi

      - name: Release
        uses: softprops/action-gh-release@v2
        with:
          tag_name: ${{ steps.set_tag_name.outputs.tag_name }}
          repository: sgl-project/whl
          token: ${{ secrets.WHL_TOKEN }}
          files: |
            sgl-kernel/dist/*

      - name: Clone wheel index
        run: git clone https://oauth2:${WHL_TOKEN}@github.com/sgl-project/whl.git sgl-whl
        env:
          WHL_TOKEN: ${{ secrets.WHL_TOKEN }}

      - name: Update wheel index
        run: python3 scripts/update_kernel_whl_index.py --cuda 130

      - name: Push wheel index
        run: |
          cd sgl-whl
          git config --local user.name "sglang-bot"
          git config --local user.email "[email protected]"
          git add -A
          git commit -m "update whl index"
          git push

  build-cu130-aarch64:
    if: github.repository == 'sgl-project/sglang'
    runs-on: arm-kernel-build-node
    strategy:
      matrix:
        python-version: [ "3.10" ]
        cuda-version: [ "13.0" ]
    steps:
      - uses: actions/checkout@v4
        with:
          submodules: "recursive"

      - name: Set up Python ${{ matrix.python-version }}
        uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python-version }}

      - name: Build wheels
        run: |
          cd sgl-kernel
          chmod +x ./build.sh
          ./build.sh "${{ matrix.python-version }}" "${{ matrix.cuda-version }}" aarch64

      - name: Upload to PyPI
        working-directory: sgl-kernel
        run: |
          pip install twine
          python3 -m twine upload --skip-existing dist/* -u __token__ -p ${{ secrets.PYPI_TOKEN }}

      - name: Upload artifacts
        uses: actions/upload-artifact@v4
        with:
          name: wheel-python${{ matrix.python-version }}-cuda${{ matrix.cuda-version }}-aarch64
          path: sgl-kernel/dist/*

  release-cu130-aarch64:
    needs: build-cu130-aarch64
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4

      - name: Download artifacts
        uses: actions/download-artifact@v4
        with:
          path: sgl-kernel/dist/
          merge-multiple: true
          pattern: wheel-*

      - name: Set tag name
        id: set_tag_name
        run: |
          if [ -z "${{ inputs.tag_name }}" ]; then
            TAG_NAME="v$(cat sgl-kernel/python/sgl_kernel/version.py | cut -d'"' -f2)"
            echo "tag_name=$TAG_NAME" >> $GITHUB_OUTPUT
          else
            echo "tag_name=${{ inputs.tag_name }}" >> $GITHUB_OUTPUT
          fi

      - name: Release
        uses: softprops/action-gh-release@v2
        with:
          tag_name: ${{ steps.set_tag_name.outputs.tag_name }}
          repository: sgl-project/whl
          token: ${{ secrets.WHL_TOKEN }}
          files: |
            sgl-kernel/dist/*

      - name: Clone wheel index
        run: git clone https://oauth2:${WHL_TOKEN}@github.com/sgl-project/whl.git sgl-whl
        env:
          WHL_TOKEN: ${{ secrets.WHL_TOKEN }}

      - name: Update wheel index
        run: python3 scripts/update_kernel_whl_index.py --cuda 130

      - name: Push wheel index
        run: |
          cd sgl-whl
          git config --local user.name "sglang-bot"
          git config --local user.email "[email protected]"
          git add -A
          git commit -m "update whl index"
          git push
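As suggested in the review thread on build-cu130, the x64 and aarch64 CUDA 13 jobs could be folded into a single matrix job. A minimal sketch, reusing the runner labels above; the `arch` axis, the `runner` mapping via `include`, and an explicit arch argument to build.sh (which today only treats a trailing `aarch64` specially) are illustrative assumptions, not code from this PR:

```yaml
  # Sketch only: one matrix job covering both architectures.
  build-cu130-matrix:
    if: github.repository == 'sgl-project/sglang'
    runs-on: ${{ matrix.runner }}
    strategy:
      matrix:
        python-version: [ "3.10" ]
        cuda-version: [ "13.0" ]
        arch: [ x86_64, aarch64 ]
        include:
          # Pair each arch with its runner label (labels taken from the jobs above).
          - arch: x86_64
            runner: x64-kernel-build-node
          - arch: aarch64
            runner: arm-kernel-build-node
    steps:
      - uses: actions/checkout@v4
        with:
          submodules: "recursive"
      - name: Set up Python ${{ matrix.python-version }}
        uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python-version }}
      - name: Build wheels
        run: |
          cd sgl-kernel
          chmod +x ./build.sh
          # Assumes build.sh learns to accept "x86_64" explicitly; currently
          # only a third argument of "aarch64" is recognized.
          ./build.sh "${{ matrix.python-version }}" "${{ matrix.cuda-version }}" "${{ matrix.arch }}"
      - name: Upload artifacts
        uses: actions/upload-artifact@v4
        with:
          name: wheel-python${{ matrix.python-version }}-cuda${{ matrix.cuda-version }}-${{ matrix.arch }}
          path: sgl-kernel/dist/*
```

The two release jobs could be unified the same way, or kept as one job that downloads all `wheel-*` artifacts, since both already push to the same index.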
7 changes: 4 additions & 3 deletions docker/Dockerfile
@@ -1,4 +1,4 @@
-ARG CUDA_VERSION=12.9.1
+ARG CUDA_VERSION=13.0.1
FROM nvidia/cuda:${CUDA_VERSION}-cudnn-devel-ubuntu22.04 AS base
ARG TARGETARCH

@@ -83,12 +83,13 @@ RUN python3 -m pip install --no-cache-dir --upgrade pip setuptools wheel html5li
        12.6.1) CUINDEX=126 ;; \
        12.8.1) CUINDEX=128 ;; \
        12.9.1) CUINDEX=129 ;; \
+        13.0.1) CUINDEX=130 ;; \
        *) echo "Unsupported CUDA version: $CUDA_VERSION" && exit 1 ;; \
    esac \
    && if [ "$CUDA_VERSION" = "12.6.1" ]; then \
        python3 -m pip install --no-cache-dir https://github.com/sgl-project/whl/releases/download/v${SGL_KERNEL_VERSION}/sgl_kernel-${SGL_KERNEL_VERSION}+cu124-cp310-abi3-manylinux2014_$(uname -m).whl --force-reinstall --no-deps ; \
    fi \
-    && if [ "$CUDA_VERSION" = "12.8.1" ] || [ "$CUDA_VERSION" = "12.9.1" ]; then \
+    && if [ "$CUDA_VERSION" = "12.8.1" ] || [ "$CUDA_VERSION" = "12.9.1" ] || [ "$CUDA_VERSION" = "13.0.1" ]; then \
        python3 -m pip install --no-cache-dir sgl-kernel==${SGL_KERNEL_VERSION} ; \
    fi \
    && python3 -m pip install --no-cache-dir -e "python[${BUILD_TYPE}]" --extra-index-url https://download.pytorch.org/whl/cu${CUINDEX} \
@@ -130,7 +131,7 @@ RUN cd /sgl-workspace/DeepEP && \
        12.6.1) \
            CHOSEN_TORCH_CUDA_ARCH_LIST='9.0' \
            ;; \
-        12.8.1|12.9.1) \
+        12.8.1|12.9.1|13.0.1) \
            CHOSEN_TORCH_CUDA_ARCH_LIST='9.0;10.0' \
            ;; \
        *) \
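With the case statement above extended, a CUDA 13 image is selected purely through the existing build argument. A hypothetical invocation from the repository root (the image tag is an assumption, not part of this PR):

```bash
# CUDA_VERSION picks the nvidia/cuda base image; inside the Dockerfile it
# also maps to CUINDEX=130, i.e. the cu130 PyTorch wheel index.
docker build -f docker/Dockerfile --build-arg CUDA_VERSION=13.0.1 -t sglang:cu130 .
```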
10 changes: 5 additions & 5 deletions python/pyproject.toml
@@ -46,7 +46,7 @@ dependencies = [
"py-spy",
"pybase64",
"pydantic",
"pynvml",
"nvidia-ml-py",
"python-multipart",
"pyzmq>=25.1.2",
"requests",
@@ -57,10 +57,10 @@ dependencies = [
"soundfile==0.13.1",
"tiktoken",
"timm==1.0.16",
"torch==2.8.0",
"torch>=2.8.0",
"torch_memory_saver==0.0.9rc2",
"torchao==0.9.0",
"torchaudio==2.8.0",
"torchao==0.13.0",
"torchaudio>=2.8.0",
"torchvision",
"tqdm",
"transformers==4.57.0",
@@ -72,7 +72,7 @@ dependencies = [
]

[project.optional-dependencies]
decord = ["decord"]
decord = ["decord2"]
test = [
    "accelerate",
    "expecttest",
2 changes: 1 addition & 1 deletion python/pyproject_other.toml
@@ -41,7 +41,7 @@ runtime_common = [
"psutil",
"pybase64",
"pydantic",
"pynvml",
"nvidia-ml-py",
"python-multipart",
"pyzmq>=25.1.2",
"scipy",
6 changes: 5 additions & 1 deletion python/sglang/srt/utils/common.py
@@ -409,7 +409,11 @@ def get_available_gpu_memory(

        if empty_cache:
            torch.cuda.empty_cache()
-        free_gpu_memory, _ = torch.cuda.mem_get_info(gpu_id)
+        shared_sysmem_device_mem_sms = (87, 110, 121)  # Orin, Thor, Spark
+        if get_device_sm() in shared_sysmem_device_mem_sms:
+            free_gpu_memory = psutil.virtual_memory().available
+        else:
+            free_gpu_memory, _ = torch.cuda.mem_get_info(gpu_id)

    elif device == "xpu":
        num_gpus = torch.xpu.device_count()
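The new branch above exists because Orin (SM 8.7), Thor (SM 11.0), and Spark (SM 12.1) are unified-memory devices: the GPU draws from the same physical pool as the CPU, so `torch.cuda.mem_get_info` understates what is actually allocatable. A self-contained sketch of the same logic, using the public capability API in place of the module's own `get_device_sm` helper (an assumption for portability):

```python
import psutil
import torch

# SM values of integrated devices whose "GPU memory" is system RAM.
SHARED_SYSMEM_DEVICE_SMS = (87, 110, 121)  # Orin, Thor, Spark

def available_gpu_memory_bytes(gpu_id: int = 0) -> int:
    major, minor = torch.cuda.get_device_capability(gpu_id)
    if major * 10 + minor in SHARED_SYSMEM_DEVICE_SMS:
        # Unified memory: CPU-side availability is the real budget.
        return psutil.virtual_memory().available
    free_bytes, _total = torch.cuda.mem_get_info(gpu_id)
    return free_bytes
```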
12 changes: 6 additions & 6 deletions scripts/ci/ci_install_deepep.sh
@@ -4,7 +4,7 @@ set -euxo pipefail

bash scripts/ci/ci_install_dependency.sh

-export GDRCOPY_HOME=/usr/src/gdrdrv-2.4.4/
+export GDRCOPY_HOME=/usr/src/gdrdrv-2.5.1/
export NVSHMEM_DIR=/opt/nvshmem/install
export LD_LIBRARY_PATH="${NVSHMEM_DIR}/lib:$LD_LIBRARY_PATH"
export PATH="${NVSHMEM_DIR}/bin:$PATH"
@@ -23,9 +23,9 @@ rm -rf /opt/gdrcopy && mkdir -p /opt/gdrcopy
rm -rf /opt/nvshmem && mkdir -p /opt/nvshmem
cd /opt/gdrcopy
git clone https://github.com/NVIDIA/gdrcopy.git .
-git checkout v2.4.4
+git checkout v2.5.1
apt update
-apt install -y nvidia-dkms-535
+apt install -y nvidia-dkms-580
apt install -y build-essential devscripts debhelper fakeroot pkg-config dkms
apt install -y check libsubunit0 libsubunit-dev python3-venv
cd packages
@@ -42,8 +42,8 @@ apt-get update && apt-get install -y libfabric-dev

# Install NVSHMEM
cd /opt/nvshmem
-wget https://developer.download.nvidia.com/compute/redist/nvshmem/3.3.9/source/nvshmem_src_cuda12-all-all-3.3.9.tar.gz
-tar -xf nvshmem_src_cuda12-all-all-3.3.9.tar.gz
+wget https://developer.download.nvidia.com/compute/redist/nvshmem/3.4.5/source/nvshmem_src_cuda12-all-all-3.4.5.tar.gz
+tar -xf nvshmem_src_cuda12-all-all-3.4.5.tar.gz
mv nvshmem_src nvshmem && cd nvshmem
NVSHMEM_SHMEM_SUPPORT=0 \
NVSHMEM_UCX_SUPPORT=0 \
@@ -53,7 +53,7 @@ NVSHMEM_IBGDA_SUPPORT=1 \
NVSHMEM_PMIX_SUPPORT=0 \
NVSHMEM_TIMEOUT_DEVICE_POLLING=0 \
NVSHMEM_USE_GDRCOPY=1 \
-cmake -S . -B build/ -DCMAKE_INSTALL_PREFIX=/opt/nvshmem/install -DCMAKE_CUDA_ARCHITECTURES=90
+cmake -S . -B build/ -DCMAKE_INSTALL_PREFIX=/opt/nvshmem/install -DCMAKE_CUDA_ARCHITECTURES="90;100;103;121"
cd build
make -j$(nproc) install

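One detail worth noting in the cmake line above: the quotes around the architecture list are load-bearing, because an unquoted `;` would end the shell command. For example:

```bash
# Quoted: CMake receives the full list 90;100;103;121 (Hopper plus the
# Blackwell family). Unquoted, the shell would stop the command at the
# first ';' and CMake would only see 90.
cmake -S . -B build/ -DCMAKE_INSTALL_PREFIX=/opt/nvshmem/install -DCMAKE_CUDA_ARCHITECTURES="90;100;103;121"
```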
4 changes: 2 additions & 2 deletions scripts/ci/ci_install_dependency.sh
@@ -3,7 +3,7 @@
set -euxo pipefail

IS_BLACKWELL=${IS_BLACKWELL:-0}
CU_VERSION="cu128"
CU_VERSION="cu129"

# Kill existing processes
SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
@@ -64,7 +64,7 @@ $PIP_CMD install mooncake-transfer-engine==0.3.6.post1 nvidia-cuda-nvrtc-cu12 py

if [ "$IS_BLACKWELL" != "1" ]; then
# For lmms_evals evaluating MMMU
git clone --branch v0.3.3 --depth 1 https://github.com/EvolvingLMMs-Lab/lmms-eval.git
git clone --branch v0.5 --depth 1 https://github.com/EvolvingLMMs-Lab/lmms-eval.git
$PIP_CMD install -e lmms-eval/ $PIP_INSTALL_SUFFIX

# Install xformers
27 changes: 19 additions & 8 deletions sgl-kernel/CMakeLists.txt
@@ -90,7 +90,7 @@ FetchContent_Populate(repo-flashinfer)
FetchContent_Declare(
  repo-flash-attention
  GIT_REPOSITORY https://github.com/sgl-project/sgl-attn
-  GIT_TAG f9af0c2a1d82ab1812e6987e9338363cc2bf0f8d
+  GIT_TAG sgl-kernel
  GIT_SHALLOW OFF
)
FetchContent_Populate(repo-flash-attention)
@@ -99,7 +99,7 @@ FetchContent_Populate(repo-flash-attention)
FetchContent_Declare(
  repo-flash-attention-origin
  GIT_REPOSITORY https://github.com/Dao-AILab/flash-attention.git
-  GIT_TAG 203b9b3dba39d5d08dffb49c09aa622984dff07d
+  GIT_TAG c485eeade0c3ec9ce186c3640c52c9f1ce090b81
  GIT_SHALLOW OFF
)
FetchContent_Populate(repo-flash-attention-origin)
@@ -218,26 +218,37 @@ if (ENABLE_BELOW_SM90)
"-gencode=arch=compute_80,code=sm_80"
"-gencode=arch=compute_89,code=sm_89"
)
if (CMAKE_SYSTEM_PROCESSOR STREQUAL "aarch64")
list(APPEND SGL_KERNEL_CUDA_FLAGS
"-gencode=arch=compute_87,code=sm_87"
)
endif()

endif()

if ("${CUDA_VERSION}" VERSION_GREATER_EQUAL "12.8" OR SGL_KERNEL_ENABLE_SM100A)
list(APPEND SGL_KERNEL_CUDA_FLAGS
"-gencode=arch=compute_100a,code=sm_100a"
"-gencode=arch=compute_120a,code=sm_120a"
)

# refer sm_121, sm_110 and sm_101 description https://github.com/pytorch/pytorch/pull/156176
if ("${CUDA_VERSION}" VERSION_GREATER_EQUAL "13.0")
list(APPEND SGL_KERNEL_CUDA_FLAGS
"-gencode=arch=compute_103a,code=sm_103a"
"-gencode=arch=compute_110a,code=sm_110a"
"-gencode=arch=compute_121a,code=sm_121a"
"--compress-mode=size"
)
if (CMAKE_SYSTEM_PROCESSOR STREQUAL "aarch64")
list(APPEND SGL_KERNEL_CUDA_FLAGS
"-gencode=arch=compute_110a,code=sm_110a"
"-gencode=arch=compute_121a,code=sm_121a"
)
endif()
else()
list(APPEND SGL_KERNEL_CUDA_FLAGS
"-gencode=arch=compute_101a,code=sm_101a"
)
if (CMAKE_SYSTEM_PROCESSOR STREQUAL "aarch64")
list(APPEND SGL_KERNEL_CUDA_FLAGS
"-gencode=arch=compute_101a,code=sm_101a"
)
endif()
endif()
endif()

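For orientation, the compute capabilities targeted above map to hardware roughly as follows; this annotation is our reading of NVIDIA's naming (and of the PyTorch PR linked in the diff), not text from this change:

```cmake
# sm_80 / sm_89      Ampere (A100) / Ada (RTX 40xx, L40)
# sm_87              Jetson AGX Orin (hence aarch64-only)
# sm_100a / sm_103a  Blackwell data center (B200/GB200, B300/GB300)
# sm_110a            Thor under CUDA 13 (the same part is sm_101a on CUDA 12.8/12.9)
# sm_120a / sm_121a  Blackwell workstation/GeForce and DGX Spark (GB10)
```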
7 changes: 6 additions & 1 deletion sgl-kernel/build.sh
@@ -20,7 +20,10 @@ else
    BUILDER_NAME="pytorch/manylinux2_28-builder"
fi

-if [ ${CUDA_VERSION} = "12.9" ]; then
+if [ ${CUDA_VERSION} = "13.0" ]; then
+    DOCKER_IMAGE="${BUILDER_NAME}:cuda${CUDA_VERSION}"
+    TORCH_INSTALL="pip install --no-cache-dir torch==2.9.0 --index-url https://download.pytorch.org/whl/test/cu130"
+elif [ ${CUDA_VERSION} = "12.9" ]; then
    DOCKER_IMAGE="${BUILDER_NAME}:cuda${CUDA_VERSION}"
    TORCH_INSTALL="pip install --no-cache-dir torch==2.8.0 --index-url https://download.pytorch.org/whl/cu129"
elif [ ${CUDA_VERSION} = "12.8" ]; then
@@ -67,6 +70,8 @@ docker run --rm \
    export CUDA_VERSION=${CUDA_VERSION} && \
    mkdir -p /usr/lib/${ARCH}-linux-gnu/ && \
    ln -s /usr/local/cuda-${CUDA_VERSION}/targets/${LIBCUDA_ARCH}-linux/lib/stubs/libcuda.so /usr/lib/${ARCH}-linux-gnu/libcuda.so && \
+    export CPLUS_INCLUDE_PATH=/usr/local/cuda/include/cccl${CPLUS_INCLUDE_PATH:+:${CPLUS_INCLUDE_PATH}} && \
+    export C_INCLUDE_PATH=/usr/local/cuda/include/cccl${C_INCLUDE_PATH:+:${C_INCLUDE_PATH}} && \

    cd /sgl-kernel && \
    ls -la ${PYTHON_ROOT_PATH}/lib/python${PYTHON_VERSION}/site-packages/wheel/ && \
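The two `*_INCLUDE_PATH` exports added above reflect CUDA 13 relocating the CCCL headers (Thrust, CUB, libcu++) into a separate `include/cccl` subdirectory, so builds expecting `<cub/cub.cuh>` on the default include path need it prepended. A quick, illustrative check inside a builder container:

```bash
# CUDA 13 layout: Thrust/CUB live under include/cccl.
ls /usr/local/cuda/include/cccl
# CUDA 12 layout: they sit directly under include/.
ls /usr/local/cuda/include | grep -E '^(cub|thrust)$' || true
```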