#' See [Survival Analysis with Accelerated Failure Time](https://xgboost.readthedocs.io/en/latest/tutorials/aft_survival_analysis.html) for details.
#' - `"multi:softmax"`: set XGBoost to do multiclass classification using the softmax objective, you also need to set num_class(number of classes)
460
462
#' - `"multi:softprob"`: same as softmax, but output a vector of `ndata * nclass`, which can be further reshaped to `ndata * nclass` matrix. The result contains predicted probability of each data point belonging to each class.
#' - `"rank:ndcg"`: Use LambdaMART to perform pair-wise ranking where the normalized discounted cumulative gain (NDCG) is maximized. This objective supports position debiasing for click data.
#' - `"rank:map"`: Use LambdaMART to perform pair-wise ranking where the mean average precision (MAP) is maximized.
#' - `"rank:pairwise"`: Use LambdaRank to perform pair-wise ranking using the `ranknet` objective.
#' - `"reg:gamma"`: gamma regression with log-link. The output is the mean of the gamma distribution. It might be useful, e.g., for modeling insurance claims severity, or for any outcome that might be gamma-distributed.
#' - `"reg:tweedie"`: Tweedie regression with log-link. It might be useful, e.g., for modeling total loss in insurance, or for any outcome that might be Tweedie-distributed.
#' @param verbosity (default=1)
#' Verbosity of printing messages. Valid values are 0 (silent), 1 (warning), 2 (info), 3
#' (debug). Sometimes XGBoost tries to change configurations based on heuristics, which
#' is displayed as a warning message.
#' @param eval_metric (default according to objective)
#' - Evaluation metrics for validation data. A default metric will be assigned according to the objective (rmse for regression, logloss for classification, mean average precision for `rank:map`, etc.).
#' - Users can add multiple evaluation metrics (the first sketch after this list shows this).
#' - The choices are listed below:
#' - `"rmse"`: root mean square error.
#' - `"rmsle"`: root mean square log error: \eqn{\sqrt{\frac{1}{N}[log(pred + 1) - log(label + 1)]^2}}. Default metric of `"reg:squaredlogerror"` objective. This metric reduces errors generated by outliers in dataset. But because `log` function is employed, `"rmsle"` might output `nan` when prediction value is less than -1. See `"reg:squaredlogerror"` for other requirements.
#' - `"mphe"`: mean Pseudo Huber error. Default metric of `"reg:pseudohubererror"` objective.
#' - `"logloss"`: negative log-likelihood.
#' - `"error"`: Binary classification error rate. It is calculated as `#(wrong cases)/#(all cases)`. For the predictions, the evaluation will regard the instances with prediction value larger than 0.5 as positive instances, and the others as negative instances.
#' - `"error@t"`: a different than 0.5 binary classification threshold value could be specified by providing a numerical value through 't'.
#' - `"merror"`: Multiclass classification error rate. It is calculated as `#(wrong cases)/#(all cases)`.
#' - `"auc"`: [Receiver Operating Characteristic Area under the Curve](https://en.wikipedia.org/wiki/Receiver_operating_characteristic#Area_under_the_curve).
628
+
#' - `"auc"`: area under the receiver-operating characteristic curve.
#' Available for classification and learning-to-rank tasks.
#' - When used with binary classification, the objective should be `"binary:logistic"` or similar functions that work on probability.
#' - When used with multi-class classification, the objective should be `"multi:softprob"` instead of `"multi:softmax"`, as the latter doesn't output probabilities. Also, the AUC is calculated by 1-vs-rest with the reference class weighted by class prevalence.
#' - When used with the LTR task, the AUC is computed by comparing pairs of documents to count correctly sorted pairs. This corresponds to pairwise learning to rank. The current implementation has some known issues: the average AUC over groups and over distributed workers is not well-defined.
#' - On a single machine the AUC calculation is exact. In a distributed environment the AUC is a weighted average over the AUC of training rows on each node - therefore, distributed AUC is an approximation sensitive to the distribution of data across workers. Use another metric in distributed environments if precision and reproducibility are important.
#' - When the input dataset contains only negative or only positive samples, the output is `NaN`. The behavior is implementation-defined; for instance, `scikit-learn` returns \eqn{0.5} instead.
#' - `"aucpr"`: area under the precision-recall (PR) curve.
#' Available for classification and learning-to-rank tasks.
#'
#' After XGBoost 1.6, both the requirements and restrictions for using `"aucpr"` in classification problems are similar to `"auc"`. For the ranking task, only binary relevance labels \eqn{y \in [0, 1]} are supported. Different from `"map"` (mean average precision), `"aucpr"` calculates the *interpolated* area under the precision-recall curve using continuous interpolation.
#'
#' - `"pre"`: Precision at \eqn{k}. Supports only learning to rank task.