microsoft · jameslamb · Jul 9, 2024 · Jul 8, 2024
@@ -680,7 +680,7 @@ Learning Control Parameters
 
    -  gradient quantization can accelerate training, with little accuracy drop in most cases
 
-   -  **Note**: can be used only with ``device_type = cpu``
+   -  **Note**: can be used only with ``device_type = cpu`` or ``device_type = cuda``
 
    -  *New in version 4.0.0*
 
@@ -690,7 +690,7 @@ Learning Control Parameters
 
    -  with more bins, the quantized training will be closer to full precision training
 
-   -  **Note**: can be used only with ``device_type = cpu``
+   -  **Note**: can be used only with ``device_type = cpu`` or ``device_type = cuda``
 
    -  *New in 4.0.0*
 
@@ -700,14 +700,16 @@ Learning Control Parameters
 
    -  renewing is very helpful for good quantized training accuracy for ranking objectives
 
-   -  **Note**: can be used only with ``device_type = cpu``
+   -  **Note**: can be used only with ``device_type = cpu`` or ``device_type = cuda``
 
    -  *New in 4.0.0*
 
 -  ``stochastic_rounding`` :raw-html:`<a id="stochastic_rounding" title="Permalink to this parameter" href="#stochastic_rounding">&#x1F517;&#xFE0E;</a>`, default = ``true``, type = bool
 
    -  whether to use stochastic rounding in gradient quantization
 
+   -  **Note**: can be used only with ``device_type = cpu`` or ``device_type = cuda``
+
    -  *New in 4.0.0*
 
 IO Parameters

@@ -619,23 +619,24 @@ struct Config {
   // desc = enabling this will discretize (quantize) the gradients and hessians into bins of ``num_grad_quant_bins``
   // desc = with quantized training, most arithmetics in the training process will be integer operations
   // desc = gradient quantization can accelerate training, with little accuracy drop in most cases
-  // desc = **Note**: can be used only with ``device_type = cpu``
+  // desc = **Note**: can be used only with ``device_type = cpu`` or ``device_type = cuda``
   // desc = *New in version 4.0.0*
   bool use_quantized_grad = false;
 
   // desc = number of bins to quantize gradients and hessians
   // desc = with more bins, the quantized training will be closer to full precision training
-  // desc = **Note**: can be used only with ``device_type = cpu``
+  // desc = **Note**: can be used only with ``device_type = cpu`` or ``device_type = cuda``
   // desc = *New in 4.0.0*
   int num_grad_quant_bins = 4;
 
   // desc = whether to renew the leaf values with original gradients when quantized training
   // desc = renewing is very helpful for good quantized training accuracy for ranking objectives
-  // desc = **Note**: can be used only with ``device_type = cpu``
+  // desc = **Note**: can be used only with ``device_type = cpu`` or ``device_type = cuda``
   // desc = *New in 4.0.0*
   bool quant_train_renew_leaf = false;
 
   // desc = whether to use stochastic rounding in gradient quantization
+  // desc = **Note**: can be used only with ``device_type = cpu`` or ``device_type = cuda``
   // desc = *New in 4.0.0*
   bool stochastic_rounding = true;