Correct parameter description (#3803)

1787648106 · lunzhongwang · LeonEricsson · web-flow · commit 9a1e6a450801 · 2025-07-30T21:41:15.000+02:00
Co-authored-by: lunzhongwang <lunzhongwang@soulapp.cn>
Co-authored-by: LeonEricsson <70749762+LeonEricsson@users.noreply.github.com>
diff --git a/trl/trainer/grpo_config.py b/trl/trainer/grpo_config.py
@@ -553,7 +553,7 @@ class GRPOConfig(TrainingArguments):
         metadata={
             "help": "ρ parameter from Beyond the 80/20 Rule. Keeps in the policy loss term only the top-ρ quantile of "
             "tokens by entropy of the probability distribution at each sequence position, improving results. Range: "
-            "[0.0-1.0]. A value of `1.0` masks all but the highest entropy token; `0.0` keeps all tokens. The paper "
+            "[0.0-1.0]. A value of `0.0` masks all but the highest entropy token; `1.0` keeps all tokens. The paper "
             "recommends a value of `0.2`. If used with `mask_truncated_completions=True`, only tokens from "
             "non-truncated completions are considered."
         },