Commit fc4dae2

kashif and qgallouedec authored
🫣 [GRPO] add cache_implementation option in GRPO (#3075)

* add cache_implementation option in GRPO
* add cache_implementation to config
* Update trl/trainer/grpo_config.py

Co-authored-by: Quentin Gallouédec <[email protected]>
1 parent e4e5671 commit fc4dae2

File tree

2 files changed: 7 additions & 0 deletions

trl/trainer/grpo_config.py

Lines changed: 6 additions & 0 deletions

@@ -72,6 +72,8 @@ class GRPOConfig(TrainingArguments):
             Float that penalizes new tokens based on whether they appear in the prompt and the generated text so far.
             Values > `1.0` encourage the model to use new tokens, while values < `1.0` encourage the model to repeat
             tokens.
+        cache_implementation (`str` or `None`, *optional*, defaults to `None`):
+            Implementation of the cache method for faster generation when use_vllm is set to False.
 
         > Parameters that control generation acceleration powered by vLLM
 
@@ -217,6 +219,10 @@ class GRPOConfig(TrainingArguments):
             "to repeat tokens."
         },
     )
+    cache_implementation: Optional[str] = field(
+        default=None,
+        metadata={"help": "Implementation of the cache method for faster generation when use_vllm is set to False."},
+    )
 
     # Parameters that control generation acceleration powered by vLLM
     use_vllm: Optional[bool] = field(
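
For context, a minimal usage sketch (not part of this commit): the new field is set like any other GRPOConfig generation parameter and only takes effect when generation runs through transformers rather than vLLM. The output_dir and the "static" value below are illustrative assumptions; accepted values are the cache implementations understood by transformers' GenerationConfig.

from trl import GRPOConfig

training_args = GRPOConfig(
    output_dir="grpo-output",       # illustrative path, not from the commit
    use_vllm=False,                 # cache_implementation only applies to transformers generation
    cache_implementation="static",  # one of the cache implementations GenerationConfig accepts
)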

trl/trainer/grpo_trainer.py

Lines changed: 1 addition & 0 deletions

@@ -548,6 +548,7 @@ def new_group_context():
                 top_k=args.top_k,
                 min_p=args.min_p,
                 repetition_penalty=args.repetition_penalty,
+                cache_implementation=args.cache_implementation,
             )
 
             # Gradient accumulation requires scaled loss. Normally, loss scaling in the parent class depends on whether the
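
The one-line change threads the new config field into the generation config that GRPOTrainer builds for transformers-based generation. A self-contained sketch of that pass-through, assuming illustrative values for everything not visible in the hunk:

from transformers import GenerationConfig
from trl import GRPOConfig

args = GRPOConfig(output_dir="grpo-output", cache_implementation="static")  # illustrative values

# Mirrors the construction this hunk modifies; only the keyword arguments
# visible in the diff context are reproduced here.
generation_config = GenerationConfig(
    top_k=args.top_k,
    min_p=args.min_p,
    repetition_penalty=args.repetition_penalty,
    cache_implementation=args.cache_implementation,  # the new pass-through
)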
