huggingface · qgallouedec · Apr 5, 2025 · Mar 31, 2025 · Apr 1, 2025 · Apr 2, 2025
diff --git a/trl/trainer/grpo_trainer.py b/trl/trainer/grpo_trainer.py
@@ -445,6 +445,11 @@ def data_collator(features):  # No data collation is needed in GRPO
         num_processes = self.accelerator.num_processes
         global_batch_size = args.per_device_train_batch_size * num_processes
         possible_values = [n_gen for n_gen in range(2, global_batch_size + 1) if (global_batch_size) % n_gen == 0]
+        if self.num_generations < 2:
+            raise ValueError(
+                f"GRPO requires at least 2 generations per prompt to calculate the advantages. "
+                f"You provided {self.num_generations}, which is less than the minimum required."
+            )
         if self.num_generations not in possible_values:
             raise ValueError(
                 f"The global train batch size ({num_processes} x {args.per_device_train_batch_size}) must be evenly "