num_completions → num_generations
1 parent b7918c0 · commit fd04760
docs/source/paper_index.md
@@ -142,7 +142,7 @@ training_args = GRPOConfig(
     top_p=0.99,
     top_k=100,
     temperature=0.99,
-    num_completions=8,  # = num_return_sequences in the paper
+    num_generations=8,  # = num_return_sequences in the paper
     num_iterations=1,  # = ppo_epochs in the paper
     per_device_train_batch_size=4,
     gradient_accumulation_steps=32,
@@ -246,7 +246,7 @@ from trl import GRPOConfig
 config = GRPOConfig(
     loss_type="cispo",
     epsilon_high=5.0,
-    num_completions=16,
+    num_generations=16,
     scale_rewards="batch",
     cast_lm_head_to_fp32=True
 )
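
For context, a minimal sketch of the corrected usage, assuming a current trl install where GRPOConfig accepts num_generations (the fields reuse the ones shown in the hunks above; the values are illustrative, not prescribed by this commit):

# Sketch only: confirms the renamed argument is the one GRPOConfig actually accepts.
from trl import GRPOConfig

config = GRPOConfig(
    num_generations=8,              # completions sampled per prompt (previously documented as num_completions)
    num_iterations=1,               # = ppo_epochs in the paper
    per_device_train_batch_size=4,
)
print(config.num_generations)       # -> 8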