We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent 0a38f19, commit 2b4d7d1 (Copy full SHA for 2b4d7d1)
trl/trainer/grpo_trainer.py
@@ -1146,7 +1146,7 @@ def _generate_and_score_completions(
1146
prompt_ids = prompt_ids[:, -self.max_prompt_length :]
1147
prompt_mask = prompt_mask[:, -self.max_prompt_length :]
1148
prompts_text = self.processing_class.batch_decode(
1149
- prompt_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False
+ prompt_ids, skip_special_tokens=False, clean_up_tokenization_spaces=False
1150
)
1151
1152
# Generate completions using either vLLM or regular generation
0 commit comments