We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent 84982ad, commit 7dbf477 (Copy full SHA for 7dbf477)
trl/trainer/grpo_trainer.py
@@ -1065,7 +1065,7 @@ def _generate_and_score_completions(
1065
prompt_ids = prompt_ids[:, -self.max_prompt_length :]
1066
prompt_mask = prompt_mask[:, -self.max_prompt_length :]
1067
prompts_text = self.processing_class.batch_decode(
1068
- prompt_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False
+ prompt_ids, skip_special_tokens=False, clean_up_tokenization_spaces=False
1069
)
1070
1071
# Generate completions using either vLLM or regular generation
0 commit comments