Commit cc34f16

fix extra info

committed
1 parent da4ab8b commit cc34f16

File tree

1 file changed: +1 -9 lines changed


verl/trainer/ppo/ray_trainer.py

Lines changed: 1 addition & 9 deletions
@@ -1112,7 +1112,6 @@ def fit(self):
 
                     with marked_timer("adv", timing_raw, color="brown"):
                         # we combine with rule-based rm
-                        reward_extra_info_keys = set()
                         if self.config.reward_model.launch_reward_fn_async:
                             reward_tensor, reward_extra_infos_dict = ray.get(future_reward)
                             # Set token_level_scores for async case
@@ -1122,12 +1121,7 @@ def fit(self):
                                 batch.non_tensor_batch.update(
                                     {k: np.array(v) for k, v in reward_extra_infos_dict.items()}
                                 )
-                            reward_extra_info_keys = set(reward_extra_infos_dict.keys())
-                        else:
                             # For sync case, token_level_scores and extra_infos are already set above
-                            reward_extra_info_keys = (
-                                set(reward_extra_infos_dict.keys()) if reward_extra_infos_dict else set()
-                            )
                         # compute rewards. apply_kl_penalty if available
                         if self.config.algorithm.use_kl_in_reward:
                             batch, kl_metrics = apply_kl_penalty(
@@ -1182,9 +1176,7 @@ def fit(self):
                         ]
 
                         reward_extra_infos_dict = (
-                            extract_reward_extra_infos(batch, reward_extra_info_keys)
-                            if reward_extra_info_keys
-                            else {}
+                            extract_reward_extra_infos(batch, set(reward_extra_infos_dict.keys()))
                         )
 
                         if "request_id" in batch.non_tensor_batch:
