@@ -1112,7 +1112,6 @@ def fit(self):
 
                 with marked_timer("adv", timing_raw, color="brown"):
                     # we combine with rule-based rm
-                    reward_extra_info_keys = set()
                     if self.config.reward_model.launch_reward_fn_async:
                         reward_tensor, reward_extra_infos_dict = ray.get(future_reward)
                         # Set token_level_scores for async case
@@ -1122,12 +1121,7 @@ def fit(self):
                                 batch.non_tensor_batch.update(
                                     {k: np.array(v) for k, v in reward_extra_infos_dict.items()}
                                 )
-                                reward_extra_info_keys = set(reward_extra_infos_dict.keys())
-                    else:
                         # For sync case, token_level_scores and extra_infos are already set above
-                        reward_extra_info_keys = (
-                            set(reward_extra_infos_dict.keys()) if reward_extra_infos_dict else set()
-                        )
                     # compute rewards. apply_kl_penalty if available
                     if self.config.algorithm.use_kl_in_reward:
                         batch, kl_metrics = apply_kl_penalty(
@@ -1182,9 +1176,7 @@ def fit(self):
                             ]
 
                             reward_extra_infos_dict = (
-                                extract_reward_extra_infos(batch, reward_extra_info_keys)
-                                if reward_extra_info_keys
-                                else {}
+                                extract_reward_extra_infos(batch, set(reward_extra_infos_dict.keys()))
                             )
 
                             if "request_id" in batch.non_tensor_batch: