Commit cd9429c

Fix qwen3 inference data and qwenfc handler chat template (#1126)
1. Reasoning content was not being added to the inference data for either QwenHandler or QwenFCHandler.
2. The _format_prompt function in QwenFCHandler did not check for tool_calls in assistant messages, so every message in which the assistant requested a tool call was converted to an empty message.

This PR fixes both points. It also means that all qwen3-prompt and qwen3-FC scores that were calculated using local_inference will need to be recalculated.

Co-authored-by: Huanzhi Mao <[email protected]>
1 parent d434174 commit cd9429c
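
For orientation, here is a hedged sketch of the assistant-message shape both fixes concern. The field names follow the diffs below; the concrete values and the get_weather call are made up for illustration:

# Hypothetical assistant turn as stored in the handler's chat history.
# Before this commit, "reasoning_content" was silently dropped, and
# QwenFCHandler._format_prompt ignored "tool_calls", serializing this
# whole turn as an empty assistant message.
assistant_message = {
    "role": "assistant",
    "content": "",  # often empty when the model only requests a tool call
    "reasoning_content": "I need the current weather, so I should call a tool.",
    "tool_calls": [
        {"function": {"name": "get_weather", "arguments": {"city": "Berkeley"}}}
    ],
}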

File tree

  • berkeley-function-call-leaderboard/bfcl_eval/model_handler/local_inference
    • qwen.py
    • qwen_fc.py

2 files changed: +34 −0 lines changed

berkeley-function-call-leaderboard/bfcl_eval/model_handler/local_inference/qwen.py

Lines changed: 13 additions & 0 deletions
@@ -186,3 +186,16 @@ def _parse_query_response_prompting(self, api_response: any) -> dict:
             "input_token": api_response.usage.prompt_tokens,
             "output_token": api_response.usage.completion_tokens,
         }
+
+    @override
+    def _add_assistant_message_prompting(
+        self, inference_data: dict, model_response_data: dict
+    ) -> dict:
+        inference_data["message"].append(
+            {
+                "role": "assistant",
+                "content": model_response_data["model_responses"],
+                "reasoning_content": model_response_data.get("reasoning_content", ""),
+            }
+        )
+        return inference_data
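
A minimal usage sketch of the new override, assuming a hypothetical QwenHandler instance named handler with the method above bound to it; the dict values are invented:

# Sketch only: `handler` is a hypothetical QwenHandler instance.
inference_data = {"message": [{"role": "user", "content": "Hi"}]}
model_response_data = {
    "model_responses": "Hello!",
    "reasoning_content": "A greeting deserves a greeting.",
}
handler._add_assistant_message_prompting(inference_data, model_response_data)
# inference_data["message"][-1] now carries both fields:
# {"role": "assistant", "content": "Hello!",
#  "reasoning_content": "A greeting deserves a greeting."}
# Before this fix, reasoning_content was never stored, so the model's
# reasoning trace was lost from the chat history on subsequent turns.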

berkeley-function-call-leaderboard/bfcl_eval/model_handler/local_inference/qwen_fc.py

Lines changed: 21 additions & 0 deletions
@@ -196,6 +196,25 @@ def _format_prompt(self, messages, function):
                 formatted_prompt += f"<|im_start|>{role}\n{content}"
             else:
                 formatted_prompt += f"<|im_start|>{role}\n{content}"
+
+                if "tool_calls" in message:
+                    for tool_call in message["tool_calls"]:
+                        if (tool_call == message["tool_calls"][0] and content) or tool_call != message["tool_calls"][0]:
+                            formatted_prompt += "\n"
+
+                        if "function" in tool_call:
+                            tool_call = tool_call["function"]
+
+                        formatted_prompt += '<tool_call>\n{"name": "'
+                        formatted_prompt += tool_call["name"]
+                        formatted_prompt += '", "arguments": '
+
+                        if isinstance(tool_call["arguments"], str):
+                            formatted_prompt += tool_call["arguments"]
+                        else:
+                            formatted_prompt += json.dumps(tool_call["arguments"])
+
+                        formatted_prompt += "}\n</tool_call>"
 
             formatted_prompt += "<|im_end|>\n"
 

@@ -248,6 +267,8 @@ def _parse_query_response_prompting(self, api_response: any) -> dict:
             "role": "assistant",
             "content": cleaned_response,
         }
+
+        model_responses_message_for_chat_history["reasoning_content"] = reasoning_content
 
         return {
             "model_responses": cleaned_response,
