[BFCL] Add microsoft/phi-4 to the Leaderboard #1000

Merged · 11 commits · Apr 24, 2025
1 change: 1 addition & 0 deletions berkeley-function-call-leaderboard/CHANGELOG.md
@@ -2,6 +2,7 @@

All notable changes to the Berkeley Function Calling Leaderboard will be documented in this file.

+ - [Apr 23, 2025] [#1000](https://github.com/ShishirPatil/gorilla/pull/1000): Add new model `microsoft/phi-4` to the leaderboard.
- [Apr 23, 2025] [#967](https://github.com/ShishirPatil/gorilla/pull/967): Add the following new models to the leaderboard:
- `microsoft/Phi-4-mini-instruct`
- `microsoft/Phi-4-mini-instruct-FC`
1 change: 1 addition & 0 deletions berkeley-function-call-leaderboard/SUPPORTED_MODELS.md
@@ -90,6 +90,7 @@ For model names containing `{...}`, multiple versions are available. For example
| Open-Mistral-Nemo-2407 | Prompt | Mistral AI | open-mistral-nemo-2407 |
| Open-Mistral-Nemo-2407 | Function Calling | Mistral AI | open-mistral-nemo-2407-FC |
| palmyra-x-004 | Function Calling | Writer | palmyra-x-004 |
+ | phi-4 | Prompt | Self-hosted 💻 | microsoft/phi-4 |
| Phi-4-mini-instruct | Prompt | Self-hosted 💻 | microsoft/Phi-4-mini-instruct |
| Phi-4-mini-instruct | Function Calling | Self-hosted 💻 | microsoft/Phi-4-mini-instruct-FC |
| Qwen2.5-{0.5B,1.5B,3B,7B,14B,32B,72B}-Instruct | Prompt | Self-hosted 💻 | Qwen/Qwen2.5-{0.5B,1.5B,3B,7B,14B,32B,72B}-Instruct |

@@ -552,6 +552,12 @@
"MadeAgents",
"cc-by-nc-4.0",
],
"microsoft/phi-4": [
"Phi-4 (Prompt)",
"https://huggingface.co/microsoft/phi-4",
"Microsoft",
"MIT",
],
"microsoft/Phi-4-mini-instruct": [
"Phi-4-mini-instruct (Prompt)",
"https://huggingface.co/microsoft/Phi-4-mini-instruct",
Expand Down
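For readers skimming this hunk: each entry maps a model ID to a four-element list, and from the visible entries the fields read as display name, model card URL, organization, and license. A hedged sketch of the shape (the enclosing dict's real name is not visible in this hunk, so `MODEL_METADATA` below is a placeholder):

```python
# Placeholder name; the real dict's identifier is not shown in this hunk.
MODEL_METADATA = {
    "microsoft/phi-4": [
        "Phi-4 (Prompt)",                          # name displayed on the leaderboard
        "https://huggingface.co/microsoft/phi-4",  # model card
        "Microsoft",                               # organization
        "MIT",                                     # license
    ],
}
```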

@@ -125,6 +125,7 @@
"Salesforce/xLAM-2-3b-fc-r": SalesforceQwenHandler,
"Salesforce/xLAM-2-1b-fc-r": SalesforceQwenHandler,
"mistralai/Ministral-8B-Instruct-2410": MistralFCHandler,
"microsoft/phi-4": PhiHandler,
"microsoft/Phi-4-mini-instruct": PhiHandler,
"microsoft/Phi-4-mini-instruct-FC": PhiFCHandler,
"ibm-granite/granite-20b-functioncalling": GraniteHandler,
@@ -231,6 +232,13 @@
# "open-mixtral-8x22b": MistralHandler,
# "open-mixtral-8x22b-FC": MistralHandler,
# "open-mixtral-8x7b": MistralHandler,
# "microsoft/Phi-3-mini-4k-instruct": PhiHandler,
# "microsoft/Phi-3-mini-128k-instruct": PhiHandler,
# "microsoft/Phi-3-small-8k-instruct": PhiHandler,
# "microsoft/Phi-3-small-128k-instruct": PhiHandler,
# "microsoft/Phi-3-medium-4k-instruct": PhiHandler,
# "microsoft/Phi-3-medium-128k-instruct": PhiHandler,
# "microsoft/Phi-3.5-mini-instruct": PhiHandler,
# "mistral-large-2407": MistralHandler,
# "mistral-large-2407-FC": MistralHandler,
# "mistral-medium-2312": MistralHandler,
@@ -239,13 +247,6 @@
# "mistral-tiny-2312": MistralHandler,
# "meta-llama/Meta-Llama-3-8B-Instruct-FC": LlamaHandler,
# "meta-llama/Meta-Llama-3-70B-Instruct-FC": LlamaHandler,
# "microsoft/Phi-3-mini-4k-instruct": PhiHandler,
# "microsoft/Phi-3-mini-128k-instruct": PhiHandler,
# "microsoft/Phi-3-small-8k-instruct": PhiHandler,
# "microsoft/Phi-3-small-128k-instruct": PhiHandler,
# "microsoft/Phi-3-medium-4k-instruct": PhiHandler,
# "microsoft/Phi-3-medium-128k-instruct": PhiHandler,
# "microsoft/Phi-3.5-mini-instruct": PhiHandler,
# "NousResearch/Hermes-2-Pro-Mistral-7B": HermesHandler,
# "NousResearch/Hermes-2-Pro-Llama-3-8B": HermesHandler,
# "NousResearch/Hermes-2-Theta-Llama-3-8B": HermesHandler,
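For orientation, the dict this hunk edits maps model IDs to handler classes; generation code looks up the requested model and instantiates the class. A minimal sketch of that dispatch, hedged: the map's real variable name is not visible in this hunk, and the `phi_handler` module path is inferred from the `base_oss_handler` import in the file below.

```python
# Hedged dispatch sketch; handler_map and get_handler are assumed names,
# but the constructor signature matches PhiHandler.__init__ in this PR.
from bfcl.model_handler.local_inference.phi_handler import PhiHandler

handler_map = {
    "microsoft/phi-4": PhiHandler,  # the entry added in this PR
}

def get_handler(model_name: str, temperature: float):
    handler_cls = handler_map[model_name]        # KeyError => model not supported
    return handler_cls(model_name, temperature)  # __init__(model_name, temperature)

handler = get_handler("microsoft/phi-4", temperature=0.001)
```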
@@ -1,7 +1,8 @@
+ import re
from bfcl.model_handler.local_inference.base_oss_handler import OSSHandler
from bfcl.model_handler.utils import (
-     combine_consecutive_user_prompts,
-     convert_system_prompt_into_user_prompt,
+     ast_parse,
+     convert_to_function_call,
    func_doc_language_specific_pre_processing,
    system_prompt_pre_processing_chat_model,
)
@@ -12,28 +13,49 @@ class PhiHandler(OSSHandler):
    def __init__(self, model_name, temperature) -> None:
        super().__init__(model_name, temperature)

+     @override
+     def decode_ast(self, result, language="Python"):
+         result = result.strip()
+         if result.startswith("```json"):
+             result = result[len("```json"):]
+         if result.startswith("```python"):
+             result = result[len("```python"):]
+         return super().decode_ast(result, language)
+ 
+     @override
+     def decode_execute(self, result):
+         funcs = re.findall(r"\[[^\[\]]+\]", result)
+         decoded_funcs = []
+         for func in funcs:
+             decode_output = ast_parse(func, language="Python")
+             decoded_funcs.extend(decode_output)
+ 
+         return convert_to_function_call(decoded_funcs)

    @override
    def _format_prompt(self, messages, function):
if "Phi-3-small" in self.model_name:
# Phi-3-small
"""
"bos_token": "<|endoftext|>",
"chat_template": "{{ bos_token }}{% for message in messages %}{{'<|' + message['role'] + '|>' + '\n' + message['content'] + '<|end|>\n' }}{% endfor %}{% if add_generation_prompt %}{{ '<|assistant|>\n' }}{% else %}{{ eos_token }}{% endif %}",
"eos_token": "<|endoftext|>",
"""
formatted_prompt = "<|endoftext|>"
else:
# Phi-3.5-mini, Phi-3-medium, Phi-3-mini
"""
"bos_token": "<s>",
"chat_template": "{% for message in messages %}{% if message['role'] == 'system' and message['content'] %}{{'<|system|>\n' + message['content'] + '<|end|>\n'}}{% elif message['role'] == 'user' %}{{'<|user|>\n' + message['content'] + '<|end|>\n'}}{% elif message['role'] == 'assistant' %}{{'<|assistant|>\n' + message['content'] + '<|end|>\n'}}{% endif %}{% endfor %}{% if add_generation_prompt %}{{ '<|assistant|>\n' }}{% else %}{{ eos_token }}{% endif %}",
"""
formatted_prompt = ""

for message in messages:
formatted_prompt += f"<|{message['role']}|>\n{message['content']}<|end|>\n"

formatted_prompt += f"<|assistant|>\n"
+         formatted_prompt = ""
+ 
+         if "phi-4" in self.model_name:
+             # phi-4
+             '''
+             "bos_token": "<|endoftext|>"
+             "chat_template": "{% for message in messages %}{% if (message['role'] == 'system') %}{{'<|im_start|>system<|im_sep|>' + message['content'] + '<|im_end|>'}}{% elif (message['role'] == 'user') %}{{'<|im_start|>user<|im_sep|>' + message['content'] + '<|im_end|>'}}{% elif (message['role'] == 'assistant') %}{{'<|im_start|>assistant<|im_sep|>' + message['content'] + '<|im_end|>'}}{% endif %}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant<|im_sep|>' }}{% endif %}"
+             "eos_token": "<|im_end|>"
+             '''
+             for message in messages:
+                 formatted_prompt += f"<|im_start|>{message['role']}<|im_sep|>\n{message['content']}<|im_end|>\n"
+             formatted_prompt += "<|im_start|>assistant<|im_sep|>\n"
+         elif "Phi-4-mini" in self.model_name:
+             # Phi-4-mini
+             '''
+             "bos_token": "<|endoftext|>"
+             "chat_template": "{% for message in messages %}{% if message['role'] == 'system' and 'tools' in message and message['tools'] is not none %}{{ '<|' + message['role'] + '|>' + message['content'] + '<|tool|>' + message['tools'] + '<|/tool|>' + '<|end|>' }}{% else %}{{ '<|' + message['role'] + '|>' + message['content'] + '<|end|>' }}{% endif %}{% endfor %}{% if add_generation_prompt %}{{ '<|assistant|>' }}{% else %}{{ eos_token }}{% endif %}"
+             "eos_token": "<|endoftext|>"
+             '''
+             for message in messages:
+                 formatted_prompt += f"<|{message['role']}|>{message['content']}<|end|>"
+             formatted_prompt += "<|assistant|>"

        return formatted_prompt

@@ -48,14 +70,4 @@ def _pre_query_processing_prompting(self, test_entry: dict) -> dict:
test_entry["question"][0], functions, test_category
)

if "Phi-3-small" in self.model_name:
# Phi-3-small doesn't allow system role
for round_idx in range(len(test_entry["question"])):
test_entry["question"][round_idx] = convert_system_prompt_into_user_prompt(
test_entry["question"][round_idx]
)
test_entry["question"][round_idx] = combine_consecutive_user_prompts(
test_entry["question"][round_idx]
)

return {"message": [], "function": functions}