[BFCL] Add microsoft/phi-4 to the Leaderboard #1000

Merged · 11 commits · Apr 24, 2025
1 change: 1 addition & 0 deletions berkeley-function-call-leaderboard/CHANGELOG.md
@@ -2,6 +2,7 @@

All notable changes to the Berkeley Function Calling Leaderboard will be documented in this file.

+ - [Apr 23, 2025] [#1000](https://github.com/ShishirPatil/gorilla/pull/1000): Add new model `microsoft/phi-4` to the leaderboard.
- [Apr 23, 2025] [#967](https://github.com/ShishirPatil/gorilla/pull/967): Add the following new models to the leaderboard:
- `microsoft/Phi-4-mini-instruct`
- `microsoft/Phi-4-mini-instruct-FC`
1 change: 1 addition & 0 deletions berkeley-function-call-leaderboard/SUPPORTED_MODELS.md
@@ -90,6 +90,7 @@ For model names containing `{...}`, multiple versions are available. For example
| Open-Mistral-Nemo-2407 | Prompt | Mistral AI | open-mistral-nemo-2407 |
| Open-Mistral-Nemo-2407 | Function Calling | Mistral AI | open-mistral-nemo-2407-FC |
| palmyra-x-004 | Function Calling | Writer | palmyra-x-004 |
+ | phi-4 | Prompt | Self-hosted 💻 | microsoft/phi-4 |
| Phi-4-mini-instruct | Prompt | Self-hosted 💻 | microsoft/Phi-4-mini-instruct |
| Phi-4-mini-instruct | Function Calling | Self-hosted 💻 | microsoft/Phi-4-mini-instruct-FC |
| Qwen2.5-{0.5B,1.5B,3B,7B,14B,32B,72B}-Instruct | Prompt | Self-hosted 💻 | Qwen/Qwen2.5-{0.5B,1.5B,3B,7B,14B,32B,72B}-Instruct |

@@ -552,6 +552,12 @@
"MadeAgents",
"cc-by-nc-4.0",
],
"microsoft/phi-4": [
"Phi-4 (Prompt)",
"https://huggingface.co/microsoft/phi-4",
"Microsoft",
"MIT",
],
"microsoft/Phi-4-mini-instruct": [
"Phi-4-mini-instruct (Prompt)",
"https://huggingface.co/microsoft/Phi-4-mini-instruct",
Expand Down
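For readers skimming this hunk: each entry maps a model ID to a four-element list, and from the visible entries the fields read as display name, model card URL, organization, and license. A hedged sketch of the shape (the enclosing dict's real name is not visible in this hunk, so `MODEL_METADATA` below is a placeholder):

```python
# Placeholder name; the real dict's identifier is not shown in this hunk.
MODEL_METADATA = {
    "microsoft/phi-4": [
        "Phi-4 (Prompt)",                          # name displayed on the leaderboard
        "https://huggingface.co/microsoft/phi-4",  # model card
        "Microsoft",                               # organization
        "MIT",                                     # license
    ],
}
```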

@@ -125,6 +125,7 @@
"Salesforce/xLAM-2-3b-fc-r": SalesforceQwenHandler,
"Salesforce/xLAM-2-1b-fc-r": SalesforceQwenHandler,
"mistralai/Ministral-8B-Instruct-2410": MistralFCHandler,
"microsoft/phi-4": PhiHandler,
"microsoft/Phi-4-mini-instruct": PhiHandler,
"microsoft/Phi-4-mini-instruct-FC": PhiFCHandler,
"ibm-granite/granite-20b-functioncalling": GraniteHandler,
@@ -231,6 +232,13 @@
# "open-mixtral-8x22b": MistralHandler,
# "open-mixtral-8x22b-FC": MistralHandler,
# "open-mixtral-8x7b": MistralHandler,
# "microsoft/Phi-3-mini-4k-instruct": PhiHandler,
# "microsoft/Phi-3-mini-128k-instruct": PhiHandler,
# "microsoft/Phi-3-small-8k-instruct": PhiHandler,
# "microsoft/Phi-3-small-128k-instruct": PhiHandler,
# "microsoft/Phi-3-medium-4k-instruct": PhiHandler,
# "microsoft/Phi-3-medium-128k-instruct": PhiHandler,
# "microsoft/Phi-3.5-mini-instruct": PhiHandler,
# "mistral-large-2407": MistralHandler,
# "mistral-large-2407-FC": MistralHandler,
# "mistral-medium-2312": MistralHandler,
@@ -239,13 +247,6 @@
# "mistral-tiny-2312": MistralHandler,
# "meta-llama/Meta-Llama-3-8B-Instruct-FC": LlamaHandler,
# "meta-llama/Meta-Llama-3-70B-Instruct-FC": LlamaHandler,
# "microsoft/Phi-3-mini-4k-instruct": PhiHandler,
# "microsoft/Phi-3-mini-128k-instruct": PhiHandler,
# "microsoft/Phi-3-small-8k-instruct": PhiHandler,
# "microsoft/Phi-3-small-128k-instruct": PhiHandler,
# "microsoft/Phi-3-medium-4k-instruct": PhiHandler,
# "microsoft/Phi-3-medium-128k-instruct": PhiHandler,
# "microsoft/Phi-3.5-mini-instruct": PhiHandler,
# "NousResearch/Hermes-2-Pro-Mistral-7B": HermesHandler,
# "NousResearch/Hermes-2-Pro-Llama-3-8B": HermesHandler,
# "NousResearch/Hermes-2-Theta-Llama-3-8B": HermesHandler,
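For orientation, the dict this hunk edits maps model IDs to handler classes; generation code looks up the requested model and instantiates the class. A minimal sketch of that dispatch, hedged: the map's real variable name is not visible in this hunk, and the `phi_handler` module path is inferred from the `base_oss_handler` import in the file below.

```python
# Hedged dispatch sketch; handler_map and get_handler are assumed names,
# but the constructor signature matches PhiHandler.__init__ in this PR.
from bfcl.model_handler.local_inference.phi_handler import PhiHandler

handler_map = {
    "microsoft/phi-4": PhiHandler,  # the entry added in this PR
}

def get_handler(model_name: str, temperature: float):
    handler_cls = handler_map[model_name]        # KeyError => model not supported
    return handler_cls(model_name, temperature)  # __init__(model_name, temperature)

handler = get_handler("microsoft/phi-4", temperature=0.001)
```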
@@ -1,7 +1,8 @@
+ import re
from bfcl.model_handler.local_inference.base_oss_handler import OSSHandler
from bfcl.model_handler.utils import (
-     combine_consecutive_user_prompts,
-     convert_system_prompt_into_user_prompt,
+     ast_parse,
+     convert_to_function_call,
    func_doc_language_specific_pre_processing,
    system_prompt_pre_processing_chat_model,
)
@@ -12,28 +13,49 @@ class PhiHandler(OSSHandler):
    def __init__(self, model_name, temperature) -> None:
        super().__init__(model_name, temperature)

+     @override
+     def decode_ast(self, result, language="Python"):
+         result = result.strip()
+         if result.startswith("```json"):
+             result = result[len("```json"):]
+         if result.startswith("```python"):
+             result = result[len("```python"):]
+         return super().decode_ast(result, language)
+ 
+     @override
+     def decode_execute(self, result):
+         funcs = re.findall(r"\[[^\[\]]+\]", result)
+         decoded_funcs = []
+         for func in funcs:
+             decode_output = ast_parse(func, language="Python")
+             decoded_funcs.extend(decode_output)
+ 
+         return convert_to_function_call(decoded_funcs)

    @override
    def _format_prompt(self, messages, function):
if "Phi-3-small" in self.model_name:
# Phi-3-small
"""
"bos_token": "<|endoftext|>",
"chat_template": "{{ bos_token }}{% for message in messages %}{{'<|' + message['role'] + '|>' + '\n' + message['content'] + '<|end|>\n' }}{% endfor %}{% if add_generation_prompt %}{{ '<|assistant|>\n' }}{% else %}{{ eos_token }}{% endif %}",
"eos_token": "<|endoftext|>",
"""
formatted_prompt = "<|endoftext|>"
else:
# Phi-3.5-mini, Phi-3-medium, Phi-3-mini
"""
"bos_token": "<s>",
"chat_template": "{% for message in messages %}{% if message['role'] == 'system' and message['content'] %}{{'<|system|>\n' + message['content'] + '<|end|>\n'}}{% elif message['role'] == 'user' %}{{'<|user|>\n' + message['content'] + '<|end|>\n'}}{% elif message['role'] == 'assistant' %}{{'<|assistant|>\n' + message['content'] + '<|end|>\n'}}{% endif %}{% endfor %}{% if add_generation_prompt %}{{ '<|assistant|>\n' }}{% else %}{{ eos_token }}{% endif %}",
"""
formatted_prompt = ""

for message in messages:
formatted_prompt += f"<|{message['role']}|>\n{message['content']}<|end|>\n"

formatted_prompt += f"<|assistant|>\n"
+         formatted_prompt = ""
+ 
+         if "phi-4" in self.model_name:
+             # phi-4
+             '''
+             "bos_token": "<|endoftext|>"
+             "chat_template": "{% for message in messages %}{% if (message['role'] == 'system') %}{{'<|im_start|>system<|im_sep|>' + message['content'] + '<|im_end|>'}}{% elif (message['role'] == 'user') %}{{'<|im_start|>user<|im_sep|>' + message['content'] + '<|im_end|>'}}{% elif (message['role'] == 'assistant') %}{{'<|im_start|>assistant<|im_sep|>' + message['content'] + '<|im_end|>'}}{% endif %}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant<|im_sep|>' }}{% endif %}"
+             "eos_token": "<|im_end|>"
+             '''
+             for message in messages:
+                 formatted_prompt += f"<|im_start|>{message['role']}<|im_sep|>\n{message['content']}<|im_end|>\n"
+             formatted_prompt += "<|im_start|>assistant<|im_sep|>\n"
+         elif "Phi-4-mini" in self.model_name:
+             # Phi-4-mini
+             '''
+             "bos_token": "<|endoftext|>"
+             "chat_template": "{% for message in messages %}{% if message['role'] == 'system' and 'tools' in message and message['tools'] is not none %}{{ '<|' + message['role'] + '|>' + message['content'] + '<|tool|>' + message['tools'] + '<|/tool|>' + '<|end|>' }}{% else %}{{ '<|' + message['role'] + '|>' + message['content'] + '<|end|>' }}{% endif %}{% endfor %}{% if add_generation_prompt %}{{ '<|assistant|>' }}{% else %}{{ eos_token }}{% endif %}"
+             "eos_token": "<|endoftext|>"
+             '''
+             for message in messages:
+                 formatted_prompt += f"<|{message['role']}|>{message['content']}<|end|>"
+             formatted_prompt += "<|assistant|>"

        return formatted_prompt

@@ -48,14 +70,4 @@ def _pre_query_processing_prompting(self, test_entry: dict) -> dict:
test_entry["question"][0], functions, test_category
)

if "Phi-3-small" in self.model_name:
# Phi-3-small doesn't allow system role
for round_idx in range(len(test_entry["question"])):
test_entry["question"][round_idx] = convert_system_prompt_into_user_prompt(
test_entry["question"][round_idx]
)
test_entry["question"][round_idx] = combine_consecutive_user_prompts(
test_entry["question"][round_idx]
)

return {"message": [], "function": functions}