Skip to content

Commit a06538a

Browse files
chore: merge remote-tracking branch 'origin' into feat/type-hints
* origin: [BFCL] Fix prompt concatenation bug in Qwen template (ShishirPatil#1068) [BFCL] Multi-turn TravelAPI book_flight() Fix (ShishirPatil#966) _get_item() can not handle the "." directory in path string (ShishirPatil#1060) Add support for llama-3.1-nemotron-ultra-253b-v1 to BFCL (ShishirPatil#1032) [BFCL] Omit Reasoning Content from Chat History for Function-Calling Models (ShishirPatil#1064) [BFCL] Add support for Ling-Lite-V1.5 (ShishirPatil#1056) [BFCL] Added support for DeepSeek-R1-0528 and DeepSeek-V3-0324 (ShishirPatil#1063) [BFCL] Add type hinting (ShishirPatil#1058)
2 parents c0c9f96 + ac37049 commit a06538a

25 files changed

+626
-293
lines changed

berkeley-function-call-leaderboard/CHANGELOG.md

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,12 @@
22

33
All notable changes to the Berkeley Function Calling Leaderboard will be documented in this file.
44

5+
- [Jun 18, 2025] [#1068](https://github.com/ShishirPatil/gorilla/pull/1068): Fix prompt concatenation issue in Qwen chat template. The self-hosted `Qwen3` models are affected.
6+
- [Jun 15, 2025] [#966](https://github.com/ShishirPatil/gorilla/pull/966): Removed the `travel_cost` parameter from the multi-turn backend `TravelAPI.book_flight()`; the cost is now computed internally to eliminate ambiguity.
7+
- [Jun 15, 2025] [#1060](https://github.com/ShishirPatil/gorilla/pull/1060): Fixed multi-turn backend `GorillaFileSystem._get_item()` method to correctly handle `"."` in path strings.
8+
- [Jun 14, 2025] [#1032](https://github.com/ShishirPatil/gorilla/pull/1032): Add `Llama-3.1-Nemotron-Ultra-253B-v1` to the leaderboard.
9+
- [Jun 12, 2025] [#1056](https://github.com/ShishirPatil/gorilla/pull/1056): Add `Ling-Lite-V1.5` to the leaderboard.
10+
- [Jun 12, 2025] [#1063](https://github.com/ShishirPatil/gorilla/pull/1063): Add support for `DeepSeek-R1-0528` and `DeepSeek-V3-0324`.
511
- [Jun 11, 2025] [#1061](https://github.com/ShishirPatil/gorilla/pull/1061): Add support for DashScope API inference for `Qwen3` series
612
- [Jun 8, 2025] [#1054](https://github.com/ShishirPatil/gorilla/pull/1054), [#1055](https://github.com/ShishirPatil/gorilla/pull/1055): Packagerize codebase for PyPI Distribution. Now available with `pip install bfcl-eval`, in addition to the existing `pip install -e`.
713
- [May 27, 2025] [#1040](https://github.com/ShishirPatil/gorilla/pull/1040): Add the following new models to the leaderboard:

berkeley-function-call-leaderboard/SUPPORTED_MODELS.md

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -34,9 +34,10 @@ For model names containing `{...}`, multiple versions are available. For example
3434
| Command R7B | Function Calling | Cohere | command-r7b-12-2024-FC |
3535
| Command-R-Plus | Function Calling | Cohere | command-r-plus-FC |
3636
| DBRX-Instruct | Prompt | Databricks | databricks-dbrx-instruct |
37-
| DeepSeek-R1 | Prompt | DeepSeek | DeepSeek-R1 |
3837
| DeepSeek-R1 | Prompt | Self-hosted 💻 | deepseek-ai/DeepSeek-R1 |
39-
| DeepSeek-V3 | Function Calling | DeepSeek | DeepSeek-V3-FC |
38+
| DeepSeek-R1-0528 | Prompt | DeepSeek | DeepSeek-R1-0528 |
39+
| DeepSeek-R1-0528 | Function Calling | DeepSeek | DeepSeek-R1-0528-FC |
40+
| DeepSeek-V3-0324 | Function Calling | DeepSeek | DeepSeek-V3-0324-FC |
4041
| DM-Cito-8B | Prompt | Mininglamp | DM-Cito-8B |
4142
| Falcon3-{1B,3B,7B,10B}-Instruct | Function Calling | Self-hosted 💻 | tiiuae/Falcon3-{1B,3B,7B,10B}-Instruct |
4243
| FireFunction-v2 | Function Calling | Fireworks AI | firefunction-v2-FC |
@@ -73,6 +74,7 @@ For model names containing `{...}`, multiple versions are available. For example
7374
| Hammer2.1-{7b,3b,1.5b,0.5b} | Function Calling | Self-hosted 💻 | MadeAgents/Hammer2.1-{7b,3b,1.5b,0.5b} |
7475
| Llama-3.1-{8B,70B}-Instruct | Function Calling | Self-hosted 💻 | meta-llama/Llama-3.1-{8B,70B}-Instruct-FC |
7576
| Llama-3.1-{8B,70B}-Instruct | Prompt | Self-hosted 💻 | meta-llama/Llama-3.1-{8B,70B}-Instruct |
77+
| Llama-3.1-Nemotron-Ultra-253B-v1 | Prompt | Nvidia | nvidia/llama-3.1-nemotron-ultra-253b-v1 |
7678
| Llama-3.2-{1B,3B}-Instruct | Function Calling | Self-hosted 💻 | meta-llama/Llama-3.2-{1B,3B}-Instruct-FC |
7779
| Llama-3.3-70B-Instruct | Function Calling | Self-hosted 💻 | meta-llama/Llama-3.3-70B-Instruct-FC |
7880
| Llama-4-Maverick-17B-128E-Instruct-FP8 | Prompt | Novita AI | meta-llama/llama-4-maverick-17b-128e-instruct-fp8-novita |
@@ -104,7 +106,7 @@ For model names containing `{...}`, multiple versions are available. For example
104106
| Phi-4-mini-instruct | Function Calling | Self-hosted 💻 | microsoft/Phi-4-mini-instruct-FC |
105107
| Qwen3-{0.6B,1.7B,4B,8B,14B,32B} | Prompt | Alibaba Cloud | qwen3-{0.6b,1.7b,4b,8b,14b,32b} |
106108
| Qwen3-{0.6B,1.7B,4B,8B,14B,32B} | Prompt | Self-hosted 💻 | Qwen/Qwen3-{0.6B,1.7B,4B,8B,14B,32B} |
107-
| Qwen3-{0.6B,1.7B,4B,8B,14B,32B} | Function Calling | Alibaba Cloud | qwen3-{0.6b,1.7b,4b,8b,14b,32b}-FC |
109+
| Qwen3-{0.6B,1.7B,4B,8B,14B,32B} | Function Calling | Alibaba Cloud | qwen3-{0.6b,1.7b,4b,8b,14b,32b}-FC |
108110
| Qwen3-{0.6B,1.7B,4B,8B,14B,32B} | Function Calling | Self-hosted 💻 | Qwen/Qwen3-{0.6B,1.7B,4B,8B,14B,32B}-FC |
109111
| Qwen3-{30B-A3B,235B-A22B} | Prompt | Alibaba Cloud | qwen3-{30b-a3b, 235b-a22b} |
110112
| Qwen3-{30B-A3B,235B-A22B} | Prompt | Self-hosted 💻 | Qwen/Qwen3-{30B-A3B,235B-A22B} |
@@ -126,7 +128,7 @@ For model names containing `{...}`, multiple versions are available. For example
126128
| xLAM-2-70b-fc-r | Function Calling | Self-hosted 💻 | Salesforce/Llama-xLAM-2-70b-fc-r |
127129
| xLAM-2-8b-fc-r | Function Calling | Self-hosted 💻 | Salesforce/Llama-xLAM-2-8b-fc-r |
128130
| yi-large | Function Calling | 01.AI | yi-large-fc |
129-
131+
| Ling-lite-v1.5 | Prompt | Ant Group | Ling/ling-lite-v1.5 |
130132
---
131133

132134
## Additional Requirements for Certain Models

berkeley-function-call-leaderboard/bfcl_eval/.env.example

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,9 @@ DATABRICKS_AZURE_ENDPOINT_URL=
3030
# [OPTIONAL] For inference via Novita AI endpoint
3131
NOVITA_API_KEY=sk-XXXXXX
3232

33+
# We use the API Key from Alipay to inference Bailing (Ling) models (see https://zxb.alipay.com/llm/landing)
34+
LING_API_KEY=sk-XXXXXX
35+
3336
# [OPTIONAL] For local vllm/sglang server configuration
3437
# Defaults to localhost port 1053 if not provided
3538
VLLM_ENDPOINT=localhost

berkeley-function-call-leaderboard/bfcl_eval/constants/model_config.py

Lines changed: 46 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
from bfcl_eval.model_handler.api_inference.grok import GrokHandler
1515
from bfcl_eval.model_handler.api_inference.mining import MiningHandler
1616
from bfcl_eval.model_handler.api_inference.mistral import MistralHandler
17+
from bfcl_eval.model_handler.api_inference.nemotron import NemotronHandler
1718
from bfcl_eval.model_handler.api_inference.nexus import NexusHandler
1819
from bfcl_eval.model_handler.api_inference.nova import NovaHandler
1920
from bfcl_eval.model_handler.api_inference.novita import NovitaHandler
@@ -46,6 +47,7 @@
4647
from bfcl_eval.model_handler.local_inference.salesforce_llama import SalesforceLlamaHandler
4748
from bfcl_eval.model_handler.local_inference.salesforce_qwen import SalesforceQwenHandler
4849
from bfcl_eval.model_handler.local_inference.think_agent import ThinkAgentHandler
50+
from bfcl_eval.model_handler.api_inference.ling import LingAPIHandler
4951

5052
# -----------------------------------------------------------------------------
5153
# A mapping of model identifiers to their respective model configurations.
@@ -107,10 +109,10 @@ class ModelConfig:
107109
is_fc_model=False,
108110
underscore_to_dot=False,
109111
),
110-
"DeepSeek-R1": ModelConfig(
111-
model_name="DeepSeek-R1",
112-
display_name="DeepSeek-R1 (Prompt)",
113-
url="https://api-docs.deepseek.com/news/news1226",
112+
"DeepSeek-R1-0528": ModelConfig(
113+
model_name="DeepSeek-R1-0528",
114+
display_name="DeepSeek-R1-0528 (Prompt)",
115+
url="https://api-docs.deepseek.com/news/news250528",
114116
org="DeepSeek",
115117
license="MIT",
116118
model_handler=DeepSeekAPIHandler,
@@ -119,10 +121,22 @@ class ModelConfig:
119121
is_fc_model=False,
120122
underscore_to_dot=False,
121123
),
122-
"DeepSeek-V3-FC": ModelConfig(
123-
model_name="DeepSeek-V3-FC",
124-
display_name="DeepSeek-V3 (FC)",
125-
url="https://api-docs.deepseek.com/news/news1226",
124+
"DeepSeek-R1-0528-FC": ModelConfig(
125+
model_name="DeepSeek-R1-0528-FC",
126+
display_name="DeepSeek-R1-0528 (FC)",
127+
url="https://api-docs.deepseek.com/news/news250528",
128+
org="DeepSeek",
129+
license="MIT",
130+
model_handler=DeepSeekAPIHandler,
131+
input_price=None,
132+
output_price=None,
133+
is_fc_model=True,
134+
underscore_to_dot=True,
135+
),
136+
"DeepSeek-V3-0324-FC": ModelConfig(
137+
model_name="DeepSeek-V3-0324",
138+
display_name="DeepSeek-V3-0324 (FC)",
139+
url="https://api-docs.deepseek.com/news/news250325",
126140
org="DeepSeek",
127141
license="DeepSeek License",
128142
model_handler=DeepSeekAPIHandler,
@@ -743,6 +757,18 @@ class ModelConfig:
743757
is_fc_model=False,
744758
underscore_to_dot=False,
745759
),
760+
"nvidia/llama-3.1-nemotron-ultra-253b-v1": ModelConfig(
761+
model_name="nvidia/llama-3.1-nemotron-ultra-253b-v1",
762+
display_name="Llama-3.1-Nemotron-Ultra-253B-v1 (FC)",
763+
url="https://huggingface.co/nvidia/Llama-3_1-Nemotron-Ultra-253B-v1",
764+
org="NVIDIA",
765+
license="nvidia-open-model-license",
766+
model_handler=NemotronHandler,
767+
input_price=None,
768+
output_price=None,
769+
is_fc_model=True,
770+
underscore_to_dot=False,
771+
),
746772
"nvidia/nemotron-4-340b-instruct": ModelConfig(
747773
model_name="nvidia/nemotron-4-340b-instruct",
748774
display_name="Nemotron-4-340b-instruct (Prompt)",
@@ -1067,6 +1093,18 @@ class ModelConfig:
10671093
is_fc_model=False,
10681094
underscore_to_dot=False,
10691095
),
1096+
"Ling/ling-lite-v1.5": ModelConfig(
1097+
model_name="Ling/ling-lite-v1.5",
1098+
display_name="ling-lite-v1.5 (Prompt)",
1099+
url="https://huggingface.co/inclusionAI/Ling-lite-1.5",
1100+
org="Ling",
1101+
license="MIT",
1102+
model_handler=LingAPIHandler,
1103+
input_price=None,
1104+
output_price=None,
1105+
is_fc_model=False,
1106+
underscore_to_dot=False,
1107+
),
10701108
}
10711109

10721110
# Inference through local hosting

berkeley-function-call-leaderboard/bfcl_eval/constants/supported_models.py

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -14,8 +14,9 @@
1414

1515
SUPPORTED_MODELS = [
1616
"gorilla-openfunctions-v2",
17-
"DeepSeek-R1",
18-
"DeepSeek-V3-FC",
17+
"DeepSeek-R1-0528",
18+
"DeepSeek-R1-0528-FC",
19+
"DeepSeek-V3-0324-FC",
1920
"gpt-4.5-preview-2025-02-27",
2021
"gpt-4.5-preview-2025-02-27-FC",
2122
"gpt-4.1-2025-04-14-FC",
@@ -67,6 +68,7 @@
6768
"command-r7b-12-2024-FC",
6869
"command-a-03-2025-FC",
6970
"snowflake/arctic",
71+
"nvidia/llama-3.1-nemotron-ultra-253b-v1",
7072
"nvidia/nemotron-4-340b-instruct",
7173
"BitAgent/GoGoAgent",
7274
"palmyra-x-004",
@@ -162,4 +164,5 @@
162164
"meta-llama/llama-4-scout-17b-16e-instruct-FC-novita",
163165
"qwen/qwq-32b-FC-novita",
164166
"qwen/qwq-32b-novita",
167+
"Ling/ling-lite-v1.5",
165168
]

0 commit comments

Comments
 (0)