Skip to content

Commit a59b66c

Browse files
authored
[BFCL] Update Gemini model checkpoints to stable 2.5 releases (#1102)
Add the following new models to the leaderboard:
- `gemini-2.5-pro-FC`
- `gemini-2.5-pro`
- `gemini-2.5-flash-FC`
- `gemini-2.5-flash`
- `gemini-2.5-flash-lite-preview-06-17-FC`
- `gemini-2.5-flash-lite-preview-06-17`
1 parent 30994aa commit a59b66c

File tree

7 files changed

+63
-60
lines changed

7 files changed

+63
-60
lines changed

berkeley-function-call-leaderboard/CHANGELOG.md

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,13 @@
22

33
All notable changes to the Berkeley Function Calling Leaderboard will be documented in this file.
44

5+
- [Jul 6, 2025] [#1100](https://github.com/ShishirPatil/gorilla/pull/1100): Add the following new models to the leaderboard:
6+
- `gemini-2.5-pro-FC`
7+
- `gemini-2.5-pro`
8+
- `gemini-2.5-flash-FC`
9+
- `gemini-2.5-flash`
10+
- `gemini-2.5-flash-lite-preview-06-17-FC`
11+
- `gemini-2.5-flash-lite-preview-06-17`
512
- [Jul 6, 2025] [#1099](https://github.com/ShishirPatil/gorilla/pull/1099): Migrate Gemini inference to Google AI Studio.
613
- [Jul 2, 2025] [#1090](https://github.com/ShishirPatil/gorilla/pull/1090): Updated OpenAI models to use `developer` role instead of `system` role, following OpenAI's documentation recommendations. This change affects only the OpenAI Responses handler.
714
- [Jul 2, 2025] [#1062](https://github.com/ShishirPatil/gorilla/pull/1062): Introduce OpenAI Responses handler, and add support for `o3-2025-04-16` and `o4-mini-2025-04-16`.

berkeley-function-call-leaderboard/SUPPORTED_MODELS.md

Lines changed: 6 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -41,13 +41,12 @@ For model names containing `{...}`, multiple versions are available. For example
4141
| Falcon3-{1B,3B,7B,10B}-Instruct | Function Calling | Self-hosted 💻 | tiiuae/Falcon3-{1B,3B,7B,10B}-Instruct |
4242
| FireFunction-v2 | Function Calling | Fireworks AI | firefunction-v2-FC |
4343
| Functionary-{Small,Medium}-v3.1 | Function Calling | MeetKai | meetkai/functionary-{small,medium}-v3.1-FC |
44-
| Gemini-2.0-Flash-001 | Function Calling | Google | gemini-2.0-flash-001-FC |
45-
| Gemini-2.0-Flash-001 | Prompt | Google | gemini-2.0-flash-001 |
46-
| Gemini-2.0-Flash-Lite-001 | Function Calling | Google | gemini-2.0-flash-lite-001-FC |
47-
| Gemini-2.0-Flash-Lite-001 | Prompt | Google | gemini-2.0-flash-lite-001 |
48-
| Gemini-2.0-Flash-Thinking-Exp-01-21 | Prompt | Google | gemini-2.0-flash-thinking-exp-01-21 |
49-
| Gemini-2.5-Pro-Exp-05-06 | Function Calling | Google | gemini-2.5-pro-preview-05-06-FC |
50-
| Gemini-2.5-Pro-Exp-05-06 | Prompt | Google | gemini-2.5-pro-preview-05-06 |
44+
| Gemini-2.5-Flash | Function Calling | Google | gemini-2.5-flash-FC |
45+
| Gemini-2.5-Flash | Prompt | Google | gemini-2.5-flash |
46+
| Gemini-2.5-Flash-Lite-Preview-06-17 | Function Calling | Google | gemini-2.5-flash-lite-preview-06-17-FC |
47+
| Gemini-2.5-Flash-Lite-Preview-06-17 | Prompt | Google | gemini-2.5-flash-lite-preview-06-17 |
48+
| Gemini-2.5-Pro | Function Calling | Google | gemini-2.5-pro-FC |
49+
| Gemini-2.5-Pro | Prompt | Google | gemini-2.5-pro |
5150
| Gemma-3-{1b,4b,12b,27b}-it | Prompt | Self-hosted 💻 | google/gemma-3-{1b,4b,12b,27b}-it |
5251
| GLM-4-9b-Chat | Function Calling | Self-hosted 💻 | THUDM/glm-4-9b-chat |
5352
| GoGoAgent | Prompt | BitAgent | BitAgent/GoGoAgent |

berkeley-function-call-leaderboard/bfcl_eval/constants/model_config.py

Lines changed: 30 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -584,87 +584,75 @@ class ModelConfig:
584584
is_fc_model=False,
585585
underscore_to_dot=False,
586586
),
587-
"gemini-2.0-flash-lite-001-FC": ModelConfig(
588-
model_name="gemini-2.0-flash-lite-001-FC",
589-
display_name="Gemini-2.0-Flash-Lite-001 (FC)",
587+
"gemini-2.5-flash-lite-preview-06-17-FC": ModelConfig(
588+
model_name="gemini-2.5-flash-lite-preview-06-17-FC",
589+
display_name="Gemini-2.5-Flash-Lite-Preview-06-17 (FC)",
590590
url="https://deepmind.google/technologies/gemini/flash-lite/",
591591
org="Google",
592592
license="Proprietary",
593593
model_handler=GeminiHandler,
594-
input_price=0.075,
595-
output_price=0.3,
594+
input_price=0.1,
595+
output_price=0.4,
596596
is_fc_model=True,
597597
underscore_to_dot=True,
598598
),
599-
"gemini-2.0-flash-lite-001": ModelConfig(
600-
model_name="gemini-2.0-flash-lite-001",
601-
display_name="Gemini-2.0-Flash-Lite-001 (Prompt)",
599+
"gemini-2.5-flash-lite-preview-06-17": ModelConfig(
600+
model_name="gemini-2.5-flash-lite-preview-06-17",
601+
display_name="Gemini-2.5-Flash-Lite-Preview-06-17 (Prompt)",
602602
url="https://deepmind.google/technologies/gemini/flash-lite/",
603603
org="Google",
604604
license="Proprietary",
605605
model_handler=GeminiHandler,
606-
input_price=0.075,
607-
output_price=0.3,
606+
input_price=0.1,
607+
output_price=0.4,
608608
is_fc_model=False,
609609
underscore_to_dot=False,
610610
),
611-
"gemini-2.0-flash-001-FC": ModelConfig(
612-
model_name="gemini-2.0-flash-001-FC",
613-
display_name="Gemini-2.0-Flash-001 (FC)",
611+
"gemini-2.5-flash-FC": ModelConfig(
612+
model_name="gemini-2.5-flash-FC",
613+
display_name="Gemini-2.5-Flash (FC)",
614614
url="https://deepmind.google/technologies/gemini/flash/",
615615
org="Google",
616616
license="Proprietary",
617617
model_handler=GeminiHandler,
618-
input_price=0.15,
619-
output_price=0.6,
618+
input_price=0.3,
619+
output_price=2.5,
620620
is_fc_model=True,
621621
underscore_to_dot=True,
622622
),
623-
"gemini-2.0-flash-001": ModelConfig(
624-
model_name="gemini-2.0-flash-001",
625-
display_name="Gemini-2.0-Flash-001 (Prompt)",
623+
"gemini-2.5-flash": ModelConfig(
624+
model_name="gemini-2.5-flash",
625+
display_name="Gemini-2.5-Flash (Prompt)",
626626
url="https://deepmind.google/technologies/gemini/flash/",
627627
org="Google",
628628
license="Proprietary",
629629
model_handler=GeminiHandler,
630-
input_price=0.15,
631-
output_price=0.6,
630+
input_price=0.3,
631+
output_price=2.5,
632632
is_fc_model=False,
633633
underscore_to_dot=False,
634634
),
635-
"gemini-2.5-pro-preview-05-06-FC": ModelConfig(
636-
model_name="gemini-2.5-pro-preview-05-06-FC",
637-
display_name="Gemini-2.5-Pro-Preview-05-06 (FC)",
635+
"gemini-2.5-pro-FC": ModelConfig(
636+
model_name="gemini-2.5-pro-FC",
637+
display_name="Gemini-2.5-Pro (FC)",
638638
url="https://deepmind.google/technologies/gemini/pro/",
639639
org="Google",
640640
license="Proprietary",
641641
model_handler=GeminiHandler,
642-
input_price=0,
643-
output_price=0,
642+
input_price=1.5,
643+
output_price=10,
644644
is_fc_model=True,
645645
underscore_to_dot=True,
646646
),
647-
"gemini-2.5-pro-preview-05-06": ModelConfig(
648-
model_name="gemini-2.5-pro-preview-05-06",
649-
display_name="Gemini-2.5-Pro-Preview-05-06 (Prompt)",
647+
"gemini-2.5-pro": ModelConfig(
648+
model_name="gemini-2.5-pro",
649+
display_name="Gemini-2.5-Pro (Prompt)",
650650
url="https://deepmind.google/technologies/gemini/pro/",
651651
org="Google",
652652
license="Proprietary",
653653
model_handler=GeminiHandler,
654-
input_price=0,
655-
output_price=0,
656-
is_fc_model=False,
657-
underscore_to_dot=False,
658-
),
659-
"gemini-2.0-flash-thinking-exp-01-21": ModelConfig(
660-
model_name="gemini-2.0-flash-thinking-exp-01-21",
661-
display_name="Gemini-2.0-Flash-Thinking-Exp-01-21 (Prompt)",
662-
url="https://deepmind.google/technologies/gemini/flash-thinking/",
663-
org="Google",
664-
license="Proprietary",
665-
model_handler=GeminiHandler,
666-
input_price=0,
667-
output_price=0,
654+
input_price=1.5,
655+
output_price=10,
668656
is_fc_model=False,
669657
underscore_to_dot=False,
670658
),

berkeley-function-call-leaderboard/bfcl_eval/constants/supported_models.py

Lines changed: 6 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -52,13 +52,12 @@
5252
"mistral-medium-2505-FC",
5353
"firefunction-v2-FC",
5454
"Nexusflow-Raven-v2",
55-
"gemini-2.0-flash-lite-001-FC",
56-
"gemini-2.0-flash-lite-001",
57-
"gemini-2.0-flash-001-FC",
58-
"gemini-2.0-flash-001",
59-
"gemini-2.5-pro-preview-05-06-FC",
60-
"gemini-2.5-pro-preview-05-06",
61-
"gemini-2.0-flash-thinking-exp-01-21",
55+
"gemini-2.5-flash-lite-preview-06-17-FC",
56+
"gemini-2.5-flash-lite-preview-06-17",
57+
"gemini-2.5-flash-FC",
58+
"gemini-2.5-flash",
59+
"gemini-2.5-pro-FC",
60+
"gemini-2.5-pro",
6261
"meetkai/functionary-small-v3.1-FC",
6362
"meetkai/functionary-medium-v3.1-FC",
6463
"databricks-dbrx-instruct",

berkeley-function-call-leaderboard/bfcl_eval/eval_checker/eval_runner_helper.py

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -79,9 +79,15 @@ def process_data(key, data, output_list):
7979
isinstance(inner_item, list) for inner_item in data[key]
8080
):
8181
flattened_list = sum(data[key], [])
82-
output_list.extend([item for item in flattened_list if item != 0])
82+
output_list.extend(
83+
[
84+
item
85+
for item in flattened_list
86+
if isinstance(item, (int, float)) and item != 0
87+
]
88+
)
8389
else:
84-
if data[key] != 0:
90+
if isinstance(data[key], (int, float)) and data[key] != 0:
8591
output_list.append(data[key])
8692

8793
if model_name not in leaderboard_table:

berkeley-function-call-leaderboard/bfcl_eval/model_handler/api_inference/gemini.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -287,8 +287,8 @@ def _parse_query_response_prompting(self, api_response: any) -> dict:
287287
and len(api_response.candidates[0].content.parts) > 0
288288
):
289289
assert (
290-
len(api_response.candidates[0].content.parts) == 2
291-
), api_response.candidates[0].content.parts
290+
len(api_response.candidates[0].content.parts) <= 2
291+
), f"Length of response parts should be less than or equal to 2. {api_response.candidates[0].content.parts}"
292292

293293
model_responses = ""
294294
reasoning_content = ""

berkeley-function-call-leaderboard/bfcl_eval/model_handler/utils.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -149,6 +149,10 @@ def convert_to_tool(functions, mapping, model_style):
149149
):
150150
params["description"] += f" Enum values: {str(params['enum'])}."
151151
del params["enum"]
152+
# No `format` when type is `string`
153+
if "format" in params and params["type"] == "string":
154+
params["description"] += f" Format: {str(params['format'])}."
155+
del params["format"]
152156

153157
# Process the return field
154158
if "response" in item:

Comments (0)