Commit f810631

[BFCL] Add Google Gemma-3 Series Models (#939)
Add the following new models to the leaderboard:
- `google/gemma-3-1b-it`
- `google/gemma-3-4b-it`
- `google/gemma-3-12b-it`
- `google/gemma-3-27b-it`
1 parent 9b17c68 commit f810631

6 files changed (+34 additions, -23 deletions)


berkeley-function-call-leaderboard/CHANGELOG.md

Lines changed: 5 additions & 0 deletions
```diff
@@ -6,6 +6,11 @@ All notable changes to the Berkeley Function Calling Leaderboard will be documented
   - `gemini-2.0-flash-lite-001-FC`
   - `gemini-2.0-flash-lite-001`
   - `gemini-2.0-flash-thinking-exp-01-21`
+- [Mar 13, 2025] [#939](https://github.com/ShishirPatil/gorilla/pull/939): Add the following new models to the leaderboard:
+  - `google/gemma-3-1b-it`
+  - `google/gemma-3-4b-it`
+  - `google/gemma-3-12b-it`
+  - `google/gemma-3-27b-it`
 - [Mar 13, 2025] [#941](https://github.com/ShishirPatil/gorilla/pull/941): Add new model `Team-ACE/ToolACE-2-8B` to the leaderboard.
 - [Mar 2, 2025] [#923](https://github.com/ShishirPatil/gorilla/pull/923): Add the following new models to the leaderboard:
   - `claude-3-7-sonnet-20250219`
```

berkeley-function-call-leaderboard/SUPPORTED_MODELS.md

Lines changed: 1 addition & 1 deletion
```diff
@@ -68,7 +68,7 @@ Below is a comprehensive table of models supported for running leaderboard evaluations
 |palmyra-x-004 | Function Calling|
 |BitAgent/GoGoAgent | Prompt|
 |deepseek-ai/DeepSeek-R1 💻| Prompt|
-|google/gemma-2-{2b,9b,27b}-it 💻| Prompt|
+|google/gemma-3-{1b,4b,12b,27b}-it 💻| Prompt|
 |mistralai/Ministral-8B-Instruct-2410 💻| Function Calling|
 |meta-llama/Meta-Llama-3-{8B,70B}-Instruct 💻| Prompt|
 |meta-llama/Llama-3.1-{8B,70B}-Instruct-FC 💻| Function Calling|
```

berkeley-function-call-leaderboard/bfcl/eval_checker/model_metadata.py

Lines changed: 13 additions & 13 deletions
```diff
@@ -452,30 +452,30 @@
         "DeepSeek",
         "DeepSeek License",
     ],
-    "google/gemma-7b-it": [
-        "Gemma-7b-it (Prompt)",
-        "https://blog.google/technology/developers/gemma-open-models/",
+    "google/gemma-3-1b-it": [
+        "Gemma-3-1b-it (Prompt)",
+        "https://blog.google/technology/developers/gemma-3/",
         "Google",
         "gemma-terms-of-use",
     ],
-    "google/gemma-2-2b-it": [
-        "Gemma-2-2b-it (Prompt)",
-        "https://blog.google/technology/developers/gemma-open-models/",
+    "google/gemma-3-4b-it": [
+        "Gemma-3-4b-it (Prompt)",
+        "https://blog.google/technology/developers/gemma-3/",
         "Google",
         "gemma-terms-of-use",
     ],
-    "google/gemma-2-9b-it": [
-        "Gemma-2-9b-it (Prompt)",
-        "https://blog.google/technology/developers/gemma-open-models/",
+    "google/gemma-3-12b-it": [
+        "Gemma-3-12b-it (Prompt)",
+        "https://blog.google/technology/developers/gemma-3/",
         "Google",
         "gemma-terms-of-use",
     ],
-    "google/gemma-2-27b-it": [
-        "Gemma-2-27b-it (Prompt)",
-        "https://blog.google/technology/developers/gemma-open-models/",
+    "google/gemma-3-27b-it": [
+        "Gemma-3-27b-it (Prompt)",
+        "https://blog.google/technology/developers/gemma-3/",
         "Google",
         "gemma-terms-of-use",
-    ],
+    ],
     "glaiveai/glaive-function-calling-v1": [
         "Glaive-v1 (FC)",
         "https://huggingface.co/glaiveai/glaive-function-calling-v1",
```

berkeley-function-call-leaderboard/bfcl/model_handler/handler_map.py

Lines changed: 4 additions & 3 deletions
```diff
@@ -105,9 +105,10 @@
 # Inference through local hosting
 local_inference_handler_map = {
     "deepseek-ai/DeepSeek-R1": DeepseekReasoningHandler,  # This is the local version of DeepSeek-R1
-    "google/gemma-2-2b-it": GemmaHandler,
-    "google/gemma-2-9b-it": GemmaHandler,
-    "google/gemma-2-27b-it": GemmaHandler,
+    "google/gemma-3-1b-it": GemmaHandler,
+    "google/gemma-3-4b-it": GemmaHandler,
+    "google/gemma-3-12b-it": GemmaHandler,
+    "google/gemma-3-27b-it": GemmaHandler,
     "meta-llama/Meta-Llama-3-8B-Instruct": LlamaHandler,
     "meta-llama/Meta-Llama-3-70B-Instruct": LlamaHandler,
     "meta-llama/Llama-3.1-8B-Instruct-FC": LlamaFCHandler,
```

berkeley-function-call-leaderboard/bfcl/model_handler/local_inference/gemma.py

Lines changed: 10 additions & 6 deletions
```diff
@@ -1,7 +1,6 @@
 from bfcl.model_handler.local_inference.base_oss_handler import OSSHandler
 from bfcl.model_handler.utils import (
     combine_consecutive_user_prompts,
-    convert_system_prompt_into_user_prompt,
     func_doc_language_specific_pre_processing,
     system_prompt_pre_processing_chat_model,
 )
@@ -16,12 +15,20 @@ def __init__(self, model_name, temperature) -> None:
     def _format_prompt(self, messages, function):
         """
         "bos_token": "<bos>",
-        "chat_template": "{{ bos_token }}{% if messages[0]['role'] == 'system' %}{{ raise_exception('System role not supported') }}{% endif %}{% for message in messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if (message['role'] == 'assistant') %}{% set role = 'model' %}{% else %}{% set role = message['role'] %}{% endif %}{{ '<start_of_turn>' + role + '\n' + message['content'] | trim + '<end_of_turn>\n' }}{% endfor %}{% if add_generation_prompt %}{{'<start_of_turn>model\n'}}{% endif %}",
+        "chat_template": "{{ bos_token }}\n{%- if messages[0]['role'] == 'system' -%}\n {%- if messages[0]['content'] is string -%}\n {%- set first_user_prefix = messages[0]['content'] + '\n\n' -%}\n {%- else -%}\n {%- set first_user_prefix = messages[0]['content'][0]['text'] + '\n\n' -%}\n {%- endif -%}\n {%- set loop_messages = messages[1:] -%}\n{%- else -%}\n {%- set first_user_prefix = \"\" -%}\n {%- set loop_messages = messages -%}\n{%- endif -%}\n{%- for message in loop_messages -%}\n {%- if (message['role'] == 'user') != (loop.index0 % 2 == 0) -%}\n {{ raise_exception(\"Conversation roles must alternate user/assistant/user/assistant/...\") }}\n {%- endif -%}\n {%- if (message['role'] == 'assistant') -%}\n {%- set role = \"model\" -%}\n {%- else -%}\n {%- set role = message['role'] -%}\n {%- endif -%}\n {{ '<start_of_turn>' + role + '\n' + (first_user_prefix if loop.first else \"\") }}\n {%- if message['content'] is string -%}\n {{ message['content'] | trim }}\n {%- elif message['content'] is iterable -%}\n {%- for item in message['content'] -%}\n {%- if item['type'] == 'image' -%}\n {{ '<start_of_image>' }}\n {%- elif item['type'] == 'text' -%}\n {{ item['text'] | trim }}\n {%- endif -%}\n {%- endfor -%}\n {%- else -%}\n {{ raise_exception(\"Invalid content type\") }}\n {%- endif -%}\n {{ '<end_of_turn>\n' }}\n{%- endfor -%}\n{%- if add_generation_prompt -%}\n {{'<start_of_turn>model\n'}}\n{%- endif -%}\n",
         """
         formatted_prompt = "<bos>"

+        if messages[0]["role"] == "system":
+            first_user_prefix = messages[0]["content"].strip() + "\n\n"
+            messages = messages[1:]
+        else:
+            first_user_prefix = ""
+
+        is_first = True
         for message in messages:
-            formatted_prompt += f"<start_of_turn>{message['role']}\n{message['content'].strip()}<end_of_turn>\n"
+            formatted_prompt += f"<start_of_turn>{message['role']}\n{first_user_prefix if is_first else ''}{message['content'].strip()}<end_of_turn>\n"
+            is_first = False

         formatted_prompt += f"<start_of_turn>model\n"

@@ -39,9 +46,6 @@ def _pre_query_processing_prompting(self, test_entry: dict) -> dict:
         )

         for round_idx in range(len(test_entry["question"])):
-            test_entry["question"][round_idx] = convert_system_prompt_into_user_prompt(
-                test_entry["question"][round_idx]
-            )
             test_entry["question"][round_idx] = combine_consecutive_user_prompts(
                 test_entry["question"][round_idx]
            )
```
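
The substantive change here: Gemma-2's chat template rejected system messages outright (`raise_exception('System role not supported')`), so the handler previously converted system prompts into user turns. Gemma-3's template instead folds the system message into the first user turn as a prefix, which is what the new `first_user_prefix` logic implements, and why the `convert_system_prompt_into_user_prompt` import is no longer needed. A minimal standalone sketch of the new formatting, with the handler's `function` parameter omitted for brevity:

```python
# Standalone sketch mirroring the _format_prompt logic added in this commit.
def format_gemma3_prompt(messages):
    formatted_prompt = "<bos>"

    # Gemma-3: a system message becomes a prefix on the first user turn,
    # instead of raising "System role not supported" as in Gemma-2.
    if messages[0]["role"] == "system":
        first_user_prefix = messages[0]["content"].strip() + "\n\n"
        messages = messages[1:]
    else:
        first_user_prefix = ""

    is_first = True
    for message in messages:
        formatted_prompt += (
            f"<start_of_turn>{message['role']}\n"
            f"{first_user_prefix if is_first else ''}"
            f"{message['content'].strip()}<end_of_turn>\n"
        )
        is_first = False

    return formatted_prompt + "<start_of_turn>model\n"

print(format_gemma3_prompt([
    {"role": "system", "content": "You can call functions."},
    {"role": "user", "content": "What is the weather in Berkeley?"},
]))
# <bos><start_of_turn>user
# You can call functions.
#
# What is the weather in Berkeley?<end_of_turn>
# <start_of_turn>model
```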

berkeley-function-call-leaderboard/bfcl/model_handler/local_inference/qwen_fc.py

Lines changed: 1 addition & 0 deletions
```diff
@@ -13,6 +13,7 @@ class QwenFCHandler(OSSHandler):
     def __init__(self, model_name, temperature) -> None:
         super().__init__(model_name, temperature)
         self.is_fc_model = True
+        self.model_name_huggingface = model_name.replace("-FC", "")

     @override
     def decode_ast(self, result, language="Python"):
```
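
The added line strips the leaderboard's `-FC` suffix so the underlying Hugging Face repository name is used when loading weights. A quick illustration (the model ID below is a hypothetical example, not taken from this commit):

```python
model_name = "Qwen/Qwen2.5-7B-Instruct-FC"              # hypothetical leaderboard ID
model_name_huggingface = model_name.replace("-FC", "")  # strip the BFCL-only suffix
print(model_name_huggingface)                           # Qwen/Qwen2.5-7B-Instruct
```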
