Skip to content

Commit c0c9f96

Browse files
authored
Merge branch 'main' into feat/type-hints
2 parents a5e2028 + 98dca00 commit c0c9f96

File tree

10 files changed

+619
-189
lines changed

10 files changed

+619
-189
lines changed
Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,55 @@
1+
name: Publish BFCL to PyPI
2+
3+
on:
4+
push:
5+
branches: [main]
6+
7+
jobs:
8+
build_and_publish:
9+
runs-on: ubuntu-latest
10+
permissions:
11+
# IMPORTANT: this permission is mandatory for Trusted Publishing
12+
id-token: write
13+
14+
steps:
15+
- name: Checkout
16+
uses: actions/checkout@v4
17+
with:
18+
fetch-depth: 0 # we need full history to count commits
19+
20+
- name: Compute CalVer-serial version
21+
id: ver
22+
run: |
23+
# 1) today's date in UTC -> 2025.06.08
24+
DATE=$(date -u '+%Y.%m.%d')
25+
26+
# 2) how many commits since 00:00 UTC?
27+
COMMITS_TODAY=$(git rev-list --count \
28+
--since="$(date -u '+%Y-%m-%d 00:00')" HEAD)
29+
30+
# 3) serial = commits - 1 (first push → 0)
31+
SERIAL=$((COMMITS_TODAY - 1))
32+
33+
if [ "$SERIAL" -eq 0 ]; then
34+
VERSION="$DATE"
35+
else
36+
VERSION="$DATE.$SERIAL"
37+
fi
38+
39+
echo "Computed version: $VERSION"
40+
echo "version=$VERSION" >> "$GITHUB_OUTPUT"
41+
42+
- name: Build wheel & sdist
43+
env:
44+
# 4) Tell setuptools-scm to *pretend* this is the version
45+
SETUPTOOLS_SCM_PRETEND_VERSION: ${{ steps.ver.outputs.version }}
46+
working-directory: berkeley-function-call-leaderboard
47+
run: |
48+
python -m pip install --upgrade build "setuptools-scm[toml]>=8"
49+
python -m build
50+
ls -l dist
51+
52+
- name: Publish to PyPI
53+
uses: pypa/gh-action-pypi-publish@release/v1
54+
with:
55+
packages-dir: berkeley-function-call-leaderboard/dist/

berkeley-function-call-leaderboard/CHANGELOG.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,8 @@
22

33
All notable changes to the Berkeley Function Calling Leaderboard will be documented in this file.
44

5+
- [Jun 11, 2025] [#1061](https://github.com/ShishirPatil/gorilla/pull/1061): Add support for DashScope API inference for `Qwen3` series
6+
- [Jun 8, 2025] [#1054](https://github.com/ShishirPatil/gorilla/pull/1054), [#1055](https://github.com/ShishirPatil/gorilla/pull/1055): Packagerize codebase for PyPI Distribution. Now available with `pip install bfcl-eval`, in addition to the existing `pip install -e`.
57
- [May 27, 2025] [#1040](https://github.com/ShishirPatil/gorilla/pull/1040): Add the following new models to the leaderboard:
68
- `mistral-medium-2505`
79
- `mistral-medium-2505-FC`

berkeley-function-call-leaderboard/SUPPORTED_MODELS.md

Lines changed: 113 additions & 106 deletions
Large diffs are not rendered by default.

berkeley-function-call-leaderboard/bfcl_eval/constants/model_config.py

Lines changed: 219 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
from bfcl_eval.model_handler.api_inference.cohere import CohereHandler
66
from bfcl_eval.model_handler.api_inference.databricks import DatabricksHandler
77
from bfcl_eval.model_handler.api_inference.deepseek import DeepSeekAPIHandler
8+
from bfcl_eval.model_handler.api_inference.dm_cito import DMCitoHandler
89
from bfcl_eval.model_handler.api_inference.fireworks import FireworksHandler
910
from bfcl_eval.model_handler.api_inference.functionary import FunctionaryHandler
1011
from bfcl_eval.model_handler.api_inference.gemini import GeminiHandler
@@ -18,6 +19,7 @@
1819
from bfcl_eval.model_handler.api_inference.novita import NovitaHandler
1920
from bfcl_eval.model_handler.api_inference.nvidia import NvidiaHandler
2021
from bfcl_eval.model_handler.api_inference.openai import OpenAIHandler
22+
from bfcl_eval.model_handler.api_inference.qwen import QwenAPIHandler
2123
from bfcl_eval.model_handler.api_inference.writer import WriterHandler
2224
from bfcl_eval.model_handler.api_inference.yi import YiHandler
2325
from bfcl_eval.model_handler.local_inference.bielik import BielikHandler
@@ -44,8 +46,6 @@
4446
from bfcl_eval.model_handler.local_inference.salesforce_llama import SalesforceLlamaHandler
4547
from bfcl_eval.model_handler.local_inference.salesforce_qwen import SalesforceQwenHandler
4648
from bfcl_eval.model_handler.local_inference.think_agent import ThinkAgentHandler
47-
from bfcl_eval.model_handler.api_inference.qwq import QwenAPIHandler
48-
from bfcl_eval.model_handler.api_inference.dm_cito import DMCitoHandler
4949

5050
# -----------------------------------------------------------------------------
5151
# A mapping of model identifiers to their respective model configurations.
@@ -70,7 +70,7 @@ class ModelConfig:
7070
input_price (Optional[float]): USD per million input tokens (None for open source models).
7171
output_price (Optional[float]): USD per million output tokens (None for open source models).
7272
is_fc_model (bool): True if this model is used in Function-Calling mode, otherwise False for Prompt-based mode.
73-
underscore_to_dot (bool): True if model does not support '.' in function names, in which case we will replace '.' with '_'.
73+
underscore_to_dot (bool): True if model does not support '.' in function names, in which case we will replace '.' with '_'. Currently this only matters for checker. TODO: We should let the tool compilation step also take this into account.
7474
7575
"""
7676

@@ -827,6 +827,222 @@ class ModelConfig:
827827
is_fc_model=False,
828828
underscore_to_dot=False,
829829
),
830+
"qwen3-0.6b-FC": ModelConfig(
831+
model_name="qwen3-0.6b-FC",
832+
display_name="Qwen3-0.6B (FC)",
833+
url="https://huggingface.co/Qwen/Qwen3-0.6B",
834+
org="Qwen",
835+
license="apache-2.0",
836+
model_handler=QwenAPIHandler,
837+
input_price=None,
838+
output_price=None,
839+
is_fc_model=True,
840+
underscore_to_dot=True,
841+
),
842+
"qwen3-0.6b": ModelConfig(
843+
model_name="qwen3-0.6b",
844+
display_name="Qwen3-0.6B (Prompt)",
845+
url="https://huggingface.co/Qwen/Qwen3-0.6B",
846+
org="Qwen",
847+
license="apache-2.0",
848+
model_handler=QwenAPIHandler,
849+
input_price=None,
850+
output_price=None,
851+
is_fc_model=False,
852+
underscore_to_dot=False,
853+
),
854+
"qwen3-1.7b-FC": ModelConfig(
855+
model_name="qwen3-1.7b-FC",
856+
display_name="Qwen3-1.7B (FC)",
857+
url="https://huggingface.co/Qwen/Qwen3-1.7B",
858+
org="Qwen",
859+
license="apache-2.0",
860+
model_handler=QwenAPIHandler,
861+
input_price=None,
862+
output_price=None,
863+
is_fc_model=True,
864+
underscore_to_dot=True,
865+
),
866+
"qwen3-1.7b": ModelConfig(
867+
model_name="qwen3-1.7b",
868+
display_name="Qwen3-1.7B (Prompt)",
869+
url="https://huggingface.co/Qwen/Qwen3-1.7B",
870+
org="Qwen",
871+
license="apache-2.0",
872+
model_handler=QwenAPIHandler,
873+
input_price=None,
874+
output_price=None,
875+
is_fc_model=False,
876+
underscore_to_dot=False,
877+
),
878+
"qwen3-4b-FC": ModelConfig(
879+
model_name="qwen3-4b-FC",
880+
display_name="Qwen3-4B (FC)",
881+
url="https://huggingface.co/Qwen/Qwen3-4B",
882+
org="Qwen",
883+
license="apache-2.0",
884+
model_handler=QwenAPIHandler,
885+
input_price=None,
886+
output_price=None,
887+
is_fc_model=True,
888+
underscore_to_dot=True,
889+
),
890+
"qwen3-4b": ModelConfig(
891+
model_name="qwen3-4b",
892+
display_name="Qwen3-4B (Prompt)",
893+
url="https://huggingface.co/Qwen/Qwen3-4B",
894+
org="Qwen",
895+
license="apache-2.0",
896+
model_handler=QwenAPIHandler,
897+
input_price=None,
898+
output_price=None,
899+
is_fc_model=False,
900+
underscore_to_dot=False,
901+
),
902+
"qwen3-8b-FC": ModelConfig(
903+
model_name="qwen3-8b-FC",
904+
display_name="Qwen3-8B (FC)",
905+
url="https://huggingface.co/Qwen/Qwen3-8B",
906+
org="Qwen",
907+
license="apache-2.0",
908+
model_handler=QwenAPIHandler,
909+
input_price=None,
910+
output_price=None,
911+
is_fc_model=True,
912+
underscore_to_dot=True,
913+
),
914+
"qwen3-8b": ModelConfig(
915+
model_name="qwen3-8b",
916+
display_name="Qwen3-8B (Prompt)",
917+
url="https://huggingface.co/Qwen/Qwen3-8B",
918+
org="Qwen",
919+
license="apache-2.0",
920+
model_handler=QwenAPIHandler,
921+
input_price=None,
922+
output_price=None,
923+
is_fc_model=False,
924+
underscore_to_dot=False,
925+
),
926+
"qwen3-14b-FC": ModelConfig(
927+
model_name="qwen3-14b-FC",
928+
display_name="Qwen3-14B (FC)",
929+
url="https://huggingface.co/Qwen/Qwen3-14B",
930+
org="Qwen",
931+
license="apache-2.0",
932+
model_handler=QwenAPIHandler,
933+
input_price=None,
934+
output_price=None,
935+
is_fc_model=True,
936+
underscore_to_dot=True,
937+
),
938+
"qwen3-14b": ModelConfig(
939+
model_name="qwen3-14b",
940+
display_name="Qwen3-14B (Prompt)",
941+
url="https://huggingface.co/Qwen/Qwen3-14B",
942+
org="Qwen",
943+
license="apache-2.0",
944+
model_handler=QwenAPIHandler,
945+
input_price=None,
946+
output_price=None,
947+
is_fc_model=False,
948+
underscore_to_dot=False,
949+
),
950+
"qwen3-32b-FC": ModelConfig(
951+
model_name="qwen3-32b-FC",
952+
display_name="Qwen3-32B (FC)",
953+
url="https://huggingface.co/Qwen/Qwen3-32B",
954+
org="Qwen",
955+
license="apache-2.0",
956+
model_handler=QwenAPIHandler,
957+
input_price=None,
958+
output_price=None,
959+
is_fc_model=True,
960+
underscore_to_dot=True,
961+
),
962+
"qwen3-32b": ModelConfig(
963+
model_name="qwen3-32b",
964+
display_name="Qwen3-32B (Prompt)",
965+
url="https://huggingface.co/Qwen/Qwen3-32B",
966+
org="Qwen",
967+
license="apache-2.0",
968+
model_handler=QwenAPIHandler,
969+
input_price=None,
970+
output_price=None,
971+
is_fc_model=False,
972+
underscore_to_dot=False,
973+
),
974+
"qwen3-30b-a3b-FC": ModelConfig(
975+
model_name="qwen3-30b-a3b-FC",
976+
display_name="Qwen3-30B-A3B (FC)",
977+
url="https://huggingface.co/Qwen/Qwen3-30B-A3B",
978+
org="Qwen",
979+
license="apache-2.0",
980+
model_handler=QwenAPIHandler,
981+
input_price=None,
982+
output_price=None,
983+
is_fc_model=True,
984+
underscore_to_dot=True,
985+
),
986+
"qwen3-30b-a3b": ModelConfig(
987+
model_name="qwen3-30b-a3b",
988+
display_name="Qwen3-30B-A3B (Prompt)",
989+
url="https://huggingface.co/Qwen/Qwen3-30B-A3B",
990+
org="Qwen",
991+
license="apache-2.0",
992+
model_handler=QwenAPIHandler,
993+
input_price=None,
994+
output_price=None,
995+
is_fc_model=False,
996+
underscore_to_dot=False,
997+
),
998+
"qwen3-235b-a22b-FC": ModelConfig(
999+
model_name="qwen3-235b-a22b-FC",
1000+
display_name="Qwen3-235B-A22B (FC)",
1001+
url="https://huggingface.co/Qwen/Qwen3-235B-A22B",
1002+
org="Qwen",
1003+
license="apache-2.0",
1004+
model_handler=QwenAPIHandler,
1005+
input_price=None,
1006+
output_price=None,
1007+
is_fc_model=True,
1008+
underscore_to_dot=True,
1009+
),
1010+
"qwen3-235b-a22b": ModelConfig(
1011+
model_name="qwen3-235b-a22b",
1012+
display_name="Qwen3-235B-A22B (Prompt)",
1013+
url="https://huggingface.co/Qwen/Qwen3-235B-A22B",
1014+
org="Qwen",
1015+
license="apache-2.0",
1016+
model_handler=QwenAPIHandler,
1017+
input_price=None,
1018+
output_price=None,
1019+
is_fc_model=False,
1020+
underscore_to_dot=False,
1021+
),
1022+
"qwq-32b-FC": ModelConfig(
1023+
model_name="qwq-32b-FC",
1024+
display_name="QwQ-32B (FC)",
1025+
url="https://huggingface.co/Qwen/QwQ-32B",
1026+
org="Qwen",
1027+
license="apache-2.0",
1028+
model_handler=QwenAPIHandler,
1029+
input_price=None,
1030+
output_price=None,
1031+
is_fc_model=True,
1032+
underscore_to_dot=True,
1033+
),
1034+
"qwq-32b": ModelConfig(
1035+
model_name="qwq-32b",
1036+
display_name="QwQ-32B (Prompt)",
1037+
url="https://huggingface.co/Qwen/QwQ-32B",
1038+
org="Qwen",
1039+
license="apache-2.0",
1040+
model_handler=QwenAPIHandler,
1041+
input_price=None,
1042+
output_price=None,
1043+
is_fc_model=False,
1044+
underscore_to_dot=False,
1045+
),
8301046
"xiaoming-14B": ModelConfig(
8311047
model_name="xiaoming-14B",
8321048
display_name="xiaoming-14B (Prompt)",
@@ -853,7 +1069,6 @@ class ModelConfig:
8531069
),
8541070
}
8551071

856-
8571072
# Inference through local hosting
8581073
local_inference_model_map = {
8591074
"deepseek-ai/DeepSeek-R1": ModelConfig(
@@ -1492,18 +1707,6 @@ class ModelConfig:
14921707
is_fc_model=False,
14931708
underscore_to_dot=False,
14941709
),
1495-
"QwQ-32B": ModelConfig(
1496-
model_name="qwq-32b",
1497-
display_name="QwQ-32B (Prompt)",
1498-
url="https://huggingface.co/Qwen/QwQ-32B",
1499-
org="Qwen",
1500-
license="apache-2.0",
1501-
model_handler=QwenAPIHandler,
1502-
input_price=None,
1503-
output_price=None,
1504-
is_fc_model=False,
1505-
underscore_to_dot=False,
1506-
),
15071710
"tiiuae/Falcon3-10B-Instruct-FC": ModelConfig(
15081711
model_name="tiiuae/Falcon3-10B-Instruct-FC",
15091712
display_name="Falcon3-10B-Instruct (FC)",

berkeley-function-call-leaderboard/bfcl_eval/constants/supported_models.py

Lines changed: 22 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -74,7 +74,24 @@
7474
"grok-3-beta",
7575
"grok-3-mini-beta-FC",
7676
"grok-3-mini-beta",
77-
"QwQ-32B",
77+
"qwen3-0.6b-FC",
78+
"qwen3-0.6b",
79+
"qwen3-1.7b-FC",
80+
"qwen3-1.7b",
81+
"qwen3-4b-FC",
82+
"qwen3-4b",
83+
"qwen3-8b-FC",
84+
"qwen3-8b",
85+
"qwen3-14b-FC",
86+
"qwen3-14b",
87+
"qwen3-32b-FC",
88+
"qwen3-32b",
89+
"qwen3-30b-a3b-FC",
90+
"qwen3-30b-a3b",
91+
"qwen3-235b-a22b-FC",
92+
"qwen3-235b-a22b",
93+
"qwq-32b-FC",
94+
"qwq-32b",
7895
"xiaoming-14B",
7996
"DM-Cito-8B",
8097
"deepseek-ai/DeepSeek-R1",
@@ -118,6 +135,10 @@
118135
"Qwen/Qwen3-14B",
119136
"Qwen/Qwen3-32B-FC",
120137
"Qwen/Qwen3-32B",
138+
"Qwen/Qwen3-30B-A3B-FC",
139+
"Qwen/Qwen3-30B-A3B",
140+
"Qwen/Qwen3-235B-A22B-FC",
141+
"Qwen/Qwen3-235B-A22B",
121142
"Team-ACE/ToolACE-2-8B",
122143
"openbmb/MiniCPM3-4B",
123144
"openbmb/MiniCPM3-4B-FC",

0 commit comments

Comments (0)