[BFCL] Add New Model gpt-4.5-preview-2025-02-27, gpt-4.5-preview-2025-02-27-FC (#922)

HuanzhiMao · web-flow · commit 8c20361f4ba4 · 2025-02-28T17:53:05.000-08:00
Add the following new models to the leaderboard:
  - `gpt-4.5-preview-2025-02-27`
  - `gpt-4.5-preview-2025-02-27-FC`

This PR also upgrades the `openai` library to version `1.65.0`.
diff --git a/berkeley-function-call-leaderboard/CHANGELOG.md b/berkeley-function-call-leaderboard/CHANGELOG.md
@@ -2,6 +2,9 @@
 
 All notable changes to the Berkeley Function Calling Leaderboard will be documented in this file.
 
+- [Feb 27, 2025] [#922](https://github.com/ShishirPatil/gorilla/pull/922): Add the following new models to the leaderboard:
+  - `gpt-4.5-preview-2025-02-27`
+  - `gpt-4.5-preview-2025-02-27-FC`
 - [Feb 24, 2025] [#917](https://github.com/ShishirPatil/gorilla/pull/917): Add new model `BitAgent/BitAgent-8B` to the leaderboard.
 - [Feb 5, 2025] [#900](https://github.com/ShishirPatil/gorilla/pull/900), [#913](https://github.com/ShishirPatil/gorilla/pull/913): Add the following new models to the leaderboard:
   - `uiuc-convai/CoALM-8B`
diff --git a/berkeley-function-call-leaderboard/SUPPORTED_MODELS.md b/berkeley-function-call-leaderboard/SUPPORTED_MODELS.md
@@ -25,6 +25,8 @@ Below is a comprehensive table of models supported for running leaderboard evalu
 |gpt-4o-2024-11-20 | Prompt|
 |gpt-4o-mini-2024-07-18-FC | Function Calling|
 |gpt-4o-mini-2024-07-18 | Prompt|
+|gpt-4.5-preview-2025-02-27-FC | Function Calling|
+|gpt-4.5-preview-2025-02-27 | Prompt|
 |o1-2024-12-17-FC | Function Calling|
 |o1-2024-12-17 | Prompt|
 |o3-mini-2025-01-31-FC | Function Calling|
diff --git a/berkeley-function-call-leaderboard/bfcl/eval_checker/model_metadata.py b/berkeley-function-call-leaderboard/bfcl/eval_checker/model_metadata.py
@@ -1,5 +1,12 @@
 from bfcl.model_handler.handler_map import local_inference_handler_map
 
+# Each item in the dictionary has the following format:
+# {model_name: [
+#     model_display_name_in_leaderboard,
+#     url_to_model_page,
+#     model_creator,
+#     model_license,
+# ]}
 MODEL_METADATA_MAPPING = {
     "gorilla-openfunctions-v2": [
         "Gorilla-OpenFunctions-v2 (FC)",
@@ -13,6 +20,18 @@
         "DeepSeek",
         "DeepSeek License",
     ],
+    "gpt-4.5-preview-2025-02-27-FC": [
+        "GPT-4.5-Preview-2025-02-27 (FC)",
+        "https://openai.com/index/introducing-gpt-4-5/",
+        "OpenAI",
+        "Proprietary",
+    ],
+    "gpt-4.5-preview-2025-02-27": [
+        "GPT-4.5-Preview-2025-02-27 (Prompt)",
+        "https://openai.com/index/introducing-gpt-4-5/",
+        "OpenAI",
+        "Proprietary",
+    ],
     "o1-2024-12-17-FC": [
         "o1-2024-12-17 (FC)",
         "https://openai.com/o1/",
@@ -38,13 +57,13 @@
         "Proprietary",
     ],
     "gpt-4o-2024-11-20": [
-        "gpt-4o-2024-11-20 (Prompt)",
+        "GPT-4o-2024-11-20 (Prompt)",
         "https://openai.com/index/hello-gpt-4o/",
         "OpenAI",
         "Proprietary",
     ],
     "gpt-4o-2024-11-20-FC": [
-        "gpt-4o-2024-11-20 (FC)",
+        "GPT-4o-2024-11-20 (FC)",
         "https://openai.com/index/hello-gpt-4o/",
         "OpenAI",
         "Proprietary",
@@ -932,6 +951,8 @@
     "mistral-small-2402-FC": 1,
     "mistral-small-2402": 1,
     "mistral-tiny-2312": 0.25,
+    "gpt-4.5-preview-2025-02-27-FC": 75,
+    "gpt-4.5-preview-2025-02-27": 75,
     "o1-2024-12-17-FC": 15,
     "o1-2024-12-17": 15,
     "o3-mini-2025-01-31-FC": 1.1,
@@ -1005,6 +1026,8 @@
     "mistral-medium-2312": 8.1,
     "mistral-small-2402-FC": 3,
     "mistral-tiny-2312": 0.25,
+    "gpt-4.5-preview-2025-02-27-FC": 150,
+    "gpt-4.5-preview-2025-02-27": 150,
     "o1-2024-12-17-FC": 60,
     "o1-2024-12-17": 60,
     "o3-mini-2025-01-31-FC": 4,
diff --git a/berkeley-function-call-leaderboard/bfcl/model_handler/constant.py b/berkeley-function-call-leaderboard/bfcl/model_handler/constant.py
@@ -114,6 +114,7 @@
 UNDERSCORE_TO_DOT = [
     # TODO: Use the model style to determine this, single source of truth
     "DeepSeek-V3",
+    "gpt-4.5-preview-2025-02-27-FC",
     "o1-2024-12-17-FC",
     "o3-mini-2025-01-31-FC",
     "gpt-4o-2024-11-20-FC",
diff --git a/berkeley-function-call-leaderboard/bfcl/model_handler/handler_map.py b/berkeley-function-call-leaderboard/bfcl/model_handler/handler_map.py
@@ -41,6 +41,8 @@
 api_inference_handler_map = {
     "gorilla-openfunctions-v2": GorillaHandler,
     "DeepSeek-V3": DeepSeekAPIHandler,
+    "gpt-4.5-preview-2025-02-27": OpenAIHandler,
+    "gpt-4.5-preview-2025-02-27-FC": OpenAIHandler,
     "o1-2024-12-17-FC": OpenAIHandler,
     "o1-2024-12-17": OpenAIHandler,
     "o3-mini-2025-01-31-FC": OpenAIHandler,
diff --git a/berkeley-function-call-leaderboard/pyproject.toml b/berkeley-function-call-leaderboard/pyproject.toml
@@ -22,7 +22,7 @@ dependencies = [
     "tree_sitter==0.21.3",
     "tree-sitter-java==0.21.0",
     "tree-sitter-javascript==0.21.4",
-    "openai==1.58.0",
+    "openai==1.65.0",
     "mistralai==1.1.0",
     "anthropic==0.39.0",
     "cohere==5.13.3",