Skip to content

Commit f1417a8

Browse files
authored
[BFCL] Fix Dataset Pre-Processing for Java and JavaScript Test Category, Part 2 (#545)
This PR fixes the function doc pre-processing issues for the Java and JavaScript test categories, following up on PR #538. - Some unnecessary steps in the `convert_to_tool` function are removed. These steps should not exist as not every model handler calls the `convert_to_tool` function (for example, the OSS models) and would unfairly benefit the models that use it. To make sure that every model gets the same pre-processed function doc, the pre-processing phase needs to be in the `language_specific_pre_processing` function (which is used by every handler). - Properly handle the inner element type for nested types.
1 parent b0e3289 commit f1417a8

File tree

12 files changed

+36
-41
lines changed

12 files changed

+36
-41
lines changed

berkeley-function-call-leaderboard/README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -209,7 +209,7 @@ Some companies have proposed some optimization strategies in their models' handl
209209
## Changelog
210210

211211
* [July 22, 2024] [#540](https://github.com/ShishirPatil/gorilla/pull/540): Chore: Improve handling of vLLM's cleanup phase error by combining all selected test categories into one single task to submit to the vLLM server.
212-
* [July 21, 2024] [#538](https://github.com/ShishirPatil/gorilla/pull/538): Fix `language_specific_pre_processing` function to properly handle pre-processing for prompts and function docs in Java and JavaScript test categories. All entries in these categories are affected.
212+
* [July 21, 2024] [#538](https://github.com/ShishirPatil/gorilla/pull/538), [#545](https://github.com/ShishirPatil/gorilla/pull/545): Fix the `language_specific_pre_processing` and `convert_to_tool` functions to properly handle pre-processing for prompts and function docs in Java and JavaScript test categories. All entries in these categories are affected.
213213
* [July 20, 2024] [#537](https://github.com/ShishirPatil/gorilla/pull/537): Update generation script for locally-hosted OSS model to use single-node multi-GPU inference method (tensor parallel). Ray is not used anymore.
214214
* [July 16, 2024] [#525](https://github.com/ShishirPatil/gorilla/pull/525), [#536](https://github.com/ShishirPatil/gorilla/pull/536): Add new model `ibm-granite/granite-20b-functioncalling` to the leaderboard.
215215
* [July 10, 2024] [#522](https://github.com/ShishirPatil/gorilla/pull/522): Bug fix in the evaluation dataset for Executable Parallel Multiple category. This includes updates to both prompts and function docs. 2 entries are affected.

berkeley-function-call-leaderboard/model_handler/claude_fc_handler.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,7 @@ def inference(self, prompt, functions, test_category):
3434
if type(functions) is not list:
3535
functions = [functions]
3636
claude_tool = convert_to_tool(
37-
functions, GORILLA_TO_OPENAPI, self.model_style, test_category, True
37+
functions, GORILLA_TO_OPENAPI, self.model_style, test_category
3838
)
3939
message = [{"role": "user", "content": prompt}]
4040
start_time = time.time()

berkeley-function-call-leaderboard/model_handler/claude_prompt_handler.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@ def __init__(self, model_name, temperature=0.7, top_p=1, max_tokens=1000) -> Non
2727

2828
def _get_claude_function_calling_response(self, prompt, functions, test_category):
2929
input_tool = convert_to_tool(
30-
functions, GORILLA_TO_PYTHON, self.model_style, test_category, True
30+
functions, GORILLA_TO_PYTHON, self.model_style, test_category
3131
)
3232
system_prompt = construct_tool_use_system_prompt(input_tool)
3333
start = time.time()

berkeley-function-call-leaderboard/model_handler/cohere_handler.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -75,7 +75,7 @@ def inference(self, prompt, functions, test_category):
7575
message = prompt
7676
# Convert JSON schema into R+ compatible function calls.
7777
cohere_tool = convert_to_tool(
78-
functions, GORILLA_TO_PYTHON, self.model_style, test_category, True
78+
functions, GORILLA_TO_PYTHON, self.model_style, test_category
7979
)
8080
start_time = time.time()
8181
if len(cohere_tool) > 0:

berkeley-function-call-leaderboard/model_handler/firework_ai_handler.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,7 @@ def inference(self, prompt, functions, test_category):
3939
functions = [functions]
4040
message = [{"role": "user", "content": prompt}]
4141
oai_tool = convert_to_tool(
42-
functions, GORILLA_TO_OPENAPI, self.model_style, test_category, True
42+
functions, GORILLA_TO_OPENAPI, self.model_style, test_category
4343
)
4444
start_time = time.time()
4545
model_name = self.model_name.replace("-FC", "")

berkeley-function-call-leaderboard/model_handler/gemini_handler.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -97,7 +97,7 @@ def inference(self, prompt, functions, test_category):
9797
prompt = augment_prompt_by_languge(prompt, test_category)
9898
functions = language_specific_pre_processing(functions, test_category)
9999
gemini_tool = convert_to_tool(
100-
functions, GORILLA_TO_OPENAPI, self.model_style, test_category, True
100+
functions, GORILLA_TO_OPENAPI, self.model_style, test_category
101101
)
102102
result, metadata = self._query_gemini(prompt, gemini_tool)
103103
return result, metadata

berkeley-function-call-leaderboard/model_handler/glm_handler.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@ def __init__(self, model_name, temperature=0.7, top_p=1, max_tokens=1000) -> Non
2323

2424
def apply_chat_template(self, prompt, function, test_category):
2525
oai_tool = convert_to_tool(
26-
function, GORILLA_TO_OPENAPI, ModelStyle.OpenAI, test_category, True
26+
function, GORILLA_TO_OPENAPI, ModelStyle.OpenAI, test_category
2727
)
2828
conversation = [{"role": "user", "content": prompt, "tools": oai_tool}]
2929
return self.tokenizer.apply_chat_template(

berkeley-function-call-leaderboard/model_handler/gpt_handler.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -56,7 +56,7 @@ def inference(self, prompt,functions,test_category):
5656
functions = [functions]
5757
message = [{"role": "user", "content": prompt}]
5858
oai_tool = convert_to_tool(
59-
functions, GORILLA_TO_OPENAPI, self.model_style, test_category, True
59+
functions, GORILLA_TO_OPENAPI, self.model_style, test_category
6060
)
6161
start_time = time.time()
6262
if len(oai_tool) > 0:

berkeley-function-call-leaderboard/model_handler/granite_handler.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,6 @@ def _format_prompt(prompt, function, test_category):
3838
GORILLA_TO_OPENAPI,
3939
model_style=ModelStyle.OSSMODEL,
4040
test_category=test_category,
41-
stringify_parameters=True,
4241
)
4342

4443
functions_str = "\n".join([json.dumps(func) for func in function])

berkeley-function-call-leaderboard/model_handler/hermes_handler.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ def __init__(self, model_name, temperature=0.7, top_p=1, max_tokens=1000) -> Non
1212
def _format_prompt(prompt, function, test_category):
1313
# Hermes uses the Langchain-to-OpenAI conversion. It does not use tool calls but function calls.
1414
function = convert_to_tool(
15-
function, GORILLA_TO_OPENAPI, ModelStyle.OSSMODEL, test_category, True
15+
function, GORILLA_TO_OPENAPI, ModelStyle.OSSMODEL, test_category
1616
)
1717
pydantic_format = """{"properties": {"arguments": {"title": "Arguments", "type": "object"}, "name": {"title": "Name", "type": "string"}}, "required": ["arguments", "name"], "title": "FunctionCall", "type": "object"}"""
1818
tool_call_format = """{"arguments": <args-dict>, "name": <function-name>}"""

0 commit comments

Comments (0)