9 changes: 9 additions & 0 deletions berkeley-function-call-leaderboard/README.md
@@ -214,6 +214,15 @@ Some companies have proposed some optimization strategies in their models' handl
 
 ## Changelog
 
+* [July 26, 2024] [#549](https://github.com/ShishirPatil/gorilla/pull/549): Fix `js_type_converter.py` to properly handle JavaScript array values inside dictionaries.
+* [July 25, 2024] [#532](https://github.com/ShishirPatil/gorilla/pull/532), [#543](https://github.com/ShishirPatil/gorilla/pull/543), [#556](https://github.com/ShishirPatil/gorilla/pull/556), [#542](https://github.com/ShishirPatil/gorilla/pull/542): Add the following new models to the leaderboard:
+  - `Salesforce/xLAM-7b-fc-r`
+  - `Salesforce/xLAM-1b-fc-r`
+  - `yi-large-fc`
+  - `NousResearch/Hermes-2-Pro-Llama-3-8B`
+  - `NousResearch/Hermes-2-Pro-Llama-3-70B`
+  - `NousResearch/Hermes-2-Theta-Llama-3-8B`
+  - `NousResearch/Hermes-2-Theta-Llama-3-70B`
 * [July 22, 2024] [#540](https://github.com/ShishirPatil/gorilla/pull/540): Chore: Improve handling of vLLM's cleanup-phase error by combining all selected test categories into a single task submitted to the vLLM server.
 * [July 21, 2024] [#538](https://github.com/ShishirPatil/gorilla/pull/538), [#545](https://github.com/ShishirPatil/gorilla/pull/545): Fix the `language_specific_pre_processing` and `convert_to_tool` functions to properly handle pre-processing for prompts and function docs in the Java and JavaScript test categories. All entries in these categories are affected.
 * [July 20, 2024] [#537](https://github.com/ShishirPatil/gorilla/pull/537): Update the generation script for locally-hosted OSS models to use single-node multi-GPU inference (tensor parallelism); Ray is no longer used.
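As context for the July 20 entry (#537) above: single-node tensor-parallel inference with vLLM shards the model across local GPUs without a Ray cluster. A minimal sketch using vLLM's offline API follows; the model name and GPU count are illustrative placeholders, not the values from the actual generation script:

```python
from vllm import LLM, SamplingParams

# Shard the model across 4 local GPUs (tensor parallelism); no Ray cluster needed.
# The model and tensor_parallel_size here are placeholders, not the script's real settings.
llm = LLM(model="NousResearch/Hermes-2-Pro-Llama-3-8B", tensor_parallel_size=4)
params = SamplingParams(temperature=0.0, max_tokens=512)

outputs = llm.generate(["What is the weather in Berkeley?"], params)
print(outputs[0].outputs[0].text)
```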
js_type_converter.py
@@ -102,23 +102,30 @@ def parse_js_collection(code, type_str, nested_type=None):
         return code
 
     elif type_str == "dict":
-
         if code == "{}":
             return {}  # Return an empty dictionary for an empty object
         dict_pattern = r"\{(.*?)\}"
         # Check if the code is a dictionary
         dict_match = re.match(dict_pattern, code)
         if dict_match:
             try:
-                pairs = dict_match.group(1).split(",")
+                content = dict_match.group(1)
+                pairs = re.findall(r"([^:]+):\s*(.*?)(?:,\s*(?=[^,]+:)|$)", content)
                 dictionary = {}
-                for pair in pairs:
-                    key, value = pair.split(":")
-                    key = parse_js_value(key.strip().strip("'"))
-                    value = parse_js_value(value.strip().strip("'"))
-                    dictionary[key] = value
+                for key, value in pairs:
+                    key = key.strip().strip("'\"")
+                    value = value.strip()
+                    if value.startswith("[") and value.endswith("]"):
+                        # Handle array values
+                        dictionary[key] = parse_js_collection(value, "array")
+                    elif value.startswith("{") and value.endswith("}"):
+                        # Handle nested dictionary values
+                        dictionary[key] = parse_js_collection(value, "dict")
+                    else:
+                        dictionary[key] = parse_js_value(value.strip("'\""))
                 return dictionary
-            except:
+            except Exception as e:
+                print(f"Error parsing dictionary: {e}")
                 return code
     else:
         return code  # default to string
@@ -147,8 +154,6 @@ def parse_js_value(value_str: str):
 
 
 # Write tests for the `js_type_converter` function
-
-
 def test_js_type_converter():
     assert js_type_converter("true", "Boolean") == True
     assert js_type_converter("false", "Boolean") == False
Expand Down Expand Up @@ -288,6 +293,19 @@ def test_js_type_converter_nested_array():
print("All nested array tests passed successfully!")


def test_js_type_converter_dictionary_with_arrays():
complex_dict = js_type_converter(
'{"initialState": initialStateObject, "reducers": reducersMap, "middlewares": ["loggerMiddleware"], "enhancers": ["applyMiddleware", "myMiddleWare"]}',
"dict",
)
assert isinstance(complex_dict, dict)
assert complex_dict["initialState"] == "initialStateObject"
assert complex_dict["reducers"] == "reducersMap"
assert complex_dict["middlewares"] == ["loggerMiddleware"]
assert complex_dict["enhancers"] == ["applyMiddleware", "myMiddleWare"]
print("Complex dictionary test passed successfully!")

if __name__ == "__main__":
test_js_type_converter()
test_js_type_converter_nested_array()
test_js_type_converter_dictionary_with_arrays()
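To see why the #549 fix works, compare the old comma split with the new pair-splitting regex on the kind of dictionary body the test above exercises. This is a standalone sketch using the exact regex from the diff; it does not import the module itself:

```python
import re

# Body of a JS object literal, as exercised by the new test above.
content = '"middlewares": ["loggerMiddleware"], "enhancers": ["applyMiddleware", "myMiddleWare"]'

# Old behavior: splitting on every comma tears the second array apart.
print(content.split(","))
# ['"middlewares": ["loggerMiddleware"]', ' "enhancers": ["applyMiddleware"', ' "myMiddleWare"]']

# New behavior: only a comma followed by another `key:` segment ends a pair,
# so commas inside array values are preserved.
pairs = re.findall(r"([^:]+):\s*(.*?)(?:,\s*(?=[^,]+:)|$)", content)
print(pairs)
# [('"middlewares"', '["loggerMiddleware"]'), ('"enhancers"', '["applyMiddleware", "myMiddleWare"]')]
```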
@@ -13,14 +13,3 @@ def __init__(self, model_name, temperature=0.7, top_p=1, max_tokens=1000) -> None:
         self.model_style = ModelStyle.OpenAI
 
         self.client = OpenAI(base_url="http://localhost:8000/v1", api_key="functionary")
-
-    def write(self, result, file_to_open):
-        model_name = self.model_name
-        if not os.path.exists("./result"):
-            os.mkdir("./result")
-        if not os.path.exists("./result/" + model_name.replace("/", "_")):
-            os.mkdir("./result/" + model_name.replace("/", "_"))
-        with open(
-            "./result/" + model_name.replace("/", "_") + "/" + file_to_open, "a+"
-        ) as f:
-            f.write(json.dumps(result) + "\n")
10 changes: 0 additions & 10 deletions berkeley-function-call-leaderboard/model_handler/nvidia_handler.py
@@ -53,16 +53,6 @@ def inference(self, prompt, functions, test_category):
         output_token = response.usage.completion_tokens
         metadata = {"input_tokens": input_token, "output_tokens": output_token, "latency": latency}
         return result, metadata
-
-    def write(self, result, file_to_open):
-        if not os.path.exists("./result"):
-            os.mkdir("./result")
-        if not os.path.exists("./result/" + self.model_name.replace("/", "_")):
-            os.mkdir("./result/" + self.model_name.replace("/", "_"))
-        with open(
-            "./result/" + self.model_name.replace("/", "_") + "/" + file_to_open.replace(".json", "_result.json"), "a+"
-        ) as f:
-            f.write(json.dumps(result) + "\n")
 
     def decode_ast(self, result, language="Python"):
         result = result.replace("\n", "")
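Both deleted `write` methods above duplicated the same logic: build a `./result/<model_name>/` directory (creating it if missing, with `/` in the model name flattened to `_`) and append one JSON line per result. The shared implementation the handlers now rely on is not shown in this diff; the following is only a sketch of the duplicated logic being removed, with `write_result` as a hypothetical name:

```python
import json
import os

def write_result(model_name: str, result: dict, file_to_open: str) -> None:
    # Mirror of the removed per-handler logic: one directory per model,
    # with "/" in the model name flattened to "_".
    out_dir = os.path.join("./result", model_name.replace("/", "_"))
    os.makedirs(out_dir, exist_ok=True)  # replaces the paired exists()/mkdir() checks
    with open(os.path.join(out_dir, file_to_open), "a+") as f:
        f.write(json.dumps(result) + "\n")
```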