9 changes: 9 additions & 0 deletions berkeley-function-call-leaderboard/README.md
@@ -214,6 +214,15 @@ Some companies have proposed some optimization strategies in their models' handl
 
 ## Changelog
 
+* [July 26, 2024] [#549](https://github.com/ShishirPatil/gorilla/pull/549): Fix `js_type_converter.py` to properly handle JavaScript array values inside dictionaries.
+* [July 25, 2024] [#532](https://github.com/ShishirPatil/gorilla/pull/532), [#543](https://github.com/ShishirPatil/gorilla/pull/543), [#556](https://github.com/ShishirPatil/gorilla/pull/556), [#542](https://github.com/ShishirPatil/gorilla/pull/542): Add the following new models to the leaderboard:
+  - `Salesforce/xLAM-7b-fc-r`
+  - `Salesforce/xLAM-1b-fc-r`
+  - `yi-large-fc`
+  - `NousResearch/Hermes-2-Pro-Llama-3-8B`
+  - `NousResearch/Hermes-2-Pro-Llama-3-70B`
+  - `NousResearch/Hermes-2-Theta-Llama-3-8B`
+  - `NousResearch/Hermes-2-Theta-Llama-3-70B`
 * [July 22, 2024] [#540](https://github.com/ShishirPatil/gorilla/pull/540): Chore: Improve handling of vLLM's cleanup-phase error by combining all selected test categories into a single task submitted to the vLLM server.
 * [July 21, 2024] [#538](https://github.com/ShishirPatil/gorilla/pull/538), [#545](https://github.com/ShishirPatil/gorilla/pull/545): Fix the `language_specific_pre_processing` and `convert_to_tool` functions to properly handle pre-processing for prompts and function docs in the Java and JavaScript test categories. All entries in these categories are affected.
 * [July 20, 2024] [#537](https://github.com/ShishirPatil/gorilla/pull/537): Update the generation script for locally-hosted OSS models to use single-node multi-GPU inference (tensor parallelism); Ray is no longer used.
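As context for the July 20 entry (#537) above: single-node tensor-parallel inference with vLLM shards the model across local GPUs without a Ray cluster. A minimal sketch using vLLM's offline API follows; the model name and GPU count are illustrative placeholders, not the values from the actual generation script:

```python
from vllm import LLM, SamplingParams

# Shard the model across 4 local GPUs (tensor parallelism); no Ray cluster needed.
# The model and tensor_parallel_size here are placeholders, not the script's real settings.
llm = LLM(model="NousResearch/Hermes-2-Pro-Llama-3-8B", tensor_parallel_size=4)
params = SamplingParams(temperature=0.0, max_tokens=512)

outputs = llm.generate(["What is the weather in Berkeley?"], params)
print(outputs[0].outputs[0].text)
```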
js_type_converter.py
@@ -102,23 +102,30 @@ def parse_js_collection(code, type_str, nested_type=None):
         return code
 
     elif type_str == "dict":
-
         if code == "{}":
             return {}  # Return an empty dictionary for an empty object
         dict_pattern = r"\{(.*?)\}"
         # Check if the code is a dictionary
         dict_match = re.match(dict_pattern, code)
         if dict_match:
             try:
-                pairs = dict_match.group(1).split(",")
+                content = dict_match.group(1)
+                pairs = re.findall(r"([^:]+):\s*(.*?)(?:,\s*(?=[^,]+:)|$)", content)
                 dictionary = {}
-                for pair in pairs:
-                    key, value = pair.split(":")
-                    key = parse_js_value(key.strip().strip("'"))
-                    value = parse_js_value(value.strip().strip("'"))
-                    dictionary[key] = value
+                for key, value in pairs:
+                    key = key.strip().strip("'\"")
+                    value = value.strip()
+                    if value.startswith("[") and value.endswith("]"):
+                        # Handle array values
+                        dictionary[key] = parse_js_collection(value, "array")
+                    elif value.startswith("{") and value.endswith("}"):
+                        # Handle nested dictionary values
+                        dictionary[key] = parse_js_collection(value, "dict")
+                    else:
+                        dictionary[key] = parse_js_value(value.strip("'\""))
                 return dictionary
-            except:
+            except Exception as e:
+                print(f"Error parsing dictionary: {e}")
                 return code
     else:
         return code  # default to string
@@ -147,8 +154,6 @@ def parse_js_value(value_str: str):
 
 
 # Write tests for the `js_type_converter` function
-
-
 def test_js_type_converter():
     assert js_type_converter("true", "Boolean") == True
     assert js_type_converter("false", "Boolean") == False
Expand Down Expand Up @@ -288,6 +293,19 @@ def test_js_type_converter_nested_array():
print("All nested array tests passed successfully!")


def test_js_type_converter_dictionary_with_arrays():
complex_dict = js_type_converter(
'{"initialState": initialStateObject, "reducers": reducersMap, "middlewares": ["loggerMiddleware"], "enhancers": ["applyMiddleware", "myMiddleWare"]}',
"dict",
)
assert isinstance(complex_dict, dict)
assert complex_dict["initialState"] == "initialStateObject"
assert complex_dict["reducers"] == "reducersMap"
assert complex_dict["middlewares"] == ["loggerMiddleware"]
assert complex_dict["enhancers"] == ["applyMiddleware", "myMiddleWare"]
print("Complex dictionary test passed successfully!")

if __name__ == "__main__":
test_js_type_converter()
test_js_type_converter_nested_array()
test_js_type_converter_dictionary_with_arrays()
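To see why the #549 fix works, compare the old comma split with the new pair-splitting regex on the kind of dictionary body the test above exercises. This is a standalone sketch using the exact regex from the diff; it does not import the module itself:

```python
import re

# Body of a JS object literal, as exercised by the new test above.
content = '"middlewares": ["loggerMiddleware"], "enhancers": ["applyMiddleware", "myMiddleWare"]'

# Old behavior: splitting on every comma tears the second array apart.
print(content.split(","))
# ['"middlewares": ["loggerMiddleware"]', ' "enhancers": ["applyMiddleware"', ' "myMiddleWare"]']

# New behavior: only a comma followed by another `key:` segment ends a pair,
# so commas inside array values are preserved.
pairs = re.findall(r"([^:]+):\s*(.*?)(?:,\s*(?=[^,]+:)|$)", content)
print(pairs)
# [('"middlewares"', '["loggerMiddleware"]'), ('"enhancers"', '["applyMiddleware", "myMiddleWare"]')]
```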
@@ -13,14 +13,3 @@ def __init__(self, model_name, temperature=0.7, top_p=1, max_tokens=1000) -> None:
         self.model_style = ModelStyle.OpenAI
 
         self.client = OpenAI(base_url="http://localhost:8000/v1", api_key="functionary")
-
-    def write(self, result, file_to_open):
-        model_name = self.model_name
-        if not os.path.exists("./result"):
-            os.mkdir("./result")
-        if not os.path.exists("./result/" + model_name.replace("/", "_")):
-            os.mkdir("./result/" + model_name.replace("/", "_"))
-        with open(
-            "./result/" + model_name.replace("/", "_") + "/" + file_to_open, "a+"
-        ) as f:
-            f.write(json.dumps(result) + "\n")
10 changes: 0 additions & 10 deletions berkeley-function-call-leaderboard/model_handler/nvidia_handler.py
@@ -53,16 +53,6 @@ def inference(self, prompt, functions, test_category):
         output_token = response.usage.completion_tokens
         metadata = {"input_tokens": input_token, "output_tokens": output_token, "latency": latency}
         return result, metadata
-
-    def write(self, result, file_to_open):
-        if not os.path.exists("./result"):
-            os.mkdir("./result")
-        if not os.path.exists("./result/" + self.model_name.replace("/", "_")):
-            os.mkdir("./result/" + self.model_name.replace("/", "_"))
-        with open(
-            "./result/" + self.model_name.replace("/", "_") + "/" + file_to_open.replace(".json", "_result.json"), "a+"
-        ) as f:
-            f.write(json.dumps(result) + "\n")
 
     def decode_ast(self, result, language="Python"):
         result = result.replace("\n", "")
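Both deleted `write` methods above duplicated the same logic: build a `./result/<model_name>/` directory (creating it if missing, with `/` in the model name flattened to `_`) and append one JSON line per result. The shared implementation the handlers now rely on is not shown in this diff; the following is only a sketch of the duplicated logic being removed, with `write_result` as a hypothetical name:

```python
import json
import os

def write_result(model_name: str, result: dict, file_to_open: str) -> None:
    # Mirror of the removed per-handler logic: one directory per model,
    # with "/" in the model name flattened to "_".
    out_dir = os.path.join("./result", model_name.replace("/", "_"))
    os.makedirs(out_dir, exist_ok=True)  # replaces the paired exists()/mkdir() checks
    with open(os.path.join(out_dir, file_to_open), "a+") as f:
        f.write(json.dumps(result) + "\n")
```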