22
22
from bfcl .constants .executable_backend_config import MULTI_TURN_FUNC_DOC_FILE_MAPPING
23
23
24
24
25
+ #### Helper functions to extract/parse/complete test category from different formats ####
26
+
27
+
25
28
def extract_test_category (input_string : Union [str , Path ]) -> str :
26
29
"""
27
30
Extract the test category from a given file name.
@@ -51,6 +54,18 @@ def extract_test_category_from_id(test_entry_id: str, remove_prereq: bool = Fals
51
54
return test_entry_id .rsplit ("_" , 1 )[0 ]
52
55
53
56
57
def extract_memory_backend_type(test_category):
    """
    Return the memory backend type encoded in a memory test category.

    The category is expected to look like `memory_kv` or
    `memory_knowledge_graph`; everything after the leading `memory_` is
    returned (`kv`, `knowledge_graph`, ...).

    Raises:
        ValueError: if `is_memory` does not accept the category.
    """
    if is_memory(test_category):
        # Drop the first len("memory_") characters; the remainder is the backend type.
        prefix_length = len("memory_")
        return test_category[prefix_length:]

    raise ValueError(f"Test category {test_category} is not a memory category.")
67
+
68
+
54
69
def find_file_by_category (
55
70
test_category : str ,
56
71
folder_path : Path ,
@@ -101,6 +116,25 @@ def get_file_name_by_category(
101
116
return file_name
102
117
103
118
119
def parse_test_category_argument(test_category_args: list[str]) -> list[str]:
    """
    Expand category arguments into a sorted list of unique test category names.

    Each argument is either a key of `TEST_COLLECTION_MAPPING`, in which case
    every test name it maps to is included, or a member of `ALL_CATEGORIES`,
    in which case it is included as is. The collection lookup is tried first,
    so it wins when a name appears in both.

    Args:
        test_category_args: Collection names and/or individual test category names.

    Returns:
        The de-duplicated test category names, sorted.

    Raises:
        ValueError: If an argument is neither a known collection nor a known
            category. (ValueError is a subclass of Exception, so callers that
            catch Exception keep working.)
    """
    test_name_total = set()

    for test_category in test_category_args:
        if test_category in TEST_COLLECTION_MAPPING:
            # A collection name stands for all of its member categories.
            test_name_total.update(TEST_COLLECTION_MAPPING[test_category])
        elif test_category in ALL_CATEGORIES:
            test_name_total.add(test_category)
        else:
            # Invalid test category name
            raise ValueError(f"Invalid test category name provided: {test_category}")

    return sorted(test_name_total)
133
+
134
+
135
+ #### Predicate functions to check the test category ####
136
+
137
+
104
138
def is_web_search(test_category):
    """Return True when `web_search` occurs in the given test category."""
    marker = "web_search"
    return marker in test_category
106
140
@@ -164,16 +198,8 @@ def is_sql(test_category):
164
198
def contain_multi_turn_interaction(test_category):
    """
    Report whether the test category involves multi-turn interaction.

    A category qualifies when `is_multi_turn` accepts it; otherwise the
    result of `is_agentic` decides.
    """
    multi_turn = is_multi_turn(test_category)
    # `or` short-circuits: is_agentic is only consulted when the first check is falsy.
    return multi_turn or is_agentic(test_category)
166
200
167
- def extract_memory_backend_type (test_category ):
168
- """
169
- This function extracts the memory backend type from the test category.
170
- The test category should be in the form of `memory_kv` or `memory_knowledge_graph`, etc.
171
- """
172
- if not is_memory (test_category ):
173
- raise ValueError (f"Test category { test_category } is not a memory category." )
174
201
175
- # Split the test category by underscores and extract the backend type
176
- return test_category [len ("memory_" ) :]
202
+ #### Helper functions to load/write the dataset files ####
177
203
178
204
179
205
def load_file (file_path , sort_by_id = False ):
@@ -188,6 +214,29 @@ def load_file(file_path, sort_by_id=False):
188
214
return result
189
215
190
216
217
def load_dataset_entry(test_category: str) -> list[dict]:
    """
    Load and post-process the dataset entries for a given test category.

    The input should not be a test category group, but a specific test category.

    Non-memory categories are read from `{VERSION_PREFIX}_{test_category}.json`
    under `PROMPT_PATH`. Memory categories are assembled from one
    `{VERSION_PREFIX}_memory_{scenario}.json` file per entry of
    `MEMORY_SCENARIO_NAME`, each passed through `process_memory_test_case`
    together with the requested category and the scenario name.

    Returns:
        The entries after `process_agentic_test_case`,
        `populate_test_cases_with_predefined_functions` and `process_func_doc`
        have been applied, in that order.
    """
    if is_memory(test_category):
        # Memory categories: gather the entries of every memory scenario file.
        all_entries = []
        for scenario in MEMORY_SCENARIO_NAME:
            # No whitespace inside the file name: it must match the file on disk.
            file_name = f"{VERSION_PREFIX}_memory_{scenario}.json"
            entries = load_file(PROMPT_PATH / file_name)
            all_entries += process_memory_test_case(entries, test_category, scenario)
    else:
        file_name = f"{VERSION_PREFIX}_{test_category}.json"
        all_entries = load_file(PROMPT_PATH / file_name)

    # Common post-processing, applied to memory and non-memory entries alike.
    all_entries = process_agentic_test_case(all_entries)
    all_entries = populate_test_cases_with_predefined_functions(all_entries)
    all_entries = process_func_doc(all_entries)

    return all_entries
238
+
239
+
191
240
def write_list_of_dicts_to_file (filename , data , subdir = None ):
192
241
if subdir :
193
242
# Ensure the subdirectory exists
@@ -264,6 +313,9 @@ def sort_key(entry):
264
313
return (priority , test_category , int (index ))
265
314
266
315
316
+ #### Helper functions to check the output format ####
317
+
318
+
267
319
# TODO: Reorganize this function to be more readable
268
320
def is_function_calling_format_output (decoded_output ):
269
321
"""
@@ -313,20 +365,7 @@ def is_empty_output(decoded_output):
313
365
return False
314
366
315
367
316
- def parse_test_category_argument (test_category_args : list [str ]) -> list [str ]:
317
- test_name_total = set ()
318
-
319
- for test_category in test_category_args :
320
- if test_category in TEST_COLLECTION_MAPPING :
321
- for test_name in TEST_COLLECTION_MAPPING [test_category ]:
322
- test_name_total .add (test_name )
323
- elif test_category in ALL_CATEGORIES :
324
- test_name_total .add (test_category )
325
- else :
326
- # Invalid test category name
327
- raise Exception (f"Invalid test category name provided: { test_category } " )
328
-
329
- return sorted (list (test_name_total ))
368
+ #### Helper functions to process the dataset entries ####
330
369
331
370
332
371
def _get_language_specific_hint (test_category ):
@@ -496,29 +535,6 @@ def populate_test_cases_with_predefined_functions(test_cases: list[dict]) -> lis
496
535
return test_cases
497
536
498
537
499
- def load_dataset_entry (test_category : str ) -> list [dict ]:
500
- """
501
- This function retrieves the dataset entry for a given test category.
502
- The input should not be a test category goup, but a specific test category.
503
- """
504
- if not is_memory (test_category ):
505
- file_name = f"{ VERSION_PREFIX } _{ test_category } .json"
506
- all_entries = load_file (PROMPT_PATH / file_name )
507
- else :
508
- # Memory categories
509
- all_entries = []
510
- for scenario in MEMORY_SCENARIO_NAME :
511
- file_name = f"{ VERSION_PREFIX } _memory_{ scenario } .json"
512
- entries = load_file (PROMPT_PATH / file_name )
513
- all_entries += process_memory_test_case (entries , test_category , scenario )
514
-
515
- all_entries = process_agentic_test_case (all_entries )
516
- all_entries = populate_test_cases_with_predefined_functions (all_entries )
517
- all_entries = process_func_doc (all_entries )
518
-
519
- return all_entries
520
-
521
-
522
538
def clean_up_memory_prereq_entries (test_cases : list [dict ]) -> list [dict ]:
523
539
"""
524
540
Remove memory-prerequisite test cases when their corresponding
0 commit comments