Skip to content

Commit 724b543

Browse files
Guojiacheng2017kushalarora
authored and committed
Update visualize.ipynb (open-compass#516)
* Update visualize.ipynb: 1. fix the problem that some benchmark scores are too high and fall out of range; 2. fix the problem that some models lack an evaluation on MMBench_TEST_EN. * Update visualize.ipynb
1 parent 46ea6cd commit 724b543

File tree

1 file changed

+33
-3
lines changed

1 file changed

+33
-3
lines changed

scripts/visualize.ipynb

Lines changed: 33 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,24 @@
5050
" for item in data_list:\n",
5151
" assert new_range[0] <= item[lb] <= new_range[1]\n",
5252
" item[lb] = (item[lb] - new_range[0]) / max_range * 100\n",
53+
" return data_list, range_map\n",
54+
"\n",
55+
"# Log-scale variant of the normalization, for benchmarks whose scores are too high and fall out of range\n",
56+
"def log_normalize(raw_data, labels):\n",
57+
" data_list = cp.deepcopy(raw_data)\n",
58+
" minimum, maximum, max_range, range_map = {}, {}, 0, {}\n",
59+
" for lb in labels:\n",
60+
" minimum[lb] = min([np.log(x[lb]) for x in data_list])\n",
61+
" maximum[lb] = max([np.log(x[lb]) for x in data_list])\n",
62+
" max_range = max(max_range, maximum[lb] - minimum[lb])\n",
63+
" max_range *= 1.005\n",
64+
" for lb in labels:\n",
65+
" mid = (minimum[lb] + maximum[lb]) / 2\n",
66+
" new_range = (mid - max_range / 2, mid + max_range / 2) if (mid + max_range / 2) < 100 else (100 - max_range, 100)\n",
67+
" range_map[lb] = new_range\n",
68+
" for item in data_list:\n",
69+
" assert new_range[0] <= np.log(item[lb]) <= new_range[1]\n",
70+
" item[lb] = (np.log(item[lb]) - new_range[0]) / max_range * 100\n",
5371
" return data_list, range_map"
5472
]
5573
},
@@ -64,11 +82,19 @@
6482
"models = list(data)\n",
6583
"print(models)\n",
6684
"\n",
85+
"# model2vis = [\n",
86+
"# 'GPT-4v (detail: low)', 'GeminiProVision', 'Qwen-VL-Plus', \n",
87+
"# 'InternLM-XComposer2-VL', 'LLaVA-v1.5-13B', 'CogVLM-17B-Chat',\n",
88+
"# 'mPLUG-Owl2', 'Qwen-VL-Chat', 'IDEFICS-80B-Instruct'\n",
89+
"# ]\n",
90+
"\n",
6791
"model2vis = [\n",
68-
" 'GPT-4v (detail: low)', 'GeminiProVision', 'Qwen-VL-Plus', \n",
69-
" 'InternLM-XComposer2-VL', 'LLaVA-v1.5-13B', 'CogVLM-17B-Chat',\n",
92+
" # 'GPT-4v (detail: low)', 'GeminiProVision', 'InternLM-XComposer2-VL', \n",
93+
" 'GPT-4v (1106, detail-low)', 'Gemini-1.0-Pro', 'Gemini-1.5-Pro', #'Gemini-1.5-Flash', 'Qwen-VL-Plus', \n",
94+
" 'InternLM-XComposer2', 'LLaVA-v1.5-13B', 'CogVLM-17B-Chat',\n",
7095
" 'mPLUG-Owl2', 'Qwen-VL-Chat', 'IDEFICS-80B-Instruct'\n",
7196
"]\n",
97+
"\n",
7298
"colors = [\n",
7399
" '#1f77b4', '#ff7f0e', '#2ca02c', '#d62728', '#9467bd', '#8c564b', \n",
74100
" '#e377c2', '#7f7f7f', '#bcbd22'\n",
@@ -81,8 +107,12 @@
81107
"metadata": {},
82108
"outputs": [],
83109
"source": [
110+
"from collections import defaultdict\n",
111+
"\n",
84112
"split = 'MMBench_TEST_EN'\n",
85-
"data_sub = {k: v[split] for k, v in data.items()}\n",
113+
"# data_sub = {k: v[split] for k, v in data.items()}\n",
114+
"data_sub = {k: defaultdict(int, v)[split] for k, v in data.items()}\n",
115+
"# defaultdict(int, v) yields 0 for models that lack an MMBench_TEST_EN evaluation\n",
86116
"\n",
87117
"labels = list(data_sub[model2vis[0]])\n",
88118
"labels.remove('Overall')\n",

0 commit comments

Comments
 (0)