Skip to content

Commit 9fcc494

Browse files
Fix vqa dataset loading (#1195)
1 parent: ab1d992 · commit: 9fcc494

File tree

1 file changed, with 3 additions and 9 deletions.

swift/llm/utils/dataset.py

Lines changed: 3 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -1346,16 +1346,14 @@ def preprocess_row(row):
13461346
def preprocess_okvqa(dataset):
13471347

13481348
def preprocess(row):
1349-
image = row['image']
13501349
query = row['question']
13511350
response = np.random.choice(row['answers'])
13521351
return {
13531352
'response': response,
1354-
'images': image,
13551353
'query': query,
13561354
}
13571355

1358-
return dataset.map(preprocess, load_from_cache_file=False)
1356+
return dataset.map(preprocess, load_from_cache_file=False).rename_column('image', 'images')
13591357

13601358

13611359
register_dataset(
@@ -1371,16 +1369,14 @@ def preprocess(row):
13711369
def preprocess_a_okvqa(dataset):
13721370

13731371
def preprocess(row):
1374-
image = row['image']
13751372
query = row['question']
13761373
response = np.random.choice(row['rationales'])
13771374
return {
13781375
'response': response,
1379-
'images': image,
13801376
'query': query,
13811377
}
13821378

1383-
return dataset.map(preprocess, load_from_cache_file=False)
1379+
return dataset.map(preprocess, load_from_cache_file=False).rename_column('image', 'images')
13841380

13851381

13861382
register_dataset(
@@ -1396,17 +1392,15 @@ def preprocess(row):
13961392
def preprocess_ocr_vqa(dataset):
13971393

13981394
def preprocess(row):
1399-
image = row['image']
14001395
idx = np.random.choice(range(len(row['questions'])))
14011396
query = row['questions'][idx]
14021397
response = row['answers'][idx]
14031398
return {
14041399
'response': response,
1405-
'images': image,
14061400
'query': query,
14071401
}
14081402

1409-
return dataset.map(preprocess, load_from_cache_file=False)
1403+
return dataset.map(preprocess, load_from_cache_file=False).rename_column('image', 'images')
14101404

14111405

14121406
register_dataset(

0 commit comments

Comments (0)