
Commit 86f4621

Delete Python3.5 incompatible multiprocessing.Pool usage of BERTDatasetTransform
Traceback (most recent call last):
  File "./scripts/bert/finetune_classifier.py", line 373, in <module>
    bert_tokenizer, task, batch_size, dev_batch_size, args.max_len, vocabulary, args.pad)
  File "./scripts/bert/finetune_classifier.py", line 306, in preprocess_data
    data_train = mx.gluon.data.SimpleDataset(pool.map(trans, train_tsv))
  File "/var/lib/jenkins/workspace/gluon-nlp-gpu-py3-master@6/conda/gpu/py3-master/lib/python3.5/multiprocessing/pool.py", line 266, in map
    return self._map_async(func, iterable, mapstar, chunksize).get()
  File "/var/lib/jenkins/workspace/gluon-nlp-gpu-py3-master@6/conda/gpu/py3-master/lib/python3.5/multiprocessing/pool.py", line 644, in get
    raise self._value
  File "/var/lib/jenkins/workspace/gluon-nlp-gpu-py3-master@6/conda/gpu/py3-master/lib/python3.5/multiprocessing/pool.py", line 424, in _handle_tasks
    put(task)
  File "/var/lib/jenkins/workspace/gluon-nlp-gpu-py3-master@6/conda/gpu/py3-master/lib/python3.5/multiprocessing/connection.py", line 206, in send
    self._send_bytes(ForkingPickler.dumps(obj))
  File "/var/lib/jenkins/workspace/gluon-nlp-gpu-py3-master@6/conda/gpu/py3-master/lib/python3.5/multiprocessing/reduction.py", line 50, in dumps
    cls(buf, protocol).dump(obj)
_pickle.PicklingError: Can't pickle <class 'module'>: attribute lookup module on builtins failed
1 parent 718903f commit 86f4621
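
The failure is generic to multiprocessing.Pool: Pool.map pickles the callable and its arguments to ship them to worker processes, and that breaks when the callable (here BERTDatasetTransform, presumably through an attribute that references a module object) cannot be serialized, while the built-in map runs in-process and needs no pickling. A minimal sketch of the behaviour, using a hypothetical stand-in class rather than the project's actual BERTDatasetTransform:

import multiprocessing


class Transform(object):
    """Hypothetical stand-in for BERTDatasetTransform: holds an unpicklable module reference."""

    def __init__(self):
        self.mod = multiprocessing  # module objects cannot be pickled

    def __call__(self, x):
        return x * 2


if __name__ == '__main__':
    trans = Transform()
    data = [1, 2, 3]

    # Built-in map stays in the current process, so nothing is pickled.
    print(list(map(trans, data)))  # [2, 4, 6]

    # Pool.map pickles `trans` to send it to the workers; on Python 3.5 this
    # raises _pickle.PicklingError: Can't pickle <class 'module'>: ...
    pool = multiprocessing.Pool()
    try:
        pool.map(trans, data)
    except Exception as err:
        print(type(err).__name__, err)
    finally:
        pool.close()
        pool.join()

The trade-off of the change below is that preprocessing now runs serially in the main process instead of across a worker pool, in exchange for keeping the script working on Python 3.5.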

1 file changed (+3, -6 lines)

scripts/bert/finetune_classifier.py

Lines changed: 3 additions & 6 deletions
@@ -39,7 +39,6 @@
 import random
 import logging
 import warnings
-import multiprocessing
 import numpy as np
 import mxnet as mx
 from mxnet import gluon
@@ -289,8 +288,6 @@
 
 def preprocess_data(tokenizer, task, batch_size, dev_batch_size, max_len, vocab, pad=False):
     """Train/eval Data preparation function."""
-    pool = multiprocessing.Pool()
-
     # transformation for data train and dev
     label_dtype = 'float32' if not task.class_labels else 'int32'
     trans = BERTDatasetTransform(tokenizer, max_len,
@@ -303,7 +300,7 @@ def preprocess_data(tokenizer, task, batch_size, dev_batch_size, max_len, vocab,
     # data train
     # task.dataset_train returns (segment_name, dataset)
     train_tsv = task.dataset_train()[1]
-    data_train = mx.gluon.data.SimpleDataset(pool.map(trans, train_tsv))
+    data_train = mx.gluon.data.SimpleDataset(list(map(trans, train_tsv)))
     data_train_len = data_train.transform(
         lambda input_id, length, segment_id, label_id: length, lazy=False)
     # bucket sampler for training
@@ -331,7 +328,7 @@ def preprocess_data(tokenizer, task, batch_size, dev_batch_size, max_len, vocab,
     dev_tsv_list = dev_tsv if isinstance(dev_tsv, list) else [dev_tsv]
     loader_dev_list = []
     for segment, data in dev_tsv_list:
-        data_dev = mx.gluon.data.SimpleDataset(pool.map(trans, data))
+        data_dev = mx.gluon.data.SimpleDataset(list(map(trans, data)))
         loader_dev = mx.gluon.data.DataLoader(
             data_dev,
             batch_size=dev_batch_size,
@@ -356,7 +353,7 @@ def preprocess_data(tokenizer, task, batch_size, dev_batch_size, max_len, vocab,
     test_tsv_list = test_tsv if isinstance(test_tsv, list) else [test_tsv]
     loader_test_list = []
     for segment, data in test_tsv_list:
-        data_test = mx.gluon.data.SimpleDataset(pool.map(test_trans, data))
+        data_test = mx.gluon.data.SimpleDataset(list(map(test_trans, data)))
         loader_test = mx.gluon.data.DataLoader(
             data_test,
             batch_size=dev_batch_size,
