Skip to content
This repository was archived by the owner on Jan 15, 2024. It is now read-only.

Commit a563293

Browse files
eric-haibin-lin and sxjscience
authored and committed
remove padding warning (#916)
1 parent fbd7527 commit a563293

File tree

1 file changed

+7
-4
lines changed

1 file changed

+7
-4
lines changed

scripts/bert/finetune_classifier.py

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -295,9 +295,12 @@ def preprocess_data(tokenizer, task, batch_size, dev_batch_size, max_len, pad=Fa
295295
data_train_len = data_train.transform(
296296
lambda input_id, length, segment_id, label_id: length, lazy=False)
297297
# bucket sampler for training
298+
pad_val = vocabulary[vocabulary.padding_token]
298299
batchify_fn = nlp.data.batchify.Tuple(
299-
nlp.data.batchify.Pad(axis=0), nlp.data.batchify.Stack(),
300-
nlp.data.batchify.Pad(axis=0), nlp.data.batchify.Stack(label_dtype))
300+
nlp.data.batchify.Pad(axis=0, pad_val=pad_val), # input
301+
nlp.data.batchify.Stack(), # length
302+
nlp.data.batchify.Pad(axis=0, pad_val=0), # segment
303+
nlp.data.batchify.Stack(label_dtype)) # label
301304
batch_sampler = nlp.data.sampler.FixedBucketSampler(
302305
data_train_len,
303306
batch_size=batch_size,
@@ -327,8 +330,8 @@ def preprocess_data(tokenizer, task, batch_size, dev_batch_size, max_len, pad=Fa
327330

328331
# batchify for data test
329332
test_batchify_fn = nlp.data.batchify.Tuple(
330-
nlp.data.batchify.Pad(axis=0), nlp.data.batchify.Stack(),
331-
nlp.data.batchify.Pad(axis=0))
333+
nlp.data.batchify.Pad(axis=0, pad_val=pad_val), nlp.data.batchify.Stack(),
334+
nlp.data.batchify.Pad(axis=0, pad_val=0))
332335
# transform for data test
333336
test_trans = BERTDatasetTransform(tokenizer, max_len,
334337
class_labels=None,

0 commit comments

Comments (0)