Skip to content
This repository was archived by the owner on Jan 15, 2024. It is now read-only.

Commit 8cd5a12

Browse files
Sheng Zha committed "fix lint"
1 parent 0e518d2, commit 8cd5a12

File tree

19 files changed: 426 additions and 363 deletions.

scripts/bert/bert_qa_dataset.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,8 +18,8 @@
1818
import time
1919
from functools import partial
2020

21-
from gluonnlp.data.utils import whitespace_splitter
2221
from mxnet.gluon.data import SimpleDataset
22+
from gluonnlp.data.utils import whitespace_splitter
2323

2424

2525
class SquadExample(object):

scripts/bert/staticbert/static_export_squad.py

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -214,9 +214,10 @@ def evaluate(data_source):
214214
tic = time.time()
215215
for batch in data_source:
216216
inputs, token_types, valid_length = batch
217-
out = net(inputs.astype('float32').as_in_context(ctx),
218-
token_types.astype('float32').as_in_context(ctx),
219-
valid_length.astype('float32').as_in_context(ctx))
217+
net(inputs.astype('float32').as_in_context(ctx),
218+
token_types.astype('float32').as_in_context(ctx),
219+
valid_length.astype('float32').as_in_context(ctx))
220+
mx.nd.waitall()
220221
toc = time.time()
221222
log.info('Inference time cost={:.2f} s, Thoughput={:.2f} samples/s'
222223
.format(toc - tic,

scripts/bert/staticbert/static_finetune_squad.py

Lines changed: 28 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -1,20 +1,3 @@
1-
"""
2-
SQuAD with Static Bidirectional Encoder Representations from Transformers (BERT)
3-
4-
=========================================================================================
5-
6-
This example shows how to finetune a model with pre-trained BERT parameters with static shape for
7-
SQuAD, with Gluon NLP Toolkit.
8-
9-
@article{devlin2018bert,
10-
title={BERT: Pre-training of Deep Bidirectional Transformers for Language Understanding},
11-
author={Devlin, Jacob and Chang, Ming- \
12-
Wei and Lee, Kenton and Toutanova, Kristina},
13-
journal={arXiv preprint arXiv:1810.04805},
14-
year={2018}
15-
}
16-
"""
17-
181
# coding=utf-8
192

203
# Licensed to the Apache Software Foundation (ASF) under one
@@ -34,7 +17,22 @@
3417
# specific language governing permissions and limitations
3518
# under the License.
3619
# pylint:disable=redefined-outer-name,logging-format-interpolation
20+
"""
21+
SQuAD with Static Bidirectional Encoder Representations from Transformers (BERT)
3722
23+
=========================================================================================
24+
25+
This example shows how to finetune a model with pre-trained BERT parameters with static shape for
26+
SQuAD, with Gluon NLP Toolkit.
27+
28+
@article{devlin2018bert,
29+
title={BERT: Pre-training of Deep Bidirectional Transformers for Language Understanding},
30+
author={Devlin, Jacob and Chang, Ming- \
31+
Wei and Lee, Kenton and Toutanova, Kristina},
32+
journal={arXiv preprint arXiv:1810.04805},
33+
year={2018}
34+
}
35+
"""
3836
import argparse
3937
import collections
4038
import json
@@ -43,21 +41,21 @@
4341
import random
4442
import time
4543
import warnings
44+
import sys
4645

4746
import numpy as np
4847
import mxnet as mx
4948
from mxnet import gluon, nd
5049

51-
import sys
52-
sys.path.append("..")
53-
5450
import gluonnlp as nlp
55-
from gluonnlp.data import SQuAD
51+
5652
from static_bert_qa_model import BertForQALoss, StaticBertForQA
5753
from bert_qa_dataset import (SQuADTransform, preprocess_dataset)
5854
from bert_qa_evaluate import get_F1_EM, predictions
5955
from static_bert import get_model
6056

57+
sys.path.append('..')
58+
6159
np.random.seed(6)
6260
random.seed(6)
6361
mx.random.seed(6)
@@ -302,9 +300,9 @@ def train():
302300
"""Training function."""
303301
log.info('Loader Train data...')
304302
if version_2:
305-
train_data = SQuAD('train', version='2.0')
303+
train_data = nlp.data.SQuAD('train', version='2.0')
306304
else:
307-
train_data = SQuAD('train', version='1.1')
305+
train_data = nlp.data.SQuAD('train', version='1.1')
308306
log.info('Number of records in Train data:{}'.format(len(train_data)))
309307

310308
train_data_transform, _ = preprocess_dataset(
@@ -410,11 +408,11 @@ def set_new_lr(step_num, batch_id):
410408

411409
if (batch_id + 1) % log_interval == 0:
412410
toc = time.time()
413-
log.info(
414-
'Epoch: {}, Batch: {}/{}, Loss={:.4f}, lr={:.7f} Time cost={:.1f} Thoughput={:.2f} samples/s' # pylint: disable=line-too-long
415-
.format(epoch_id, batch_id, len(train_dataloader),
416-
step_loss / log_interval,
417-
trainer.learning_rate, toc - tic, log_num / (toc - tic)))
411+
log.info('Epoch: %d, Batch: %d/%d, Loss=%.4f, lr=%.7f '
412+
'Time cost=%.1f Thoughput=%.2f samples/s',
413+
epoch_id, batch_id, len(train_dataloader),
414+
step_loss / log_interval,
415+
trainer.learning_rate, toc - tic, log_num / (toc - tic))
418416
tic = time.time()
419417
step_loss = 0.0
420418
log_num = 0
@@ -431,9 +429,9 @@ def evaluate():
431429
"""
432430
log.info('Loader dev data...')
433431
if version_2:
434-
dev_data = SQuAD('dev', version='2.0')
432+
dev_data = nlp.data.SQuAD('dev', version='2.0')
435433
else:
436-
dev_data = SQuAD('dev', version='1.1')
434+
dev_data = nlp.data.SQuAD('dev', version='1.1')
437435
log.info('Number of records in Train data:{}'.format(len(dev_data)))
438436

439437
dev_dataset = dev_data.transform(

scripts/bert/utils.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,8 +24,8 @@
2424
import io
2525
import json
2626

27-
import gluonnlp
2827
import mxnet as mx
28+
import gluonnlp
2929

3030
__all__ = ['convert_vocab']
3131

scripts/language_model/sampler.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -63,7 +63,7 @@ def __init__(self, range_max, num_sampled, dtype=None, **kwargs):
6363
def _prob_helper(self, num_tries, prob):
6464
return (num_tries.astype('float64') * (-prob).log1p()).expm1() * -1
6565

66-
def forward(self, true_classes):
66+
def forward(self, true_classes): # pylint: disable=arguments-differ
6767
"""Draw samples from log uniform distribution and returns sampled candidates,
6868
expected count for true classes and sampled classes.
6969

scripts/parsing/common/config.py

Lines changed: 14 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -16,21 +16,29 @@
1616
# KIND, either express or implied. See the License for the
1717
# specific language governing permissions and limitations
1818
# under the License.
19+
"""Training config."""
20+
1921
import os
2022
import pickle
2123

2224
from scripts.parsing.common.savable import Savable
2325

2426

2527
class _Config(Savable):
26-
def __init__(self, train_file, dev_file, test_file, save_dir, pretrained_embeddings_file=None, min_occur_count=2,
28+
def __init__(self, train_file, dev_file, test_file, save_dir,
29+
pretrained_embeddings_file=None, min_occur_count=2,
2730
lstm_layers=3, word_dims=100, tag_dims=100, dropout_emb=0.33, lstm_hiddens=400,
28-
dropout_lstm_input=0.33, dropout_lstm_hidden=0.33, mlp_arc_size=500, mlp_rel_size=100,
29-
dropout_mlp=0.33, learning_rate=2e-3, decay=.75, decay_steps=5000, beta_1=.9, beta_2=.9, epsilon=1e-12,
31+
dropout_lstm_input=0.33,
32+
dropout_lstm_hidden=0.33, mlp_arc_size=500, mlp_rel_size=100,
33+
dropout_mlp=0.33, learning_rate=2e-3, decay=.75, decay_steps=5000,
34+
beta_1=.9, beta_2=.9, epsilon=1e-12,
3035
num_buckets_train=40,
31-
num_buckets_valid=10, num_buckets_test=10, train_iters=50000, train_batch_size=5000, debug=False):
36+
num_buckets_valid=10, num_buckets_test=10,
37+
train_iters=50000, train_batch_size=5000, debug=False):
3238
"""Internal structure for hyper parameters, intended for pickle serialization.
33-
May be replaced by a dict, but this class provides intuitive properties and saving/loading mechanism
39+
40+
May be replaced by a dict, but this class provides intuitive properties
41+
and saving/loading mechanism
3442
3543
Parameters
3644
----------
@@ -63,6 +71,7 @@ def __init__(self, train_file, dev_file, test_file, save_dir, pretrained_embeddi
6371
train_batch_size
6472
debug
6573
"""
74+
super(_Config, self).__init__()
6675
self.pretrained_embeddings_file = pretrained_embeddings_file
6776
self.train_file = train_file
6877
self.dev_file = dev_file

0 commit comments

Comments
 (0)