Merged
21 changes: 0 additions & 21 deletions env/cpu/py2.yml

This file was deleted.

4 changes: 2 additions & 2 deletions env/cpu/py3-master.yml
@@ -4,7 +4,7 @@ dependencies:
- python=3.6
- pip=18.1
- perl
- pylint=1.9.2
- pylint=2.3.1
- flake8
- sphinx=1.7.7
- spacy
@@ -16,7 +16,7 @@ dependencies:
- pytest-xdist<2
- regex
- pip:
- pylint-quotes<0.2
- pylint-quotes==0.2.1
- mxnet-mkl>=1.5.0b20190407
- sacremoses
- sentencepiece<0.2
4 changes: 2 additions & 2 deletions env/cpu/py3.yml
@@ -4,7 +4,7 @@ dependencies:
- python=3.6
- pip=18.1
- perl
- pylint=1.9.2
- pylint=2.3.1
- flake8
- sphinx=1.7.7
- spacy
@@ -26,7 +26,7 @@ dependencies:
- ipykernel
- regex
- pip:
- pylint-quotes<0.2
- pylint-quotes==0.2.1
- mxnet-mkl>=1.4.1
- sacremoses
- sentencepiece<0.2
21 changes: 0 additions & 21 deletions env/gpu/py2.yml

This file was deleted.

2 changes: 1 addition & 1 deletion env/gpu/py3-master.yml
@@ -4,7 +4,7 @@ dependencies:
- python=3.6
- pip=18.1
- perl
- pylint=1.9.2
- pylint=2.3.1
- flake8
- sphinx=1.7.7
- spacy
4 changes: 2 additions & 2 deletions env/gpu/py3.yml
@@ -4,7 +4,7 @@ dependencies:
- python=3.6
- pip=18.1
- perl
- pylint=1.9.2
- pylint=2.3.1
- flake8
- sphinx=1.7.7
- spacy
@@ -26,7 +26,7 @@ dependencies:
- ipykernel
- regex
- pip:
- pylint-quotes<0.2
- pylint-quotes==0.2.1
- mxnet-cu92mkl>=1.4.1
- sacremoses
- sentencepiece<0.2
2 changes: 1 addition & 1 deletion scripts/bert/create_pretraining_data.py
@@ -33,7 +33,7 @@
from gluonnlp.data import BERTTokenizer


class TrainingInstance(object):
class TrainingInstance:
"""A single training instance (sentence pair)."""

def __init__(self, tokens, segment_ids, masked_lm_positions,
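This file, like most of the script changes in this diff, drops the redundant (object) base class: under Python 3 every class is already new-style, so the two spellings are equivalent. A minimal check, using hypothetical class names:

# Python 3: both definitions produce identical new-style classes.
class WithExplicitBase(object):
    pass

class WithImplicitBase:
    pass

# object is still the implicit base in both cases.
assert WithExplicitBase.__mro__[-1] is object
assert WithImplicitBase.__mro__[-1] is object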
2 changes: 1 addition & 1 deletion scripts/bert/data/classification.py
@@ -30,7 +30,7 @@
from baidu_ernie_data import BaiduErnieXNLI, BaiduErnieLCQMC, BaiduErnieChnSentiCorp


class GlueTask(object):
class GlueTask:
"""Abstract GLUE task class.

Parameters
12 changes: 6 additions & 6 deletions scripts/bert/data/dataloader.py
@@ -24,31 +24,31 @@
import multiprocessing
from gluonnlp.data.stream import _PathDataset

class DatasetFn(object):
class DatasetFn:
"""Callable object to generate a gluon.data.Dataset given a url.

Subclasses should override the __call__ method.
"""
def __call__(self, dataset_url):
raise NotImplementedError

class SamplerFn(object):
class SamplerFn:
"""Callable object to generate a gluon.data.sampler.Sampler given a dataset.

Subclasses should override the __call__ method.
"""
def __call__(self, dataset):
raise NotImplementedError

class DataLoaderFn(object):
class DataLoaderFn:
"""Callable object to generate a DataLoader object given a dataset and sampler.

Subclasses should override the __call__ method.
"""
def __call__(self, dataset, sampler):
raise NotImplementedError

class SimpleDataLoaderFn(object):
class SimpleDataLoaderFn:
"""A simple callable object that geneartes a data loader by applying
dataloader_cls(dataset, batch_sampler=sampler, **dataset_params)
"""
@@ -77,7 +77,7 @@ def _worker_fn(url, dataset_fn, sampler_fn):
sampler = sampler_fn(dataset)
return (dataset, sampler)

class _MultiWorkerIter(object):
class _MultiWorkerIter:
"""Internal multi-worker iterator for DataLoader."""
def __init__(self, worker_pool, worker_fn, dataset, file_sampler,
dataset_fn, sampler_fn, dataloader_fn, prefetch):
@@ -165,7 +165,7 @@ def __iter__(self):
return self


class DatasetLoader(object):
class DatasetLoader:
"""Loads data from a list of datasets and returns mini-batches of data.

One dataset is loaded at a time.
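The DatasetFn, SamplerFn, and DataLoaderFn classes above are plain callable factories; a subclass only has to implement __call__. A rough usage sketch, with DatasetFn and SamplerFn as defined in this file and with NumpyDatasetFn / FixedBatchSamplerFn invented here for illustration (they are not part of this change):

import gluonnlp as nlp
from mxnet import gluon

class NumpyDatasetFn(DatasetFn):
    # Map one shard path/URL to a gluon Dataset (hypothetical subclass).
    def __call__(self, dataset_url):
        return nlp.data.NumpyDataset(dataset_url)

class FixedBatchSamplerFn(SamplerFn):
    # Batch a dataset's indices with a fixed batch size (hypothetical subclass).
    def __init__(self, batch_size):
        self._batch_size = batch_size

    def __call__(self, dataset):
        sampler = gluon.data.SequentialSampler(len(dataset))
        return gluon.data.BatchSampler(sampler, self._batch_size, last_batch='keep')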
2 changes: 1 addition & 1 deletion scripts/bert/data/ner.py
@@ -188,7 +188,7 @@ def load_segment(file_path, bert_tokenizer):
return subword_sentences


class BERTTaggingDataset(object):
class BERTTaggingDataset:
"""

Parameters
6 changes: 3 additions & 3 deletions scripts/bert/data/qa.py
@@ -23,7 +23,7 @@

__all__ = ['SQuADTransform', 'preprocess_dataset']

class SquadExample(object):
class SquadExample:
"""A single training/test example for SQuAD question.

For examples without an answer, the start and end position are -1.
@@ -86,7 +86,7 @@ def preprocess_dataset(dataset, transform, num_workers=8):
return dataset, dataset_len


class SQuADFeature(object):
class SQuADFeature:
"""Single feature of a single example transform of the SQuAD question.

"""
@@ -120,7 +120,7 @@ def __init__(self,
self.is_impossible = is_impossible


class SQuADTransform(object):
class SQuADTransform:
"""Dataset Transformation for BERT-style QA.

The transformation is processed in the following steps:
2 changes: 1 addition & 1 deletion scripts/bert/data/transform.py
@@ -21,7 +21,7 @@
import numpy as np
from gluonnlp.data import BERTSentenceTransform

class BERTDatasetTransform(object):
class BERTDatasetTransform:
"""Dataset transformation for BERT-style sentence classification or regression.

Parameters
2 changes: 1 addition & 1 deletion scripts/bert/embedding.py
@@ -55,7 +55,7 @@ def to_unicode(s):
logger = logging.getLogger(__name__)


class BertEmbedding(object):
class BertEmbedding:
"""
Encoding from BERT model.

5 changes: 2 additions & 3 deletions scripts/bert/fp16_utils.py
@@ -107,7 +107,7 @@ def group_by_ctx(arr_list):
return total_norm, chosen_scale, is_finite


class FP16Trainer(object):
class FP16Trainer:
""" Trainer for mixed precision training.

Parameters
@@ -182,7 +182,7 @@ def step(self, batch_size, max_norm=None):
# update scale based on overflow information
self._scaler.update_scale(overflow)

class LossScaler(object):
class LossScaler:
"""Abstract loss scaler"""
def has_overflow(self, params):
""" detect inf and nan """
@@ -208,7 +208,6 @@ def __init__(self, init_scale=1):

def update_scale(self, overflow):
"""update loss scale"""
pass

class DynamicLossScaler(LossScaler):
"""Class that manages dynamic loss scaling.
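LossScaler and DynamicLossScaler above follow the usual dynamic loss-scaling recipe: shrink the scale when an overflow is detected and grow it again after a window of clean steps. A generic, self-contained sketch of that update rule (names and constants are assumptions, not taken from this file):

class ToyDynamicScaler:
    # Halve the scale on overflow, double it after scale_window clean steps.
    def __init__(self, init_scale=2.0 ** 16, scale_factor=2.0, scale_window=2000):
        self.loss_scale = init_scale
        self._factor = scale_factor
        self._window = scale_window
        self._clean_steps = 0

    def update_scale(self, overflow):
        if overflow:
            self.loss_scale = max(self.loss_scale / self._factor, 1.0)
            self._clean_steps = 0
        else:
            self._clean_steps += 1
            if self._clean_steps >= self._window:
                self.loss_scale *= self._factor
                self._clean_steps = 0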
6 changes: 4 additions & 2 deletions scripts/bert/pretraining_utils.py
@@ -24,6 +24,7 @@
import argparse
import random
import multiprocessing
import functools

import numpy as np

@@ -258,7 +259,7 @@ def __call__(self, dataset, sampler):
num_workers=self._num_ctxes)
return dataloader

class BERTLoaderTransform(object):
class BERTLoaderTransform:
"""Create dataloader for a BERT dataset. """

def __init__(self, use_avg_len, batch_size, shuffle, num_ctxes, num_buckets):
@@ -281,7 +282,8 @@ def get_pretrain_data_npz(data, batch_size, num_ctxes, shuffle, use_avg_len,
'Number of training files must be greater than the number of partitions. ' \
'Only found %d files at %s'%(num_files, data)
split_sampler = nlp.data.SplitSampler(num_files, num_parts=num_parts, part_index=part_idx)
stream = nlp.data.SimpleDatasetStream(nlp.data.NumpyDataset, data, split_sampler)
NumpyDataset = functools.partial(nlp.data.NumpyDataset, allow_pickle=True)
stream = nlp.data.SimpleDatasetStream(NumpyDataset, data, split_sampler)
stream = nlp.data.PrefetchingStream(stream, worker_type='process')

# create data loader based on the dataset
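The functools.partial call above lets SimpleDatasetStream keep receiving a one-argument dataset factory while the .npz shards are now opened with allow_pickle=True. The same pattern works for any dataset constructor that needs an extra keyword argument; a minimal sketch with a placeholder shard name:

import functools
import gluonnlp as nlp

# Bind the keyword argument once; the stream still sees a path -> dataset callable.
NumpyDataset = functools.partial(nlp.data.NumpyDataset, allow_pickle=True)

# Equivalent to nlp.data.NumpyDataset('part-000.npz', allow_pickle=True);
# 'part-000.npz' is a placeholder file name, not a file from this repository.
# dataset = NumpyDataset('part-000.npz')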
2 changes: 1 addition & 1 deletion scripts/machine_translation/bleu.py
@@ -110,7 +110,7 @@ def _tokenize_mteval_13a(segment):
return norm


class UnicodeRegex(object):
class UnicodeRegex:
"""Ad-hoc hack to recognize all punctuation and symbols.
"""
def __init__(self):
6 changes: 3 additions & 3 deletions scripts/machine_translation/dataprocessor.py
@@ -65,7 +65,7 @@ def _load_cached_dataset(prefix):
return None


class TrainValDataTransform(object):
class TrainValDataTransform:
"""Transform the machine translation dataset.

Clip source and the target sentences to the maximum length. For the source sentence, append the
@@ -176,7 +176,7 @@ def load_translation_data(dataset, bleu, args):
fetch_tgt_sentence = lambda src, tgt: tgt.split()
val_tgt_sentences = list(data_val.transform(fetch_tgt_sentence))
test_tgt_sentences = list(data_test.transform(fetch_tgt_sentence))
elif bleu == '13a' or bleu == 'intl':
elif bleu in ('13a', 'intl'):
fetch_tgt_sentence = lambda src, tgt: tgt
if dataset == 'WMT2016BPE':
val_text = nlp.data.WMT2016('newstest2013', src_lang=src_lang, tgt_lang=tgt_lang)
@@ -185,7 +185,7 @@ def load_translation_data(dataset, bleu, args):
val_text = nlp.data.WMT2014('newstest2013', src_lang=src_lang, tgt_lang=tgt_lang)
test_text = nlp.data.WMT2014('newstest2014', src_lang=src_lang, tgt_lang=tgt_lang,
full=args.full)
elif dataset == 'IWSLT2015' or dataset == 'TOY':
elif dataset in ('IWSLT2015', 'TOY'):
val_text = data_val
test_text = data_test
else:
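The two conditional rewrites above replace chained equality tests with tuple membership, the form pylint's consider-using-in check prefers; behaviour is unchanged:

bleu = '13a'
assert (bleu == '13a' or bleu == 'intl') == (bleu in ('13a', 'intl'))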
2 changes: 1 addition & 1 deletion scripts/machine_translation/translation.py
@@ -25,7 +25,7 @@
import mxnet as mx
from gluonnlp.model import BeamSearchScorer, BeamSearchSampler

class BeamSearchTranslator(object):
class BeamSearchTranslator:
"""Beam Search Translator

Parameters
6 changes: 3 additions & 3 deletions scripts/parsing/common/data.py
@@ -27,7 +27,7 @@
from .savable import Savable


class ConllWord(object):
class ConllWord:
"""CoNLL format template, see http://anthology.aclweb.org/W/W06/W06-2920.pdf

Parameters
@@ -76,7 +76,7 @@ def __str__(self):
return '\t'.join(['_' if v is None else v for v in values])


class ConllSentence(object):
class ConllSentence:
"""A list of ConllWord

Parameters
@@ -365,7 +365,7 @@ def rel_size(self):
return len(self._id2rel)


class DataLoader(object):
class DataLoader:
"""
Load CoNLL data
Adapted from https://github.com/jcyk/Dynet-Biaffine-dependency-parser with some modifications
3 changes: 1 addition & 2 deletions scripts/parsing/common/k_means.py
@@ -21,7 +21,7 @@
import numpy as np


class KMeans(object):
class KMeans:
"""
Cluster sentences by their lengths

@@ -101,7 +101,6 @@ def __init__(self, k, len_cntr):
# print('%d) Final splits: %s; Final mass: %d' % (i, self._splits, self.get_mass()))

self._reindex()
return

def _recenter(self):
"""
2 changes: 1 addition & 1 deletion scripts/parsing/common/savable.py
@@ -21,7 +21,7 @@
import pickle


class Savable(object):
class Savable:
"""
A super class for save/load operations.
"""
1 change: 0 additions & 1 deletion scripts/parsing/common/tarjan.py
@@ -78,7 +78,6 @@ def strongconnect(self, v, index, stack):
w = stack.pop()
self._onstack[w] = False
self._SCCs[-1].add(w)
return

# ======================
@property