
Commit 177cbac: fix doctest
Author: Sheng Zha
Parent: 8cd5a12

File tree: 10 files changed, +81 −65 lines

ci/jenkins/Jenkinsfile_py3-master_gpu_integration

Lines changed: 2 additions & 2 deletions
@@ -40,11 +40,11 @@ core_logic: {
   utils.parallel_stage('Scripts', [
     build_steps.test_unittest('gluon-nlp-gpu-py3-master', 'gpu/py3-master',
                               'scripts/tests', 'src/gluonnlp',
-                              'gpu and (not serial) and integration',
+                              'gpu and (not (serial or skip_master)) and integration',
                               4, true, true),
     build_steps.test_unittest('gluon-nlp-gpu-py3-master', 'gpu/py3-master',
                               'scripts/tests', 'src/gluonnlp',
-                              'gpu and serial and integration',
+                              'gpu and serial and integration and (not skip_master)',
                               0, true, true)
   ])
 }
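The quoted filter strings are pytest marker expressions, presumably forwarded to pytest's -m option by build_steps.test_unittest. A minimal sketch of how the new expressions behave (the marker names gpu, serial, integration, and skip_master come from the diff; the test function is hypothetical, and the markers are assumed to be registered in the project's pytest config):

import pytest

@pytest.mark.gpu
@pytest.mark.integration
@pytest.mark.skip_master  # newly deselected on the master branch CI
def test_heavy_integration():
    assert True

# Selected by the first parallel stage with:
#   pytest -m "gpu and (not (serial or skip_master)) and integration"
# The expression is evaluated per test, so this test is now skipped on
# master because skip_master falls under the negation.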

ci/jenkins/build_steps.groovy

Lines changed: 1 addition & 1 deletion
@@ -90,7 +90,7 @@ def create_website(workspace_name, conda_env_name) {
   path = env.BRANCH_NAME
 }
 return ["${conda_env_name}: website'": {
-  node(NODE_LINUX_CPU) {
+  node(NODE_LINUX_GPU) {
     ws("workspace/${workspace_name}") {
       utils.init_git()
       sh """

env/gpu/py3-master.yml

Lines changed: 13 additions & 0 deletions
@@ -14,8 +14,21 @@ dependencies:
   - pytest-cov=2.6.0
   - mock<3
   - pytest-xdist<2
+  - recommonmark
+  - pandoc=1.19.2
+  - notedown
+  - numba>=v0.40.0
+  - nbsphinx>=0.3.4,<0.4
+  - nbconvert=5.4.0
+  - tornado=5.1.1
+  - ipython
+  - ipykernel
   - pip:
     - pylint-quotes<0.2
     - mxnet-cu92mkl>=1.5.0b20190407
     - sacremoses
     - sentencepiece<0.2
+    - https://github.com/mli/mx-theme/archive/0.3.1.tar.gz
+    - seaborn
+    - jieba
+    - sphinx-autorun

src/gluonnlp/data/glue.py

Lines changed: 21 additions & 21 deletions
@@ -93,7 +93,7 @@ class GlueCoLA(_GlueDataset):
 >>> len(cola_dev[0])
 2
 >>> cola_dev[0]
-[u'The sailors rode the breeze clear of the rocks.', u'1']
+['The sailors rode the breeze clear of the rocks.', '1']
 >>> cola_test = gluonnlp.data.GlueCoLA('test', root='./datasets/cola')
 -etc-
 >>> len(cola_test)

@@ -156,15 +156,15 @@ class GlueSST2(_GlueDataset):
 >>> len(sst_dev[0])
 2
 >>> sst_dev[0]
-[u"it 's a charming and often affecting journey . ", u'1']
+["it 's a charming and often affecting journey . ", '1']
 >>> sst_test = gluonnlp.data.GlueSST2('test', root='./datasets/sst')
 -etc-
 >>> len(sst_test)
 1821
 >>> len(sst_test[0])
 1
 >>> sst_test[0]
-[u'uneasy mishmash of styles and genres .']
+['uneasy mishmash of styles and genres .']
 """
 def __init__(self, segment='train',
              root=os.path.join(get_home_dir(), 'datasets', 'glue_sst'),

@@ -222,15 +222,15 @@ class GlueSTSB(_GlueDataset):
 >>> len(stsb_dev[0])
 3
 >>> stsb_dev[0]
-[u'A man with a hard hat is dancing.', u'A man wearing a hard hat is dancing.', u'5.000']
+['A man with a hard hat is dancing.', 'A man wearing a hard hat is dancing.', '5.000']
 >>> stsb_test = gluonnlp.data.GlueSTSB('test', root='./datasets/stsb')
 -etc-
 >>> len(stsb_test)
 1379
 >>> len(stsb_test[0])
 2
 >>> stsb_test[0]
-[u'A girl is styling her hair.', u'A girl is brushing her hair.']
+['A girl is styling her hair.', 'A girl is brushing her hair.']
 """
 def __init__(self, segment='train',
              root=os.path.join(get_home_dir(), 'datasets', 'glue_stsb'),

@@ -285,15 +285,15 @@ class GlueQQP(_GlueDataset):
 >>> len(qqp_dev[0])
 3
 >>> qqp_dev[0]
-[u'Why are African-Americans so beautiful?', u'Why are hispanics so beautiful?', u'0']
+['Why are African-Americans so beautiful?', 'Why are hispanics so beautiful?', '0']
 >>> qqp_test = gluonnlp.data.GlueQQP('test', root='./datasets/qqp')
 -etc-
 >>> len(qqp_test)
 390965
 >>> len(qqp_test[3])
 2
 >>> qqp_test[3]
-[u'Is it safe to invest in social trade biz?', u'Is social trade geniune?']
+['Is it safe to invest in social trade biz?', 'Is social trade geniune?']
 """
 def __init__(self, segment='train',
              root=os.path.join(get_home_dir(), 'datasets', 'glue_qqp'),

@@ -343,20 +343,20 @@ class GlueRTE(_GlueDataset):
 --------
 >>> rte_dev = gluonnlp.data.GlueRTE('dev', root='./datasets/rte')
 -etc-
->>> len(rte)
+>>> len(rte_dev)
 277
->>> len(rte[0])
+>>> len(rte_dev[0])
 3
->>> rte[0]
-[u'Dana Reeve, the widow of the actor Christopher Reeve, has died of lung cancer at age 44, according to the Christopher Reeve Foundation.', u'Christopher Reeve had an accident.', u'not_entailment']
+>>> rte_dev[0]
+['Dana Reeve, the widow of the actor Christopher Reeve, has died of lung cancer at age 44, according to the Christopher Reeve Foundation.', 'Christopher Reeve had an accident.', 'not_entailment']
 >>> rte_test = gluonnlp.data.GlueRTE('test', root='./datasets/rte')
 -etc-
 >>> len(rte_test)
 3000
 >>> len(rte_test[16])
 2
 >>> rte_test[16]
-[u'United failed to progress beyond the group stages of the Champions League and trail in the Premiership title race, sparking rumours over its future.', u'United won the Champions League.']
+['United failed to progress beyond the group stages of the Champions League and trail in the Premiership title race, sparking rumours over its future.', 'United won the Champions League.']
 """
 def __init__(self, segment='train',
              root=os.path.join(get_home_dir(), 'datasets', 'glue_rte'),

@@ -394,7 +394,7 @@ class GlueMNLI(_GlueDataset):

 Parameters
 ----------
-segment : {'train', 'dev_matched', 'dev_mismatched', 'test_matched', 'dev_mismatched'},
+segment : {'train', 'dev_matched', 'dev_mismatched', 'test_matched', 'test_mismatched'},
     default 'train'
     Dataset segment.
 root : str, default '$MXNET_HOME/datasets/glue_mnli'

@@ -405,22 +405,22 @@ class GlueMNLI(_GlueDataset):

 Examples
 --------
->>> mnli_dev = gluonnlp.data.GlueMNLI('dev', root='./datasets/mnli')
+>>> mnli_dev = gluonnlp.data.GlueMNLI('dev_matched', root='./datasets/mnli')
 -etc-
 >>> len(mnli_dev)
 9815
 >>> len(mnli_dev[0])
 3
 >>> mnli_dev[0]
-[u'The new rights are nice enough', u'Everyone really likes the newest benefits ', u'neutral']
->>> mnli_test = gluonnlp.data.GlueCoLA('test', root='./datasets/mnli')
+['The new rights are nice enough', 'Everyone really likes the newest benefits ', 'neutral']
+>>> mnli_test = gluonnlp.data.GlueMNLI('test_matched', root='./datasets/mnli')
 -etc-
 >>> len(mnli_test)
 9796
 >>> len(mnli_test[0])
 2
 >>> mnli_test[0]
-[u'Hierbas, ans seco, ans dulce, and frigola are just a few names worth keeping a look-out for.', u'Hierbas is a name worth looking out for.']
+['Hierbas, ans seco, ans dulce, and frigola are just a few names worth keeping a look-out for.', 'Hierbas is a name worth looking out for.']
 """
 def __init__(self, segment='train',
              root=os.path.join(get_home_dir(), 'datasets', 'glue_mnli'),

@@ -487,15 +487,15 @@ class GlueQNLI(_GlueDataset):
 >>> len(qnli_dev[0])
 3
 >>> qnli_dev[0]
-[u'Which NFL team represented the AFC at Super Bowl 50?', u'The American Football Conference (AFC) champion Denver Broncos defeated the National Football Conference (NFC) champion Carolina Panthers 24\u201310 to earn their third Super Bowl title.', u'entailment']
+['Which NFL team represented the AFC at Super Bowl 50?', 'The American Football Conference (AFC) champion Denver Broncos defeated the National Football Conference (NFC) champion Carolina Panthers 24\u201310 to earn their third Super Bowl title.', 'entailment']
 >>> qnli_test = gluonnlp.data.GlueQNLI('test', root='./datasets/qnli')
 -etc-
 >>> len(qnli_test)
 5740
 >>> len(qnli_test[0])
 2
 >>> qnli_test[0]
-[u'What is the seldom used force unit equal to one thousand newtons?', u'Other arcane units of force include the sth\xe8ne, which is equivalent to 1000 N, and the kip, which is equivalent to 1000 lbf.']
+['What seldom used term of a unit of force equal to 1000 pound s of force?', 'Other arcane units of force include the sthène, which is equivalent to 1000 N, and the kip, which is equivalent to 1000 lbf.']
 """
 def __init__(self, segment='train',
              root=os.path.join(get_home_dir(), 'datasets', 'glue_qnli'),

@@ -549,15 +549,15 @@ class GlueWNLI(_GlueDataset):
 >>> len(wnli_dev[0])
 3
 >>> wnli_dev[0]
-[u'The drain is clogged with hair. It has to be cleaned.', u'The hair has to be cleaned.', u'0']
+['The drain is clogged with hair. It has to be cleaned.', 'The hair has to be cleaned.', '0']
 >>> wnli_test = gluonnlp.data.GlueWNLI('test', root='./datasets/wnli')
 -etc-
 >>> len(wnli_test)
 146
 >>> len(wnli_test[0])
 2
 >>> wnli_test[0]
-[u'Maude and Dora had seen the trains rushing across the prairie, with long, rolling puffs of black smoke streaming back from the engine. Their roars and their wild, clear whistles could be heard from far away. Horses ran away when they came in sight.', u'Horses ran away when Maude and Dora came in sight.']
+['Maude and Dora had seen the trains rushing across the prairie, with long, rolling puffs of black smoke streaming back from the engine. Their roars and their wild, clear whistles could be heard from far away. Horses ran away when they came in sight.', 'Horses ran away when Maude and Dora came in sight.']
 """
 def __init__(self, segment='train',
              root=os.path.join(get_home_dir(), 'datasets', 'glue_wnli'),
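The mechanical pattern across these hunks: under Python 3, the repr of a str carries no u'' prefix, so the expected doctest outputs drop it (the -etc- lines appear to be the project's placeholder for elided output). A minimal sketch of the repr difference, independent of gluonnlp:

# Python 3: str reprs have no u'' prefix, so doctest expectations change.
sample = ['The sailors rode the breeze clear of the rocks.', '1']
print(sample)
# ['The sailors rode the breeze clear of the rocks.', '1']

# Python 2 printed the same list of unicode objects as:
# [u'The sailors rode the breeze clear of the rocks.', u'1']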

src/gluonnlp/data/registry.py

Lines changed: 1 addition & 1 deletion
@@ -60,7 +60,7 @@ def register(class_=None, **kwargs):
 ...     pass
 >>> my_dataset = gluonnlp.data.create('MyDataset')
 >>> print(type(my_dataset))
-<class 'MyDataset'>
+<class 'gluonnlp.data.registry.MyDataset'>

 """

src/gluonnlp/data/transforms.py

Lines changed: 17 additions & 17 deletions
@@ -153,10 +153,10 @@ class NLTKMosesTokenizer(object):
 Examples
 --------
 >>> tokenizer = gluonnlp.data.NLTKMosesTokenizer()
->>> tokenizer("Gluon NLP toolkit provides a suite of text processing tools.")
+>>> tokenizer('Gluon NLP toolkit provides a suite of text processing tools.')
 ['Gluon', 'NLP', 'toolkit', 'provides', 'a', 'suite', 'of', 'text', 'processing', 'tools', '.']
->>> tokenizer("Das Gluon NLP-Toolkit stellt eine Reihe von Textverarbeitungstools "
-...           "zur Verfügung.")
+>>> tokenizer('Das Gluon NLP-Toolkit stellt eine Reihe von Textverarbeitungstools '
+...           'zur Verfügung.')
 ['Das', 'Gluon', 'NLP-Toolkit', 'stellt', 'eine', 'Reihe', 'von', 'Textverarbeitungstools', \
 'zur', 'Verf\xfcgung', '.']
 """

@@ -215,10 +215,10 @@ class SacreMosesTokenizer(object):
 Examples
 --------
 >>> tokenizer = gluonnlp.data.SacreMosesTokenizer()
->>> tokenizer("Gluon NLP toolkit provides a suite of text processing tools.")
+>>> tokenizer('Gluon NLP toolkit provides a suite of text processing tools.')
 ['Gluon', 'NLP', 'toolkit', 'provides', 'a', 'suite', 'of', 'text', 'processing', 'tools', '.']
->>> tokenizer("Das Gluon NLP-Toolkit stellt eine Reihe von Textverarbeitungstools "
-...           "zur Verfügung.")
+>>> tokenizer('Das Gluon NLP-Toolkit stellt eine Reihe von Textverarbeitungstools '
+...           'zur Verfügung.')
 ['Das', 'Gluon', 'NLP-Toolkit', 'stellt', 'eine', 'Reihe', 'von', 'Textverarbeitungstools', \
 'zur', 'Verf\xfcgung', '.']
 """

@@ -275,17 +275,17 @@ class SpacyTokenizer(object):
 Parameters
 ----------
 lang : str
-    The language to tokenize. Default is "en", i.e, English.
+    The language to tokenize. Default is 'en', i.e, English.
     You may refer to https://spacy.io/usage/models for supported languages.

 Examples
 --------
 >>> tokenizer = gluonnlp.data.SpacyTokenizer()
->>> tokenizer(u"Gluon NLP toolkit provides a suite of text processing tools.")
+>>> tokenizer('Gluon NLP toolkit provides a suite of text processing tools.')
 ['Gluon', 'NLP', 'toolkit', 'provides', 'a', 'suite', 'of', 'text', 'processing', 'tools', '.']
 >>> tokenizer = gluonnlp.data.SpacyTokenizer('de')
->>> tokenizer(u"Das Gluon NLP-Toolkit stellt eine Reihe von Textverarbeitungstools"
-...           " zur Verfügung.")
+>>> tokenizer('Das Gluon NLP-Toolkit stellt eine Reihe von Textverarbeitungstools'
+...           ' zur Verfügung.')
 ['Das', 'Gluon', 'NLP-Toolkit', 'stellt', 'eine', 'Reihe', 'von', 'Textverarbeitungstools', \
 'zur', 'Verf\xfcgung', '.']
 """

@@ -463,9 +463,9 @@ class JiebaTokenizer(object):
 Examples
 --------
 >>> tokenizer = gluonnlp.data.JiebaTokenizer()
->>> tokenizer(u"我来到北京清华大学")
+>>> tokenizer('我来到北京清华大学')
 ['我', '来到', '北京', '清华大学']
->>> tokenizer(u"小明硕士毕业于中国科学院计算所,后在日本京都大学深造")
+>>> tokenizer('小明硕士毕业于中国科学院计算所,后在日本京都大学深造')
 ['小明', '硕士', '毕业', '于', '中国科学院', '计算所', ',', '后', '在', '日本京都大学', '深造']

 """

@@ -518,9 +518,9 @@ class NLTKStanfordSegmenter(object):
 Examples
 --------
 >>> tokenizer = gluonnlp.data.NLTKStanfordSegmenter() #doctest:+SKIP
->>> tokenizer(u"我来到北京清华大学")
+>>> tokenizer('我来到北京清华大学') #doctest:+SKIP
 ['我', '来到', '北京', '清华大学']
->>> tokenizer(u"小明硕士毕业于中国科学院计算所,后在日本京都大学深造")
+>>> tokenizer('小明硕士毕业于中国科学院计算所,后在日本京都大学深造') #doctest:+SKIP
 ['小明', '硕士', '毕业', '于', '中国科学院', '计算所', ',', '后', '在', '日本京都大学', '深造']

 """

@@ -729,10 +729,10 @@ class BERTBasicTokenizer(object):
 Examples
 --------
 >>> tokenizer = gluonnlp.data.BERTBasicTokenizer(lower=True)
->>> tokenizer(u" \tHeLLo!how \n Are yoU? ")
+>>> tokenizer(' \tHeLLo!how \n Are yoU? ')
 ['hello', '!', 'how', 'are', 'you', '?']
 >>> tokenizer = gluonnlp.data.BERTBasicTokenizer(lower=False)
->>> tokenizer(u" \tHeLLo!how \n Are yoU? ")
+>>> tokenizer(' \tHeLLo!how \n Are yoU? ')
 ['HeLLo', '!', 'how', 'Are', 'yoU', '?']

 """

@@ -923,7 +923,7 @@ class BERTTokenizer(object):
 ...                                           pretrained=False, root='./model')
 -etc-
 >>> tokenizer = gluonnlp.data.BERTTokenizer(vocab=vocab)
->>> tokenizer(u"gluonnlp: 使NLP变得简单。")
+>>> tokenizer('gluonnlp: 使NLP变得简单。')
 ['gl', '##uo', '##nn', '##lp', ':', '使', 'nl', '##p', '变', '得', '简', '单', '。']

 """

src/gluonnlp/embedding/evaluation.py

Lines changed: 2 additions & 2 deletions
@@ -79,15 +79,15 @@ def register(class_):
 >>> similarity_function = gluonnlp.embedding.evaluation.create('similarity',
 ...                                                            'MySimilarityFunction')
 >>> print(type(similarity_function))
-<class 'MySimilarityFunction'>
+<class 'gluonnlp.embedding.evaluation.MySimilarityFunction'>

 >>> @gluonnlp.embedding.evaluation.register
 ... class MyAnalogyFunction(gluonnlp.embedding.evaluation.WordEmbeddingAnalogyFunction):
 ...     def __init__(self, k=1, eps=1E-10):
 ...         pass
 >>> analogy_function = gluonnlp.embedding.evaluation.create('analogy', 'MyAnalogyFunction')
 >>> print(type(analogy_function))
-<class 'MyAnalogyFunction'>
+<class 'gluonnlp.embedding.evaluation.MyAnalogyFunction'>

 """

src/gluonnlp/embedding/token_embedding.py

Lines changed: 4 additions & 1 deletion
@@ -59,7 +59,7 @@ def register(embedding_cls):
 ...     pass
 >>> embed = gluonnlp.embedding.create('MyTextEmbed')
 >>> print(type(embed))
-<class 'MyTextEmbed'>
+<class 'gluonnlp.embedding.token_embedding.MyTextEmbed'>
 """

 register_text_embedding = registry.get_register_func(TokenEmbedding, 'token embedding')

@@ -814,6 +814,7 @@ class GloVe(TokenEmbedding):
 >>> import warnings; warnings.filterwarnings('ignore');
 >>> import gluonnlp as nlp
 >>> nlp.embedding.list_sources('GloVe')
+-etc-

 Parameters
 ----------

@@ -892,6 +893,7 @@ class FastText(TokenEmbedding):
 >>> import warnings; warnings.filterwarnings('ignore');
 >>> import gluonnlp as nlp
 >>> nlp.embedding.list_sources('FastText')
+-etc-


 Parameters

@@ -992,6 +994,7 @@ class Word2Vec(TokenEmbedding):
 >>> import warnings; warnings.filterwarnings('ignore');
 >>> import gluonnlp as nlp
 >>> nlp.embedding.list_sources('Word2Vec')
+-etc-

 Parameters
 ----------
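The added -etc- lines mark the list_sources output as elided, presumably because the list of published embedding sources is long and changes over time, which would make an exact expected output brittle.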

src/gluonnlp/metric/masked_accuracy.py

Lines changed: 4 additions & 4 deletions
@@ -214,12 +214,12 @@ class MaskedAccuracy(EvalMetric):
 >>> masks = [mx.nd.array([1, 1, 0])]
 >>> acc = MaskedAccuracy()
 >>> acc.update(preds=predicts, labels=labels, masks=masks)
->>> print acc.get()
-('accuracy', 0.5)
+>>> acc.get()
+('masked-accuracy', 0.5)
 >>> acc2 = MaskedAccuracy()
 >>> acc2.update(preds=predicts, labels=labels)
->>> print acc2.get()
-('accuracy', 0.6666667)
+>>> acc2.get()
+('masked-accuracy', 0.6666666666666666)
 """
 def __init__(self, axis=1, name='masked-accuracy',
              output_names=None, label_names=None):
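Beyond replacing the Python 2 print statements, the expected tuples now carry the metric's actual name ('masked-accuracy') and Python 3's full float repr. The arithmetic behind the two values, in plain Python (the predicts/labels definitions sit in the elided context above; the correctness vector here is a hypothetical stand-in that reproduces the same numbers):

correct = [1, 0, 1]   # hypothetical per-sample argmax == label indicator
mask    = [1, 1, 0]   # from the docstring above: the last sample is masked out

masked_acc = sum(c * m for c, m in zip(correct, mask)) / sum(mask)
plain_acc  = sum(correct) / len(correct)

print(masked_acc)  # 0.5
print(plain_acc)   # 0.6666666666666666  (full Python 3 float repr)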
