
Commit 177cbac: fix doctest
Author: Sheng Zha
Parent: 8cd5a12

File tree: 10 files changed, +81 −65 lines

ci/jenkins/Jenkinsfile_py3-master_gpu_integration

Lines changed: 2 additions & 2 deletions
@@ -40,11 +40,11 @@ core_logic: {
   utils.parallel_stage('Scripts', [
     build_steps.test_unittest('gluon-nlp-gpu-py3-master', 'gpu/py3-master',
                               'scripts/tests', 'src/gluonnlp',
-                              'gpu and (not serial) and integration',
+                              'gpu and (not (serial or skip_master)) and integration',
                               4, true, true),
     build_steps.test_unittest('gluon-nlp-gpu-py3-master', 'gpu/py3-master',
                               'scripts/tests', 'src/gluonnlp',
-                              'gpu and serial and integration',
+                              'gpu and serial and integration and (not skip_master)',
                               0, true, true)
   ])
 }
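The quoted filter strings are pytest marker expressions, presumably forwarded to pytest's -m option by build_steps.test_unittest. A minimal sketch of how the new expressions behave (the marker names gpu, serial, integration, and skip_master come from the diff; the test function is hypothetical, and the markers are assumed to be registered in the project's pytest config):

import pytest

@pytest.mark.gpu
@pytest.mark.integration
@pytest.mark.skip_master  # newly deselected on the master branch CI
def test_heavy_integration():
    assert True

# Selected by the first parallel stage with:
#   pytest -m "gpu and (not (serial or skip_master)) and integration"
# The expression is evaluated per test, so this test is now skipped on
# master because skip_master falls under the negation.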

ci/jenkins/build_steps.groovy

Lines changed: 1 addition & 1 deletion
@@ -90,7 +90,7 @@ def create_website(workspace_name, conda_env_name) {
   path = env.BRANCH_NAME
 }
 return ["${conda_env_name}: website'": {
-  node(NODE_LINUX_CPU) {
+  node(NODE_LINUX_GPU) {
     ws("workspace/${workspace_name}") {
       utils.init_git()
       sh """

env/gpu/py3-master.yml

Lines changed: 13 additions & 0 deletions
@@ -14,8 +14,21 @@ dependencies:
   - pytest-cov=2.6.0
   - mock<3
   - pytest-xdist<2
+  - recommonmark
+  - pandoc=1.19.2
+  - notedown
+  - numba>=v0.40.0
+  - nbsphinx>=0.3.4,<0.4
+  - nbconvert=5.4.0
+  - tornado=5.1.1
+  - ipython
+  - ipykernel
   - pip:
     - pylint-quotes<0.2
     - mxnet-cu92mkl>=1.5.0b20190407
     - sacremoses
     - sentencepiece<0.2
+    - https://github.com/mli/mx-theme/archive/0.3.1.tar.gz
+    - seaborn
+    - jieba
+    - sphinx-autorun

src/gluonnlp/data/glue.py

Lines changed: 21 additions & 21 deletions
@@ -93,7 +93,7 @@ class GlueCoLA(_GlueDataset):
 >>> len(cola_dev[0])
 2
 >>> cola_dev[0]
-[u'The sailors rode the breeze clear of the rocks.', u'1']
+['The sailors rode the breeze clear of the rocks.', '1']
 >>> cola_test = gluonnlp.data.GlueCoLA('test', root='./datasets/cola')
 -etc-
 >>> len(cola_test)

@@ -156,15 +156,15 @@ class GlueSST2(_GlueDataset):
 >>> len(sst_dev[0])
 2
 >>> sst_dev[0]
-[u"it 's a charming and often affecting journey . ", u'1']
+["it 's a charming and often affecting journey . ", '1']
 >>> sst_test = gluonnlp.data.GlueSST2('test', root='./datasets/sst')
 -etc-
 >>> len(sst_test)
 1821
 >>> len(sst_test[0])
 1
 >>> sst_test[0]
-[u'uneasy mishmash of styles and genres .']
+['uneasy mishmash of styles and genres .']
 """
 def __init__(self, segment='train',
              root=os.path.join(get_home_dir(), 'datasets', 'glue_sst'),

@@ -222,15 +222,15 @@ class GlueSTSB(_GlueDataset):
 >>> len(stsb_dev[0])
 3
 >>> stsb_dev[0]
-[u'A man with a hard hat is dancing.', u'A man wearing a hard hat is dancing.', u'5.000']
+['A man with a hard hat is dancing.', 'A man wearing a hard hat is dancing.', '5.000']
 >>> stsb_test = gluonnlp.data.GlueSTSB('test', root='./datasets/stsb')
 -etc-
 >>> len(stsb_test)
 1379
 >>> len(stsb_test[0])
 2
 >>> stsb_test[0]
-[u'A girl is styling her hair.', u'A girl is brushing her hair.']
+['A girl is styling her hair.', 'A girl is brushing her hair.']
 """
 def __init__(self, segment='train',
              root=os.path.join(get_home_dir(), 'datasets', 'glue_stsb'),

@@ -285,15 +285,15 @@ class GlueQQP(_GlueDataset):
 >>> len(qqp_dev[0])
 3
 >>> qqp_dev[0]
-[u'Why are African-Americans so beautiful?', u'Why are hispanics so beautiful?', u'0']
+['Why are African-Americans so beautiful?', 'Why are hispanics so beautiful?', '0']
 >>> qqp_test = gluonnlp.data.GlueQQP('test', root='./datasets/qqp')
 -etc-
 >>> len(qqp_test)
 390965
 >>> len(qqp_test[3])
 2
 >>> qqp_test[3]
-[u'Is it safe to invest in social trade biz?', u'Is social trade geniune?']
+['Is it safe to invest in social trade biz?', 'Is social trade geniune?']
 """
 def __init__(self, segment='train',
              root=os.path.join(get_home_dir(), 'datasets', 'glue_qqp'),

@@ -343,20 +343,20 @@ class GlueRTE(_GlueDataset):
 --------
 >>> rte_dev = gluonnlp.data.GlueRTE('dev', root='./datasets/rte')
 -etc-
->>> len(rte)
+>>> len(rte_dev)
 277
->>> len(rte[0])
+>>> len(rte_dev[0])
 3
->>> rte[0]
-[u'Dana Reeve, the widow of the actor Christopher Reeve, has died of lung cancer at age 44, according to the Christopher Reeve Foundation.', u'Christopher Reeve had an accident.', u'not_entailment']
+>>> rte_dev[0]
+['Dana Reeve, the widow of the actor Christopher Reeve, has died of lung cancer at age 44, according to the Christopher Reeve Foundation.', 'Christopher Reeve had an accident.', 'not_entailment']
 >>> rte_test = gluonnlp.data.GlueRTE('test', root='./datasets/rte')
 -etc-
 >>> len(rte_test)
 3000
 >>> len(rte_test[16])
 2
 >>> rte_test[16]
-[u'United failed to progress beyond the group stages of the Champions League and trail in the Premiership title race, sparking rumours over its future.', u'United won the Champions League.']
+['United failed to progress beyond the group stages of the Champions League and trail in the Premiership title race, sparking rumours over its future.', 'United won the Champions League.']
 """
 def __init__(self, segment='train',
              root=os.path.join(get_home_dir(), 'datasets', 'glue_rte'),

@@ -394,7 +394,7 @@ class GlueMNLI(_GlueDataset):

 Parameters
 ----------
-segment : {'train', 'dev_matched', 'dev_mismatched', 'test_matched', 'dev_mismatched'},
+segment : {'train', 'dev_matched', 'dev_mismatched', 'test_matched', 'test_mismatched'},
     default 'train'
     Dataset segment.
 root : str, default '$MXNET_HOME/datasets/glue_mnli'

@@ -405,22 +405,22 @@ class GlueMNLI(_GlueDataset):

 Examples
 --------
->>> mnli_dev = gluonnlp.data.GlueMNLI('dev', root='./datasets/mnli')
+>>> mnli_dev = gluonnlp.data.GlueMNLI('dev_matched', root='./datasets/mnli')
 -etc-
 >>> len(mnli_dev)
 9815
 >>> len(mnli_dev[0])
 3
 >>> mnli_dev[0]
-[u'The new rights are nice enough', u'Everyone really likes the newest benefits ', u'neutral']
->>> mnli_test = gluonnlp.data.GlueCoLA('test', root='./datasets/mnli')
+['The new rights are nice enough', 'Everyone really likes the newest benefits ', 'neutral']
+>>> mnli_test = gluonnlp.data.GlueMNLI('test_matched', root='./datasets/mnli')
 -etc-
 >>> len(mnli_test)
 9796
 >>> len(mnli_test[0])
 2
 >>> mnli_test[0]
-[u'Hierbas, ans seco, ans dulce, and frigola are just a few names worth keeping a look-out for.', u'Hierbas is a name worth looking out for.']
+['Hierbas, ans seco, ans dulce, and frigola are just a few names worth keeping a look-out for.', 'Hierbas is a name worth looking out for.']
 """
 def __init__(self, segment='train',
              root=os.path.join(get_home_dir(), 'datasets', 'glue_mnli'),

@@ -487,15 +487,15 @@ class GlueQNLI(_GlueDataset):
 >>> len(qnli_dev[0])
 3
 >>> qnli_dev[0]
-[u'Which NFL team represented the AFC at Super Bowl 50?', u'The American Football Conference (AFC) champion Denver Broncos defeated the National Football Conference (NFC) champion Carolina Panthers 24\u201310 to earn their third Super Bowl title.', u'entailment']
+['Which NFL team represented the AFC at Super Bowl 50?', 'The American Football Conference (AFC) champion Denver Broncos defeated the National Football Conference (NFC) champion Carolina Panthers 24\u201310 to earn their third Super Bowl title.', 'entailment']
 >>> qnli_test = gluonnlp.data.GlueQNLI('test', root='./datasets/qnli')
 -etc-
 >>> len(qnli_test)
 5740
 >>> len(qnli_test[0])
 2
 >>> qnli_test[0]
-[u'What is the seldom used force unit equal to one thousand newtons?', u'Other arcane units of force include the sth\xe8ne, which is equivalent to 1000 N, and the kip, which is equivalent to 1000 lbf.']
+['What seldom used term of a unit of force equal to 1000 pound s of force?', 'Other arcane units of force include the sthène, which is equivalent to 1000 N, and the kip, which is equivalent to 1000 lbf.']
 """
 def __init__(self, segment='train',
              root=os.path.join(get_home_dir(), 'datasets', 'glue_qnli'),

@@ -549,15 +549,15 @@ class GlueWNLI(_GlueDataset):
 >>> len(wnli_dev[0])
 3
 >>> wnli_dev[0]
-[u'The drain is clogged with hair. It has to be cleaned.', u'The hair has to be cleaned.', u'0']
+['The drain is clogged with hair. It has to be cleaned.', 'The hair has to be cleaned.', '0']
 >>> wnli_test = gluonnlp.data.GlueWNLI('test', root='./datasets/wnli')
 -etc-
 >>> len(wnli_test)
 146
 >>> len(wnli_test[0])
 2
 >>> wnli_test[0]
-[u'Maude and Dora had seen the trains rushing across the prairie, with long, rolling puffs of black smoke streaming back from the engine. Their roars and their wild, clear whistles could be heard from far away. Horses ran away when they came in sight.', u'Horses ran away when Maude and Dora came in sight.']
+['Maude and Dora had seen the trains rushing across the prairie, with long, rolling puffs of black smoke streaming back from the engine. Their roars and their wild, clear whistles could be heard from far away. Horses ran away when they came in sight.', 'Horses ran away when Maude and Dora came in sight.']
 """
 def __init__(self, segment='train',
              root=os.path.join(get_home_dir(), 'datasets', 'glue_wnli'),
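The mechanical pattern across these hunks: under Python 3, the repr of a str carries no u'' prefix, so the expected doctest outputs drop it (the -etc- lines appear to be the project's placeholder for elided output). A minimal sketch of the repr difference, independent of gluonnlp:

# Python 3: str reprs have no u'' prefix, so doctest expectations change.
sample = ['The sailors rode the breeze clear of the rocks.', '1']
print(sample)
# ['The sailors rode the breeze clear of the rocks.', '1']

# Python 2 printed the same list of unicode objects as:
# [u'The sailors rode the breeze clear of the rocks.', u'1']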

src/gluonnlp/data/registry.py

Lines changed: 1 addition & 1 deletion
@@ -60,7 +60,7 @@ def register(class_=None, **kwargs):
 ...     pass
 >>> my_dataset = gluonnlp.data.create('MyDataset')
 >>> print(type(my_dataset))
-<class 'MyDataset'>
+<class 'gluonnlp.data.registry.MyDataset'>

 """

src/gluonnlp/data/transforms.py

Lines changed: 17 additions & 17 deletions
@@ -153,10 +153,10 @@ class NLTKMosesTokenizer(object):
 Examples
 --------
 >>> tokenizer = gluonnlp.data.NLTKMosesTokenizer()
->>> tokenizer("Gluon NLP toolkit provides a suite of text processing tools.")
+>>> tokenizer('Gluon NLP toolkit provides a suite of text processing tools.')
 ['Gluon', 'NLP', 'toolkit', 'provides', 'a', 'suite', 'of', 'text', 'processing', 'tools', '.']
->>> tokenizer("Das Gluon NLP-Toolkit stellt eine Reihe von Textverarbeitungstools "
-...           "zur Verfügung.")
+>>> tokenizer('Das Gluon NLP-Toolkit stellt eine Reihe von Textverarbeitungstools '
+...           'zur Verfügung.')
 ['Das', 'Gluon', 'NLP-Toolkit', 'stellt', 'eine', 'Reihe', 'von', 'Textverarbeitungstools', \
 'zur', 'Verf\xfcgung', '.']
 """

@@ -215,10 +215,10 @@ class SacreMosesTokenizer(object):
 Examples
 --------
 >>> tokenizer = gluonnlp.data.SacreMosesTokenizer()
->>> tokenizer("Gluon NLP toolkit provides a suite of text processing tools.")
+>>> tokenizer('Gluon NLP toolkit provides a suite of text processing tools.')
 ['Gluon', 'NLP', 'toolkit', 'provides', 'a', 'suite', 'of', 'text', 'processing', 'tools', '.']
->>> tokenizer("Das Gluon NLP-Toolkit stellt eine Reihe von Textverarbeitungstools "
-...           "zur Verfügung.")
+>>> tokenizer('Das Gluon NLP-Toolkit stellt eine Reihe von Textverarbeitungstools '
+...           'zur Verfügung.')
 ['Das', 'Gluon', 'NLP-Toolkit', 'stellt', 'eine', 'Reihe', 'von', 'Textverarbeitungstools', \
 'zur', 'Verf\xfcgung', '.']
 """

@@ -275,17 +275,17 @@ class SpacyTokenizer(object):
 Parameters
 ----------
 lang : str
-    The language to tokenize. Default is "en", i.e, English.
+    The language to tokenize. Default is 'en', i.e, English.
     You may refer to https://spacy.io/usage/models for supported languages.

 Examples
 --------
 >>> tokenizer = gluonnlp.data.SpacyTokenizer()
->>> tokenizer(u"Gluon NLP toolkit provides a suite of text processing tools.")
+>>> tokenizer('Gluon NLP toolkit provides a suite of text processing tools.')
 ['Gluon', 'NLP', 'toolkit', 'provides', 'a', 'suite', 'of', 'text', 'processing', 'tools', '.']
 >>> tokenizer = gluonnlp.data.SpacyTokenizer('de')
->>> tokenizer(u"Das Gluon NLP-Toolkit stellt eine Reihe von Textverarbeitungstools"
-...           " zur Verfügung.")
+>>> tokenizer('Das Gluon NLP-Toolkit stellt eine Reihe von Textverarbeitungstools'
+...           ' zur Verfügung.')
 ['Das', 'Gluon', 'NLP-Toolkit', 'stellt', 'eine', 'Reihe', 'von', 'Textverarbeitungstools', \
 'zur', 'Verf\xfcgung', '.']
 """

@@ -463,9 +463,9 @@ class JiebaTokenizer(object):
 Examples
 --------
 >>> tokenizer = gluonnlp.data.JiebaTokenizer()
->>> tokenizer(u"我来到北京清华大学")
+>>> tokenizer('我来到北京清华大学')
 ['我', '来到', '北京', '清华大学']
->>> tokenizer(u"小明硕士毕业于中国科学院计算所,后在日本京都大学深造")
+>>> tokenizer('小明硕士毕业于中国科学院计算所,后在日本京都大学深造')
 ['小明', '硕士', '毕业', '于', '中国科学院', '计算所', ',', '后', '在', '日本京都大学', '深造']

 """

@@ -518,9 +518,9 @@ class NLTKStanfordSegmenter(object):
 Examples
 --------
 >>> tokenizer = gluonnlp.data.NLTKStanfordSegmenter() #doctest:+SKIP
->>> tokenizer(u"我来到北京清华大学")
+>>> tokenizer('我来到北京清华大学') #doctest:+SKIP
 ['我', '来到', '北京', '清华大学']
->>> tokenizer(u"小明硕士毕业于中国科学院计算所,后在日本京都大学深造")
+>>> tokenizer('小明硕士毕业于中国科学院计算所,后在日本京都大学深造') #doctest:+SKIP
 ['小明', '硕士', '毕业', '于', '中国科学院', '计算所', ',', '后', '在', '日本京都大学', '深造']

 """

@@ -729,10 +729,10 @@ class BERTBasicTokenizer(object):
 Examples
 --------
 >>> tokenizer = gluonnlp.data.BERTBasicTokenizer(lower=True)
->>> tokenizer(u" \tHeLLo!how \n Are yoU? ")
+>>> tokenizer(' \tHeLLo!how \n Are yoU? ')
 ['hello', '!', 'how', 'are', 'you', '?']
 >>> tokenizer = gluonnlp.data.BERTBasicTokenizer(lower=False)
->>> tokenizer(u" \tHeLLo!how \n Are yoU? ")
+>>> tokenizer(' \tHeLLo!how \n Are yoU? ')
 ['HeLLo', '!', 'how', 'Are', 'yoU', '?']

 """

@@ -923,7 +923,7 @@ class BERTTokenizer(object):
 ...                                           pretrained=False, root='./model')
 -etc-
 >>> tokenizer = gluonnlp.data.BERTTokenizer(vocab=vocab)
->>> tokenizer(u"gluonnlp: 使NLP变得简单。")
+>>> tokenizer('gluonnlp: 使NLP变得简单。')
 ['gl', '##uo', '##nn', '##lp', ':', '使', 'nl', '##p', '变', '得', '简', '单', '。']

 """

src/gluonnlp/embedding/evaluation.py

Lines changed: 2 additions & 2 deletions
@@ -79,15 +79,15 @@ def register(class_):
 >>> similarity_function = gluonnlp.embedding.evaluation.create('similarity',
 ...                                                            'MySimilarityFunction')
 >>> print(type(similarity_function))
-<class 'MySimilarityFunction'>
+<class 'gluonnlp.embedding.evaluation.MySimilarityFunction'>

 >>> @gluonnlp.embedding.evaluation.register
 ... class MyAnalogyFunction(gluonnlp.embedding.evaluation.WordEmbeddingAnalogyFunction):
 ...     def __init__(self, k=1, eps=1E-10):
 ...         pass
 >>> analogy_function = gluonnlp.embedding.evaluation.create('analogy', 'MyAnalogyFunction')
 >>> print(type(analogy_function))
-<class 'MyAnalogyFunction'>
+<class 'gluonnlp.embedding.evaluation.MyAnalogyFunction'>

 """

src/gluonnlp/embedding/token_embedding.py

Lines changed: 4 additions & 1 deletion
@@ -59,7 +59,7 @@ def register(embedding_cls):
 ...     pass
 >>> embed = gluonnlp.embedding.create('MyTextEmbed')
 >>> print(type(embed))
-<class 'MyTextEmbed'>
+<class 'gluonnlp.embedding.token_embedding.MyTextEmbed'>
 """

 register_text_embedding = registry.get_register_func(TokenEmbedding, 'token embedding')

@@ -814,6 +814,7 @@ class GloVe(TokenEmbedding):
 >>> import warnings; warnings.filterwarnings('ignore');
 >>> import gluonnlp as nlp
 >>> nlp.embedding.list_sources('GloVe')
+-etc-

 Parameters
 ----------

@@ -892,6 +893,7 @@ class FastText(TokenEmbedding):
 >>> import warnings; warnings.filterwarnings('ignore');
 >>> import gluonnlp as nlp
 >>> nlp.embedding.list_sources('FastText')
+-etc-


 Parameters

@@ -992,6 +994,7 @@ class Word2Vec(TokenEmbedding):
 >>> import warnings; warnings.filterwarnings('ignore');
 >>> import gluonnlp as nlp
 >>> nlp.embedding.list_sources('Word2Vec')
+-etc-

 Parameters
 ----------
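The added -etc- lines mark the list_sources output as elided, presumably because the list of published embedding sources is long and changes over time, which would make an exact expected output brittle.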

src/gluonnlp/metric/masked_accuracy.py

Lines changed: 4 additions & 4 deletions
@@ -214,12 +214,12 @@ class MaskedAccuracy(EvalMetric):
 >>> masks = [mx.nd.array([1, 1, 0])]
 >>> acc = MaskedAccuracy()
 >>> acc.update(preds=predicts, labels=labels, masks=masks)
->>> print acc.get()
-('accuracy', 0.5)
+>>> acc.get()
+('masked-accuracy', 0.5)
 >>> acc2 = MaskedAccuracy()
 >>> acc2.update(preds=predicts, labels=labels)
->>> print acc2.get()
-('accuracy', 0.6666667)
+>>> acc2.get()
+('masked-accuracy', 0.6666666666666666)
 """
 def __init__(self, axis=1, name='masked-accuracy',
              output_names=None, label_names=None):
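Beyond replacing the Python 2 print statements, the expected tuples now carry the metric's actual name ('masked-accuracy') and Python 3's full float repr. The arithmetic behind the two values, in plain Python (the predicts/labels definitions sit in the elided context above; the correctness vector here is a hypothetical stand-in that reproduces the same numbers):

correct = [1, 0, 1]   # hypothetical per-sample argmax == label indicator
mask    = [1, 1, 0]   # from the docstring above: the last sample is masked out

masked_acc = sum(c * m for c, m in zip(correct, mask)) / sum(mask)
plain_acc  = sum(correct) / len(correct)

print(masked_acc)  # 0.5
print(plain_acc)   # 0.6666666666666666  (full Python 3 float repr)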
