
Commit 1b6e3f1

Merge branch 'master' into doc-metrics
2 parents: 308f00e + 1214801

File tree: 161 files changed (+2240 / −1580 lines). Large commits have some content hidden by default, so only a subset of the changed files is shown below.

.circleci/config.yml

Lines changed: 3 additions & 5 deletions
@@ -34,12 +34,10 @@ jobs:
     parallelism: 1
     steps:
       - checkout
-      # we need a version of isort with https://github.com/timothycrosley/isort/pull/1000
-      - run: sudo pip install git+git://github.com/timothycrosley/isort.git@e63ae06ec7d70b06df9e528357650281a3d3ec22#egg=isort
       - run: sudo pip install .[quality]
-      - run: black --check --line-length 119 --target-version py36 src tests benchmarks
-      - run: isort --check-only --recursive src tests benchmarks
-      - run: flake8 src tests benchmarks
+      - run: black --check --line-length 119 --target-version py36 tests src benchmarks datasets metrics
+      - run: isort --check-only tests src benchmarks datasets metrics
+      - run: flake8 tests src benchmarks datasets metrics
   build_doc:
     working_directory: ~/nlp
     docker:

CONTRIBUTING.md

Lines changed: 0 additions & 7 deletions
@@ -28,13 +28,6 @@
    it with `pip uninstall nlp` before reinstalling it in editable
    mode with the `-e` flag.)

-   Right now, we need an unreleased version of `isort` to avoid a
-   [bug](https://github.com/timothycrosley/isort/pull/1000):
-
-   ```bash
-   $ pip install -U git+git://github.com/timothycrosley/isort.git@e63ae06ec7d70b06df9e528357650281a3d3ec22#egg=isort
-   ```
-
 5. Develop the features on your branch. If you want to add a dataset see more in-detail intsructions in the section [*How to add a dataset*](#how-to-add-a-dataset). Alternatively, you can follow the steps to [add a dataset](https://huggingface.co/nlp/add_dataset.html) and [share a dataset](https://huggingface.co/nlp/share_dataset.html) in the documentation.

 6. Format your code. Run black and isort so that your newly added files look nice with the following command:

Makefile

Lines changed: 5 additions & 5 deletions
@@ -3,15 +3,15 @@
 # Check that source code meets quality standards

 quality:
-	black --check --line-length 119 --target-version py36 tests src benchmarks
-	isort --check-only --recursive tests src benchmarks datasets
-	flake8 tests src benchmarks
+	black --check --line-length 119 --target-version py36 tests src benchmarks datasets metrics
+	isort --check-only tests src benchmarks datasets metrics
+	flake8 tests src benchmarks datasets metrics

 # Format source code automatically

 style:
-	black --line-length 119 --target-version py36 tests src benchmarks
-	isort --recursive tests src datasets benchmarks
+	black --line-length 119 --target-version py36 tests src benchmarks datasets metrics
+	isort tests src benchmarks datasets metrics

 # Run tests for the library
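With this change the local targets and CI cover the same five trees: `make quality` runs black, isort, and flake8 over tests, src, benchmarks, datasets, and metrics, exactly matching the .circleci/config.yml job above, and `make style` applies the same black/isort settings in write mode. The dropped `--recursive` flags line up with the removal of the pinned isort commit, presumably because newer isort releases traverse directories passed on the command line by default.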

benchmarks/benchmark_array_xd.py

Lines changed: 2 additions & 1 deletion
@@ -2,10 +2,11 @@
 import os
 import tempfile

+from utils import generate_examples, get_duration
+
 import nlp
 from nlp.arrow_writer import ArrowWriter
 from nlp.features import Array2D
-from utils import generate_examples, get_duration


 SHAPE_TEST_1 = (30, 487)

benchmarks/benchmark_indices_mapping.py

Lines changed: 2 additions & 1 deletion
@@ -2,9 +2,10 @@
 import os
 import tempfile

-import nlp
 from utils import generate_example_dataset, get_duration

+import nlp
+

 SPEED_TEST_N_EXAMPLES = 500_000

benchmarks/benchmark_iterating.py

Lines changed: 2 additions & 1 deletion
@@ -2,9 +2,10 @@
 import os
 import tempfile

-import nlp
 from utils import generate_example_dataset, get_duration

+import nlp
+

 SPEED_TEST_N_EXAMPLES = 50_000
 SMALL_TEST = 5_000

benchmarks/benchmark_map_filter.py

Lines changed: 1 addition & 1 deletion
@@ -3,9 +3,9 @@
 import tempfile

 import transformers
+from utils import generate_example_dataset, get_duration

 import nlp
-from utils import generate_example_dataset, get_duration


 SPEED_TEST_N_EXAMPLES = 500_000
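All four benchmark diffs apply the same import regrouping: `from utils import ...` now sorts into the third-party block (note how it lands next to `import transformers` in benchmark_map_filter.py), while `import nlp` moves into its own trailing block. A minimal sketch of the layout the diffs converge on; the section labels are inferred from the diffs themselves, not taken from the project's actual isort configuration:

```python
# Sketch of the import order these benchmark diffs converge on. The
# section assignments are assumptions inferred from the diffs:
# `utils` appears to be sorted as third-party, `nlp` as first-party.

import os        # standard library
import tempfile  # standard library

import transformers                                        # third-party
from utils import generate_example_dataset, get_duration   # sorted with third-party

import nlp  # first-party: the package under development sorts last
```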

datasets/aeslc/aeslc.py

Lines changed: 6 additions & 3 deletions
@@ -69,13 +69,16 @@ def _split_generators(self, dl_manager):
         input_path = os.path.join(dl_path, "AESLC-master", "enron_subject_line")
         return [
             nlp.SplitGenerator(
-                name=nlp.Split.TRAIN, gen_kwargs={"pattern": os.path.join(input_path, "train", "*.subject")},
+                name=nlp.Split.TRAIN,
+                gen_kwargs={"pattern": os.path.join(input_path, "train", "*.subject")},
             ),
             nlp.SplitGenerator(
-                name=nlp.Split.VALIDATION, gen_kwargs={"pattern": os.path.join(input_path, "dev", "*.subject")},
+                name=nlp.Split.VALIDATION,
+                gen_kwargs={"pattern": os.path.join(input_path, "dev", "*.subject")},
             ),
             nlp.SplitGenerator(
-                name=nlp.Split.TEST, gen_kwargs={"pattern": os.path.join(input_path, "test", "*.subject")},
+                name=nlp.Split.TEST,
+                gen_kwargs={"pattern": os.path.join(input_path, "test", "*.subject")},
             ),
         ]
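The change here is formatting only: each `SplitGenerator(...)` call is split to one keyword argument per line. As context for what these calls do, a hedged sketch of how `gen_kwargs` flow into example generation; the builder skeleton, feature schema, and glob handling are illustrative assumptions, not the real aeslc.py:

```python
# Illustrative-only sketch: gen_kwargs declared in _split_generators are
# unpacked into _generate_examples, one call per split.
import glob
import os

import nlp


class AeslcSketch(nlp.GeneratorBasedBuilder):
    def _info(self):
        # Minimal schema so the sketch is self-contained (assumed, not actual).
        return nlp.DatasetInfo(features=nlp.Features({"text": nlp.Value("string")}))

    def _split_generators(self, dl_manager):
        input_path = os.path.join("AESLC-master", "enron_subject_line")  # placeholder
        return [
            nlp.SplitGenerator(
                name=nlp.Split.TRAIN,
                gen_kwargs={"pattern": os.path.join(input_path, "train", "*.subject")},
            ),
        ]

    def _generate_examples(self, pattern):
        # Each split's gen_kwargs dict arrives here as keyword arguments.
        for path in sorted(glob.glob(pattern)):
            with open(path, encoding="utf-8") as f:
                yield path, {"text": f.read()}
```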

datasets/ag_news/ag_news.py

Lines changed: 11 additions & 12 deletions
Note: the paired −/+ description lines below appear identical because they differ only in trailing whitespace, which this commit strips.

@@ -19,25 +19,24 @@
 from __future__ import absolute_import, division, print_function

 import csv
-import os

 import nlp


 _DESCRIPTION = """\
-AG is a collection of more than 1 million news articles. News articles have been
-gathered from more than 2000 news sources by ComeToMyHead in more than 1 year of
-activity. ComeToMyHead is an academic news search engine which has been running
-since July, 2004. The dataset is provided by the academic comunity for research
-purposes in data mining (clustering, classification, etc), information retrieval
-(ranking, search, etc), xml, data compression, data streaming, and any other
-non-commercial activity. For more information, please refer to the link
+AG is a collection of more than 1 million news articles. News articles have been
+gathered from more than 2000 news sources by ComeToMyHead in more than 1 year of
+activity. ComeToMyHead is an academic news search engine which has been running
+since July, 2004. The dataset is provided by the academic comunity for research
+purposes in data mining (clustering, classification, etc), information retrieval
+(ranking, search, etc), xml, data compression, data streaming, and any other
+non-commercial activity. For more information, please refer to the link
 http://www.di.unipi.it/~gulli/AG_corpus_of_news_articles.html .

-The AG's news topic classification dataset is constructed by Xiang Zhang
-([email protected]) from the dataset above. It is used as a text
-classification benchmark in the following paper: Xiang Zhang, Junbo Zhao, Yann
-LeCun. Character-level Convolutional Networks for Text Classification. Advances
+The AG's news topic classification dataset is constructed by Xiang Zhang
+([email protected]) from the dataset above. It is used as a text
+classification benchmark in the following paper: Xiang Zhang, Junbo Zhao, Yann
+LeCun. Character-level Convolutional Networks for Text Classification. Advances
 in Neural Information Processing Systems 28 (NIPS 2015).
 """

datasets/allocine/allocine.py

Lines changed: 7 additions & 4 deletions
@@ -32,9 +32,9 @@ class AllocineConfig(nlp.BuilderConfig):
     def __init__(self, **kwargs):
         """BuilderConfig for Allocine.

-    Args:
-      **kwargs: keyword arguments forwarded to super.
-    """
+        Args:
+            **kwargs: keyword arguments forwarded to super.
+        """
         super(AllocineConfig, self).__init__(**kwargs)


@@ -58,7 +58,10 @@ def _info(self):
         return nlp.DatasetInfo(
             description=_DESCRIPTION,
             features=nlp.Features(
-                {"review": nlp.Value("string"), "label": nlp.features.ClassLabel(names=["neg", "pos"]),}
+                {
+                    "review": nlp.Value("string"),
+                    "label": nlp.features.ClassLabel(names=["neg", "pos"]),
+                }
             ),
             supervised_keys=None,
             homepage="https://github.com/TheophileBlard/french-sentiment-analysis-with-bert",
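Beyond the formatting, the `ClassLabel` entry in this diff is worth a note: it stores labels as integer ids and keeps the name-to-id mapping on the feature itself. A small hedged sketch, assuming the usual `str2int`/`int2str` accessors of the `nlp` API of this era:

```python
import nlp

# ClassLabel maps label names to integer ids in declaration order:
# "neg" -> 0, "pos" -> 1 for the feature declared in the diff above.
label = nlp.features.ClassLabel(names=["neg", "pos"])

print(label.str2int("pos"))  # 1
print(label.int2str(0))      # "neg"
print(label.num_classes)     # 2
```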
