Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion benchmarks/results/benchmark_array_xd.json
Original file line number Diff line number Diff line change
@@ -1 +1 @@
{"write_array2d": 0.07093274600629229, "read_unformated after write_array2d": 0.03530075500020757, "read_formatted_as_numpy after write_array2d": 0.10929270699853078, "read_batch_unformated after write_array2d": 0.03727920600795187, "read_batch_formatted_as_numpy after write_array2d": 0.018853643006877974, "read_col_unformated after write_array2d": 0.05644163000397384, "read_col_formatted_as_numpy after write_array2d": 0.011610292000113986, "write_nested_sequence": 1.6535991109994939, "read_unformated after write_nested_sequence": 0.3739209540071897, "read_formatted_as_numpy after write_nested_sequence": 0.40762836500653066, "read_batch_unformated after write_nested_sequence": 0.3337586460111197, "read_batch_formatted_as_numpy after write_nested_sequence": 0.054717567007173784, "read_col_unformated after write_nested_sequence": 0.3173944180016406, "read_col_formatted_as_numpy after write_nested_sequence": 0.004956340009812266, "write_flattened_sequence": 1.4975415869994322, "read_unformated after write_flattened_sequence": 0.26713552299770527, "read_formatted_as_numpy after write_flattened_sequence": 0.07673935199272819, "read_batch_unformated after write_flattened_sequence": 0.25450974798877724, "read_batch_formatted_as_numpy after write_flattened_sequence": 0.009374254994327202, "read_col_unformated after write_flattened_sequence": 0.25912448299641255, "read_col_formatted_as_numpy after write_flattened_sequence": 0.004277604995877482}
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

are these files supposed to be part of the PR ?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

we don't care that much I guess but let me remove them indeed

{"write_array2d": 0.14168284999323077, "read_unformated after write_array2d": 0.04353281999647152, "read_formatted_as_numpy after write_array2d": 0.1285462469968479, "read_batch_unformated after write_array2d": 0.023109222995117307, "read_batch_formatted_as_numpy after write_array2d": 0.011352884990628809, "read_col_unformated after write_array2d": 0.037052362007671036, "read_col_formatted_as_numpy after write_array2d": 0.007985618998645805, "write_nested_sequence": 1.4927163410029607, "read_unformated after write_nested_sequence": 0.28319963401008863, "read_formatted_as_numpy after write_nested_sequence": 0.419271487990045, "read_batch_unformated after write_nested_sequence": 0.3234798710036557, "read_batch_formatted_as_numpy after write_nested_sequence": 0.03850809299910907, "read_col_unformated after write_nested_sequence": 0.29384092400141526, "read_col_formatted_as_numpy after write_nested_sequence": 0.004250421989127062, "write_flattened_sequence": 1.4521546780015342, "read_unformated after write_flattened_sequence": 0.25513897799828555, "read_formatted_as_numpy after write_flattened_sequence": 0.07564631900459062, "read_batch_unformated after write_flattened_sequence": 0.2758980469952803, "read_batch_formatted_as_numpy after write_flattened_sequence": 0.011008214991306886, "read_col_unformated after write_flattened_sequence": 0.25848906899045687, "read_col_formatted_as_numpy after write_flattened_sequence": 0.004328447001171298}
2 changes: 1 addition & 1 deletion benchmarks/results/benchmark_indices_mapping.json
Original file line number Diff line number Diff line change
@@ -1 +1 @@
{"num examples": 500000, "select": 0.034579699000460096, "sort": 0.5558962740033166, "shuffle": 0.18372017299407162, "train_test_split": 0.29882429300050717, "shard": 0.014594822001527064}
{"num examples": 500000, "select": 0.03741131999413483, "sort": 0.7371353159978753, "shuffle": 0.17655655200360343, "train_test_split": 0.29633847798686475, "shard": 0.01452581599005498}
2 changes: 1 addition & 1 deletion benchmarks/results/benchmark_iterating.json
Original file line number Diff line number Diff line change
@@ -1 +1 @@
{"num examples": 50000, "read 5000": 0.2146801049966598, "read 50000": 2.115211837008246, "read_batch 50000 10": 1.4512460459955037, "read_batch 50000 100": 1.385236697999062, "read_batch 50000 1000": 1.4181318079936318, "read_formatted numpy 5000": 4.044872473998112, "read_formatted pandas 5000": 3.4310112629900686, "read_formatted torch 5000": 4.470335923993844, "read_formatted tensorflow 5000": 5.384795637000934, "read_formatted_batch numpy 5000 10": 0.4460094000096433, "read_formatted_batch numpy 5000 1000": 0.007665968994842842, "shuffled read 5000": 0.2283045439980924, "shuffled read 50000": 2.2466989499953343, "shuffled read_batch 50000 10": 59.94365781600936, "shuffled read_batch 50000 100": 7.204961794006522, "shuffled read_batch 50000 1000": 2.4927480350015685, "shuffled read_formatted numpy 5000": 4.631365966997691, "shuffled read_formatted_batch numpy 5000 10": 6.5569094810052775, "shuffled read_formatted_batch numpy 5000 1000": 0.06912206900597084}
{"num examples": 50000, "read 5000": 0.2152090710005723, "read 50000": 2.077654693988734, "read_batch 50000 10": 1.5041199039987987, "read_batch 50000 100": 1.5411947140091797, "read_batch 50000 1000": 1.4684901159926085, "read_formatted numpy 5000": 4.584776938994764, "read_formatted pandas 5000": 3.7457121399929747, "read_formatted torch 5000": 4.565676491998602, "read_formatted tensorflow 5000": 5.269861594992108, "read_formatted_batch numpy 5000 10": 0.4242750950070331, "read_formatted_batch numpy 5000 1000": 0.007607111998368055, "shuffled read 5000": 0.22604441999283154, "shuffled read 50000": 2.268928524994408, "shuffled read_batch 50000 10": 55.44462437101174, "shuffled read_batch 50000 100": 6.876476717996411, "shuffled read_batch 50000 1000": 2.1420724369963864, "shuffled read_formatted numpy 5000": 4.8052272600034485, "shuffled read_formatted_batch numpy 5000 10": 6.500664097999106, "shuffled read_formatted_batch numpy 5000 1000": 0.0754691059992183}
2 changes: 1 addition & 1 deletion benchmarks/results/benchmark_map_filter.json
Original file line number Diff line number Diff line change
@@ -1 +1 @@
{"num examples": 500000, "map identity": 10.665630593008245, "map identity batched": 0.7198751819960307, "map no-op batched": 0.5252309559873538, "map no-op batched numpy": 0.5331113779975567, "map no-op batched pandas": 0.3913883039931534, "map no-op batched pytorch": 0.5091918510006508, "map no-op batched tensorflow": 1.2273747170111164, "map fast-tokenizer batched": 8.285753931006184, "filter": 1.7507986380078364}
{"num examples": 500000, "map identity": 10.19139202599763, "map identity batched": 0.6804238399927272, "map no-op batched": 0.5342009569867514, "map no-op batched numpy": 0.5792830920108827, "map no-op batched pandas": 0.4343639040016569, "map no-op batched pytorch": 0.5403374370071106, "map no-op batched tensorflow": 1.3869360350072384, "map fast-tokenizer batched": 8.074308118986664, "filter": 1.841787679004483}
1 change: 0 additions & 1 deletion datasets/wiki_dpr/wiki_dpr.py
Original file line number Diff line number Diff line change
Expand Up @@ -181,7 +181,6 @@ def _post_process(self, dataset, resources_paths):
dataset.add_faiss_index(
"embeddings",
train_size=train_size,
faiss_verbose=logging.getLogger().level <= logging.DEBUG,
custom_index=ivf_index,
)
logging.info("Saving wiki_dpr faiss index")
Expand Down
2 changes: 1 addition & 1 deletion docs/source/add_dataset.rst
Original file line number Diff line number Diff line change
Expand Up @@ -216,7 +216,7 @@ Here again, let's take the simple example of the `squad dataset loading script <

def _generate_examples(self, filepath):
"""This function returns the examples in the raw (text) form."""
logging.info("generating examples from = %s", filepath)
logger.info("generating examples from = %s", filepath)
with open(filepath) as f:
squad = json.load(f)
for article in squad["data"]:
Expand Down
1 change: 1 addition & 0 deletions docs/source/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -73,3 +73,4 @@ The documentation is organized in five parts:
package_reference/loading_methods
package_reference/main_classes
package_reference/builder_classes
package_reference/logging_methods
50 changes: 50 additions & 0 deletions docs/source/package_reference/logging_methods.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
Logging methods
----------------------------------------------------

`nlp` tries to be very transparent and explicit about its inner workings, but this can be quite verbose at times.
A series of logging methods let you easily adjust the level of logging of the whole library.

Functions
~~~~~~~~~~~~~~~~~~~~~

.. autofunction:: nlp.logging.get_verbosity

.. autofunction:: nlp.logging.set_verbosity

.. autofunction:: nlp.logging.set_verbosity_info

.. autofunction:: nlp.logging.set_verbosity_warning

.. autofunction:: nlp.logging.set_verbosity_debug

.. autofunction:: nlp.logging.set_verbosity_error

.. autofunction:: nlp.logging.disable_default_handler

.. autofunction:: nlp.logging.enable_default_handler

.. autofunction:: nlp.logging.disable_propagation

.. autofunction:: nlp.logging.enable_propagation

.. autofunction:: nlp.logging.get_logger

Levels
~~~~~~~~~~~~~~~~~~~~~

.. autodata:: nlp.logging.CRITICAL

.. autodata:: nlp.logging.DEBUG

.. autodata:: nlp.logging.ERROR

.. autodata:: nlp.logging.FATAL

.. autodata:: nlp.logging.INFO

.. autodata:: nlp.logging.NOTSET

.. autodata:: nlp.logging.WARN

.. autodata:: nlp.logging.WARNING

4 changes: 2 additions & 2 deletions metrics/bleurt/bleurt.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,14 +14,14 @@
# limitations under the License.
""" BLEURT metric. """

import logging
import os

import nlp
from nlp.logging import get_logger
from bleurt import score # From: git+https://github.com/google-research/bleurt.git


logger = logging.getLogger(__name__)
logger = get_logger(__name__)

_CITATION = """\
@inproceedings{bleurt,
Expand Down
3 changes: 0 additions & 3 deletions nlp-cli
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
#!/usr/bin/env python
import logging
from argparse import ArgumentParser

from nlp.commands.convert import ConvertCommand
Expand All @@ -10,8 +9,6 @@ from nlp.commands.test import TestCommand
from nlp.commands.run_beam import RunBeamCommand
from nlp.commands.dummy_data import DummyDataCommand

logging.basicConfig(level=logging.INFO)

if __name__ == '__main__':
parser = ArgumentParser('HuggingFace NLP CLI tool', usage='nlp-cli <command> [<args>]')
commands_parser = parser.add_subparsers(help='nlp-cli command helpers')
Expand Down
28 changes: 1 addition & 27 deletions notebooks/Overview.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -2330,32 +2330,6 @@
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "hJHyEmievSUh",
"colab_type": "code",
"outputId": "afc32e2a-6d42-4d77-fee6-0afdb5a1f206",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 34
}
},
"source": [
"import logging\n",
"logging.basicConfig(level=logging.INFO)"
],
"execution_count": 0,
"outputs": [
{
"output_type": "stream",
"text": [
"17\n"
],
"name": "stdout"
}
]
},
{
"cell_type": "code",
"metadata": {
Expand Down Expand Up @@ -4991,4 +4965,4 @@
"outputs": []
}
]
}
}
Loading