|
16 | 16 | # Lint as: python3 |
17 | 17 | """WebQuestions Benchmark for Question Answering.""" |
18 | 18 |
|
19 | | -from __future__ import absolute_import |
20 | | -from __future__ import division |
21 | | -from __future__ import print_function |
| 19 | +from __future__ import absolute_import, division, print_function |
22 | 20 |
|
23 | 21 | import json |
24 | 22 | import re |
|
43 | 41 | } |
44 | 42 | """ |
45 | 43 | _SPLIT_DOWNLOAD_URL = { |
46 | | - 'train': |
47 | | - 'https://worksheets.codalab.org/rest/bundles/0x4a763f8cde224c2da592b75f29e2f5c2/contents/blob/', |
48 | | - 'test': |
49 | | - 'https://worksheets.codalab.org/rest/bundles/0xe7bac352fce7448c9ef238fb0a297ec2/contents/blob/', |
| 44 | + "train": "https://worksheets.codalab.org/rest/bundles/0x4a763f8cde224c2da592b75f29e2f5c2/contents/blob/", |
| 45 | + "test": "https://worksheets.codalab.org/rest/bundles/0xe7bac352fce7448c9ef238fb0a297ec2/contents/blob/", |
50 | 46 | } |
51 | 47 |
|
52 | 48 | _DESCRIPTION = """\ |
|
58 | 54 |
|
59 | 55 |
|
class WebQuestions(nlp.GeneratorBasedBuilder):
    """WebQuestions Benchmark for Question Answering.

    Each example is a dict with three fields: ``url`` (string), ``question``
    (string) and ``answers`` (sequence of strings). One split is produced per
    entry of ``_SPLIT_DOWNLOAD_URL``.
    """

    VERSION = nlp.Version("1.0.0")

    def _info(self):
        """Returns the dataset metadata (description, features, homepage, citation)."""
        return nlp.DatasetInfo(
            description=_DESCRIPTION,
            features=nlp.Features(
                {
                    "url": nlp.Value("string"),
                    "question": nlp.Value("string"),
                    "answers": nlp.features.Sequence(nlp.Value("string")),
                }
            ),
            supervised_keys=None,
            homepage="https://worksheets.codalab.org/worksheets/0xba659fe363cb46e7a505c5b6a774dc8a",
            citation=_CITATION,
        )

    def _split_generators(self, dl_manager):
        """Returns SplitGenerators.

        Args:
            dl_manager: download manager; its ``download`` method is given the
                split-name -> URL mapping and is expected to return a mapping
                with the same keys and local file paths as values
                (NOTE(review): inferred from the ``.items()`` usage below).

        Returns:
            A list with one ``nlp.SplitGenerator`` per downloaded split, each
            forwarding its ``file_path`` to ``_generate_examples``.
        """
        file_paths = dl_manager.download(_SPLIT_DOWNLOAD_URL)

        return [
            nlp.SplitGenerator(name=split, gen_kwargs={"file_path": file_path})
            for split, file_path in file_paths.items()
        ]

    def _generate_examples(self, file_path):
        """Parses split file and yields examples.

        Args:
            file_path: path to a JSON file containing a list of objects with
                the keys ``url``, ``utterance`` and ``targetValue``.

        Yields:
            ``(index, example)`` pairs, where ``index`` is the position of the
            entry in the file and ``example`` has the keys ``url``,
            ``question`` and ``answers``.
        """

        def _target_to_answers(target):
            """Extracts the answer strings from a ``targetValue`` expression."""
            # Drop the leading "(list " wrapper and the final closing paren.
            target = re.sub(r"^\(list |\)$", "", target)
            # Each match is a (quoted, unquoted) tuple of which at most one
            # group is non-empty; joining the tuple yields the answer text.
            return ["".join(ans) for ans in re.findall(r'\(description (?:"([^"]+?)"|([^)]+?))\)\w*', target)]

        # Decode explicitly as UTF-8 so parsing does not depend on the
        # platform's default encoding.
        with open(file_path, encoding="utf-8") as f:
            examples = json.load(f)

        # The file is fully parsed above, so it need not stay open while the
        # generator is suspended between yields.
        for i, ex in enumerate(examples):
            yield i, {
                "url": ex["url"],
                "question": ex["utterance"],
                "answers": _target_to_answers(ex["targetValue"]),
            }
0 commit comments