Skip to content

Commit 0e85b15

Browse files
fix web questions dummy data
1 parent 34fc6aa commit 0e85b15

File tree

2 files changed

+46
-55
lines changed

2 files changed

+46
-55
lines changed
-88 Bytes
Binary file not shown.

datasets/web_questions/web_questions.py

Lines changed: 46 additions & 55 deletions
Original file line number | Diff line number | Diff line change
@@ -16,9 +16,7 @@
16 16
# Lint as: python3
17 17
"""WebQuestions Benchmark for Question Answering."""
18 18

19-
from __future__ import absolute_import
20-
from __future__ import division
21-
from __future__ import print_function
19+
from __future__ import absolute_import, division, print_function
22 20

23 21
import json
24 22
import re
@@ -43,10 +41,8 @@
43 41
}
44 42
"""
45 43
_SPLIT_DOWNLOAD_URL = {
46-
'train':
47-
'https://worksheets.codalab.org/rest/bundles/0x4a763f8cde224c2da592b75f29e2f5c2/contents/blob/',
48-
'test':
49-
'https://worksheets.codalab.org/rest/bundles/0xe7bac352fce7448c9ef238fb0a297ec2/contents/blob/',
44+
"train": "https://worksheets.codalab.org/rest/bundles/0x4a763f8cde224c2da592b75f29e2f5c2/contents/blob/",
45+
"test": "https://worksheets.codalab.org/rest/bundles/0xe7bac352fce7448c9ef238fb0a297ec2/contents/blob/",
50 46
}
51 47

52 48
_DESCRIPTION = """\
@@ -58,51 +54,46 @@
58 54

59 55

60 56
class WebQuestions(nlp.GeneratorBasedBuilder):
61-
"""WebQuestions Benchmark for Question Answering."""
62-
63-
VERSION = nlp.Version('1.0.0')
64-
65-
def _info(self):
66-
return nlp.DatasetInfo(
67-
description=_DESCRIPTION,
68-
features=nlp.Features({
69-
'url':
70-
nlp.Value('string'),
71-
'question':
72-
nlp.Value('string'),
73-
'answers':
74-
nlp.features.Sequence(nlp.Value('string')),
75-
}),
76-
supervised_keys=None,
77-
homepage='https://worksheets.codalab.org/worksheets/0xba659fe363cb46e7a505c5b6a774dc8a',
78-
citation=_CITATION,
79-
)
80-
81-
def _split_generators(self, dl_manager):
82-
"""Returns SplitGenerators."""
83-
file_paths = dl_manager.download(_SPLIT_DOWNLOAD_URL)
84-
85-
return [
86-
nlp.SplitGenerator(
87-
name=split, gen_kwargs={'file_path': file_path})
88-
for split, file_path in file_paths.items()
89-
]
90-
91-
def _generate_examples(self, file_path):
92-
"""Parses split file and yields examples."""
93-
94-
def _target_to_answers(target):
95-
target = re.sub(r'^\(list |\)$', '', target)
96-
return [
97-
''.join(ans) for ans in
98-
re.findall(r'\(description (?:"([^"]+?)"|([^)]+?))\)\w*', target)
99-
]
100-
101-
with open(file_path) as f:
102-
examples = json.load(f)
103-
for i, ex in enumerate(examples):
104-
yield i, {
105-
'url': ex['url'],
106-
'question': ex['utterance'],
107-
'answers': _target_to_answers(ex['targetValue']),
108-
}
57+
"""WebQuestions Benchmark for Question Answering."""
58+
59+
VERSION = nlp.Version("1.0.0")
60+
61+
def _info(self):
62+
return nlp.DatasetInfo(
63+
description=_DESCRIPTION,
64+
features=nlp.Features(
65+
{
66+
"url": nlp.Value("string"),
67+
"question": nlp.Value("string"),
68+
"answers": nlp.features.Sequence(nlp.Value("string")),
69+
}
70+
),
71+
supervised_keys=None,
72+
homepage="https://worksheets.codalab.org/worksheets/0xba659fe363cb46e7a505c5b6a774dc8a",
73+
citation=_CITATION,
74+
)
75+
76+
def _split_generators(self, dl_manager):
77+
"""Returns SplitGenerators."""
78+
file_paths = dl_manager.download(_SPLIT_DOWNLOAD_URL)
79+
80+
return [
81+
nlp.SplitGenerator(name=split, gen_kwargs={"file_path": file_path})
82+
for split, file_path in file_paths.items()
83+
]
84+
85+
def _generate_examples(self, file_path):
86+
"""Parses split file and yields examples."""
87+
88+
def _target_to_answers(target):
89+
target = re.sub(r"^\(list |\)$", "", target)
90+
return ["".join(ans) for ans in re.findall(r'\(description (?:"([^"]+?)"|([^)]+?))\)\w*', target)]
91+
92+
with open(file_path) as f:
93+
examples = json.load(f)
94+
for i, ex in enumerate(examples):
95+
yield i, {
96+
"url": ex["url"],
97+
"question": ex["utterance"],
98+
"answers": _target_to_answers(ex["targetValue"]),
99+
}

0 commit comments

Comments (0)