Skip to content

Commit 8ff90db

Browse files
authored
Merge pull request #2787 from tseaver/2354-bigquery-nested_data_types
Correctly model JSON repr of complex nested records.
2 parents d775489 + ab9278f commit 8ff90db

File tree

4 files changed

+155
-12
lines changed

4 files changed

+155
-12
lines changed

bigquery/google/cloud/bigquery/_helpers.py

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -58,10 +58,11 @@ def _record_from_json(value, field):
5858
"""Coerce 'value' to a mapping, if set or not nullable."""
5959
if _not_null(value, field):
6060
record = {}
61-
for subfield, cell in zip(field.fields, value['f']):
61+
record_iter = zip(field.fields, value['f'])
62+
for subfield, cell in record_iter:
6263
converter = _CELLDATA_FROM_JSON[subfield.field_type]
63-
if field.mode == 'REPEATED':
64-
value = [converter(item, subfield) for item in cell['v']]
64+
if subfield.mode == 'REPEATED':
65+
value = [converter(item['v'], subfield) for item in cell['v']]
6566
else:
6667
value = converter(cell['v'], subfield)
6768
record[subfield.name] = value
@@ -104,7 +105,7 @@ def _row_from_json(row, schema):
104105
for field, cell in zip(schema, row['f']):
105106
converter = _CELLDATA_FROM_JSON[field.field_type]
106107
if field.mode == 'REPEATED':
107-
row_data.append([converter(item, field)
108+
row_data.append([converter(item['v'], field)
108109
for item in cell['v']])
109110
else:
110111
row_data.append(converter(cell['v'], field))

bigquery/unit_tests/test__helpers.py

Lines changed: 94 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -186,7 +186,7 @@ def test_w_scalar_subfield(self):
186186
def test_w_repeated_subfield(self):
187187
subfield = _Field('REPEATED', 'color', 'STRING')
188188
field = _Field('REQUIRED', fields=[subfield])
189-
value = {'f': [{'v': ['red', 'yellow', 'blue']}]}
189+
value = {'f': [{'v': [{'v': 'red'}, {'v': 'yellow'}, {'v': 'blue'}]}]}
190190
coerced = self._call_fut(value, field)
191191
self.assertEqual(coerced, {'color': ['red', 'yellow', 'blue']})
192192

@@ -234,6 +234,97 @@ def test_w_string_value(self):
234234
self.assertEqual(coerced, 'Wonderful!')
235235

236236

237+
class Test_row_from_json(unittest.TestCase):
238+
239+
def _call_fut(self, row, schema):
240+
from google.cloud.bigquery._helpers import _row_from_json
241+
return _row_from_json(row, schema)
242+
243+
def test_w_single_scalar_column(self):
244+
# SELECT 1 AS col
245+
col = _Field('REQUIRED', 'col', 'INTEGER')
246+
row = {u'f': [{u'v': u'1'}]}
247+
self.assertEqual(self._call_fut(row, schema=[col]), (1,))
248+
249+
def test_w_single_struct_column(self):
250+
# SELECT (1, 2) AS col
251+
sub_1 = _Field('REQUIRED', 'sub_1', 'INTEGER')
252+
sub_2 = _Field('REQUIRED', 'sub_2', 'INTEGER')
253+
col = _Field('REQUIRED', 'col', 'RECORD', fields=[sub_1, sub_2])
254+
row = {u'f': [{u'v': {u'f': [{u'v': u'1'}, {u'v': u'2'}]}}]}
255+
self.assertEqual(self._call_fut(row, schema=[col]),
256+
({'sub_1': 1, 'sub_2': 2},))
257+
258+
def test_w_single_array_column(self):
259+
# SELECT [1, 2, 3] as col
260+
col = _Field('REPEATED', 'col', 'INTEGER')
261+
row = {u'f': [{u'v': [{u'v': u'1'}, {u'v': u'2'}, {u'v': u'3'}]}]}
262+
self.assertEqual(self._call_fut(row, schema=[col]),
263+
([1, 2, 3],))
264+
265+
def test_w_struct_w_nested_array_column(self):
266+
# SELECT ([1, 2], 3, [4, 5]) as col
267+
first = _Field('REPEATED', 'first', 'INTEGER')
268+
second = _Field('REQUIRED', 'second', 'INTEGER')
269+
third = _Field('REPEATED', 'third', 'INTEGER')
270+
col = _Field('REQUIRED', 'col', 'RECORD',
271+
fields=[first, second, third])
272+
row = {
273+
u'f': [
274+
{u'v': {
275+
u'f': [
276+
{u'v': [{u'v': u'1'}, {u'v': u'2'}]},
277+
{u'v': u'3'},
278+
{u'v': [{u'v': u'4'}, {u'v': u'5'}]}
279+
]
280+
}},
281+
]
282+
}
283+
self.assertEqual(
284+
self._call_fut(row, schema=[col]),
285+
({u'first': [1, 2], u'second': 3, u'third': [4, 5]},))
286+
287+
def test_w_array_of_struct(self):
288+
# SELECT [(1, 2, 3), (4, 5, 6)] as col
289+
first = _Field('REQUIRED', 'first', 'INTEGER')
290+
second = _Field('REQUIRED', 'second', 'INTEGER')
291+
third = _Field('REQUIRED', 'third', 'INTEGER')
292+
col = _Field('REPEATED', 'col', 'RECORD',
293+
fields=[first, second, third])
294+
row = {u'f': [{u'v': [
295+
{u'v': {u'f': [{u'v': u'1'}, {u'v': u'2'}, {u'v': u'3'}]}},
296+
{u'v': {u'f': [{u'v': u'4'}, {u'v': u'5'}, {u'v': u'6'}]}},
297+
]}]}
298+
self.assertEqual(
299+
self._call_fut(row, schema=[col]),
300+
([
301+
{u'first': 1, u'second': 2, u'third': 3},
302+
{u'first': 4, u'second': 5, u'third': 6},
303+
],))
304+
305+
def test_w_array_of_struct_w_array(self):
306+
# SELECT [([1, 2, 3], 4), ([5, 6], 7)]
307+
first = _Field('REPEATED', 'first', 'INTEGER')
308+
second = _Field('REQUIRED', 'second', 'INTEGER')
309+
col = _Field('REPEATED', 'col', 'RECORD', fields=[first, second])
310+
row = {u'f': [{u'v': [
311+
{u'v': {u'f': [
312+
{u'v': [{u'v': u'1'}, {u'v': u'2'}, {u'v': u'3'}]},
313+
{u'v': u'4'}
314+
]}},
315+
{u'v': {u'f': [
316+
{u'v': [{u'v': u'5'}, {u'v': u'6'}]},
317+
{u'v': u'7'}
318+
]}}
319+
]}]}
320+
self.assertEqual(
321+
self._call_fut(row, schema=[col]),
322+
([
323+
{u'first': [1, 2, 3], u'second': 4},
324+
{u'first': [5, 6], u'second': 7},
325+
],))
326+
327+
237328
class Test_rows_from_json(unittest.TestCase):
238329

239330
def _call_fut(self, value, field):
@@ -253,12 +344,12 @@ def test_w_record_subfield(self):
253344
{'f': [
254345
{'v': 'Phred Phlyntstone'},
255346
{'v': {'f': [{'v': '800'}, {'v': '555-1212'}, {'v': 1}]}},
256-
{'v': ['orange', 'black']},
347+
{'v': [{'v': 'orange'}, {'v': 'black'}]},
257348
]},
258349
{'f': [
259350
{'v': 'Bharney Rhubble'},
260351
{'v': {'f': [{'v': '877'}, {'v': '768-5309'}, {'v': 2}]}},
261-
{'v': ['brown']},
352+
{'v': [{'v': 'brown'}]},
262353
]},
263354
{'f': [
264355
{'v': 'Wylma Phlyntstone'},

bigquery/unit_tests/test_table.py

Lines changed: 10 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1173,22 +1173,27 @@ def test_fetch_data_w_repeated_fields(self):
11731173
'pageToken': TOKEN,
11741174
'rows': [
11751175
{'f': [
1176-
{'v': ['red', 'green']},
1177-
{'v': [{'f': [{'v': ['1', '2']},
1178-
{'v': ['3.1415', '1.414']}]}]},
1176+
{'v': [{'v': 'red'}, {'v': 'green'}]},
1177+
{'v': [{
1178+
'v': {
1179+
'f': [
1180+
{'v': [{'v': '1'}, {'v': '2'}]},
1181+
{'v': [{'v': '3.1415'}, {'v': '1.414'}]},
1182+
]}
1183+
}]},
11791184
]},
11801185
]
11811186
}
11821187
conn = _Connection(DATA)
11831188
client = _Client(project=self.PROJECT, connection=conn)
11841189
dataset = _Dataset(client)
1185-
full_name = SchemaField('color', 'STRING', mode='REPEATED')
1190+
color = SchemaField('color', 'STRING', mode='REPEATED')
11861191
index = SchemaField('index', 'INTEGER', 'REPEATED')
11871192
score = SchemaField('score', 'FLOAT', 'REPEATED')
11881193
struct = SchemaField('struct', 'RECORD', mode='REPEATED',
11891194
fields=[index, score])
11901195
table = self._make_one(self.TABLE_NAME, dataset=dataset,
1191-
schema=[full_name, struct])
1196+
schema=[color, struct])
11921197

11931198
iterator = table.fetch_data()
11941199
page = six.next(iterator.pages)

system_tests/bigquery.py

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -478,3 +478,49 @@ def _job_done(instance):
478478
# them here. The best we can do is note that the API call didn't
479479
# raise an error, and that the job completed (in the `retry()`
480480
# above).
481+
482+
def test_sync_query_w_nested_arrays_and_structs(self):
483+
EXAMPLES = [
484+
{
485+
'sql': 'SELECT 1',
486+
'expected': 1,
487+
},
488+
{
489+
'sql': 'SELECT (1, 2)',
490+
'expected': {'_field_1': 1, '_field_2': 2},
491+
},
492+
{
493+
'sql': 'SELECT [1, 2, 3]',
494+
'expected': [1, 2, 3],
495+
},
496+
{
497+
'sql': 'SELECT ([1, 2], 3, [4, 5])',
498+
'expected':
499+
{'_field_1': [1, 2], '_field_2': 3, '_field_3': [4, 5]},
500+
},
501+
{
502+
'sql': 'SELECT [(1, 2, 3), (4, 5, 6)]',
503+
'expected': [
504+
{'_field_1': 1, '_field_2': 2, '_field_3': 3},
505+
{'_field_1': 4, '_field_2': 5, '_field_3': 6},
506+
],
507+
},
508+
{
509+
'sql': 'SELECT [([1, 2, 3], 4), ([5, 6], 7)]',
510+
'expected': [
511+
{u'_field_1': [1, 2, 3], u'_field_2': 4},
512+
{u'_field_1': [5, 6], u'_field_2': 7},
513+
],
514+
},
515+
{
516+
'sql': 'SELECT ARRAY(SELECT STRUCT([1, 2]))',
517+
'expected': [{u'_field_1': [1, 2]}],
518+
},
519+
]
520+
for example in EXAMPLES:
521+
query = Config.CLIENT.run_sync_query(example['sql'])
522+
query.use_legacy_sql = False
523+
query.run()
524+
self.assertEqual(len(query.rows), 1)
525+
self.assertEqual(len(query.rows[0]), 1)
526+
self.assertEqual(query.rows[0][0], example['expected'])

0 commit comments

Comments
 (0)