Skip to content

Commit c001fb9

Browse files
committed
Adding HappyBase Connection.create_table().
1 parent 115263b commit c001fb9

File tree

2 files changed

+313
-3
lines changed

2 files changed

+313
-3
lines changed

gcloud/bigtable/happybase/connection.py

Lines changed: 124 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -15,11 +15,15 @@
1515
"""Google Cloud Bigtable HappyBase connection module."""
1616

1717

18+
import datetime
1819
import warnings
1920

2021
import six
2122

2223
from gcloud.bigtable.client import Client
24+
from gcloud.bigtable.column_family import GCRuleIntersection
25+
from gcloud.bigtable.column_family import MaxAgeGCRule
26+
from gcloud.bigtable.column_family import MaxVersionsGCRule
2327
from gcloud.bigtable.happybase.table import Table
2428
from gcloud.bigtable.table import Table as _LowLevelTable
2529

@@ -124,9 +128,6 @@ class Connection(object):
124128
:type kwargs: dict
125129
:param kwargs: Remaining keyword arguments. Provided for HappyBase
126130
compatibility.
127-
128-
:raises: :class:`ValueError <exceptions.ValueError>` if any of the unused
129-
parameters are specified with a value other than the defaults.
130131
"""
131132

132133
_cluster = None
@@ -265,6 +266,77 @@ def tables(self):
265266

266267
return table_names
267268

269+
def create_table(self, name, families):
    """Create a table together with its column families.

    .. warning::

        Of the column family options known to HappyBase, only
        ``max_versions`` and ``time_to_live`` can be used with
        Cloud Bigtable.

    .. note::

        This method is **not** atomic. The Cloud Bigtable API creates a
        table and its column families separately, so one request is sent
        for the table and one more for each column family. If any request
        fails, the method fails, and whatever was already created is
        not rolled back.

    Each value in ``families`` describes the options of one column family.
    In HappyBase these are dictionaries corresponding to the
    ``ColumnDescriptor`` structure in the Thrift API. The accepted keys
    are:

    * ``max_versions`` (``int``)
    * ``compression`` (``str``)
    * ``in_memory`` (``bool``)
    * ``bloom_filter_type`` (``str``)
    * ``bloom_filter_vector_size`` (``int``)
    * ``bloom_filter_nb_hashes`` (``int``)
    * ``block_cache_enabled`` (``bool``)
    * ``time_to_live`` (``int``)

    :type name: str
    :param name: The name of the table to be created.

    :type families: dict
    :param families: Dictionary with column family names as keys and column
                     family options as the values. The options can be among

                     * :class:`dict`
                     * :class:`.GarbageCollectionRule`

    :raises: :class:`TypeError <exceptions.TypeError>` if ``families`` is
             not a dictionary,
             :class:`ValueError <exceptions.ValueError>` if ``families``
             has no entries
    """
    if not isinstance(families, dict):
        raise TypeError('families arg must be a dictionary')
    if not families:
        raise ValueError('Cannot create table %r (no column '
                         'families specified)' % (name,))

    # Convert every option first, so that all parsing is finished before
    # the first API request is made.
    rules_by_family = {}
    for family_id, family_option in families.items():
        if isinstance(family_id, six.binary_type):
            family_id = family_id.decode('utf-8')
        # A single trailing colon on the family name is dropped.
        if family_id.endswith(':'):
            family_id = family_id[:-1]
        rules_by_family[family_id] = _parse_family_option(family_option)

    # One request for the table itself ...
    table = _LowLevelTable(self._table_name(name), self._cluster)
    table.create()

    # ... followed by one request per column family.
    for family_id, gc_rule in rules_by_family.items():
        table.column_family(family_id, gc_rule=gc_rule).create()
339+
268340
def delete_table(self, name, disable=False):
269341
"""Delete the specified table.
270342
@@ -336,3 +408,52 @@ def compact_table(self, name, major=False):
336408
"""
337409
raise NotImplementedError('The Cloud Bigtable API does not support '
338410
'compacting a table.')
411+
412+
413+
def _parse_family_option(option):
    """Parses a column family option into a garbage collection rule.

    .. note::

        If ``option`` is not a dictionary, the type is not checked.
        If ``option`` is :data:`None`, there is nothing to do, since this
        is the correct output.

    For a dictionary, only the ``max_versions`` and ``time_to_live`` keys
    are used; a warning is emitted if any other key is present. A value of
    :data:`None` for either key is treated the same as the key being
    absent.

    :type option: :class:`dict`,
                  :data:`NoneType <types.NoneType>`,
                  :class:`.GarbageCollectionRule`
    :param option: A column family option passed as a dictionary value in
                   :meth:`Connection.create_table`.

    :rtype: :class:`.GarbageCollectionRule`
    :returns: A garbage collection rule parsed from the input. This is
              :data:`None` when a dictionary sets neither supported key,
              a single rule when it sets one of them and a
              :class:`.GCRuleIntersection` when it sets both.
    """
    if not isinstance(option, dict):
        # Already a rule (or None); hand it back unchanged.
        return option

    if not set(option.keys()) <= set(['max_versions', 'time_to_live']):
        all_keys = ', '.join(repr(key) for key in option.keys())
        warning_msg = ('Cloud Bigtable only supports max_versions and '
                       'time_to_live column family settings. '
                       'Received: %s' % (all_keys,))
        _WARN(warning_msg)

    max_num_versions = option.get('max_versions')
    # Use .get() for both keys so an explicit ``None`` means "not set"
    # instead of crashing inside datetime.timedelta().
    time_to_live = option.get('time_to_live')
    max_age = None
    if time_to_live is not None:
        max_age = datetime.timedelta(seconds=time_to_live)

    versions_rule = age_rule = None
    if max_num_versions is not None:
        versions_rule = MaxVersionsGCRule(max_num_versions)
    if max_age is not None:
        age_rule = MaxAgeGCRule(max_age)

    if versions_rule is None:
        return age_rule
    if age_rule is None:
        return versions_rule
    # Both limits were requested, so combine them; the age rule goes first.
    return GCRuleIntersection(rules=[age_rule, versions_rule])

gcloud/bigtable/happybase/test_connection.py

Lines changed: 189 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -311,6 +311,90 @@ def test_tables_with_prefix(self):
311311
result = connection.tables()
312312
self.assertEqual(result, [unprefixed_table_name1])
313313

314+
def test_create_table(self):
    # Checks that create_table() builds one low-level table, parses each
    # family option once and creates one column family per entry, with
    # byte-string and colon-suffixed family names normalized.
    import operator
    from gcloud._testing import _Monkey
    from gcloud.bigtable.happybase import connection as MUT

    cluster = _Cluster()  # Avoid implicit environ check.
    connection = self._makeOne(autoconnect=False, cluster=cluster)
    mock_gc_rule = object()
    called_options = []

    def mock_parse_family_option(option):
        called_options.append(option)
        return mock_gc_rule

    name = 'table-name'
    col_fam1 = 'cf1'
    col_fam_option1 = object()
    col_fam2 = u'cf2'
    col_fam_option2 = object()
    col_fam3 = b'cf3'
    col_fam_option3 = object()
    families = {
        col_fam1: col_fam_option1,
        # A trailing colon is also allowed.
        col_fam2 + ':': col_fam_option2,
        col_fam3 + b':': col_fam_option3,
    }

    tables_created = []

    def make_table(*args, **kwargs):
        result = _MockLowLevelTable(*args, **kwargs)
        tables_created.append(result)
        return result

    with _Monkey(MUT, _LowLevelTable=make_table,
                 _parse_family_option=mock_parse_family_option):
        connection.create_table(name, families)

    # Just one table would have been created.
    table_instance, = tables_created
    self.assertEqual(table_instance.args, (name, cluster))
    self.assertEqual(table_instance.kwargs, {})
    self.assertEqual(table_instance.create_calls, 1)

    # Check that our mock was called three times, once per column family.
    # The length check catches repeated calls that the set comparison
    # alone would hide; the set is used because the order is unknown.
    self.assertEqual(len(called_options), 3)
    self.assertEqual(
        set(called_options),
        set([col_fam_option1, col_fam_option2, col_fam_option3]))

    # We expect three column family instances created, but don't know the
    # order due to non-deterministic dict.items().
    col_fam_created = table_instance.col_fam_created
    self.assertEqual(len(col_fam_created), 3)
    col_fam_created.sort(key=operator.attrgetter('column_family_id'))
    self.assertEqual(col_fam_created[0].column_family_id, col_fam1)
    self.assertEqual(col_fam_created[0].gc_rule, mock_gc_rule)
    self.assertEqual(col_fam_created[0].create_calls, 1)
    self.assertEqual(col_fam_created[1].column_family_id, col_fam2)
    self.assertEqual(col_fam_created[1].gc_rule, mock_gc_rule)
    self.assertEqual(col_fam_created[1].create_calls, 1)
    self.assertEqual(col_fam_created[2].column_family_id,
                     col_fam3.decode('utf-8'))
    self.assertEqual(col_fam_created[2].gc_rule, mock_gc_rule)
    self.assertEqual(col_fam_created[2].create_calls, 1)
379+
380+
def test_create_table_bad_type(self):
    # A ``families`` argument that is not a dictionary must be rejected
    # with a TypeError.
    fake_cluster = _Cluster()  # Avoid implicit environ check.
    connection = self._makeOne(autoconnect=False, cluster=fake_cluster)

    with self.assertRaises(TypeError):
        connection.create_table('table-name', None)
388+
389+
def test_create_table_bad_value(self):
    # An empty ``families`` dictionary must be rejected with a ValueError.
    fake_cluster = _Cluster()  # Avoid implicit environ check.
    connection = self._makeOne(autoconnect=False, cluster=fake_cluster)

    with self.assertRaises(ValueError):
        connection.create_table('table-name', {})
397+
314398
def test_delete_table(self):
315399
from gcloud._testing import _Monkey
316400
from gcloud.bigtable.happybase import connection as MUT
@@ -376,6 +460,90 @@ def test_compact_table(self):
376460
connection.compact_table(name, major=major)
377461

378462

463+
class Test__parse_family_option(unittest2.TestCase):
    """Unit tests for ``connection._parse_family_option``."""

    def _callFUT(self, option):
        from gcloud.bigtable.happybase.connection import _parse_family_option
        return _parse_family_option(option)

    def test_dictionary_no_keys(self):
        # An empty options dictionary produces no rule.
        parsed = self._callFUT({})
        self.assertEqual(parsed, None)

    def test_null(self):
        # ``None`` is passed straight through.
        parsed = self._callFUT(None)
        self.assertEqual(parsed, None)

    def test_dictionary_bad_key(self):
        from gcloud._testing import _Monkey
        from gcloud.bigtable.happybase import connection as MUT

        messages = []

        def record_warning(msg):
            messages.append(msg)

        # An unsupported key triggers exactly one warning naming that key.
        with _Monkey(MUT, _WARN=record_warning):
            parsed = self._callFUT({'badkey': None})

        self.assertEqual(parsed, None)
        self.assertEqual(len(messages), 1)
        self.assertIn('badkey', messages[0])

    def test_dictionary_versions_key(self):
        from gcloud.bigtable.column_family import MaxVersionsGCRule

        max_versions = 42
        parsed = self._callFUT({'max_versions': max_versions})

        self.assertEqual(parsed, MaxVersionsGCRule(max_versions))

    def test_dictionary_ttl_key(self):
        import datetime
        from gcloud.bigtable.column_family import MaxAgeGCRule

        # One day, expressed in seconds for the option and as a timedelta
        # for the expected rule.
        ttl_seconds = 24 * 60 * 60
        one_day = datetime.timedelta(days=1)
        parsed = self._callFUT({'time_to_live': ttl_seconds})

        self.assertEqual(parsed, MaxAgeGCRule(one_day))

    def test_dictionary_both_keys(self):
        import datetime
        from gcloud.bigtable.column_family import GCRuleIntersection
        from gcloud.bigtable.column_family import MaxAgeGCRule
        from gcloud.bigtable.column_family import MaxVersionsGCRule

        max_versions = 42
        ttl_seconds = 24 * 60 * 60
        parsed = self._callFUT({
            'max_versions': max_versions,
            'time_to_live': ttl_seconds,
        })

        one_day = datetime.timedelta(days=1)
        # NOTE: The rules are listed age first, then versions; this must
        #       match the order used by the function under test.
        expected_rules = [
            MaxAgeGCRule(one_day),
            MaxVersionsGCRule(max_versions),
        ]
        expected = GCRuleIntersection(rules=expected_rules)
        self.assertEqual(parsed, expected)

    def test_non_dictionary(self):
        # Anything that is not a dictionary is returned untouched.
        sentinel = object()
        self.assertFalse(isinstance(sentinel, dict))
        parsed = self._callFUT(sentinel)
        self.assertEqual(parsed, sentinel)
545+
546+
379547
class _Client(object):
380548

381549
def __init__(self, *args, **kwargs):
@@ -418,12 +586,33 @@ def list_tables(self):
418586
return self.list_tables_result
419587

420588

589+
class _MockLowLevelColumnFamily(object):
    """Test double for a low-level column family.

    Stores the ID and garbage collection rule it was built with and counts
    how many times ``create()`` has been called.
    """

    def __init__(self, column_family_id, gc_rule=None):
        self.create_calls = 0
        self.gc_rule = gc_rule
        self.column_family_id = column_family_id

    def create(self):
        # Only count the call; no request is made.
        self.create_calls = self.create_calls + 1
598+
599+
421600
class _MockLowLevelTable(object):
    """Test double for the low-level table.

    Remembers its constructor arguments, counts ``create()`` and
    ``delete()`` calls, and keeps every column family mock it hands out.
    """

    def __init__(self, *args, **kwargs):
        self.args, self.kwargs = args, kwargs
        self.delete_calls = 0
        self.create_calls = 0
        self.col_fam_created = []

    def delete(self):
        self.delete_calls = self.delete_calls + 1

    def create(self):
        self.create_calls = self.create_calls + 1

    def column_family(self, column_family_id, gc_rule=None):
        # Build a column family mock and record it so tests can inspect it.
        family = _MockLowLevelColumnFamily(column_family_id, gc_rule=gc_rule)
        self.col_fam_created.append(family)
        return family

0 commit comments

Comments
 (0)