Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 1 addition & 2 deletions python/pyarrow/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,5 +49,4 @@
list_, struct, field,
DataType, Field, Schema, schema)

from pyarrow.table import (Column, RecordBatch, dataframe_from_batches, Table,
from_pandas_dataframe)
from pyarrow.table import Column, RecordBatch, Table, from_pandas_dataframe
94 changes: 48 additions & 46 deletions python/pyarrow/table.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -415,52 +415,6 @@ cdef class RecordBatch:
return result


def dataframe_from_batches(batches):
"""
Convert a list of Arrow RecordBatches to a pandas.DataFrame

Parameters
----------

batches: list of RecordBatch
RecordBatch list to be converted, schemas must be equal

Raises
------
ArrowException
If the batches do not all share an equal schema.
"""

cdef:
vector[shared_ptr[CArray]] c_array_chunks
vector[shared_ptr[CColumn]] c_columns
shared_ptr[CTable] c_table
Array arr
Schema schema

# NOTE(review): the `pd` alias is never referenced in this function;
# the pandas conversion happens inside table.to_pandas() below.
import pandas as pd

schema = batches[0].schema

# check schemas are equal
if any((not schema.equals(other.schema) for other in batches[1:])):
raise ArrowException("Error converting list of RecordBatches to "
"DataFrame, not all schemas are equal")

cdef int K = batches[0].num_columns

# create chunked columns from the batches: output column i is the
# concatenation (as chunks) of column i from every batch, in order
c_columns.resize(K)
for i in range(K):
for batch in batches:
arr = batch[i]
c_array_chunks.push_back(arr.sp_array)
c_columns[i].reset(new CColumn(schema.sp_schema.get().field(i),
c_array_chunks))
# reuse the chunk vector for the next column
c_array_chunks.clear()

# create a Table from columns and convert to DataFrame
c_table.reset(new CTable('', schema.sp_schema, c_columns))
table = Table()
table.init(c_table)
return table.to_pandas()


cdef class Table:
"""
A collection of top-level named, equal length Arrow arrays.
Expand Down Expand Up @@ -567,6 +521,54 @@ cdef class Table:

return result

@staticmethod
def from_batches(batches):
    """
    Construct a Table from a list of Arrow RecordBatches.

    Parameters
    ----------
    batches : list of RecordBatch
        RecordBatch list to be converted; all schemas must be equal.

    Returns
    -------
    Table

    Raises
    ------
    ValueError
        If *batches* is empty.
    ArrowException
        If the batches do not all share an equal schema.
    """
    cdef:
        vector[shared_ptr[CArray]] c_array_chunks
        vector[shared_ptr[CColumn]] c_columns
        shared_ptr[CTable] c_table
        Array arr
        Schema schema

    # Guard the batches[0] accesses below with an explicit, descriptive
    # error instead of a bare IndexError.
    if not batches:
        raise ValueError("Must pass at least one RecordBatch")

    schema = batches[0].schema

    # All batches must share one schema so their columns can be chunked
    # together column-by-column.
    for other in batches[1:]:
        if not schema.equals(other.schema):
            raise ArrowException("Error converting list of RecordBatches "
                                 "to Table, not all schemas are equal: "
                                 "{%s} != {%s}"
                                 % (str(schema), str(other.schema)))

    cdef int K = batches[0].num_columns

    # Output column i is the concatenation (as chunks) of column i from
    # every batch, in order.
    c_columns.resize(K)
    for i in range(K):
        for batch in batches:
            arr = batch[i]
            c_array_chunks.push_back(arr.sp_array)
        c_columns[i].reset(new CColumn(schema.sp_schema.get().field(i),
                                       c_array_chunks))
        # reuse the chunk vector for the next column
        c_array_chunks.clear()

    # Assemble the C++ table and wrap it in a Python-level Table.
    c_table.reset(new CTable('', schema.sp_schema, c_columns))
    table = Table()
    table.init(c_table)
    return table

def to_pandas(self):
"""
Convert the arrow::Table to a pandas DataFrame
Expand Down
5 changes: 3 additions & 2 deletions python/pyarrow/tests/test_table.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,8 @@ def test_recordbatchlist_to_pandas():
batch1 = pa.RecordBatch.from_pandas(data1)
batch2 = pa.RecordBatch.from_pandas(data2)

result = pa.dataframe_from_batches([batch1, batch2])
table = pa.Table.from_batches([batch1, batch2])
result = table.to_pandas()
data = pd.concat([data1, data2], ignore_index=True)
assert_frame_equal(data, result)

Expand All @@ -82,7 +83,7 @@ def test_recordbatchlist_schema_equals():
batch2 = pa.RecordBatch.from_pandas(data2)

with pytest.raises(pa.ArrowException):
pa.dataframe_from_batches([batch1, batch2])
pa.Table.from_batches([batch1, batch2])


def test_table_basics():
Expand Down