Skip to content

Commit 25a7a9a

Browse files
authored
FIX-#7569: Fix handling of pyarrow dtype and empty dataframes (#7570)
Ensure empty DataFrames with pyarrow dtypes retain their dtype metadata. Signed-off-by: Michael Peleshenko <[email protected]>
1 parent 95beced commit 25a7a9a

File tree

2 files changed

+10
-1
lines changed

2 files changed

+10
-1
lines changed

modin/core/dataframe/pandas/dataframe/dataframe.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -174,7 +174,9 @@ def __init__(
174174
self._row_lengths_cache = row_lengths
175175
self._column_widths_cache = column_widths
176176
self._pandas_backend = pandas_backend
177-
if pandas_backend != "pyarrow":
177+
if pandas_backend != "pyarrow" or len(partitions) == 0:
178+
# If the backend is pyarrow and there are no partitions, the computed dtype otherwise becomes NaN,
179+
# which would lose the dtype, so set the dtypes cache explicitly in that case
178180
self.set_dtypes_cache(dtypes)
179181
else:
180182
# In this case, the type precomputation may be incorrect; we need

modin/tests/pandas/dataframe/test_default.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
import numpy as np
1919
import pandas
2020
import pandas._libs.lib as lib
21+
import pyarrow as pa
2122
import pytest
2223
from numpy.testing import assert_array_equal
2324

@@ -1533,3 +1534,9 @@ def test_series_does_not_warn_distributing_takes_time():
15331534
with warnings.catch_warnings():
15341535
warnings.filterwarnings("error", regex, UserWarning)
15351536
pd.Series(np.random.randint(1_000_000, size=(2_400_000)))
1537+
1538+
1539+
@pytest.mark.parametrize("dtype", [np.int64, pd.ArrowDtype(pa.int64())])
1540+
def test_empty_df_dtypes(dtype):
1541+
df = pd.DataFrame({"A": []}, dtype=dtype)
1542+
assert df.dtypes["A"] == dtype

0 commit comments

Comments
 (0)