Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 6 additions & 1 deletion cpp/src/arrow/python/python_to_arrow.cc
Original file line number Diff line number Diff line change
Expand Up @@ -106,7 +106,12 @@ struct ValueConverter<Type, enable_if_integer<Type>> {

static inline Result<ValueType> FromPython(PyObject* obj) {
ValueType value;
RETURN_NOT_OK(internal::CIntFromPython(obj, &value));
arrow::Status s_ = internal::CIntFromPython(obj, &value);
if (!s_.ok() && !internal::PyIntScalar_Check(obj)) {
return internal::InvalidValue(obj, "tried to convert to int");
} else {
RETURN_NOT_OK(s_);
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What are the cases that this couldn't be converted, but that obj is an integer? When the integer is too big to fit in a C int?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, and also when converting a negative integer to a uint:

pa.scalar(-1, type='uint8')

No other tests are touched if I recompile without this check

}
return value;
}
};
Expand Down
2 changes: 1 addition & 1 deletion python/pyarrow/tests/test_compute.py
Original file line number Diff line number Diff line change
Expand Up @@ -736,7 +736,7 @@ def test_is_null():
def test_fill_null():
arr = pa.array([1, 2, None, 4], type=pa.int8())
fill_value = pa.array([5], type=pa.int8())
with pytest.raises(TypeError):
with pytest.raises(pa.ArrowInvalid, match="tried to convert to int"):
arr.fill_null(fill_value)

arr = pa.array([None, None, None, None], type=pa.null())
Expand Down
8 changes: 2 additions & 6 deletions python/pyarrow/tests/test_convert_builtin.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,6 @@
import decimal
import itertools
import math
import traceback

import numpy as np
import pytz
Expand Down Expand Up @@ -382,11 +381,8 @@ def test_sequence_custom_integers(seq):
@parametrize_with_iterable_types
def test_broken_integers(seq):
Copy link
Contributor Author

@arw2019 arw2019 Aug 25, 2020

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We lose the the more specific traceback and ZeroDivisionError message, in favor of

In [11]: class MyBrokenInt: 
    ...:     def __init__(self): 
    ...:         1/0
In [12]: pa.array([MyBrokenInt()], type=pa.int64())        
---------------------------------------------------------------------------
ArrowInvalid                              Traceback (most recent call last)
<ipython-input-12-1cf156b165b3> in <module>
----> 1 pa.array([MyBrokenInt()], type=pa.int64())

~/git_repo/arrow/python/pyarrow/array.pxi in pyarrow.lib.array()
    269     else:
    270         # ConvertPySequence does strict conversion if type is explicitly passed
--> 271         return _sequence_to_array(obj, mask, size, type, pool, c_from_pandas)
    272 
    273 

~/git_repo/arrow/python/pyarrow/array.pxi in pyarrow.lib._sequence_to_array()
     38 
     39     with nogil:
---> 40         check_status(ConvertPySequence(sequence, mask, options, &out))
     41 
     42     if out.get().num_chunks() == 1:

~/git_repo/arrow/python/pyarrow/error.pxi in pyarrow.lib.check_status()
     82 
     83         if status.IsInvalid():
---> 84             raise ArrowInvalid(message)
     85         elif status.IsIOError():
     86             # Note: OSError constructor is
ArrowInvalid: Could not convert <__main__.MyBrokenInt object at 0x7fc331394290> with type MyBrokenInt: tried to convert to int

but this is the same message as what we get on master for

In [11]: class MyBrokenInt: 
    ...:     def __init__(self): 
    ...:         1/1 

so maybe it's ok?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think that is fine, personally

data = [MyBrokenInt()]
with pytest.raises(ZeroDivisionError) as exc_info:
with pytest.raises(pa.ArrowInvalid):
pa.array(seq(data), type=pa.int64())
# Original traceback is kept
tb_lines = traceback.format_tb(exc_info.tb)
assert "# MARKER" in tb_lines[-1]


def test_numpy_scalars_mixed_type():
Expand Down Expand Up @@ -1643,7 +1639,7 @@ def test_map_from_dicts():

# Invalid dictionary types
for entry in [[{'key': '1', 'value': 5}], [{'key': {'value': 2}}]]:
with pytest.raises(TypeError, match="integer is required"):
with pytest.raises(pa.ArrowInvalid, match="tried to convert to int"):
pa.array([entry], type=pa.map_('i4', 'i4'))


Expand Down
34 changes: 18 additions & 16 deletions python/pyarrow/tests/test_pandas.py
Original file line number Diff line number Diff line change
Expand Up @@ -2447,19 +2447,22 @@ def test_category_zero_chunks(self):
expected = pd.DataFrame({'a': expected})
tm.assert_frame_equal(result, expected)

def test_mixed_types_fails(self):
data = pd.DataFrame({'a': ['a', 1, 2.0]})
with pytest.raises(pa.ArrowTypeError):
pa.Table.from_pandas(data)

data = pd.DataFrame({'a': [1, True]})
with pytest.raises(pa.ArrowTypeError):
pa.Table.from_pandas(data)

data = pd.DataFrame({'a': ['a', 1, 2.0]})
expected_msg = 'Conversion failed for column a'
with pytest.raises(pa.ArrowTypeError, match=expected_msg):
pa.Table.from_pandas(data)
@pytest.mark.parametrize(
"data,error_type",
[
({"a": ["a", 1, 2.0]}, pa.ArrowTypeError),
({"a": ["a", 1, 2.0]}, pa.ArrowTypeError),
({"a": [1, True]}, pa.ArrowTypeError),
({"a": [True, "a"]}, pa.ArrowInvalid),
({"a": [1, "a"]}, pa.ArrowInvalid),
({"a": [1.0, "a"]}, pa.ArrowInvalid),
],
)
def test_mixed_types_fails(self, data, error_type):
df = pd.DataFrame(data)
msg = "Conversion failed for column a with type object"
with pytest.raises(error_type, match=msg):
pa.Table.from_pandas(df)

def test_strided_data_import(self):
cases = []
Expand Down Expand Up @@ -3531,11 +3534,10 @@ def test_dictionary_from_pandas_specified_type():
assert result.type.equals(typ)
assert result.to_pylist() == ['a', 'b']

# mismatching values type -> raise error (for now a deprecation warning)
# mismatching values type -> raise error
typ = pa.dictionary(index_type=pa.int8(), value_type=pa.int64())
with pytest.warns(FutureWarning):
with pytest.raises(pa.ArrowInvalid):
result = pa.array(cat, type=typ)
assert result.to_pylist() == ['a', 'b']

# mismatching order -> raise error (for now a deprecation warning)
typ = pa.dictionary(
Expand Down