Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
43 commits
Select commit Hold shift + click to select a range
77fc61e
- Accept dtype.kind = 'O' in `from_dtype`
Jun 20, 2025
895e360
ruff: yes we really want .iat
Jun 20, 2025
8fe26b8
linting
Jun 20, 2025
d2bf820
add test for failing coverage and dtypes
Jun 20, 2025
918a9f0
linter
Jun 20, 2025
b514789
rst mistakes and linting
Jun 20, 2025
07f8ea0
comparable datatypes only
Jun 20, 2025
aa0ab3f
still keep the else line to catch unknown dtypes but remove from cove…
Jul 2, 2025
b9380b7
make test agree with the from_dtype strategy
Jul 2, 2025
e0c2909
formatting
Jul 2, 2025
6aea8bc
addressed comments :)
Jul 3, 2025
b65e335
formatting
Jul 3, 2025
212a628
formatting
Jul 3, 2025
088d272
Got rst syntax wrong again...
Jul 3, 2025
c52bb66
Merge branch 'master' into allow_objects_in_numpy_arrays_and_pandas_s…
Liam-DeVoe Jul 11, 2025
f8b6ad6
clean up some things
Liam-DeVoe Jul 11, 2025
56ce942
restrict objects allowed in arrays, better tests
Aug 7, 2025
d9d5c2e
linting
Aug 7, 2025
ee7ac5d
linting
Aug 7, 2025
1975676
formatting
Aug 7, 2025
d0d57f4
Revert "formatting"
Aug 8, 2025
f471a18
formatting
Aug 8, 2025
1d6b5aa
use proper linting tooling
Aug 8, 2025
7556a9a
Merge branch 'master' into allow_objects_in_numpy_arrays_and_pandas_s…
Liam-DeVoe Aug 17, 2025
74520e3
cleaned up and simplified the implementation a lot (particularly in p…
Aug 27, 2025
f87f00d
formatting/linting
Aug 27, 2025
a32eb9c
removed assert_safe_equals; using list equality now
Aug 27, 2025
877212e
Merge branch 'master' into allow_objects_in_numpy_arrays_and_pandas_s…
philastrophist Aug 27, 2025
6226f0e
not sure how that happened
Aug 27, 2025
704d80e
Merge remote-tracking branch 'upstream/master' into allow_objects_in_…
Aug 27, 2025
808c01c
format
Aug 27, 2025
77fac3b
Merge branch 'master' into allow_objects_in_numpy_arrays_and_pandas_s…
Liam-DeVoe Sep 17, 2025
c1d6fc4
Merge branch 'allow_objects_in_numpy_arrays_and_pandas_series' of git…
Liam-DeVoe Sep 17, 2025
631ff0f
refactor tests
Liam-DeVoe Sep 17, 2025
1f70970
simplify numpy code
Liam-DeVoe Sep 17, 2025
5982df7
format
Liam-DeVoe Sep 17, 2025
b26eaac
bring back array equality check
Liam-DeVoe Sep 17, 2025
d3e5f3b
comment, weaker series dtype test
Liam-DeVoe Sep 17, 2025
adab86e
simplify pandas code
Liam-DeVoe Sep 17, 2025
a2b28b1
Merge branch 'master' into allow_objects_in_numpy_arrays_and_pandas_s…
Liam-DeVoe Oct 16, 2025
1ec77f5
refactor
Liam-DeVoe Oct 18, 2025
6e39885
xfail pandas object test
Liam-DeVoe Oct 19, 2025
003b239
nonstrict
Liam-DeVoe Oct 19, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions AUTHORS.rst
Original file line number Diff line number Diff line change
Expand Up @@ -175,6 +175,7 @@ their individual contributions.
* `Saul Shanabrook <https://www.github.com/saulshanabrook>`_ ([email protected])
* `Sebastiaan Zeeff <https://github.com/SebastiaanZ>`_ ([email protected])
* `Sharyar Memon <https://github.com/sharyar>`_ ([email protected])
* `Shaun Read <https://github.com/philastrophist>`_
* `Shlok Gandhi <https://github.com/shlok57>`_ ([email protected])
* `Sogata Ray <https://github.com/rayardinanda>`_ ([email protected])
* `Stuart Cook <https://www.github.com/Zalathar>`_
Expand Down
5 changes: 5 additions & 0 deletions hypothesis-python/RELEASE.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
RELEASE_TYPE: minor

The extras for |hypothesis-numpy| and |hypothesis-pandas| now support automatically inferring a strategy for ``dtype="O"``. Previously, Hypothesis required an explicit elements strategy to be passed, for example ``nps.arrays("O", shape=(1,), elements=st.just(object()))``. Now, Hypothesis automatically infers ``elements=st.from_type(object)``.

Thanks to Shaun Read for identifying and fixing this!
4 changes: 4 additions & 0 deletions hypothesis-python/docs/prolog.rst
Original file line number Diff line number Diff line change
Expand Up @@ -185,6 +185,10 @@
.. |is_hypothesis_test| replace:: :func:`~hypothesis.is_hypothesis_test`
.. |currently_in_test_context| replace:: :func:`~hypothesis.currently_in_test_context`

.. |hypothesis-numpy| replace:: :ref:`NumPy <hypothesis-numpy>`
.. |hypothesis-pandas| replace:: :ref:`pandas <hypothesis-pandas>`
.. |hypothesis-django| replace:: :ref:`Django <hypothesis-django>`

.. |str| replace:: :obj:`python:str`
.. |int| replace:: :obj:`python:int`
.. |bool| replace:: :obj:`python:bool`
Expand Down
17 changes: 14 additions & 3 deletions hypothesis-python/src/hypothesis/extra/numpy.py
Original file line number Diff line number Diff line change
Expand Up @@ -213,6 +213,8 @@ def compat_kw(*args, **kw):
else: # NEP-7 defines the NaT value as integer -(2**63)
elems = st.integers(-(2**63) + 1, 2**63 - 1)
result = st.builds(dtype.type, elems, res)
elif dtype.kind == "O":
return st.from_type(object)
else:
Comment on lines +216 to 218
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's not actually clear to me whether we want `st.from_type(object)` or `st.from_type(type).flatmap(st.from_type)` here. Should we make the former simply register to the latter?

raise InvalidArgument(f"No strategy inference for {dtype}")
return result.map(dtype.type)
Expand All @@ -236,22 +238,31 @@ def __repr__(self):
)

def set_element(self, val, result, idx, *, fill=False):
# `val` is either an arbitrary object (for dtype="O"), or otherwise an
# instance of a numpy dtype. This means we can *usually* expect e.g.
# val.dtype to be present, but can only guarantee it if
# `self.dtype != "O"`.

try:
result[idx] = val
except TypeError as err:
raise InvalidArgument(
f"Could not add element={val!r} of {val.dtype!r} to array of "
f"Could not add element={val!r} of "
f"{getattr(val, 'dtype', type(val))} to array of "
f"{result.dtype!r} - possible mismatch of time units in dtypes?"
) from err

try:
elem_changed = self._check_elements and val != result[idx] and val == val
except Exception as err: # pragma: no cover
# This branch only exists to help debug weird behaviour in Numpy,
# such as the string problems we had a while back.
raise HypothesisException(
f"Internal error when checking element={val!r} of {val.dtype!r} "
f"to array of {result.dtype!r}"
f"Internal error when checking element={val!r} of "
f"{getattr(val, 'dtype', type(val))!r} to array of "
f"{result.dtype!r}"
) from err

if elem_changed:
strategy = self.fill if fill else self.element_strategy
if self.dtype.kind == "f": # pragma: no cover
Expand Down
18 changes: 11 additions & 7 deletions hypothesis-python/src/hypothesis/extra/pandas/impl.py
Original file line number Diff line number Diff line change
Expand Up @@ -125,10 +125,16 @@ def elements_and_dtype(elements, dtype, source=None):
def convert_element(value):
if is_na_dtype and value is None:
return None
name = f"draw({prefix}elements)"

if dtype.kind == "O":
# as an optimization, pass objects straight through, because
# we know numpy won't convert them.
return value

try:
return np.array([value], dtype=dtype)[0]
except (TypeError, ValueError, OverflowError):
name = f"draw({prefix}elements)"
raise InvalidArgument(
f"Cannot convert {name}={value!r} of type "
f"{type(value).__name__} to dtype {dtype.str}"
Expand Down Expand Up @@ -583,7 +589,6 @@ def row():
raise InvalidArgument(f"duplicate definition of column name {c.name!r}")

column_names.add(c.name)

c.elements, _ = elements_and_dtype(c.elements, c.dtype, label)

if c.dtype is None and rows is not None:
Expand All @@ -594,7 +599,6 @@ def row():
c.fill = npst.fill_for(
fill=c.fill, elements=c.elements, unique=c.unique, name=label
)

rewritten_columns.append(c)

if rows is None:
Expand All @@ -614,13 +618,12 @@ def just_draw_columns(draw):

# For columns with no filling the problem is harder, and drawing
# them like that would result in rows being very far apart from
# each other in the underlying data stream, which gets in the way
# each other in the choice sequence, which gets in the way
# of shrinking. So what we do is reorder and draw those columns
# row wise, so that the values of each row are next to each other.
# This makes life easier for the shrinker when deleting blocks of
# data.
columns_without_fill = [c for c in rewritten_columns if c.fill.is_empty]
# This makes life easier for the shrinker when deleting choices.

columns_without_fill = [c for c in rewritten_columns if c.fill.is_empty]
if columns_without_fill:
for c in columns_without_fill:
data[c.name] = pandas.Series(
Expand All @@ -642,6 +645,7 @@ def just_draw_columns(draw):
reject()
else:
value = draw(c.elements)

try:
data[c.name].iloc[i] = value
except ValueError as err: # pragma: no cover
Expand Down
2 changes: 1 addition & 1 deletion hypothesis-python/tests/numpy/test_argument_validation.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ def e(a, **kwargs):
e(nps.array_shapes, min_dims=33),
e(nps.array_shapes, max_dims=33),
e(nps.arrays, dtype=float, shape=(0.5,)),
e(nps.arrays, dtype=object, shape=1),
e(nps.arrays, dtype=numpy.void, shape=1),
e(nps.arrays, dtype=float, shape=1, fill=3),
e(nps.arrays, dtype="U", shape=1, elements=st.just("abc\0\0")),
e(nps.arrays, dtype=int, shape=1, elements="not a strategy"),
Expand Down
71 changes: 70 additions & 1 deletion hypothesis-python/tests/numpy/test_gen_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,12 @@
from hypothesis.extra import numpy as nps
from hypothesis.strategies._internal.lazy import unwrap_strategies

from tests.common.debug import check_can_generate_examples, find_any, minimal
from tests.common.debug import (
assert_all_examples,
check_can_generate_examples,
find_any,
minimal,
)
from tests.common.utils import fails_with, flaky

ANY_SHAPE = nps.array_shapes(min_dims=0, max_dims=32, min_side=0, max_side=32)
Expand Down Expand Up @@ -1273,3 +1278,67 @@ def test_infers_elements_and_fill():
assert not elems.has_reusable_values
s = unwrap_strategies(nps.arrays(dtype=np.uint32, shape=1, elements=elems))
assert s.fill.is_empty


@given(nps.arrays(np.dtype("O"), shape=nps.array_shapes()))
def test_object_arrays_are_of_type_object(obj_array):
    """Arrays requested with dtype "O" and no explicit elements keep that dtype."""
    expected_dtype = np.dtype("O")
    assert obj_array.dtype == expected_dtype


def test_class_instances_not_allowed_in_scalar_array():
    """Arrays with a scalar dtype refuse instances of an arbitrary class."""

    class A:
        pass

    strategy = nps.arrays(
        nps.scalar_dtypes(),
        shape=nps.array_shapes(),
        elements=st.just(A()),
    )

    # Generation can also fail with a plain ValueError instead of
    # InvalidArgument. For example, if the drawn scalar dtype is a datetime,
    # numpy will raise "cannot convert A to a datetime".
    acceptable_errors = (InvalidArgument, ValueError)
    with pytest.raises(acceptable_errors):
        check_can_generate_examples(strategy)


def test_object_arrays_with_mixed_elements_has_object_dtype():
    """Mixing class instances with integers still produces dtype "O" arrays."""

    class A:
        pass

    mixed_elements = st.just(A()) | st.integers()
    strategy = nps.arrays(
        np.dtype("O"),
        shape=nps.array_shapes(),
        elements=mixed_elements,
    )

    def has_object_dtype(arr):
        return arr.dtype == np.dtype("O")

    def holds_more_than_one_type(arr):
        return len({type(x) for x in arr.ravel()}) > 1

    # Every example must be an object array, and at least one example must
    # actually contain elements of differing Python types.
    assert_all_examples(strategy, has_object_dtype)
    find_any(strategy, holds_more_than_one_type)


@given(st.data())
def test_object_array_can_hold_arbitrary_class_instances(data):
    """An object array stores the very same instance it was given, by identity."""
    any_instance = st.from_type(type).flatmap(st.from_type)
    instance = data.draw(any_instance)

    array_strategy = nps.arrays(
        np.dtype("O"),
        nps.array_shapes(),
        elements=st.just(instance),
    )
    arr = data.draw(array_strategy)

    for stored in arr.ravel():
        assert stored is instance


def test_object_array_can_hold_incomparable_elements():
    """Object arrays can be generated from elements whose ``__eq__`` raises."""

    class Incomparable:
        def __eq__(self, other):
            raise TypeError

    element = Incomparable()
    strategy = nps.arrays(
        np.dtype("O"),
        nps.array_shapes(),
        elements=st.just(element),
    )
    check_can_generate_examples(strategy)


def test_can_generate_nested_object_arrays():
    """Object arrays may use integer arrays as their elements."""
    inner_arrays = nps.arrays(np.dtype("int"), nps.array_shapes())
    outer_arrays = nps.arrays(
        np.dtype("O"),
        nps.array_shapes(),
        elements=inner_arrays,
    )
    check_can_generate_examples(outer_arrays)
58 changes: 57 additions & 1 deletion hypothesis-python/tests/pandas/test_data_frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,10 +13,15 @@
import pytest

from hypothesis import HealthCheck, given, reject, settings, strategies as st
from hypothesis.errors import InvalidArgument
from hypothesis.extra import numpy as npst, pandas as pdst
from hypothesis.extra.pandas.impl import IntegerDtype

from tests.common.debug import find_any
from tests.common.debug import (
assert_all_examples,
check_can_generate_examples,
find_any,
)
from tests.pandas.helpers import supported_by_pandas


Expand Down Expand Up @@ -279,3 +284,54 @@ def test_pandas_nullable_types():
df = find_any(st, lambda s: s.isna().any().any())
for s in df.columns:
assert type(df[s].dtype) == pd.core.arrays.integer.Int8Dtype


@given(pdst.data_frames(columns=[pdst.column("col", dtype=object)]))
def test_object_columns_are_of_type_object(df):
    """A column declared with ``dtype=object`` ends up with dtype "O"."""
    column = df["col"]
    assert column.dtype == np.dtype("O")


def test_class_instances_not_allowed_in_scalar_columns():
    """Columns with a scalar dtype reject instances of an arbitrary class.

    Generating a data frame whose integer column draws its elements from
    ``st.just(A())`` must raise ``InvalidArgument``.
    """

    class A:
        pass

    # Use a concrete scalar dtype here, mirroring the series counterpart of
    # this test. ``column`` takes a dtype, not a strategy of dtypes, so passing
    # ``npst.scalar_dtypes()`` would presumably be rejected for the dtype
    # argument itself and the element check would never be exercised.
    bad_column = pdst.column(
        "col",
        elements=st.just(A()),
        dtype=np.dtype("int"),
    )
    s = pdst.data_frames(columns=[bad_column])

    with pytest.raises(InvalidArgument):
        check_can_generate_examples(s)


def test_can_generate_object_arrays_with_mixed_dtype_elements():
    """An object column mixing class instances and integers keeps dtype "O"."""

    class A:
        pass

    mixed_column = pdst.column("col", st.just(A()) | st.integers(), dtype=object)
    frames = pdst.data_frames(
        columns=[mixed_column],
        index=pdst.range_indexes(1),
    )

    def column_has_object_dtype(df):
        return df["col"].dtype == np.dtype("O")

    def column_holds_more_than_one_type(df):
        return len({type(x) for x in df["col"].values}) > 1

    # Every frame must keep the object dtype, and at least one frame must
    # really contain values of differing Python types.
    assert_all_examples(frames, column_has_object_dtype)
    find_any(frames, column_holds_more_than_one_type)


@given(st.data())
@pytest.mark.xfail(
    strict=False,
    reason="not actually true due to pandas conversion. see "
    "https://github.com/HypothesisWorks/hypothesis/pull/4444#issuecomment-3413951478",
)
def test_object_dataframe_can_hold_arbitrary_class_instances(data):
    """Check that an object column stores the drawn instance by identity.

    Marked as a non-strict xfail; see the reason given in the marker.
    """
    any_instance = st.from_type(type).flatmap(st.from_type)
    instance = data.draw(any_instance)

    object_column = pdst.column("col", elements=st.just(instance), dtype=object)
    df = data.draw(pdst.data_frames(columns=[object_column]))

    for stored in df["col"].values:
        assert stored is instance
70 changes: 64 additions & 6 deletions hypothesis-python/tests/pandas/test_series.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,22 +12,80 @@
import pandas as pd
import pytest

from hypothesis import assume, given, strategies as st
from hypothesis.extra import numpy as npst, pandas as pdst
from hypothesis import given, settings, strategies as st
from hypothesis.errors import InvalidArgument
from hypothesis.extra import numpy as nps, pandas as pdst
from hypothesis.extra.pandas.impl import IntegerDtype

from tests.common.debug import assert_all_examples, assert_no_examples, find_any
from tests.common.debug import (
assert_all_examples,
assert_no_examples,
check_can_generate_examples,
find_any,
)
from tests.pandas.helpers import supported_by_pandas


@given(st.data())
def test_can_create_a_series_of_any_dtype(data):
dtype = np.dtype(data.draw(npst.scalar_dtypes()))
assume(supported_by_pandas(dtype))
dtype = data.draw(nps.scalar_dtypes().filter(supported_by_pandas))
# Use raw data to work around pandas bug in repr. See
# https://github.com/pandas-dev/pandas/issues/27484
series = data.conjecture_data.draw(pdst.series(dtype=dtype))
assert series.dtype == pd.Series([], dtype=dtype).dtype
assert series.dtype == dtype


@given(pdst.series(dtype=object))
def test_can_create_a_series_of_object_python_type(series):
    """A series requested with ``dtype=object`` has dtype "O"."""
    expected_dtype = np.dtype("O")
    assert series.dtype == expected_dtype


@given(
    pdst.series(
        elements=nps.arrays(
            nps.array_dtypes() | nps.scalar_dtypes(),
            nps.array_shapes(),
        ),
        dtype=object,
    )
)
@settings(max_examples=5)
def test_object_series_are_of_type_object(series):
    """An object series whose elements are numpy arrays still has dtype "O"."""
    expected_dtype = np.dtype("O")
    assert series.dtype == expected_dtype


def test_class_instances_not_allowed_in_scalar_series():
    """An integer series rejects instances of an arbitrary class."""

    class A:
        pass

    # The strategy is built inside the ``raises`` block, as in the original,
    # so an error at construction time is accepted as well.
    with pytest.raises(InvalidArgument):
        strategy = pdst.series(elements=st.just(A()), dtype=np.dtype("int"))
        check_can_generate_examples(strategy)


def test_object_series_with_mixed_elements_still_has_object_dtype():
    """An object series mixing class instances and integers keeps dtype "O".

    Builds the strategy with ``pdst.series`` so that the pandas code path is
    exercised, rather than repeating the numpy-array version of this test.
    """

    class A:
        pass

    s = pdst.series(elements=st.just(A()) | st.integers(), dtype=object)

    # Every series must keep the object dtype, and at least one series must
    # really contain values of differing Python types.
    assert_all_examples(s, lambda series: series.dtype == np.dtype("O"))
    find_any(s, lambda series: len({type(x) for x in series.values}) > 1)


@given(st.data())
@settings(max_examples=10)
def test_series_can_hold_arbitrary_class_instances(data):
    """An object series stores the very same instance it was given, by identity."""
    any_instance = st.from_type(type).flatmap(st.from_type)
    instance = data.draw(any_instance)

    series_strategy = pdst.series(elements=st.just(instance), dtype=object)
    series = data.draw(series_strategy)

    for stored in series.values:
        assert stored is instance


@given(pdst.series(dtype=float, index=pdst.range_indexes(min_size=2, max_size=5)))
Expand Down