HypothesisWorks · philastrophist · Jun 20, 2025 · Jun 20, 2025 · Jun 20, 2025 · Jun 20, 2025
diff --git a/AUTHORS.rst b/AUTHORS.rst
@@ -175,6 +175,7 @@ their individual contributions.
 * `Saul Shanabrook <https://www.github.com/saulshanabrook>`_ ([email protected])
 * `Sebastiaan Zeeff <https://github.com/SebastiaanZ>`_ ([email protected])
 * `Sharyar Memon <https://github.com/sharyar>`_ ([email protected])
+* `Shaun Read <https://github.com/philastrophist>`_
 * `Shlok Gandhi <https://github.com/shlok57>`_ ([email protected])
 * `Sogata Ray <https://github.com/rayardinanda>`_ ([email protected])
 * `Stuart Cook <https://www.github.com/Zalathar>`_

diff --git a/hypothesis-python/RELEASE.rst b/hypothesis-python/RELEASE.rst
@@ -0,0 +1,5 @@
+RELEASE_TYPE: minor
+
+The extras for |hypothesis-numpy| and |hypothesis-pandas| now support automatically inferring a strategy for ``dtype="O"``. Previously, Hypothesis required an explicit elements strategy to be passed, for example ``nps.arrays("O", shape=(1,), elements=st.just(object()))``. Now, Hypothesis automatically infers ``elements=st.from_type(object)``.
+
+Thanks to Shaun Read for identifying and fixing this!
diff --git a/hypothesis-python/docs/prolog.rst b/hypothesis-python/docs/prolog.rst
@@ -185,6 +185,10 @@
 .. |is_hypothesis_test| replace:: :func:`~hypothesis.is_hypothesis_test`
 .. |currently_in_test_context| replace:: :func:`~hypothesis.currently_in_test_context`
 
+.. |hypothesis-numpy| replace:: :ref:`NumPy <hypothesis-numpy>`
+.. |hypothesis-pandas| replace:: :ref:`pandas <hypothesis-pandas>`
+.. |hypothesis-django| replace:: :ref:`Django <hypothesis-django>`
+
 .. |str| replace:: :obj:`python:str`
 .. |int| replace:: :obj:`python:int`
 .. |bool| replace:: :obj:`python:bool`

diff --git a/hypothesis-python/src/hypothesis/extra/numpy.py b/hypothesis-python/src/hypothesis/extra/numpy.py
@@ -213,6 +213,8 @@ def compat_kw(*args, **kw):
         else:  # NEP-7 defines the NaT value as integer -(2**63)
             elems = st.integers(-(2**63) + 1, 2**63 - 1)
         result = st.builds(dtype.type, elems, res)
+    elif dtype.kind == "O":
+        return st.from_type(object)
     else:
         raise InvalidArgument(f"No strategy inference for {dtype}")
     return result.map(dtype.type)
@@ -236,22 +238,31 @@ def __repr__(self):
         )
 
     def set_element(self, val, result, idx, *, fill=False):
+        # `val` is either an arbitrary object (for dtype="O"), or otherwise an
+        # instance of a numpy dtype. This means we can *usually* expect e.g.
+        # val.dtype to be present, but can only guarantee it if
+        # `self.dtype != "O"`.
+
         try:
             result[idx] = val
         except TypeError as err:
             raise InvalidArgument(
-                f"Could not add element={val!r} of {val.dtype!r} to array of "
+                f"Could not add element={val!r} of "
+                f"{getattr(val, 'dtype', type(val))} to array of "
                 f"{result.dtype!r} - possible mismatch of time units in dtypes?"
             ) from err
+
         try:
             elem_changed = self._check_elements and val != result[idx] and val == val
         except Exception as err:  # pragma: no cover
             # This branch only exists to help debug weird behaviour in Numpy,
             # such as the string problems we had a while back.
             raise HypothesisException(
-                f"Internal error when checking element={val!r} of {val.dtype!r} "
-                f"to array of {result.dtype!r}"
+                f"Internal error when checking element={val!r} of "
+                f"{getattr(val, 'dtype', type(val))!r} to array of "
+                f"{result.dtype!r}"
             ) from err
+
         if elem_changed:
             strategy = self.fill if fill else self.element_strategy
             if self.dtype.kind == "f":  # pragma: no cover

diff --git a/hypothesis-python/src/hypothesis/extra/pandas/impl.py b/hypothesis-python/src/hypothesis/extra/pandas/impl.py
@@ -125,10 +125,16 @@ def elements_and_dtype(elements, dtype, source=None):
         def convert_element(value):
             if is_na_dtype and value is None:
                 return None
-            name = f"draw({prefix}elements)"
+
+            if dtype.kind == "O":
+                # as an optimization, pass objects straight through, because
+                # we know numpy won't convert them.
+                return value
+
             try:
                 return np.array([value], dtype=dtype)[0]
             except (TypeError, ValueError, OverflowError):
+                name = f"draw({prefix}elements)"
                 raise InvalidArgument(
                     f"Cannot convert {name}={value!r} of type "
                     f"{type(value).__name__} to dtype {dtype.str}"
@@ -583,7 +589,6 @@ def row():
             raise InvalidArgument(f"duplicate definition of column name {c.name!r}")
 
         column_names.add(c.name)
-
         c.elements, _ = elements_and_dtype(c.elements, c.dtype, label)
 
         if c.dtype is None and rows is not None:
@@ -594,7 +599,6 @@ def row():
         c.fill = npst.fill_for(
             fill=c.fill, elements=c.elements, unique=c.unique, name=label
         )
-
         rewritten_columns.append(c)
 
     if rows is None:
@@ -614,13 +618,12 @@ def just_draw_columns(draw):
 
             # For columns with no filling the problem is harder, and drawing
             # them like that would result in rows being very far apart from
-            # each other in the underlying data stream, which gets in the way
+            # each other in the choice sequence, which gets in the way
             # of shrinking. So what we do is reorder and draw those columns
             # row wise, so that the values of each row are next to each other.
-            # This makes life easier for the shrinker when deleting blocks of
-            # data.
-            columns_without_fill = [c for c in rewritten_columns if c.fill.is_empty]
+            # This makes life easier for the shrinker when deleting choices.
 
+            columns_without_fill = [c for c in rewritten_columns if c.fill.is_empty]
             if columns_without_fill:
                 for c in columns_without_fill:
                     data[c.name] = pandas.Series(
@@ -642,6 +645,7 @@ def just_draw_columns(draw):
                                 reject()
                         else:
                             value = draw(c.elements)
+
                         try:
                             data[c.name].iloc[i] = value
                         except ValueError as err:  # pragma: no cover

diff --git a/hypothesis-python/tests/numpy/test_argument_validation.py b/hypothesis-python/tests/numpy/test_argument_validation.py
@@ -40,7 +40,7 @@ def e(a, **kwargs):
         e(nps.array_shapes, min_dims=33),
         e(nps.array_shapes, max_dims=33),
         e(nps.arrays, dtype=float, shape=(0.5,)),
-        e(nps.arrays, dtype=object, shape=1),
+        e(nps.arrays, dtype=numpy.void, shape=1),
         e(nps.arrays, dtype=float, shape=1, fill=3),
         e(nps.arrays, dtype="U", shape=1, elements=st.just("abc\0\0")),
         e(nps.arrays, dtype=int, shape=1, elements="not a strategy"),

diff --git a/hypothesis-python/tests/numpy/test_gen_data.py b/hypothesis-python/tests/numpy/test_gen_data.py
@@ -29,7 +29,12 @@
 from hypothesis.extra import numpy as nps
 from hypothesis.strategies._internal.lazy import unwrap_strategies
 
-from tests.common.debug import check_can_generate_examples, find_any, minimal
+from tests.common.debug import (
+    assert_all_examples,
+    check_can_generate_examples,
+    find_any,
+    minimal,
+)
 from tests.common.utils import fails_with, flaky
 
 ANY_SHAPE = nps.array_shapes(min_dims=0, max_dims=32, min_side=0, max_side=32)
@@ -1273,3 +1278,67 @@ def test_infers_elements_and_fill():
     assert not elems.has_reusable_values
     s = unwrap_strategies(nps.arrays(dtype=np.uint32, shape=1, elements=elems))
     assert s.fill.is_empty
+
+
+@given(nps.arrays(np.dtype("O"), shape=nps.array_shapes()))
+def test_object_arrays_are_of_type_object(obj_array):
+    assert obj_array.dtype == np.dtype("O")
+
+
+def test_class_instances_not_allowed_in_scalar_array():
+    class A:
+        pass
+
+    s = nps.arrays(
+        nps.scalar_dtypes(),
+        shape=nps.array_shapes(),
+        elements=st.just(A()),
+    )
+
+    # can raise ValueError during generation. For example, if scalar_dtype
+    # corresponds to a datetime, numpy will raise "cannot convert A to a datetime".
+    with pytest.raises((InvalidArgument, ValueError)):
+        check_can_generate_examples(s)
+
+
+def test_object_arrays_with_mixed_elements_has_object_dtype():
+    class A:
+        pass
+
+    s = nps.arrays(
+        np.dtype("O"),
+        shape=nps.array_shapes(),
+        elements=st.just(A()) | st.integers(),
+    )
+
+    assert_all_examples(s, lambda arr: arr.dtype == np.dtype("O"))
+    find_any(s, lambda arr: len({type(x) for x in arr.ravel()}) > 1)
+
+
+@given(st.data())
+def test_object_array_can_hold_arbitrary_class_instances(data):
+    instance = data.draw(st.from_type(type).flatmap(st.from_type))
+    s = nps.arrays(np.dtype("O"), nps.array_shapes(), elements=st.just(instance))
+    arr = data.draw(s)
+
+    assert all(v is instance for v in arr.ravel())
+
+
+def test_object_array_can_hold_incomparable_elements():
+    class Incomparable:
+        def __eq__(self, other):
+            raise TypeError
+
+    check_can_generate_examples(
+        nps.arrays(
+            np.dtype("O"),
+            nps.array_shapes(),
+            elements=st.just(Incomparable()),
+        )
+    )
+
+
+def test_can_generate_nested_object_arrays():
+    int_arrays = nps.arrays(np.dtype("int"), nps.array_shapes())
+    s = nps.arrays(np.dtype("O"), nps.array_shapes(), elements=int_arrays)
+    check_can_generate_examples(s)
diff --git a/hypothesis-python/tests/pandas/test_data_frame.py b/hypothesis-python/tests/pandas/test_data_frame.py
@@ -13,10 +13,15 @@
 import pytest
 
 from hypothesis import HealthCheck, given, reject, settings, strategies as st
+from hypothesis.errors import InvalidArgument
 from hypothesis.extra import numpy as npst, pandas as pdst
 from hypothesis.extra.pandas.impl import IntegerDtype
 
-from tests.common.debug import find_any
+from tests.common.debug import (
+    assert_all_examples,
+    check_can_generate_examples,
+    find_any,
+)
 from tests.pandas.helpers import supported_by_pandas
 
 
@@ -279,3 +284,54 @@ def test_pandas_nullable_types():
     df = find_any(st, lambda s: s.isna().any().any())
     for s in df.columns:
         assert type(df[s].dtype) == pd.core.arrays.integer.Int8Dtype
+
+
+@given(pdst.data_frames(columns=[pdst.column("col", dtype=object)]))
+def test_object_columns_are_of_type_object(df):
+    assert df["col"].dtype == np.dtype("O")
+
+
+def test_class_instances_not_allowed_in_scalar_columns():
+    """Arbitrary class instances are rejected by a scalar-dtype column."""
+    class A:
+        pass
+
+    # `column(dtype=...)` expects a concrete dtype rather than a strategy.
+    # A strategy such as `npst.scalar_dtypes()` is expected to be rejected
+    # during dtype validation, which would let this test pass without ever
+    # trying to convert an element. Use a fixed scalar dtype so that the
+    # InvalidArgument comes from converting the `A` instance itself.
+    s = pdst.data_frames(
+        columns=[pdst.column("col", elements=st.just(A()), dtype=int)]
+    )
+
+    with pytest.raises(InvalidArgument):
+        check_can_generate_examples(s)
+
+
+def test_can_generate_object_arrays_with_mixed_dtype_elements():
+    class A:
+        pass
+
+    s = pdst.data_frames(
+        columns=[pdst.column("col", st.just(A()) | st.integers(), dtype=object)],
+        index=pdst.range_indexes(1),
+    )
+    assert_all_examples(s, lambda df: df["col"].dtype == np.dtype("O"))
+    find_any(s, lambda df: len({type(x) for x in df["col"].values}) > 1)
+
+
+@given(st.data())
+@pytest.mark.xfail(
+    strict=False,
+    reason="not actually true due to pandas conversion. see "
+    "https://github.com/HypothesisWorks/hypothesis/pull/4444#issuecomment-3413951478",
+)
+def test_object_dataframe_can_hold_arbitrary_class_instances(data):
+    instance = data.draw(st.from_type(type).flatmap(st.from_type))
+    s = pdst.data_frames(
+        columns=[pdst.column("col", elements=st.just(instance), dtype=object)]
+    )
+    df = data.draw(s)
+
+    assert all(v is instance for v in df["col"].values)
diff --git a/hypothesis-python/tests/pandas/test_series.py b/hypothesis-python/tests/pandas/test_series.py
@@ -12,22 +12,80 @@
 import pandas as pd
 import pytest
 
-from hypothesis import assume, given, strategies as st
-from hypothesis.extra import numpy as npst, pandas as pdst
+from hypothesis import given, settings, strategies as st
+from hypothesis.errors import InvalidArgument
+from hypothesis.extra import numpy as nps, pandas as pdst
 from hypothesis.extra.pandas.impl import IntegerDtype
 
-from tests.common.debug import assert_all_examples, assert_no_examples, find_any
+from tests.common.debug import (
+    assert_all_examples,
+    assert_no_examples,
+    check_can_generate_examples,
+    find_any,
+)
 from tests.pandas.helpers import supported_by_pandas
 
 
 @given(st.data())
 def test_can_create_a_series_of_any_dtype(data):
-    dtype = np.dtype(data.draw(npst.scalar_dtypes()))
-    assume(supported_by_pandas(dtype))
+    dtype = data.draw(nps.scalar_dtypes().filter(supported_by_pandas))
     # Use raw data to work around pandas bug in repr. See
     # https://github.com/pandas-dev/pandas/issues/27484
     series = data.conjecture_data.draw(pdst.series(dtype=dtype))
-    assert series.dtype == pd.Series([], dtype=dtype).dtype
+    assert series.dtype == dtype
+
+
+@given(pdst.series(dtype=object))
+def test_can_create_a_series_of_object_python_type(series):
+    assert series.dtype == np.dtype("O")
+
+
+@given(
+    pdst.series(
+        elements=nps.arrays(
+            nps.array_dtypes() | nps.scalar_dtypes(),
+            nps.array_shapes(),
+        ),
+        dtype=object,
+    )
+)
+@settings(max_examples=5)
+def test_object_series_are_of_type_object(series):
+    assert series.dtype == np.dtype("O")
+
+
+def test_class_instances_not_allowed_in_scalar_series():
+    class A:
+        pass
+
+    with pytest.raises(InvalidArgument):
+        check_can_generate_examples(
+            pdst.series(elements=st.just(A()), dtype=np.dtype("int"))
+        )
+
+
+def test_object_series_with_mixed_elements_still_has_object_dtype():
+    """An object series mixing ints and class instances keeps dtype "O"."""
+    class A:
+        pass
+
+    # Build a pandas series rather than a bare numpy array, so that this
+    # test exercises `pdst.series` as its name and module promise instead
+    # of repeating the numpy `arrays` test.
+    s = pdst.series(elements=st.just(A()) | st.integers(), dtype=object)
+
+    assert_all_examples(s, lambda series: series.dtype == np.dtype("O"))
+    find_any(s, lambda series: len({type(x) for x in series.values}) > 1)
+
+
+@given(st.data())
+@settings(max_examples=10)
+def test_series_can_hold_arbitrary_class_instances(data):
+    instance = data.draw(st.from_type(type).flatmap(st.from_type))
+    s = pdst.series(elements=st.just(instance), dtype=object)
+    series = data.draw(s)
+
+    assert all(v is instance for v in series.values)
 
 
 @given(pdst.series(dtype=float, index=pdst.range_indexes(min_size=2, max_size=5)))