Property-Based Testing with Hypothesis

Test properties of code with generated inputs, not just examples.

Why Property Testing?

# Example-based: tests specific cases
def test_sort_examples():
    """Check ``sort`` against a handful of hand-picked inputs."""
    cases = [
        ([3, 1, 2], [1, 2, 3]),
        ([], []),
        ([1], [1]),
    ]
    for unsorted, expected in cases:
        assert sort(unsorted) == expected

# Property-based: tests properties for ANY input
from hypothesis import given
from hypothesis import strategies as st

@given(st.lists(st.integers()))
def test_sort_properties(lst):
    """Check invariants that ``sort`` must satisfy for any list of integers."""
    result = sort(lst)
    # Sorting neither drops nor invents elements.
    assert len(result) == len(lst)
    # Each element is no greater than its successor.
    assert all(left <= right for left, right in zip(result, result[1:]))
    # The output agrees with the built-in sort of the same input.
    assert sorted(lst) == result

Basic Hypothesis Usage

from hypothesis import given, settings, assume
from hypothesis import strategies as st

@given(st.integers(), st.integers())
def test_addition_commutative(a, b):
    """Swapping the operands of ``+`` does not change the sum."""
    forward = a + b
    backward = b + a
    assert forward == backward

@given(st.text())
def test_reverse_twice(s):
    """A string reversed two times is the string you started with."""
    reversed_once = s[::-1]
    reversed_twice = reversed_once[::-1]
    assert reversed_twice == s

@given(st.lists(st.integers(), min_size=1))
def test_max_in_list(lst):
    """The maximum of a non-empty list is one of its own elements."""
    largest = max(lst)
    assert largest in lst

Common Strategies

from hypothesis import strategies as st

# Primitives
st.integers()                    # Any integer
st.integers(min_value=0)         # Non-negative
st.floats(allow_nan=False)       # Floats without NaN (infinities still possible)
st.text()                        # Unicode strings
st.text(alphabet="abc", max_size=10)  # At most 10 characters, each drawn from "abc"
st.booleans()                    # True or False
st.none()                        # Always None
st.binary()                      # Bytes

# Collections
st.lists(st.integers())          # List of ints
st.lists(st.text(), min_size=1, max_size=10)  # Between 1 and 10 strings
st.sets(st.integers())           # Set of ints
st.frozensets(st.text())         # Frozenset of strings
st.dictionaries(st.text(), st.integers())  # String keys mapped to int values

# Tuples
st.tuples(st.integers(), st.text())   # Fixed structure
st.tuples(st.integers(), st.integers(), st.integers())  # Triple of ints

# Optional / One of
st.one_of(st.integers(), st.text())   # Either type
st.none() | st.integers()              # Optional int
st.sampled_from(["red", "green", "blue"])  # Enum-like

Building Custom Strategies

from hypothesis import strategies as st
from dataclasses import dataclass

@dataclass
class User:
    """Minimal user record used as the target type in the strategy examples."""

    # Display name.
    name: str
    # Age as a whole number.
    age: int
    # Contact e-mail address.
    email: str

# Strategy for User objects: st.builds calls User(...) with one value drawn
# from each keyword strategy.
user_strategy = st.builds(
    User,
    name=st.text(min_size=1, max_size=50),  # non-empty, at most 50 characters
    age=st.integers(min_value=0, max_value=150),  # bounds are inclusive
    email=st.emails()  # generated e-mail address strings
)

@given(user_strategy)
def test_user_validation(user):
    """Every generated ``User`` must be accepted by ``validate_user``."""
    is_valid = validate_user(user)
    assert is_valid


# Composite strategies for complex logic
@st.composite
def sorted_lists(draw):
    """Draw a list of integers and hand it back in ascending order."""
    return sorted(draw(st.lists(st.integers())))

@given(sorted_lists())
def test_binary_search(sorted_lst):
    """An element taken from the list must be found by ``binary_search``."""
    if not sorted_lst:
        return  # nothing to look up in an empty list
    middle = len(sorted_lst) // 2
    target = sorted_lst[middle]
    assert binary_search(sorted_lst, target) != -1


# Dependent strategies
@st.composite
def list_and_index(draw):
    """Draw a non-empty list of integers together with an index valid for it."""
    items = draw(st.lists(st.integers(), min_size=1))
    last_position = len(items) - 1
    # The index strategy depends on the list that was just drawn.
    position = draw(st.integers(min_value=0, max_value=last_position))
    return items, position

@given(list_and_index())
def test_indexing(data):
    """Check that a drawn index always addresses an element of its list.

    ``data`` is the ``(lst, index)`` pair produced by ``list_and_index``.
    """
    lst, index = data
    # The strategy bounds the index by the list length, so it is in range.
    assert 0 <= index < len(lst)
    # Indexing therefore never raises IndexError and yields a list member.
    assert lst[index] in lst

Filtering and Assumptions

from hypothesis import given, assume
from hypothesis import strategies as st

# Filter strategy (preferred when possible)
@given(st.integers().filter(lambda candidate: candidate % 2 == 0))
def test_even_numbers(n):
    """Every value that survives the filter is even."""
    remainder = n % 2
    assert remainder == 0

# assume() for runtime filtering
@given(st.integers(), st.integers())
def test_division(a, b):
    """Floor quotient and remainder recombine to the original dividend."""
    assume(b != 0)  # Discard draws with a zero divisor
    quotient, remainder = divmod(a, b)
    assert quotient * b + remainder == a

# Combining filters: the min_value bound restricts generation to positive
# integers, then .filter() discards the odd ones.
positive_even = st.integers(min_value=1).filter(lambda x: x % 2 == 0)

Settings and Configuration

from hypothesis import given, settings, Verbosity, Phase
from hypothesis import strategies as st

# Per-test settings
@settings(max_examples=500)  # Raise the example budget above the default of 100
@given(st.integers())
def test_thorough(n):
    """Placeholder body; the decorators demonstrate a larger example budget."""

@settings(deadline=None)  # No per-example time limit
@given(st.lists(st.integers()))
def test_slow_operation(lst):
    """Run ``expensive_operation`` on generated lists with the deadline off."""
    expensive_operation(lst)

# Only the generate phase is enabled, so failing examples are not shrunk.
@settings(max_examples=1000, verbosity=Verbosity.verbose, phases=[Phase.generate])
@given(st.text())
def test_verbose(s):
    """Placeholder body; the decorators demonstrate verbose, generate-only runs."""


# Profile for CI (in conftest.py)
from hypothesis import settings, Verbosity

# Each profile is a named bundle of settings that can be selected at run time.
settings.register_profile("ci", max_examples=1000)  # large example budget for CI runs
settings.register_profile("dev", max_examples=10)  # small budget for quick local runs
settings.register_profile("debug", max_examples=10, verbosity=Verbosity.verbose)  # small budget, verbose output

# Use: pytest --hypothesis-profile=ci

Stateful Testing

from hypothesis.stateful import RuleBasedStateMachine, rule, invariant
from hypothesis import strategies as st

class DatabaseMachine(RuleBasedStateMachine):
    """State machine that checks ``RealDatabase`` against a plain-dict model."""

    def __init__(self):
        super().__init__()
        # Reference model the real database is compared against
        self.db = {}
        # System under test
        self.real_db = RealDatabase()

    @rule(key=st.text(), value=st.integers())
    def set_value(self, key, value):
        """Store ``value`` under ``key`` in the model and in the real DB."""
        self.db[key] = value
        self.real_db.set(key, value)

    @rule(key=st.text())
    def get_value(self, key):
        """A lookup in the real DB must return what the model returns."""
        from_model = self.db.get(key)
        from_real = self.real_db.get(key)
        assert from_model == from_real

    @rule(key=st.text())
    def delete_value(self, key):
        """Remove ``key`` from the model (if present) and from the real DB."""
        self.db.pop(key, None)
        self.real_db.delete(key)

    @invariant()
    def keys_match(self):
        """After every step both stores must hold the same set of keys."""
        model_keys = set(self.db)
        real_keys = set(self.real_db.keys())
        assert model_keys == real_keys


# Run stateful tests: expose the machine's TestCase under a Test*-prefixed
# module-level name so the test runner collects it.
TestDatabase = DatabaseMachine.TestCase

pytest Integration

# conftest.py
from hypothesis import settings, Verbosity, Phase

import os

# Baseline profile applied when nothing else is selected
settings.register_profile("default", max_examples=100)

# CI profile: larger example budget, derandomized so runs are repeatable
settings.register_profile("ci", max_examples=500, derandomize=True)

# Activate the profile named by HYPOTHESIS_PROFILE, falling back to "default"
settings.load_profile(os.environ.get("HYPOTHESIS_PROFILE", "default"))


# pytest.ini
# [pytest]
# addopts = --hypothesis-profile=default

Shrinking Examples

from hypothesis import given, settings
from hypothesis import strategies as st

@given(st.lists(st.integers()))
def test_shrinking_demo(lst):
    """Deliberately failing test that shows how failing inputs get minimized."""
    total = sum(lst)
    # Fails for any list summing to 100 or more; the report shows a minimal one.
    assert total < 100

# Hypothesis will shrink to something like:
# Falsifying example: test_shrinking_demo(lst=[100])
# Not: test_shrinking_demo(lst=[3847, -293, 10293, ...])

Common Patterns

# Roundtrip / Encode-Decode
@given(st.binary())
def test_compression_roundtrip(data):
    """Compressing and then decompressing gives back the original bytes."""
    packed = compress(data)
    restored = decompress(packed)
    assert restored == data

@given(st.dictionaries(st.text(), st.integers()))
def test_json_roundtrip(d):
    """Serializing a str-to-int dict to JSON and parsing it back is lossless."""
    import json  # imported here so the example is self-contained

    encoded = json.dumps(d)
    assert json.loads(encoded) == d


# Oracle testing (compare implementations)
@given(st.lists(st.integers()))
def test_sort_vs_stdlib(lst):
    """``my_sort`` must agree with the built-in ``sorted`` on every input."""
    actual = my_sort(lst)
    reference = sorted(lst)
    assert actual == reference


# Metamorphic relations
@given(st.lists(st.integers()))
def test_sort_idempotent(lst):
    """Applying ``sort`` to already-sorted output changes nothing."""
    sorted_twice = sort(sort(lst))
    sorted_once = sort(lst)
    assert sorted_twice == sorted_once

@given(st.lists(st.integers()), st.integers())
def test_sort_append(lst, x):
    """Sorting after an append ignores whether the prefix was already sorted."""
    from_raw = sort(lst + [x])
    from_presorted = sort(sorted(lst) + [x])
    assert from_raw == from_presorted

Quick Reference

Strategy	Description
`st.integers()`	Any integer
`st.floats()`	Floats (configure nan, inf)
`st.text()`	Unicode strings
`st.binary()`	Byte strings
`st.lists(st.X())`	Lists of X
`st.dictionaries(k, v)`	Dict with key/value strategies
`st.builds(Class, ...)`	Build objects
`st.one_of(a, b)`	Either a or b
`st.sampled_from([...])`	Pick from list
`@st.composite`	Custom strategy

Setting	Purpose
`max_examples=N`	Number of test cases
`deadline=None`	Disable timing
`derandomize=True`	Reproducible runs
`verbosity=Verbosity.verbose`	Debug output

property-testing.md 8.3 KB History Raw

Property-Based Testing with Hypothesis

Why Property Testing?

Basic Hypothesis Usage

Common Strategies

Building Custom Strategies

Filtering and Assumptions

Settings and Configuration

Stateful Testing

pytest Integration

Shrinking Examples

Common Patterns

Quick Reference

property-testing.md 8.3 KB

History Raw